diff --git a/packages/multimedia/ffmpeg/package.mk b/packages/multimedia/ffmpeg/package.mk
index f7be74ee75..057d1692fe 100644
--- a/packages/multimedia/ffmpeg/package.mk
+++ b/packages/multimedia/ffmpeg/package.mk
@@ -17,9 +17,9 @@
 ################################################################################
 
 PKG_NAME="ffmpeg"
-# Current branch is: release/3.4-kodi
-PKG_VERSION="f96fd5c"
-PKG_SHA256="35ccc07c72b203101030a35b4bb11779365adb7bbf143ef1d68a1f87c781e38b"
+# Current branch is: release/4.0-kodi
+PKG_VERSION="e115b34"
+PKG_SHA256="d9aa2a281f002982474b45980553d3669a8c79021cf08e4cfcff5dd6e8e81268"
 PKG_ARCH="any"
 PKG_LICENSE="LGPLv2.1+"
 PKG_SITE="https://ffmpeg.org"
@@ -127,7 +127,6 @@ configure_target() {
               --disable-extra-warnings \
               --disable-ffprobe \
               --disable-ffplay \
-              --disable-ffserver \
               --enable-ffmpeg \
               --enable-avdevice \
               --enable-avcodec \
diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-99.0001-backport-rkmppdec-fixes.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-99.0001-backport-rkmppdec-fixes.patch
deleted file mode 100644
index b4359c08e5..0000000000
--- a/packages/multimedia/ffmpeg/patches/ffmpeg-99.0001-backport-rkmppdec-fixes.patch
+++ /dev/null
@@ -1,168 +0,0 @@
-From ed4a91d4f4bd7ab99f2be901285b20a0cde52902 Mon Sep 17 00:00:00 2001
-From: LongChair <longchair@hotmail.com>
-Date: Sat, 6 Jan 2018 09:36:58 +0100
-Subject: [PATCH] avcodec/rkmpp : Fix broken build due to missing control
- operation
-
-This patch is taking care of https://trac.ffmpeg.org/ticket/6834.
-It seems that one of the control operations that was available to get
-the free decoders input slots was removed.
-
-There is another control operation to retrieve the used slots. Given
-that the input slot count is hardcoded to 4 in mpp at this point,
-replacing the old control operation by the other one.
-
-This was tested on Rockchip ROCK64.
-
-Signed-off-by: wm4 <nfxjfg@googlemail.com>
-(cherry picked from commit c6f84106366c6f243a8b07dbffcc7880009aa904)
----
- configure             |  6 ++----
- libavcodec/rkmppdec.c | 10 ++++++----
- 2 files changed, 8 insertions(+), 8 deletions(-)
-
-diff --git a/configure b/configure
-index 1797c5dd4f..4db1c9b73f 100755
---- a/configure
-+++ b/configure
-@@ -6077,10 +6077,8 @@ enabled openssl           && { use_pkg_config openssl openssl openssl/ssl.h OPEN
-                                check_lib openssl openssl/ssl.h SSL_library_init -lssl32 -leay32 ||
-                                check_lib openssl openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
-                                die "ERROR: openssl not found"; }
--enabled rkmpp             && { { require_pkg_config rockchip_mpp rockchip_mpp rockchip/rk_mpi.h mpp_create ||
--                                 die "ERROR : Rockchip MPP was not found."; } &&
--                               { check_func_headers rockchip/rk_mpi_cmd.h "MPP_DEC_GET_FREE_PACKET_SLOT_COUNT" ||
--                                 die "ERROR: Rockchip MPP is outdated, please get a more recent one."; } &&
-+enabled rkmpp             && { require_pkg_config rkmpp rockchip_mpp  rockchip/rk_mpi.h mpp_create &&
-+                               require_pkg_config rockchip_mpp "rockchip_mpp >= 1.3.7" rockchip/rk_mpi.h mpp_create &&
-                                { enabled libdrm ||
-                                  die "ERROR: rkmpp requires --enable-libdrm"; }
-                              }
-diff --git a/libavcodec/rkmppdec.c b/libavcodec/rkmppdec.c
-index bdf4dc4208..ebc021e3d8 100644
---- a/libavcodec/rkmppdec.c
-+++ b/libavcodec/rkmppdec.c
-@@ -39,6 +39,7 @@
- 
- #define RECEIVE_FRAME_TIMEOUT   100
- #define FRAMEGROUP_MAX_FRAMES   16
-+#define INPUT_MAX_PACKETS       4
- 
- typedef struct {
-     MppCtx ctx;
-@@ -514,16 +515,17 @@ static int rkmpp_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-     RKMPPDecoder *decoder = (RKMPPDecoder *)rk_context->decoder_ref->data;
-     int ret = MPP_NOK;
-     AVPacket pkt = {0};
--    RK_S32 freeslots;
-+    RK_S32 usedslots, freeslots;
- 
-     if (!decoder->eos_reached) {
-         // we get the available slots in decoder
--        ret = decoder->mpi->control(decoder->ctx, MPP_DEC_GET_FREE_PACKET_SLOT_COUNT, &freeslots);
-+        ret = decoder->mpi->control(decoder->ctx, MPP_DEC_GET_STREAM_COUNT, &usedslots);
-         if (ret != MPP_OK) {
--            av_log(avctx, AV_LOG_ERROR, "Failed to get decoder free slots (code = %d).\n", ret);
-+            av_log(avctx, AV_LOG_ERROR, "Failed to get decoder used slots (code = %d).\n", ret);
-             return ret;
-         }
- 
-+        freeslots = INPUT_MAX_PACKETS - usedslots;
-         if (freeslots > 0) {
-             ret = ff_decode_get_packet(avctx, &pkt);
-             if (ret < 0 && ret != AVERROR_EOF) {
-@@ -540,7 +542,7 @@ static int rkmpp_receive_frame(AVCodecContext *avctx, AVFrame *frame)
-         }
- 
-         // make sure we keep decoder full
--        if (freeslots > 1 && decoder->first_frame)
-+        if (freeslots > 1)
-             return AVERROR(EAGAIN);
-     }
- 
-
-From 617c895d27198bb9391a001932e288d1dfe4f728 Mon Sep 17 00:00:00 2001
-From: LongChair <longchair@hotmail.com>
-Date: Tue, 2 Jan 2018 12:38:01 +0100
-Subject: [PATCH] avcodec/rkmpp : remove stream start retries before first
- frame.
-
-those were needed because of some odd mpp behavior that seems to have
-been fixed.
-
-Makes the code cleaner.
-
-Signed-off-by: wm4 <nfxjfg@googlemail.com>
-(cherry picked from commit 2ca65fc7b74444edd51d5803a2c1e05a801a6023)
----
- libavcodec/rkmppdec.c | 24 +++---------------------
- 1 file changed, 3 insertions(+), 21 deletions(-)
-
-diff --git a/libavcodec/rkmppdec.c b/libavcodec/rkmppdec.c
-index ebc021e3d8..9dfeb742ab 100644
---- a/libavcodec/rkmppdec.c
-+++ b/libavcodec/rkmppdec.c
-@@ -46,7 +46,6 @@ typedef struct {
-     MppApi *mpi;
-     MppBufferGroup frame_group;
- 
--    char first_frame;
-     char first_packet;
-     char eos_reached;
- 
-@@ -328,28 +327,14 @@ static int rkmpp_retrieve_frame(AVCodecContext *avctx, AVFrame *frame)
-     MppBuffer buffer = NULL;
-     AVDRMFrameDescriptor *desc = NULL;
-     AVDRMLayerDescriptor *layer = NULL;
--    int retrycount = 0;
-     int mode;
-     MppFrameFormat mppformat;
-     uint32_t drmformat;
- 
--    // on start of decoding, MPP can return -1, which is supposed to be expected
--    // this is due to some internal MPP init which is not completed, that will
--    // only happen in the first few frames queries, but should not be interpreted
--    // as an error, Therefore we need to retry a couple times when we get -1
--    // in order to let it time to complete it's init, then we sleep a bit between retries.
--retry_get_frame:
-     ret = decoder->mpi->decode_get_frame(decoder->ctx, &mppframe);
--    if (ret != MPP_OK && ret != MPP_ERR_TIMEOUT && !decoder->first_frame) {
--        if (retrycount < 5) {
--            av_log(avctx, AV_LOG_DEBUG, "Failed to get a frame, retrying (code = %d, retrycount = %d)\n", ret, retrycount);
--            usleep(10000);
--            retrycount++;
--            goto retry_get_frame;
--        } else {
--            av_log(avctx, AV_LOG_ERROR, "Failed to get a frame from MPP (code = %d)\n", ret);
--            goto fail;
--        }
-+    if (ret != MPP_OK && ret != MPP_ERR_TIMEOUT) {
-+        av_log(avctx, AV_LOG_ERROR, "Failed to get a frame from MPP (code = %d)\n", ret);
-+        goto fail;
-     }
- 
-     if (mppframe) {
-@@ -365,7 +350,6 @@ retry_get_frame:
-             avctx->height = mpp_frame_get_height(mppframe);
- 
-             decoder->mpi->control(decoder->ctx, MPP_DEC_SET_INFO_CHANGE_READY, NULL);
--            decoder->first_frame = 1;
- 
-             av_buffer_unref(&decoder->frames_ref);
- 
-@@ -479,7 +463,6 @@ retry_get_frame:
-                 goto fail;
-             }
- 
--            decoder->first_frame = 0;
-             return 0;
-         } else {
-             av_log(avctx, AV_LOG_ERROR, "Failed to retrieve the frame buffer, frame is dropped (code = %d)\n", ret);
-@@ -559,7 +542,6 @@ static void rkmpp_flush(AVCodecContext *avctx)
- 
-     ret = decoder->mpi->reset(decoder->ctx);
-     if (ret == MPP_OK) {
--        decoder->first_frame = 1;
-         decoder->first_packet = 1;
-     } else
-         av_log(avctx, AV_LOG_ERROR, "Failed to reset MPI (code = %d)\n", ret);
diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-99.1003-pfcd_hevc_optimisations.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-99.1003-pfcd_hevc_optimisations.patch
index 629ea1b225..ef2f4d7d62 100644
--- a/packages/multimedia/ffmpeg/patches/ffmpeg-99.1003-pfcd_hevc_optimisations.patch
+++ b/packages/multimedia/ffmpeg/patches/ffmpeg-99.1003-pfcd_hevc_optimisations.patch
@@ -1,5 +1,5 @@
 diff --git a/.gitignore b/.gitignore
-index dabb51762d..0b1f739d22 100644
+index 0e57cb0b4c..b2e3374fea 100644
 --- a/.gitignore
 +++ b/.gitignore
 @@ -1,6 +1,7 @@
@@ -19,18 +19,18 @@ index dabb51762d..0b1f739d22 100644
  /ffplay
  /ffprobe
 diff --git a/configure b/configure
-index 18d80ee87a..9e621d09c1 100755
+index dee507cb6a..9a93189107 100755
 --- a/configure
 +++ b/configure
-@@ -313,6 +313,7 @@ External library support:
+@@ -318,6 +318,7 @@ External library support:
    --enable-libmfx          enable Intel MediaSDK (AKA Quick Sync Video) code via libmfx [no]
    --enable-libnpp          enable Nvidia Performance Primitives-based code [no]
    --enable-mmal            enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no]
 +  --enable-rpi             enable other rpi specific stuff [no]
+   --disable-nvdec          disable Nvidia video decoding acceleration (via hwaccel) [autodetect]
    --disable-nvenc          disable Nvidia video encoding code [autodetect]
    --enable-omx             enable OpenMAX IL code [no]
-   --enable-omx-rpi         enable OpenMAX IL code for Raspberry Pi [no]
-@@ -1682,6 +1683,7 @@ FEATURE_LIST="
+@@ -1776,6 +1777,7 @@ FEATURE_LIST="
      gray
      hardcoded_tables
      omx_rpi
@@ -38,7 +38,7 @@ index 18d80ee87a..9e621d09c1 100755
      runtime_cpudetect
      safe_bitstream_reader
      shared
-@@ -2198,6 +2200,7 @@ CONFIG_EXTRA="
+@@ -2293,6 +2295,7 @@ CONFIG_EXTRA="
      rtpdec
      rtpenc_chain
      rv34dsp
@@ -46,7 +46,7 @@ index 18d80ee87a..9e621d09c1 100755
      sinewin
      snappy
      srtp
-@@ -2500,6 +2503,8 @@ hap_decoder_select="snappy texturedsp"
+@@ -2610,6 +2613,8 @@ hap_decoder_select="snappy texturedsp"
  hap_encoder_deps="libsnappy"
  hap_encoder_select="texturedspenc"
  hevc_decoder_select="bswapdsp cabac golomb hevcparse videodsp"
@@ -55,17 +55,17 @@ index 18d80ee87a..9e621d09c1 100755
  huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
  huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp"
  iac_decoder_select="imc_decoder"
-@@ -3269,6 +3274,8 @@ tinterlace_filter_deps="gpl"
+@@ -3392,6 +3397,8 @@ tinterlace_filter_deps="gpl"
  tinterlace_merge_test_deps="tinterlace_filter"
  tinterlace_pad_test_deps="tinterlace_filter"
  tonemap_filter_deps="const_nan"
 +unsand_filter_deps="rpi"
 +unsand_filter_select="sand"
+ unsharp_opencl_filter_deps="opencl"
  uspp_filter_deps="gpl avcodec"
  vaguedenoiser_filter_deps="gpl"
- vidstabdetect_filter_deps="libvidstab"
 diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
-index 3ee31473dc..6875200380 100644
+index 4dbe72186d..0e48ecb9da 100644
 --- a/fftools/ffmpeg.c
 +++ b/fftools/ffmpeg.c
 @@ -24,6 +24,12 @@
@@ -81,15 +81,7 @@ index 3ee31473dc..6875200380 100644
  #include <ctype.h>
  #include <string.h>
  #include <math.h>
-@@ -43,6 +49,7 @@
- #include "libavformat/avformat.h"
- #include "libavdevice/avdevice.h"
- #include "libswresample/swresample.h"
-+#include "libavutil/atomic.h"
- #include "libavutil/opt.h"
- #include "libavutil/channel_layout.h"
- #include "libavutil/parseutils.h"
-@@ -69,6 +76,25 @@
+@@ -70,6 +76,25 @@
  # include "libavfilter/buffersrc.h"
  # include "libavfilter/buffersink.h"
  
@@ -115,7 +107,7 @@ index 3ee31473dc..6875200380 100644
  #if HAVE_SYS_RESOURCE_H
  #include <sys/time.h>
  #include <sys/types.h>
-@@ -165,6 +191,241 @@ static int restore_tty;
+@@ -162,6 +187,241 @@ static int restore_tty;
  static void free_input_threads(void);
  #endif
  
@@ -152,7 +144,7 @@ index 3ee31473dc..6875200380 100644
 +static void display_cb_input(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer) {
 +    rpi_display_env_t *const de = (rpi_display_env_t *)port->userdata;
 +    av_rpi_zc_unref(buffer->user_data);
-+    avpriv_atomic_int_add_and_fetch(&de->rpi_display_count, -1);
++    atomic_fetch_add(&de->rpi_display_count, -1);
 +    mmal_buffer_header_release(buffer);
 +}
 +
@@ -275,7 +267,7 @@ index 3ee31473dc..6875200380 100644
 +    if (de == NULL)
 +        return;
 +
-+    if (avpriv_atomic_int_get(&de->rpi_display_count) >= DISPLAY_PORT_DEPTH - 1) {
++    if (atomic_load(&de->rpi_display_count) >= DISPLAY_PORT_DEPTH - 1) {
 +        av_log(s, AV_LOG_VERBOSE, "Frame dropped\n");
 +        return;
 +    }
@@ -302,10 +294,10 @@ index 3ee31473dc..6875200380 100644
 +        buf->offset = av_rpi_zc_offset(fr_buf);
 +        buf->length = av_rpi_zc_length(fr_buf);
 +        buf->alloc_size = av_rpi_zc_numbytes(fr_buf);
-+        avpriv_atomic_int_add_and_fetch(&de->rpi_display_count, 1);
++        atomic_fetch_add(&de->rpi_display_count, 1);
 +    }
 +#if RPI_DISPLAY_ALL
-+    while (avpriv_atomic_int_get(&de->rpi_display_count) >= DISPLAY_PORT_DEPTH - 1) {
++    while (atomic_load(&de->rpi_display_count) >= DISPLAY_PORT_DEPTH - 1) {
 +        usleep(5000);
 +    }
 +#endif
@@ -330,8 +322,8 @@ index 3ee31473dc..6875200380 100644
 +        }
 +
 +        // The above disable should kick out all buffers - check that
-+        if (avpriv_atomic_int_get(&de->rpi_display_count) != 0) {
-+            av_log(NULL, AV_LOG_WARNING, "Exiting with display count non-zero:%d\n", avpriv_atomic_int_get(&de->rpi_display_count));
++        if (atomic_load(&de->rpi_display_count) != 0) {
++            av_log(NULL, AV_LOG_WARNING, "Exiting with display count non-zero:%d\n", atomic_load(&de->rpi_display_count));
 +        }
 +
 +        if (de->conn != NULL) {
@@ -357,7 +349,7 @@ index 3ee31473dc..6875200380 100644
  /* sub2video hack:
     Convert subtitles to video with alpha to insert them in filter graphs.
     This is a temporary solution until libavfilter gets real subtitles support.
-@@ -575,6 +836,11 @@ static void ffmpeg_cleanup(int ret)
+@@ -583,6 +843,11 @@ static void ffmpeg_cleanup(int ret)
          avformat_close_input(&input_files[i]->ctx);
          av_freep(&input_files[i]);
      }
@@ -369,7 +361,7 @@ index 3ee31473dc..6875200380 100644
      for (i = 0; i < nb_input_streams; i++) {
          InputStream *ist = input_streams[i];
  
-@@ -586,7 +852,9 @@ static void ffmpeg_cleanup(int ret)
+@@ -594,7 +859,9 @@ static void ffmpeg_cleanup(int ret)
          av_freep(&ist->filters);
          av_freep(&ist->hwaccel_device);
          av_freep(&ist->dts_buffer);
@@ -380,7 +372,7 @@ index 3ee31473dc..6875200380 100644
          avcodec_free_context(&ist->dec_ctx);
  
          av_freep(&input_streams[i]);
-@@ -617,6 +885,7 @@ static void ffmpeg_cleanup(int ret)
+@@ -625,6 +892,7 @@ static void ffmpeg_cleanup(int ret)
      }
      term_exit();
      ffmpeg_exited = 1;
@@ -388,7 +380,7 @@ index 3ee31473dc..6875200380 100644
  }
  
  void remove_avoptions(AVDictionary **a, AVDictionary *b)
-@@ -1052,6 +1321,17 @@ static void do_video_out(OutputFile *of,
+@@ -1060,6 +1328,17 @@ static void do_video_out(OutputFile *of,
      if (ost->source_index >= 0)
          ist = input_streams[ost->source_index];
  
@@ -406,7 +398,7 @@ index 3ee31473dc..6875200380 100644
      frame_rate = av_buffersink_get_frame_rate(filter);
      if (frame_rate.num > 0 && frame_rate.den > 0)
          duration = 1/(av_q2d(frame_rate) * av_q2d(enc->time_base));
-@@ -2165,8 +2445,8 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame)
+@@ -2132,8 +2411,8 @@ static int ifilter_send_frame(InputFilter *ifilter, AVFrame *frame)
                         ifilter->channel_layout != frame->channel_layout;
          break;
      case AVMEDIA_TYPE_VIDEO:
@@ -417,7 +409,7 @@ index 3ee31473dc..6875200380 100644
          break;
      }
  
-@@ -2896,6 +3176,12 @@ static int init_input_stream(int ist_index, char *error, int error_len)
+@@ -2887,6 +3166,12 @@ static int init_input_stream(int ist_index, char *error, int error_len)
          ist->dec_ctx->opaque                = ist;
          ist->dec_ctx->get_format            = get_format;
          ist->dec_ctx->get_buffer2           = get_buffer;
@@ -431,10 +423,10 @@ index 3ee31473dc..6875200380 100644
  
          av_opt_set_int(ist->dec_ctx, "refcounted_frames", 1, 0);
 diff --git a/fftools/ffmpeg_filter.c b/fftools/ffmpeg_filter.c
-index aacc185059..33c054294c 100644
+index 877fd670e6..1efd3a43a8 100644
 --- a/fftools/ffmpeg_filter.c
 +++ b/fftools/ffmpeg_filter.c
-@@ -1178,8 +1178,8 @@ int ifilter_parameters_from_frame(InputFilter *ifilter, const AVFrame *frame)
+@@ -1179,8 +1179,8 @@ int ifilter_parameters_from_frame(InputFilter *ifilter, const AVFrame *frame)
  
      ifilter->format = frame->format;
  
@@ -446,10 +438,10 @@ index aacc185059..33c054294c 100644
  
      ifilter->sample_rate         = frame->sample_rate;
 diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
-index 100fa76e46..93a1b8edaf 100644
+index d7a7eb0662..3949c9e76b 100644
 --- a/fftools/ffmpeg_opt.c
 +++ b/fftools/ffmpeg_opt.c
-@@ -706,11 +706,19 @@ static AVCodec *choose_decoder(OptionsContext *o, AVFormatContext *s, AVStream *
+@@ -684,11 +684,19 @@ static AVCodec *choose_decoder(OptionsContext *o, AVFormatContext *s, AVStream *
  
      MATCH_PER_STREAM_OPT(codec_names, str, codec_name, s, st);
      if (codec_name) {
@@ -470,18 +462,18 @@ index 100fa76e46..93a1b8edaf 100644
  
  /* Add all the streams from the given input file to the global
 diff --git a/libavcodec/Makefile b/libavcodec/Makefile
-index c4ec09b1c4..f2abbb06b3 100644
+index 4b8ad121db..f6e6784e5a 100644
 --- a/libavcodec/Makefile
 +++ b/libavcodec/Makefile
-@@ -4,6 +4,7 @@ DESC = FFmpeg codec library
- HEADERS = avcodec.h                                                     \
+@@ -6,6 +6,7 @@ HEADERS = ac3_parser.h                                                  \
+           avcodec.h                                                     \
            avdct.h                                                       \
            avfft.h                                                       \
 +          rpi_zc.h                                                      \
            d3d11va.h                                                     \
            dirac.h                                                       \
            dv_profile.h                                                  \
-@@ -123,6 +124,7 @@ OBJS-$(CONFIG_QSVDEC)                  += qsvdec.o
+@@ -128,6 +129,7 @@ OBJS-$(CONFIG_QSVDEC)                  += qsvdec.o
  OBJS-$(CONFIG_QSVENC)                  += qsvenc.o
  OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
  OBJS-$(CONFIG_RDFT)                    += rdft.o
@@ -489,7 +481,7 @@ index c4ec09b1c4..f2abbb06b3 100644
  OBJS-$(CONFIG_RV34DSP)                 += rv34dsp.o
  OBJS-$(CONFIG_SHARED)                  += log2_tab.o reverse.o
  OBJS-$(CONFIG_SINEWIN)                 += sinewin.o sinewin_fixed.o
-@@ -351,6 +353,12 @@ OBJS-$(CONFIG_HAP_ENCODER)             += hapenc.o hap.o
+@@ -360,6 +362,12 @@ OBJS-$(CONFIG_HAP_ENCODER)             += hapenc.o hap.o
  OBJS-$(CONFIG_HEVC_DECODER)            += hevcdec.o hevc_mvs.o \
                                            hevc_cabac.o hevc_refs.o hevcpred.o    \
                                            hevcdsp.o hevc_filter.o hevc_data.o
@@ -499,10 +491,10 @@ index c4ec09b1c4..f2abbb06b3 100644
 +                                          rpi_hevc_shader.o rpi_hevc_shader_template.o       \
 +                                          rpi_hevc_parse.o h2645_parse.o rpi_hevc_ps.o \
 +                                          rpi_hevc_sei.o rpi_hevc_data.o
- OBJS-$(CONFIG_HEVC_CUVID_DECODER)      += cuvid.o
+ OBJS-$(CONFIG_HEVC_AMF_ENCODER)        += amfenc_hevc.o
+ OBJS-$(CONFIG_HEVC_CUVID_DECODER)      += cuviddec.o
  OBJS-$(CONFIG_HEVC_MEDIACODEC_DECODER) += mediacodecdec.o
- OBJS-$(CONFIG_HEVC_NVENC_ENCODER)      += nvenc_hevc.o
-@@ -1143,3 +1151,31 @@ $(SUBDIR)qdm2.o: $(SUBDIR)qdm2_tables.h
+@@ -1188,3 +1196,31 @@ $(SUBDIR)qdm2.o: $(SUBDIR)qdm2_tables.h
  $(SUBDIR)sinewin.o: $(SUBDIR)sinewin_tables.h
  $(SUBDIR)sinewin_fixed.o: $(SUBDIR)sinewin_fixed_tables.h
  endif
@@ -535,19 +528,61 @@ index c4ec09b1c4..f2abbb06b3 100644
 +$(SUBDIR)rpi_hevcdec.o $(SUBDIR)rpi_shader_template.o $(SUBDIR)rpi_qpu.o: $(SUBDIR)rpi_hevc_shader.h
 +endif
 diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
-index 4f34312e67..5361a22141 100644
+index 4d4ef530e4..fba8776c9f 100644
 --- a/libavcodec/allcodecs.c
 +++ b/libavcodec/allcodecs.c
-@@ -222,6 +222,7 @@ static void register_all(void)
-     REGISTER_DECODER(H264_VDPAU,        h264_vdpau);
- #endif
-     REGISTER_ENCDEC (HAP,               hap);
-+    REGISTER_DECODER(HEVC_RPI,          hevc_rpi);
-     REGISTER_DECODER(HEVC,              hevc);
-     REGISTER_DECODER(HEVC_QSV,          hevc_qsv);
-     REGISTER_DECODER(HEVC_RKMPP,        hevc_rkmpp);
+@@ -142,6 +142,7 @@ extern AVCodec ff_h264_qsv_decoder;
+ extern AVCodec ff_h264_rkmpp_decoder;
+ extern AVCodec ff_hap_encoder;
+ extern AVCodec ff_hap_decoder;
++extern AVCodec ff_hevc_rpi_decoder;
+ extern AVCodec ff_hevc_decoder;
+ extern AVCodec ff_hevc_qsv_decoder;
+ extern AVCodec ff_hevc_rkmpp_decoder;
+@@ -833,6 +834,41 @@ static enum AVCodecID remap_deprecated_codec_id(enum AVCodecID id)
+     }
+ }
+ 
++static int codec_supports_format(const AVCodec * const p, const enum AVPixelFormat fmt)
++{
++    const enum AVPixelFormat *candidate;
++
++    // A codec that does not advertise its formats is assumed to accept any
++    if (!p->pix_fmts)
++        return 1;
++    // AV_PIX_FMT_NONE is the list terminator, so it can never be a member
++    if (fmt == AV_PIX_FMT_NONE)
++        return 0;
++    // Scan the AV_PIX_FMT_NONE-terminated list for the requested format
++    for (candidate = p->pix_fmts; *candidate != AV_PIX_FMT_NONE; ++candidate)
++        if (*candidate == fmt)
++            return 1;
++    return 0;
++}
++
++// Look up a decoder for `id` whose pix_fmts list contains `fmt` (or is absent).
++// Prefers a non-experimental match, else an experimental one; NULL if none match.
++AVCodec *avcodec_find_decoder_by_id_and_fmt(enum AVCodecID id, enum AVPixelFormat fmt)
++{
++    const AVCodec *p, *experimental = NULL;
++    void *i = NULL; // iteration state handed to av_codec_iterate()
++
++    id = remap_deprecated_codec_id(id);
++    while ((p = av_codec_iterate(&i))) { // the iterator advances itself
++        if (!av_codec_is_decoder(p) || p->id != id || !codec_supports_format(p, fmt))
++            continue;
++        if (!(p->capabilities & AV_CODEC_CAP_EXPERIMENTAL) || experimental)
++            return (AVCodec *)p;
++        experimental = p; // remember the first experimental match as a fallback
++    }
++    return (AVCodec *)experimental;
++}
++
+ static AVCodec *find_codec(enum AVCodecID id, int (*x)(const AVCodec *))
+ {
+     const AVCodec *p, *experimental = NULL;
 diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
-index 1eeac5449e..022ab7ab3d 100644
+index e656011c3c..69cd820f06 100644
 --- a/libavcodec/arm/Makefile
 +++ b/libavcodec/arm/Makefile
 @@ -40,6 +40,7 @@ OBJS-$(CONFIG_AAC_DECODER)             += arm/aacpsdsp_init_arm.o       \
@@ -557,30 +592,31 @@ index 1eeac5449e..022ab7ab3d 100644
 +OBJS-$(CONFIG_HEVC_RPI_DECODER)        += arm/rpi_hevcdsp_init_arm.o
  OBJS-$(CONFIG_MLP_DECODER)             += arm/mlpdsp_init_arm.o
  OBJS-$(CONFIG_RV40_DECODER)            += arm/rv40dsp_init_arm.o
- OBJS-$(CONFIG_VORBIS_DECODER)          += arm/vorbisdsp_init_arm.o
-@@ -134,9 +135,16 @@ NEON-OBJS-$(CONFIG_AAC_DECODER)        += arm/aacpsdsp_neon.o           \
+ OBJS-$(CONFIG_SBC_ENCODER)             += arm/sbcdsp_init_arm.o
+@@ -136,10 +137,18 @@ NEON-OBJS-$(CONFIG_AAC_DECODER)        += arm/aacpsdsp_neon.o           \
  NEON-OBJS-$(CONFIG_LLAUDDSP)           += arm/lossless_audiodsp_neon.o
  NEON-OBJS-$(CONFIG_DCA_DECODER)        += arm/synth_filter_neon.o
  NEON-OBJS-$(CONFIG_HEVC_DECODER)       += arm/hevcdsp_init_neon.o       \
 +                                          arm/hevcdsp_idct_neon.o    \
                                            arm/hevcdsp_deblock_neon.o    \
--                                          arm/hevcdsp_idct_neon.o       \
-                                           arm/hevcdsp_qpel_neon.o
+                                           arm/hevcdsp_idct_neon.o       \
+                                           arm/hevcdsp_qpel_neon.o       \
+                                           arm/hevcdsp_sao_neon.o
 +NEON-OBJS-$(CONFIG_HEVC_RPI_DECODER)   += arm/rpi_hevcdsp_init_neon.o    \
 +                                          arm/rpi_hevc_misc_neon.o       \
 +                                          arm/rpi_hevcdsp_deblock_neon.o \
 +                                          arm/rpi_hevcdsp_idct_neon.o    \
++                                          arm/rpi_hevcdsp_res8_neon.o    \
 +                                          arm/rpi_hevcdsp_res16_neon.o   \
-+                                          arm/rpi_hevcdsp_sao_neon.o     \
-+                                          arm/rpi_hevcdsp_cres_neon.o
++                                          arm/rpi_hevcdsp_sao_neon.o
  NEON-OBJS-$(CONFIG_RV30_DECODER)       += arm/rv34dsp_neon.o
  NEON-OBJS-$(CONFIG_RV40_DECODER)       += arm/rv34dsp_neon.o            \
                                            arm/rv40dsp_neon.o
 diff --git a/libavcodec/arm/cabac.h b/libavcodec/arm/cabac.h
-index fdbf86b45e..2ff0973f31 100644
+index fdbf86b45e..4755f20e2e 100644
 --- a/libavcodec/arm/cabac.h
 +++ b/libavcodec/arm/cabac.h
-@@ -26,83 +26,198 @@
+@@ -26,83 +26,209 @@
  #include "libavutil/internal.h"
  #include "libavcodec/cabac.h"
  
@@ -605,9 +641,11 @@ index fdbf86b45e..2ff0973f31 100644
 +        "sub     %[bit], %[bit], %[tmp2]                  \n\t"
 +        "mov     %[tmp1], %[bit]                          \n\t"
 +        "cmp     %[low], %[bit], lsl #17                  \n\t"
++        "itt     ge                                       \n\t"
 +        "movge   %[tmp1], %[tmp2]                         \n\t"
 +        "mvnge   %[ptr], %[ptr]                           \n\t"
 +        "clz     %[tmp2], %[tmp1]                         \n\t"
++        "it      ge                                       \n\t"
 +        "subge   %[low], %[low], %[bit], lsl #17          \n\t"
 +        "sub     %[tmp2], %[tmp2], #23                    \n\t"
 +        "and     %[bit], %[ptr], #1                       \n\t"
@@ -626,8 +664,13 @@ index fdbf86b45e..2ff0973f31 100644
 +        "strb    %[mlps_tables], [%[state]]               \n\t"
 +        "rbit    %[state], %[low]                         \n\t"
 +        "cmp     %[tmp1], %[ptr]                          \n\t"
++#if CONFIG_THUMB
++        "it      cs                                       \n\t"
++        "ldrhcs  %[tmp1], [%[ptr]], #2                    \n\t"
++#else
 +        "ldrcsh  %[tmp1], [%[ptr]], #2                    \n\t"
 +#endif
++#endif
 +        "clz     %[state], %[state]                       \n\t"
 +        "movw    %[mlps_tables], #0xffff                  \n\t"
 +        "sub     %[state], %[state], #16                  \n\t"
@@ -636,7 +679,12 @@ index fdbf86b45e..2ff0973f31 100644
 +        "str     %[ptr], [%[c], %[ptr_off]]               \n\t"
 +        "lsr     %[tmp1], %[tmp1], #15                    \n\t"
 +        "sub     %[tmp1], %[tmp1], %[mlps_tables]         \n\t"
++#if CONFIG_THUMB
++        "lsl     %[tmp1], %[tmp1], %[state]               \n\t"
++        "add     %[low], %[low], %[tmp1]                  \n\t"
++#else
 +        "add     %[low], %[low], %[tmp1], lsl %[state]    \n\t"
++#endif
 +        "str     %[low], [%[c], %[low_off]]               \n\t"
 +        "b       2f                                       \n\t"
 +        "1:                                               \n\t"
@@ -706,9 +754,8 @@ index fdbf86b45e..2ff0973f31 100644
 +        "ldr        %[tmp]   , [%[c], %[end_off]]   \n\t"
 +#endif
 +        "cmp        %[low]   , %[range], lsl #17    \n\t"
-+        "it         cs                              \n\t"
-+        "subcs      %[low]   , %[range], lsl #17    \n\t"
-+        "it         cs                              \n\t"
++        "itt         cs                              \n\t"
++        "subcs      %[low]   , %[low], %[range], lsl #17 \n\t"
 +        "movcs      %[rv]    , #1                   \n\t"
  #if UNCHECKED_BITSTREAM_READER
 -        "ldrh       %[tmp]        , [%[r_c]]                    \n\t"
@@ -765,7 +812,7 @@ index fdbf86b45e..2ff0973f31 100644
 -    return bit & 1;
 +        "str        %[ptr]   , [%[c], %[ptr_off]]   \n\t"
 +        "rev        %[tmp]   , %[tmp]               \n\t"
-+        "add        %[low]   , %[tmp], lsr #15      \n\t"
++        "add        %[low]   , %[low], %[tmp], lsr #15 \n\t"
 +        "movw       %[tmp]   , 0xFFFF               \n\t"
 +        "sub        %[low]   , %[tmp]               \n\t"
 +        "1:                                         \n\t"
@@ -802,7 +849,7 @@ index fdbf86b45e..2ff0973f31 100644
 +#endif
 +        "cmp        %[low]   , %[range], lsl #17    \n\t"
 +        "it         cs                              \n\t"
-+        "subcs      %[low]   , %[range], lsl #17    \n\t"
++        "subcs      %[low]   , %[low], %[range], lsl #17 \n\t"
 +        "it         cc                              \n\t"
 +        "rsbcc      %[rv]    , %[rv], #0            \n\t"
 +#if UNCHECKED_BITSTREAM_READER
@@ -821,7 +868,7 @@ index fdbf86b45e..2ff0973f31 100644
 +
 +        "str        %[ptr]   , [%[c], %[ptr_off]]   \n\t"
 +        "rev        %[tmp]   , %[tmp]               \n\t"
-+        "add        %[low]   , %[tmp], lsr #15      \n\t"
++        "add        %[low]   , %[low], %[tmp], lsr #15 \n\t"
 +        "movw       %[tmp]   , 0xFFFF               \n\t"
 +        "sub        %[low]   , %[tmp]               \n\t"
 +        "1:                                         \n\t"
@@ -849,10 +896,10 @@ index fdbf86b45e..2ff0973f31 100644
  #endif /* AVCODEC_ARM_CABAC_H */
 diff --git a/libavcodec/arm/rpi_hevc_cabac.h b/libavcodec/arm/rpi_hevc_cabac.h
 new file mode 100644
-index 0000000000..9c317c713b
+index 0000000000..c7df9f1e5a
 --- /dev/null
 +++ b/libavcodec/arm/rpi_hevc_cabac.h
-@@ -0,0 +1,526 @@
+@@ -0,0 +1,605 @@
 +/*
 + * This file is part of FFmpeg.
 + *
@@ -1037,7 +1084,12 @@ index 0000000000..9c317c713b
 +    unsigned int reg_b, tmp, st, bit;
 +     __asm__ (
 +// Get bin from map
++#if CONFIG_THUMB
++         "add        %[ctx_map]    , %[n]                        \n\t"
++         "ldrb       %[st]         , [%[ctx_map]]                \n\t"
++#else
 +         "ldrb       %[st]         , [%[ctx_map], %[n]]!         \n\t"
++#endif
 +         "1:                                                     \n\t"
 +
 +// Load state & ranges
@@ -1131,20 +1183,31 @@ index 0000000000..9c317c713b
 +
 +
 +#define get_cabac_by22_start get_cabac_by22_start_arm
-+static const uint32_t cabac_by22_inv_range[256] __attribute__((section(".text")));
 +static inline void get_cabac_by22_start_arm(CABACContext * const c)
 +{
 +    const uint8_t *ptr = c->bytestream;
 +    register uint32_t low __asm__("r1"), range __asm__("r2");
-+    uint32_t inv, m, range8, bits;
-+    av_assert0(offsetof (CABACContext, low) == 0);
-+    av_assert0(offsetof (CABACContext, range) == 4);
-+    av_assert0(offsetof (CABACContext, by22.range) == offsetof (CABACContext, by22.bits) + 2);
-+    __asm__ (
-+        "ldmia   %[c], {%[low], %[range]}                         \n\t"
++    uint32_t m, range8, bits;
 +#if !USE_BY22_DIV
-+        "adrl    %[inv], cabac_by22_inv_range                     \n\t"
++    uintptr_t inv;
 +#endif
++
++    av_assert2(offsetof (CABACContext, low) == 0);
++    av_assert2(offsetof (CABACContext, range) == 4);
++    av_assert2(offsetof (CABACContext, by22.range) == offsetof (CABACContext, by22.bits) + 2);
++    __asm__ volatile (
++        "ldmia   %[c], {%[low], %[range]}                         \n\t"
++        : // Outputs
++               [low]"=r"(low),
++             [range]"=r"(range)
++        : // Inputs
++                 [c]"r"(c)
++        : // Clobbers
++    );
++#if !USE_BY22_DIV
++    inv = (uintptr_t)cabac_by22_inv_range;
++#endif
++    __asm__ volatile (
 +        "ldr     %[m], [%[ptr]], #-("AV_STRINGIFY(CABAC_BITS)"/8) \n\t"
 +#if !USE_BY22_DIV
 +        "uxtb    %[range8], %[range]                              \n\t"
@@ -1163,20 +1226,26 @@ index 0000000000..9c317c713b
 +#else
 +        "strh    %[bits], [%[c], %[bits_off]]                     \n\t"
 +#endif
++#if CONFIG_THUMB
++        "lsr     %[m], %[ptr]                                     \n\t"
++        "eor     %[range], %[low], %[m]                           \n\t"
++#else
 +        "eor     %[range], %[low], %[m], lsr %[ptr]               \n\t"
++#endif
 +        : // Outputs
-+               [ptr]"+r"(ptr),
-+               [low]"=&r"(low),
-+             [range]"=&r"(range),
-+               [inv]"=&r"(inv),
++               [ptr]"+&r"(ptr),
++               [low]"+&r"(low),
++             [range]"+&r"(range),
++#if !USE_BY22_DIV
++               [inv]"+&r"(inv),
++#endif
 +                 [m]"=&r"(m),
 +            [range8]"=&r"(range8),
 +              [bits]"=&r"(bits)
 +        : // Inputs
 +                   [c]"r"(c),
 +            [bits_off]"J"(offsetof (CABACContext, by22.bits)),
-+             [ptr_off]"J"(offsetof (CABACContext, bytestream)),
-+                [cbir]"X"(cabac_by22_inv_range)
++             [ptr_off]"J"(offsetof (CABACContext, bytestream))
 +        : // Clobbers
 +            "memory"
 +    );
@@ -1217,13 +1286,23 @@ index 0000000000..9c317c713b
 +        "ldrh    %[tmp2], [%[cc], %[range_off]]    \n\t"
 +        "lsr     %[tmp1], %[val], %[tmp1]          \n\t"
 +        "ldr     %[val], [%[cc], %[low_off]]       \n\t"
++#if CONFIG_THUMB
++        "add     %[ptr], %[ptr], %[bits], lsr #3   \n\t"
++        "ldr     %[ptr], [%[ptr]]                  \n\t"
++#else
 +        "ldr     %[ptr], [%[ptr], %[bits], lsr #3] \n\t"
++#endif
 +        "mul     %[tmp1], %[tmp2], %[tmp1]         \n\t"
 +        "and     %[tmp2], %[bits], #7              \n\t"
 +        "strh    %[bits], [%[cc], %[bits_off]]     \n\t"
 +        "rev     %[ptr], %[ptr]                    \n\t"
 +        "lsl     %[tmp1], %[tmp1], #23             \n\t"
++#if CONFIG_THUMB
++        "lsl     %[val], %[n]                      \n\t"
++        "sub     %[val], %[tmp1]                   \n\t"
++#else
 +        "rsb     %[val], %[tmp1], %[val], lsl %[n] \n\t"
++#endif
 +        "lsl     %[ptr], %[ptr], %[tmp2]           \n\t"
 +        "orr     %[val], %[val], %[ptr], lsr #9    \n\t"
 +        "str     %[val], [%[cc], %[low_off]]       \n\t"
@@ -1257,6 +1336,7 @@ index 0000000000..9c317c713b
 +        "ldrh    %[tmp2], [%[cc], %[by22_bits_off]]           \n\t"
 +        "ldr     %[ptr], [%[cc], %[ptr_off]]                  \n\t"
 +        "cmp     %[prefix], #0                                \n\t"
++        "it      ne                                           \n\t"
 +        "umullne %[prefix], %[remain], %[prefix], %[remain]   \n\t"
 +        "ldrh    %[range], [%[cc], %[by22_range_off]]         \n\t"
 +        "lsl     %[remain], %[remain], #1                     \n\t"
@@ -1277,10 +1357,20 @@ index 0000000000..9c317c713b
 +        "rsb     %[tmp2], %[rice], #31                        \n\t"
 +        "lsl     %[remain], %[remain], %[prefix]              \n\t"
 +        "lsl     %[n2], %[n2], #23                            \n\t"
++#if CONFIG_THUMB
++        "lsl     %[range], %[n1]                              \n\t"
++        "sub     %[range], %[n2]                              \n\t"
++#else
 +        "rsb     %[range], %[n2], %[range], lsl %[n1]         \n\t"
++#endif
 +        "rev     %[ptr], %[ptr]                               \n\t"
 +        "lsl     %[n2], %[prefix], %[rice]                    \n\t"
++#if CONFIG_THUMB
++        "lsr     %[remain], %[tmp2]                           \n\t"
++        "add     %[remain], %[n2]                             \n\t"
++#else
 +        "add     %[remain], %[n2], %[remain], lsr %[tmp2]     \n\t"
++#endif
 +        "b       3f                                           \n\t"
 +        "1:                                                   \n\t"
 +        "add     %[n2], %[rice], %[prefix], lsl #1            \n\t"
@@ -1303,15 +1393,30 @@ index 0000000000..9c317c713b
 +        "rev     %[ptr], %[ptr]                               \n\t"
 +        "lsl     %[n2], %[n2], #23                            \n\t"
 +        "mov     %[range], #2                                 \n\t"
++#if CONFIG_THUMB
++        "lsl     %[tmp2], %[n1]                               \n\t"
++        "sub     %[tmp2], %[n2]                               \n\t"
++#else
 +        "rsb     %[tmp2], %[n2], %[tmp2], lsl %[n1]           \n\t"
++#endif
 +        "lsl     %[ptr], %[ptr], %[tmp1]                      \n\t"
 +        "lsl     %[rice], %[range], %[rice]                   \n\t"
 +        "orr     %[range], %[tmp2], %[ptr], lsr #9            \n\t"
++#if CONFIG_THUMB
++        "lsr     %[remain], %[prefix]                         \n\t"
++        "add     %[remain], %[rice]                           \n\t"
++#else
 +        "add     %[remain], %[rice], %[remain], lsr %[prefix] \n\t"
++#endif
 +        "b       4f                                           \n\t"
 +        "2:                                                   \n\t"
 +        "add     %[n1], %[tmp2], %[prefix]                    \n\t"
++#if CONFIG_THUMB
++        "add     %[tmp2], %[ptr], %[n1], lsr #3               \n\t"
++        "ldr     %[tmp2], [%[tmp2]]                           \n\t"
++#else
 +        "ldr     %[tmp2], [%[ptr], %[n1], lsr #3]             \n\t"
++#endif
 +        "rsb     %[tmp1], %[prefix], #32                      \n\t"
 +        "push    {%[rice]}                                    \n\t"
 +        "and     %[rice], %[n1], #7                           \n\t"
@@ -1324,16 +1429,27 @@ index 0000000000..9c317c713b
 +        "lsl     %[rice], %[tmp2], %[rice]                    \n\t"
 +        "sub     %[tmp2], %[n2], #2                           \n\t"
 +        "lsl     %[remain], %[remain], #23                    \n\t"
++#if CONFIG_THUMB
++        "lsl     %[ptr], %[prefix]                            \n\t"
++        "rsb     %[remain], %[ptr]                            \n\t"
++#else
 +        "rsb     %[remain], %[remain], %[ptr], lsl %[prefix]  \n\t"
++#endif
 +        "orr     %[remain], %[remain], %[rice], lsr #9        \n\t"
 +        "add     %[prefix], %[n1], %[tmp2]                    \n\t"
 +        "bic     %[n1], %[remain], #1                         \n\t"
 +        "ldr     %[ptr], [%[cc], %[ptr_off]]                  \n\t"
 +        "cmp     %[tmp1], #0                                  \n\t"
 +        "rsb     %[rice], %[tmp2], #32                        \n\t"
++        "it      ne                                           \n\t"
 +        "umullne %[tmp1], %[n1], %[tmp1], %[n1]               \n\t"
 +        "and     %[tmp1], %[prefix], #7                       \n\t"
++#if CONFIG_THUMB
++        "add     %[ptr], %[ptr], %[prefix], lsr #3            \n\t"
++        "ldr     %[ptr], [%[ptr]]                             \n\t"
++#else
 +        "ldr     %[ptr], [%[ptr], %[prefix], lsr #3]          \n\t"
++#endif
 +        "lsl     %[n1], %[n1], #1                             \n\t"
 +        "lsr     %[rice], %[n1], %[rice]                      \n\t"
 +        "rsb     %[n2], %[n2], #34                            \n\t"
@@ -1344,9 +1460,19 @@ index 0000000000..9c317c713b
 +        "strh    %[prefix], [%[cc], %[by22_bits_off]]         \n\t"
 +        "mov     %[prefix], #2                                \n\t"
 +        "lsl     %[range], %[range], #23                      \n\t"
++#if CONFIG_THUMB
++        "lsl     %[remain], %[tmp2]                           \n\t"
++        "rsb     %[range], %[remain]                          \n\t"
++#else
 +        "rsb     %[range], %[range], %[remain], lsl %[tmp2]   \n\t"
++#endif
 +        "lsl     %[remain], %[prefix], %[rice]                \n\t"
++#if CONFIG_THUMB
++        "lsr     %[n1], %[n2]                                 \n\t"
++        "add     %[remain], %[n1]                             \n\t"
++#else
 +        "add     %[remain], %[remain], %[n1], lsr %[n2]       \n\t"
++#endif
 +        "3:                                                   \n\t"
 +        "lsl     %[ptr], %[ptr], %[tmp1]                      \n\t"
 +        "orr     %[range], %[range], %[ptr], lsr #9           \n\t"
@@ -1381,10 +1507,10 @@ index 0000000000..9c317c713b
 +#endif /* AVCODEC_ARM_HEVC_CABAC_H */
 diff --git a/libavcodec/arm/rpi_hevc_idct_fn_neon.S b/libavcodec/arm/rpi_hevc_idct_fn_neon.S
 new file mode 100644
-index 0000000000..c8d68f7294
+index 0000000000..0211e447a8
 --- /dev/null
 +++ b/libavcodec/arm/rpi_hevc_idct_fn_neon.S
-@@ -0,0 +1,181 @@
+@@ -0,0 +1,161 @@
 +@ Included multiple times from hevc_idct_neon.S
 +@ Macros defined there
 +
@@ -1404,113 +1530,93 @@ index 0000000000..c8d68f7294
 +
 +function JOIN(ff_hevc_rpi_idct_8x8_dc_neon_, BIT_DEPTH), export=1
 +        ldrsh       r1, [r0]
++        add         r2, r0, #32             @ fill 128 bytes with q8/q9 using two pointers: r0 and r2 = r0 + 32
++        mov         r3, #64                 @ r3 = post-increment applied to each pointer
 +        add         r1, #DC_ADD
 +        asr         r1, #DC_SHIFT
 +        vdup.16     q8, r1
 +        vdup.16     q9, r1
-+        vmov.16     q10, q8
-+        vmov.16     q11, q8
-+        vmov.16     q12, q8
-+        vmov.16     q13, q8
-+        vmov.16     q14, q8
-+        vmov.16     q15, q8
-+        vstm        r0, {q8-q15}
++        vst1.16     {q8, q9}, [r0], r3      @ bytes   0..31, then r0 += 64
++        vst1.16     {q8, q9}, [r2], r3      @ bytes  32..63, then r2 += 64
++        vst1.16     {q8, q9}, [r0]          @ bytes  64..95
++        vst1.16     {q8, q9}, [r2]          @ bytes  96..127
 +        bx lr
 +endfunc
 +
 +function JOIN(ff_hevc_rpi_idct_16x16_dc_neon_, BIT_DEPTH), export=1
 +        ldrsh       r1, [r0]
++        add         r2, r0, #32             @ fill 512 bytes with q8/q9 using two pointers: r0 and r2 = r0 + 32
++        mov         r3, #64                 @ r3 = post-increment applied to each pointer
 +        add         r1, #DC_ADD
++        mov         ip, #16*16              @ ip = number of 16-bit elements still to write (256)
 +        asr         r1, #DC_SHIFT
 +        vdup.16     q8, r1
 +        vdup.16     q9, r1
-+        vmov.16     q10, q8
-+        vmov.16     q11, q8
-+        vmov.16     q12, q8
-+        vmov.16     q13, q8
-+        vmov.16     q14, q8
-+        vmov.16     q15, q8
-+        vstm        r0!, {q8-q15}
-+        vstm        r0!, {q8-q15}
-+        vstm        r0!, {q8-q15}
-+        vstm        r0, {q8-q15}
++1:      vst1.16     {q8, q9}, [r0], r3      @ 32 bytes at r0, then r0 += 64
++        subs        ip, ip, #32             @ each iteration writes 64 bytes = 32 elements
++        vst1.16     {q8, q9}, [r2], r3      @ 32 bytes at r2, then r2 += 64
++        bhi         1b                      @ loop while ip is still above zero (8 iterations)
 +        bx lr
 +endfunc
 +
 +function JOIN(ff_hevc_rpi_idct_32x32_dc_neon_, BIT_DEPTH), export=1
 +        ldrsh       r1, [r0]
++        add         r2, r0, #32             @ fill 2048 bytes with q8/q9 using two pointers: r0 and r2 = r0 + 32
++        mov         r3, #64                 @ r3 = post-increment applied to each pointer
 +        add         r1, #DC_ADD
++        mov         ip, #32*32              @ ip = number of 16-bit elements still to write (1024)
 +        asr         r1, #DC_SHIFT
-+        mov         r3, #16
 +        vdup.16     q8, r1
 +        vdup.16     q9, r1
-+        vmov.16     q10, q8
-+        vmov.16     q11, q8
-+        vmov.16     q12, q8
-+        vmov.16     q13, q8
-+        vmov.16     q14, q8
-+        vmov.16     q15, q8
-+1:      subs        r3, #1
-+        vstm        r0!, {q8-q15}
-+        bne         1b
++1:      vst1.16     {q8, q9}, [r0], r3      @ 32 bytes at r0, then r0 += 64
++        subs        ip, ip, #32             @ each iteration writes 64 bytes = 32 elements
++        vst1.16     {q8, q9}, [r2], r3      @ 32 bytes at r2, then r2 += 64
++        bhi         1b                      @ loop while ip is still above zero (32 iterations)
 +        bx lr
 +endfunc
 +
 +
 +function JOIN(ff_hevc_rpi_transform_4x4_neon_, BIT_DEPTH), export=1
-+        vpush       {d8-d15}
-+        vld1.16     {q14, q15}, [r0]  // coeffs
-+        ldr         r3, =0x00240053 // 36 and 83
-+        vmov.32     d0[0], r3
++        vldr.i32    s0, =0x00240053 // 36 and 83: s0 low halfword = 83, high halfword = 36, loaded from the literal pool
++        vld1.16     {q14, q15}, [r0 :256]  // coeffs: 32 bytes (16 x 16-bit) read from r0, address hinted as 256-bit aligned
 +
-+        tr4_shift d28, d29, d30, d31, #7
++        tr4_shift   #7  // first of two tr4_shift applications, shift = 7 (macro comes from the including file)
 +
-+        vtrn.16     d28, d29
-+        vtrn.16     d30, d31
-+        vtrn.32     q14, q15
++        vzip.16     d28, d29  // interleave the 16-bit lanes of d28 and d29
++        vzip.16     d30, d31  // interleave the 16-bit lanes of d30 and d31
++        vzip.32     q14, q15  // interleave the 32-bit lanes of q14 and q15
 +
-+        tr4_shift d28, d29, d30, d31, #(TRN_SHIFT)
++        tr4_shift   #TRN_SHIFT  // second application, shift chosen by the including file
 +
-+        vtrn.16     d28, d29
-+        vtrn.16     d30, d31
-+        vtrn.32     q14, q15
-+
-+        vst1.16     {q14, q15}, [r0]
-+        vpop        {d8-d15}
++        vst4.16     {q14, q15}, [r0 :256]  // 4-way interleaving store: lane i of d28..d31 forms output group i, written back over the input
 +        bx lr
++
++        .ltorg  // literal pool for the vldr =const above is emitted here, after the return
 +endfunc
 +
 +
 +
 +function JOIN(ff_hevc_rpi_transform_luma_4x4_neon_, BIT_DEPTH), export=1
-+        vpush       {d8-d15}
-+        vld1.16     {q14, q15}, [r0]  // coeffs
-+        ldr         r3, =0x4a  // 74
-+        vmov.32     d0[0], r3
-+        ldr         r3, =0x1d  // 29
-+        vmov.32     d0[1], r3
-+        ldr         r3, =0x37  // 55
-+        vmov.32     d1[0], r3
++        vmov.i32    d0, #0x4a  // 74 in both 32-bit lanes of d0
++        vld1.16     {q14, q15}, [r0 :256]  // coeffs: 32 bytes (16 x 16-bit) read from r0, address hinted as 256-bit aligned
++        vmov.i32    d1, #0x1d  // 29 in both 32-bit lanes of d1
++        vmov.i32    d2, #0x37  // 55 in both 32-bit lanes of d2
 +
-+        tr4_luma_shift d28, d29, d30, d31, #7
++        tr4_luma_shift #7  // first of two tr4_luma_shift applications, shift = 7 (macro comes from the including file)
 +
-+        vtrn.16     d28, d29
-+        vtrn.16     d30, d31
-+        vtrn.32     q14, q15
++        vzip.16     d28, d29  // interleave the 16-bit lanes of d28 and d29
++        vzip.16     d30, d31  // interleave the 16-bit lanes of d30 and d31
++        vzip.32     q14, q15  // interleave the 32-bit lanes of q14 and q15
 +
-+        tr4_luma_shift d28, d29, d30, d31, #(TRN_SHIFT)
++        tr4_luma_shift #TRN_SHIFT  // second application, shift chosen by the including file
 +
-+        vtrn.16     d28, d29
-+        vtrn.16     d30, d31
-+        vtrn.32     q14, q15
-+        vst1.16     {q14, q15}, [r0]
-+        vpop        {d8-d15}
++        vst4.16     {q14, q15}, [r0 :256]  // 4-way interleaving store: lane i of d28..d31 forms output group i, written back over the input
 +        bx lr
 +endfunc
 +
 +function JOIN(ff_hevc_rpi_transform_8x8_neon_, BIT_DEPTH), export=1
 +        add      r2, r0, #16
-+        adrl     r3, tr4f
++        adr      r3, tr4f
 +        vpush    {d8-d15}
 +        vld1.16  {d0, d1}, [r3]
 +        mov      r3, #32
@@ -1630,314 +1736,12 @@ index 0000000000..62b9326532
 +void ff_hevcdsp_rpi_init_neon(HEVCDSPContext *c, const int bit_depth);
 +
 +#endif /* AVCODEC_ARM_HEVCDSP_ARM_H */
-diff --git a/libavcodec/arm/rpi_hevcdsp_cres_neon.S b/libavcodec/arm/rpi_hevcdsp_cres_neon.S
-new file mode 100644
-index 0000000000..883cde35dc
---- /dev/null
-+++ b/libavcodec/arm/rpi_hevcdsp_cres_neon.S
-@@ -0,0 +1,296 @@
-+#include "libavutil/arm/asm.S"
-+#include "neon.S"
-+
-+@ General notes:
-+@
-+@ Residual is only guaranteed to be cliped to 16 bits
-+@ This means that we do need to do movul, qadd, qmovun
-+@ rather than addw, qmovun (if we were clipped to 15 then we could get away
-+@ with this)
-+
-+@ ============================================================================
-+@ U add
-+
-+@ add_residual4x4_c(
-+@   uint8_t *_dst,        [r0]
-+@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride,     [r2]
-+@   int dc_v)             [r3]
-+
-+function ff_hevc_rpi_add_residual_4x4_u_neon_8, export=1
-+        vld1.8      {d16}, [r0, :64], r2
-+        vld1.8      {d17}, [r0, :64], r2
-+        vld1.8      {d18}, [r0, :64], r2
-+        vld1.8      {d19}, [r0, :64], r2
-+        vld1.16     {q0, q1}, [r1]
-+        vdup.16     q2, r3
-+        vdup.16     q3, r3
-+        vmovl.u8    q10, d16
-+        sub         r0, r0, r2, lsl #2
-+        vmovl.u8    q11, d17
-+        vmovl.u8    q12, d18
-+        vmovl.u8    q13, d19
-+        vzip.16     q0, q2
-+        vzip.16     q1, q3
-+        vqadd.s16   q0,  q10
-+        vqadd.s16   q2,  q11
-+        vqadd.s16   q1,  q12
-+        vqadd.s16   q3,  q13
-+        vqmovun.s16 d0,  q0
-+        vqmovun.s16 d1,  q2
-+        vqmovun.s16 d2,  q1
-+        vqmovun.s16 d3,  q3
-+        vst1.8      {d0}, [r0, :64], r2
-+        vst1.8      {d1}, [r0, :64], r2
-+        vst1.8      {d2}, [r0, :64], r2
-+        vst1.8      {d3}, [r0, :64]
-+        bx          lr
-+endfunc
-+
-+@ add_residual8x8_c(
-+@   uint8_t *_dst,        [r0]
-+@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride)     [r2]
-+@   int dc_v)             [r3]
-+
-+function ff_hevc_rpi_add_residual_8x8_u_neon_8, export=1
-+        mov         r12,    #4
-+        vdup.16     q15, r3
-+1:
-+        vld2.8      {d16, d17}, [r0, :128], r2
-+        vld2.8      {d18, d19}, [r0, :128]
-+        vld1.16     {q0, q1}, [r1, :256]!
-+        subs        r12, #1
-+        vmovl.u8    q10, d16
-+        sub         r0, r2
-+        vmovl.u8    q11, d18
-+        vqadd.s16   q0,  q10
-+        vaddw.u8    q2,  q15, d17
-+        vqadd.s16   q1,  q11
-+        vaddw.u8    q3,  q15, d19
-+        vqmovun.s16 d16,  q0
-+        vqmovun.s16 d17,  q2
-+        vqmovun.s16 d18,  q1
-+        vqmovun.s16 d19,  q3
-+        vst2.8      {d16, d17}, [r0, :128], r2
-+        vst2.8      {d18, d19}, [r0, :128], r2
-+        bne         1b
-+        bx          lr
-+endfunc
-+
-+@ add_residual16x16_u(
-+@   uint8_t *_dst,        [r0]
-+@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride)     [r2]
-+@   int dc_v)             [r3]
-+
-+function ff_hevc_rpi_add_residual_16x16_u_neon_8, export=1
-+        mov         r12,    #16
-+        vdup.16     q15, r3
-+1:
-+        vld2.8      {q8, q9}, [r0, :256]
-+        vld1.16     {q0, q1}, [r1, :256]!
-+        subs        r12,   #1
-+        vmovl.u8    q10, d16
-+        vmovl.u8    q11, d17
-+        vqadd.s16   q0,  q10
-+        vqadd.s16   q1,  q11
-+        vaddw.u8    q2,  q15, d18
-+        vaddw.u8    q3,  q15, d19
-+        vqmovun.s16 d16, q0
-+        vqmovun.s16 d17, q1
-+        vqmovun.s16 d18, q2
-+        vqmovun.s16 d19, q3
-+        vst2.8      {q8, q9}, [r0, :256], r2
-+        bne         1b
-+        bx          lr
-+endfunc
-+
-+@ ============================================================================
-+@ V add
-+
-+@ add_residual4x4_v(
-+@   uint8_t *_dst,        [r0]
-+@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride)     [r2]
-+
-+function ff_hevc_rpi_add_residual_4x4_v_neon_8, export=1
-+        vld1.8      {d16}, [r0, :64], r2
-+        vld1.8      {d17}, [r0, :64], r2
-+        vld1.8      {d18}, [r0, :64], r2
-+        vld1.8      {d19}, [r0, :64], r2
-+        vld1.16     {q2, q3}, [r1]
-+        vdup.16     q0, r3
-+        vdup.16     q1, r3
-+        vmovl.u8    q10, d16
-+        sub         r0, r0, r2, lsl #2
-+        vmovl.u8    q11, d17
-+        vmovl.u8    q12, d18
-+        vmovl.u8    q13, d19
-+        vzip.16     q0, q2
-+        vzip.16     q1, q3
-+        vqadd.s16   q0,  q10
-+        vqadd.s16   q2,  q11
-+        vqadd.s16   q1,  q12
-+        vqadd.s16   q3,  q13
-+        vqmovun.s16 d0,  q0
-+        vqmovun.s16 d1,  q2
-+        vqmovun.s16 d2,  q1
-+        vqmovun.s16 d3,  q3
-+        vst1.8      {d0}, [r0, :64], r2
-+        vst1.8      {d1}, [r0, :64], r2
-+        vst1.8      {d2}, [r0, :64], r2
-+        vst1.8      {d3}, [r0, :64]
-+        bx          lr
-+endfunc
-+
-+@ add_residual8x8_v(
-+@   uint8_t *_dst,        [r0]
-+@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride)     [r2]
-+
-+function ff_hevc_rpi_add_residual_8x8_v_neon_8, export=1
-+        mov         r12,    #4
-+        vdup.16     q15, r3
-+1:
-+        vld2.8      {d16, d17}, [r0, :128], r2
-+        vld2.8      {d18, d19}, [r0, :128]
-+        vld1.16     {q0, q1}, [r1, :256]!
-+        subs        r12, #1
-+        vmovl.u8    q10, d17
-+        sub         r0, r2
-+        vmovl.u8    q11, d19
-+        vqadd.s16   q0,  q10
-+        vqadd.s16   q1,  q11
-+        vaddw.u8    q2,  q15, d16
-+        vaddw.u8    q3,  q15, d18
-+        vqmovun.s16 d17,  q0
-+        vqmovun.s16 d16,  q2
-+        vqmovun.s16 d19,  q1
-+        vqmovun.s16 d18,  q3
-+        vst2.8      {d16, d17}, [r0, :128], r2
-+        vst2.8      {d18, d19}, [r0, :128], r2
-+        bne         1b
-+        bx          lr
-+endfunc
-+
-+@ add_residual16x16_v(
-+@   uint8_t *_dst,        [r0]
-+@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride)     [r2]
-+
-+function ff_hevc_rpi_add_residual_16x16_v_neon_8, export=1
-+        mov         r12,    #16
-+        vdup.16     q15, r3
-+1:
-+        vld2.8      {q8, q9}, [r0, :256]
-+        vld1.16     {q0, q1}, [r1, :256]!
-+        subs        r12,   #1
-+        vmovl.u8    q10, d18
-+        vmovl.u8    q11, d19
-+        vaddw.u8    q2,  q15, d16
-+        vaddw.u8    q3,  q15, d17
-+        vqadd.s16   q0,  q10
-+        vqadd.s16   q1,  q11
-+        vqmovun.s16 d16, q2
-+        vqmovun.s16 d17, q3
-+        vqmovun.s16 d18, q0
-+        vqmovun.s16 d19, q1
-+        vst2.8      {q8, q9}, [r0, :256], r2
-+        bne         1b
-+        bx          lr
-+endfunc
-+
-+@ ============================================================================
-+@ U & V add
-+
-+@ add_residual4x4_c(
-+@   uint8_t *_dst,        [r0]
-+@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride)     [r2]
-+
-+function ff_hevc_rpi_add_residual_4x4_c_neon_8, export=1
-+        vld1.8      {d16}, [r0, :64], r2
-+        vld1.8      {d17}, [r0, :64], r2
-+        vld1.8      {d18}, [r0, :64], r2
-+        vld1.8      {d19}, [r0, :64], r2
-+        vldm        r1, {q0-q3}           @ Q0/1 gets all of U, Q2/3 gets all of V
-+        vmovl.u8    q10, d16
-+        sub         r0, r0, r2, lsl #2
-+        vmovl.u8    q11, d17
-+        vmovl.u8    q12, d18
-+        vmovl.u8    q13, d19
-+        vzip.16     q0, q2
-+        vzip.16     q1, q3
-+        vqadd.s16   q0,  q10
-+        vqadd.s16   q2,  q11
-+        vqadd.s16   q1,  q12
-+        vqadd.s16   q3,  q13
-+        vqmovun.s16 d0,  q0
-+        vqmovun.s16 d1,  q2
-+        vqmovun.s16 d2,  q1
-+        vqmovun.s16 d3,  q3
-+        vst1.8      {d0}, [r0, :64], r2
-+        vst1.8      {d1}, [r0, :64], r2
-+        vst1.8      {d2}, [r0, :64], r2
-+        vst1.8      {d3}, [r0, :64]
-+        bx          lr
-+endfunc
-+
-+@ add_residual8x8_c(
-+@   uint8_t *_dst,        [r0]
-+@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride)     [r2]
-+
-+function ff_hevc_rpi_add_residual_8x8_c_neon_8, export=1
-+        mov         r12,    #8
-+        add         r3, r1, #(8*8*2)  @ Offset to V
-+1:
-+        vld2.8      {d16, d17}, [r0, :128]
-+        vld1.16     {q0}, [r1, :128]!
-+        vld1.16     {q1}, [r3, :128]!
-+        subs        r12, #1
-+        vmovl.u8    q10, d16
-+        vmovl.u8    q11, d17
-+        vqadd.s16   q0,  q10
-+        vqadd.s16   q1,  q11
-+        vqmovun.s16 d0,  q0
-+        vqmovun.s16 d1,  q1
-+        vst2.8      {d0, d1}, [r0, :128], r2
-+        bne         1b
-+        bx          lr
-+endfunc
-+
-+@ add_residual16x16_c(
-+@   uint8_t *_dst,        [r0]
-+@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride)     [r2]
-+
-+function ff_hevc_rpi_add_residual_16x16_c_neon_8, export=1
-+        mov         r12,    #16
-+        add         r3, r1, #(16*16*2)  @ Offset to V
-+1:
-+        vld2.8      {q8, q9}, [r0, :256]
-+        vld1.16     {q0, q1}, [r1, :256]!
-+        vld1.16     {q2, q3}, [r3, :256]!
-+        subs        r12,   #1
-+        vmovl.u8    q10, d16
-+        vmovl.u8    q11, d17
-+        vmovl.u8    q12, d18
-+        vmovl.u8    q13, d19
-+        vqadd.s16   q0,  q10
-+        vqadd.s16   q1,  q11
-+        vqadd.s16   q2,  q12
-+        vqadd.s16   q3,  q13
-+        vqmovun.s16 d0,  q0
-+        vqmovun.s16 d1,  q1
-+        vqmovun.s16 d2,  q2
-+        vqmovun.s16 d3,  q3
-+        vst2.8      {q0, q1}, [r0, :256], r2
-+        bne         1b
-+        bx          lr
-+endfunc
-+
-+@ 32x32 chroma never occurs so NIF
-+
-+@ ============================================================================
 diff --git a/libavcodec/arm/rpi_hevcdsp_deblock_neon.S b/libavcodec/arm/rpi_hevcdsp_deblock_neon.S
 new file mode 100644
-index 0000000000..d691cda836
+index 0000000000..e665bd848a
 --- /dev/null
 +++ b/libavcodec/arm/rpi_hevcdsp_deblock_neon.S
-@@ -0,0 +1,1483 @@
+@@ -0,0 +1,1249 @@
 +/*
 + * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi>
 + *
@@ -1962,80 +1766,29 @@ index 0000000000..d691cda836
 +#include "libavutil/arm/asm.S"
 +#include "neon.S"
 +
-+.macro hevc_loop_filter_chroma_start
-+        ldr      r12, [r2]
-+        ldr      r2, [r2, #4]
-+        orrs     r2, r12, r2, lsl #16
-+        it       eq
-+        bxeq     lr
-+.endm
-+
-+@ Uses: d2, d4, d18, d19
-+@ Returns: d2, d4
-+@ Modifies: d0-d7, d22-d25, r12
-+
-+.macro hevc_loop_filter_chroma_body P1, P0, Q0, Q1
-+        vsubl.u8  q0, \Q0, \P0
-+        vsubl.u8  q1, \P1, \Q1
++.macro hevc_loop_filter_uv_body1 P1a, P0a, Q0a, Q1a
++        vsubl.u8  q0, \Q0a, \P0a                @ 8-bit uv filter body, updates P0a and Q0a in place; q0 = Q0 - P0 widened to 16 bits
++        vsubl.u8  q2, \P1a, \Q1a                @ q2 = P1 - Q1 widened to 16 bits
++        vshl.i16  q0, #2                        @ q0 = 4 * (Q0 - P0)
++        vadd.i16  q0, q2                        @ q0 = 4 * (Q0 - P0) + (P1 - Q1)
 +        vdup.16   d4, r2
-+        lsr       r2, r2, #16
-+        vshl.i16  q0, #2
-+        ldr       r12, [sp, #0] @ r12 = &no_q
-+        vadd.i16  q0, q1
-+        ldrh      r3, [r3]      @ r3[0:8] = no_p[0], r3[8:15] = no_p[1]
-+        vdup.16   d5, r2
 +
-+        vrshr.s16 q0, q0, #3
-+        ldrh      r12, [r12]
-+        vneg.s16  q3, q2
-+        vmin.s16  q0, q0, q2
-+        vmovl.u8  q2, \Q0
-+        vmax.s16  q0, q0, q3
-+        vaddw.u8  q1, q0, \P0
++        vrshr.s16 q0, #3                        @ delta = (q0 + 4) >> 3 (rounding shift)
++        vmovl.u8  q2, d4                        @ zero-extend the bytes of d4 to 16-bit limits (presumably tc U / tc V alternating - confirm)
++
++        vmin.s16  q0, q2                        @ delta = min(delta, limit)
++        vneg.s16  q2, q2                        @ q2 = -limit
++        vmax.s16  q0, q2                        @ delta = max(delta, -limit)
++        vaddw.u8  q2, q0, \P0a                  @ q2 = P0 + delta in 16 bits
++
++        vqmovun.s16 \P0a, q2                    @ P0 = (P0 + delta) saturated to unsigned 8 bits
++        vmovl.u8  q2, \Q0a                      @ q2 = Q0 widened to 16 bits
 +        vsub.i16  q2, q0
-+        orrs      r12, r3, r12, lsl #16  @ So should have b1:no_p[0], b9:no_p[1], b17: no_q[0], b25:no_q[1]
-+        vqmovun.s16 \P0, q1
-+        vqmovun.s16 \Q0, q2
-+.endm
 +
-+@ Uses r2 (tc a;b)
-+@ Modifies: q0-q3
-+@ On exit
-+@   r12 (and flags) contain no_p;no_q
-+.macro hevc_loop_filter_chroma_body_16 P1, P0, Q0, Q1, bit_depth
-+        vsub.i16  q0, \Q0, \P0
-+        lsl       r12, r2, #(\bit_depth - 8)
-+        vsub.i16  q1, \P1, \Q1
-+        vshl.i16  q0, #2
-+        vdup.16   d4, r12
-+        lsr       r12, r12, #16
-+        vadd.i16  q0, q1
-+        ldrh      r3, [r3]
-+        vdup.16   d5, r12
-+
-+        vrshr.s16 q0, q0, #3
-+        vneg.s16  q3, q2
-+        movw      r12, #(1 << \bit_depth) - 1
-+        vmin.s16  q0, q0, q2
-+        vmax.s16  q0, q0, q3
-+        vdup.i16  q3, r12
-+        ldr       r12, [sp, #0]
-+
-+        vadd.i16  \P0, q0, \P0
-+        vsub.i16  \Q0, q0
-+
-+        vmov.i64  q2, #0
-+        ldrh      r12, [r12]
-+        vmin.s16  \P0, q3
-+        vmin.s16  \Q0, q3
-+        orrs      r12, r3, r12, lsl #16  @ So should have b1:no_p[0], b9:no_p[1], b17: no_q[0], b25:no_q[1]
-+        vmax.s16  \P0, q2
-+        vmax.s16  \Q0, q2
++        vqmovun.s16 \Q0a, q2                    @ Q0 = (Q0 - delta) saturated to unsigned 8 bits
 +.endm
 +
 +
-+@ Preserves r12
-+@ Clobbers r2
 +.macro hevc_loop_filter_uv_body2 P1u, P1v, P0u, P0v, Q0u, Q0v, Q1u, Q1v
 +        vsubl.u8  q0, \Q0u, \P0u
 +        vsubl.u8  q1, \Q0v, \P0v
@@ -2048,14 +1801,11 @@ index 0000000000..d691cda836
 +        lsr       r2, #16
 +        vadd.i16  q1, q3
 +
-+        @ r2[0:7] -> d4.16 (all), r2[8:15] -> d5.16(all)
 +        vrshr.s16 q0, #3
 +        vdup.16   d6, r2
 +        vmovl.u8  q2, d4
 +        vmovl.u8  q3, d6
-+        vuzp.16   d4, d5
 +        vrshr.s16 q1, #3
-+        vuzp.16   d6, d7
 +
 +        vmin.s16  q0, q2
 +        vneg.s16  q2, q2
@@ -2077,28 +1827,65 @@ index 0000000000..d691cda836
 +        vqmovun.s16 \Q0v, q3
 +.endm
 +
++
 +@ Preserves r12
 +@ Clobbers r2
-+.macro hevc_loop_filter_uv_body2_16 P1u, P1v, P0u, P0v, Q0u, Q0v, Q1u, Q1v, bit_depth
-+        vsub.i16  q0, \Q0u, \P0u
-+        vsub.i16  q1, \Q0v, \P0v
-+        vsub.i16  q2, \P1u, \Q1u
-+        vsub.i16  q3, \P1v, \Q1v
++@ P0a et al all contain UVUVUVUV
++@ r2 (tc4) contains
++@   [0..7]   tc U a
++@   [8..15]  tc V a
++
++.macro hevc_loop_filter_uv_body1_16 P1a, P0a, Q0a, Q1a, bit_depth
++        vsub.i16  q0, \Q0a, \P0a                @ 16-bit uv filter body, updates P0a and Q0a in place; q0 = Q0 - P0
++        vsub.i16  q2, \P1a, \Q1a                @ q2 = P1 - Q1
++        vshl.i16  q0, #2                        @ q0 = 4 * (Q0 - P0)
++        vadd.i16  q0, q2                        @ q0 = 4 * (Q0 - P0) + (P1 - Q1)
++        vrshr.s16 q0, #3                        @ delta = (q0 + 4) >> 3 (rounding shift)
++
++        vdup.16   d4, r2                        @ low 16 bits of r2 (the two tc bytes) copied to every 16-bit lane of d4
++        vshll.u8  q2, d4, #\bit_depth - 8       @ widen each tc byte to 16 bits and scale it up for the bit depth
++
++        movw      r2, #(1 << \bit_depth) - 1    @ r2 = largest pixel value; the tc input in r2 is overwritten
++        vmin.s16  q0, q2                        @ delta = min(delta, tc)
++        vneg.s16  q2, q2                        @ q2 = -tc
++        vmax.s16  q0, q2                        @ delta = max(delta, -tc)
++        vmov.i64  q2, #0                        @ q2 = 0, the lower pixel bound
++        vdup.i16  q3, r2                        @ q3 = largest pixel value, the upper pixel bound
++        vadd.i16  \P0a, q0                      @ P0 += delta
++        vsub.i16  \Q0a, q0                      @ Q0 -= delta
++
++        vmax.s16  \P0a, q2                      @ clamp both results to the range 0 .. largest pixel value
++        vmax.s16  \Q0a, q2
++        vmin.s16  \P0a, q3
++        vmin.s16  \Q0a, q3
++.endm
++
++@ Preserves r12
++@ Clobbers r2
++@ P0a et al all contain UVUVUVUV
++@ r2 (tc4) contains
++@   [0..7]   tc U a
++@   [8..15]  tc V a
++@  [16..23]  tc U b
++@  [24..31]  tc V b
++
++.macro hevc_loop_filter_uv_body2_16 P1a, P1b, P0a, P0b, Q0a, Q0b, Q1a, Q1b, bit_depth
++        vsub.i16  q0, \Q0a, \P0a
++        vsub.i16  q1, \Q0b, \P0b
++        vsub.i16  q2, \P1a, \Q1a
++        vsub.i16  q3, \P1b, \Q1b
 +        vshl.i16  q0, #2
 +        vshl.i16  q1, #2
 +        vadd.i16  q0, q2
++        vrshr.s16 q0, #3
++        vadd.i16  q1, q3
++        vrshr.s16 q1, #3
++
 +        vdup.16   d4, r2
 +        lsr       r2, #16
-+        vadd.i16  q1, q3
-+
-+        @ r2[0:7] -> d4.16 (all), r2[8:15] -> d5.16(all)
-+        vrshr.s16 q0, #3
 +        vdup.16   d6, r2
 +        vshll.u8  q2, d4, #\bit_depth - 8
 +        vshll.u8  q3, d6, #\bit_depth - 8
-+        vuzp.16   d4, d5
-+        vrshr.s16 q1, #3
-+        vuzp.16   d6, d7
 +
 +        movw      r2, #(1 << \bit_depth) - 1
 +        vmin.s16  q0, q2
@@ -2109,52 +1896,59 @@ index 0000000000..d691cda836
 +        vmov.i64  q2, #0
 +        vmax.s16  q1, q3
 +        vdup.i16  q3, r2
-+        vadd.i16  \P0u, q0
-+        vsub.i16  \Q0u, q0
-+        vadd.i16  \P0v, q1
-+        vsub.i16  \Q0v, q1
++        vadd.i16  \P0a, q0
++        vsub.i16  \Q0a, q0
++        vadd.i16  \P0b, q1
++        vsub.i16  \Q0b, q1
 +
-+        vmax.s16  \P0u, q2
-+        vmax.s16  \Q0u, q2
-+        vmax.s16  \P0v, q2
-+        vmax.s16  \Q0v, q2
-+        vmin.s16  \P0u, q3
-+        vmin.s16  \Q0u, q3
-+        vmin.s16  \P0v, q3
-+        vmin.s16  \Q0v, q3
++        vmax.s16  \P0a, q2
++        vmax.s16  \Q0a, q2
++        vmax.s16  \P0b, q2
++        vmax.s16  \Q0b, q2
++        vmin.s16  \P0a, q3
++        vmin.s16  \Q0a, q3
++        vmin.s16  \P0b, q3
++        vmin.s16  \Q0b, q3
 +.endm
 +
 +
 +
++@ Stack arguments on entry to the macro below:
++@   uint8_t *_no_p at [sp+0], uint8_t *_no_q at [sp+4]
++
 +.macro hevc_loop_filter_luma_start
 +        ldr     r12, [r3]
 +        ldr      r3, [r3, #4]
 +        orrs     r3, r12, r3, lsl #16
 +        it       eq
 +        bxeq     lr
++        push     {r4-r10,lr}            @ 32 bytes, so the stack arguments are now at sp+32 and sp+36
++        ldr      r5, [sp, #32]          @ r5 = _no_p pointer
++        ldrb     r10, [r5]              @ r10 = first byte at _no_p
++        ldr      r5, [sp, #36]          @ r5 = _no_q pointer
++        ldrb     r5, [r5]               @ r5 = first byte at _no_q
++        cmp      r10, #0
++        it ne
++        movne    r10, #1                @ r10 bit 0 = 1 when the no_p byte is non-zero
++        cmp      r5, #0
++        it ne
++        orrne    r10, #2                @ r10 bit 1 = 1 when the no_q byte is non-zero
 +.endm
 +
-+@ Uses: r2, r3, r12
-+@ Modifies: r5, r6, r7, r8, r9
-+
 +@ Input:
 +@  r2          beta    (raw: needs shift for bitdepth > 8)
 +@  r3[ 0:15]   tc[0]   (raw: needs shift for bitdepth > 8)
 +@  r3[16:31]   tc[1]   (raw: needs shift for bitdepth > 8)
-+@  [sp,#96]    &no_p[0]
-+@  [sp,#100]   &no_q[0]
 +@
 +@ Input & output
-+@  8-bit: d16-d23
++@  8-bit: d16-d23      (Q3,Q2,Q1,Q0,P0,P1,P2,P3)
 +@ 16-bit:  q8-q15
 +@
-+@ Output
-+@  Z           r10==0
-+@  r10[ 0:7 ]  no_p[0]
-+@  r10[ 8:15]  no_p[1]
-+@  r10[16:23]  no_q[0]
-+@  r10[24:31]  no_q[1]
-+
++@  r1         negated on exit (r1 = -r1)
++@  r10        on exit the flags hold C = r10 bit 1 and N = r10 bit 0; r10 itself is junk
++@
++@ Junks:
++@  r5, r6, r7, r8, r9
 +
 +.macro m_filter_luma bit_depth
 +.if \bit_depth == 8
@@ -2176,26 +1970,21 @@ index 0000000000..d691cda836
 +        lsl        r3, r3, #(\bit_depth - 8)
 +.endif
 +        vsub.i16   q7, q10
-+        ldr        r5, [sp, #96]        @ Bolt no_x values together into r10
 +        vsub.i16   q6, q13
 +        vabd.s16   q7, q7, q10
 +        vabd.s16   q6, q6, q13
-+        ldrh       r10, [r5]
 +
 +        vdup.16    q0, r2
 +        vmov       q4, q7
 +        vmov       q5, q6
-+        ldr        r5, [sp, #100]
 +        vdup.16    d4, r3
 +        lsr        r3, r3, #16
 +        vtrn.16    q7, q4
-+        ldrh       r5, [r5]
 +        vtrn.16    q6, q5
 +
 +        vshl.u64   q7, #32
 +        vshr.u64   q4, #32
 +        vshl.u64   q6, #32
-+        orr        r10, r10, r5, lsl #16
 +        vshr.u64   q5, #32
 +        vshr.u64   q7, #32
 +        vshr.u64   q6, #32
@@ -2221,7 +2010,7 @@ index 0000000000..d691cda836
 +        vcgt.s16   q5, q1, q5
 +        vmov       r7, s12
 +        cmp        r7, #0
-+        beq        bypasswrite
++        beq        .Lbypasswrite
 +
 +        vpadd.i32  d0, d14, d12
 +        vpadd.i32  d1, d15, d13
@@ -2385,27 +2174,29 @@ index 0000000000..d691cda836
 +
 +2:
 +.if \bit_depth == 8
++        neg       r1, r1
 +        vqmovun.s16 d16, q8
-+        cmp       r10, #0
 +        vqmovun.s16 d17, q9
 +        vqmovun.s16 d18, q10
 +        vqmovun.s16 d19, q11
++        lsls      r10, #31
 +        vqmovun.s16 d20, q12
 +        vqmovun.s16 d21, q13
 +        vqmovun.s16 d22, q14
 +        vqmovun.s16 d23, q15
 +.else
-+        movw      r12, #(1 << \bit_depth - 1)
++        movw      r5, #(1 << \bit_depth - 1)
 +        vmov.i64  q0, #0
-+        vdup.i16  q1, r12
++        vdup.i16  q1, r5
 +        @ q8 & q15 should be unaltered and so don't require clipping
++        neg       r1, r1
 +        vmax.s16  q9,  q0
-+        cmp       r10, #0
 +        vmax.s16  q10, q0
 +        vmax.s16  q11, q0
 +        vmax.s16  q12, q0
 +        vmax.s16  q13, q0
 +        vmax.s16  q14, q0
++        lsls      r10, #31
 +        vmin.s16  q9,  q1
 +        vmin.s16  q10, q1
 +        vmin.s16  q11, q1
@@ -2420,16 +2211,6 @@ index 0000000000..d691cda836
 +        m_filter_luma 8
 +endfunc
 +
-+@ ff_hevc_rpi_v_loop_filter_luma2_neon(src (r0), stride (r1), beta (r2), tc (r3), np_p (sp[0]), no_q (sp[4]), src2 (sp[8]))
-+function ff_hevc_rpi_v_loop_filter_luma2_neon_8, export=1
-+        hevc_loop_filter_luma_start
-+        push     {r4-r10,lr}       @ 8 regs = 32 bytes
-+
-+        ldr      r4, [sp, #40]
-+        b        v_loop_luma_common
-+endfunc
-+
-+
 +@ void ff_hevc_rpi_v_loop_filter_luma_neon(
 +@   uint8_t *_pix,      [r0]
 +@   ptrdiff_t _stride,  [r1]
@@ -2438,13 +2219,30 @@ index 0000000000..d691cda836
 +@   uint8_t *_no_p,     [sp+0]
 +@   uint8_t *_no_q)     [sp+4]
 +
-+
 +function ff_hevc_rpi_v_loop_filter_luma_neon, export=1
 +        hevc_loop_filter_luma_start
-+        push     {r4-r10,lr}
 +
 +        sub      r4, r0, #4
-+v_loop_luma_common:
++        b        .Lv_loop_luma_common
++endfunc
++
++@ void ff_hevc_rpi_v_loop_filter_luma2_neon_8(
++@   uint8_t * pix_r,    [r0]
++@   ptrdiff_t _stride,  [r1]
++@   int _beta,          [r2]
++@   int tc2,            [r3]
++@   int no_f,           [sp+0]
++@   uint8_t * pix_l)    [sp+4]
++
++function ff_hevc_rpi_v_loop_filter_luma2_neon_8, export=1
++        cmp      r3, #0
++        it       eq
++        bxeq     lr
++        push     {r4-r10,lr}            @ 32 bytes
++        ldr      r4, [sp, #36]
++        ldr      r10, [sp, #32]
++
++.Lv_loop_luma_common:
 +        vpush    {d8-d15}
 +
 +        @ Uses slightly fewer instructions to do laned loads than unlaned
@@ -2476,43 +2274,32 @@ index 0000000000..d691cda836
 +
 +        bl hevc_loop_filter_luma_body
 +
-+        neg     r1, r1
-+
 +        @ no_p[1]
-+        tst     r10, #0xff00
-+        add     r2, r4, r1, lsl #2
-+        bne     1f
++        bmi     1f
 +        vst4.8  {d16[7],d17[7],d18[7],d19[7]}, [r4:32], r1
 +        vst4.8  {d16[6],d17[6],d18[6],d19[6]}, [r4:32], r1
 +        vst4.8  {d16[5],d17[5],d18[5],d19[5]}, [r4:32], r1
-+        vst4.8  {d16[4],d17[4],d18[4],d19[4]}, [r4:32]
-+1:
-+        @ no_p[0]
-+        tst     r10, #0xff
-+        bne     1f
-+        vst4.8  {d16[3],d17[3],d18[3],d19[3]}, [r2:32], r1
-+        vst4.8  {d16[2],d17[2],d18[2],d19[2]}, [r2:32], r1
-+        vst4.8  {d16[1],d17[1],d18[1],d19[1]}, [r2:32], r1
-+        vst4.8  {d16[0],d17[0],d18[0],d19[0]}, [r2:32]
++        vst4.8  {d16[4],d17[4],d18[4],d19[4]}, [r4:32], r1
++
++        vst4.8  {d16[3],d17[3],d18[3],d19[3]}, [r4:32], r1
++        vst4.8  {d16[2],d17[2],d18[2],d19[2]}, [r4:32], r1
++        vst4.8  {d16[1],d17[1],d18[1],d19[1]}, [r4:32], r1
++        vst4.8  {d16[0],d17[0],d18[0],d19[0]}, [r4:32]
 +1:
 +        @ no_q[1]
-+        tst     r10, #0xff000000
-+        add     r2, r0, r1, lsl #2
-+        bne     1f
++@        tst     r10, #2
++        bcs     1f
 +        vst4.8  {d20[7],d21[7],d22[7],d23[7]}, [r0:32], r1
 +        vst4.8  {d20[6],d21[6],d22[6],d23[6]}, [r0:32], r1
 +        vst4.8  {d20[5],d21[5],d22[5],d23[5]}, [r0:32], r1
-+        vst4.8  {d20[4],d21[4],d22[4],d23[4]}, [r0:32]
++        vst4.8  {d20[4],d21[4],d22[4],d23[4]}, [r0:32], r1
++
++        vst4.8  {d20[3],d21[3],d22[3],d23[3]}, [r0:32], r1
++        vst4.8  {d20[2],d21[2],d22[2],d23[2]}, [r0:32], r1
++        vst4.8  {d20[1],d21[1],d22[1],d23[1]}, [r0:32], r1
++        vst4.8  {d20[0],d21[0],d22[0],d23[0]}, [r0:32]
 +1:
-+        @ no_q[0]
-+        tst     r10, #0xff0000
-+        bne     1f
-+        vst4.8  {d20[3],d21[3],d22[3],d23[3]}, [r2:32], r1
-+        vst4.8  {d20[2],d21[2],d22[2],d23[2]}, [r2:32], r1
-+        vst4.8  {d20[1],d21[1],d22[1],d23[1]}, [r2:32], r1
-+        vst4.8  {d20[0],d21[0],d22[0],d23[0]}, [r2:32]
-+1:
-+bypasswrite:
++.Lbypasswrite:
 +        vpop     {d8-d15}
 +        pop      {r4-r10,pc}
 +endfunc
@@ -2549,41 +2336,27 @@ index 0000000000..d691cda836
 +
 +        bl hevc_loop_filter_luma_body_\bit_depth
 +
-+        neg     r1, r1
-+
 +        @ p[1]
-+        tst      r10, #0xff00
-+        add      r2, r4, r1, lsl #2
-+        bne      1f
++        bmi      1f
 +        vst4.16  {d17[3], d19[3], d21[3], d23[3]}, [r4], r1
 +        vst4.16  {d17[2], d19[2], d21[2], d23[2]}, [r4], r1
 +        vst4.16  {d17[1], d19[1], d21[1], d23[1]}, [r4], r1
-+        vst4.16  {d17[0], d19[0], d21[0], d23[0]}, [r4]
-+1:
-+        @ p[0]
-+        tst      r10, #0xff
-+        bne      1f
-+        vst4.16  {d16[3], d18[3], d20[3], d22[3]}, [r2], r1
-+        vst4.16  {d16[2], d18[2], d20[2], d22[2]}, [r2], r1
-+        vst4.16  {d16[1], d18[1], d20[1], d22[1]}, [r2], r1
-+        vst4.16  {d16[0], d18[0], d20[0], d22[0]}, [r2]
++        vst4.16  {d17[0], d19[0], d21[0], d23[0]}, [r4], r1
++        vst4.16  {d16[3], d18[3], d20[3], d22[3]}, [r4], r1
++        vst4.16  {d16[2], d18[2], d20[2], d22[2]}, [r4], r1
++        vst4.16  {d16[1], d18[1], d20[1], d22[1]}, [r4], r1
++        vst4.16  {d16[0], d18[0], d20[0], d22[0]}, [r4]
 +1:
 +        @ q[1]
-+        tst      r10, #0xff000000
-+        add      r2, r0, r1, lsl #2
-+        bne      1f
++        bcs      1f
 +        vst4.16  {d25[3], d27[3], d29[3], d31[3]}, [r0], r1
 +        vst4.16  {d25[2], d27[2], d29[2], d31[2]}, [r0], r1
 +        vst4.16  {d25[1], d27[1], d29[1], d31[1]}, [r0], r1
-+        vst4.16  {d25[0], d27[0], d29[0], d31[0]}, [r0]
-+1:
-+        @ q[0]
-+        tst      r10, #0xff0000
-+        bne      1f
-+        vst4.16  {d24[3], d26[3], d28[3], d30[3]}, [r2], r1
-+        vst4.16  {d24[2], d26[2], d28[2], d30[2]}, [r2], r1
-+        vst4.16  {d24[1], d26[1], d28[1], d30[1]}, [r2], r1
-+        vst4.16  {d24[0], d26[0], d28[0], d30[0]}, [r2]
++        vst4.16  {d25[0], d27[0], d29[0], d31[0]}, [r0], r1
++        vst4.16  {d24[3], d26[3], d28[3], d30[3]}, [r0], r1
++        vst4.16  {d24[2], d26[2], d28[2], d30[2]}, [r0], r1
++        vst4.16  {d24[1], d26[1], d28[1], d30[1]}, [r0], r1
++        vst4.16  {d24[0], d26[0], d28[0], d30[0]}, [r0]
 +1:
 +        vpop     {d8-d15}
 +        pop      {r4-r10,pc}
@@ -2603,8 +2376,17 @@ index 0000000000..d691cda836
 +
 +function ff_hevc_rpi_h_loop_filter_luma_neon, export=1
 +        hevc_loop_filter_luma_start
-+        push     {r4-r10,lr}
++        b        .Lh_loop_filter_luma_common_8
++endfunc
 +
++function ff_hevc_rpi_h_loop_filter_luma2_neon_8, export=1
++        cmp      r3, #0
++        it       eq
++        bxeq     lr
++        push     {r4-r10,lr}            @ 32 bytes
++        ldr      r10, [sp, #32]
++
++.Lh_loop_filter_luma_common_8:
 +        vpush    {d8-d15}
 +        sub      r0, r0, r1, lsl #2
 +
@@ -2619,68 +2401,28 @@ index 0000000000..d691cda836
 +
 +        bl hevc_loop_filter_luma_body
 +
++        add      r2, r0, r1, lsl #2
++        add      r0, r0, r1
++
 +        vpop     {d8-d15}
 +
-+        neg     r1, r1
-+        add     r0, r0, r1
-+
-+        bne      1f
-+
-+        vst1.8  {d22}, [r0], r1
-+        vst1.8  {d21}, [r0], r1
-+        vst1.8  {d20}, [r0], r1
-+        vst1.8  {d19}, [r0], r1
-+        vst1.8  {d18}, [r0], r1
-+        vst1.8  {d17}, [r0]
-+
-+        pop      {r4-r10,pc}
-+
-+@ Partial write
++        @ Q2-Q0
++        bcs      1f
++        vst1.8   {d22}, [r0], r1
++        vst1.8   {d21}, [r0], r1
++        vst1.8   {d20}, [r0]
++1:
++        @ P0-P2
++        bmi      1f
++        vst1.8   {d19}, [r2], r1
++        vst1.8   {d18}, [r2], r1
++        vst1.8   {d17}, [r2]
 +1:
-+        vmov     r2, r3, d22
-+        vmov     r4, r5, d21
-+        vmov     r6, r7, d20
-+
-+        tst      r10, #0xff0000
-+        ittt eq
-+        streq    r2, [r0]
-+        streq    r4, [r0, r1]
-+        streq    r6, [r0, r1, lsl # 1]
-+
-+        add      r0, r0, #4
-+        tst      r10, #0xff000000
-+        ittt eq
-+        streq    r3, [r0]
-+        streq    r5, [r0, r1]
-+        streq    r7, [r0, r1, lsl # 1]
-+
-+        vmov     r2, r3, d19
-+        vmov     r4, r5, d18
-+        vmov     r6, r7, d17
-+        add      r0, r0, r1
-+        add      r0, r0, r1, lsl # 1
-+
-+        tst      r10, #0xff00
-+        ittt eq
-+        streq    r3, [r0]
-+        streq    r5, [r0, r1]
-+        streq    r7, [r0, r1, lsl # 1]
-+
-+        tst      r10, #0xff
-+        ittt eq
-+        streq    r2, [r0, #-4]!
-+        streq    r4, [r0, r1]
-+        streq    r6, [r0, r1, lsl # 1]
-+
 +        pop      {r4-r10,pc}
-+
 +endfunc
 +
 +
 +.macro m_filter_h_luma_16 bit_depth
-+        hevc_loop_filter_luma_start
-+        push     {r4-r10,lr}
-+
 +        vpush    {d8-d15}
 +        sub      r0, r0, r1, lsl #2
 +
@@ -2695,55 +2437,21 @@ index 0000000000..d691cda836
 +
 +        bl hevc_loop_filter_luma_body_\bit_depth
 +
++        add      r2, r0, r1, lsl #2
++        add      r0, r1
++
 +        vpop     {d8-d15}
 +
-+        sub      r0, r1
-+        neg      r1, r1
-+        bne      1f
-+
++        @ Q2-Q0
++        bcs      1f
 +        vst1.16  {q14}, [r0], r1
 +        vst1.16  {q13}, [r0], r1
-+        vst1.16  {q12}, [r0], r1
-+        vst1.16  {q11}, [r0], r1
-+        vst1.16  {q10}, [r0], r1
-+        vst1.16  { q9}, [r0]
-+        pop      {r4-r10,pc}
-+
-+@ Partial write
++        vst1.16  {q12}, [r0]
 +1:
-+        tst      r10, #0xff0000
-+        mov      r2, r0
-+        bne      1f
-+        vst1.16  {d28}, [r2], r1
-+        vst1.16  {d26}, [r2], r1
-+        vst1.16  {d24}, [r2]
-+
-+1:
-+        tst      r10, #0xff000000
-+        add      r2, r0, #8
-+        bne      1f
-+        vst1.16  {d29}, [r2], r1
-+        vst1.16  {d27}, [r2], r1
-+        vst1.16  {d25}, [r2]
-+
-+1:
-+        tst      r10, #0xff
-+        @ r0 = r0 + r1 * 3
-+        add      r0, r0, r1
-+        add      r0, r0, r1, lsl # 1
-+        add      r2, r0, #8
-+        bne      1f
-+        vst1.16  {d22}, [r0], r1
-+        vst1.16  {d20}, [r0], r1
-+        vst1.16  {d18}, [r0]
-+
-+1:
-+        tst      r10, #0xff00
-+        bne      1f
-+        vst1.16  {d23}, [r2], r1
-+        vst1.16  {d21}, [r2], r1
-+        vst1.16  {d19}, [r2]
-+
++        bmi      1f
++        vst1.16  {q11}, [r2], r1
++        vst1.16  {q10}, [r2], r1
++        vst1.16  { q9}, [r2]
 +1:
 +        pop      {r4-r10,pc}
 +.endm
@@ -2754,42 +2462,35 @@ index 0000000000..d691cda836
 +@                                     uint32_t tc4,          // r2
 +@                                     unsigned int no_f);    // r3
 +@
-+@ no-F = b0:no_p[0], b1:no_p[1], b2:no_q[0], b3:no_q[1]
++@ no_f:
++@ 0  tl P0a
++@ 1  tr P0b
++@ 2  bl Q0a
++@ 3  br Q0b
++@
++@ Probably not worth having the P/Qa only special case in this direction
++@ Given layout we won't save any memory reads or avoid any cache dirtying
++@ We would save a bit of computation but I expect the partials to be less
++@ common in the H direction than V due to how we arrange deblock.
++
 +function ff_hevc_rpi_h_loop_filter_uv_neon_8, export=1
++        cmp      r2, #0
++        bxeq     lr
 +        sub      r0, r0, r1, lsl #1
-+        vld2.8   {d16,d17}, [r0], r1
-+        vld2.8   {d18,d19}, [r0], r1
-+        vld2.8   {d26,d27}, [r0], r1
-+        vld2.8   {d28,d29}, [r0]
++        vld1.8   {d16,d17}, [r0], r1
++        vld1.8   {d18,d19}, [r0], r1
++        vld1.8   {d26,d27}, [r0], r1
++        vld1.8   {d28,d29}, [r0]
 +        sub      r0, r0, r1, lsl #1
 +        hevc_loop_filter_uv_body2 d16, d17, d18, d19, d26, d27, d28, d29
-+        cmp      r3, #0
-+        bne      1f
-+        vst2.8   {d18,d19}, [r0], r1
-+        vst2.8   {d26,d27}, [r0]
-+        bx       lr
 +
-+        @ At least one no_f bit is set
-+        @ Which means we need to break this apart in an ugly fashion
-+1:      vzip.8   d18, d19
 +        lsls     r2, r3, #31            @ b0 -> N, b1 -> C
-+        vzip.8   d26, d27
-+        sub      r1, r1, #8
-+
-+        bmi      1f
-+        vst1.8   {d18}, [r0]
-+1:      add      r0, r0, #8
-+        bcs      2f
-+        vst1.8   {d19}, [r0]
-+2:      lsls     r2, r3, #29            @ b2 -> N, b3 -> C
-+        add      r0, r0, r1
-+
-+        bmi      1f
-+        vst1.8   {d26}, [r0]
-+1:      it cs
-+        bxcs     lr
-+        add      r0, r0, #8
-+        vst1.8   {d27}, [r0]
++        vstrpl   d18, [r0, #0]
++        vstrcc   d19, [r0, #8]
++        add      r0, r1
++        lsls     r3, #29                @ b2 -> N, b3 -> C
++        vstrpl   d26, [r0, #0]
++        vstrcc   d27, [r0, #8]
 +        bx       lr
 +
 +endfunc
@@ -2805,42 +2506,37 @@ index 0000000000..d691cda836
 +@ Macro here actual function near bottom
 +
 +.macro m_filter_h_uv_16 bit_depth
++        cmp      r2, #0
++        bxeq     lr
 +        sub      r0, r0, r1, lsl #1
-+        vld2.16  {q8,  q9 }, [r0], r1
-+        vld2.16  {q10, q11}, [r0], r1
-+        vld2.16  {q12, q13}, [r0], r1
-+        vld2.16  {q14, q15}, [r0]
++        vld1.16  {q8,  q9 }, [r0], r1
++        vld1.16  {q10, q11}, [r0], r1
++        vld1.16  {q12, q13}, [r0], r1
++        vld1.16  {q14, q15}, [r0]
 +        sub      r0, r0, r1, lsl #1
 +
 +        hevc_loop_filter_uv_body2_16 q8, q9, q10, q11, q12, q13, q14, q15, \bit_depth
 +
 +        cmp      r3, #0
 +        bne      1f
-+        vst2.16  {q10, q11}, [r0], r1
-+        vst2.16  {q12, q13}, [r0]
++        vst1.16  {q10, q11}, [r0], r1
++        vst1.16  {q12, q13}, [r0]
 +        bx       lr
 +
 +        @ At least one no_f bit is set
 +        @ Which means we need to break this apart in an ugly fashion
-+1:      vzip.16  q10, q11
++1:
 +        lsls     r2, r3, #31            @ b0 -> N, b1 -> C
-+        vzip.16  q12, q13
-+        sub      r1, r1, #16
-+
-+        bmi      1f
-+        vst1.16  {q10}, [r0]
-+1:      add      r0, r0, #16
-+        bcs      2f
-+        vst1.16  {q11}, [r0]
-+2:      lsls     r2, r3, #29            @ b2 -> N, b3 -> C
-+        add      r0, r0, r1
-+
-+        bmi      1f
-+        vst1.16  {q12}, [r0]
-+1:      it cs
-+        bxcs     lr
-+        add      r0, r0, #16
-+        vst1.16  {q13}, [r0]
++        vstrpl   d20, [r0, #0]
++        vstrpl   d21, [r0, #8]
++        vstrcc   d22, [r0, #16]
++        vstrcc   d23, [r0, #24]
++        add      r0, r1
++        lsls     r3, #29                @ b2 -> N, b3 -> C
++        vstrpl   d24, [r0, #0]
++        vstrpl   d25, [r0, #8]
++        vstrcc   d26, [r0, #16]
++        vstrcc   d27, [r0, #24]
 +        bx       lr
 +.endm
 +
@@ -2851,34 +2547,42 @@ index 0000000000..d691cda836
 +@                                     uint8_t * src_l,       // r3
 +@                                     unsigned int no_f);   // sp[0]
 +@
-+@ no_f = b0:no_p[0], b1:no_p[1], b2:no_q[0], b3:no_q[1]
++@ no_f:
++@ 0  tl P0a
++@ 1  tr Q0a
++@ 2  bl P0b
++@ 3  br Q0b
 +
 +function ff_hevc_rpi_v_loop_filter_uv2_neon_8, export=1
-+        vld4.8   {d16[0], d17[0], d18[0], d19[0]}, [r3], r1
-+        vld4.8   {d20[0], d21[0], d22[0], d23[0]}, [r0], r1
++        cmp      r2, #0
++        bxeq     lr
++        vld2.16  {d16[0], d18[0]}, [r3], r1
++        vld2.16  {d20[0], d22[0]}, [r0], r1
++
++        cmp      r2, #0x10000
++        vld2.16  {d16[1], d18[1]}, [r3], r1
++        vld2.16  {d20[1], d22[1]}, [r0], r1
++
++        vld2.16  {d16[2], d18[2]}, [r3], r1
++        vld2.16  {d20[2], d22[2]}, [r0], r1
++
++        vld2.16  {d16[3], d18[3]}, [r3], r1
++        vld2.16  {d20[3], d22[3]}, [r0], r1
++        blo      10f
++
 +        sub      r12, r0, r3
++        vld2.16  {d17[0], d19[0]}, [r3], r1
++        vld2.16  {d21[0], d23[0]}, [r0], r1
 +
-+        vld4.8   {d16[1], d17[1], d18[1], d19[1]}, [r3], r1
-+        vld4.8   {d20[1], d21[1], d22[1], d23[1]}, [r0], r1
 +        cmp      r12, #4
++        vld2.16  {d17[1], d19[1]}, [r3], r1
++        vld2.16  {d21[1], d23[1]}, [r0], r1
 +
-+        vld4.8   {d16[2], d17[2], d18[2], d19[2]}, [r3], r1
-+        vld4.8   {d20[2], d21[2], d22[2], d23[2]}, [r0], r1
++        vld2.16  {d17[2], d19[2]}, [r3], r1
++        vld2.16  {d21[2], d23[2]}, [r0], r1
 +
-+        vld4.8   {d16[3], d17[3], d18[3], d19[3]}, [r3], r1
-+        vld4.8   {d20[3], d21[3], d22[3], d23[3]}, [r0], r1
-+
-+        vld4.8   {d16[4], d17[4], d18[4], d19[4]}, [r3], r1
-+        vld4.8   {d20[4], d21[4], d22[4], d23[4]}, [r0], r1
-+
-+        vld4.8   {d16[5], d17[5], d18[5], d19[5]}, [r3], r1
-+        vld4.8   {d20[5], d21[5], d22[5], d23[5]}, [r0], r1
-+
-+        vld4.8   {d16[6], d17[6], d18[6], d19[6]}, [r3], r1
-+        vld4.8   {d20[6], d21[6], d22[6], d23[6]}, [r0], r1
-+
-+        vld4.8   {d16[7], d17[7], d18[7], d19[7]}, [r3]
-+        vld4.8   {d20[7], d21[7], d22[7], d23[7]}, [r0]
++        vld2.16  {d17[3], d19[3]}, [r3]
++        vld2.16  {d21[3], d23[3]}, [r0]
 +        it eq
 +        ldreq    r12, [sp, #0]
 +
@@ -2890,49 +2594,83 @@ index 0000000000..d691cda836
 +
 +@ Much/most of the time r0 == r3 + 4 and no_f == 0
 +@ so it is worth having this special case
-+        vst4.8   {d18[7], d19[7], d20[7], d21[7]}, [r3], r1
-+        vst4.8   {d18[6], d19[6], d20[6], d21[6]}, [r3], r1
-+        vst4.8   {d18[5], d19[5], d20[5], d21[5]}, [r3], r1
-+        vst4.8   {d18[4], d19[4], d20[4], d21[4]}, [r3], r1
-+        vst4.8   {d18[3], d19[3], d20[3], d21[3]}, [r3], r1
-+        vst4.8   {d18[2], d19[2], d20[2], d21[2]}, [r3], r1
-+        vst4.8   {d18[1], d19[1], d20[1], d21[1]}, [r3], r1
-+        vst4.8   {d18[0], d19[0], d20[0], d21[0]}, [r3]
++        vst2.16   {d19[3], d21[3]}, [r3], r1    @ P0b, Q0b
++        vst2.16   {d19[2], d21[2]}, [r3], r1
++        vst2.16   {d19[1], d21[1]}, [r3], r1
++        vst2.16   {d19[0], d21[0]}, [r3], r1
++        vst2.16   {d18[3], d20[3]}, [r3], r1    @ P0a, Q0a
++        vst2.16   {d18[2], d20[2]}, [r3], r1
++        vst2.16   {d18[1], d20[1]}, [r3], r1
++        vst2.16   {d18[0], d20[0]}, [r3]
 +        bx       lr
 +
 +@ Either split or partial
 +1:
 +        ldr      r12, [sp, #0]
-+        lsls     r12, #29               @ b2 -> N, b3 -> C
++        @ I have no idea if this is faster than any of the other ways of
++        @ testing these bits but it does free up r12
++        lsl      r12, #28
 +        add      r2, r0, r1, lsl #2
-+        bcs      1f
-+        vst2.8   {d20[7], d21[7]}, [r0], r1
-+        vst2.8   {d20[6], d21[6]}, [r0], r1
-+        vst2.8   {d20[5], d21[5]}, [r0], r1
-+        vst2.8   {d20[4], d21[4]}, [r0]
++        msr      APSR_nzcvq, r12        @ b0 (P0a) -> V, b1 (Q0a) -> C, b2 (P0b) -> Z, b3 (Q0b) -> N
++        add      r12, r3, r1, lsl #2
++        bmi      1f
++        @ Q0b
++        vst1.16  {d21[3]}, [r0], r1
++        vst1.16  {d21[2]}, [r0], r1
++        vst1.16  {d21[1]}, [r0], r1
++        vst1.16  {d21[0]}, [r0]
 +1:
-+        bmi      2f
-+        vst2.8   {d20[3], d21[3]}, [r2], r1
-+        vst2.8   {d20[2], d21[2]}, [r2], r1
-+        vst2.8   {d20[1], d21[1]}, [r2], r1
-+        vst2.8   {d20[0], d21[0]}, [r2]
++        beq      2f
++        @ P0b
++        vst1.16  {d19[3]}, [r3], r1
++        vst1.16  {d19[2]}, [r3], r1
++        vst1.16  {d19[1]}, [r3], r1
++        vst1.16  {d19[0]}, [r3]
 +
 +2:
-+        lsls     r12, #2
-+        add      r2, r3, r1, lsl #2
 +        bcs      3f
-+        vst2.8   {d18[7], d19[7]}, [r3], r1
-+        vst2.8   {d18[6], d19[6]}, [r3], r1
-+        vst2.8   {d18[5], d19[5]}, [r3], r1
-+        vst2.8   {d18[4], d19[4]}, [r3]
++        @ Q0a
++        vst1.16  {d20[3]}, [r2], r1
++        vst1.16  {d20[2]}, [r2], r1
++        vst1.16  {d20[1]}, [r2], r1
++        vst1.16  {d20[0]}, [r2]
++
++3:
++        it vs
++        bxvs     lr
++        vst1.16  {d18[3]}, [r12], r1
++        vst1.16  {d18[2]}, [r12], r1
++        vst1.16  {d18[1]}, [r12], r1
++        vst1.16  {d18[0]}, [r12]
++        bx       lr
++
++@ Single lump (rather than double)
++10:
++        hevc_loop_filter_uv_body1 d16, d18, d20, d22
++
++        @ As we have post inced r0/r3 in the load the easiest thing to do is
++        @ to subtract and write forwards, rather than backwards (as above)
++        ldr      r12, [sp, #0]
++        add      r3, #2
++        sub      r0, r0, r1, lsl #2
++        sub      r3, r3, r1, lsl #2
++        lsls     r12, #31               @ b0 (P0a) -> N, b1 (Q0a) -> C
++
++        bcs      3f
++        vst1.16  {d20[0]}, [r0], r1
++        vst1.16  {d20[1]}, [r0], r1
++        vst1.16  {d20[2]}, [r0], r1
++        vst1.16  {d20[3]}, [r0]
++
 +3:
 +        it mi
 +        bxmi     lr
-+        vst2.8   {d18[3], d19[3]}, [r2], r1
-+        vst2.8   {d18[2], d19[2]}, [r2], r1
-+        vst2.8   {d18[1], d19[1]}, [r2], r1
-+        vst2.8   {d18[0], d19[0]}, [r2]
++        vst1.16  {d18[0]}, [r3], r1
++        vst1.16  {d18[1]}, [r3], r1
++        vst1.16  {d18[2]}, [r3], r1
++        vst1.16  {d18[3]}, [r3]
 +        bx       lr
++
 +endfunc
 +
 +
@@ -2942,37 +2680,53 @@ index 0000000000..d691cda836
 +@                                     uint8_t * src_l,       // r3
 +@                                     unsigned int no_f);   // sp[0]
 +@
-+@ no_f = b0:no_p[0], b1:no_p[1], b2:no_q[0], b3:no_q[1]
++
++@ no_f
++@ 0  tl P0a
++@ 1  tr Q0a
++@ 2  bl P0b
++@ 3  br Q0b
++
++@ P1: q8,  q12
++@ P0: q9,  q13
++@ Q0: q10, q14
++@ Q1: q11, q15
++
 +.macro m_filter_v_uv2_16 bit_depth
-+        vld4.16  {d16[0], d18[0], d20[0], d22[0]}, [r3], r1
-+        vld4.16  {d24[0], d26[0], d28[0], d30[0]}, [r0], r1
++        cmp      r2, #0
++        bxeq     lr
++
++        vld2.32  {d16[0], d18[0]}, [r3], r1
++        vld2.32  {d20[0], d22[0]}, [r0], r1
++
++        vld2.32  {d16[1], d18[1]}, [r3], r1
++        vld2.32  {d20[1], d22[1]}, [r0], r1
++
++        cmp      r2, #0x10000
++        vld2.32  {d17[0], d19[0]}, [r3], r1
++        vld2.32  {d21[0], d23[0]}, [r0], r1
++
++        vld2.32  {d17[1], d19[1]}, [r3], r1
++        vld2.32  {d21[1], d23[1]}, [r0], r1
++        blo      10f
++
++        vld2.32  {d24[0], d26[0]}, [r3], r1
++        vld2.32  {d28[0], d30[0]}, [r0], r1
++
++        vld2.32  {d24[1], d26[1]}, [r3], r1
++        vld2.32  {d28[1], d30[1]}, [r0], r1
 +        sub      r12, r0, r3
 +
-+        vld4.16  {d16[1], d18[1], d20[1], d22[1]}, [r3], r1
-+        vld4.16  {d24[1], d26[1], d28[1], d30[1]}, [r0], r1
++        vld2.32  {d25[0], d27[0]}, [r3], r1
++        vld2.32  {d29[0], d31[0]}, [r0], r1
 +        cmp      r12, #8
 +
-+        vld4.16  {d16[2], d18[2], d20[2], d22[2]}, [r3], r1
-+        vld4.16  {d24[2], d26[2], d28[2], d30[2]}, [r0], r1
-+
-+        vld4.16  {d16[3], d18[3], d20[3], d22[3]}, [r3], r1
-+        vld4.16  {d24[3], d26[3], d28[3], d30[3]}, [r0], r1
-+
-+        vld4.16  {d17[0], d19[0], d21[0], d23[0]}, [r3], r1
-+        vld4.16  {d25[0], d27[0], d29[0], d31[0]}, [r0], r1
-+
-+        vld4.16  {d17[1], d19[1], d21[1], d23[1]}, [r3], r1
-+        vld4.16  {d25[1], d27[1], d29[1], d31[1]}, [r0], r1
-+
-+        vld4.16  {d17[2], d19[2], d21[2], d23[2]}, [r3], r1
-+        vld4.16  {d25[2], d27[2], d29[2], d31[2]}, [r0], r1
-+
-+        vld4.16  {d17[3], d19[3], d21[3], d23[3]}, [r3]
-+        vld4.16  {d25[3], d27[3], d29[3], d31[3]}, [r0]
++        vld2.32  {d25[1], d27[1]}, [r3]
++        vld2.32  {d29[1], d31[1]}, [r0]
 +        it eq
 +        ldreq    r12, [sp, #0]
 +
-+        hevc_loop_filter_uv_body2_16  q8, q9, q10, q11, q12, q13, q14, q15, \bit_depth
++        hevc_loop_filter_uv_body2_16  q8, q12, q9, q13, q10, q14, q11, q15, \bit_depth
 +        cmp      r12, #0
 +        add      r3, #4
 +        neg      r1, r1
@@ -2980,277 +2734,89 @@ index 0000000000..d691cda836
 +
 +@ Much/most of the time r0 == r3 + 4 and no_f == 0
 +@ so it is worth having this special case
-+        vst4.16  {d21[3], d23[3],d25[3], d27[3]}, [r3], r1
-+        vst4.16  {d21[2], d23[2],d25[2], d27[2]}, [r3], r1
-+        vst4.16  {d21[1], d23[1],d25[1], d27[1]}, [r3], r1
-+        vst4.16  {d21[0], d23[0],d25[0], d27[0]}, [r3], r1
-+        vst4.16  {d20[3], d22[3],d24[3], d26[3]}, [r3], r1
-+        vst4.16  {d20[2], d22[2],d24[2], d26[2]}, [r3], r1
-+        vst4.16  {d20[1], d22[1],d24[1], d26[1]}, [r3], r1
-+        vst4.16  {d20[0], d22[0],d24[0], d26[0]}, [r3], r1
++        vst2.32  {d27[1], d29[1]}, [r3], r1
++        vst2.32  {d27[0], d29[0]}, [r3], r1
++        vst2.32  {d26[1], d28[1]}, [r3], r1
++        vst2.32  {d26[0], d28[0]}, [r3], r1
++        vst2.32  {d19[1], d21[1]}, [r3], r1
++        vst2.32  {d19[0], d21[0]}, [r3], r1
++        vst2.32  {d18[1], d20[1]}, [r3], r1
++        vst2.32  {d18[0], d20[0]}, [r3]
 +        bx       lr
 +
 +@ Either split or partial
 +1:
 +        ldr      r12, [sp, #0]
-+        lsls     r12, #29               @ b2 -> N, b3 -> C
-+        add      r2, r0, r1, lsl #2
++        lsls     r12, #29               @ b2 (P0b) -> N, b3 (Q0b) -> C
 +        bcs      1f
-+        vst2.16  {d25[3], d27[3]}, [r0], r1
-+        vst2.16  {d25[2], d27[2]}, [r0], r1
-+        vst2.16  {d25[1], d27[1]}, [r0], r1
-+        vst2.16  {d25[0], d27[0]}, [r0]
++        @ Q0b
++        mov      r2, r0
++        vst1.32  {d29[1]}, [r2], r1
++        vst1.32  {d29[0]}, [r2], r1
++        vst1.32  {d28[1]}, [r2], r1
++        vst1.32  {d28[0]}, [r2]
 +1:
 +        bmi      2f
-+        vst2.16  {d24[3], d26[3]}, [r2], r1
-+        vst2.16  {d24[2], d26[2]}, [r2], r1
-+        vst2.16  {d24[1], d26[1]}, [r2], r1
-+        vst2.16  {d24[0], d26[0]}, [r2]
++        @ P0b
++        mov      r2, r3
++        vst1.32  {d27[1]}, [r2], r1
++        vst1.32  {d27[0]}, [r2], r1
++        vst1.32  {d26[1]}, [r2], r1
++        vst1.32  {d26[0]}, [r2]
 +
 +2:
-+        lsls     r12, #2
-+        add      r2, r3, r1, lsl #2
++        lsls     r12, #2                @ b0 (P0a) -> N, b1 (Q0a) -> C
 +        bcs      3f
-+        vst2.16  {d21[3], d23[3]}, [r3], r1
-+        vst2.16  {d21[2], d23[2]}, [r3], r1
-+        vst2.16  {d21[1], d23[1]}, [r3], r1
-+        vst2.16  {d21[0], d23[0]}, [r3]
++        @ Q0a
++        add      r0, r0, r1, lsl #2
++        vst1.32  {d21[1]}, [r0], r1
++        vst1.32  {d21[0]}, [r0], r1
++        vst1.32  {d20[1]}, [r0], r1
++        vst1.32  {d20[0]}, [r0]
++
 +3:
 +        it mi
 +        bxmi     lr
-+        vst2.16  {d20[3], d22[3]}, [r2], r1
-+        vst2.16  {d20[2], d22[2]}, [r2], r1
-+        vst2.16  {d20[1], d22[1]}, [r2], r1
-+        vst2.16  {d20[0], d22[0]}, [r2]
++        @ P0a
++        add      r3, r3, r1, lsl #2
++        vst1.32  {d19[1]}, [r3], r1
++        vst1.32  {d19[0]}, [r3], r1
++        vst1.32  {d18[1]}, [r3], r1
++        vst1.32  {d18[0]}, [r3]
++        bx       lr
++
++
++10:
++        hevc_loop_filter_uv_body1_16  q8, q9, q10, q11, \bit_depth
++
++        @ As we have post inced r0/r3 in the load the easiest thing to do is
++        @ to subtract and write forwards, rather than backwards (as above)
++        ldr      r12, [sp, #0]
++        add      r3, #4
++        sub      r0, r0, r1, lsl #2
++        sub      r3, r3, r1, lsl #2
++        lsls     r12, #31               @ b0 (P0a) -> N, b1 (Q0a) -> C
++
++        bcs      3f
++        @ Q0a
++        vst1.32  {d20[0]}, [r0], r1
++        vst1.32  {d20[1]}, [r0], r1
++        vst1.32  {d21[0]}, [r0], r1
++        vst1.32  {d21[1]}, [r0]
++
++3:
++        it mi
++        bxmi     lr
++        @ P0a
++        vst1.32  {d18[0]}, [r3], r1
++        vst1.32  {d18[1]}, [r3], r1
++        vst1.32  {d19[0]}, [r3], r1
++        vst1.32  {d19[1]}, [r3]
 +        bx       lr
 +.endm
 +
 +
 +
-+function ff_hevc_rpi_v_loop_filter_chroma_neon, export=1
-+        hevc_loop_filter_chroma_start
-+
-+        sub      r0, #2
-+        vld4.8   {d16[0], d17[0], d18[0], d19[0]}, [r0], r1
-+        vld4.8   {d16[1], d17[1], d18[1], d19[1]}, [r0], r1
-+        vld4.8   {d16[2], d17[2], d18[2], d19[2]}, [r0], r1
-+        vld4.8   {d16[3], d17[3], d18[3], d19[3]}, [r0], r1
-+        vld4.8   {d16[4], d17[4], d18[4], d19[4]}, [r0], r1
-+        vld4.8   {d16[5], d17[5], d18[5], d19[5]}, [r0], r1
-+        vld4.8   {d16[6], d17[6], d18[6], d19[6]}, [r0], r1
-+        vld4.8   {d16[7], d17[7], d18[7], d19[7]}, [r0], r1
-+
-+        sub      r0, r0, r1, lsl #3
-+        add      r0, r0, #1
-+        hevc_loop_filter_chroma_body d16, d17, d18, d19
-+        bne      1f
-+
-+        vst2.8   {d17[0], d18[0]}, [r0], r1
-+        vst2.8   {d17[1], d18[1]}, [r0], r1
-+        vst2.8   {d17[2], d18[2]}, [r0], r1
-+        vst2.8   {d17[3], d18[3]}, [r0], r1
-+        vst2.8   {d17[4], d18[4]}, [r0], r1
-+        vst2.8   {d17[5], d18[5]}, [r0], r1
-+        vst2.8   {d17[6], d18[6]}, [r0], r1
-+        vst2.8   {d17[7], d18[7]}, [r0], r1
-+        bx       lr
-+
-+1:
-+        tst      r12, #0xff             @ P0a
-+        bne      2f
-+
-+        vst1.8   {d17[0]}, [r0], r1
-+        vst1.8   {d17[1]}, [r0], r1
-+        vst1.8   {d17[2]}, [r0], r1
-+        vst1.8   {d17[3]}, [r0], r1
-+        sub      r0, r0, r1, lsl #2
-+
-+2:
-+        tst      r12, #0xff0000         @ Q0a
-+        add      r0, #1
-+        bne      3f
-+        vst1.8   {d18[0]}, [r0], r1
-+        vst1.8   {d18[1]}, [r0], r1
-+        vst1.8   {d18[2]}, [r0], r1
-+        vst1.8   {d18[3]}, [r0], r1
-+        sub      r0, r0, r1, lsl #2
-+
-+3:
-+        tst      r12, #0xff000000       @ Q0b
-+        add      r0, r0, r1, lsl #2
-+        bne      4f
-+        vst1.8   {d18[4]}, [r0], r1
-+        vst1.8   {d18[5]}, [r0], r1
-+        vst1.8   {d18[6]}, [r0], r1
-+        vst1.8   {d18[7]}, [r0], r1
-+        sub      r0, r0, r1, lsl #2
-+
-+4:
-+        tst      r12, #0xff00           @ P0b
-+        it ne
-+        bxne     lr
-+
-+        sub      r0, #1
-+        vst1.8   {d17[4]}, [r0], r1
-+        vst1.8   {d17[5]}, [r0], r1
-+        vst1.8   {d17[6]}, [r0], r1
-+        vst1.8   {d17[7]}, [r0], r1
-+        bx       lr
-+
-+endfunc
-+
-+
-+.macro m_filter_v_chroma_16 bit_depth
-+        hevc_loop_filter_chroma_start
-+
-+        sub      r0, #4
-+        vld4.16  {d16[0], d18[0], d20[0], d22[0]}, [r0], r1
-+        vld4.16  {d16[1], d18[1], d20[1], d22[1]}, [r0], r1
-+        vld4.16  {d16[2], d18[2], d20[2], d22[2]}, [r0], r1
-+        vld4.16  {d16[3], d18[3], d20[3], d22[3]}, [r0], r1
-+        vld4.16  {d17[0], d19[0], d21[0], d23[0]}, [r0], r1
-+        vld4.16  {d17[1], d19[1], d21[1], d23[1]}, [r0], r1
-+        vld4.16  {d17[2], d19[2], d21[2], d23[2]}, [r0], r1
-+        vld4.16  {d17[3], d19[3], d21[3], d23[3]}, [r0], r1
-+
-+        sub      r0, r0, r1, lsl #3
-+        add      r0, r0, #2
-+        hevc_loop_filter_chroma_body_16 q8, q9, q10, q11, \bit_depth
-+        bne      1f
-+
-+        vst2.16  {d18[0], d20[0]}, [r0], r1
-+        vst2.16  {d18[1], d20[1]}, [r0], r1
-+        vst2.16  {d18[2], d20[2]}, [r0], r1
-+        vst2.16  {d18[3], d20[3]}, [r0], r1
-+        vst2.16  {d19[0], d21[0]}, [r0], r1
-+        vst2.16  {d19[1], d21[1]}, [r0], r1
-+        vst2.16  {d19[2], d21[2]}, [r0], r1
-+        vst2.16  {d19[3], d21[3]}, [r0], r1
-+        bx       lr
-+
-+1:
-+        tst      r12, #0xff             @ P0a
-+        bne      2f
-+
-+        vst1.16  {d18[0]}, [r0], r1
-+        vst1.16  {d18[1]}, [r0], r1
-+        vst1.16  {d18[2]}, [r0], r1
-+        vst1.16  {d18[3]}, [r0], r1
-+        sub      r0, r0, r1, lsl #2
-+
-+2:
-+        tst      r12, #0xff0000         @ Q0a
-+        add      r0, #1
-+        bne      3f
-+        vst1.16  {d20[0]}, [r0], r1
-+        vst1.16  {d20[1]}, [r0], r1
-+        vst1.16  {d20[2]}, [r0], r1
-+        vst1.16  {d20[3]}, [r0], r1
-+        sub      r0, r0, r1, lsl #2
-+
-+3:
-+        tst      r12, #0xff000000       @ Q0b
-+        add      r0, r0, r1, lsl #2
-+        bne      4f
-+        vst1.16  {d21[0]}, [r0], r1
-+        vst1.16  {d21[1]}, [r0], r1
-+        vst1.16  {d21[2]}, [r0], r1
-+        vst1.16  {d21[3]}, [r0], r1
-+        sub      r0, r0, r1, lsl #2
-+
-+4:
-+        tst      r12, #0xff00           @ P0b
-+        it ne
-+        bxne     lr
-+
-+        sub      r0, #1
-+        vst1.16  {d19[0]}, [r0], r1
-+        vst1.16  {d19[1]}, [r0], r1
-+        vst1.16  {d19[2]}, [r0], r1
-+        vst1.16  {d19[3]}, [r0], r1
-+        bx       lr
-+.endm
-+
-+
-+@ void ff_hevc_rpi_h_loop_filter_chroma_neon(
-+@   uint8_t *_pix,     [r0]
-+@   ptrdiff_t _stride, [r1]
-+@   int *_tc,          [r2]
-+@   uint8_t *_no_p,    [r3]
-+@   uint8_t *_no_q);   [sp+0]
-+
-+function ff_hevc_rpi_h_loop_filter_chroma_neon, export=1
-+        hevc_loop_filter_chroma_start
-+        sub      r0, r0, r1, lsl #1
-+        vld1.8   {d16}, [r0], r1
-+        vld1.8   {d17}, [r0], r1
-+        vld1.8   {d18}, [r0], r1
-+        vld1.8   {d19}, [r0]
-+        sub      r0, r0, r1, lsl #1
-+        hevc_loop_filter_chroma_body d16, d17, d18, d19
-+        bne      1f     @ Partial write
-+        vst1.8   {d17}, [r0], r1
-+        vst1.8   {d18}, [r0]
-+        bx       lr
-+1:
-+        tst      r12, #0xff
-+        vmov     r2, r3, d17
-+        it eq
-+        streq    r2, [r0]
-+        tst      r12, #0xff00
-+        it eq
-+        streq    r3, [r0, #4]
-+
-+        add      r0, r1
-+        tst      r12, #0xff0000
-+        vmov     r2, r3, d18
-+        it eq
-+        streq    r2, [r0]
-+        tst      r12, #0xff000000
-+        it eq
-+        streq    r3, [r0, #4]
-+
-+        bx       lr
-+endfunc
-+
-+.macro m_filter_h_chroma_16 bit_depth
-+        hevc_loop_filter_chroma_start
-+        sub      r0, r0, r1, lsl #1
-+        vld1.16  {q8}, [r0], r1
-+        vld1.16  {q9}, [r0], r1
-+        vld1.16  {q10}, [r0], r1
-+        vld1.16  {q11}, [r0]
-+        sub      r0, r0, r1, lsl #1
-+        hevc_loop_filter_chroma_body_16 q8, q9, q10, q11, \bit_depth
-+        bne      1f     @ Partial write
-+        vst1.16  {q9}, [r0], r1
-+        vst1.16  {q10}, [r0]
-+        bx       lr
-+1:
-+        tst      r12, #0xff
-+        bne      2f
-+        vst1.16  {d18}, [r0]
-+2:
-+        tst      r12, #0xff00
-+        bne      3f
-+        add      r0, #8
-+        vst1.16  {d19}, [r0]
-+        sub      r0, #8
-+3:
-+        tst      r12, #0xff0000
-+        add      r0, r1
-+        bne      4f
-+        vst1.16  {d20}, [r0]
-+4:
-+        tst      r12, #0xff000000
-+        it ne
-+        bxne     lr
-+        add      r0, #8
-+        vst1.16  {d21}, [r0]
-+
-+        bx       lr
-+.endm
-+
 +
 +/* ff_hevc_rpi_deblocking_boundary_strengths_neon(int pus, int dup, int in_i
 + *                                            int *curr_rpl0, int *curr_
@@ -3385,23 +2951,35 @@ index 0000000000..d691cda836
 +endfunc
 +
 +function ff_hevc_rpi_h_loop_filter_luma_neon_10, export=1
-+        m_filter_h_luma_16 10
++        hevc_loop_filter_luma_start
++        b        .Lh_loop_luma_common_10
 +endfunc
 +
-+function ff_hevc_rpi_v_loop_filter_luma2_neon_10, export=1
-+        hevc_loop_filter_luma_start
-+        push     {r4-r10,lr}       @ 8 regs = 32 bytes
-+
-+        ldr      r4, [sp, #40]
-+        b        v_loop_luma_common_10
++function ff_hevc_rpi_h_loop_filter_luma2_neon_10, export=1
++        cmp      r3, #0                 @ r3 = 4th argument (tc2 in the C prototype)
++        it       eq
++        bxeq     lr                     @ return at once when r3 is zero
++        push     {r4-r10,lr}            @ 32 bytes
++        ldr      r10, [sp, #32]         @ 5th argument (no_f): first stacked word above the push
++.Lh_loop_luma_common_10:                @ ff_hevc_rpi_h_loop_filter_luma_neon_10 branches here
++        m_filter_h_luma_16 10
 +endfunc
 +
 +function ff_hevc_rpi_v_loop_filter_luma_neon_10, export=1
 +        hevc_loop_filter_luma_start
-+        push     {r4-r10,lr}
-+
 +        sub      r4, r0, #8
-+v_loop_luma_common_10:
++        b        .Lv_loop_luma_common_10
++endfunc
++
++function ff_hevc_rpi_v_loop_filter_luma2_neon_10, export=1
++        cmp      r3, #0                 @ r3 = 4th argument (tc2 in the C prototype)
++        it       eq
++        bxeq     lr                     @ return at once when r3 is zero
++        push     {r4-r10,lr}            @ 32 bytes
++        ldr      r4, [sp, #36]          @ 6th argument (_pix_l): second stacked word above the push
++        ldr      r10, [sp, #32]         @ 5th argument (no_f): first stacked word above the push
++
++.Lv_loop_luma_common_10:                @ ff_hevc_rpi_v_loop_filter_luma_neon_10 branches here
 +        m_filter_v_luma_common_16 10
 +endfunc
 +
@@ -3413,20 +2991,12 @@ index 0000000000..d691cda836
 +        m_filter_v_uv2_16 10
 +endfunc
 +
-+function ff_hevc_rpi_h_loop_filter_chroma_neon_10, export=1
-+        m_filter_h_chroma_16 10
-+endfunc
-+
-+function ff_hevc_rpi_v_loop_filter_chroma_neon_10, export=1
-+        m_filter_v_chroma_16 10
-+endfunc
-+
 diff --git a/libavcodec/arm/rpi_hevcdsp_idct_neon.S b/libavcodec/arm/rpi_hevcdsp_idct_neon.S
 new file mode 100644
-index 0000000000..2ebbf15d2b
+index 0000000000..db10da16d3
 --- /dev/null
 +++ b/libavcodec/arm/rpi_hevcdsp_idct_neon.S
-@@ -0,0 +1,423 @@
+@@ -0,0 +1,183 @@
 +/*
 + * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi>
 + *
@@ -3450,295 +3020,53 @@ index 0000000000..2ebbf15d2b
 +#include "libavutil/arm/asm.S"
 +#include "neon.S"
 +
-+function ff_hevc_rpi_add_residual_4x4_neon_8, export=1
-+        vldm        r1, {q0-q1}
-+        vld1.32     d4[0], [r0], r2
-+        vld1.32     d4[1], [r0], r2
-+        vld1.32     d5[0], [r0], r2
-+        vld1.32     d5[1], [r0], r2
-+        sub         r0, r0, r2, lsl #2
-+        vmovl.u8    q8, d4
-+        vmovl.u8    q9, d5
-+        vqadd.s16   q0, q0, q8
-+        vqadd.s16   q1, q1, q9
-+        vqmovun.s16 d0, q0
-+        vqmovun.s16 d1, q1
-+        vst1.32     d0[0], [r0], r2
-+        vst1.32     d0[1], [r0], r2
-+        vst1.32     d1[0], [r0], r2
-+        vst1.32     d1[1], [r0], r2
-+        bx          lr
-+endfunc
++/* in/out: d28-d31 = src0-src3 / dst0-dst3; q14 is also reused for c3; q8 - q13 are temps */
++.macro tr4_luma_shift shift
++        vaddl.s16   q8, d28, d30    // c0 = src0 + src2
++        vaddl.s16   q9, d30, d31    // c1 = src2 + src3
++        vsubl.s16   q10, d28, d31   // c2 = src0 - src3
++        vaddl.s16   q11, d28, d31   // src0 + src3
 +
-+function ff_hevc_rpi_add_residual_8x8_neon_8, export=1
-+        mov         r3,   #8
-+1:      subs        r3,   #1
-+        vld1.16     {q0}, [r1]!
-+        vld1.8      d16,  [r0]
-+        vmovl.u8    q8,   d16
-+        vqadd.s16   q0,   q8
-+        vqmovun.s16 d0,   q0
-+        vst1.32     d0,   [r0], r2
-+        bne         1b
-+        bx          lr
-+endfunc
++        vmul.i32    q12, q8, d1[0]  // 29 * c0
++        vmul.i32    q13, q10, d2[0] // 55 * c2
++        vmul.i32    q8, q8, d2[0]   // 55 * c0
++        vmull.s16   q14, d29, d0[0] // c3 = 74 * src1
 +
-+function ff_hevc_rpi_add_residual_16x16_neon_8, export=1
-+        mov         r3,   #16
-+1:      subs        r3,   #1
-+        vld1.16     {q0, q1}, [r1]!
-+        vld1.8      {q8},  [r0]
-+        vmovl.u8    q9,  d16
-+        vmovl.u8    q10, d17
-+        vqadd.s16   q0,  q9
-+        vqadd.s16   q1,  q10
-+        vqmovun.s16 d0,  q0
-+        vqmovun.s16 d1,  q1
-+        vst1.8      {q0},   [r0], r2
-+        bne         1b
-+        bx          lr
-+endfunc
++        vsubw.s16   q11, q11, d30   // src0 - src2 + src3
++        vmla.i32    q12, q9, d2[0]  // 29 * c0 + 55 * c1
++        vmls.i32    q13, q9, d1[0]  // 55 * c2 - 29 * c1
++        vmla.i32    q8, q10, d1[0]  // 55 * c0 + 29 * c2
 +
-+function ff_hevc_rpi_add_residual_32x32_neon_8, export=1
-+        mov         r3,   #32
-+1:      subs        r3,   #1
-+        vldm        r1!, {q0-q3}
-+        vld1.8      {q8, q9},  [r0]
-+        vmovl.u8    q10, d16
-+        vmovl.u8    q11, d17
-+        vmovl.u8    q12, d18
-+        vmovl.u8    q13, d19
-+        vqadd.s16   q0,  q10
-+        vqadd.s16   q1,  q11
-+        vqadd.s16   q2,  q12
-+        vqadd.s16   q3,  q13
-+        vqmovun.s16 d0,  q0
-+        vqmovun.s16 d1,  q1
-+        vqmovun.s16 d2,  q2
-+        vqmovun.s16 d3,  q3
-+        vst1.8     {q0, q1},   [r0], r2
-+        bne         1b
-+        bx          lr
-+endfunc
++        vmul.i32    q11, q11, d0[0] // dst2 = 74 * (src0 - src2 + src3)
++        vadd.i32    q12, q12, q14   // dst0 = 29 * c0 + 55 * c1 + c3
++        vadd.i32    q13, q13, q14   // dst1 = 55 * c2 - 29 * c1 + c3
++        vsub.i32    q8, q8, q14     // dst3 = 55 * c0 + 29 * c2 - c3
 +
-+
-+@ ff_hevc_rpi_add_residual_4x4_dc_neon_8(
-+@   uint8_t * dst,              // [r0]
-+@   unsigned int stride,        // [r1]
-+@   int dc)                     // [r2]
-+
-+function ff_hevc_rpi_add_residual_4x4_dc_neon_8, export=1
-+        vdup.16     q15, r2
-+
-+        vld1.32     d4[0], [r0], r1
-+        vld1.32     d4[1], [r0], r1
-+        vld1.32     d5[0], [r0], r1
-+        vld1.32     d5[1], [r0], r1
-+        sub         r0, r0, r1, lsl #2
-+        vaddw.u8    q0, q15, d4
-+        vaddw.u8    q1, q15, d5
-+        vqmovun.s16 d0, q0
-+        vqmovun.s16 d1, q1
-+        vst1.32     d0[0], [r0], r1
-+        vst1.32     d0[1], [r0], r1
-+        vst1.32     d1[0], [r0], r1
-+        vst1.32     d1[1], [r0], r1
-+        bx          lr
-+endfunc
-+
-+
-+@ ff_hevc_rpi_add_residual_4x4_dc_c_neon_8(
-+@   uint8_t * dst,              // [r0]
-+@   unsigned int stride,        // [r1]
-+@   int dc)                     // [r2]
-+
-+function ff_hevc_rpi_add_residual_4x4_dc_c_neon_8, export=1
-+        vdup.32     q15, r2
-+        mov         r3,  #4
-+        b           1f
-+endfunc
-+
-+@ ff_hevc_rpi_add_residual_8x8_dc_neon_8(
-+@   uint8_t * dst,              // [r0]
-+@   unsigned int stride,        // [r1]
-+@   int dc)                     // [r2]
-+
-+function ff_hevc_rpi_add_residual_8x8_dc_neon_8, export=1
-+        vdup.16     q15, r2
-+        mov         r3,  #8
-+
-+1:      subs        r3,   #1
-+        vld1.8      d16,  [r0]
-+        vaddw.u8    q0,   q15, d16
-+        vqmovun.s16 d0,   q0
-+        vst1.32     d0,   [r0], r1
-+        bne         1b
-+        bx          lr
-+endfunc
-+
-+
-+@ ff_hevc_rpi_add_residual_8x8_dc_c_neon_8(
-+@   uint8_t * dst,              // [r0]
-+@   unsigned int stride,        // [r1]
-+@   int dc)                     // [r2]
-+
-+function ff_hevc_rpi_add_residual_8x8_dc_c_neon_8, export=1
-+        vdup.32     q15, r2
-+        mov         r3,  #8
-+        b           1f
-+endfunc
-+
-+@ ff_hevc_rpi_add_residual_16x16_dc_neon_8(
-+@   uint8_t * dst,              // [r0]
-+@   unsigned int stride,        // [r1]
-+@   int dc)                     // [r2]
-+
-+function ff_hevc_rpi_add_residual_16x16_dc_neon_8, export=1
-+        vdup.16     q15, r2
-+        mov         r3,  #16
-+
-+1:      subs        r3,   #1
-+        vld1.8      {q8},  [r0]
-+        vaddw.u8    q0,  q15, d16
-+        vaddw.u8    q1,  q15, d17
-+        vqmovun.s16 d0,  q0
-+        vqmovun.s16 d1,  q1
-+        vst1.8      {q0},   [r0], r1
-+        bne         1b
-+        bx          lr
-+endfunc
-+
-+
-+@ ff_hevc_rpi_add_residual_16x16_dc_c_neon_8(
-+@   uint8_t * dst,              // [r0]
-+@   unsigned int stride,        // [r1]
-+@   int dc)                     // [r2]
-+
-+function ff_hevc_rpi_add_residual_16x16_dc_c_neon_8, export=1
-+        vdup.32     q15, r2
-+        mov         r3,  #16
-+        b           1f
-+endfunc
-+
-+@ ff_hevc_rpi_add_residual_32x32_dc_neon_8(
-+@   uint8_t * dst,              // [r0]
-+@   unsigned int stride,        // [r1]
-+@   int dc)                     // [r2]
-+
-+function ff_hevc_rpi_add_residual_32x32_dc_neon_8, export=1
-+        vdup.16     q15, r2
-+        mov         r3,  #32
-+
-+1:      subs        r3,   #1
-+        vld1.8      {q8, q9},  [r0]
-+        vaddw.u8    q0,  q15, d16
-+        vaddw.u8    q1,  q15, d17
-+        vaddw.u8    q2,  q15, d18
-+        vaddw.u8    q3,  q15, d19
-+        vqmovun.s16 d0,  q0
-+        vqmovun.s16 d1,  q1
-+        vqmovun.s16 d2,  q2
-+        vqmovun.s16 d3,  q3
-+        vst1.8     {q0, q1},   [r0], r1
-+        bne         1b
-+        bx          lr
-+endfunc
-+
-+
-+
-+.macro  transpose_16b_8x8   r0, r1, r2, r3, r4, r5, r6, r7
-+        vtrn.64         \r0, \r4
-+        vtrn.64         \r1, \r5
-+        vtrn.64         \r2, \r6
-+        vtrn.64         \r3, \r7
-+        vtrn.32         \r0, \r2
-+        vtrn.32         \r1, \r3
-+        vtrn.32         \r4, \r6
-+        vtrn.32         \r5, \r7
-+        vtrn.16         \r0, \r1
-+        vtrn.16         \r2, \r3
-+        vtrn.16         \r4, \r5
-+        vtrn.16         \r6, \r7
++        vqrshrn.s32 d28, q12, \shift
++        vqrshrn.s32 d29, q13, \shift
++        vqrshrn.s32 d30, q11, \shift
++        vqrshrn.s32 d31, q8, \shift
 +.endm
 +
-+// in 4 q regs
-+// output 8 d regs
-+.macro transpose_16b_4x4    r0, r1, r2, r3
-+        vtrn.32         \r0, \r2
-+        vtrn.32         \r1, \r3
-+        vtrn.16         \r0, \r1
-+        vtrn.16         \r2, \r3
-+.endm
++/* in/out: d28-d31 = src0-src3 / results; q14, q15 are overwritten as intermediates; q8 - q11 are temps */
++.macro tr4_shift shift
++        vmull.s16   q9, d29, d0[0]   // 83 * src1
++        vmull.s16   q8, d29, d0[1]   // 36 * src1
++        vshll.s16   q14, d28, #6     // 64 * src0
++        vshll.s16   q10, d30, #6     // 64 * src2
++        vmlal.s16   q9, d31, d0[1]   // 83 * src1 + 36 * src3  o0
++        vmlsl.s16   q8, d31, d0[0]   // 36 * src1 - 83 * src3  o1
++        vadd.s32    q11, q14, q10    // 64 * (src0 + src2)     e0
++        vsub.s32    q10, q14, q10    // 64 * (src0 - src2)     e1
++        vadd.s32    q14, q11, q9     // e0 + o0
++        vadd.s32    q15, q10, q8     // e1 + o1
++        vsub.s32    q8, q10, q8      // e1 - o1
++        vsub.s32    q9, q11, q9      // e0 - o0
 +
-+/* uses registers q2 - q9 for temp values */
-+/* TODO: reorder */
-+.macro tr4_luma_shift r0, r1, r2, r3, shift
-+        vaddl.s16   q5, \r0, \r2    // c0 = src0 + src2
-+        vaddl.s16   q2, \r2, \r3    // c1 = src2 + src3
-+        vsubl.s16   q4, \r0, \r3    // c2 = src0 - src3
-+        vmull.s16   q6, \r1, d0[0]  // c3 = 74 * src1
-+
-+        vaddl.s16   q7, \r0, \r3    // src0 + src3
-+        vsubw.s16   q7, q7, \r2     // src0 - src2 + src3
-+        vmul.s32    q7, q7, d0[0]   // dst2 = 74 * (src0 - src2 + src3)
-+
-+        vmul.s32    q8, q5, d0[1]   // 29 * c0
-+        vmul.s32    q9, q2, d1[0]   // 55 * c1
-+        vadd.s32    q8, q9          // 29 * c0 + 55 * c1
-+        vadd.s32    q8, q6          // dst0 = 29 * c0 + 55 * c1 + c3
-+
-+        vmul.s32    q2, q2, d0[1]   // 29 * c1
-+        vmul.s32    q9, q4, d1[0]   // 55 * c2
-+        vsub.s32    q9, q2          // 55 * c2 - 29 * c1
-+        vadd.s32    q9, q6          // dst1 = 55 * c2 - 29 * c1 + c3
-+
-+        vmul.s32    q5, q5, d1[0]   // 55 * c0
-+        vmul.s32    q4, q4, d0[1]   // 29 * c2
-+        vadd.s32    q5, q4          // 55 * c0 + 29 * c2
-+        vsub.s32    q5, q6          // dst3 = 55 * c0 + 29 * c2 - c3
-+
-+        vqrshrn.s32   \r0, q8, \shift
-+        vqrshrn.s32   \r1, q9, \shift
-+        vqrshrn.s32   \r2, q7, \shift
-+        vqrshrn.s32   \r3, q5, \shift
-+.endm
-+
-+/* uses registers q2 - q6 for temp values */
-+.macro tr4 r0, r1, r2, r3
-+        vmull.s16  q4, \r1, d0[0]   // 83 * src1
-+        vmull.s16  q6, \r1, d0[1]   // 36 * src1
-+        vshll.s16  q2, \r0, #6   // 64 * src0
-+        vshll.s16  q3, \r2, #6   // 64 * src2
-+        vadd.s32   q5, q2, q3    // 64 * (src0 + src2)     e0
-+        vsub.s32   q2, q2, q3    // 64 * (src0 - src2)     e1
-+        vmlal.s16  q4, \r3, d0[1]   // 83 * src1 + 36 * src3  o0
-+        vmlsl.s16  q6, \r3, d0[0]   // 36 * src1 - 83 * src3  o1
-+
-+        vsub.s32   q3, q5, q4    // e0 - o0
-+        vadd.s32   q4, q5, q4    // e0 + o0
-+        vadd.s32   q5, q2, q6    // e1 + o1
-+        vsub.s32   q6, q2, q6    // e1 - o1
-+.endm
-+
-+.macro tr4_shift r0, r1, r2, r3, shift
-+        vmull.s16  q4, \r1, d0[0]   // 83 * src1
-+        vmull.s16  q6, \r1, d0[1]   // 36 * src1
-+        vshll.s16  q2, \r0, #6   // 64 * src0
-+        vshll.s16  q3, \r2, #6   // 64 * src2
-+        vadd.s32   q5, q2, q3    // 64 * (src0 + src2)     e0
-+        vsub.s32   q2, q2, q3    // 64 * (src0 - src2)     e1
-+        vmlal.s16  q4, \r3, d0[1]   // 83 * src1 + 36 * src3  o0
-+        vmlsl.s16  q6, \r3, d0[0]   // 36 * src1 - 83 * src3  o1
-+
-+        vsub.s32   q3, q5, q4    // e0 - o0
-+        vadd.s32   q4, q5, q4    // e0 + o0
-+        vadd.s32   q5, q2, q6    // e1 + o1
-+        vsub.s32   q6, q2, q6    // e1 - o1
-+
-+        vqrshrn.s32   \r0, q4, \shift
-+        vqrshrn.s32   \r1, q5, \shift
-+        vqrshrn.s32   \r2, q6, \shift
-+        vqrshrn.s32   \r3, q3, \shift
++        vqrshrn.s32 d28, q14, \shift
++        vqrshrn.s32 d29, q15, \shift
++        vqrshrn.s32 d30, q8, \shift
++        vqrshrn.s32 d31, q9, \shift
 +.endm
 +
 +.macro tr8_process d0, d1, d2, d3, d4, d5, d6, d7,                         \
@@ -3830,6 +3158,11 @@ index 0000000000..2ebbf15d2b
 +        vst4.16    {\d4-\d7}, [r2 :128], r3
 +.endm
 +
++#define BIT_DEPTH 8
++#include "rpi_hevc_idct_fn_neon.S"
++
++.text
++
 +.align 4
 +tr4f:
 +.word 0x00240053  // 36 and d1[0] = 83
@@ -3843,9 +3176,6 @@ index 0000000000..2ebbf15d2b
 +.word 0x0039002b  // 57, d2[0] = 43
 +.word 0x00190009  // 25, d2[2] = 9
 +
-+#define BIT_DEPTH 8
-+#include "rpi_hevc_idct_fn_neon.S"
-+
 +#undef BIT_DEPTH
 +#define BIT_DEPTH 10
 +#include "rpi_hevc_idct_fn_neon.S"
@@ -3890,10 +3220,10 @@ index 0000000000..109fa98c29
 +}
 diff --git a/libavcodec/arm/rpi_hevcdsp_init_neon.c b/libavcodec/arm/rpi_hevcdsp_init_neon.c
 new file mode 100644
-index 0000000000..764647fed9
+index 0000000000..a721e392ab
 --- /dev/null
 +++ b/libavcodec/arm/rpi_hevcdsp_init_neon.c
-@@ -0,0 +1,473 @@
+@@ -0,0 +1,465 @@
 +/*
 + * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi>
 + *
@@ -3927,17 +3257,14 @@ index 0000000000..764647fed9
 +
 +void ff_hevc_rpi_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 +void ff_hevc_rpi_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
-+void ff_hevc_rpi_v_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
-+void ff_hevc_rpi_h_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 +
 +void ff_hevc_rpi_v_loop_filter_luma_neon_10(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 +void ff_hevc_rpi_h_loop_filter_luma_neon_10(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
-+void ff_hevc_rpi_v_loop_filter_chroma_neon_10(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
-+void ff_hevc_rpi_h_loop_filter_chroma_neon_10(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 +
++void ff_hevc_rpi_h_loop_filter_luma2_neon_8(uint8_t * _pix_r,
++                             unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f);
 +void ff_hevc_rpi_v_loop_filter_luma2_neon_8(uint8_t * _pix_r,
-+                             unsigned int _stride, unsigned int beta, const int32_t tc[2],
-+                             const uint8_t no_p[2], const uint8_t no_q[2],
++                             unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f,
 +                             uint8_t * _pix_l);
 +void ff_hevc_rpi_h_loop_filter_uv_neon_8(uint8_t * src, unsigned int stride, uint32_t tc4,
 +                             unsigned int no_f);
@@ -3945,9 +3272,10 @@ index 0000000000..764647fed9
 +                             uint8_t * src_l,
 +                             unsigned int no_f);
 +
++void ff_hevc_rpi_h_loop_filter_luma2_neon_10(uint8_t * _pix_r,
++                             unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f);
 +void ff_hevc_rpi_v_loop_filter_luma2_neon_10(uint8_t * _pix_r,
-+                             unsigned int _stride, unsigned int beta, const int32_t tc[2],
-+                             const uint8_t no_p[2], const uint8_t no_q[2],
++                             unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f,
 +                             uint8_t * _pix_l);
 +void ff_hevc_rpi_h_loop_filter_uv_neon_10(uint8_t * src, unsigned int stride, uint32_t tc4,
 +                             unsigned int no_f);
@@ -4222,7 +3550,7 @@ index 0000000000..764647fed9
 +
 +
 +
-+#if (2*MAX_PB_SIZE + FF_INPUT_BUFFER_PADDING_SIZE) != 160
++#if RPI_HEVC_SAO_BUF_STRIDE != 160
 +#error SAO edge src stride not 160 - value used in .S
 +#endif
 +
@@ -4233,10 +3561,7 @@ index 0000000000..764647fed9
 +        c->hevc_v_loop_filter_luma_c   = ff_hevc_rpi_v_loop_filter_luma_neon;
 +        c->hevc_h_loop_filter_luma     = ff_hevc_rpi_h_loop_filter_luma_neon;
 +        c->hevc_h_loop_filter_luma_c   = ff_hevc_rpi_h_loop_filter_luma_neon;
-+        c->hevc_v_loop_filter_chroma   = ff_hevc_rpi_v_loop_filter_chroma_neon;
-+        c->hevc_v_loop_filter_chroma_c = ff_hevc_rpi_v_loop_filter_chroma_neon;
-+        c->hevc_h_loop_filter_chroma   = ff_hevc_rpi_h_loop_filter_chroma_neon;
-+        c->hevc_h_loop_filter_chroma_c = ff_hevc_rpi_h_loop_filter_chroma_neon;
++        c->hevc_h_loop_filter_luma2    = ff_hevc_rpi_h_loop_filter_luma2_neon_8;
 +        c->hevc_v_loop_filter_luma2    = ff_hevc_rpi_v_loop_filter_luma2_neon_8;
 +        c->hevc_h_loop_filter_uv       = ff_hevc_rpi_h_loop_filter_uv_neon_8;
 +        c->hevc_v_loop_filter_uv2      = ff_hevc_rpi_v_loop_filter_uv2_neon_8;
@@ -4299,10 +3624,7 @@ index 0000000000..764647fed9
 +        c->hevc_v_loop_filter_luma_c   = ff_hevc_rpi_v_loop_filter_luma_neon_10;
 +        c->hevc_h_loop_filter_luma     = ff_hevc_rpi_h_loop_filter_luma_neon_10;
 +        c->hevc_h_loop_filter_luma_c   = ff_hevc_rpi_h_loop_filter_luma_neon_10;
-+        c->hevc_v_loop_filter_chroma   = ff_hevc_rpi_v_loop_filter_chroma_neon_10;
-+        c->hevc_v_loop_filter_chroma_c = ff_hevc_rpi_v_loop_filter_chroma_neon_10;
-+        c->hevc_h_loop_filter_chroma   = ff_hevc_rpi_h_loop_filter_chroma_neon_10;
-+        c->hevc_h_loop_filter_chroma_c = ff_hevc_rpi_h_loop_filter_chroma_neon_10;
++        c->hevc_h_loop_filter_luma2    = ff_hevc_rpi_h_loop_filter_luma2_neon_10;
 +        c->hevc_v_loop_filter_luma2    = ff_hevc_rpi_v_loop_filter_luma2_neon_10;
 +        c->hevc_h_loop_filter_uv       = ff_hevc_rpi_h_loop_filter_uv_neon_10;
 +        c->hevc_v_loop_filter_uv2      = ff_hevc_rpi_v_loop_filter_uv2_neon_10;
@@ -4369,13 +3691,15 @@ index 0000000000..764647fed9
 +}
 diff --git a/libavcodec/arm/rpi_hevcdsp_res16_neon.S b/libavcodec/arm/rpi_hevcdsp_res16_neon.S
 new file mode 100644
-index 0000000000..7dfcc2751a
+index 0000000000..f831e55a6d
 --- /dev/null
 +++ b/libavcodec/arm/rpi_hevcdsp_res16_neon.S
-@@ -0,0 +1,610 @@
+@@ -0,0 +1,591 @@
 +#include "libavutil/arm/asm.S"
 +#include "neon.S"
 +
++ .arch_extension mp @ enable PLDW
++
 +#define BIT_DEPTH 10
 +
 +.macro clip16_4 Q0, Q1, Q2, Q3, Q_MIN, Q_MAX
@@ -4390,228 +3714,214 @@ index 0000000000..7dfcc2751a
 +.endm
 +
 +@ add_residual4x4(
-+@  uint8_t *_dst,     [r0]
++@  uint16_t *_dst,    [r0]
 +@  int16_t *res,      [r1]
 +@  ptrdiff_t stride)  [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_4x4_neon_, BIT_DEPTH), export=1
++        add         ip, r0, r2
 +        vld1.16     {q10, q11}, [r1]
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vld1.16     {d0}, [r0, :64], r2
-+        vld1.16     {d1}, [r0, :64], r2
-+        vld1.16     {d2}, [r0, :64], r2
-+        vld1.16     {d3}, [r0, :64], r2
-+        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
++        lsl         r2, #1
++        vld1.16     {d0}, [r0 :64], r2
++        vld1.16     {d1}, [ip :64], r2
++        vld1.16     {d2}, [r0 :64]
++        vld1.16     {d3}, [ip :64]
++        sub         r0, r2
 +        vqadd.s16   q0,  q10
++        sub         ip, r2
 +        vqadd.s16   q1,  q11
-+        sub         r0,  r0,  r2, lsl #2
++        vmov.i16    q8,  #0
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
 +        vmax.s16    q0,  q0,  q8
 +        vmax.s16    q1,  q1,  q8
 +        vmin.s16    q0,  q0,  q9
 +        vmin.s16    q1,  q1,  q9
-+        vst1.16     {d0}, [r0, :64], r2
-+        vst1.16     {d1}, [r0, :64], r2
-+        vst1.16     {d2}, [r0, :64], r2
-+        vst1.16     {d3}, [r0, :64], r2
++        vst1.16     {d0}, [r0 :64], r2
++        vst1.16     {d1}, [ip :64], r2
++        vst1.16     {d2}, [r0 :64]
++        vst1.16     {d3}, [ip :64]
 +        bx          lr
 +
 +endfunc
 +
-+@ add_residual4x4(
-+@  uint8_t *_dst,     [r0]
++@ add_residual4x4_dc(
++@  uint16_t *_dst,    [r0]
 +@  ptrdiff_t stride,  [r1]
 +@  int dc)            [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_4x4_dc_neon_, BIT_DEPTH), export=1
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vdup.i16    q9,  r3
-+        vld1.16     {d0}, [r0, :64], r1
-+        vld1.16     {d1}, [r0, :64], r1
++        add         ip, r0, r1
 +        vdup.16     q15, r2
-+        vld1.16     {d2}, [r0, :64], r1
-+        vld1.16     {d3}, [r0, :64], r1
-+        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
++        lsl         r1, #1
++        vld1.16     {d0}, [r0 :64], r1
++        vld1.16     {d1}, [ip :64], r1
++        vld1.16     {d2}, [r0 :64]
++        vld1.16     {d3}, [ip :64]
++        sub         r0, r1
 +        vqadd.s16   q0,  q15
++        sub         ip, r1
 +        vqadd.s16   q1,  q15
-+        sub         r0,  r0,  r1, lsl #2
++        vmov.i16    q8,  #0
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
 +        vmax.s16    q0,  q0,  q8
 +        vmax.s16    q1,  q1,  q8
 +        vmin.s16    q0,  q0,  q9
 +        vmin.s16    q1,  q1,  q9
-+        vst1.16     {d0}, [r0, :64], r1
-+        vst1.16     {d1}, [r0, :64], r1
-+        vst1.16     {d2}, [r0, :64], r1
-+        vst1.16     {d3}, [r0, :64], r1
++        vst1.16     {d0}, [r0 :64], r1
++        vst1.16     {d1}, [ip :64], r1
++        vst1.16     {d2}, [r0 :64]
++        vst1.16     {d3}, [ip :64]
 +        bx          lr
 +
 +endfunc
 +
 +
 +@ add_residual8x8(
-+@  uint8_t *_dst,     [r0]
++@  uint16_t *_dst,    [r0]
 +@  int16_t *res,      [r1]
 +@  ptrdiff_t stride)  [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_8x8_neon_, BIT_DEPTH), export=1
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
++        mov         r3, #8
 +        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
-+        mov         r12, #2
++        add         ip, r0, r2
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
++        lsl         r2, #1
 +1:
 +        vldm        r1!, {q10-q13}
-+        vld1.16     {q0}, [r0, :128], r2
-+        subs        r12, #1
-+        vld1.16     {q1}, [r0, :128], r2
++        vld1.16     {q0}, [r0 :128], r2
++        vld1.16     {q1}, [ip :128], r2
++        vld1.16     {q2}, [r0 :128]
++        vld1.16     {q3}, [ip :128]
++        sub         r0, r2
 +        vqadd.s16   q0,  q10
-+        vld1.16     {q2}, [r0, :128], r2
++        sub         ip, r2
 +        vqadd.s16   q1,  q11
-+        vld1.16     {q3}, [r0, :128], r2
++        subs        r3, #4
 +        vqadd.s16   q2,  q12
 +        vqadd.s16   q3,  q13
-+        sub         r0,  r0,  r2, lsl #2
-+        vmax.s16    q0,  q0,  q8
-+        vmax.s16    q1,  q1,  q8
-+        vmax.s16    q2,  q2,  q8
-+        vmax.s16    q3,  q3,  q8
-+        vmin.s16    q0,  q0,  q9
-+        vmin.s16    q1,  q1,  q9
-+        vst1.16     {q0}, [r0, :128], r2
-+        vmin.s16    q2,  q2,  q9
-+        vst1.16     {q1}, [r0, :128], r2
-+        vmin.s16    q3,  q3,  q9
-+        vst1.16     {q2}, [r0, :128], r2
-+        vst1.16     {q3}, [r0, :128], r2
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0}, [r0 :128], r2
++        vst1.16     {q1}, [ip :128], r2
++        vst1.16     {q2}, [r0 :128], r2
++        vst1.16     {q3}, [ip :128], r2
 +        bne         1b
 +        bx          lr
 +
 +endfunc
 +
 +@ add_residual4x4_dc_c(
-+@  uint8_t *_dst,     [r0]
++@  uint16_t *_dst,    [r0]
 +@  ptrdiff_t stride,  [r1]
 +@  int dc_uv)         [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_4x4_dc_c_neon_, BIT_DEPTH), export=1
-+        mov         r12, #1
++        mov         r3, #4
 +        vdup.32     q15, r2
 +        b           9f
 +endfunc
 +
 +@ add_residual8x8_dc(
-+@  uint8_t *_dst,     [r0]
++@  uint16_t *_dst,    [r0]
 +@  ptrdiff_t stride,  [r1]
 +@  int dc)            [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_8x8_dc_neon_, BIT_DEPTH), export=1
-+        mov         r12, #2
 +        vdup.16     q15, r2
++        mov         r3, #8
 +9:
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
++        vmov.i16    q8,  #0
++        add         ip, r0, r1
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
++        lsl         r1, #1
 +1:
-+        vld1.16     {q0}, [r0, :128], r1
-+        subs        r12, #1
-+        vld1.16     {q1}, [r0, :128], r1
++        vld1.16     {q0}, [r0 :128], r1
++        vld1.16     {q1}, [ip :128], r1
++        vld1.16     {q2}, [r0 :128]
++        vld1.16     {q3}, [ip :128]
++        sub         r0, r1
 +        vqadd.s16   q0,  q15
-+        vld1.16     {q2}, [r0, :128], r1
++        sub         ip, r1
 +        vqadd.s16   q1,  q15
-+        vld1.16     {q3}, [r0, :128], r1
++        subs        r3, #4
 +        vqadd.s16   q2,  q15
 +        vqadd.s16   q3,  q15
-+        sub         r0,  r0,  r1, lsl #2
-+        vmax.s16    q0,  q8
-+        vmax.s16    q1,  q8
-+        vmax.s16    q2,  q8
-+        vmax.s16    q3,  q8
-+        vmin.s16    q0,  q9
-+        vmin.s16    q1,  q9
-+        vst1.16     {q0}, [r0, :128], r1
-+        vmin.s16    q2,  q9
-+        vst1.16     {q1}, [r0, :128], r1
-+        vmin.s16    q3,  q9
-+        vst1.16     {q2}, [r0, :128], r1
-+        vst1.16     {q3}, [r0, :128], r1
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0}, [r0 :128], r1
++        vst1.16     {q1}, [ip :128], r1
++        vst1.16     {q2}, [r0 :128], r1
++        vst1.16     {q3}, [ip :128], r1
 +        bne         1b
 +        bx          lr
 +
 +endfunc
 +
 +@ add_residual16x16(
-+@  uint8_t *_dst,     [r0]
++@  uint16_t *_dst,    [r0]
 +@  int16_t *res,      [r1]
 +@  ptrdiff_t stride)  [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_16x16_neon_, BIT_DEPTH), export=1
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
-+        mov         r12, #8
++        add         ip, r0, r2
++        vmov.i16    q8,  #0
++        lsl         r2, #1
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
++        mov         r3, #16
 +1:
 +        vldm        r1!, {q10-q13}
 +        @ For RPI Sand we could guarantee :256 but not for general
 +        @ non-RPI allocation. :128 is as good as we can claim
-+        vld1.16     {q0, q1}, [r0, :128], r2
-+        subs        r12, #1
-+        vld1.16     {q2, q3}, [r0, :128]
++        vld1.16     {q0, q1}, [r0 :128]
++        subs        r3, #2
++        vld1.16     {q2, q3}, [ip :128]
 +        vqadd.s16   q0,  q10
 +        vqadd.s16   q1,  q11
 +        vqadd.s16   q2,  q12
 +        vqadd.s16   q3,  q13
-+        sub         r0,  r2
-+        vmax.s16    q0,  q0,  q8
-+        vmax.s16    q1,  q1,  q8
-+        vmax.s16    q2,  q2,  q8
-+        vmax.s16    q3,  q3,  q8
-+        vmin.s16    q0,  q0,  q9
-+        vmin.s16    q1,  q1,  q9
-+        vmin.s16    q2,  q2,  q9
-+        vmin.s16    q3,  q3,  q9
-+        vst1.16     {q0, q1}, [r0, :128], r2
-+        vst1.16     {q2, q3}, [r0, :128], r2
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0, q1}, [r0 :128], r2
++        vst1.16     {q2, q3}, [ip :128], r2
 +        bne         1b
 +        bx          lr
 +endfunc
 +
 +@ add_residual8x8_dc_c(
-+@  uint8_t *_dst,     [r0]
++@  uint16_t *_dst,    [r0]
 +@  ptrdiff_t stride,  [r1]
 +@  int dc_uv)         [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_8x8_dc_c_neon_, BIT_DEPTH), export=1
-+        mov         r12, #4
++        mov         r3, #8
 +        vdup.32     q15, r2
 +        b           9f
 +endfunc
 +
 +@ add_residual16x16_dc(
-+@  uint8_t *_dst,     [r0]
++@  uint16_t *_dst,    [r0]
 +@  ptrdiff_t stride,  [r1]
 +@  int dc)            [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_16x16_dc_neon_, BIT_DEPTH), export=1
 +        vdup.i16    q15, r2
-+        mov         r12, #8
++        mov         r3, #16
 +9:
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
++        vmov.i16    q8,  #0
++        add         ip, r0, r1
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
++        lsl         r1, #1
 +1:
 +        @ For RPI Sand we could guarantee :256 but not for general
 +        @ non-RPI allocation. :128 is as good as we can claim
-+        vld1.16     {q0, q1}, [r0, :128], r1
-+        subs        r12, #1
-+        vld1.16     {q2, q3}, [r0, :128]
++        vld1.16     {q0, q1}, [r0 :128]
++        subs        r3, #2
 +        vqadd.s16   q0,  q15
 +        vqadd.s16   q1,  q15
++        vld1.16     {q2, q3}, [ip :128]
 +        vqadd.s16   q2,  q15
 +        vqadd.s16   q3,  q15
-+        sub         r0,  r1
-+        clip16_4 q0, q1, q2, q3, q8, q9
-+        vst1.16     {q0, q1}, [r0, :128], r1
-+        vst1.16     {q2, q3}, [r0, :128], r1
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0, q1}, [r0 :128], r1
++        vst1.16     {q2, q3}, [ip :128], r1
 +        bne         1b
 +        bx          lr
 +
@@ -4619,64 +3929,68 @@ index 0000000000..7dfcc2751a
 +
 +
 +@ add_residual32x32(
-+@  uint8_t *_dst,     [r0]
++@  uint16_t *_dst,    [r0]
 +@  int16_t *res,      [r1]
 +@  ptrdiff_t stride)  [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_32x32_neon_, BIT_DEPTH), export=1
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
-+        mov         r12, #32
++        push        {lr}
++        mov         r3, #32
++        vmov.i16    q8,  #0
++        add         lr, r0, r2
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
++        add         ip, r0, #32
 +1:
 +        vldm        r1!, {q10-q13}
 +        vldm        r0,  {q0-q3}
-+        subs        r12, #1
 +        vqadd.s16   q0,  q10
++          pldw        [lr]
 +        vqadd.s16   q1,  q11
++          add         lr, r2
 +        vqadd.s16   q2,  q12
++        subs        r3, #1
 +        vqadd.s16   q3,  q13
-+        clip16_4 q0, q1, q2, q3, q8, q9
-+        vstm        r0,  {q0-q3}
-+        add         r0,  r2
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0-q1}, [r0], r2
++        vst1.16     {q2-q3}, [ip], r2
 +        bne         1b
-+        bx          lr
++        pop         {pc}
 +
 +endfunc
 +
-+@ add_residual8x8_dc_c(
-+@  uint8_t *_dst,     [r0]
++@ add_residual16x16_dc_c(
++@  uint16_t *_dst,    [r0]
 +@  ptrdiff_t stride,  [r1]
 +@  int dc_uv)         [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_16x16_dc_c_neon_, BIT_DEPTH), export=1
-+        mov         r12, #16
++        mov         r3, #16
 +        vdup.32     q15, r2
 +        b           9f
 +endfunc
 +
 +@ add_residual32x32_dc(
-+@  uint8_t *_dst,     [r0]
++@  uint16_t *_dst,    [r0]
 +@  ptrdiff_t stride,  [r1]
 +@  int dc)            [r2]
 +
 +function JOIN(ff_hevc_rpi_add_residual_32x32_dc_neon_, BIT_DEPTH), export=1
-+        vdup.i16    q15, r2
-+        mov         r12, #32
++        vdup.16     q15, r2
++        mov         r3, #32
 +9:
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
++        vmov.i16    q8,  #0
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
++        add         ip, r0, #32
 +1:
 +        vldm        r0,  {q0-q3}
-+        subs        r12, #1
 +        vqadd.s16   q0,  q15
++        subs        r3, #1
 +        vqadd.s16   q1,  q15
 +        vqadd.s16   q2,  q15
 +        vqadd.s16   q3,  q15
-+        clip16_4 q0, q1, q2, q3, q8, q9
-+        vstm        r0,  {q0-q3}
-+        add         r0,  r1
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0-q1}, [r0], r1
++        vst1.16     {q2-q3}, [ip], r1
 +        bne         1b
 +        bx          lr
 +
@@ -4686,188 +4000,878 @@ index 0000000000..7dfcc2751a
 +@ U add
 +
 +@ add_residual4x4_u(
-+@   uint8_t *_dst,        [r0]
++@   uint16_t *_dst,       [r0]
 +@   const int16_t *res,   [r1]
 +@   ptrdiff_t stride,     [r2]
 +@   int dc)               [r3]
 +
 +function JOIN(ff_hevc_rpi_add_residual_4x4_u_neon_, BIT_DEPTH), export=1
-+        vld1.16     {q10, q11}, [r1, :256]
 +        vdup.16     q15, r3
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
-+
-+        vld2.16     {d0, d2}, [r0, :128], r2
-+        vld2.16     {d1, d3}, [r0, :128], r2
-+        vld2.16     {d4, d6}, [r0, :128], r2
-+        vld2.16     {d5, d7}, [r0, :128], r2
++        add         ip, r0, r2
++        vld1.16     {q10, q11}, [r1 :256]
++        lsl         r2, #1
++        vld2.16     {d0, d2}, [r0 :128], r2
++        vld2.16     {d1, d3}, [ip :128], r2
++        vld2.16     {d4, d6}, [r0 :128]
++        vld2.16     {d5, d7}, [ip :128]
++        sub         r0, r2
++        vmov.i16    q8,  #0
++        sub         ip, r2
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
 +
 +        vqadd.s16   q0,  q10
 +        vqadd.s16   q1,  q15
 +        vqadd.s16   q2,  q11
 +        vqadd.s16   q3,  q15
-+        sub         r0,  r0,  r2, lsl #2
-+        clip16_4 q0, q1, q2, q3, q8, q9
++        clip16_4    q0, q1, q2, q3, q8, q9
 +
-+        vst2.16     {d0, d2}, [r0, :128], r2
-+        vst2.16     {d1, d3}, [r0, :128], r2
-+        vst2.16     {d4, d6}, [r0, :128], r2
-+        vst2.16     {d5, d7}, [r0, :128]
++        vst2.16     {d0, d2}, [r0 :128], r2
++        vst2.16     {d1, d3}, [ip :128], r2
++        vst2.16     {d4, d6}, [r0 :128]
++        vst2.16     {d5, d7}, [ip :128]
 +        bx          lr
 +endfunc
 +
 +@ add_residual8x8_u(
-+@   uint8_t *_dst,        [r0]
++@   uint16_t *_dst,       [r0]
 +@   const int16_t *res,   [r1]
 +@   ptrdiff_t stride,     [r2]
 +@   int dc)               [r3]
 +
 +function JOIN(ff_hevc_rpi_add_residual_8x8_u_neon_, BIT_DEPTH), export=1
 +        vdup.16     q15, r3
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        mov         r12, #4
-+        vdup.i16    q9,  r3
++        mov         r3, #8
++        vmov.i16    q8,  #0
++        add         ip, r0, r2
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
++        lsl         r2, #1
 +1:
-+        vld2.16     {q0, q1}, [r0, :256], r2
-+        vld2.16     {q2, q3}, [r0, :256]
-+        vld1.16     {q10, q11}, [r1, :256]!
-+        subs        r12, #1
++        vld2.16     {q0, q1}, [r0 :256]
++        subs        r3, #2
++        vld2.16     {q2, q3}, [ip :256]
++        vld1.16     {q10, q11}, [r1 :256]!
 +        vqadd.s16   q0,  q10
 +        vqadd.s16   q1,  q15
 +        vqadd.s16   q2,  q11
 +        vqadd.s16   q3,  q15
-+        sub         r0,  r2
-+        clip16_4 q0, q1, q2, q3, q8, q9
-+        vst2.16     {q0, q1}, [r0, :256], r2
-+        vst2.16     {q2, q3}, [r0, :256], r2
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2
++        vst2.16     {q2, q3}, [ip :256], r2
 +        bne         1b
 +        bx          lr
 +endfunc
 +
 +@ add_residual16x16_u(
-+@   uint8_t *_dst,        [r0]
++@   uint16_t *_dst,       [r0]
 +@   const int16_t *res,   [r1]
 +@   ptrdiff_t stride,     [r2]
 +@   int dc)               [r3]
 +
 +function JOIN(ff_hevc_rpi_add_residual_16x16_u_neon_, BIT_DEPTH), export=1
++        push        {lr}
 +        vdup.16     q15, r3
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        mov         r12, #16
-+        vdup.i16    q9,  r3
-+        sub         r2,  #32
++        mov         r3, #16
++        vmov.i16    q8,  #0
++        add         lr, r0, r2
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1
++        add         ip, r0, #32
 +1:
-+        vld2.16     {q0, q1}, [r0, :256]!
-+        vld2.16     {q2, q3}, [r0, :256]
-+        vld1.16     {q10, q11}, [r1, :256]!
-+        subs        r12, #1
++        vld2.16     {q0, q1}, [r0 :256]
++        vld2.16     {q2, q3}, [ip :256]
++        vld1.16     {q10, q11}, [r1 :256]!
 +        vqadd.s16   q0,  q10
++          pldw        [lr]
 +        vqadd.s16   q1,  q15
++          add         lr, r2
 +        vqadd.s16   q2,  q11
++        subs        r3, #1
 +        vqadd.s16   q3,  q15
-+        sub         r0,  #32
-+        clip16_4 q0, q1, q2, q3, q8, q9
-+        vst2.16     {q0, q1}, [r0, :256]!
-+        vst2.16     {q2, q3}, [r0, :256], r2
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2
++        vst2.16     {q2, q3}, [ip :256], r2
++        bne         1b
++        pop         {pc}
++endfunc
++
++@ ============================================================================
++@ V add
++
++@ add_residual4x4_v(
++@   uint16_t *_dst,       [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc)               [r3]
++
++function JOIN(ff_hevc_rpi_add_residual_4x4_v_neon_, BIT_DEPTH), export=1
++        vdup.16     q15, r3                 @ q15 = dc (r3) in every 16-bit lane
++        add         ip, r0, r2              @ ip = dst + stride (rows 1 and 3)
++        vld1.16     {q10, q11}, [r1 :256]   @ q10, q11 = the 16 residuals
++        lsl         r2, #1                  @ r2 = 2 * stride: r0 and ip each step two rows
++        vld2.16     {d0, d2}, [r0 :128], r2 @ row 0 de-interleaved: even samples -> d0, odd -> d2
++        vld2.16     {d1, d3}, [ip :128], r2 @ row 1
++        vld2.16     {d4, d6}, [r0 :128]     @ row 2
++        vld2.16     {d5, d7}, [ip :128]     @ row 3
++        sub         r0, r2                  @ rewind r0 to row 0 for the stores
++        vmov.i16    q8,  #0                 @ q8 = 0; clip16_4 is defined outside this chunk, presumably clamps to [q8, q9]
++        sub         ip, r2                  @ rewind ip to row 1
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ q9 = largest sample value for this bit depth
++
++        vqadd.s16   q0,  q15                @ even samples (U) of rows 0-1 += dc
++        vqadd.s16   q1,  q10                @ odd samples (V) of rows 0-1 += residual
++        vqadd.s16   q2,  q15                @ even samples of rows 2-3 += dc
++        vqadd.s16   q3,  q11                @ odd samples of rows 2-3 += residual
++        clip16_4    q0, q1, q2, q3, q8, q9
++
++        vst2.16     {d0, d2}, [r0 :128], r2 @ re-interleave and write row 0
++        vst2.16     {d1, d3}, [ip :128], r2 @ row 1
++        vst2.16     {d4, d6}, [r0 :128]     @ row 2
++        vst2.16     {d5, d7}, [ip :128]     @ row 3
++        bx          lr
++endfunc
++
++@ add_residual8x8_v(
++@   uint16_t *_dst,       [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc)               [r3]
++
++function JOIN(ff_hevc_rpi_add_residual_8x8_v_neon_, BIT_DEPTH), export=1
++        vdup.16     q15, r3                 @ q15 = dc (r3) in every 16-bit lane
++        mov         r3, #8                  @ r3 is reused as the row counter
++        vmov.i16    q8,  #0                 @ q8 = 0, handed to clip16_4 (presumably the lower bound)
++        add         ip, r0, r2              @ ip = dst + stride (odd rows)
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ q9 = largest sample value for this bit depth
++        lsl         r2, #1                  @ r2 = 2 * stride: two rows are handled per pass
++1:
++        vld2.16     {q0, q1}, [r0 :256]     @ even row: even samples -> q0, odd samples -> q1
++        subs        r3, #2                  @ flags are consumed by the bne at the loop end
++        vld2.16     {q2, q3}, [ip :256]     @ odd row
++        vld1.16     {q10, q11}, [r1 :256]!  @ 8 residuals for each of the two rows
++        vqadd.s16   q0,  q15                @ even samples (U) += dc
++        vqadd.s16   q1,  q10                @ odd samples (V) += residual
++        vqadd.s16   q2,  q15
++        vqadd.s16   q3,  q11
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2 @ re-interleave, write back, advance two rows
++        vst2.16     {q2, q3}, [ip :256], r2
 +        bne         1b
 +        bx          lr
 +endfunc
 +
++@ add_residual16x16_v(
++@   uint16_t *_dst,       [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc)               [r3]
++
++function JOIN(ff_hevc_rpi_add_residual_16x16_v_neon_, BIT_DEPTH), export=1
++        push        {lr}                    @ lr is used below as a prefetch pointer
++        vdup.16     q15, r3                 @ q15 = dc (r3) in every 16-bit lane
++        mov         r3, #16                 @ r3 is reused as the row counter
++        vmov.i16    q8,  #0                 @ q8 = 0, handed to clip16_4 (presumably the lower bound)
++        add         lr, r0, r2              @ lr = start of the next row
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ q9 = largest sample value for this bit depth
++        add         ip, r0, #32             @ ip = second 32-byte half of the current row
++1:
++        vld2.16     {q0, q1}, [r0 :256]     @ first half of row: even samples -> q0, odd -> q1
++        vld2.16     {q2, q3}, [ip :256]     @ second half of row
++        vld1.16     {q10, q11}, [r1 :256]!  @ 16 residuals for this row
++        vqadd.s16   q0,  q15                @ even samples (U) += dc
++          pldw        [lr]                  @ prefetch the next row for writing
++        vqadd.s16   q1,  q10                @ odd samples (V) += residual
++          add         lr, r2                @ move the prefetch pointer down one row
++        vqadd.s16   q2,  q15
++        subs        r3, #1                  @ flags are consumed by the bne at the loop end
++        vqadd.s16   q3,  q11
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2 @ re-interleave, write back, advance one row
++        vst2.16     {q2, q3}, [ip :256], r2
++        bne         1b
++        pop         {pc}                    @ return by popping the saved lr into pc
++endfunc
++
++@ ============================================================================
++@ U & V add
++
++@ add_residual4x4_c(
++@   uint16_t *_dst,       [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride)     [r2]
++
++function JOIN(ff_hevc_rpi_add_residual_4x4_c_neon_, BIT_DEPTH), export=1
++        vmov.i16    q8,  #0                 @ q8 = 0, handed to clip16_4 (presumably the lower bound)
++        add         ip, r0, r2              @ ip = dst + stride (rows 1 and 3)
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ q9 = largest sample value for this bit depth
++        lsl         r2, #1                  @ r2 = 2 * stride
++        vldm        r1, {q10-q13}           @ 32 residuals: q10-q11 go to even samples, q12-q13 to odd
++        vld2.16     {d0, d2}, [r0 :128], r2 @ row 0 de-interleaved: even samples -> d0, odd -> d2
++        vld2.16     {d1, d3}, [ip :128], r2 @ row 1
++        vld2.16     {d4, d6}, [r0 :128]     @ row 2
++        vld2.16     {d5, d7}, [ip :128]     @ row 3
++
++        sub         r0, r2                  @ rewind r0 to row 0 for the stores
++        vqadd.s16   q0,  q10                @ even samples (U), rows 0-1
++        sub         ip, r2                  @ rewind ip to row 1
++        vqadd.s16   q1,  q12                @ odd samples (V), rows 0-1
++        vqadd.s16   q2,  q11                @ even samples, rows 2-3
++        vqadd.s16   q3,  q13                @ odd samples, rows 2-3
++        clip16_4    q0, q1, q2, q3, q8, q9
++
++        vst2.16     {d0, d2}, [r0 :128], r2 @ re-interleave and write row 0
++        vst2.16     {d1, d3}, [ip :128], r2 @ row 1
++        vst2.16     {d4, d6}, [r0 :128]     @ row 2
++        vst2.16     {d5, d7}, [ip :128]     @ row 3
++        bx          lr
++endfunc
++
++@ add_residual8x8_c(
++@   uint16_t *_dst,       [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride)     [r2]
++
++function JOIN(ff_hevc_rpi_add_residual_8x8_c_neon_, BIT_DEPTH), export=1
++        push        {lr}                    @ lr is used below as the row counter
++        add         ip, r0, r2              @ ip = dst + stride (odd rows)
++        lsl         r2, #1                  @ r2 = 2 * stride: two rows are handled per pass
++        vmov.i16    q8,  #0                 @ q8 = 0, handed to clip16_4 (presumably the lower bound)
++        add         r3, r1, #(8*8*2)  @ Offset to V
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ q9 = largest sample value for this bit depth
++        mov         lr, #8                  @ 8 rows in total
++1:
++        vld1.16     {q10, q11}, [r1 :256]!  @ U residuals for two rows
++        subs        lr, #2                  @ flags are consumed by the bne at the loop end
++        vld2.16     {q0, q1}, [r0 :256]     @ even row: even samples (U) -> q0, odd (V) -> q1
++        vld2.16     {q2, q3}, [ip :256]     @ odd row
++        vld1.16     {q12, q13}, [r3 :256]!  @ V residuals for the same two rows
++        vqadd.s16   q0,  q10                @ U, even row
++        vqadd.s16   q1,  q12                @ V, even row
++        vqadd.s16   q2,  q11                @ U, odd row
++        vqadd.s16   q3,  q13                @ V, odd row
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2 @ re-interleave, write back, advance two rows
++        vst2.16     {q2, q3}, [ip :256], r2
++        bne         1b
++        pop         {pc}                    @ return by popping the saved lr into pc
++endfunc
++
++@ add_residual16x16_c(
++@   uint16_t *_dst,       [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride)     [r2]
++
++function JOIN(ff_hevc_rpi_add_residual_16x16_c_neon_, BIT_DEPTH), export=1
++        push        {r4, lr}                @ r4 = prefetch pointer, lr = row counter below
++        vmov.i16    q8,  #0                 @ q8 = 0, handed to clip16_4 (presumably the lower bound)
++        add         r3,  r1, #(16*16*2)  @ Offset to V
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ q9 = largest sample value for this bit depth
++        add         ip, r0, #32             @ ip = second 32-byte half of the current row
++        add         r4, r0, r2              @ r4 = start of the next row
++        mov         lr, #16                 @ 16 rows, one per pass
++1:
++        vld2.16     {q0, q1}, [r0 :256]     @ first half of row: even samples (U) -> q0, odd (V) -> q1
++        vld2.16     {q2, q3}, [ip :256]     @ second half of row
++        vld1.16     {q10, q11}, [r1 :256]!  @ 16 U residuals for this row
++        vld1.16     {q12, q13}, [r3 :256]!  @ 16 V residuals for this row
++        vqadd.s16   q0,  q10                @ U, first half
++          pldw        [r4]                  @ prefetch the next row for writing
++        vqadd.s16   q1,  q12                @ V, first half
++          add         r4, r2                @ move the prefetch pointer down one row
++        vqadd.s16   q2,  q11                @ U, second half
++        subs        lr, #1                  @ flags are consumed by the bne at the loop end
++        vqadd.s16   q3,  q13                @ V, second half
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2 @ re-interleave, write back, advance one row
++        vst2.16     {q2, q3}, [ip :256], r2
++        bne         1b
++        pop         {r4,pc}                 @ restore r4 and return
++endfunc
++
+diff --git a/libavcodec/arm/rpi_hevcdsp_res8_neon.S b/libavcodec/arm/rpi_hevcdsp_res8_neon.S
+new file mode 100644
+index 0000000000..ea3b3faf6f
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcdsp_res8_neon.S
+@@ -0,0 +1,712 @@
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++ .arch_extension mp @ enable PLDW
++
++@ General notes:
++@
++@ Residual is generally only guaranteed to be clipped to 16 bits.
++@ This means that we do need to do vmovl, vqadd, vqmovun
++@ rather than vaddw, vqmovun (if we were clipped to 15 then we could get away
++@ with this).
++@
++@ There is an exception for the DC case because its transform is guaranteed
++@ to be small enough that overflow cannot occur during the first add.
++
++@ ============================================================================
++@ Y add
++
++function ff_hevc_rpi_add_residual_4x4_neon_8, export=1
++        add         ip, r0, r2              @ r0 = dst, r1 = res, r2 = stride (as used below); ip = rows 1 and 3
++        vld1.16     {q0, q1}, [r1]          @ q0, q1 = all 16 residuals
++        lsl         r2, #1                  @ r2 = 2 * stride
++        vld1.32     d4[0], [r0], r2         @ row 0 (4 bytes); r0 moves to row 2
++        rsb         r3, r2, #0              @ r3 = -2 * stride, used to step back
++        vld1.32     d4[1], [ip], r2         @ row 1; ip moves to row 3
++        vld1.32     d5[0], [r0], r3         @ row 2; r0 returns to row 0
++        vld1.32     d5[1], [ip], r3         @ row 3; ip returns to row 1
++        vmovl.u8    q8, d4                  @ widen rows 0-1 to 16 bits
++        vmovl.u8    q9, d5                  @ widen rows 2-3
++        vqadd.s16   q0, q8                  @ residual + pixel with signed saturation
++        vqadd.s16   q1, q9
++        vqmovun.s16 d0, q0                  @ narrow to bytes with unsigned saturation
++        vqmovun.s16 d1, q1
++        vst1.32     d0[0], [r0], r2         @ write row 0; r0 moves to row 2
++        vst1.32     d0[1], [ip], r2         @ write row 1; ip moves to row 3
++        vst1.32     d1[0], [r0]             @ write row 2
++        vst1.32     d1[1], [ip]             @ write row 3
++        bx          lr
++endfunc
++
++function ff_hevc_rpi_add_residual_8x8_neon_8, export=1
++        push        {r4, lr}                @ r4 and lr serve as look-ahead load pointers
++        vld1.16     {q0, q1}, [r1]!         @ residuals for rows 0 and 1
++        add         ip, r0, r2              @ ip = row 1 (store pointer for odd rows)
++        vld1.8      {d6}, [r0]              @ pixels of row 0
++        add         r4, r0, r2, lsl #1      @ r4 = row 2 (load pointer for even rows)
++        vld1.8      {d7}, [ip]              @ pixels of row 1
++        add         lr, ip, r2, lsl #1      @ lr = row 3 (load pointer for odd rows)
++        lsl         r2, #1                  @ r2 = 2 * stride
++        mov         r3, #8-2                @ three loop passes; the last row pair is finished after the loop
++        vmovl.u8    q2, d6                  @ widen pixels to 16 bits
++        vmovl.u8    q3, d7
++        vqadd.s16   q2, q0                  @ pixel + residual with signed saturation
++        vqadd.s16   q3, q1
++1:
++          vld1.16     {q0, q1}, [r1]!       @ extra-indented instructions work on the next row pair
++        subs        r3, #2                  @ flags are consumed by the bne at the loop end
++        vqmovun.s16 d4, q2                  @ narrow the current row pair to bytes
++        vqmovun.s16 d5, q3
++          vld1.8      {d6}, [r4], r2
++          vld1.8      {d7}, [lr], r2
++        vst1.8      {d4}, [r0], r2          @ store the current row pair
++        vst1.8      {d5}, [ip], r2
++          vmovl.u8    q2, d6
++            pldw        [r4]                @ prefetch for write at the advanced load pointer
++          vmovl.u8    q3, d7
++          vqadd.s16   q2, q0
++          vqadd.s16   q3, q1
++        bne         1b
++
++          vqmovun.s16 d4, q2                @ drain: narrow and store the final row pair
++          vqmovun.s16 d5, q3
++          vst1.8      {d4}, [r0]
++          vst1.8      {d5}, [ip]
++          pop         {r4, pc}              @ restore r4 and return
++endfunc
++
++function ff_hevc_rpi_add_residual_16x16_neon_8, export=1
++        vld1.16     {q0, q1}, [r1]!         @ residuals for row 0
++        add         ip, r0, r2              @ ip = load pointer, one row ahead of store pointer r0
++        vld1.8      {q3}, [r0]              @ 16 pixels of row 0
++        mov         r3, #16-1               @ 15 loop passes; the last row is finished after the loop
++        vmovl.u8    q2, d6                  @ widen pixels to 16 bits
++        vmovl.u8    q3, d7
++        vqadd.s16   q2, q0                  @ pixel + residual with signed saturation
++        vqadd.s16   q3, q1
++1:
++          vld1.16     {q0, q1}, [r1]!       @ extra-indented instructions work on the next row
++        subs        r3, #1                  @ flags are consumed by the bne at the loop end
++        vqmovun.s16 d4, q2                  @ narrow the current row to bytes
++        vqmovun.s16 d5, q3
++          vld1.8      {q3}, [ip], r2
++        vst1.8      {q2}, [r0], r2          @ store the current row
++          vmovl.u8    q2, d6
++            pldw        [ip]                @ prefetch for write at the advanced load pointer
++          vmovl.u8    q3, d7
++          vqadd.s16   q2, q0
++          vqadd.s16   q3, q1
++        bne         1b
++
++          vqmovun.s16 d4, q2                @ drain: narrow and store the final row
++          vqmovun.s16 d5, q3
++          vst1.8      {q2}, [r0]
++          bx          lr
++endfunc
++
++function ff_hevc_rpi_add_residual_32x32_neon_8, export=1
++        vldm        r1!, {q0-q3}            @ 32 residuals for row 0
++        vld1.8      {q8, q9}, [r0]          @ 32 pixels of row 0
++        add         ip, r0, r2              @ ip = load pointer, one row ahead of store pointer r0
++        vmovl.u8    q10, d16                @ widen pixels to 16 bits
++        mov         r3, #32-1               @ 31 loop passes; the last row is finished after the loop
++        vmovl.u8    q11, d17
++        vmovl.u8    q12, d18
++        vmovl.u8    q13, d19
++        vqadd.s16   q10, q0                 @ pixel + residual with signed saturation
++        vqadd.s16   q11, q1
++        vqadd.s16   q12, q2
++        vqadd.s16   q13, q3
++1:
++          vldm        r1!, {q0-q3}          @ extra-indented instructions work on the next row
++        vqmovun.s16 d20, q10                @ narrow the current row to bytes, packed into q10-q11
++        vqmovun.s16 d21, q11
++        vqmovun.s16 d22, q12
++        vqmovun.s16 d23, q13
++          vld1.8      {q8, q9}, [ip], r2
++        subs        r3, #1                  @ flags are consumed by the bne at the loop end
++        vst1.8      {q10, q11}, [r0], r2    @ store the current row
++          vmovl.u8    q10, d16
++            pldw        [ip]                @ prefetch for write at the advanced load pointer
++          vmovl.u8    q11, d17
++          vmovl.u8    q12, d18
++          vmovl.u8    q13, d19
++          vqadd.s16   q10, q0
++          vqadd.s16   q11, q1
++          vqadd.s16   q12, q2
++          vqadd.s16   q13, q3
++        bne     1b
++
++          vqmovun.s16 d20, q10              @ drain: narrow and store the final row
++          vqmovun.s16 d21, q11
++          vqmovun.s16 d22, q12
++          vqmovun.s16 d23, q13
++          vst1.8      {q10, q11}, [r0]
++          bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_add_residual_4x4_dc_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_4x4_dc_neon_8, export=1
++        add         ip, r0, r1              @ ip = dst + stride (rows 1 and 3)
++        vdup.16     q15, r2                 @ q15 = dc in every 16-bit lane
++        lsl         r1, #1                  @ r1 = 2 * stride
++        vld1.32     d4[0], [r0], r1         @ row 0 (4 bytes); r0 moves to row 2
++        rsb         r3, r1, #0              @ r3 = -2 * stride, used to step back
++        vld1.32     d4[1], [ip], r1         @ row 1; ip moves to row 3
++        vld1.32     d5[0], [r0], r3         @ row 2; r0 returns to row 0
++        vld1.32     d5[1], [ip], r3         @ row 3; ip returns to row 1
++        vaddw.u8    q0, q15, d4             @ q0 = dc + widened pixels, rows 0-1
++        vaddw.u8    q1, q15, d5             @ q1 = dc + widened pixels, rows 2-3
++        vqmovun.s16 d0, q0                  @ narrow to bytes with unsigned saturation
++        vqmovun.s16 d1, q1
++        vst1.32     d0[0], [r0], r1         @ write row 0; r0 moves to row 2
++        vst1.32     d0[1], [ip], r1         @ write row 1; ip moves to row 3
++        vst1.32     d1[0], [r0]             @ write row 2
++        vst1.32     d1[1], [ip]             @ write row 3
++        bx          lr
++endfunc
++
++@ ============================================================================
++@ DC Y or C add
++
++@ ff_hevc_rpi_add_residual_4x4_dc_c_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_4x4_dc_c_neon_8, export=1
++        mov         r3,  #4-2               @ loop count for the shared code: 4 rows, two per pass, last pair done after the loop
++        vdup.32     q15, r2                 @ replicate r2 as 32 bits: 16-bit lanes alternate its low and high halves (presumably packed U and V dc - confirm against caller)
++        b           1f                      @ continue at the next 1: label, inside the 8x8 dc function that follows
++endfunc
++
++@ ff_hevc_rpi_add_residual_8x8_dc_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_8x8_dc_neon_8, export=1
++        vdup.16     q15, r2                 @ q15 = dc in every 16-bit lane
++        mov         r3, #8-2                @ three loop passes; the last row pair is finished after the loop
++1:      vld1.8      d16, [r0]               @ shared entry: the 4x4 dc_c stub branches here with its own r3 and q15
++        add         ip, r0, r1              @ ip = row 1 (store pointer for odd rows)
++        push        {r4, lr}                @ r4 and lr serve as look-ahead load pointers
++        vld1.8      d17, [ip]               @ pixels of row 1
++        add         r4, r0, r1, lsl #1      @ r4 = row 2
++        vaddw.u8    q0, q15, d16            @ q0 = dc + widened row 0
++        lsl         r1, #1                  @ r1 = 2 * stride
++        vaddw.u8    q1, q15, d17            @ q1 = dc + widened row 1
++        add         lr, ip, r1              @ lr = row 3 (r1 is already doubled here)
++1:
++          vld1.8      {d16}, [r4], r1       @ extra-indented instructions work on the next row pair
++          vld1.8      {d17}, [lr], r1
++        subs        r3, #2                  @ flags are consumed by the bne at the loop end
++        vqmovun.s16 d4, q0                  @ narrow the current row pair to bytes
++        vqmovun.s16 d5, q1
++          vaddw.u8    q0, q15, d16
++          vaddw.u8    q1, q15, d17
++        vst1.8      {d4}, [r0], r1          @ store the current row pair
++        vst1.8      {d5}, [ip], r1
++        bne         1b
++
++          vqmovun.s16 d4, q0                @ drain: narrow and store the final row pair
++          vqmovun.s16 d5, q1
++          vst1.8      {d4}, [r0]
++          vst1.8      {d5}, [ip]
++          pop         {r4, pc}              @ restore r4 and return
++endfunc
++
++
++@ ff_hevc_rpi_add_residual_8x8_dc_c_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_8x8_dc_c_neon_8, export=1
++        mov         r3,  #8-1               @ loop count for the shared code: 8 rows, last one done after the loop
++        vdup.32     q15, r2                 @ replicate r2 as 32 bits: 16-bit lanes alternate its low and high halves (presumably packed U and V dc - confirm against caller)
++        b           1f                      @ continue at the next 1: label, inside the 16x16 dc function that follows
++endfunc
++
++@ ff_hevc_rpi_add_residual_16x16_dc_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_16x16_dc_neon_8, export=1
++        vdup.16     q15, r2                 @ q15 = dc in every 16-bit lane
++        mov         r3,  #16-1              @ 15 loop passes; the last row is finished after the loop
++1:      vld1.8      {q8}, [r0]              @ shared entry: the 8x8 dc_c stub branches here with its own r3 and q15
++        add         ip, r0, r1              @ ip = load pointer, one row ahead of store pointer r0
++        vaddw.u8    q0, q15, d16            @ q0, q1 = dc + widened pixels of row 0
++        vaddw.u8    q1, q15, d17
++1:
++          vld1.8      {q8}, [ip], r1        @ extra-indented instructions work on the next row
++        subs        r3, #1                  @ flags are consumed by the bne at the loop end
++        vqmovun.s16 d4, q0                  @ narrow the current row to bytes
++        vqmovun.s16 d5, q1
++          vaddw.u8    q0, q15, d16
++          vaddw.u8    q1, q15, d17
++        vst1.8      {q2}, [r0], r1          @ store the current row
++        bne         1b
++
++          vqmovun.s16 d4, q0                @ drain: narrow and store the final row
++          vqmovun.s16 d5, q1
++          vst1.8      {q2}, [r0]
++          bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_add_residual_16x16_dc_c_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_16x16_dc_c_neon_8, export=1
++        mov         r3,  #16-1              @ loop count for the shared code: 16 rows, last one done after the loop
++        vdup.32     q15, r2                 @ replicate r2 as 32 bits: 16-bit lanes alternate its low and high halves (presumably packed U and V dc - confirm against caller)
++        b           1f                      @ continue at the next 1: label, inside the 32x32 dc function that follows
++endfunc
++
++@ ff_hevc_rpi_add_residual_32x32_dc_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_32x32_dc_neon_8, export=1
++        vdup.16     q15, r2                 @ q15 = dc in every 16-bit lane
++        mov         r3, #32-1               @ 31 loop passes; the last row is finished after the loop
++1:      vld1.8      {q8, q9}, [r0]          @ shared entry: the 16x16 dc_c stub branches here with its own r3 and q15
++        add         ip, r0, r1              @ ip = load pointer, one row ahead of store pointer r0
++        vaddw.u8    q0, q15, d16            @ q0-q3 = dc + widened pixels of row 0
++        vaddw.u8    q1, q15, d17
++        vaddw.u8    q2, q15, d18
++        vaddw.u8    q3, q15, d19
++1:
++        vqmovun.s16 d20, q0                 @ narrow the current row to bytes, packed into q10-q11
++        vqmovun.s16 d21, q1
++        vqmovun.s16 d22, q2
++        vqmovun.s16 d23, q3
++          vld1.8      {q8, q9}, [ip], r1    @ extra-indented instructions work on the next row
++        subs        r3, #1                  @ flags are consumed by the bne at the loop end
++          vaddw.u8    q0, q15, d16
++          vaddw.u8    q1, q15, d17
++          vaddw.u8    q2, q15, d18
++          vaddw.u8    q3, q15, d19
++        vst1.8      {q10, q11}, [r0], r1    @ store the current row
++        bne     1b
++
++          vqmovun.s16 d20, q0               @ drain: narrow and store the final row
++          vqmovun.s16 d21, q1
++          vqmovun.s16 d22, q2
++          vqmovun.s16 d23, q3
++          vst1.8      {q10, q11}, [r0]
++          bx          lr
++endfunc
++
++@ ============================================================================
++@ U add
++
++@ add_residual4x4_u(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc_v)             [r3]
++
++function ff_hevc_rpi_add_residual_4x4_u_neon_8, export=1
++        add         ip, r0, r2              @ ip = dst + stride (rows 1 and 3)
++        vld1.16     {q0, q1}, [r1]          @ q0, q1 = the 16 residuals
++        lsl         r2, #1                  @ r2 = 2 * stride
++        vld1.8      {d16}, [r0 :64], r2     @ row 0: 8 interleaved bytes; r0 moves to row 2
++        vld1.8      {d17}, [ip :64], r2     @ row 1; ip moves to row 3
++        vld1.8      {d18}, [r0 :64]         @ row 2
++        sub         r0, r2                  @ rewind r0 to row 0 for the stores
++        vld1.8      {d19}, [ip :64]         @ row 3
++        sub         ip, r2                  @ rewind ip to row 1
++        vdup.16     q2, r3                  @ q2, q3 = r3 (dc_v) in every 16-bit lane
++        vdup.16     q3, r3
++        vmovl.u8    q10, d16                @ widen rows 0-3 to 16 bits in q10-q13
++        vmovl.u8    q11, d17
++        vmovl.u8    q12, d18
++        vmovl.u8    q13, d19
++        vzip.16     q0, q2                  @ interleave residual, dc pairs: q0 = row 0, q2 = row 1
++        vzip.16     q1, q3                  @ q1 = row 2, q3 = row 3
++        vqadd.s16   q0,  q10                @ row 0: even lanes get residual, odd lanes get dc
++        vqadd.s16   q2,  q11                @ row 1
++        vqadd.s16   q1,  q12                @ row 2
++        vqadd.s16   q3,  q13                @ row 3
++        vqmovun.s16 d0,  q0                 @ narrow to bytes: d0-d3 = rows 0-3
++        vqmovun.s16 d1,  q2
++        vqmovun.s16 d2,  q1
++        vqmovun.s16 d3,  q3
++        vst1.8      {d0}, [r0 :64], r2      @ write row 0; r0 moves to row 2
++        vst1.8      {d1}, [ip :64], r2      @ write row 1; ip moves to row 3
++        vst1.8      {d2}, [r0 :64]          @ write row 2
++        vst1.8      {d3}, [ip :64]          @ write row 3
++        bx          lr
++endfunc
++
++@ add_residual8x8_u(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc_v)             [r3]
++
++function ff_hevc_rpi_add_residual_8x8_u_neon_8, export=1
++        vdup.16     q15, r3                 @ q15 = dc_v in every 16-bit lane
++        add         ip, r0, r2              @ ip = row 1 (store pointer for odd rows)
++        push        {r4, lr}                @ r4 and lr serve as look-ahead load pointers
++        vld2.8      {d16, d17}, [r0 :128]   @ row 0 de-interleaved: even bytes (U) -> d16, odd bytes (V) -> d17
++        lsl         r2, #1                  @ r2 = 2 * stride
++        vld2.8      {d18, d19}, [ip :128]   @ row 1: U -> d18, V -> d19
++        mov         r3, #8-2                @ three loop passes; the last row pair is finished after the loop
++        vld1.16     {q0, q1}, [r1 :256]!    @ U residuals for rows 0 and 1
++        add         r4, r0, r2              @ r4 = row 2 (r2 is already doubled here)
++        vmovl.u8    q10, d16                @ widen U of row 0
++        add         lr, ip, r2              @ lr = row 3
++        vmovl.u8    q11, d18                @ widen U of row 1
++        vqadd.s16   q0,  q10                @ U row 0 += residual
++        vaddw.u8    q2,  q15, d17           @ q2 = dc + widened V of row 0
++        vqadd.s16   q1,  q11                @ U row 1 += residual
++        vaddw.u8    q3,  q15, d19           @ q3 = dc + widened V of row 1
++1:
++        vqmovun.s16 d20,  q0                @ narrow the current pair: d20 = U row 0, d21 = V row 0
++        vqmovun.s16 d21,  q2
++          vld2.8      {d16, d17}, [r4 :128], r2  @ extra-indented instructions work on the next row pair
++        subs        r3, #2                  @ flags are consumed by the bne at the loop end
++        vqmovun.s16 d22,  q1                @ d22 = U row 1, d23 = V row 1
++        vqmovun.s16 d23,  q3
++        vst2.8      {d20, d21}, [r0 :128], r2    @ re-interleave and store the even row
++          vld2.8      {d18, d19}, [lr :128], r2
++        vst2.8      {d22, d23}, [ip :128], r2    @ re-interleave and store the odd row
++          vld1.16     {q0, q1}, [r1 :256]!
++          vmovl.u8    q10, d16
++          vmovl.u8    q11, d18
++          vqadd.s16   q0,  q10
++          vaddw.u8    q2,  q15, d17
++          vqadd.s16   q1,  q11
++          vaddw.u8    q3,  q15, d19
++        bne         1b
++
++          vqmovun.s16 d20,  q0              @ drain: narrow and store the final row pair
++          vqmovun.s16 d21,  q2
++          vqmovun.s16 d22,  q1
++          vqmovun.s16 d23,  q3
++          vst2.8      {d20, d21}, [r0 :128]
++          vst2.8      {d22, d23}, [ip :128]
++          pop         {r4, pc}              @ restore r4 and return
++endfunc
++
++@ add_residual16x16_u(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc_v)             [r3]
++
++function ff_hevc_rpi_add_residual_16x16_u_neon_8, export=1
++        vdup.16     q15, r3                 @ q15 = dc_v in every 16-bit lane
++        add         ip, r0, r2              @ ip = load pointer, one row ahead of store pointer r0
++        vld2.8      {q8, q9}, [r0 :256]     @ row 0 de-interleaved: even bytes (U) -> q8, odd bytes (V) -> q9
++        mov         r3, #16-1               @ 15 loop passes; the last row is finished after the loop
++        vld1.16     {q0, q1}, [r1 :256]!    @ 16 U residuals for row 0
++        vmovl.u8    q11, d16                @ widen low half of U
++        vmovl.u8    q12, d17                @ widen high half of U
++        vqadd.s16   q0,  q11                @ U low half += residual
++        vaddw.u8    q11, q15, d18           @ q11 = dc + widened low half of V
++        vqadd.s16   q1,  q12                @ U high half += residual
++        vaddw.u8    q12, q15, d19           @ q12 = dc + widened high half of V
++1:
++          vld2.8      {q8, q9}, [ip :256], r2   @ extra-indented instructions work on the next row
++        subs        r3, #1                  @ flags are consumed by the bne at the loop end
++        vqmovun.s16 d20, q0                 @ narrow: q10 (d20, d21) collects U
++        vqmovun.s16 d22, q11                @ narrow: q11 (d22, d23) collects V
++        vqmovun.s16 d21, q1
++        vqmovun.s16 d23, q12
++          vld1.16     {q0, q1}, [r1 :256]!
++        vst2.8      {q10, q11}, [r0 :256], r2   @ re-interleave and store the current row
++          vmovl.u8    q11, d16
++            pldw        [ip]                @ prefetch for write at the advanced load pointer
++          vmovl.u8    q12, d17
++          vqadd.s16   q0,  q11
++          vaddw.u8    q11, q15, d18
++          vqadd.s16   q1,  q12
++          vaddw.u8    q12, q15, d19
++        bne         1b
++
++          vqmovun.s16 d20, q0               @ drain: narrow and store the final row
++          vqmovun.s16 d22, q11
++          vqmovun.s16 d21, q1
++          vqmovun.s16 d23, q12
++          vst2.8      {q10, q11}, [r0 :256]
++          bx          lr
++endfunc
++
 +@ ============================================================================
 +@ V add
 +
 +@ add_residual4x4_v(
 +@   uint8_t *_dst,        [r0]
 +@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride,     [r2]
-+@   int dc)               [r3]
++@   ptrdiff_t stride,     [r2];  int dc_u) [r3]
 +
-+function JOIN(ff_hevc_rpi_add_residual_4x4_v_neon_, BIT_DEPTH), export=1
-+        vld1.16     {q10, q11}, [r1, :256]
-+        vdup.16     q15, r3
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
-+
-+        vld2.16     {d0, d2}, [r0, :128], r2
-+        vld2.16     {d1, d3}, [r0, :128], r2
-+        vld2.16     {d4, d6}, [r0, :128], r2
-+        vld2.16     {d5, d7}, [r0, :128], r2
-+
-+        vqadd.s16   q0,  q15
-+        vqadd.s16   q1,  q10
-+        vqadd.s16   q2,  q15
-+        vqadd.s16   q3,  q11
-+        sub         r0,  r0,  r2, lsl #2
-+        clip16_4 q0, q1, q2, q3, q8, q9
-+
-+        vst2.16     {d0, d2}, [r0, :128], r2
-+        vst2.16     {d1, d3}, [r0, :128], r2
-+        vst2.16     {d4, d6}, [r0, :128], r2
-+        vst2.16     {d5, d7}, [r0, :128]
++function ff_hevc_rpi_add_residual_4x4_v_neon_8, export=1
++        add         ip, r0, r2              @ ip = dst + stride (rows 1 and 3)
++        vld1.16     {q2, q3}, [r1]          @ q2, q3 = the 16 residuals
++        lsl         r2, #1                  @ r2 = 2 * stride
++        vld1.8      {d16}, [r0 :64], r2     @ row 0: 8 interleaved bytes; r0 moves to row 2
++        vld1.8      {d17}, [ip :64], r2     @ row 1; ip moves to row 3
++        vld1.8      {d18}, [r0 :64]         @ row 2
++        sub         r0, r2                  @ rewind r0 to row 0 for the stores
++        vld1.8      {d19}, [ip :64]         @ row 3
++        sub         ip, r2                  @ rewind ip to row 1
++        vdup.16     q0, r3                  @ q0, q1 = r3 (dc for the even, U, lanes) in every 16-bit lane
++        vdup.16     q1, r3
++        vmovl.u8    q10, d16                @ widen rows 0-3 to 16 bits in q10-q13
++        vmovl.u8    q11, d17
++        vmovl.u8    q12, d18
++        vmovl.u8    q13, d19
++        vzip.16     q0, q2                  @ interleave dc, residual pairs: q0 = row 0, q2 = row 1
++        vzip.16     q1, q3                  @ q1 = row 2, q3 = row 3
++        vqadd.s16   q0,  q10                @ row 0: even lanes get dc, odd lanes get residual
++        vqadd.s16   q2,  q11                @ row 1
++        vqadd.s16   q1,  q12                @ row 2
++        vqadd.s16   q3,  q13                @ row 3
++        vqmovun.s16 d0,  q0                 @ narrow to bytes: d0-d3 = rows 0-3
++        vqmovun.s16 d1,  q2
++        vqmovun.s16 d2,  q1
++        vqmovun.s16 d3,  q3
++        vst1.8      {d0}, [r0 :64], r2      @ write row 0; r0 moves to row 2
++        vst1.8      {d1}, [ip :64], r2      @ write row 1; ip moves to row 3
++        vst1.8      {d2}, [r0 :64]          @ write row 2
++        vst1.8      {d3}, [ip :64]          @ write row 3
 +        bx          lr
 +endfunc
 +
 +@ add_residual8x8_v(
 +@   uint8_t *_dst,        [r0]
 +@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride,     [r2]
-+@   int dc)               [r3]
++@   ptrdiff_t stride,     [r2];  int dc_u) [r3]
 +
-+function JOIN(ff_hevc_rpi_add_residual_8x8_v_neon_, BIT_DEPTH), export=1
++function ff_hevc_rpi_add_residual_8x8_v_neon_8, export=1
 +        vdup.16     q15, r3
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        mov         r12, #4
-+        vdup.i16    q9,  r3
++        add         ip, r0, r2
++        push        {r4, lr}
++        vld2.8      {d16, d17}, [r0 :128]
++        lsl         r2, #1
++        vld2.8      {d18, d19}, [ip :128]
++        mov         r3, #8-2
++        vld1.16     {q0, q1}, [r1 :256]!
++        add         r4, r0, r2
++        vmovl.u8    q10, d17
++        add         lr, ip, r2
++        vmovl.u8    q11, d19
++        vqadd.s16   q0,  q10
++        vaddw.u8    q2,  q15, d16
++        vqadd.s16   q1,  q11
++        vaddw.u8    q3,  q15, d18
 +1:
-+        vld2.16     {q0, q1}, [r0, :256], r2
-+        vld2.16     {q2, q3}, [r0, :256]
-+        vld1.16     {q10, q11}, [r1, :256]!
-+        subs        r12, #1
-+        vqadd.s16   q0,  q15
-+        vqadd.s16   q1,  q10
-+        vqadd.s16   q2,  q15
-+        vqadd.s16   q3,  q11
-+        sub         r0,  r2
-+        clip16_4 q0, q1, q2, q3, q8, q9
-+        vst2.16     {q0, q1}, [r0, :256], r2
-+        vst2.16     {q2, q3}, [r0, :256], r2
++        vqmovun.s16 d20,  q2
++        vqmovun.s16 d21,  q0
++          vld2.8      {d16, d17}, [r4 :128], r2
++        subs        r3, #2
++        vqmovun.s16 d22,  q3
++        vqmovun.s16 d23,  q1
++        vst2.8      {d20, d21}, [r0 :128], r2
++          vld2.8      {d18, d19}, [lr :128], r2
++        vst2.8      {d22, d23}, [ip :128], r2
++          vld1.16     {q0, q1}, [r1 :256]!
++          vmovl.u8    q10, d17
++          vmovl.u8    q11, d19
++          vqadd.s16   q0,  q10
++          vaddw.u8    q2,  q15, d16
++          vqadd.s16   q1,  q11
++          vaddw.u8    q3,  q15, d18
 +        bne         1b
-+        bx          lr
++
++          vqmovun.s16 d20,  q2
++          vqmovun.s16 d21,  q0
++          vqmovun.s16 d22,  q3
++          vqmovun.s16 d23,  q1
++          vst2.8      {d20, d21}, [r0 :128]
++          vst2.8      {d22, d23}, [ip :128]
++          pop         {r4, pc}
 +endfunc
 +
 +@ add_residual16x16_v(
 +@   uint8_t *_dst,        [r0]
 +@   const int16_t *res,   [r1]
-+@   ptrdiff_t stride,     [r2]
-+@   int dc)               [r3]
++@   ptrdiff_t stride,     [r2];  int dc_u) [r3]
 +
-+function JOIN(ff_hevc_rpi_add_residual_16x16_v_neon_, BIT_DEPTH), export=1
++function ff_hevc_rpi_add_residual_16x16_v_neon_8, export=1
 +        vdup.16     q15, r3
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        mov         r12, #16
-+        vdup.i16    q9,  r3
-+        sub         r2,  #32
++        add         ip, r0, r2
++        vld2.8      {q8, q9}, [r0 :256]
++        mov         r3, #16-1
++        vld1.16     {q0, q1}, [r1 :256]!
++        vmovl.u8    q11, d18
++        vmovl.u8    q12, d19
++        vqadd.s16   q0,  q11
++        vaddw.u8    q11, q15, d16
++        vqadd.s16   q1,  q12
++        vaddw.u8    q12, q15, d17
 +1:
-+        vld2.16     {q0, q1}, [r0, :256]!
-+        vld2.16     {q2, q3}, [r0, :256]
-+        vld1.16     {q10, q11}, [r1, :256]!
-+        subs        r12, #1
-+        vqadd.s16   q0,  q15
-+        vqadd.s16   q1,  q10
-+        vqadd.s16   q2,  q15
-+        vqadd.s16   q3,  q11
-+        sub         r0,  #32
-+        clip16_4 q0, q1, q2, q3, q8, q9
-+        vst2.16     {q0, q1}, [r0, :256]!
-+        vst2.16     {q2, q3}, [r0, :256], r2
++          vld2.8      {q8, q9}, [ip :256], r2
++        subs        r3, #1
++        vqmovun.s16 d20, q11
++        vqmovun.s16 d22, q0
++        vqmovun.s16 d21, q12
++        vqmovun.s16 d23, q1
++          vld1.16     {q0, q1}, [r1 :256]!
++        vst2.8      {q10, q11}, [r0 :256], r2
++          vmovl.u8    q11, d18
++            pldw        [ip]
++          vmovl.u8    q12, d19
++          vqadd.s16   q0,  q11
++          vaddw.u8    q11, q15, d16
++          vqadd.s16   q1,  q12
++          vaddw.u8    q12, q15, d17
 +        bne         1b
-+        bx          lr
++
++          vqmovun.s16 d20, q11
++          vqmovun.s16 d22, q0
++          vqmovun.s16 d21, q12
++          vqmovun.s16 d23, q1
++          vst2.8      {q10, q11}, [r0 :256]
++          bx          lr
 +endfunc
 +
 +@ ============================================================================
@@ -4878,35 +4882,34 @@ index 0000000000..7dfcc2751a
 +@   const int16_t *res,   [r1]
 +@   ptrdiff_t stride)     [r2]
 +
-+function JOIN(ff_hevc_rpi_add_residual_4x4_c_neon_, BIT_DEPTH), export=1
-+        vldm        r1, {q10-q13}
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        vdup.i16    q9,  r3
-+
-+        vld2.16     {d0, d2}, [r0, :128], r2
-+        vld2.16     {d1, d3}, [r0, :128], r2
-+        vld2.16     {d4, d6}, [r0, :128], r2
-+        vld2.16     {d5, d7}, [r0, :128], r2
-+
++function ff_hevc_rpi_add_residual_4x4_c_neon_8, export=1
++        add         ip, r0, r2
++        vld1.16     {q0, q1}, [r1]!       @ all of U
++        lsl         r2, #1
++        vld1.8      {d16}, [r0 :64], r2
++        rsb         r3, r2, #0
++        vld1.8      {d17}, [ip :64], r2
++        vld1.16     {q2, q3}, [r1]        @ all of V
++        vld1.8      {d18}, [r0 :64], r3
++        vld1.8      {d19}, [ip :64], r3
++        vmovl.u8    q10, d16
++        vmovl.u8    q11, d17
++        vmovl.u8    q12, d18
++        vmovl.u8    q13, d19
++        vzip.16     q0, q2
++        vzip.16     q1, q3
 +        vqadd.s16   q0,  q10
 +        vqadd.s16   q2,  q11
 +        vqadd.s16   q1,  q12
 +        vqadd.s16   q3,  q13
-+        sub         r0,  r0,  r2, lsl #2
-+        vmax.s16    q0,  q0,  q8
-+        vmax.s16    q1,  q1,  q8
-+        vmax.s16    q2,  q2,  q8
-+        vmax.s16    q3,  q3,  q8
-+        vmin.s16    q0,  q0,  q9
-+        vmin.s16    q1,  q1,  q9
-+        vmin.s16    q2,  q2,  q9
-+        vmin.s16    q3,  q3,  q9
-+
-+        vst2.16     {d0, d2}, [r0, :128], r2
-+        vst2.16     {d1, d3}, [r0, :128], r2
-+        vst2.16     {d4, d6}, [r0, :128], r2
-+        vst2.16     {d5, d7}, [r0, :128]
++        vqmovun.s16 d0,  q0
++        vqmovun.s16 d1,  q2
++        vqmovun.s16 d2,  q1
++        vqmovun.s16 d3,  q3
++        vst1.8      {d0}, [r0 :64], r2
++        vst1.8      {d1}, [ip :64], r2
++        vst1.8      {d2}, [r0 :64]
++        vst1.8      {d3}, [ip :64]
 +        bx          lr
 +endfunc
 +
@@ -4915,35 +4918,37 @@ index 0000000000..7dfcc2751a
 +@   const int16_t *res,   [r1]
 +@   ptrdiff_t stride)     [r2]
 +
-+function JOIN(ff_hevc_rpi_add_residual_8x8_c_neon_, BIT_DEPTH), export=1
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        mov         r12, #4
-+        vdup.i16    q9,  r3
++function ff_hevc_rpi_add_residual_8x8_c_neon_8, export=1
++        vld2.8      {d16, d17}, [r0 :128]
 +        add         r3, r1, #(8*8*2)  @ Offset to V
++        vld1.16     {q0}, [r1 :128]!
++        add         ip, r0, r2
++        vld1.16     {q1}, [r3 :128]!
++        vmovl.u8    q10, d16
++        push        {lr}
++        vmovl.u8    q8,  d17
++        mov         lr, #8-1
++        vqadd.s16   q10, q0
++        vqadd.s16   q1,  q8
 +1:
-+        vld2.16     {q0, q1}, [r0, :256], r2
-+        vld2.16     {q2, q3}, [r0, :256]
-+        vld1.16     {q10, q11}, [r1, :256]!
-+        vld1.16     {q12, q13}, [r3, :256]!
-+        subs        r12, #1
-+        vqadd.s16   q0,  q10
-+        vqadd.s16   q2,  q11
-+        vqadd.s16   q1,  q12
-+        vqadd.s16   q3,  q13
-+        sub         r0,  r2
-+        vmax.s16    q0,  q0,  q8
-+        vmax.s16    q1,  q1,  q8
-+        vmax.s16    q2,  q2,  q8
-+        vmax.s16    q3,  q3,  q8
-+        vmin.s16    q0,  q0,  q9
-+        vmin.s16    q1,  q1,  q9
-+        vmin.s16    q2,  q2,  q9
-+        vmin.s16    q3,  q3,  q9
-+        vst2.16     {q0, q1}, [r0, :256], r2
-+        vst2.16     {q2, q3}, [r0, :256], r2
++          vld2.8      {d16, d17}, [ip :128], r2
++        subs        lr, #1
++          vld1.16     {q0}, [r1 :128]!
++        vqmovun.s16 d20, q10
++        vqmovun.s16 d21, q1
++          vld1.16     {q1}, [r3 :128]!
++        vst2.8      {d20, d21}, [r0 :128], r2
++          vmovl.u8    q10, d16
++            pldw        [ip]
++          vmovl.u8    q8,  d17
++          vqadd.s16   q10, q0
++          vqadd.s16   q1,  q8
 +        bne         1b
-+        bx          lr
++
++          vqmovun.s16 d20, q10
++          vqmovun.s16 d21, q1
++          vst2.8      {d20, d21}, [r0 :128]
++          pop         {pc}
 +endfunc
 +
 +@ add_residual16x16_c(
@@ -4951,44 +4956,60 @@ index 0000000000..7dfcc2751a
 +@   const int16_t *res,   [r1]
 +@   ptrdiff_t stride)     [r2]
 +
-+function JOIN(ff_hevc_rpi_add_residual_16x16_c_neon_, BIT_DEPTH), export=1
-+        movw        r3,  #(1 << BIT_DEPTH) - 1
-+        vmov.i64    q8,  #0
-+        mov         r12, #16
-+        vdup.i16    q9,  r3
-+        add         r3,  r1, #(16*16*2)  @ Offset to V
-+        sub         r2,  #32
-+1:
-+        vld2.16     {q0, q1}, [r0, :256]!
-+        vld2.16     {q2, q3}, [r0, :256]
-+        vld1.16     {q10, q11}, [r1, :256]!
-+        vld1.16     {q12, q13}, [r3, :256]!
-+        subs        r12, #1
++function ff_hevc_rpi_add_residual_16x16_c_neon_8, export=1
++        vld2.8      {q8, q9}, [r0 :256]
++        add         r3, r1, #(16*16*2)  @ Offset to V
++        vld1.16     {q0, q1}, [r1 :256]!
++        add         ip, r0, r2
++        vld1.16     {q2, q3}, [r3 :256]!
++        vmovl.u8    q10, d16
++        push        {lr}
++        vmovl.u8    q8,  d17
++        mov         lr, #16-1
++        vmovl.u8    q11, d18
++        vmovl.u8    q9,  d19
 +        vqadd.s16   q0,  q10
++        vqadd.s16   q1,  q8
 +        vqadd.s16   q2,  q11
-+        vqadd.s16   q1,  q12
-+        vqadd.s16   q3,  q13
-+        sub         r0,  #32
-+        vmax.s16    q0,  q0,  q8
-+        vmax.s16    q1,  q1,  q8
-+        vmax.s16    q2,  q2,  q8
-+        vmax.s16    q3,  q3,  q8
-+        vmin.s16    q0,  q0,  q9
-+        vmin.s16    q1,  q1,  q9
-+        vmin.s16    q2,  q2,  q9
-+        vmin.s16    q3,  q3,  q9
-+        vst2.16     {q0, q1}, [r0, :256]!
-+        vst2.16     {q2, q3}, [r0, :256], r2
++        vqadd.s16   q3,  q9
++1:
++          vld2.8      {q8, q9}, [ip :256], r2
++        subs        lr, #1
++        vqmovun.s16 d20, q0
++        vqmovun.s16 d22, q2
++        vqmovun.s16 d21, q1
++        vqmovun.s16 d23, q3
++          vld1.16     {q0, q1}, [r1 :256]!
++        vst2.8      {d20-d23}, [r0 :256], r2
++          vld1.16     {q2, q3}, [r3 :256]!
++          vmovl.u8    q10, d16
++            pldw        [ip]
++          vmovl.u8    q8,  d17
++          vmovl.u8    q11, d18
++          vmovl.u8    q9,  d19
++          vqadd.s16   q0,  q10
++          vqadd.s16   q1,  q8
++          vqadd.s16   q2,  q11
++          vqadd.s16   q3,  q9
 +        bne         1b
-+        bx          lr
++
++          vqmovun.s16 d20, q0
++          vqmovun.s16 d22, q2
++          vqmovun.s16 d21, q1
++          vqmovun.s16 d23, q3
++          vst2.8      {d20-d23}, [r0 :256]
++          pop         {pc}
 +endfunc
 +
++@ 32x32 chroma never occurs so it is not implemented
++
++@ ============================================================================
 diff --git a/libavcodec/arm/rpi_hevcdsp_sao_neon.S b/libavcodec/arm/rpi_hevcdsp_sao_neon.S
 new file mode 100644
-index 0000000000..12ffc5708a
+index 0000000000..b56e0f9644
 --- /dev/null
 +++ b/libavcodec/arm/rpi_hevcdsp_sao_neon.S
-@@ -0,0 +1,2199 @@
+@@ -0,0 +1,2245 @@
 +/*
 + * Copyright (c) 2014 - 2015 Seppo Tomperi <seppo.tomperi@vtt.fi>
 + *               2017 John Cox <jc@kynesim.co.uk> (for Raspberry Pi)
@@ -5015,6 +5036,10 @@ index 0000000000..12ffc5708a
 +
 +.set EDGE_SRC_STRIDE, 160
 +
++@ PIC jump tables are fractionally more expensive than absolute in our code
++.set jent_pic, CONFIG_PIC
++
++
 +.macro sao_band_64b_8 XLAT0, XLAT1, Q_K128, I1, I2, I3, I4
 +        vshr.u8   q12, q8, #3
 +        \I1
@@ -6049,7 +6074,21 @@ index 0000000000..12ffc5708a
 +@   int width,                        [sp, #sp_base + 4]
 +@   int height)                       [sp, #sp_base + 8]
 +
-+.macro  edge_xxb_init, bit_depth, is_chroma, jump_tab, setup_64b = 0, setup_16b = 0, check_w4 = 0, do2 = 0
++@ Jumps via jump_tab with
++@   uint8_t *_dst,                    [r0]
++@   const uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,             [r2]
++@   EDGE_SRC_STRIDE                   [r3]
++@   (1 << \bit_depth) - 1             [r4]
++@   * xlat_table                      [r5]  // setup_64b only
++@   int height                        [r12]
++@
++@   0                                 [q12] // > 8 bit
++@   2                                 [q14]
++@   128                               [q15] // = 8 bit
++@   r4                                [q15] // > 8 bit
++
++.macro  edge_xxb_init, bit_depth, is_chroma, jump_tab, setup_64b = 0, setup_16b = 0, check_w4 = 0, do2 = 0, xjump = 0
 +
 +@ Build translate registers
 +@ As translate values can only be 0-4 we don't care about junk in the rest
@@ -6141,20 +6180,34 @@ index 0000000000..12ffc5708a
 +@ For 16 bit width 64 (or chroma 32) we need to do this in 2 passes
 +.if \do2
 +        push     {r0, r1, r6, r12}
++.if jent_pic
++        bl       98f
++.else
 +        blx      r6
++.endif
 +        pop      {r0, r1, r6, r12}
 +
 +        add      r0, #64
 +        add      r1, #64
 +.endif
 +
++.if jent_pic
++        bl       98f
++.else
 +        blx      r6
++.endif
 +
 +@ Tidy up & return
 +.if \setup_64b
 +        vpop     {q4-q8}        @ spurious but harmless load of q8
 +.endif
 +        pop      {r4-r6, pc}
++
++.if jent_pic && !\xjump
++@ Magic label - used as 98b in jent macro
++98:
++        add      pc, r6
++.endif
 +.endm
 +
 +
@@ -6162,8 +6215,8 @@ index 0000000000..12ffc5708a
 +        edge_xxb_init \bit_depth, \is_chroma, \jump_tab, check_w4=\check_w4, setup_16b=1
 +.endm
 +
-+.macro  edge_64b_init, bit_depth, is_chroma, do2, jump_tab
-+        edge_xxb_init \bit_depth, \is_chroma, \jump_tab, do2=\do2, setup_64b=1
++.macro  edge_64b_init, bit_depth, is_chroma, do2, jump_tab, xjump=0
++        edge_xxb_init \bit_depth, \is_chroma, \jump_tab, do2=\do2, setup_64b=1, xjump=\xjump
 +.endm
 +
 +
@@ -6873,11 +6926,25 @@ index 0000000000..12ffc5708a
 +        edge_4bx4_e2 \body_fn, (-\pb)
 +.endm
 +
++@ Jump table entry - if in thumb mode the bottom bit must be set
++@ ? There is probably a real asm instruction to do this but I haven't found it
++.macro jent lab
++.if jent_pic
++@ Could use .short here but due to A32 not supporting ldrh [lsl#1] it is
++@ simpler and clearer in the code to stick with .word
++T       .word  (0 + \lab) - (4 + 98b)
++A       .word  (0 + \lab) - (8 + 98b)
++.else
++T       .word   1 + \lab
++A       .word   \lab
++.endif
++.endm
++
 +.macro edge_64b_bodies, body_fn, pb
-+        .word   0f
-+        .word   10f
-+        .word   20f
-+        .word   30f
++        jent    0f
++        jent    10f
++        jent    20f
++        jent    30f
 +
 +0:      edge_64b_e0     \body_fn, \pb
 +10:     edge_64b_e1     \body_fn
@@ -6886,10 +6953,10 @@ index 0000000000..12ffc5708a
 +.endm
 +
 +.macro edge_32bx2_bodies, body_fn, pb
-+        .word   0f
-+        .word   10f
-+        .word   20f
-+        .word   30f
++        jent    0f
++        jent    10f
++        jent    20f
++        jent    30f
 +
 +0:      edge_32bx2_e0   \body_fn, \pb
 +10:     edge_32bx2_e1   \body_fn
@@ -6898,10 +6965,10 @@ index 0000000000..12ffc5708a
 +.endm
 +
 +.macro edge_16b_bodies, body_fn, pb
-+        .word   0f
-+        .word   10f
-+        .word   20f
-+        .word   30f
++        jent    0f
++        jent    10f
++        jent    20f
++        jent    30f
 +
 +0:      edge_16b_e0     \body_fn, \pb
 +10:     edge_16b_e1     \body_fn
@@ -6910,14 +6977,14 @@ index 0000000000..12ffc5708a
 +.endm
 +
 +.macro edge_32bx2_16b_bodies, body_fn_64b, body_fn_16b, pb
-+        .word   0f
-+        .word   10f
-+        .word   20f
-+        .word   30f
-+        .word   5f
-+        .word   15f
-+        .word   25f
-+        .word   35f
++        jent    0f
++        jent    10f
++        jent    20f
++        jent    30f
++        jent    5f
++        jent    15f
++        jent    25f
++        jent    35f
 +
 +0:      edge_32bx2_e0   \body_fn_64b, \pb
 +10:     edge_32bx2_e1   \body_fn_64b
@@ -6930,14 +6997,14 @@ index 0000000000..12ffc5708a
 +.endm
 +
 +.macro edge_16b_8bx2_bodies, body_fn, pb
-+        .word   0f
-+        .word   10f
-+        .word   20f
-+        .word   30f
-+        .word   5f
-+        .word   15f
-+        .word   25f
-+        .word   35f
++        jent    0f
++        jent    10f
++        jent    20f
++        jent    30f
++        jent    5f
++        jent    15f
++        jent    25f
++        jent    35f
 +
 +0:      edge_16b_e0     \body_fn, \pb
 +10:     edge_16b_e1     \body_fn
@@ -6950,14 +7017,14 @@ index 0000000000..12ffc5708a
 +.endm
 +
 +.macro edge_8bx2_4bx4_bodies, body_fn, pb
-+        .word   0f
-+        .word   10f
-+        .word   20f
-+        .word   30f
-+        .word   5f
-+        .word   15f
-+        .word   25f
-+        .word   35f
++        jent    0f
++        jent    10f
++        jent    20f
++        jent    30f
++        jent    5f
++        jent    15f
++        jent    25f
++        jent    35f
 +
 +0:      edge_8bx2_e0    \body_fn, \pb
 +10:     edge_8bx2_e1    \body_fn
@@ -7123,7 +7190,7 @@ index 0000000000..12ffc5708a
 +@ about edge effects
 +
 +function ff_hevc_rpi_sao_edge_64_neon_10, export=1
-+        edge_64b_init   10, 0, 1, 99f
++        edge_64b_init   10, 0, 1, 99f, xjump=1
 +endfunc
 +
 +@ void ff_hevc_rpi_sao_edge_32_neon_10(
@@ -7168,7 +7235,7 @@ index 0000000000..12ffc5708a
 +@   int height)                       [sp, #12]
 +
 +function ff_hevc_rpi_sao_edge_c_32_neon_10, export=1
-+        edge_64b_init   10, 1, 1, 99f
++        edge_64b_init   10, 1, 1, 99f, xjump=1
 +endfunc
 +
 +
@@ -7189,10 +7256,10 @@ index 0000000000..12ffc5708a
 +endfunc
 +
 diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
-index 18c3e3ea1e..c26b6d607c 100644
+index fb0c6fae70..9f2ebb16f3 100644
 --- a/libavcodec/avcodec.h
 +++ b/libavcodec/avcodec.h
-@@ -3627,7 +3627,13 @@ typedef struct AVCodecContext {
+@@ -3208,7 +3208,13 @@ typedef struct AVCodecContext {
  #endif
  
      /**
@@ -7207,7 +7274,7 @@ index 18c3e3ea1e..c26b6d607c 100644
       * the end of the audio. I.e. this number of decoded samples must be
       * discarded by the caller from the end of the stream to get the original
       * audio without any trailing padding.
-@@ -4816,6 +4822,17 @@ void av_packet_rescale_ts(AVPacket *pkt, AVRational tb_src, AVRational tb_dst);
+@@ -4593,6 +4599,17 @@ void av_packet_rescale_ts(AVPacket *pkt, AVRational tb_src, AVRational tb_dst);
   */
  AVCodec *avcodec_find_decoder(enum AVCodecID id);
  
@@ -7246,7 +7313,7 @@ index 1bf1c620d6..ccfa991f60 100644
      const uint8_t *bytestream;
      const uint8_t *bytestream_end;
 diff --git a/libavcodec/mmaldec.c b/libavcodec/mmaldec.c
-index 0b1195dc3e..5ef81fa739 100644
+index 647a22ef7c..4ed35d1126 100644
 --- a/libavcodec/mmaldec.c
 +++ b/libavcodec/mmaldec.c
 @@ -24,6 +24,9 @@
@@ -7378,10 +7445,10 @@ index d181b74570..c52c450956 100644
      if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size,
 diff --git a/libavcodec/rpi_hevc_cabac.c b/libavcodec/rpi_hevc_cabac.c
 new file mode 100644
-index 0000000000..6566063cd8
+index 0000000000..4891a79eb5
 --- /dev/null
 +++ b/libavcodec/rpi_hevc_cabac.c
-@@ -0,0 +1,2387 @@
+@@ -0,0 +1,2269 @@
 +/*
 + * HEVC CABAC decoding
 + *
@@ -7414,6 +7481,7 @@ index 0000000000..6566063cd8
 +#include "rpi_hevc_data.h"
 +#include "hevc.h"
 +#include "rpi_hevcdec.h"
++#include "rpi_hevc_cabac_fns.h"
 +
 +#include "libavutil/rpi_sand_fns.h"
 +
@@ -7444,10 +7512,6 @@ index 0000000000..6566063cd8
 +#define CABAC_BY22_PEEK_BITS  23
 +#endif
 +
-+#if ARCH_ARM
-+#include "arm/rpi_hevc_cabac.h"
-+#endif
-+
 +#define CABAC_MAX_BIN 31
 +
 +
@@ -7486,6 +7550,10 @@ index 0000000000..6566063cd8
 +#undef I
 +#endif  // USE_BY22
 +
++#if ARCH_ARM
++#include "arm/rpi_hevc_cabac.h"
++#endif
++
 +/**
 + * number of bin by SyntaxElement.
 + */
@@ -8171,9 +8239,14 @@ index 0000000000..6566063cd8
 +
 +#define GET_CABAC_LC(ctx) get_cabac(&lc->cc, lc->cabac_state + (ctx))
 +
-+int ff_hevc_rpi_sao_merge_flag_decode(HEVCRpiLocalContext * const lc)
++int ff_hevc_rpi_get_cabac(CABACContext * const c, uint8_t * const state)
 +{
-+    return get_cabac(&lc->cc, lc->cabac_state + elem_offset[SAO_MERGE_FLAG]);
++    return get_cabac_inline(c, state);
++}
++
++int ff_hevc_rpi_get_cabac_terminate(CABACContext * const c)
++{
++    return get_cabac_terminate(c);
 +}
 +
 +int ff_hevc_rpi_sao_type_idx_decode(HEVCRpiLocalContext * const lc)
@@ -8218,65 +8291,30 @@ index 0000000000..6566063cd8
 +    return ret;
 +}
 +
-+int ff_hevc_rpi_end_of_slice_flag_decode(HEVCRpiLocalContext * const lc)
++int ff_hevc_rpi_cu_qp_delta(HEVCRpiLocalContext * const lc)
 +{
-+    return get_cabac_terminate(&lc->cc);
-+}
++    int val = 1;
 +
-+int ff_hevc_rpi_cu_transquant_bypass_flag_decode(HEVCRpiLocalContext * const lc)
-+{
-+    return GET_CABAC_LC(elem_offset[CU_TRANSQUANT_BYPASS_FLAG]);
-+}
++    if (get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CU_QP_DELTA) == 0)
++        return 0;
 +
-+int ff_hevc_rpi_skip_flag_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
-+                             const int x0, const int y0, const int x_cb, const int y_cb)
-+{
-+    int min_cb_width = s->ps.sps->min_cb_width;
-+    int inc = 0;
-+    int x0b = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
-+    int y0b = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
++    while (val < 5 &&
++           get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CU_QP_DELTA + 1) != 0)
++        val++;
 +
-+    if (lc->ctb_left_flag || x0b)
-+        inc = !!SAMPLE_CTB(s->skip_flag, x_cb - 1, y_cb);
-+    if (lc->ctb_up_flag || y0b)
-+        inc += !!SAMPLE_CTB(s->skip_flag, x_cb, y_cb - 1);
-+
-+    return GET_CABAC_LC(elem_offset[SKIP_FLAG] + inc);
-+}
-+
-+int ff_hevc_rpi_cu_qp_delta_abs(HEVCRpiLocalContext * const lc)
-+{
-+    int prefix_val = 0;
-+    int suffix_val = 0;
-+    int inc = 0;
-+
-+    while (prefix_val < 5 && GET_CABAC_LC(elem_offset[CU_QP_DELTA] + inc)) {
-+        prefix_val++;
-+        inc = 1;
-+    }
-+    if (prefix_val >= 5) {
-+        int k = 0;
++    if (val >= 5) {
++        unsigned int k = 0;
 +        while (k < CABAC_MAX_BIN && get_cabac_bypass(&lc->cc)) {
-+            suffix_val += 1 << k;
++            val += 1 << k;
 +            k++;
 +        }
 +//        if (k == CABAC_MAX_BIN)
 +//            av_log(s->avctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", k);
 +
 +        while (k--)
-+            suffix_val += get_cabac_bypass(&lc->cc) << k;
++            val += get_cabac_bypass(&lc->cc) << k;
 +    }
-+    return prefix_val + suffix_val;
-+}
-+
-+int ff_hevc_rpi_cu_qp_delta_sign_flag(HEVCRpiLocalContext * const lc)
-+{
-+    return get_cabac_bypass(&lc->cc);
-+}
-+
-+int ff_hevc_rpi_cu_chroma_qp_offset_flag(HEVCRpiLocalContext * const lc)
-+{
-+    return GET_CABAC_LC(elem_offset[CU_CHROMA_QP_OFFSET_FLAG]);
++    return get_cabac_bypass(&lc->cc) ? -val : val;
 +}
 +
 +int ff_hevc_rpi_cu_chroma_qp_offset_idx(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc)
@@ -8290,11 +8328,6 @@ index 0000000000..6566063cd8
 +    return i;
 +}
 +
-+int ff_hevc_rpi_pred_mode_decode(HEVCRpiLocalContext * const lc)
-+{
-+    return GET_CABAC_LC(elem_offset[PRED_MODE_FLAG]);
-+}
-+
 +int ff_hevc_rpi_split_coding_unit_flag_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int ct_depth, int x0, int y0)
 +{
 +    int inc = 0, depth_left = 0, depth_top = 0;
@@ -8351,16 +8384,6 @@ index 0000000000..6566063cd8
 +    return PART_nLx2N;  // 0000
 +}
 +
-+int ff_hevc_rpi_pcm_flag_decode(HEVCRpiLocalContext * const lc)
-+{
-+    return get_cabac_terminate(&lc->cc);
-+}
-+
-+int ff_hevc_rpi_prev_intra_luma_pred_flag_decode(HEVCRpiLocalContext * const lc)
-+{
-+    return GET_CABAC_LC(elem_offset[PREV_INTRA_LUMA_PRED_FLAG]);
-+}
-+
 +int ff_hevc_rpi_mpm_idx_decode(HEVCRpiLocalContext * const lc)
 +{
 +    int i = 0;
@@ -8401,11 +8424,6 @@ index 0000000000..6566063cd8
 +    return i;
 +}
 +
-+int ff_hevc_rpi_merge_flag_decode(HEVCRpiLocalContext * const lc)
-+{
-+    return GET_CABAC_LC(elem_offset[MERGE_FLAG]);
-+}
-+
 +int ff_hevc_rpi_inter_pred_idc_decode(HEVCRpiLocalContext * const lc, int nPbW, int nPbH)
 +{
 +    if (nPbW + nPbH == 12)
@@ -8432,16 +8450,6 @@ index 0000000000..6566063cd8
 +    return i;
 +}
 +
-+int ff_hevc_rpi_mvp_lx_flag_decode(HEVCRpiLocalContext * const lc)
-+{
-+    return GET_CABAC_LC(elem_offset[MVP_LX_FLAG]);
-+}
-+
-+int ff_hevc_rpi_no_residual_syntax_flag_decode(HEVCRpiLocalContext * const lc)
-+{
-+    return GET_CABAC_LC(elem_offset[NO_RESIDUAL_DATA_FLAG]);
-+}
-+
 +static av_always_inline int abs_mvd_greater0_flag_decode(HEVCRpiLocalContext * const lc)
 +{
 +    return GET_CABAC_LC(elem_offset[ABS_MVD_GREATER0_FLAG]);
@@ -8478,21 +8486,6 @@ index 0000000000..6566063cd8
 +    return get_cabac_bypass_sign(&lc->cc, -1);
 +}
 +
-+int ff_hevc_rpi_split_transform_flag_decode(HEVCRpiLocalContext * const lc, const int log2_trafo_size)
-+{
-+    return GET_CABAC_LC(elem_offset[SPLIT_TRANSFORM_FLAG] + 5 - log2_trafo_size);
-+}
-+
-+int ff_hevc_rpi_cbf_cb_cr_decode(HEVCRpiLocalContext * const lc, const int trafo_depth)
-+{
-+    return GET_CABAC_LC(elem_offset[CBF_CB_CR] + trafo_depth);
-+}
-+
-+int ff_hevc_rpi_cbf_luma_decode(HEVCRpiLocalContext * const lc, const int trafo_depth)
-+{
-+    return GET_CABAC_LC(elem_offset[CBF_LUMA] + !trafo_depth);
-+}
-+
 +static int hevc_transform_skip_flag_decode(HEVCRpiLocalContext * const lc, int c_idx_nz)
 +{
 +    return GET_CABAC_LC(elem_offset[TRANSFORM_SKIP_FLAG] + c_idx_nz);
@@ -8508,6 +8501,7 @@ index 0000000000..6566063cd8
 +    return GET_CABAC_LC(elem_offset[EXPLICIT_RDPCM_DIR_FLAG] + c_idx_nz);
 +}
 +
++
 +int ff_hevc_rpi_log2_res_scale_abs(HEVCRpiLocalContext * const lc, const int idx) {
 +    int i =0;
 +
@@ -8517,10 +8511,6 @@ index 0000000000..6566063cd8
 +    return i;
 +}
 +
-+int ff_hevc_rpi_res_scale_sign_flag(HEVCRpiLocalContext *const lc, const int idx) {
-+    return GET_CABAC_LC(elem_offset[RES_SCALE_SIGN_FLAG] + idx);
-+}
-+
 +static av_always_inline void last_significant_coeff_xy_prefix_decode(HEVCRpiLocalContext * const lc, int c_idx_nz,
 +                                                   int log2_size, int *last_scx_prefix, int *last_scy_prefix)
 +{
@@ -8993,8 +8983,7 @@ index 0000000000..6566063cd8
 +    int explicit_rdpcm_dir_flag;
 +
 +    int i;
-+    int qp,shift,scale;
-+    static const uint8_t const level_scale[] = { 40, 45, 51, 57, 64, 72 };
++    int shift,scale;
 +    const uint8_t *scale_matrix = NULL;
 +    uint8_t dc_scale;
 +    const int c_idx_nz = (c_idx != 0);
@@ -9004,22 +8993,6 @@ index 0000000000..6566063cd8
 +
 +    // Derive QP for dequant
 +    if (!lc->cu.cu_transquant_bypass_flag) {
-+        static const uint8_t qp_c[] = { 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37 };
-+        static const uint8_t rem6[51 + 4 * 6 + 1] = {
-+            0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2,
-+            3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
-+            0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
-+            4, 5, 0, 1, 2, 3, 4, 5, 0, 1
-+        };
-+
-+        static const uint8_t div6[51 + 4 * 6 + 1] = {
-+            0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3,  3,  3,
-+            3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6,  6,  6,
-+            7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10,
-+            10, 10, 11, 11, 11, 11, 11, 11, 12, 12
-+        };
-+        int qp_y = lc->qp_y;
-+
 +        may_hide_sign = s->ps.pps->sign_data_hiding_flag;
 +
 +        if (s->ps.pps->transform_skip_enabled_flag &&
@@ -9035,53 +9008,29 @@ index 0000000000..6566063cd8
 +            }
 +        }
 +
-+        if (c_idx == 0) {
-+            qp = qp_y + s->ps.sps->qp_bd_offset;
-+        } else {
-+            int qp_i, offset;
++        {
++            static const uint8_t level_scale[8] = {
++                40, 45, 51, 57, 64, 72, 0, 0  // Pad to 8
++            };
++            const int qp6 = (int8_t)lc->tu.qp_divmod6[c_idx][lc->qp_y];
 +
-+            if (c_idx == 1)
-+                offset = s->ps.pps->cb_qp_offset + s->sh.slice_cb_qp_offset +
-+                         lc->tu.cu_qp_offset_cb;
-+            else
-+                offset = s->ps.pps->cr_qp_offset + s->sh.slice_cr_qp_offset +
-+                         lc->tu.cu_qp_offset_cr;
++            // Shift is set to one less than will actually occur as the scale
++            // and saturate step adds 1 and then shifts right again
++            scale = level_scale[qp6 & 7];
++//            shift = s->ps.sps->bit_depth + log2_trafo_size - (int)(qp6 >> 3);
++            shift = log2_trafo_size - (qp6 >> 3);
 +
-+            qp_i = av_clip(qp_y + offset, - s->ps.sps->qp_bd_offset, 57);
-+            if (ctx_cfmt(s) == 1) {
-+                if (qp_i < 30)
-+                    qp = qp_i;
-+                else if (qp_i > 43)
-+                    qp = qp_i - 6;
-+                else
-+                    qp = qp_c[qp_i - 30];
-+            } else {
-+                if (qp_i > 51)
-+                    qp = 51;
-+                else
-+                    qp = qp_i;
++            if (shift < 0) {
++                scale <<= -shift;
++                shift = 0;
 +            }
-+
-+            qp += s->ps.sps->qp_bd_offset;
-+        }
-+
-+        // Shift is set to one less than will actually occur as the scale
-+        // and saturate step adds 1 and then shifts right again
-+        shift = s->ps.sps->bit_depth + log2_trafo_size - 6;
-+        scale = level_scale[rem6[qp]];
-+        if (div6[qp] >= shift) {
-+            scale <<= (div6[qp] - shift);
-+            shift = 0;
-+        } else {
-+            shift -= div6[qp];
 +        }
 +
 +        if (s->ps.sps->scaling_list_enable_flag && !(trans_skip_or_bypass && log2_trafo_size > 2)) {
-+            const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ?
++            const ScalingList * const sl = s->ps.pps->scaling_list_data_present_flag ?
 +                &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
-+            int matrix_id = lc->cu.pred_mode != MODE_INTRA;
-+
-+            matrix_id = 3 * matrix_id + c_idx;
++            const unsigned int matrix_id =
++                lc->cu.pred_mode != MODE_INTRA ? 3 + c_idx : c_idx;
 +
 +            scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
 +            dc_scale = scale_matrix[0];
@@ -9378,7 +9327,7 @@ index 0000000000..6566063cd8
 +            int16_t * const blk_coeffs = coeffs +
 +                ((x_cg + (y_cg << log2_trafo_size)) << 2);
 +            // This calculation is 'wrong' for log2_traffo_size == 2
-+            // but that doesn't mattor as in this case x_cg & y_cg
++            // but that doesn't matter as in this case x_cg & y_cg
 +            // are always 0 so result is correct (0) anyway
 +            const uint8_t * const blk_scale = scale_matrix +
 +                (((x_cg + (y_cg << 3)) << (5 - log2_trafo_size)));
@@ -9769,6 +9718,202 @@ index 0000000000..6566063cd8
 +//    printf("BY: X=%d,Y=%d\n", lc->pu.mvd.x, lc->pu.mvd.y);
 +}
 +#endif
+diff --git a/libavcodec/rpi_hevc_cabac_fns.h b/libavcodec/rpi_hevc_cabac_fns.h
+new file mode 100644
+index 0000000000..a360815a36
+--- /dev/null
++++ b/libavcodec/rpi_hevc_cabac_fns.h
+@@ -0,0 +1,190 @@
++#ifndef AVCODEC_RPI_HEVC_CABAC_FNS_H
++#define AVCODEC_RPI_HEVC_CABAC_FNS_H
++
++#include "config.h"
++#include "rpi_hevcdec.h"
++
++void ff_hevc_rpi_save_states(HEVCRpiContext *s, const HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_cabac_init_decoder(HEVCRpiLocalContext * const lc);
++void ff_hevc_rpi_cabac_init(const HEVCRpiContext * const s, HEVCRpiLocalContext *const lc, const unsigned int ctb_flags);
++int ff_hevc_rpi_sao_type_idx_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_sao_band_position_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_sao_offset_abs_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_sao_offset_sign_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_sao_eo_class_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_split_coding_unit_flag_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const int ct_depth,
++                                          const int x0, const int y0);
++int ff_hevc_rpi_part_mode_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const int log2_cb_size);
++int ff_hevc_rpi_mpm_idx_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_rem_intra_luma_pred_mode_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_intra_chroma_pred_mode_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_merge_idx_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_inter_pred_idc_decode(HEVCRpiLocalContext * const lc, int nPbW, int nPbH);
++int ff_hevc_rpi_ref_idx_lx_decode(HEVCRpiLocalContext * const lc, const int num_ref_idx_lx);
++int ff_hevc_rpi_log2_res_scale_abs(HEVCRpiLocalContext * const lc, const int idx);
++
++//int ff_hevc_rpi_cu_qp_delta_sign_flag(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_cu_qp_delta(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_cu_chroma_qp_offset_idx(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc);
++void ff_hevc_rpi_hls_residual_coding(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                const int x0, const int y0,
++                                const int log2_trafo_size, const enum ScanType scan_idx,
++                                const int c_idx);
++
++void ff_hevc_rpi_hls_mvd_coding(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_cabac_overflow(const HEVCRpiLocalContext * const lc);
++
++#define HEVC_BIN_SAO_MERGE_FLAG                         0
++#define HEVC_BIN_SAO_TYPE_IDX                           1
++#define HEVC_BIN_SAO_EO_CLASS                           2
++#define HEVC_BIN_SAO_BAND_POSITION                      2
++#define HEVC_BIN_SAO_OFFSET_ABS                         2
++#define HEVC_BIN_SAO_OFFSET_SIGN                        2
++#define HEVC_BIN_END_OF_SLICE_FLAG                      2
++#define HEVC_BIN_SPLIT_CODING_UNIT_FLAG                 2
++#define HEVC_BIN_CU_TRANSQUANT_BYPASS_FLAG              5
++#define HEVC_BIN_SKIP_FLAG                              6
++#define HEVC_BIN_CU_QP_DELTA                            9
++#define HEVC_BIN_PRED_MODE                              12
++#define HEVC_BIN_PART_MODE                              13
++#define HEVC_BIN_PCM_FLAG                               17
++#define HEVC_BIN_PREV_INTRA_LUMA_PRED_MODE              17
++#define HEVC_BIN_MPM_IDX                                18
++#define HEVC_BIN_REM_INTRA_LUMA_PRED_MODE               18
++#define HEVC_BIN_INTRA_CHROMA_PRED_MODE                 18
++#define HEVC_BIN_MERGE_FLAG                             20
++#define HEVC_BIN_MERGE_IDX                              21
++#define HEVC_BIN_INTER_PRED_IDC                         22
++#define HEVC_BIN_REF_IDX_L0                             27
++#define HEVC_BIN_REF_IDX_L1                             29
++#define HEVC_BIN_ABS_MVD_GREATER0_FLAG                  31
++#define HEVC_BIN_ABS_MVD_GREATER1_FLAG                  33
++#define HEVC_BIN_ABS_MVD_MINUS2                         35
++#define HEVC_BIN_MVD_SIGN_FLAG                          35
++#define HEVC_BIN_MVP_LX_FLAG                            35
++#define HEVC_BIN_NO_RESIDUAL_DATA_FLAG                  36
++#define HEVC_BIN_SPLIT_TRANSFORM_FLAG                   37
++#define HEVC_BIN_CBF_LUMA                               40
++#define HEVC_BIN_CBF_CB_CR                              42
++#define HEVC_BIN_TRANSFORM_SKIP_FLAG                    46
++#define HEVC_BIN_EXPLICIT_RDPCM_FLAG                    48
++#define HEVC_BIN_EXPLICIT_RDPCM_DIR_FLAG                50
++#define HEVC_BIN_LAST_SIGNIFICANT_COEFF_X_PREFIX        52
++#define HEVC_BIN_LAST_SIGNIFICANT_COEFF_Y_PREFIX        70
++#define HEVC_BIN_LAST_SIGNIFICANT_COEFF_X_SUFFIX        88
++#define HEVC_BIN_LAST_SIGNIFICANT_COEFF_Y_SUFFIX        88
++#define HEVC_BIN_SIGNIFICANT_COEFF_GROUP_FLAG           88
++#define HEVC_BIN_SIGNIFICANT_COEFF_FLAG                 92
++#define HEVC_BIN_COEFF_ABS_LEVEL_GREATER1_FLAG          136
++#define HEVC_BIN_COEFF_ABS_LEVEL_GREATER2_FLAG          160
++#define HEVC_BIN_COEFF_ABS_LEVEL_REMAINING              166
++#define HEVC_BIN_COEFF_SIGN_FLAG                        166
++#define HEVC_BIN_LOG2_RES_SCALE_ABS                     166
++#define HEVC_BIN_RES_SCALE_SIGN_FLAG                    174
++#define HEVC_BIN_CU_CHROMA_QP_OFFSET_FLAG               176
++#define HEVC_BIN_CU_CHROMA_QP_OFFSET_IDX                177
++
++
++int ff_hevc_rpi_get_cabac(CABACContext * const c, uint8_t * const state);
++int ff_hevc_rpi_get_cabac_terminate(CABACContext * const c);
++
++static inline const uint8_t* ff_hevc_rpi_cabac_skip_bytes(CABACContext * const c, int n) {  // Restart CABAC n bytes past the adjusted read position; returns that position or NULL
++    const uint8_t *ptr = c->bytestream;  // begin at the decoder's current read pointer
++
++    if (c->low & 0x1)  // presumably a byte was fetched ahead but not consumed - confirm against the cabac refill code
++        ptr--;
++#if CABAC_BITS == 16
++    if (c->low & 0x1FF)  // 16-bit build only: step back once more if any of the low 9 bits are set
++        ptr--;
++#endif
++    if ((int) (c->bytestream_end - ptr) < n)  // fewer than n bytes left before bytestream_end
++        return NULL;
++    if (ff_init_cabac_decoder(c, ptr + n, c->bytestream_end - ptr - n) < 0)  // re-init on the data that follows the n skipped bytes
++        return NULL;
++
++    return ptr;  // address of the first skipped byte
++}
++
++static inline int ff_hevc_rpi_sao_merge_flag_decode(HEVCRpiLocalContext * const lc)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_SAO_MERGE_FLAG);
++}
++
++static inline int ff_hevc_rpi_cu_transquant_bypass_flag_decode(HEVCRpiLocalContext * const lc)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CU_TRANSQUANT_BYPASS_FLAG);
++}
++
++static inline int ff_hevc_rpi_cu_chroma_qp_offset_flag(HEVCRpiLocalContext * const lc)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CU_CHROMA_QP_OFFSET_FLAG);
++}
++
++static inline int ff_hevc_rpi_skip_flag_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                             const int x0, const int y0, const int x_cb, const int y_cb)
++{  // Decodes one skip-flag bin; the context is chosen from the left and upper neighbours' skip bits
++    const unsigned int ctb_mask = (1 << s->ps.sps->log2_ctb_size) - 1;  // low bits of a coordinate = offset inside its CTB
++    const unsigned int stride = s->skip_flag_stride;
++    const uint8_t * const skip_bits = s->skip_flag + y_cb * stride;  // row y_cb of the skip bitmap: one bit per x_cb, stride bytes per row
++
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_SKIP_FLAG +  // context = base + left bit + upper bit
++        ((!lc->ctb_left_flag && (x0 & ctb_mask) == 0) ? 0 :  // left neighbour counts as 0 on the CTB's first column when ctb_left_flag is clear
++            (skip_bits[((x_cb - 1) >> 3)] >> ((x_cb - 1) & 7)) & 1) +  // bit (x_cb - 1) of this row
++        ((!lc->ctb_up_flag && (y0 & ctb_mask) == 0) ? 0 :  // upper neighbour counts as 0 on the CTB's first row when ctb_up_flag is clear
++            (skip_bits[(x_cb >> 3) - stride] >> (x_cb & 7)) & 1));  // bit x_cb of the row above. NOTE(review): index is unsigned so it wraps; only reaches the previous row with 32-bit pointers - confirm before any 64-bit build
++}
++
++static inline int ff_hevc_rpi_pred_mode_decode(HEVCRpiLocalContext * const lc)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_PRED_MODE);
++}
++
++static inline int ff_hevc_rpi_pcm_flag_decode(HEVCRpiLocalContext * const lc)
++{
++    return ff_hevc_rpi_get_cabac_terminate(&lc->cc);
++}
++
++static inline int ff_hevc_rpi_prev_intra_luma_pred_flag_decode(HEVCRpiLocalContext * const lc)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_PREV_INTRA_LUMA_PRED_MODE);
++}
++
++static inline int ff_hevc_rpi_merge_flag_decode(HEVCRpiLocalContext * const lc)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_MERGE_FLAG);
++}
++
++static inline int ff_hevc_rpi_mvp_lx_flag_decode(HEVCRpiLocalContext * const lc)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_MVP_LX_FLAG);
++}
++
++static inline int ff_hevc_rpi_no_residual_syntax_flag_decode(HEVCRpiLocalContext * const lc)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_NO_RESIDUAL_DATA_FLAG);
++}
++
++static inline int ff_hevc_rpi_cbf_cb_cr_decode(HEVCRpiLocalContext * const lc, const int trafo_depth)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CBF_CB_CR + trafo_depth);
++}
++
++static inline int ff_hevc_rpi_cbf_luma_decode(HEVCRpiLocalContext * const lc, const int trafo_depth)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CBF_LUMA + !trafo_depth);
++}
++
++static inline int ff_hevc_rpi_split_transform_flag_decode(HEVCRpiLocalContext * const lc, const int log2_trafo_size)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_SPLIT_TRANSFORM_FLAG + 5 - log2_trafo_size);
++}
++
++static inline int ff_hevc_rpi_res_scale_sign_flag(HEVCRpiLocalContext *const lc, const int idx)
++{
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_RES_SCALE_SIGN_FLAG + idx);
++}
++
++
++
++#endif
++
 diff --git a/libavcodec/rpi_hevc_data.c b/libavcodec/rpi_hevc_data.c
 new file mode 100644
 index 0000000000..341bb77d9d
@@ -9889,10 +10034,10 @@ index 0000000000..0aee673d8b
 +#endif /* AVCODEC_RPI_HEVC_DATA_H */
 diff --git a/libavcodec/rpi_hevc_filter.c b/libavcodec/rpi_hevc_filter.c
 new file mode 100644
-index 0000000000..5ae479dd0b
+index 0000000000..a8601da4e7
 --- /dev/null
 +++ b/libavcodec/rpi_hevc_filter.c
-@@ -0,0 +1,1069 @@
+@@ -0,0 +1,1165 @@
 +/*
 + * HEVC video decoder
 + *
@@ -9939,74 +10084,68 @@ index 0000000000..5ae479dd0b
 +#define CB 1
 +#define CR 2
 +
-+static const uint8_t tctable[54] = {
++// tcoffset: -12,12; qp: 0,51; (bs-1)*2: 0,2
++// so -12,65 overall
++static const uint8_t tctablex[] = {
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  // -ve quant padding
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,                          // -12..-1
 +    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 1, // QP  0...18
 +    1, 1, 1, 1, 1, 1, 1,  1,  2,  2,  2,  2,  3,  3,  3,  3, 4, 4, 4, // QP 19...37
-+    5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24           // QP 38...53
++    5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24,          // QP 38...53
++    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24                    // 54..65
 +};
++#define tctable (tctablex + 12 + 6*8)  // index 0 is QP 0; the 60 leading zero entries make indices down to -60 valid
 +
-+static const uint8_t betatable[52] = {
++static const uint8_t betatablex[] = {
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  // -ve quant padding
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,                          // -12..-1
 +     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  7,  8, // QP 0...18
 +     9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, // QP 19...37
-+    38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64                      // QP 38...51
++    38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64,                      // QP 38...51
++    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64                    // 52..63
 +};
++#define betatable (betatablex + 12 + 6*8)  // index 0 is QP 0; the 60 leading zero entries make indices down to -60 valid
 +
-+static int chroma_tc(HEVCRpiContext *s, int qp_y, int c_idx, int tc_offset)
++static inline int chroma_tc(const HEVCRpiContext * const s, const int qp_y,
++                            const int c_idx, const int tc_offset)
 +{
-+    static const int qp_c[] = {
-+        29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37
-+    };
-+    int qp, qp_i, offset, idxt;
-+
-+    // slice qp offset is not used for deblocking
-+    if (c_idx == 1)
-+        offset = s->ps.pps->cb_qp_offset;
-+    else
-+        offset = s->ps.pps->cr_qp_offset;
-+
-+    qp_i = av_clip(qp_y + offset, 0, 57);
-+    if (ctx_cfmt(s) == 1) {
-+        if (qp_i < 30)
-+            qp = qp_i;
-+        else if (qp_i > 43)
-+            qp = qp_i - 6;
-+        else
-+            qp = qp_c[qp_i - 30];
-+    } else {
-+        qp = av_clip(qp_i, 0, 51);
-+    }
-+
-+    idxt = av_clip(qp + DEFAULT_INTRA_TC_OFFSET + tc_offset, 0, 53);
-+    return tctable[idxt];
++    return tctable[(int)s->ps.pps->qp_dblk_x[c_idx][qp_y] + tc_offset + 2];
 +}
 +
-+static inline int get_qPy_pred(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int xBase, int yBase, int log2_cb_size)
++static inline int get_qPy_pred(const HEVCRpiContext * const s, const HEVCRpiLocalContext * const lc,
++                               const unsigned int xBase, const unsigned int yBase)
 +{
-+    int ctb_size_mask        = (1 << s->ps.sps->log2_ctb_size) - 1;
-+    int MinCuQpDeltaSizeMask = ~((1 << (s->ps.sps->log2_ctb_size -
-+                                      s->ps.pps->diff_cu_qp_delta_depth)) - 1);
-+    int xQgBase              = xBase & MinCuQpDeltaSizeMask;
-+    int yQgBase              = yBase & MinCuQpDeltaSizeMask;
-+    int min_cb_width         = s->ps.sps->min_cb_width;
-+    int x_cb                 = xQgBase >> s->ps.sps->log2_min_cb_size;
-+    int y_cb                 = yQgBase >> s->ps.sps->log2_min_cb_size;
-+    int availableA           = (xBase   & ctb_size_mask) &&
-+                               (xQgBase & ctb_size_mask);
-+    int availableB           = (yBase   & ctb_size_mask) &&
-+                               (yQgBase & ctb_size_mask);
++    const unsigned int ctb_size_mask        = (1 << s->ps.sps->log2_ctb_size) - 1;
++    const unsigned int MinCuQpDeltaSizeMask = ~0U << s->ps.pps->log2_min_cu_qp_delta_size;
++    const unsigned int xQgBase              = xBase & MinCuQpDeltaSizeMask;
++    const unsigned int yQgBase              = yBase & MinCuQpDeltaSizeMask;
++    const unsigned int min_cb_width         = s->ps.sps->min_cb_width;
++    const unsigned int x_cb                 = xQgBase >> s->ps.sps->log2_min_cb_size;
++    const unsigned int y_cb                 = yQgBase >> s->ps.sps->log2_min_cb_size;
 +    const int qPy_pred = lc->qPy_pred;
 +
-+    return ((!availableA ? qPy_pred : s->qp_y_tab[(x_cb - 1) + y_cb * min_cb_width]) +
-+            (!availableB ? qPy_pred : s->qp_y_tab[x_cb + (y_cb - 1) * min_cb_width]) + 1) >> 1;
++    return (((xQgBase & ctb_size_mask) == 0 ? qPy_pred :
++             s->qp_y_tab[(x_cb - 1) + y_cb * min_cb_width]) +
++            ((yQgBase & ctb_size_mask) == 0 ? qPy_pred :
++             s->qp_y_tab[x_cb + (y_cb - 1) * min_cb_width]) + 1) >> 1;
 +}
 +
 +// * Only called from bitstream decode in foreground
 +//   so should be safe
-+void ff_hevc_rpi_set_qPy(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int xBase, int yBase, int log2_cb_size)
++void ff_hevc_rpi_set_qPy(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int xBase, int yBase)
 +{
-+    const int qp_y = get_qPy_pred(s, lc, xBase, yBase, log2_cb_size);
++    const int qp_y = get_qPy_pred(s, lc, xBase, yBase);
 +
 +    if (lc->tu.cu_qp_delta != 0) {
++        // ?? I suspect that the -bd_offset here leads to us adding it elsewhere
 +        int off = s->ps.sps->qp_bd_offset;
 +        lc->qp_y = FFUMOD(qp_y + lc->tu.cu_qp_delta + 52 + 2 * off,
 +                                 52 + off) - off;
@@ -10014,14 +10153,6 @@ index 0000000000..5ae479dd0b
 +        lc->qp_y = qp_y;
 +}
 +
-+static int get_qPy(const HEVCRpiContext * const s, const int xC, const int yC)
-+{
-+    const int log2_min_cb_size  = s->ps.sps->log2_min_cb_size;
-+    const int x                 = xC >> log2_min_cb_size;
-+    const int y                 = yC >> log2_min_cb_size;
-+    return s->qp_y_tab[x + y * s->ps.sps->min_cb_width];
-+}
-+
 +static inline unsigned int pixel_shift(const HEVCRpiContext * const s, const unsigned int c_idx)
 +{
 +    return c_idx != 0 ? 1 + s->ps.sps->pixel_shift : s->ps.sps->pixel_shift;
@@ -10118,37 +10249,49 @@ index 0000000000..5ae479dd0b
 +}
 +
 +// N.B. Src & dst are swapped as this is a restore!
++// x0 & y0 are in luma coords
++// Width & height are in Y/C pels as appropriate
++// * Clear scope for optimisation here but not used enough to be worth it
 +static void restore_tqb_pixels(const HEVCRpiContext * const s,
 +                               uint8_t *src1, const uint8_t *dst1,
-+                               ptrdiff_t stride_src, ptrdiff_t stride_dst,
-+                               int x0, int y0, int width, int height, int c_idx)
++                               const ptrdiff_t stride_src, const ptrdiff_t stride_dst,
++                               const unsigned int x0, const unsigned int y0,
++                               const unsigned int width, const int height,
++                               const int c_idx)
 +{
-+    if ( s->ps.pps->transquant_bypass_enable_flag ||
-+            (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) {
-+        int x, y;
-+        int min_pu_size  = 1 << s->ps.sps->log2_min_pu_size;
-+        const unsigned int hshift = ctx_hshift(s, c_idx);
-+        const unsigned int vshift = ctx_vshift(s, c_idx);
-+        int x_min        = ((x0         ) >> s->ps.sps->log2_min_pu_size);
-+        int y_min        = ((y0         ) >> s->ps.sps->log2_min_pu_size);
-+        int x_max        = ((x0 + width ) >> s->ps.sps->log2_min_pu_size);
-+        int y_max        = ((y0 + height) >> s->ps.sps->log2_min_pu_size);
-+        const unsigned int sh = pixel_shift(s, c_idx);
-+        int len          = (min_pu_size >> hshift) << sh;
-+        for (y = y_min; y < y_max; y++) {
-+            for (x = x_min; x < x_max; x++) {
-+                if (s->is_pcm[y * s->ps.sps->min_pu_width + x]) {
-+                    int n;
-+                    uint8_t *src = src1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_src + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << sh);
-+                    const uint8_t *dst = dst1 + (((y << s->ps.sps->log2_min_pu_size) - y0) >> vshift) * stride_dst + ((((x << s->ps.sps->log2_min_pu_size) - x0) >> hshift) << sh);
-+                    for (n = 0; n < (min_pu_size >> vshift); n++) {
-+                        memcpy(src, dst, len);
-+                        src += stride_src;
-+                        dst += stride_dst;
++    if (s->ps.pps->transquant_bypass_enable_flag ||
++        s->ps.sps->pcm.loop_filter_disable_flag)
++    {
++        const uint8_t *pcm = s->is_pcm + (x0 >> 6) + (y0 >> 3) * s->ps.sps->pcm_width;
++        int blks_y = height >> (c_idx == 0 ? 3 : 2);
++        const unsigned int bwidth = 8 << s->ps.sps->pixel_shift;  // Y & C have the same width in sand
++        const unsigned int bheight = (c_idx == 0) ? 8 : 4;
++        const unsigned int sh = ((x0 >> 3) & 7);
++        const unsigned int mask = (1 << (width >> (c_idx == 0 ? 3 : 2))) - 1;
++
++        do {
++            unsigned int m = (*pcm >> sh) & mask;
++            uint8_t * bd = src1;
++            const uint8_t * bs = dst1;
++            while (m != 0) {
++                if ((m & 1) != 0) {
++                    unsigned int i;
++                    uint8_t * d = bd;
++                    const uint8_t * s = bs;
++                    for (i = 0; i != bheight; ++i) {
++                        memcpy(d, s, bwidth);
++                        d += stride_src;
++                        s += stride_dst;
 +                    }
 +                }
++                m >>= 1;
++                bs += bwidth;
++                bd += bwidth;
 +            }
-+        }
++            src1 += stride_src * bheight;
++            dst1 += stride_dst * bheight;
++            pcm += s->ps.sps->pcm_width;
++        } while (--blks_y > 0);
 +    }
 +}
 +
@@ -10169,7 +10312,7 @@ index 0000000000..5ae479dd0b
 +    int y_ctb                = y >> s->ps.sps->log2_ctb_size;
 +    int ctb_addr_rs          = y_ctb * s->ps.sps->ctb_width + x_ctb;
 +    int ctb_addr_ts          = s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
-+    SAOParams *sao           = &CTB(s->sao, x_ctb, y_ctb);
++    RpiSAOParams *sao           = &CTB(s->sao, x_ctb, y_ctb);
 +    // flags indicating unfilterable edges
 +    uint8_t vert_edge[]      = { 0, 0 };
 +    uint8_t horiz_edge[]     = { 0, 0 };
@@ -10231,9 +10374,9 @@ index 0000000000..5ae479dd0b
 +        const int x0 = x >> hshift;
 +        const int y0 = y >> vshift;
 +        const ptrdiff_t stride_src = frame_stride1(s->frame, c_idx);
-+        int ctb_size_h = (1 << (s->ps.sps->log2_ctb_size)) >> hshift;
-+        int ctb_size_v = (1 << (s->ps.sps->log2_ctb_size)) >> vshift;
-+        int width    = FFMIN(ctb_size_h, (s->ps.sps->width  >> hshift) - x0);
++        const int ctb_size_h = (1 << (s->ps.sps->log2_ctb_size)) >> hshift;
++        const int ctb_size_v = (1 << (s->ps.sps->log2_ctb_size)) >> vshift;
++        const int width    = FFMIN(ctb_size_h, (s->ps.sps->width  >> hshift) - x0);
 +        const int height = FFMIN(ctb_size_v, (s->ps.sps->height >> vshift) - y0);
 +        int tab      = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
 +        ptrdiff_t stride_dst;
@@ -10261,12 +10404,16 @@ index 0000000000..5ae479dd0b
 +            break;
 +        }
 +
++//        if (c_idx == 1)
++//            printf("%d: %dx%d %d,%d: lr=%d\n", c_idx, width, height, x0, y0, wants_lr);
++
 +        switch (sao->type_idx[c_idx]) {
 +        case SAO_BAND:
 +            copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
 +                           x_ctb, y_ctb);
 +            if (s->ps.pps->transquant_bypass_enable_flag ||
-+                (s->ps.sps->pcm.loop_filter_disable_flag && s->ps.sps->pcm_enabled_flag)) {
++                s->ps.sps->pcm.loop_filter_disable_flag)
++            {
 +                // Can't use the edge buffer here as it may be in use by the foreground
 +                DECLARE_ALIGNED(64, uint8_t, dstbuf)
 +                    [2*MAX_PB_SIZE*MAX_PB_SIZE];
@@ -10313,10 +10460,10 @@ index 0000000000..5ae479dd0b
 +            int bottom_edge = edges[3];
 +            // Can't use the edge buffer here as it may be in use by the foreground
 +            DECLARE_ALIGNED(64, uint8_t, dstbuf)
-+                [2*(MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE)*(MAX_PB_SIZE + 2) + 64];
++                [RPI_HEVC_SAO_BUF_STRIDE * (MAX_PB_SIZE + 2) + 64];
 +
-+            stride_dst = 2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE;
-+            dst = dstbuf + stride_dst + AV_INPUT_BUFFER_PADDING_SIZE;
++            stride_dst = RPI_HEVC_SAO_BUF_STRIDE;
++            dst = dstbuf + stride_dst + 32;
 +
 +            if (!top_edge) {
 +                uint8_t *dst1;
@@ -10452,389 +10599,511 @@ index 0000000000..5ae479dd0b
 +#endif
 +}
 +
-+// Returns 2 or 0.
-+static int get_pcm(HEVCRpiContext *s, int x, int y)
++static inline uint32_t pcm4(const HEVCRpiContext * const s, const unsigned int x, const unsigned int y)
 +{
-+    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
-+    int x_pu, y_pu;
-+
-+    if (x < 0 || y < 0)
-+        return 2;
-+
-+    x_pu = x >> log2_min_pu_size;
-+    y_pu = y >> log2_min_pu_size;
-+
-+    if (x_pu >= s->ps.sps->min_pu_width || y_pu >= s->ps.sps->min_pu_height)
-+        return 2;
-+    return s->is_pcm[y_pu * s->ps.sps->min_pu_width + x_pu];
++    const uint8_t * const pcm = s->is_pcm + (x >> 6) + (y >> 3) * s->ps.sps->pcm_width;
++    return (pcm[0] |
++        (pcm[1] << 8) |
++        (pcm[s->ps.sps->pcm_width] << 16) |
++        (pcm[s->ps.sps->pcm_width + 1] << 24)) >> ((x >> 3) & 7);
 +}
 +
-+#define TC_CALC(qp, bs)                                                 \
-+    tctable[av_clip((qp) + DEFAULT_INTRA_TC_OFFSET * ((bs) - 1) +       \
-+                    (tc_offset & -2),                                   \
-+                    0, MAX_QP + DEFAULT_INTRA_TC_OFFSET)]
-+
-+static void deblocking_filter_CTB(HEVCRpiContext *s, int x0, int y0)
++static inline uint32_t pcm2(const HEVCRpiContext * const s, const unsigned int x, const unsigned int y)
 +{
-+    uint8_t *src;
-+    int x, y;
-+    int beta;
-+    int32_t tc[2];
-+    uint8_t no_p[2] = { 0 };
-+    uint8_t no_q[2] = { 0 };
++    const uint8_t * const pcm = s->is_pcm + (x >> 6) + (y >> 3) * s->ps.sps->pcm_width;
++    return (pcm[0] | (pcm[1] << 8)) >> ((x >> 3) & 7);
++}
 +
-+    int log2_ctb_size = s->ps.sps->log2_ctb_size;
-+    int x_end, x_end2, y_end;
-+    int ctb_size        = 1 << log2_ctb_size;
-+    int ctb             = (x0 >> log2_ctb_size) +
-+                          (y0 >> log2_ctb_size) * s->ps.sps->ctb_width;
-+    int cur_tc_offset   = s->deblock[ctb].tc_offset;
-+    int cur_beta_offset = s->deblock[ctb].beta_offset;
-+    int left_tc_offset, left_beta_offset;
-+    int tc_offset, beta_offset;
-+    int pcmf = (s->ps.sps->pcm_enabled_flag &&
-+                s->ps.sps->pcm.loop_filter_disable_flag) ||
-+               s->ps.pps->transquant_bypass_enable_flag;
++// We sometimes need 17 2-bit entries (annoying!)
++// * This could be avoided if we separate out the H filter left-stub deblock
++//   but 64 bit constant shr shouldn't be too bad - though the variable mask here is probably quite nasty
++static inline uint64_t hbs_get(const HEVCRpiContext * const s, const unsigned int xl, const unsigned int xr, const unsigned int y)
++{
++    unsigned int n = (xr - xl + 7) & ~7;
 +
-+#ifdef DISABLE_DEBLOCK_NONREF
-+    if (!s->used_for_ref)
-+      return; // Don't deblock non-reference frames
-+#endif
-+#ifdef DISABLE_DEBLOCK
-+    return;
-+#endif
-+    if (!s->used_for_ref && s->avctx->skip_loop_filter >= AVDISCARD_NONREF)
-+        return;
-+    if (x0) {
-+        left_tc_offset   = s->deblock[ctb - 1].tc_offset;
-+        left_beta_offset = s->deblock[ctb - 1].beta_offset;
-+    } else {
-+        left_tc_offset   = 0;
-+        left_beta_offset = 0;
-+    }
++    return n == 0 ? (uint64_t)0 :
++        (*(uint64_t *)(s->horizontal_bs + (xl >> 4) + (y >> 3) * s->hbs_stride) >> ((xl >> 1) & 7)) & (((uint64_t)1 << (n >> 1)) - 1);
++}
 +
-+    x_end = x0 + ctb_size;
-+    if (x_end > s->ps.sps->width)
-+        x_end = s->ps.sps->width;
-+    y_end = y0 + ctb_size;
-+    if (y_end > s->ps.sps->height)
-+        y_end = s->ps.sps->height;
++static inline uint64_t vbs_get(const HEVCRpiContext * const s, const unsigned int xl, const unsigned int xr, const unsigned int y)
++{
++    unsigned int n = (xr - xl + 7) & ~7;
 +
-+    tc_offset   = cur_tc_offset;
-+    beta_offset = cur_beta_offset;
++    return n == 0 ? (uint64_t)0 :
++        (*(uint64_t *)(s->vertical_bs2 + (xl >> 4) + (y >> 3) * s->hbs_stride) >> ((xl >> 1) & 7)) & (((uint64_t)1 << (n >> 1)) - 1);
++}
 +
-+    x_end2 = x_end;
-+    if (x_end2 != s->ps.sps->width)
-+        x_end2 -= 8;
-+    for (y = y0; y < y_end; y += 8) {
-+        // vertical filtering luma
-+        for (x = x0 ? x0 : 8; x < x_end; x += 8) {
-+            const int bs0 = s->vertical_bs[(x +  y      * s->bs_width) >> 2];
-+            const int bs1 = s->vertical_bs[(x + (y + 4) * s->bs_width) >> 2];
-+            if (bs0 || bs1) {
-+                const int qp = (get_qPy(s, x - 1, y)     + get_qPy(s, x, y)     + 1) >> 1;
 +
-+                beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
++static void deblock_y_blk(const HEVCRpiContext * const s, const RpiBlk bounds, const int end_x, const int end_y)
++{
++    const unsigned int log2_ctb_size = s->ps.sps->log2_ctb_size;
++    const unsigned int log2_min_cb_size  = s->ps.sps->log2_min_cb_size;
++    const unsigned int ctb_size = (1 << log2_ctb_size);
++    const unsigned int cb_r = FFMIN(bounds.x + bounds.w, s->ps.sps->width) - (end_x ? 0 :  1);
++    const unsigned int ctb_n = (bounds.x + bounds.y * s->ps.sps->ctb_width) >> log2_ctb_size;
++    const DBParams * cb_dbp = s->deblock + ctb_n;
++    const unsigned int b_b = FFMIN(bounds.y + bounds.h, s->ps.sps->height) - (end_y ? 0 : 8);
 +
-+                tc[0]   = bs0 ? TC_CALC(qp, bs0) : 0;
-+                tc[1]   = bs1 ? TC_CALC(qp, bs1) : 0;
-+                if (pcmf) {
-+                    no_p[0] = get_pcm(s, x - 1, y);
-+                    no_p[1] = get_pcm(s, x - 1, y + 4);
-+                    no_q[0] = get_pcm(s, x, y);
-+                    no_q[1] = get_pcm(s, x, y + 4);
-+                }
++    unsigned int cb_x;
 +
-+                // This copes properly with no_p/no_q
-+                s->hevcdsp.hevc_v_loop_filter_luma2(av_rpi_sand_frame_pos_y(s->frame, x, y),
-+                                                 frame_stride1(s->frame, LUMA),
-+                                                 beta, tc, no_p, no_q,
-+                                                 av_rpi_sand_frame_pos_y(s->frame, x - 4, y));
-+                // *** VPU deblock lost here
-+            }
-+        }
++    // Do in CTB-shaped blocks
++    for (cb_x = bounds.x; cb_x < cb_r; cb_x += ctb_size, ++cb_dbp)
++    {
++        const unsigned int bv_r = FFMIN(cb_x + ctb_size, cb_r);
++        const unsigned int bv_l = FFMAX(cb_x, 8);
++        const unsigned int bh_r = cb_x + ctb_size >= cb_r ? cb_r - 8 : cb_x + ctb_size - 9;
++        const unsigned int bh_l = bv_l - 8;
++        unsigned int y;
 +
-+        if(!y)
-+             continue;
++        // Main body
++        for (y = (bounds.y == 0 ? 0 : bounds.y - 8); y < b_b; y += 8)
++        {
++            const DBParams * const dbp = y < bounds.y ? cb_dbp - s->ps.sps->ctb_width : cb_dbp;
++            const int8_t * const qta = s->qp_y_tab + ((y - 1) >> log2_min_cb_size) * s->ps.sps->min_cb_width;
++            const int8_t * const qtb = s->qp_y_tab + (y >> log2_min_cb_size) * s->ps.sps->min_cb_width;
 +
-+        // horizontal filtering luma
-+        for (x = x0 ? x0 - 8 : 0; x < x_end2; x += 8) {
-+            const int bs0 = s->horizontal_bs[( x      + y * s->bs_width) >> 2];
-+            const int bs1 = s->horizontal_bs[((x + 4) + y * s->bs_width) >> 2];
-+            if (bs0 || bs1) {
-+                const int qp = (get_qPy(s, x, y - 1)     + get_qPy(s, x, y)     + 1) >> 1;
++            {
++                const uint8_t * const tcv = tctable + dbp->tc_offset;
++                const uint8_t * const betav = betatable + dbp->beta_offset;
++                unsigned int pcmfa = pcm2(s, bv_l - 1, y);
++//                const uint8_t * vbs = s->vertical_bs + (bv_l >> 3) * s->bs_height + (y >> 2);
++                uint64_t vbs2 = vbs_get(s, bv_l, bv_r, y);
++                unsigned int x;
 +
-+                tc_offset   = x >= x0 ? cur_tc_offset : left_tc_offset;
-+                beta_offset = x >= x0 ? cur_beta_offset : left_beta_offset;
++                for (x = bv_l; x < bv_r; x += 8)
++                {
++                    const unsigned int pcmf_v = pcmfa & 3;
++                    const unsigned int bs0 = vbs2 & 3;
++                    const unsigned int bs1 = (vbs2 & 0xc) >> 2;
 +
-+                beta = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
-+                tc[0]   = bs0 ? TC_CALC(qp, bs0) : 0;
-+                tc[1]   = bs1 ? TC_CALC(qp, bs1) : 0;
-+                src = av_rpi_sand_frame_pos_y(s->frame, x, y);
-+
-+                if (pcmf) {
-+                    no_p[0] = get_pcm(s, x, y - 1);
-+                    no_p[1] = get_pcm(s, x + 4, y - 1);
-+                    no_q[0] = get_pcm(s, x, y);
-+                    no_q[1] = get_pcm(s, x + 4, y);
-+                    s->hevcdsp.hevc_h_loop_filter_luma_c(src,
++                    if ((bs0 | bs1) != 0 && pcmf_v != 3)
++                    {
++                        const int qp = (qtb[(x - 1) >> log2_min_cb_size] + qtb[x >> log2_min_cb_size] + 1) >> 1;
++                        s->hevcdsp.hevc_v_loop_filter_luma2(av_rpi_sand_frame_pos_y(s->frame, x, y),
 +                                                         frame_stride1(s->frame, LUMA),
-+                                                         beta, tc, no_p, no_q);
-+                } else
-+#ifdef RPI_DEBLOCK_VPU
-+                if (s->enable_rpi_deblock) {
-+                    uint8_t (*setup)[2][2][4];
-+                    int num16 = (y>>4)*s->setup_width + (x>>4);
-+                    int a = ((x>>3) & 1) << 1;
-+                    int b = (y>>3) & 1;
-+                    setup = s->dvq->y_setup_arm[num16];
-+                    setup[1][b][0][a] = beta;
-+                    setup[1][b][0][a + 1] = beta;
-+                    setup[1][b][1][a] = tc[0];
-+                    setup[1][b][1][a + 1] = tc[1];
-+                } else
-+#endif
-+                    s->hevcdsp.hevc_h_loop_filter_luma(src,
-+                                                       frame_stride1(s->frame, LUMA),
-+                                                       beta, tc, no_p, no_q);
-+            }
-+        }
-+    }
-+
-+    if (ctx_cfmt(s) != 0) {
-+        const int v = 2;
-+        const int h = 2;
-+
-+        // vertical filtering chroma
-+        for (y = y0; y < y_end; y += 8 * v) {
-+//                const int demi_y = y + 4 * v >= s->ps.sps->height;
-+            const int demi_y = 0;
-+            for (x = x0 ? x0 : 8 * h; x < x_end; x += 8 * h) {
-+                const int bs0 = s->vertical_bs[(x +  y          * s->bs_width) >> 2];
-+                const int bs1 = s->vertical_bs[(x + (y + 4 * v) * s->bs_width) >> 2];
-+
-+                if ((bs0 == 2) || (bs1 == 2)) {
-+                    const int qp0 = (get_qPy(s, x - 1, y)         + get_qPy(s, x, y)         + 1) >> 1;
-+                    const int qp1 = (get_qPy(s, x - 1, y + 4 * v) + get_qPy(s, x, y + 4 * v) + 1) >> 1;
-+                    unsigned int no_f = !demi_y ? 0 : 2 | 8;
-+
-+                    // tc_offset here should be set to cur_tc_offset I think
-+                    const uint32_t tc4 =
-+                        ((bs0 != 2) ? 0 : chroma_tc(s, qp0, 1, cur_tc_offset) | (chroma_tc(s, qp0, 2, cur_tc_offset) << 16)) |
-+                        ((bs1 != 2) ? 0 : ((chroma_tc(s, qp1, 1, cur_tc_offset) | (chroma_tc(s, qp1, 2, cur_tc_offset) << 16)) << 8));
-+
-+                    if (tc4 == 0)
-+                        continue;
-+
-+                    if (pcmf) {
-+                        no_f =
-+                            (get_pcm(s, x - 1, y) ? 1 : 0) |
-+                            (get_pcm(s, x - 1, y + 4 * v) ? 2 : 0) |
-+                            (get_pcm(s, x, y) ? 4 : 0) |
-+                            (get_pcm(s, x, y + 4 * v) ? 8 : 0);
-+                        if (no_f == 0xf)
-+                            continue;
++                                                         betav[qp],
++                                                         (bs0 == 0 ? 0 : tcv[qp + (int)(bs0 & 2)]) |
++                                                          ((bs1 == 0 ? 0 : tcv[qp + (int)(bs1 & 2)]) << 16),
++                                                         pcmf_v,
++                                                         av_rpi_sand_frame_pos_y(s->frame, x - 4, y));
 +                    }
 +
++                    pcmfa >>= 1;
++//                    vbs += s->bs_height;
++                    vbs2 >>= 4;
++                }
++            }
++
++            if (y != 0)
++            {
++                unsigned int x;
++                unsigned int pcmfa = pcm4(s, bh_l, y - 1);
++                uint64_t hbs = hbs_get(s, bh_l, bh_r + 1, y);  // Will give (x <= bh_r) in for loop
++
++                for (x = bh_l; hbs != 0; x += 8, hbs >>= 4)
++                {
++                    const unsigned int pcmf_h = (pcmfa & 1) | ((pcmfa & 0x10000) >> 15);
++                    const unsigned int bs0 = hbs & 3;
++                    const unsigned int bs1 = (hbs >> 2) & 3;
++
++                    if ((bs0 | bs1) != 0 && pcmf_h != 3)
++                    {
++                        const int qp = (qta[x >> log2_min_cb_size] + qtb[x >> log2_min_cb_size] + 1) >> 1;
++                        const DBParams * const dbph = (x < cb_x ? dbp - 1 : dbp);
++                        const uint8_t * const tc = tctable + dbph->tc_offset + qp;
++                        s->hevcdsp.hevc_h_loop_filter_luma2(av_rpi_sand_frame_pos_y(s->frame, x, y),
++                                                             frame_stride1(s->frame, LUMA),
++                                                             betatable[qp + dbph->beta_offset],
++                                                             (bs0 == 0 ? 0 : tc[bs0 & 2]) |
++                                                                ((bs1 == 0 ? 0 : tc[bs1 & 2]) << 16),
++                                                             pcmf_h);
++                    }
++
++                    pcmfa >>= 1;
++                }
++            }
++
++        }
++    }
++}
++
++#define TL 1
++#define TR 2
++#define BL 4
++#define BR 8
++
++static av_always_inline int q2h(const HEVCRpiContext * const s, const unsigned int x, const unsigned int y)
++{
++    const unsigned int log2_min_cb_size  = s->ps.sps->log2_min_cb_size;
++    const int8_t * const qt = s->qp_y_tab + (y >> log2_min_cb_size) * s->ps.sps->min_cb_width;
++    return (qt[(x - 1) >> log2_min_cb_size] + qt[x >> log2_min_cb_size] + 1) >> 1;
++}
++
++static void deblock_uv_blk(const HEVCRpiContext * const s, const RpiBlk bounds, const int end_x, const int end_y)
++{
++    const unsigned int log2_ctb_size = s->ps.sps->log2_ctb_size;
++    const unsigned int log2_min_cb_size  = s->ps.sps->log2_min_cb_size;
++    const unsigned int ctb_size = (1 << log2_ctb_size);
++    const unsigned int cb_r = FFMIN(bounds.x + bounds.w, s->ps.sps->width) - (end_x ? 0 :  8);
++    const unsigned int ctb_n = (bounds.x + bounds.y * s->ps.sps->ctb_width) >> log2_ctb_size;
++    const DBParams * dbp = s->deblock + ctb_n;
++    const unsigned int b_b = FFMIN(bounds.y + bounds.h, s->ps.sps->height) - (end_y ? 0 : 8);
++    const uint8_t * const tcq_u = s->ps.pps->qp_dblk_x[1];
++    const uint8_t * const tcq_v = s->ps.pps->qp_dblk_x[2];
++
++    unsigned int cb_x;
++
++    av_assert1((bounds.x & (ctb_size - 1)) == 0);
++    av_assert1((bounds.y & (ctb_size - 1)) == 0);
++    av_assert1(bounds.h <= ctb_size);
++
++    // Do in CTB-shaped blocks
++    for (cb_x = bounds.x; cb_x < cb_r; cb_x += ctb_size, ++dbp) {
++        const unsigned int bv_r = FFMIN(cb_x + ctb_size, cb_r);
++        const unsigned int bv_l = FFMAX(cb_x, 16);
++        unsigned int y;
++
++        // V above
++        if (bounds.y != 0) {
++            // Deblock V up 8
++            // CTB above current
++            // Top-half only (tc4 & ~0xffff == 0) is special cased in asm
++            unsigned int x;
++            const unsigned int y = bounds.y - 8;
++
++            unsigned int pcmfa = pcm2(s, bv_l - 1, y);
++            const uint8_t * const tc = tctable + 2 + (dbp - s->ps.sps->ctb_width)->tc_offset;
++            uint64_t vbs2 = (vbs_get(s, bv_l, bv_r, y) & 0x0202020202020202U);
++
++            for (x = bv_l; x < bv_r; x += 16, vbs2 >>= 8)
++            {
++                const unsigned int pcmf_v = (pcmfa & 3);
++                if ((vbs2 & 2) != 0 && pcmf_v != 3)
++                {
++                    const int qp0 = q2h(s, x, y);
 +                    s->hevcdsp.hevc_v_loop_filter_uv2(av_rpi_sand_frame_pos_c(s->frame, x >> 1, y >> 1),
 +                                                   frame_stride1(s->frame, 1),
-+                                                   tc4,
++                                                   tc[tcq_u[qp0]] | (tc[tcq_v[qp0]] << 8),
 +                                                   av_rpi_sand_frame_pos_c(s->frame, (x >> 1) - 2, y >> 1),
-+                                                   no_f);
++                                                   pcmf_v);
 +                }
++                pcmfa >>= 2;
 +            }
++        }
 +
-+            if (y == 0)
-+                continue;
++        for (y = bounds.y; y < b_b; y += 16)
++        {
++            // V
++            {
++                unsigned int x;
++                unsigned int pcmfa = pcm4(s, bv_l - 1, y);
++                const unsigned int pcmf_or = (y + 16 <= b_b) ? 0 : BL | BR;
++                const uint8_t * const tc = tctable + 2 + dbp->tc_offset;
++                uint64_t vbs2 = (vbs_get(s, bv_l, bv_r, y) & 0x0202020202020202U) |
++                                 ((vbs_get(s, bv_l, bv_r, y + 8) & 0x0202020202020202U) << 4);
 +
-+            // horizontal filtering chroma
-+            tc_offset = x0 ? left_tc_offset : cur_tc_offset;
-+            x_end2 = x_end;
-+            if (x_end != s->ps.sps->width)
-+                x_end2 = x_end - 8 * h;
++                for (x = bv_l; x < bv_r; x += 16, vbs2 >>= 8)
++                {
++                    const unsigned int pcmf_v = pcmf_or | (pcmfa & 3) | ((pcmfa >> 14) & 0xc);
++                    const unsigned int bs0 = (~pcmf_v & (TL | TR)) == 0 ? 0 : vbs2 & 2;
++                    const unsigned int bs1 = (~pcmf_v & (BL | BR)) == 0 ? 0 : (vbs2 & 0x20) >> 4;
 +
-+            for (x = x0 ? x0 - 8 * h: 0; x < x_end2; x += 8 * h) {
-+//                    const int demi_x = x + 4 * v >= s->ps.sps->width;
-+                const int demi_x = 0;
-+
-+                const int bs0 = s->horizontal_bs[( x          + y * s->bs_width) >> 2];
-+                const int bs1 = s->horizontal_bs[((x + 4 * h) + y * s->bs_width) >> 2];
-+                if ((bs0 == 2) || (bs1 == 2)) {
-+                    const int qp0 = bs0 == 2 ? (get_qPy(s, x,         y - 1) + get_qPy(s, x,         y) + 1) >> 1 : 0;
-+                    const int qp1 = bs1 == 2 ? (get_qPy(s, x + 4 * h, y - 1) + get_qPy(s, x + 4 * h, y) + 1) >> 1 : 0;
-+                    const uint32_t tc4 =
-+                        ((bs0 != 2) ? 0 : chroma_tc(s, qp0, 1, tc_offset) | (chroma_tc(s, qp0, 2, tc_offset) << 16)) |
-+                        ((bs1 != 2) ? 0 : ((chroma_tc(s, qp1, 1, cur_tc_offset) | (chroma_tc(s, qp1, 2, cur_tc_offset) << 16)) << 8));
-+                    unsigned int no_f = !demi_x ? 0 : 2 | 8;
-+
-+                    if (tc4 == 0)
-+                        continue;
-+
-+                    if (pcmf) {
-+                        no_f =
-+                            (get_pcm(s, x,         y - 1) ? 1 : 0) |
-+                            (get_pcm(s, x + 4 * h, y - 1) ? 2 : 0) |
-+                            (get_pcm(s, x,         y)     ? 4 : 0) |
-+                            (get_pcm(s, x + 4 * h, y)     ? 8 : 0);
-+
-+                        if (no_f == 0xf)
-+                            continue;
++                    if ((bs0 | bs1) != 0)
++                    {
++                        const int qp0 = q2h(s, x, y);
++                        const int qp1 = q2h(s, x, y + 8);
++                        s->hevcdsp.hevc_v_loop_filter_uv2(av_rpi_sand_frame_pos_c(s->frame, x >> 1, y >> 1),
++                            frame_stride1(s->frame, 1),
++                            ((bs0 == 0) ? 0 : (tc[tcq_u[qp0]] << 0) | (tc[tcq_v[qp0]] << 8)) |
++                                ((bs1 == 0) ? 0 : (tc[tcq_u[qp1]] << 16) | (tc[tcq_v[qp1]] << 24)),
++                            av_rpi_sand_frame_pos_c(s->frame, (x >> 1) - 2, y >> 1),
++                            pcmf_v);
 +                    }
 +
-+                    s->hevcdsp.hevc_h_loop_filter_uv(av_rpi_sand_frame_pos_c(s->frame, x >> 1, y >> 1),
-+                                                     frame_stride1(s->frame, LUMA),
-+                                                     tc4, no_f);
++                    pcmfa >>= 2;
++                }
++            }
++
++            // H
++            if (y != 0)
++            {
++                unsigned int x;
++                const unsigned int bh_r = cb_x + ctb_size >= cb_r ? cb_r : cb_x + ctb_size - 16;
++                const unsigned int bh_l = bv_l - 16;
++                unsigned int pcmfa = pcm4(s, bh_l, y - 1);
++                uint64_t hbs = hbs_get(s, bh_l, bh_r, y) & 0x2222222222222222U;
++                const int8_t * const qta = s->qp_y_tab + ((y - 1) >> log2_min_cb_size) * s->ps.sps->min_cb_width;
++                const int8_t * const qtb = s->qp_y_tab + (y >> log2_min_cb_size) * s->ps.sps->min_cb_width;
++
++                // Chop off bits we don't want...
++                if (bh_l < bounds.x) {
++                    pcmfa |= 0x10001; // TL|BL pre rearrangement
++                    hbs &= ~(uint64_t)3;  // Make BS 0
++                }
++
++                for (x = bh_l; hbs != 0; x += 16, hbs >>= 8)
++                {
++                    const unsigned int pcmf_h = (x + 16 > bh_r ? TR | BR : 0) |
++                        (pcmfa & 3) | ((pcmfa >> 14) & 0xc);
++                    const int bs0 = hbs & 2;
++                    const int bs1 = (~pcmf_h & (TR | BR)) == 0 ? 0 : (hbs >> 4) & 2;
++                    if ((bs0 | bs1) != 0)
++                    {
++                        const int qp0 = (qta[x >> log2_min_cb_size] + qtb[x >> log2_min_cb_size] + 1) >> 1;
++                        const int qp1 = (qta[(x + 8) >> log2_min_cb_size] + qtb[(x + 8) >> log2_min_cb_size] + 1) >> 1;
++                        const uint8_t * const tc = tctable + 2 + (x < cb_x ? dbp - 1 : dbp)->tc_offset;
++
++                        s->hevcdsp.hevc_h_loop_filter_uv(av_rpi_sand_frame_pos_c(s->frame, x >> 1, y >> 1),
++                            frame_stride1(s->frame, 1),
++                            ((bs0 == 0) ? 0 : (tc[tcq_u[qp0]] << 0) | (tc[tcq_v[qp0]] << 8)) |
++                                ((bs1 == 0) ? 0 : (tc[tcq_u[qp1]] << 16) | (tc[tcq_v[qp1]] << 24)),
++                            pcmf_h);
++                    }
++                    pcmfa >>= 2;
 +                }
 +            }
-+            // **** VPU deblock code gone from here....
 +        }
 +    }
 +}
 +
-+
-+void ff_hevc_rpi_deblocking_boundary_strengths(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int x0, int y0,
-+                                           int log2_trafo_size)
++static inline unsigned int off_boundary(const unsigned int x, const unsigned int log2_n)
 +{
-+    MvField *tab_mvf     = s->ref->tab_mvf;
-+    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
-+    int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
-+    int min_pu_width     = s->ps.sps->min_pu_width;
-+    int min_tu_width     = s->ps.sps->min_tb_width;
-+    int boundary_upper, boundary_left;
-+    int i, j;
-+    const RefPicList *rpl = s->ref->refPicList;
-+    const unsigned int log2_dup = FFMIN(log2_min_pu_size, log2_trafo_size);
-+    const unsigned int min_pu_in_4pix = 1 << (log2_dup - 2);  // Dup
-+    const unsigned int trafo_in_min_pus = 1 << (log2_trafo_size - log2_dup); // Rep
-+    int y_pu             = y0 >> log2_min_pu_size;
-+    int x_pu             = x0 >> log2_min_pu_size;
-+    MvField *curr        = &tab_mvf[y_pu * min_pu_width + x_pu];
-+    int is_intra         = curr->pred_flag == PF_INTRA;
-+    int inc              = log2_min_pu_size == 2 ? 2 : 1;
-+    uint8_t *bs;
++    return x & ~(~0U << log2_n);
++}
++
++static inline void set_bs_h(const HEVCRpiContext * const s, const unsigned int x, const unsigned int y, const uint32_t mask, uint32_t bsf)
++{
++    av_assert2((y & 7) == 0);
++
++    // This doesn't have the same simultaneous update issues that bsf_stash
++    // does (other threads will have a different y) so we can do it the easy way
++    if ((bsf &= mask) != 0)
++        *(uint32_t *)(s->horizontal_bs + ((x >> 4) & ~3) + (y >> 3) * s->hbs_stride) |= bsf << ((x >> 1) & 31);
++}
++
++
++static void set_bs_v(const HEVCRpiContext * const s, const unsigned int x, const unsigned int y, const uint32_t mask, uint32_t bsf)
++{
++    // We arrange this in a slightly odd fashion but it lines up with
++    // how we are going to use it in the actual deblock code & it is easier
++    // to do the contortions here than there
++    //
++    // Arrange (LE) {x0y0, x0y4, x8y0, x8y4}, {x16y0, x16y4, x24y0, x24y4},...
++
++    av_assert2((x & 7) == 0);
++
++    if ((bsf &= mask) != 0)
++    {
++        const unsigned int stride1 = s->hbs_stride;
++        uint8_t *p = s->vertical_bs2 + (x >> 4) + (y >> 3) * stride1;
++        const unsigned int sh = ((x & 8) | (y & 4)) >> 1;
++
++        if (mask <= 0xf)
++        {
++            *p |= (bsf << sh);
++        }
++        else
++        {
++            do {
++                *p |= (bsf & 0xf) << sh;
++                p += stride1;
++            } while ((bsf >>= 4) != 0);
++        }
++    }
++}
++
++static inline uint32_t bsf_mv(const HEVCRpiContext * const s,
++                              const unsigned int rep, const unsigned int dup,
++                              const unsigned int mvf_stride,
++                              const RefPicList * const rpl_p, const RefPicList * const rpl_q,
++                              const MvField * const mvf_p, const MvField * const mvf_q)
++{
++    uint8_t res[16];
++    unsigned int i;
++    unsigned int a = 0;
++
++    s->hevcdsp.hevc_deblocking_boundary_strengths(rep, dup,
++            sizeof(MvField) * mvf_stride, 1,
++            rpl_p[0].list, rpl_p[1].list, rpl_q[0].list, rpl_q[1].list,
++            mvf_p, mvf_q, res);
++
++    for (i = 0; i != rep * dup; ++i) {
++        a |= res[i] << (i * 2);
++    }
++    return a;
++}
++
++
++void ff_hevc_rpi_deblocking_boundary_strengths(const HEVCRpiContext * const s,
++                                               const HEVCRpiLocalContext * const lc,
++                                               const unsigned int x0, const unsigned int y0,
++                                               const unsigned int log2_trafo_size,
++                                               const int is_coded_block)
++{
++    const MvField * const tab_mvf       = s->ref->tab_mvf;
++    const unsigned int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
++    const unsigned int mvf_stride       = s->ps.sps->min_pu_width;  // width in pus; mvf stride
++    const RefPicList * const rpl        = s->ref->refPicList;
++    // Rep count for bsf_mv when running with min_pu chunks
++    const unsigned int log2_rep_min_pu  = log2_trafo_size <= log2_min_pu_size ? 0 : log2_trafo_size - log2_min_pu_size;
++    const MvField * const mvf_curr      = tab_mvf + (y0 >> log2_min_pu_size) * mvf_stride + (x0 >> log2_min_pu_size);
++    const unsigned int boundary_flags   = s->sh.no_dblk_boundary_flags & lc->boundary_flags;
++    const unsigned int trafo_size       = (1U << log2_trafo_size);
++    const uint32_t bsf_mask             = log2_trafo_size > 5 ? ~0U : (1U << (trafo_size >> 1)) - 1;
++    const uint32_t bsf_cbf              = (bsf_mask & 0x55555555);
++
++    // Do we cover a pred split line?
++    const int has_x_split = x0 < lc->cu.x_split && x0 + trafo_size > lc->cu.x_split;
++    const int has_y_split = y0 < lc->cu.y_split && y0 + trafo_size > lc->cu.y_split;
++
++    uint32_t bsf_h;
++    uint32_t bsf_v;
 +
 +#ifdef DISABLE_STRENGTHS
 +    return;
 +#endif
 +
-+    boundary_upper = y0 > 0 && !(y0 & 7);
-+    if (boundary_upper &&
-+        ((!s->sh.slice_loop_filter_across_slices_enabled_flag &&
-+          lc->boundary_flags & BOUNDARY_UPPER_SLICE &&
-+          (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0) ||
-+         (!s->ps.pps->loop_filter_across_tiles_enabled_flag &&
-+          lc->boundary_flags & BOUNDARY_UPPER_TILE &&
-+          (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0)))
-+        boundary_upper = 0;
++    // We are always on a size boundary
++    av_assert2((x0 & (trafo_size - 1)) == 0);
++    av_assert2((y0 & (trafo_size - 1)) == 0);
++    // log2_trafo_size is not really a transform size; we may have to deal
++    // with size 2^6 blocks
++    av_assert2(log2_trafo_size >= 2 && log2_trafo_size <= 6);
 +
-+    bs = &s->horizontal_bs[(x0 + y0 * s->bs_width) >> 2];
++    // Retrieve and update coded (b0), intra (b1) bs flags
++    //
++    // Store on min width (rather than uint32_t) to avoid possible issues
++    // with another thread on another core running wpp using the same
++    // memory (min CTB = 16 pels = 4 bsf els = 8 bits)
++    //
++    // In bsf BS=2 is represented by 3 as it is much easier to test & set
++    // and the actual deblock code tests for 0 and b1 set/not-set so 2 and
++    // 3 will work the same
++    {
++        // Given where we are called from is_cbf_luma & is_intra will be constant over the block
++        const uint32_t bsf0 =  (lc->cu.pred_mode == MODE_INTRA) ? bsf_mask : is_coded_block ? bsf_cbf : 0;
++        uint8_t *const p = s->bsf_stash_up + (x0 >> 4);
++        uint8_t *const q = s->bsf_stash_left + (y0 >> 4);
 +
-+    if (boundary_upper) {
-+        const RefPicList *const rpl_top = (lc->boundary_flags & BOUNDARY_UPPER_SLICE) ?
-+                              ff_hevc_rpi_get_ref_list(s, s->ref, x0, y0 - 1) :
-+                              rpl;
-+        MvField *top = curr - min_pu_width;
-+
-+        if (is_intra) {
-+            for (i = 0; i < (1 << log2_trafo_size); i += 4)
-+                bs[i >> 2] = 2;
-+
-+        } else {
-+            int y_tu = y0 >> log2_min_tu_size;
-+            int x_tu = x0 >> log2_min_tu_size;
-+            uint8_t *curr_cbf_luma = &s->cbf_luma[y_tu * min_tu_width + x_tu];
-+            uint8_t *top_cbf_luma = curr_cbf_luma - min_tu_width;
-+
-+            s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus,
-+                    min_pu_in_4pix, sizeof (MvField), 4 >> 2,
-+                    rpl[0].list, rpl[1].list, rpl_top[0].list, rpl_top[1].list,
-+                    curr, top, bs);
-+
-+            for (i = 0; i < (1 << log2_trafo_size); i += 4) {
-+                int i_pu = i >> log2_min_pu_size;
-+                int i_tu = i >> log2_min_tu_size;
-+
-+                if (top[i_pu].pred_flag == PF_INTRA)
-+                    bs[i >> 2] = 2;
-+                else if (curr_cbf_luma[i_tu] || top_cbf_luma[i_tu])
-+                    bs[i >> 2] = 1;
++        switch (log2_trafo_size)
++        {
++            case 2:
++            case 3:
++            {
++                const unsigned int sh_h = (x0 >> 1) & 7;
++                const unsigned int sh_v = (y0 >> 1) & 7;
++                bsf_h = *p;
++                bsf_v = *q;
++                *p = (bsf_h & ~(bsf_mask << sh_h)) | (bsf0 << sh_h);
++                *q = (bsf_v & ~(bsf_mask << sh_v)) | (bsf0 << sh_v);
++                bsf_h >>= sh_h;
++                bsf_v >>= sh_v;
++                break;
 +            }
++            case 4:
++                bsf_h = *p;
++                bsf_v = *q;
++                *p = bsf0;
++                *q = bsf0;
++                break;
++            case 5:
++                bsf_h = *(uint16_t *)p;
++                bsf_v = *(uint16_t *)q;
++                *(uint16_t *)p = bsf0;
++                *(uint16_t *)q = bsf0;
++                break;
++            case 6:
++            default:
++                bsf_h = *(uint32_t *)p;
++                bsf_v = *(uint32_t *)q;
++                *(uint32_t *)p = bsf0;
++                *(uint32_t *)q = bsf0;
++                break;
 +        }
++
++        bsf_h |= bsf0;
++        bsf_v |= bsf0;
 +    }
 +
-+    if (!is_intra) {
-+        for (j = inc; j < trafo_in_min_pus; j += inc) {
-+            MvField *top;
++    // Do Horizontal
++    if ((y0 & 7) == 0)
++    {
++        // Boundary upper
++        if (y0 != 0 &&
++            (off_boundary(y0, s->ps.sps->log2_ctb_size) ||
++             (boundary_flags & (BOUNDARY_UPPER_SLICE | BOUNDARY_UPPER_TILE)) == 0))
++        {
++            // Look at MVs (BS=1) if we don't already have a full set of bs bits
++            if ((~bsf_h & bsf_cbf) != 0 && (y0 == lc->cu.y || y0 == lc->cu.y_split))
++            {
++                // If we aren't on the top boundary we must be in the middle
++                // and in that case we know where mvf can change
++                const unsigned int log2_rep = (y0 == lc->cu.y) ? log2_rep_min_pu : has_x_split ? 1 : 0;
++                const RefPicList *const rpl_top = (lc->boundary_flags & BOUNDARY_UPPER_SLICE) ?
++                                      ff_hevc_rpi_get_ref_list(s, s->ref, x0, y0 - 1) :
++                                      rpl;
 +
-+            curr += min_pu_width * inc;
-+            top = curr - min_pu_width;
-+            bs += s->bs_width * inc << log2_min_pu_size >> 2;
-+
-+            s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus,
-+                    min_pu_in_4pix, sizeof (MvField), 4 >> 2,
-+                    rpl[0].list, rpl[1].list, rpl[0].list, rpl[1].list,
-+                    curr, top, bs);
-+        }
-+    }
-+
-+    boundary_left = x0 > 0 && !(x0 & 7);
-+    if (boundary_left &&
-+        ((!s->sh.slice_loop_filter_across_slices_enabled_flag &&
-+          lc->boundary_flags & BOUNDARY_LEFT_SLICE &&
-+          (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0) ||
-+         (!s->ps.pps->loop_filter_across_tiles_enabled_flag &&
-+          lc->boundary_flags & BOUNDARY_LEFT_TILE &&
-+          (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0)))
-+        boundary_left = 0;
-+
-+    curr = &tab_mvf[y_pu * min_pu_width + x_pu];
-+    bs = &s->vertical_bs[(x0 + y0 * s->bs_width) >> 2];
-+
-+    if (boundary_left) {
-+        const RefPicList *rpl_left = (lc->boundary_flags & BOUNDARY_LEFT_SLICE) ?
-+                               ff_hevc_rpi_get_ref_list(s, s->ref, x0 - 1, y0) :
-+                               rpl;
-+        MvField *left = curr - 1;
-+
-+        if (is_intra) {
-+            for (j = 0; j < (1 << log2_trafo_size); j += 4)
-+                bs[j * s->bs_width >> 2] = 2;
-+
-+        } else {
-+            int y_tu = y0 >> log2_min_tu_size;
-+            int x_tu = x0 >> log2_min_tu_size;
-+            uint8_t *curr_cbf_luma = &s->cbf_luma[y_tu * min_tu_width + x_tu];
-+            uint8_t *left_cbf_luma = curr_cbf_luma - 1;
-+
-+            s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus,
-+                    min_pu_in_4pix, min_pu_width * sizeof (MvField), 4 * s->bs_width >> 2,
-+                    rpl[0].list, rpl[1].list, rpl_left[0].list, rpl_left[1].list,
-+                    curr, left, bs);
-+
-+            for (j = 0; j < (1 << log2_trafo_size); j += 4) {
-+                int j_pu = j >> log2_min_pu_size;
-+                int j_tu = j >> log2_min_tu_size;
-+
-+                if (left[j_pu * min_pu_width].pred_flag == PF_INTRA)
-+                    bs[j * s->bs_width >> 2] = 2;
-+                else if (curr_cbf_luma[j_tu * min_tu_width] || left_cbf_luma[j_tu * min_tu_width])
-+                    bs[j * s->bs_width >> 2] = 1;
++                bsf_h |= bsf_mv(s, 1 << log2_rep, trafo_size >> (2 + log2_rep),
++                    trafo_size >> (log2_min_pu_size + log2_rep),
++                    rpl, rpl_top,
++                    mvf_curr, mvf_curr - mvf_stride);
 +            }
++
++            // Finally put the results into bs
++            set_bs_h(s, x0, y0, bsf_mask, bsf_h);
++        }
++
++        // Max of 1 pu internal split - ignore if not on 8pel boundary
++        if (has_y_split && !off_boundary(lc->cu.y_split, 3))
++        {
++            const MvField * const mvf = tab_mvf +
++                (lc->cu.y_split >> log2_min_pu_size) * mvf_stride + (x0 >> log2_min_pu_size);
++            // If we have the x split as well then it must be in the middle
++            const unsigned int log2_rep = has_x_split ? 1 : 0;
++
++            set_bs_h(s, x0, lc->cu.y_split, bsf_mask,
++                bsf_mv(s, 1 << log2_rep, trafo_size >> (2 + log2_rep),
++                   trafo_size >> (log2_min_pu_size + log2_rep),
++                   rpl, rpl,
++                   mvf, mvf - mvf_stride));
 +        }
 +    }
 +
-+    if (!is_intra) {
-+        for (i = inc; i < trafo_in_min_pus; i += inc) {
-+            MvField *left;
++    // And again for vertical - same logic as horizontal just in the other direction
++    if ((x0 & 7) == 0)
++    {
++        // Boundary left
++        if (x0 != 0 &&
++            ((x0 & ((1 << s->ps.sps->log2_ctb_size) - 1)) != 0 ||
++             (boundary_flags & (BOUNDARY_LEFT_SLICE | BOUNDARY_LEFT_TILE)) == 0))
++        {
++            if ((~bsf_v & bsf_cbf) != 0 && (x0 == lc->cu.x || x0 == lc->cu.x_split))
++            {
++                const unsigned int log2_rep = (x0 == lc->cu.x) ? log2_rep_min_pu : has_y_split ? 1 : 0;
++                const RefPicList *const rpl_left = (lc->boundary_flags & BOUNDARY_LEFT_SLICE) ?
++                                       ff_hevc_rpi_get_ref_list(s, s->ref, x0 - 1, y0) :
++                                       rpl;
 +
-+            curr += inc;
-+            left = curr - 1;
-+            bs += inc << log2_min_pu_size >> 2;
++                bsf_v |= bsf_mv(s, 1 << log2_rep, trafo_size >> (2 + log2_rep),
++                    (mvf_stride << log2_trafo_size) >> (log2_min_pu_size + log2_rep),
++                    rpl, rpl_left,
++                    mvf_curr, mvf_curr - 1);
++            }
 +
-+            s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus,
-+                    min_pu_in_4pix, min_pu_width * sizeof (MvField), 4 * s->bs_width >> 2,
-+                    rpl[0].list, rpl[1].list, rpl[0].list, rpl[1].list,
-+                    curr, left, bs);
++            set_bs_v(s, x0, y0, bsf_mask, bsf_v);
++        }
++
++        if (has_x_split && !off_boundary(lc->cu.x_split, 3))
++        {
++            const MvField * const mvf = tab_mvf +
++                (y0 >> log2_min_pu_size) * mvf_stride + (lc->cu.x_split >> log2_min_pu_size);
++            const unsigned int log2_rep = has_y_split ? 1 : 0;
++
++            set_bs_v(s, lc->cu.x_split, y0, bsf_mask,
++                bsf_mv(s, 1 << log2_rep, trafo_size >> (2 + log2_rep),
++                   (mvf_stride << log2_trafo_size) >> (log2_min_pu_size + log2_rep),
++                   rpl, rpl,
++                   mvf, mvf - 1));
 +        }
 +    }
 +}
@@ -10843,125 +11112,97 @@ index 0000000000..5ae479dd0b
 +#undef CB
 +#undef CR
 +
-+#ifdef RPI_DEBLOCK_VPU
-+// ff_hevc_rpi_flush_buffer_lines
-+// flushes and invalidates all pixel rows in [start,end-1]
-+static void ff_hevc_rpi_flush_buffer_lines(HEVCRpiContext *s, int start, int end, int flush_luma, int flush_chroma)
++static inline unsigned int ussub(const unsigned int a, const unsigned int b)
 +{
-+    rpi_cache_buf_t cbuf;
-+    rpi_cache_flush_env_t * const rfe = rpi_cache_flush_init(&cbuf);
-+    rpi_cache_flush_add_frame_block(rfe, s->frame, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE,
-+      0, start, s->ps.sps->width, end - start, ctx_vshift(s, 1), flush_luma, flush_chroma);
-+    rpi_cache_flush_finish(rfe);
++    return a < b ? 0 : a - b;
 +}
 +
-+/* rpi_deblock deblocks an entire row of ctbs using the VPU */
-+static void rpi_deblock(HEVCRpiContext *s, int y, int ctb_size)
++static inline int cache_boundry(const AVFrame * const frame, const unsigned int x)
 +{
-+  int num16high = (ctb_size+15)>>4;  // May go over bottom of the image, but setup will be zero for these so should have no effect.
-+  // TODO check that image allocation is large enough for this to be okay as well.
-+  
-+  // Flush image, 4 lines above to bottom of ctb stripe
-+  ff_hevc_rpi_flush_buffer_lines(s, FFMAX(y-4,0), y+ctb_size, 1, 1);
-+  // TODO flush buffer of beta/tc setup when it becomes cached
-+
-+  // Prepare three commands at once to avoid calling overhead
-+  s->dvq->vpu_cmds_arm[0][0] = get_vc_address_y(s->frame) + s->frame->linesize[0] * y;
-+  s->dvq->vpu_cmds_arm[0][1] = s->frame->linesize[0];
-+  s->dvq->vpu_cmds_arm[0][2] = s->setup_width;
-+  s->dvq->vpu_cmds_arm[0][3] = (int) ( s->dvq->y_setup_vc + s->setup_width * (y>>4) );
-+  s->dvq->vpu_cmds_arm[0][4] = num16high;
-+  s->dvq->vpu_cmds_arm[0][5] = 2;
-+
-+  s->dvq->vpu_cmds_arm[1][0] = get_vc_address_u(s->frame) + s->frame->linesize[1] * (y>> s->ps.sps->vshift[1]);
-+  s->dvq->vpu_cmds_arm[1][1] = s->frame->linesize[1];
-+  s->dvq->vpu_cmds_arm[1][2] = s->uv_setup_width;
-+  s->dvq->vpu_cmds_arm[1][3] = (int) ( s->dvq->uv_setup_vc + s->uv_setup_width * ((y>>4)>> s->ps.sps->vshift[1]) );
-+  s->dvq->vpu_cmds_arm[1][4] = (num16high + 1) >> s->ps.sps->vshift[1];
-+  s->dvq->vpu_cmds_arm[1][5] = 3;
-+
-+  s->dvq->vpu_cmds_arm[2][0] = get_vc_address_v(s->frame) + s->frame->linesize[2] * (y>> s->ps.sps->vshift[2]);
-+  s->dvq->vpu_cmds_arm[2][1] = s->frame->linesize[2];
-+  s->dvq->vpu_cmds_arm[2][2] = s->uv_setup_width;
-+  s->dvq->vpu_cmds_arm[2][3] = (int) ( s->dvq->uv_setup_vc + s->uv_setup_width * ((y>>4)>> s->ps.sps->vshift[1]) );
-+  s->dvq->vpu_cmds_arm[2][4] = (num16high + 1) >> s->ps.sps->vshift[1];
-+  s->dvq->vpu_cmds_arm[2][5] = 4;
-+  
-+  // Call VPU
-+  {
-+      vpu_qpu_job_env_t qvbuf;
-+      const vpu_qpu_job_h vqj = vpu_qpu_job_init(&qvbuf);
-+      vpu_qpu_job_add_vpu(vqj, vpu_get_fn(s->ps.sps->bit_depth), s->dvq->vpu_cmds_vc, 3, 0, 0, 0, 5);  // 5 means to do all the commands
-+      vpu_qpu_job_add_sync_this(vqj, &s->dvq->cmd_id);
-+      vpu_qpu_job_finish(vqj);
-+  }
-+
-+  s->dvq_n = (s->dvq_n + 1) & (RPI_DEBLOCK_VPU_Q_COUNT - 1);
-+  s->dvq = s->dvq_ents + s->dvq_n;
-+
-+  vpu_qpu_wait(&s->dvq->cmd_id);
++    return ((x >> av_rpi_sand_frame_xshl(frame)) & ~63) == 0;
 +}
 +
-+#endif
-+
-+void ff_hevc_rpi_hls_filter(HEVCRpiContext * const s, const int x, const int y, const int ctb_size)
++int ff_hevc_rpi_hls_filter_blk(const HEVCRpiContext * const s, const RpiBlk bounds, const int eot)
 +{
-+    const int x_end = x >= s->ps.sps->width  - ctb_size;
++    const int ctb_size = (1 << s->ps.sps->log2_ctb_size);
++    int x, y;
 +
-+    if (s->avctx->skip_loop_filter < AVDISCARD_ALL)
-+        deblocking_filter_CTB(s, x, y);
++    const unsigned int br = FFMIN(bounds.x + bounds.w, s->ps.sps->width);
++    const unsigned int bb = FFMIN(bounds.y + bounds.h, s->ps.sps->height);
 +
-+#ifdef RPI_DEBLOCK_VPU
-+    if (s->enable_rpi_deblock && x_end)
++    const int x_end = (br >= s->ps.sps->width);
++    const int y_end = (bb >= s->ps.sps->height);
++
++    // Deblock may not touch the edges of the bound as they are still needed
++    // for Intra pred
++
++    deblock_y_blk(s, bounds, x_end, y_end);
++    deblock_uv_blk(s, bounds, x_end, y_end);
++
++    // SAO needs
++    // (a) CTB alignment
++    // (b) Valid pixels all the way around the CTB in particular it needs the DR pixel
 +    {
-+      int y_at_end = y >= s->ps.sps->height - ctb_size;
-+      int height = 64;  // Deblock in units 64 high to avoid too many VPU calls
-+      int y_start = y&~63;
-+      if (y_at_end) height = s->ps.sps->height - y_start;
-+      if ((((y+ctb_size)&63)==0) || y_at_end) {
-+        rpi_deblock(s, y_start, height);
-+      }
-+    }
-+#endif
++        const unsigned int xo = bounds.x - ((bounds.x - 16) & ~(ctb_size - 1));
++        const unsigned int yo = bounds.y - ((bounds.y - 16) & ~(ctb_size - 1));
++        const unsigned int yt = ussub(bounds.y, yo);
++        const unsigned int yb = y_end ? bb : ussub(bb, yo);
++        const unsigned int xl = ussub(bounds.x, xo);
++        const unsigned int xr = x_end ? br : ussub(br, xo);
 +
-+    if (s->ps.sps->sao_enabled) {
-+        int y_end = y >= s->ps.sps->height - ctb_size;
-+        if (y != 0 && x != 0)
-+            sao_filter_CTB(s, x - ctb_size, y - ctb_size);
-+        if (x != 0 && y_end)
-+            sao_filter_CTB(s, x - ctb_size, y);
-+        if (y != 0 && x_end)
-+            sao_filter_CTB(s, x, y - ctb_size);
-+        if (x_end && y_end)
-+            sao_filter_CTB(s, x , y);
++        for (y = yt; y < yb; y += ctb_size) {
++            for (x = xl; x < xr; x += ctb_size) {
++                sao_filter_CTB(s, x, y);
++            }
++        }
++
++        // Cache invalidate
++        y = 0;
++        if (xr != 0 && yb != 0)
++        {
++            const unsigned int llen =
++                (av_rpi_sand_frame_stride1(s->frame) >> av_rpi_sand_frame_xshl(s->frame));
++            const unsigned int mask = ~(llen - 1);
++            const unsigned int il = (xl == 0) ? 0 : (xl - 1) & mask;
++            const unsigned int ir = x_end || !cache_boundry(s->frame, br) ? br : (xr - 1) & mask;
++            const unsigned int it = ussub(yt, 1);
++            const unsigned int ib = y_end ? bb : yb - 1;
++
++            if (il < ir) {
++                rpi_cache_buf_t cbuf;
++                rpi_cache_flush_env_t * const rfe = rpi_cache_flush_init(&cbuf);
++                rpi_cache_flush_add_frame_block(rfe, s->frame, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE,
++                  il, it, ir - il, ib - it,
++                  ctx_vshift(s, 1), 1, 1);
++
++                // *** Tiles where V tile boundaries aren't on cache boundaries
++                // We have a race condition between ARM side recon in the tile
++                // on the left & QPU pred in the tile on the right
++                // The code below ameliorates it as does turning off WPP in
++                // these cases but it still exists :-(
++
++                // If we have to commit the right hand tile boundary due to
++                // cache boundary considerations then at EoTile we must commit
++                // that boundary to bottom of tile (bounds)
++                if (ib != bb && ir == br && eot) {
++                    rpi_cache_flush_add_frame_block(rfe, s->frame, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE,
++                      br - 1, ib, 1, bb - ib,
++                      ctx_vshift(s, 1), 1, 1);
++                }
++
++                rpi_cache_flush_finish(rfe);
++
++                if (x_end)
++                    y = y_end ? INT_MAX : ib;
++
++//                printf("Flush: %4d,%4d -> %4d,%4d: signal: %d\n", il, it, ir, ib, y - 1);
++            }
++        }
 +    }
++
++    return y;
 +}
 +
-+void ff_hevc_rpi_hls_filters(HEVCRpiContext *s, int x_ctb, int y_ctb, int ctb_size)
-+{
-+    // * This can break strict L->R then U->D ordering - mostly it doesn't matter
-+    // Never called if rpi_enabled so no need for cache flush ops
-+    const int x_end = x_ctb >= s->ps.sps->width  - ctb_size;
-+    const int y_end = y_ctb >= s->ps.sps->height - ctb_size;
-+    if (y_ctb && x_ctb)
-+        ff_hevc_rpi_hls_filter(s, x_ctb - ctb_size, y_ctb - ctb_size, ctb_size);
-+    if (y_ctb && x_end)
-+    {
-+        ff_hevc_rpi_hls_filter(s, x_ctb, y_ctb - ctb_size, ctb_size);
-+        // Signal progress - this is safe for SAO
-+        if (s->threads_type == FF_THREAD_FRAME && y_ctb > ctb_size)
-+            ff_hevc_rpi_progress_signal_recon(s, y_ctb - ctb_size - 1);
-+    }
-+    if (x_ctb && y_end)
-+        ff_hevc_rpi_hls_filter(s, x_ctb - ctb_size, y_ctb, ctb_size);
-+    if (x_end && y_end)
-+    {
-+        ff_hevc_rpi_hls_filter(s, x_ctb, y_ctb, ctb_size);
-+        // All done - signal such
-+        if (s->threads_type == FF_THREAD_FRAME)
-+            ff_hevc_rpi_progress_signal_recon(s, INT_MAX);
-+    }
-+}
 diff --git a/libavcodec/rpi_hevc_mvs.c b/libavcodec/rpi_hevc_mvs.c
 new file mode 100644
 index 0000000000..93f3530ff5
@@ -11921,10 +12162,10 @@ index 0000000000..4b4d032a16
 +#endif /* AVCODEC_RPI_HEVC_PARSE_H */
 diff --git a/libavcodec/rpi_hevc_ps.c b/libavcodec/rpi_hevc_ps.c
 new file mode 100644
-index 0000000000..f8abc30eef
+index 0000000000..e8df452021
 --- /dev/null
 +++ b/libavcodec/rpi_hevc_ps.c
-@@ -0,0 +1,1789 @@
+@@ -0,0 +1,1957 @@
 +/*
 + * HEVC Parameter Set decoding
 + *
@@ -11998,6 +12239,89 @@ index 0000000000..f8abc30eef
 +    {  2,   1 },
 +};
 +
++
++// pps_cb_qp_offset: -12,+12
++// slice_cb_qp_offset: -12,+12 also
++//   "The value of pps_cb_qp_offset + slice_cb_qp_offset shall be in the range of -12 to +12, inclusive."
++// cr_qp_offset_list[n]: -12,+12
++// So worst case total offset: -24,+24
++
++#define T(n) ((((48+(n))/6-10)<<3) | (48+(n))%6)
++#define C(B,n) T(B*6+(n) < 0 ? -B*6 : (n) > 51 ? 51 : (n))
++#define M(B,n) C(B,(-n))
++
++// Sizeof the QP_START_BLOCK
++#define QP_OFFSET_0 (8*6 + 12*2)
++#define QP_START(B) \
++    M(B,48), M(B,48), M(B,48), M(B,48), M(B,48), M(B,48),\
++    M(B,48), M(B,48), M(B,48), M(B,48), M(B,48), M(B,48),\
++    M(B,48), M(B,48), M(B,48), M(B,48), M(B,48), M(B,48),\
++    M(B,48), M(B,48), M(B,48), M(B,48), M(B,48), M(B,48),\
++\
++    M(B,48), M(B,47), M(B,46), M(B,45), M(B,44), M(B,43),\
++    M(B,42), M(B,41), M(B,40), M(B,39), M(B,38), M(B,37),\
++    M(B,36), M(B,35), M(B,34), M(B,33), M(B,32), M(B,31),\
++    M(B,30), M(B,29), M(B,28), M(B,27), M(B,26), M(B,25),\
++    M(B,24), M(B,23), M(B,22), M(B,21), M(B,20), M(B,19),\
++    M(B,18), M(B,17), M(B,16), M(B,15), M(B,14), M(B,13),\
++    M(B,12), M(B,11), M(B,10), M(B, 9), M(B, 8), M(B, 7),\
++    M(B, 6), M(B, 5), M(B, 4), M(B, 3), M(B, 2), M(B, 1)
++#define QP_END(B) \
++    C(B,51), C(B,51), C(B,51), C(B,51), C(B,51), C(B,51),\
++    C(B,51), C(B,51), C(B,51), C(B,51), C(B,51), C(B,51),\
++    C(B,51), C(B,51), C(B,51), C(B,51), C(B,51), C(B,51)
++
++#define T1(B)\
++{\
++    QP_START(B),\
++    C(B, 0), C(B, 1), C(B, 2), C(B, 3), C(B, 4), C(B, 5), C(B, 6), C(B, 7), C(B, 8), C(B, 9),\
++    C(B,10), C(B,11), C(B,12), C(B,13), C(B,14), C(B,15), C(B,16), C(B,17), C(B,18), C(B,19),\
++    C(B,20), C(B,21), C(B,22), C(B,23), C(B,24), C(B,25), C(B,26), C(B,27), C(B,28), C(B,29),\
++    C(B,29), C(B,30), C(B,31), C(B,32), C(B,33), C(B,33), C(B,34), C(B,34), C(B,35), C(B,35),\
++    C(B,36), C(B,36), C(B,37), C(B,37), C(B,38), C(B,39), C(B,40), C(B,41), C(B,42), C(B,43),\
++    C(B,44), C(B,45),\
++    C(B,46), C(B,47), C(B,48), C(B,49), C(B,50), C(B,51),\
++    QP_END(B)\
++}
++#define T0(B)\
++{\
++    QP_START(B),\
++    C(B, 0), C(B, 1), C(B, 2), C(B, 3), C(B, 4), C(B, 5), C(B, 6), C(B, 7), C(B, 8), C(B, 9),\
++    C(B,10), C(B,11), C(B,12), C(B,13), C(B,14), C(B,15), C(B,16), C(B,17), C(B,18), C(B,19),\
++    C(B,20), C(B,21), C(B,22), C(B,23), C(B,24), C(B,25), C(B,26), C(B,27), C(B,28), C(B,29),\
++    C(B,30), C(B,31), C(B,32), C(B,33), C(B,34), C(B,35), C(B,36), C(B,37), C(B,38), C(B,39),\
++    C(B,40), C(B,41), C(B,42), C(B,43), C(B,44), C(B,45), C(B,46), C(B,47), C(B,48), C(B,49),\
++    C(B,50), C(B,51),\
++    C(B,51), C(B,51), C(B,51), C(B,51), C(B,51), C(B,51),\
++    QP_END(B)\
++}
++
++#define QP_TABLE_SIZE (QP_OFFSET_0 + 52 + 12*2)
++
++static const int8_t qp_c_bd_0[8][QP_TABLE_SIZE] = {T0(0),T0(1),T0(2),T0(3),T0(4),T0(5),T0(6),T0(7)};
++static const int8_t qp_c_bd_1[8][QP_TABLE_SIZE] = {T1(0),T1(1),T1(2),T1(3),T1(4),T1(5),T1(6),T1(7)};
++
++#undef T
++#undef C
++#undef QP_END
++
++#define C(B,n) ((n)<0?0:(n)>51?51:(n))
++// We do need a lot of -ve padding to cope with high bit depths that give -ve qps
++#define QP_DBLK_OFFSET_0 QP_OFFSET_0
++#define QP_END(B)\
++ 51, 51, 51, 51, 51, 51
++
++// These don't need all the padding we have here (12 top/bottom would be enough)
++static const uint8_t qp_c_dblk_0[] = T0(0);
++static const uint8_t qp_c_dblk_1[] = T1(0);
++
++#undef T
++#undef M
++#undef C
++#undef QP_END
++#undef QP_START
++
++
 +static void remove_pps(HEVCRpiParamSets * const s, const int id)
 +{
 +    if (s->pps_list[id] && s->pps == (const HEVCRpiPPS*)s->pps_list[id]->data)
@@ -12650,21 +12974,27 @@ index 0000000000..f8abc30eef
 +        sl->sl_dc[0][matrixId] = 16; // default for 16x16
 +        sl->sl_dc[1][matrixId] = 16; // default for 32x32
 +    }
++
 +    memcpy(sl->sl[1][0], default_scaling_list_intra, 64);
 +    memcpy(sl->sl[1][1], default_scaling_list_intra, 64);
 +    memcpy(sl->sl[1][2], default_scaling_list_intra, 64);
++
 +    memcpy(sl->sl[1][3], default_scaling_list_inter, 64);
 +    memcpy(sl->sl[1][4], default_scaling_list_inter, 64);
 +    memcpy(sl->sl[1][5], default_scaling_list_inter, 64);
++
 +    memcpy(sl->sl[2][0], default_scaling_list_intra, 64);
 +    memcpy(sl->sl[2][1], default_scaling_list_intra, 64);
 +    memcpy(sl->sl[2][2], default_scaling_list_intra, 64);
++
 +    memcpy(sl->sl[2][3], default_scaling_list_inter, 64);
 +    memcpy(sl->sl[2][4], default_scaling_list_inter, 64);
 +    memcpy(sl->sl[2][5], default_scaling_list_inter, 64);
++
 +    memcpy(sl->sl[3][0], default_scaling_list_intra, 64);
 +    memcpy(sl->sl[3][1], default_scaling_list_intra, 64);
 +    memcpy(sl->sl[3][2], default_scaling_list_intra, 64);
++
 +    memcpy(sl->sl[3][3], default_scaling_list_inter, 64);
 +    memcpy(sl->sl[3][4], default_scaling_list_inter, 64);
 +    memcpy(sl->sl[3][5], default_scaling_list_inter, 64);
@@ -12933,6 +13263,24 @@ index 0000000000..f8abc30eef
 +        return AVERROR_INVALIDDATA;
 +    }
 +
++    {
++        const unsigned int CtbLog2SizeY = sps->log2_min_cb_size + sps->log2_diff_max_min_coding_block_size;
++        // Not a bitstream limitation, but all profiles
++        if (CtbLog2SizeY < 4 || CtbLog2SizeY > HEVC_MAX_LOG2_CTB_SIZE) {
++            av_log(avctx, AV_LOG_ERROR, "Invalid value %d for CtbLog2SizeY", CtbLog2SizeY);
++            return AVERROR_INVALIDDATA;
++        }
++
++        if (sps->log2_max_trafo_size > FFMIN(5, CtbLog2SizeY)) {
++            av_log(avctx, AV_LOG_ERROR, "Invalid value %d for MaxTbLog2SizeY", sps->log2_max_trafo_size);
++            return AVERROR_INVALIDDATA;
++        }
++
++        // Inferred parameters
++        sps->log2_ctb_size = CtbLog2SizeY;
++        sps->log2_min_pu_size = sps->log2_min_cb_size - 1;
++    }
++
 +    sps->max_transform_hierarchy_depth_inter = get_ue_golomb_long(gb);
 +    sps->max_transform_hierarchy_depth_intra = get_ue_golomb_long(gb);
 +
@@ -12950,8 +13298,14 @@ index 0000000000..f8abc30eef
 +    sps->amp_enabled_flag = get_bits1(gb);
 +    sps->sao_enabled      = get_bits1(gb);
 +
-+    sps->pcm_enabled_flag = get_bits1(gb);
-+    if (sps->pcm_enabled_flag) {
++    // Set pcm defaults (0) so we don't have to test _enabled when we
++    // want to use them
++    memset(&sps->pcm, 0, sizeof(sps->pcm));
++
++    if (get_bits1(gb))  // pcm_enabled_flag
++    {
++        const unsigned int limit_max_pcm = FFMIN(5,
++            sps->log2_min_cb_size + sps->log2_diff_max_min_coding_block_size);
 +        sps->pcm.bit_depth   = get_bits(gb, 4) + 1;
 +        sps->pcm.bit_depth_chroma = get_bits(gb, 4) + 1;
 +        sps->pcm.log2_min_pcm_cb_size = get_ue_golomb_long(gb) + 3;
@@ -12963,12 +13317,23 @@ index 0000000000..f8abc30eef
 +                   sps->pcm.bit_depth, sps->pcm.bit_depth_chroma, sps->bit_depth);
 +            return AVERROR_INVALIDDATA;
 +        }
++        if (sps->pcm.log2_min_pcm_cb_size < sps->log2_min_cb_size ||
++            sps->pcm.log2_max_pcm_cb_size > limit_max_pcm) {
++            av_log(avctx, AV_LOG_ERROR, "Bad PCM CB min/max size (%d->%d)",
++                   sps->pcm.log2_min_pcm_cb_size, sps->pcm.log2_max_pcm_cb_size);
++            return AVERROR_INVALIDDATA;
++        }
 +
 +        sps->pcm.loop_filter_disable_flag = get_bits1(gb);
 +    }
 +
++    // Could be based on min_pcm_cb_size but much easier logic if we just stick
++    // with 8 (and costs us little)
++    sps->pcm_width = (sps->width + 63) >> 6;  // 8 for min size, 8 bits per byte - round up
++    sps->pcm_height = (sps->height + 7) >> 3;
++
 +    sps->nb_st_rps = get_ue_golomb_long(gb);
-+    if (sps->nb_st_rps > HEVC_MAX_SHORT_TERM_RPS_COUNT) {
++    if (sps->nb_st_rps > HEVC_MAX_SHORT_TERM_REF_PIC_SETS) {
 +        av_log(avctx, AV_LOG_ERROR, "Too many short term RPS: %d.\n",
 +               sps->nb_st_rps);
 +        return AVERROR_INVALIDDATA;
@@ -13058,22 +13423,6 @@ index 0000000000..f8abc30eef
 +    }
 +
 +    // Inferred parameters
-+    sps->log2_ctb_size = sps->log2_min_cb_size +
-+                         sps->log2_diff_max_min_coding_block_size;
-+    sps->log2_min_pu_size = sps->log2_min_cb_size - 1;
-+
-+    if (sps->log2_ctb_size > HEVC_MAX_LOG2_CTB_SIZE) {
-+        av_log(avctx, AV_LOG_ERROR, "CTB size out of range: 2^%d\n", sps->log2_ctb_size);
-+        return AVERROR_INVALIDDATA;
-+    }
-+    if (sps->log2_ctb_size < 4) {
-+        av_log(avctx,
-+               AV_LOG_ERROR,
-+               "log2_ctb_size %d differs from the bounds of any known profile\n",
-+               sps->log2_ctb_size);
-+        avpriv_request_sample(avctx, "log2_ctb_size %d", sps->log2_ctb_size);
-+        return AVERROR_INVALIDDATA;
-+    }
 +
 +    sps->ctb_width  = (sps->width  + (1 << sps->log2_ctb_size) - 1) >> sps->log2_ctb_size;
 +    sps->ctb_height = (sps->height + (1 << sps->log2_ctb_size) - 1) >> sps->log2_ctb_size;
@@ -13199,16 +13548,31 @@ index 0000000000..f8abc30eef
 +    av_freep(&pps);
 +}
 +
-+static int pps_range_extensions(GetBitContext * const gb, AVCodecContext * const avctx,
-+                                HEVCRpiPPS * const pps, const HEVCRpiSPS * const sps) {
-+    int i;
++static int get_offset_list(GetBitContext * const gb, AVCodecContext * const avctx, unsigned int n_minus_1, int8_t * offsets)
++{
++    do
++    {
++        const int offset = get_se_golomb_long(gb);
++        if (offset < -12 || offset > 12) {
++            av_log(avctx, AV_LOG_ERROR, "qp_offset_list[]: %d out of range\n", offset);
++            return AVERROR_INVALIDDATA;
++        }
++        *offsets++ = offset;
++    } while (n_minus_1-- != 0);
++    return 0;
++}
 +
++static int pps_range_extensions(GetBitContext * const gb, AVCodecContext * const avctx,
++                                HEVCRpiPPS * const pps, const HEVCRpiSPS * const sps)
++{
 +    if (pps->transform_skip_enabled_flag) {
 +        pps->log2_max_transform_skip_block_size = get_ue_golomb_long(gb) + 2;
 +    }
 +    pps->cross_component_prediction_enabled_flag = get_bits1(gb);
 +    pps->chroma_qp_offset_list_enabled_flag = get_bits1(gb);
 +    if (pps->chroma_qp_offset_list_enabled_flag) {
++        int err;
++
 +        pps->diff_cu_chroma_qp_offset_depth = get_ue_golomb_long(gb);
 +        pps->chroma_qp_offset_list_len_minus1 = get_ue_golomb_long(gb);
 +        if (pps->chroma_qp_offset_list_len_minus1 > 5) {
@@ -13216,18 +13580,11 @@ index 0000000000..f8abc30eef
 +                   "chroma_qp_offset_list_len_minus1 shall be in the range [0, 5].\n");
 +            return AVERROR_INVALIDDATA;
 +        }
-+        for (i = 0; i <= pps->chroma_qp_offset_list_len_minus1; i++) {
-+            pps->cb_qp_offset_list[i] = get_se_golomb_long(gb);
-+            if (pps->cb_qp_offset_list[i]) {
-+                av_log(avctx, AV_LOG_WARNING,
-+                       "cb_qp_offset_list not tested yet.\n");
-+            }
-+            pps->cr_qp_offset_list[i] = get_se_golomb_long(gb);
-+            if (pps->cr_qp_offset_list[i]) {
-+                av_log(avctx, AV_LOG_WARNING,
-+                       "cb_qp_offset_list not tested yet.\n");
-+            }
-+        }
++        av_log(avctx, AV_LOG_WARNING, "cb_qp_offset_list not tested yet.\n");
++
++        if ((err = get_offset_list(gb, avctx, pps->chroma_qp_offset_list_len_minus1, pps->cb_qp_offset_list)) != 0 ||
++            (err = get_offset_list(gb, avctx, pps->chroma_qp_offset_list_len_minus1, pps->cr_qp_offset_list)) != 0)
++            return err;
 +    }
 +
 +    {
@@ -13256,6 +13613,28 @@ index 0000000000..f8abc30eef
 +    int i, j, x, y, ctb_addr_rs, tile_id;
 +
 +    // Inferred parameters
++
++    // qp_y -> qp_u/qp_v tables
++    // The tables have at least -24,+24 overrun after adding offset here
++    // which should allow for clipless offsetting
++
++    pps->qp_dblk_x[0] = qp_c_dblk_0 + QP_DBLK_OFFSET_0;  // No offset for luma, but may be useful for general code
++    pps->qp_bd_x[0] = qp_c_bd_0[sps->bit_depth - 8] + QP_OFFSET_0;
++
++    if (sps->chroma_format_idc == 1) {
++        pps->qp_dblk_x[1] = qp_c_dblk_1 + pps->cb_qp_offset + QP_DBLK_OFFSET_0;
++        pps->qp_bd_x[1] = qp_c_bd_1[sps->bit_depth - 8] + pps->cb_qp_offset + QP_OFFSET_0;
++        pps->qp_dblk_x[2] = qp_c_dblk_1 + pps->cr_qp_offset + QP_DBLK_OFFSET_0;
++        pps->qp_bd_x[2] = qp_c_bd_1[sps->bit_depth - 8] + pps->cr_qp_offset + QP_OFFSET_0;
++    }
++    else
++    {
++        pps->qp_dblk_x[1] = qp_c_dblk_0 + pps->cb_qp_offset + QP_DBLK_OFFSET_0;
++        pps->qp_bd_x[1] = qp_c_bd_0[sps->bit_depth - 8] + pps->cb_qp_offset + QP_OFFSET_0;
++        pps->qp_dblk_x[2] = qp_c_dblk_0 + pps->cr_qp_offset + QP_DBLK_OFFSET_0;
++        pps->qp_bd_x[2] = qp_c_bd_0[sps->bit_depth - 8] + pps->cr_qp_offset + QP_OFFSET_0;
++    }
++
 +    pps->col_bd   = av_malloc_array(pps->num_tile_columns + 1, sizeof(*pps->col_bd));
 +    pps->row_bd   = av_malloc_array(pps->num_tile_rows + 1,    sizeof(*pps->row_bd));
 +    pps->col_idxX = av_malloc_array(sps->ctb_width,    sizeof(*pps->col_idxX));
@@ -13281,9 +13660,36 @@ index 0000000000..f8abc30eef
 +        }
 +    }
 +
-+    pps->col_bd[0] = 0;
-+    for (i = 0; i < pps->num_tile_columns; i++)
-+        pps->col_bd[i + 1] = pps->col_bd[i] + pps->column_width[i];
++    {
++        const unsigned int td_mask = 63 >> (sps->log2_ctb_size + sps->pixel_shift);
++        pps->col_bd[0] = 0;
++        pps->tile_wpp_inter_disable = 0;
++        for (i = 0; i < pps->num_tile_columns; i++)
++        {
++            pps->col_bd[i + 1] = pps->col_bd[i] + pps->column_width[i];
++
++            // Avoid trying tile parallel if the columns don't fall on cache boundaries
++            // (this causes too much pain syncing flushes with the QPU)
++            // Ignore the final (RHS of pic) tile boundary
++            if ((pps->col_bd[i] & td_mask) != 0) {
++                pps->tile_wpp_inter_disable = 1;
++            }
++        }
++
++        // If we can start the next row before finishing the first line of
++        // this one then we must wait at the end of the tile
++        // * if this happens a lot then there are better but more complicated
++        //   conditions that we could apply
++        if (pps->tile_wpp_inter_disable) {
++            for (i = 0; i < pps->num_tile_rows; i++)
++            {
++                if (pps->row_height[i] <= RPI_MAX_JOBS) {
++                    pps->tile_wpp_inter_disable = 2;
++                    break;
++                }
++            }
++        }
++    }
 +
 +    pps->row_bd[0] = 0;
 +    for (i = 0; i < pps->num_tile_rows; i++)
@@ -13528,16 +13934,19 @@ index 0000000000..f8abc30eef
 +    pps->transform_skip_enabled_flag = get_bits1(gb);
 +
 +    pps->cu_qp_delta_enabled_flag = get_bits1(gb);
-+    pps->diff_cu_qp_delta_depth   = 0;
++    pps->log2_min_cu_qp_delta_size = sps->log2_ctb_size;
 +    if (pps->cu_qp_delta_enabled_flag)
-+        pps->diff_cu_qp_delta_depth = get_ue_golomb_long(gb);
++    {
++        const unsigned int diff_cu_qp_delta_depth = get_ue_golomb_long(gb);
 +
-+    if (pps->diff_cu_qp_delta_depth < 0 ||
-+        pps->diff_cu_qp_delta_depth > sps->log2_diff_max_min_coding_block_size) {
-+        av_log(avctx, AV_LOG_ERROR, "diff_cu_qp_delta_depth %d is invalid\n",
-+               pps->diff_cu_qp_delta_depth);
-+        ret = AVERROR_INVALIDDATA;
-+        goto err;
++        if (diff_cu_qp_delta_depth > sps->log2_diff_max_min_coding_block_size) {
++            av_log(avctx, AV_LOG_ERROR, "diff_cu_qp_delta_depth %d is invalid\n",
++                   diff_cu_qp_delta_depth);
++            ret = AVERROR_INVALIDDATA;
++            goto err;
++        }
++
++        pps->log2_min_cu_qp_delta_size = sps->log2_ctb_size - diff_cu_qp_delta_depth;
 +    }
 +
 +    pps->cb_qp_offset = get_se_golomb(gb);
@@ -13716,10 +14125,10 @@ index 0000000000..f8abc30eef
 +}
 diff --git a/libavcodec/rpi_hevc_ps.h b/libavcodec/rpi_hevc_ps.h
 new file mode 100644
-index 0000000000..3f192b70a4
+index 0000000000..c9ecf9a268
 --- /dev/null
 +++ b/libavcodec/rpi_hevc_ps.h
-@@ -0,0 +1,435 @@
+@@ -0,0 +1,441 @@
 +/*
 + * HEVC parameter set parsing
 + *
@@ -13807,11 +14216,13 @@ index 0000000000..3f192b70a4
 +    uint8_t slice_loop_filter_across_slices_enabled_flag;
 +    uint8_t collocated_list;
 +
++    uint8_t no_dblk_boundary_flags;
++
 +    unsigned int collocated_ref_idx;
 +
 +    int slice_qp_delta;
-+    int slice_cb_qp_offset;
-+    int slice_cr_qp_offset;
++    int slice_cb_qp_offset;  // -12, +12
++    int slice_cr_qp_offset;  // -12, +12
 +
 +    uint8_t cu_chroma_qp_offset_enabled_flag;
 +
@@ -13958,7 +14369,6 @@ index 0000000000..3f192b70a4
 +    enum AVPixelFormat pix_fmt;
 +
 +    unsigned int log2_max_poc_lsb;
-+    int pcm_enabled_flag;
 +
 +    int max_sub_layers;
 +    struct {
@@ -13968,14 +14378,11 @@ index 0000000000..3f192b70a4
 +    } temporal_layer[HEVC_MAX_SUB_LAYERS];
 +    uint8_t temporal_id_nesting_flag;
 +
-+    VUI vui;
-+    PTL ptl;
-+
 +    uint8_t scaling_list_enable_flag;
 +    ScalingList scaling_list;
 +
 +    unsigned int nb_st_rps;
-+    ShortTermRPS st_rps[HEVC_MAX_SHORT_TERM_RPS_COUNT];
++    ShortTermRPS st_rps[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
 +
 +    uint8_t amp_enabled_flag;
 +    uint8_t sao_enabled;
@@ -13988,19 +14395,19 @@ index 0000000000..3f192b70a4
 +    struct {
 +        uint8_t bit_depth;
 +        uint8_t bit_depth_chroma;
-+        unsigned int log2_min_pcm_cb_size;
-+        unsigned int log2_max_pcm_cb_size;
++        uint8_t log2_min_pcm_cb_size;
++        uint8_t log2_max_pcm_cb_size;
 +        uint8_t loop_filter_disable_flag;
 +    } pcm;
 +    uint8_t sps_temporal_mvp_enabled_flag;
 +    uint8_t sps_strong_intra_smoothing_enable_flag;
 +
-+    unsigned int log2_min_cb_size;
++    unsigned int log2_min_cb_size;  // 3..6
 +    unsigned int log2_diff_max_min_coding_block_size;
-+    unsigned int log2_min_tb_size;
++    unsigned int log2_min_tb_size;  // 2..5
 +    unsigned int log2_max_trafo_size;
-+    unsigned int log2_ctb_size;
-+    unsigned int log2_min_pu_size;
++    unsigned int log2_ctb_size;     // 4..6
++    unsigned int log2_min_pu_size;  // 2..5 (min_cb_size - 1)
 +
 +    int max_transform_hierarchy_depth_inter;
 +    int max_transform_hierarchy_depth_intra;
@@ -14018,13 +14425,15 @@ index 0000000000..3f192b70a4
 +    int height;
 +    int ctb_width;
 +    int ctb_height;
-+    int ctb_size;
++    int ctb_size;   // Pic size in CTBs not size of a CTB
 +    int min_cb_width;
 +    int min_cb_height;
 +    int min_tb_width;
 +    int min_tb_height;
 +    int min_pu_width;
 +    int min_pu_height;
++    int pcm_width;
++    int pcm_height;
 +    int tb_mask;
 +
 +    int hshift[3];
@@ -14034,15 +14443,18 @@ index 0000000000..3f192b70a4
 +
 +    uint8_t data[4096];
 +    int data_size;
++
++    VUI vui;
++    PTL ptl;
 +} HEVCRpiSPS;
 +
 +#define CTB_TS_FLAGS_SOTL       (1U << 0)       // X start of tile line
-+#define CTB_TS_FLAGS_EOTL       (1U << 1)
-+#define CTB_TS_FLAGS_EOL        (1U << 2)
-+#define CTB_TS_FLAGS_EOT        (1U << 3)
++#define CTB_TS_FLAGS_EOTL       (1U << 1)       // Last CTB of a tile line
++#define CTB_TS_FLAGS_EOL        (1U << 2)       // Last CTB of a complete line
++#define CTB_TS_FLAGS_EOT        (1U << 3)       // Last CTB of a tile
 +#define CTB_TS_FLAGS_CSAVE      (1U << 4)
-+#define CTB_TS_FLAGS_CIREQ      (1U << 5)     // Cabac init request
-+#define CTB_TS_FLAGS_TOT        (1U << 6)
++#define CTB_TS_FLAGS_CIREQ      (1U << 5)       // Cabac init request
++#define CTB_TS_FLAGS_TOT        (1U << 6)       // CTB on top row of a tile
 +#define CTB_TS_FLAGS_CLOAD      (1U << 7)
 +
 +typedef struct HEVCRpiPPS {
@@ -14060,10 +14472,12 @@ index 0000000000..3f192b70a4
 +    uint8_t transform_skip_enabled_flag;
 +
 +    uint8_t cu_qp_delta_enabled_flag;
-+    int diff_cu_qp_delta_depth;
++    uint8_t log2_min_cu_qp_delta_size;
++    int cb_qp_offset;   // -12..12
++    int cr_qp_offset;   // -12..12
++    const uint8_t * qp_dblk_x[3];
++    const int8_t * qp_bd_x[3];
 +
-+    int cb_qp_offset;
-+    int cr_qp_offset;
 +    uint8_t pic_slice_level_chroma_qp_offsets_present_flag;
 +    uint8_t weighted_pred_flag;
 +    uint8_t weighted_bipred_flag;
@@ -14074,6 +14488,7 @@ index 0000000000..3f192b70a4
 +    uint8_t tiles_enabled_flag;
 +    uint8_t entropy_coding_sync_enabled_flag;
 +
++    uint8_t tile_wpp_inter_disable;
 +    int num_tile_columns;   ///< num_tile_columns_minus1 + 1
 +    int num_tile_rows;      ///< num_tile_rows_minus1 + 1
 +    uint8_t uniform_spacing_flag;
@@ -19298,10 +19713,10 @@ index 0000000000..b9e7c07fe3
 +
 diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s
 new file mode 100644
-index 0000000000..e14c0e099e
+index 0000000000..3caef20137
 --- /dev/null
 +++ b/libavcodec/rpi_hevc_transform.s
-@@ -0,0 +1,1090 @@
+@@ -0,0 +1,444 @@
 +# ******************************************************************************
 +# Argon Design Ltd.
 +# (c) Copyright 2015 Argon Design Ltd. All rights reserved.
@@ -19397,37 +19812,6 @@ index 0000000000..e14c0e099e
 +
 +
 +hevc_trans_16x16:
-+  cmp r5,1
-+  beq memclear16
-+  #cmp r5,2
-+  #beq hevc_deblock_16x16
-+  #cmp r5,3
-+  #beq hevc_uv_deblock_16x16
-+  #cmp r5,4
-+  #beq hevc_uv_deblock_16x16_with_clear
-+  cmp r5,5
-+  beq hevc_run_command_list
-+
-+.if USE_STACK==0
-+  b do_transform
-+
-+  .balign 32
-+packed_buffer:
-+  .space 16*2
-+intermediate_results:
-+  .space 32*32*2
-+unpacked_buffer:
-+  .space 32*32*2
-+
-+packed_buffer2:
-+  .space 16*2
-+intermediate_results2:
-+  .space 32*32*2
-+unpacked_buffer2:
-+  .space 32*32*2
-+.endif
-+
-+do_transform:
 +  push r6-r15, lr # TODO cut down number of used registers
 +  mov r14,r3 # coeffs32
 +  mov r15,r4 # num32
@@ -19752,23868 +20136,237 @@ index 0000000000..e14c0e099e
 +  vsth VX(48,32++),(r0+=r6) REP 16
 +  pop pc
 +
-+memclear16:
-+  # r0 is address
-+  # r1 is number of 16bits values to set to 0 (may overrun past end and clear more than specified)
-+  vmov HX(0++,0),0 REP 16
-+  mov r2,32
-+loop:
-+  vsth HX(0++,0),(r0+=r2) REP 16
-+  add r0,16*16*2
-+  sub r1,16*16
-+  cmp r1,0
-+  bgt loop
-+  b lr
++.if USE_STACK == 0
++  .balign 32
 +
++# .space directives generate 0's in the bin so avoid unnecessary padding by
++# just setting to appropriate value
++.equ intermediate_results, $+16*2
 +
-+################################################################################
-+# HEVC VPU Deblock
++# Layout goes:
 +#
-+# Vertical edges before horizontal
-+# Decision can change every 4 pixels, but only 8 pixel boundaries are deblocked
++#packed_buffer:
++#  .space 16*2
++#intermediate_results:
++#  .space 32*32*2
++#unpacked_buffer:
++#  .space 32*32*2
 +#
-+# ARM is responsible for storing beta and tc for each 4 pixels horiz and vert edge.
-+# The VPU code works in units of 16x16 blocks.
-+# We do vertical filtering for the current block followed by horizontal filtering for the previous (except for the first time).
-+# One final horizontal filter is required at the end.
-+# PCM is not allowed in this code.
-+#
-+#
-+# H(16-4:16+15,0) contains previous block (note that we need 4 lines above of context that may get altered during filtering)
-+# H(16:31,16) contains current block (note that we do not need the upper lines until the horizontal filtering.
++#packed_buffer2:
++#  .space 16*2
++#intermediate_results2:
++#  .space 32*32*2
++#unpacked_buffer2:
++#  .space 32*32*2
++.endif
 +
-+.set P0,63
-+.set P1,62
-+.set P2,61
-+.set P3,60
-+.set Q0,59
-+.set Q1,58
-+.set Q2,57
-+.set Q3,56
 +
-+.set dp,32
-+.set dq,33
-+.set d,34
-+.set decision,35
-+.set beta,36
-+.set beta2,37
-+.set beta3,38
-+.set ptest,39
-+.set qtest,40
-+.set pqtest,41
-+.set thresh,42
-+.set deltatest, 44
-+.set deltap1, 45
-+.set tc25, 46
-+.set setup,47
-+.set tc,48
-+.set tc25,49
-+.set tc2, 50
-+.set do_filter, 51
-+.set delta, 52
-+.set tc10, 53
-+.set delta0, 54
-+.set delta1, 55
-+.set zeros, 0
-+.set setup_input, 1
-+.set deltaq1, 2
-+
-+
-+
-+# hevc_deblock_16x16 deblocks an entire row that is 16 pixels high by the full width of the image.
-+# Row has num16 16x16 blocks across
-+# Beta goes from 0 to 64
-+# tc goes from 0 to 24
-+# setup[block_idx][0=vert,1=horz][0=first edge, 1=second edge][0=beta,1=tc][0..3=edge number]
-+#   has 8 bytes per edge
-+#   has 16 bytes per direction
-+#   has 32 bytes per 16x16 block
-+# hevc_deblock_16x16(uint8_t *img (r0), int stride (r1), int num16w (r2), uint8_t setup[num16][2][2][2][4](r3),int num16h(r4))
-+hevc_deblock_16x16:
-+  push r6-r15, lr
-+  mov r9,r4
-+  mov r4,r3
-+  mov r13,r2
-+  mov r2,r0
-+  mov r10,r0
-+  subscale4 r0,r1
-+  mov r8,63
-+  mov r6,-3
-+  vmov H(zeros,0),0
-+# r7 is number of blocks still to load
-+# r0 is location of current block - 4 * stride
-+# r1 is stride
-+# r2 is location of current block
-+# r3 is offset of start of block (actual edges start at H(16,16)+r3 for horizontal and H(16,0)+r3 for vertical
-+# r4 is setup
-+# r5 is for temporary calculations
-+# r8 holds 63
-+# r6 holds -3
-+# r9 holds the number of 16 high rows to process
-+# r10 holds the original img base
-+# r11 returns 0 if no filtering was done on the edge
-+# r12 saves a copy of this
-+# r13 is copy of width
-+
-+process_row:
-+  # First iteration does not do horizontal filtering on previous
-+  mov r7, r13
-+  mov r3,0
-+  vldb H(12++,16)+r3,(r0 += r1) REP 4    # Load the current block
-+  vldb H(16++,16)+r3,(r2 += r1) REP 16
-+  vldb H(setup_input,0), (r4)  # We may wish to prefetch these
-+  vstb H(zeros,0),(r4)
-+  bl vert_filter
-+  add r3,8
-+  vadd H(setup_input,0),H(setup_input,8),0 # Rotate to second set of 8
-+  bl vert_filter
-+  sub r3,8
-+  b start_deblock_loop
-+deblock_loop:
-+  # Middle iterations do vertical on current block and horizontal on preceding
-+  vldb H(12++,16)+r3,(r0 += r1) REP 4  # load the current block
-+  vldb H(16++,16)+r3,(r2 += r1) REP 16
-+  vldb H(setup_input,0), (r4)
-+  vstb H(zeros,0),(r4)
-+  bl vert_filter
-+  add r3,8
-+  vadd H(setup_input,0),H(setup_input,8),0
-+  bl vert_filter
-+  sub r3,8
-+  vldb H(setup_input,0), -16(r4)
-+  vstb H(zeros,0),-16(r4)
-+  bl horz_filter
-+  mov r12,r11
-+  add r3,8*64
-+  vadd H(setup_input,0),H(setup_input,8),0
-+  bl horz_filter
-+  sub r3,8*64
-+  addcmpbeq r12,0,0,skip_save_top
-+  vstb H(12++,0)+r3,-16(r0 += r1) REP 4  # Save the deblocked pixels for the previous block
-+skip_save_top:
-+  vstb H(16++,0)+r3,-16(r2 += r1) REP 16
-+start_deblock_loop:
-+  # move onto next 16x16 (could do this with circular buffer support instead)
-+  add r3,16
-+  and r3,r8
-+  add r4,32
-+  # Perform loop counter operations (may work with an addcmpbgt as well?)
-+  add r0,16
-+  add r2,16
-+  sub r7,1
-+  cmp r7,0 # Are there still more blocks to load
-+  bgt deblock_loop
-+
-+  # Final iteration needs to just do horizontal filtering
-+  vldb H(setup_input,0), -16(r4)
-+  vstb H(zeros,0),-16(r4)
-+  bl horz_filter
-+  mov r12,r11
-+  add r3,8*64
-+  vadd H(setup_input,0),H(setup_input,8),0
-+  bl horz_filter
-+  sub r3,64*8
-+  addcmpbeq r12,0,0,skip_save_top2
-+  vstb H(12++,0)+r3,-16(r0 += r1) REP 4  # Save the deblocked pixels for the previous block
-+skip_save_top2:
-+  vstb H(16++,0)+r3,-16(r2 += r1) REP 16
-+
-+# Now look to see if we should do another row
-+  sub r9,1
-+  cmp r9,0
-+  bgt start_again
-+  pop r6-r15, pc
-+start_again:
-+  # Need to sort out r0,r2 to point to next row down
-+  addscale16 r10,r1
-+  mov r2,r10
-+  subscale4 r0,r2,r1
-+  b process_row
-+
-+
-+# At this stage H(16,16)+r3 points to the first pixel of the 16 high edge to be filtered
-+# So we can reuse the code we move the parts to be filtered into HX(P0/P1/P2/P3/Q0/Q1/Q2/Q3,0) - we will perform a final saturation step on placing them back into the correct locations
-+
-+vert_filter:
-+  push lr
-+
-+  vmov HX(P3,0), V(16,12)+r3
-+  vmov HX(P2,0), V(16,13)+r3
-+  vmov HX(P1,0), V(16,14)+r3
-+  vmov HX(P0,0), V(16,15)+r3
-+  vmov HX(Q0,0), V(16,16)+r3
-+  vmov HX(Q1,0), V(16,17)+r3
-+  vmov HX(Q2,0), V(16,18)+r3
-+  vmov HX(Q3,0), V(16,19)+r3
-+
-+  bl do_luma_filter
-+
-+  vadds V(16,13)+r3, HX(P2,0), 0
-+  vadds V(16,14)+r3, HX(P1,0), 0
-+  vadds V(16,15)+r3, HX(P0,0), 0
-+  # P3 and Q3 never change so don't bother saving back
-+  vadds V(16,16)+r3, HX(Q0,0), 0
-+  vadds V(16,17)+r3, HX(Q1,0), 0
-+  vadds V(16,18)+r3, HX(Q2,0), 0
-+
-+  pop pc
-+
-+# Filter edge at H(16,0)+r3
-+horz_filter:
-+  push lr
-+
-+  vmov HX(P3,0), H(12,0)+r3
-+  vmov HX(P2,0), H(13,0)+r3
-+  vmov HX(P1,0), H(14,0)+r3
-+  vmov HX(P0,0), H(15,0)+r3
-+  vmov HX(Q0,0), H(16,0)+r3
-+  vmov HX(Q1,0), H(17,0)+r3
-+  vmov HX(Q2,0), H(18,0)+r3
-+  vmov HX(Q3,0), H(19,0)+r3
-+
-+  bl do_luma_filter
-+
-+  vadds H(13,0)+r3, HX(P2,0), 0
-+  vadds H(14,0)+r3, HX(P1,0), 0
-+  vadds H(15,0)+r3, HX(P0,0), 0
-+  # P3 and Q3 never change so don't bother saving back
-+  vadds H(16,0)+r3, HX(Q0,0), 0
-+  vadds H(17,0)+r3, HX(Q1,0), 0
-+  vadds H(18,0)+r3, HX(Q2,0), 0
-+
-+  pop pc
-+
-+# r4 points to array of beta/tc for each 4 length edge
-+do_luma_filter:
-+  valtl H(setup,0),H(setup_input,0),H(setup_input,0) # b*8tc*8
-+  valtl HX(beta,0),H(setup,0),H(setup,0)
-+  valtu HX(tc,0),H(setup,0),H(setup,0)
-+  vmul HX(tc25,0), HX(tc,0), 5
-+  vadd HX(tc25,0),HX(tc25,0), 1
-+  vasr HX(tc25,0), HX(tc25,0), 1
-+
-+  # Compute decision
-+  vadd HX(dp,0),HX(P1,0),HX(P1,0) # 2*P1
-+  vsub HX(dp,0),HX(P2,0),HX(dp,0) # P2-2*P1
-+  vadd HX(dp,0),HX(dp,0),HX(P0,0) # P2-2*P1+P0
-+  vdist HX(dp,0),HX(dp,0),0 # abs(P2-2*P1+P0) # dp0
-+
-+  vadd HX(dq,0),HX(Q1,0),HX(Q1,0) # 2*Q1
-+  vsub HX(dq,0),HX(Q2,0),HX(dq,0) # Q2-2*Q1
-+  vadd HX(dq,0),HX(dq,0),HX(Q0,0) # Q2-2*Q1+Q0
-+  vdist HX(dq,0),HX(dq,0),0 # abs(Q2-2*Q1+Q0) # dq0
-+
-+  vadd HX(d,0), HX(dp,0), HX(dq,0)
-+  vasr HX(beta2,0),HX(beta,0),2
-+  vasr HX(beta3,0),HX(beta,0),3
-+
-+  # Compute flags that are negative if all conditions pass
-+  vdist HX(decision,0), HX(P0,0), HX(P3,0) CLRA SACC
-+  vdist HX(decision,0), HX(Q0,0), HX(Q3,0) SACC
-+  vsub HX(decision,0), HX(decision,0), HX(beta3,0) SETF
-+
-+  vdist HX(decision,0), HX(P0,0), HX(Q0,0) IFN
-+  vsub HX(decision,0), HX(decision,0), HX(tc25,0) IFN SETF
-+  vadd HX(decision,0), HX(d,0), HX(d,0) IFN
-+  vsub HX(decision,0), HX(decision,0), HX(beta2,0) IFN SETF
-+  vmov HX(decision,0), 1 IFNN
-+  vadd H(decision,0),H(decision,3),0 IFN
-+  vadd H(decision,16),H(decision,19),0 IFN
-+  vmov -,HX(decision,0) SETF   # N marks strong filter
-+  vmov HX(decision,0), 1 IFNN  # NN marks normal filter
-+
-+  vadd HX(do_filter,0), HX(d,3), HX(d,0)
-+  vsub HX(do_filter,0), HX(do_filter,0), HX(beta,0) SETF # IFNN means no filter
-+  vmov HX(decision,0),0 IFNN # Z marks no filter
-+
-+  # Expand out decision (currently valid one every 4 pixels)  0...1...2...3
-+  # First extract out even terms
-+  vodd HX(decision,0),HX(decision,0),HX(decision,0)  # 0.1.2.3
-+  vodd HX(decision,0),HX(decision,0),HX(decision,0)  # 0123
-+  # Now expand back
-+  valtl HX(decision,0),HX(decision,0),HX(decision,0) # 00112233
-+  valtl HX(decision,0),HX(decision,0),HX(decision,0) SETF # 0000111122223333
-+
-+  # HX(decision,0) is negative if want strong filtering, 1 if want normal filtering, 0 if want no filtering
-+
-+  # Do a quick check to see if there is anything to do
-+  mov r11, 0 # Signal no filtering
-+  vmov -,1 IFNZ SUMS r5
-+  cmp r5,0
-+  beq filtering_done
-+  mov r11, 1 # Signal some filtering
-+  # And whether there is any strong filtering
-+  vmov -,1 IFN SUMS r5
-+  cmp r5,0
-+  beq normal_filtering
-+
-+  ##############################################################################
-+  # Strong filtering - could maybe fast case if all have same sign? (especially if all disabled!)
-+  vshl HX(tc2,0), HX(tc,0), 1  # Note that in normal filtering tx2 is tc/2, while here it is tc*2
-+
-+  # Take a copy of the original pixels for use in decision calculation
-+  vmov HX(P0,32),HX(P0,0)
-+  vmov HX(Q0,32),HX(Q0,0)
-+  vmov HX(P1,32),HX(P1,0)
-+  vmov HX(Q1,32),HX(Q1,0)
-+  vmov HX(P2,32),HX(P2,0)
-+  vmov HX(Q2,32),HX(Q2,0)
-+
-+  vadd -,HX(P2,32),4 CLRA SACC
-+  vshl -,HX(P1,32),1 SACC
-+  vshl -,HX(P0,32),1 SACC
-+  vshl -,HX(Q0,32),1 SACC
-+  vshl HX(delta,0),HX(Q1,32),0 SACC
-+  vasr HX(delta,0),HX(delta,0), 3
-+  vsub HX(delta,0),HX(delta,0),HX(P0,32)
-+  vclamps HX(delta,0), HX(delta,0), HX(tc2,0)
-+  vadd HX(P0,0),HX(P0,32),HX(delta,0) IFN
-+
-+  vadd -,HX(P2,32),2 CLRA SACC
-+  vadd -,HX(P1,32),HX(P0,32) SACC
-+  vshl HX(delta,0),HX(Q0,32),0 SACC
-+  vasr HX(delta,0),HX(delta,0), 2
-+  vsub HX(delta,0),HX(delta,0),HX(P1,32)
-+  vclamps HX(delta,0), HX(delta,0), HX(tc2,0)
-+  vadd HX(P1,0),HX(P1,32),HX(delta,0) IFN
-+
-+  vadd -,HX(Q0,32),4 CLRA SACC
-+  vadd -,HX(P1,32),HX(P0,32) SACC
-+  vmul -,HX(P2,32),3 SACC
-+  vshl HX(delta,0),HX(P3,0),1 SACC # Note that we have not made a copy of P3, so using P3,0 is correct
-+  vasr HX(delta,0),HX(delta,0), 3
-+  vsub HX(delta,0),HX(delta,0),HX(P2,32)
-+  vclamps HX(delta,0), HX(delta,0), HX(tc2,0)
-+  vadd HX(P2,0),HX(P2,32),HX(delta,0) IFN
-+  #vmov HX(P2,0),3 IFN
-+
-+  # Now reverse all P/Qs
-+
-+  vadd -,HX(Q2,32),4 CLRA SACC
-+  vshl -,HX(Q1,32),1 SACC
-+  vshl -,HX(Q0,32),1 SACC
-+  vshl -,HX(P0,32),1 SACC
-+  vshl HX(delta,0),HX(P1,32),0 SACC
-+  vasr HX(delta,0),HX(delta,0), 3
-+  vsub HX(delta,0),HX(delta,0),HX(Q0,32)
-+  vclamps HX(delta,0), HX(delta,0), HX(tc2,0)
-+  vadd HX(Q0,0),HX(Q0,32),HX(delta,0) IFN
-+
-+  vadd -,HX(Q2,32),2 CLRA SACC
-+  vadd -,HX(Q1,32),HX(Q0,32) SACC
-+  vshl HX(delta,0),HX(P0,32),0 SACC
-+  vasr HX(delta,0),HX(delta,0), 2
-+  vsub HX(delta,0),HX(delta,0),HX(Q1,32)
-+  vclamps HX(delta,0), HX(delta,0), HX(tc2,0)
-+  vadd HX(Q1,0),HX(Q1,32),HX(delta,0) IFN
-+
-+  vadd -,HX(P0,32),4 CLRA SACC
-+  vadd -,HX(Q1,32),HX(Q0,32) SACC
-+  vmul -,HX(Q2,32),3 SACC
-+  vshl HX(delta,0),HX(Q3,0),1 SACC # Note that we have not made a copy of Q3, so using Q3,0 is correct
-+  vasr HX(delta,0),HX(delta,0), 3
-+  vsub HX(delta,0),HX(delta,0),HX(Q2,32)
-+  vclamps HX(delta,0), HX(delta,0), HX(tc2,0)
-+  vadd HX(Q2,0),HX(Q2,32),HX(delta,0) IFN
-+
-+  ##############################################################################
-+  # Normal filtering
-+normal_filtering:
-+  # Invert the decision flags
-+  # make instruction more complicated as assembler has error and loses SETF
-+  vrsub HX(tc10,0), HX(decision,0), 0 SETF # IFN means normal filtering
-+  vmov  -, HX(tc10,0) SETF # IFN means normal filtering
-+
-+  vmov -,1 IFN SUMS r5
-+  cmp r5,0
-+  beq filtering_done
-+
-+  vasr HX(tc2,0), HX(tc,0), 1
-+  vmul HX(tc10,0), HX(tc,0), 10
-+
-+  vasr HX(thresh,0), HX(beta,0), 1
-+  vadd HX(thresh,0), HX(thresh,0), HX(beta,0)
-+  vasr HX(thresh,0), HX(thresh,0), 3 CLRA SACC
-+
-+  vadd HX(ptest,0),HX(dp,3),HX(dp,0)
-+  vsub HX(ptest,0),HX(ptest,0),HX(thresh,0) # ptest is negative if we need to do the P2 pixel
-+  vadd HX(qtest,0),HX(dq,3),HX(dq,0)
-+  vsub HX(qtest,0),HX(qtest,0),HX(thresh,0) # qtest is negative if we need to do the Q2 pixel
-+  # Expand ptest and qtest together
-+  vodd HX(pqtest,0),HX(ptest,0),HX(qtest,0)  # p.p.p.p.q.q.q.q
-+  vodd HX(pqtest,0),HX(pqtest,0),HX(pqtest,0) # ppppqqqq........
-+  valtl HX(pqtest,0),HX(pqtest,0),HX(pqtest,0) # ppppppppqqqqqqqq
-+  valtl HX(ptest,0),HX(pqtest,0),HX(pqtest,0)
-+  valtu HX(qtest,0),HX(pqtest,0),HX(pqtest,0)
-+
-+  vsub HX(delta0,0), HX(Q0,0), HX(P0,0)
-+  vsub HX(delta1,0), HX(Q1,0), HX(P1,0)
-+  vmov -,8 CLRA SACC
-+  vmul -,HX(delta0,0), 9 SACC
-+  vmul HX(delta0,0),HX(delta1,0), r6 SACC
-+  vasr HX(delta0,0), HX(delta0,0), 4
-+  vdist HX(deltatest,0), HX(delta0,0), 0
-+  vsub HX(deltatest,0), HX(deltatest,0), HX(tc10,0) IFN SETF # negative if still need to do something
-+  vmov HX(deltatest,0), 0 IFNN # clear if no need to do anything so we can reload flags later
-+
-+  vclamps HX(delta0,0), HX(delta0,0), HX(tc,0)
-+
-+  vadd HX(deltap1,0), HX(P2,0), HX(P0,0)
-+  vadd HX(deltap1,0), HX(deltap1,0), 1
-+  vasr HX(deltap1,0), HX(deltap1,0), 1 CLRA SACC
-+  vsub HX(deltap1,0), HX(delta0,0), HX(P1,0) SACC
-+  vasr HX(deltap1,0), HX(deltap1,0), 1
-+  vclamps HX(deltap1,0), HX(deltap1,0), HX(tc2,0)
-+
-+  vadd HX(deltaq1,0), HX(Q2,0), HX(Q0,0)
-+  vadd HX(deltaq1,0), HX(deltaq1,0), 1
-+  vasr HX(deltaq1,0), HX(deltaq1,0), 1 CLRA SACC
-+  vadd HX(deltaq1,0), HX(delta0,0), HX(Q1,0)
-+  vrsub -, HX(delta0,0), 0 SACC
-+  vrsub HX(deltaq1,0), HX(Q1,0), 0 SACC
-+  vasr HX(deltaq1,0), HX(deltaq1,0), 1
-+  vclamps HX(deltaq1,0), HX(deltaq1,0), HX(tc2,0)
-+
-+  vadds HX(P0,0), HX(P0,0), HX(delta0,0) IFN
-+  vsubs HX(Q0,0), HX(Q0,0), HX(delta0,0) IFN
-+
-+  vmov -,HX(ptest,0) IFN SETF # Negative if need to do p1
-+  vadds HX(P1,0), HX(P1,0), HX(deltap1,0) IFN
-+
-+  vmov -,HX(deltatest,0) SETF
-+  vmov -,HX(qtest,0) IFN SETF # Negative if need to do q1
-+  vadds HX(Q1,0), HX(Q1,0), HX(deltaq1,0) IFN
-+
-+  #vmov HX(P2,0),1 IFN
-+
-+filtering_done:
-+  b lr
-+
-+
-+hevc_uv_deblock_16x16:
-+  push r6-r15, lr
-+  mov r14,0
-+  b hevc_uv_start
-+hevc_uv_deblock_16x16_with_clear:
-+  push r6-r15, lr
-+  mov r14,1
-+  b hevc_uv_start
-+
-+hevc_uv_start:
-+  mov r9,r4
-+  mov r4,r3
-+  mov r13,r2
-+  mov r2,r0
-+  mov r10,r0
-+  subscale4 r0,r1
-+  mov r8,63
-+  mov r6,-3
-+  vmov H(zeros,0),0
-+# r7 is number of blocks still to load
-+# r0 is location of current block - 4 * stride
-+# r1 is stride
-+# r2 is location of current block
-+# r3 is offset of start of block (actual edges start at H(16,16)+r3 for horizontal and H(16,0)+r3 for vertical
-+# r4 is setup
-+# r5 is for temporary calculations
-+# r8 holds 63
-+# r6 holds -3
-+# r9 holds the number of 16 high rows to process
-+# r10 holds the original img base
-+# r11 returns 0 if no filtering was done on the edge
-+# r12 saves a copy of this
-+# r13 is copy of width
-+# r14 is 1 if we should clear the old contents, or 0 if not
-+
-+uv_process_row:
-+  # First iteration does not do horizontal filtering on previous
-+  mov r7, r13
-+  mov r3,0
-+  vldb H(12++,16)+r3,(r0 += r1) REP 4    # Load the current block
-+  vldb H(16++,16)+r3,(r2 += r1) REP 16
-+  vldb H(setup_input,0), (r4)  # We may wish to prefetch these
-+  cmp r14,1
-+  bne uv_skip0
-+  vadd H(setup_input,0),H(setup_input,4),0 # Rotate by 4 to access V strengths
-+  vstb H(zeros,0),(r4)
-+uv_skip0:
-+  bl uv_vert_filter
-+  add r3,8
-+  vadd H(setup_input,0),H(setup_input,8),0 # Rotate to second set of 8
-+  bl uv_vert_filter
-+  sub r3,8
-+  b uv_start_deblock_loop
-+uv_deblock_loop:
-+  # Middle iterations do vertical on current block and horizontal on preceding
-+  vldb H(12++,16)+r3,(r0 += r1) REP 4  # load the current block
-+  vldb H(16++,16)+r3,(r2 += r1) REP 16
-+  vldb H(setup_input,0), (r4)
-+  cmp r14,1
-+  bne uv_skip1
-+  vadd H(setup_input,0),H(setup_input,4),0 # Rotate by 4 to access V strengths
-+  vstb H(zeros,0),(r4)
-+uv_skip1:
-+  bl uv_vert_filter
-+  add r3,8
-+  vadd H(setup_input,0),H(setup_input,8),0
-+  bl uv_vert_filter
-+  sub r3,8
-+  vldb H(setup_input,0), -16(r4)
-+  cmp r14,1
-+  bne uv_skip3
-+  vadd H(setup_input,0),H(setup_input,4),0 # Rotate by 4 to access V strengths
-+  vstb H(zeros,0),-16(r4)
-+uv_skip3:
-+  bl uv_horz_filter
-+  mov r12,r11
-+  add r3,8*64
-+  vadd H(setup_input,0),H(setup_input,8),0
-+  bl uv_horz_filter
-+  sub r3,8*64
-+  addcmpbeq r12,0,0,uv_skip_save_top
-+  vstb H(12++,0)+r3,-16(r0 += r1) REP 4  # Save the deblocked pixels for the previous block
-+uv_skip_save_top:
-+  vstb H(16++,0)+r3,-16(r2 += r1) REP 16
-+uv_start_deblock_loop:
-+  # move onto next 16x16 (could do this with circular buffer support instead)
-+  add r3,16
-+  and r3,r8
-+  add r4,32
-+  # Perform loop counter operations (may work with an addcmpbgt as well?)
-+  add r0,16
-+  add r2,16
-+  sub r7,1
-+  cmp r7,0 # Are there still more blocks to load
-+  bgt uv_deblock_loop
-+
-+  # Final iteration needs to just do horizontal filtering
-+  vldb H(setup_input,0), -16(r4)
-+  cmp r14,1
-+  bne uv_skip2
-+  vadd H(setup_input,0),H(setup_input,4),0 # Rotate by 4 to access V strengths
-+  vstb H(zeros,0),-16(r4)
-+uv_skip2:
-+  bl uv_horz_filter
-+  mov r12,r11
-+  add r3,8*64
-+  vadd H(setup_input,0),H(setup_input,8),0
-+  bl uv_horz_filter
-+  sub r3,64*8
-+  addcmpbeq r12,0,0,uv_skip_save_top2
-+  vstb H(12++,0)+r3,-16(r0 += r1) REP 4  # Save the deblocked pixels for the previous block
-+uv_skip_save_top2:
-+  vstb H(16++,0)+r3,-16(r2 += r1) REP 16
-+
-+# Now look to see if we should do another row
-+  sub r9,1
-+  cmp r9,0
-+  bgt uv_start_again
-+  pop r6-r15, pc
-+uv_start_again:
-+  # Need to sort out r0,r2 to point to next row down
-+  addscale16 r10,r1
-+  mov r2,r10
-+  subscale4 r0,r2,r1
-+  b uv_process_row
-+
-+
-+# At this stage H(16,16)+r3 points to the first pixel of the 16 high edge to be filtered
-+# So we can reuse the code we move the parts to be filtered into HX(P0/P1/P2/P3/Q0/Q1/Q2/Q3,0) - we will perform a final saturation step on placing them back into the correct locations
-+
-+uv_vert_filter:
-+  push lr
-+
-+  vmov HX(P1,0), V(16,14)+r3
-+  vmov HX(P0,0), V(16,15)+r3
-+  vmov HX(Q0,0), V(16,16)+r3
-+  vmov HX(Q1,0), V(16,17)+r3
-+
-+  bl do_chroma_filter
-+
-+  vadds V(16,15)+r3, HX(P0,0), 0
-+  vadds V(16,16)+r3, HX(Q0,0), 0
-+
-+  pop pc
-+
-+# Filter edge at H(16,0)+r3
-+uv_horz_filter:
-+  push lr
-+
-+  vmov HX(P1,0), H(14,0)+r3
-+  vmov HX(P0,0), H(15,0)+r3
-+  vmov HX(Q0,0), H(16,0)+r3
-+  vmov HX(Q1,0), H(17,0)+r3
-+
-+  bl do_chroma_filter
-+
-+  vadds H(15,0)+r3, HX(P0,0), 0
-+  # P3 and Q3 never change so don't bother saving back
-+  vadds H(16,0)+r3, HX(Q0,0), 0
-+
-+  pop pc
-+
-+# r4 points to array of beta/tc for each 4 length edge
-+do_chroma_filter:
-+  valtl H(setup,0),H(setup_input,0),H(setup_input,0) # tc*8
-+  valtl HX(tc,0),H(setup,0),H(setup,0)
-+
-+  vsub HX(delta,0),HX(Q0,0),HX(P0,0)
-+  vshl HX(delta,0),HX(delta,0),2 CLRA SACC
-+  vsub -,HX(P1,0),HX(Q1,0) SACC
-+  vmov HX(delta,0),4 SACC
-+  vasr HX(delta,0),HX(delta,0),3
-+  vclamps HX(delta,0), HX(delta,0), HX(tc,0)
-+  vadd HX(P0,0),HX(P0,0),HX(delta,0)
-+  vsub HX(Q0,0),HX(Q0,0),HX(delta,0)
-+  b lr
-+
-+# r0 = list
-+# r1 = number
-+hevc_run_command_list:
-+  push r6-r7, lr
-+  mov r6, r0
-+  mov r7, r1
-+loop_cmds:
-+  ld r0,(r6) # How to encode r6++?
-+  add r6,4
-+  ld r1,(r6)
-+  add r6,4
-+  ld r2,(r6)
-+  add r6,4
-+  ld r3,(r6)
-+  add r6,4
-+  ld r4,(r6)
-+  add r6,4
-+  ld r5,(r6)
-+  add r6,4
-+  bl hevc_trans_16x16
-+  sub r7,1
-+  cmp r7,0
-+  bgt loop_cmds
-+
-+  pop r6-r7, pc
 diff --git a/libavcodec/rpi_hevc_transform10.h b/libavcodec/rpi_hevc_transform10.h
 new file mode 100644
-index 0000000000..d5c2661186
+index 0000000000..1c364492d0
 --- /dev/null
 +++ b/libavcodec/rpi_hevc_transform10.h
-@@ -0,0 +1,11602 @@
+@@ -0,0 +1,94 @@
 +static const unsigned char rpi_hevc_transform10 [] = {
-+21,
-+106,
-+0,
-+144,
-+197,
-+17,
-+85,
-+106,
-+0,
-+144,
-+143,
-+22,
-+0,
-+158,
-+42,
-+16,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+169,
-+3,
-+62,
-+64,
-+79,
-+64,
-+3,
-+232,
-+32,
-+0,
-+0,
-+0,
-+12,
-+248,
-+0,
-+136,
-+0,
-+0,
-+192,
-+248,
-+0,
-+0,
-+64,
-+232,
-+0,
-+2,
-+0,
-+0,
-+12,
-+248,
-+0,
-+168,
-+0,
-+0,
-+192,
-+248,
-+0,
-+0,
-+0,
-+96,
-+3,
-+232,
-+32,
-+0,
-+0,
-+0,
-+7,
-+232,
-+0,
-+2,
-+0,
-+0,
-+8,
-+232,
-+0,
-+4,
-+0,
-+0,
-+4,
-+232,
-+64,
-+0,
-+0,
-+0,
-+5,
-+232,
-+0,
-+2,
-+0,
-+0,
-+57,
-+239,
-+192,
-+253,
-+255,
-+255,
-+43,
-+239,
-+64,
-+0,
-+0,
-+0,
-+91,
-+122,
-+91,
-+124,
-+74,
-+195,
-+80,
-+23,
-+2,
-+111,
-+2,
-+106,
-+50,
-+24,
-+10,
-+106,
-+22,
-+64,
-+4,
-+24,
-+26,
-+102,
-+128,
-+144,
-+51,
-+0,
-+12,
-+248,
-+0,
-+128,
-+0,
-+0,
-+192,
-+8,
-+24,
-+0,
-+128,
-+144,
-+94,
-+0,
-+4,
-+255,
-+48,
-+192,
-+128,
-+3,
-+32,
-+8,
-+16,
-+0,
-+76,
-+254,
-+48,
-+192,
-+9,
-+4,
-+32,
-+8,
-+0,
-+0,
-+4,
-+254,
-+0,
-+144,
-+128,
-+2,
-+0,
-+8,
-+2,
-+0,
-+128,
-+144,
-+77,
-+0,
-+4,
-+255,
-+48,
-+192,
-+128,
-+3,
-+32,
-+8,
-+20,
-+0,
-+76,
-+254,
-+48,
-+192,
-+6,
-+4,
-+32,
-+8,
-+0,
-+0,
-+140,
-+248,
-+44,
-+0,
-+0,
-+0,
-+32,
-+48,
-+4,
-+0,
-+128,
-+69,
-+113,
-+66,
-+242,
-+140,
-+209,
-+192,
-+57,
-+239,
-+64,
-+2,
-+0,
-+0,
-+0,
-+158,
-+127,
-+0,
-+41,
-+3,
-+0,
-+254,
-+0,
-+128,
-+0,
-+4,
-+0,
-+0,
-+0,
-+0,
-+182,
-+64,
-+140,
-+248,
-+32,
-+0,
-+0,
-+0,
-+0,
-+48,
-+24,
-+0,
-+21,
-+64,
-+8,
-+248,
-+0,
-+128,
-+0,
-+0,
-+192,
-+3,
-+20,
-+0,
-+102,
-+237,
-+224,
-+255,
-+255,
-+255,
-+136,
-+248,
-+32,
-+0,
-+0,
-+0,
-+0,
-+240,
-+24,
-+0,
-+12,
-+96,
-+100,
-+8,
-+70,
-+192,
-+68,
-+55,
-+73,
-+195,
-+80,
-+39,
-+4,
-+106,
-+132,
-+110,
-+9,
-+24,
-+105,
-+160,
-+4,
-+95,
-+28,
-+139,
-+246,
-+200,
-+69,
-+232,
-+32,
-+0,
-+0,
-+0,
-+99,
-+31,
-+182,
-+64,
-+4,
-+232,
-+64,
-+0,
-+0,
-+0,
-+5,
-+232,
-+0,
-+2,
-+0,
-+0,
-+90,
-+0,
-+70,
-+192,
-+80,
-+7,
-+164,
-+255,
-+36,
-+204,
-+96,
-+2,
-+0,
-+248,
-+62,
-+0,
-+3,
-+255,
-+55,
-+208,
-+120,
-+3,
-+224,
-+3,
-+190,
-+11,
-+16,
-+139,
-+246,
-+91,
-+0,
-+103,
-+90,
-+0,
-+70,
-+192,
-+80,
-+7,
-+164,
-+255,
-+36,
-+204,
-+224,
-+2,
-+0,
-+248,
-+62,
-+0,
-+3,
-+255,
-+55,
-+208,
-+120,
-+3,
-+224,
-+3,
-+190,
-+11,
-+16,
-+139,
-+246,
-+91,
-+0,
-+103,
-+90,
-+0,
-+0,
-+246,
-+0,
-+128,
-+0,
-+4,
-+32,
-+237,
-+0,
-+8,
-+0,
-+0,
-+4,
-+232,
-+32,
-+0,
-+0,
-+0,
-+142,
-+248,
-+32,
-+0,
-+0,
-+0,
-+192,
-+67,
-+0,
-+0,
-+8,
-+240,
-+0,
-+128,
-+129,
-+3,
-+38,
-+237,
-+224,
-+255,
-+255,
-+255,
-+136,
-+240,
-+32,
-+0,
-+134,
-+3,
-+8,
-+96,
-+100,
-+8,
-+70,
-+192,
-+68,
-+55,
-+69,
-+195,
-+80,
-+39,
-+4,
-+106,
-+164,
-+110,
-+127,
-+144,
-+185,
-+255,
-+101,
-+160,
-+4,
-+7,
-+24,
-+139,
-+245,
-+200,
-+65,
-+232,
-+32,
-+0,
-+0,
-+0,
-+102,
-+31,
-+90,
-+0,
-+225,
-+64,
-+242,
-+64,
-+79,
-+195,
-+80,
-+127,
-+2,
-+111,
-+3,
-+232,
-+128,
-+0,
-+0,
-+0,
-+7,
-+232,
-+0,
-+2,
-+0,
-+0,
-+232,
-+0,
-+8,
-+109,
-+232,
-+191,
-+0,
-+222,
-+4,
-+24,
-+8,
-+237,
-+32,
-+16,
-+0,
-+0,
-+137,
-+64,
-+26,
-+64,
-+2,
-+106,
-+46,
-+24,
-+161,
-+64,
-+152,
-+64,
-+242,
-+74,
-+7,
-+30,
-+255,
-+159,
-+187,
-+255,
-+33,
-+237,
-+0,
-+8,
-+0,
-+0,
-+152,
-+64,
-+4,
-+232,
-+64,
-+0,
-+0,
-+0,
-+149,
-+96,
-+128,
-+144,
-+32,
-+0,
-+72,
-+232,
-+0,
-+4,
-+0,
-+0,
-+65,
-+232,
-+32,
-+0,
-+0,
-+0,
-+128,
-+144,
-+24,
-+0,
-+4,
-+232,
-+0,
-+2,
-+0,
-+0,
-+101,
-+96,
-+145,
-+64,
-+168,
-+64,
-+128,
-+144,
-+16,
-+0,
-+72,
-+232,
-+0,
-+4,
-+0,
-+0,
-+65,
-+232,
-+32,
-+0,
-+0,
-+0,
-+128,
-+144,
-+8,
-+0,
-+74,
-+232,
-+0,
-+8,
-+0,
-+0,
-+242,
-+140,
-+213,
-+192,
-+41,
-+3,
-+239,
-+3,
-+12,
-+248,
-+0,
-+128,
-+0,
-+0,
-+192,
-+248,
-+4,
-+0,
-+12,
-+248,
-+0,
-+132,
-+64,
-+0,
-+192,
-+248,
-+4,
-+0,
-+0,
-+96,
-+255,
-+159,
-+101,
-+255,
-+0,
-+232,
-+0,
-+4,
-+0,
-+0,
-+255,
-+159,
-+112,
-+255,
-+4,
-+255,
-+48,
-+204,
-+16,
-+3,
-+224,
-+251,
-+62,
-+0,
-+4,
-+255,
-+51,
-+204,
-+128,
-+3,
-+224,
-+251,
-+16,
-+0,
-+76,
-+254,
-+51,
-+204,
-+128,
-+3,
-+224,
-+251,
-+20,
-+0,
-+128,
-+64,
-+6,
-+232,
-+64,
-+0,
-+0,
-+0,
-+140,
-+248,
-+47,
-+0,
-+0,
-+0,
-+224,
-+99,
-+0,
-+0,
-+32,
-+247,
-+240,
-+207,
-+16,
-+3,
-+32,
-+247,
-+176,
-+207,
-+17,
-+19,
-+32,
-+247,
-+112,
-+207,
-+18,
-+35,
-+32,
-+247,
-+48,
-+207,
-+19,
-+51,
-+32,
-+247,
-+240,
-+206,
-+20,
-+67,
-+32,
-+247,
-+176,
-+206,
-+21,
-+83,
-+32,
-+247,
-+112,
-+206,
-+22,
-+99,
-+32,
-+247,
-+48,
-+206,
-+23,
-+115,
-+32,
-+247,
-+240,
-+205,
-+24,
-+131,
-+32,
-+247,
-+176,
-+205,
-+25,
-+147,
-+32,
-+247,
-+112,
-+205,
-+26,
-+163,
-+32,
-+247,
-+48,
-+205,
-+27,
-+179,
-+32,
-+247,
-+240,
-+204,
-+28,
-+195,
-+32,
-+247,
-+176,
-+204,
-+29,
-+211,
-+32,
-+247,
-+112,
-+204,
-+30,
-+227,
-+32,
-+247,
-+48,
-+204,
-+31,
-+243,
-+4,
-+255,
-+51,
-+204,
-+128,
-+3,
-+224,
-+251,
-+16,
-+0,
-+76,
-+254,
-+51,
-+204,
-+128,
-+3,
-+224,
-+251,
-+20,
-+0,
-+0,
-+237,
-+32,
-+0,
-+0,
-+0,
-+140,
-+248,
-+47,
-+0,
-+0,
-+0,
-+224,
-+99,
-+0,
-+0,
-+111,
-+3,
-+4,
-+254,
-+0,
-+128,
-+0,
-+4,
-+0,
-+248,
-+0,
-+0,
-+2,
-+232,
-+32,
-+0,
-+0,
-+0,
-+140,
-+248,
-+32,
-+0,
-+0,
-+0,
-+224,
-+35,
-+0,
-+0,
-+64,
-+232,
-+0,
-+2,
-+0,
-+0,
-+193,
-+232,
-+0,
-+1,
-+0,
-+0,
-+1,
-+106,
-+116,
-+30,
-+90,
-+0,
-+169,
-+3,
-+73,
-+64,
-+52,
-+64,
-+45,
-+64,
-+2,
-+64,
-+10,
-+64,
-+64,
-+198,
-+1,
-+7,
-+8,
-+232,
-+63,
-+0,
-+0,
-+0,
-+6,
-+232,
-+253,
-+255,
-+255,
-+255,
-+0,
-+246,
-+0,
-+0,
-+0,
-+4,
-+215,
-+64,
-+3,
-+96,
-+2,
-+248,
-+0,
-+35,
-+0,
-+0,
-+64,
-+56,
-+0,
-+0,
-+4,
-+248,
-+0,
-+36,
-+0,
-+0,
-+64,
-+56,
-+8,
-+0,
-+0,
-+240,
-+64,
-+0,
-+132,
-+3,
-+128,
-+240,
-+0,
-+0,
-+132,
-+3,
-+128,
-+144,
-+137,
-+0,
-+131,
-+98,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+129,
-+0,
-+131,
-+102,
-+0,
-+158,
-+67,
-+0,
-+2,
-+248,
-+0,
-+35,
-+0,
-+0,
-+64,
-+56,
-+0,
-+0,
-+4,
-+248,
-+0,
-+36,
-+0,
-+0,
-+64,
-+56,
-+8,
-+0,
-+0,
-+240,
-+64,
-+0,
-+132,
-+3,
-+128,
-+240,
-+0,
-+0,
-+132,
-+3,
-+128,
-+144,
-+108,
-+0,
-+131,
-+98,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+100,
-+0,
-+131,
-+102,
-+0,
-+248,
-+64,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+248,
-+0,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+144,
-+161,
-+0,
-+188,
-+64,
-+67,
-+232,
-+0,
-+2,
-+0,
-+0,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+150,
-+0,
-+195,
-+232,
-+0,
-+2,
-+0,
-+0,
-+12,
-+128,
-+7,
-+192,
-+130,
-+248,
-+0,
-+0,
-+112,
-+192,
-+224,
-+16,
-+195,
-+31,
-+132,
-+248,
-+1,
-+0,
-+112,
-+0,
-+224,
-+16,
-+203,
-+31,
-+3,
-+99,
-+131,
-+71,
-+68,
-+232,
-+32,
-+0,
-+0,
-+0,
-+0,
-+99,
-+2,
-+99,
-+23,
-+102,
-+7,
-+106,
-+127,
-+156,
-+182,
-+255,
-+0,
-+248,
-+64,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+248,
-+0,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+144,
-+112,
-+0,
-+188,
-+64,
-+67,
-+232,
-+0,
-+2,
-+0,
-+0,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+101,
-+0,
-+195,
-+232,
-+0,
-+2,
-+0,
-+0,
-+12,
-+128,
-+7,
-+192,
-+130,
-+248,
-+0,
-+0,
-+112,
-+192,
-+224,
-+16,
-+195,
-+31,
-+132,
-+248,
-+1,
-+0,
-+112,
-+0,
-+224,
-+16,
-+203,
-+31,
-+25,
-+102,
-+9,
-+106,
-+2,
-+30,
-+41,
-+3,
-+26,
-+87,
-+162,
-+64,
-+64,
-+198,
-+1,
-+23,
-+127,
-+158,
-+103,
-+255,
-+239,
-+3,
-+0,
-+254,
-+0,
-+143,
-+92,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+64,
-+143,
-+93,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+143,
-+94,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+143,
-+95,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+142,
-+208,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+142,
-+209,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+64,
-+142,
-+210,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+0,
-+142,
-+211,
-+0,
-+0,
-+240,
-+12,
-+0,
-+128,
-+144,
-+107,
-+0,
-+8,
-+255,
-+99,
-+23,
-+0,
-+212,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+163,
-+23,
-+0,
-+228,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+227,
-+23,
-+0,
-+244,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+35,
-+52,
-+0,
-+180,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+99,
-+52,
-+0,
-+164,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+163,
-+52,
-+0,
-+148,
-+192,
-+51,
-+0,
-+0,
-+111,
-+3,
-+239,
-+3,
-+0,
-+254,
-+0,
-+143,
-+12,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+64,
-+143,
-+13,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+143,
-+14,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+143,
-+15,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+142,
-+16,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+142,
-+17,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+64,
-+142,
-+18,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+0,
-+142,
-+19,
-+0,
-+0,
-+240,
-+12,
-+0,
-+128,
-+144,
-+33,
-+0,
-+8,
-+255,
-+99,
-+3,
-+0,
-+212,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+163,
-+3,
-+0,
-+228,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+227,
-+3,
-+0,
-+244,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+35,
-+4,
-+0,
-+180,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+99,
-+4,
-+0,
-+164,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+163,
-+4,
-+0,
-+148,
-+192,
-+51,
-+0,
-+0,
-+111,
-+3,
-+32,
-+246,
-+192,
-+11,
-+1,
-+16,
-+32,
-+246,
-+2,
-+137,
-+47,
-+240,
-+40,
-+246,
-+2,
-+140,
-+47,
-+240,
-+128,
-+245,
-+99,
-+140,
-+5,
-+4,
-+0,
-+247,
-+99,
-+140,
-+1,
-+20,
-+88,
-+246,
-+99,
-+140,
-+1,
-+20,
-+0,
-+247,
-+35,
-+136,
-+62,
-+226,
-+32,
-+247,
-+35,
-+136,
-+32,
-+210,
-+0,
-+247,
-+34,
-+136,
-+63,
-+2,
-+208,
-+246,
-+34,
-+136,
-+0,
-+4,
-+0,
-+247,
-+99,
-+136,
-+58,
-+162,
-+32,
-+247,
-+99,
-+136,
-+33,
-+146,
-+0,
-+247,
-+98,
-+136,
-+59,
-+18,
-+208,
-+246,
-+98,
-+136,
-+0,
-+20,
-+0,
-+247,
-+162,
-+136,
-+33,
-+2,
-+88,
-+246,
-+98,
-+137,
-+2,
-+68,
-+88,
-+246,
-+162,
-+137,
-+3,
-+68,
-+208,
-+254,
-+227,
-+136,
-+60,
-+242,
-+192,
-+243,
-+188,
-+11,
-+208,
-+254,
-+227,
-+136,
-+56,
-+178,
-+192,
-+243,
-+188,
-+10,
-+32,
-+255,
-+226,
-+136,
-+38,
-+58,
-+192,
-+243,
-+60,
-+0,
-+208,
-+254,
-+227,
-+136,
-+59,
-+242,
-+192,
-+243,
-+60,
-+128,
-+32,
-+255,
-+226,
-+136,
-+49,
-+58,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+226,
-+136,
-+34,
-+34,
-+192,
-+243,
-+60,
-+128,
-+32,
-+255,
-+226,
-+136,
-+37,
-+58,
-+192,
-+243,
-+60,
-+128,
-+0,
-+254,
-+192,
-+136,
-+1,
-+4,
-+0,
-+240,
-+0,
-+160,
-+0,
-+255,
-+194,
-+8,
-+0,
-+52,
-+195,
-+243,
-+0,
-+128,
-+0,
-+255,
-+202,
-+40,
-+0,
-+52,
-+195,
-+243,
-+0,
-+128,
-+0,
-+254,
-+0,
-+240,
-+35,
-+10,
-+0,
-+240,
-+60,
-+0,
-+0,
-+254,
-+192,
-+136,
-+1,
-+4,
-+0,
-+240,
-+0,
-+160,
-+0,
-+255,
-+226,
-+140,
-+34,
-+34,
-+195,
-+243,
-+60,
-+0,
-+32,
-+255,
-+227,
-+140,
-+36,
-+58,
-+192,
-+243,
-+60,
-+0,
-+0,
-+254,
-+192,
-+136,
-+0,
-+4,
-+0,
-+240,
-+0,
-+160,
-+16,
-+246,
-+226,
-+136,
-+35,
-+50,
-+16,
-+246,
-+226,
-+136,
-+35,
-+50,
-+32,
-+246,
-+226,
-+136,
-+35,
-+50,
-+32,
-+254,
-+226,
-+136,
-+35,
-+58,
-+192,
-+243,
-+60,
-+0,
-+11,
-+96,
-+0,
-+254,
-+0,
-+240,
-+1,
-+4,
-+0,
-+240,
-+64,
-+115,
-+5,
-+106,
-+0,
-+144,
-+173,
-+1,
-+27,
-+96,
-+0,
-+254,
-+0,
-+240,
-+1,
-+4,
-+0,
-+240,
-+64,
-+147,
-+5,
-+106,
-+0,
-+144,
-+227,
-+0,
-+64,
-+246,
-+163,
-+140,
-+1,
-+4,
-+0,
-+246,
-+192,
-+175,
-+63,
-+2,
-+0,
-+246,
-+192,
-+174,
-+59,
-+2,
-+0,
-+246,
-+128,
-+175,
-+62,
-+2,
-+0,
-+246,
-+128,
-+174,
-+58,
-+2,
-+0,
-+246,
-+64,
-+175,
-+61,
-+2,
-+0,
-+246,
-+64,
-+174,
-+57,
-+2,
-+0,
-+255,
-+43,
-+240,
-+4,
-+212,
-+192,
-+243,
-+128,
-+11,
-+64,
-+254,
-+43,
-+240,
-+1,
-+228,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+240,
-+1,
-+244,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+240,
-+1,
-+180,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+141,
-+0,
-+164,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+3,
-+68,
-+32,
-+247,
-+35,
-+141,
-+191,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+235,
-+143,
-+52,
-+242,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+43,
-+240,
-+2,
-+212,
-+192,
-+243,
-+128,
-+11,
-+0,
-+255,
-+43,
-+240,
-+191,
-+226,
-+192,
-+243,
-+188,
-+10,
-+64,
-+254,
-+43,
-+141,
-+0,
-+180,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+2,
-+68,
-+32,
-+247,
-+35,
-+141,
-+190,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+171,
-+143,
-+52,
-+226,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+43,
-+240,
-+4,
-+180,
-+192,
-+243,
-+128,
-+11,
-+0,
-+255,
-+43,
-+240,
-+191,
-+226,
-+192,
-+243,
-+188,
-+10,
-+128,
-+253,
-+43,
-+240,
-+3,
-+212,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+35,
-+141,
-+1,
-+196,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+3,
-+68,
-+32,
-+247,
-+35,
-+141,
-+189,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+107,
-+143,
-+52,
-+210,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+43,
-+240,
-+4,
-+148,
-+192,
-+243,
-+128,
-+11,
-+64,
-+254,
-+43,
-+240,
-+1,
-+164,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+240,
-+1,
-+180,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+240,
-+1,
-+244,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+141,
-+0,
-+228,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+3,
-+68,
-+32,
-+247,
-+35,
-+141,
-+187,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+235,
-+142,
-+52,
-+178,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+43,
-+240,
-+2,
-+148,
-+192,
-+243,
-+128,
-+11,
-+0,
-+255,
-+43,
-+240,
-+187,
-+162,
-+192,
-+243,
-+188,
-+10,
-+64,
-+254,
-+43,
-+141,
-+0,
-+244,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+2,
-+68,
-+32,
-+247,
-+35,
-+141,
-+186,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+171,
-+142,
-+52,
-+162,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+43,
-+240,
-+4,
-+244,
-+192,
-+243,
-+128,
-+11,
-+0,
-+255,
-+43,
-+240,
-+187,
-+162,
-+192,
-+243,
-+188,
-+10,
-+128,
-+253,
-+43,
-+240,
-+3,
-+148,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+35,
-+141,
-+1,
-+132,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+3,
-+68,
-+32,
-+247,
-+35,
-+141,
-+185,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+107,
-+142,
-+52,
-+146,
-+192,
-+243,
-+60,
-+128,
-+64,
-+255,
-+98,
-+141,
-+0,
-+52,
-+192,
-+243,
-+0,
-+0,
-+0,
-+254,
-+0,
-+240,
-+53,
-+10,
-+0,
-+240,
-+60,
-+0,
-+0,
-+254,
-+0,
-+240,
-+1,
-+4,
-+0,
-+240,
-+64,
-+147,
-+5,
-+106,
-+0,
-+144,
-+177,
-+0,
-+88,
-+246,
-+163,
-+140,
-+1,
-+4,
-+128,
-+245,
-+99,
-+141,
-+10,
-+4,
-+88,
-+246,
-+162,
-+138,
-+1,
-+68,
-+0,
-+247,
-+162,
-+138,
-+36,
-+162,
-+88,
-+254,
-+162,
-+138,
-+3,
-+164,
-+192,
-+243,
-+128,
-+11,
-+0,
-+255,
-+226,
-+137,
-+32,
-+2,
-+195,
-+243,
-+60,
-+0,
-+32,
-+247,
-+226,
-+137,
-+42,
-+114,
-+0,
-+255,
-+34,
-+138,
-+33,
-+18,
-+195,
-+243,
-+60,
-+0,
-+32,
-+247,
-+34,
-+138,
-+42,
-+130,
-+16,
-+246,
-+98,
-+138,
-+40,
-+114,
-+16,
-+246,
-+98,
-+138,
-+41,
-+146,
-+32,
-+246,
-+98,
-+138,
-+41,
-+146,
-+32,
-+246,
-+226,
-+137,
-+41,
-+146,
-+40,
-+246,
-+34,
-+138,
-+41,
-+146,
-+32,
-+247,
-+163,
-+141,
-+63,
-+178,
-+32,
-+247,
-+227,
-+141,
-+62,
-+162,
-+0,
-+254,
-+0,
-+240,
-+8,
-+4,
-+0,
-+240,
-+128,
-+11,
-+128,
-+253,
-+35,
-+240,
-+9,
-+100,
-+192,
-+243,
-+128,
-+10,
-+128,
-+253,
-+163,
-+141,
-+128,
-+115,
-+192,
-+243,
-+152,
-+10,
-+88,
-+246,
-+163,
-+141,
-+4,
-+100,
-+208,
-+246,
-+35,
-+139,
-+0,
-+100,
-+32,
-+255,
-+34,
-+139,
-+53,
-+202,
-+192,
-+243,
-+60,
-+128,
-+0,
-+254,
-+0,
-+139,
-+0,
-+4,
-+0,
-+240,
-+0,
-+160,
-+240,
-+246,
-+163,
-+141,
-+48,
-+98,
-+0,
-+247,
-+99,
-+139,
-+63,
-+210,
-+0,
-+247,
-+98,
-+139,
-+1,
-+212,
-+88,
-+254,
-+98,
-+139,
-+1,
-+212,
-+192,
-+243,
-+128,
-+11,
-+32,
-+255,
-+99,
-+139,
-+62,
-+98,
-+192,
-+243,
-+188,
-+10,
-+88,
-+246,
-+98,
-+139,
-+1,
-+212,
-+240,
-+246,
-+98,
-+139,
-+50,
-+210,
-+0,
-+247,
-+163,
-+128,
-+59,
-+146,
-+0,
-+247,
-+160,
-+128,
-+1,
-+36,
-+88,
-+254,
-+160,
-+128,
-+1,
-+36,
-+192,
-+243,
-+128,
-+11,
-+0,
-+247,
-+163,
-+128,
-+58,
-+98,
-+64,
-+255,
-+35,
-+240,
-+0,
-+100,
-+192,
-+243,
-+128,
-+10,
-+64,
-+255,
-+163,
-+128,
-+0,
-+164,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+160,
-+128,
-+1,
-+36,
-+240,
-+246,
-+160,
-+128,
-+50,
-+34,
-+8,
-+255,
-+227,
-+143,
-+54,
-+242,
-+192,
-+243,
-+60,
-+128,
-+40,
-+255,
-+227,
-+142,
-+54,
-+178,
-+192,
-+243,
-+60,
-+128,
-+0,
-+254,
-+0,
-+240,
-+39,
-+10,
-+0,
-+240,
-+60,
-+128,
-+8,
-+255,
-+163,
-+143,
-+45,
-+226,
-+192,
-+243,
-+60,
-+128,
-+0,
-+254,
-+0,
-+240,
-+44,
-+10,
-+0,
-+240,
-+60,
-+0,
-+0,
-+254,
-+0,
-+240,
-+40,
-+10,
-+0,
-+240,
-+60,
-+128,
-+8,
-+255,
-+163,
-+142,
-+2,
-+162,
-+192,
-+243,
-+60,
-+128,
-+90,
-+0,
-+169,
-+3,
-+14,
-+96,
-+4,
-+31,
-+169,
-+3,
-+30,
-+96,
-+1,
-+31,
-+73,
-+64,
-+52,
-+64,
-+45,
-+64,
-+2,
-+64,
-+10,
-+64,
-+64,
-+198,
-+1,
-+7,
-+8,
-+232,
-+63,
-+0,
-+0,
-+0,
-+6,
-+232,
-+253,
-+255,
-+255,
-+255,
-+0,
-+246,
-+0,
-+0,
-+0,
-+4,
-+215,
-+64,
-+3,
-+96,
-+2,
-+248,
-+0,
-+35,
-+0,
-+0,
-+64,
-+56,
-+0,
-+0,
-+4,
-+248,
-+0,
-+36,
-+0,
-+0,
-+64,
-+56,
-+8,
-+0,
-+0,
-+240,
-+64,
-+0,
-+132,
-+3,
-+30,
-+106,
-+137,
-+24,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+196,
-+243,
-+0,
-+0,
-+128,
-+240,
-+0,
-+0,
-+132,
-+3,
-+128,
-+144,
-+158,
-+0,
-+131,
-+98,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+150,
-+0,
-+131,
-+102,
-+0,
-+158,
-+81,
-+0,
-+2,
-+248,
-+0,
-+35,
-+0,
-+0,
-+64,
-+56,
-+0,
-+0,
-+4,
-+248,
-+0,
-+36,
-+0,
-+0,
-+64,
-+56,
-+8,
-+0,
-+0,
-+240,
-+64,
-+0,
-+132,
-+3,
-+30,
-+106,
-+137,
-+24,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+196,
-+243,
-+0,
-+0,
-+128,
-+240,
-+0,
-+0,
-+132,
-+3,
-+128,
-+144,
-+122,
-+0,
-+131,
-+98,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+114,
-+0,
-+131,
-+102,
-+0,
-+248,
-+64,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+30,
-+106,
-+139,
-+24,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+196,
-+243,
-+0,
-+0,
-+128,
-+248,
-+0,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+144,
-+128,
-+0,
-+188,
-+64,
-+67,
-+232,
-+0,
-+2,
-+0,
-+0,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+117,
-+0,
-+195,
-+232,
-+0,
-+2,
-+0,
-+0,
-+12,
-+128,
-+7,
-+192,
-+130,
-+248,
-+0,
-+0,
-+112,
-+192,
-+224,
-+16,
-+195,
-+31,
-+132,
-+248,
-+1,
-+0,
-+112,
-+0,
-+224,
-+16,
-+203,
-+31,
-+3,
-+99,
-+131,
-+71,
-+68,
-+232,
-+32,
-+0,
-+0,
-+0,
-+0,
-+99,
-+2,
-+99,
-+23,
-+102,
-+7,
-+106,
-+127,
-+156,
-+168,
-+255,
-+0,
-+248,
-+64,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+30,
-+106,
-+139,
-+24,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+196,
-+243,
-+0,
-+0,
-+128,
-+248,
-+0,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+144,
-+72,
-+0,
-+188,
-+64,
-+67,
-+232,
-+0,
-+2,
-+0,
-+0,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+61,
-+0,
-+195,
-+232,
-+0,
-+2,
-+0,
-+0,
-+12,
-+128,
-+7,
-+192,
-+130,
-+248,
-+0,
-+0,
-+112,
-+192,
-+224,
-+16,
-+195,
-+31,
-+132,
-+248,
-+1,
-+0,
-+112,
-+0,
-+224,
-+16,
-+203,
-+31,
-+25,
-+102,
-+9,
-+106,
-+2,
-+30,
-+41,
-+3,
-+26,
-+87,
-+162,
-+64,
-+64,
-+198,
-+1,
-+23,
-+127,
-+158,
-+75,
-+255,
-+239,
-+3,
-+0,
-+254,
-+128,
-+143,
-+94,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+143,
-+95,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+142,
-+208,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+142,
-+209,
-+0,
-+0,
-+240,
-+12,
-+0,
-+128,
-+144,
-+47,
-+0,
-+8,
-+255,
-+227,
-+23,
-+0,
-+244,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+35,
-+52,
-+0,
-+180,
-+192,
-+51,
-+0,
-+0,
-+111,
-+3,
-+239,
-+3,
-+0,
-+254,
-+128,
-+143,
-+14,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+143,
-+15,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+142,
-+16,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+142,
-+17,
-+0,
-+0,
-+240,
-+12,
-+0,
-+128,
-+144,
-+13,
-+0,
-+8,
-+255,
-+227,
-+3,
-+0,
-+244,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+35,
-+4,
-+0,
-+180,
-+192,
-+51,
-+0,
-+0,
-+111,
-+3,
-+32,
-+246,
-+192,
-+11,
-+1,
-+16,
-+32,
-+246,
-+2,
-+140,
-+47,
-+240,
-+32,
-+247,
-+35,
-+141,
-+63,
-+178,
-+64,
-+254,
-+35,
-+141,
-+2,
-+68,
-+192,
-+243,
-+128,
-+11,
-+32,
-+255,
-+35,
-+240,
-+58,
-+226,
-+192,
-+243,
-+188,
-+10,
-+0,
-+254,
-+0,
-+141,
-+4,
-+4,
-+0,
-+240,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+3,
-+68,
-+240,
-+246,
-+35,
-+141,
-+48,
-+66,
-+0,
-+247,
-+227,
-+143,
-+52,
-+242,
-+32,
-+247,
-+227,
-+142,
-+52,
-+178,
-+90,
-+0,
-+161,
-+3,
-+6,
-+64,
-+23,
-+64,
-+96,
-+8,
-+70,
-+98,
-+97,
-+8,
-+70,
-+98,
-+98,
-+8,
-+70,
-+98,
-+99,
-+8,
-+70,
-+98,
-+100,
-+8,
-+70,
-+98,
-+101,
-+8,
-+70,
-+98,
-+255,
-+159,
-+94,
-+233,
-+23,
-+102,
-+7,
-+106,
-+112,
-+30,
-+33,
-+3,
++0xa9,  0x03,  0x3e,  0x40,  0x4f,  0x40,  0x03,  0xb0,   // 0000
++0x20,  0x00,  0x0c,  0xf8,  0x38,  0x88,  0x80,  0x03,   // 0008
++0xc0,  0xf8,  0x00,  0x00,  0x40,  0xb0,  0x00,  0x02,   // 0010
++0x0c,  0xf8,  0x38,  0xa8,  0x80,  0x03,  0xc0,  0xf8,   // 0018
++0x00,  0x00,  0x00,  0x60,  0x03,  0xb0,  0x20,  0x00,   // 0020
++0x07,  0xb0,  0x00,  0x02,  0x08,  0xb0,  0x00,  0x04,   // 0028
++0x04,  0xb0,  0x40,  0x00,  0x05,  0xb0,  0x00,  0x02,   // 0030
++0x59,  0xb0,  0xc0,  0xfd,  0x0b,  0x12,  0x5b,  0x7a,   // 0038
++0x5b,  0x7c,  0x4a,  0xc3,  0x50,  0x17,  0x02,  0x6f,   // 0040
++0x02,  0x6a,  0x32,  0x18,  0x0a,  0x6a,  0x16,  0x40,   // 0048
++0x04,  0x18,  0x1a,  0x66,  0x80,  0x90,  0x32,  0x00,   // 0050
++0x0c,  0xf8,  0x38,  0x80,  0x80,  0x03,  0xc0,  0x08,   // 0058
++0x18,  0x00,  0x80,  0x90,  0x51,  0x00,  0x04,  0xff,   // 0060
++0x30,  0xc0,  0x80,  0x03,  0x20,  0x08,  0x10,  0x00,   // 0068
++0x4c,  0xfe,  0x30,  0xc0,  0x09,  0x04,  0x20,  0x08,   // 0070
++0x00,  0x00,  0x04,  0xfc,  0x38,  0x90,  0x80,  0x02,   // 0078
++0xc0,  0x0b,  0x02,  0x00,  0x80,  0x90,  0x40,  0x00,   // 0080
++0x04,  0xff,  0x30,  0xc0,  0x80,  0x03,  0x20,  0x08,   // 0088
++0x14,  0x00,  0x4c,  0xfe,  0x30,  0xc0,  0x06,  0x04,   // 0090
++0x20,  0x08,  0x00,  0x00,  0x8c,  0xf8,  0x2c,  0xe0,   // 0098
++0x80,  0x03,  0x20,  0x30,  0x04,  0x00,  0x80,  0x45,   // 00a0
++0x71,  0x42,  0xf2,  0x8c,  0xd1,  0xc0,  0x59,  0xb0,   // 00a8
++0x40,  0x02,  0x00,  0x9e,  0x6d,  0x00,  0x29,  0x03,   // 00b0
++0x00,  0xf4,  0x38,  0x80,  0x00,  0x0c,  0xb6,  0x40,   // 00b8
++0x8c,  0xf8,  0x20,  0xe0,  0x80,  0x03,  0x00,  0x30,   // 00c0
++0x18,  0x00,  0x15,  0x40,  0x08,  0xf0,  0x38,  0x80,   // 00c8
++0x85,  0x0b,  0x66,  0xb5,  0xe0,  0xff,  0x88,  0xf0,   // 00d0
++0x24,  0xe0,  0x86,  0x03,  0x0c,  0x60,  0x64,  0x08,   // 00d8
++0x46,  0x62,  0x49,  0xc3,  0x50,  0x27,  0x04,  0x6a,   // 00e0
++0x84,  0x6e,  0x07,  0x18,  0x69,  0xa0,  0x04,  0x5f,   // 00e8
++0x1c,  0x8b,  0xf7,  0xc8,  0x45,  0x76,  0x6b,  0x1f,   // 00f0
++0xb6,  0x40,  0x04,  0xb0,  0x40,  0x00,  0x05,  0xb0,   // 00f8
++0x00,  0x02,  0x5a,  0x00,  0x06,  0xb4,  0x10,  0x00,   // 0100
++0xa4,  0xff,  0x24,  0xcc,  0x60,  0x02,  0x00,  0xf8,   // 0108
++0x3e,  0x00,  0x03,  0xff,  0x37,  0xd0,  0x78,  0x03,   // 0110
++0xe0,  0x03,  0xbe,  0x0b,  0x10,  0x8b,  0xf6,  0x5b,   // 0118
++0x00,  0x67,  0x5a,  0x00,  0x06,  0xb4,  0x10,  0x00,   // 0120
++0xa4,  0xff,  0x24,  0xcc,  0xe0,  0x02,  0x00,  0xf8,   // 0128
++0x3e,  0x00,  0x03,  0xff,  0x37,  0xd0,  0x78,  0x03,   // 0130
++0xe0,  0x03,  0xbe,  0x0b,  0x10,  0x8b,  0xf6,  0x5b,   // 0138
++0x00,  0x67,  0x5a,  0x00,  0x00,  0xf4,  0x38,  0x80,   // 0140
++0x00,  0x04,  0x20,  0xb5,  0x00,  0x08,  0x04,  0xb0,   // 0148
++0x20,  0x00,  0x8e,  0xf8,  0x20,  0xe0,  0x80,  0x03,   // 0150
++0xc0,  0x43,  0x00,  0x00,  0x08,  0xf0,  0x38,  0x80,   // 0158
++0x81,  0x03,  0x26,  0xb5,  0xe0,  0xff,  0x88,  0xf0,   // 0160
++0x20,  0xe0,  0x86,  0x03,  0x08,  0x60,  0x64,  0x08,   // 0168
++0x46,  0x62,  0x45,  0xc3,  0x50,  0x27,  0x04,  0x6a,   // 0170
++0xa4,  0x6e,  0x7f,  0x90,  0xbf,  0xff,  0x65,  0xa0,   // 0178
++0x04,  0x07,  0x18,  0x8b,  0xf6,  0xc8,  0x41,  0x76,   // 0180
++0x6a,  0x1f,  0x5a,  0x00,  0xe1,  0x40,  0xf2,  0x40,   // 0188
++0x0f,  0x7b,  0x02,  0x6f,  0x03,  0xb0,  0x80,  0x00,   // 0190
++0x07,  0xb0,  0x00,  0x02,  0xe8,  0x00,  0x08,  0x6d,   // 0198
++0xe8,  0xbf,  0x60,  0x01,  0x03,  0x18,  0x48,  0xb0,   // 01a0
++0x20,  0x10,  0x89,  0x40,  0x1a,  0x40,  0x02,  0x6a,   // 01a8
++0x24,  0x18,  0xa1,  0x40,  0x98,  0x40,  0xf2,  0x4a,   // 01b0
++0x06,  0x1e,  0xff,  0x9f,  0xc5,  0xff,  0x21,  0xb5,   // 01b8
++0x00,  0x08,  0x98,  0x40,  0x04,  0xb0,  0x40,  0x00,   // 01c0
++0x95,  0x60,  0x80,  0x90,  0x18,  0x00,  0x48,  0xb0,   // 01c8
++0x00,  0x04,  0x41,  0x76,  0x80,  0x90,  0x13,  0x00,   // 01d0
++0x04,  0xb0,  0x00,  0x02,  0x65,  0x60,  0x91,  0x40,   // 01d8
++0xa8,  0x40,  0x80,  0x90,  0x0c,  0x00,  0x48,  0xb0,   // 01e0
++0x00,  0x04,  0x41,  0x76,  0x80,  0x90,  0x07,  0x00,   // 01e8
++0x4a,  0xb0,  0x00,  0x08,  0xf2,  0x8c,  0xdf,  0xc0,   // 01f0
++0x29,  0x03,  0xef,  0x03,  0x0c,  0xf8,  0x38,  0x80,   // 01f8
++0x80,  0x03,  0xc0,  0xf8,  0x04,  0x00,  0x0c,  0xf8,   // 0200
++0x38,  0x84,  0xc0,  0x03,  0xc0,  0xf8,  0x04,  0x00,   // 0208
++0x00,  0x60,  0xff,  0x9f,  0x79,  0xff,  0x00,  0xb0,   // 0210
++0x00,  0x04,  0xff,  0x9f,  0x85,  0xff,  0x04,  0xff,   // 0218
++0x30,  0xcc,  0x10,  0x03,  0xe0,  0xfb,  0x3e,  0x00,   // 0220
++0x04,  0xff,  0x33,  0xcc,  0x80,  0x03,  0xe0,  0xfb,   // 0228
++0x10,  0x00,  0x4c,  0xfe,  0x33,  0xcc,  0x80,  0x03,   // 0230
++0xe0,  0xfb,  0x14,  0x00,  0x80,  0x40,  0x06,  0xb0,   // 0238
++0x40,  0x00,  0x8c,  0xf8,  0x2f,  0xe0,  0x80,  0x03,   // 0240
++0xe0,  0x63,  0x00,  0x00,  0x20,  0xf7,  0xf0,  0xcf,   // 0248
++0x10,  0x03,  0x20,  0xf7,  0xb0,  0xcf,  0x11,  0x13,   // 0250
++0x20,  0xf7,  0x70,  0xcf,  0x12,  0x23,  0x20,  0xf7,   // 0258
++0x30,  0xcf,  0x13,  0x33,  0x20,  0xf7,  0xf0,  0xce,   // 0260
++0x14,  0x43,  0x20,  0xf7,  0xb0,  0xce,  0x15,  0x53,   // 0268
++0x20,  0xf7,  0x70,  0xce,  0x16,  0x63,  0x20,  0xf7,   // 0270
++0x30,  0xce,  0x17,  0x73,  0x20,  0xf7,  0xf0,  0xcd,   // 0278
++0x18,  0x83,  0x20,  0xf7,  0xb0,  0xcd,  0x19,  0x93,   // 0280
++0x20,  0xf7,  0x70,  0xcd,  0x1a,  0xa3,  0x20,  0xf7,   // 0288
++0x30,  0xcd,  0x1b,  0xb3,  0x20,  0xf7,  0xf0,  0xcc,   // 0290
++0x1c,  0xc3,  0x20,  0xf7,  0xb0,  0xcc,  0x1d,  0xd3,   // 0298
++0x20,  0xf7,  0x70,  0xcc,  0x1e,  0xe3,  0x20,  0xf7,   // 02a0
++0x30,  0xcc,  0x1f,  0xf3,  0x04,  0xff,  0x33,  0xcc,   // 02a8
++0x80,  0x03,  0xe0,  0xfb,  0x10,  0x00,  0x4c,  0xfe,   // 02b0
++0x33,  0xcc,  0x80,  0x03,  0xe0,  0xfb,  0x14,  0x00,   // 02b8
++0x00,  0xb5,  0x20,  0x00,  0x8c,  0xf8,  0x2f,  0xe0,   // 02c0
++0x80,  0x03,  0xe0,  0x63,  0x00,  0x00,  0x6f,  0x03,   // 02c8
++0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,   // 02d0
++0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,   // 02d8
 +};
 diff --git a/libavcodec/rpi_hevc_transform8.h b/libavcodec/rpi_hevc_transform8.h
 new file mode 100644
-index 0000000000..2ba8583694
+index 0000000000..1128a2c054
 --- /dev/null
 +++ b/libavcodec/rpi_hevc_transform8.h
-@@ -0,0 +1,11602 @@
+@@ -0,0 +1,94 @@
 +static const unsigned char rpi_hevc_transform8 [] = {
-+21,
-+106,
-+0,
-+144,
-+197,
-+17,
-+85,
-+106,
-+0,
-+144,
-+143,
-+22,
-+0,
-+158,
-+42,
-+16,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+0,
-+169,
-+3,
-+62,
-+64,
-+79,
-+64,
-+3,
-+232,
-+32,
-+0,
-+0,
-+0,
-+12,
-+248,
-+0,
-+136,
-+0,
-+0,
-+192,
-+248,
-+0,
-+0,
-+64,
-+232,
-+0,
-+2,
-+0,
-+0,
-+12,
-+248,
-+0,
-+168,
-+0,
-+0,
-+192,
-+248,
-+0,
-+0,
-+0,
-+96,
-+3,
-+232,
-+32,
-+0,
-+0,
-+0,
-+7,
-+232,
-+0,
-+2,
-+0,
-+0,
-+8,
-+232,
-+0,
-+4,
-+0,
-+0,
-+4,
-+232,
-+64,
-+0,
-+0,
-+0,
-+5,
-+232,
-+0,
-+8,
-+0,
-+0,
-+57,
-+239,
-+192,
-+253,
-+255,
-+255,
-+43,
-+239,
-+64,
-+0,
-+0,
-+0,
-+91,
-+122,
-+91,
-+124,
-+74,
-+195,
-+80,
-+23,
-+2,
-+111,
-+2,
-+106,
-+50,
-+24,
-+10,
-+106,
-+22,
-+64,
-+4,
-+24,
-+26,
-+102,
-+128,
-+144,
-+51,
-+0,
-+12,
-+248,
-+0,
-+128,
-+0,
-+0,
-+192,
-+8,
-+24,
-+0,
-+128,
-+144,
-+94,
-+0,
-+4,
-+255,
-+48,
-+192,
-+128,
-+3,
-+32,
-+8,
-+16,
-+0,
-+76,
-+254,
-+48,
-+192,
-+9,
-+4,
-+32,
-+8,
-+0,
-+0,
-+4,
-+254,
-+0,
-+144,
-+128,
-+2,
-+0,
-+8,
-+2,
-+0,
-+128,
-+144,
-+77,
-+0,
-+4,
-+255,
-+48,
-+192,
-+128,
-+3,
-+32,
-+8,
-+20,
-+0,
-+76,
-+254,
-+48,
-+192,
-+4,
-+4,
-+32,
-+8,
-+0,
-+0,
-+140,
-+248,
-+44,
-+0,
-+0,
-+0,
-+32,
-+48,
-+4,
-+0,
-+128,
-+69,
-+113,
-+66,
-+242,
-+140,
-+209,
-+192,
-+57,
-+239,
-+64,
-+2,
-+0,
-+0,
-+0,
-+158,
-+127,
-+0,
-+41,
-+3,
-+0,
-+254,
-+0,
-+128,
-+0,
-+4,
-+0,
-+0,
-+0,
-+0,
-+182,
-+64,
-+140,
-+248,
-+32,
-+0,
-+0,
-+0,
-+0,
-+48,
-+24,
-+0,
-+21,
-+64,
-+8,
-+248,
-+0,
-+128,
-+0,
-+0,
-+192,
-+3,
-+20,
-+0,
-+102,
-+237,
-+224,
-+255,
-+255,
-+255,
-+136,
-+248,
-+32,
-+0,
-+0,
-+0,
-+0,
-+240,
-+24,
-+0,
-+12,
-+96,
-+100,
-+8,
-+70,
-+192,
-+68,
-+55,
-+73,
-+195,
-+80,
-+39,
-+4,
-+106,
-+132,
-+110,
-+9,
-+24,
-+105,
-+160,
-+4,
-+95,
-+28,
-+139,
-+246,
-+200,
-+69,
-+232,
-+32,
-+0,
-+0,
-+0,
-+99,
-+31,
-+182,
-+64,
-+4,
-+232,
-+64,
-+0,
-+0,
-+0,
-+5,
-+232,
-+0,
-+8,
-+0,
-+0,
-+90,
-+0,
-+70,
-+192,
-+80,
-+7,
-+164,
-+255,
-+36,
-+204,
-+96,
-+2,
-+0,
-+248,
-+62,
-+0,
-+3,
-+255,
-+55,
-+208,
-+120,
-+3,
-+224,
-+3,
-+190,
-+11,
-+16,
-+139,
-+246,
-+91,
-+0,
-+103,
-+90,
-+0,
-+70,
-+192,
-+80,
-+7,
-+164,
-+255,
-+36,
-+204,
-+224,
-+2,
-+0,
-+248,
-+62,
-+0,
-+3,
-+255,
-+55,
-+208,
-+120,
-+3,
-+224,
-+3,
-+190,
-+11,
-+16,
-+139,
-+246,
-+91,
-+0,
-+103,
-+90,
-+0,
-+0,
-+246,
-+0,
-+128,
-+0,
-+4,
-+32,
-+237,
-+0,
-+8,
-+0,
-+0,
-+4,
-+232,
-+32,
-+0,
-+0,
-+0,
-+142,
-+248,
-+32,
-+0,
-+0,
-+0,
-+192,
-+67,
-+0,
-+0,
-+8,
-+240,
-+0,
-+128,
-+129,
-+3,
-+38,
-+237,
-+224,
-+255,
-+255,
-+255,
-+136,
-+240,
-+32,
-+0,
-+134,
-+3,
-+8,
-+96,
-+100,
-+8,
-+70,
-+192,
-+68,
-+55,
-+69,
-+195,
-+80,
-+39,
-+4,
-+106,
-+164,
-+110,
-+127,
-+144,
-+185,
-+255,
-+101,
-+160,
-+4,
-+7,
-+24,
-+139,
-+245,
-+200,
-+65,
-+232,
-+32,
-+0,
-+0,
-+0,
-+102,
-+31,
-+90,
-+0,
-+225,
-+64,
-+242,
-+64,
-+79,
-+195,
-+80,
-+127,
-+2,
-+111,
-+3,
-+232,
-+128,
-+0,
-+0,
-+0,
-+7,
-+232,
-+0,
-+2,
-+0,
-+0,
-+232,
-+0,
-+8,
-+109,
-+232,
-+191,
-+0,
-+222,
-+4,
-+24,
-+8,
-+237,
-+32,
-+16,
-+0,
-+0,
-+137,
-+64,
-+26,
-+64,
-+2,
-+106,
-+46,
-+24,
-+161,
-+64,
-+152,
-+64,
-+242,
-+74,
-+7,
-+30,
-+255,
-+159,
-+187,
-+255,
-+33,
-+237,
-+0,
-+8,
-+0,
-+0,
-+152,
-+64,
-+4,
-+232,
-+64,
-+0,
-+0,
-+0,
-+149,
-+96,
-+128,
-+144,
-+32,
-+0,
-+72,
-+232,
-+0,
-+4,
-+0,
-+0,
-+65,
-+232,
-+32,
-+0,
-+0,
-+0,
-+128,
-+144,
-+24,
-+0,
-+4,
-+232,
-+0,
-+8,
-+0,
-+0,
-+69,
-+96,
-+145,
-+64,
-+168,
-+64,
-+128,
-+144,
-+16,
-+0,
-+72,
-+232,
-+0,
-+4,
-+0,
-+0,
-+65,
-+232,
-+32,
-+0,
-+0,
-+0,
-+128,
-+144,
-+8,
-+0,
-+74,
-+232,
-+0,
-+8,
-+0,
-+0,
-+242,
-+140,
-+213,
-+192,
-+41,
-+3,
-+239,
-+3,
-+12,
-+248,
-+0,
-+128,
-+0,
-+0,
-+192,
-+248,
-+4,
-+0,
-+12,
-+248,
-+0,
-+132,
-+64,
-+0,
-+192,
-+248,
-+4,
-+0,
-+0,
-+96,
-+255,
-+159,
-+101,
-+255,
-+0,
-+232,
-+0,
-+4,
-+0,
-+0,
-+255,
-+159,
-+112,
-+255,
-+4,
-+255,
-+48,
-+204,
-+16,
-+3,
-+224,
-+251,
-+62,
-+0,
-+4,
-+255,
-+51,
-+204,
-+128,
-+3,
-+224,
-+251,
-+16,
-+0,
-+76,
-+254,
-+51,
-+204,
-+128,
-+3,
-+224,
-+251,
-+20,
-+0,
-+128,
-+64,
-+6,
-+232,
-+64,
-+0,
-+0,
-+0,
-+140,
-+248,
-+47,
-+0,
-+0,
-+0,
-+224,
-+99,
-+0,
-+0,
-+32,
-+247,
-+240,
-+207,
-+16,
-+3,
-+32,
-+247,
-+176,
-+207,
-+17,
-+19,
-+32,
-+247,
-+112,
-+207,
-+18,
-+35,
-+32,
-+247,
-+48,
-+207,
-+19,
-+51,
-+32,
-+247,
-+240,
-+206,
-+20,
-+67,
-+32,
-+247,
-+176,
-+206,
-+21,
-+83,
-+32,
-+247,
-+112,
-+206,
-+22,
-+99,
-+32,
-+247,
-+48,
-+206,
-+23,
-+115,
-+32,
-+247,
-+240,
-+205,
-+24,
-+131,
-+32,
-+247,
-+176,
-+205,
-+25,
-+147,
-+32,
-+247,
-+112,
-+205,
-+26,
-+163,
-+32,
-+247,
-+48,
-+205,
-+27,
-+179,
-+32,
-+247,
-+240,
-+204,
-+28,
-+195,
-+32,
-+247,
-+176,
-+204,
-+29,
-+211,
-+32,
-+247,
-+112,
-+204,
-+30,
-+227,
-+32,
-+247,
-+48,
-+204,
-+31,
-+243,
-+4,
-+255,
-+51,
-+204,
-+128,
-+3,
-+224,
-+251,
-+16,
-+0,
-+76,
-+254,
-+51,
-+204,
-+128,
-+3,
-+224,
-+251,
-+20,
-+0,
-+0,
-+237,
-+32,
-+0,
-+0,
-+0,
-+140,
-+248,
-+47,
-+0,
-+0,
-+0,
-+224,
-+99,
-+0,
-+0,
-+111,
-+3,
-+4,
-+254,
-+0,
-+128,
-+0,
-+4,
-+0,
-+248,
-+0,
-+0,
-+2,
-+232,
-+32,
-+0,
-+0,
-+0,
-+140,
-+248,
-+32,
-+0,
-+0,
-+0,
-+224,
-+35,
-+0,
-+0,
-+64,
-+232,
-+0,
-+2,
-+0,
-+0,
-+193,
-+232,
-+0,
-+1,
-+0,
-+0,
-+1,
-+106,
-+116,
-+30,
-+90,
-+0,
-+169,
-+3,
-+73,
-+64,
-+52,
-+64,
-+45,
-+64,
-+2,
-+64,
-+10,
-+64,
-+64,
-+198,
-+1,
-+7,
-+8,
-+232,
-+63,
-+0,
-+0,
-+0,
-+6,
-+232,
-+253,
-+255,
-+255,
-+255,
-+0,
-+246,
-+0,
-+0,
-+0,
-+4,
-+215,
-+64,
-+3,
-+96,
-+2,
-+248,
-+0,
-+35,
-+0,
-+0,
-+64,
-+56,
-+0,
-+0,
-+4,
-+248,
-+0,
-+36,
-+0,
-+0,
-+64,
-+56,
-+8,
-+0,
-+0,
-+240,
-+64,
-+0,
-+132,
-+3,
-+128,
-+240,
-+0,
-+0,
-+132,
-+3,
-+128,
-+144,
-+137,
-+0,
-+131,
-+98,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+129,
-+0,
-+131,
-+102,
-+0,
-+158,
-+67,
-+0,
-+2,
-+248,
-+0,
-+35,
-+0,
-+0,
-+64,
-+56,
-+0,
-+0,
-+4,
-+248,
-+0,
-+36,
-+0,
-+0,
-+64,
-+56,
-+8,
-+0,
-+0,
-+240,
-+64,
-+0,
-+132,
-+3,
-+128,
-+240,
-+0,
-+0,
-+132,
-+3,
-+128,
-+144,
-+108,
-+0,
-+131,
-+98,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+100,
-+0,
-+131,
-+102,
-+0,
-+248,
-+64,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+248,
-+0,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+144,
-+161,
-+0,
-+188,
-+64,
-+67,
-+232,
-+0,
-+2,
-+0,
-+0,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+150,
-+0,
-+195,
-+232,
-+0,
-+2,
-+0,
-+0,
-+12,
-+128,
-+7,
-+192,
-+130,
-+248,
-+0,
-+0,
-+112,
-+192,
-+224,
-+16,
-+195,
-+31,
-+132,
-+248,
-+1,
-+0,
-+112,
-+0,
-+224,
-+16,
-+203,
-+31,
-+3,
-+99,
-+131,
-+71,
-+68,
-+232,
-+32,
-+0,
-+0,
-+0,
-+0,
-+99,
-+2,
-+99,
-+23,
-+102,
-+7,
-+106,
-+127,
-+156,
-+182,
-+255,
-+0,
-+248,
-+64,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+248,
-+0,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+144,
-+112,
-+0,
-+188,
-+64,
-+67,
-+232,
-+0,
-+2,
-+0,
-+0,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+101,
-+0,
-+195,
-+232,
-+0,
-+2,
-+0,
-+0,
-+12,
-+128,
-+7,
-+192,
-+130,
-+248,
-+0,
-+0,
-+112,
-+192,
-+224,
-+16,
-+195,
-+31,
-+132,
-+248,
-+1,
-+0,
-+112,
-+0,
-+224,
-+16,
-+203,
-+31,
-+25,
-+102,
-+9,
-+106,
-+2,
-+30,
-+41,
-+3,
-+26,
-+87,
-+162,
-+64,
-+64,
-+198,
-+1,
-+23,
-+127,
-+158,
-+103,
-+255,
-+239,
-+3,
-+0,
-+254,
-+0,
-+143,
-+92,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+64,
-+143,
-+93,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+143,
-+94,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+143,
-+95,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+142,
-+208,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+142,
-+209,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+64,
-+142,
-+210,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+0,
-+142,
-+211,
-+0,
-+0,
-+240,
-+12,
-+0,
-+128,
-+144,
-+107,
-+0,
-+8,
-+255,
-+99,
-+23,
-+0,
-+212,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+163,
-+23,
-+0,
-+228,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+227,
-+23,
-+0,
-+244,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+35,
-+52,
-+0,
-+180,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+99,
-+52,
-+0,
-+164,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+163,
-+52,
-+0,
-+148,
-+192,
-+51,
-+0,
-+0,
-+111,
-+3,
-+239,
-+3,
-+0,
-+254,
-+0,
-+143,
-+12,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+64,
-+143,
-+13,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+143,
-+14,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+143,
-+15,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+142,
-+16,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+142,
-+17,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+64,
-+142,
-+18,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+0,
-+142,
-+19,
-+0,
-+0,
-+240,
-+12,
-+0,
-+128,
-+144,
-+33,
-+0,
-+8,
-+255,
-+99,
-+3,
-+0,
-+212,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+163,
-+3,
-+0,
-+228,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+227,
-+3,
-+0,
-+244,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+35,
-+4,
-+0,
-+180,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+99,
-+4,
-+0,
-+164,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+163,
-+4,
-+0,
-+148,
-+192,
-+51,
-+0,
-+0,
-+111,
-+3,
-+32,
-+246,
-+192,
-+11,
-+1,
-+16,
-+32,
-+246,
-+2,
-+137,
-+47,
-+240,
-+40,
-+246,
-+2,
-+140,
-+47,
-+240,
-+128,
-+245,
-+99,
-+140,
-+5,
-+4,
-+0,
-+247,
-+99,
-+140,
-+1,
-+20,
-+88,
-+246,
-+99,
-+140,
-+1,
-+20,
-+0,
-+247,
-+35,
-+136,
-+62,
-+226,
-+32,
-+247,
-+35,
-+136,
-+32,
-+210,
-+0,
-+247,
-+34,
-+136,
-+63,
-+2,
-+208,
-+246,
-+34,
-+136,
-+0,
-+4,
-+0,
-+247,
-+99,
-+136,
-+58,
-+162,
-+32,
-+247,
-+99,
-+136,
-+33,
-+146,
-+0,
-+247,
-+98,
-+136,
-+59,
-+18,
-+208,
-+246,
-+98,
-+136,
-+0,
-+20,
-+0,
-+247,
-+162,
-+136,
-+33,
-+2,
-+88,
-+246,
-+98,
-+137,
-+2,
-+68,
-+88,
-+246,
-+162,
-+137,
-+3,
-+68,
-+208,
-+254,
-+227,
-+136,
-+60,
-+242,
-+192,
-+243,
-+188,
-+11,
-+208,
-+254,
-+227,
-+136,
-+56,
-+178,
-+192,
-+243,
-+188,
-+10,
-+32,
-+255,
-+226,
-+136,
-+38,
-+58,
-+192,
-+243,
-+60,
-+0,
-+208,
-+254,
-+227,
-+136,
-+59,
-+242,
-+192,
-+243,
-+60,
-+128,
-+32,
-+255,
-+226,
-+136,
-+49,
-+58,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+226,
-+136,
-+34,
-+34,
-+192,
-+243,
-+60,
-+128,
-+32,
-+255,
-+226,
-+136,
-+37,
-+58,
-+192,
-+243,
-+60,
-+128,
-+0,
-+254,
-+192,
-+136,
-+1,
-+4,
-+0,
-+240,
-+0,
-+160,
-+0,
-+255,
-+194,
-+8,
-+0,
-+52,
-+195,
-+243,
-+0,
-+128,
-+0,
-+255,
-+202,
-+40,
-+0,
-+52,
-+195,
-+243,
-+0,
-+128,
-+0,
-+254,
-+0,
-+240,
-+35,
-+10,
-+0,
-+240,
-+60,
-+0,
-+0,
-+254,
-+192,
-+136,
-+1,
-+4,
-+0,
-+240,
-+0,
-+160,
-+0,
-+255,
-+226,
-+140,
-+34,
-+34,
-+195,
-+243,
-+60,
-+0,
-+32,
-+255,
-+227,
-+140,
-+36,
-+58,
-+192,
-+243,
-+60,
-+0,
-+0,
-+254,
-+192,
-+136,
-+0,
-+4,
-+0,
-+240,
-+0,
-+160,
-+16,
-+246,
-+226,
-+136,
-+35,
-+50,
-+16,
-+246,
-+226,
-+136,
-+35,
-+50,
-+32,
-+246,
-+226,
-+136,
-+35,
-+50,
-+32,
-+254,
-+226,
-+136,
-+35,
-+58,
-+192,
-+243,
-+60,
-+0,
-+11,
-+96,
-+0,
-+254,
-+0,
-+240,
-+1,
-+4,
-+0,
-+240,
-+64,
-+115,
-+5,
-+106,
-+0,
-+144,
-+173,
-+1,
-+27,
-+96,
-+0,
-+254,
-+0,
-+240,
-+1,
-+4,
-+0,
-+240,
-+64,
-+147,
-+5,
-+106,
-+0,
-+144,
-+227,
-+0,
-+64,
-+246,
-+163,
-+140,
-+1,
-+4,
-+0,
-+246,
-+192,
-+175,
-+63,
-+2,
-+0,
-+246,
-+192,
-+174,
-+59,
-+2,
-+0,
-+246,
-+128,
-+175,
-+62,
-+2,
-+0,
-+246,
-+128,
-+174,
-+58,
-+2,
-+0,
-+246,
-+64,
-+175,
-+61,
-+2,
-+0,
-+246,
-+64,
-+174,
-+57,
-+2,
-+0,
-+255,
-+43,
-+240,
-+4,
-+212,
-+192,
-+243,
-+128,
-+11,
-+64,
-+254,
-+43,
-+240,
-+1,
-+228,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+240,
-+1,
-+244,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+240,
-+1,
-+180,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+141,
-+0,
-+164,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+3,
-+68,
-+32,
-+247,
-+35,
-+141,
-+191,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+235,
-+143,
-+52,
-+242,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+43,
-+240,
-+2,
-+212,
-+192,
-+243,
-+128,
-+11,
-+0,
-+255,
-+43,
-+240,
-+191,
-+226,
-+192,
-+243,
-+188,
-+10,
-+64,
-+254,
-+43,
-+141,
-+0,
-+180,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+2,
-+68,
-+32,
-+247,
-+35,
-+141,
-+190,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+171,
-+143,
-+52,
-+226,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+43,
-+240,
-+4,
-+180,
-+192,
-+243,
-+128,
-+11,
-+0,
-+255,
-+43,
-+240,
-+191,
-+226,
-+192,
-+243,
-+188,
-+10,
-+128,
-+253,
-+43,
-+240,
-+3,
-+212,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+35,
-+141,
-+1,
-+196,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+3,
-+68,
-+32,
-+247,
-+35,
-+141,
-+189,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+107,
-+143,
-+52,
-+210,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+43,
-+240,
-+4,
-+148,
-+192,
-+243,
-+128,
-+11,
-+64,
-+254,
-+43,
-+240,
-+1,
-+164,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+240,
-+1,
-+180,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+240,
-+1,
-+244,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+43,
-+141,
-+0,
-+228,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+3,
-+68,
-+32,
-+247,
-+35,
-+141,
-+187,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+235,
-+142,
-+52,
-+178,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+43,
-+240,
-+2,
-+148,
-+192,
-+243,
-+128,
-+11,
-+0,
-+255,
-+43,
-+240,
-+187,
-+162,
-+192,
-+243,
-+188,
-+10,
-+64,
-+254,
-+43,
-+141,
-+0,
-+244,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+2,
-+68,
-+32,
-+247,
-+35,
-+141,
-+186,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+171,
-+142,
-+52,
-+162,
-+192,
-+243,
-+60,
-+128,
-+0,
-+255,
-+43,
-+240,
-+4,
-+244,
-+192,
-+243,
-+128,
-+11,
-+0,
-+255,
-+43,
-+240,
-+187,
-+162,
-+192,
-+243,
-+188,
-+10,
-+128,
-+253,
-+43,
-+240,
-+3,
-+148,
-+192,
-+243,
-+128,
-+10,
-+64,
-+254,
-+35,
-+141,
-+1,
-+132,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+3,
-+68,
-+32,
-+247,
-+35,
-+141,
-+185,
-+66,
-+240,
-+246,
-+35,
-+141,
-+50,
-+66,
-+0,
-+255,
-+107,
-+142,
-+52,
-+146,
-+192,
-+243,
-+60,
-+128,
-+64,
-+255,
-+98,
-+141,
-+0,
-+52,
-+192,
-+243,
-+0,
-+0,
-+0,
-+254,
-+0,
-+240,
-+53,
-+10,
-+0,
-+240,
-+60,
-+0,
-+0,
-+254,
-+0,
-+240,
-+1,
-+4,
-+0,
-+240,
-+64,
-+147,
-+5,
-+106,
-+0,
-+144,
-+177,
-+0,
-+88,
-+246,
-+163,
-+140,
-+1,
-+4,
-+128,
-+245,
-+99,
-+141,
-+10,
-+4,
-+88,
-+246,
-+162,
-+138,
-+1,
-+68,
-+0,
-+247,
-+162,
-+138,
-+36,
-+162,
-+88,
-+254,
-+162,
-+138,
-+3,
-+164,
-+192,
-+243,
-+128,
-+11,
-+0,
-+255,
-+226,
-+137,
-+32,
-+2,
-+195,
-+243,
-+60,
-+0,
-+32,
-+247,
-+226,
-+137,
-+42,
-+114,
-+0,
-+255,
-+34,
-+138,
-+33,
-+18,
-+195,
-+243,
-+60,
-+0,
-+32,
-+247,
-+34,
-+138,
-+42,
-+130,
-+16,
-+246,
-+98,
-+138,
-+40,
-+114,
-+16,
-+246,
-+98,
-+138,
-+41,
-+146,
-+32,
-+246,
-+98,
-+138,
-+41,
-+146,
-+32,
-+246,
-+226,
-+137,
-+41,
-+146,
-+40,
-+246,
-+34,
-+138,
-+41,
-+146,
-+32,
-+247,
-+163,
-+141,
-+63,
-+178,
-+32,
-+247,
-+227,
-+141,
-+62,
-+162,
-+0,
-+254,
-+0,
-+240,
-+8,
-+4,
-+0,
-+240,
-+128,
-+11,
-+128,
-+253,
-+35,
-+240,
-+9,
-+100,
-+192,
-+243,
-+128,
-+10,
-+128,
-+253,
-+163,
-+141,
-+128,
-+115,
-+192,
-+243,
-+152,
-+10,
-+88,
-+246,
-+163,
-+141,
-+4,
-+100,
-+208,
-+246,
-+35,
-+139,
-+0,
-+100,
-+32,
-+255,
-+34,
-+139,
-+53,
-+202,
-+192,
-+243,
-+60,
-+128,
-+0,
-+254,
-+0,
-+139,
-+0,
-+4,
-+0,
-+240,
-+0,
-+160,
-+240,
-+246,
-+163,
-+141,
-+48,
-+98,
-+0,
-+247,
-+99,
-+139,
-+63,
-+210,
-+0,
-+247,
-+98,
-+139,
-+1,
-+212,
-+88,
-+254,
-+98,
-+139,
-+1,
-+212,
-+192,
-+243,
-+128,
-+11,
-+32,
-+255,
-+99,
-+139,
-+62,
-+98,
-+192,
-+243,
-+188,
-+10,
-+88,
-+246,
-+98,
-+139,
-+1,
-+212,
-+240,
-+246,
-+98,
-+139,
-+50,
-+210,
-+0,
-+247,
-+163,
-+128,
-+59,
-+146,
-+0,
-+247,
-+160,
-+128,
-+1,
-+36,
-+88,
-+254,
-+160,
-+128,
-+1,
-+36,
-+192,
-+243,
-+128,
-+11,
-+0,
-+247,
-+163,
-+128,
-+58,
-+98,
-+64,
-+255,
-+35,
-+240,
-+0,
-+100,
-+192,
-+243,
-+128,
-+10,
-+64,
-+255,
-+163,
-+128,
-+0,
-+164,
-+192,
-+243,
-+128,
-+10,
-+88,
-+246,
-+160,
-+128,
-+1,
-+36,
-+240,
-+246,
-+160,
-+128,
-+50,
-+34,
-+8,
-+255,
-+227,
-+143,
-+54,
-+242,
-+192,
-+243,
-+60,
-+128,
-+40,
-+255,
-+227,
-+142,
-+54,
-+178,
-+192,
-+243,
-+60,
-+128,
-+0,
-+254,
-+0,
-+240,
-+39,
-+10,
-+0,
-+240,
-+60,
-+128,
-+8,
-+255,
-+163,
-+143,
-+45,
-+226,
-+192,
-+243,
-+60,
-+128,
-+0,
-+254,
-+0,
-+240,
-+44,
-+10,
-+0,
-+240,
-+60,
-+0,
-+0,
-+254,
-+0,
-+240,
-+40,
-+10,
-+0,
-+240,
-+60,
-+128,
-+8,
-+255,
-+163,
-+142,
-+2,
-+162,
-+192,
-+243,
-+60,
-+128,
-+90,
-+0,
-+169,
-+3,
-+14,
-+96,
-+4,
-+31,
-+169,
-+3,
-+30,
-+96,
-+1,
-+31,
-+73,
-+64,
-+52,
-+64,
-+45,
-+64,
-+2,
-+64,
-+10,
-+64,
-+64,
-+198,
-+1,
-+7,
-+8,
-+232,
-+63,
-+0,
-+0,
-+0,
-+6,
-+232,
-+253,
-+255,
-+255,
-+255,
-+0,
-+246,
-+0,
-+0,
-+0,
-+4,
-+215,
-+64,
-+3,
-+96,
-+2,
-+248,
-+0,
-+35,
-+0,
-+0,
-+64,
-+56,
-+0,
-+0,
-+4,
-+248,
-+0,
-+36,
-+0,
-+0,
-+64,
-+56,
-+8,
-+0,
-+0,
-+240,
-+64,
-+0,
-+132,
-+3,
-+30,
-+106,
-+137,
-+24,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+196,
-+243,
-+0,
-+0,
-+128,
-+240,
-+0,
-+0,
-+132,
-+3,
-+128,
-+144,
-+158,
-+0,
-+131,
-+98,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+150,
-+0,
-+131,
-+102,
-+0,
-+158,
-+81,
-+0,
-+2,
-+248,
-+0,
-+35,
-+0,
-+0,
-+64,
-+56,
-+0,
-+0,
-+4,
-+248,
-+0,
-+36,
-+0,
-+0,
-+64,
-+56,
-+8,
-+0,
-+0,
-+240,
-+64,
-+0,
-+132,
-+3,
-+30,
-+106,
-+137,
-+24,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+196,
-+243,
-+0,
-+0,
-+128,
-+240,
-+0,
-+0,
-+132,
-+3,
-+128,
-+144,
-+122,
-+0,
-+131,
-+98,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+114,
-+0,
-+131,
-+102,
-+0,
-+248,
-+64,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+30,
-+106,
-+139,
-+24,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+196,
-+243,
-+0,
-+0,
-+128,
-+248,
-+0,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+144,
-+128,
-+0,
-+188,
-+64,
-+67,
-+232,
-+0,
-+2,
-+0,
-+0,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+117,
-+0,
-+195,
-+232,
-+0,
-+2,
-+0,
-+0,
-+12,
-+128,
-+7,
-+192,
-+130,
-+248,
-+0,
-+0,
-+112,
-+192,
-+224,
-+16,
-+195,
-+31,
-+132,
-+248,
-+1,
-+0,
-+112,
-+0,
-+224,
-+16,
-+203,
-+31,
-+3,
-+99,
-+131,
-+71,
-+68,
-+232,
-+32,
-+0,
-+0,
-+0,
-+0,
-+99,
-+2,
-+99,
-+23,
-+102,
-+7,
-+106,
-+127,
-+156,
-+168,
-+255,
-+0,
-+248,
-+64,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+30,
-+106,
-+139,
-+24,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+196,
-+243,
-+0,
-+0,
-+128,
-+248,
-+0,
-+0,
-+112,
-+0,
-+192,
-+243,
-+211,
-+31,
-+128,
-+144,
-+72,
-+0,
-+188,
-+64,
-+67,
-+232,
-+0,
-+2,
-+0,
-+0,
-+0,
-+255,
-+64,
-+0,
-+0,
-+20,
-+200,
-+243,
-+0,
-+0,
-+128,
-+144,
-+61,
-+0,
-+195,
-+232,
-+0,
-+2,
-+0,
-+0,
-+12,
-+128,
-+7,
-+192,
-+130,
-+248,
-+0,
-+0,
-+112,
-+192,
-+224,
-+16,
-+195,
-+31,
-+132,
-+248,
-+1,
-+0,
-+112,
-+0,
-+224,
-+16,
-+203,
-+31,
-+25,
-+102,
-+9,
-+106,
-+2,
-+30,
-+41,
-+3,
-+26,
-+87,
-+162,
-+64,
-+64,
-+198,
-+1,
-+23,
-+127,
-+158,
-+75,
-+255,
-+239,
-+3,
-+0,
-+254,
-+128,
-+143,
-+94,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+143,
-+95,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+142,
-+208,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+142,
-+209,
-+0,
-+0,
-+240,
-+12,
-+0,
-+128,
-+144,
-+47,
-+0,
-+8,
-+255,
-+227,
-+23,
-+0,
-+244,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+35,
-+52,
-+0,
-+180,
-+192,
-+51,
-+0,
-+0,
-+111,
-+3,
-+239,
-+3,
-+0,
-+254,
-+128,
-+143,
-+14,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+143,
-+15,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+192,
-+142,
-+16,
-+0,
-+0,
-+240,
-+12,
-+0,
-+0,
-+254,
-+128,
-+142,
-+17,
-+0,
-+0,
-+240,
-+12,
-+0,
-+128,
-+144,
-+13,
-+0,
-+8,
-+255,
-+227,
-+3,
-+0,
-+244,
-+192,
-+51,
-+0,
-+0,
-+8,
-+255,
-+35,
-+4,
-+0,
-+180,
-+192,
-+51,
-+0,
-+0,
-+111,
-+3,
-+32,
-+246,
-+192,
-+11,
-+1,
-+16,
-+32,
-+246,
-+2,
-+140,
-+47,
-+240,
-+32,
-+247,
-+35,
-+141,
-+63,
-+178,
-+64,
-+254,
-+35,
-+141,
-+2,
-+68,
-+192,
-+243,
-+128,
-+11,
-+32,
-+255,
-+35,
-+240,
-+58,
-+226,
-+192,
-+243,
-+188,
-+10,
-+0,
-+254,
-+0,
-+141,
-+4,
-+4,
-+0,
-+240,
-+128,
-+10,
-+88,
-+246,
-+35,
-+141,
-+3,
-+68,
-+240,
-+246,
-+35,
-+141,
-+48,
-+66,
-+0,
-+247,
-+227,
-+143,
-+52,
-+242,
-+32,
-+247,
-+227,
-+142,
-+52,
-+178,
-+90,
-+0,
-+161,
-+3,
-+6,
-+64,
-+23,
-+64,
-+96,
-+8,
-+70,
-+98,
-+97,
-+8,
-+70,
-+98,
-+98,
-+8,
-+70,
-+98,
-+99,
-+8,
-+70,
-+98,
-+100,
-+8,
-+70,
-+98,
-+101,
-+8,
-+70,
-+98,
-+255,
-+159,
-+94,
-+233,
-+23,
-+102,
-+7,
-+106,
-+112,
-+30,
-+33,
-+3,
++0xa9,  0x03,  0x3e,  0x40,  0x4f,  0x40,  0x03,  0xb0,   // 0000
++0x20,  0x00,  0x0c,  0xf8,  0x38,  0x88,  0x80,  0x03,   // 0008
++0xc0,  0xf8,  0x00,  0x00,  0x40,  0xb0,  0x00,  0x02,   // 0010
++0x0c,  0xf8,  0x38,  0xa8,  0x80,  0x03,  0xc0,  0xf8,   // 0018
++0x00,  0x00,  0x00,  0x60,  0x03,  0xb0,  0x20,  0x00,   // 0020
++0x07,  0xb0,  0x00,  0x02,  0x08,  0xb0,  0x00,  0x04,   // 0028
++0x04,  0xb0,  0x40,  0x00,  0x05,  0xb0,  0x00,  0x08,   // 0030
++0x59,  0xb0,  0xc0,  0xfd,  0x0b,  0x12,  0x5b,  0x7a,   // 0038
++0x5b,  0x7c,  0x4a,  0xc3,  0x50,  0x17,  0x02,  0x6f,   // 0040
++0x02,  0x6a,  0x32,  0x18,  0x0a,  0x6a,  0x16,  0x40,   // 0048
++0x04,  0x18,  0x1a,  0x66,  0x80,  0x90,  0x32,  0x00,   // 0050
++0x0c,  0xf8,  0x38,  0x80,  0x80,  0x03,  0xc0,  0x08,   // 0058
++0x18,  0x00,  0x80,  0x90,  0x51,  0x00,  0x04,  0xff,   // 0060
++0x30,  0xc0,  0x80,  0x03,  0x20,  0x08,  0x10,  0x00,   // 0068
++0x4c,  0xfe,  0x30,  0xc0,  0x09,  0x04,  0x20,  0x08,   // 0070
++0x00,  0x00,  0x04,  0xfc,  0x38,  0x90,  0x80,  0x02,   // 0078
++0xc0,  0x0b,  0x02,  0x00,  0x80,  0x90,  0x40,  0x00,   // 0080
++0x04,  0xff,  0x30,  0xc0,  0x80,  0x03,  0x20,  0x08,   // 0088
++0x14,  0x00,  0x4c,  0xfe,  0x30,  0xc0,  0x04,  0x04,   // 0090
++0x20,  0x08,  0x00,  0x00,  0x8c,  0xf8,  0x2c,  0xe0,   // 0098
++0x80,  0x03,  0x20,  0x30,  0x04,  0x00,  0x80,  0x45,   // 00a0
++0x71,  0x42,  0xf2,  0x8c,  0xd1,  0xc0,  0x59,  0xb0,   // 00a8
++0x40,  0x02,  0x00,  0x9e,  0x6d,  0x00,  0x29,  0x03,   // 00b0
++0x00,  0xf4,  0x38,  0x80,  0x00,  0x0c,  0xb6,  0x40,   // 00b8
++0x8c,  0xf8,  0x20,  0xe0,  0x80,  0x03,  0x00,  0x30,   // 00c0
++0x18,  0x00,  0x15,  0x40,  0x08,  0xf0,  0x38,  0x80,   // 00c8
++0x85,  0x0b,  0x66,  0xb5,  0xe0,  0xff,  0x88,  0xf0,   // 00d0
++0x24,  0xe0,  0x86,  0x03,  0x0c,  0x60,  0x64,  0x08,   // 00d8
++0x46,  0x62,  0x49,  0xc3,  0x50,  0x27,  0x04,  0x6a,   // 00e0
++0x84,  0x6e,  0x07,  0x18,  0x69,  0xa0,  0x04,  0x5f,   // 00e8
++0x1c,  0x8b,  0xf7,  0xc8,  0x45,  0x76,  0x6b,  0x1f,   // 00f0
++0xb6,  0x40,  0x04,  0xb0,  0x40,  0x00,  0x05,  0xb0,   // 00f8
++0x00,  0x08,  0x5a,  0x00,  0x06,  0xb4,  0x10,  0x00,   // 0100
++0xa4,  0xff,  0x24,  0xcc,  0x60,  0x02,  0x00,  0xf8,   // 0108
++0x3e,  0x00,  0x03,  0xff,  0x37,  0xd0,  0x78,  0x03,   // 0110
++0xe0,  0x03,  0xbe,  0x0b,  0x10,  0x8b,  0xf6,  0x5b,   // 0118
++0x00,  0x67,  0x5a,  0x00,  0x06,  0xb4,  0x10,  0x00,   // 0120
++0xa4,  0xff,  0x24,  0xcc,  0xe0,  0x02,  0x00,  0xf8,   // 0128
++0x3e,  0x00,  0x03,  0xff,  0x37,  0xd0,  0x78,  0x03,   // 0130
++0xe0,  0x03,  0xbe,  0x0b,  0x10,  0x8b,  0xf6,  0x5b,   // 0138
++0x00,  0x67,  0x5a,  0x00,  0x00,  0xf4,  0x38,  0x80,   // 0140
++0x00,  0x04,  0x20,  0xb5,  0x00,  0x08,  0x04,  0xb0,   // 0148
++0x20,  0x00,  0x8e,  0xf8,  0x20,  0xe0,  0x80,  0x03,   // 0150
++0xc0,  0x43,  0x00,  0x00,  0x08,  0xf0,  0x38,  0x80,   // 0158
++0x81,  0x03,  0x26,  0xb5,  0xe0,  0xff,  0x88,  0xf0,   // 0160
++0x20,  0xe0,  0x86,  0x03,  0x08,  0x60,  0x64,  0x08,   // 0168
++0x46,  0x62,  0x45,  0xc3,  0x50,  0x27,  0x04,  0x6a,   // 0170
++0xa4,  0x6e,  0x7f,  0x90,  0xbf,  0xff,  0x65,  0xa0,   // 0178
++0x04,  0x07,  0x18,  0x8b,  0xf6,  0xc8,  0x41,  0x76,   // 0180
++0x6a,  0x1f,  0x5a,  0x00,  0xe1,  0x40,  0xf2,  0x40,   // 0188
++0x0f,  0x7b,  0x02,  0x6f,  0x03,  0xb0,  0x80,  0x00,   // 0190
++0x07,  0xb0,  0x00,  0x02,  0xe8,  0x00,  0x08,  0x6d,   // 0198
++0xe8,  0xbf,  0x60,  0x01,  0x03,  0x18,  0x48,  0xb0,   // 01a0
++0x20,  0x10,  0x89,  0x40,  0x1a,  0x40,  0x02,  0x6a,   // 01a8
++0x24,  0x18,  0xa1,  0x40,  0x98,  0x40,  0xf2,  0x4a,   // 01b0
++0x06,  0x1e,  0xff,  0x9f,  0xc5,  0xff,  0x21,  0xb5,   // 01b8
++0x00,  0x08,  0x98,  0x40,  0x04,  0xb0,  0x40,  0x00,   // 01c0
++0x95,  0x60,  0x80,  0x90,  0x18,  0x00,  0x48,  0xb0,   // 01c8
++0x00,  0x04,  0x41,  0x76,  0x80,  0x90,  0x13,  0x00,   // 01d0
++0x04,  0xb0,  0x00,  0x08,  0x45,  0x60,  0x91,  0x40,   // 01d8
++0xa8,  0x40,  0x80,  0x90,  0x0c,  0x00,  0x48,  0xb0,   // 01e0
++0x00,  0x04,  0x41,  0x76,  0x80,  0x90,  0x07,  0x00,   // 01e8
++0x4a,  0xb0,  0x00,  0x08,  0xf2,  0x8c,  0xdf,  0xc0,   // 01f0
++0x29,  0x03,  0xef,  0x03,  0x0c,  0xf8,  0x38,  0x80,   // 01f8
++0x80,  0x03,  0xc0,  0xf8,  0x04,  0x00,  0x0c,  0xf8,   // 0200
++0x38,  0x84,  0xc0,  0x03,  0xc0,  0xf8,  0x04,  0x00,   // 0208
++0x00,  0x60,  0xff,  0x9f,  0x79,  0xff,  0x00,  0xb0,   // 0210
++0x00,  0x04,  0xff,  0x9f,  0x85,  0xff,  0x04,  0xff,   // 0218
++0x30,  0xcc,  0x10,  0x03,  0xe0,  0xfb,  0x3e,  0x00,   // 0220
++0x04,  0xff,  0x33,  0xcc,  0x80,  0x03,  0xe0,  0xfb,   // 0228
++0x10,  0x00,  0x4c,  0xfe,  0x33,  0xcc,  0x80,  0x03,   // 0230
++0xe0,  0xfb,  0x14,  0x00,  0x80,  0x40,  0x06,  0xb0,   // 0238
++0x40,  0x00,  0x8c,  0xf8,  0x2f,  0xe0,  0x80,  0x03,   // 0240
++0xe0,  0x63,  0x00,  0x00,  0x20,  0xf7,  0xf0,  0xcf,   // 0248
++0x10,  0x03,  0x20,  0xf7,  0xb0,  0xcf,  0x11,  0x13,   // 0250
++0x20,  0xf7,  0x70,  0xcf,  0x12,  0x23,  0x20,  0xf7,   // 0258
++0x30,  0xcf,  0x13,  0x33,  0x20,  0xf7,  0xf0,  0xce,   // 0260
++0x14,  0x43,  0x20,  0xf7,  0xb0,  0xce,  0x15,  0x53,   // 0268
++0x20,  0xf7,  0x70,  0xce,  0x16,  0x63,  0x20,  0xf7,   // 0270
++0x30,  0xce,  0x17,  0x73,  0x20,  0xf7,  0xf0,  0xcd,   // 0278
++0x18,  0x83,  0x20,  0xf7,  0xb0,  0xcd,  0x19,  0x93,   // 0280
++0x20,  0xf7,  0x70,  0xcd,  0x1a,  0xa3,  0x20,  0xf7,   // 0288
++0x30,  0xcd,  0x1b,  0xb3,  0x20,  0xf7,  0xf0,  0xcc,   // 0290
++0x1c,  0xc3,  0x20,  0xf7,  0xb0,  0xcc,  0x1d,  0xd3,   // 0298
++0x20,  0xf7,  0x70,  0xcc,  0x1e,  0xe3,  0x20,  0xf7,   // 02a0
++0x30,  0xcc,  0x1f,  0xf3,  0x04,  0xff,  0x33,  0xcc,   // 02a8
++0x80,  0x03,  0xe0,  0xfb,  0x10,  0x00,  0x4c,  0xfe,   // 02b0
++0x33,  0xcc,  0x80,  0x03,  0xe0,  0xfb,  0x14,  0x00,   // 02b8
++0x00,  0xb5,  0x20,  0x00,  0x8c,  0xf8,  0x2f,  0xe0,   // 02c0
++0x80,  0x03,  0xe0,  0x63,  0x00,  0x00,  0x6f,  0x03,   // 02c8
++0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,   // 02d0
++0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,   // 02d8
 +};
 diff --git a/libavcodec/rpi_hevcdec.c b/libavcodec/rpi_hevcdec.c
 new file mode 100644
-index 0000000000..fac0dde400
+index 0000000000..4034c77979
 --- /dev/null
 +++ b/libavcodec/rpi_hevcdec.c
-@@ -0,0 +1,5738 @@
+@@ -0,0 +1,5753 @@
 +/*
 + * HEVC video Decoder
 + *
@@ -43651,13 +20404,14 @@ index 0000000000..fac0dde400
 +
 +#include "bswapdsp.h"
 +#include "bytestream.h"
-+#include "cabac_functions.h"
 +#include "golomb.h"
 +#include "hevc.h"
 +#include "rpi_hevc_data.h"
 +#include "rpi_hevc_parse.h"
 +#include "rpi_hevcdec.h"
++#include "rpi_hevc_cabac_fns.h"
 +#include "profiles.h"
++#include "hwaccel.h"
 +
 +#include "rpi_qpu.h"
 +#include "rpi_hevc_shader.h"
@@ -43667,7 +20421,7 @@ index 0000000000..fac0dde400
 +#include "libavutil/rpi_sand_fns.h"
 +
 +#include "pthread.h"
-+#include "libavutil/atomic.h"
++#include <stdatomic.h>
 +
 +#define DEBUG_DECODE_N 0   // 0 = do all, n = frames idr onwards
 +
@@ -43806,17 +20560,92 @@ index 0000000000..fac0dde400
 +    ipe_chan_info_t chroma;
 +} ipe_init_info_t;
 +
-+static void *small_memset(void *s, int c, size_t n)
++static void set_bytes(uint8_t * b, const unsigned int stride, const unsigned int ln, unsigned int a)
 +{
-+    if (n == 1)
-+        *(int8_t *) s = c;
-+    else if (n == 2)
-+        *(int16_t *) s = c * 0x0101;
-+    else if (n == 4)
-+        *(int32_t *) s = c * 0x01010101;
-+    else
-+        return memset(s, c, n);
-+    return s;
++    switch (ln)
++    {
++        default:  // normally 0
++            *b = a;
++            break;
++        case 1:
++            a |= a << 8;
++            *(uint16_t *)b = a;
++            b += stride;
++            *(uint16_t *)b = a;
++            break;
++        case 2:
++            a |= a << 8;
++            a |= a << 16;
++            *(uint32_t *)b = a;
++            b += stride;
++            *(uint32_t *)b = a;
++            b += stride;
++            *(uint32_t *)b = a;
++            b += stride;
++            *(uint32_t *)b = a;
++            break;
++        case 3:
++        {
++            unsigned int i;
++            uint64_t d;
++            a |= a << 8;
++            a |= a << 16;
++            d = ((uint64_t)a << 32) | a;
++            for (i = 0; i != 8; ++i, b += stride)
++                *(uint64_t *)b = d;
++            break;
++        }
++        case 4:
++        {
++            unsigned int i;
++            uint64_t d;
++            a |= a << 8;
++            a |= a << 16;
++            d = ((uint64_t)a << 32) | a;
++            for (i = 0; i != 16; ++i, b += stride)
++            {
++                *(uint64_t *)b = d;
++                *(uint64_t *)(b + 8) = d;
++            }
++            break;
++        }
++    }
++}
++
++// Set a small square block of bits in a bitmap
++// Bits must be aligned on their size boundary (which will be true of all split CBs)
++static void set_bits(uint8_t * f, const unsigned int x, const unsigned int stride, const unsigned int ln)
++{
++    unsigned int n;
++    const unsigned int sh = (x & 7);
++
++    f += (x >> 3);
++
++    av_assert2(ln <= 3);
++    av_assert2((x & ((1 << ln) - 1)) == 0);
++
++    switch (ln)
++    {
++        case 0:  // 1
++            f[0] |= 1 << sh;
++            break;
++        case 1:  // 3 * 2
++            n = 3 << sh;
++            f[0] |= n;
++            f[stride] |= n;
++            break;
++        case 2:  // 0xf * 4
++            n = 0xf << sh;
++            f[0] |= n;
++            f[stride] |= n;
++            f[stride * 2] |= n;
++            f[stride * 3] |= n;
++            break;
++        default:  // 0xff * 8
++            for (n = 0; n != 8; ++n, f += stride)
++                *f = 0xff;
++            break;
++    }
 +}
 +
 +static const ipe_init_info_t ipe_init_infos[9] = {  // Alloc for bit depths of 8-16
@@ -44460,19 +21289,6 @@ index 0000000000..fac0dde400
 +/* free everything allocated  by pic_arrays_init() */
 +static void pic_arrays_free(HEVCRpiContext *s)
 +{
-+#ifdef RPI_DEBLOCK_VPU
-+    {
-+        int i;
-+        for (i = 0; i != RPI_DEBLOCK_VPU_Q_COUNT; ++i) {
-+            struct dblk_vpu_q_s * const dvq = s->dvq_ents + i;
-+
-+            if (dvq->vpu_cmds_arm) {
-+                gpu_free(&dvq->deblock_vpu_gmem);
-+              dvq->vpu_cmds_arm = 0;
-+            }
-+        }
-+    }
-+#endif
 +    av_freep(&s->sao);
 +    av_freep(&s->deblock);
 +
@@ -44480,7 +21296,6 @@ index 0000000000..fac0dde400
 +    av_freep(&s->tab_ct_depth);
 +
 +    av_freep(&s->tab_ipm);
-+    av_freep(&s->cbf_luma);
 +    av_freep(&s->is_pcm);
 +
 +    av_freep(&s->qp_y_tab);
@@ -44488,7 +21303,10 @@ index 0000000000..fac0dde400
 +    av_freep(&s->filter_slice_edges);
 +
 +    av_freep(&s->horizontal_bs);
-+    av_freep(&s->vertical_bs);
++//    av_freep(&s->vertical_bs);
++    av_freep(&s->vertical_bs2);
++    av_freep(&s->bsf_stash_left);
++    av_freep(&s->bsf_stash_up);
 +
 +    alloc_entry_points(&s->sh, 0);
 +
@@ -44507,74 +21325,23 @@ index 0000000000..fac0dde400
 +    int ctb_count        = sps->ctb_width * sps->ctb_height;
 +    int min_pu_size      = sps->min_pu_width * sps->min_pu_height;
 +
-+#ifdef RPI_DEBLOCK_VPU
-+    {
-+        int i;
-+        s->enable_rpi_deblock = !sps->sao_enabled;
-+        s->setup_width = (sps->width+15) / 16;
-+        s->setup_height = (sps->height+15) / 16;
-+        s->uv_setup_width = ( (sps->width >> ctx_hshift(s, 1)) + 15) / 16;
-+        s->uv_setup_height = ( (sps->height >> ctx_vshift(s, 1)) + 15) / 16;
-+
-+        for (i = 0; i != RPI_DEBLOCK_VPU_Q_COUNT; ++i)
-+        {
-+            struct dblk_vpu_q_s * const dvq = s->dvq_ents + i;
-+            const unsigned int cmd_size = (sizeof(*dvq->vpu_cmds_arm) * 3 + 15) & ~15;
-+            const unsigned int y_size = (sizeof(*dvq->y_setup_arm) * s->setup_width * s->setup_height + 15) & ~15;
-+            const unsigned int uv_size = (sizeof(*dvq->uv_setup_arm) * s->uv_setup_width * s->uv_setup_height + 15) & ~15;
-+            const unsigned int total_size =- cmd_size + y_size + uv_size;
-+            int p_vc;
-+            uint8_t * p_arm;
-+#if RPI_VPU_DEBLOCK_CACHED
-+            gpu_malloc_cached(total_size, &dvq->deblock_vpu_gmem);
-+#else
-+            gpu_malloc_uncached(total_size, &dvq->deblock_vpu_gmem);
-+#endif
-+            p_vc = dvq->deblock_vpu_gmem.vc;
-+            p_arm = dvq->deblock_vpu_gmem.arm;
-+
-+            // Zap all
-+            memset(p_arm, 0, dvq->deblock_vpu_gmem.numbytes);
-+
-+            // Subdivide
-+            dvq->vpu_cmds_arm = (void*)p_arm;
-+            dvq->vpu_cmds_vc = p_vc;
-+
-+            p_arm += cmd_size;
-+            p_vc += cmd_size;
-+
-+            dvq->y_setup_arm = (void*)p_arm;
-+            dvq->y_setup_vc = (void*)p_vc;
-+
-+            p_arm += y_size;
-+            p_vc += y_size;
-+
-+            dvq->uv_setup_arm = (void*)p_arm;
-+            dvq->uv_setup_vc = (void*)p_vc;
-+        }
-+
-+        s->dvq_n = 0;
-+        s->dvq = s->dvq_ents + s->dvq_n;
-+    }
-+#endif
-+
-+    s->bs_width  = (width  >> 2) + 1;
-+    s->bs_height = (height >> 2) + 1;
++    s->hbs_stride = ((width + 63) & ~63) >> 4;
++    s->bs_size = (((height + 15) & ~15) >> 3) * s->hbs_stride;
 +
 +    s->sao           = av_mallocz(ctb_count * sizeof(*s->sao) + 8); // Our sao code overreads this array slightly
 +    s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
 +    if (!s->sao || !s->deblock)
 +        goto fail;
 +
-+    s->skip_flag    = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
++    s->skip_flag_stride = (sps->min_cb_width + 7) >> 3;
++    s->skip_flag    = av_malloc_array(sps->min_cb_height, s->skip_flag_stride);
 +    s->tab_ct_depth = av_malloc_array(sps->min_cb_height, sps->min_cb_width);
 +    if (!s->skip_flag || !s->tab_ct_depth)
 +        goto fail;
 +
-+    s->cbf_luma = av_malloc_array(sps->min_tb_width, sps->min_tb_height);
 +    s->tab_ipm  = av_mallocz(min_pu_size);
-+    s->is_pcm   = av_malloc_array(sps->min_pu_width + 1, sps->min_pu_height + 1);
-+    if (!s->tab_ipm || !s->cbf_luma || !s->is_pcm)
++    s->is_pcm   = av_malloc_array(sps->pcm_width, sps->pcm_height);
++    if (!s->tab_ipm || !s->is_pcm)
 +        goto fail;
 +
 +    s->filter_slice_edges = av_mallocz(ctb_count);
@@ -44585,9 +21352,13 @@ index 0000000000..fac0dde400
 +    if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
 +        goto fail;
 +
-+    s->horizontal_bs = av_mallocz_array(s->bs_width, s->bs_height);
-+    s->vertical_bs   = av_mallocz_array(s->bs_width, s->bs_height);
-+    if (!s->horizontal_bs || !s->vertical_bs)
++    s->horizontal_bs = av_mallocz(s->bs_size);
++    s->vertical_bs2  = av_mallocz(s->bs_size);
++    if (s->horizontal_bs == NULL || s->vertical_bs2 == NULL)
++        goto fail;
++
++    if ((s->bsf_stash_left = av_mallocz(((height + 63) & ~63) >> 4)) == NULL ||
++        (s->bsf_stash_up   = av_mallocz(((width + 63) & ~63) >> 4)) == NULL)
 +        goto fail;
 +
 +    s->tab_mvf_pool = av_buffer_pool_init(min_pu_size * sizeof(MvField),
@@ -44882,7 +21653,6 @@ index 0000000000..fac0dde400
 +
 +    ff_hevc_rpi_pred_init(&s->hpc,     sps->bit_depth);
 +    ff_hevc_rpi_dsp_init (&s->hevcdsp, sps->bit_depth);
-+    ff_videodsp_init (&s->vdsp,    sps->bit_depth);
 +
 +    // * We don't support cross_component_prediction_enabled_flag but as that
 +    //   must be 0 unless we have 4:4:4 there is no point testing for it as we
@@ -44931,10 +21701,15 @@ index 0000000000..fac0dde400
 +    return ret;
 +}
 +
-+static int hls_slice_header(HEVCRpiContext *s)
++static inline int qp_offset_valid(const int qp_offset)
 +{
-+    GetBitContext *gb = &s->HEVClc->gb;
-+    RpiSliceHeader *sh   = &s->sh;
++    return qp_offset >= -12 && qp_offset <= 12;
++}
++
++static int hls_slice_header(HEVCRpiContext * const s)
++{
++    GetBitContext * const gb = &s->HEVClc->gb;
++    RpiSliceHeader * const sh   = &s->sh;
 +    int i, ret;
 +
 +    // Coded parameters
@@ -45207,7 +21982,18 @@ index 0000000000..fac0dde400
 +        if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
 +            sh->slice_cb_qp_offset = get_se_golomb(gb);
 +            sh->slice_cr_qp_offset = get_se_golomb(gb);
-+        } else {
++            if (!qp_offset_valid(sh->slice_cb_qp_offset) ||
++                !qp_offset_valid(s->ps.pps->cb_qp_offset + sh->slice_cb_qp_offset) ||
++                !qp_offset_valid(sh->slice_cr_qp_offset) ||
++                !qp_offset_valid(s->ps.pps->cr_qp_offset + sh->slice_cr_qp_offset))
++            {
++                av_log(s->avctx, AV_LOG_ERROR, "Bad chroma offset (pps:%d/%d; slice=%d/%d\n",
++                       s->ps.pps->cb_qp_offset, s->ps.pps->cr_qp_offset,   // pps Cb/Cr, matching the "pps:" label
++                       sh->slice_cb_qp_offset, sh->slice_cr_qp_offset);    // slice Cb/Cr, matching the "slice=" label
++                return AVERROR_INVALIDDATA;
++            }
++        } else
++        {
 +            sh->slice_cb_qp_offset = 0;
 +            sh->slice_cr_qp_offset = 0;
 +        }
@@ -45257,6 +22043,13 @@ index 0000000000..fac0dde400
 +        } else {
 +            sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
 +        }
++        sh->no_dblk_boundary_flags =
++            (sh->slice_loop_filter_across_slices_enabled_flag ? 0 :
++                BOUNDARY_UPPER_SLICE | BOUNDARY_LEFT_SLICE) |
++            (s->ps.pps->loop_filter_across_tiles_enabled_flag ? 0 :
++                BOUNDARY_UPPER_TILE | BOUNDARY_LEFT_TILE);
++
++
 +    } else if (!s->slice_initialized) {
 +        av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
 +        return AVERROR_INVALIDDATA;
@@ -45336,7 +22129,7 @@ index 0000000000..fac0dde400
 +
 +static void hls_sao_param(const HEVCRpiContext *s, HEVCRpiLocalContext * const lc, const int rx, const int ry)
 +{
-+    SAOParams * const sao = s->sao + rx + ry * s->ps.sps->ctb_width;
++    RpiSAOParams * const sao = s->sao + rx + ry * s->ps.sps->ctb_width;
 +    int c_idx, i;
 +
 +    if (s->sh.slice_sample_adaptive_offset_flag[0] ||
@@ -45412,7 +22205,7 @@ index 0000000000..fac0dde400
 +
 +
 +static int hls_cross_component_pred(HEVCRpiLocalContext * const lc, const int idx) {
-+    int log2_res_scale_abs_plus1 = ff_hevc_rpi_log2_res_scale_abs(lc, idx);
++    int log2_res_scale_abs_plus1 = ff_hevc_rpi_log2_res_scale_abs(lc, idx);  // 0..4
 +
 +    if (log2_res_scale_abs_plus1 !=  0) {
 +        int res_scale_sign_flag = ff_hevc_rpi_res_scale_sign_flag(lc, idx);
@@ -45447,54 +22240,61 @@ index 0000000000..fac0dde400
 +    }
 +}
 +
-+static int hls_transform_unit(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int x0, int y0,
-+                              int xBase, int yBase, int cb_xBase, int cb_yBase,
-+                              int log2_cb_size, int log2_trafo_size,
-+                              int blk_idx, int cbf_luma, int *cbf_cb, int *cbf_cr)
++#define CBF_CB0_S 0
++#define CBF_CB1_S 1 // CB1 must be CB0 + 1
++#define CBF_CR0_S 2
++#define CBF_CR1_S 3
++
++#define CBF_CB0 (1 << CBF_CB0_S)
++#define CBF_CR0 (1 << CBF_CR0_S)
++#define CBF_CB1 (1 << CBF_CB1_S)
++#define CBF_CR1 (1 << CBF_CR1_S)
++
++
++static int hls_transform_unit(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                              const unsigned int x0, const unsigned int y0,
++                              const unsigned int xBase, const unsigned int yBase,
++                              const unsigned int cb_xBase, const unsigned int cb_yBase,
++                              const unsigned int log2_cb_size, const unsigned int log2_trafo_size,
++                              const unsigned int blk_idx, const int cbf_luma,
++                              const unsigned int cbf_chroma)  // bitmask of CBF_CB0/CB1/CR0/CR1
 +{
-+//    const int log2_trafo_size_c = log2_trafo_size - s->ps.sps->hshift[1];
-+    const int log2_trafo_size_c = log2_trafo_size - ctx_hshift(s, 1);
++    const unsigned int log2_trafo_size_c = log2_trafo_size - ctx_hshift(s, 1);
 +    int i;
 +
 +    if (lc->cu.pred_mode == MODE_INTRA) {
-+        int trafo_size = 1 << log2_trafo_size;
++        const unsigned int trafo_size = 1 << log2_trafo_size;
 +        ff_hevc_rpi_set_neighbour_available(s, lc, x0, y0, trafo_size, trafo_size);
 +        do_intra_pred(s, lc, log2_trafo_size, x0, y0, 0);
 +    }
 +
-+    if (cbf_luma || cbf_cb[0] || cbf_cr[0] ||
-+        (ctx_cfmt(s) == 2 && (cbf_cb[1] || cbf_cr[1]))) {
++    if (cbf_luma || cbf_chroma != 0)
++    {
 +        int scan_idx   = SCAN_DIAG;
 +        int scan_idx_c = SCAN_DIAG;
-+        int cbf_chroma = cbf_cb[0] || cbf_cr[0] ||
-+                         (ctx_cfmt(s) == 2 &&
-+                         (cbf_cb[1] || cbf_cr[1]));
 +
-+        if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded) {
-+            lc->tu.cu_qp_delta = ff_hevc_rpi_cu_qp_delta_abs(lc);
-+            if (lc->tu.cu_qp_delta != 0)
++        if (s->ps.pps->cu_qp_delta_enabled_flag && !lc->tu.is_cu_qp_delta_coded)
++        {
++            const int qp_delta = ff_hevc_rpi_cu_qp_delta(lc);
++
++            if (qp_delta < -(26 + (s->ps.sps->qp_bd_offset >> 1)) ||
++                qp_delta >  (25 + (s->ps.sps->qp_bd_offset >> 1)))
 +            {
-+                if (ff_hevc_rpi_cu_qp_delta_sign_flag(lc) == 1)
-+                    lc->tu.cu_qp_delta = -lc->tu.cu_qp_delta;
-+
-+                if (lc->tu.cu_qp_delta < -(26 + s->ps.sps->qp_bd_offset/2) ||
-+                    lc->tu.cu_qp_delta >  (25 + s->ps.sps->qp_bd_offset/2))
-+                {
-+                    av_log(s->avctx, AV_LOG_ERROR,
-+                           "The cu_qp_delta %d is outside the valid range "
-+                           "[%d, %d].\n",
-+                           lc->tu.cu_qp_delta,
-+                           -(26 + s->ps.sps->qp_bd_offset/2),
-+                            (25 + s->ps.sps->qp_bd_offset/2));
-+                    return AVERROR_INVALIDDATA;
-+                }
++                av_log(s->avctx, AV_LOG_ERROR,
++                       "The cu_qp_delta %d is outside the valid range "
++                       "[%d, %d].\n",
++                       qp_delta,
++                       -(26 + (s->ps.sps->qp_bd_offset >> 1)),
++                        (25 + (s->ps.sps->qp_bd_offset >> 1)));
++                return AVERROR_INVALIDDATA;
 +            }
-+            lc->tu.is_cu_qp_delta_coded = 1;
 +
-+            ff_hevc_rpi_set_qPy(s, lc, cb_xBase, cb_yBase, log2_cb_size);
++            lc->tu.is_cu_qp_delta_coded = 1;
++            lc->tu.cu_qp_delta = qp_delta;
++            ff_hevc_rpi_set_qPy(s, lc, cb_xBase, cb_yBase);
 +        }
 +
-+        if (!lc->tu.is_cu_chroma_qp_offset_coded && cbf_chroma &&
++        if (lc->tu.cu_chroma_qp_offset_wanted && cbf_chroma &&
 +            !lc->cu.cu_transquant_bypass_flag) {
 +            int cu_chroma_qp_offset_flag = ff_hevc_rpi_cu_chroma_qp_offset_flag(lc);
 +            if (cu_chroma_qp_offset_flag) {
@@ -45504,10 +22304,10 @@ index 0000000000..fac0dde400
 +                    av_log(s->avctx, AV_LOG_ERROR,
 +                        "cu_chroma_qp_offset_idx not yet tested.\n");
 +                }
-+                lc->tu.cu_qp_offset_cb = s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
-+                lc->tu.cu_qp_offset_cr = s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
++                lc->tu.qp_divmod6[1] += s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
++                lc->tu.qp_divmod6[2] += s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
 +            }
-+            lc->tu.is_cu_chroma_qp_offset_coded = 1;
++            lc->tu.cu_chroma_qp_offset_wanted = 0;
 +        }
 +
 +        if (lc->cu.pred_mode == MODE_INTRA && log2_trafo_size < 4) {
@@ -45547,25 +22347,24 @@ index 0000000000..fac0dde400
 +                    ff_hevc_rpi_set_neighbour_available(s, lc, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
 +                    do_intra_pred(s, lc, log2_trafo_size_c, x0, y0 + (i << log2_trafo_size_c), 1);
 +                }
-+                if (cbf_cb[i])
++                if (((cbf_chroma >> i) & CBF_CB0) != 0)
 +                    ff_hevc_rpi_hls_residual_coding(s, lc, x0, y0 + (i << log2_trafo_size_c),
 +                                                log2_trafo_size_c, scan_idx_c, 1);
-+                else
-+                    if (lc->tu.cross_pf) {
-+                        const ptrdiff_t stride = frame_stride1(s->frame, 1);
-+                        const int hshift = ctx_hshift(s, 1);
-+                        const int vshift = ctx_vshift(s, 1);
-+                        int16_t * const coeffs_y = (int16_t*)lc->edge_emu_buffer;
-+                        int16_t * const coeffs   = (int16_t*)lc->edge_emu_buffer2;
-+                        int size = 1 << log2_trafo_size_c;
++                else if (lc->tu.cross_pf) {
++                    const ptrdiff_t stride = frame_stride1(s->frame, 1);
++                    const int hshift = ctx_hshift(s, 1);
++                    const int vshift = ctx_vshift(s, 1);
++                    int16_t * const coeffs_y = (int16_t*)lc->edge_emu_buffer;
++                    int16_t * const coeffs   = (int16_t*)lc->edge_emu_buffer2;
++                    int size = 1 << log2_trafo_size_c;
 +
-+                        uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
-+                                                              ((x0 >> hshift) << s->ps.sps->pixel_shift)];
-+                        for (i = 0; i < (size * size); i++) {
-+                            coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
-+                        }
-+                        s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
++                    uint8_t *dst = &s->frame->data[1][(y0 >> vshift) * stride +
++                                                          ((x0 >> hshift) << s->ps.sps->pixel_shift)];
++                    for (i = 0; i < (size * size); i++) {
++                        coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
 +                    }
++                    s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
++                }
 +            }
 +
 +            if (lc->tu.cross_pf) {
@@ -45576,25 +22375,24 @@ index 0000000000..fac0dde400
 +                    ff_hevc_rpi_set_neighbour_available(s, lc, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v);
 +                    do_intra_pred(s, lc, log2_trafo_size_c, x0, y0 + (i << log2_trafo_size_c), 2);
 +                }
-+                if (cbf_cr[i])
++                if (((cbf_chroma >> i) & CBF_CR0) != 0)
 +                    ff_hevc_rpi_hls_residual_coding(s, lc, x0, y0 + (i << log2_trafo_size_c),
 +                                                log2_trafo_size_c, scan_idx_c, 2);
-+                else
-+                    if (lc->tu.cross_pf) {
-+                        ptrdiff_t stride = frame_stride1(s->frame, 2);
-+                        const int hshift = ctx_hshift(s, 2);
-+                        const int vshift = ctx_vshift(s, 2);
-+                        int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
-+                        int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
-+                        const int size = 1 << log2_trafo_size_c;
++                else if (lc->tu.cross_pf) {
++                    ptrdiff_t stride = frame_stride1(s->frame, 2);
++                    const int hshift = ctx_hshift(s, 2);
++                    const int vshift = ctx_vshift(s, 2);
++                    int16_t *coeffs_y = (int16_t*)lc->edge_emu_buffer;
++                    int16_t *coeffs   = (int16_t*)lc->edge_emu_buffer2;
++                    const int size = 1 << log2_trafo_size_c;
 +
-+                        uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
-+                                                          ((x0 >> hshift) << s->ps.sps->pixel_shift)];
-+                        for (i = 0; i < (size * size); i++) {
-+                            coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
-+                        }
-+                        s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
++                    uint8_t *dst = &s->frame->data[2][(y0 >> vshift) * stride +
++                                                      ((x0 >> hshift) << s->ps.sps->pixel_shift)];
++                    for (i = 0; i < (size * size); i++) {
++                        coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
 +                    }
++                    s->hevcdsp.add_residual[log2_trafo_size_c-2](dst, coeffs, stride);
++                }
 +            }
 +        } else if (ctx_cfmt(s) != 0 && blk_idx == 3) {
 +            int trafo_size_h = 1 << (log2_trafo_size + 1);
@@ -45605,7 +22403,7 @@ index 0000000000..fac0dde400
 +                                                    trafo_size_h, trafo_size_v);
 +                    do_intra_pred(s, lc, log2_trafo_size, xBase, yBase + (i << log2_trafo_size), 1);
 +                }
-+                if (cbf_cb[i])
++                if (((cbf_chroma >> i) & CBF_CB0) != 0)
 +                    ff_hevc_rpi_hls_residual_coding(s, lc, xBase, yBase + (i << log2_trafo_size),
 +                                                log2_trafo_size, scan_idx_c, 1);
 +            }
@@ -45615,7 +22413,7 @@ index 0000000000..fac0dde400
 +                                                trafo_size_h, trafo_size_v);
 +                    do_intra_pred(s, lc, log2_trafo_size, xBase, yBase + (i << log2_trafo_size), 2);
 +                }
-+                if (cbf_cr[i])
++                if (((cbf_chroma >> i) & CBF_CR0) != 0)
 +                    ff_hevc_rpi_hls_residual_coding(s, lc, xBase, yBase + (i << log2_trafo_size),
 +                                                log2_trafo_size, scan_idx_c, 2);
 +            }
@@ -45652,36 +22450,25 @@ index 0000000000..fac0dde400
 +    return 0;
 +}
 +
-+static void set_deblocking_bypass(const HEVCRpiContext * const s, const int x0, const int y0, const int log2_cb_size)
++static inline void set_deblocking_bypass(const HEVCRpiContext * const s, const int x0, const int y0, const int log2_cb_size)
 +{
-+    int cb_size          = 1 << log2_cb_size;
-+    int log2_min_pu_size = s->ps.sps->log2_min_pu_size;
-+
-+    int min_pu_width     = s->ps.sps->min_pu_width;
-+    int x_end = FFMIN(x0 + cb_size, s->ps.sps->width);
-+    int y_end = FFMIN(y0 + cb_size, s->ps.sps->height);
-+    int i, j;
-+
-+    for (j = (y0 >> log2_min_pu_size); j < (y_end >> log2_min_pu_size); j++)
-+        for (i = (x0 >> log2_min_pu_size); i < (x_end >> log2_min_pu_size); i++)
-+            s->is_pcm[i + j * min_pu_width] = 2;
++    set_bits(s->is_pcm + (y0 >> 3) * s->ps.sps->pcm_width, x0 >> 3, s->ps.sps->pcm_width, log2_cb_size - 3);
 +}
 +
-+static int hls_transform_tree(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int x0, int y0,
-+                              int xBase, int yBase, int cb_xBase, int cb_yBase,
-+                              int log2_cb_size, int log2_trafo_size,
-+                              int trafo_depth, int blk_idx,
-+                              const int *base_cbf_cb, const int *base_cbf_cr)
-+{
-+    uint8_t split_transform_flag;
-+    int cbf_cb[2];
-+    int cbf_cr[2];
-+    int ret;
 +
-+    cbf_cb[0] = base_cbf_cb[0];
-+    cbf_cb[1] = base_cbf_cb[1];
-+    cbf_cr[0] = base_cbf_cr[0];
-+    cbf_cr[1] = base_cbf_cr[1];
++static int hls_transform_tree(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                              const unsigned int x0, const unsigned int y0,
++                              const unsigned int xBase, const unsigned int yBase,
++                              const unsigned int cb_xBase, const unsigned int cb_yBase,
++                              const unsigned int log2_cb_size,
++                              const unsigned int log2_trafo_size,
++                              const unsigned int trafo_depth, const unsigned int blk_idx,
++                              const unsigned int cbf_c0)
++{
++    // When trafo_size == 2 hls_transform_unit uses c0 so put in c1
++    unsigned int cbf_c1 = cbf_c0;
++    int split_transform_flag;
++    int ret;
 +
 +    if (lc->cu.intra_split_flag) {
 +        if (trafo_depth == 1) {
@@ -45703,7 +22490,8 @@ index 0000000000..fac0dde400
 +    if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
 +        log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
 +        trafo_depth     < lc->cu.max_trafo_depth       &&
-+        !(lc->cu.intra_split_flag && trafo_depth == 0)) {
++        !(lc->cu.intra_split_flag && trafo_depth == 0))
++    {
 +        split_transform_flag = ff_hevc_rpi_split_transform_flag_decode(lc, log2_trafo_size);
 +    } else {
 +        int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
@@ -45716,19 +22504,23 @@ index 0000000000..fac0dde400
 +                               inter_split;
 +    }
 +
-+    if (ctx_cfmt(s) != 0 && (log2_trafo_size > 2 || ctx_cfmt(s) == 3)) {
-+        if (trafo_depth == 0 || cbf_cb[0]) {
-+            cbf_cb[0] = ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth);
-+            if (ctx_cfmt(s) == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
-+                cbf_cb[1] = ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth);
-+            }
++    if (log2_trafo_size > 2 || ctx_cfmt(s) == 3)
++    {
++        const int wants_c1 = ctx_cfmt(s) == 2 && (!split_transform_flag || log2_trafo_size == 3);
++        cbf_c1 = 0;
++
++        if ((cbf_c0 & CBF_CB0) != 0)
++        {
++            cbf_c1 = ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth) << CBF_CB0_S;
++            if (wants_c1)
++                cbf_c1 |= ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth) << CBF_CB1_S;
 +        }
 +
-+        if (trafo_depth == 0 || cbf_cr[0]) {
-+            cbf_cr[0] = ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth);
-+            if (ctx_cfmt(s) == 2 && (!split_transform_flag || log2_trafo_size == 3)) {
-+                cbf_cr[1] = ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth);
-+            }
++        if ((cbf_c0 & CBF_CR0) != 0)
++        {
++            cbf_c1 |= ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth) << CBF_CR0_S;
++            if (wants_c1)
++                cbf_c1 |= ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth) << CBF_CR1_S;
 +        }
 +    }
 +
@@ -45741,7 +22533,7 @@ index 0000000000..fac0dde400
 +do {                                                                            \
 +    ret = hls_transform_tree(s, lc, x, y, x0, y0, cb_xBase, cb_yBase, log2_cb_size, \
 +                             log2_trafo_size - 1, trafo_depth + 1, idx,         \
-+                             cbf_cb, cbf_cr);                                   \
++                             cbf_c1);                                   \
 +    if (ret < 0)                                                                \
 +        return ret;                                                             \
 +} while (0)
@@ -45753,37 +22545,19 @@ index 0000000000..fac0dde400
 +
 +#undef SUBDIVIDE
 +    } else {
-+        int min_tu_size      = 1 << s->ps.sps->log2_min_tb_size;
-+        int log2_min_tu_size = s->ps.sps->log2_min_tb_size;
-+        int min_tu_width     = s->ps.sps->min_tb_width;
-+        int cbf_luma         = 1;
-+
-+        if (lc->cu.pred_mode == MODE_INTRA || trafo_depth != 0 ||
-+            cbf_cb[0] || cbf_cr[0] ||
-+            (ctx_cfmt(s) == 2 && (cbf_cb[1] || cbf_cr[1]))) {
-+            cbf_luma = ff_hevc_rpi_cbf_luma_decode(lc, trafo_depth);
-+        }
++        // If trafo_size == 2 then we should have cbf_c == 0 here but as we can't have
++        // trafo_size == 2 with depth == 0 the issue is moot
++        const int cbf_luma = ((lc->cu.pred_mode != MODE_INTRA && trafo_depth == 0 && cbf_c1 == 0) ||
++            ff_hevc_rpi_cbf_luma_decode(lc, trafo_depth));
 +
 +        ret = hls_transform_unit(s, lc, x0, y0, xBase, yBase, cb_xBase, cb_yBase,
 +                                 log2_cb_size, log2_trafo_size,
-+                                 blk_idx, cbf_luma, cbf_cb, cbf_cr);
++                                 blk_idx, cbf_luma, cbf_c1);
 +        if (ret < 0)
 +            return ret;
-+        // TODO: store cbf_luma somewhere else
-+        if (cbf_luma) {
-+            int i, j;
-+            for (i = 0; i < (1 << log2_trafo_size); i += min_tu_size)
-+                for (j = 0; j < (1 << log2_trafo_size); j += min_tu_size) {
-+                    int x_tu = (x0 + j) >> log2_min_tu_size;
-+                    int y_tu = (y0 + i) >> log2_min_tu_size;
-+                    s->cbf_luma[y_tu * min_tu_width + x_tu] = 1;
-+                }
-+        }
++
 +        if (!s->sh.disable_deblocking_filter_flag) {
-+            ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_trafo_size);
-+            if (s->ps.pps->transquant_bypass_enable_flag &&
-+                lc->cu.cu_transquant_bypass_flag)
-+                set_deblocking_bypass(s, x0, y0, log2_trafo_size);
++            ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_trafo_size, cbf_luma);
 +        }
 +    }
 +    return 0;
@@ -45826,10 +22600,10 @@ index 0000000000..fac0dde400
 +        xyexp2(s->ps.sps->pcm.bit_depth_chroma, log2_cb_size - ctx_vshift(s, 1)) +
 +        xyexp2(s->ps.sps->pcm.bit_depth_chroma, log2_cb_size - ctx_vshift(s, 2));
 +
-+    const uint8_t * const pcm = skip_bytes(&lc->cc, (length + 7) >> 3);
++    const uint8_t * const pcm = ff_hevc_rpi_cabac_skip_bytes(&lc->cc, (length + 7) >> 3);
 +
 +    if (!s->sh.disable_deblocking_filter_flag)
-+        ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_cb_size);
++        ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_cb_size, 0);
 +
 +    // Copy coeffs
 +    {
@@ -46070,7 +22844,7 @@ index 0000000000..fac0dde400
 +
 +#if RPI_TSTATS
 +            {
-+                HEVCRpiStats *const ts = &s->tstats;
++                HEVCRpiStats *const ts = (HEVCRpiStats *)&s->tstats;
 +                ++ts->y_pred1_x0y0;
 +
 +                if (nPbW > 8)
@@ -46126,7 +22900,7 @@ index 0000000000..fac0dde400
 +            jb->last_y8_l1 = NULL;
 +            start_x = bw;
 +#if RPI_TSTATS
-+            ++s->tstats.y_pred1_y8_merge;
++            ++((HEVCRpiStats *)&s->tstats)->y_pred1_y8_merge;
 +#endif
 +        }
 +#endif
@@ -46140,7 +22914,7 @@ index 0000000000..fac0dde400
 +            qpu_mc_pred_y_p_t *const cmd_y = &yp->qpu_mc_curr->y.p;
 +#if RPI_TSTATS
 +            {
-+                HEVCRpiStats *const ts = &s->tstats;
++                HEVCRpiStats *const ts = (HEVCRpiStats *)&s->tstats;
 +                if (mx == 0 && my == 0)
 +                    ++ts->y_pred1_x0y0;
 +                else if (mx == 0)
@@ -46247,7 +23021,7 @@ index 0000000000..fac0dde400
 +            qpu_mc_pred_y_p_t *const cmd_y = &yp->qpu_mc_curr->y.p;
 +#if RPI_TSTATS
 +            {
-+                HEVCRpiStats *const ts = &s->tstats;
++                HEVCRpiStats *const ts = (HEVCRpiStats *)&s->tstats;
 +                ++ts->y_pred2_x0y0;
 +
 +                if (nPbH > 16)
@@ -46292,7 +23066,7 @@ index 0000000000..fac0dde400
 +            qpu_mc_pred_y_p_t *const cmd_y = &yp->qpu_mc_curr->y.p;
 +#if RPI_TSTATS
 +            {
-+                HEVCRpiStats *const ts = &s->tstats;
++                HEVCRpiStats *const ts = (HEVCRpiStats *)&s->tstats;
 +                const unsigned int mmx = mx | mx2;
 +                const unsigned int mmy = my | my2;
 +                if (mmx == 0 && mmy == 0)
@@ -46466,7 +23240,6 @@ index 0000000000..fac0dde400
 +{
 +    HEVCRpiJob * const jb = lc->jb0;
 +
-+    int merge_idx = 0;
 +    struct MvField current_mv = {{{ 0 }}};
 +
 +    int min_pu_width = s->ps.sps->min_pu_width;
@@ -46474,28 +23247,21 @@ index 0000000000..fac0dde400
 +    MvField * const tab_mvf = s->ref->tab_mvf;
 +    const RefPicList  *const refPicList = s->ref->refPicList;
 +    const HEVCFrame *ref0 = NULL, *ref1 = NULL;
-+    int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
-+    int min_cb_width     = s->ps.sps->min_cb_width;
-+    int x_cb             = x0 >> log2_min_cb_size;
-+    int y_cb             = y0 >> log2_min_cb_size;
 +    int x_pu, y_pu;
 +    int i, j;
-+    const int skip_flag = SAMPLE_CTB(s->skip_flag, x_cb, y_cb);
 +
-+    if (!skip_flag)
++    if (lc->cu.pred_mode != MODE_SKIP)
 +        lc->pu.merge_flag = ff_hevc_rpi_merge_flag_decode(lc);
 +
-+    if (skip_flag || lc->pu.merge_flag) {
-+        if (s->sh.max_num_merge_cand > 1)
-+            merge_idx = ff_hevc_rpi_merge_idx_decode(s, lc);
-+        else
-+            merge_idx = 0;
++    if (lc->cu.pred_mode == MODE_SKIP || lc->pu.merge_flag) {
++        const unsigned int merge_idx = s->sh.max_num_merge_cand <= 1 ? 0 :
++            ff_hevc_rpi_merge_idx_decode(s, lc);
 +
 +        ff_hevc_rpi_luma_mv_merge_mode(s, lc, x0, y0, nPbW, nPbH, log2_cb_size,
 +                                   partIdx, merge_idx, &current_mv);
 +    } else {
 +        hevc_luma_mv_mvp_mode(s, lc, x0, y0, nPbW, nPbH, log2_cb_size,
-+                              partIdx, merge_idx, &current_mv);
++                              partIdx, 0, &current_mv);
 +    }
 +
 +    x_pu = x0 >> s->ps.sps->log2_min_pu_size;
@@ -46572,145 +23338,156 @@ index 0000000000..fac0dde400
 +    }
 +}
 +
++static void set_ipm(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                    const unsigned int x0, const unsigned int y0,
++                    const unsigned int log2_cb_size,
++                    const unsigned int ipm)
++{
++    const unsigned int min_pu_width     = s->ps.sps->min_pu_width;
++    const unsigned int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
++    const unsigned int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
++
++    set_bytes(s->tab_ipm + y_pu * min_pu_width + x_pu, min_pu_width, log2_cb_size - s->ps.sps->log2_min_pu_size, ipm);
++
++    if (lc->cu.pred_mode == MODE_INTRA)
++    {
++        unsigned int j, k;
++        MvField * tab_mvf     = s->ref->tab_mvf + y_pu * min_pu_width + x_pu;
++        const unsigned int size_in_pus = (1 << log2_cb_size) >> s->ps.sps->log2_min_pu_size;
++
++        if (size_in_pus <= 1)
++            tab_mvf[0].pred_flag = PF_INTRA;
++        else
++        {
++            for (j = 0; j < size_in_pus; j++, tab_mvf += min_pu_width)
++                for (k = 0; k < size_in_pus; k++)
++                    tab_mvf[k].pred_flag = PF_INTRA;
++        }
++    }
++}
++
++static void intra_prediction_unit_default_value(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                                const unsigned int x0, const unsigned int y0,
++                                                const unsigned int log2_cb_size)
++{
++    set_ipm(s, lc, x0, y0, log2_cb_size, INTRA_DC);
++}
++
++
 +/**
 + * 8.4.1
 + */
-+static int luma_intra_pred_mode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int x0, int y0, int pu_size,
-+                                int prev_intra_luma_pred_flag)
++static int luma_intra_pred_mode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                int x0, int y0, int log2_pu_size,
++                                int prev_intra_luma_pred_flag,
++                                const unsigned int idx)
 +{
 +    int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
 +    int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
 +    int min_pu_width     = s->ps.sps->min_pu_width;
-+    int size_in_pus      = pu_size >> s->ps.sps->log2_min_pu_size;
 +    int x0b              = av_mod_uintp2(x0, s->ps.sps->log2_ctb_size);
 +    int y0b              = av_mod_uintp2(y0, s->ps.sps->log2_ctb_size);
 +
-+    int cand_up   = (lc->ctb_up_flag || y0b) ?
-+                    s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
-+    int cand_left = (lc->ctb_left_flag || x0b) ?
-+                    s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
-+
 +    int y_ctb = (y0 >> (s->ps.sps->log2_ctb_size)) << (s->ps.sps->log2_ctb_size);
 +
-+    MvField *tab_mvf = s->ref->tab_mvf;
-+    int intra_pred_mode;
-+    int candidate[3];
-+    int i, j;
-+
 +    // intra_pred_mode prediction does not cross vertical CTB boundaries
-+    if ((y0 - 1) < y_ctb)
-+        cand_up = INTRA_DC;
++    const unsigned int cand_up   = (lc->ctb_up_flag || y0b) && (y0 > y_ctb) ?
++                    s->tab_ipm[(y_pu - 1) * min_pu_width + x_pu] : INTRA_DC;
++    const unsigned int cand_left = (lc->ctb_left_flag || x0b) ?
++                    s->tab_ipm[y_pu * min_pu_width + x_pu - 1]   : INTRA_DC;
++
++    int intra_pred_mode;
++    int a, b, c;
 +
 +    if (cand_left == cand_up) {
 +        if (cand_left < 2) {
-+            candidate[0] = INTRA_PLANAR;
-+            candidate[1] = INTRA_DC;
-+            candidate[2] = INTRA_ANGULAR_26;
++            a = INTRA_PLANAR;
++            b = INTRA_DC;
++            c = INTRA_ANGULAR_26;
 +        } else {
-+            candidate[0] = cand_left;
-+            candidate[1] = 2 + ((cand_left - 2 - 1 + 32) & 31);
-+            candidate[2] = 2 + ((cand_left - 2 + 1) & 31);
++            a = cand_left;
++            b = 2 + ((cand_left - 2 - 1 + 32) & 31);
++            c = 2 + ((cand_left - 2 + 1) & 31);
 +        }
 +    } else {
-+        candidate[0] = cand_left;
-+        candidate[1] = cand_up;
-+        if (candidate[0] != INTRA_PLANAR && candidate[1] != INTRA_PLANAR) {
-+            candidate[2] = INTRA_PLANAR;
-+        } else if (candidate[0] != INTRA_DC && candidate[1] != INTRA_DC) {
-+            candidate[2] = INTRA_DC;
-+        } else {
-+            candidate[2] = INTRA_ANGULAR_26;
-+        }
++        a = cand_left;
++        b = cand_up;
++        c = (cand_left != INTRA_PLANAR && cand_up != INTRA_PLANAR) ?
++                INTRA_PLANAR :
++            (cand_left != INTRA_DC && cand_up != INTRA_DC) ?
++                INTRA_DC :
++                INTRA_ANGULAR_26;
 +    }
 +
 +    if (prev_intra_luma_pred_flag) {
-+        intra_pred_mode = candidate[lc->pu.mpm_idx];
++        intra_pred_mode = idx == 0 ? a : idx == 1 ? b : c;
 +    } else {
-+        if (candidate[0] > candidate[1])
-+            FFSWAP(uint8_t, candidate[0], candidate[1]);
-+        if (candidate[0] > candidate[2])
-+            FFSWAP(uint8_t, candidate[0], candidate[2]);
-+        if (candidate[1] > candidate[2])
-+            FFSWAP(uint8_t, candidate[1], candidate[2]);
++        // Sort lowest 1st
++        if (a > b)
++            FFSWAP(int, a, b);
++        if (a > c)
++            FFSWAP(int, a, c);
++        if (b > c)
++            FFSWAP(int, b, c);
 +
-+        intra_pred_mode = lc->pu.rem_intra_luma_pred_mode;
-+        for (i = 0; i < 3; i++)
-+            if (intra_pred_mode >= candidate[i])
-+                intra_pred_mode++;
++        intra_pred_mode = idx;
++        if (intra_pred_mode >= a)
++            intra_pred_mode++;
++        if (intra_pred_mode >= b)
++            intra_pred_mode++;
++        if (intra_pred_mode >= c)
++            intra_pred_mode++;
 +    }
 +
 +    /* write the intra prediction units into the mv array */
-+    if (!size_in_pus)
-+        size_in_pus = 1;
-+    for (i = 0; i < size_in_pus; i++) {
-+        small_memset(&s->tab_ipm[(y_pu + i) * min_pu_width + x_pu],
-+               intra_pred_mode, size_in_pus);
-+
-+        for (j = 0; j < size_in_pus; j++) {
-+            tab_mvf[(y_pu + j) * min_pu_width + x_pu + i].pred_flag = PF_INTRA;
-+        }
-+    }
 +
++    set_ipm(s, lc, x0, y0, log2_pu_size, intra_pred_mode);
 +    return intra_pred_mode;
 +}
 +
-+static av_always_inline void set_ct_depth(const HEVCRpiContext * const s, int x0, int y0,
-+                                          int log2_cb_size, int ct_depth)
-+{
-+    int length = (1 << log2_cb_size) >> s->ps.sps->log2_min_cb_size;
-+    int x_cb   = x0 >> s->ps.sps->log2_min_cb_size;
-+    int y_cb   = y0 >> s->ps.sps->log2_min_cb_size;
-+    int y;
-+
-+    for (y = 0; y < length; y++)
-+        small_memset(&s->tab_ct_depth[(y_cb + y) * s->ps.sps->min_cb_width + x_cb],
-+               ct_depth, length);
-+}
-+
 +static const uint8_t tab_mode_idx[] = {
 +     0,  1,  2,  2,  2,  2,  3,  5,  7,  8, 10, 12, 13, 15, 17, 18, 19, 20,
 +    21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
 +
-+static void intra_prediction_unit(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const int x0, const int y0,
-+                                  const int log2_cb_size)
++static void intra_prediction_unit(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                  const unsigned int x0, const unsigned int y0,
++                                  const unsigned int log2_cb_size)
 +{
 +    static const uint8_t intra_chroma_table[4] = { 0, 26, 10, 1 };
 +    uint8_t prev_intra_luma_pred_flag[4];
 +    int split   = lc->cu.part_mode == PART_NxN;
-+    int pb_size = (1 << log2_cb_size) >> split;
-+    int side    = split + 1;
++    const unsigned int split_size = (1 << (log2_cb_size - 1));
 +    int chroma_mode;
-+    int i, j;
++    const unsigned int n = split ? 4 : 1;
++    unsigned int i;
 +
-+    for (i = 0; i < side; i++)
-+        for (j = 0; j < side; j++)
-+            prev_intra_luma_pred_flag[2 * i + j] = ff_hevc_rpi_prev_intra_luma_pred_flag_decode(lc);
++    for (i = 0; i != n; i++)
++        prev_intra_luma_pred_flag[i] = ff_hevc_rpi_prev_intra_luma_pred_flag_decode(lc);
 +
-+    for (i = 0; i < side; i++) {
-+        for (j = 0; j < side; j++) {
-+            if (prev_intra_luma_pred_flag[2 * i + j])
-+                lc->pu.mpm_idx = ff_hevc_rpi_mpm_idx_decode(lc);
-+            else
-+                lc->pu.rem_intra_luma_pred_mode = ff_hevc_rpi_rem_intra_luma_pred_mode_decode(lc);
++    for (i = 0; i < n; i++) {
++        // idx is mpm_idx when prev_intra_luma_pred_flag[i] is set, otherwise it is rem_intra_luma_pred_mode
++        const unsigned int idx = prev_intra_luma_pred_flag[i] ?
++            ff_hevc_rpi_mpm_idx_decode(lc) :
++            ff_hevc_rpi_rem_intra_luma_pred_mode_decode(lc);
 +
-+            lc->pu.intra_pred_mode[2 * i + j] =
-+                luma_intra_pred_mode(s, lc, x0 + pb_size * j, y0 + pb_size * i, pb_size,
-+                                     prev_intra_luma_pred_flag[2 * i + j]);
-+        }
++        lc->pu.intra_pred_mode[i] =
++            luma_intra_pred_mode(s, lc,
++                                 x0 + ((i & 1) == 0 ? 0 : split_size),
++                                 y0 + ((i & 2) == 0 ? 0 : split_size),
++                                 log2_cb_size - split,
++                                 prev_intra_luma_pred_flag[i], idx);
 +    }
 +
 +    if (ctx_cfmt(s) == 3) {
-+        for (i = 0; i < side; i++) {
-+            for (j = 0; j < side; j++) {
-+                lc->pu.chroma_mode_c[2 * i + j] = chroma_mode = ff_hevc_rpi_intra_chroma_pred_mode_decode(lc);
-+                if (chroma_mode != 4) {
-+                    if (lc->pu.intra_pred_mode[2 * i + j] == intra_chroma_table[chroma_mode])
-+                        lc->pu.intra_pred_mode_c[2 * i + j] = 34;
-+                    else
-+                        lc->pu.intra_pred_mode_c[2 * i + j] = intra_chroma_table[chroma_mode];
-+                } else {
-+                    lc->pu.intra_pred_mode_c[2 * i + j] = lc->pu.intra_pred_mode[2 * i + j];
-+                }
++        for (i = 0; i < n; i++) {
++            lc->pu.chroma_mode_c[i] = chroma_mode = ff_hevc_rpi_intra_chroma_pred_mode_decode(lc);
++            if (chroma_mode != 4) {
++                if (lc->pu.intra_pred_mode[i] == intra_chroma_table[chroma_mode])
++                    lc->pu.intra_pred_mode_c[i] = 34;
++                else
++                    lc->pu.intra_pred_mode_c[i] = intra_chroma_table[chroma_mode];
++            } else {
++                lc->pu.intra_pred_mode_c[i] = lc->pu.intra_pred_mode[i];
 +            }
 +        }
 +    } else if (ctx_cfmt(s) == 2) {
@@ -46738,79 +23515,58 @@ index 0000000000..fac0dde400
 +    }
 +}
 +
-+static void intra_prediction_unit_default_value(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
-+                                                int x0, int y0,
-+                                                int log2_cb_size)
++static inline void set_skip(const HEVCRpiContext * const s, const unsigned int x_cb, const unsigned int y_cb, const unsigned int ln)
 +{
-+    int pb_size          = 1 << log2_cb_size;
-+    int size_in_pus      = pb_size >> s->ps.sps->log2_min_pu_size;
-+    int min_pu_width     = s->ps.sps->min_pu_width;
-+    MvField *tab_mvf     = s->ref->tab_mvf;
-+    int x_pu             = x0 >> s->ps.sps->log2_min_pu_size;
-+    int y_pu             = y0 >> s->ps.sps->log2_min_pu_size;
-+    int j, k;
-+
-+    if (size_in_pus == 0)
-+        size_in_pus = 1;
-+    for (j = 0; j < size_in_pus; j++)
-+        small_memset(&s->tab_ipm[(y_pu + j) * min_pu_width + x_pu], INTRA_DC, size_in_pus);
-+    if (lc->cu.pred_mode == MODE_INTRA)
-+        for (j = 0; j < size_in_pus; j++)
-+            for (k = 0; k < size_in_pus; k++)
-+                tab_mvf[(y_pu + j) * min_pu_width + x_pu + k].pred_flag = PF_INTRA;
++    const unsigned int stride = s->skip_flag_stride;
++    set_bits(s->skip_flag + y_cb * stride, x_cb, stride, ln);
 +}
 +
-+static int hls_coding_unit(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int x0, int y0, int log2_cb_size)
++static int hls_coding_unit(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                           const unsigned int x0, const unsigned int y0, const unsigned int log2_cb_size)
 +{
-+    int cb_size          = 1 << log2_cb_size;
-+    int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
-+    int length           = cb_size >> log2_min_cb_size;
-+    int min_cb_width     = s->ps.sps->min_cb_width;
-+    int x_cb             = x0 >> log2_min_cb_size;
-+    int y_cb             = y0 >> log2_min_cb_size;
-+    int idx              = log2_cb_size - 2;
-+    int qp_block_mask    = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
-+    int x, y, ret;
++    const unsigned int cb_size          = 1 << log2_cb_size;
++    const unsigned int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
++    const unsigned int min_cb_width     = s->ps.sps->min_cb_width;
++    const unsigned int x_cb             = x0 >> log2_min_cb_size;
++    const unsigned int y_cb             = y0 >> log2_min_cb_size;
++    const unsigned int idx              = log2_cb_size - 2;
++    const unsigned int qp_block_mask    = (1 << s->ps.pps->log2_min_cu_qp_delta_size) - 1;
++    int skip_flag = 0;
 +
 +    lc->cu.x                = x0;
 +    lc->cu.y                = y0;
++    lc->cu.x_split          = x0;
++    lc->cu.y_split          = y0;
++
 +    lc->cu.pred_mode        = MODE_INTRA;
 +    lc->cu.part_mode        = PART_2Nx2N;
 +    lc->cu.intra_split_flag = 0;
++    lc->cu.cu_transquant_bypass_flag = 0;
++    lc->pu.intra_pred_mode[0] = 1;
++    lc->pu.intra_pred_mode[1] = 1;
++    lc->pu.intra_pred_mode[2] = 1;
++    lc->pu.intra_pred_mode[3] = 1;
 +
-+    SAMPLE_CTB(s->skip_flag, x_cb, y_cb) = 0;
-+    for (x = 0; x < 4; x++)
-+        lc->pu.intra_pred_mode[x] = 1;
 +    if (s->ps.pps->transquant_bypass_enable_flag) {
 +        lc->cu.cu_transquant_bypass_flag = ff_hevc_rpi_cu_transquant_bypass_flag_decode(lc);
 +        if (lc->cu.cu_transquant_bypass_flag)
 +            set_deblocking_bypass(s, x0, y0, log2_cb_size);
-+    } else
-+        lc->cu.cu_transquant_bypass_flag = 0;
-+
-+    if (s->sh.slice_type != HEVC_SLICE_I) {
-+        uint8_t skip_flag = ff_hevc_rpi_skip_flag_decode(s, lc, x0, y0, x_cb, y_cb);
-+
-+        x = y_cb * min_cb_width + x_cb;
-+        for (y = 0; y < length; y++) {
-+            small_memset(&s->skip_flag[x], skip_flag, length);
-+            x += min_cb_width;
-+        }
-+        lc->cu.pred_mode = skip_flag ? MODE_SKIP : MODE_INTER;
-+    } else {
-+        x = y_cb * min_cb_width + x_cb;
-+        for (y = 0; y < length; y++) {
-+            small_memset(&s->skip_flag[x], 0, length);
-+            x += min_cb_width;
-+        }
 +    }
 +
-+    if (SAMPLE_CTB(s->skip_flag, x_cb, y_cb)) {
++    if (s->sh.slice_type != HEVC_SLICE_I) {
++        lc->cu.pred_mode = MODE_INTER;
++        skip_flag = ff_hevc_rpi_skip_flag_decode(s, lc, x0, y0, x_cb, y_cb);
++    }
++
++    if (skip_flag) {
++        set_skip(s, x_cb, y_cb, log2_cb_size - log2_min_cb_size);
++        lc->cu.pred_mode = MODE_SKIP;
++
 +        hls_prediction_unit(s, lc, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
 +        intra_prediction_unit_default_value(s, lc, x0, y0, log2_cb_size);
 +
 +        if (!s->sh.disable_deblocking_filter_flag)
-+            ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_cb_size);
++            ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_cb_size, 0);
 +    } else {
 +        int pcm_flag = 0;
 +
@@ -46824,21 +23580,19 @@ index 0000000000..fac0dde400
 +        }
 +
 +        if (lc->cu.pred_mode == MODE_INTRA) {
-+            if (lc->cu.part_mode == PART_2Nx2N && s->ps.sps->pcm_enabled_flag &&
++            if (lc->cu.part_mode == PART_2Nx2N &&
++                log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size &&  // 0 if not enabled
 +                log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
-+                log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size) {
-+                pcm_flag = ff_hevc_rpi_pcm_flag_decode(lc);
-+            }
-+            if (pcm_flag) {
++                ff_hevc_rpi_pcm_flag_decode(lc) != 0)
++            {
++                int ret;
++                pcm_flag = 1;
 +                intra_prediction_unit_default_value(s, lc, x0, y0, log2_cb_size);
-+                ret = hls_pcm_sample(s, lc, x0, y0, log2_cb_size);
-+                if (s->ps.sps->pcm.loop_filter_disable_flag)
-+                {
-+                    set_deblocking_bypass(s, x0, y0, log2_cb_size);
-+                }
-+
-+                if (ret < 0)
++                if ((ret = hls_pcm_sample(s, lc, x0, y0, log2_cb_size)) < 0)
 +                    return ret;
++
++                if (s->ps.sps->pcm.loop_filter_disable_flag)
++                    set_deblocking_bypass(s, x0, y0, log2_cb_size);
 +            } else {
 +                intra_prediction_unit(s, lc, x0, y0, log2_cb_size);
 +            }
@@ -46850,31 +23604,39 @@ index 0000000000..fac0dde400
 +                break;
 +            case PART_2NxN:
 +                hls_prediction_unit(s, lc, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
++                lc->cu.y_split = y0 + cb_size / 2;
 +                hls_prediction_unit(s, lc, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
 +                break;
 +            case PART_Nx2N:
 +                hls_prediction_unit(s, lc, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
++                lc->cu.x_split = x0 + cb_size / 2;
 +                hls_prediction_unit(s, lc, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
 +                break;
 +            case PART_2NxnU:
 +                hls_prediction_unit(s, lc, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
-+                hls_prediction_unit(s, lc, x0, y0 + cb_size / 4, cb_size, cb_size * 3 / 4, log2_cb_size, 1, idx);
++                lc->cu.y_split = y0 + cb_size / 4;
++                hls_prediction_unit(s, lc, x0, y0 + cb_size / 4, cb_size, cb_size / 4 * 3, log2_cb_size, 1, idx);
 +                break;
 +            case PART_2NxnD:
-+                hls_prediction_unit(s, lc, x0, y0,                   cb_size, cb_size * 3 / 4, log2_cb_size, 0, idx);
-+                hls_prediction_unit(s, lc, x0, y0 + cb_size * 3 / 4, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
++                hls_prediction_unit(s, lc, x0, y0,                   cb_size, cb_size / 4 * 3, log2_cb_size, 0, idx);
++                lc->cu.y_split = y0 + cb_size / 4 * 3;
++                hls_prediction_unit(s, lc, x0, y0 + cb_size / 4 * 3, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
 +                break;
 +            case PART_nLx2N:
 +                hls_prediction_unit(s, lc, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
++                lc->cu.x_split = x0 + cb_size / 4;
 +                hls_prediction_unit(s, lc, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
 +                break;
 +            case PART_nRx2N:
-+                hls_prediction_unit(s, lc, x0,                   y0, cb_size * 3 / 4, cb_size, log2_cb_size, 0, idx - 2);
-+                hls_prediction_unit(s, lc, x0 + cb_size * 3 / 4, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
++                hls_prediction_unit(s, lc, x0,                   y0, cb_size / 4 * 3, cb_size, log2_cb_size, 0, idx - 2);
++                lc->cu.x_split = x0 + cb_size / 4 * 3;
++                hls_prediction_unit(s, lc, x0 + cb_size / 4 * 3, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
 +                break;
 +            case PART_NxN:
 +                hls_prediction_unit(s, lc, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
++                lc->cu.x_split = x0 + cb_size / 2;
 +                hls_prediction_unit(s, lc, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
++                lc->cu.y_split = y0 + cb_size / 2;
 +                hls_prediction_unit(s, lc, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
 +                hls_prediction_unit(s, lc, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
 +                break;
@@ -46889,37 +23651,37 @@ index 0000000000..fac0dde400
 +                rqt_root_cbf = ff_hevc_rpi_no_residual_syntax_flag_decode(lc);
 +            }
 +            if (rqt_root_cbf) {
-+                const static int cbf[2] = { 0 };
++                const unsigned int cbf_c = ctx_cfmt(s) == 0 ? 0 : (CBF_CR0 | CBF_CB0);
++                int ret;
++
 +                lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
 +                                         s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
 +                                         s->ps.sps->max_transform_hierarchy_depth_inter;
++                // transform_tree does deblock_boundary_strengths
 +                ret = hls_transform_tree(s, lc, x0, y0, x0, y0, x0, y0,
 +                                         log2_cb_size,
-+                                         log2_cb_size, 0, 0, cbf, cbf);
++                                         log2_cb_size, 0, 0, cbf_c);
 +                if (ret < 0)
 +                    return ret;
 +            } else {
 +                if (!s->sh.disable_deblocking_filter_flag)
-+                    ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_cb_size);
++                    ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_cb_size, 0);
 +            }
 +        }
 +    }
 +
++    // ?? We do a set where we read the delta too ??
 +    if (s->ps.pps->cu_qp_delta_enabled_flag && lc->tu.is_cu_qp_delta_coded == 0)
-+        ff_hevc_rpi_set_qPy(s, lc, x0, y0, log2_cb_size);
-+
-+    x = y_cb * min_cb_width + x_cb;
-+    for (y = 0; y < length; y++) {
-+        small_memset(&s->qp_y_tab[x], lc->qp_y, length);
-+        x += min_cb_width;
-+    }
++        ff_hevc_rpi_set_qPy(s, lc, x0, y0);
 +
 +    if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
 +       ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
 +        lc->qPy_pred = lc->qp_y;
 +    }
 +
-+    set_ct_depth(s, x0, y0, log2_cb_size, lc->ct_depth);
++    set_bytes(s->qp_y_tab + y_cb * min_cb_width + x_cb, min_cb_width, log2_cb_size - log2_min_cb_size, lc->qp_y & 0xff);
++
++    set_bytes(s->tab_ct_depth + y_cb * min_cb_width + x_cb, min_cb_width, log2_cb_size - log2_min_cb_size, lc->ct_depth);
 +
 +    return 0;
 +}
@@ -46929,7 +23691,7 @@ index 0000000000..fac0dde400
 +//  0    More data wanted
 +//  1    EoSlice / EoPicture
 +static int hls_coding_quadtree(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const int x0, const int y0,
-+                               const int log2_cb_size, const int cb_depth)
++                               const int log2_cb_size, const unsigned int cb_depth)
 +{
 +    const int cb_size    = 1 << log2_cb_size;
 +    int ret;
@@ -46944,18 +23706,19 @@ index 0000000000..fac0dde400
 +        split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
 +    }
 +    if (s->ps.pps->cu_qp_delta_enabled_flag &&
-+        log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth) {
++        log2_cb_size >= s->ps.pps->log2_min_cu_qp_delta_size) {
 +        lc->tu.is_cu_qp_delta_coded = 0;
 +        lc->tu.cu_qp_delta          = 0;
 +    }
 +
-+    lc->tu.is_cu_chroma_qp_offset_coded = !(s->sh.cu_chroma_qp_offset_enabled_flag &&
-+        log2_cb_size >= s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_chroma_qp_offset_depth);
-+    lc->tu.cu_qp_offset_cb = 0;
-+    lc->tu.cu_qp_offset_cr = 0;
++    lc->tu.cu_chroma_qp_offset_wanted = s->sh.cu_chroma_qp_offset_enabled_flag &&
++        log2_cb_size >= s->ps.pps->log2_min_cu_qp_delta_size;
++    lc->tu.qp_divmod6[0] = s->ps.pps->qp_bd_x[0];
++    lc->tu.qp_divmod6[1] = s->ps.pps->qp_bd_x[1] + s->sh.slice_cb_qp_offset;
++    lc->tu.qp_divmod6[2] = s->ps.pps->qp_bd_x[2] + s->sh.slice_cr_qp_offset;
 +
 +    if (split_cu) {
-+        int qp_block_mask = (1<<(s->ps.sps->log2_ctb_size - s->ps.pps->diff_cu_qp_delta_depth)) - 1;
++        int qp_block_mask = (1 << s->ps.pps->log2_min_cu_qp_delta_size) - 1;
 +        const int cb_size_split = cb_size >> 1;
 +        const int x1 = x0 + cb_size_split;
 +        const int y1 = y0 + cb_size_split;
@@ -47002,7 +23765,7 @@ index 0000000000..fac0dde400
 +            (!((y0 + cb_size) %
 +               (1 << (s->ps.sps->log2_ctb_size))) ||
 +             (y0 + cb_size >= s->ps.sps->height))) {
-+            int end_of_slice_flag = ff_hevc_rpi_end_of_slice_flag_decode(lc);
++            int end_of_slice_flag = ff_hevc_rpi_get_cabac_terminate(&lc->cc);
 +            return !end_of_slice_flag;
 +        } else {
 +            return 1;
@@ -47050,46 +23813,15 @@ index 0000000000..fac0dde400
 +}
 +
 +
-+static void rpi_execute_dblk_cmds(HEVCRpiContext * const s, HEVCRpiJob * const jb)
++static void rpi_execute_dblk_cmds(const HEVCRpiContext * const s, HEVCRpiJob * const jb)
 +{
-+    const unsigned int ctb_size = 1 << s->ps.sps->log2_ctb_size;
-+    const unsigned int x0 = FFMAX(jb->bounds.x, ctb_size) - ctb_size;
-+    const unsigned int y0 = FFMAX(jb->bounds.y, ctb_size) - ctb_size;
-+    const unsigned int bound_r = jb->bounds.x + jb->bounds.w;
-+    const unsigned int bound_b = jb->bounds.y + jb->bounds.h;
-+    const int x_end = (bound_r >= s->ps.sps->width);
-+    const int y_end = (bound_b >= s->ps.sps->height);
-+    const unsigned int xr = bound_r - (x_end ? 0 : ctb_size);
-+    const unsigned int yb = bound_b - (y_end ? 0 : ctb_size);
-+    unsigned int x, y;
-+
-+    for (y = y0; y < yb; y += ctb_size ) {
-+        for (x = x0; x < xr; x += ctb_size ) {
-+            ff_hevc_rpi_hls_filter(s, x, y, ctb_size);
-+        }
-+    }
-+
-+    // Flush (SAO)
-+    if (y > y0) {
-+        const unsigned int xl = x0 > ctb_size ? x0 - ctb_size : 0;
-+        const unsigned int yt = y0 > ctb_size ? y0 - ctb_size : 0;
-+        const unsigned int yb = (s->ps.pps->ctb_ts_flags[jb->ctu_ts_last] & CTB_TS_FLAGS_EOT) != 0 ?
-+            bound_b : y - ctb_size;
-+
-+        if (yb > yt && bound_r > xl)
-+        {
-+            rpi_cache_buf_t cbuf;
-+            rpi_cache_flush_env_t * const rfe = rpi_cache_flush_init(&cbuf);
-+            rpi_cache_flush_add_frame_block(rfe, s->frame, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE,
-+              xl, yt, bound_r - xl, yb - yt,
-+              ctx_vshift(s, 1), 1, 1);
-+            rpi_cache_flush_finish(rfe);
-+        }
-+    }
++    int y = ff_hevc_rpi_hls_filter_blk(s, jb->bounds,
++        (s->ps.pps->ctb_ts_flags[jb->ctu_ts_last] & CTB_TS_FLAGS_EOT) != 0);
 +
 +    // Signal
-+    if (s->threads_type == FF_THREAD_FRAME && x_end && y0 > 0) {
-+        ff_hevc_rpi_progress_signal_recon(s, y_end ? INT_MAX : y0 - 1);
++    if (s->threads_type == FF_THREAD_FRAME && y > 0) {
++        // Cast away const as progress is held in s, but this really shouldn't confuse anything
++        ff_hevc_rpi_progress_signal_recon((HEVCRpiContext *)s, y - 1);
 +    }
 +
 +    // Job done now
@@ -47097,15 +23829,19 @@ index 0000000000..fac0dde400
 +    job_free(s->jbc, jb);
 +}
 +
-+
 +// I-pred, transform_and_add for all blocks types done here
 +// All ARM
-+static void rpi_execute_pred_cmds(HEVCRpiContext * const s, HEVCRpiJob * const jb)
++static void rpi_execute_pred_cmds(const HEVCRpiContext * const s, HEVCRpiJob * const jb)
 +{
 +    unsigned int i;
 +    HEVCRpiIntraPredEnv * const iap = &jb->intra;
 +    const HEVCPredCmd *cmd = iap->cmds;
 +
++#if !RPI_WORKER_WAIT_PASS_0
++    rpi_sem_wait(&jb->sem);
++    rpi_cache_flush_execute(jb->rfe);  // Invalidate data set up in pass1
++#endif
++
 +    for (i = iap->n; i > 0; i--, cmd++)
 +    {
 +        switch (cmd->type)
@@ -47401,17 +24137,15 @@ index 0000000000..fac0dde400
 +}
 +#endif
 +
-+
 +// Core execution tasks
-+static void worker_core(HEVCRpiContext * const s0, HEVCRpiJob * const jb)
++static void worker_core(const HEVCRpiContext * const s, HEVCRpiJob * const jb)
 +{
-+    const HEVCRpiContext * const s = s0;
-+    vpu_qpu_wait_h sync_y;
 +    int pred_y, pred_c;
 +    vpu_qpu_job_env_t qvbuf;
 +    const vpu_qpu_job_h vqj = vpu_qpu_job_init(&qvbuf);
-+    rpi_cache_buf_t cbuf;
-+    rpi_cache_flush_env_t * const rfe = rpi_cache_flush_init(&cbuf);
++#if RPI_WORKER_WAIT_PASS_0
++    int do_wait;
++#endif
 +
 +    {
 +        const HEVCRpiCoeffsEnv * const cf = &jb->coeffs;
@@ -47444,21 +24178,27 @@ index 0000000000..fac0dde400
 +                n32,
 +                0);
 +
-+            rpi_cache_flush_add_gm_range(rfe, &cf->gptr, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE, 0, cf->s[2].n * csize);
-+            rpi_cache_flush_add_gm_range(rfe, &cf->gptr, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE, offset32, cf->s[3].n * csize);
++            rpi_cache_flush_add_gm_range(jb->rfe, &cf->gptr, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE, 0, cf->s[2].n * csize);
++            rpi_cache_flush_add_gm_range(jb->rfe, &cf->gptr, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE, offset32, cf->s[3].n * csize);
 +        }
 +    }
 +
-+    pred_c = mc_terminate_add_c(s, vqj, rfe, &jb->chroma_ip);
++    pred_c = mc_terminate_add_c(s, vqj, jb->rfe, &jb->chroma_ip);
 +
 +// We could take a sync here and try to locally overlap QPU processing with ARM
 +// but testing showed a slightly negative benefit with noticable extra complexity
 +
-+    pred_y = mc_terminate_add_y(s, vqj, rfe, &jb->luma_ip);
++    pred_y = mc_terminate_add_y(s, vqj, jb->rfe, &jb->luma_ip);
 +
-+    vpu_qpu_job_add_sync_this(vqj, &sync_y);
++    // Returns 0 if nothing to do, 1 if sync added
++#if RPI_WORKER_WAIT_PASS_0
++    do_wait = vpu_qpu_job_add_sync_sem(vqj, &jb->sem);
++#else
++    if (vpu_qpu_job_add_sync_sem(vqj, &jb->sem) == 0)
++        sem_post(&jb->sem);
++#endif
 +
-+    rpi_cache_flush_execute(rfe);
++    rpi_cache_flush_execute(jb->rfe);
 +
 +    // Await progress as required
 +    // jb->waited will only be clear if we have already tested the progress values
@@ -47478,7 +24218,7 @@ index 0000000000..fac0dde400
 +    // We always work on a rectangular block
 +    if (pred_y || pred_c)
 +    {
-+        rpi_cache_flush_add_frame_block(rfe, s->frame, RPI_CACHE_FLUSH_MODE_INVALIDATE,
++        rpi_cache_flush_add_frame_block(jb->rfe, s->frame, RPI_CACHE_FLUSH_MODE_INVALIDATE,
 +                                        jb->bounds.x, jb->bounds.y, jb->bounds.w, jb->bounds.h,
 +                                        ctx_vshift(s, 1), pred_y, pred_c);
 +    }
@@ -47507,12 +24247,11 @@ index 0000000000..fac0dde400
 +    }
 +#endif
 +
-+    // Wait for transform completion
-+    // ? Could/should be moved to next pass which would let us add more jobs
-+    //   to the VPU Q on this thread but when I tried that it all went a bit slower
-+    vpu_qpu_wait(&sync_y);
-+
-+    rpi_cache_flush_finish(rfe);
++#if RPI_WORKER_WAIT_PASS_0
++    if (do_wait)
++        rpi_sem_wait(&jb->sem);
++    rpi_cache_flush_execute(jb->rfe);
++#endif
 +}
 +
 +
@@ -47526,6 +24265,8 @@ index 0000000000..fac0dde400
 +{
 +    HEVCRpiJob * const jb = av_mallocz(sizeof(HEVCRpiJob));
 +
++    sem_init(&jb->sem, 0, 0);
++    jb->rfe = rpi_cache_flush_init(&jb->flush_buf);
 +    ff_hevc_rpi_progress_init_wait(&jb->progress_wait);
 +
 +    jb->intra.n = 0;
@@ -47559,6 +24300,8 @@ index 0000000000..fac0dde400
 +    av_freep(&jb->intra.cmds);
 +    rpi_free_inter_pred(&jb->chroma_ip);
 +    rpi_free_inter_pred(&jb->luma_ip);
++    rpi_cache_flush_finish(jb->rfe);  // Not really needed - should do nothing
++    sem_destroy(&jb->sem);
 +    av_free(jb);
 +}
 +
@@ -47835,8 +24578,8 @@ index 0000000000..fac0dde400
 +        if (more_data && ((ctb_flags & CTB_TS_FLAGS_EOT) != 0 ||
 +             (s->ps.pps->entropy_coding_sync_enabled_flag && (ctb_flags & CTB_TS_FLAGS_EOTL) != 0)))
 +        {
-+            if (get_cabac_terminate(&lc->cc) < 0 ||
-+                skip_bytes(&lc->cc, 0) == NULL)
++            if (ff_hevc_rpi_get_cabac_terminate(&lc->cc) < 0 ||
++                ff_hevc_rpi_cabac_skip_bytes(&lc->cc, 0) == NULL)
 +            {
 +                av_log(s->avctx, AV_LOG_ERROR, "Error reading terminate el\n ");
 +                return -1;
@@ -48004,6 +24747,7 @@ index 0000000000..fac0dde400
 +#endif
 +            sem_post(lc->bt_psem_out);
 +        }
++        // The wait for loop_n == 0 has been done in bit_thread
 +        if (!is_first && loop_n != 0)
 +        {
 +#if TRACE_WPP
@@ -48236,6 +24980,18 @@ index 0000000000..fac0dde400
 +#endif
 +
 +
++// If we are at EoT and the row is shorter than the number of jobs
++// we can Q, we have to wait for it to finish, otherwise we risk cache/QPU
++// disasters
++static inline int tile_needs_wait(const HEVCRpiContext * const s, const int n)
++{
++    return
++        s->ps.pps->tile_wpp_inter_disable >= 2 &&
++        s->sh.slice_type != HEVC_SLICE_I &&
++        n >= 0 &&
++        (s->ps.pps->ctb_ts_flags[n] & (CTB_TS_FLAGS_EOT | CTB_TS_FLAGS_EOL)) == CTB_TS_FLAGS_EOT;
++}
++
 +static int rpi_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
 +{
 +    HEVCRpiContext * const s  = avctxt->priv_data;
@@ -48249,6 +25005,7 @@ index 0000000000..fac0dde400
 +#if RPI_EXTRA_BIT_THREADS > 0
 +
 +    if (s->sh.num_entry_point_offsets != 0 &&
++        (!s->ps.pps->tile_wpp_inter_disable || s->sh.slice_type == HEVC_SLICE_I) &&
 +        s->ps.pps->num_tile_columns > 1)
 +    {
 +        unsigned int slice_row = 0;
@@ -48347,6 +25104,10 @@ index 0000000000..fac0dde400
 +                goto fail;
 +
 +            worker_submit_job(s, lc);
++
++            if (tile_needs_wait(s, lc->ts - 1))
++                worker_wait(s, lc);
++
 +        } while (!lc->unit_done);
 +
 +#if TRACE_WPP
@@ -48354,10 +25115,10 @@ index 0000000000..fac0dde400
 +#endif
 +    }
 +
-+    // If we have reached the end of the frame then wait for the worker to finish all its jobs
-+    if (lc->ts >= s->ps.sps->ctb_size) {
++    // If we have reached the end of the frame
++    // then wait for the worker to finish all its jobs
++    if (lc->ts >= s->ps.sps->ctb_size)
 +        worker_wait(s, lc);
-+    }
 +
 +#if RPI_TSTATS
 +    {
@@ -48536,10 +25297,10 @@ index 0000000000..fac0dde400
 +                           ((s->ps.sps->height >> s->ps.sps->log2_min_cb_size) + 1);
 +    int ret;
 +
-+    memset(s->horizontal_bs, 0, s->bs_width * s->bs_height);
-+    memset(s->vertical_bs,   0, s->bs_width * s->bs_height);
-+    memset(s->cbf_luma,      0, s->ps.sps->min_tb_width * s->ps.sps->min_tb_height);
-+    memset(s->is_pcm,        0, (s->ps.sps->min_pu_width + 1) * (s->ps.sps->min_pu_height + 1));
++    memset(s->horizontal_bs, 0, s->bs_size);
++    memset(s->vertical_bs2, 0, s->bs_size);
++    memset(s->is_pcm,        0, s->ps.sps->pcm_width * s->ps.sps->pcm_height);
++    memset(s->skip_flag,     0, s->ps.sps->min_cb_height * s->skip_flag_stride);
 +    memset(s->tab_slice_address, -1, pic_size_in_ctb * sizeof(*s->tab_slice_address));
 +
 +    s->is_decoded        = 0;
@@ -49114,8 +25875,6 @@ index 0000000000..fac0dde400
 +    if (!s->sei.picture_hash.md5_ctx)
 +        goto fail;
 +
-+    ff_bswapdsp_init(&s->bdsp);
-+
 +    s->context_initialized = 1;
 +    s->eos = 0;
 +
@@ -49325,6 +26084,12 @@ index 0000000000..fac0dde400
 +    AV_PIX_FMT_NONE
 +};
 +
++//static const AVCodecHWConfigInternal *hevc_rpi_hw_configs[] = {
++//    HW_CONFIG_INTERNAL(HEVC_RPI),
++//    NULL
++//};
++
++
 +AVCodec ff_hevc_rpi_decoder = {
 +    .name                  = "hevc_rpi",
 +    .long_name             = NULL_IF_CONFIG_SMALL("HEVC (rpi)"),
@@ -49339,6 +26104,7 @@ index 0000000000..fac0dde400
 +    .update_thread_context = hevc_update_thread_context,
 +    .init_thread_copy      = hevc_init_thread_copy,
 +    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
++//                             AV_CODEC_CAP_HARDWARE |
 +#if 0
 +    // Debugging is often easier without threads getting in the way
 +                            0,
@@ -49350,14 +26116,16 @@ index 0000000000..fac0dde400
 +    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_EXPORTS_CROPPING,
 +    .pix_fmts              = hevc_rpi_pix_fmts,
 +    .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
++//    .hw_configs            = hevc_rpi_hw_configs,
++//    .wrapper_name          = "hevc_rpi",
 +};
 +
 diff --git a/libavcodec/rpi_hevcdec.h b/libavcodec/rpi_hevcdec.h
 new file mode 100644
-index 0000000000..59d7be4fe8
+index 0000000000..117432de0a
 --- /dev/null
 +++ b/libavcodec/rpi_hevcdec.h
-@@ -0,0 +1,1066 @@
+@@ -0,0 +1,985 @@
 +/*
 + * HEVC video decoder
 + *
@@ -49452,19 +26220,24 @@ index 0000000000..59d7be4fe8
 +// Number of separate threads/passes in worker
 +// 2 and 3 are the currently valid numbers
 +// At the moment 3 seems fractionally faster
-+//#define RPI_PASSES 2
++//#define RPI_PASSES              2
 +#define RPI_PASSES              3
 +
 +// Print out various usage stats
 +#define RPI_TSTATS              0
 +
 +// Define RPI_COMPRESS_COEFFS to 1 to send coefficients in compressed form
-+#define RPI_COMPRESS_COEFFS 1
-+// Define RPI_DEBLOCK_VPU to perform deblocking on the VPUs
-+// (currently slower than deblocking on the ARM)
-+// #define RPI_DEBLOCK_VPU
++#define RPI_COMPRESS_COEFFS     1
 +
-+#define RPI_VPU_DEBLOCK_CACHED 0
++// Wait for VPU/QPU to finish in worker pass 0
++// If 0 then the wait is in pass 1
++//
++// One might expect the better place to wait would be in pass 1; however,
++// testing shows that pass 0 produces overall faster decode.
++// Interestingly it is QPU/VPU limited streams that seem to suffer
++// from pass 1 waits, CPU limited ones tend to show a very mild gain.
++// This define exists so it is easy to test this.
++#define RPI_WORKER_WAIT_PASS_0  1
 +
 +// Use ARM emulation of QPU pred
 +// These are for debug only as the emulation makes only limited
@@ -49656,9 +26429,11 @@ index 0000000000..59d7be4fe8
 +    RefPicList refPicList[2];
 +} RefPicListTab;
 +
-+typedef struct CodingUnit {
-+    int x;
-+    int y;
++typedef struct RpiCodingUnit {
++    unsigned int x;             // Passed to deblock
++    unsigned int y;
++    unsigned int x_split;
++    unsigned int y_split;
 +
 +    enum PredMode pred_mode;    ///< PredMode
 +    enum PartMode part_mode;    ///< PartMode
@@ -49667,7 +26442,7 @@ index 0000000000..59d7be4fe8
 +    uint8_t intra_split_flag;   ///< IntraSplitFlag
 +    uint8_t max_trafo_depth;    ///< MaxTrafoDepth
 +    uint8_t cu_transquant_bypass_flag;
-+} CodingUnit;
++} RpiCodingUnit;
 +
 +typedef struct RpiNeighbourAvailable {
 +    char cand_bottom_left;
@@ -49677,30 +26452,27 @@ index 0000000000..59d7be4fe8
 +    char cand_up_right;
 +} RpiNeighbourAvailable;
 +
-+typedef struct PredictionUnit {
-+    int mpm_idx;
-+    int rem_intra_luma_pred_mode;
++typedef struct RpiPredictionUnit {
 +    uint8_t intra_pred_mode[4];
-+    Mv mvd;
-+    uint8_t merge_flag;
 +    uint8_t intra_pred_mode_c[4];
 +    uint8_t chroma_mode_c[4];
-+} PredictionUnit;
++    Mv mvd;
++    uint8_t merge_flag;
++} RpiPredictionUnit;
 +
 +typedef struct TransformUnit {
-+    int cu_qp_delta;
-+
-+    int res_scale_val;
++    int8_t cu_qp_delta;
++    int8_t res_scale_val;
 +
 +    // Inferred parameters;
-+    int intra_pred_mode;
-+    int intra_pred_mode_c;
-+    int chroma_mode_c;
++    uint8_t intra_pred_mode;
++    uint8_t intra_pred_mode_c;
++    uint8_t chroma_mode_c;
 +    uint8_t is_cu_qp_delta_coded;
-+    uint8_t is_cu_chroma_qp_offset_coded;
-+    int8_t  cu_qp_offset_cb;
-+    int8_t  cu_qp_offset_cr;
++    uint8_t cu_chroma_qp_offset_wanted;
 +    uint8_t cross_pf;
++
++    const int8_t * qp_divmod6[3];
 +} TransformUnit;
 +
 +typedef struct DBParams {
@@ -49788,6 +26560,7 @@ index 0000000000..59d7be4fe8
 +    uint8_t stat_coeff[4];
 +    GetBitContext gb;
 +
++    uint8_t ct_depth;
 +    int8_t qp_y;
 +    int8_t curr_qp_y;
 +    int8_t qPy_pred;
@@ -49799,9 +26572,8 @@ index 0000000000..59d7be4fe8
 +    int     end_of_ctb_x;
 +    int     end_of_ctb_y;
 +
-+    int ct_depth;
-+    CodingUnit cu;
-+    PredictionUnit pu;
++    RpiCodingUnit cu;
++    RpiPredictionUnit pu;
 +
 +#define BOUNDARY_LEFT_SLICE     (1 << 0)
 +#define BOUNDARY_LEFT_TILE      (1 << 1)
@@ -49819,18 +26591,12 @@ index 0000000000..59d7be4fe8
 +
 +} HEVCRpiLocalContext;
 +
-+
 +// Each block can have an intra prediction and an add_residual command
 +// noof-cmds(2) * max-ctu height(64) / min-transform(4) * planes(3) * MAX_WIDTH
 +
 +// Sand only has 2 planes (Y/C)
 +#define RPI_MAX_PRED_CMDS (2*(HEVC_MAX_CTB_SIZE/4)*2*(HEVC_RPI_MAX_WIDTH/4))
 +
-+#ifdef RPI_DEBLOCK_VPU
-+// Worst case is 16x16 CTUs
-+#define RPI_MAX_DEBLOCK_CMDS (HEVC_RPI_MAX_WIDTH*4/16)
-+#endif
-+
 +// Command for intra prediction and transform_add of predictions to coefficients
 +enum rpi_pred_cmd_e
 +{
@@ -49958,6 +26724,7 @@ index 0000000000..59d7be4fe8
 +
 +    struct qpu_mc_pred_y_p_s * last_y8_p;
 +    struct qpu_mc_src_s * last_y8_l1;
++    rpi_cache_flush_env_t * rfe;
 +
 +    HEVCRpiInterPredEnv chroma_ip;
 +    HEVCRpiInterPredEnv luma_ip;
@@ -49965,11 +26732,13 @@ index 0000000000..59d7be4fe8
 +    HEVCRpiIntraPredEnv intra;
 +    HEVCRpiCoeffsEnv coeffs;
 +    HEVCRpiFrameProgressWait progress_wait;
++    sem_t sem;
++    rpi_cache_buf_t flush_buf;
 +} HEVCRpiJob;
 +
 +struct HEVCRpiContext;
 +
-+typedef void HEVCRpiWorkerFn(struct HEVCRpiContext * const s, HEVCRpiJob * const jb);
++typedef void HEVCRpiWorkerFn(const struct HEVCRpiContext * const s, HEVCRpiJob * const jb);
 +
 +typedef struct HEVCRpiPassQueue
 +{
@@ -50056,6 +26825,16 @@ index 0000000000..59d7be4fe8
 +    uint8_t             threads_type;
 +    uint8_t             threads_number;
 +
++    /** 1 if the independent slice segment header was successfully parsed */
++    uint8_t slice_initialized;
++
++    /**
++     * Sequence counters for decoded and output frames, so that old
++     * frames are output first after a POC reset
++     */
++    uint16_t seq_decode;
++    uint16_t seq_output;
++
 +    int                 width;
 +    int                 height;
 +
@@ -50075,49 +26854,8 @@ index 0000000000..59d7be4fe8
 +
 +    HEVCRpiFrameProgressState progress_states[2];
 +
-+#ifdef RPI_DEBLOCK_VPU
-+// With the new scheme of rpi_execute_dblk_cmds 
-+// it looks like ff_hevc_rpi_hls_filter is no longer called in raster order.
-+// This causes trouble if RPI_DEBLOCK_VPU_Q_COUNT > 1 because we prepare setup
-+// data for more than one row at a time before triggering the deblocker for one row.
-+// This means that the deblock of the final row can use the wrong setup buffer.
-+// 
-+// Also concerned that the thread progress and waiting for job completion is
-+// not done correctly with RPI_DEBLOCK_VPU at the end of the frame, or for small CTU sizes.
-+#define RPI_DEBLOCK_VPU_Q_COUNT 1
-+
-+    int enable_rpi_deblock;
-+
-+    int uv_setup_width;
-+    int uv_setup_height;
-+    int setup_width; // Number of 16x16 blocks across the image
-+    int setup_height; // Number of 16x16 blocks down the image
-+
-+    struct dblk_vpu_q_s
-+    {
-+        GPU_MEM_PTR_T deblock_vpu_gmem;
-+
-+        uint8_t (*y_setup_arm)[2][2][2][4];
-+        uint8_t (*y_setup_vc)[2][2][2][4];
-+
-+        uint8_t (*uv_setup_arm)[2][2][2][4];
-+        uint8_t (*uv_setup_vc)[2][2][2][4];
-+
-+        int (*vpu_cmds_arm)[6]; // r0-r5 for each command
-+        int vpu_cmds_vc;
-+
-+        vpu_qpu_wait_h cmd_id;
-+    } dvq_ents[RPI_DEBLOCK_VPU_Q_COUNT];
-+
-+    struct dblk_vpu_q_s * dvq;
-+    unsigned int dvq_n;
-+#endif
-+
 +    HEVCRpiCabacState *cabac_save;
 +
-+    /** 1 if the independent slice segment header was successfully parsed */
-+    uint8_t slice_initialized;
-+
 +    AVFrame *frame;
 +    AVFrame *output_frame;
 +    uint8_t *sao_pixel_buffer_h[3];
@@ -50132,7 +26870,7 @@ index 0000000000..59d7be4fe8
 +    RefPicList rps[5];
 +
 +    RpiSliceHeader sh;
-+    SAOParams *sao;
++    RpiSAOParams *sao;
 +    DBParams *deblock;
 +    enum HEVCNALUnitType nal_unit_type;
 +    int temporal_id;  ///< temporal_id_plus1 - 1
@@ -50144,29 +26882,29 @@ index 0000000000..59d7be4fe8
 +    int eos;       ///< current packet contains an EOS/EOB NAL
 +    int last_eos;  ///< last packet contains an EOS/EOB NAL
 +    int max_ra;
-+    int bs_width;
-+    int bs_height;
++    unsigned int hbs_stride;
++    unsigned int bs_size;
 +
 +    int is_decoded;
 +    int no_rasl_output_flag;
 +
 +    HEVCPredContext hpc;
 +    HEVCDSPContext hevcdsp;
-+    VideoDSPContext vdsp;
-+    BswapDSPContext bdsp;
 +    int8_t *qp_y_tab;
 +    uint8_t *horizontal_bs;
-+    uint8_t *vertical_bs;
++    uint8_t *vertical_bs2;
++    uint8_t *bsf_stash_up;
++    uint8_t *bsf_stash_left;
 +
 +    int32_t *tab_slice_address;
 +
 +    //  CU
++    unsigned int skip_flag_stride;
 +    uint8_t *skip_flag;
 +    uint8_t *tab_ct_depth;
 +    // PU
 +    uint8_t *tab_ipm;
 +
-+    uint8_t *cbf_luma; // cbf_luma of colocated TU
 +    uint8_t *is_pcm;
 +
 +    // CTB-level flags affecting loop filter operation
@@ -50176,13 +26914,6 @@ index 0000000000..59d7be4fe8
 +    uint8_t *checksum_buf;
 +    int      checksum_buf_size;
 +
-+    /**
-+     * Sequence counters for decoded and output frames, so that old
-+     * frames are output first after a POC reset
-+     */
-+    uint16_t seq_decode;
-+    uint16_t seq_output;
-+
 +    atomic_int wpp_err;
 +
 +    const uint8_t *data;
@@ -50238,39 +26969,6 @@ index 0000000000..59d7be4fe8
 + */
 +int ff_hevc_rpi_slice_rpl(HEVCRpiContext *s);
 +
-+void ff_hevc_rpi_save_states(HEVCRpiContext *s, const HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_cabac_init_decoder(HEVCRpiLocalContext * const lc);
-+void ff_hevc_rpi_cabac_init(const HEVCRpiContext * const s, HEVCRpiLocalContext *const lc, const unsigned int ctb_flags);
-+int ff_hevc_rpi_sao_merge_flag_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_sao_type_idx_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_sao_band_position_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_sao_offset_abs_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_sao_offset_sign_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_sao_eo_class_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_end_of_slice_flag_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_cu_transquant_bypass_flag_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_skip_flag_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
-+                             const int x0, const int y0, const int x_cb, const int y_cb);
-+int ff_hevc_rpi_pred_mode_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_split_coding_unit_flag_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const int ct_depth,
-+                                          const int x0, const int y0);
-+int ff_hevc_rpi_part_mode_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const int log2_cb_size);
-+int ff_hevc_rpi_pcm_flag_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_prev_intra_luma_pred_flag_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_mpm_idx_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_rem_intra_luma_pred_mode_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_intra_chroma_pred_mode_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_merge_idx_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_merge_flag_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_inter_pred_idc_decode(HEVCRpiLocalContext * const lc, int nPbW, int nPbH);
-+int ff_hevc_rpi_ref_idx_lx_decode(HEVCRpiLocalContext * const lc, const int num_ref_idx_lx);
-+int ff_hevc_rpi_mvp_lx_flag_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_no_residual_syntax_flag_decode(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_split_transform_flag_decode(HEVCRpiLocalContext * const lc, const int log2_trafo_size);
-+int ff_hevc_rpi_cbf_cb_cr_decode(HEVCRpiLocalContext * const lc, const int trafo_depth);
-+int ff_hevc_rpi_cbf_luma_decode(HEVCRpiLocalContext * const lc, const int trafo_depth);
-+int ff_hevc_rpi_log2_res_scale_abs(HEVCRpiLocalContext * const lc, const int idx);
-+int ff_hevc_rpi_res_scale_sign_flag(HEVCRpiLocalContext *const lc, const int idx);
 +
 +/**
 + * Get the number of candidate references for the current frame.
@@ -50298,22 +26996,11 @@ index 0000000000..59d7be4fe8
 +                              int nPbH, int log2_cb_size, int part_idx,
 +                              int merge_idx, MvField * const mv,
 +                              int mvp_lx_flag, int LX);
-+void ff_hevc_rpi_set_qPy(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int xBase, int yBase, int log2_cb_size);
-+void ff_hevc_rpi_deblocking_boundary_strengths(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int x0, int y0,
-+                                           int log2_trafo_size);
-+int ff_hevc_rpi_cu_qp_delta_sign_flag(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_cu_qp_delta_abs(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_cu_chroma_qp_offset_flag(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_cu_chroma_qp_offset_idx(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc);
-+void ff_hevc_rpi_hls_filter(HEVCRpiContext * const s, const int x, const int y, const int ctb_size);
-+void ff_hevc_rpi_hls_filters(HEVCRpiContext *s, int x_ctb, int y_ctb, int ctb_size);
-+void ff_hevc_rpi_hls_residual_coding(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
-+                                const int x0, const int y0,
-+                                const int log2_trafo_size, const enum ScanType scan_idx,
-+                                const int c_idx);
-+
-+void ff_hevc_rpi_hls_mvd_coding(HEVCRpiLocalContext * const lc);
-+int ff_hevc_rpi_cabac_overflow(const HEVCRpiLocalContext * const lc);
++void ff_hevc_rpi_set_qPy(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int xBase, int yBase);
++void ff_hevc_rpi_deblocking_boundary_strengths(const HEVCRpiContext * const s, const HEVCRpiLocalContext * const lc,
++                                               const unsigned int x0, const unsigned int y0,
++                                               const unsigned int log2_trafo_size, const int is_coded_block);
++int ff_hevc_rpi_hls_filter_blk(const HEVCRpiContext * const s, const RpiBlk bounds, const int eot);
 +
 +extern const uint8_t ff_hevc_rpi_qpel_extra_before[4];
 +extern const uint8_t ff_hevc_rpi_qpel_extra_after[4];
@@ -50373,7 +27060,7 @@ index 0000000000..59d7be4fe8
 +{
 +    if (ref->tf.progress != NULL)
 +    {
-+        int * const p = (int *)&ref->tf.progress->data;
++        int * const p = (int *)ref->tf.progress->data;
 +        p[0] = INT_MAX;
 +        p[1] = INT_MAX;
 +    }
@@ -50426,10 +27113,10 @@ index 0000000000..59d7be4fe8
 +#endif /* AVCODEC_RPI_HEVCDEC_H */
 diff --git a/libavcodec/rpi_hevcdsp.c b/libavcodec/rpi_hevcdsp.c
 new file mode 100644
-index 0000000000..3e4cfe8d46
+index 0000000000..a6af5ecd85
 --- /dev/null
 +++ b/libavcodec/rpi_hevcdsp.c
-@@ -0,0 +1,415 @@
+@@ -0,0 +1,416 @@
 +/*
 + * HEVC video decoder
 + *
@@ -50758,6 +27445,7 @@ index 0000000000..3e4cfe8d46
 +    hevcdsp->add_residual_dc_c[3]   = FUNC(add_residual32x32_dc_c, depth);       \
 +    hevcdsp->put_pcm_c              = FUNC(put_pcm_c, depth)
 +#define SLICED_LOOP_FILTERS(depth)\
++    hevcdsp->hevc_h_loop_filter_luma2 = FUNC(hevc_h_loop_filter_luma2, depth); \
 +    hevcdsp->hevc_v_loop_filter_luma2 = FUNC(hevc_v_loop_filter_luma2, depth); \
 +    hevcdsp->hevc_h_loop_filter_uv    = FUNC(hevc_h_loop_filter_uv, depth);    \
 +    hevcdsp->hevc_v_loop_filter_uv2   = FUNC(hevc_v_loop_filter_uv2, depth)
@@ -50847,10 +27535,10 @@ index 0000000000..3e4cfe8d46
 +}
 diff --git a/libavcodec/rpi_hevcdsp.h b/libavcodec/rpi_hevcdsp.h
 new file mode 100644
-index 0000000000..c974baa820
+index 0000000000..59d06bbe28
 --- /dev/null
 +++ b/libavcodec/rpi_hevcdsp.h
-@@ -0,0 +1,182 @@
+@@ -0,0 +1,183 @@
 +/*
 + * HEVC video decoder
 + *
@@ -50883,17 +27571,17 @@ index 0000000000..c974baa820
 +
 +#define MAX_PB_SIZE 64
 +
-+typedef struct SAOParams {
-+//    int offset_abs[3][4];   ///< sao_offset_abs
-+//    int offset_sign[3][4];  ///< sao_offset_sign
++#define RPI_HEVC_SAO_BUF_STRIDE 160
 +
-+    uint8_t band_position[3];   ///< sao_band_position
-+    uint8_t eo_class[3];        ///< sao_eo_class
-+    uint8_t type_idx[3];    ///< sao_type_idx
 +
-+    int16_t offset_val[3][5];   ///<SaoOffsetVal
++typedef struct RpiSAOParams {
++    uint8_t band_position[3];   ///< sao_band_position (Y,U,V)
++    uint8_t eo_class[3];        ///< sao_eo_class      (Y,U=V)
++    uint8_t type_idx[3];        ///< sao_type_idx      (Y,U=V)
 +
-+} SAOParams;
++    int16_t offset_val[3][5];   ///<SaoOffsetVal       (Y,U,V)
++
++} RpiSAOParams;
 +
 +typedef struct Mv {
 +    int16_t x;  ///< horizontal component of motion vector
@@ -50951,10 +27639,10 @@ index 0000000000..c974baa820
 +                               const int16_t *sao_offset_val_u, const int16_t *sao_offset_val_v, int sao_eo_class, int width, int height);
 +
 +    void (*sao_edge_restore[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
-+                                struct SAOParams *sao, int *borders, int _width, int _height, int c_idx,
++                                struct RpiSAOParams *sao, int *borders, int _width, int _height, int c_idx,
 +                                uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge);
 +    void (*sao_edge_restore_c[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
-+                                struct SAOParams *sao, int *borders, int _width, int _height, int c_idx,
++                                struct RpiSAOParams *sao, int *borders, int _width, int _height, int c_idx,
 +                                uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge);
 +
 +    void (*put_hevc_qpel[10][2][2])(int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
@@ -51008,9 +27696,10 @@ index 0000000000..c974baa820
 +    void (*hevc_v_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride,
 +                                        int32_t *tc, uint8_t *no_p,
 +                                        uint8_t *no_q);
++    void (*hevc_h_loop_filter_luma2)(uint8_t * _pix_r,
++                                 unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f);
 +    void (*hevc_v_loop_filter_luma2)(uint8_t * _pix_r,
-+                                 unsigned int _stride, unsigned int beta, const int32_t tc[2],
-+                                 const uint8_t no_p[2], const uint8_t no_q[2],
++                                 unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f,
 +                                 uint8_t * _pix_l);
 +    void (*hevc_h_loop_filter_uv)(uint8_t * src, unsigned int stride, uint32_t tc4,
 +                                 unsigned int no_f);
@@ -51035,10 +27724,10 @@ index 0000000000..c974baa820
 +#endif /* AVCODEC_RPI_HEVCDSP_H */
 diff --git a/libavcodec/rpi_hevcdsp_template.c b/libavcodec/rpi_hevcdsp_template.c
 new file mode 100644
-index 0000000000..b129a70315
+index 0000000000..cfe9264fc3
 --- /dev/null
 +++ b/libavcodec/rpi_hevcdsp_template.c
-@@ -0,0 +1,2269 @@
+@@ -0,0 +1,2278 @@
 +/*
 + * HEVC video decoder
 + *
@@ -51662,7 +28351,7 @@ index 0000000000..b129a70315
 +#endif
 +#if BIT_DEPTH <= 9 || BIT_DEPTH == 32
 +static void FUNC(sao_edge_restore_0)(uint8_t *_dst, uint8_t *_src,
-+                                    ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
++                                    ptrdiff_t stride_dst, ptrdiff_t stride_src, RpiSAOParams *sao,
 +                                    int *borders, int _width, int _height,
 +                                    int c_idx, uint8_t *vert_edge,
 +                                    uint8_t *horiz_edge, uint8_t *diag_edge)
@@ -51707,7 +28396,7 @@ index 0000000000..b129a70315
 +}
 +
 +static void FUNC(sao_edge_restore_1)(uint8_t *_dst, uint8_t *_src,
-+                                    ptrdiff_t stride_dst, ptrdiff_t stride_src, SAOParams *sao,
++                                    ptrdiff_t stride_dst, ptrdiff_t stride_src, RpiSAOParams *sao,
 +                                    int *borders, int _width, int _height,
 +                                    int c_idx, uint8_t *vert_edge,
 +                                    uint8_t *horiz_edge, uint8_t *diag_edge)
@@ -53123,8 +29812,7 @@ index 0000000000..b129a70315
 +// This is identical to hevc_loop_filter_luma except that the P/Q
 +// components are on separate pointers
 +static void FUNC(hevc_v_loop_filter_luma2)(uint8_t * _pix_r,
-+                                 unsigned int _stride, unsigned int beta, const int32_t _tc[2],
-+                                 const uint8_t _no_p[2], const uint8_t _no_q[2],
++                                 unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f,
 +                                 uint8_t * _pix_l)
 +{
 +    int d, j;
@@ -53142,9 +29830,9 @@ index 0000000000..b129a70315
 +        const int dq3  = abs(TQ2 - 2 * TQ1 + TQ0);
 +        const int d0   = dp0 + dq0;
 +        const int d3   = dp3 + dq3;
-+        const int tc   = _tc[j]   << (BIT_DEPTH - 8);
-+        const int no_p = _no_p[j];
-+        const int no_q = _no_q[j];
++        const int tc   = ((tc2 >> (j << 4)) & 0xffff) << (BIT_DEPTH - 8);
++        const int no_p = no_f & 1;
++        const int no_q = no_f & 2;
 +
 +        if (d0 + d3 >= beta) {
 +            pix_l += 4 * ystride;
@@ -53222,6 +29910,16 @@ index 0000000000..b129a70315
 +    }
 +}
 +
++static void FUNC(hevc_h_loop_filter_luma2)(uint8_t * _pix_r,
++                                 unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f)
++{
++    // Just call the non-2 function having massaged the parameters
++    int32_t tc[2] = {tc2 & 0xffff, tc2 >> 16};
++    uint8_t no_p[2] = {no_f & 1, no_f & 1};
++    uint8_t no_q[2] = {no_f & 2, no_f & 2};
++    FUNC(hevc_h_loop_filter_luma)(_pix_r, _stride, beta, tc, no_p, no_q);
++}
++
 +#undef TP3
 +#undef TP2
 +#undef TP1
@@ -54572,10 +31270,10 @@ index 0000000000..b3168788d2
 +#endif
 diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c
 new file mode 100644
-index 0000000000..89d6220684
+index 0000000000..ddbb1eb9a6
 --- /dev/null
 +++ b/libavcodec/rpi_qpu.c
-@@ -0,0 +1,954 @@
+@@ -0,0 +1,992 @@
 +#include <stdio.h>
 +#include <stdlib.h>
 +#include <string.h>
@@ -54781,6 +31479,8 @@ index 0000000000..89d6220684
 +// GPU memory alloc fns (internal)
 +
 +// GPU_MEM_PTR_T alloc fns
++// The magic 0x80 on the cache type means: map all pages to arm memory now
++//   rather than demand page later
 +static int gpu_malloc_cached_internal(const int mb, const int numbytes, GPU_MEM_PTR_T * const p) {
 +  p->numbytes = (numbytes + 255) & ~255;  // Round up
 +  p->vcsm_handle = vcsm_malloc_cache(p->numbytes, VCSM_CACHE_TYPE_HOST | 0x80, (char *)"Video Frame" );
@@ -54794,22 +31494,19 @@ index 0000000000..89d6220684
 +  av_assert0(p->arm);
 +  p->vc = mbox_mem_lock(mb, p->vc_handle);
 +  av_assert0(p->vc);
-+//  printf("***** %s, %d\n", __func__, numbytes);
-+
 +  return 0;
 +}
 +
 +static int gpu_malloc_vccached_internal(const int mb, const int numbytes, GPU_MEM_PTR_T * const p) {
 +  p->numbytes = numbytes;
-+  p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC , (char *)"VPU code" );
++  p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC | 0x80, (char *)"VPU code" );
 +  av_assert0(p->vcsm_handle);
 +  p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle);
 +  av_assert0(p->vc_handle);
 +  p->arm = vcsm_lock(p->vcsm_handle);
 +  av_assert0(p->arm);
-+  p->vc = mbox_mem_lock(mb, p->vc_handle) & 0x3fffffff;  // ??? If I want caching then lose the top 2 bits
++  p->vc = mbox_mem_lock(mb, p->vc_handle);
 +  av_assert0(p->vc);
-+//  printf("***** %s, %d\n", __func__, numbytes);
 +  return 0;
 +}
 +
@@ -54823,7 +31520,6 @@ index 0000000000..89d6220684
 +  av_assert0(p->arm);
 +  p->vc = mbox_mem_lock(mb, p->vc_handle);
 +  av_assert0(p->vc);
-+//  printf("***** %s, %d\n", __func__, numbytes);
 +  return 0;
 +}
 +
@@ -54832,7 +31528,6 @@ index 0000000000..89d6220684
 +  vcsm_unlock_ptr(p->arm);
 +  vcsm_free(p->vcsm_handle);
 +  memset(p, 0, sizeof(*p));  // Ensure we crash hard if we try and use this again
-+//  printf("***** %s\n", __func__);
 +}
 +
 +
@@ -54925,7 +31620,7 @@ index 0000000000..89d6220684
 +static gpu_env_t * gpu_lock(void) {
 +  pthread_mutex_lock(&gpu_mutex);
 +
-+  av_assert0(gpu != NULL);
++  av_assert1(gpu != NULL);
 +  return gpu;
 +}
 +
@@ -54955,7 +31650,7 @@ index 0000000000..89d6220684
 +
 +static inline gpu_env_t * gpu_ptr(void)
 +{
-+  av_assert0(gpu != NULL);
++  av_assert1(gpu != NULL);
 +  return gpu;
 +}
 +
@@ -55001,7 +31696,7 @@ index 0000000000..89d6220684
 +  uint32_t a = 0;
 +
 +  // Make sure that the gpu is initialized
-+  av_assert0(gpu != NULL);
++  av_assert1(gpu != NULL);
 +  switch (bit_depth){
 +    case 8:
 +      a = gpu->code_gm_ptr.vc + offsetof(struct GPU, vpu_code8);
@@ -55016,13 +31711,13 @@ index 0000000000..89d6220684
 +}
 +
 +unsigned int vpu_get_constants(void) {
-+  av_assert0(gpu != NULL);
++  av_assert1(gpu != NULL);
 +  return (gpu->code_gm_ptr.vc + offsetof(struct GPU,transMatrix2even));
 +}
 +
 +int gpu_get_mailbox(void)
 +{
-+  av_assert0(gpu);
++  av_assert1(gpu);
 +  return gpu->mb;
 +}
 +
@@ -55082,7 +31777,7 @@ index 0000000000..89d6220684
 +{
 +  struct vcsm_user_clean_invalid2_block_s * const b = rfe->v.s + rfe->v.op_count++;
 +
-+  av_assert0(rfe->v.op_count <= CACHE_EL_MAX);
++  av_assert1(rfe->v.op_count <= CACHE_EL_MAX);
 +
 +  b->invalidate_mode = mode;
 +  b->block_count = blocks;
@@ -55098,9 +31793,9 @@ index 0000000000..89d6220684
 +  if (gm == NULL || size == 0)
 +    return;
 +
-+  av_assert0(offset <= gm->numbytes);
-+  av_assert0(size <= gm->numbytes);
-+  av_assert0(offset + size <= gm->numbytes);
++  av_assert1(offset <= gm->numbytes);
++  av_assert1(size <= gm->numbytes);
++  av_assert1(offset + size <= gm->numbytes);
 +
 +  rpi_cache_flush_add_gm_blocks(rfe, gm, mode, offset, size, 1, 0);
 +}
@@ -55177,7 +31872,7 @@ index 0000000000..89d6220684
 +    const unsigned int xshl = av_rpi_sand_frame_xshl(frame);
 +    const unsigned int xleft = x0 & ~((stride1 >> xshl) - 1);
 +    const unsigned int block_count = (((x0 + width - xleft) << xshl) + stride1 - 1) / stride1;  // Same for Y & C
-+    av_assert0(rfe->v.op_count + do_chroma + do_luma < CACHE_EL_MAX);
++    av_assert1(rfe->v.op_count + do_chroma + do_luma < CACHE_EL_MAX);
 +
 +    if (do_chroma)
 +    {
@@ -55335,7 +32030,7 @@ index 0000000000..89d6220684
 +static inline struct gpu_job_s * new_job(vpu_qpu_job_env_t * const vqj)
 +{
 +  struct gpu_job_s * const j = vqj->j + vqj->n++;
-+  av_assert0(vqj->n <= VPU_QPU_JOB_MAX);
++  av_assert1(vqj->n <= VPU_QPU_JOB_MAX);
 +  return j;
 +}
 +
@@ -55391,6 +32086,12 @@ index 0000000000..89d6220684
 +  vq_wait_post(v);
 +}
 +
++// Poke a user-supplied sem
++static void vpu_qpu_job_callback_sem(void * v)
++{
++  sem_post((sem_t *)v);
++}
++
 +void vpu_qpu_job_add_sync_this(vpu_qpu_job_env_t * const vqj, vpu_qpu_wait_h * const wait_h)
 +{
 +  vq_wait_t * wait;
@@ -55408,7 +32109,7 @@ index 0000000000..89d6220684
 +  if (vqj->n == 1 || vqj->mask == VPU_QPU_MASK_QPU)
 +  {
 +    struct gpu_job_s * const j = vqj->j + (vqj->n - 1);
-+    av_assert0(j->callback.func == 0);
++    av_assert1(j->callback.func == 0);
 +
 +    j->callback.func = vpu_qpu_job_callback_wait;
 +    j->callback.cookie = wait;
@@ -55427,9 +32128,44 @@ index 0000000000..89d6220684
 +  *wait_h = wait;
 +}
 +
++// Returns 0 if no sync added ('cos Q empty), 1 if sync added
++int vpu_qpu_job_add_sync_sem(vpu_qpu_job_env_t * const vqj, sem_t * const sem)
++{
++  // If nothing on q then just return
++  if (vqj->mask == 0)
++    return 0;
++
++  // There are 2 VPU Qs & 1 QPU Q so we can collapse sync
++  // If we only posted one thing or only QPU jobs
++  if (vqj->n == 1 || vqj->mask == VPU_QPU_MASK_QPU)
++  {
++    struct gpu_job_s * const j = vqj->j + (vqj->n - 1);
++    av_assert1(j->callback.func == 0);
++
++    j->callback.func = vpu_qpu_job_callback_sem;
++    j->callback.cookie = sem;
++  }
++  else
++  {
++    struct gpu_job_s *const j = new_job(vqj);
++
++    j->command = EXECUTE_SYNC;
++    j->u.s.mask = vqj->mask;
++    j->callback.func = vpu_qpu_job_callback_sem;
++    j->callback.cookie = sem;
++  }
++
++  vqj->mask = 0;
++  return 1;
++}
++
++
 +int vpu_qpu_job_start(vpu_qpu_job_env_t * const vqj)
 +{
-+  return vqj->n == 0 ? 0 : vc_gpuserv_execute_code(vqj->n, vqj->j);
++  if (vqj->n == 0)
++    return 0;
++
++  return vc_gpuserv_execute_code(vqj->n, vqj->j);
 +}
 +
 +// Simple wrapper of start + delete
@@ -55532,10 +32268,10 @@ index 0000000000..89d6220684
 +
 diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h
 new file mode 100644
-index 0000000000..f75be88ad4
+index 0000000000..e1b4d9c39e
 --- /dev/null
 +++ b/libavcodec/rpi_qpu.h
-@@ -0,0 +1,228 @@
+@@ -0,0 +1,229 @@
 +#ifndef RPI_QPU_H
 +#define RPI_QPU_H
 +
@@ -55748,6 +32484,7 @@ index 0000000000..f75be88ad4
 +  const unsigned r0, const unsigned r1, const unsigned r2, const unsigned r3, const unsigned r4, const unsigned r5);
 +void vpu_qpu_job_add_qpu(const vpu_qpu_job_h vqj, const unsigned int n, const uint32_t * const mail);
 +void vpu_qpu_job_add_sync_this(const vpu_qpu_job_h vqj, vpu_qpu_wait_h * const wait_h);
++int vpu_qpu_job_add_sync_sem(vpu_qpu_job_env_t * const vqj, sem_t * const sem);
 +int vpu_qpu_job_start(const vpu_qpu_job_h vqj);
 +int vpu_qpu_job_finish(const vpu_qpu_job_h vqj);
 +
@@ -56622,80 +33359,35 @@ index 0000000000..26fb3be999
 +
 +#endif
 +
-diff --git a/libavcodec/utils.c b/libavcodec/utils.c
-index 9551f312e7..a1f68b8e30 100644
---- a/libavcodec/utils.c
-+++ b/libavcodec/utils.c
-@@ -1277,6 +1277,40 @@ AVCodec *avcodec_find_decoder(enum AVCodecID id)
-     return find_encdec(id, 0);
- }
- 
-+static int codec_supports_format(const AVCodec * const p, const enum AVPixelFormat fmt)
-+{
-+    const enum AVPixelFormat *pf = p->pix_fmts;
-+
-+    // Assume good if we lack info
-+    if (pf == NULL)
-+        return 1;
-+    if (fmt == AV_PIX_FMT_NONE)
-+        return 0;
-+
-+    for (; *pf != AV_PIX_FMT_NONE; ++pf) {
-+        if (*pf == fmt)
-+            return 1;
-+    }
-+    return 0;
-+}
-+
-+AVCodec *avcodec_find_decoder_by_id_and_fmt(enum AVCodecID id, enum AVPixelFormat fmt)
-+{
-+    AVCodec *p, *experimental = NULL;
-+    p = first_avcodec;
-+    id= remap_deprecated_codec_id(id);
-+    while (p) {
-+        if (av_codec_is_decoder(p) && p->id == id && codec_supports_format(p, fmt)) {
-+            if (p->capabilities & AV_CODEC_CAP_EXPERIMENTAL && !experimental) {
-+                experimental = p;
-+            } else
-+                return p;
-+        }
-+        p = p->next;
-+    }
-+    return experimental;
-+}
-+
- AVCodec *avcodec_find_decoder_by_name(const char *name)
- {
-     AVCodec *p;
 diff --git a/libavfilter/Makefile b/libavfilter/Makefile
-index d2f0495f37..56bb87f851 100644
+index 3a9fb02556..32e56f6b15 100644
 --- a/libavfilter/Makefile
 +++ b/libavfilter/Makefile
-@@ -323,6 +323,7 @@ OBJS-$(CONFIG_TONEMAP_FILTER)                += vf_tonemap.o
+@@ -346,6 +346,7 @@ OBJS-$(CONFIG_TONEMAP_FILTER)                += vf_tonemap.o
  OBJS-$(CONFIG_TRANSPOSE_FILTER)              += vf_transpose.o
  OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
  OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
 +OBJS-$(CONFIG_UNSAND_FILTER)                 += vf_unsand.o
  OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
- OBJS-$(CONFIG_USPP_FILTER)                   += vf_uspp.o
- OBJS-$(CONFIG_VAGUEDENOISER_FILTER)          += vf_vaguedenoiser.o
+ OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER)         += vf_unsharp_opencl.o opencl.o \
+                                                 opencl/unsharp.o
 diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
-index 9b672a7a7e..d92e47e651 100644
+index 68b2992027..3b059fce4e 100644
 --- a/libavfilter/allfilters.c
 +++ b/libavfilter/allfilters.c
-@@ -334,6 +334,7 @@ static void register_all(void)
-     REGISTER_FILTER(TRANSPOSE,      transpose,      vf);
-     REGISTER_FILTER(TRIM,           trim,           vf);
-     REGISTER_FILTER(UNPREMULTIPLY,  unpremultiply,  vf);
-+    REGISTER_FILTER(UNSAND,         unsand,         vf);
-     REGISTER_FILTER(UNSHARP,        unsharp,        vf);
-     REGISTER_FILTER(USPP,           uspp,           vf);
-     REGISTER_FILTER(VAGUEDENOISER,  vaguedenoiser,  vf);
+@@ -338,6 +338,7 @@ extern AVFilter ff_vf_transpose;
+ extern AVFilter ff_vf_trim;
+ extern AVFilter ff_vf_unpremultiply;
+ extern AVFilter ff_vf_unsharp;
++extern AVFilter ff_vf_unsand;
+ extern AVFilter ff_vf_unsharp_opencl;
+ extern AVFilter ff_vf_uspp;
+ extern AVFilter ff_vf_vaguedenoiser;
 diff --git a/libavfilter/avfiltergraph.c b/libavfilter/avfiltergraph.c
-index 4304c06847..7bed282dff 100644
+index 4cc6892404..9db92322a4 100644
 --- a/libavfilter/avfiltergraph.c
 +++ b/libavfilter/avfiltergraph.c
-@@ -31,6 +31,9 @@
+@@ -32,6 +32,9 @@
  #include "libavutil/internal.h"
  #include "libavutil/opt.h"
  #include "libavutil/pixdesc.h"
@@ -56705,7 +33397,7 @@ index 4304c06847..7bed282dff 100644
  
  #define FF_INTERNAL_FIELDS 1
  #include "framequeue.h"
-@@ -420,6 +423,19 @@ static int can_merge_formats(AVFilterFormats *a_arg,
+@@ -427,6 +430,19 @@ static int can_merge_formats(AVFilterFormats *a_arg,
      }
  }
  
@@ -56725,7 +33417,7 @@ index 4304c06847..7bed282dff 100644
  /**
   * Perform one round of query_formats() and merging formats lists on the
   * filter graph.
-@@ -460,6 +476,7 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
+@@ -467,6 +483,7 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
          for (j = 0; j < filter->nb_inputs; j++) {
              AVFilterLink *link = filter->inputs[j];
              int convert_needed = 0;
@@ -56733,16 +33425,15 @@ index 4304c06847..7bed282dff 100644
  
              if (!link)
                  continue;
-@@ -507,12 +524,15 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
+@@ -514,11 +531,14 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
              )
  #undef MERGE_DISPATCH
  
 -            if (convert_needed) {
 +            while (convert_needed) {
                  AVFilterContext *convert;
-                 AVFilter *filter;
+                 const AVFilter *filter;
                  AVFilterLink *inlink, *outlink;
-                 char scale_args[256];
                  char inst_name[30];
 +                int can_retry = 0;
 +
@@ -56750,7 +33441,7 @@ index 4304c06847..7bed282dff 100644
  
                  if (graph->disable_auto_convert) {
                      av_log(log_ctx, AV_LOG_ERROR,
-@@ -525,19 +545,45 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
+@@ -531,19 +551,45 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
                  /* couldn't merge format lists. auto-insert conversion filter */
                  switch (link->type) {
                  case AVMEDIA_TYPE_VIDEO:
@@ -56808,7 +33499,7 @@ index 4304c06847..7bed282dff 100644
                      break;
                  case AVMEDIA_TYPE_AUDIO:
                      if (!(filter = avfilter_get_by_name("aresample"))) {
-@@ -583,9 +629,19 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
+@@ -585,9 +631,19 @@ static int query_formats(AVFilterGraph *graph, AVClass *log_ctx)
                      av_assert0(outlink-> in_channel_layouts->refcount > 0);
                      av_assert0(outlink->out_channel_layouts->refcount > 0);
                  }
@@ -56831,7 +33522,7 @@ index 4304c06847..7bed282dff 100644
                      (!ff_merge_samplerates(inlink->in_samplerates,
                                             inlink->out_samplerates) ||
 diff --git a/libavfilter/buffersrc.c b/libavfilter/buffersrc.c
-index ad5aedd5f7..0d2df8b870 100644
+index cd56f8ca45..813a682aa1 100644
 --- a/libavfilter/buffersrc.c
 +++ b/libavfilter/buffersrc.c
 @@ -207,7 +207,7 @@ static int av_buffersrc_add_frame_internal(AVFilterContext *ctx,
@@ -57082,10 +33773,10 @@ index 0000000000..64578b7ac4
 +};
 +
 diff --git a/libavformat/utils.c b/libavformat/utils.c
-index 1a7996c4fd..8119fc07f7 100644
+index f2f2cc4239..f152a3bcc2 100644
 --- a/libavformat/utils.c
 +++ b/libavformat/utils.c
-@@ -2940,6 +2940,40 @@ static int has_codec_parameters(AVStream *st, const char **errmsg_ptr)
+@@ -2996,6 +2996,40 @@ static int has_codec_parameters(AVStream *st, const char **errmsg_ptr)
      return 1;
  }
  
@@ -57126,7 +33817,7 @@ index 1a7996c4fd..8119fc07f7 100644
  /* returns 1 or 0 if or if not decoded data was returned, or a negative error */
  static int try_decode_frame(AVFormatContext *s, AVStream *st, AVPacket *avpkt,
                              AVDictionary **options)
-@@ -2974,7 +3008,11 @@ static int try_decode_frame(AVFormatContext *s, AVStream *st, AVPacket *avpkt,
+@@ -3030,7 +3064,11 @@ static int try_decode_frame(AVFormatContext *s, AVStream *st, AVPacket *avpkt,
          av_dict_set(options ? options : &thread_opt, "threads", "1", 0);
          if (s->codec_whitelist)
              av_dict_set(options ? options : &thread_opt, "codec_whitelist", s->codec_whitelist, 0);
@@ -57139,7 +33830,7 @@ index 1a7996c4fd..8119fc07f7 100644
          if (!options)
              av_dict_free(&thread_opt);
          if (ret < 0) {
-@@ -3005,6 +3043,14 @@ static int try_decode_frame(AVFormatContext *s, AVStream *st, AVPacket *avpkt,
+@@ -3061,6 +3099,14 @@ static int try_decode_frame(AVFormatContext *s, AVStream *st, AVPacket *avpkt,
          if (avctx->codec_type == AVMEDIA_TYPE_VIDEO ||
              avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
              ret = avcodec_send_packet(avctx, &pkt);
@@ -57154,7 +33845,7 @@ index 1a7996c4fd..8119fc07f7 100644
              if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
                  break;
              if (ret >= 0)
-@@ -3601,9 +3647,20 @@ FF_ENABLE_DEPRECATION_WARNINGS
+@@ -3654,9 +3700,20 @@ FF_ENABLE_DEPRECATION_WARNINGS
          // Try to just open decoders, in case this is enough to get parameters.
          if (!has_codec_parameters(st, NULL) && st->request_probe <= 0) {
              if (codec && !avctx->codec)
@@ -57179,13 +33870,13 @@ index 1a7996c4fd..8119fc07f7 100644
          if (!options)
              av_dict_free(&thread_opt);
 diff --git a/libavutil/Makefile b/libavutil/Makefile
-index 65e285a701..4909d2682e 100644
+index a63ba523c9..4f9a19e800 100644
 --- a/libavutil/Makefile
 +++ b/libavutil/Makefile
-@@ -165,6 +165,7 @@ OBJS-$(CONFIG_QSV)                   += hwcontext_qsv.o
+@@ -164,6 +164,7 @@ OBJS-$(CONFIG_QSV)                   += hwcontext_qsv.o
  OBJS-$(CONFIG_LIBDRM)                   += hwcontext_drm.o
  OBJS-$(CONFIG_LZO)                      += lzo.o
- OBJS-$(CONFIG_OPENCL)                   += opencl.o opencl_internal.o
+ OBJS-$(CONFIG_OPENCL)                   += hwcontext_opencl.o
 +OBJS-$(CONFIG_SAND)                     += rpi_sand_fns.o
  OBJS-$(CONFIG_VAAPI)                    += hwcontext_vaapi.o
  OBJS-$(CONFIG_VIDEOTOOLBOX)             += hwcontext_videotoolbox.o
@@ -57274,7 +33965,7 @@ index 73b6bd0b14..d907de3f1c 100644
   * @}
   */
 diff --git a/libavutil/frame.c b/libavutil/frame.c
-index d5fd2932e3..b127cd833b 100644
+index 00215ac29a..d068f437e7 100644
 --- a/libavutil/frame.c
 +++ b/libavutil/frame.c
 @@ -16,6 +16,8 @@
@@ -57294,9 +33985,9 @@ index d5fd2932e3..b127cd833b 100644
 +#include "rpi_sand_fns.h"
 +#endif
  
- 
- static AVFrameSideData *frame_new_side_data(AVFrame *frame,
-@@ -833,6 +838,12 @@ int av_frame_apply_cropping(AVFrame *frame, int flags)
+ #if FF_API_FRAME_GET_SET
+ MAKE_ACCESSORS(AVFrame, frame, int64_t, best_effort_timestamp)
+@@ -885,6 +890,12 @@ int av_frame_apply_cropping(AVFrame *frame, int flags)
          (frame->crop_top + frame->crop_bottom) >= frame->height)
          return AVERROR(ERANGE);
  
@@ -57310,10 +34001,10 @@ index d5fd2932e3..b127cd833b 100644
      if (!desc)
          return AVERROR_BUG;
 diff --git a/libavutil/frame.h b/libavutil/frame.h
-index abe4f4fd17..52c1713936 100644
+index 9d57d6ce66..1ade7bd707 100644
 --- a/libavutil/frame.h
 +++ b/libavutil/frame.h
-@@ -814,6 +814,16 @@ int av_frame_apply_cropping(AVFrame *frame, int flags);
+@@ -886,6 +886,16 @@ int av_frame_apply_cropping(AVFrame *frame, int flags);
   */
  const char *av_frame_side_data_name(enum AVFrameSideDataType type);
  
@@ -57331,11 +34022,11 @@ index abe4f4fd17..52c1713936 100644
   * @}
   */
 diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
-index 2cfab89c03..9feca821b2 100644
+index 8ed52751c1..71d6dd4250 100644
 --- a/libavutil/pixdesc.c
 +++ b/libavutil/pixdesc.c
-@@ -2241,6 +2241,30 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
-         .name = "drm_prime",
+@@ -2185,6 +2185,30 @@ static const AVPixFmtDescriptor av_pix_fmt_descriptors[AV_PIX_FMT_NB] = {
+         .name  = "opencl",
          .flags = AV_PIX_FMT_FLAG_HWACCEL,
      },
 +    [AV_PIX_FMT_SAND128] = {
@@ -57366,12 +34057,12 @@ index 2cfab89c03..9feca821b2 100644
  #if FF_API_PLUS1_MINUS1
  FF_ENABLE_DEPRECATION_WARNINGS
 diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
-index 24889c8e52..3234ab782b 100644
+index e184a56672..1078c192a6 100644
 --- a/libavutil/pixfmt.h
 +++ b/libavutil/pixfmt.h
-@@ -341,6 +341,11 @@ enum AVPixelFormat {
+@@ -330,6 +330,11 @@ enum AVPixelFormat {
       */
-     AV_PIX_FMT_DRM_PRIME,
+     AV_PIX_FMT_OPENCL,
  
 +    // RPI - not on ifdef so can be got at by calling progs
 +    AV_PIX_FMT_SAND128,    ///< 4:2:0  8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding
@@ -57901,10 +34592,10 @@ index 0000000000..b1e99a6a89
 +
 diff --git a/pi-util/conf_h265.2016.csv b/pi-util/conf_h265.2016.csv
 new file mode 100644
-index 0000000000..e176c503f9
+index 0000000000..b5381794be
 --- /dev/null
 +++ b/pi-util/conf_h265.2016.csv
-@@ -0,0 +1,193 @@
+@@ -0,0 +1,194 @@
 +1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5
 +1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5
 +1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
@@ -58016,7 +34707,7 @@ index 0000000000..e176c503f9
 +1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
 +1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5
 +1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt
-+2,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt
++1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt
 +1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
 +1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
 +1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5
@@ -58097,7 +34788,8 @@ index 0000000000..e176c503f9
 +0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5
 +1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5
 +1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5
-+2,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5
++1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5
++1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5
 diff --git a/pi-util/conf_h265.2016_HEVC_v1.csv b/pi-util/conf_h265.2016_HEVC_v1.csv
 new file mode 100644
 index 0000000000..6082641271
@@ -58439,14 +35131,14 @@ index 0000000000..59c0d3959e
 +# -Wa,-ahls
 diff --git a/pi-util/conf_pi2.sh b/pi-util/conf_pi2.sh
 new file mode 100755
-index 0000000000..4de256bc8a
+index 0000000000..28b7a4f483
 --- /dev/null
 +++ b/pi-util/conf_pi2.sh
 @@ -0,0 +1,32 @@
 +echo "Configure for Pi2/3"
 +
 +RPI_TOOLROOT=`pwd`/../tools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf
-+RPI_OPT_VC=`pwd`/../firmware/opt/vc
++RPI_OPT_VC=`pwd`/../firmware/hardfp/opt/vc
 +
 +RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux"
 +RPI_LIBDIRS="-L$RPI_TOOLROOT/lib -L$RPI_OPT_VC/lib"
@@ -58788,10 +35480,10 @@ index 0000000000..27cc453963
 +
 diff --git a/pi-util/make_array.py b/pi-util/make_array.py
 new file mode 100755
-index 0000000000..864fa5e704
+index 0000000000..67b22d2d51
 --- /dev/null
 +++ b/pi-util/make_array.py
-@@ -0,0 +1,19 @@
+@@ -0,0 +1,23 @@
 +#!/usr/bin/env python
 +
 +# Usage
@@ -58806,9 +35498,13 @@ index 0000000000..864fa5e704
 +  print 'Converting',file
 +  with open(prefix+'.h','wb') as out:
 +    print >>out, 'static const unsigned char',name,'[] = {'
-+    with open(file,'rb') as fd:  
++    with open(file,'rb') as fd:
++      i = 0
 +      for byte in fd.read():
-+        print >>out, '%d,' % ord(byte)
++        print >>out, '0x%02x, ' % ord(byte),
++        i = i + 1
++        if i % 8 == 0:
++          print >>out, ' // %04x' % (i - 8)
 +    print >>out,'};'
 +
 diff --git a/pi-util/qem.sh b/pi-util/qem.sh
diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-99.1004-added_upstream_mvc_patches.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-99.1004-added_upstream_mvc_patches.patch
index 5eac8a1bcd..981a88e102 100644
--- a/packages/multimedia/ffmpeg/patches/ffmpeg-99.1004-added_upstream_mvc_patches.patch
+++ b/packages/multimedia/ffmpeg/patches/ffmpeg-99.1004-added_upstream_mvc_patches.patch
@@ -1,7 +1,7 @@
-From e75d7807cc97b3ddd8d8f6fe2fcf3dc4de58863f Mon Sep 17 00:00:00 2001
+From 20af7af23a9f366476e67669f14957dfaf58f141 Mon Sep 17 00:00:00 2001
 From: Hendrik Leppkes <h.leppkes@gmail.com>
 Date: Sat, 9 Jan 2016 16:34:09 +0100
-Subject: [PATCH 1/4] avcodec: add h264_mvc codec id and profiles
+Subject: [PATCH 1/3] avcodec: add h264_mvc codec id and profiles
 
 ---
  libavcodec/avcodec.h    | 3 +++
@@ -11,10 +11,10 @@ Subject: [PATCH 1/4] avcodec: add h264_mvc codec id and profiles
  4 files changed, 12 insertions(+), 1 deletion(-)
 
 diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
-index 6c4b011b5c..8f1f5a3e53 100644
+index d962b9cf0a..4c4581c895 100644
 --- a/libavcodec/avcodec.h
 +++ b/libavcodec/avcodec.h
-@@ -449,6 +449,8 @@ enum AVCodecID {
+@@ -447,6 +447,8 @@ enum AVCodecID {
      AV_CODEC_ID_GDV,
      AV_CODEC_ID_FITS,
  
@@ -23,7 +23,7 @@ index 6c4b011b5c..8f1f5a3e53 100644
      /* various PCM "codecs" */
      AV_CODEC_ID_FIRST_AUDIO = 0x10000,     ///< A dummy id pointing at the start of audio codecs
      AV_CODEC_ID_PCM_S16LE = 0x10000,
-@@ -3318,6 +3320,7 @@ typedef struct AVCodecContext {
+@@ -2895,6 +2897,7 @@ typedef struct AVCodecContext {
  #define FF_PROFILE_H264_HIGH_444_PREDICTIVE  244
  #define FF_PROFILE_H264_HIGH_444_INTRA       (244|FF_PROFILE_H264_INTRA)
  #define FF_PROFILE_H264_CAVLC_444            44
@@ -32,12 +32,12 @@ index 6c4b011b5c..8f1f5a3e53 100644
  #define FF_PROFILE_VC1_SIMPLE   0
  #define FF_PROFILE_VC1_MAIN     1
 diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
-index 6a13bbbf0e..03ae4838d2 100644
+index 79552a910d..b55955476c 100644
 --- a/libavcodec/codec_desc.c
 +++ b/libavcodec/codec_desc.c
-@@ -1665,6 +1665,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
-         .props     = AV_CODEC_PROP_LOSSLESS,
-         .mime_types= MT("image/png"),
+@@ -1647,6 +1647,13 @@ static const AVCodecDescriptor codec_descriptors[] = {
+         .long_name = NULL_IF_CONFIG_SMALL("FITS (Flexible Image Transport System)"),
+         .props     = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
      },
 +    {
 +        .id        = AV_CODEC_ID_H264_MVC,
@@ -50,7 +50,7 @@ index 6a13bbbf0e..03ae4838d2 100644
      /* various PCM "codecs" */
      {
 diff --git a/libavcodec/profiles.c b/libavcodec/profiles.c
-index 30498efedf..9d3cf4b535 100644
+index d7dc960f36..e4651f12f9 100644
 --- a/libavcodec/profiles.c
 +++ b/libavcodec/profiles.c
 @@ -72,6 +72,7 @@ const AVProfile ff_h264_profiles[] = {
@@ -62,7 +62,7 @@ index 30498efedf..9d3cf4b535 100644
  };
  
 diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
-index 53cbcfb543..f93f06fcfb 100644
+index 37a6aa8bff..52c5b659c4 100644
 --- a/libavformat/mpegts.c
 +++ b/libavformat/mpegts.c
 @@ -701,7 +701,7 @@ static const StreamType ISO_types[] = {
@@ -78,31 +78,19 @@ index 53cbcfb543..f93f06fcfb 100644
 2.14.1
 
 
-From 51f6cec0b87840c32482df5d2b09f50d503d2b2b Mon Sep 17 00:00:00 2001
+From 0f3fda4e348e6b12570f5d279713f6da46511846 Mon Sep 17 00:00:00 2001
 From: Hendrik Leppkes <h.leppkes@gmail.com>
 Date: Sat, 9 Jan 2016 16:34:40 +0100
-Subject: [PATCH 2/4] h264_parser: add support for parsing h264 mvc NALUs
+Subject: [PATCH 2/3] h264_parser: add support for parsing h264 mvc NALUs
 
 ---
- libavcodec/allcodecs.c   |  1 +
  libavcodec/h264.h        |  2 ++
  libavcodec/h264_parser.c | 34 ++++++++++++++++++++++++++++++----
+ libavcodec/parser.c      |  1 +
  3 files changed, 33 insertions(+), 4 deletions(-)
 
-diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
-index 5361a22141..a5289a5e14 100644
---- a/libavcodec/allcodecs.c
-+++ b/libavcodec/allcodecs.c
-@@ -732,6 +732,7 @@ static void register_all(void)
-     REGISTER_PARSER(H261,               h261);
-     REGISTER_PARSER(H263,               h263);
-     REGISTER_PARSER(H264,               h264);
-+    REGISTER_PARSER(H264_MVC,           h264_mvc);
-     REGISTER_PARSER(HEVC,               hevc);
-     REGISTER_PARSER(MJPEG,              mjpeg);
-     REGISTER_PARSER(MLP,                mlp);
 diff --git a/libavcodec/h264.h b/libavcodec/h264.h
-index 86df5eb9b3..22c4f1d82a 100644
+index 650580bf3a..c44a0cbedd 100644
 --- a/libavcodec/h264.h
 +++ b/libavcodec/h264.h
 @@ -41,7 +41,9 @@ enum {
@@ -114,9 +102,9 @@ index 86df5eb9b3..22c4f1d82a 100644
 +    H264_NAL_SLICE_EXT       = 20,
  };
  
- #endif /* AVCODEC_H264_H */
+ 
 diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
-index dd0a965af0..855c74896e 100644
+index 1a9840a62c..be8b9db9b0 100644
 --- a/libavcodec/h264_parser.c
 +++ b/libavcodec/h264_parser.c
 @@ -62,6 +62,7 @@ typedef struct H264ParseContext {
@@ -148,7 +136,7 @@ index dd0a965af0..855c74896e 100644
                  continue;
              }
              state = 7;
-@@ -594,7 +599,8 @@ static int h264_parse(AVCodecParserContext *s,
+@@ -601,7 +606,8 @@ static int h264_parse(AVCodecParserContext *s,
          }
      }
  
@@ -158,7 +146,7 @@ index dd0a965af0..855c74896e 100644
  
      if (avctx->framerate.num)
          avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1}));
-@@ -651,7 +657,7 @@ static int h264_split(AVCodecContext *avctx,
+@@ -658,7 +664,7 @@ static int h264_split(AVCodecContext *avctx,
          if ((state & 0xFFFFFF00) != 0x100)
              break;
          nalu_type = state & 0x1F;
@@ -167,7 +155,7 @@ index dd0a965af0..855c74896e 100644
              has_sps = 1;
          } else if (nalu_type == H264_NAL_PPS)
              has_pps = 1;
-@@ -703,3 +709,23 @@ AVCodecParser ff_h264_parser = {
+@@ -710,3 +716,23 @@ AVCodecParser ff_h264_parser = {
      .parser_close   = h264_close,
      .split          = h264_split,
  };
@@ -191,14 +179,26 @@ index dd0a965af0..855c74896e 100644
 +    .parser_close   = h264_close,
 +    .split          = h264_split,
 +};
+diff --git a/libavcodec/parser.c b/libavcodec/parser.c
+index f43b197d5e..f96e005ef3 100644
+--- a/libavcodec/parser.c
++++ b/libavcodec/parser.c
+@@ -54,6 +54,7 @@ extern AVCodecParser ff_gsm_parser;
+ extern AVCodecParser ff_h261_parser;
+ extern AVCodecParser ff_h263_parser;
+ extern AVCodecParser ff_h264_parser;
++extern AVCodecParser ff_h264_mvc_parser;
+ extern AVCodecParser ff_hevc_parser;
+ extern AVCodecParser ff_mjpeg_parser;
+ extern AVCodecParser ff_mlp_parser;
 -- 
 2.14.1
 
 
-From 6edab559331e83ad11e7940233dbbaae121e528c Mon Sep 17 00:00:00 2001
+From cdd668dc436b9c78dcb31df477e329492356e7ec Mon Sep 17 00:00:00 2001
 From: Hendrik Leppkes <h.leppkes@gmail.com>
 Date: Tue, 28 Nov 2017 16:12:12 +0000
-Subject: [PATCH 3/4] h264_parser: force grabing a new timestamp until a frame
+Subject: [PATCH 3/3] h264_parser: force grabing a new timestamp until a frame
  start was found
 
 ---
@@ -206,10 +206,10 @@ Subject: [PATCH 3/4] h264_parser: force grabing a new timestamp until a frame
  1 file changed, 3 insertions(+)
 
 diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
-index 855c74896e..90a99a19a8 100644
+index be8b9db9b0..81c9a1bbae 100644
 --- a/libavcodec/h264_parser.c
 +++ b/libavcodec/h264_parser.c
-@@ -587,6 +587,9 @@ static int h264_parse(AVCodecParserContext *s,
+@@ -594,6 +594,9 @@ static int h264_parse(AVCodecParserContext *s,
      } else {
          next = h264_find_frame_end(p, buf, buf_size, avctx);
  
@@ -222,62 +222,3 @@ index 855c74896e..90a99a19a8 100644
 -- 
 2.14.1
 
-
-From 2263d8d3a16ccf886c3692597331779a726373b5 Mon Sep 17 00:00:00 2001
-From: popcornmix <popcornmix@gmail.com>
-Date: Sun, 21 Jan 2018 20:31:31 +0000
-Subject: [PATCH 4/4] fixup
-
----
- libavcodec/extract_extradata_bsf.c | 8 +++++---
- 1 file changed, 5 insertions(+), 3 deletions(-)
-
-diff --git a/libavcodec/extract_extradata_bsf.c b/libavcodec/extract_extradata_bsf.c
-index ed6509c681..188e62a42d 100644
---- a/libavcodec/extract_extradata_bsf.c
-+++ b/libavcodec/extract_extradata_bsf.c
-@@ -56,7 +56,7 @@ static int extract_extradata_h2645(AVBSFContext *ctx, AVPacket *pkt,
-         HEVC_NAL_VPS, HEVC_NAL_SPS, HEVC_NAL_PPS,
-     };
-     static const int extradata_nal_types_h264[] = {
--        H264_NAL_SPS, H264_NAL_PPS,
-+        H264_NAL_SPS, H264_NAL_SPS_SUBSET, H264_NAL_PPS,
-     };
- 
-     ExtractExtradataContext *s = ctx->priv_data;
-@@ -88,14 +88,14 @@ static int extract_extradata_h2645(AVBSFContext *ctx, AVPacket *pkt,
-                 if (nal->type == HEVC_NAL_SPS) has_sps = 1;
-                 if (nal->type == HEVC_NAL_VPS) has_vps = 1;
-             } else {
--                if (nal->type == H264_NAL_SPS) has_sps = 1;
-+                if (nal->type == H264_NAL_SPS || nal->type == H264_NAL_SPS_SUBSET) has_sps = 1;
-             }
-         }
-     }
- 
-     if (extradata_size &&
-         ((ctx->par_in->codec_id == AV_CODEC_ID_HEVC && has_sps && has_vps) ||
--         (ctx->par_in->codec_id == AV_CODEC_ID_H264 && has_sps))) {
-+         ((ctx->par_in->codec_id == AV_CODEC_ID_H264 || ctx->par_in->codec_id == AV_CODEC_ID_H264_MVC) && has_sps))) {
-         AVBufferRef *filtered_buf;
-         uint8_t *extradata, *filtered_data;
- 
-@@ -247,6 +247,7 @@ static const struct {
- } extract_tab[] = {
-     { AV_CODEC_ID_CAVS,       extract_extradata_mpeg4   },
-     { AV_CODEC_ID_H264,       extract_extradata_h2645   },
-+    { AV_CODEC_ID_H264_MVC,   extract_extradata_h2645   },
-     { AV_CODEC_ID_HEVC,       extract_extradata_h2645   },
-     { AV_CODEC_ID_MPEG1VIDEO, extract_extradata_mpeg12  },
-     { AV_CODEC_ID_MPEG2VIDEO, extract_extradata_mpeg12  },
-@@ -306,6 +307,7 @@ fail:
- static const enum AVCodecID codec_ids[] = {
-     AV_CODEC_ID_CAVS,
-     AV_CODEC_ID_H264,
-+    AV_CODEC_ID_H264_MVC,
-     AV_CODEC_ID_HEVC,
-     AV_CODEC_ID_MPEG1VIDEO,
-     AV_CODEC_ID_MPEG2VIDEO,
--- 
-2.14.1
-