diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-99.1003-pfcd_hevc_optimisations.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-99.1003-pfcd_hevc_optimisations.patch
index 91ea9da3dd..32c0f1f17b 100644
--- a/packages/multimedia/ffmpeg/patches/ffmpeg-99.1003-pfcd_hevc_optimisations.patch
+++ b/packages/multimedia/ffmpeg/patches/ffmpeg-99.1003-pfcd_hevc_optimisations.patch
@@ -16771,10 +16771,10 @@ index 0000000000..4bfa000da4
 +
 diff --git a/libavcodec/rpi_hevc_mvs.c b/libavcodec/rpi_hevc_mvs.c
 new file mode 100644
-index 0000000000..93f3530ff5
+index 0000000000..93a6294c76
 --- /dev/null
 +++ b/libavcodec/rpi_hevc_mvs.c
-@@ -0,0 +1,761 @@
+@@ -0,0 +1,759 @@
 +/*
 + * HEVC video decoder
 + *
@@ -17017,8 +17017,7 @@ index 0000000000..93f3530ff5
 +        x < s->ps.sps->width) {
 +        x                 &= ~15;
 +        y                 &= ~15;
-+        if (s->threads_type == FF_THREAD_FRAME)
-+            ff_hevc_rpi_progress_wait_mv(s, lc->jb0, ref, y);
++        ff_hevc_rpi_progress_wait_mv(s, lc->jb0, ref, y);
 +        x_pu               = x >> s->ps.sps->log2_min_pu_size;
 +        y_pu               = y >> s->ps.sps->log2_min_pu_size;
 +        temp_col           = TAB_MVF(x_pu, y_pu);
@@ -17031,8 +17030,7 @@ index 0000000000..93f3530ff5
 +        y                  = y0 + (nPbH >> 1);
 +        x                 &= ~15;
 +        y                 &= ~15;
-+        if (s->threads_type == FF_THREAD_FRAME)
-+            ff_hevc_rpi_progress_wait_mv(s, lc->jb0, ref, y);
++        ff_hevc_rpi_progress_wait_mv(s, lc->jb0, ref, y);
 +        x_pu               = x >> s->ps.sps->log2_min_pu_size;
 +        y_pu               = y >> s->ps.sps->log2_min_pu_size;
 +        temp_col           = TAB_MVF(x_pu, y_pu);
@@ -19691,10 +19689,10 @@ index 0000000000..744e7cf248
 +}
 diff --git a/libavcodec/rpi_hevc_ps.h b/libavcodec/rpi_hevc_ps.h
 new file mode 100644
-index 0000000000..1e7120a43d
+index 0000000000..00c1f14614
 --- /dev/null
 +++ b/libavcodec/rpi_hevc_ps.h
-@@ -0,0 +1,441 @@
+@@ -0,0 +1,444 @@
 +/*
 + * HEVC parameter set parsing
 + *
@@ -19803,6 +19801,9 @@ index 0000000000..1e7120a43d
 +    int num_entry_point_offsets;
 +    int offsets_allocated;
 +
++    uint8_t offload_wpp;
++    uint8_t offload_tiles;
++
 +    int8_t slice_qp;
 +
 +    uint8_t luma_log2_weight_denom;
@@ -25929,10 +25930,10 @@ index 0000000000..1128a2c054
 +};
 diff --git a/libavcodec/rpi_hevcdec.c b/libavcodec/rpi_hevcdec.c
 new file mode 100644
-index 0000000000..08686ff260
+index 0000000000..bddf0c3417
 --- /dev/null
 +++ b/libavcodec/rpi_hevcdec.c
-@@ -0,0 +1,5787 @@
+@@ -0,0 +1,5782 @@
 +/*
 + * HEVC video Decoder
 + *
@@ -26911,7 +26912,10 @@ index 0000000000..08686ff260
 +        goto fail;
 +
 +    s->tab_ipm  = av_mallocz(min_pu_size);
-+    s->is_pcm   = av_malloc_array(sps->pcm_width, sps->pcm_height);
++    // We can overread by 1 line & one byte in deblock so alloc & zero
++    // We don't need to zero the extra @ start of frame as it will never be
++    // written
++    s->is_pcm   = av_mallocz(sps->pcm_width * (sps->pcm_height + 1) + 1);
 +    if (!s->tab_ipm || !s->is_pcm)
 +        goto fail;
 +
@@ -27645,6 +27649,9 @@ index 0000000000..08686ff260
 +    }
 +
 +    sh->num_entry_point_offsets = 0;
++    sh->offload_wpp = 0;
++    sh->offload_tiles = 0;
++
 +    if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
 +        unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
 +        // It would be possible to bound this tighter but this here is simpler
@@ -27681,6 +27688,18 @@ index 0000000000..08686ff260
 +                }
 +                sh->entry_point_offset[i] = val_minus1 + 1; // +1 to get the size
 +            }
++
++            // Do we want to offload this
++            if (s->threads_type != 0)
++            {
++                sh->offload_tiles = (!s->ps.pps->tile_wpp_inter_disable || sh->slice_type == HEVC_SLICE_I) &&
++                    s->ps.pps->num_tile_columns > 1;
++                // * We only cope with WPP in a single column
++                //   Probably want to deal with that case as tiles rather than WPP anyway
++                // ?? Not actually sure that the main code deals with WPP + multi-col correctly
++                sh->offload_wpp = s->ps.pps->entropy_coding_sync_enabled_flag &&
++                    s->ps.pps->num_tile_columns == 1;
++            }
 +        }
 +    }
 +
@@ -28231,7 +28250,7 @@ index 0000000000..08686ff260
 +static void hevc_await_progress(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const HEVCFrame * const ref,
 +                                const Mv * const mv, const int y0, const int height)
 +{
-+    if (s->threads_type == FF_THREAD_FRAME) {
++    if (s->threads_type != 0) {
 +        const int y = FFMAX(0, (mv->y >> 2) + y0 + height + 9);
 +
 +        // Progress has to be attached to current job as the actual wait
@@ -29408,7 +29427,7 @@ index 0000000000..08686ff260
 +        (s->ps.pps->ctb_ts_flags[jb->ctu_ts_last] & CTB_TS_FLAGS_EOT) != 0);
 +
 +    // Signal
-+    if (s->threads_type == FF_THREAD_FRAME && y > 0) {
++    if (y > 0) {
 +        // Cast away const as progress is held in s, but this really shouldn't confuse anything
 +        ff_hevc_rpi_progress_signal_recon((HEVCRpiContext *)s, y - 1);
 +    }
@@ -30179,7 +30198,7 @@ index 0000000000..08686ff260
 +            ff_hevc_rpi_save_states(s, lc);
 +
 +        // Report progress so we can use our MVs in other frames
-+        if (s->threads_type == FF_THREAD_FRAME && (ctb_flags & CTB_TS_FLAGS_EOL) != 0)
++        if ((ctb_flags & CTB_TS_FLAGS_EOL) != 0)
 +            ff_hevc_rpi_progress_signal_mv(s, y_ctb + ctb_size - 1);
 +
 +        // End of line || End of tile line || End of tile
@@ -30593,9 +30612,7 @@ index 0000000000..08686ff260
 +
 +#if RPI_EXTRA_BIT_THREADS > 0
 +
-+    if (s->sh.num_entry_point_offsets != 0 &&
-+        (!s->ps.pps->tile_wpp_inter_disable || s->sh.slice_type == HEVC_SLICE_I) &&
-+        s->ps.pps->num_tile_columns > 1)
++    if (s->sh.offload_tiles)
 +    {
 +        unsigned int slice_row = 0;
 +
@@ -30640,14 +30657,7 @@ index 0000000000..08686ff260
 +        printf("%s: Done wait: ts=%d\n", __func__, lc->ts);
 +#endif
 +    }
-+    else
-+
-+    // * We only cope with WPP in a single column
-+    //   Probably want to deal with that case as tiles rather than WPP anyway
-+    // ?? Not actually sure that the main code deals with WPP + multi-col correctly
-+    if (s->ps.pps->entropy_coding_sync_enabled_flag &&
-+        s->ps.pps->num_tile_columns == 1 &&
-+        s->sh.num_entry_point_offsets != 0)
++    else if (s->sh.offload_wpp)
 +    {
 +#if TRACE_WPP
 +        printf("%s: Do WPP\n", __func__);
@@ -31002,8 +31012,7 @@ index 0000000000..08686ff260
 +                        s->nal_unit_type == HEVC_NAL_STSA_N  ||
 +                        s->nal_unit_type == HEVC_NAL_RADL_N  ||
 +                        s->nal_unit_type == HEVC_NAL_RASL_N);
-+        s->offload_recon = s->used_for_ref;
-+//        s->offload_recon = 0;
++        s->offload_recon = s->threads_type != 0 && s->used_for_ref;
 +
 +#if DEBUG_DECODE_N
 +        {
@@ -31145,7 +31154,7 @@ index 0000000000..08686ff260
 +
 +fail:  // Also success path
 +    if (s->ref != NULL) {
-+        if (s->used_for_ref && s->threads_type == FF_THREAD_FRAME) {
++        if (s->used_for_ref && s->threads_type != 0) {
 +            ff_hevc_rpi_progress_signal_all_done(s);
 +        }
 +        else {
@@ -31394,12 +31403,6 @@ index 0000000000..08686ff260
 +    s->ps.pps = NULL;
 +    s->ps.vps = NULL;
 +
-+    for (i = 1; i < s->threads_number; i++) {
-+        if (s->sList[i] != NULL) {
-+            av_freep(&s->sList[i]);
-+        }
-+    }
-+
 +    // Free separately from sLists as used that way by RPI WPP
 +    for (i = 0; i < MAX_NB_THREADS && s->HEVClcList[i] != NULL; ++i) {
 +        av_freep(s->HEVClcList + i);
@@ -31428,7 +31431,6 @@ index 0000000000..08686ff260
 +    if (!s->HEVClc)
 +        goto fail;
 +    s->HEVClcList[0] = s->HEVClc;
-+    s->sList[0] = s;
 +
 +    // Whilst FFmpegs init fn is only called once the close fn is called as
 +    // many times as we have threads (init_thread_copy is called for the
@@ -31553,7 +31555,6 @@ index 0000000000..08686ff260
 +    s->is_nalff        = s0->is_nalff;
 +    s->nal_length_size = s0->nal_length_size;
 +
-+    s->threads_number      = s0->threads_number;
 +    s->threads_type        = s0->threads_type;
 +
 +    if (s0->eos) {
@@ -31611,11 +31612,6 @@ index 0000000000..08686ff260
 +
 +    atomic_init(&s->wpp_err, 0);
 +
-+    if(avctx->active_thread_type & FF_THREAD_SLICE)
-+        s->threads_number = avctx->thread_count;
-+    else
-+        s->threads_number = 1;
-+
 +    if (avctx->extradata_size > 0 && avctx->extradata) {
 +        ret = hevc_rpi_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1);
 +
@@ -31632,7 +31628,7 @@ index 0000000000..08686ff260
 +    if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
 +        s->threads_type = FF_THREAD_FRAME;
 +    else
-+        s->threads_type = FF_THREAD_SLICE;
++        s->threads_type = 0;
 +
 +    return 0;
 +}
@@ -31722,10 +31718,10 @@ index 0000000000..08686ff260
 +
 diff --git a/libavcodec/rpi_hevcdec.h b/libavcodec/rpi_hevcdec.h
 new file mode 100644
-index 0000000000..df2bac1df4
+index 0000000000..d242727b2a
 --- /dev/null
 +++ b/libavcodec/rpi_hevcdec.h
-@@ -0,0 +1,1002 @@
+@@ -0,0 +1,1000 @@
 +/*
 + * HEVC video decoder
 + *
@@ -32430,13 +32426,10 @@ index 0000000000..df2bac1df4
 +    const AVClass *c;  // needed by private avoptions
 +    AVCodecContext *avctx;
 +
-+    struct HEVCRpiContext  *sList[MAX_NB_THREADS];
-+
 +    HEVCRpiLocalContext    *HEVClcList[MAX_NB_THREADS];
 +    HEVCRpiLocalContext    *HEVClc;
 +
 +    uint8_t             threads_type;
-+    uint8_t             threads_number;
 +
 +    /** 1 if the independent slice segment header was successfully parsed */
 +    uint8_t slice_initialized;
@@ -32641,12 +32634,13 @@ index 0000000000..df2bac1df4
 +static inline void ff_hevc_rpi_progress_wait_mv(const HEVCRpiContext * const s, HEVCRpiJob * const jb,
 +                                     const HEVCFrame * const ref, const int y)
 +{
-+    ff_hevc_rpi_progress_wait_field(s, jb, ref, y, 1);
++    if (s->threads_type != 0)
++        ff_hevc_rpi_progress_wait_field(s, jb, ref, y, 1);
 +}
 +
 +static inline void ff_hevc_rpi_progress_signal_mv(HEVCRpiContext * const s, const int y)
 +{
-+    if (s->used_for_ref)
++    if (s->used_for_ref && s->threads_type != 0)
 +        ff_hevc_rpi_progress_signal_field(s, y, 1);
 +}
 +
@@ -32658,7 +32652,7 @@ index 0000000000..df2bac1df4
 +
 +static inline void ff_hevc_rpi_progress_signal_recon(HEVCRpiContext * const s, const int y)
 +{
-+    if (s->used_for_ref)
++    if (s->used_for_ref && s->threads_type != 0)
 +    {
 +        ff_hevc_rpi_progress_signal_field(s, y, 0);
 +    }
@@ -33344,7 +33338,7 @@ index 0000000000..8c9bf725bf
 +#endif /* AVCODEC_RPI_HEVCDSP_H */
 diff --git a/libavcodec/rpi_hevcdsp_template.c b/libavcodec/rpi_hevcdsp_template.c
 new file mode 100644
-index 0000000000..cfe9264fc3
+index 0000000000..d1196a4440
 --- /dev/null
 +++ b/libavcodec/rpi_hevcdsp_template.c
 @@ -0,0 +1,2278 @@
@@ -33929,7 +33923,7 @@ index 0000000000..cfe9264fc3
 +    pixel *src = (pixel *)_src;
 +    int a_stride, b_stride;
 +    int x, y;
-+    ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
++    const ptrdiff_t stride_src = RPI_HEVC_SAO_BUF_STRIDE / sizeof(pixel);
 +    stride_dst /= sizeof(pixel);
 +
 +    a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
@@ -34157,7 +34151,7 @@ index 0000000000..cfe9264fc3
 +    pixel *src = (pixel *)_src;
 +    int a_stride, b_stride;
 +    int x, y;
-+    ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
++    const ptrdiff_t stride_src = RPI_HEVC_SAO_BUF_STRIDE / sizeof(pixel);
 +
 +    stride_dst /= sizeof(pixel);
 +    width *= 2;