diff --git a/projects/RPi/patches/ffmpeg/0001-h264-Move-search-code-search-functions-into-separate.patch b/projects/RPi/patches/ffmpeg/0001-h264-Move-search-code-search-functions-into-separate.patch
new file mode 100644
index 0000000000..62e473d94e
--- /dev/null
+++ b/projects/RPi/patches/ffmpeg/0001-h264-Move-search-code-search-functions-into-separate.patch
@@ -0,0 +1,752 @@
+From 8cdb3bf2837a3fb4fff3c6586316f81ae5f7b6cd Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Wed, 16 Apr 2014 01:51:31 +0100
+Subject: [PATCH 1/3] h264: Move start code search functions into separate
+ source files.
+
+This permits re-use with parsers for codecs which use similar start codes.
+
+Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
+---
+ libavcodec/Makefile               |   2 +-
+ libavcodec/arm/Makefile           |   2 +-
+ libavcodec/arm/h264dsp_armv6.S    | 253 --------------------------------------
+ libavcodec/arm/h264dsp_init_arm.c |   4 +-
+ libavcodec/arm/startcode_armv6.S  | 253 ++++++++++++++++++++++++++++++++++++++
+ libavcodec/h264dsp.c              |  31 +----
+ libavcodec/startcode.c            |  57 +++++++++
+ libavcodec/startcode.h            |  35 ++++++
+ 8 files changed, 351 insertions(+), 286 deletions(-)
+ delete mode 100644 libavcodec/arm/h264dsp_armv6.S
+ create mode 100644 libavcodec/arm/startcode_armv6.S
+ create mode 100644 libavcodec/startcode.c
+ create mode 100644 libavcodec/startcode.h
+
+diff --git a/libavcodec/Makefile b/libavcodec/Makefile
+index b56ecd1..19caf11 100644
+--- a/libavcodec/Makefile
++++ b/libavcodec/Makefile
+@@ -49,7 +49,7 @@ OBJS-$(CONFIG_FFT)                     += avfft.o fft_fixed.o fft_float.o \
+ OBJS-$(CONFIG_GOLOMB)                  += golomb.o
+ OBJS-$(CONFIG_H263DSP)                 += h263dsp.o
+ OBJS-$(CONFIG_H264CHROMA)              += h264chroma.o
+-OBJS-$(CONFIG_H264DSP)                 += h264dsp.o h264idct.o
++OBJS-$(CONFIG_H264DSP)                 += h264dsp.o h264idct.o startcode.o
+ OBJS-$(CONFIG_H264PRED)                += h264pred.o
+ OBJS-$(CONFIG_H264QPEL)                += h264qpel.o
+ OBJS-$(CONFIG_HPELDSP)                 += hpeldsp.o
+diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
+index a8446b2..b6410b2 100644
+--- a/libavcodec/arm/Makefile
++++ b/libavcodec/arm/Makefile
+@@ -47,7 +47,7 @@ ARMV6-OBJS-$(CONFIG_DSPUTIL)           += arm/dsputil_init_armv6.o      \
+                                           arm/simple_idct_armv6.o       \
+
+ ARMV6-OBJS-$(CONFIG_AC3DSP)            += arm/ac3dsp_armv6.o
+-ARMV6-OBJS-$(CONFIG_H264DSP)           += arm/h264dsp_armv6.o
++ARMV6-OBJS-$(CONFIG_H264DSP)           += arm/startcode_armv6.o
+ ARMV6-OBJS-$(CONFIG_HPELDSP)           += arm/hpeldsp_init_armv6.o      \
+                                           arm/hpeldsp_armv6.o
+ ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
+diff --git a/libavcodec/arm/h264dsp_armv6.S b/libavcodec/arm/h264dsp_armv6.S
+deleted file mode 100644
+index 2758262..0000000
+--- a/libavcodec/arm/h264dsp_armv6.S
++++ /dev/null
+@@ -1,253 +0,0 @@
+-/*
+- * Copyright (c) 2013 RISC OS Open Ltd
+- * Author: Ben Avison <bavison@riscosopen.org>
+- *
+- * This file is part of FFmpeg.
+- *
+- * FFmpeg is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU Lesser General Public
+- * License as published by the Free Software Foundation; either
+- * version 2.1 of the License, or (at your option) any later version.
+- *
+- * FFmpeg is distributed in the hope that it will be useful,
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+- * Lesser General Public License for more details.
+- *
+- * You should have received a copy of the GNU Lesser General Public
+- * License along with FFmpeg; if not, write to the Free Software
+- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+- */
+-
+-#include "libavutil/arm/asm.S"
+-
+-RESULT  .req    a1
+-BUF     .req    a1
+-SIZE    .req    a2
+-PATTERN .req    a3
+-PTR     .req    a4
+-DAT0    .req    v1
+-DAT1    .req    v2
+-DAT2    .req    v3
+-DAT3    .req    v4
+-TMP0    .req    v5
+-TMP1    .req    v6
+-TMP2    .req    ip
+-TMP3    .req    lr
+-
+-#define PRELOAD_DISTANCE 4
+-
+-.macro innerloop4
+-        ldr     DAT0, [PTR], #4
+-        subs    SIZE, SIZE, #4 @ C flag survives rest of macro
+-        sub     TMP0, DAT0, PATTERN, lsr #14
+-        bic     TMP0, TMP0, DAT0
+-        ands    TMP0, TMP0, PATTERN
+-.endm
+-
+-.macro innerloop16  decrement, do_preload
+-        ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3}
+- .ifnc "\do_preload",""
+-        pld     [PTR, #PRELOAD_DISTANCE*32]
+- .endif
+- .ifnc "\decrement",""
+-        subs    SIZE, SIZE, #\decrement @ C flag survives rest of macro
+- .endif
+-        sub     TMP0, DAT0, PATTERN, lsr #14
+-        sub     TMP1, DAT1, PATTERN, lsr #14
+-        bic     TMP0, TMP0, DAT0
+-        bic     TMP1, TMP1, DAT1
+-        sub     TMP2, DAT2, PATTERN, lsr #14
+-        sub     TMP3, DAT3, PATTERN, lsr #14
+-        ands    TMP0, TMP0, PATTERN
+-        bic     TMP2, TMP2, DAT2
+-        it      eq
+-        andseq  TMP1, TMP1, PATTERN
+-        bic     TMP3, TMP3, DAT3
+-        itt     eq
+-        andseq  TMP2, TMP2, PATTERN
+-        andseq  TMP3, TMP3, PATTERN
+-.endm
+-
+-/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */
+-function ff_h264_find_start_code_candidate_armv6, export=1
+-        push    {v1-v6,lr}
+-        mov     PTR, BUF
+-        @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
+-        @ before using code that does preloads
+-        cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
+-        blo     60f
+-
+-        @ Get to word-alignment, 1 byte at a time
+-        tst     PTR, #3
+-        beq     2f
+-1:      ldrb    DAT0, [PTR], #1
+-        sub     SIZE, SIZE, #1
+-        teq     DAT0, #0
+-        beq     90f
+-        tst     PTR, #3
+-        bne     1b
+-2:      @ Get to 4-word alignment, 1 word at a time
+-        ldr     PATTERN, =0x80008000
+-        setend  be
+-        tst     PTR, #12
+-        beq     4f
+-3:      innerloop4
+-        bne     91f
+-        tst     PTR, #12
+-        bne     3b
+-4:      @ Get to cacheline (8-word) alignment
+-        tst     PTR, #16
+-        beq     5f
+-        innerloop16  16
+-        bne     93f
+-5:      @ Check complete cachelines, with preloading
+-        @ We need to stop when there are still (PRELOAD_DISTANCE+1)
+-        @ complete cachelines to go
+-        sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
+-6:      innerloop16  , do_preload
+-        bne     93f
+-        innerloop16  32
+-        bne     93f
+-        bcs     6b
+-        @ Preload trailing part-cacheline, if any
+-        tst     SIZE, #31
+-        beq     7f
+-        pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
+-        @ Check remaining data without doing any more preloads. First
+-        @ do in chunks of 4 words:
+-7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
+-        bmi     9f
+-8:      innerloop16  16
+-        bne     93f
+-        bcs     8b
+-        @ Then in words:
+-9:      adds    SIZE, SIZE, #16 - 4
+-        bmi     11f
+-10:     innerloop4
+-        bne     91f
+-        bcs     10b
+-11:     setend  le
+-        @ Check second byte of final halfword
+-        ldrb    DAT0, [PTR, #-1]
+-        teq     DAT0, #0
+-        beq     90f
+-        @ Check any remaining bytes
+-        tst     SIZE, #3
+-        beq     13f
+-12:     ldrb    DAT0, [PTR], #1
+-        sub     SIZE, SIZE, #1
+-        teq     DAT0, #0
+-        beq     90f
+-        tst     SIZE, #3
+-        bne     12b
+-        @ No candidate found
+-13:     sub     RESULT, PTR, BUF
+-        b       99f
+-
+-60:     @ Small buffer - simply check by looping over bytes
+-        subs    SIZE, SIZE, #1
+-        bcc     99f
+-61:     ldrb    DAT0, [PTR], #1
+-        subs    SIZE, SIZE, #1
+-        teq     DAT0, #0
+-        beq     90f
+-        bcs     61b
+-        @ No candidate found
+-        sub     RESULT, PTR, BUF
+-        b       99f
+-
+-90:     @ Found a candidate at the preceding byte
+-        sub     RESULT, PTR, BUF
+-        sub     RESULT, RESULT, #1
+-        b       99f
+-
+-91:     @ Found a candidate somewhere in the preceding 4 bytes
+-        sub     RESULT, PTR, BUF
+-        sub     RESULT, RESULT, #4
+-        sub     TMP0, DAT0, #0x20000
+-        bics    TMP0, TMP0, DAT0
+-        itt     pl
+-        ldrbpl  DAT0, [PTR, #-3]
+-        addpl   RESULT, RESULT, #2
+-        bpl     92f
+-        teq     RESULT, #0
+-        beq     98f @ don't look back a byte if found at first byte in buffer
+-        ldrb    DAT0, [PTR, #-5]
+-92:     teq     DAT0, #0
+-        it      eq
+-        subeq   RESULT, RESULT, #1
+-        b       98f
+-
+-93:     @ Found a candidate somewhere in the preceding 16 bytes
+-        sub     RESULT, PTR, BUF
+-        sub     RESULT, RESULT, #16
+-        teq     TMP0, #0
+-        beq     95f @ not in first 4 bytes
+-        sub     TMP0, DAT0, #0x20000
+-        bics    TMP0, TMP0, DAT0
+-        itt     pl
+-        ldrbpl  DAT0, [PTR, #-15]
+-        addpl   RESULT, RESULT, #2
+-        bpl     94f
+-        teq     RESULT, #0
+-        beq     98f @ don't look back a byte if found at first byte in buffer
+-        ldrb    DAT0, [PTR, #-17]
+-94:     teq     DAT0, #0
+-        it      eq
+-        subeq   RESULT, RESULT, #1
+-        b       98f
+-95:     add     RESULT, RESULT, #4
+-        teq     TMP1, #0
+-        beq     96f @ not in next 4 bytes
+-        sub     TMP1, DAT1, #0x20000
+-        bics    TMP1, TMP1, DAT1
+-        itee    mi
+-        ldrbmi  DAT0, [PTR, #-13]
+-        ldrbpl  DAT0, [PTR, #-11]
+-        addpl   RESULT, RESULT, #2
+-        teq     DAT0, #0
+-        it      eq
+-        subeq   RESULT, RESULT, #1
+-        b       98f
+-96:     add     RESULT, RESULT, #4
+-        teq     TMP2, #0
+-        beq     97f @ not in next 4 bytes
+-        sub     TMP2, DAT2, #0x20000
+-        bics    TMP2, TMP2, DAT2
+-        itee    mi
+-        ldrbmi  DAT0, [PTR, #-9]
+-        ldrbpl  DAT0, [PTR, #-7]
+-        addpl   RESULT, RESULT, #2
+-        teq     DAT0, #0
+-        it      eq
+-        subeq   RESULT, RESULT, #1
+-        b       98f
+-97:     add     RESULT, RESULT, #4
+-        sub     TMP3, DAT3, #0x20000
+-        bics    TMP3, TMP3, DAT3
+-        itee    mi
+-        ldrbmi  DAT0, [PTR, #-5]
+-        ldrbpl  DAT0, [PTR, #-3]
+-        addpl   RESULT, RESULT, #2
+-        teq     DAT0, #0
+-        it      eq
+-        subeq   RESULT, RESULT, #1
+-        @ drop through to 98f
+-98:     setend  le
+-99:     pop     {v1-v6,pc}
+-endfunc
+-
+-        .unreq  RESULT
+-        .unreq  BUF
+-        .unreq  SIZE
+-        .unreq  PATTERN
+-        .unreq  PTR
+-        .unreq  DAT0
+-        .unreq  DAT1
+-        .unreq  DAT2
+-        .unreq  DAT3
+-        .unreq  TMP0
+-        .unreq  TMP1
+-        .unreq  TMP2
+-        .unreq  TMP3
+diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
+index a0418fd..eb6c514 100644
+--- a/libavcodec/arm/h264dsp_init_arm.c
++++ b/libavcodec/arm/h264dsp_init_arm.c
+@@ -24,7 +24,7 @@
+ #include "libavutil/arm/cpu.h"
+ #include "libavcodec/h264dsp.h"
+
+-int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size);
++int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size);
+
+ void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+                                      int beta, int8_t *tc0);
+@@ -109,7 +109,7 @@ av_cold void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
+     int cpu_flags = av_get_cpu_flags();
+
+     if (have_armv6(cpu_flags))
+-        c->h264_find_start_code_candidate = ff_h264_find_start_code_candidate_armv6;
++        c->h264_find_start_code_candidate = ff_startcode_find_candidate_armv6;
+     if (have_neon(cpu_flags))
+         h264dsp_init_neon(c, bit_depth, chroma_format_idc);
+ }
+diff --git a/libavcodec/arm/startcode_armv6.S b/libavcodec/arm/startcode_armv6.S
+new file mode 100644
+index 0000000..a46f009
+--- /dev/null
++++ b/libavcodec/arm/startcode_armv6.S
+@@ -0,0 +1,253 @@
++/*
++ * Copyright (c) 2013 RISC OS Open Ltd
++ * Author: Ben Avison <bavison@riscosopen.org>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/arm/asm.S"
++
++RESULT  .req    a1 @ return value; shares a1 with BUF
++BUF     .req    a1 @ buf argument on entry
++SIZE    .req    a2 @ size argument, counted down as data is consumed
++PATTERN .req    a3 @ loaded with 0x80008000 before the word-wise tests
++PTR     .req    a4 @ running read pointer, initialised from BUF
++DAT0    .req    v1 @ DAT0-DAT3: data words or bytes loaded from PTR
++DAT1    .req    v2
++DAT2    .req    v3
++DAT3    .req    v4
++TMP0    .req    v5 @ TMP0-TMP3: test results for DAT0-DAT3 respectively
++TMP1    .req    v6
++TMP2    .req    ip
++TMP3    .req    lr @ lr is pushed on entry, so it is free as a scratch register
++
++#define PRELOAD_DISTANCE 4
++
++.macro innerloop4
++        ldr     DAT0, [PTR], #4 @ test one word: load it, then advance PTR by 4
++        subs    SIZE, SIZE, #4 @ C flag survives rest of macro
++        sub     TMP0, DAT0, PATTERN, lsr #14 @ TMP0 = DAT0 - (PATTERN >> 14)
++        bic     TMP0, TMP0, DAT0 @ TMP0 = TMP0 AND NOT DAT0
++        ands    TMP0, TMP0, PATTERN @ Z clear (ne) if a PATTERN bit survives; users branch on ne as candidate found
++.endm
++
++.macro innerloop16  decrement, do_preload
++        ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3} @ load four words, advancing PTR by 16
++ .ifnc "\do_preload",""
++        pld     [PTR, #PRELOAD_DISTANCE*32] @ emitted only when the second macro argument is non-empty
++ .endif
++ .ifnc "\decrement",""
++        subs    SIZE, SIZE, #\decrement @ C flag survives rest of macro
++ .endif
++        sub     TMP0, DAT0, PATTERN, lsr #14 @ same sub/bic/ands test as innerloop4, interleaved over four words
++        sub     TMP1, DAT1, PATTERN, lsr #14
++        bic     TMP0, TMP0, DAT0
++        bic     TMP1, TMP1, DAT1
++        sub     TMP2, DAT2, PATTERN, lsr #14
++        sub     TMP3, DAT3, PATTERN, lsr #14
++        ands    TMP0, TMP0, PATTERN @ Z clear (ne) if the first word matched
++        bic     TMP2, TMP2, DAT2
++        it      eq
++        andseq  TMP1, TMP1, PATTERN @ each later word is masked and tested only while Z is still set
++        bic     TMP3, TMP3, DAT3
++        itt     eq
++        andseq  TMP2, TMP2, PATTERN
++        andseq  TMP3, TMP3, PATTERN @ on exit, ne means one of the four words matched
++.endm
++
++/* int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size): ARMv6 start code candidate search; the result is a byte offset from buf */
++function ff_startcode_find_candidate_armv6, export=1
++        push    {v1-v6,lr} @ v1-v6 and lr are used as DAT/TMP registers below
++        mov     PTR, BUF @ PTR advances through the data; BUF is kept for the final PTR - BUF
++        @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
++        @ before using code that does preloads
++        cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
++        blo     60f
++
++        @ Get to word-alignment, 1 byte at a time
++        tst     PTR, #3
++        beq     2f
++1:      ldrb    DAT0, [PTR], #1
++        sub     SIZE, SIZE, #1
++        teq     DAT0, #0
++        beq     90f
++        tst     PTR, #3
++        bne     1b
++2:      @ Get to 4-word alignment, 1 word at a time
++        ldr     PATTERN, =0x80008000
++        setend  be @ data loads are big-endian from here until a setend le
++        tst     PTR, #12
++        beq     4f
++3:      innerloop4
++        bne     91f
++        tst     PTR, #12
++        bne     3b
++4:      @ Get to cacheline (8-word) alignment
++        tst     PTR, #16
++        beq     5f
++        innerloop16  16
++        bne     93f
++5:      @ Check complete cachelines, with preloading
++        @ We need to stop when there are still (PRELOAD_DISTANCE+1)
++        @ complete cachelines to go
++        sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
++6:      innerloop16  , do_preload
++        bne     93f
++        innerloop16  32
++        bne     93f
++        bcs     6b
++        @ Preload trailing part-cacheline, if any
++        tst     SIZE, #31
++        beq     7f
++        pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
++        @ Check remaining data without doing any more preloads. First
++        @ do in chunks of 4 words:
++7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
++        bmi     9f
++8:      innerloop16  16
++        bne     93f
++        bcs     8b
++        @ Then in words:
++9:      adds    SIZE, SIZE, #16 - 4
++        bmi     11f
++10:     innerloop4
++        bne     91f
++        bcs     10b
++11:     setend  le @ back to little-endian before the byte-wise tail
++        @ Check second byte of final halfword
++        ldrb    DAT0, [PTR, #-1]
++        teq     DAT0, #0
++        beq     90f
++        @ Check any remaining bytes
++        tst     SIZE, #3
++        beq     13f
++12:     ldrb    DAT0, [PTR], #1
++        sub     SIZE, SIZE, #1
++        teq     DAT0, #0
++        beq     90f
++        tst     SIZE, #3
++        bne     12b
++        @ No candidate found
++13:     sub     RESULT, PTR, BUF
++        b       99f
++
++60:     @ Small buffer - simply check by looping over bytes
++        subs    SIZE, SIZE, #1
++        bcc     99f @ NOTE(review): size == 0 exits here with a1 still holding BUF, not an offset - confirm callers never pass 0
++61:     ldrb    DAT0, [PTR], #1
++        subs    SIZE, SIZE, #1
++        teq     DAT0, #0
++        beq     90f
++        bcs     61b
++        @ No candidate found
++        sub     RESULT, PTR, BUF
++        b       99f
++
++90:     @ Found a candidate at the preceding byte
++        sub     RESULT, PTR, BUF
++        sub     RESULT, RESULT, #1
++        b       99f
++
++91:     @ Found a candidate somewhere in the preceding 4 bytes
++        sub     RESULT, PTR, BUF
++        sub     RESULT, RESULT, #4
++        sub     TMP0, DAT0, #0x20000
++        bics    TMP0, TMP0, DAT0
++        itt     pl
++        ldrbpl  DAT0, [PTR, #-3]
++        addpl   RESULT, RESULT, #2
++        bpl     92f
++        teq     RESULT, #0
++        beq     98f @ don't look back a byte if found at first byte in buffer
++        ldrb    DAT0, [PTR, #-5]
++92:     teq     DAT0, #0
++        it      eq
++        subeq   RESULT, RESULT, #1
++        b       98f
++
++93:     @ Found a candidate somewhere in the preceding 16 bytes
++        sub     RESULT, PTR, BUF
++        sub     RESULT, RESULT, #16
++        teq     TMP0, #0
++        beq     95f @ not in first 4 bytes
++        sub     TMP0, DAT0, #0x20000
++        bics    TMP0, TMP0, DAT0
++        itt     pl
++        ldrbpl  DAT0, [PTR, #-15]
++        addpl   RESULT, RESULT, #2
++        bpl     94f
++        teq     RESULT, #0
++        beq     98f @ don't look back a byte if found at first byte in buffer
++        ldrb    DAT0, [PTR, #-17]
++94:     teq     DAT0, #0
++        it      eq
++        subeq   RESULT, RESULT, #1
++        b       98f
++95:     add     RESULT, RESULT, #4
++        teq     TMP1, #0
++        beq     96f @ not in next 4 bytes
++        sub     TMP1, DAT1, #0x20000
++        bics    TMP1, TMP1, DAT1
++        itee    mi
++        ldrbmi  DAT0, [PTR, #-13]
++        ldrbpl  DAT0, [PTR, #-11]
++        addpl   RESULT, RESULT, #2
++        teq     DAT0, #0
++        it      eq
++        subeq   RESULT, RESULT, #1
++        b       98f
++96:     add     RESULT, RESULT, #4
++        teq     TMP2, #0
++        beq     97f @ not in next 4 bytes
++        sub     TMP2, DAT2, #0x20000
++        bics    TMP2, TMP2, DAT2
++        itee    mi
++        ldrbmi  DAT0, [PTR, #-9]
++        ldrbpl  DAT0, [PTR, #-7]
++        addpl   RESULT, RESULT, #2
++        teq     DAT0, #0
++        it      eq
++        subeq   RESULT, RESULT, #1
++        b       98f
++97:     add     RESULT, RESULT, #4
++        sub     TMP3, DAT3, #0x20000
++        bics    TMP3, TMP3, DAT3
++        itee    mi
++        ldrbmi  DAT0, [PTR, #-5]
++        ldrbpl  DAT0, [PTR, #-3]
++        addpl   RESULT, RESULT, #2
++        teq     DAT0, #0
++        it      eq
++        subeq   RESULT, RESULT, #1
++        @ drop through to 98f
++98:     setend  le @ exit for paths that were still in big-endian load mode
++99:     pop     {v1-v6,pc} @ restore saved registers and return with the result in a1
++endfunc
++
++        .unreq  RESULT
++        .unreq  BUF
++        .unreq  SIZE
++        .unreq  PATTERN
++        .unreq  PTR
++        .unreq  DAT0
++        .unreq  DAT1
++        .unreq  DAT2
++        .unreq  DAT3
++        .unreq  TMP0
++        .unreq  TMP1
++        .unreq  TMP2
++        .unreq  TMP3
+diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
+index a2a4aba..a4da776 100644
+--- a/libavcodec/h264dsp.c
++++ b/libavcodec/h264dsp.c
+@@ -33,6 +33,7 @@
+ #include "avcodec.h"
+ #include "h264dsp.h"
+ #include "h264idct.h"
++#include "startcode.h"
+ #include "libavutil/common.h"
+
+ #define BIT_DEPTH 8
+@@ -63,34 +64,6 @@
+ #include "h264addpx_template.c"
+ #undef BIT_DEPTH
+
+-static int h264_find_start_code_candidate_c(const uint8_t *buf, int size)
+-{
+-    int i = 0;
+-#if HAVE_FAST_UNALIGNED
+-    /* we check i < size instead of i + 3 / 7 because it is
+-     * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
+-     * bytes at the end.
+-     */
+-#       if HAVE_FAST_64BIT
+-    while (i < size &&
+-            !((~*(const uint64_t *)(buf + i) &
+-                    (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
+-                    0x8080808080808080ULL))
+-        i += 8;
+-#       else
+-    while (i < size &&
+-            !((~*(const uint32_t *)(buf + i) &
+-                    (*(const uint32_t *)(buf + i) - 0x01010101U)) &
+-                    0x80808080U))
+-        i += 4;
+-#       endif
+-#endif
+-    for (; i < size; i++)
+-        if (!buf[i])
+-            break;
+-    return i;
+-}
+-
+ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
+                              const int chroma_format_idc)
+ {
+@@ -178,7 +151,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
+         H264_DSP(8);
+         break;
+     }
+-    c->h264_find_start_code_candidate = h264_find_start_code_candidate_c;
++    c->h264_find_start_code_candidate = ff_startcode_find_candidate_c;
+
+     if (ARCH_AARCH64) ff_h264dsp_init_aarch64(c, bit_depth, chroma_format_idc);
+     if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
+diff --git a/libavcodec/startcode.c b/libavcodec/startcode.c
+new file mode 100644
+index 0000000..5df7695
+--- /dev/null
++++ b/libavcodec/startcode.c
+@@ -0,0 +1,57 @@
++/*
++ * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/**
++ * @file
++ * Accelerated start code search function for start codes common to
++ * MPEG-1/2/4 video, VC-1, H.264/5
++ * @author Michael Niedermayer <michaelni@gmx.at>
++ */
++
++#include "startcode.h"
++#include "config.h"
++
++int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
++{
++    int i = 0; /* offset of the next byte to examine; also the return value */
++#if HAVE_FAST_UNALIGNED
++    /* The word-sized loads below can read up to 3 / 7 bytes past size.
++     * Only i < size is checked because it is simpler; this relies on
++     * FF_INPUT_BUFFER_PADDING_SIZE bytes of padding after the buffer.
++     */
++#       if HAVE_FAST_64BIT
++    while (i < size &&
++            !((~*(const uint64_t *)(buf + i) &
++                    (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
++                    0x8080808080808080ULL))
++        i += 8; /* none of these 8 bytes is zero: skip the whole word */
++#       else
++    while (i < size &&
++            !((~*(const uint32_t *)(buf + i) &
++                    (*(const uint32_t *)(buf + i) - 0x01010101U)) &
++                    0x80808080U))
++        i += 4; /* none of these 4 bytes is zero: skip the whole word */
++#       endif
++#endif
++    for (; i < size; i++) /* byte-wise scan to locate the exact zero byte */
++        if (!buf[i])
++            break;
++    return i; /* index of the first zero byte below size, else a value >= size */
++}
+diff --git a/libavcodec/startcode.h b/libavcodec/startcode.h
+new file mode 100644
+index 0000000..cc55d5f
+--- /dev/null
++++ b/libavcodec/startcode.h
+@@ -0,0 +1,35 @@
++/*
++ * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/**
++ * @file
++ * Accelerated start code search function for start codes common to
++ * MPEG-1/2/4 video, VC-1, H.264/5
++ * @author Michael Niedermayer <michaelni@gmx.at>
++ */
++
++#ifndef AVCODEC_STARTCODE_H
++#define AVCODEC_STARTCODE_H
++
++#include <stdint.h>
++
++int ff_startcode_find_candidate_c(const uint8_t *buf, int size);
++
++#endif /* AVCODEC_STARTCODE_H */
+--
+1.9.1
diff --git a/projects/RPi/patches/ffmpeg/0001-truehd-tune-VLC-decoding-for-ARM.patch b/projects/RPi/patches/ffmpeg/0001-truehd-tune-VLC-decoding-for-ARM.patch
new file mode 100644
index 0000000000..29508437e5
--- /dev/null
+++ b/projects/RPi/patches/ffmpeg/0001-truehd-tune-VLC-decoding-for-ARM.patch
@@ -0,0 +1,65 @@
+From 425d69b993d25489e4830766507d9d8f6c819802 Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Wed, 19 Mar 2014 17:26:19 +0000
+Subject: [PATCH 1/6] truehd: tune VLC decoding for ARM.
+
+Profiling on a Raspberry Pi revealed the best performance to correspond
+with VLC_BITS = 5. Results for overall audio decode and the get_vlc2 function
+in particular are as follows:
+
+              Before          After
+              Mean   StdDev   Mean   StdDev  Confidence  Change
+6:2 total     348.8  20.1     339.6  15.1    88.8%       +2.7%  (insignificant)
+6:2 function  38.1   8.1      26.4   4.1     100.0%      +44.5%
+8:2 total     339.1  15.4     324.5  15.5    99.4%       +4.5%
+8:2 function  33.8   7.0      27.3   5.6     99.7%       +23.6%
+6:6 total     604.6  20.8     572.8  20.6    100.0%      +5.6%
+6:6 function  95.8   8.4      68.9   8.2     100.0%      +39.1%
+8:8 total     766.4  17.6     741.5  21.2    100.0%      +3.4%
+8:8 function  106.0  11.4     86.1   9.9     100.0%      +23.1%
+
+Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
+---
+ libavcodec/mlpdec.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
+index 93ed552..cbd9000 100644
+--- a/libavcodec/mlpdec.c
++++ b/libavcodec/mlpdec.c
+@@ -37,9 +37,16 @@
+ #include "mlp_parser.h"
+ #include "mlpdsp.h"
+ #include "mlp.h"
++#include "config.h"
+
+ /** number of bits used for VLC lookup - longest Huffman code is 9 */
++#if ARCH_ARM == 1
++#define VLC_BITS            5
++#define VLC_STATIC_SIZE     64
++#else
+ #define VLC_BITS            9
++#define VLC_STATIC_SIZE     512
++#endif
+
+ typedef struct SubStream {
+     /// Set if a valid restart header has been read. Otherwise the substream cannot be decoded.
+@@ -193,13 +200,13 @@ static av_cold void init_static(void)
+     if (!huff_vlc[0].bits) {
+         INIT_VLC_STATIC(&huff_vlc[0], VLC_BITS, 18,
+                     &ff_mlp_huffman_tables[0][0][1], 2, 1,
+-                    &ff_mlp_huffman_tables[0][0][0], 2, 1, 512);
++                    &ff_mlp_huffman_tables[0][0][0], 2, 1, VLC_STATIC_SIZE);
+         INIT_VLC_STATIC(&huff_vlc[1], VLC_BITS, 16,
+                     &ff_mlp_huffman_tables[1][0][1], 2, 1,
+-                    &ff_mlp_huffman_tables[1][0][0], 2, 1, 512);
++                    &ff_mlp_huffman_tables[1][0][0], 2, 1, VLC_STATIC_SIZE);
+         INIT_VLC_STATIC(&huff_vlc[2], VLC_BITS, 15,
+                     &ff_mlp_huffman_tables[2][0][1], 2, 1,
+-                    &ff_mlp_huffman_tables[2][0][0], 2, 1, 512);
++                    &ff_mlp_huffman_tables[2][0][0], 2, 1, VLC_STATIC_SIZE);
+     }
+
+     ff_mlp_init_crc();
+--
+1.9.1
diff --git a/projects/RPi/patches/ffmpeg/0002-truehd-add-hand-scheduled-ARM-asm-version-of-mlp_fil.patch b/projects/RPi/patches/ffmpeg/0002-truehd-add-hand-scheduled-ARM-asm-version-of-mlp_fil.patch
new file mode 100644
index 0000000000..4aea35f9fd
--- /dev/null
+++ b/projects/RPi/patches/ffmpeg/0002-truehd-add-hand-scheduled-ARM-asm-version-of-mlp_fil.patch
@@ -0,0 +1,557 @@
+From bfe3d8c8e4e046163dc314aa16207413e377283f Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Mon, 3 Mar 2014 19:44:23 +0000
+Subject: [PATCH 2/6] truehd: add hand-scheduled ARM asm version of
+ mlp_filter_channel.
+
+Profiling results for overall audio decode and the mlp_filter_channel(_arm)
+function in particular are as follows:
+
+              Before          After
+              Mean   StdDev   Mean   StdDev  Confidence  Change
+6:2 total     380.4  22.0     370.8  17.0    87.4%       +2.6%  (insignificant)
+6:2 function  60.7   7.2      36.6   8.1     100.0%      +65.8%
+8:2 total     357.0  17.5     343.2  19.0    97.8%       +4.0%  (insignificant)
+8:2 function  60.3   8.8      37.3   3.8     100.0%      +61.8%
+6:6 total     717.2  23.2     658.4  15.7    100.0%      +8.9%
+6:6 function  140.4  12.9     81.5   9.2     100.0%      +72.4%
+8:8 total     981.9  16.2     896.2  24.5    100.0%      +9.6%
+8:8 function  193.4  15.0     103.3  11.5    100.0%      +87.2%
+
+Experiments with adding preload instructions to this function yielded no
+useful benefit, so these have not been included.
+
+The assembly version has also been tested with a fuzz tester to ensure that
+any combinations of inputs not exercised by my available test streams still
+generate mathematically identical results to the C version.
+---
+ libavcodec/arm/Makefile          |   2 +
+ libavcodec/arm/mlpdsp_arm.S      | 433 +++++++++++++++++++++++++++++++++++++++
+ libavcodec/arm/mlpdsp_init_arm.c |  36 ++++
+ libavcodec/mlpdsp.c              |   2 +
+ libavcodec/mlpdsp.h              |   1 +
+ 5 files changed, 474 insertions(+)
+ create mode 100644 libavcodec/arm/mlpdsp_arm.S
+ create mode 100644 libavcodec/arm/mlpdsp_init_arm.c
+
+diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
+index a8446b2..ba673b1 100644
+--- a/libavcodec/arm/Makefile
++++ b/libavcodec/arm/Makefile
+@@ -22,6 +22,8 @@ OBJS-$(CONFIG_H264PRED)                += arm/h264pred_init_arm.o
+ OBJS-$(CONFIG_H264QPEL)                += arm/h264qpel_init_arm.o
+ OBJS-$(CONFIG_HPELDSP)                 += arm/hpeldsp_init_arm.o        \
+                                           arm/hpeldsp_arm.o
++OBJS-$(CONFIG_MLP_DECODER)             += arm/mlpdsp_init_arm.o         \
++                                          arm/mlpdsp_arm.o
+ OBJS-$(CONFIG_MPEGAUDIODSP)            += arm/mpegaudiodsp_init_arm.o
+ OBJS-$(CONFIG_MPEGVIDEO)               += arm/mpegvideo_arm.o
+ OBJS-$(CONFIG_NEON_CLOBBER_TEST)       += arm/neontest.o
+diff --git a/libavcodec/arm/mlpdsp_arm.S b/libavcodec/arm/mlpdsp_arm.S
+new file mode 100644
+index 0000000..615819d
+--- /dev/null
++++ b/libavcodec/arm/mlpdsp_arm.S
+@@ -0,0 +1,433 @@
++/*
++ * Copyright (c) 2014 RISC OS Open Ltd
++ * Author: Ben Avison <bavison@riscosopen.org>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/arm/asm.S"
++
++#define MAX_CHANNELS        8
++#define MAX_FIR_ORDER       8
++#define MAX_IIR_ORDER       4
++#define MAX_RATEFACTOR      4
++#define MAX_BLOCKSIZE       (40 * MAX_RATEFACTOR)
++
++PST     .req    a1
++PCO     .req    a2
++AC0     .req    a3
++AC1     .req    a4
++CO0     .req    v1
++CO1     .req    v2
++CO2     .req    v3
++CO3     .req    v4
++ST0     .req    v5
++ST1     .req    v6
++ST2     .req    sl
++ST3     .req    fp
++I       .req    ip
++PSAMP   .req    lr
++
++
++// Some macros that do loads/multiplies where the register number is determined
++// from an assembly-time expression. Boy is GNU assembler's syntax ugly...
++
++.macro load  group, index, base, offset
++       .altmacro
++       load_ \group, %(\index), \base, \offset
++       .noaltmacro
++.endm
++
++.macro load_ group, index, base, offset
++        ldr     \group\index, [\base, #\offset]
++.endm
++
++.macro loadd  group, index, base, offset
++       .altmacro
++       loadd_ \group, %(\index), %(\index+1), \base, \offset
++       .noaltmacro
++.endm
++
++.macro loadd_ group, index0, index1, base, offset
++A .if offset >= 256
++A       ldr     \group\index0, [\base, #\offset]
++A       ldr     \group\index1, [\base, #(\offset) + 4]
++A .else
++        ldrd    \group\index0, \group\index1, [\base, #\offset]
++A .endif
++.endm
++
++.macro multiply  index, accumulate, long
++        .altmacro
++        multiply_ %(\index), \accumulate, \long
++        .noaltmacro
++.endm
++
++.macro multiply_  index, accumulate, long
++ .if \long
++  .if \accumulate
++        smlal   AC0, AC1, CO\index, ST\index
++  .else
++        smull   AC0, AC1, CO\index, ST\index
++  .endif
++ .else
++  .if \accumulate
++        mla     AC0, CO\index, ST\index, AC0
++  .else
++        mul     AC0, CO\index, ST\index
++  .endif
++ .endif
++.endm
++
++// A macro to update the load register number and load offsets
++
++.macro inc  howmany
++  .set LOAD_REG, (LOAD_REG + \howmany) & 3
++  .set OFFSET_CO, OFFSET_CO + 4 * \howmany
++  .set OFFSET_ST, OFFSET_ST + 4 * \howmany
++  .if FIR_REMAIN > 0
++    .set FIR_REMAIN, FIR_REMAIN - \howmany
++    .if FIR_REMAIN == 0
++      .set OFFSET_CO, 4 * MAX_FIR_ORDER
++      .set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)
++    .endif
++  .elseif IIR_REMAIN > 0
++    .set IIR_REMAIN, IIR_REMAIN - \howmany
++  .endif
++.endm
++
++// Macro to implement the inner loop for one specific combination of parameters
++
++.macro implement_filter  mask_minus1, shift_0, shift_8, iir_taps, fir_taps
++  .set TOTAL_TAPS, \iir_taps + \fir_taps
++
++  // Deal with register allocation...
++  .set DEFINED_SHIFT, 0
++  .set DEFINED_MASK, 0
++  .set SHUFFLE_SHIFT, 0
++  .set SHUFFLE_MASK, 0
++  .set SPILL_SHIFT, 0
++  .set SPILL_MASK, 0
++  .if TOTAL_TAPS == 0
++    // Little register pressure in this case - just keep MASK where it was
++    .if !\mask_minus1
++      MASK .req ST1
++      .set DEFINED_MASK, 1
++    .endif
++  .else
++    .if \shift_0
++      .if !\mask_minus1
++        // AC1 is unused with shift 0
++        MASK .req AC1
++        .set DEFINED_MASK, 1
++        .set SHUFFLE_MASK, 1
++      .endif
++    .elseif \shift_8
++      .if !\mask_minus1
++        .if TOTAL_TAPS <= 4
++        // All coefficients are preloaded (so pointer not needed)
++          MASK .req PCO
++          .set DEFINED_MASK, 1
++          .set SHUFFLE_MASK, 1
++        .else
++          .set SPILL_MASK, 1
++        .endif
++      .endif
++    .else // shift not 0 or 8
++      .if TOTAL_TAPS <= 3
++        // All coefficients are preloaded, and at least one CO register is unused
++        .if \fir_taps & 1
++          SHIFT .req CO0
++          .set DEFINED_SHIFT, 1
++          .set SHUFFLE_SHIFT, 1
++        .else
++          SHIFT .req CO3
++          .set DEFINED_SHIFT, 1
++          .set SHUFFLE_SHIFT, 1
++        .endif
++        .if !\mask_minus1
++          MASK .req PCO
++          .set DEFINED_MASK, 1
++          .set SHUFFLE_MASK, 1
++        .endif
++      .elseif TOTAL_TAPS == 4
++        // All coefficients are preloaded
++        SHIFT .req PCO
++        .set DEFINED_SHIFT, 1
++        .set SHUFFLE_SHIFT, 1
++        .if !\mask_minus1
++          .set SPILL_MASK, 1
++        .endif
++      .else
++        .set SPILL_SHIFT, 1
++        .if !\mask_minus1
++          .set SPILL_MASK, 1
++        .endif
++      .endif
++    .endif
++  .endif
++  .if SPILL_SHIFT
++    SHIFT .req ST0
++    .set DEFINED_SHIFT, 1
++  .endif
++  .if SPILL_MASK
++    MASK .req ST1
++    .set DEFINED_MASK, 1
++  .endif
++
++        // Preload coefficients if possible
++  .if TOTAL_TAPS <= 4
++    .set OFFSET_CO, 0
++    .if \fir_taps & 1
++      .set LOAD_REG, 1
++    .else
++      .set LOAD_REG, 0
++    .endif
++    .rept \fir_taps
++        load    CO, LOAD_REG, PCO, OFFSET_CO
++      .set LOAD_REG, (LOAD_REG + 1) & 3
++      .set OFFSET_CO, OFFSET_CO + 4
++    .endr
++    .set OFFSET_CO, 4 * MAX_FIR_ORDER
++    .rept \iir_taps
++        load    CO, LOAD_REG, PCO, OFFSET_CO
++      .set LOAD_REG, (LOAD_REG + 1) & 3
++      .set OFFSET_CO, OFFSET_CO + 4
++    .endr
++  .endif
++
++        // Move mask/shift to final positions if necessary
++        // Need to do this after preloading, because in some cases we
++        // reuse the coefficient pointer register
++  .if SHUFFLE_SHIFT
++        mov     SHIFT, ST0
++  .endif
++  .if SHUFFLE_MASK
++        mov     MASK, ST1
++  .endif
++
++        // Begin loop
++01:
++  .if TOTAL_TAPS == 0
++        // Things simplify a lot in this case
++        // In fact this could be pipelined further if it's worth it...
++        ldr     ST0, [PSAMP]
++        subs    I, I, #1
++    .if !\mask_minus1
++        and     ST0, ST0, MASK
++    .endif
++        str     ST0, [PST, #-4]!
++        str     ST0, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
++        str     ST0, [PSAMP], #4 * MAX_CHANNELS
++        bne     01b
++  .else
++    .if \fir_taps & 1
++      .set LOAD_REG, 1
++    .else
++      .set LOAD_REG, 0
++    .endif
++    .set LOAD_BANK, 0
++    .set FIR_REMAIN, \fir_taps
++    .set IIR_REMAIN, \iir_taps
++    .if FIR_REMAIN == 0 // only IIR terms
++      .set OFFSET_CO, 4 * MAX_FIR_ORDER
++      .set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)
++    .else
++      .set OFFSET_CO, 0
++      .set OFFSET_ST, 0
++    .endif
++    .set MUL_REG, LOAD_REG
++    .set COUNTER, 0
++    .rept TOTAL_TAPS + 2
++        // Do load(s)
++     .if FIR_REMAIN != 0 || IIR_REMAIN != 0
++      .if COUNTER == 0
++       .if TOTAL_TAPS > 4
++        load    CO, LOAD_REG, PCO, OFFSET_CO
++       .endif
++        load    ST, LOAD_REG, PST, OFFSET_ST
++        inc     1
++      .elseif COUNTER == 1 && (\fir_taps & 1) == 0
++       .if TOTAL_TAPS > 4
++        load    CO, LOAD_REG, PCO, OFFSET_CO
++       .endif
++        load    ST, LOAD_REG, PST, OFFSET_ST
++        inc     1
++      .elseif LOAD_BANK == 0
++       .if TOTAL_TAPS > 4
++        .if FIR_REMAIN == 0 && IIR_REMAIN == 1
++        load    CO, LOAD_REG, PCO, OFFSET_CO
++        .else
++        loadd   CO, LOAD_REG, PCO, OFFSET_CO
++        .endif
++       .endif
++       .set LOAD_BANK, 1
++      .else
++       .if FIR_REMAIN == 0 && IIR_REMAIN == 1
++        load    ST, LOAD_REG, PST, OFFSET_ST
++        inc     1
++       .else
++        loadd   ST, LOAD_REG, PST, OFFSET_ST
++        inc     2
++       .endif
++       .set LOAD_BANK, 0
++      .endif
++     .endif
++
++        // Do interleaved multiplies, slightly delayed
++     .if COUNTER >= 2
++        multiply MUL_REG, COUNTER > 2, !\shift_0
++      .set MUL_REG, (MUL_REG + 1) & 3
++     .endif
++     .set COUNTER, COUNTER + 1
++    .endr
++
++        // Post-process the result of the multiplies
++    .if SPILL_SHIFT
++        ldr     SHIFT, [sp, #9*4 + 0*4]
++    .endif
++    .if SPILL_MASK
++        ldr     MASK, [sp, #9*4 + 1*4]
++    .endif
++        ldr     ST2, [PSAMP]
++        subs    I, I, #1
++    .if \shift_8
++        mov     AC0, AC0, lsr #8
++        orr     AC0, AC0, AC1, lsl #24
++    .elseif !\shift_0
++        rsb     ST3, SHIFT, #32
++        mov     AC0, AC0, lsr SHIFT
++A       orr     AC0, AC0, AC1, lsl ST3
++T       mov     AC1, AC1, lsl ST3
++T       orr     AC0, AC0, AC1
++    .endif
++    .if \mask_minus1
++        add     ST3, ST2, AC0
++    .else
++        add     ST2, ST2, AC0
++        and     ST3, ST2, MASK
++        sub     ST2, ST3, AC0
++    .endif
++        str     ST3, [PST, #-4]!
++        str     ST2, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
++        str     ST3, [PSAMP], #4 * MAX_CHANNELS
++        bne     01b
++  .endif
++        b       99f
++
++  .if DEFINED_SHIFT
++    .unreq SHIFT
++  .endif
++  .if DEFINED_MASK
++    .unreq MASK
++  .endif
++.endm
++
++.macro switch_on_fir_taps  mask_minus1, shift_0, shift_8, iir_taps
++A       ldr     pc, [pc, a3, LSL #2] // firorder is in range 0-(8-iir_taps)
++T       tbh     [pc, a3, lsl #1]
++0:
++A       .word   0, 70f, 71f, 72f, 73f, 74f
++T       .hword  (70f - 0b) / 2, (71f - 0b) / 2, (72f - 0b) / 2, (73f - 0b) / 2, (74f - 0b) / 2
++ .if \iir_taps <= 3
++A       .word   75f
++T       .hword  (75f - 0b) / 2
++  .if \iir_taps <= 2
++A       .word   76f
++T       .hword  (76f - 0b) / 2
++   .if \iir_taps <= 1
++A       .word   77f
++T       .hword  (77f - 0b) / 2
++    .if \iir_taps == 0
++A       .word   78f
++T       .hword  (78f - 0b) / 2
++    .endif
++   .endif
++  .endif
++ .endif
++70:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 0
++71:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 1
++72:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 2
++73:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 3
++74:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 4
++ .if \iir_taps <= 3
++75:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 5
++  .if \iir_taps <= 2
++76:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 6
++   .if \iir_taps <= 1
++77:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 7
++    .if \iir_taps == 0
++78:     implement_filter  \mask_minus1, \shift_0, \shift_8, \iir_taps, 8
++    .endif
++   .endif
++  .endif
++ .endif
++.endm
++
++.macro switch_on_iir_taps  mask_minus1, shift_0, shift_8
++A       ldr     pc, [pc, a4, LSL #2] // iirorder is in range 0-4
++T       tbh    [pc, a4, lsl #1]
++0:
++A       .word   0, 60f, 61f, 62f, 63f, 64f
++T       .hword  (60f - 0b) / 2, (61f - 0b) / 2, (62f - 0b) / 2, (63f - 0b) / 2, (64f - 0b) / 2
++60:     switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 0
++61:     switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 1
++62:     switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 2
++63:     switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 3
++64:     switch_on_fir_taps  \mask_minus1, \shift_0, \shift_8, 4
++.endm
++
++/* void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff,
++ *                                int firorder, int iirorder,
++ *                                unsigned int filter_shift, int32_t mask,
++ *                                int blocksize, int32_t *sample_buffer);
++ */
++function ff_mlp_filter_channel_arm, export=1
++        push    {v1-fp,lr}
++        add     v1, sp, #9*4 // point at arguments on stack
++        ldm     v1, {ST0,ST1,I,PSAMP}
++        cmp     ST1, #-1
++        bne     30f
++        movs    ST2, ST0, lsl #29 // shift is in range 0-15; we want to special-case 0 and 8
++        bne     20f
++        bcs     10f
++        switch_on_iir_taps 1, 1, 0
++10:     switch_on_iir_taps 1, 0, 1
++20:     switch_on_iir_taps 1, 0, 0
++30:     movs    ST2, ST0, lsl #29 // shift is in range 0-15; we want to special-case 0 and 8
++        bne     50f
++        bcs     40f
++        switch_on_iir_taps 0, 1, 0
++40:     switch_on_iir_taps 0, 0, 1
++50:     switch_on_iir_taps 0, 0, 0
++99:     pop     {v1-fp,pc}
++endfunc
++
++        .unreq  PST
++        .unreq  PCO
++        .unreq  AC0
++        .unreq  AC1
++        .unreq  CO0
++        .unreq  CO1
++        .unreq  CO2
++        .unreq  CO3
++        .unreq  ST0
++        .unreq  ST1
++        .unreq  ST2
++        .unreq  ST3
++        .unreq  I
++        .unreq  PSAMP
+diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
+new file mode 100644
+index 0000000..9a14815
+--- /dev/null
++++ b/libavcodec/arm/mlpdsp_init_arm.c
+@@ -0,0 +1,36 @@
++/*
++ * Copyright (c) 2014 RISC OS Open Ltd
++ * Author: Ben Avison <bavison@riscosopen.org>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stdint.h>
++
++#include "libavutil/arm/cpu.h"
++#include "libavutil/attributes.h"
++#include "libavcodec/mlpdsp.h"
++
++void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff,
++                               int firorder, int iirorder,
++                               unsigned int filter_shift, int32_t mask,
++                               int blocksize, int32_t *sample_buffer);
++
++av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
++{
++    c->mlp_filter_channel = ff_mlp_filter_channel_arm;
++}
+diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
+index b413e86..4b403b8 100644
+--- a/libavcodec/mlpdsp.c
++++ b/libavcodec/mlpdsp.c
+@@ -60,6 +60,8 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff,
+ av_cold void ff_mlpdsp_init(MLPDSPContext *c)
+ {
+     c->mlp_filter_channel = mlp_filter_channel;
++    if (ARCH_ARM)
++        ff_mlpdsp_init_arm(c);
+     if (ARCH_X86)
+         ff_mlpdsp_init_x86(c);
+ }
+diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
+index 84a8aa3..129bcfe 100644
+--- a/libavcodec/mlpdsp.h
++++ b/libavcodec/mlpdsp.h
+@@ -32,6 +32,7 @@ typedef struct MLPDSPContext {
+ } MLPDSPContext;
+
+ void ff_mlpdsp_init(MLPDSPContext *c);
++void ff_mlpdsp_init_arm(MLPDSPContext *c);
+ void ff_mlpdsp_init_x86(MLPDSPContext *c);
+
+ #endif /* AVCODEC_MLPDSP_H */
+--
+1.9.1
diff --git a/projects/RPi/patches/ffmpeg/0002-vc-1-Add-platform-specific-start-code-search-routine.patch b/projects/RPi/patches/ffmpeg/0002-vc-1-Add-platform-specific-start-code-search-routine.patch
new file mode 100644
index 0000000000..e84ace6065
--- /dev/null
+++ b/projects/RPi/patches/ffmpeg/0002-vc-1-Add-platform-specific-start-code-search-routine.patch
@@ -0,0 +1,143 @@
+From a60747132a1a6652ac0d18f3f110a20ea637ac30 Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Wed, 16 Apr 2014 01:51:32 +0100
+Subject: [PATCH 2/3] vc-1: Add platform-specific start code search routine to
+ VC1DSPContext.
+
+Initialise VC1DSPContext for parser as well as for decoder.
+Note, the VC-1 code doesn't actually use the function pointer yet.
+
+Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
+---
+ libavcodec/Makefile              | 6 +++---
+ libavcodec/arm/Makefile          | 2 ++
+ libavcodec/arm/vc1dsp_init_arm.c | 4 ++++
+ libavcodec/vc1.c                 | 2 ++
+ libavcodec/vc1dec.c              | 1 -
+ libavcodec/vc1dsp.c              | 3 +++
+ libavcodec/vc1dsp.h              | 8 ++++++++
+ 7 files changed, 22 insertions(+), 4 deletions(-)
+
+diff --git a/libavcodec/Makefile b/libavcodec/Makefile
+index 19caf11..120f85a 100644
+--- a/libavcodec/Makefile
++++ b/libavcodec/Makefile
+@@ -458,7 +458,7 @@ OBJS-$(CONFIG_VB_DECODER)              += vb.o
+ OBJS-$(CONFIG_VBLE_DECODER)            += vble.o
+ OBJS-$(CONFIG_VC1_DECODER)             += vc1dec.o vc1.o vc1data.o vc1dsp.o \
+                                           msmpeg4dec.o msmpeg4.o msmpeg4data.o \
+-                                          wmv2dsp.o
++                                          wmv2dsp.o startcode.o
+ OBJS-$(CONFIG_VCR1_DECODER)            += vcr1.o
+ OBJS-$(CONFIG_VMDAUDIO_DECODER)        += vmdav.o
+ OBJS-$(CONFIG_VMDVIDEO_DECODER)        += vmdav.o
+@@ -783,9 +783,9 @@ OBJS-$(CONFIG_PNM_PARSER)              += pnm_parser.o pnm.o
+ OBJS-$(CONFIG_RV30_PARSER)             += rv34_parser.o
+ OBJS-$(CONFIG_RV40_PARSER)             += rv34_parser.o
+ OBJS-$(CONFIG_TAK_PARSER)              += tak_parser.o tak.o
+-OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o vc1data.o \
++OBJS-$(CONFIG_VC1_PARSER)              += vc1_parser.o vc1.o vc1data.o vc1dsp.o \
+                                           msmpeg4.o msmpeg4data.o mpeg4video.o \
+-                                          h263.o
++                                          h263.o startcode.o
+ OBJS-$(CONFIG_VORBIS_PARSER)           += vorbis_parser.o xiph.o
+ OBJS-$(CONFIG_VP3_PARSER)              += vp3_parser.o
+ OBJS-$(CONFIG_VP8_PARSER)              += vp8_parser.o
+diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
+index b6410b2..fa2b18e 100644
+--- a/libavcodec/arm/Makefile
++++ b/libavcodec/arm/Makefile
+@@ -51,6 +51,8 @@ ARMV6-OBJS-$(CONFIG_H264DSP)           += arm/startcode_armv6.o
+ ARMV6-OBJS-$(CONFIG_HPELDSP)           += arm/hpeldsp_init_armv6.o      \
+                                           arm/hpeldsp_armv6.o
+ ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
++ARMV6-OBJS-$(CONFIG_VC1_DECODER)       += arm/startcode_armv6.o
++ARMV6-OBJS-$(CONFIG_VC1_PARSER)        += arm/startcode_armv6.o
+ ARMV6-OBJS-$(CONFIG_VP8_DECODER)       += arm/vp8_armv6.o               \
+                                           arm/vp8dsp_init_armv6.o       \
+                                           arm/vp8dsp_armv6.o
+diff --git a/libavcodec/arm/vc1dsp_init_arm.c b/libavcodec/arm/vc1dsp_init_arm.c
+index 47d4126..4a84848 100644
+--- a/libavcodec/arm/vc1dsp_init_arm.c
++++ b/libavcodec/arm/vc1dsp_init_arm.c
+@@ -23,10 +23,14 @@
+ #include "libavcodec/vc1dsp.h"
+ #include "vc1dsp.h"
+
++int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size);
++
+ av_cold void ff_vc1dsp_init_arm(VC1DSPContext *dsp)
+ {
+     int cpu_flags = av_get_cpu_flags();
+
++    if (have_armv6(cpu_flags))
++        dsp->vc1_find_start_code_candidate = ff_startcode_find_candidate_armv6;
+     if (have_neon(cpu_flags))
+         ff_vc1dsp_init_neon(dsp);
+ }
+diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
+index 49d4885..cb941dd 100644
+--- a/libavcodec/vc1.c
++++ b/libavcodec/vc1.c
+@@ -1706,5 +1706,7 @@ av_cold int ff_vc1_init_common(VC1Context *v)
+     v->pq      = -1;
+     v->mvrange = 0; /* 7.1.1.18, p80 */
+
++    ff_vc1dsp_init(&v->vc1dsp);
++
+     return 0;
+ }
+diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
+index 30fee47..67cda42 100644
+--- a/libavcodec/vc1dec.c
++++ b/libavcodec/vc1dec.c
+@@ -5631,7 +5631,6 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
+     ff_vc1_decode_end(avctx);
+
+     ff_h264chroma_init(&v->h264chroma, 8);
+-    ff_vc1dsp_init(&v->vc1dsp);
+
+     if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) {
+         int count = 0;
+diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
+index ec9c17b..09a9006 100644
+--- a/libavcodec/vc1dsp.c
++++ b/libavcodec/vc1dsp.c
+@@ -30,6 +30,7 @@
+ #include "h264chroma.h"
+ #include "rnd_avg.h"
+ #include "vc1dsp.h"
++#include "startcode.h"
+
+ /* Apply overlap transform to horizontal edge */
+ static void vc1_v_overlap_c(uint8_t *src, int stride)
+@@ -947,6 +948,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
+     dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
+ #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
+
++    dsp->vc1_find_start_code_candidate = ff_startcode_find_candidate_c;
++
+     if (ARCH_AARCH64)
+         ff_vc1dsp_init_aarch64(dsp);
+     if (ARCH_ARM)
+diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h
+index 990fbc3..6a90eed 100644
+--- a/libavcodec/vc1dsp.h
++++ b/libavcodec/vc1dsp.h
+@@ -74,6 +74,14 @@ typedef struct VC1DSPContext {
+     void (*sprite_v_double_twoscale)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1,
+                                                    const uint8_t *src2a, const uint8_t *src2b, int offset2,
+                                      int alpha, int width);
++
++    /**
++     * Search buf from the start for up to size bytes. Return the index
++     * of a zero byte, or >= size if not found. Ideally, use lookahead
++     * to filter out any zero bytes that are known to not be followed by
++     * one or more further zero bytes and a one byte.
++     */
++    int (*vc1_find_start_code_candidate)(const uint8_t *buf, int size);
+ } VC1DSPContext;
+
+ void ff_vc1dsp_init(VC1DSPContext* c);
+--
+1.9.1
diff --git a/projects/RPi/patches/ffmpeg/0003-truehd-break-out-part-of-rematrix_channels-into-plat.patch b/projects/RPi/patches/ffmpeg/0003-truehd-break-out-part-of-rematrix_channels-into-plat.patch
new file mode 100644
index 0000000000..9c06f8fe4e
--- /dev/null
+++ b/projects/RPi/patches/ffmpeg/0003-truehd-break-out-part-of-rematrix_channels-into-plat.patch
@@ -0,0 +1,158 @@
+From bb74fc44081fb6d7923ce1b7ed3e3e6514695f3e Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Wed, 5 Mar 2014 21:01:28 +0000
+Subject: [PATCH 3/6] truehd: break out part of rematrix_channels into
+ platform-specific callback.
+
+Verified with profiling that this doesn't have a measurable effect upon
+overall performance.
+---
+ libavcodec/mlpdec.c | 37 ++++++++++++-------------------------
+ libavcodec/mlpdsp.c | 33 +++++++++++++++++++++++++++++++++
+ libavcodec/mlpdsp.h | 23 +++++++++++++++++++++++
+ 3 files changed, 68 insertions(+), 25 deletions(-)
+
+diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
+index cbd9000..01ded5c 100644
+--- a/libavcodec/mlpdec.c
++++ b/libavcodec/mlpdec.c
+@@ -1024,7 +1024,7 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr)
+ static void rematrix_channels(MLPDecodeContext *m, unsigned int substr)
+ {
+     SubStream *s = &m->substream[substr];
+-    unsigned int mat, src_ch, i;
++    unsigned int mat;
+     unsigned int maxchan;
+
+     maxchan = s->max_matrix_channel;
+@@ -1036,31 +1036,18 @@ static void rematrix_channels(MLPDecodeContext *m, unsigned int substr)
+     }
+
+     for (mat = 0; mat < s->num_primitive_matrices; mat++) {
+-        int matrix_noise_shift = s->matrix_noise_shift[mat];
+         unsigned int dest_ch = s->matrix_out_ch[mat];
+-        int32_t mask = MSB_MASK(s->quant_step_size[dest_ch]);
+-        int32_t *coeffs = s->matrix_coeff[mat];
+-        int index  = s->num_primitive_matrices - mat;
+-        int index2 = 2 * index + 1;
+-
+-        /* TODO: DSPContext? */
+-
+-        for (i = 0; i < s->blockpos; i++) {
+-            int32_t bypassed_lsb = m->bypassed_lsbs[i][mat];
+-            int32_t *samples = m->sample_buffer[i];
+-            int64_t accum = 0;
+-
+-            for (src_ch = 0; src_ch <= maxchan; src_ch++)
+-                accum += (int64_t) samples[src_ch] * coeffs[src_ch];
+-
+-            if (matrix_noise_shift) {
+-                index &= m->access_unit_size_pow2 - 1;
+-                accum += m->noise_buffer[index] << (matrix_noise_shift + 7);
+-                index += index2;
+-            }
+-
+-            samples[dest_ch] = ((accum >> 14) & mask) + bypassed_lsb;
+-        }
++        m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
++                                    s->matrix_coeff[mat],
++                                    &m->bypassed_lsbs[0][mat],
++                                    m->noise_buffer,
++                                    s->num_primitive_matrices - mat,
++                                    dest_ch,
++                                    s->blockpos,
++                                    maxchan,
++                                    s->matrix_noise_shift[mat],
++                                    m->access_unit_size_pow2,
++                                    MSB_MASK(s->quant_step_size[dest_ch]));
+     }
+ }
+
+diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
+index 4b403b8..7a359b0 100644
+--- a/libavcodec/mlpdsp.c
++++ b/libavcodec/mlpdsp.c
+@@ -57,9 +57,42 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff,
+     }
+ }
+
++void ff_mlp_rematrix_channel(int32_t *samples,
++                             const int32_t *coeffs,
++                             const uint8_t *bypassed_lsbs,
++                             const int8_t *noise_buffer,
++                             int index,
++                             unsigned int dest_ch,
++                             uint16_t blockpos,
++                             unsigned int maxchan,
++                             int matrix_noise_shift,
++                             int access_unit_size_pow2,
++                             int32_t mask)
++{
++    unsigned int src_ch, i;
++    int index2 = 2 * index + 1;
++    for (i = 0; i < blockpos; i++) {
++        int64_t accum = 0;
++
++        for (src_ch = 0; src_ch <= maxchan; src_ch++)
++            accum += (int64_t) samples[src_ch] * coeffs[src_ch];
++
++        if (matrix_noise_shift) {
++            index &= access_unit_size_pow2 - 1;
++            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 7));
++            index += index2;
++        }
++
++        samples[dest_ch] = ((accum >> 14) & mask) + *bypassed_lsbs;
++        bypassed_lsbs += MAX_CHANNELS;
++        samples += MAX_CHANNELS;
++    }
++}
++
+ av_cold void ff_mlpdsp_init(MLPDSPContext *c)
+ {
+     c->mlp_filter_channel = mlp_filter_channel;
++    c->mlp_rematrix_channel = ff_mlp_rematrix_channel;
+     if (ARCH_ARM)
+         ff_mlpdsp_init_arm(c);
+     if (ARCH_X86)
+diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
+index 129bcfe..f98e9be 100644
+--- a/libavcodec/mlpdsp.h
++++ b/libavcodec/mlpdsp.h
+@@ -24,11 +24,34 @@
+
+ #include <stdint.h>
+
++void ff_mlp_rematrix_channel(int32_t *samples,
++                             const int32_t *coeffs,
++                             const uint8_t *bypassed_lsbs,
++                             const int8_t *noise_buffer,
++                             int index,
++                             unsigned int dest_ch,
++                             uint16_t blockpos,
++                             unsigned int maxchan,
++                             int matrix_noise_shift,
++                             int access_unit_size_pow2,
++                             int32_t mask);
++
+ typedef struct MLPDSPContext {
+     void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
+                                int firorder, int iirorder,
+                                unsigned int filter_shift, int32_t mask,
+                                int blocksize, int32_t *sample_buffer);
++    void (*mlp_rematrix_channel)(int32_t *samples,
++                                 const int32_t *coeffs,
++                                 const uint8_t *bypassed_lsbs,
++                                 const int8_t *noise_buffer,
++                                 int index,
++                                 unsigned int dest_ch,
++                                 uint16_t blockpos,
++                                 unsigned int maxchan,
++                                 int matrix_noise_shift,
++                                 int access_unit_size_pow2,
++                                 int32_t mask);
+ } MLPDSPContext;
+
+ void ff_mlpdsp_init(MLPDSPContext *c);
+--
+1.9.1
diff --git a/projects/RPi/patches/ffmpeg/0003-vc-1-Optimise-parser-with-special-attention-to-ARM.patch b/projects/RPi/patches/ffmpeg/0003-vc-1-Optimise-parser-with-special-attention-to-ARM.patch
new file mode 100644
index 0000000000..1f0cf40951
--- /dev/null
+++ b/projects/RPi/patches/ffmpeg/0003-vc-1-Optimise-parser-with-special-attention-to-ARM.patch
@@ -0,0 +1,401 @@
+From c39df43eae03768427243668c040de8437c4f79c Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Wed, 23 Apr 2014 01:41:04 +0100
+Subject: [PATCH 3/3] vc-1: Optimise parser (with special attention to ARM)
+
+The previous implementation of the parser made four passes over each input
+buffer (reduced to two if the container format already guaranteed the input
+buffer corresponded to frames, such as with MKV). But these buffers are
+often 200K in size, certainly enough to flush the data out of L1 cache, and
+for many CPUs, all the way out to main memory. The passes were:
+
+1) locate frame boundaries (not needed for MKV etc)
+2) copy the data into a contiguous block (not needed for MKV etc)
+3) locate the start codes within each frame
+4) unescape the data between start codes
+
+After this, the unescaped data was parsed to extract certain header fields,
+but because the unescape operation was so large, this was usually also
+effectively operating on uncached memory. Most of the unescaped data was
+simply thrown away and never processed further. Only step 2 - because it
+used memcpy - was using prefetch, making things even worse.
+
+This patch reorganises these steps so that, aside from the copying, the
+operations are performed in parallel, maximising cache utilisation. No more
+than the worst-case number of bytes needed for header parsing is unescaped.
+Most of the data is, in practice, only read in order to search for a start
+code, for which optimised implementations already existed in the H264 codec
+(notably the ARM version uses prefetch, so we end up doing both remaining
+passes at maximum speed). For MKV files, we know when we've found the last
+start code of interest in a given frame, so we are able to avoid doing even
+that one remaining pass for most of the buffer.
+
+In some use-cases (such as the Raspberry Pi) video decode is handled by the
+GPU, but the entire elementary stream is still fed through the parser to
+pick out certain elements of the header which are necessary to manage the
+decode process. As you might expect, in these cases, the performance of the
+parser is significant.
+
+To measure parser performance, I used the same VC-1 elementary stream in
+either an MPEG-2 transport stream or a MKV file, and fed it through ffmpeg
+with -c:v copy -c:a copy -f null. These are the gperftools counts for
+those streams, both filtered to only include vc1_parse() and its callees,
+and unfiltered (to include the whole binary). Lower numbers are better:
+
+                Before          After
+File  Filtered  Mean   StdDev   Mean   StdDev  Confidence  Change
+M2TS  No        861.7  8.2      650.5  8.1     100.0%      +32.5%
+MKV   No        868.9  7.4      731.7  9.0     100.0%      +18.8%
+M2TS  Yes       250.0  11.2     27.2   3.4     100.0%      +817.9%
+MKV   Yes       149.0  12.8     1.7    0.8     100.0%      +8526.3%
+
+Yes, that last case shows vc1_parse() running 86 times faster! The M2TS
+case does show a larger absolute improvement though, since it was worse
+to begin with.
+
+This patch has been tested with the FATE suite (albeit on x86 for speed).
+
+Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
+---
+ libavcodec/vc1_parser.c | 284 ++++++++++++++++++++++++++++++------------------
+ 1 file changed, 180 insertions(+), 104 deletions(-)
+
+diff --git a/libavcodec/vc1_parser.c b/libavcodec/vc1_parser.c
+index cc29ce1..4ed14bc 100644
+--- a/libavcodec/vc1_parser.c
++++ b/libavcodec/vc1_parser.c
+@@ -30,122 +30,88 @@
+ #include "vc1.h"
+ #include "get_bits.h"
+
++/** The maximum number of bytes of a sequence, entry point or
++ *  frame header whose values we pay any attention to */
++#define UNESCAPED_THRESHOLD 37
++
++/** The maximum number of bytes of a sequence, entry point or
++ *  frame header which must be valid memory (because they are
++ *  used to update the bitstream cache in skip_bits() calls)
++ */
++#define UNESCAPED_LIMIT 144
++
++typedef enum {
++    NO_MATCH,
++    ONE_ZERO,
++    TWO_ZEROS,
++    ONE
++} VC1ParseSearchState;
++
+ typedef struct {
+     ParseContext pc;
+     VC1Context v;
++    uint8_t prev_start_code;
++    size_t bytes_to_skip;
++    uint8_t unesc_buffer[UNESCAPED_LIMIT];
++    size_t unesc_index;
++    VC1ParseSearchState search_state;
+ } VC1ParseContext;
+
+-static void vc1_extract_headers(AVCodecParserContext *s, AVCodecContext *avctx,
+-                                const uint8_t *buf, int buf_size)
++static void vc1_extract_header(AVCodecParserContext *s, AVCodecContext *avctx,
++                               const uint8_t *buf, int buf_size)
+ {
++    /* Parse the header we just finished unescaping */
+     VC1ParseContext *vpc = s->priv_data;
+     GetBitContext gb;
+-    const uint8_t *start, *end, *next;
+-    uint8_t *buf2 = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
+-
++    int ret;
+     vpc->v.s.avctx = avctx;
+     vpc->v.parse_only = 1;
+-    vpc->v.first_pic_header_flag = 1;
+-    next = buf;
+-    s->repeat_pict = 0;
+-
+-    for(start = buf, end = buf + buf_size; next < end; start = next){
+-        int buf2_size, size;
+-        int ret;
+-
+-        next = find_next_marker(start + 4, end);
+-        size = next - start - 4;
+-        buf2_size = vc1_unescape_buffer(start + 4, size, buf2);
+-        init_get_bits(&gb, buf2, buf2_size * 8);
+-        if(size <= 0) continue;
+-        switch(AV_RB32(start)){
+-        case VC1_CODE_SEQHDR:
+-            ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb);
+-            break;
+-        case VC1_CODE_ENTRYPOINT:
+-            ff_vc1_decode_entry_point(avctx, &vpc->v, &gb);
+-            break;
+-        case VC1_CODE_FRAME:
+-            if(vpc->v.profile < PROFILE_ADVANCED)
+-                ret = ff_vc1_parse_frame_header    (&vpc->v, &gb);
+-            else
+-                ret = ff_vc1_parse_frame_header_adv(&vpc->v, &gb);
+-
+-            if (ret < 0)
+-                break;
+-
+-            /* keep AV_PICTURE_TYPE_BI internal to VC1 */
+-            if (vpc->v.s.pict_type == AV_PICTURE_TYPE_BI)
+-                s->pict_type = AV_PICTURE_TYPE_B;
+-            else
+-                s->pict_type = vpc->v.s.pict_type;
+-
+-            if (avctx->ticks_per_frame > 1){
+-                // process pulldown flags
+-                s->repeat_pict = 1;
+-                // Pulldown flags are only valid when 'broadcast' has been set.
+-                // So ticks_per_frame will be 2
+-                if (vpc->v.rff){
+-                    // repeat field
+-                    s->repeat_pict = 2;
+-                }else if (vpc->v.rptfrm){
+-                    // repeat frames
+-                    s->repeat_pict = vpc->v.rptfrm * 2 + 1;
+-                }
+-            }
+-
+-            if (vpc->v.broadcast && vpc->v.interlace && !vpc->v.psf)
+-                s->field_order = vpc->v.tff ? AV_FIELD_TT : AV_FIELD_BB;
+-            else
+-                s->field_order = AV_FIELD_PROGRESSIVE;
++    init_get_bits(&gb, buf, buf_size * 8);
++    switch (vpc->prev_start_code) {
++    case VC1_CODE_SEQHDR & 0xFF:
++        ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb);
++        break;
++    case VC1_CODE_ENTRYPOINT & 0xFF:
++        ff_vc1_decode_entry_point(avctx, &vpc->v, &gb);
++        break;
++    case VC1_CODE_FRAME & 0xFF:
++        if(vpc->v.profile < PROFILE_ADVANCED)
++            ret = ff_vc1_parse_frame_header    (&vpc->v, &gb);
++        else
++            ret = ff_vc1_parse_frame_header_adv(&vpc->v, &gb);
+
++        if (ret < 0)
+             break;
+-        }
+-    }
+
+-    av_free(buf2);
+-}
++        /* keep AV_PICTURE_TYPE_BI internal to VC1 */
++        if (vpc->v.s.pict_type == AV_PICTURE_TYPE_BI)
++            s->pict_type = AV_PICTURE_TYPE_B;
++        else
++            s->pict_type = vpc->v.s.pict_type;
+
+-/**
+- * Find the end of the current frame in the bitstream.
+- * @return the position of the first byte of the next frame, or -1
+- */
+-static int vc1_find_frame_end(ParseContext *pc, const uint8_t *buf,
+-                               int buf_size) {
+-    int pic_found, i;
+-    uint32_t state;
+-
+-    pic_found= pc->frame_start_found;
+-    state= pc->state;
+-
+-    i=0;
+-    if(!pic_found){
+-        for(i=0; i<buf_size; i++){
+-            state= (state<<8) | buf[i];
+-            if(state == VC1_CODE_FRAME || state == VC1_CODE_FIELD){
+-                i++;
+-                pic_found=1;
+-                break;
++        if (avctx->ticks_per_frame > 1){
++            // process pulldown flags
++            s->repeat_pict = 1;
++            // Pulldown flags are only valid when 'broadcast' has been set.
++            // So ticks_per_frame will be 2
++            if (vpc->v.rff){
++                // repeat field
++                s->repeat_pict = 2;
++            }else if (vpc->v.rptfrm){
++                // repeat frames
++                s->repeat_pict = vpc->v.rptfrm * 2 + 1;
+             }
++        }else{
++            s->repeat_pict = 0;
+         }
+-    }
+
+-    if(pic_found){
+-        /* EOF considered as end of frame */
+-        if (buf_size == 0)
+-            return 0;
+-        for(; i<buf_size; i++){
+-            state= (state<<8) | buf[i];
+-            if(IS_MARKER(state) && state != VC1_CODE_FIELD && state != VC1_CODE_SLICE){
+-                pc->frame_start_found=0;
+-                pc->state=-1;
+-                return i-3;
+-            }
+-        }
++        if (vpc->v.broadcast && vpc->v.interlace && !vpc->v.psf)
++            s->field_order = vpc->v.tff ? AV_FIELD_TT : AV_FIELD_BB;
++        else
++            s->field_order = AV_FIELD_PROGRESSIVE;
++
++        break;
+     }
+-    pc->frame_start_found= pic_found;
+-    pc->state= state;
+-    return END_NOT_FOUND;
+ }
+
+ static int vc1_parse(AVCodecParserContext *s,
+@@ -153,22 +119,127 @@ static int vc1_parse(AVCodecParserContext *s,
+                            const uint8_t **poutbuf, int *poutbuf_size,
+                            const uint8_t *buf, int buf_size)
+ {
++    /* Here we do the searching for frame boundaries and headers at
++     * the same time. Only a minimal amount at the start of each
++     * header is unescaped. */
+     VC1ParseContext *vpc = s->priv_data;
+-    int next;
++    int pic_found = vpc->pc.frame_start_found;
++    uint8_t *unesc_buffer = vpc->unesc_buffer;
++    size_t unesc_index = vpc->unesc_index;
++    VC1ParseSearchState search_state = vpc->search_state;
++    int next = END_NOT_FOUND;
++    int i = vpc->bytes_to_skip;
++
++    if (pic_found && buf_size == 0) {
++        /* EOF considered as end of frame */
++        memset(unesc_buffer + unesc_index, 0, UNESCAPED_THRESHOLD - unesc_index);
++        vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
++        next = 0;
++    }
++    while (i < buf_size) {
++        int start_code_found = 0;
++        uint8_t b;
++        while (i < buf_size && unesc_index < UNESCAPED_THRESHOLD) {
++            b = buf[i++];
++            unesc_buffer[unesc_index++] = b;
++            if (search_state <= ONE_ZERO)
++                search_state = b ? NO_MATCH : search_state + 1;
++            else if (search_state == TWO_ZEROS) {
++                if (b == 1)
++                    search_state = ONE;
++                else if (b > 1) {
++                    if (b == 3)
++                        unesc_index--; // swallow emulation prevention byte
++                    search_state = NO_MATCH;
++                }
++            }
++            else { // search_state == ONE
++                // Header unescaping terminates early due to detection of next start code
++                search_state = NO_MATCH;
++                start_code_found = 1;
++                break;
++            }
++        }
++        if ((s->flags & PARSER_FLAG_COMPLETE_FRAMES) &&
++                unesc_index >= UNESCAPED_THRESHOLD &&
++                vpc->prev_start_code == (VC1_CODE_FRAME & 0xFF))
++        {
++            // No need to keep scanning the rest of the buffer for
++            // start codes if we know it contains a complete frame and
++            // we've already unescaped all we need of the frame header
++            vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
++            break;
++        }
++        if (unesc_index >= UNESCAPED_THRESHOLD && !start_code_found) {
++            while (i < buf_size) {
++                if (search_state == NO_MATCH) {
++                    i += vpc->v.vc1dsp.vc1_find_start_code_candidate(buf + i, buf_size - i);
++                    if (i < buf_size) {
++                        search_state = ONE_ZERO;
++                    }
++                    i++;
++                } else {
++                    b = buf[i++];
++                    if (search_state == ONE_ZERO)
++                        search_state = b ? NO_MATCH : TWO_ZEROS;
++                    else if (search_state == TWO_ZEROS) {
++                        if (b >= 1)
++                            search_state = b == 1 ? ONE : NO_MATCH;
++                    }
++                    else { // search_state == ONE
++                        search_state = NO_MATCH;
++                        start_code_found = 1;
++                        break;
++                    }
++                }
++            }
++        }
++        if (start_code_found) {
++            vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
++
++            vpc->prev_start_code = b;
++            unesc_index = 0;
++
++            if (!(s->flags & PARSER_FLAG_COMPLETE_FRAMES)) {
++                if (!pic_found && (b == (VC1_CODE_FRAME & 0xFF) || b == (VC1_CODE_FIELD & 0xFF))) {
++                    pic_found = 1;
++                }
++                else if (pic_found && b != (VC1_CODE_FIELD & 0xFF) && b != (VC1_CODE_SLICE & 0xFF)) {
++                    next = i - 4;
++                    pic_found = b == (VC1_CODE_FRAME & 0xFF);
++                    break;
++                }
++            }
++        }
++    }
+
+-    if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
+-        next= buf_size;
+-    }else{
+-        next= vc1_find_frame_end(&vpc->pc, buf, buf_size);
++    vpc->pc.frame_start_found = pic_found;
++    vpc->unesc_index = unesc_index;
++    vpc->search_state = search_state;
+
++    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
++        next = buf_size;
++    } else {
+         if (ff_combine_frame(&vpc->pc, next, &buf, &buf_size) < 0) {
++            vpc->bytes_to_skip = 0;
+             *poutbuf = NULL;
+             *poutbuf_size = 0;
+             return buf_size;
+         }
+     }
+
+-    vc1_extract_headers(s, avctx, buf, buf_size);
++    vpc->v.first_pic_header_flag = 1;
++
++    /* If we return with a valid pointer to a combined frame buffer
++     * then on the next call we'll have been unhelpfully rewound
++     * by up to 4 bytes (depending upon whether the start code
++     * overlapped the input buffer, and if so by how much). We don't
++     * want this: it will either cause spurious second detections of
++     * the start code we've already seen, or cause extra bytes to be
++     * inserted at the start of the unescaped buffer. */
++    vpc->bytes_to_skip = 4;
++    if (next < 0)
++        vpc->bytes_to_skip += next;
+
+     *poutbuf = buf;
+     *poutbuf_size = buf_size;
+@@ -199,6 +270,11 @@ static av_cold int vc1_parse_init(AVCodecParserContext *s)
+ {
+     VC1ParseContext *vpc = s->priv_data;
+     vpc->v.s.slice_context_count = 1;
++    vpc->v.first_pic_header_flag = 1;
++    vpc->prev_start_code = 0;
++    vpc->bytes_to_skip = 0;
++    vpc->unesc_index = 0;
++    vpc->search_state = NO_MATCH;
+     return ff_vc1_init_common(&vpc->v);
+ }
+
+--
+1.9.1
diff --git a/projects/RPi/patches/ffmpeg/0004-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch b/projects/RPi/patches/ffmpeg/0004-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch
new file mode 100644
index 0000000000..575622e346
--- /dev/null
+++ b/projects/RPi/patches/ffmpeg/0004-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch
@@ -0,0 +1,285 @@
+From 98428a8cf593587b403076bb54b46cc70ed17ff2 Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Mon, 10 Mar 2014 14:42:05 +0000
+Subject: [PATCH 4/6] truehd: add hand-scheduled ARM asm version of
+ ff_mlp_rematrix_channel.
+
+Profiling results for overall audio decode and the rematrix_channels function
+in particular are as follows:
+
+              Before          After
+              Mean   StdDev   Mean   StdDev  Confidence  Change
+6:2 total     370.8  17.0     348.8  20.1    99.9%       +6.3%
+6:2 function  46.4   8.4      45.8   6.6     18.0%       +1.2%  (insignificant)
+8:2 total     343.2  19.0     339.1  15.4    54.7%       +1.2%  (insignificant)
+8:2 function  38.9   3.9      40.2   6.9     52.4%       -3.2%  (insignificant)
+6:6 total     658.4  15.7     604.6  20.8    100.0%      +8.9%
+6:6 function  109.0  8.7      59.5   5.4     100.0%      +83.3%
+8:8 total     896.2  24.5     766.4  17.6    100.0%      +16.9%
+8:8 function  223.4  12.8     93.8   5.0     100.0%      +138.3%
+
+The assembly version has also been tested with a fuzz tester to ensure that
+any combinations of inputs not exercised by my available test streams still
+generate mathematically identical results to the C version.
+---
+ libavcodec/arm/mlpdsp_arm.S      | 222 +++++++++++++++++++++++++++++++++++++++
+ libavcodec/arm/mlpdsp_init_arm.c |  12 +++
+ 2 files changed, 234 insertions(+)
+
+diff --git a/libavcodec/arm/mlpdsp_arm.S b/libavcodec/arm/mlpdsp_arm.S
+index 615819d..9b51d0c 100644
+--- a/libavcodec/arm/mlpdsp_arm.S
++++ b/libavcodec/arm/mlpdsp_arm.S
+@@ -431,3 +431,225 @@ endfunc
+         .unreq  ST3
+         .unreq  I
+         .unreq  PSAMP
++
++/********************************************************************/
++
++PSA     .req    a1 // samples
++PCO     .req    a2 // coeffs
++PBL     .req    a3 // bypassed_lsbs
++INDEX   .req    a4
++CO0     .req    v1
++CO1     .req    v2
++CO2     .req    v3
++CO3     .req    v4
++SA0     .req    v5
++SA1     .req    v6
++SA2     .req    sl
++SA3     .req    fp
++AC0     .req    ip
++AC1     .req    lr
++NOISE   .req    SA0
++LSB     .req    SA1
++DCH     .req    SA2 // dest_ch
++MASK    .req    SA3
++
++    // INDEX is used as follows:
++    // bits 0..6   index2 (values up to 17, but wider so that we can
++    //               add to index field without needing to mask)
++    // bits 7..14  i (values up to 160)
++    // bit 15      underflow detect for i
++    // bits 25..31 (if access_unit_size_pow2 == 128)  \ index
++    // bits 26..31 (if access_unit_size_pow2 == 64)   /
++
++.macro implement_rematrix  shift, index_mask, mask_minus1, maxchan
++    .if \maxchan == 1
++        // We can just leave the coefficients in registers in this case
++        ldrd    CO0, CO1, [PCO]
++    .endif
++1:
++    .if \maxchan == 1
++        ldrd    SA0, SA1, [PSA]
++        smull   AC0, AC1, CO0, SA0
++    .elseif \maxchan == 5
++        ldr     CO0, [PCO, #0]
++        ldr     SA0, [PSA, #0]
++        ldr     CO1, [PCO, #4]
++        ldr     SA1, [PSA, #4]
++        ldrd    CO2, CO3, [PCO, #8]
++        smull   AC0, AC1, CO0, SA0
++        ldrd    SA2, SA3, [PSA, #8]
++        smlal   AC0, AC1, CO1, SA1
++        ldrd    CO0, CO1, [PCO, #16]
++        smlal   AC0, AC1, CO2, SA2
++        ldrd    SA0, SA1, [PSA, #16]
++        smlal   AC0, AC1, CO3, SA3
++        smlal   AC0, AC1, CO0, SA0
++    .else // \maxchan == 7
++        ldr     CO2, [PCO, #0]
++        ldr     SA2, [PSA, #0]
++        ldr     CO3, [PCO, #4]
++        ldr     SA3, [PSA, #4]
++        ldrd    CO0, CO1, [PCO, #8]
++        smull   AC0, AC1, CO2, SA2
++        ldrd    SA0, SA1, [PSA, #8]
++        smlal   AC0, AC1, CO3, SA3
++        ldrd    CO2, CO3, [PCO, #16]
++        smlal   AC0, AC1, CO0, SA0
++        ldrd    SA2, SA3, [PSA, #16]
++        smlal   AC0, AC1, CO1, SA1
++        ldrd    CO0, CO1, [PCO, #24]
++        smlal   AC0, AC1, CO2, SA2
++        ldrd    SA0, SA1, [PSA, #24]
++        smlal   AC0, AC1, CO3, SA3
++        smlal   AC0, AC1, CO0, SA0
++    .endif
++        ldm     sp, {NOISE, DCH, MASK}
++        smlal   AC0, AC1, CO1, SA1
++    .if \shift != 0
++      .if \index_mask == 63
++        add     NOISE, NOISE, INDEX, lsr #32-6
++        ldrb    LSB, [PBL], #MAX_CHANNELS
++        ldrsb   NOISE, [NOISE]
++        add     INDEX, INDEX, INDEX, lsl #32-6
++      .else // \index_mask == 127
++        add     NOISE, NOISE, INDEX, lsr #32-7
++        ldrb    LSB, [PBL], #MAX_CHANNELS
++        ldrsb   NOISE, [NOISE]
++        add     INDEX, INDEX, INDEX, lsl #32-7
++      .endif
++        sub     INDEX, INDEX, #1<<7
++        adds    AC0, AC0, NOISE, lsl #\shift + 7
++        adc     AC1, AC1, NOISE, asr #31
++    .else
++        ldrb    LSB, [PBL], #MAX_CHANNELS
++        sub     INDEX, INDEX, #1<<7
++    .endif
++        add     PSA, PSA, #MAX_CHANNELS*4
++        mov     AC0, AC0, lsr #14
++        orr     AC0, AC0, AC1, lsl #18
++    .if !\mask_minus1
++        and     AC0, AC0, MASK
++    .endif
++        add     AC0, AC0, LSB
++        tst     INDEX, #1<<15
++        str     AC0, [PSA, DCH, lsl #2]  // DCH is precompensated for the early increment of PSA
++        beq     1b
++        b       98f
++.endm
++
++.macro switch_on_maxchan  shift, index_mask, mask_minus1
++        cmp     v4, #5
++        blo     51f
++        beq     50f
++        implement_rematrix  \shift, \index_mask, \mask_minus1, 7
++50:     implement_rematrix  \shift, \index_mask, \mask_minus1, 5
++51:     implement_rematrix  \shift, \index_mask, \mask_minus1, 1
++.endm
++
++.macro switch_on_mask  shift, index_mask
++        cmp     sl, #-1
++        bne     40f
++        switch_on_maxchan  \shift, \index_mask, 1
++40:     switch_on_maxchan  \shift, \index_mask, 0
++.endm
++
++.macro switch_on_au_size  shift
++  .if \shift == 0
++        switch_on_mask  \shift, undefined
++  .else
++        teq     v6, #64
++        bne     30f
++        orr     INDEX, INDEX, v1, lsl #32-6
++        switch_on_mask  \shift, 63
++30:     orr     INDEX, INDEX, v1, lsl #32-7
++        switch_on_mask  \shift, 127
++  .endif
++.endm
++
++/* void ff_mlp_rematrix_channel_arm(int32_t *samples,
++ *                                  const int32_t *coeffs,
++ *                                  const uint8_t *bypassed_lsbs,
++ *                                  const int8_t *noise_buffer,
++ *                                  int index,
++ *                                  unsigned int dest_ch,
++ *                                  uint16_t blockpos,
++ *                                  unsigned int maxchan,
++ *                                  int matrix_noise_shift,
++ *                                  int access_unit_size_pow2,
++ *                                  int32_t mask);
++ */
++function ff_mlp_rematrix_channel_arm, export=1
++        push    {v1-fp,lr}
++        add     v1, sp, #9*4 // point at arguments on stack
++        ldm     v1, {v1-sl}
++        teq     v4, #1
++        itt     ne
++        teqne   v4, #5
++        teqne   v4, #7
++        bne     99f
++        teq     v6, #64
++        it      ne
++        teqne   v6, #128
++        bne     99f
++        sub     v2, v2, #MAX_CHANNELS
++        push    {a4,v2,sl}          // initialise NOISE,DCH,MASK; make sp dword-aligned
++        movs    INDEX, v3, lsl #7
++        beq     98f                 // just in case, do nothing if blockpos = 0
++        subs    INDEX, INDEX, #1<<7 // offset by 1 so we borrow at the right time
++        adc     lr, v1, v1          // calculate index2 (C was set by preceding subs)
++        orr     INDEX, INDEX, lr
++        // Switch on matrix_noise_shift: values 0 and 1 are
++        // disproportionately common so do those in a form the branch
++        // predictor can accelerate. Values can only go up to 15.
++        cmp     v5, #1
++        beq     11f
++        blo     10f
++A       ldr     pc, [pc, v5, lsl #2]
++T       tbh     [pc, v5, lsl #1]
++0:
++A       .word   0, 0, 0, 12f, 13f, 14f, 15f, 16f, 17f, 18f, 19f, 20f, 21f, 22f, 23f, 24f, 25f
++T       .hword  0, 0, (12f - 0b) / 2, (13f - 0b) / 2, (14f - 0b) / 2, (15f - 0b) / 2
++T       .hword  (16f - 0b) / 2, (17f - 0b) / 2, (18f - 0b) / 2, (19f - 0b) / 2
++T       .hword  (20f - 0b) / 2, (21f - 0b) / 2, (22f - 0b) / 2, (23f - 0b) / 2, (24f - 0b) / 2, (25f - 0b) / 2
++10:     switch_on_au_size  0
++11:     switch_on_au_size  1
++12:     switch_on_au_size  2
++13:     switch_on_au_size  3
++14:     switch_on_au_size  4
++15:     switch_on_au_size  5
++16:     switch_on_au_size  6
++17:     switch_on_au_size  7
++18:     switch_on_au_size  8
++19:     switch_on_au_size  9
++20:     switch_on_au_size  10
++21:     switch_on_au_size  11
++22:     switch_on_au_size  12
++23:     switch_on_au_size  13
++24:     switch_on_au_size  14
++25:     switch_on_au_size  15
++
++98:     add     sp, sp, #3*4
++        pop     {v1-fp,pc}
++99:     // Can't handle these parameters, drop back to C
++        pop     {v1-fp,lr}
++        b       X(ff_mlp_rematrix_channel)
++endfunc
++
++        .unreq  PSA
++        .unreq  PCO
++        .unreq  PBL
++        .unreq  INDEX
++        .unreq  CO0
++        .unreq  CO1
++        .unreq  CO2
++        .unreq  CO3
++        .unreq  SA0
++        .unreq  SA1
++        .unreq  SA2
++        .unreq  SA3
++        .unreq  AC0
++        .unreq  AC1
++        .unreq  NOISE
++        .unreq  LSB
++        .unreq  DCH
++        .unreq  MASK
+diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
+index 9a14815..1bb2276 100644
+--- a/libavcodec/arm/mlpdsp_init_arm.c
++++ b/libavcodec/arm/mlpdsp_init_arm.c
+@@ -29,8 +29,20 @@ void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff,
+                                int firorder, int iirorder,
+                                unsigned int filter_shift, int32_t mask,
+                                int blocksize, int32_t *sample_buffer);
++void ff_mlp_rematrix_channel_arm(int32_t *samples,
++                                 const int32_t *coeffs,
++                                 const uint8_t *bypassed_lsbs,
++                                 const int8_t *noise_buffer,
++                                 int index,
++                                 unsigned int dest_ch,
++                                 uint16_t blockpos,
++                                 unsigned int maxchan,
++                                 int matrix_noise_shift,
++                                 int access_unit_size_pow2,
++                                 int32_t mask);
+
+ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
+ {
+     c->mlp_filter_channel = ff_mlp_filter_channel_arm;
++    c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
+ }
+--
+1.9.1
diff --git a/projects/RPi/patches/ffmpeg/0005-truehd-break-out-part-of-output_data-into-platform-s.patch b/projects/RPi/patches/ffmpeg/0005-truehd-break-out-part-of-output_data-into-platform-s.patch
new file mode 100644
index 0000000000..c5880e909a
--- /dev/null
+++ b/projects/RPi/patches/ffmpeg/0005-truehd-break-out-part-of-output_data-into-platform-s.patch
@@ -0,0 +1,197 @@
+From 5bfcb7a691eb63c56f1485b60f399d79ff943799 Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Wed, 12 Mar 2014 18:18:39 +0000
+Subject: [PATCH 5/6] truehd: break out part of output_data into
+ platform-specific callback.
+
+Verified with profiling that this doesn't have a measurable effect upon
+overall performance.
+---
+ libavcodec/mlpdec.c | 40 +++++++++++++++++++++++-----------------
+ libavcodec/mlpdsp.c | 38 ++++++++++++++++++++++++++++++++++++++
+ libavcodec/mlpdsp.h | 22 ++++++++++++++++++++++
+ 3 files changed, 83 insertions(+), 17 deletions(-)
+
+diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
+index 01ded5c..061dabc 100644
+--- a/libavcodec/mlpdec.c
++++ b/libavcodec/mlpdec.c
+@@ -363,6 +363,10 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
+         m->avctx->sample_fmt = AV_SAMPLE_FMT_S32;
+     else
+         m->avctx->sample_fmt = AV_SAMPLE_FMT_S16;
++    m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(m->substream[m->max_decoded_substream].ch_assign,
++                                                           m->substream[m->max_decoded_substream].output_shift,
++                                                           m->substream[m->max_decoded_substream].max_matrix_channel,
++                                                           m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
+
+     m->params_valid = 1;
+     for (substr = 0; substr < MAX_SUBSTREAMS; substr++)
+@@ -612,6 +616,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
+     if (substr == m->max_decoded_substream) {
+         m->avctx->channels       = s->max_matrix_channel + 1;
+         m->avctx->channel_layout = s->ch_layout;
++        m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
++                                                               s->output_shift,
++                                                               s->max_matrix_channel,
++                                                               m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
+
+         if (m->avctx->codec_id == AV_CODEC_ID_MLP && m->needs_reordering) {
+             if (m->avctx->channel_layout == (AV_CH_LAYOUT_QUAD|AV_CH_LOW_FREQUENCY) ||
+@@ -857,9 +865,15 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
+                 return ret;
+
+     if (s->param_presence_flags & PARAM_OUTSHIFT)
+-        if (get_bits1(gbp))
++        if (get_bits1(gbp)) {
+             for (ch = 0; ch <= s->max_matrix_channel; ch++)
+                 s->output_shift[ch] = get_sbits(gbp, 4);
++            if (substr == m->max_decoded_substream)
++                m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
++                                                                       s->output_shift,
++                                                                       s->max_matrix_channel,
++                                                                       m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
++        }
+
+     if (s->param_presence_flags & PARAM_QUANTSTEP)
+         if (get_bits1(gbp))
+@@ -1058,9 +1072,6 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
+ {
+     AVCodecContext *avctx = m->avctx;
+     SubStream *s = &m->substream[substr];
+-    unsigned int i, out_ch = 0;
+-    int32_t *data_32;
+-    int16_t *data_16;
+     int ret;
+     int is32 = (m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
+
+@@ -1078,19 +1089,14 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
+     frame->nb_samples = s->blockpos;
+     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
+         return ret;
+-    data_32 = (int32_t *)frame->data[0];
+-    data_16 = (int16_t *)frame->data[0];
+-
+-    for (i = 0; i < s->blockpos; i++) {
+-        for (out_ch = 0; out_ch <= s->max_matrix_channel; out_ch++) {
+-            int mat_ch = s->ch_assign[out_ch];
+-            int32_t sample = m->sample_buffer[i][mat_ch]
+-                          << s->output_shift[mat_ch];
+-            s->lossless_check_data ^= (sample & 0xffffff) << mat_ch;
+-            if (is32) *data_32++ = sample << 8;
+-            else      *data_16++ = sample >> 8;
+-        }
+-    }
++    s->lossless_check_data = m->dsp.mlp_pack_output(s->lossless_check_data,
++                                                    s->blockpos,
++                                                    m->sample_buffer,
++                                                    frame->data[0],
++                                                    s->ch_assign,
++                                                    s->output_shift,
++                                                    s->max_matrix_channel,
++                                                    is32);
+
+     /* Update matrix encoding side data */
+     if ((ret = ff_side_data_update_matrix_encoding(frame, s->matrix_encoding)) < 0)
+diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
+index 7a359b0..3ae8c37 100644
+--- a/libavcodec/mlpdsp.c
++++ b/libavcodec/mlpdsp.c
+@@ -89,10 +89,48 @@ void ff_mlp_rematrix_channel(int32_t *samples,
+     }
+ }
+
++static int32_t (*mlp_select_pack_output(uint8_t *ch_assign, /* returns a pointer to a pack-output routine */
++                                        int8_t *output_shift,
++                                        uint8_t max_matrix_channel,
++                                        int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int)
++{
++    return ff_mlp_pack_output; /* generic selector: ignores its arguments and always picks the C implementation */
++}
++
++int32_t ff_mlp_pack_output(int32_t lossless_check_data, /* running XOR check value; the updated value is returned */
++                           uint16_t blockpos, /* number of rows of sample_buffer to pack */
++                           int32_t (*sample_buffer)[MAX_CHANNELS], /* input samples, indexed [sample][channel] */
++                           void *data, /* output buffer, written as int32_t or int16_t depending on is32 */
++                           uint8_t *ch_assign, /* maps output channel index to a column of sample_buffer */
++                           int8_t *output_shift, /* left shift per channel, indexed by the mapped channel */
++                           uint8_t max_matrix_channel, /* highest output channel index (inclusive) */
++                           int is32) /* non-zero: 32-bit output; zero: 16-bit output */
++{
++    unsigned int i, out_ch = 0;
++    int32_t *data_32 = data; /* both cursors start at the same buffer; only the one */
++    int16_t *data_16 = data; /* selected by is32 is ever advanced */
++
++    for (i = 0; i < blockpos; i++) {
++        for (out_ch = 0; out_ch <= max_matrix_channel; out_ch++) {
++            int mat_ch = ch_assign[out_ch]; /* source column for this output channel */
++            int32_t sample = sample_buffer[i][mat_ch]
++                          << output_shift[mat_ch];
++            lossless_check_data ^= (sample & 0xffffff) << mat_ch; /* fold the low 24 bits, shifted by the source channel, into the check value */
++            if (is32)
++                *data_32++ = sample << 8; /* 32-bit output: sample shifted up by 8 bits */
++            else
++                *data_16++ = sample >> 8; /* 16-bit output: sample shifted down by 8 bits */
++        }
++    }
++    return lossless_check_data;
++}
++
+ av_cold void ff_mlpdsp_init(MLPDSPContext *c)
+ {
+     c->mlp_filter_channel = mlp_filter_channel;
+     c->mlp_rematrix_channel = ff_mlp_rematrix_channel;
++    c->mlp_select_pack_output = mlp_select_pack_output;
++    c->mlp_pack_output = ff_mlp_pack_output;
+     if (ARCH_ARM)
+         ff_mlpdsp_init_arm(c);
+     if (ARCH_X86)
+diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
+index f98e9be..a0edeb7 100644
+--- a/libavcodec/mlpdsp.h
++++ b/libavcodec/mlpdsp.h
+@@ -23,6 +23,7 @@
+ #define AVCODEC_MLPDSP_H
+
+ #include <stdint.h>
++#include "mlp.h"
+
+ void ff_mlp_rematrix_channel(int32_t *samples,
+                              const int32_t *coeffs,
+@@ -36,6 +37,15 @@ void ff_mlp_rematrix_channel(int32_t *samples,
+                              int access_unit_size_pow2,
+                              int32_t mask);
+
++int32_t ff_mlp_pack_output(int32_t lossless_check_data,
++                           uint16_t blockpos,
++                           int32_t (*sample_buffer)[MAX_CHANNELS],
++                           void *data,
++                           uint8_t *ch_assign,
++                           int8_t *output_shift,
++                           uint8_t max_matrix_channel,
++                           int is32);
++
+ typedef struct MLPDSPContext {
+     void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
+                                int firorder, int iirorder,
+@@ -52,6 +62,18 @@ typedef struct MLPDSPContext {
+                                  int matrix_noise_shift,
+                                  int access_unit_size_pow2,
+                                  int32_t mask);
++    int32_t (*(*mlp_select_pack_output)(uint8_t *ch_assign,
++                                        int8_t *output_shift,
++                                        uint8_t max_matrix_channel,
++                                        int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int);
++    int32_t (*mlp_pack_output)(int32_t lossless_check_data,
++                               uint16_t blockpos,
++                               int32_t (*sample_buffer)[MAX_CHANNELS],
++                               void *data,
++                               uint8_t *ch_assign,
++                               int8_t *output_shift,
++                               uint8_t max_matrix_channel,
++                               int is32);
+ } MLPDSPContext;
+
+ void ff_mlpdsp_init(MLPDSPContext *c);
+--
+1.9.1
diff --git a/projects/RPi/patches/ffmpeg/0006-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch b/projects/RPi/patches/ffmpeg/0006-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch
new file mode 100644
index 0000000000..93add62da5
--- /dev/null
+++ b/projects/RPi/patches/ffmpeg/0006-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch
@@ -0,0 +1,689 @@
+From c647209386bd811cc1c33b4fc8ec17a00f8c8ded Mon Sep 17 00:00:00 2001
+From: Ben Avison <bavison@riscosopen.org>
+Date: Thu, 13 Mar 2014 00:21:55 +0000
+Subject: [PATCH 6/6] truehd: add hand-scheduled ARM asm version of
+ ff_mlp_pack_output.
+
+Profiling results for overall decode and the output_data function in
+particular are as follows:
+
+              Before          After
+              Mean   StdDev   Mean   StdDev  Confidence  Change
+6:2 total     339.6  15.1     329.3  16.0    95.8%       +3.1%  (insignificant)
+6:2 function  24.6   6.0      9.9    3.1     100.0%      +148.5%
+8:2 total     324.5  15.5     323.6  14.3    15.2%       +0.3%  (insignificant)
+8:2 function  20.4   3.9      9.9    3.4     100.0%      +104.7%
+6:6 total     572.8  20.6     539.9  24.2    100.0%      +6.1%
+6:6 function  54.5   5.6      16.0   3.8     100.0%      +240.9%
+8:8 total     741.5  21.2     702.5  18.5    100.0%      +5.6%
+8:8 function  63.9   7.6      18.4   4.8     100.0%      +247.3%
+
+The assembly version has also been tested with a fuzz tester to ensure that
+any combinations of inputs not exercised by my available test streams still
+generate mathematically identical results to the C version.
+---
+ libavcodec/arm/Makefile          |   1 +
+ libavcodec/arm/mlpdsp_armv6.S    | 530 +++++++++++++++++++++++++++++++++++++++
+ libavcodec/arm/mlpdsp_init_arm.c |  96 +++++++
+ 3 files changed, 627 insertions(+)
+ create mode 100644 libavcodec/arm/mlpdsp_armv6.S
+
+diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
+index ba673b1..7b2f923 100644
+--- a/libavcodec/arm/Makefile
++++ b/libavcodec/arm/Makefile
+@@ -52,6 +52,7 @@ ARMV6-OBJS-$(CONFIG_AC3DSP)            += arm/ac3dsp_armv6.o
+ ARMV6-OBJS-$(CONFIG_H264DSP)           += arm/h264dsp_armv6.o
+ ARMV6-OBJS-$(CONFIG_HPELDSP)           += arm/hpeldsp_init_armv6.o      \
+                                           arm/hpeldsp_armv6.o
++ARMV6-OBJS-$(CONFIG_MLP_DECODER)       += arm/mlpdsp_armv6.o
+ ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP)      += arm/mpegaudiodsp_fixed_armv6.o
+ ARMV6-OBJS-$(CONFIG_VP8_DECODER)       += arm/vp8_armv6.o               \
+                                           arm/vp8dsp_init_armv6.o       \
+diff --git a/libavcodec/arm/mlpdsp_armv6.S b/libavcodec/arm/mlpdsp_armv6.S
+new file mode 100644
+index 0000000..05a2c85
+--- /dev/null
++++ b/libavcodec/arm/mlpdsp_armv6.S
+@@ -0,0 +1,530 @@
++/*
++ * Copyright (c) 2014 RISC OS Open Ltd
++ * Author: Ben Avison <bavison@riscosopen.org>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/arm/asm.S"
++
++.macro loadregoffsh2  group, index, base, offgroup, offindex
++       .altmacro
++       loadregoffsh2_ \group, %(\index), \base, \offgroup, %(\offindex)
++       .noaltmacro
++.endm
++
++.macro loadregoffsh2_ group, index, base, offgroup, offindex
++        ldr     \group\index, [\base, \offgroup\offindex, lsl #2]
++.endm
++
++.macro eorlslreg  check, data, group, index
++        .altmacro
++        eorlslreg_ \check, \data, \group, %(\index)
++        .noaltmacro
++.endm
++
++.macro eorlslreg_ check, data, group, index
++        eor     \check, \check, \data, lsl \group\index
++.endm
++
++.macro decr_modulo var, by, modulus
++ .set \var, \var - \by
++ .if \var == 0
++  .set \var, \modulus
++ .endif
++.endm
++
++ .macro load_group1  size, channels, r0, r1, r2, r3, pointer_dead=0
++  .if \size == 2
++        ldrd    \r0, \r1, [IN], #(\size + 8 - \channels) * 4
++  .else // size == 4
++   .if IDX1 > 4 || \channels==8
++        ldm     IN!, {\r0, \r1, \r2, \r3}
++   .else
++        ldm     IN, {\r0, \r1, \r2, \r3}
++    .if !\pointer_dead
++        add     IN, IN, #(4 + 8 - \channels) * 4
++     .endif
++   .endif
++  .endif
++        decr_modulo IDX1, \size, \channels
++ .endm
++
++ .macro load_group2  size, channels, r0, r1, r2, r3, pointer_dead=0
++  .if \size == 2
++   .if IDX1 > 2
++        ldm     IN!, {\r2, \r3}
++   .else
++//A   .ifc \r2, ip
++//A    .if \pointer_dead
++//A       ldm     IN, {\r2, \r3}
++//A    .else
++//A       ldr     \r2, [IN], #4
++//A       ldr     \r3, [IN], #(\size - 1 + 8 - \channels) * 4
++//A    .endif
++//A   .else
++        ldrd    \r2, \r3, [IN], #(\size + 8 - \channels) * 4
++//A   .endif
++   .endif
++  .endif
++        decr_modulo IDX1, \size, \channels
++ .endm
++
++.macro implement_pack  inorder, channels, shift
++.if \inorder
++.ifc \shift, mixed
++
++CHECK   .req    a1
++COUNT   .req    a2
++IN      .req    a3
++OUT     .req    a4
++DAT0    .req    v1
++DAT1    .req    v2
++DAT2    .req    v3
++DAT3    .req    v4
++SHIFT0  .req    v5
++SHIFT1  .req    v6
++SHIFT2  .req    sl
++SHIFT3  .req    fp
++SHIFT4  .req    ip
++SHIFT5  .req    lr
++
++ .macro output4words
++  .set SIZE_GROUP1, IDX1
++  .if SIZE_GROUP1 > 4
++   .set SIZE_GROUP1, 4
++  .endif
++  .set SIZE_GROUP2, 4 - SIZE_GROUP1
++        load_group1  SIZE_GROUP1, \channels, DAT0, DAT1, DAT2, DAT3
++        load_group2  SIZE_GROUP2, \channels, DAT0, DAT1, DAT2, DAT3
++   .if \channels == 2
++        lsl     DAT0, SHIFT0
++        lsl     DAT1, SHIFT1
++        lsl     DAT2, SHIFT0
++        lsl     DAT3, SHIFT1
++   .elseif \channels == 6
++    .if IDX2 == 6
++        lsl     DAT0, SHIFT0
++        lsl     DAT1, SHIFT1
++        lsl     DAT2, SHIFT2
++        lsl     DAT3, SHIFT3
++    .elseif IDX2 == 2
++        lsl     DAT0, SHIFT4
++        lsl     DAT1, SHIFT5
++        lsl     DAT2, SHIFT0
++        lsl     DAT3, SHIFT1
++    .else // IDX2 == 4
++        lsl     DAT0, SHIFT2
++        lsl     DAT1, SHIFT3
++        lsl     DAT2, SHIFT4
++        lsl     DAT3, SHIFT5
++    .endif
++   .elseif \channels == 8
++    .if IDX2 == 8
++        uxtb    SHIFT0, SHIFT4, ror #0
++        uxtb    SHIFT1, SHIFT4, ror #8
++        uxtb    SHIFT2, SHIFT4, ror #16
++        uxtb    SHIFT3, SHIFT4, ror #24
++    .else
++        uxtb    SHIFT0, SHIFT5, ror #0
++        uxtb    SHIFT1, SHIFT5, ror #8
++        uxtb    SHIFT2, SHIFT5, ror #16
++        uxtb    SHIFT3, SHIFT5, ror #24
++    .endif
++        lsl     DAT0, SHIFT0
++        lsl     DAT1, SHIFT1
++        lsl     DAT2, SHIFT2
++        lsl     DAT3, SHIFT3
++   .endif
++        eor     CHECK, CHECK, DAT0, lsr #8 - (\channels - IDX2)
++        eor     CHECK, CHECK, DAT1, lsr #7 - (\channels - IDX2)
++   decr_modulo IDX2, 2, \channels
++        eor     CHECK, CHECK, DAT2, lsr #8 - (\channels - IDX2)
++        eor     CHECK, CHECK, DAT3, lsr #7 - (\channels - IDX2)
++   decr_modulo IDX2, 2, \channels
++        stm     OUT!, {DAT0 - DAT3}
++ .endm
++
++ .set WORDS_PER_LOOP, \channels  // calculate LCM (channels, 4)
++ .if (WORDS_PER_LOOP % 2) == 0
++  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
++ .endif
++ .if (WORDS_PER_LOOP % 2) == 0
++  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
++ .endif
++ .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
++ .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
++
++function ff_mlp_pack_output_inorder_\channels\()ch_mixedshift_armv6, export=1
++ .if SAMPLES_PER_LOOP > 1
++        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
++        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
++ .endif
++        teq     COUNT, #0
++        it      eq
++        bxeq    lr
++        push    {v1-v6,sl,fp,lr}
++        ldr     SHIFT0, [sp, #(9+1)*4]  // get output_shift from stack
++        ldr     SHIFT1, =0x08080808
++        ldr     SHIFT4, [SHIFT0]
++ .if \channels == 2
++        uadd8   SHIFT4, SHIFT4, SHIFT1 // increase all shifts by 8
++        uxtb    SHIFT0, SHIFT4, ror #0
++        uxtb    SHIFT1, SHIFT4, ror #8
++ .else
++        ldr     SHIFT5, [SHIFT0, #4]
++        uadd8   SHIFT4, SHIFT4, SHIFT1 // increase all shifts by 8
++        uadd8   SHIFT5, SHIFT5, SHIFT1
++  .if \channels == 6
++        uxtb    SHIFT0, SHIFT4, ror #0
++        uxtb    SHIFT1, SHIFT4, ror #8
++        uxtb    SHIFT2, SHIFT4, ror #16
++        uxtb    SHIFT3, SHIFT4, ror #24
++        uxtb    SHIFT4, SHIFT5, ror #0
++        uxtb    SHIFT5, SHIFT5, ror #8
++  .endif
++ .endif
++ .set IDX1, \channels
++ .set IDX2, \channels
++0:
++ .rept WORDS_PER_LOOP / 4
++        output4words
++ .endr
++        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
++        bne     0b
++        pop     {v1-v6,sl,fp,pc}
++        .ltorg
++endfunc
++ .purgem output4words
++
++        .unreq  CHECK
++        .unreq  COUNT
++        .unreq  IN
++        .unreq  OUT
++        .unreq  DAT0
++        .unreq  DAT1
++        .unreq  DAT2
++        .unreq  DAT3
++        .unreq  SHIFT0
++        .unreq  SHIFT1
++        .unreq  SHIFT2
++        .unreq  SHIFT3
++        .unreq  SHIFT4
++        .unreq  SHIFT5
++
++.else // not mixed
++
++CHECK   .req    a1
++COUNT   .req    a2
++IN      .req    a3
++OUT     .req    a4
++DAT0    .req    v1
++DAT1    .req    v2
++DAT2    .req    v3
++DAT3    .req    v4
++DAT4    .req    v5
++DAT5    .req    v6
++DAT6    .req    sl // use these rather than the otherwise unused
++DAT7    .req    fp // ip and lr so that we can load them using LDRD
++
++ .macro output4words  tail, head, r0, r1, r2, r3, r4, r5, r6, r7, pointer_dead=0
++  .if \head
++   .set SIZE_GROUP1, IDX1
++   .if SIZE_GROUP1 > 4
++    .set SIZE_GROUP1, 4
++   .endif
++   .set SIZE_GROUP2, 4 - SIZE_GROUP1
++        load_group1  SIZE_GROUP1, \channels, \r0, \r1, \r2, \r3, \pointer_dead
++  .endif
++  .if \tail
++        eor     CHECK, CHECK, \r4, lsr #8 - (\channels - IDX2)
++        eor     CHECK, CHECK, \r5, lsr #7 - (\channels - IDX2)
++   decr_modulo IDX2, 2, \channels
++  .endif
++  .if \head
++        load_group2  SIZE_GROUP2, \channels, \r0, \r1, \r2, \r3, \pointer_dead
++  .endif
++  .if \tail
++        eor     CHECK, CHECK, \r6, lsr #8 - (\channels - IDX2)
++        eor     CHECK, CHECK, \r7, lsr #7 - (\channels - IDX2)
++   decr_modulo IDX2, 2, \channels
++        stm     OUT!, {\r4, \r5, \r6, \r7}
++  .endif
++  .if \head
++        lsl     \r0, #8 + \shift
++        lsl     \r1, #8 + \shift
++        lsl     \r2, #8 + \shift
++        lsl     \r3, #8 + \shift
++  .endif
++ .endm
++
++ .set WORDS_PER_LOOP, \channels  // calculate LCM (channels, 8)
++ .if (WORDS_PER_LOOP % 2) == 0
++  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
++ .endif
++ .if (WORDS_PER_LOOP % 2) == 0
++  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
++ .endif
++ .if (WORDS_PER_LOOP % 2) == 0
++  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
++ .endif
++ .set WORDS_PER_LOOP, WORDS_PER_LOOP * 8
++ .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
++
++function ff_mlp_pack_output_inorder_\channels\()ch_\shift\()shift_armv6, export=1
++ .if SAMPLES_PER_LOOP > 1
++        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
++        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
++ .endif
++        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
++        it      lo
++        bxlo    lr
++        push    {v1-v6,sl,fp,lr}
++ .set IDX1, \channels
++ .set IDX2, \channels
++        output4words  0, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
++0:      beq     1f
++ .rept WORDS_PER_LOOP / 8
++        output4words  1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
++        output4words  1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
++ .endr
++        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
++        bne     0b
++1:
++ .rept WORDS_PER_LOOP / 8 - 1
++        output4words  1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
++        output4words  1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
++ .endr
++        output4words  1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3, pointer_dead=1
++        output4words  1, 0, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
++        pop     {v1-v6,sl,fp,pc}
++endfunc
++ .purgem output4words
++
++        .unreq  CHECK
++        .unreq  COUNT
++        .unreq  IN
++        .unreq  OUT
++        .unreq  DAT0
++        .unreq  DAT1
++        .unreq  DAT2
++        .unreq  DAT3
++        .unreq  DAT4
++        .unreq  DAT5
++        .unreq  DAT6
++        .unreq  DAT7
++
++.endif // mixed
++.else // not inorder
++.ifc \shift, mixed
++
++// This case not currently handled
++
++.else // not mixed
++
++#if !CONFIG_THUMB
++
++CHECK   .req    a1
++COUNT   .req    a2
++IN      .req    a3
++OUT     .req    a4
++DAT0    .req    v1
++DAT1    .req    v2
++DAT2    .req    v3
++DAT3    .req    v4
++CHAN0   .req    v5
++CHAN1   .req    v6
++CHAN2   .req    sl
++CHAN3   .req    fp
++CHAN4   .req    ip
++CHAN5   .req    lr
++
++ .macro output4words
++  .if \channels == 8
++   .if IDX1 == 8
++        uxtb    CHAN0, CHAN4, ror #0
++        uxtb    CHAN1, CHAN4, ror #8
++        uxtb    CHAN2, CHAN4, ror #16
++        uxtb    CHAN3, CHAN4, ror #24
++   .else
++        uxtb    CHAN0, CHAN5, ror #0
++        uxtb    CHAN1, CHAN5, ror #8
++        uxtb    CHAN2, CHAN5, ror #16
++        uxtb    CHAN3, CHAN5, ror #24
++   .endif
++        ldr     DAT0, [IN, CHAN0, lsl #2]
++        ldr     DAT1, [IN, CHAN1, lsl #2]
++        ldr     DAT2, [IN, CHAN2, lsl #2]
++        ldr     DAT3, [IN, CHAN3, lsl #2]
++   .if IDX1 == 4
++        add     IN, IN, #8*4
++   .endif
++        decr_modulo IDX1, 4, \channels
++  .else
++   .set SIZE_GROUP1, IDX1
++   .if SIZE_GROUP1 > 4
++    .set SIZE_GROUP1, 4
++   .endif
++   .set SIZE_GROUP2, 4 - SIZE_GROUP1
++   .if SIZE_GROUP1 == 2
++        loadregoffsh2  DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
++        loadregoffsh2  DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
++        add     IN, IN, #8*4
++   .else // SIZE_GROUP1 == 4
++        loadregoffsh2  DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
++        loadregoffsh2  DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
++        loadregoffsh2  DAT, 2, IN, CHAN, 2 + (\channels - IDX1)
++        loadregoffsh2  DAT, 3, IN, CHAN, 3 + (\channels - IDX1)
++    .if IDX1 == 4
++        add     IN, IN, #8*4
++    .endif
++   .endif
++        decr_modulo IDX1, SIZE_GROUP1, \channels
++   .if SIZE_GROUP2 == 2
++        loadregoffsh2  DAT, 2, IN, CHAN, 0 + (\channels - IDX1)
++        loadregoffsh2  DAT, 3, IN, CHAN, 1 + (\channels - IDX1)
++    .if IDX1 == 2
++        add     IN, IN, #8*4
++    .endif
++   .endif
++        decr_modulo IDX1, SIZE_GROUP2, \channels
++  .endif
++  .if \channels == 8 // in this case we can corrupt CHAN0-3
++        rsb     CHAN0, CHAN0, #8
++        rsb     CHAN1, CHAN1, #8
++        rsb     CHAN2, CHAN2, #8
++        rsb     CHAN3, CHAN3, #8
++        lsl     DAT0, #8 + \shift
++        lsl     DAT1, #8 + \shift
++        lsl     DAT2, #8 + \shift
++        lsl     DAT3, #8 + \shift
++        eor     CHECK, CHECK, DAT0, lsr CHAN0
++        eor     CHECK, CHECK, DAT1, lsr CHAN1
++        eor     CHECK, CHECK, DAT2, lsr CHAN2
++        eor     CHECK, CHECK, DAT3, lsr CHAN3
++  .else
++   .if \shift != 0
++        lsl     DAT0, #\shift
++        lsl     DAT1, #\shift
++        lsl     DAT2, #\shift
++        lsl     DAT3, #\shift
++   .endif
++        bic     DAT0, DAT0, #0xff000000
++        bic     DAT1, DAT1, #0xff000000
++        bic     DAT2, DAT2, #0xff000000
++        bic     DAT3, DAT3, #0xff000000
++        eorlslreg CHECK, DAT0, CHAN, 0 + (\channels - IDX2)
++        eorlslreg CHECK, DAT1, CHAN, 1 + (\channels - IDX2)
++   decr_modulo IDX2, 2, \channels
++        eorlslreg CHECK, DAT2, CHAN, 0 + (\channels - IDX2)
++        eorlslreg CHECK, DAT3, CHAN, 1 + (\channels - IDX2)
++   decr_modulo IDX2, 2, \channels
++        lsl     DAT0, #8
++        lsl     DAT1, #8
++        lsl     DAT2, #8
++        lsl     DAT3, #8
++  .endif
++        stm     OUT!, {DAT0 - DAT3}
++ .endm
++
++ .set WORDS_PER_LOOP, \channels  // calculate LCM (channels, 4)
++ .if (WORDS_PER_LOOP % 2) == 0
++  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
++ .endif
++ .if (WORDS_PER_LOOP % 2) == 0
++  .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
++ .endif
++ .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
++ .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
++
++function ff_mlp_pack_output_outoforder_\channels\()ch_\shift\()shift_armv6, export=1
++ .if SAMPLES_PER_LOOP > 1
++        tst     COUNT, #SAMPLES_PER_LOOP - 1  // always seems to be in practice
++        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
++ .endif
++        teq     COUNT, #0
++        it      eq
++        bxeq    lr
++        push    {v1-v6,sl,fp,lr}
++        ldr     CHAN0, [sp, #(9+0)*4]  // get ch_assign from stack
++        ldr     CHAN4, [CHAN0]
++ .if \channels == 2
++        uxtb    CHAN0, CHAN4, ror #0
++        uxtb    CHAN1, CHAN4, ror #8
++ .else
++        ldr     CHAN5, [CHAN0, #4]
++  .if \channels == 6
++        uxtb    CHAN0, CHAN4, ror #0
++        uxtb    CHAN1, CHAN4, ror #8
++        uxtb    CHAN2, CHAN4, ror #16
++        uxtb    CHAN3, CHAN4, ror #24
++        uxtb    CHAN4, CHAN5, ror #0
++        uxtb    CHAN5, CHAN5, ror #8
++  .endif
++ .endif
++ .set IDX1, \channels
++ .set IDX2, \channels
++0:
++ .rept WORDS_PER_LOOP / 4
++        output4words
++ .endr
++        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
++        bne     0b
++        pop     {v1-v6,sl,fp,pc}
++        .ltorg
++endfunc
++ .purgem output4words
++
++        .unreq  CHECK
++        .unreq  COUNT
++        .unreq  IN
++        .unreq  OUT
++        .unreq  DAT0
++        .unreq  DAT1
++        .unreq  DAT2
++        .unreq  DAT3
++        .unreq  CHAN0
++        .unreq  CHAN1
++        .unreq  CHAN2
++        .unreq  CHAN3
++        .unreq  CHAN4
++        .unreq  CHAN5
++
++#endif // !CONFIG_THUMB
++
++.endif // mixed
++.endif // inorder
++.endm // implement_pack
++
++.macro pack_channels  inorder, channels
++        implement_pack  \inorder, \channels, 0
++        implement_pack  \inorder, \channels, 1
++        implement_pack  \inorder, \channels, 2
++        implement_pack  \inorder, \channels, 3
++        implement_pack  \inorder, \channels, 4
++        implement_pack  \inorder, \channels, 5
++        implement_pack  \inorder, \channels, mixed
++.endm
++
++.macro pack_order  inorder
++        pack_channels  \inorder, 2
++        pack_channels  \inorder, 6
++        pack_channels  \inorder, 8
++.endm
++
++        pack_order  0
++        pack_order  1
+diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
+index 1bb2276..10ec316 100644
+--- a/libavcodec/arm/mlpdsp_init_arm.c
++++ b/libavcodec/arm/mlpdsp_init_arm.c
+@@ -41,8 +41,104 @@ void ff_mlp_rematrix_channel_arm(int32_t *samples,
+                                  int access_unit_size_pow2,
+                                  int32_t mask);
+
++#define DECLARE_PACK(order,channels,shift) \
++    int32_t ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int);
++#define ENUMERATE_PACK(order,channels,shift) \
++    ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6,
++#define PACK_CHANNELS(macro,order,channels) \
++        macro(order,channels,0) \
++        macro(order,channels,1) \
++        macro(order,channels,2) \
++        macro(order,channels,3) \
++        macro(order,channels,4) \
++        macro(order,channels,5) \
++        macro(order,channels,mixed)
++#define PACK_ORDER(macro,order) \
++        PACK_CHANNELS(macro,order,2) \
++        PACK_CHANNELS(macro,order,6) \
++        PACK_CHANNELS(macro,order,8)
++#define PACK_ALL(macro) \
++        PACK_ORDER(macro,outof) \
++        PACK_ORDER(macro,in)
++PACK_ALL(DECLARE_PACK)
++
++#define ff_mlp_pack_output_outoforder_2ch_mixedshift_armv6 0
++#define ff_mlp_pack_output_outoforder_6ch_mixedshift_armv6 0
++#define ff_mlp_pack_output_outoforder_8ch_mixedshift_armv6 0
++#if CONFIG_THUMB
++#define ff_mlp_pack_output_outoforder_2ch_0shift_armv6 0
++#define ff_mlp_pack_output_outoforder_2ch_1shift_armv6 0
++#define ff_mlp_pack_output_outoforder_2ch_2shift_armv6 0
++#define ff_mlp_pack_output_outoforder_2ch_3shift_armv6 0
++#define ff_mlp_pack_output_outoforder_2ch_4shift_armv6 0
++#define ff_mlp_pack_output_outoforder_2ch_5shift_armv6 0
++#define ff_mlp_pack_output_outoforder_6ch_0shift_armv6 0
++#define ff_mlp_pack_output_outoforder_6ch_1shift_armv6 0
++#define ff_mlp_pack_output_outoforder_6ch_2shift_armv6 0
++#define ff_mlp_pack_output_outoforder_6ch_3shift_armv6 0
++#define ff_mlp_pack_output_outoforder_6ch_4shift_armv6 0
++#define ff_mlp_pack_output_outoforder_6ch_5shift_armv6 0
++#define ff_mlp_pack_output_outoforder_8ch_0shift_armv6 0
++#define ff_mlp_pack_output_outoforder_8ch_1shift_armv6 0
++#define ff_mlp_pack_output_outoforder_8ch_2shift_armv6 0
++#define ff_mlp_pack_output_outoforder_8ch_3shift_armv6 0
++#define ff_mlp_pack_output_outoforder_8ch_4shift_armv6 0
++#define ff_mlp_pack_output_outoforder_8ch_5shift_armv6 0
++#endif
++
++static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign, /* returns a pointer to the pack-output routine to use */
++                                              int8_t *output_shift,
++                                              uint8_t max_matrix_channel,
++                                              int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int)
++{
++    int ch_index;
++    int shift = output_shift[0] < 0 || output_shift[0] > 5 ? 6 : output_shift[0]; /* 0..5 = candidate uniform shift; 6 = the "mixed" table slot */
++    int inorder = 1;
++    static int32_t (*const routine[2*3*7])(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int) = {
++            PACK_ALL(ENUMERATE_PACK) /* layout: out-of-order then in-order; within each 2/6/8 channels; within each shifts 0-5 then mixed */
++    };
++    int i;
++
++    if (!is32) // don't support 16-bit output (it's not used by TrueHD)
++        return ff_mlp_pack_output;
++
++    switch (max_matrix_channel) { /* only 2-, 6- and 8-channel cases have table entries */
++    case 1: /* channel indices 0..1 */
++        ch_index = 0;
++        break;
++    case 5: /* channel indices 0..5 */
++        ch_index = 1;
++        break;
++    case 7: /* channel indices 0..7 */
++        ch_index = 2;
++        break;
++    default:
++        return ff_mlp_pack_output; /* any other channel count uses the C implementation */
++    }
++
++    for (i = 0; i <= max_matrix_channel; i++) {
++        if (shift != 6 && output_shift[i] != shift)
++            shift = 6; // indicate mixed shifts
++        if (ch_assign[i] != i)
++            inorder = 0; /* any channel not mapped to itself selects the out-of-order routines */
++    }
++#if CONFIG_THUMB
++    if (!inorder)
++        return ff_mlp_pack_output; // can't currently handle an order array except in ARM mode
++#else
++    if (shift == 6 && !inorder)
++        return ff_mlp_pack_output; // can't currently handle both an order array and a shift array
++#endif
++
++    return routine[(inorder*3+ch_index)*7+shift]; /* index = (order slot * 3 + channel slot) * 7 + shift slot */
++}
++
+ av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
+ {
++    int cpu_flags = av_get_cpu_flags();
++
+     c->mlp_filter_channel = ff_mlp_filter_channel_arm;
+     c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
++    if (cpu_flags & AV_CPU_FLAG_ARMV6)
++        c->mlp_select_pack_output = mlp_select_pack_output_armv6;
+ }
+--
+1.9.1
diff --git a/projects/RPi/patches/ffmpeg/ffmpeg_Speed_up_wtv_index_creation.patch b/projects/RPi/patches/ffmpeg/ffmpeg_Speed_up_wtv_index_creation.patch
new file mode 100644
index 0000000000..962feb2759
--- /dev/null
+++ b/projects/RPi/patches/ffmpeg/ffmpeg_Speed_up_wtv_index_creation.patch
@@ -0,0 +1,47 @@
+commit 0e7427498cb1131671f6fe9d054245ae7e5a36f5
+Author: popcornmix <popcornmix@gmail.com>
+Date:   Tue Mar 25 19:43:07 2014 +0000
+
+    [ffmpeg] Speed up wtv index creation
+
+    The index creation is O(N^2) with number of entries (typically thousands).
+    On a Pi this can take more than 60 seconds to execute for a recording of a few hours.
+
+    By replacing with an O(N) loop, this takes virtually zero time
+
+diff --git a/libavformat/wtvdec.c b/libavformat/wtvdec.c
+index e423370..70898bd 100644
+--- a/libavformat/wtvdec.c
++++ b/libavformat/wtvdec.c
+@@ -980,21 +980,23 @@ static int read_header(AVFormatContext *s)
+                 pb = wtvfile_open(s, root, root_size, ff_timeline_table_0_entries_Events_le16);
+                 if (pb) {
+                     int i;
++                    AVIndexEntry *e = wtv->index_entries; // next index entry still waiting for a position
++                    AVIndexEntry *e_end = wtv->index_entries + wtv->nb_index_entries - 1; // last valid entry
++                    uint64_t last_position = 0; // position from the previous timeline record
+                     while (1) {
+                         uint64_t frame_nb = avio_rl64(pb);
+                         uint64_t position = avio_rl64(pb);
++                        while (e <= e_end && frame_nb > e->size) { // bounds check first: e can be one past e_end
++                           e->pos = last_position;
++                           e++;
++                        }
+                         if (url_feof(pb))
+                             break;
+-                        for (i = wtv->nb_index_entries - 1; i >= 0; i--) {
+-                            AVIndexEntry *e = wtv->index_entries + i;
+-                            if (frame_nb > e->size)
+-                                break;
+-                            if (position > e->pos)
+-                                e->pos = position;
+-                        }
++                        last_position = position;
+                     }
++                    e_end->pos = last_position; // final entry always takes the last position read
+                     wtvfile_close(pb);
+-                    st->duration = wtv->index_entries[wtv->nb_index_entries - 1].timestamp;
++                    st->duration = e_end->timestamp;
+                 }
+             }
+         }