diff --git a/packages/multimedia/ffmpeg/meta b/packages/multimedia/ffmpeg/meta index 53985e81ce..b76243fd99 100644 --- a/packages/multimedia/ffmpeg/meta +++ b/packages/multimedia/ffmpeg/meta @@ -21,7 +21,7 @@ PKG_NAME="ffmpeg" PKG_VERSION="0.10.7" if [ "$XBMC" = "master" ]; then - PKG_VERSION="1.2.1" + PKG_VERSION="1.2.3" fi PKG_REV="1" PKG_ARCH="any" diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0038-backport-vaapi-return-early-from-ff_vaapi_render_picture-without-picture.patch b/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0038-backport-vaapi-return-early-from-ff_vaapi_render_picture-without-picture.patch deleted file mode 100644 index f52c8313b4..0000000000 --- a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0038-backport-vaapi-return-early-from-ff_vaapi_render_picture-without-picture.patch +++ /dev/null @@ -1,22 +0,0 @@ -Subject: [libav-devel] [PATCH 1/2] vaapi: return early from ff_vaapi_render_picture() without picture -From: Janne Grunau janne-libav at jannau.net - -Fixes an assertion when called on uninitialized frame. Spotted after -seeking in vlc. 
(backported from libav mailing list) - ---- - -diff --git a/libavcodec/vaapi.c b/libavcodec/vaapi.c -index a220a9d..94959bf 100644 ---- a/libavcodec/vaapi.c -+++ b/libavcodec/vaapi.c -@@ -46,6 +46,9 @@ int ff_vaapi_render_picture(struct vaapi_context *vactx, VASurfaceID surface) - VABufferID va_buffers[3]; - unsigned int n_va_buffers = 0; - -+ if (!vactx->pic_param_buf_id) -+ return 0; -+ - vaUnmapBuffer(vactx->display, vactx->pic_param_buf_id); - va_buffers[n_va_buffers++] = vactx->pic_param_buf_id; - diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-905.01-VFP_acceleration.patch b/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-905.01-VFP_acceleration.patch deleted file mode 100644 index a756285072..0000000000 --- a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-905.01-VFP_acceleration.patch +++ /dev/null @@ -1,1560 +0,0 @@ -From f097ecc4f42e29c6e8013a622bb569702ffe4546 Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Fri, 14 Jun 2013 16:07:53 +0100 -Subject: [PATCH 1/6] Add VFP-accelerated version of synth_filter_float(), used - by DTS Coherent Acoustics decoder - ---- - libavcodec/arm/Makefile | 3 +- - libavcodec/arm/fft_init_arm.c | 8 ++ - libavcodec/arm/synth_filter_vfp.S | 206 +++++++++++++++++++++++++++ - 3 files changed, 216 insertions(+), 1 deletion(-) - create mode 100644 libavcodec/arm/synth_filter_vfp.S - -diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile -index 1c91d62..9079270 100644 ---- a/libavcodec/arm/Makefile -+++ b/libavcodec/arm/Makefile -@@ -58,7 +58,8 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \ - arm/dsputil_armv6.o \ - arm/simple_idct_armv6.o \ - --VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o -+VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o \ -+ arm/synth_filter_vfp.o - - NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ - arm/fft_fixed_neon.o \ -diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c -index 8c98abc..44c811d 100644 ---- a/libavcodec/arm/fft_init_arm.c -+++ 
b/libavcodec/arm/fft_init_arm.c -@@ -32,6 +32,12 @@ - - void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z); - -+void ff_synth_filter_float_vfp(FFTContext *imdct, -+ float *synth_buf_ptr, int *synth_buf_offset, -+ float synth_buf2[32], const float window[512], -+ float out[32], const float in[32], -+ float scale); -+ - void ff_synth_filter_float_neon(FFTContext *imdct, - float *synth_buf_ptr, int *synth_buf_offset, - float synth_buf2[32], const float window[512], -@@ -71,6 +77,8 @@ av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) - { - int cpu_flags = av_get_cpu_flags(); - -+ if (have_vfp(cpu_flags)) -+ s->synth_filter_float = ff_synth_filter_float_vfp; - if (have_neon(cpu_flags)) - s->synth_filter_float = ff_synth_filter_float_neon; - } -diff --git a/libavcodec/arm/synth_filter_vfp.S b/libavcodec/arm/synth_filter_vfp.S -new file mode 100644 -index 0000000..451fe5c ---- /dev/null -+++ b/libavcodec/arm/synth_filter_vfp.S -@@ -0,0 +1,206 @@ -+/* -+ * Copyright (c) 2013 RISC OS Open Ltd -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Author: Ben Avison -+ */ -+ -+#include "libavutil/arm/asm.S" -+ -+IMDCT .req r0 -+ORIG_P_SB .req r1 -+P_SB_OFF .req r2 -+I .req r0 -+P_SB2_UP .req r1 -+OLDFPSCR .req r2 -+P_SB2_DN .req r3 -+P_WIN_DN .req r4 -+P_OUT_DN .req r5 -+P_SB .req r6 -+J_WRAP .req r7 -+P_WIN_UP .req r12 -+P_OUT_UP .req r14 -+ -+SCALE .req s0 -+SBUF_DAT_REV0 .req s4 -+SBUF_DAT_REV1 .req s5 -+SBUF_DAT_REV2 .req s6 -+SBUF_DAT_REV3 .req s7 -+VA0 .req s8 -+VA3 .req s11 -+VB0 .req s12 -+VB3 .req s15 -+VC0 .req s8 -+VC3 .req s11 -+VD0 .req s12 -+VD3 .req s15 -+SBUF_DAT0 .req s16 -+SBUF_DAT1 .req s17 -+SBUF_DAT2 .req s18 -+SBUF_DAT3 .req s19 -+SBUF_DAT_ALT0 .req s20 -+SBUF_DAT_ALT1 .req s21 -+SBUF_DAT_ALT2 .req s22 -+SBUF_DAT_ALT3 .req s23 -+WIN_DN_DAT0 .req s24 -+WIN_UP_DAT0 .req s28 -+ -+ -+.macro inner_loop half, tail, head -+ .if (OFFSET & (64*4)) == 0 @ even numbered call -+ SBUF_DAT_THIS0 .req SBUF_DAT0 -+ SBUF_DAT_THIS1 .req SBUF_DAT1 -+ SBUF_DAT_THIS2 .req SBUF_DAT2 -+ SBUF_DAT_THIS3 .req SBUF_DAT3 -+ .ifnc "\head","" -+ vldr d8, [P_SB, #OFFSET] @ d8 = SBUF_DAT -+ vldr d9, [P_SB, #OFFSET+8] -+ .endif -+ .else -+ SBUF_DAT_THIS0 .req SBUF_DAT_ALT0 -+ SBUF_DAT_THIS1 .req SBUF_DAT_ALT1 -+ SBUF_DAT_THIS2 .req SBUF_DAT_ALT2 -+ SBUF_DAT_THIS3 .req SBUF_DAT_ALT3 -+ .ifnc "\head","" -+ vldr d10, [P_SB, #OFFSET] @ d10 = SBUF_DAT_ALT -+ vldr d11, [P_SB, #OFFSET+8] -+ .endif -+ .endif -+ .ifnc "\tail","" -+ .ifc "\half","ab" -+ vmls.f VA0, SBUF_DAT_REV0, WIN_DN_DAT0 @ all operands treated as vectors -+ .else -+ vmla.f VD0, SBUF_DAT_REV0, WIN_DN_DAT0 @ all operands treated as vectors -+ .endif -+ .endif -+ .ifnc "\head","" -+ vldr d14, [P_WIN_UP, #OFFSET] @ d14 = WIN_UP_DAT -+ vldr d15, [P_WIN_UP, #OFFSET+8] -+ vldr d12, [P_WIN_DN, #OFFSET] @ d12 = WIN_DN_DAT -+ vldr d13, 
[P_WIN_DN, #OFFSET+8] -+ vmov SBUF_DAT_REV3, SBUF_DAT_THIS0 -+ vmov SBUF_DAT_REV2, SBUF_DAT_THIS1 -+ vmov SBUF_DAT_REV1, SBUF_DAT_THIS2 -+ vmov SBUF_DAT_REV0, SBUF_DAT_THIS3 -+ .ifc "\half","ab" -+ vmla.f VB0, SBUF_DAT_THIS0, WIN_UP_DAT0 -+ .else -+ vmla.f VC0, SBUF_DAT_THIS0, WIN_UP_DAT0 -+ .endif -+ teq J_WRAP, #J -+ bne 2f @ strongly predictable, so better than cond exec in this case -+ sub P_SB, P_SB, #512*4 -+2: -+ .set J, J - 64 -+ .set OFFSET, OFFSET + 64*4 -+ .endif -+ .unreq SBUF_DAT_THIS0 -+ .unreq SBUF_DAT_THIS1 -+ .unreq SBUF_DAT_THIS2 -+ .unreq SBUF_DAT_THIS3 -+.endm -+ -+ -+/* void ff_synth_filter_float_vfp(FFTContext *imdct, -+ * float *synth_buf_ptr, int *synth_buf_offset, -+ * float synth_buf2[32], const float window[512], -+ * float out[32], const float in[32], float scale) -+ */ -+function ff_synth_filter_float_vfp, export=1 -+ push {r3-r7,lr} -+ vpush {s16-s31} -+ ldr lr, [P_SB_OFF] -+ add a2, ORIG_P_SB, lr, LSL #2 @ calculate synth_buf to pass to imdct_half -+ mov P_SB, a2 @ and keep a copy for ourselves -+ bic J_WRAP, lr, #63 @ mangled to make testing for wrap easier in inner loop -+ sub lr, lr, #32 -+ and lr, lr, #512-32 -+ str lr, [P_SB_OFF] @ rotate offset, modulo buffer size, ready for next call -+ ldr a3, [sp, #(16+6+2)*4] @ fetch in from stack, to pass to imdct_half -+VFP vmov s16, SCALE @ imdct_half is free to corrupt s0, but it contains one of our arguments in hardfp case -+ bl ff_imdct_half_c -+VFP vmov SCALE, s16 -+ -+ vmrs OLDFPSCR, FPSCR -+ ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 -+ vmsr FPSCR, lr -+ ldr P_SB2_DN, [sp, #16*4] -+ ldr P_WIN_DN, [sp, #(16+6+0)*4] -+ ldr P_OUT_DN, [sp, #(16+6+1)*4] -+NOVFP vldr SCALE, [sp, #(16+6+3)*4] -+ -+#define IMM_OFF_SKEW 956 /* also valid immediate constant when you add 16*4 */ -+ add P_SB, P_SB, #IMM_OFF_SKEW @ so we can use -ve offsets to use full immediate offset range -+ add P_SB2_UP, P_SB2_DN, #16*4 -+ add P_WIN_UP, P_WIN_DN, #16*4+IMM_OFF_SKEW -+ add 
P_OUT_UP, P_OUT_DN, #16*4 -+ add P_SB2_DN, P_SB2_DN, #16*4 -+ add P_WIN_DN, P_WIN_DN, #12*4+IMM_OFF_SKEW -+ add P_OUT_DN, P_OUT_DN, #16*4 -+ mov I, #4 -+1: -+ vldmia P_SB2_UP!, {VB0-VB3} -+ vldmdb P_SB2_DN!, {VA0-VA3} -+ .set J, 512 - 64 -+ .set OFFSET, -IMM_OFF_SKEW -+ inner_loop ab,, head -+ .rept 7 -+ inner_loop ab, tail, head -+ .endr -+ inner_loop ab, tail -+ add P_WIN_UP, P_WIN_UP, #4*4 -+ sub P_WIN_DN, P_WIN_DN, #4*4 -+ vmul.f VB0, VB0, SCALE @ SCALE treated as scalar -+ add P_SB, P_SB, #(512+4)*4 -+ subs I, I, #1 -+ vmul.f VA0, VA0, SCALE -+ vstmia P_OUT_UP!, {VB0-VB3} -+ vstmdb P_OUT_DN!, {VA0-VA3} -+ bne 1b -+ -+ add P_SB2_DN, P_SB2_DN, #(16+28-12)*4 -+ sub P_SB2_UP, P_SB2_UP, #(16+16)*4 -+ add P_WIN_DN, P_WIN_DN, #(32+16+28-12)*4 -+ mov I, #4 -+1: -+ vldr.d d4, zero @ d4 = VC0 -+ vldr.d d5, zero -+ vldr.d d6, zero @ d6 = VD0 -+ vldr.d d7, zero -+ .set J, 512 - 64 -+ .set OFFSET, -IMM_OFF_SKEW -+ inner_loop cd,, head -+ .rept 7 -+ inner_loop cd, tail, head -+ .endr -+ inner_loop cd, tail -+ add P_WIN_UP, P_WIN_UP, #4*4 -+ sub P_WIN_DN, P_WIN_DN, #4*4 -+ add P_SB, P_SB, #(512+4)*4 -+ subs I, I, #1 -+ vstmia P_SB2_UP!, {VC0-VC3} -+ vstmdb P_SB2_DN!, {VD0-VD3} -+ bne 1b -+ -+ vmsr FPSCR, OLDFPSCR -+ vpop {s16-s31} -+ pop {r3-r7,pc} -+endfunc -+ -+ .align 3 -+zero: .word 0, 0 --- -1.8.1.6 - - -From 36ddeb1bc2f84c42ea29333444efad04f82a0f92 Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Tue, 25 Jun 2013 17:22:50 +0100 -Subject: [PATCH 2/6] 1st version of ff_int32_to_float_fmul_scalar_vfp - ---- - libavcodec/arm/fmtconvert_init_arm.c | 11 +++++-- - libavcodec/arm/fmtconvert_vfp.S | 38 +++++++++++++++++++++++++ - 2 files changed, 47 insertions(+), 2 deletions(-) - -diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c -index 1d99c97..fc32bdd 100644 ---- a/libavcodec/arm/fmtconvert_init_arm.c -+++ b/libavcodec/arm/fmtconvert_init_arm.c -@@ -31,14 +31,21 @@ void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src, - 
void ff_float_to_int16_neon(int16_t *dst, const float *src, long len); - void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); - -+void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int *src, -+ float mul, int len); -+ - void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len); - - av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx) - { - int cpu_flags = av_get_cpu_flags(); - -- if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) { -- c->float_to_int16 = ff_float_to_int16_vfp; -+ if (have_vfp(cpu_flags)) { -+ c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp; -+ -+ if (have_armv6(cpu_flags)) { -+ c->float_to_int16 = ff_float_to_int16_vfp; -+ } - } - - if (have_neon(cpu_flags)) { -diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S -index 7b012bc..817ce49 100644 ---- a/libavcodec/arm/fmtconvert_vfp.S -+++ b/libavcodec/arm/fmtconvert_vfp.S -@@ -1,5 +1,6 @@ - /* - * Copyright (c) 2008 Siarhei Siamashka -+ * Copyright (c) 2013 RISC OS Open Ltd - * - * This file is part of FFmpeg. - * -@@ -76,3 +77,40 @@ function ff_float_to_int16_vfp, export=1 - vpop {d8-d11} - pop {r4-r8,pc} - endfunc -+ -+/** -+ * ARM VFP optimised int32 to float conversion. -+ * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned -+ * (16 bytes alignment is best for BCM2835), little-endian. 
-+ */ -+@ void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int *src, float mul, int len) -+function ff_int32_to_float_fmul_scalar_vfp, export=1 -+VFP tmp .req a4 -+VFP len .req a3 -+NOVFP tmp .req a3 -+NOVFP len .req a4 -+NOVFP vmov s0, a3 -+ ldr tmp, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1 -+ vmrs ip, FPSCR -+ vmsr FPSCR, tmp -+1: -+ vldmia a2!, {s8-s15} -+ vcvt.f32.s32 s8, s8 -+ vcvt.f32.s32 s9, s9 -+ vcvt.f32.s32 s10, s10 -+ vcvt.f32.s32 s11, s11 -+ vcvt.f32.s32 s12, s12 -+ vcvt.f32.s32 s13, s13 -+ vcvt.f32.s32 s14, s14 -+ vcvt.f32.s32 s15, s15 -+ vmul.f32 s8, s8, s0 -+ subs len, len, #8 -+ vstmia a1!, {s8-s11} -+ vstmia a1!, {s12-s15} -+ bne 1b -+ -+ vmsr FPSCR, ip -+ bx lr -+endfunc -+ .unreq tmp -+ .unreq len --- -1.8.1.6 - - -From 1e6f32e2f6330bfbf8ae661069eb3ce1cb1b33d3 Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Wed, 26 Jun 2013 00:49:15 +0100 -Subject: [PATCH 3/6] 2nd version of fmul_scalar - ---- - libavcodec/arm/fmtconvert_init_arm.c | 5 + - libavcodec/arm/fmtconvert_vfp.S | 162 ++++++++++++++++++++++++ - libavcodec/dcadec.c | 23 ++-- - libavcodec/fmtconvert.c | 7 + - libavcodec/fmtconvert.h | 14 ++ - 5 files changed, 203 insertions(+), 8 deletions(-) - -diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c -index fc32bdd..0a71417 100644 ---- a/libavcodec/arm/fmtconvert_init_arm.c -+++ b/libavcodec/arm/fmtconvert_init_arm.c -@@ -33,6 +33,8 @@ void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src, - - void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int *src, - float mul, int len); -+void ff_int32_to_float_fmul_scalar_array_vfp(FmtConvertContext *c, float *dst, const int *src, -+ float *mul, int len); - - void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len); - -@@ -43,6 +45,9 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx - if (have_vfp(cpu_flags)) { - c->int32_to_float_fmul_scalar = 
ff_int32_to_float_fmul_scalar_vfp; - -+ if (!have_neon(cpu_flags)) { -+ c->int32_to_float_fmul_scalar_array = ff_int32_to_float_fmul_scalar_array_vfp; -+ } - if (have_armv6(cpu_flags)) { - c->float_to_int16 = ff_float_to_int16_vfp; - } -diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S -index 817ce49..ae7a43c 100644 ---- a/libavcodec/arm/fmtconvert_vfp.S -+++ b/libavcodec/arm/fmtconvert_vfp.S -@@ -83,6 +83,168 @@ endfunc - * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned - * (16 bytes alignment is best for BCM2835), little-endian. - */ -+@ void ff_int32_to_float_fmul_scalar_array_vfp(if (FmtConvertContext *c, float *dst, const int *src, float *mul, int len) -+function ff_int32_to_float_fmul_scalar_array_vfp, export=1 -+ push {lr} -+ ldr a1, [sp, #4] -+ subs lr, a1, #3*8 -+ bcc 50f @ too short to pipeline -+ @ Now need to find (len / 8) % 3. The approximation -+ @ x / 24 = (x * 0xAB) >> 12 -+ @ is good for x < 4096, which is true for both AC3 and DCA. -+ mov a1, #0xAB -+ ldr ip, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1 -+ mul a1, lr, a1 -+ vpush {s16-s31} -+ mov a1, a1, lsr #12 -+ add a1, a1, a1, lsl #1 -+ rsb a1, a1, lr, lsr #3 -+ cmp a1, #1 -+ vmrs a1, FPSCR -+ vmsr FPSCR, ip -+ beq 11f -+ blo 10f -+ @ Array is (2 + multiple of 3) x 8 floats long -+ @ drop through... -+ vldmia a3!, {s16-s23} -+ vldmia a4!, {s2,s3} -+ vldmia a3!, {s24-s31} -+ vcvt.f32.s32 s16, s16 -+ vcvt.f32.s32 s17, s17 -+ vcvt.f32.s32 s18, s18 -+ vcvt.f32.s32 s19, s19 -+ vcvt.f32.s32 s20, s20 -+ vcvt.f32.s32 s21, s21 -+ vcvt.f32.s32 s22, s22 -+ vcvt.f32.s32 s23, s23 -+ vmul.f32 s16, s16, s2 -+ @ drop through... 
-+3: -+ vldmia a3!, {s8-s15} -+ vldmia a4!, {s1} -+ vcvt.f32.s32 s24, s24 -+ vcvt.f32.s32 s25, s25 -+ vcvt.f32.s32 s26, s26 -+ vcvt.f32.s32 s27, s27 -+ vcvt.f32.s32 s28, s28 -+ vcvt.f32.s32 s29, s29 -+ vcvt.f32.s32 s30, s30 -+ vcvt.f32.s32 s31, s31 -+ vmul.f32 s24, s24, s3 -+ vstmia a2!, {s16-s19} -+ vstmia a2!, {s20-s23} -+2: -+ vldmia a3!, {s16-s23} -+ vldmia a4!, {s2} -+ vcvt.f32.s32 s8, s8 -+ vcvt.f32.s32 s9, s9 -+ vcvt.f32.s32 s10, s10 -+ vcvt.f32.s32 s11, s11 -+ vcvt.f32.s32 s12, s12 -+ vcvt.f32.s32 s13, s13 -+ vcvt.f32.s32 s14, s14 -+ vcvt.f32.s32 s15, s15 -+ vmul.f32 s8, s8, s1 -+ vstmia a2!, {s24-s27} -+ vstmia a2!, {s28-s31} -+1: -+ vldmia a3!, {s24-s31} -+ vldmia a4!, {s3} -+ vcvt.f32.s32 s16, s16 -+ vcvt.f32.s32 s17, s17 -+ vcvt.f32.s32 s18, s18 -+ vcvt.f32.s32 s19, s19 -+ vcvt.f32.s32 s20, s20 -+ vcvt.f32.s32 s21, s21 -+ vcvt.f32.s32 s22, s22 -+ vcvt.f32.s32 s23, s23 -+ vmul.f32 s16, s16, s2 -+ vstmia a2!, {s8-s11} -+ vstmia a2!, {s12-s15} -+ -+ subs lr, lr, #8*3 -+ bpl 3b -+ -+ vcvt.f32.s32 s24, s24 -+ vcvt.f32.s32 s25, s25 -+ vcvt.f32.s32 s26, s26 -+ vcvt.f32.s32 s27, s27 -+ vcvt.f32.s32 s28, s28 -+ vcvt.f32.s32 s29, s29 -+ vcvt.f32.s32 s30, s30 -+ vcvt.f32.s32 s31, s31 -+ vmul.f32 s24, s24, s3 -+ vstmia a2!, {s16-s19} -+ vstmia a2!, {s20-s23} -+ vstmia a2!, {s24-s27} -+ vstmia a2!, {s28-s31} -+ -+ vmsr FPSCR, a1 -+ vpop {s16-s31} -+ pop {pc} -+ -+10: @ Array is (multiple of 3) x 8 floats long -+ vldmia a3!, {s8-s15} -+ vldmia a4!, {s1,s2} -+ vldmia a3!, {s16-s23} -+ vcvt.f32.s32 s8, s8 -+ vcvt.f32.s32 s9, s9 -+ vcvt.f32.s32 s10, s10 -+ vcvt.f32.s32 s11, s11 -+ vcvt.f32.s32 s12, s12 -+ vcvt.f32.s32 s13, s13 -+ vcvt.f32.s32 s14, s14 -+ vcvt.f32.s32 s15, s15 -+ vmul.f32 s8, s8, s1 -+ b 1b -+ -+11: @ Array is (1 + multiple of 3) x 8 floats long -+ vldmia a3!, {s24-s31} -+ vldmia a4!, {s3} -+ vldmia a3!, {s8-s15} -+ vldmia a4!, {s1} -+ vcvt.f32.s32 s24, s24 -+ vcvt.f32.s32 s25, s25 -+ vcvt.f32.s32 s26, s26 -+ vcvt.f32.s32 s27, s27 -+ vcvt.f32.s32 s28, 
s28 -+ vcvt.f32.s32 s29, s29 -+ vcvt.f32.s32 s30, s30 -+ vcvt.f32.s32 s31, s31 -+ vmul.f32 s24, s24, s3 -+ b 2b -+ -+50: -+ ldr lr, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1 -+ vmrs ip, FPSCR -+ vmsr FPSCR, lr -+51: -+ vldmia a3!, {s8-s15} -+ vldmia a4!, {s0} -+ vcvt.f32.s32 s8, s8 -+ vcvt.f32.s32 s9, s9 -+ vcvt.f32.s32 s10, s10 -+ vcvt.f32.s32 s11, s11 -+ vcvt.f32.s32 s12, s12 -+ vcvt.f32.s32 s13, s13 -+ vcvt.f32.s32 s14, s14 -+ vcvt.f32.s32 s15, s15 -+ vmul.f32 s8, s8, s0 -+ subs a1, a1, #8 -+ vstmia a2!, {s8-s11} -+ vstmia a2!, {s12-s15} -+ bne 51b -+ -+ vmsr FPSCR, ip -+ pop {pc} -+endfunc -+ -+/** -+ * ARM VFP optimised int32 to float conversion. -+ * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned -+ * (16 bytes alignment is best for BCM2835), little-endian. -+ * TODO: could be further optimised by unrolling and interleaving, as above -+ */ - @ void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int *src, float mul, int len) - function ff_int32_to_float_fmul_scalar_vfp, export=1 - VFP tmp .req a4 -diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c -index 1b955e4..fe568ee 100644 ---- a/libavcodec/dcadec.c -+++ b/libavcodec/dcadec.c -@@ -1302,7 +1302,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) - - /* FIXME */ - float (*subband_samples)[DCA_SUBBANDS][8] = s->subband_samples[block_index]; -- LOCAL_ALIGNED_16(int, block, [8]); -+ LOCAL_ALIGNED_16(int, block, [8 * DCA_SUBBANDS]); - - /* - * Audio data -@@ -1315,6 +1315,8 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) - quant_step_table = lossy_quant_d; - - for (k = base_channel; k < s->prim_channels; k++) { -+ float rscale[DCA_SUBBANDS]; -+ - if (get_bits_left(&s->gb) < 0) - return AVERROR_INVALIDDATA; - -@@ -1337,11 +1339,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) - * Extract bits from the bit stream - */ - if (!abits) { -- 
memset(subband_samples[k][l], 0, 8 * sizeof(subband_samples[0][0][0])); -+ rscale[l] = 0; -+ memset(block + 8 * l, 0, 8 * sizeof(block[0])); - } else { - /* Deal with transients */ - int sfi = s->transition_mode[k][l] && subsubframe >= s->transition_mode[k][l]; -- float rscale = quant_step_size * s->scale_factor[k][l][sfi] * -+ rscale[l] = quant_step_size * s->scale_factor[k][l][sfi] * - s->scalefactor_adj[k][sel]; - - if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table) { -@@ -1355,7 +1358,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) - block_code1 = get_bits(&s->gb, size); - block_code2 = get_bits(&s->gb, size); - err = decode_blockcodes(block_code1, block_code2, -- levels, block); -+ levels, block + 8 * l); - if (err) { - av_log(s->avctx, AV_LOG_ERROR, - "ERROR: block code look-up failed\n"); -@@ -1364,19 +1367,23 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) - } else { - /* no coding */ - for (m = 0; m < 8; m++) -- block[m] = get_sbits(&s->gb, abits - 3); -+ block[8 * l + m] = get_sbits(&s->gb, abits - 3); - } - } else { - /* Huffman coded */ - for (m = 0; m < 8; m++) -- block[m] = get_bitalloc(&s->gb, -+ block[8 * l + m] = get_bitalloc(&s->gb, - &dca_smpl_bitalloc[abits], sel); - } - -- s->fmt_conv.int32_to_float_fmul_scalar(subband_samples[k][l], -- block, rscale, 8); - } -+ } - -+ s->fmt_conv.int32_to_float_fmul_scalar_array(&s->fmt_conv, subband_samples[k][0], -+ block, rscale, 8 * s->vq_start_subband[k]); -+ -+ for (l = 0; l < s->vq_start_subband[k]; l++) { -+ int m; - /* - * Inverse ADPCM if in prediction mode - */ -diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c -index 79e9645..0fb2436 100644 ---- a/libavcodec/fmtconvert.c -+++ b/libavcodec/fmtconvert.c -@@ -30,6 +30,12 @@ static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, - dst[i] = src[i] * mul; - } - -+static void int32_to_float_fmul_scalar_array_c(FmtConvertContext *c, float *dst, 
const int *src, float *mul, int len){ -+ int i; -+ for(i=0; i<len; i+=8) -+ c->int32_to_float_fmul_scalar(dst, src, *mul++, 8); -+} -+ - static av_always_inline int float_to_int16_one(const float *src){ - return av_clip_int16(lrintf(*src)); - } -@@ -79,6 +85,7 @@ void ff_float_interleave_c(float *dst, const float **src, unsigned int len, - av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx) - { - c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c; -+ c->int32_to_float_fmul_scalar_array = int32_to_float_fmul_scalar_array_c; - c->float_to_int16 = float_to_int16_c; - c->float_to_int16_interleave = float_to_int16_interleave_c; - c->float_interleave = ff_float_interleave_c; -diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h -index 3fb9f4e..f5768c2 100644 ---- a/libavcodec/fmtconvert.h -+++ b/libavcodec/fmtconvert.h -@@ -38,6 +38,20 @@ - void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); - - /** -+ * Convert an array of int32_t to float and multiply by a float value from another array, -+ * stepping along the float array once for each 8 integers. -+ * @param c pointer to FmtConvertContext. -+ * @param dst destination array of float. -+ * constraints: 16-byte aligned -+ * @param src source array of int32_t. -+ * constraints: 16-byte aligned -+ * @param mul source array of float multipliers. -+ * @param len number of elements to convert. -+ * constraints: multiple of 8 -+ */ -+ void (*int32_to_float_fmul_scalar_array)(struct FmtConvertContext *c, float *dst, const int *src, float *mul, int len); -+ -+ /** - * Convert an array of float to an array of int16_t. 
- * - * Convert floats from in the range [-32768.0,32767.0] to ints --- -1.8.1.6 - - -From e8d7a9e5e58b9dd5b57713c0ce860b51e19b62a0 Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Thu, 27 Jun 2013 23:11:44 +0100 -Subject: [PATCH 4/6] Add VFP-accelerated version of imdct_half - ---- - libavcodec/arm/Makefile | 1 + - libavcodec/arm/fft_init_arm.c | 6 + - libavcodec/arm/mdct_vfp.S | 193 +++++++++++++++++++++++++++ - libavcodec/arm/synth_filter_vfp.S | 2 +- - 4 files changed, 201 insertions(+), 1 deletion(-) - create mode 100644 libavcodec/arm/mdct_vfp.S - -diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile -index 9079270..457e9a8 100644 ---- a/libavcodec/arm/Makefile -+++ b/libavcodec/arm/Makefile -@@ -59,6 +59,7 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \ - arm/simple_idct_armv6.o \ - - VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o \ -+ arm/mdct_vfp.o \ - arm/synth_filter_vfp.o - - NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ -diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c -index 44c811d..131c6c4 100644 ---- a/libavcodec/arm/fft_init_arm.c -+++ b/libavcodec/arm/fft_init_arm.c -@@ -26,6 +26,8 @@ - void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); - void ff_fft_calc_neon(FFTContext *s, FFTComplex *z); - -+void ff_imdct_half_vfp(FFTContext *s, FFTSample *output, const FFTSample *input); -+ - void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input); - void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input); - void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input); -@@ -48,6 +50,10 @@ av_cold void ff_fft_init_arm(FFTContext *s) - { - int cpu_flags = av_get_cpu_flags(); - -+ if (have_vfp(cpu_flags)) { -+ s->imdct_half = ff_imdct_half_vfp; -+ } -+ - if (have_neon(cpu_flags)) { - #if CONFIG_FFT - s->fft_permute = ff_fft_permute_neon; -diff --git a/libavcodec/arm/mdct_vfp.S b/libavcodec/arm/mdct_vfp.S -new file mode 100644 -index 0000000..7d55e7d ---- 
/dev/null -+++ b/libavcodec/arm/mdct_vfp.S -@@ -0,0 +1,193 @@ -+/* -+ * Copyright (c) 2013 RISC OS Open Ltd -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Author: Ben Avison -+ */ -+ -+#include "libavutil/arm/asm.S" -+ -+CONTEXT .req a1 -+ORIGOUT .req a2 -+IN .req a3 -+OUT .req v1 -+REVTAB .req v2 -+TCOS .req v3 -+TSIN .req v4 -+OLDFPSCR .req v5 -+J0 .req a2 -+J1 .req a4 -+J2 .req ip -+J3 .req lr -+ -+.macro prerotation_innerloop -+ .set trig_lo, k -+ .set trig_hi, n4 - k - 2 -+ .set in_lo, trig_lo * 2 -+ .set in_hi, trig_hi * 2 -+ vldr d8, [TCOS, #trig_lo*4] @ s16,s17 -+ vldr d9, [TCOS, #trig_hi*4] @ s18,s19 -+ vldr s0, [IN, #in_hi*4 + 12] -+ vldr s1, [IN, #in_hi*4 + 4] -+ vldr s2, [IN, #in_lo*4 + 12] -+ vldr s3, [IN, #in_lo*4 + 4] -+ vmul.f s8, s0, s16 @ vector operation -+ vldr d10, [TSIN, #trig_lo*4] @ s20,s21 -+ vldr d11, [TSIN, #trig_hi*4] @ s22,s23 -+ vldr s4, [IN, #in_lo*4] -+ vldr s5, [IN, #in_lo*4 + 8] -+ vldr s6, [IN, #in_hi*4] -+ vldr s7, [IN, #in_hi*4 + 8] -+ ldr J0, [REVTAB, #trig_lo*2] -+ vmul.f s12, s0, s20 @ vector operation -+ ldr J2, [REVTAB, #trig_hi*2] -+ mov J1, J0, lsr #16 -+ and J0, J0, #255 @ halfword value will be < n4 -+ vmls.f s8, s4, s20 @ vector operation -+ mov J3, J2, lsr #16 -+ and J2, J2, #255 
@ halfword value will be < n4 -+ add J0, OUT, J0, lsl #3 -+ vmla.f s12, s4, s16 @ vector operation -+ add J1, OUT, J1, lsl #3 -+ add J2, OUT, J2, lsl #3 -+ add J3, OUT, J3, lsl #3 -+ vstr s8, [J0] -+ vstr s9, [J1] -+ vstr s10, [J2] -+ vstr s11, [J3] -+ vstr s12, [J0, #4] -+ vstr s13, [J1, #4] -+ vstr s14, [J2, #4] -+ vstr s15, [J3, #4] -+ .set k, k + 2 -+.endm -+ -+.macro postrotation_innerloop tail, head -+ .set trig_lo_head, n8 - k - 2 -+ .set trig_hi_head, n8 + k -+ .set out_lo_head, trig_lo_head * 2 -+ .set out_hi_head, trig_hi_head * 2 -+ .set trig_lo_tail, n8 - (k - 2) - 2 -+ .set trig_hi_tail, n8 + (k - 2) -+ .set out_lo_tail, trig_lo_tail * 2 -+ .set out_hi_tail, trig_hi_tail * 2 -+ .if (k & 2) == 0 -+ TCOS_D0_HEAD .req d10 @ s20,s21 -+ TCOS_D1_HEAD .req d11 @ s22,s23 -+ TCOS_S0_TAIL .req s24 -+ .else -+ TCOS_D0_HEAD .req d12 @ s24,s25 -+ TCOS_D1_HEAD .req d13 @ s26,s27 -+ TCOS_S0_TAIL .req s20 -+ .endif -+ .ifnc "\tail","" -+ vmls.f s8, s0, TCOS_S0_TAIL @ vector operation -+ .endif -+ .ifnc "\head","" -+ vldr d8, [TSIN, #trig_lo_head*4] @ s16,s17 -+ vldr d9, [TSIN, #trig_hi_head*4] @ s18,s19 -+ vldr TCOS_D0_HEAD, [TCOS, #trig_lo_head*4] -+ .endif -+ .ifnc "\tail","" -+ vmla.f s12, s4, TCOS_S0_TAIL @ vector operation -+ .endif -+ .ifnc "\head","" -+ vldr s0, [OUT, #out_lo_head*4] -+ vldr s1, [OUT, #out_lo_head*4 + 8] -+ vldr s2, [OUT, #out_hi_head*4] -+ vldr s3, [OUT, #out_hi_head*4 + 8] -+ vldr s4, [OUT, #out_lo_head*4 + 4] -+ vldr s5, [OUT, #out_lo_head*4 + 12] -+ vldr s6, [OUT, #out_hi_head*4 + 4] -+ vldr s7, [OUT, #out_hi_head*4 + 12] -+ .endif -+ .ifnc "\tail","" -+ vstr s8, [OUT, #out_lo_tail*4] -+ vstr s9, [OUT, #out_lo_tail*4 + 8] -+ vstr s10, [OUT, #out_hi_tail*4] -+ vstr s11, [OUT, #out_hi_tail*4 + 8] -+ .endif -+ .ifnc "\head","" -+ vmul.f s8, s4, s16 @ vector operation -+ .endif -+ .ifnc "\tail","" -+ vstr s12, [OUT, #out_hi_tail*4 + 12] -+ vstr s13, [OUT, #out_hi_tail*4 + 4] -+ vstr s14, [OUT, #out_lo_tail*4 + 12] -+ vstr s15, [OUT, 
#out_lo_tail*4 + 4] -+ .endif -+ .ifnc "\head","" -+ vmul.f s12, s0, s16 @ vector operation -+ vldr TCOS_D1_HEAD, [TCOS, #trig_hi_head*4] -+ .endif -+ .unreq TCOS_D0_HEAD -+ .unreq TCOS_D1_HEAD -+ .unreq TCOS_S0_TAIL -+ .ifnc "\head","" -+ .set k, k + 2 -+ .endif -+.endm -+ -+ -+/* void ff_imdct_half_vfp(FFTContext *s, -+ * FFTSample *output, -+ * const FFTSample *input) -+ */ -+function ff_imdct_half_vfp, export=1 -+ ldr ip, [CONTEXT, #5*4] @ mdct_bits -+ teq ip, #6 -+ bne ff_imdct_half_c @ only case currently accelerated is the one used by DCA -+ -+ .set n, 1<<6 -+ .set n2, n/2 -+ .set n4, n/4 -+ .set n8, n/8 -+ -+ push {v1-v5,lr} -+ vpush {s16-s27} -+ vmrs OLDFPSCR, FPSCR -+ ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 -+ vmsr FPSCR, lr -+ mov OUT, ORIGOUT -+ ldr REVTAB, [CONTEXT, #2*4] -+ ldr TCOS, [CONTEXT, #6*4] -+ ldr TSIN, [CONTEXT, #7*4] -+ -+ .set k, 0 -+ .rept n8/2 -+ prerotation_innerloop -+ .endr -+ -+ vmsr FPSCR, OLDFPSCR -+ mov ORIGOUT, OUT -+ ldr ip, [CONTEXT, #9*4] -+ blx ip @ s->fft_calc(s, output) -+ ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 -+ vmsr FPSCR, lr -+ -+ .set k, 0 -+ postrotation_innerloop , head -+ .rept n8/2 - 1 -+ postrotation_innerloop tail, head -+ .endr -+ postrotation_innerloop tail -+ -+ vmsr FPSCR, OLDFPSCR -+ vpop {s16-s27} -+ pop {v1-v5,pc} -+endfunc -diff --git a/libavcodec/arm/synth_filter_vfp.S b/libavcodec/arm/synth_filter_vfp.S -index 451fe5c..f5845fb 100644 ---- a/libavcodec/arm/synth_filter_vfp.S -+++ b/libavcodec/arm/synth_filter_vfp.S -@@ -133,7 +133,7 @@ function ff_synth_filter_float_vfp, export=1 - str lr, [P_SB_OFF] @ rotate offset, modulo buffer size, ready for next call - ldr a3, [sp, #(16+6+2)*4] @ fetch in from stack, to pass to imdct_half - VFP vmov s16, SCALE @ imdct_half is free to corrupt s0, but it contains one of our arguments in hardfp case -- bl ff_imdct_half_c -+ bl ff_imdct_half_vfp - VFP vmov SCALE, s16 - - vmrs OLDFPSCR, FPSCR --- -1.8.1.6 
- - -From b11427a8aa2ea581a8a8a28bf8e5847e42451f26 Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Fri, 28 Jun 2013 21:21:06 +0100 -Subject: [PATCH 5/6] Add VFP_accelerated version of dca_lfe_fir - ---- - libavcodec/arm/Makefile | 3 +- - libavcodec/arm/dcadsp_init_arm.c | 4 + - libavcodec/arm/dcadsp_vfp.S | 189 ++++++++++++++++++++++++++++ - 3 files changed, 195 insertions(+), 1 deletion(-) - create mode 100644 libavcodec/arm/dcadsp_vfp.S - -diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile -index 457e9a8..8538276 100644 ---- a/libavcodec/arm/Makefile -+++ b/libavcodec/arm/Makefile -@@ -58,7 +58,8 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \ - arm/dsputil_armv6.o \ - arm/simple_idct_armv6.o \ - --VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o \ -+VFP-OBJS-$(HAVE_ARMV6) += arm/dcadsp_vfp.o \ -+ arm/fmtconvert_vfp.o \ - arm/mdct_vfp.o \ - arm/synth_filter_vfp.o - -diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c -index 56568e0..9406b86 100644 ---- a/libavcodec/arm/dcadsp_init_arm.c -+++ b/libavcodec/arm/dcadsp_init_arm.c -@@ -24,6 +24,8 @@ - #include "libavutil/attributes.h" - #include "libavcodec/dcadsp.h" - -+void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs, -+ int decifactor, float scale); - void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs, - int decifactor, float scale); - -@@ -31,6 +33,8 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s) - { - int cpu_flags = av_get_cpu_flags(); - -+ if (have_vfp(cpu_flags)) -+ s->lfe_fir = ff_dca_lfe_fir_vfp; - if (have_neon(cpu_flags)) - s->lfe_fir = ff_dca_lfe_fir_neon; - } -diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S -new file mode 100644 -index 0000000..a479831 ---- /dev/null -+++ b/libavcodec/arm/dcadsp_vfp.S -@@ -0,0 +1,189 @@ -+/* -+ * Copyright (c) 2013 RISC OS Open Ltd -+ * -+ * This file is part of FFmpeg. 
-+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Author: Ben Avison -+ */ -+ -+#include "libavutil/arm/asm.S" -+ -+POUT .req a1 -+PIN .req a2 -+PCOEF .req a3 -+DECIFACTOR .req a4 -+OLDFPSCR .req a4 -+COUNTER .req ip -+ -+SCALE32 .req s28 @ use vector of 4 in place of 9th scalar when decifactor=32 / JMAX=8 -+SCALE64 .req s0 @ spare register in scalar bank when decifactor=64 / JMAX=4 -+IN0 .req s4 -+IN1 .req s5 -+IN2 .req s6 -+IN3 .req s7 -+IN4 .req s0 -+IN5 .req s1 -+IN6 .req s2 -+IN7 .req s3 -+COEF0 .req s8 @ coefficient elements -+COEF1 .req s9 -+COEF2 .req s10 -+COEF3 .req s11 -+COEF4 .req s12 -+COEF5 .req s13 -+COEF6 .req s14 -+COEF7 .req s15 -+ACCUM0 .req s16 @ double-buffered multiply-accumulate results -+ACCUM4 .req s20 -+POST0 .req s24 @ do long-latency post-multiply in this vector in parallel -+POST1 .req s25 -+POST2 .req s26 -+POST3 .req s27 -+ -+ -+.macro inner_loop decifactor, dir, tail, head -+ .ifc "\dir","up" -+ .set X, 0 -+ .set Y, 4 -+ .else -+ .set X, 4*JMAX*4 - 4 -+ .set Y, -4 -+ .endif -+ .ifnc "\head","" -+ vldr COEF0, [PCOEF, #X + (0*JMAX + 0) * Y] -+ vldr COEF1, [PCOEF, #X + (1*JMAX + 0) * Y] -+ vldr COEF2, [PCOEF, #X + (2*JMAX + 0) * Y] -+ vldr COEF3, [PCOEF, #X + (3*JMAX + 0) * Y] -+ .endif -+ .ifnc "\tail","" -+ vadd.f POST0, ACCUM0, ACCUM4 @ 
vector operation -+ .endif -+ .ifnc "\head","" -+ vmul.f ACCUM0, COEF0, IN0 @ vector = vector * scalar -+ vldr COEF4, [PCOEF, #X + (0*JMAX + 1) * Y] -+ vldr COEF5, [PCOEF, #X + (1*JMAX + 1) * Y] -+ vldr COEF6, [PCOEF, #X + (2*JMAX + 1) * Y] -+ .endif -+ .ifnc "\tail","" -+ vmul.f POST0, POST0, SCALE\decifactor @ vector operation (SCALE may be scalar) -+ .endif -+ .ifnc "\head","" -+ vldr COEF7, [PCOEF, #X + (3*JMAX + 1) * Y] -+ .ifc "\tail","" -+ vmul.f ACCUM4, COEF4, IN1 @ vector operation -+ .endif -+ vldr COEF0, [PCOEF, #X + (0*JMAX + 2) * Y] -+ vldr COEF1, [PCOEF, #X + (1*JMAX + 2) * Y] -+ .ifnc "\tail","" -+ vmul.f ACCUM4, COEF4, IN1 @ vector operation -+ .endif -+ vldr COEF2, [PCOEF, #X + (2*JMAX + 2) * Y] -+ vldr COEF3, [PCOEF, #X + (3*JMAX + 2) * Y] -+ .endif -+ .ifnc "\tail","" -+ vstmia POUT!, {POST0-POST3} -+ .endif -+ .ifnc "\head","" -+ vmla.f ACCUM0, COEF0, IN2 @ vector = vector * scalar -+ vldr COEF4, [PCOEF, #X + (0*JMAX + 3) * Y] -+ vldr COEF5, [PCOEF, #X + (1*JMAX + 3) * Y] -+ vldr COEF6, [PCOEF, #X + (2*JMAX + 3) * Y] -+ vldr COEF7, [PCOEF, #X + (3*JMAX + 3) * Y] -+ vmla.f ACCUM4, COEF4, IN3 @ vector = vector * scalar -+ .if \decifactor == 32 -+ vldr COEF0, [PCOEF, #X + (0*JMAX + 4) * Y] -+ vldr COEF1, [PCOEF, #X + (1*JMAX + 4) * Y] -+ vldr COEF2, [PCOEF, #X + (2*JMAX + 4) * Y] -+ vldr COEF3, [PCOEF, #X + (3*JMAX + 4) * Y] -+ vmla.f ACCUM0, COEF0, IN4 @ vector = vector * scalar -+ vldr COEF4, [PCOEF, #X + (0*JMAX + 5) * Y] -+ vldr COEF5, [PCOEF, #X + (1*JMAX + 5) * Y] -+ vldr COEF6, [PCOEF, #X + (2*JMAX + 5) * Y] -+ vldr COEF7, [PCOEF, #X + (3*JMAX + 5) * Y] -+ vmla.f ACCUM4, COEF4, IN5 @ vector = vector * scalar -+ vldr COEF0, [PCOEF, #X + (0*JMAX + 6) * Y] -+ vldr COEF1, [PCOEF, #X + (1*JMAX + 6) * Y] -+ vldr COEF2, [PCOEF, #X + (2*JMAX + 6) * Y] -+ vldr COEF3, [PCOEF, #X + (3*JMAX + 6) * Y] -+ vmla.f ACCUM0, COEF0, IN6 @ vector = vector * scalar -+ vldr COEF4, [PCOEF, #X + (0*JMAX + 7) * Y] -+ vldr COEF5, [PCOEF, #X + (1*JMAX + 7) * Y] -+ vldr 
COEF6, [PCOEF, #X + (2*JMAX + 7) * Y] -+ vldr COEF7, [PCOEF, #X + (3*JMAX + 7) * Y] -+ vmla.f ACCUM4, COEF4, IN7 @ vector = vector * scalar -+ .endif -+ .endif -+.endm -+ -+.macro dca_lfe_fir decifactor -+ .if \decifactor == 32 -+ .set JMAX, 8 -+ vpush {s16-s31} -+ vmov SCALE32, s0 @ duplicate scalar across vector -+ vldr IN4, [PIN, #-4*4] -+ vldr IN5, [PIN, #-5*4] -+ vldr IN6, [PIN, #-6*4] -+ vldr IN7, [PIN, #-7*4] -+ .else -+ .set JMAX, 4 -+ vpush {s16-s27} -+ .endif -+ -+ mov COUNTER, #\decifactor/4 - 1 -+ inner_loop \decifactor, up,, head -+1: add PCOEF, PCOEF, #4*JMAX*4 -+ subs COUNTER, COUNTER, #1 -+ inner_loop \decifactor, up, tail, head -+ bne 1b -+ inner_loop \decifactor, up, tail -+ -+ mov COUNTER, #\decifactor/4 - 1 -+ inner_loop \decifactor, down,, head -+1: sub PCOEF, PCOEF, #4*JMAX*4 -+ subs COUNTER, COUNTER, #1 -+ inner_loop \decifactor, down, tail, head -+ bne 1b -+ inner_loop \decifactor, down, tail -+ -+ .if \decifactor == 32 -+ vpop {s16-s31} -+ .else -+ vpop {s16-s27} -+ .endif -+ vmsr FPSCR, OLDFPSCR -+ bx lr -+.endm -+ -+ -+/* void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs, -+ * int decifactor, float scale) -+ */ -+function ff_dca_lfe_fir_vfp, export=1 -+ teq DECIFACTOR, #32 -+ vmrs OLDFPSCR, FPSCR -+ ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 -+ vmsr FPSCR, ip -+NOVFP vldr s0, [sp] -+ vldr IN0, [PIN, #-0*4] -+ vldr IN1, [PIN, #-1*4] -+ vldr IN2, [PIN, #-2*4] -+ vldr IN3, [PIN, #-3*4] -+ beq 32f -+64: dca_lfe_fir 64 -+ .ltorg -+32: dca_lfe_fir 32 -+endfunc --- -1.8.1.6 - - -From 24b72b0117acebae215cb5abb997f68cd0fe1938 Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Tue, 9 Jul 2013 17:44:50 +0100 -Subject: [PATCH 6/6] Add VFP-accelerated version of fft16 - ---- - libavcodec/arm/Makefile | 1 + - libavcodec/arm/fft_vfp.S | 299 +++++++++++++++++++++++++++++++++++ - libavcodec/arm/mdct_vfp.S | 5 +- - 3 files changed, 302 insertions(+), 3 deletions(-) - create mode 100644 
libavcodec/arm/fft_vfp.S - -diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile -index 8538276..660d1d4 100644 ---- a/libavcodec/arm/Makefile -+++ b/libavcodec/arm/Makefile -@@ -59,6 +59,7 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \ - arm/simple_idct_armv6.o \ - - VFP-OBJS-$(HAVE_ARMV6) += arm/dcadsp_vfp.o \ -+ arm/fft_vfp.o \ - arm/fmtconvert_vfp.o \ - arm/mdct_vfp.o \ - arm/synth_filter_vfp.o -diff --git a/libavcodec/arm/fft_vfp.S b/libavcodec/arm/fft_vfp.S -new file mode 100644 -index 0000000..32ea0aa ---- /dev/null -+++ b/libavcodec/arm/fft_vfp.S -@@ -0,0 +1,299 @@ -+/* -+ * Copyright (c) 2013 RISC OS Open Ltd -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ * -+ * Author: Ben Avison -+ */ -+ -+#include "libavutil/arm/asm.S" -+ -+@ TODO: * FFTs wider than 16 -+@ * dispatch code -+ -+function fft4_vfp -+ vldr d0, [a1, #0*2*4] @ s0,s1 = z[0] -+ vldr d4, [a1, #1*2*4] @ s8,s9 = z[1] -+ vldr d1, [a1, #2*2*4] @ s2,s3 = z[2] -+ vldr d5, [a1, #3*2*4] @ s10,s11 = z[3] -+ @ stall -+ vadd.f s12, s0, s8 @ i0 -+ vadd.f s13, s1, s9 @ i1 -+ vadd.f s14, s2, s10 @ i2 -+ vadd.f s15, s3, s11 @ i3 -+ vsub.f s8, s0, s8 @ i4 -+ vsub.f s9, s1, s9 @ i5 -+ vsub.f s10, s2, s10 @ i6 -+ vsub.f s11, s3, s11 @ i7 -+ @ stall -+ @ stall -+ vadd.f s0, s12, s14 @ z[0].re -+ vsub.f s4, s12, s14 @ z[2].re -+ vadd.f s1, s13, s15 @ z[0].im -+ vsub.f s5, s13, s15 @ z[2].im -+ vadd.f s7, s9, s10 @ z[3].im -+ vsub.f s3, s9, s10 @ z[1].im -+ vadd.f s2, s8, s11 @ z[1].re -+ vsub.f s6, s8, s11 @ z[3].re -+ @ stall -+ @ stall -+ vstr d0, [a1, #0*2*4] -+ vstr d2, [a1, #2*2*4] -+ @ stall -+ @ stall -+ vstr d1, [a1, #1*2*4] -+ vstr d3, [a1, #3*2*4] -+ -+ bx lr -+endfunc -+ -+.macro macro_fft8_head -+ @ FFT4 -+ vldr d4, [a1, #0 * 2*4] -+ vldr d6, [a1, #1 * 2*4] -+ vldr d5, [a1, #2 * 2*4] -+ vldr d7, [a1, #3 * 2*4] -+ @ BF -+ vldr d12, [a1, #4 * 2*4] -+ vadd.f s16, s8, s12 @ vector op -+ vldr d14, [a1, #5 * 2*4] -+ vldr d13, [a1, #6 * 2*4] -+ vldr d15, [a1, #7 * 2*4] -+ vsub.f s20, s8, s12 @ vector op -+ vadd.f s0, s16, s18 -+ vsub.f s2, s16, s18 -+ vadd.f s1, s17, s19 -+ vsub.f s3, s17, s19 -+ vadd.f s7, s21, s22 -+ vsub.f s5, s21, s22 -+ vadd.f s4, s20, s23 -+ vsub.f s6, s20, s23 -+ vsub.f s20, s24, s28 @ vector op -+ vstr d0, [a1, #0 * 2*4] @ transfer s0-s7 to s24-s31 via memory -+ vstr d1, [a1, #1 * 2*4] -+ vldr s0, cos1pi4 -+ vadd.f s16, s24, s28 @ vector op -+ vstr d2, [a1, #2 * 2*4] -+ vstr d3, [a1, #3 * 2*4] -+ vldr d12, [a1, #0 * 2*4] 
-+ @ TRANSFORM -+ vmul.f s20, s20, s0 @ vector x scalar op -+ vldr d13, [a1, #1 * 2*4] -+ vldr d14, [a1, #2 * 2*4] -+ vldr d15, [a1, #3 * 2*4] -+ @ BUTTERFLIES -+ vadd.f s0, s18, s16 -+ vadd.f s1, s17, s19 -+ vsub.f s2, s17, s19 -+ vsub.f s3, s18, s16 -+ vadd.f s4, s21, s20 -+ vsub.f s5, s21, s20 -+ vadd.f s6, s22, s23 -+ vsub.f s7, s22, s23 -+ vadd.f s8, s0, s24 @ vector op -+ vstr d0, [a1, #0 * 2*4] @ transfer s0-s3 to s12-s15 via memory -+ vstr d1, [a1, #1 * 2*4] -+ vldr d6, [a1, #0 * 2*4] -+ vldr d7, [a1, #1 * 2*4] -+ vadd.f s1, s5, s6 -+ vadd.f s0, s7, s4 -+ vsub.f s2, s5, s6 -+ vsub.f s3, s7, s4 -+ vsub.f s12, s24, s12 @ vector op -+ vsub.f s5, s29, s1 -+ vsub.f s4, s28, s0 -+ vsub.f s6, s30, s2 -+ vsub.f s7, s31, s3 -+ vadd.f s16, s0, s28 @ vector op -+ vstr d6, [a1, #4 * 2*4] -+ vstr d7, [a1, #6 * 2*4] -+ vstr d4, [a1, #0 * 2*4] -+ vstr d5, [a1, #2 * 2*4] -+ vstr d2, [a1, #5 * 2*4] -+ vstr d3, [a1, #7 * 2*4] -+.endm -+ -+.macro macro_fft8_tail -+ vstr d8, [a1, #1 * 2*4] -+ vstr d9, [a1, #3 * 2*4] -+.endm -+ -+function fft8_vfp -+ ldr a3, =0x03030000 @ RunFast mode, vector length 4, stride 1 -+ vmrs a2, FPSCR -+ vmsr FPSCR, a3 -+ vpush {s16-s31} -+ -+ macro_fft8_head -+ macro_fft8_tail -+ -+ vpop {s16-s31} -+ vmsr FPSCR, a2 -+ bx lr -+endfunc -+ -+.align 3 -+cos1pi4: @ cos(1*pi/4) = sqrt(2) -+ .float 0.707106769084930419921875 -+cos1pi8: @ cos(1*pi/8) = sqrt(2+sqrt(2))/2 -+ .float 0.92387950420379638671875 -+cos3pi8: @ cos(2*pi/8) = sqrt(2-sqrt(2))/2 -+ .float 0.3826834261417388916015625 -+ -+function ff_fft16_vfp, export=1 -+ ldr a3, =0x03030000 @ RunFast mode, vector length 4, stride 1 -+ vmrs a2, FPSCR -+ vmsr FPSCR, a3 -+ vpush {s16-s31} -+ -+ macro_fft8_head -+ @ FFT4(z+8) -+ vldr d10, [a1, #8 * 2*4] -+ vldr d12, [a1, #9 * 2*4] -+ vldr d11, [a1, #10 * 2*4] -+ vldr d13, [a1, #11 * 2*4] -+ macro_fft8_tail -+ vadd.f s16, s20, s24 @ vector op -+ @ FFT4(z+12) -+ vldr d4, [a1, #12 * 2*4] -+ vldr d6, [a1, #13 * 2*4] -+ vldr d5, [a1, #14 * 2*4] -+ vsub.f s20, 
s20, s24 @ vector op -+ vldr d7, [a1, #15 * 2*4] -+ vadd.f s0, s16, s18 -+ vsub.f s4, s16, s18 -+ vadd.f s1, s17, s19 -+ vsub.f s5, s17, s19 -+ vadd.f s7, s21, s22 -+ vsub.f s3, s21, s22 -+ vadd.f s2, s20, s23 -+ vsub.f s6, s20, s23 -+ vadd.f s16, s8, s12 @ vector op -+ vstr d0, [a1, #8 * 2*4] -+ vstr d2, [a1, #10 * 2*4] -+ vstr d1, [a1, #9 * 2*4] -+ vsub.f s20, s8, s12 -+ vstr d3, [a1, #11 * 2*4] -+ @ TRANSFORM(z[2],z[6],z[10],z[14],cos1pi4,cos1pi4) -+ vldr d12, [a1, #10 * 2*4] -+ vadd.f s0, s16, s18 -+ vadd.f s1, s17, s19 -+ vsub.f s6, s16, s18 -+ vsub.f s7, s17, s19 -+ vsub.f s3, s21, s22 -+ vadd.f s2, s20, s23 -+ vadd.f s5, s21, s22 -+ vsub.f s4, s20, s23 -+ vstr d0, [a1, #12 * 2*4] -+ vmov s0, s6 -+ @ TRANSFORM(z[1],z[5],z[9],z[13],cos1pi8,cos3pi8) -+ vldr d6, [a1, #9 * 2*4] -+ vstr d1, [a1, #13 * 2*4] -+ vldr d1, cos1pi4 @ s2 = cos1pi4, s3 = cos1pi8 -+ vstr d2, [a1, #15 * 2*4] -+ vldr d7, [a1, #13 * 2*4] -+ vadd.f s4, s25, s24 -+ vsub.f s5, s25, s24 -+ vsub.f s6, s0, s7 -+ vadd.f s7, s0, s7 -+ vmul.f s20, s12, s3 @ vector op -+ @ TRANSFORM(z[3],z[7],z[11],z[15],cos3pi8,cos1pi8) -+ vldr d4, [a1, #11 * 2*4] -+ vldr d5, [a1, #15 * 2*4] -+ vldr s1, cos3pi8 -+ vmul.f s24, s4, s2 @ vector * scalar op -+ vmul.f s28, s12, s1 @ vector * scalar op -+ vmul.f s12, s8, s1 @ vector * scalar op -+ vadd.f s4, s20, s29 -+ vsub.f s5, s21, s28 -+ vsub.f s6, s22, s31 -+ vadd.f s7, s23, s30 -+ vmul.f s8, s8, s3 @ vector * scalar op -+ vldr d8, [a1, #1 * 2*4] -+ vldr d9, [a1, #5 * 2*4] -+ vldr d10, [a1, #3 * 2*4] -+ vldr d11, [a1, #7 * 2*4] -+ vldr d14, [a1, #2 * 2*4] -+ vadd.f s0, s6, s4 -+ vadd.f s1, s5, s7 -+ vsub.f s2, s5, s7 -+ vsub.f s3, s6, s4 -+ vadd.f s4, s12, s9 -+ vsub.f s5, s13, s8 -+ vsub.f s6, s14, s11 -+ vadd.f s7, s15, s10 -+ vadd.f s12, s0, s16 @ vector op -+ vstr d0, [a1, #1 * 2*4] -+ vstr d1, [a1, #5 * 2*4] -+ vldr d4, [a1, #1 * 2*4] -+ vldr d5, [a1, #5 * 2*4] -+ vadd.f s0, s6, s4 -+ vadd.f s1, s5, s7 -+ vsub.f s2, s5, s7 -+ vsub.f s3, s6, s4 -+ vsub.f s8, s16, 
s8 @ vector op -+ vstr d6, [a1, #1 * 2*4] -+ vstr d7, [a1, #5 * 2*4] -+ vldr d15, [a1, #6 * 2*4] -+ vsub.f s4, s20, s0 -+ vsub.f s5, s21, s1 -+ vsub.f s6, s22, s2 -+ vsub.f s7, s23, s3 -+ vadd.f s20, s0, s20 @ vector op -+ vstr d4, [a1, #9 * 2*4] -+ @ TRANSFORM_ZERO(z[0],z[4],z[8],z[12]) -+ vldr d6, [a1, #8 * 2*4] -+ vstr d5, [a1, #13 * 2*4] -+ vldr d7, [a1, #12 * 2*4] -+ vstr d2, [a1, #11 * 2*4] -+ vldr d8, [a1, #0 * 2*4] -+ vstr d3, [a1, #15 * 2*4] -+ vldr d9, [a1, #4 * 2*4] -+ vadd.f s0, s26, s24 -+ vadd.f s1, s25, s27 -+ vsub.f s2, s25, s27 -+ vsub.f s3, s26, s24 -+ vadd.f s4, s14, s12 -+ vadd.f s5, s13, s15 -+ vsub.f s6, s13, s15 -+ vsub.f s7, s14, s12 -+ vadd.f s8, s0, s28 @ vector op -+ vstr d0, [a1, #3 * 2*4] -+ vstr d1, [a1, #7 * 2*4] -+ vldr d6, [a1, #3 * 2*4] -+ vldr d7, [a1, #7 * 2*4] -+ vsub.f s0, s16, s4 -+ vsub.f s1, s17, s5 -+ vsub.f s2, s18, s6 -+ vsub.f s3, s19, s7 -+ vsub.f s12, s28, s12 @ vector op -+ vadd.f s16, s4, s16 @ vector op -+ vstr d10, [a1, #3 * 2*4] -+ vstr d11, [a1, #7 * 2*4] -+ vstr d4, [a1, #2 * 2*4] -+ vstr d5, [a1, #6 * 2*4] -+ vstr d0, [a1, #8 * 2*4] -+ vstr d1, [a1, #12 * 2*4] -+ vstr d6, [a1, #10 * 2*4] -+ vstr d7, [a1, #14 * 2*4] -+ vstr d8, [a1, #0 * 2*4] -+ vstr d9, [a1, #4 * 2*4] -+ -+ vpop {s16-s31} -+ vmsr FPSCR, a2 -+ bx lr -+endfunc -diff --git a/libavcodec/arm/mdct_vfp.S b/libavcodec/arm/mdct_vfp.S -index 7d55e7d..5374dd5 100644 ---- a/libavcodec/arm/mdct_vfp.S -+++ b/libavcodec/arm/mdct_vfp.S -@@ -174,9 +174,8 @@ function ff_imdct_half_vfp, export=1 - .endr - - vmsr FPSCR, OLDFPSCR -- mov ORIGOUT, OUT -- ldr ip, [CONTEXT, #9*4] -- blx ip @ s->fft_calc(s, output) -+ mov a1, OUT -+ bl ff_fft16_vfp - ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 - vmsr FPSCR, lr - --- -1.8.1.6 - diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0001-Support-raw-dvdsub-palette-as-stored-on-normal-dvd-s.patch 
b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0001-Support-raw-dvdsub-palette-as-stored-on-normal-dvd-s.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0001-Support-raw-dvdsub-palette-as-stored-on-normal-dvd-s.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0001-Support-raw-dvdsub-palette-as-stored-on-normal-dvd-s.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0002-Change-fallthrough-logic-for-read_seek-to-be-based-o.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0002-Change-fallthrough-logic-for-read_seek-to-be-based-o.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0002-Change-fallthrough-logic-for-read_seek-to-be-based-o.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0002-Change-fallthrough-logic-for-read_seek-to-be-based-o.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0003-matroska-Check-return-value-of-avio_seek-and-avoid-m.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0003-matroska-Check-return-value-of-avio_seek-and-avoid-m.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0003-matroska-Check-return-value-of-avio_seek-and-avoid-m.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0003-matroska-Check-return-value-of-avio_seek-and-avoid-m.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0004-asf-hacks.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0004-asf-hacks.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0004-asf-hacks.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0004-asf-hacks.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0005-if-av_read_packet-returns-AVERROR_IO-we-are-done.-ff.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0005-if-av_read_packet-returns-AVERROR_IO-we-are-done.-ff.patch similarity index 100% rename 
from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0005-if-av_read_packet-returns-AVERROR_IO-we-are-done.-ff.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0005-if-av_read_packet-returns-AVERROR_IO-we-are-done.-ff.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0006-added-Ticket-7187-TV-Teletext-support-for-DVB-EBU-Te.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0006-added-Ticket-7187-TV-Teletext-support-for-DVB-EBU-Te.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0006-added-Ticket-7187-TV-Teletext-support-for-DVB-EBU-Te.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0006-added-Ticket-7187-TV-Teletext-support-for-DVB-EBU-Te.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0007-Don-t-accept-mpegts-PMT-that-isn-t-current.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0007-Don-t-accept-mpegts-PMT-that-isn-t-current.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0007-Don-t-accept-mpegts-PMT-that-isn-t-current.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0007-Don-t-accept-mpegts-PMT-that-isn-t-current.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0008-Don-t-reparse-PMT-unless-it-s-version-has-changed.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0008-Don-t-reparse-PMT-unless-it-s-version-has-changed.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0008-Don-t-reparse-PMT-unless-it-s-version-has-changed.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0008-Don-t-reparse-PMT-unless-it-s-version-has-changed.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0009-fixed-compile-with-VDPAU-header-versions-without-MPE.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0009-fixed-compile-with-VDPAU-header-versions-without-MPE.patch similarity index 100% rename from 
packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0009-fixed-compile-with-VDPAU-header-versions-without-MPE.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0009-fixed-compile-with-VDPAU-header-versions-without-MPE.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0010-Read-PID-timestamps-as-well-as-PCR-timestamps-to-fin.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0010-Read-PID-timestamps-as-well-as-PCR-timestamps-to-fin.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0010-Read-PID-timestamps-as-well-as-PCR-timestamps-to-fin.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0010-Read-PID-timestamps-as-well-as-PCR-timestamps-to-fin.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0011-Get-stream-durations-using-read_timestamp.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0011-Get-stream-durations-using-read_timestamp.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0011-Get-stream-durations-using-read_timestamp.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0011-Get-stream-durations-using-read_timestamp.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0013-aacenc-add-recognized-profiles-array.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0013-aacenc-add-recognized-profiles-array.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0013-aacenc-add-recognized-profiles-array.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0013-aacenc-add-recognized-profiles-array.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0014-changed-allow-4-second-skew-between-streams-in-mov-b.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0014-changed-allow-4-second-skew-between-streams-in-mov-b.patch similarity index 100% rename from 
packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0014-changed-allow-4-second-skew-between-streams-in-mov-b.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0014-changed-allow-4-second-skew-between-streams-in-mov-b.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0015-fixed-memleak-in-mpegts-demuxer-on-some-malformed-mp.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0015-fixed-memleak-in-mpegts-demuxer-on-some-malformed-mp.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0015-fixed-memleak-in-mpegts-demuxer-on-some-malformed-mp.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0015-fixed-memleak-in-mpegts-demuxer-on-some-malformed-mp.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0016-Speed-up-mpegts-av_find_stream_info.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0016-Speed-up-mpegts-av_find_stream_info.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0016-Speed-up-mpegts-av_find_stream_info.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0016-Speed-up-mpegts-av_find_stream_info.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0017-allow-customizing-shared-library-soname-name-with-ma.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0017-allow-customizing-shared-library-soname-name-with-ma.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0017-allow-customizing-shared-library-soname-name-with-ma.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0017-allow-customizing-shared-library-soname-name-with-ma.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0018-dxva-mpeg2-Allocate-slices-array-dynamically-fixes-v.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0018-dxva-mpeg2-Allocate-slices-array-dynamically-fixes-v.patch similarity index 100% rename from 
packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0018-dxva-mpeg2-Allocate-slices-array-dynamically-fixes-v.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0018-dxva-mpeg2-Allocate-slices-array-dynamically-fixes-v.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0019-dxva-mpeg2-speed-up-slice-allocation.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0019-dxva-mpeg2-speed-up-slice-allocation.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0019-dxva-mpeg2-speed-up-slice-allocation.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0019-dxva-mpeg2-speed-up-slice-allocation.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0020-dxva-vc1-Take-BI-into-account-for-forward-and-backwa.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0020-dxva-vc1-Take-BI-into-account-for-forward-and-backwa.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0020-dxva-vc1-Take-BI-into-account-for-forward-and-backwa.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0020-dxva-vc1-Take-BI-into-account-for-forward-and-backwa.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0021-dxva-vc1-Pass-overlapping-transforms-hint.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0021-dxva-vc1-Pass-overlapping-transforms-hint.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0021-dxva-vc1-Pass-overlapping-transforms-hint.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0021-dxva-vc1-Pass-overlapping-transforms-hint.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0022-dxva-h264-Fix-dxva-playback-of-streams-that-don-t-st.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0022-dxva-h264-Fix-dxva-playback-of-streams-that-don-t-st.patch similarity index 100% rename from 
packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0022-dxva-h264-Fix-dxva-playback-of-streams-that-don-t-st.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0022-dxva-h264-Fix-dxva-playback-of-streams-that-don-t-st.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0024-add-public-version-of-ff_read_frame_flush.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0024-add-public-version-of-ff_read_frame_flush.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0024-add-public-version-of-ff_read_frame_flush.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0024-add-public-version-of-ff_read_frame_flush.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0026-Handle-return-value-of-BeginFrame-better.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0026-Handle-return-value-of-BeginFrame-better.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0026-Handle-return-value-of-BeginFrame-better.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0026-Handle-return-value-of-BeginFrame-better.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0028-ffmpeg-1.2-fixed-dvd-still-frames-ended-up-in-intern.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0028-ffmpeg-1.2-fixed-dvd-still-frames-ended-up-in-intern.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0028-ffmpeg-1.2-fixed-dvd-still-frames-ended-up-in-intern.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0028-ffmpeg-1.2-fixed-dvd-still-frames-ended-up-in-intern.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0029-stop-forcing-gas-preprocessor-for-darwin.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0029-stop-forcing-gas-preprocessor-for-darwin.patch similarity index 100% rename from 
packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0029-stop-forcing-gas-preprocessor-for-darwin.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0029-stop-forcing-gas-preprocessor-for-darwin.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0036-backport-register-vdpau-hwaccel-for-mpeg12-fe1f36547d0be963e352de0cde1a6cba59ea2e78.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0036-backport-register-vdpau-hwaccel-for-mpeg12-fe1f36547d0be963e352de0cde1a6cba59ea2e78.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0036-backport-register-vdpau-hwaccel-for-mpeg12-fe1f36547d0be963e352de0cde1a6cba59ea2e78.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0036-backport-register-vdpau-hwaccel-for-mpeg12-fe1f36547d0be963e352de0cde1a6cba59ea2e78.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0037-backport-fix-vdpau-vc1-interlace-modes-b37cc5995b88ec68a68cb8e496a008e1cd467077.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0037-backport-fix-vdpau-vc1-interlace-modes-b37cc5995b88ec68a68cb8e496a008e1cd467077.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0037-backport-fix-vdpau-vc1-interlace-modes-b37cc5995b88ec68a68cb8e496a008e1cd467077.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0037-backport-fix-vdpau-vc1-interlace-modes-b37cc5995b88ec68a68cb8e496a008e1cd467077.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0039-h264-expose-h264-frame-packing-as-stereo_mode-metada.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0039-h264-expose-h264-frame-packing-as-stereo_mode-metada.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-0039-h264-expose-h264-frame-packing-as-stereo_mode-metada.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0039-h264-expose-h264-frame-packing-as-stereo_mode-metada.patch diff --git 
a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0040-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-s.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0040-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-s.patch new file mode 100644 index 0000000000..7c2c9d2237 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0040-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-s.patch @@ -0,0 +1,311 @@ +From 40daea3c1bafa9cea37b65f856c3c0432767d760 Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 15 Jul 2013 18:28:09 +0100 +Subject: [PATCH 39/49] [ffmpeg] - backport - arm: Add VFP-accelerated version + of synth_filter_float +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + Before After + Mean StdDev Mean StdDev Change +This function 9295.0 114.9 4853.2 83.5 +91.5% +Overall 23699.8 397.6 19285.5 292.0 +22.9% + +Signed-off-by: Martin Storsjö +--- + libavcodec/arm/Makefile | 1 + + libavcodec/arm/fft_init_arm.c | 8 + + libavcodec/arm/synth_filter_vfp.S | 243 ++++++++++++++++++++++++++ + 3 files changed, 252 insertions(+) + create mode 100644 libavcodec/arm/synth_filter_vfp.S + +diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile +index 1c91d62..aee9d73 100644 +--- a/libavcodec/arm/Makefile ++++ b/libavcodec/arm/Makefile +@@ -58,6 +58,7 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \ + arm/dsputil_armv6.o \ + arm/simple_idct_armv6.o \ + ++VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_vfp.o + VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o + + NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ +diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c +index 8c98abc..fe0acc5 100644 +--- a/libavcodec/arm/fft_init_arm.c ++++ b/libavcodec/arm/fft_init_arm.c +@@ -32,6 +32,12 @@ void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input) + + void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z); + ++void ff_synth_filter_float_vfp(FFTContext *imdct, ++ 
float *synth_buf_ptr, int *synth_buf_offset, ++ float synth_buf2[32], const float window[512], ++ float out[32], const float in[32], ++ float scale); ++ + void ff_synth_filter_float_neon(FFTContext *imdct, + float *synth_buf_ptr, int *synth_buf_offset, + float synth_buf2[32], const float window[512], +@@ -71,6 +77,8 @@ av_cold void ff_synth_filter_init_arm(SynthFilterContext *s) + { + int cpu_flags = av_get_cpu_flags(); + ++ if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) ++ s->synth_filter_float = ff_synth_filter_float_vfp; + if (have_neon(cpu_flags)) + s->synth_filter_float = ff_synth_filter_float_neon; + } +diff --git a/libavcodec/arm/synth_filter_vfp.S b/libavcodec/arm/synth_filter_vfp.S +new file mode 100644 +index 0000000..c219c41 +--- /dev/null ++++ b/libavcodec/arm/synth_filter_vfp.S +@@ -0,0 +1,243 @@ ++/* ++ * Copyright (c) 2013 RISC OS Open Ltd ++ * Author: Ben Avison ++ * ++ * This file is part of Libav. ++ * ++ * Libav is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * Libav is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with Libav; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/arm/asm.S" ++ ++IMDCT .req r0 ++ORIG_P_SB .req r1 ++P_SB_OFF .req r2 ++I .req r0 ++P_SB2_UP .req r1 ++OLDFPSCR .req r2 ++P_SB2_DN .req r3 ++P_WIN_DN .req r4 ++P_OUT_DN .req r5 ++P_SB .req r6 ++J_WRAP .req r7 ++P_WIN_UP .req r12 ++P_OUT_UP .req r14 ++ ++SCALE .req s0 ++SBUF_DAT_REV0 .req s4 ++SBUF_DAT_REV1 .req s5 ++SBUF_DAT_REV2 .req s6 ++SBUF_DAT_REV3 .req s7 ++VA0 .req s8 ++VA3 .req s11 ++VB0 .req s12 ++VB3 .req s15 ++VC0 .req s8 ++VC3 .req s11 ++VD0 .req s12 ++VD3 .req s15 ++SBUF_DAT0 .req s16 ++SBUF_DAT1 .req s17 ++SBUF_DAT2 .req s18 ++SBUF_DAT3 .req s19 ++SBUF_DAT_ALT0 .req s20 ++SBUF_DAT_ALT1 .req s21 ++SBUF_DAT_ALT2 .req s22 ++SBUF_DAT_ALT3 .req s23 ++WIN_DN_DAT0 .req s24 ++WIN_UP_DAT0 .req s28 ++ ++ ++.macro inner_loop half, tail, head ++ .if (OFFSET & (64*4)) == 0 @ even numbered call ++ SBUF_DAT_THIS0 .req SBUF_DAT0 ++ SBUF_DAT_THIS1 .req SBUF_DAT1 ++ SBUF_DAT_THIS2 .req SBUF_DAT2 ++ SBUF_DAT_THIS3 .req SBUF_DAT3 ++ .ifnc "\head","" ++ vldr d8, [P_SB, #OFFSET] @ d8 = SBUF_DAT ++ vldr d9, [P_SB, #OFFSET+8] ++ .endif ++ .else ++ SBUF_DAT_THIS0 .req SBUF_DAT_ALT0 ++ SBUF_DAT_THIS1 .req SBUF_DAT_ALT1 ++ SBUF_DAT_THIS2 .req SBUF_DAT_ALT2 ++ SBUF_DAT_THIS3 .req SBUF_DAT_ALT3 ++ .ifnc "\head","" ++ vldr d10, [P_SB, #OFFSET] @ d10 = SBUF_DAT_ALT ++ vldr d11, [P_SB, #OFFSET+8] ++ .endif ++ .endif ++ .ifnc "\tail","" ++ .ifc "\half","ab" ++ vmls.f VA0, SBUF_DAT_REV0, WIN_DN_DAT0 @ all operands treated as vectors ++ .else ++ vmla.f VD0, SBUF_DAT_REV0, WIN_DN_DAT0 @ all operands treated as vectors ++ .endif ++ .endif ++ .ifnc "\head","" ++ vldr d14, [P_WIN_UP, #OFFSET] @ d14 = WIN_UP_DAT ++ vldr d15, [P_WIN_UP, #OFFSET+8] ++ vldr d12, [P_WIN_DN, #OFFSET] @ d12 = WIN_DN_DAT ++ vldr d13, [P_WIN_DN, #OFFSET+8] ++ vmov 
SBUF_DAT_REV3, SBUF_DAT_THIS0 ++ vmov SBUF_DAT_REV2, SBUF_DAT_THIS1 ++ vmov SBUF_DAT_REV1, SBUF_DAT_THIS2 ++ vmov SBUF_DAT_REV0, SBUF_DAT_THIS3 ++ .ifc "\half","ab" ++ vmla.f VB0, SBUF_DAT_THIS0, WIN_UP_DAT0 ++ .else ++ vmla.f VC0, SBUF_DAT_THIS0, WIN_UP_DAT0 ++ .endif ++ teq J_WRAP, #J ++ bne 2f @ strongly predictable, so better than cond exec in this case ++ sub P_SB, P_SB, #512*4 ++2: ++ .set J, J - 64 ++ .set OFFSET, OFFSET + 64*4 ++ .endif ++ .unreq SBUF_DAT_THIS0 ++ .unreq SBUF_DAT_THIS1 ++ .unreq SBUF_DAT_THIS2 ++ .unreq SBUF_DAT_THIS3 ++.endm ++ ++ ++/* void ff_synth_filter_float_vfp(FFTContext *imdct, ++ * float *synth_buf_ptr, int *synth_buf_offset, ++ * float synth_buf2[32], const float window[512], ++ * float out[32], const float in[32], float scale) ++ */ ++function ff_synth_filter_float_vfp, export=1 ++ push {r3-r7,lr} ++ vpush {s16-s31} ++ ldr lr, [P_SB_OFF] ++ add a2, ORIG_P_SB, lr, LSL #2 @ calculate synth_buf to pass to imdct_half ++ mov P_SB, a2 @ and keep a copy for ourselves ++ bic J_WRAP, lr, #63 @ mangled to make testing for wrap easier in inner loop ++ sub lr, lr, #32 ++ and lr, lr, #512-32 ++ str lr, [P_SB_OFF] @ rotate offset, modulo buffer size, ready for next call ++ ldr a3, [sp, #(16+6+2)*4] @ fetch in from stack, to pass to imdct_half ++VFP vmov s16, SCALE @ imdct_half is free to corrupt s0, but it contains one of our arguments in hardfp case ++ bl ff_imdct_half_vfp ++VFP vmov SCALE, s16 ++ ++ fmrx OLDFPSCR, FPSCR ++ ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 ++ fmxr FPSCR, lr ++ ldr P_SB2_DN, [sp, #16*4] ++ ldr P_WIN_DN, [sp, #(16+6+0)*4] ++ ldr P_OUT_DN, [sp, #(16+6+1)*4] ++NOVFP vldr SCALE, [sp, #(16+6+3)*4] ++ ++#define IMM_OFF_SKEW 956 /* also valid immediate constant when you add 16*4 */ ++ add P_SB, P_SB, #IMM_OFF_SKEW @ so we can use -ve offsets to use full immediate offset range ++ add P_SB2_UP, P_SB2_DN, #16*4 ++ add P_WIN_UP, P_WIN_DN, #16*4+IMM_OFF_SKEW ++ add P_OUT_UP, P_OUT_DN, #16*4 ++ add 
P_SB2_DN, P_SB2_DN, #16*4 ++ add P_WIN_DN, P_WIN_DN, #12*4+IMM_OFF_SKEW ++ add P_OUT_DN, P_OUT_DN, #16*4 ++ mov I, #4 ++1: ++ vldmia P_SB2_UP!, {VB0-VB3} ++ vldmdb P_SB2_DN!, {VA0-VA3} ++ .set J, 512 - 64 ++ .set OFFSET, -IMM_OFF_SKEW ++ inner_loop ab,, head ++ .rept 7 ++ inner_loop ab, tail, head ++ .endr ++ inner_loop ab, tail ++ add P_WIN_UP, P_WIN_UP, #4*4 ++ sub P_WIN_DN, P_WIN_DN, #4*4 ++ vmul.f VB0, VB0, SCALE @ SCALE treated as scalar ++ add P_SB, P_SB, #(512+4)*4 ++ subs I, I, #1 ++ vmul.f VA0, VA0, SCALE ++ vstmia P_OUT_UP!, {VB0-VB3} ++ vstmdb P_OUT_DN!, {VA0-VA3} ++ bne 1b ++ ++ add P_SB2_DN, P_SB2_DN, #(16+28-12)*4 ++ sub P_SB2_UP, P_SB2_UP, #(16+16)*4 ++ add P_WIN_DN, P_WIN_DN, #(32+16+28-12)*4 ++ mov I, #4 ++1: ++ vldr.d d4, zero @ d4 = VC0 ++ vldr.d d5, zero ++ vldr.d d6, zero @ d6 = VD0 ++ vldr.d d7, zero ++ .set J, 512 - 64 ++ .set OFFSET, -IMM_OFF_SKEW ++ inner_loop cd,, head ++ .rept 7 ++ inner_loop cd, tail, head ++ .endr ++ inner_loop cd, tail ++ add P_WIN_UP, P_WIN_UP, #4*4 ++ sub P_WIN_DN, P_WIN_DN, #4*4 ++ add P_SB, P_SB, #(512+4)*4 ++ subs I, I, #1 ++ vstmia P_SB2_UP!, {VC0-VC3} ++ vstmdb P_SB2_DN!, {VD0-VD3} ++ bne 1b ++ ++ fmxr FPSCR, OLDFPSCR ++ vpop {s16-s31} ++ pop {r3-r7,pc} ++endfunc ++ ++ .unreq IMDCT ++ .unreq ORIG_P_SB ++ .unreq P_SB_OFF ++ .unreq I ++ .unreq P_SB2_UP ++ .unreq OLDFPSCR ++ .unreq P_SB2_DN ++ .unreq P_WIN_DN ++ .unreq P_OUT_DN ++ .unreq P_SB ++ .unreq J_WRAP ++ .unreq P_WIN_UP ++ .unreq P_OUT_UP ++ ++ .unreq SCALE ++ .unreq SBUF_DAT_REV0 ++ .unreq SBUF_DAT_REV1 ++ .unreq SBUF_DAT_REV2 ++ .unreq SBUF_DAT_REV3 ++ .unreq VA0 ++ .unreq VA3 ++ .unreq VB0 ++ .unreq VB3 ++ .unreq VC0 ++ .unreq VC3 ++ .unreq VD0 ++ .unreq VD3 ++ .unreq SBUF_DAT0 ++ .unreq SBUF_DAT1 ++ .unreq SBUF_DAT2 ++ .unreq SBUF_DAT3 ++ .unreq SBUF_DAT_ALT0 ++ .unreq SBUF_DAT_ALT1 ++ .unreq SBUF_DAT_ALT2 ++ .unreq SBUF_DAT_ALT3 ++ .unreq WIN_DN_DAT0 ++ .unreq WIN_UP_DAT0 ++ ++ .align 3 ++zero: .word 0, 0 +-- +1.7.9.5 diff --git 
a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0041-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-i.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0041-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-i.patch new file mode 100644 index 0000000000..72dee038f0 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0041-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-i.patch @@ -0,0 +1,102 @@ +From 8ead63b22d31bf71976fc6964922b43d8e0d660b Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 15 Jul 2013 18:28:10 +0100 +Subject: [PATCH 40/49] [ffmpeg] - backport - arm: Add VFP-accelerated version + of int32_to_float_fmul_scalar +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + Before After + Mean StdDev Mean StdDev Change +This function 1175.0 4.4 366.2 18.3 +220.8% +Overall 19285.5 292.0 18420.5 489.1 +4.7% + +Signed-off-by: Martin Storsjö +--- + libavcodec/arm/fmtconvert_init_arm.c | 10 ++++++ + libavcodec/arm/fmtconvert_vfp.S | 38 +++++++++++++++++++++++ + 2 files changed, 48 insertions(+) + +diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c +index 1d99c97..de3b78b 100644 +--- a/libavcodec/arm/fmtconvert_init_arm.c ++++ b/libavcodec/arm/fmtconvert_init_arm.c +@@ -28,6 +28,9 @@ + void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src, + float mul, int len); + ++void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, ++ float mul, int len); ++ + void ff_float_to_int16_neon(int16_t *dst, const float *src, long len); + void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); + +@@ -38,6 +41,13 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx + int cpu_flags = av_get_cpu_flags(); + + if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) { ++ if (!have_vfpv3(cpu_flags)) { ++ // This function doesn't use anything armv6 specific in itself, ++ // but ff_float_to_int16_vfp which 
is in the same assembly source ++ // file does, thus the whole file requires armv6 to be built. ++ c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp; ++ } ++ + c->float_to_int16 = ff_float_to_int16_vfp; + } + +diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S +index 7b012bc..3cc3e56 100644 +--- a/libavcodec/arm/fmtconvert_vfp.S ++++ b/libavcodec/arm/fmtconvert_vfp.S +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2008 Siarhei Siamashka ++ * Copyright (c) 2013 RISC OS Open Ltd + * + * This file is part of FFmpeg. + * +@@ -76,3 +77,40 @@ function ff_float_to_int16_vfp, export=1 + vpop {d8-d11} + pop {r4-r8,pc} + endfunc ++ ++/** ++ * ARM VFP optimised int32 to float conversion. ++ * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned ++ * (16 bytes alignment is best for BCM2835), little-endian. ++ */ ++@ void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, float mul, int len) ++function ff_int32_to_float_fmul_scalar_vfp, export=1 ++VFP tmp .req a4 ++VFP len .req a3 ++NOVFP tmp .req a3 ++NOVFP len .req a4 ++NOVFP vmov s0, a3 ++ ldr tmp, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1 ++ fmrx ip, FPSCR ++ fmxr FPSCR, tmp ++1: ++ vldmia a2!, {s8-s15} ++ vcvt.f32.s32 s8, s8 ++ vcvt.f32.s32 s9, s9 ++ vcvt.f32.s32 s10, s10 ++ vcvt.f32.s32 s11, s11 ++ vcvt.f32.s32 s12, s12 ++ vcvt.f32.s32 s13, s13 ++ vcvt.f32.s32 s14, s14 ++ vcvt.f32.s32 s15, s15 ++ vmul.f32 s8, s8, s0 ++ subs len, len, #8 ++ vstmia a1!, {s8-s11} ++ vstmia a1!, {s12-s15} ++ bne 1b ++ ++ fmxr FPSCR, ip ++ bx lr ++endfunc ++ .unreq tmp ++ .unreq len +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0042-ffmpeg-backport-fmtconvert-Add-a-new-method-int32_to.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0042-ffmpeg-backport-fmtconvert-Add-a-new-method-int32_to.patch new file mode 100644 index 0000000000..960ea13972 --- /dev/null +++ 
b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0042-ffmpeg-backport-fmtconvert-Add-a-new-method-int32_to.patch @@ -0,0 +1,78 @@ +From 7901e7216cf6406a2ea430c71af94ebee72f262b Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 15 Jul 2013 18:28:11 +0100 +Subject: [PATCH 41/49] [ffmpeg] - backport - fmtconvert: Add a new method, + int32_to_float_fmul_array8 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This is similar to int32_to_float_fmul_scalar, but +loads a new scalar multiplier every 8 input samples. +This enables the use of much larger input arrays, which +is important for pipelining on some CPUs (such as +ARMv6). + +Signed-off-by: Martin Storsjö +--- + libavcodec/fmtconvert.c | 10 ++++++++++ + libavcodec/fmtconvert.h | 16 ++++++++++++++++ + 2 files changed, 26 insertions(+) + +diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c +index 79e9645..1c45d35 100644 +--- a/libavcodec/fmtconvert.c ++++ b/libavcodec/fmtconvert.c +@@ -30,6 +30,15 @@ static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, + dst[i] = src[i] * mul; + } + ++static void int32_to_float_fmul_array8_c(FmtConvertContext *c, float *dst, ++ const int32_t *src, const float *mul, ++ int len) ++{ ++ int i; ++ for (i = 0; i < len; i += 8) ++ c->int32_to_float_fmul_scalar(&dst[i], &src[i], *mul++, 8); ++} ++ + static av_always_inline int float_to_int16_one(const float *src){ + return av_clip_int16(lrintf(*src)); + } +@@ -79,6 +88,7 @@ void ff_float_interleave_c(float *dst, const float **src, unsigned int len, + av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx) + { + c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c; ++ c->int32_to_float_fmul_array8 = int32_to_float_fmul_array8_c; + c->float_to_int16 = float_to_int16_c; + c->float_to_int16_interleave = float_to_int16_interleave_c; + c->float_interleave = ff_float_interleave_c; +diff --git a/libavcodec/fmtconvert.h 
b/libavcodec/fmtconvert.h +index 3fb9f4e..02468dc 100644 +--- a/libavcodec/fmtconvert.h ++++ b/libavcodec/fmtconvert.h +@@ -38,6 +38,22 @@ typedef struct FmtConvertContext { + void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); + + /** ++ * Convert an array of int32_t to float and multiply by a float value from another array, ++ * stepping along the float array once for each 8 integers. ++ * @param c pointer to FmtConvertContext. ++ * @param dst destination array of float. ++ * constraints: 16-byte aligned ++ * @param src source array of int32_t. ++ * constraints: 16-byte aligned ++ * @param mul source array of float multipliers. ++ * @param len number of elements to convert. ++ * constraints: multiple of 8 ++ */ ++ void (*int32_to_float_fmul_array8)(struct FmtConvertContext *c, ++ float *dst, const int32_t *src, ++ const float *mul, int len); ++ ++ /** + * Convert an array of float to an array of int16_t. + * + * Convert floats from in the range [-32768.0,32767.0] to ints +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0043-ffmpeg-backport-dcadec-Use-int32_to_float_fmul_array.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0043-ffmpeg-backport-dcadec-Use-int32_to_float_fmul_array.patch new file mode 100644 index 0000000000..5fc52e9fd4 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0043-ffmpeg-backport-dcadec-Use-int32_to_float_fmul_array.patch @@ -0,0 +1,90 @@ +From fa755fe82fe4cfbb85b7c57501912da2e1f316bc Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Tue, 16 Jul 2013 15:41:18 +0300 +Subject: [PATCH 42/49] [ffmpeg] - backport - dcadec: Use + int32_to_float_fmul_array8 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Martin Storsjö +--- + libavcodec/dcadec.c | 23 +++++++++++++++-------- + 1 file changed, 15 insertions(+), 8 deletions(-) + +diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c +index 
1b955e4..b648613 100644 +--- a/libavcodec/dcadec.c ++++ b/libavcodec/dcadec.c +@@ -1302,7 +1302,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) + + /* FIXME */ + float (*subband_samples)[DCA_SUBBANDS][8] = s->subband_samples[block_index]; +- LOCAL_ALIGNED_16(int, block, [8]); ++ LOCAL_ALIGNED_16(int, block, [8 * DCA_SUBBANDS]); + + /* + * Audio data +@@ -1315,6 +1315,8 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) + quant_step_table = lossy_quant_d; + + for (k = base_channel; k < s->prim_channels; k++) { ++ float rscale[DCA_SUBBANDS]; ++ + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + +@@ -1337,11 +1339,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) + * Extract bits from the bit stream + */ + if (!abits) { +- memset(subband_samples[k][l], 0, 8 * sizeof(subband_samples[0][0][0])); ++ rscale[l] = 0; ++ memset(block + 8 * l, 0, 8 * sizeof(block[0])); + } else { + /* Deal with transients */ + int sfi = s->transition_mode[k][l] && subsubframe >= s->transition_mode[k][l]; +- float rscale = quant_step_size * s->scale_factor[k][l][sfi] * ++ rscale[l] = quant_step_size * s->scale_factor[k][l][sfi] * + s->scalefactor_adj[k][sel]; + + if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table) { +@@ -1355,7 +1358,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) + block_code1 = get_bits(&s->gb, size); + block_code2 = get_bits(&s->gb, size); + err = decode_blockcodes(block_code1, block_code2, +- levels, block); ++ levels, block + 8 * l); + if (err) { + av_log(s->avctx, AV_LOG_ERROR, + "ERROR: block code look-up failed\n"); +@@ -1364,19 +1367,23 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) + } else { + /* no coding */ + for (m = 0; m < 8; m++) +- block[m] = get_sbits(&s->gb, abits - 3); ++ block[8 * l + m] = get_sbits(&s->gb, abits - 3); + } + } else { + /* Huffman coded */ + for (m = 0; m < 
8; m++) +- block[m] = get_bitalloc(&s->gb, ++ block[8 * l + m] = get_bitalloc(&s->gb, + &dca_smpl_bitalloc[abits], sel); + } + +- s->fmt_conv.int32_to_float_fmul_scalar(subband_samples[k][l], +- block, rscale, 8); + } ++ } + ++ s->fmt_conv.int32_to_float_fmul_array8(&s->fmt_conv, subband_samples[k][0], ++ block, rscale, 8 * s->vq_start_subband[k]); ++ ++ for (l = 0; l < s->vq_start_subband[k]; l++) { ++ int m; + /* + * Inverse ADPCM if in prediction mode + */ +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0044-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-i.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0044-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-i.patch new file mode 100644 index 0000000000..5048613376 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0044-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-i.patch @@ -0,0 +1,222 @@ +From c908a710261f33130569c4360175d8f19a282d67 Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 15 Jul 2013 18:28:12 +0100 +Subject: [PATCH 43/49] [ffmpeg] - backport - arm: Add VFP-accelerated version + of int32_to_float_fmul_array8 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + Before After + Mean StdDev Mean StdDev Change +This function 366.2 18.3 277.8 13.7 +31.9% +Overall 18420.5 489.1 17049.5 408.2 +8.0% + +Signed-off-by: Martin Storsjö +--- + libavcodec/arm/fmtconvert_init_arm.c | 6 +- + libavcodec/arm/fmtconvert_vfp.S | 162 +++++++++++++++++++++++ + 2 files changed, 167 insertions(+), 1 deletion(-) + +diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c +index de3b78b..92d94a0 100644 +--- a/libavcodec/arm/fmtconvert_init_arm.c ++++ b/libavcodec/arm/fmtconvert_init_arm.c +@@ -30,6 +30,9 @@ void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src, + + void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, + float mul, int len); ++void 
ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst, ++ const int32_t *src, const float *mul, ++ int len); + + void ff_float_to_int16_neon(int16_t *dst, const float *src, long len); + void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); +@@ -42,10 +45,11 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx + + if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) { + if (!have_vfpv3(cpu_flags)) { +- // This function doesn't use anything armv6 specific in itself, ++ // These functions don't use anything armv6 specific in themselves, + // but ff_float_to_int16_vfp which is in the same assembly source + // file does, thus the whole file requires armv6 to be built. + c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp; ++ c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp; + } + + c->float_to_int16 = ff_float_to_int16_vfp; +diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S +index 3cc3e56..a6d4ebd 100644 +--- a/libavcodec/arm/fmtconvert_vfp.S ++++ b/libavcodec/arm/fmtconvert_vfp.S +@@ -83,6 +83,168 @@ endfunc + * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned + * (16 bytes alignment is best for BCM2835), little-endian. + */ ++@ void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst, const int32_t *src, const float *mul, int len) ++function ff_int32_to_float_fmul_array8_vfp, export=1 ++ push {lr} ++ ldr a1, [sp, #4] ++ subs lr, a1, #3*8 ++ bcc 50f @ too short to pipeline ++ @ Now need to find (len / 8) % 3. The approximation ++ @ x / 24 = (x * 0xAB) >> 12 ++ @ is good for x < 4096, which is true for both AC3 and DCA. 
++ mov a1, #0xAB ++ ldr ip, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1 ++ mul a1, lr, a1 ++ vpush {s16-s31} ++ mov a1, a1, lsr #12 ++ add a1, a1, a1, lsl #1 ++ rsb a1, a1, lr, lsr #3 ++ cmp a1, #1 ++ fmrx a1, FPSCR ++ fmxr FPSCR, ip ++ beq 11f ++ blo 10f ++ @ Array is (2 + multiple of 3) x 8 floats long ++ @ drop through... ++ vldmia a3!, {s16-s23} ++ vldmia a4!, {s2,s3} ++ vldmia a3!, {s24-s31} ++ vcvt.f32.s32 s16, s16 ++ vcvt.f32.s32 s17, s17 ++ vcvt.f32.s32 s18, s18 ++ vcvt.f32.s32 s19, s19 ++ vcvt.f32.s32 s20, s20 ++ vcvt.f32.s32 s21, s21 ++ vcvt.f32.s32 s22, s22 ++ vcvt.f32.s32 s23, s23 ++ vmul.f32 s16, s16, s2 ++ @ drop through... ++3: ++ vldmia a3!, {s8-s15} ++ vldmia a4!, {s1} ++ vcvt.f32.s32 s24, s24 ++ vcvt.f32.s32 s25, s25 ++ vcvt.f32.s32 s26, s26 ++ vcvt.f32.s32 s27, s27 ++ vcvt.f32.s32 s28, s28 ++ vcvt.f32.s32 s29, s29 ++ vcvt.f32.s32 s30, s30 ++ vcvt.f32.s32 s31, s31 ++ vmul.f32 s24, s24, s3 ++ vstmia a2!, {s16-s19} ++ vstmia a2!, {s20-s23} ++2: ++ vldmia a3!, {s16-s23} ++ vldmia a4!, {s2} ++ vcvt.f32.s32 s8, s8 ++ vcvt.f32.s32 s9, s9 ++ vcvt.f32.s32 s10, s10 ++ vcvt.f32.s32 s11, s11 ++ vcvt.f32.s32 s12, s12 ++ vcvt.f32.s32 s13, s13 ++ vcvt.f32.s32 s14, s14 ++ vcvt.f32.s32 s15, s15 ++ vmul.f32 s8, s8, s1 ++ vstmia a2!, {s24-s27} ++ vstmia a2!, {s28-s31} ++1: ++ vldmia a3!, {s24-s31} ++ vldmia a4!, {s3} ++ vcvt.f32.s32 s16, s16 ++ vcvt.f32.s32 s17, s17 ++ vcvt.f32.s32 s18, s18 ++ vcvt.f32.s32 s19, s19 ++ vcvt.f32.s32 s20, s20 ++ vcvt.f32.s32 s21, s21 ++ vcvt.f32.s32 s22, s22 ++ vcvt.f32.s32 s23, s23 ++ vmul.f32 s16, s16, s2 ++ vstmia a2!, {s8-s11} ++ vstmia a2!, {s12-s15} ++ ++ subs lr, lr, #8*3 ++ bpl 3b ++ ++ vcvt.f32.s32 s24, s24 ++ vcvt.f32.s32 s25, s25 ++ vcvt.f32.s32 s26, s26 ++ vcvt.f32.s32 s27, s27 ++ vcvt.f32.s32 s28, s28 ++ vcvt.f32.s32 s29, s29 ++ vcvt.f32.s32 s30, s30 ++ vcvt.f32.s32 s31, s31 ++ vmul.f32 s24, s24, s3 ++ vstmia a2!, {s16-s19} ++ vstmia a2!, {s20-s23} ++ vstmia a2!, {s24-s27} ++ vstmia a2!, {s28-s31} ++ 
++ fmxr FPSCR, a1 ++ vpop {s16-s31} ++ pop {pc} ++ ++10: @ Array is (multiple of 3) x 8 floats long ++ vldmia a3!, {s8-s15} ++ vldmia a4!, {s1,s2} ++ vldmia a3!, {s16-s23} ++ vcvt.f32.s32 s8, s8 ++ vcvt.f32.s32 s9, s9 ++ vcvt.f32.s32 s10, s10 ++ vcvt.f32.s32 s11, s11 ++ vcvt.f32.s32 s12, s12 ++ vcvt.f32.s32 s13, s13 ++ vcvt.f32.s32 s14, s14 ++ vcvt.f32.s32 s15, s15 ++ vmul.f32 s8, s8, s1 ++ b 1b ++ ++11: @ Array is (1 + multiple of 3) x 8 floats long ++ vldmia a3!, {s24-s31} ++ vldmia a4!, {s3} ++ vldmia a3!, {s8-s15} ++ vldmia a4!, {s1} ++ vcvt.f32.s32 s24, s24 ++ vcvt.f32.s32 s25, s25 ++ vcvt.f32.s32 s26, s26 ++ vcvt.f32.s32 s27, s27 ++ vcvt.f32.s32 s28, s28 ++ vcvt.f32.s32 s29, s29 ++ vcvt.f32.s32 s30, s30 ++ vcvt.f32.s32 s31, s31 ++ vmul.f32 s24, s24, s3 ++ b 2b ++ ++50: ++ ldr lr, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1 ++ fmrx ip, FPSCR ++ fmxr FPSCR, lr ++51: ++ vldmia a3!, {s8-s15} ++ vldmia a4!, {s0} ++ vcvt.f32.s32 s8, s8 ++ vcvt.f32.s32 s9, s9 ++ vcvt.f32.s32 s10, s10 ++ vcvt.f32.s32 s11, s11 ++ vcvt.f32.s32 s12, s12 ++ vcvt.f32.s32 s13, s13 ++ vcvt.f32.s32 s14, s14 ++ vcvt.f32.s32 s15, s15 ++ vmul.f32 s8, s8, s0 ++ subs a1, a1, #8 ++ vstmia a2!, {s8-s11} ++ vstmia a2!, {s12-s15} ++ bne 51b ++ ++ fmxr FPSCR, ip ++ pop {pc} ++endfunc ++ ++/** ++ * ARM VFP optimised int32 to float conversion. ++ * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned ++ * (16 bytes alignment is best for BCM2835), little-endian. 
++ * TODO: could be further optimised by unrolling and interleaving, as above ++ */ + @ void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, float mul, int len) + function ff_int32_to_float_fmul_scalar_vfp, export=1 + VFP tmp .req a4 +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0045-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-i.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0045-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-i.patch new file mode 100644 index 0000000000..aef7f47ffd --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0045-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-i.patch @@ -0,0 +1,274 @@ +From 15520de67fc951213ab32661b8b368a9439e8b9a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Martin=20Storsj=C3=B6?= +Date: Fri, 19 Jul 2013 10:59:17 +0300 +Subject: [PATCH 44/49] [ffmpeg] - backport - arm: Add VFP-accelerated version + of imdct_half +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + Before After + Mean StdDev Mean StdDev Change +This function 2653.0 28.5 1108.8 51.4 +139.3% +Overall 17049.5 408.2 15973.0 223.2 +6.7% + +Signed-off-by: Martin Storsjö +--- + libavcodec/arm/Makefile | 1 + + libavcodec/arm/fft_init_arm.c | 9 ++ + libavcodec/arm/mdct_vfp.S | 205 ++++++++++++++++++++++++++++++ + 3 files changed, 215 insertions(+) + create mode 100644 libavcodec/arm/mdct_vfp.S + +diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile +index aee9d73..27e80d5 100644 +--- a/libavcodec/arm/Makefile ++++ b/libavcodec/arm/Makefile +@@ -59,6 +59,7 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \ + arm/simple_idct_armv6.o \ + + VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_vfp.o ++VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o + VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o + + NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \ +diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c +index fe0acc5..a000ea5 100644 
+--- a/libavcodec/arm/fft_init_arm.c ++++ b/libavcodec/arm/fft_init_arm.c +@@ -26,6 +26,8 @@ + void ff_fft_permute_neon(FFTContext *s, FFTComplex *z); + void ff_fft_calc_neon(FFTContext *s, FFTComplex *z); + ++void ff_imdct_half_vfp(FFTContext *s, FFTSample *output, const FFTSample *input); ++ + void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input); + void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input); + void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input); +@@ -48,6 +50,13 @@ av_cold void ff_fft_init_arm(FFTContext *s) + { + int cpu_flags = av_get_cpu_flags(); + ++ if (have_vfp(cpu_flags)) { ++#if CONFIG_MDCT ++ if (!have_vfpv3(cpu_flags)) ++ s->imdct_half = ff_imdct_half_vfp; ++#endif ++ } ++ + if (have_neon(cpu_flags)) { + #if CONFIG_FFT + s->fft_permute = ff_fft_permute_neon; +diff --git a/libavcodec/arm/mdct_vfp.S b/libavcodec/arm/mdct_vfp.S +new file mode 100644 +index 0000000..0623e96 +--- /dev/null ++++ b/libavcodec/arm/mdct_vfp.S +@@ -0,0 +1,205 @@ ++/* ++ * Copyright (c) 2013 RISC OS Open Ltd ++ * Author: Ben Avison ++ * ++ * This file is part of Libav. ++ * ++ * Libav is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * Libav is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with Libav; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/arm/asm.S" ++ ++CONTEXT .req a1 ++ORIGOUT .req a2 ++IN .req a3 ++OUT .req v1 ++REVTAB .req v2 ++TCOS .req v3 ++TSIN .req v4 ++OLDFPSCR .req v5 ++J0 .req a2 ++J1 .req a4 ++J2 .req ip ++J3 .req lr ++ ++.macro prerotation_innerloop ++ .set trig_lo, k ++ .set trig_hi, n4 - k - 2 ++ .set in_lo, trig_lo * 2 ++ .set in_hi, trig_hi * 2 ++ vldr d8, [TCOS, #trig_lo*4] @ s16,s17 ++ vldr d9, [TCOS, #trig_hi*4] @ s18,s19 ++ vldr s0, [IN, #in_hi*4 + 12] ++ vldr s1, [IN, #in_hi*4 + 4] ++ vldr s2, [IN, #in_lo*4 + 12] ++ vldr s3, [IN, #in_lo*4 + 4] ++ vmul.f s8, s0, s16 @ vector operation ++ vldr d10, [TSIN, #trig_lo*4] @ s20,s21 ++ vldr d11, [TSIN, #trig_hi*4] @ s22,s23 ++ vldr s4, [IN, #in_lo*4] ++ vldr s5, [IN, #in_lo*4 + 8] ++ vldr s6, [IN, #in_hi*4] ++ vldr s7, [IN, #in_hi*4 + 8] ++ ldr J0, [REVTAB, #trig_lo*2] ++ vmul.f s12, s0, s20 @ vector operation ++ ldr J2, [REVTAB, #trig_hi*2] ++ mov J1, J0, lsr #16 ++ and J0, J0, #255 @ halfword value will be < n4 ++ vmls.f s8, s4, s20 @ vector operation ++ mov J3, J2, lsr #16 ++ and J2, J2, #255 @ halfword value will be < n4 ++ add J0, OUT, J0, lsl #3 ++ vmla.f s12, s4, s16 @ vector operation ++ add J1, OUT, J1, lsl #3 ++ add J2, OUT, J2, lsl #3 ++ add J3, OUT, J3, lsl #3 ++ vstr s8, [J0] ++ vstr s9, [J1] ++ vstr s10, [J2] ++ vstr s11, [J3] ++ vstr s12, [J0, #4] ++ vstr s13, [J1, #4] ++ vstr s14, [J2, #4] ++ vstr s15, [J3, #4] ++ .set k, k + 2 ++.endm ++ ++.macro postrotation_innerloop tail, head ++ .set trig_lo_head, n8 - k - 2 ++ .set trig_hi_head, n8 + k ++ .set out_lo_head, trig_lo_head * 2 ++ .set out_hi_head, trig_hi_head * 2 ++ .set trig_lo_tail, n8 - (k - 2) - 2 ++ .set trig_hi_tail, n8 + (k - 2) ++ .set out_lo_tail, trig_lo_tail * 2 ++ .set out_hi_tail, trig_hi_tail * 2 ++ .if 
(k & 2) == 0 ++ TCOS_D0_HEAD .req d10 @ s20,s21 ++ TCOS_D1_HEAD .req d11 @ s22,s23 ++ TCOS_S0_TAIL .req s24 ++ .else ++ TCOS_D0_HEAD .req d12 @ s24,s25 ++ TCOS_D1_HEAD .req d13 @ s26,s27 ++ TCOS_S0_TAIL .req s20 ++ .endif ++ .ifnc "\tail","" ++ vmls.f s8, s0, TCOS_S0_TAIL @ vector operation ++ .endif ++ .ifnc "\head","" ++ vldr d8, [TSIN, #trig_lo_head*4] @ s16,s17 ++ vldr d9, [TSIN, #trig_hi_head*4] @ s18,s19 ++ vldr TCOS_D0_HEAD, [TCOS, #trig_lo_head*4] ++ .endif ++ .ifnc "\tail","" ++ vmla.f s12, s4, TCOS_S0_TAIL @ vector operation ++ .endif ++ .ifnc "\head","" ++ vldr s0, [OUT, #out_lo_head*4] ++ vldr s1, [OUT, #out_lo_head*4 + 8] ++ vldr s2, [OUT, #out_hi_head*4] ++ vldr s3, [OUT, #out_hi_head*4 + 8] ++ vldr s4, [OUT, #out_lo_head*4 + 4] ++ vldr s5, [OUT, #out_lo_head*4 + 12] ++ vldr s6, [OUT, #out_hi_head*4 + 4] ++ vldr s7, [OUT, #out_hi_head*4 + 12] ++ .endif ++ .ifnc "\tail","" ++ vstr s8, [OUT, #out_lo_tail*4] ++ vstr s9, [OUT, #out_lo_tail*4 + 8] ++ vstr s10, [OUT, #out_hi_tail*4] ++ vstr s11, [OUT, #out_hi_tail*4 + 8] ++ .endif ++ .ifnc "\head","" ++ vmul.f s8, s4, s16 @ vector operation ++ .endif ++ .ifnc "\tail","" ++ vstr s12, [OUT, #out_hi_tail*4 + 12] ++ vstr s13, [OUT, #out_hi_tail*4 + 4] ++ vstr s14, [OUT, #out_lo_tail*4 + 12] ++ vstr s15, [OUT, #out_lo_tail*4 + 4] ++ .endif ++ .ifnc "\head","" ++ vmul.f s12, s0, s16 @ vector operation ++ vldr TCOS_D1_HEAD, [TCOS, #trig_hi_head*4] ++ .endif ++ .unreq TCOS_D0_HEAD ++ .unreq TCOS_D1_HEAD ++ .unreq TCOS_S0_TAIL ++ .ifnc "\head","" ++ .set k, k + 2 ++ .endif ++.endm ++ ++ ++/* void ff_imdct_half_vfp(FFTContext *s, ++ * FFTSample *output, ++ * const FFTSample *input) ++ */ ++function ff_imdct_half_vfp, export=1 ++ ldr ip, [CONTEXT, #5*4] @ mdct_bits ++ teq ip, #6 ++ it ne ++ bne ff_imdct_half_c @ only case currently accelerated is the one used by DCA ++ ++ .set n, 1<<6 ++ .set n2, n/2 ++ .set n4, n/4 ++ .set n8, n/8 ++ ++ push {v1-v5,lr} ++ vpush {s16-s27} ++ fmrx OLDFPSCR, FPSCR ++ ldr lr, =0x03030000 
@ RunFast mode, short vectors of length 4, stride 1 ++ fmxr FPSCR, lr ++ mov OUT, ORIGOUT ++ ldr REVTAB, [CONTEXT, #2*4] ++ ldr TCOS, [CONTEXT, #6*4] ++ ldr TSIN, [CONTEXT, #7*4] ++ ++ .set k, 0 ++ .rept n8/2 ++ prerotation_innerloop ++ .endr ++ ++ fmxr FPSCR, OLDFPSCR ++ mov a1, OUT ++ bl ff_fft16_vfp ++ ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 ++ fmxr FPSCR, lr ++ ++ .set k, 0 ++ postrotation_innerloop , head ++ .rept n8/2 - 1 ++ postrotation_innerloop tail, head ++ .endr ++ postrotation_innerloop tail ++ ++ fmxr FPSCR, OLDFPSCR ++ vpop {s16-s27} ++ pop {v1-v5,pc} ++endfunc ++ ++ .unreq CONTEXT ++ .unreq ORIGOUT ++ .unreq IN ++ .unreq OUT ++ .unreq REVTAB ++ .unreq TCOS ++ .unreq TSIN ++ .unreq OLDFPSCR ++ .unreq J0 ++ .unreq J1 ++ .unreq J2 ++ .unreq J3 +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0046-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-d.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0046-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-d.patch new file mode 100644 index 0000000000..2e0ea7815f --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0046-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-d.patch @@ -0,0 +1,58 @@ +From 8e0babd84c7e03cf678aab8bcf7e2106fe2b3de6 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Martin=20Storsj=C3=B6?= +Date: Fri, 19 Jul 2013 11:03:32 +0300 +Subject: [PATCH 45/49] [ffmpeg] - backport - arm: Add VFP-accelerated version + of dca_lfe_fir +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + Before After + Mean StdDev Mean StdDev Change +This function 868.2 33.5 436.0 27.0 +99.1% +Overall 15973.0 223.2 15577.5 83.2 +2.5% + +Signed-off-by: Martin Storsjö +--- + libavcodec/arm/Makefile | 3 ++- + libavcodec/arm/dcadsp_init_arm.c | 4 ++++ + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile +index 27e80d5..7fe5bb5 100644 +--- 
a/libavcodec/arm/Makefile ++++ b/libavcodec/arm/Makefile +@@ -58,7 +58,8 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \ + arm/dsputil_armv6.o \ + arm/simple_idct_armv6.o \ + +-VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_vfp.o ++VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ ++ arm/synth_filter_vfp.o + VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o + VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o + +diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c +index 56568e0..a1efbff 100644 +--- a/libavcodec/arm/dcadsp_init_arm.c ++++ b/libavcodec/arm/dcadsp_init_arm.c +@@ -24,6 +24,8 @@ + #include "libavutil/attributes.h" + #include "libavcodec/dcadsp.h" + ++void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs, ++ int decifactor, float scale); + void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs, + int decifactor, float scale); + +@@ -31,6 +33,8 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s) + { + int cpu_flags = av_get_cpu_flags(); + ++ if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) ++ s->lfe_fir = ff_dca_lfe_fir_vfp; + if (have_neon(cpu_flags)) + s->lfe_fir = ff_dca_lfe_fir_neon; + } +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0047-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-f.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0047-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-f.patch new file mode 100644 index 0000000000..37284fd7ed --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0047-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-f.patch @@ -0,0 +1,339 @@ +From 018b74ea9d8f52788db18ed40838afca05e7b4df Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Martin=20Storsj=C3=B6?= +Date: Fri, 19 Jul 2013 11:23:57 +0300 +Subject: [PATCH 46/49] [ffmpeg] - backport - arm: Add VFP-accelerated version + of fft16 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + Before After + Mean StdDev 
Mean StdDev Change +This function 1389.3 4.2 967.8 35.1 +43.6% +Overall 15577.5 83.2 15400.0 336.4 +1.2% + +Signed-off-by: Martin Storsjö +--- + libavcodec/arm/Makefile | 1 + + libavcodec/arm/fft_vfp.S | 298 +++++++++++++++++++++++++++++++++++ + 2 files changed, 299 insertions(+) + create mode 100644 libavcodec/arm/fft_vfp.S + +diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile +index 7fe5bb5..7390a8b 100644 +--- a/libavcodec/arm/Makefile ++++ b/libavcodec/arm/Makefile +@@ -60,6 +60,7 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \ + + VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \ + arm/synth_filter_vfp.o ++VFP-OBJS-$(CONFIG_FFT) += arm/fft_vfp.o + VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o + VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o + +diff --git a/libavcodec/arm/fft_vfp.S b/libavcodec/arm/fft_vfp.S +new file mode 100644 +index 0000000..7845ebb +--- /dev/null ++++ b/libavcodec/arm/fft_vfp.S +@@ -0,0 +1,298 @@ ++/* ++ * Copyright (c) 2013 RISC OS Open Ltd ++ * Author: Ben Avison ++ * ++ * This file is part of Libav. ++ * ++ * Libav is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * Libav is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with Libav; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/arm/asm.S" ++ ++@ TODO: * FFTs wider than 16 ++@ * dispatch code ++ ++function fft4_vfp ++ vldr d0, [a1, #0*2*4] @ s0,s1 = z[0] ++ vldr d4, [a1, #1*2*4] @ s8,s9 = z[1] ++ vldr d1, [a1, #2*2*4] @ s2,s3 = z[2] ++ vldr d5, [a1, #3*2*4] @ s10,s11 = z[3] ++ @ stall ++ vadd.f s12, s0, s8 @ i0 ++ vadd.f s13, s1, s9 @ i1 ++ vadd.f s14, s2, s10 @ i2 ++ vadd.f s15, s3, s11 @ i3 ++ vsub.f s8, s0, s8 @ i4 ++ vsub.f s9, s1, s9 @ i5 ++ vsub.f s10, s2, s10 @ i6 ++ vsub.f s11, s3, s11 @ i7 ++ @ stall ++ @ stall ++ vadd.f s0, s12, s14 @ z[0].re ++ vsub.f s4, s12, s14 @ z[2].re ++ vadd.f s1, s13, s15 @ z[0].im ++ vsub.f s5, s13, s15 @ z[2].im ++ vadd.f s7, s9, s10 @ z[3].im ++ vsub.f s3, s9, s10 @ z[1].im ++ vadd.f s2, s8, s11 @ z[1].re ++ vsub.f s6, s8, s11 @ z[3].re ++ @ stall ++ @ stall ++ vstr d0, [a1, #0*2*4] ++ vstr d2, [a1, #2*2*4] ++ @ stall ++ @ stall ++ vstr d1, [a1, #1*2*4] ++ vstr d3, [a1, #3*2*4] ++ ++ bx lr ++endfunc ++ ++.macro macro_fft8_head ++ @ FFT4 ++ vldr d4, [a1, #0 * 2*4] ++ vldr d6, [a1, #1 * 2*4] ++ vldr d5, [a1, #2 * 2*4] ++ vldr d7, [a1, #3 * 2*4] ++ @ BF ++ vldr d12, [a1, #4 * 2*4] ++ vadd.f s16, s8, s12 @ vector op ++ vldr d14, [a1, #5 * 2*4] ++ vldr d13, [a1, #6 * 2*4] ++ vldr d15, [a1, #7 * 2*4] ++ vsub.f s20, s8, s12 @ vector op ++ vadd.f s0, s16, s18 ++ vsub.f s2, s16, s18 ++ vadd.f s1, s17, s19 ++ vsub.f s3, s17, s19 ++ vadd.f s7, s21, s22 ++ vsub.f s5, s21, s22 ++ vadd.f s4, s20, s23 ++ vsub.f s6, s20, s23 ++ vsub.f s20, s24, s28 @ vector op ++ vstr d0, [a1, #0 * 2*4] @ transfer s0-s7 to s24-s31 via memory ++ vstr d1, [a1, #1 * 2*4] ++ vldr s0, cos1pi4 ++ vadd.f s16, s24, s28 @ vector op ++ vstr d2, [a1, #2 * 2*4] ++ vstr d3, [a1, #3 * 2*4] ++ vldr d12, [a1, #0 * 2*4] ++ @ TRANSFORM ++ vmul.f s20, 
s20, s0 @ vector x scalar op ++ vldr d13, [a1, #1 * 2*4] ++ vldr d14, [a1, #2 * 2*4] ++ vldr d15, [a1, #3 * 2*4] ++ @ BUTTERFLIES ++ vadd.f s0, s18, s16 ++ vadd.f s1, s17, s19 ++ vsub.f s2, s17, s19 ++ vsub.f s3, s18, s16 ++ vadd.f s4, s21, s20 ++ vsub.f s5, s21, s20 ++ vadd.f s6, s22, s23 ++ vsub.f s7, s22, s23 ++ vadd.f s8, s0, s24 @ vector op ++ vstr d0, [a1, #0 * 2*4] @ transfer s0-s3 to s12-s15 via memory ++ vstr d1, [a1, #1 * 2*4] ++ vldr d6, [a1, #0 * 2*4] ++ vldr d7, [a1, #1 * 2*4] ++ vadd.f s1, s5, s6 ++ vadd.f s0, s7, s4 ++ vsub.f s2, s5, s6 ++ vsub.f s3, s7, s4 ++ vsub.f s12, s24, s12 @ vector op ++ vsub.f s5, s29, s1 ++ vsub.f s4, s28, s0 ++ vsub.f s6, s30, s2 ++ vsub.f s7, s31, s3 ++ vadd.f s16, s0, s28 @ vector op ++ vstr d6, [a1, #4 * 2*4] ++ vstr d7, [a1, #6 * 2*4] ++ vstr d4, [a1, #0 * 2*4] ++ vstr d5, [a1, #2 * 2*4] ++ vstr d2, [a1, #5 * 2*4] ++ vstr d3, [a1, #7 * 2*4] ++.endm ++ ++.macro macro_fft8_tail ++ vstr d8, [a1, #1 * 2*4] ++ vstr d9, [a1, #3 * 2*4] ++.endm ++ ++function fft8_vfp ++ ldr a3, =0x03030000 @ RunFast mode, vector length 4, stride 1 ++ fmrx a2, FPSCR ++ fmxr FPSCR, a3 ++ vpush {s16-s31} ++ ++ macro_fft8_head ++ macro_fft8_tail ++ ++ vpop {s16-s31} ++ fmxr FPSCR, a2 ++ bx lr ++endfunc ++ ++.align 3 ++cos1pi4: @ cos(1*pi/4) = sqrt(2)/2 ++ .float 0.707106769084930419921875 ++cos1pi8: @ cos(1*pi/8) = sqrt(2+sqrt(2))/2 ++ .float 0.92387950420379638671875 ++cos3pi8: @ cos(3*pi/8) = sqrt(2-sqrt(2))/2 ++ .float 0.3826834261417388916015625 ++ ++function ff_fft16_vfp, export=1 ++ ldr a3, =0x03030000 @ RunFast mode, vector length 4, stride 1 ++ fmrx a2, FPSCR ++ fmxr FPSCR, a3 ++ vpush {s16-s31} ++ ++ macro_fft8_head ++ @ FFT4(z+8) ++ vldr d10, [a1, #8 * 2*4] ++ vldr d12, [a1, #9 * 2*4] ++ vldr d11, [a1, #10 * 2*4] ++ vldr d13, [a1, #11 * 2*4] ++ macro_fft8_tail ++ vadd.f s16, s20, s24 @ vector op ++ @ FFT4(z+12) ++ vldr d4, [a1, #12 * 2*4] ++ vldr d6, [a1, #13 * 2*4] ++ vldr d5, [a1, #14 * 2*4] ++ vsub.f s20, s20, s24 @ vector op ++ vldr
d7, [a1, #15 * 2*4] ++ vadd.f s0, s16, s18 ++ vsub.f s4, s16, s18 ++ vadd.f s1, s17, s19 ++ vsub.f s5, s17, s19 ++ vadd.f s7, s21, s22 ++ vsub.f s3, s21, s22 ++ vadd.f s2, s20, s23 ++ vsub.f s6, s20, s23 ++ vadd.f s16, s8, s12 @ vector op ++ vstr d0, [a1, #8 * 2*4] ++ vstr d2, [a1, #10 * 2*4] ++ vstr d1, [a1, #9 * 2*4] ++ vsub.f s20, s8, s12 ++ vstr d3, [a1, #11 * 2*4] ++ @ TRANSFORM(z[2],z[6],z[10],z[14],cos1pi4,cos1pi4) ++ vldr d12, [a1, #10 * 2*4] ++ vadd.f s0, s16, s18 ++ vadd.f s1, s17, s19 ++ vsub.f s6, s16, s18 ++ vsub.f s7, s17, s19 ++ vsub.f s3, s21, s22 ++ vadd.f s2, s20, s23 ++ vadd.f s5, s21, s22 ++ vsub.f s4, s20, s23 ++ vstr d0, [a1, #12 * 2*4] ++ vmov s0, s6 ++ @ TRANSFORM(z[1],z[5],z[9],z[13],cos1pi8,cos3pi8) ++ vldr d6, [a1, #9 * 2*4] ++ vstr d1, [a1, #13 * 2*4] ++ vldr d1, cos1pi4 @ s2 = cos1pi4, s3 = cos1pi8 ++ vstr d2, [a1, #15 * 2*4] ++ vldr d7, [a1, #13 * 2*4] ++ vadd.f s4, s25, s24 ++ vsub.f s5, s25, s24 ++ vsub.f s6, s0, s7 ++ vadd.f s7, s0, s7 ++ vmul.f s20, s12, s3 @ vector op ++ @ TRANSFORM(z[3],z[7],z[11],z[15],cos3pi8,cos1pi8) ++ vldr d4, [a1, #11 * 2*4] ++ vldr d5, [a1, #15 * 2*4] ++ vldr s1, cos3pi8 ++ vmul.f s24, s4, s2 @ vector * scalar op ++ vmul.f s28, s12, s1 @ vector * scalar op ++ vmul.f s12, s8, s1 @ vector * scalar op ++ vadd.f s4, s20, s29 ++ vsub.f s5, s21, s28 ++ vsub.f s6, s22, s31 ++ vadd.f s7, s23, s30 ++ vmul.f s8, s8, s3 @ vector * scalar op ++ vldr d8, [a1, #1 * 2*4] ++ vldr d9, [a1, #5 * 2*4] ++ vldr d10, [a1, #3 * 2*4] ++ vldr d11, [a1, #7 * 2*4] ++ vldr d14, [a1, #2 * 2*4] ++ vadd.f s0, s6, s4 ++ vadd.f s1, s5, s7 ++ vsub.f s2, s5, s7 ++ vsub.f s3, s6, s4 ++ vadd.f s4, s12, s9 ++ vsub.f s5, s13, s8 ++ vsub.f s6, s14, s11 ++ vadd.f s7, s15, s10 ++ vadd.f s12, s0, s16 @ vector op ++ vstr d0, [a1, #1 * 2*4] ++ vstr d1, [a1, #5 * 2*4] ++ vldr d4, [a1, #1 * 2*4] ++ vldr d5, [a1, #5 * 2*4] ++ vadd.f s0, s6, s4 ++ vadd.f s1, s5, s7 ++ vsub.f s2, s5, s7 ++ vsub.f s3, s6, s4 ++ vsub.f s8, s16, s8 @ vector op ++ vstr d6, 
[a1, #1 * 2*4] ++ vstr d7, [a1, #5 * 2*4] ++ vldr d15, [a1, #6 * 2*4] ++ vsub.f s4, s20, s0 ++ vsub.f s5, s21, s1 ++ vsub.f s6, s22, s2 ++ vsub.f s7, s23, s3 ++ vadd.f s20, s0, s20 @ vector op ++ vstr d4, [a1, #9 * 2*4] ++ @ TRANSFORM_ZERO(z[0],z[4],z[8],z[12]) ++ vldr d6, [a1, #8 * 2*4] ++ vstr d5, [a1, #13 * 2*4] ++ vldr d7, [a1, #12 * 2*4] ++ vstr d2, [a1, #11 * 2*4] ++ vldr d8, [a1, #0 * 2*4] ++ vstr d3, [a1, #15 * 2*4] ++ vldr d9, [a1, #4 * 2*4] ++ vadd.f s0, s26, s24 ++ vadd.f s1, s25, s27 ++ vsub.f s2, s25, s27 ++ vsub.f s3, s26, s24 ++ vadd.f s4, s14, s12 ++ vadd.f s5, s13, s15 ++ vsub.f s6, s13, s15 ++ vsub.f s7, s14, s12 ++ vadd.f s8, s0, s28 @ vector op ++ vstr d0, [a1, #3 * 2*4] ++ vstr d1, [a1, #7 * 2*4] ++ vldr d6, [a1, #3 * 2*4] ++ vldr d7, [a1, #7 * 2*4] ++ vsub.f s0, s16, s4 ++ vsub.f s1, s17, s5 ++ vsub.f s2, s18, s6 ++ vsub.f s3, s19, s7 ++ vsub.f s12, s28, s12 @ vector op ++ vadd.f s16, s4, s16 @ vector op ++ vstr d10, [a1, #3 * 2*4] ++ vstr d11, [a1, #7 * 2*4] ++ vstr d4, [a1, #2 * 2*4] ++ vstr d5, [a1, #6 * 2*4] ++ vstr d0, [a1, #8 * 2*4] ++ vstr d1, [a1, #12 * 2*4] ++ vstr d6, [a1, #10 * 2*4] ++ vstr d7, [a1, #14 * 2*4] ++ vstr d8, [a1, #0 * 2*4] ++ vstr d9, [a1, #4 * 2*4] ++ ++ vpop {s16-s31} ++ fmxr FPSCR, a2 ++ bx lr ++endfunc +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0048-ffmpeg-backport-dcadsp-Add-a-new-method-qmf_32_subba.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0048-ffmpeg-backport-dcadsp-Add-a-new-method-qmf_32_subba.patch new file mode 100644 index 0000000000..1fdb8c0ae9 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0048-ffmpeg-backport-dcadsp-Add-a-new-method-qmf_32_subba.patch @@ -0,0 +1,140 @@ +From ed16009b0a05fbd344832d5ad2e982c169aec42c Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 15 Jul 2013 18:28:16 +0100 +Subject: [PATCH 47/49] [ffmpeg] - backport - dcadsp: Add a new method, + qmf_32_subbands +MIME-Version: 1.0 +Content-Type: text/plain; 
charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This does most of the work formerly carried out by +the static function qmf_32_subbands() in dcadec.c. + +Signed-off-by: Martin Storsjö +--- + libavcodec/dcadec.c | 26 +++++--------------------- + libavcodec/dcadsp.c | 30 ++++++++++++++++++++++++++++++ + libavcodec/dcadsp.h | 9 +++++++++ + 3 files changed, 44 insertions(+), 21 deletions(-) + +diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c +index b648613..4054d63 100644 +--- a/libavcodec/dcadec.c ++++ b/libavcodec/dcadec.c +@@ -1108,10 +1108,8 @@ static void qmf_32_subbands(DCAContext *s, int chans, + float scale) + { + const float *prCoeff; +- int i; + + int sb_act = s->subband_activity[chans]; +- int subindex; + + scale *= sqrt(1 / 8.0); + +@@ -1121,25 +1119,11 @@ static void qmf_32_subbands(DCAContext *s, int chans, + else /* Perfect reconstruction */ + prCoeff = fir_32bands_perfect; + +- for (i = sb_act; i < 32; i++) +- s->raXin[i] = 0.0; +- +- /* Reconstructed channel sample index */ +- for (subindex = 0; subindex < 8; subindex++) { +- /* Load in one sample from each subband and clear inactive subbands */ +- for (i = 0; i < sb_act; i++) { +- unsigned sign = (i - 1) & 2; +- uint32_t v = AV_RN32A(&samples_in[i][subindex]) ^ sign << 30; +- AV_WN32A(&s->raXin[i], v); +- } +- +- s->synth.synth_filter_float(&s->imdct, +- s->subband_fir_hist[chans], +- &s->hist_index[chans], +- s->subband_fir_noidea[chans], prCoeff, +- samples_out, s->raXin, scale); +- samples_out += 32; +- } ++ s->dcadsp.qmf_32_subbands(samples_in, sb_act, &s->synth, &s->imdct, ++ s->subband_fir_hist[chans], ++ &s->hist_index[chans], ++ s->subband_fir_noidea[chans], prCoeff, ++ samples_out, s->raXin, scale); + } + + static void lfe_interpolation_fir(DCAContext *s, int decimation_select, +diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c +index dd4994d..ab63f1b 100644 +--- a/libavcodec/dcadsp.c ++++ b/libavcodec/dcadsp.c +@@ -20,6 +20,7 @@ + */ + + #include "config.h" ++#include 
"libavutil/intreadwrite.h" + #include "dcadsp.h" + + static void dca_lfe_fir_c(float *out, const float *in, const float *coefs, +@@ -44,8 +45,37 @@ static void dca_lfe_fir_c(float *out, const float *in, const float *coefs, + } + } + ++static void dca_qmf_32_subbands(float samples_in[32][8], int sb_act, ++ SynthFilterContext *synth, FFTContext *imdct, ++ float synth_buf_ptr[512], ++ int *synth_buf_offset, float synth_buf2[32], ++ const float window[512], float *samples_out, ++ float raXin[32], float scale) ++{ ++ int i; ++ int subindex; ++ ++ for (i = sb_act; i < 32; i++) ++ raXin[i] = 0.0; ++ ++ /* Reconstructed channel sample index */ ++ for (subindex = 0; subindex < 8; subindex++) { ++ /* Load in one sample from each subband and clear inactive subbands */ ++ for (i = 0; i < sb_act; i++) { ++ unsigned sign = (i - 1) & 2; ++ uint32_t v = AV_RN32A(&samples_in[i][subindex]) ^ sign << 30; ++ AV_WN32A(&raXin[i], v); ++ } ++ ++ synth->synth_filter_float(imdct, synth_buf_ptr, synth_buf_offset, ++ synth_buf2, window, samples_out, raXin, scale); ++ samples_out += 32; ++ } ++} ++ + void ff_dcadsp_init(DCADSPContext *s) + { + s->lfe_fir = dca_lfe_fir_c; ++ s->qmf_32_subbands = dca_qmf_32_subbands; + if (ARCH_ARM) ff_dcadsp_init_arm(s); + } +diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h +index bb157f7..d86c1f3 100644 +--- a/libavcodec/dcadsp.h ++++ b/libavcodec/dcadsp.h +@@ -19,9 +19,18 @@ + #ifndef AVCODEC_DCADSP_H + #define AVCODEC_DCADSP_H + ++#include "avfft.h" ++#include "synth_filter.h" ++ + typedef struct DCADSPContext { + void (*lfe_fir)(float *out, const float *in, const float *coefs, + int decifactor, float scale); ++ void (*qmf_32_subbands)(float samples_in[32][8], int sb_act, ++ SynthFilterContext *synth, FFTContext *imdct, ++ float synth_buf_ptr[512], ++ int *synth_buf_offset, float synth_buf2[32], ++ const float window[512], float *samples_out, ++ float raXin[32], float scale); + } DCADSPContext; + + void ff_dcadsp_init(DCADSPContext *s); +-- +1.7.9.5 
diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0049-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-q.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0049-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-q.patch new file mode 100644 index 0000000000..72e0ff21d4 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0049-ffmpeg-backport-arm-Add-VFP-accelerated-version-of-q.patch @@ -0,0 +1,551 @@ +From a6c273927c5bb212e806be6ae10c81dcd81b2152 Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 15 Jul 2013 18:28:17 +0100 +Subject: [PATCH 48/49] [ffmpeg] - backport - arm: Add VFP-accelerated version + of qmf_32_subbands +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + Before After + Mean StdDev Mean StdDev Change +This function 1323.0 98.0 746.2 60.6 +77.3% +Overall 15400.0 336.4 14147.5 288.4 +8.9% + +Signed-off-by: Martin Storsjö +--- + libavcodec/arm/dcadsp_init_arm.c | 10 +- + libavcodec/arm/dcadsp_vfp.S | 493 +++++++++++++++++++++++++++ + 2 files changed, 502 insertions(+), 1 deletion(-) + create mode 100644 libavcodec/arm/dcadsp_vfp.S + +diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c +index a1efbff..58267a2 100644 +--- a/libavcodec/arm/dcadsp_init_arm.c ++++ b/libavcodec/arm/dcadsp_init_arm.c +@@ -26,6 +26,12 @@ + + void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs, + int decifactor, float scale); ++void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act, ++ SynthFilterContext *synth, FFTContext *imdct, ++ float synth_buf_ptr[512], ++ int *synth_buf_offset, float synth_buf2[32], ++ const float window[512], float *samples_out, ++ float raXin[32], float scale); + void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs, + int decifactor, float scale); + +@@ -33,8 +39,10 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s) + { + int cpu_flags = av_get_cpu_flags(); + +- if (have_vfp(cpu_flags) 
&& !have_vfpv3(cpu_flags)) ++ if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) { + s->lfe_fir = ff_dca_lfe_fir_vfp; ++ s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp; ++ } + if (have_neon(cpu_flags)) + s->lfe_fir = ff_dca_lfe_fir_neon; + } +diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S +new file mode 100644 +index 0000000..6039e87 +--- /dev/null ++++ b/libavcodec/arm/dcadsp_vfp.S +@@ -0,0 +1,493 @@ ++/* ++ * Copyright (c) 2013 RISC OS Open Ltd ++ * Author: Ben Avison ++ * ++ * This file is part of Libav. ++ * ++ * Libav is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * Libav is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. 
++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with Libav; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "libavutil/arm/asm.S" ++ ++POUT .req a1 ++PIN .req a2 ++PCOEF .req a3 ++DECIFACTOR .req a4 ++OLDFPSCR .req a4 ++COUNTER .req ip ++ ++SCALE32 .req s28 @ use vector of 4 in place of 9th scalar when decifactor=32 / JMAX=8 ++SCALE64 .req s0 @ spare register in scalar bank when decifactor=64 / JMAX=4 ++IN0 .req s4 ++IN1 .req s5 ++IN2 .req s6 ++IN3 .req s7 ++IN4 .req s0 ++IN5 .req s1 ++IN6 .req s2 ++IN7 .req s3 ++COEF0 .req s8 @ coefficient elements ++COEF1 .req s9 ++COEF2 .req s10 ++COEF3 .req s11 ++COEF4 .req s12 ++COEF5 .req s13 ++COEF6 .req s14 ++COEF7 .req s15 ++ACCUM0 .req s16 @ double-buffered multiply-accumulate results ++ACCUM4 .req s20 ++POST0 .req s24 @ do long-latency post-multiply in this vector in parallel ++POST1 .req s25 ++POST2 .req s26 ++POST3 .req s27 ++ ++ ++.macro inner_loop decifactor, dir, tail, head ++ .ifc "\dir","up" ++ .set X, 0 ++ .set Y, 4 ++ .else ++ .set X, 4*JMAX*4 - 4 ++ .set Y, -4 ++ .endif ++ .ifnc "\head","" ++ vldr COEF0, [PCOEF, #X + (0*JMAX + 0) * Y] ++ vldr COEF1, [PCOEF, #X + (1*JMAX + 0) * Y] ++ vldr COEF2, [PCOEF, #X + (2*JMAX + 0) * Y] ++ vldr COEF3, [PCOEF, #X + (3*JMAX + 0) * Y] ++ .endif ++ .ifnc "\tail","" ++ vadd.f POST0, ACCUM0, ACCUM4 @ vector operation ++ .endif ++ .ifnc "\head","" ++ vmul.f ACCUM0, COEF0, IN0 @ vector = vector * scalar ++ vldr COEF4, [PCOEF, #X + (0*JMAX + 1) * Y] ++ vldr COEF5, [PCOEF, #X + (1*JMAX + 1) * Y] ++ vldr COEF6, [PCOEF, #X + (2*JMAX + 1) * Y] ++ .endif ++ .ifnc "\tail","" ++ vmul.f POST0, POST0, SCALE\decifactor @ vector operation (SCALE may be scalar) ++ .endif ++ .ifnc "\head","" ++ vldr COEF7, [PCOEF, #X + (3*JMAX + 1) * Y] ++ .ifc "\tail","" ++ vmul.f ACCUM4, COEF4, IN1 @ vector operation ++ .endif ++ vldr COEF0, [PCOEF, #X + (0*JMAX + 2) * Y] ++ 
vldr COEF1, [PCOEF, #X + (1*JMAX + 2) * Y] ++ .ifnc "\tail","" ++ vmul.f ACCUM4, COEF4, IN1 @ vector operation ++ .endif ++ vldr COEF2, [PCOEF, #X + (2*JMAX + 2) * Y] ++ vldr COEF3, [PCOEF, #X + (3*JMAX + 2) * Y] ++ .endif ++ .ifnc "\tail","" ++ vstmia POUT!, {POST0-POST3} ++ .endif ++ .ifnc "\head","" ++ vmla.f ACCUM0, COEF0, IN2 @ vector = vector * scalar ++ vldr COEF4, [PCOEF, #X + (0*JMAX + 3) * Y] ++ vldr COEF5, [PCOEF, #X + (1*JMAX + 3) * Y] ++ vldr COEF6, [PCOEF, #X + (2*JMAX + 3) * Y] ++ vldr COEF7, [PCOEF, #X + (3*JMAX + 3) * Y] ++ vmla.f ACCUM4, COEF4, IN3 @ vector = vector * scalar ++ .if \decifactor == 32 ++ vldr COEF0, [PCOEF, #X + (0*JMAX + 4) * Y] ++ vldr COEF1, [PCOEF, #X + (1*JMAX + 4) * Y] ++ vldr COEF2, [PCOEF, #X + (2*JMAX + 4) * Y] ++ vldr COEF3, [PCOEF, #X + (3*JMAX + 4) * Y] ++ vmla.f ACCUM0, COEF0, IN4 @ vector = vector * scalar ++ vldr COEF4, [PCOEF, #X + (0*JMAX + 5) * Y] ++ vldr COEF5, [PCOEF, #X + (1*JMAX + 5) * Y] ++ vldr COEF6, [PCOEF, #X + (2*JMAX + 5) * Y] ++ vldr COEF7, [PCOEF, #X + (3*JMAX + 5) * Y] ++ vmla.f ACCUM4, COEF4, IN5 @ vector = vector * scalar ++ vldr COEF0, [PCOEF, #X + (0*JMAX + 6) * Y] ++ vldr COEF1, [PCOEF, #X + (1*JMAX + 6) * Y] ++ vldr COEF2, [PCOEF, #X + (2*JMAX + 6) * Y] ++ vldr COEF3, [PCOEF, #X + (3*JMAX + 6) * Y] ++ vmla.f ACCUM0, COEF0, IN6 @ vector = vector * scalar ++ vldr COEF4, [PCOEF, #X + (0*JMAX + 7) * Y] ++ vldr COEF5, [PCOEF, #X + (1*JMAX + 7) * Y] ++ vldr COEF6, [PCOEF, #X + (2*JMAX + 7) * Y] ++ vldr COEF7, [PCOEF, #X + (3*JMAX + 7) * Y] ++ vmla.f ACCUM4, COEF4, IN7 @ vector = vector * scalar ++ .endif ++ .endif ++.endm ++ ++.macro dca_lfe_fir decifactor ++ .if \decifactor == 32 ++ .set JMAX, 8 ++ vpush {s16-s31} ++ vmov SCALE32, s0 @ duplicate scalar across vector ++ vldr IN4, [PIN, #-4*4] ++ vldr IN5, [PIN, #-5*4] ++ vldr IN6, [PIN, #-6*4] ++ vldr IN7, [PIN, #-7*4] ++ .else ++ .set JMAX, 4 ++ vpush {s16-s27} ++ .endif ++ ++ mov COUNTER, #\decifactor/4 - 1 ++ inner_loop \decifactor, up,, head ++1: 
add PCOEF, PCOEF, #4*JMAX*4 ++ subs COUNTER, COUNTER, #1 ++ inner_loop \decifactor, up, tail, head ++ bne 1b ++ inner_loop \decifactor, up, tail ++ ++ mov COUNTER, #\decifactor/4 - 1 ++ inner_loop \decifactor, down,, head ++1: sub PCOEF, PCOEF, #4*JMAX*4 ++ subs COUNTER, COUNTER, #1 ++ inner_loop \decifactor, down, tail, head ++ bne 1b ++ inner_loop \decifactor, down, tail ++ ++ .if \decifactor == 32 ++ vpop {s16-s31} ++ .else ++ vpop {s16-s27} ++ .endif ++ fmxr FPSCR, OLDFPSCR ++ bx lr ++.endm ++ ++ ++/* void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs, ++ * int decifactor, float scale) ++ */ ++function ff_dca_lfe_fir_vfp, export=1 ++ teq DECIFACTOR, #32 ++ fmrx OLDFPSCR, FPSCR ++ ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 ++ fmxr FPSCR, ip ++NOVFP vldr s0, [sp] ++ vldr IN0, [PIN, #-0*4] ++ vldr IN1, [PIN, #-1*4] ++ vldr IN2, [PIN, #-2*4] ++ vldr IN3, [PIN, #-3*4] ++ beq 32f ++64: dca_lfe_fir 64 ++ .ltorg ++32: dca_lfe_fir 32 ++endfunc ++ ++ .unreq POUT ++ .unreq PIN ++ .unreq PCOEF ++ .unreq DECIFACTOR ++ .unreq OLDFPSCR ++ .unreq COUNTER ++ ++ .unreq SCALE32 ++ .unreq SCALE64 ++ .unreq IN0 ++ .unreq IN1 ++ .unreq IN2 ++ .unreq IN3 ++ .unreq IN4 ++ .unreq IN5 ++ .unreq IN6 ++ .unreq IN7 ++ .unreq COEF0 ++ .unreq COEF1 ++ .unreq COEF2 ++ .unreq COEF3 ++ .unreq COEF4 ++ .unreq COEF5 ++ .unreq COEF6 ++ .unreq COEF7 ++ .unreq ACCUM0 ++ .unreq ACCUM4 ++ .unreq POST0 ++ .unreq POST1 ++ .unreq POST2 ++ .unreq POST3 ++ ++ ++IN .req a1 ++SBACT .req a2 ++OLDFPSCR .req a3 ++IMDCT .req a4 ++WINDOW .req v1 ++OUT .req v2 ++BUF .req v3 ++SCALEINT .req v4 @ only used in softfp case ++COUNT .req v5 ++ ++SCALE .req s0 ++ ++/* Stack layout differs in softfp and hardfp cases: ++ * ++ * hardfp ++ * fp -> 6 arg words saved by caller ++ * a3,a4,v1-v3,v5,fp,lr on entry (a3 just to pad to 8 bytes) ++ * s16-s23 on entry ++ * align 16 ++ * buf -> 8*32*4 bytes buffer ++ * s0 on entry ++ * sp -> 3 arg words for callee ++ * ++ * softfp ++ * 
fp -> 7 arg words saved by caller ++ * a4,v1-v5,fp,lr on entry ++ * s16-s23 on entry ++ * align 16 ++ * buf -> 8*32*4 bytes buffer ++ * sp -> 4 arg words for callee ++ */ ++ ++/* void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act, ++ * SynthFilterContext *synth, FFTContext *imdct, ++ * float synth_buf_ptr[512], ++ * int *synth_buf_offset, float synth_buf2[32], ++ * const float window[512], float *samples_out, ++ * float raXin[32], float scale); ++ */ ++function ff_dca_qmf_32_subbands_vfp, export=1 ++VFP push {a3-a4,v1-v3,v5,fp,lr} ++NOVFP push {a4,v1-v5,fp,lr} ++ add fp, sp, #8*4 ++ vpush {s16-s23} ++ @ The buffer pointed at by raXin isn't big enough for us to do a ++ @ complete matrix transposition as we want to, so allocate an ++ @ alternative buffer from the stack. Align to 4 words for speed. ++ sub BUF, sp, #8*32*4 ++ bic BUF, BUF, #15 ++ mov sp, BUF ++ ldr lr, =0x03330000 @ RunFast mode, short vectors of length 4, stride 2 ++ fmrx OLDFPSCR, FPSCR ++ fmxr FPSCR, lr ++ @ COUNT is used to count down 2 things at once: ++ @ bits 0-4 are the number of word pairs remaining in the output row ++ @ bits 5-31 are the number of words to copy (with possible negation) ++ @ from the source matrix before we start zeroing the remainder ++ mov COUNT, #(-4 << 5) + 16 ++ adds COUNT, COUNT, SBACT, lsl #5 ++ bmi 2f ++1: ++ vldr s8, [IN, #(0*8+0)*4] ++ vldr s10, [IN, #(0*8+1)*4] ++ vldr s12, [IN, #(0*8+2)*4] ++ vldr s14, [IN, #(0*8+3)*4] ++ vldr s16, [IN, #(0*8+4)*4] ++ vldr s18, [IN, #(0*8+5)*4] ++ vldr s20, [IN, #(0*8+6)*4] ++ vldr s22, [IN, #(0*8+7)*4] ++ vneg.f s8, s8 ++ vldr s9, [IN, #(1*8+0)*4] ++ vldr s11, [IN, #(1*8+1)*4] ++ vldr s13, [IN, #(1*8+2)*4] ++ vldr s15, [IN, #(1*8+3)*4] ++ vneg.f s16, s16 ++ vldr s17, [IN, #(1*8+4)*4] ++ vldr s19, [IN, #(1*8+5)*4] ++ vldr s21, [IN, #(1*8+6)*4] ++ vldr s23, [IN, #(1*8+7)*4] ++ vstr d4, [BUF, #(0*32+0)*4] ++ vstr d5, [BUF, #(1*32+0)*4] ++ vstr d6, [BUF, #(2*32+0)*4] ++ vstr d7, [BUF, #(3*32+0)*4] ++ vstr
d8, [BUF, #(4*32+0)*4] ++ vstr d9, [BUF, #(5*32+0)*4] ++ vstr d10, [BUF, #(6*32+0)*4] ++ vstr d11, [BUF, #(7*32+0)*4] ++ vldr s9, [IN, #(3*8+0)*4] ++ vldr s11, [IN, #(3*8+1)*4] ++ vldr s13, [IN, #(3*8+2)*4] ++ vldr s15, [IN, #(3*8+3)*4] ++ vldr s17, [IN, #(3*8+4)*4] ++ vldr s19, [IN, #(3*8+5)*4] ++ vldr s21, [IN, #(3*8+6)*4] ++ vldr s23, [IN, #(3*8+7)*4] ++ vneg.f s9, s9 ++ vldr s8, [IN, #(2*8+0)*4] ++ vldr s10, [IN, #(2*8+1)*4] ++ vldr s12, [IN, #(2*8+2)*4] ++ vldr s14, [IN, #(2*8+3)*4] ++ vneg.f s17, s17 ++ vldr s16, [IN, #(2*8+4)*4] ++ vldr s18, [IN, #(2*8+5)*4] ++ vldr s20, [IN, #(2*8+6)*4] ++ vldr s22, [IN, #(2*8+7)*4] ++ vstr d4, [BUF, #(0*32+2)*4] ++ vstr d5, [BUF, #(1*32+2)*4] ++ vstr d6, [BUF, #(2*32+2)*4] ++ vstr d7, [BUF, #(3*32+2)*4] ++ vstr d8, [BUF, #(4*32+2)*4] ++ vstr d9, [BUF, #(5*32+2)*4] ++ vstr d10, [BUF, #(6*32+2)*4] ++ vstr d11, [BUF, #(7*32+2)*4] ++ add IN, IN, #4*8*4 ++ add BUF, BUF, #4*4 ++ subs COUNT, COUNT, #(4 << 5) + 2 ++ bpl 1b ++2: @ Now deal with trailing < 4 samples ++ adds COUNT, COUNT, #3 << 5 ++ bmi 4f @ sb_act was a multiple of 4 ++ bics lr, COUNT, #0x1F ++ bne 3f ++ @ sb_act was n*4+1 ++ vldr s8, [IN, #(0*8+0)*4] ++ vldr s10, [IN, #(0*8+1)*4] ++ vldr s12, [IN, #(0*8+2)*4] ++ vldr s14, [IN, #(0*8+3)*4] ++ vldr s16, [IN, #(0*8+4)*4] ++ vldr s18, [IN, #(0*8+5)*4] ++ vldr s20, [IN, #(0*8+6)*4] ++ vldr s22, [IN, #(0*8+7)*4] ++ vneg.f s8, s8 ++ vldr s9, zero ++ vldr s11, zero ++ vldr s13, zero ++ vldr s15, zero ++ vneg.f s16, s16 ++ vldr s17, zero ++ vldr s19, zero ++ vldr s21, zero ++ vldr s23, zero ++ vstr d4, [BUF, #(0*32+0)*4] ++ vstr d5, [BUF, #(1*32+0)*4] ++ vstr d6, [BUF, #(2*32+0)*4] ++ vstr d7, [BUF, #(3*32+0)*4] ++ vstr d8, [BUF, #(4*32+0)*4] ++ vstr d9, [BUF, #(5*32+0)*4] ++ vstr d10, [BUF, #(6*32+0)*4] ++ vstr d11, [BUF, #(7*32+0)*4] ++ add BUF, BUF, #2*4 ++ sub COUNT, COUNT, #1 ++ b 4f ++3: @ sb_act was n*4+2 or n*4+3, so do the first 2 ++ vldr s8, [IN, #(0*8+0)*4] ++ vldr s10, [IN, #(0*8+1)*4] ++ vldr s12, [IN, 
#(0*8+2)*4] ++ vldr s14, [IN, #(0*8+3)*4] ++ vldr s16, [IN, #(0*8+4)*4] ++ vldr s18, [IN, #(0*8+5)*4] ++ vldr s20, [IN, #(0*8+6)*4] ++ vldr s22, [IN, #(0*8+7)*4] ++ vneg.f s8, s8 ++ vldr s9, [IN, #(1*8+0)*4] ++ vldr s11, [IN, #(1*8+1)*4] ++ vldr s13, [IN, #(1*8+2)*4] ++ vldr s15, [IN, #(1*8+3)*4] ++ vneg.f s16, s16 ++ vldr s17, [IN, #(1*8+4)*4] ++ vldr s19, [IN, #(1*8+5)*4] ++ vldr s21, [IN, #(1*8+6)*4] ++ vldr s23, [IN, #(1*8+7)*4] ++ vstr d4, [BUF, #(0*32+0)*4] ++ vstr d5, [BUF, #(1*32+0)*4] ++ vstr d6, [BUF, #(2*32+0)*4] ++ vstr d7, [BUF, #(3*32+0)*4] ++ vstr d8, [BUF, #(4*32+0)*4] ++ vstr d9, [BUF, #(5*32+0)*4] ++ vstr d10, [BUF, #(6*32+0)*4] ++ vstr d11, [BUF, #(7*32+0)*4] ++ add BUF, BUF, #2*4 ++ sub COUNT, COUNT, #(2 << 5) + 1 ++ bics lr, COUNT, #0x1F ++ bne 4f ++ @ sb_act was n*4+3 ++ vldr s8, [IN, #(2*8+0)*4] ++ vldr s10, [IN, #(2*8+1)*4] ++ vldr s12, [IN, #(2*8+2)*4] ++ vldr s14, [IN, #(2*8+3)*4] ++ vldr s16, [IN, #(2*8+4)*4] ++ vldr s18, [IN, #(2*8+5)*4] ++ vldr s20, [IN, #(2*8+6)*4] ++ vldr s22, [IN, #(2*8+7)*4] ++ vldr s9, zero ++ vldr s11, zero ++ vldr s13, zero ++ vldr s15, zero ++ vldr s17, zero ++ vldr s19, zero ++ vldr s21, zero ++ vldr s23, zero ++ vstr d4, [BUF, #(0*32+0)*4] ++ vstr d5, [BUF, #(1*32+0)*4] ++ vstr d6, [BUF, #(2*32+0)*4] ++ vstr d7, [BUF, #(3*32+0)*4] ++ vstr d8, [BUF, #(4*32+0)*4] ++ vstr d9, [BUF, #(5*32+0)*4] ++ vstr d10, [BUF, #(6*32+0)*4] ++ vstr d11, [BUF, #(7*32+0)*4] ++ add BUF, BUF, #2*4 ++ sub COUNT, COUNT, #1 ++4: @ Now fill the remainder with 0 ++ vldr s8, zero ++ vldr s9, zero ++ ands COUNT, COUNT, #0x1F ++ beq 6f ++5: vstr d4, [BUF, #(0*32+0)*4] ++ vstr d4, [BUF, #(1*32+0)*4] ++ vstr d4, [BUF, #(2*32+0)*4] ++ vstr d4, [BUF, #(3*32+0)*4] ++ vstr d4, [BUF, #(4*32+0)*4] ++ vstr d4, [BUF, #(5*32+0)*4] ++ vstr d4, [BUF, #(6*32+0)*4] ++ vstr d4, [BUF, #(7*32+0)*4] ++ add BUF, BUF, #2*4 ++ subs COUNT, COUNT, #1 ++ bne 5b ++6: ++ fmxr FPSCR, OLDFPSCR ++ ldr WINDOW, [fp, #3*4] ++ ldr OUT, [fp, #4*4] ++ sub BUF, BUF, #32*4 
++NOVFP ldr SCALEINT, [fp, #6*4] ++ mov COUNT, #8 ++VFP vpush {SCALE} ++VFP sub sp, sp, #3*4 ++NOVFP sub sp, sp, #4*4 ++7: ++VFP ldr a1, [fp, #-7*4] @ imdct ++NOVFP ldr a1, [fp, #-8*4] ++ ldmia fp, {a2-a4} ++VFP stmia sp, {WINDOW, OUT, BUF} ++NOVFP stmia sp, {WINDOW, OUT, BUF, SCALEINT} ++VFP vldr SCALE, [sp, #3*4] ++ bl ff_synth_filter_float_vfp ++ add OUT, OUT, #32*4 ++ add BUF, BUF, #32*4 ++ subs COUNT, COUNT, #1 ++ bne 7b ++ ++A sub sp, fp, #(8+8)*4 ++T sub fp, fp, #(8+8)*4 ++T mov sp, fp ++ vpop {s16-s23} ++VFP pop {a3-a4,v1-v3,v5,fp,pc} ++NOVFP pop {a4,v1-v5,fp,pc} ++endfunc ++ ++ .unreq IN ++ .unreq SBACT ++ .unreq OLDFPSCR ++ .unreq IMDCT ++ .unreq WINDOW ++ .unreq OUT ++ .unreq BUF ++ .unreq SCALEINT ++ .unreq COUNT ++ ++ .unreq SCALE ++ ++ .align 2 ++zero: .word 0 +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0050-ffmpeg-backport-arm-Mangle-external-symbols-properly.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0050-ffmpeg-backport-arm-Mangle-external-symbols-properly.patch new file mode 100644 index 0000000000..1b64e2bdb2 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0050-ffmpeg-backport-arm-Mangle-external-symbols-properly.patch @@ -0,0 +1,64 @@ +From 101f5a2c5db12605c24fe4aa41b3fabacfd3bad3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Martin=20Storsj=C3=B6?= +Date: Mon, 22 Jul 2013 12:33:22 +0300 +Subject: [PATCH 49/49] [ffmpeg] - backport - arm: Mangle external symbols + properly in new vfp assembly files + +Reviewed-by: Kostya Shishkov +Signed-off-by: Michael Niedermayer +--- + libavcodec/arm/dcadsp_vfp.S | 2 +- + libavcodec/arm/mdct_vfp.S | 4 ++-- + libavcodec/arm/synth_filter_vfp.S | 2 +- + 3 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S +index 6039e87..5892a84 100644 +--- a/libavcodec/arm/dcadsp_vfp.S ++++ b/libavcodec/arm/dcadsp_vfp.S +@@ -463,7 +463,7 @@ NOVFP ldr a1, [fp, #-8*4] + VFP stmia sp, {WINDOW, OUT, BUF} + 
NOVFP stmia sp, {WINDOW, OUT, BUF, SCALEINT} + VFP vldr SCALE, [sp, #3*4] +- bl ff_synth_filter_float_vfp ++ bl X(ff_synth_filter_float_vfp) + add OUT, OUT, #32*4 + add BUF, BUF, #32*4 + subs COUNT, COUNT, #1 +diff --git a/libavcodec/arm/mdct_vfp.S b/libavcodec/arm/mdct_vfp.S +index 0623e96..94db24f 100644 +--- a/libavcodec/arm/mdct_vfp.S ++++ b/libavcodec/arm/mdct_vfp.S +@@ -151,7 +151,7 @@ function ff_imdct_half_vfp, export=1 + ldr ip, [CONTEXT, #5*4] @ mdct_bits + teq ip, #6 + it ne +- bne ff_imdct_half_c @ only case currently accelerated is the one used by DCA ++ bne X(ff_imdct_half_c) @ only case currently accelerated is the one used by DCA + + .set n, 1<<6 + .set n2, n/2 +@@ -175,7 +175,7 @@ function ff_imdct_half_vfp, export=1 + + fmxr FPSCR, OLDFPSCR + mov a1, OUT +- bl ff_fft16_vfp ++ bl X(ff_fft16_vfp) + ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1 + fmxr FPSCR, lr + +diff --git a/libavcodec/arm/synth_filter_vfp.S b/libavcodec/arm/synth_filter_vfp.S +index c219c41..e6e6408 100644 +--- a/libavcodec/arm/synth_filter_vfp.S ++++ b/libavcodec/arm/synth_filter_vfp.S +@@ -132,7 +132,7 @@ function ff_synth_filter_float_vfp, export=1 + str lr, [P_SB_OFF] @ rotate offset, modulo buffer size, ready for next call + ldr a3, [sp, #(16+6+2)*4] @ fetch in from stack, to pass to imdct_half + VFP vmov s16, SCALE @ imdct_half is free to corrupt s0, but it contains one of our arguments in hardfp case +- bl ff_imdct_half_vfp ++ bl X(ff_imdct_half_vfp) + VFP vmov SCALE, s16 + + fmrx OLDFPSCR, FPSCR +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0051-ffmpeg-backport-avio-Add-an-internal-function-for-re.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0051-ffmpeg-backport-avio-Add-an-internal-function-for-re.patch new file mode 100644 index 0000000000..1b17ab596c --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0051-ffmpeg-backport-avio-Add-an-internal-function-for-re.patch @@ -0,0 +1,72 @@ +From 
5ce8f2bf354b7adf904ac3e1438915586c5a0bb1 Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Wed, 31 Jul 2013 23:46:08 +0100 +Subject: [PATCH 51/54] [ffmpeg] - backport - avio: Add an internal function + for reading without copying +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +As long as there is enough contiguous data in the avio buffer, +just return a pointer to it instead of copying it to the caller +provided buffer. + +Signed-off-by: Martin Storsjö +--- + libavformat/avio_internal.h | 17 +++++++++++++++++ + libavformat/aviobuf.c | 12 ++++++++++++ + 2 files changed, 29 insertions(+) + +diff --git a/libavformat/avio_internal.h b/libavformat/avio_internal.h +index cf36764..e9ece57 100644 +--- a/libavformat/avio_internal.h ++++ b/libavformat/avio_internal.h +@@ -38,6 +38,23 @@ int ffio_init_context(AVIOContext *s, + + + /** ++ * Read size bytes from AVIOContext, returning a pointer. ++ * Note that the data pointed at by the returned pointer is only ++ * valid until the next call that references the same IO context. ++ * @param s IO context ++ * @param buf pointer to buffer into which to assemble the requested ++ * data if it is not available in contiguous addresses in the ++ * underlying buffer ++ * @param size number of bytes requested ++ * @param data address at which to store pointer: this will be ++ * a direct pointer into the underlying buffer if the requested ++ * number of bytes are available at contiguous addresses, otherwise ++ * will be a copy of buf ++ * @return number of bytes read or AVERROR ++ */ ++int ffio_read_indirect(AVIOContext *s, unsigned char *buf, int size, unsigned char **data); ++ ++/** + * Read size bytes from AVIOContext into buf. + * This reads at most 1 packet. If that is not enough fewer bytes will be + * returned. 
+diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c +index 7a73a17..465c46d 100644 +--- a/libavformat/aviobuf.c ++++ b/libavformat/aviobuf.c +@@ -522,6 +522,18 @@ int avio_read(AVIOContext *s, unsigned char *buf, int size) + return size1 - size; + } + ++int ffio_read_indirect(AVIOContext *s, unsigned char *buf, int size, unsigned char **data) ++{ ++ if (s->buf_end - s->buf_ptr >= size && !s->write_flag) { ++ *data = s->buf_ptr; ++ s->buf_ptr += size; ++ return size; ++ } else { ++ *data = buf; ++ return avio_read(s, buf, size); ++ } ++} ++ + int ffio_read_partial(AVIOContext *s, unsigned char *buf, int size) + { + int len; +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0052-ffmpeg-backport-mpegts-Remove-one-memcpy-per-packet.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0052-ffmpeg-backport-mpegts-Remove-one-memcpy-per-packet.patch new file mode 100644 index 0000000000..3bc1814dde --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0052-ffmpeg-backport-mpegts-Remove-one-memcpy-per-packet.patch @@ -0,0 +1,149 @@ +From 1496d8c12075c0f3783e348a5d73fef9e3000b0f Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Wed, 31 Jul 2013 23:46:08 +0100 +Subject: [PATCH 52/54] [ffmpeg] - backport - mpegts: Remove one memcpy per + packet +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This was being performed to ensure that a complete packet was held in +contiguous memory, prior to parsing the packet. However, the source buffer +is typically large enough that the packet was already contiguous, so it is +beneficial to return the packet by reference in most cases. 
+ + Before After + Mean StdDev Mean StdDev Change +memcpy 720.7 32.7 649.8 25.1 +10.9% +Overall 2372.7 46.1 2291.7 21.8 +3.5% + +Signed-off-by: Martin Storsjö +--- + libavformat/mpegts.c | 41 ++++++++++++++++++++++++++------------- + 1 file changed, 28 insertions(+), 13 deletions(-) + +diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c +index b5f5d63..5307521 100644 +--- a/libavformat/mpegts.c ++++ b/libavformat/mpegts.c +@@ -1863,17 +1863,17 @@ static int mpegts_resync(AVFormatContext *s) + } + + /* return -1 if error or EOF. Return 0 if OK. */ +-static int read_packet(AVFormatContext *s, uint8_t *buf, int raw_packet_size) ++static int read_packet(AVFormatContext *s, uint8_t *buf, int raw_packet_size, uint8_t **data) + { + AVIOContext *pb = s->pb; +- int skip, len; ++ int len; + + for(;;) { +- len = avio_read(pb, buf, TS_PACKET_SIZE); ++ len = ffio_read_indirect(pb, buf, TS_PACKET_SIZE, data); + if (len != TS_PACKET_SIZE) + return len < 0 ? len : AVERROR_EOF; + /* check packet sync byte */ +- if (buf[0] != 0x47) { ++ if ((*data)[0] != 0x47) { + /* find a new packet start */ + avio_seek(pb, -TS_PACKET_SIZE, SEEK_CUR); + if (mpegts_resync(s) < 0) +@@ -1881,19 +1881,25 @@ static int read_packet(AVFormatContext *s, uint8_t *buf, int raw_packet_size) + else + continue; + } else { +- skip = raw_packet_size - TS_PACKET_SIZE; +- if (skip > 0) +- avio_skip(pb, skip); + break; + } + } + return 0; + } + ++static void finished_reading_packet(AVFormatContext *s, int raw_packet_size) ++{ ++ AVIOContext *pb = s->pb; ++ int skip = raw_packet_size - TS_PACKET_SIZE; ++ if (skip > 0) ++ avio_skip(pb, skip); ++} ++ + static int handle_packets(MpegTSContext *ts, int nb_packets) + { + AVFormatContext *s = ts->stream; + uint8_t packet[TS_PACKET_SIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ++ uint8_t *data; + int packet_num, ret = 0; + + if (avio_tell(s->pb) != ts->last_pos) { +@@ -1926,10 +1932,11 @@ static int handle_packets(MpegTSContext *ts, int nb_packets) + if (ts->stop_parse > 0) + 
break; + +- ret = read_packet(s, packet, ts->raw_packet_size); ++ ret = read_packet(s, packet, ts->raw_packet_size, &data); + if (ret != 0) + break; +- ret = handle_packet(ts, packet); ++ ret = handle_packet(ts, data); ++ finished_reading_packet(s, ts->raw_packet_size); + if (ret != 0) + break; + } +@@ -2087,6 +2094,7 @@ static int mpegts_read_header(AVFormatContext *s) + int64_t pcrs[2], pcr_h; + int packet_count[2]; + uint8_t packet[TS_PACKET_SIZE]; ++ uint8_t *data; + + /* only read packets */ + +@@ -2102,18 +2110,21 @@ static int mpegts_read_header(AVFormatContext *s) + nb_pcrs = 0; + nb_packets = 0; + for(;;) { +- ret = read_packet(s, packet, ts->raw_packet_size); ++ ret = read_packet(s, packet, ts->raw_packet_size, &data); + if (ret < 0) + return -1; +- pid = AV_RB16(packet + 1) & 0x1fff; ++ pid = AV_RB16(data + 1) & 0x1fff; + if ((pcr_pid == -1 || pcr_pid == pid) && +- parse_pcr(&pcr_h, &pcr_l, packet) == 0) { ++ parse_pcr(&pcr_h, &pcr_l, data) == 0) { ++ finished_reading_packet(s, ts->raw_packet_size); + pcr_pid = pid; + packet_count[nb_pcrs] = nb_packets; + pcrs[nb_pcrs] = pcr_h * 300 + pcr_l; + nb_pcrs++; + if (nb_pcrs >= 2) + break; ++ } else { ++ finished_reading_packet(s, ts->raw_packet_size); + } + nb_packets++; + } +@@ -2145,15 +2156,19 @@ static int mpegts_raw_read_packet(AVFormatContext *s, + int64_t pcr_h, next_pcr_h, pos; + int pcr_l, next_pcr_l; + uint8_t pcr_buf[12]; ++ uint8_t *data; + + if (av_new_packet(pkt, TS_PACKET_SIZE) < 0) + return AVERROR(ENOMEM); + pkt->pos= avio_tell(s->pb); +- ret = read_packet(s, pkt->data, ts->raw_packet_size); ++ ret = read_packet(s, pkt->data, ts->raw_packet_size, &data); + if (ret < 0) { + av_free_packet(pkt); + return ret; + } ++ if (data != pkt->data) ++ memcpy(pkt->data, data, ts->raw_packet_size); ++ finished_reading_packet(s, ts->raw_packet_size); + if (ts->mpeg2ts_compute_pcr) { + /* compute exact PCR for each packet */ + if (parse_pcr(&pcr_h, &pcr_l, pkt->data) == 0) { +-- +1.7.9.5 diff --git 
a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0053-ffmpeg-backport-mpegts-Make-discard_pid-faster-for-s.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0053-ffmpeg-backport-mpegts-Make-discard_pid-faster-for-s.patch new file mode 100644 index 0000000000..744f7683d5 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0053-ffmpeg-backport-mpegts-Make-discard_pid-faster-for-s.patch @@ -0,0 +1,47 @@ +From 6aec5772fd5331b3514f308ab0895f6234b60045 Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 5 Aug 2013 13:12:51 +0100 +Subject: [PATCH 53/54] [ffmpeg] - backport - mpegts: Make discard_pid() + faster for single-program streams +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When a stream contains a single program, there's no point in doing a +PID -> program lookup. Normally the one and only program isn't disabled, +so no packets should be discarded. + + Before After + Mean StdDev Mean StdDev Change +discard_pid() 73.8 9.4 20.2 1.5 +264.8% +Overall 2300.8 28.0 2253.1 20.6 +2.1% + +Signed-off-by: Martin Storsjö +--- + libavformat/mpegts.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c +index 5307521..82dd209 100644 +--- a/libavformat/mpegts.c ++++ b/libavformat/mpegts.c +@@ -268,6 +268,17 @@ static int discard_pid(MpegTSContext *ts, unsigned int pid) + int i, j, k; + int used = 0, discarded = 0; + struct Program *p; ++ ++ /* If none of the programs have .discard=AVDISCARD_ALL then there's ++ * no way we have to discard this packet ++ */ ++ for (k = 0; k < ts->stream->nb_programs; k++) { ++ if (ts->stream->programs[k]->discard == AVDISCARD_ALL) ++ break; ++ } ++ if (k == ts->stream->nb_programs) ++ return 0; ++ + for(i=0; i<ts->nb_prg; i++) { + p = &ts->prg[i]; + for(j=0; j<p->nb_pids; j++) { +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0054-ffmpeg-backport-mpegts-Remove-one-64-bit-integer-mod.patch 
b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0054-ffmpeg-backport-mpegts-Remove-one-64-bit-integer-mod.patch new file mode 100644 index 0000000000..e684da9133 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0054-ffmpeg-backport-mpegts-Remove-one-64-bit-integer-mod.patch @@ -0,0 +1,76 @@ +From b79aa2b89ed9027a72a10c1d26ccdf2bb385d57b Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 5 Aug 2013 13:12:49 +0100 +Subject: [PATCH 54/54] [ffmpeg] - backport - mpegts: Remove one 64-bit + integer modulus operation per packet +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The common case of the pointer having increased by one packet (which results +in no change to the modulus) can be detected with a 64-bit subtraction, +which is far cheaper than a division on many platforms. + + Before After + Mean StdDev Mean StdDev Change +Divisions 248.3 8.8 51.5 7.4 +381.7% +Overall 2773.2 25.6 2372.5 43.1 +16.9% + +Signed-off-by: Martin Storsjö +--- + libavcodec/mathops.h | 9 +++++++++ + libavformat/mpegts.c | 5 ++++- + 2 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h +index 592f5a5..1d57342 100644 +--- a/libavcodec/mathops.h ++++ b/libavcodec/mathops.h +@@ -195,6 +195,15 @@ if ((y) < (x)) {\ + # define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32)) + #endif /* FASTDIV */ + ++#ifndef MOD_UNLIKELY ++# define MOD_UNLIKELY(modulus, dividend, divisor, prev_dividend) \ ++ do { \ ++ if ((prev_dividend) == 0 || (dividend) - (prev_dividend) != (divisor)) \ ++ (modulus) = (dividend) % (divisor); \ ++ (prev_dividend) = (dividend); \ ++ } while (0) ++#endif ++ + static inline av_const unsigned int ff_sqrt(unsigned int a) + { + unsigned int b; +diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c +index 82dd209..b995f60 100644 +--- a/libavformat/mpegts.c ++++ b/libavformat/mpegts.c +@@ -28,6 +28,7 @@ + #include "libavutil/avassert.h" + 
#include "libavcodec/bytestream.h" + #include "libavcodec/get_bits.h" ++#include "libavcodec/mathops.h" + #include "avformat.h" + #include "mpegts.h" + #include "internal.h" +@@ -99,6 +100,8 @@ struct MpegTSContext { + int raw_packet_size; + + int pos47; ++ /** position corresponding to pos47, or 0 if pos47 invalid */ ++ int64_t pos; + + /** if true, all pids are analyzed to find streams */ + int auto_guess; +@@ -1814,7 +1817,7 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet) + return 0; + + pos = avio_tell(ts->stream->pb); +- ts->pos47= pos % ts->raw_packet_size; ++ MOD_UNLIKELY(ts->pos47, pos, ts->raw_packet_size, ts->pos); + + if (tss->type == MPEGTS_SECTION) { + if (is_start) { +-- +1.7.9.5 diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0055-ffmpeg-backport-fix-compilation-droid-gcc-4-7.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0055-ffmpeg-backport-fix-compilation-droid-gcc-4-7.patch new file mode 100644 index 0000000000..5e986fe0df --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-0055-ffmpeg-backport-fix-compilation-droid-gcc-4-7.patch @@ -0,0 +1,110 @@ +From 8067f55edf3719182aed6e5b57b7863889f80218 Mon Sep 17 00:00:00 2001 +From: =?utf8?q?Reimar=20D=C3=B6ffinger?= +Date: Sat, 16 Mar 2013 13:36:20 +0100 +Subject: [PATCH] Fix compilation on ARM with android gcc 4.7 +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf8 +Content-Transfer-Encoding: 8bit + +With the current code it fails due to running out +of registers. +So code the store offsets manually into the assembler +instead. +Passes "make fate-dts". 
+ +Signed-off-by: Reimar Döffinger +--- + libavcodec/arm/dca.h | 74 ++++++++++++++++++++++++-------------------------- + 1 file changed, 36 insertions(+), 38 deletions(-) + +diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h +index 2cfd18a..431b62e 100644 +--- a/libavcodec/arm/dca.h ++++ b/libavcodec/arm/dca.h +@@ -34,46 +34,44 @@ static inline int decode_blockcodes(int code1, int code2, int levels, + { + int v0, v1, v2, v3, v4, v5; + +- __asm__ ("smmul %8, %14, %18 \n" +- "smmul %11, %15, %18 \n" +- "smlabb %14, %8, %17, %14 \n" +- "smlabb %15, %11, %17, %15 \n" +- "smmul %9, %8, %18 \n" +- "smmul %12, %11, %18 \n" +- "sub %14, %14, %16, lsr #1 \n" +- "sub %15, %15, %16, lsr #1 \n" +- "smlabb %8, %9, %17, %8 \n" +- "smlabb %11, %12, %17, %11 \n" +- "smmul %10, %9, %18 \n" +- "smmul %13, %12, %18 \n" +- "str %14, %0 \n" +- "str %15, %4 \n" +- "sub %8, %8, %16, lsr #1 \n" +- "sub %11, %11, %16, lsr #1 \n" +- "smlabb %9, %10, %17, %9 \n" +- "smlabb %12, %13, %17, %12 \n" +- "smmul %14, %10, %18 \n" +- "smmul %15, %13, %18 \n" +- "str %8, %1 \n" +- "str %11, %5 \n" +- "sub %9, %9, %16, lsr #1 \n" +- "sub %12, %12, %16, lsr #1 \n" +- "smlabb %10, %14, %17, %10 \n" +- "smlabb %13, %15, %17, %13 \n" +- "str %9, %2 \n" +- "str %12, %6 \n" +- "sub %10, %10, %16, lsr #1 \n" +- "sub %13, %13, %16, lsr #1 \n" +- "str %10, %3 \n" +- "str %13, %7 \n" +- : "=m"(values[0]), "=m"(values[1]), +- "=m"(values[2]), "=m"(values[3]), +- "=m"(values[4]), "=m"(values[5]), +- "=m"(values[6]), "=m"(values[7]), +- "=&r"(v0), "=&r"(v1), "=&r"(v2), ++ __asm__ ("smmul %0, %6, %10 \n" ++ "smmul %3, %7, %10 \n" ++ "smlabb %6, %0, %9, %6 \n" ++ "smlabb %7, %3, %9, %7 \n" ++ "smmul %1, %0, %10 \n" ++ "smmul %4, %3, %10 \n" ++ "sub %6, %6, %8, lsr #1 \n" ++ "sub %7, %7, %8, lsr #1 \n" ++ "smlabb %0, %1, %9, %0 \n" ++ "smlabb %3, %4, %9, %3 \n" ++ "smmul %2, %1, %10 \n" ++ "smmul %5, %4, %10 \n" ++ "str %6, [%11, #0] \n" ++ "str %7, [%11, #16] \n" ++ "sub %0, %0, %8, lsr #1 \n" ++ "sub %3, %3, 
%8, lsr #1 \n" ++ "smlabb %1, %2, %9, %1 \n" ++ "smlabb %4, %5, %9, %4 \n" ++ "smmul %6, %2, %10 \n" ++ "smmul %7, %5, %10 \n" ++ "str %0, [%11, #4] \n" ++ "str %3, [%11, #20] \n" ++ "sub %1, %1, %8, lsr #1 \n" ++ "sub %4, %4, %8, lsr #1 \n" ++ "smlabb %2, %6, %9, %2 \n" ++ "smlabb %5, %7, %9, %5 \n" ++ "str %1, [%11, #8] \n" ++ "str %4, [%11, #24] \n" ++ "sub %2, %2, %8, lsr #1 \n" ++ "sub %5, %5, %8, lsr #1 \n" ++ "str %2, [%11, #12] \n" ++ "str %5, [%11, #28] \n" ++ : "=&r"(v0), "=&r"(v1), "=&r"(v2), + "=&r"(v3), "=&r"(v4), "=&r"(v5), + "+&r"(code1), "+&r"(code2) +- : "r"(levels - 1), "r"(-levels), "r"(ff_inverse[levels])); ++ : "r"(levels - 1), "r"(-levels), ++ "r"(ff_inverse[levels]), "r"(values) ++ : "memory"); + + return code1 | code2; + } +-- +1.7.10.4 + + diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.01-add_xvba_support.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.01-add_xvba_support.patch similarity index 88% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.01-add_xvba_support.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.01-add_xvba_support.patch index f384f8632f..7cab84567d 100644 --- a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.01-add_xvba_support.patch +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.01-add_xvba_support.patch @@ -1,42 +1,15 @@ -From 67895a77c9e5f519166dd0ce4a2a98649194b11b Mon Sep 17 00:00:00 2001 -From: Rainer Hochecker -Date: Sat, 8 Oct 2011 16:45:13 +0200 -Subject: [PATCH] ffmpeg: add xvba hwaccel - ---- - configure | 11 ++ - libavcodec/Makefile | 6 ++ - libavcodec/allcodecs.c | 4 + - libavcodec/h264.c | 3 + - libavcodec/xvba.c | 66 ++++++++++++ - libavcodec/xvba.h | 71 +++++++++++++ - libavcodec/xvba_h264.c | 192 ++++++++++++++++++++++++++++++++++ - libavcodec/xvba_internal.h | 24 +++++ - libavcodec/xvba_mpeg2.c | 52 +++++++++ - libavcodec/xvba_vc1.c | 190 +++++++++++++++++++++++++++++++++ - libavutil/pixdesc.c | 6 ++ - libavutil/pixfmt.h | 1 
+ - 12 files changed, 626 insertions(+) - create mode 100644 libavcodec/xvba.c - create mode 100644 libavcodec/xvba.h - create mode 100644 libavcodec/xvba_h264.c - create mode 100644 libavcodec/xvba_internal.h - create mode 100644 libavcodec/xvba_mpeg2.c - create mode 100644 libavcodec/xvba_vc1.c - -diff --git a/configure b/configure -index 351611d..876a6ea 100755 ---- a/configure -+++ b/configure -@@ -144,6 +144,7 @@ Hardware accelerators: - --enable-vaapi enable VAAPI code +diff -Naur ffmpeg-1.2.3/configure ffmpeg-1.2.3.patch/configure +--- ffmpeg-1.2.3/configure 2013-09-09 22:46:04.636832059 +0200 ++++ ffmpeg-1.2.3.patch/configure 2013-09-09 22:47:15.023872481 +0200 +@@ -144,6 +144,7 @@ + --disable-vaapi disable VAAPI code [autodetect] --enable-vda enable VDA code - --enable-vdpau enable VDPAU code + --disable-vdpau disable VDPAU code [autodetect] + --disable-xvba disable XVBA code Individual component options: --disable-everything disable all components listed below -@@ -1197,6 +1198,7 @@ HWACCEL_LIST=" +@@ -1197,6 +1198,7 @@ vaapi vda vdpau @@ -44,7 +17,7 @@ index 351611d..876a6ea 100755 " LIBRARY_LIST=" -@@ -1827,6 +1829,7 @@ crystalhd_deps="libcrystalhd_libcrystalhd_if_h" +@@ -1827,6 +1829,7 @@ dxva2_deps="dxva2api_h" vaapi_deps="va_va_h" vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads" @@ -52,7 +25,7 @@ index 351611d..876a6ea 100755 vda_extralibs="-framework CoreFoundation -framework VideoDecodeAcceleration -framework QuartzCore" vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h" -@@ -1847,6 +1850,8 @@ h264_vdpau_decoder_deps="vdpau" +@@ -1847,6 +1850,8 @@ h264_vdpau_decoder_select="h264_decoder" h264_vdpau_hwaccel_deps="vdpau" h264_vdpau_hwaccel_select="h264_decoder" @@ -61,7 +34,7 @@ index 351611d..876a6ea 100755 mpeg_vdpau_decoder_deps="vdpau" mpeg_vdpau_decoder_select="mpegvideo_decoder" mpeg1_vdpau_decoder_deps="vdpau" -@@ -1859,6 +1864,8 @@ mpeg2_dxva2_hwaccel_select="mpeg2video_decoder" +@@ -1859,6 +1864,8 @@ mpeg2_vaapi_hwaccel_deps="vaapi" 
mpeg2_vaapi_hwaccel_select="mpeg2video_decoder" mpeg2_vdpau_hwaccel_deps="vdpau" @@ -70,7 +43,7 @@ index 351611d..876a6ea 100755 mpeg2_vdpau_hwaccel_select="mpeg2video_decoder" mpeg4_crystalhd_decoder_select="crystalhd" mpeg4_vaapi_hwaccel_deps="vaapi" -@@ -1877,11 +1884,14 @@ vc1_vdpau_decoder_deps="vdpau" +@@ -1877,11 +1884,14 @@ vc1_vdpau_decoder_select="vc1_decoder" vc1_vdpau_hwaccel_deps="vdpau" vc1_vdpau_hwaccel_select="vc1_decoder" @@ -85,71 +58,18 @@ index 351611d..876a6ea 100755 # parsers h264_parser_select="golomb h264chroma h264dsp h264pred h264qpel videodsp" -@@ -3832,6 +3842,7 @@ check_header termios.h - check_header unistd.h +@@ -3836,6 +3846,7 @@ check_header vdpau/vdpau.h check_header vdpau/vdpau_x11.h -+check_header amd/amdxvba.h check_cpp_condition vdpau/vdpau.h "defined(VDP_DECODER_PROFILE_MPEG4_PART2_SP)" && enable vdpau_mpeg4_support ++check_header amd/amdxvba.h check_header VideoDecodeAcceleration/VDADecoder.h -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index dc065a5..c386923 100644 ---- a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -12,6 +12,7 @@ HEADERS = avcodec.h \ - vdpau.h \ - version.h \ - xvmc.h \ -+ xvba.h \ - - OBJS = allcodecs.o \ - audioconvert.o \ -@@ -73,6 +74,7 @@ OBJS-$(CONFIG_SHARED) += log2_tab.o - OBJS-$(CONFIG_SINEWIN) += sinewin.o - OBJS-$(CONFIG_VAAPI) += vaapi.o - OBJS-$(CONFIG_VDPAU) += vdpau.o -+OBJS-$(CONFIG_XVBA) += xvba.o - OBJS-$(CONFIG_VIDEODSP) += videodsp.o - OBJS-$(CONFIG_VP3DSP) += vp3dsp.o - -@@ -232,6 +234,7 @@ OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o - OBJS-$(CONFIG_H264_VDA_HWACCEL) += vda_h264.o - OBJS-$(CONFIG_H264_VDA_DECODER) += vda_h264_dec.o - OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o -+OBJS-$(CONFIG_H264_XVBA_HWACCEL) += xvba_h264.o - OBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuv.o huffyuvdec.o - OBJS-$(CONFIG_HUFFYUV_ENCODER) += huffyuv.o huffyuvenc.o - OBJS-$(CONFIG_IAC_DECODER) += imc.o -@@ -295,6 +298,7 @@ OBJS-$(CONFIG_MPEG1VIDEO_ENCODER) += mpeg12enc.o mpeg12.o 
\ - OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL) += dxva2_mpeg2.o - OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL) += vaapi_mpeg2.o - OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL) += vdpau_mpeg12.o -+OBJS-$(CONFIG_MPEG2_XVBA_HWACCEL) += xvba_mpeg2.o - OBJS-$(CONFIG_MPEG2VIDEO_DECODER) += mpeg12.o mpeg12data.o - OBJS-$(CONFIG_MPEG2VIDEO_ENCODER) += mpeg12enc.o mpeg12.o \ - timecode.o -@@ -459,6 +463,7 @@ OBJS-$(CONFIG_VC1_DECODER) += vc1dec.o vc1.o vc1data.o vc1dsp.o \ - OBJS-$(CONFIG_VC1_DXVA2_HWACCEL) += dxva2_vc1.o - OBJS-$(CONFIG_VC1_VAAPI_HWACCEL) += vaapi_vc1.o - OBJS-$(CONFIG_VC1_VDPAU_HWACCEL) += vdpau_vc1.o -+OBJS-$(CONFIG_VC1_XVBA_HWACCEL) += xvba_vc1.o - OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o - OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdav.o - OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdav.o -@@ -788,6 +793,7 @@ SKIPHEADERS-$(CONFIG_LIBSCHROEDINGER) += libschroedinger.h - SKIPHEADERS-$(CONFIG_LIBUTVIDEO) += libutvideo.h - SKIPHEADERS-$(CONFIG_MPEG_XVMC_DECODER) += xvmc.h - SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_internal.h -+SKIPHEADERS-$(CONFIG_XVBA) += xvba_internal.h - SKIPHEADERS-$(CONFIG_VDA) += vda.h - SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h - SKIPHEADERS-$(HAVE_OS2THREADS) += os2threads.h -diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c -index 584446f..7a8f61c 100644 ---- a/libavcodec/allcodecs.c -+++ b/libavcodec/allcodecs.c -@@ -79,18 +79,22 @@ void avcodec_register_all(void) + check_header windows.h +diff -Naur ffmpeg-1.2.3/libavcodec/allcodecs.c ffmpeg-1.2.3.patch/libavcodec/allcodecs.c +--- ffmpeg-1.2.3/libavcodec/allcodecs.c 2013-08-27 02:13:44.000000000 +0200 ++++ ffmpeg-1.2.3.patch/libavcodec/allcodecs.c 2013-09-09 22:46:40.577852790 +0200 +@@ -79,18 +79,22 @@ REGISTER_HWACCEL(H264_VAAPI, h264_vaapi); REGISTER_HWACCEL(H264_VDA, h264_vda); REGISTER_HWACCEL(H264_VDPAU, h264_vdpau); @@ -172,10 +92,9 @@ index 584446f..7a8f61c 100644 /* video codecs */ REGISTER_ENCODER(A64MULTI, a64multi); -diff --git a/libavcodec/h264.c b/libavcodec/h264.c -index 937ad7a..299039f 100644 ---- 
a/libavcodec/h264.c -+++ b/libavcodec/h264.c +diff -Naur ffmpeg-1.2.3/libavcodec/h264.c ffmpeg-1.2.3.patch/libavcodec/h264.c +--- ffmpeg-1.2.3/libavcodec/h264.c 2013-09-09 22:46:04.639832061 +0200 ++++ ffmpeg-1.2.3.patch/libavcodec/h264.c 2013-09-09 22:46:40.579852792 +0200 @@ -81,6 +81,9 @@ #if CONFIG_H264_VDPAU_HWACCEL AV_PIX_FMT_VDPAU, @@ -186,11 +105,60 @@ index 937ad7a..299039f 100644 AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE }; -diff --git a/libavcodec/xvba.c b/libavcodec/xvba.c -new file mode 100644 -index 0000000..be29e5d ---- /dev/null -+++ b/libavcodec/xvba.c +diff -Naur ffmpeg-1.2.3/libavcodec/Makefile ffmpeg-1.2.3.patch/libavcodec/Makefile +--- ffmpeg-1.2.3/libavcodec/Makefile 2013-08-27 02:13:44.000000000 +0200 ++++ ffmpeg-1.2.3.patch/libavcodec/Makefile 2013-09-09 22:46:40.580852793 +0200 +@@ -12,6 +12,7 @@ + vdpau.h \ + version.h \ + xvmc.h \ ++ xvba.h \ + + OBJS = allcodecs.o \ + audioconvert.o \ +@@ -73,6 +74,7 @@ + OBJS-$(CONFIG_SINEWIN) += sinewin.o + OBJS-$(CONFIG_VAAPI) += vaapi.o + OBJS-$(CONFIG_VDPAU) += vdpau.o ++OBJS-$(CONFIG_XVBA) += xvba.o + OBJS-$(CONFIG_VIDEODSP) += videodsp.o + OBJS-$(CONFIG_VP3DSP) += vp3dsp.o + +@@ -232,6 +234,7 @@ + OBJS-$(CONFIG_H264_VDA_HWACCEL) += vda_h264.o + OBJS-$(CONFIG_H264_VDA_DECODER) += vda_h264_dec.o + OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o ++OBJS-$(CONFIG_H264_XVBA_HWACCEL) += xvba_h264.o + OBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuv.o huffyuvdec.o + OBJS-$(CONFIG_HUFFYUV_ENCODER) += huffyuv.o huffyuvenc.o + OBJS-$(CONFIG_IAC_DECODER) += imc.o +@@ -295,6 +298,7 @@ + OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL) += dxva2_mpeg2.o + OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL) += vaapi_mpeg2.o + OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL) += vdpau_mpeg12.o ++OBJS-$(CONFIG_MPEG2_XVBA_HWACCEL) += xvba_mpeg2.o + OBJS-$(CONFIG_MPEG2VIDEO_DECODER) += mpeg12.o mpeg12data.o + OBJS-$(CONFIG_MPEG2VIDEO_ENCODER) += mpeg12enc.o mpeg12.o \ + timecode.o +@@ -459,6 +463,7 @@ + OBJS-$(CONFIG_VC1_DXVA2_HWACCEL) += dxva2_vc1.o + 
OBJS-$(CONFIG_VC1_VAAPI_HWACCEL) += vaapi_vc1.o + OBJS-$(CONFIG_VC1_VDPAU_HWACCEL) += vdpau_vc1.o ++OBJS-$(CONFIG_VC1_XVBA_HWACCEL) += xvba_vc1.o + OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o + OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdav.o + OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdav.o +@@ -788,6 +793,7 @@ + SKIPHEADERS-$(CONFIG_LIBUTVIDEO) += libutvideo.h + SKIPHEADERS-$(CONFIG_MPEG_XVMC_DECODER) += xvmc.h + SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_internal.h ++SKIPHEADERS-$(CONFIG_XVBA) += xvba_internal.h + SKIPHEADERS-$(CONFIG_VDA) += vda.h + SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h + SKIPHEADERS-$(HAVE_OS2THREADS) += os2threads.h +diff -Naur ffmpeg-1.2.3/libavcodec/xvba.c ffmpeg-1.2.3.patch/libavcodec/xvba.c +--- ffmpeg-1.2.3/libavcodec/xvba.c 1970-01-01 01:00:00.000000000 +0100 ++++ ffmpeg-1.2.3.patch/libavcodec/xvba.c 2013-09-09 22:46:40.580852793 +0200 @@ -0,0 +1,66 @@ +/* + * HW decode acceleration for MPEG-2, H.264 and VC-1 @@ -258,11 +226,9 @@ index 0000000..be29e5d + render->num_slices++; +} + -diff --git a/libavcodec/xvba.h b/libavcodec/xvba.h -new file mode 100644 -index 0000000..9f9ff0c ---- /dev/null -+++ b/libavcodec/xvba.h +diff -Naur ffmpeg-1.2.3/libavcodec/xvba.h ffmpeg-1.2.3.patch/libavcodec/xvba.h +--- ffmpeg-1.2.3/libavcodec/xvba.h 1970-01-01 01:00:00.000000000 +0100 ++++ ffmpeg-1.2.3.patch/libavcodec/xvba.h 2013-09-09 22:46:40.581852794 +0200 @@ -0,0 +1,71 @@ +/* + * HW decode acceleration for MPEG-2, H.264 and VC-1 @@ -335,11 +301,9 @@ index 0000000..9f9ff0c +}; + +#endif /* AVCODEC_XVBA_H */ -diff --git a/libavcodec/xvba_h264.c b/libavcodec/xvba_h264.c -new file mode 100644 -index 0000000..ae45f3a ---- /dev/null -+++ b/libavcodec/xvba_h264.c +diff -Naur ffmpeg-1.2.3/libavcodec/xvba_h264.c ffmpeg-1.2.3.patch/libavcodec/xvba_h264.c +--- ffmpeg-1.2.3/libavcodec/xvba_h264.c 1970-01-01 01:00:00.000000000 +0100 ++++ ffmpeg-1.2.3.patch/libavcodec/xvba_h264.c 2013-09-09 22:46:40.582852794 +0200 @@ -0,0 +1,192 @@ +/* + * H.264 HW decode acceleration through XVBA @@ 
-533,11 +497,9 @@ index 0000000..ae45f3a + .end_frame = end_frame, + .decode_slice = decode_slice, +}; -diff --git a/libavcodec/xvba_internal.h b/libavcodec/xvba_internal.h -new file mode 100644 -index 0000000..9653f85 ---- /dev/null -+++ b/libavcodec/xvba_internal.h +diff -Naur ffmpeg-1.2.3/libavcodec/xvba_internal.h ffmpeg-1.2.3.patch/libavcodec/xvba_internal.h +--- ffmpeg-1.2.3/libavcodec/xvba_internal.h 1970-01-01 01:00:00.000000000 +0100 ++++ ffmpeg-1.2.3.patch/libavcodec/xvba_internal.h 2013-09-09 22:46:40.582852794 +0200 @@ -0,0 +1,24 @@ +/* + * HW decode acceleration for MPEG-2, H.264 and VC-1 @@ -563,11 +525,9 @@ index 0000000..9653f85 + +int ff_xvba_translate_profile(int profile); +void ff_xvba_add_slice_data(struct xvba_render_state *render, const uint8_t *buffer, uint32_t size); -diff --git a/libavcodec/xvba_mpeg2.c b/libavcodec/xvba_mpeg2.c -new file mode 100644 -index 0000000..0fc7d78 ---- /dev/null -+++ b/libavcodec/xvba_mpeg2.c +diff -Naur ffmpeg-1.2.3/libavcodec/xvba_mpeg2.c ffmpeg-1.2.3.patch/libavcodec/xvba_mpeg2.c +--- ffmpeg-1.2.3/libavcodec/xvba_mpeg2.c 1970-01-01 01:00:00.000000000 +0100 ++++ ffmpeg-1.2.3.patch/libavcodec/xvba_mpeg2.c 2013-09-09 22:46:40.582852794 +0200 @@ -0,0 +1,52 @@ +/* + * MPEG-2 HW decode acceleration through XVBA @@ -621,11 +581,9 @@ index 0000000..0fc7d78 + .decode_slice = decode_slice, + .priv_data_size = 0, +}; -diff --git a/libavcodec/xvba_vc1.c b/libavcodec/xvba_vc1.c -new file mode 100644 -index 0000000..bf3d9c2 ---- /dev/null -+++ b/libavcodec/xvba_vc1.c +diff -Naur ffmpeg-1.2.3/libavcodec/xvba_vc1.c ffmpeg-1.2.3.patch/libavcodec/xvba_vc1.c +--- ffmpeg-1.2.3/libavcodec/xvba_vc1.c 1970-01-01 01:00:00.000000000 +0100 ++++ ffmpeg-1.2.3.patch/libavcodec/xvba_vc1.c 2013-09-09 22:46:40.583852794 +0200 @@ -0,0 +1,190 @@ +/* + * VC-1 HW decode acceleration through XVBA @@ -817,11 +775,10 @@ index 0000000..bf3d9c2 + .end_frame = end_frame, + .decode_slice = decode_slice, +}; -diff --git a/libavutil/pixdesc.c 
b/libavutil/pixdesc.c -index 1016dba..53dfec1 100644 ---- a/libavutil/pixdesc.c -+++ b/libavutil/pixdesc.c -@@ -1141,6 +1141,12 @@ void av_write_image_line(const uint16_t *src, +diff -Naur ffmpeg-1.2.3/libavutil/pixdesc.c ffmpeg-1.2.3.patch/libavutil/pixdesc.c +--- ffmpeg-1.2.3/libavutil/pixdesc.c 2013-08-27 02:13:47.000000000 +0200 ++++ ffmpeg-1.2.3.patch/libavutil/pixdesc.c 2013-09-09 22:46:40.584852795 +0200 +@@ -1141,6 +1141,12 @@ .log2_chroma_h = 1, .flags = PIX_FMT_HWACCEL, }, @@ -834,11 +791,10 @@ index 1016dba..53dfec1 100644 [AV_PIX_FMT_YUV420P9LE] = { .name = "yuv420p9le", .nb_components = 3, -diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h -index 1c00ac4..6437e29 100644 ---- a/libavutil/pixfmt.h -+++ b/libavutil/pixfmt.h -@@ -124,6 +124,7 @@ enum AVPixelFormat { +diff -Naur ffmpeg-1.2.3/libavutil/pixfmt.h ffmpeg-1.2.3.patch/libavutil/pixfmt.h +--- ffmpeg-1.2.3/libavutil/pixfmt.h 2013-08-27 02:13:47.000000000 +0200 ++++ ffmpeg-1.2.3.patch/libavutil/pixfmt.h 2013-09-09 22:46:40.585852796 +0200 +@@ -124,6 +124,7 @@ AV_PIX_FMT_VAAPI_MOCO, ///< HW acceleration through VA API at motion compensation entry-point, Picture.data[3] contains a vaapi_render_state struct which contains macroblocks as well as various fields extracted from headers AV_PIX_FMT_VAAPI_IDCT, ///< HW acceleration through VA API at IDCT entry-point, Picture.data[3] contains a vaapi_render_state struct which contains fields extracted from headers AV_PIX_FMT_VAAPI_VLD, ///< HW decoding through VA API, Picture.data[3] contains a vaapi_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers @@ -846,6 +802,3 @@ index 1c00ac4..6437e29 100644 AV_PIX_FMT_YUV420P16LE, ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian AV_PIX_FMT_YUV420P16BE, ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian --- -1.8.1.5 - diff --git 
a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.05-XVBA-revisit_draw_functions.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.05-XVBA-revisit_draw_functions.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.05-XVBA-revisit_draw_functions.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.05-XVBA-revisit_draw_functions.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.05-xvba_fix_pix_fmt_vda_vld_assert.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.05-xvba_fix_pix_fmt_vda_vld_assert.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.05-xvba_fix_pix_fmt_vda_vld_assert.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.05-xvba_fix_pix_fmt_vda_vld_assert.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.06-XVBA-Make_XVBA_codec_available.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.06-XVBA-Make_XVBA_codec_available.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.06-XVBA-Make_XVBA_codec_available.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.06-XVBA-Make_XVBA_codec_available.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.09-make_vc1_interlaced_working.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.09-make_vc1_interlaced_working.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.09-make_vc1_interlaced_working.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.09-make_vc1_interlaced_working.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.10-XVBA_Only_set_second_field_when_we_are_interlaced_and_an_interlaced.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.10-XVBA_Only_set_second_field_when_we_are_interlaced_and_an_interlaced.patch similarity index 100% rename from 
packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.10-XVBA_Only_set_second_field_when_we_are_interlaced_and_an_interlaced.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.10-XVBA_Only_set_second_field_when_we_are_interlaced_and_an_interlaced.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.11-XVBA-translate_picture_structure_to_a_value_xvba_understands.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.11-XVBA-translate_picture_structure_to_a_value_xvba_understands.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.11-XVBA-translate_picture_structure_to_a_value_xvba_understands.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.11-XVBA-translate_picture_structure_to_a_value_xvba_understands.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.12-XVBA-fix_vc1_field_interlace.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.12-XVBA-fix_vc1_field_interlace.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-901.12-XVBA-fix_vc1_field_interlace.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-901.12-XVBA-fix_vc1_field_interlace.patch diff --git a/packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-913-h264-do_not_discard_NAL_SEI_when_skipping_frames.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-913-h264-do_not_discard_NAL_SEI_when_skipping_frames.patch similarity index 100% rename from packages/multimedia/ffmpeg/patches/1.2.1/ffmpeg-913-h264-do_not_discard_NAL_SEI_when_skipping_frames.patch rename to packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-913-h264-do_not_discard_NAL_SEI_when_skipping_frames.patch