diff --git a/packages/multimedia/ffmpeg/patches/0.10.7/ffmpeg-0.10.6-921.01-h264_parser-Initialize_the_h264dsp_context_in_the_parser_as_well.patch b/packages/multimedia/ffmpeg/patches/0.10.7/ffmpeg-0.10.6-921.01-h264_parser-Initialize_the_h264dsp_context_in_the_parser_as_well.patch new file mode 100644 index 0000000000..ce5891af17 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/0.10.7/ffmpeg-0.10.6-921.01-h264_parser-Initialize_the_h264dsp_context_in_the_parser_as_well.patch @@ -0,0 +1,40 @@ +From 90ac611930e202c8255887c6b3e82ff97a1117f0 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 22 Aug 2013 14:34:37 +0100 +Subject: [PATCH] h264_parser: Initialize the h264dsp context in the parser as + well +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Each AVStream struct for an H.264 elementary stream actually has two +copies of the H264DSPContext struct (and in fact all the other members +of H264Context as well): + +((H264Context *) ((AVStream *)st)->codec->priv_data)->h264dsp +((H264Context *) ((AVStream *)st)->parser->priv_data)->h264dsp + +but only the first of these was actually being initialised. This +prevented the addition of platform-specific implementations of +parser-related functions. + +Signed-off-by: Martin Storsjö +--- + libavcodec/h264_parser.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c +index c9dea81..98e2844 100644 +--- a/libavcodec/h264_parser.c ++++ b/libavcodec/h264_parser.c +@@ -377,6 +377,7 @@ static int init(AVCodecParserContext *s) + H264Context *h = s->priv_data; + h->thread_context[0] = h; + h->s.slice_context_count = 1; ++ ff_h264dsp_init(&h->h264dsp, 8, 1); + return 0; + } + +-- +1.8.1.6 + diff --git a/packages/multimedia/ffmpeg/patches/0.10.7/ffmpeg-0.10.6-921.02-h264dsp-Factorize_code_into_a_new_function-h264_find_start_code_canditate.patch b/packages/multimedia/ffmpeg/patches/0.10.7/ffmpeg-0.10.6-921.02-h264dsp-Factorize_code_into_a_new_function-h264_find_start_code_canditate.patch new file mode 100644 index 0000000000..21bb17d783 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/0.10.7/ffmpeg-0.10.6-921.02-h264dsp-Factorize_code_into_a_new_function-h264_find_start_code_canditate.patch @@ -0,0 +1,129 @@ +From c58d69bea87c77c7a1e8221624137beaaa670586 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 22 Aug 2013 14:37:18 +0100 +Subject: [PATCH] h264dsp: Factorize code into a new function, + h264_find_start_code_candidate +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This performs the start code search which was previously part of +h264_find_frame_end() - the most CPU intensive part of the function. + +By itself, this results in a performance regression: + Before After + Mean StdDev Mean StdDev Change +Overall time 2925.6 26.2 3068.5 31.7 -4.7% + +but this can more than be made up for by platform-optimised +implementations of the function. + +Signed-off-by: Martin Storsjö +--- + libavcodec/h264_parser.c | 21 +++------------------ + libavcodec/h264dsp.c | 29 +++++++++++++++++++++++++++++ + libavcodec/h264dsp.h | 9 +++++++++ + 3 files changed, 41 insertions(+), 18 deletions(-) + +diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c +index 98e2844..41c874d 100644 +--- a/libavcodec/h264_parser.c ++++ b/libavcodec/h264_parser.c +@@ -65,24 +65,9 @@ static int ff_h264_find_frame_end(H264Context *h, const uint8_t *buf, int buf_si + } + + if(state==7){ +-#if HAVE_FAST_UNALIGNED +- /* we check ih264dsp.h264_find_start_code_candidate(buf + i, buf_size - i); ++ if (i < buf_size) ++ state = 2; + }else if(state<=2){ + if(buf[i]==1) state^= 5; //2->7, 1->4, 0->5 + else if(buf[i]) state = 7; +diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c +index bd35aa3..0703172 100644 +--- a/libavcodec/h264dsp.c ++++ b/libavcodec/h264dsp.c +@@ -41,6 +41,34 @@ + #include "h264dsp_template.c" + #undef BIT_DEPTH + ++static int h264_find_start_code_candidate_c(const uint8_t *buf, int size) ++{ ++ int i = 0; ++#if HAVE_FAST_UNALIGNED ++ /* we check i < size instead of i + 3 / 7 because it is ++ * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE ++ * bytes at the end. ++ */ ++#if HAVE_FAST_64BIT ++ while (i < size && ++ !((~*(const uint64_t *)(buf + i) & ++ (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) & ++ 0x8080808080808080ULL)) ++ i += 8; ++#else ++ while (i < size && ++ !((~*(const uint32_t *)(buf + i) & ++ (*(const uint32_t *)(buf + i) - 0x01010101U)) & ++ 0x80808080U)) ++ i += 4; ++#endif ++#endif ++ for (; i < size; i++) ++ if (!buf[i]) ++ break; ++ return i; ++} ++ + void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) + { + #undef FUNC +@@ -110,6 +138,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_fo + H264_DSP(8); + break; + } ++ c->h264_find_start_code_candidate = h264_find_start_code_candidate_c; + + if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc); + if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc); +diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h +index 490a936..1d172f1 100644 +--- a/libavcodec/h264dsp.h ++++ b/libavcodec/h264dsp.h +@@ -74,6 +74,15 @@ typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int h + void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[15*8]); + void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul); + void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); ++ ++ /** ++ * Search buf from the start for up to size bytes. Return the index ++ * of a zero byte, or >= size if not found. Ideally, use lookahead ++ * to filter out any zero bytes that are known to not be followed by ++ * one or more further zero bytes and a one byte. Better still, filter ++ * out any bytes that form the trailing_zero_8bits syntax element too. ++ */ ++ int (*h264_find_start_code_candidate)(const uint8_t *buf, int size); + }H264DSPContext; + + void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); +-- +1.8.1.6 + diff --git a/packages/multimedia/ffmpeg/patches/0.10.7/ffmpeg-0.10.6-921.03-arm-Add_assembly_version_of-h264_find_start_code_candidate.patch b/packages/multimedia/ffmpeg/patches/0.10.7/ffmpeg-0.10.6-921.03-arm-Add_assembly_version_of-h264_find_start_code_candidate.patch new file mode 100644 index 0000000000..57f50520b2 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/0.10.7/ffmpeg-0.10.6-921.03-arm-Add_assembly_version_of-h264_find_start_code_candidate.patch @@ -0,0 +1,321 @@ +From 4b8f90c0702324a9fcc3909c8bf7d80f090c41ba Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 22 Aug 2013 14:39:21 +0100 +Subject: [PATCH] arm: Add assembly version of h264_find_start_code_candidate +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + Before After + Mean StdDev Mean StdDev Change +This function 508.8 23.4 185.4 9.0 +174.4% +Overall 3068.5 31.7 2752.1 29.4 +11.5% + +In combination with the preceding patch: + Before After + Mean StdDev Mean StdDev Change +Overall 2925.6 26.2 2752.1 29.4 +6.3% + +Signed-off-by: Martin Storsjö +--- + libavcodec/arm/Makefile | 1 + + libavcodec/arm/h264dsp_armv6.S | 253 +++++++++++++++++++++++++++ + libavcodec/arm/h264dsp_init_arm.c | 4 + + 3 files changed, 258 insertions(+) + create mode 100644 libavcodec/arm/h264dsp_armv6.S + +diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile +index 798dfe3..8f2b683 100644 +--- a/libavcodec/arm/Makefile ++++ b/libavcodec/arm/Makefile +@@ -4,6 +4,7 @@ OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_init_arm.o \ + OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \ + + ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o ++ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o + + OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o + ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o +diff --git a/libavcodec/arm/h264dsp_armv6.S b/libavcodec/arm/h264dsp_armv6.S +new file mode 100644 +index 0000000..c2cafc2 +--- /dev/null ++++ b/libavcodec/arm/h264dsp_armv6.S +@@ -0,0 +1,253 @@ ++/* ++ * Copyright (c) 2013 RISC OS Open Ltd ++ * Author: Ben Avison ++ * ++ * This file is part of Libav. ++ * ++ * Libav is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * Libav is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with Libav; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#include "asm.S" ++ ++RESULT .req a1 ++BUF .req a1 ++SIZE .req a2 ++PATTERN .req a3 ++PTR .req a4 ++DAT0 .req v1 ++DAT1 .req v2 ++DAT2 .req v3 ++DAT3 .req v4 ++TMP0 .req v5 ++TMP1 .req v6 ++TMP2 .req ip ++TMP3 .req lr ++ ++#define PRELOAD_DISTANCE 4 ++ ++.macro innerloop4 ++ ldr DAT0, [PTR], #4 ++ subs SIZE, SIZE, #4 @ C flag survives rest of macro ++ sub TMP0, DAT0, PATTERN, lsr #14 ++ bic TMP0, TMP0, DAT0 ++ ands TMP0, TMP0, PATTERN ++.endm ++ ++.macro innerloop16 decrement, do_preload ++ ldmia PTR!, {DAT0,DAT1,DAT2,DAT3} ++ .ifnc "\do_preload","" ++ pld [PTR, #PRELOAD_DISTANCE*32] ++ .endif ++ .ifnc "\decrement","" ++ subs SIZE, SIZE, #\decrement @ C flag survives rest of macro ++ .endif ++ sub TMP0, DAT0, PATTERN, lsr #14 ++ sub TMP1, DAT1, PATTERN, lsr #14 ++ bic TMP0, TMP0, DAT0 ++ bic TMP1, TMP1, DAT1 ++ sub TMP2, DAT2, PATTERN, lsr #14 ++ sub TMP3, DAT3, PATTERN, lsr #14 ++ ands TMP0, TMP0, PATTERN ++ bic TMP2, TMP2, DAT2 ++ it eq ++ andseq TMP1, TMP1, PATTERN ++ bic TMP3, TMP3, DAT3 ++ itt eq ++ andseq TMP2, TMP2, PATTERN ++ andseq TMP3, TMP3, PATTERN ++.endm ++ ++/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */ ++function ff_h264_find_start_code_candidate_armv6, export=1 ++ push {v1-v6,lr} ++ mov PTR, BUF ++ @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go ++ @ before using code that does preloads ++ cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1 ++ blo 60f ++ ++ @ Get to word-alignment, 1 byte at a time ++ tst PTR, #3 ++ beq 2f ++1: ldrb DAT0, [PTR], #1 ++ sub SIZE, SIZE, #1 ++ teq DAT0, #0 ++ beq 90f ++ tst PTR, #3 ++ bne 1b ++2: @ Get to 4-word alignment, 1 word at a time ++ ldr PATTERN, =0x80008000 ++ setend be ++ tst PTR, #12 ++ beq 4f ++3: innerloop4 ++ bne 91f ++ tst PTR, #12 ++ bne 3b ++4: @ Get to cacheline (8-word) alignment ++ tst PTR, #16 ++ beq 5f ++ innerloop16 16 ++ bne 93f ++5: @ Check complete cachelines, with preloading ++ @ We need to stop when there are still (PRELOAD_DISTANCE+1) ++ @ complete cachelines to go ++ sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 ++6: innerloop16 , do_preload ++ bne 93f ++ innerloop16 32 ++ bne 93f ++ bcs 6b ++ @ Preload trailing part-cacheline, if any ++ tst SIZE, #31 ++ beq 7f ++ pld [PTR, #(PRELOAD_DISTANCE+1)*32] ++ @ Check remaining data without doing any more preloads. First ++ @ do in chunks of 4 words: ++7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16 ++ bmi 9f ++8: innerloop16 16 ++ bne 93f ++ bcs 8b ++ @ Then in words: ++9: adds SIZE, SIZE, #16 - 4 ++ bmi 11f ++10: innerloop4 ++ bne 91f ++ bcs 10b ++11: setend le ++ @ Check second byte of final halfword ++ ldrb DAT0, [PTR, #-1] ++ teq DAT0, #0 ++ beq 90f ++ @ Check any remaining bytes ++ tst SIZE, #3 ++ beq 13f ++12: ldrb DAT0, [PTR], #1 ++ sub SIZE, SIZE, #1 ++ teq DAT0, #0 ++ beq 90f ++ tst SIZE, #3 ++ bne 12b ++ @ No candidate found ++13: sub RESULT, PTR, BUF ++ b 99f ++ ++60: @ Small buffer - simply check by looping over bytes ++ subs SIZE, SIZE, #1 ++ bcc 99f ++61: ldrb DAT0, [PTR], #1 ++ subs SIZE, SIZE, #1 ++ teq DAT0, #0 ++ beq 90f ++ bcs 61b ++ @ No candidate found ++ sub RESULT, PTR, BUF ++ b 99f ++ ++90: @ Found a candidate at the preceding byte ++ sub RESULT, PTR, BUF ++ sub RESULT, RESULT, #1 ++ b 99f ++ ++91: @ Found a candidate somewhere in the preceding 4 bytes ++ sub RESULT, PTR, BUF ++ sub RESULT, RESULT, #4 ++ sub TMP0, DAT0, #0x20000 ++ bics TMP0, TMP0, DAT0 ++ itt pl ++ ldrbpl DAT0, [PTR, #-3] ++ addpl RESULT, RESULT, #2 ++ bpl 92f ++ teq RESULT, #0 ++ beq 98f @ don't look back a byte if found at first byte in buffer ++ ldrb DAT0, [PTR, #-5] ++92: teq DAT0, #0 ++ it eq ++ subeq RESULT, RESULT, #1 ++ b 98f ++ ++93: @ Found a candidate somewhere in the preceding 16 bytes ++ sub RESULT, PTR, BUF ++ sub RESULT, RESULT, #16 ++ teq TMP0, #0 ++ beq 95f @ not in first 4 bytes ++ sub TMP0, DAT0, #0x20000 ++ bics TMP0, TMP0, DAT0 ++ itt pl ++ ldrbpl DAT0, [PTR, #-15] ++ addpl RESULT, RESULT, #2 ++ bpl 94f ++ teq RESULT, #0 ++ beq 98f @ don't look back a byte if found at first byte in buffer ++ ldrb DAT0, [PTR, #-17] ++94: teq DAT0, #0 ++ it eq ++ subeq RESULT, RESULT, #1 ++ b 98f ++95: add RESULT, RESULT, #4 ++ teq TMP1, #0 ++ beq 96f @ not in next 4 bytes ++ sub TMP1, DAT1, #0x20000 ++ bics TMP1, TMP1, DAT1 ++ itee mi ++ ldrbmi DAT0, [PTR, #-13] ++ ldrbpl DAT0, [PTR, #-11] ++ addpl RESULT, RESULT, #2 ++ teq DAT0, #0 ++ it eq ++ subeq RESULT, RESULT, #1 ++ b 98f ++96: add RESULT, RESULT, #4 ++ teq TMP2, #0 ++ beq 97f @ not in next 4 bytes ++ sub TMP2, DAT2, #0x20000 ++ bics TMP2, TMP2, DAT2 ++ itee mi ++ ldrbmi DAT0, [PTR, #-9] ++ ldrbpl DAT0, [PTR, #-7] ++ addpl RESULT, RESULT, #2 ++ teq DAT0, #0 ++ it eq ++ subeq RESULT, RESULT, #1 ++ b 98f ++97: add RESULT, RESULT, #4 ++ sub TMP3, DAT3, #0x20000 ++ bics TMP3, TMP3, DAT3 ++ itee mi ++ ldrbmi DAT0, [PTR, #-5] ++ ldrbpl DAT0, [PTR, #-3] ++ addpl RESULT, RESULT, #2 ++ teq DAT0, #0 ++ it eq ++ subeq RESULT, RESULT, #1 ++ @ drop through to 98f ++98: setend le ++99: pop {v1-v6,pc} ++.endfunc ++ ++ .unreq RESULT ++ .unreq BUF ++ .unreq SIZE ++ .unreq PATTERN ++ .unreq PTR ++ .unreq DAT0 ++ .unreq DAT1 ++ .unreq DAT2 ++ .unreq DAT3 ++ .unreq TMP0 ++ .unreq TMP1 ++ .unreq TMP2 ++ .unreq TMP3 +diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c +index cc4c688..c282c21 100644 +--- a/libavcodec/arm/h264dsp_init_arm.c ++++ b/libavcodec/arm/h264dsp_init_arm.c +@@ -23,6 +23,8 @@ + #include "libavcodec/dsputil.h" + #include "libavcodec/h264dsp.h" + ++int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size); ++ + void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, + int beta, int8_t *tc0); + void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, +@@ -99,5 +101,7 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const i + + void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) + { ++ if (HAVE_ARMV6) ++ c->h264_find_start_code_candidate = ff_h264_find_start_code_candidate_armv6; + if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc); + } +-- +1.8.1.6 +