diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-999-0001-h264_parser-Initialize-the-h264dsp-context-in-the-.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-999-0001-h264_parser-Initialize-the-h264dsp-context-in-the-.patch new file mode 100644 index 0000000000..5d39db99a7 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-999-0001-h264_parser-Initialize-the-h264dsp-context-in-the-.patch @@ -0,0 +1,40 @@ +From 6f99f0779ea56e4bfe40f7ca56e60b3dfd84eba6 Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 5 Aug 2013 13:12:46 +0100 +Subject: [PATCH] h264_parser: Initialize the h264dsp context in the parser as + well +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Each AVStream struct for an H.264 elementary stream actually has two +copies of the H264DSPContext struct (and in fact all the other members +of H264Context as well): + +((H264Context *) ((AVStream *)st)->codec->priv_data)->h264dsp +((H264Context *) ((AVStream *)st)->parser->priv_data)->h264dsp + +but only the first of these was actually being initialised. This +prevented the addition of platform-specific implementations of +parser-related functions. 
+ +Signed-off-by: Martin Storsjö +--- + libavcodec/h264_parser.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c +index aff9ba1..a732f79 100644 +--- a/libavcodec/h264_parser.c ++++ b/libavcodec/h264_parser.c +@@ -386,6 +386,7 @@ static int init(AVCodecParserContext *s) + H264Context *h = s->priv_data; + h->thread_context[0] = h; + h->slice_context_count = 1; ++ ff_h264dsp_init(&h->h264dsp, 8, 1); + return 0; + } + +-- +1.8.5.1 + diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-999-0002-h264dsp-Factorize-code-into-a-new-function-.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-999-0002-h264dsp-Factorize-code-into-a-new-function-.patch new file mode 100644 index 0000000000..fe87f838cb --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-999-0002-h264dsp-Factorize-code-into-a-new-function-.patch @@ -0,0 +1,128 @@ +From 971a57f6067c96f8dba087285065618f1ac3ecd5 Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 5 Aug 2013 13:12:47 +0100 +Subject: [PATCH] h264dsp: Factorize code into a new function, + h264_find_start_code_candidate +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +This performs the start code search which was previously part of +h264_find_frame_end() - the most CPU intensive part of the function. + +By itself, this results in a performance regression: + Before After + Mean StdDev Mean StdDev Change +Overall time 2925.6 26.2 3068.5 31.7 -4.7% + +but this can more than be made up for by platform-optimised +implementations of the function. 
/**
 * Portable C implementation of
 * H264DSPContext.h264_find_start_code_candidate.
 *
 * Scans buf for the first zero byte, i.e. the earliest position at which
 * a start code candidate can begin.
 *
 * @param buf  data to scan
 * @param size number of bytes of buf to examine
 * @return index of the first zero byte within buf[0..size), or size when
 *         there is none (callers treat any value >= size as "not found")
 */
static int h264_find_start_code_candidate_c(const uint8_t *buf, int size)
{
    int i = 0;
#if HAVE_FAST_UNALIGNED
    /* Skip whole words that contain no zero byte: for a word x,
     * (x - 0x01..01) & ~x & 0x80..80 is non-zero iff some byte of x is 0.
     * Only words lying entirely inside the buffer are loaded, so the scan
     * does not depend on FF_INPUT_BUFFER_PADDING_SIZE bytes following the
     * data and i never runs past size.
     */
#if HAVE_FAST_64BIT
    while (size - i >= 8 &&
           !((~*(const uint64_t *)(buf + i) &
              (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
             0x8080808080808080ULL)) {
        i += 8;
    }
#else
    while (size - i >= 4 &&
           !((~*(const uint32_t *)(buf + i) &
              (*(const uint32_t *)(buf + i) - 0x01010101U)) &
             0x80808080U)) {
        i += 4;
    }
#endif /* HAVE_FAST_64BIT */
#endif /* HAVE_FAST_UNALIGNED */
    /* Byte-wise scan: pinpoints the zero inside the word that stopped the
     * loop above and covers the tail that is shorter than one word. */
    for (; i < size; i++) {
        if (!buf[i]) {
            break;
        }
    }
    return i;
}
++ */ ++ int (*h264_find_start_code_candidate)(const uint8_t *buf, int size); + } H264DSPContext; + + void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, +-- +1.8.5.1 + diff --git a/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-999-0003-arm-Add-assembly-version-of-h264_find_start_code_candidate.patch b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-999-0003-arm-Add-assembly-version-of-h264_find_start_code_candidate.patch new file mode 100644 index 0000000000..a8698b2aa0 --- /dev/null +++ b/packages/multimedia/ffmpeg/patches/1.2.3/ffmpeg-999-0003-arm-Add-assembly-version-of-h264_find_start_code_candidate.patch @@ -0,0 +1,335 @@ +From fdc814cc6701f3e882a7ea7f29d16500c7340f0d Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Mon, 5 Aug 2013 13:12:48 +0100 +Subject: [PATCH] arm: Add assembly version of h264_find_start_code_candidate +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + + Before After + Mean StdDev Mean StdDev Change +This function 508.8 23.4 185.4 9.0 +174.4% +Overall 3068.5 31.7 2752.1 29.4 +11.5% + +In combination with the preceding patch: + Before After + Mean StdDev Mean StdDev Change +Overall 2925.6 26.2 2752.1 29.4 +6.3% + +Signed-off-by: Martin Storsjö +--- + libavcodec/arm/Makefile | 1 + + libavcodec/arm/h264dsp_armv6.S | 253 +++++++++++++++++++++++++++ + libavcodec/arm/h264dsp_init_arm.c | 4 + + libavcodec/h264_parser.c | 1 - + 4 files changed, 258 insertions(+), 1 deletion(-) + create mode 100644 libavcodec/arm/h264dsp_armv6.S + +diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile +index 7390a8b..480000b 100644 +--- a/libavcodec/arm/Makefile ++++ b/libavcodec/arm/Makefile +@@ -9,6 +9,7 @@ OBJS-$(CONFIG_AAC_DECODER) += arm/sbrdsp_init_arm.o \ + OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_init_arm.o \ + + ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o ++ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o + + OBJS-$(CONFIG_FLAC_DECODER) += arm/flacdsp_init_arm.o \ + 
/*
 * Copyright (c) 2013 RISC OS Open Ltd
 * Author: Ben Avison
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"

@ Register aliases.  RESULT and BUF both name a1: the incoming pointer is
@ copied to PTR on entry, and a1 is only overwritten with the return value
@ (derived from PTR - BUF) on the exit paths.
RESULT  .req    a1
BUF     .req    a1
SIZE    .req    a2
PATTERN .req    a3
PTR     .req    a4
DAT0    .req    v1
DAT1    .req    v2
DAT2    .req    v3
DAT3    .req    v4
TMP0    .req    v5
TMP1    .req    v6
TMP2    .req    ip
TMP3    .req    lr

@ Preload look-ahead, in units of 32 bytes (see the pld offsets below); the
@ comments in this file treat 32 bytes (8 words) as one cacheline.
#define PRELOAD_DISTANCE 4

@ Test one word: load it (post-incrementing PTR), subtract 4 from SIZE, then
@ compute (DAT0 - (PATTERN >> 14)) & ~DAT0 & PATTERN.  Z is clear (ne) when
@ the word needs the closer inspection done at label 91.
@ NOTE(review): with PATTERN = 0x80008000 this looks like a per-halfword
@ "value < 2" test, i.e. an aligned 00 00 or 00 01 byte pair when the word
@ is loaded big-endian - confirm.
.macro innerloop4
        ldr     DAT0, [PTR], #4
        subs    SIZE, SIZE, #4 @ C flag survives rest of macro
        sub     TMP0, DAT0, PATTERN, lsr #14
        bic     TMP0, TMP0, DAT0
        ands    TMP0, TMP0, PATTERN
.endm

@ Same test applied to four consecutive words.
@   decrement:  amount subtracted from SIZE (no subs is emitted if omitted)
@   do_preload: any non-empty value emits a pld PRELOAD_DISTANCE*32 bytes
@               beyond the advanced PTR
@ Each andseq only executes while every earlier word tested zero, so Z is
@ clear (ne) on exit as soon as one of the four words matched; label 93
@ then works out which one from TMP0..TMP2.
.macro innerloop16 decrement, do_preload
        ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3}
 .ifnc "\do_preload",""
        pld     [PTR, #PRELOAD_DISTANCE*32]
 .endif
 .ifnc "\decrement",""
        subs    SIZE, SIZE, #\decrement @ C flag survives rest of macro
 .endif
        sub     TMP0, DAT0, PATTERN, lsr #14
        sub     TMP1, DAT1, PATTERN, lsr #14
        bic     TMP0, TMP0, DAT0
        bic     TMP1, TMP1, DAT1
        sub     TMP2, DAT2, PATTERN, lsr #14
        sub     TMP3, DAT3, PATTERN, lsr #14
        ands    TMP0, TMP0, PATTERN
        bic     TMP2, TMP2, DAT2
        it      eq
        andseq  TMP1, TMP1, PATTERN
        bic     TMP3, TMP3, DAT3
        itt     eq
        andseq  TMP2, TMP2, PATTERN
        andseq  TMP3, TMP3, PATTERN
.endm

/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */
@ Returns in a1 the byte offset from buf of the candidate that was found, or
@ PTR - BUF after the whole range has been consumed without finding one.
function ff_h264_find_start_code_candidate_armv6, export=1
        push    {v1-v6,lr}
        mov     PTR, BUF
        @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
        @ before using code that does preloads
        @ (blo is an unsigned comparison; smaller buffers use the byte loop at 60)
        cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
        blo     60f

        @ Get to word-alignment, 1 byte at a time
        tst     PTR, #3
        beq     2f
1:      ldrb    DAT0, [PTR], #1
        sub     SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        tst     PTR, #3
        bne     1b
2:      @ Get to 4-word alignment, 1 word at a time
        ldr     PATTERN, =0x80008000
        @ Word loads from here on are done in big-endian data mode; every path
        @ out of the word loops goes through 98 or 11, which switch back to
        @ little-endian before returning.
        setend  be
        tst     PTR, #12
        beq     4f
3:      innerloop4
        bne     91f
        tst     PTR, #12
        bne     3b
4:      @ Get to cacheline (8-word) alignment
        tst     PTR, #16
        beq     5f
        innerloop16 16
        bne     93f
5:      @ Check complete cachelines, with preloading
        @ We need to stop when there are still (PRELOAD_DISTANCE+1)
        @ complete cachelines to go
        sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
        @ Each iteration handles 32 bytes as two 16-byte halves; only the
        @ second half updates SIZE and so supplies the C flag tested by bcs.
6:      innerloop16 , do_preload
        bne     93f
        innerloop16 32
        bne     93f
        bcs     6b
        @ Preload trailing part-cacheline, if any
        tst     SIZE, #31
        beq     7f
        pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
        @ Check remaining data without doing any more preloads. First
        @ do in chunks of 4 words:
        @ (the adds undoes the bias applied at 5, less one 16-byte chunk)
7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
        bmi     9f
8:      innerloop16 16
        bne     93f
        bcs     8b
        @ Then in words:
9:      adds    SIZE, SIZE, #16 - 4
        bmi     11f
10:     innerloop4
        bne     91f
        bcs     10b
11:     setend  le
        @ Check second byte of final halfword
        ldrb    DAT0, [PTR, #-1]
        teq     DAT0, #0
        beq     90f
        @ Check any remaining bytes
        tst     SIZE, #3
        beq     13f
12:     ldrb    DAT0, [PTR], #1
        sub     SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        tst     SIZE, #3
        bne     12b
        @ No candidate found
13:     sub     RESULT, PTR, BUF
        b       99f

60:     @ Small buffer - simply check by looping over bytes
        @ NOTE(review): when SIZE is 0 the bcc below goes straight to 99 with
        @ a1 still holding BUF (RESULT aliases a1), so the pointer value rather
        @ than an index is returned - confirm callers never pass an empty range.
        subs    SIZE, SIZE, #1
        bcc     99f
61:     ldrb    DAT0, [PTR], #1
        subs    SIZE, SIZE, #1
        teq     DAT0, #0
        beq     90f
        bcs     61b
        @ No candidate found
        sub     RESULT, PTR, BUF
        b       99f

90:     @ Found a candidate at the preceding byte
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #1
        b       99f

91:     @ Found a candidate somewhere in the preceding 4 bytes
        @ RESULT starts as the offset of the word just tested (PTR is 4 past it).
        @ The N flag left by bics selects the halfword: pl adds 2 to RESULT and
        @ fetches word byte 1 ([PTR, #-3]); mi fetches the byte preceding the
        @ word ([PTR, #-5]).  At 92, a zero in that preceding byte moves the
        @ result back by one.
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #4
        sub     TMP0, DAT0, #0x20000
        bics    TMP0, TMP0, DAT0
        itt     pl
        ldrbpl  DAT0, [PTR, #-3]
        addpl   RESULT, RESULT, #2
        bpl     92f
        teq     RESULT, #0
        beq     98f @ don't look back a byte if found at first byte in buffer
        ldrb    DAT0, [PTR, #-5]
92:     teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f

93:     @ Found a candidate somewhere in the preceding 16 bytes
        @ RESULT starts as the offset of the first of the four words and is
        @ advanced by 4 for each word whose TMPn tested zero (95, 96, 97).  The
        @ matching word is then resolved in the same way as at 91, with the ldrb
        @ offsets adjusted for that word's distance behind PTR.
        sub     RESULT, PTR, BUF
        sub     RESULT, RESULT, #16
        teq     TMP0, #0
        beq     95f @ not in first 4 bytes
        sub     TMP0, DAT0, #0x20000
        bics    TMP0, TMP0, DAT0
        itt     pl
        ldrbpl  DAT0, [PTR, #-15]
        addpl   RESULT, RESULT, #2
        bpl     94f
        teq     RESULT, #0
        beq     98f @ don't look back a byte if found at first byte in buffer
        ldrb    DAT0, [PTR, #-17]
94:     teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
95:     add     RESULT, RESULT, #4
        teq     TMP1, #0
        beq     96f @ not in next 4 bytes
        sub     TMP1, DAT1, #0x20000
        bics    TMP1, TMP1, DAT1
        itee    mi
        ldrbmi  DAT0, [PTR, #-13]
        ldrbpl  DAT0, [PTR, #-11]
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
96:     add     RESULT, RESULT, #4
        teq     TMP2, #0
        beq     97f @ not in next 4 bytes
        sub     TMP2, DAT2, #0x20000
        bics    TMP2, TMP2, DAT2
        itee    mi
        ldrbmi  DAT0, [PTR, #-9]
        ldrbpl  DAT0, [PTR, #-7]
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        b       98f
        @ Reached only when the first three words tested zero, so the match must
        @ be in the fourth word and no TMP3 test is needed.
97:     add     RESULT, RESULT, #4
        sub     TMP3, DAT3, #0x20000
        bics    TMP3, TMP3, DAT3
        itee    mi
        ldrbmi  DAT0, [PTR, #-5]
        ldrbpl  DAT0, [PTR, #-3]
        addpl   RESULT, RESULT, #2
        teq     DAT0, #0
        it      eq
        subeq   RESULT, RESULT, #1
        @ drop through to 98f
        @ 98: exit for paths still in big-endian data mode; 99: common return
98:     setend  le
99:     pop     {v1-v6,pc}
.endfunc

        .unreq  RESULT
        .unreq  BUF
        .unreq  SIZE
        .unreq  PATTERN
        .unreq  PTR
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  TMP0
        .unreq  TMP1
        .unreq  TMP2
        .unreq  TMP3
const uint8_t *buf, int buf_si + i += h->h264dsp.h264_find_start_code_candidate(buf + i, buf_size - i); + if (i < buf_size) + state = 2; +- } + }else if(state<=2){ + if(buf[i]==1) state^= 5; //2->7, 1->4, 0->5 + else if(buf[i]) state = 7; +-- +1.8.5.1 +