diff --git a/packages/multimedia/ffmpeg/package.mk b/packages/multimedia/ffmpeg/package.mk index be25434128..4bc0b91418 100644 --- a/packages/multimedia/ffmpeg/package.mk +++ b/packages/multimedia/ffmpeg/package.mk @@ -17,7 +17,7 @@ ################################################################################ PKG_NAME="ffmpeg" -PKG_VERSION="2.2.4" +PKG_VERSION="2.3" PKG_REV="1" PKG_ARCH="any" PKG_LICENSE="LGPL" @@ -224,7 +224,6 @@ configure_target() { --disable-altivec \ $FFMPEG_CPU \ $FFMPEG_FPU \ - --disable-vis \ --enable-yasm \ --disable-sram \ --disable-symver diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-0000-xbmc.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-0000-xbmc.patch index ddc75c5a1e..8a0ef64cd4 100644 --- a/packages/multimedia/ffmpeg/patches/ffmpeg-0000-xbmc.patch +++ b/packages/multimedia/ffmpeg/patches/ffmpeg-0000-xbmc.patch @@ -1,7 +1,7 @@ -From 35ed29f583447d1d323c0cbdcf629f02a3601a27 Mon Sep 17 00:00:00 2001 +From f68c860bdc70e440f047ca60c8f9497a0e5a2122 Mon Sep 17 00:00:00 2001 From: Joakim Plate Date: Sun, 11 Sep 2011 19:04:51 +0200 -Subject: [PATCH 01/19] Support raw dvdsub palette as stored on normal dvd's +Subject: [PATCH 01/15] Support raw dvdsub palette as stored on normal dvd's This is how the palette is stored on dvd's. Currently only xbmc passes the palette information to libavcodec @@ -11,7 +11,7 @@ this way. 1 file changed, 24 insertions(+) diff --git a/libavcodec/dvdsubdec.c b/libavcodec/dvdsubdec.c -index 637f3e6..eb4a7b8 100644 +index 39b0e25..a19086d 100644 --- a/libavcodec/dvdsubdec.c +++ b/libavcodec/dvdsubdec.c @@ -61,6 +61,24 @@ static void yuv_a_to_rgba(const uint8_t *ycbcr, const uint8_t *alpha, uint32_t * @@ -20,7 +20,7 @@ index 637f3e6..eb4a7b8 100644 +static void ayvu_to_argb(const uint8_t *ayvu, uint32_t *argb, int num_values) +{ -+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; ++ uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; + uint8_t r, g, b; + int i, y, cb, cr, a; + int r_add, g_add, b_add; @@ -39,7 +39,7 @@ index 637f3e6..eb4a7b8 100644 static int decode_run_2bit(GetBitContext *gb, int *color) { unsigned int v, t; -@@ -624,6 +642,12 @@ static av_cold int dvdsub_init(AVCodecContext *avctx) +@@ -628,6 +646,12 @@ static av_cold int dvdsub_init(AVCodecContext *avctx) if (ctx->palette_str) parse_palette(ctx, ctx->palette_str); @@ -56,33 +56,34 @@ index 637f3e6..eb4a7b8 100644 1.9.3 -From e881447700f405bb702f91822eb576913675e9eb Mon Sep 17 00:00:00 2001 +From d53ff2a91f95b2b6ef3974921228e90a4a765af6 Mon Sep 17 00:00:00 2001 From: Joakim Plate Date: Sat, 22 Oct 2011 18:33:45 +0200 -Subject: [PATCH 02/19] Check return value of avio_seek and avoid modifying +Subject: [PATCH 02/15] Check return value of avio_seek and avoid modifying state if it fails The code still modifies state if the timestamp is not found. Not sure exactly how to avoid that. --- - libavformat/matroskadec.c | 19 ++++++++++++------- - 1 file changed, 12 insertions(+), 7 deletions(-) + libavformat/matroskadec.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c -index 4726e94..b04be90 100644 +index ec43526..66d5e8e 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c -@@ -2832,7 +2832,8 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index, +@@ -2992,8 +2992,8 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index, timestamp = FFMAX(timestamp, st->index_entries[0].timestamp); if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) { -- avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET); +- avio_seek(s->pb, st->index_entries[st->nb_index_entries - 1].pos, +- SEEK_SET); + if (avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET) < 0) + return -1; matroska->current_id = 0; while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) { matroska_clear_queue(matroska); -@@ -2841,16 +2842,11 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index, +@@ -3002,16 +3002,11 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index, } } @@ -91,19 +92,20 @@ index 4726e94..b04be90 100644 goto err; index_min = index; - for (i=0; i < matroska->tracks.nb_elem; i++) { -- tracks[i].audio.pkt_cnt = 0; + for (i = 0; i < matroska->tracks.nb_elem; i++) { +- tracks[i].audio.pkt_cnt = 0; - tracks[i].audio.sub_packet_cnt = 0; -- tracks[i].audio.buf_timecode = AV_NOPTS_VALUE; -- tracks[i].end_timecode = 0; - if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE - && tracks[i].stream->discard != AVDISCARD_ALL) { - index_sub = av_index_search_timestamp(tracks[i].stream, st->index_entries[index].timestamp, AVSEEK_FLAG_BACKWARD); -@@ -2862,7 +2858,16 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index, +- tracks[i].audio.buf_timecode = AV_NOPTS_VALUE; +- tracks[i].end_timecode = 0; + if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE && + tracks[i].stream->discard != AVDISCARD_ALL) { + index_sub = av_index_search_timestamp( +@@ -3025,8 +3020,18 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index, } } - avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET); +- matroska->current_id = 0; + if (avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET) < 0) + return -1; + @@ -114,27 +116,29 @@ index 4726e94..b04be90 100644 + tracks[i].audio.buf_timecode = AV_NOPTS_VALUE; + tracks[i].end_timecode = 0; + } - matroska->current_id = 0; ++ matroska->current_id = 0; ++ if (flags & AVSEEK_FLAG_ANY) { st->skip_to_keyframe = 0; + matroska->skip_to_timecode = timestamp; -- 1.9.3 -From 24830f831e0e2dfb71aceb06050328b8bb9d2195 Mon Sep 17 00:00:00 2001 +From d8c6b50095900bbc4f40dfb3c2d321a35361820a Mon Sep 17 00:00:00 2001 From: Joakim Plate Date: Mon, 12 Sep 2011 21:37:17 +0200 -Subject: [PATCH 03/19] asf hacks +Subject: [PATCH 03/15] asf hacks --- libavformat/asfdec.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c -index 9bbc704..387d77c 100644 +index 978b956..30f099d 100644 --- a/libavformat/asfdec.c +++ b/libavformat/asfdec.c -@@ -1537,9 +1537,20 @@ static int asf_read_seek(AVFormatContext *s, int stream_index, +@@ -1546,9 +1546,20 @@ static int asf_read_seek(AVFormatContext *s, int stream_index, AVStream *st = s->streams[stream_index]; int ret = 0; @@ -159,10 +163,10 @@ index 9bbc704..387d77c 100644 1.9.3 -From 52f1304731513b06690cab5821cfc3c15b5b1518 Mon Sep 17 00:00:00 2001 +From bb32180f7e9fe2ff89888c26731dc043844b49e2 Mon Sep 17 00:00:00 2001 From: Cory Fields Date: Mon, 28 Jun 2010 01:55:31 -0400 -Subject: [PATCH 04/19] if av_read_packet returns AVERROR_IO, we are done. +Subject: [PATCH 04/15] if av_read_packet returns AVERROR_IO, we are done. ffmpeg's codecs might or might not handle returning any completed demuxed packets correctly @@ -171,10 +175,10 @@ Subject: [PATCH 04/19] if av_read_packet returns AVERROR_IO, we are done. 1 file changed, 2 insertions(+) diff --git a/libavformat/utils.c b/libavformat/utils.c -index 27b37b2..2de79d7 100644 +index e095d60..9fa0bb0 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c -@@ -1437,6 +1437,8 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt) +@@ -1460,6 +1460,8 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt) if (ret < 0) { if (ret == AVERROR(EAGAIN)) return ret; @@ -187,10 +191,10 @@ index 27b37b2..2de79d7 100644 1.9.3 -From 6e2610a072dbf5c339a7f9053cd6d0760b03f3a7 Mon Sep 17 00:00:00 2001 +From aae4de70cac340ed7e1b8db34125216c1e13cb00 Mon Sep 17 00:00:00 2001 From: Cory Fields Date: Mon, 28 Jun 2010 02:10:50 -0400 -Subject: [PATCH 05/19] added: Ticket #7187, TV Teletext support for DVB EBU +Subject: [PATCH 05/15] added: Ticket #7187, TV Teletext support for DVB EBU Teletext streams --- @@ -199,10 +203,10 @@ Subject: [PATCH 05/19] added: Ticket #7187, TV Teletext support for DVB EBU 2 files changed, 6 insertions(+) diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h -index 5df717c..36c1bda 100644 +index 93ba4d0..f3de33a 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h -@@ -505,6 +505,10 @@ enum AVCodecID { +@@ -523,6 +523,10 @@ enum AVCodecID { AV_CODEC_ID_PJS = MKBETAG('P','h','J','S'), AV_CODEC_ID_ASS = MKBETAG('A','S','S',' '), ///< ASS as defined in Matroska @@ -214,11 +218,11 @@ index 5df717c..36c1bda 100644 AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, ///< A dummy ID pointing at the start of various fake codecs. AV_CODEC_ID_TTF = 0x18000, diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c -index 286b30b..162033f 100644 +index 7114088..e55193b 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c -@@ -673,6 +673,8 @@ static const StreamType DESC_types[] = { - { 0x7b, AVMEDIA_TYPE_AUDIO, AV_CODEC_ID_DTS }, +@@ -708,6 +708,8 @@ static const StreamType DESC_types[] = { + { 0x7b, AVMEDIA_TYPE_AUDIO, AV_CODEC_ID_DTS }, { 0x56, AVMEDIA_TYPE_SUBTITLE, AV_CODEC_ID_DVB_TELETEXT }, { 0x59, AVMEDIA_TYPE_SUBTITLE, AV_CODEC_ID_DVB_SUBTITLE }, /* subtitling descriptor */ + { 0x45, AVMEDIA_TYPE_DATA, AV_CODEC_ID_VBI_DATA }, /* VBI Data descriptor */ @@ -230,20 +234,20 @@ index 286b30b..162033f 100644 1.9.3 -From 373aefe13f7941931a6b56ccb6c99cb19d81f169 Mon Sep 17 00:00:00 2001 +From e71d4c1755bd4e23fe9b65fb6128a8b41cecfdb1 Mon Sep 17 00:00:00 2001 From: Joakim Plate Date: Sun, 18 Sep 2011 19:16:34 +0200 -Subject: [PATCH 06/19] Don't accept mpegts PMT that isn't current +Subject: [PATCH 06/15] Don't accept mpegts PMT that isn't current --- libavformat/mpegts.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c -index 162033f..402d8b3 100644 +index e55193b..9ec6220 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c -@@ -517,6 +517,7 @@ typedef struct SectionHeader { +@@ -552,6 +552,7 @@ typedef struct SectionHeader { uint8_t tid; uint16_t id; uint8_t version; @@ -251,17 +255,17 @@ index 162033f..402d8b3 100644 uint8_t sec_num; uint8_t last_sec_num; } SectionHeader; -@@ -588,6 +589,7 @@ static int parse_section_header(SectionHeader *h, +@@ -623,6 +624,7 @@ static int parse_section_header(SectionHeader *h, val = get8(pp, p_end); if (val < 0) - return -1; + return val; + h->current = val & 0x1; h->version = (val >> 1) & 0x1f; val = get8(pp, p_end); if (val < 0) -@@ -1790,6 +1792,8 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len +@@ -1891,6 +1893,8 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len return; - if (h->tid != PAT_TID) + if (ts->skip_changes) return; + if (!h->current) + return; @@ -272,36 +276,36 @@ index 162033f..402d8b3 100644 1.9.3 -From 2be1b4b7db563067a8b41d116a15f86fa1b8186c Mon Sep 17 00:00:00 2001 +From 473091d11f4e3a0c1820054368a76074a0e239cb Mon Sep 17 00:00:00 2001 From: Joakim Plate Date: Sun, 18 Sep 2011 19:17:23 +0200 -Subject: [PATCH 07/19] Don't reparse PMT unless it's version has changed +Subject: [PATCH 07/15] Don't reparse PMT unless it's version has changed --- libavformat/mpegts.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c -index 402d8b3..3625bfa 100644 +index 9ec6220..ab03372 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c -@@ -78,6 +78,7 @@ struct MpegTSFilter { - int pid; +@@ -87,6 +87,7 @@ struct MpegTSFilter { int es_id; int last_cc; /* last cc code (-1 if first packet) */ + int64_t last_pcr; + int last_version; /* last version of data on this pid */ enum MpegTSFilterType type; union { MpegTSPESFilter pes_filter; -@@ -416,6 +417,7 @@ static MpegTSFilter *mpegts_open_section_filter(MpegTSContext *ts, unsigned int - filter->pid = pid; - filter->es_id = -1; +@@ -432,6 +433,7 @@ static MpegTSFilter *mpegts_open_filter(MpegTSContext *ts, unsigned int pid, + filter->es_id = -1; filter->last_cc = -1; + filter->last_pcr= -1; + filter->last_version = -1; - sec = &filter->u.section_filter; - sec->section_cb = section_cb; - sec->opaque = opaque; -@@ -1794,6 +1796,10 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len + + return filter; + } +@@ -1895,6 +1897,10 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len return; if (!h->current) return; @@ -316,10 +320,10 @@ index 402d8b3..3625bfa 100644 1.9.3 -From 027d6070086fa0ca50be2bba1bac442ad8eb5def Mon Sep 17 00:00:00 2001 +From aa357f84bcdb105910478aee74d5b675d65114bd Mon Sep 17 00:00:00 2001 From: Cory Fields Date: Fri, 9 Jul 2010 16:43:31 -0400 -Subject: [PATCH 08/19] Read PID timestamps as well as PCR timestamps to find +Subject: [PATCH 08/15] Read PID timestamps as well as PCR timestamps to find location in mpegts stream --- @@ -327,10 +331,10 @@ Subject: [PATCH 08/19] Read PID timestamps as well as PCR timestamps to find 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c -index 3625bfa..7937c63 100644 +index ab03372..9962ccf 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c -@@ -2269,6 +2269,44 @@ static void seek_back(AVFormatContext *s, AVIOContext *pb, int64_t pos) { +@@ -2375,6 +2375,44 @@ static void seek_back(AVFormatContext *s, AVIOContext *pb, int64_t pos) { av_log(s, pb->seekable ? AV_LOG_ERROR : AV_LOG_INFO, "Unable to seek back to the start\n"); } @@ -375,15 +379,15 @@ index 3625bfa..7937c63 100644 static int mpegts_read_header(AVFormatContext *s) { MpegTSContext *ts = s->priv_data; -@@ -2470,6 +2508,7 @@ static av_unused int64_t mpegts_get_pcr(AVFormatContext *s, int stream_index, - int64_t pos, timestamp; +@@ -2574,6 +2612,7 @@ static av_unused int64_t mpegts_get_pcr(AVFormatContext *s, int stream_index, uint8_t buf[TS_PACKET_SIZE]; - int pcr_l, pcr_pid = ((PESContext*)s->streams[stream_index]->priv_data)->pcr_pid; + int pcr_l, pcr_pid = + ((PESContext *)s->streams[stream_index]->priv_data)->pcr_pid; + int pid = ((PESContext*)s->streams[stream_index]->priv_data)->pid; int pos47 = ts->pos47_full % ts->raw_packet_size; - pos = ((*ppos + ts->raw_packet_size - 1 - pos47) / ts->raw_packet_size) * ts->raw_packet_size + pos47; - while(pos < pos_limit) { -@@ -2489,6 +2528,11 @@ static av_unused int64_t mpegts_get_pcr(AVFormatContext *s, int stream_index, + pos = + ((*ppos + ts->raw_packet_size - 1 - pos47) / ts->raw_packet_size) * +@@ -2595,6 +2634,11 @@ static av_unused int64_t mpegts_get_pcr(AVFormatContext *s, int stream_index, *ppos = pos; return timestamp; } @@ -395,7 +399,7 @@ index 3625bfa..7937c63 100644 pos += ts->raw_packet_size; } -@@ -2588,7 +2632,7 @@ AVInputFormat ff_mpegts_demuxer = { +@@ -2694,7 +2738,7 @@ AVInputFormat ff_mpegts_demuxer = { .read_header = mpegts_read_header, .read_packet = mpegts_read_packet, .read_close = mpegts_read_close, @@ -404,7 +408,7 @@ index 3625bfa..7937c63 100644 .flags = AVFMT_SHOW_IDS | AVFMT_TS_DISCONT, .priv_class = &mpegts_class, }; -@@ -2600,7 +2644,7 @@ AVInputFormat ff_mpegtsraw_demuxer = { +@@ -2706,7 +2750,7 @@ AVInputFormat ff_mpegtsraw_demuxer = { .read_header = mpegts_read_header, .read_packet = mpegts_raw_read_packet, .read_close = mpegts_read_close, @@ -417,20 +421,20 @@ index 3625bfa..7937c63 100644 1.9.3 -From 52325298742c77793489c36edf66cc3883d206b1 Mon Sep 17 00:00:00 2001 +From 8deda04d599f1e248cba4d175257dea469feb719 Mon Sep 17 00:00:00 2001 From: Joakim Plate Date: Sat, 22 Oct 2011 19:01:38 +0200 -Subject: [PATCH 09/19] Get stream durations using read_timestamp +Subject: [PATCH 09/15] Get stream durations using read_timestamp --- libavformat/utils.c | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/libavformat/utils.c b/libavformat/utils.c -index 2de79d7..670e6ec 100644 +index 9fa0bb0..cbeaa9c 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c -@@ -2434,6 +2434,41 @@ static void estimate_timings_from_bit_rate(AVFormatContext *ic) +@@ -2480,6 +2480,41 @@ static void estimate_timings_from_bit_rate(AVFormatContext *ic) #define DURATION_MAX_READ_SIZE 250000LL #define DURATION_MAX_RETRY 4 @@ -472,7 +476,7 @@ index 2de79d7..670e6ec 100644 /* only usable for MPEG-PS streams */ static void estimate_timings_from_pts(AVFormatContext *ic, int64_t old_offset) { -@@ -2542,6 +2577,10 @@ static void estimate_timings(AVFormatContext *ic, int64_t old_offset) +@@ -2630,6 +2665,10 @@ static void estimate_timings(AVFormatContext *ic, int64_t old_offset) * the components */ fill_all_stream_timings(ic); ic->duration_estimation_method = AVFMT_DURATION_FROM_STREAM; @@ -487,10 +491,10 @@ index 2de79d7..670e6ec 100644 1.9.3 -From 2e20016a27a0cd1db61ad41add18b94e3acc9899 Mon Sep 17 00:00:00 2001 +From 77caa1aab9b838a0085e2f4133d0e27eb6588f4b Mon Sep 17 00:00:00 2001 From: Joakim Plate Date: Wed, 8 Dec 2010 14:03:43 +0000 -Subject: [PATCH 10/19] changed: allow 4 second skew between streams in mov +Subject: [PATCH 10/15] changed: allow 4 second skew between streams in mov before attempting to seek --- @@ -498,10 +502,10 @@ Subject: [PATCH 10/19] changed: allow 4 second skew between streams in mov 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libavformat/mov.c b/libavformat/mov.c -index 3fb7747..50926e8 100644 +index 9b4832f..41be8b7 100644 --- a/libavformat/mov.c +++ b/libavformat/mov.c -@@ -3459,8 +3459,8 @@ static AVIndexEntry *mov_find_next_sample(AVFormatContext *s, AVStream **st) +@@ -3673,8 +3673,8 @@ static AVIndexEntry *mov_find_next_sample(AVFormatContext *s, AVStream **st) if (!sample || (!s->pb->seekable && current_sample->pos < sample->pos) || (s->pb->seekable && ((msc->pb != s->pb && dts < best_dts) || (msc->pb == s->pb && @@ -516,10 +520,10 @@ index 3fb7747..50926e8 100644 1.9.3 -From 4c32619d53473499ee382981d420930a8d36801f Mon Sep 17 00:00:00 2001 +From c3d69fb6f71a674310fefb17aebab01a6744881c Mon Sep 17 00:00:00 2001 From: Joakim Plate Date: Fri, 26 Nov 2010 20:56:48 +0000 -Subject: [PATCH 11/19] fixed: memleak in mpegts demuxer on some malformed (??) +Subject: [PATCH 11/15] fixed: memleak in mpegts demuxer on some malformed (??) mpegts files with too large pes packets at-visions sample file brokenStream.mpg @@ -528,10 +532,10 @@ at-visions sample file brokenStream.mpg 1 file changed, 6 insertions(+) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c -index 7937c63..86bbf52 100644 +index 9962ccf..66ea11c 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c -@@ -767,6 +767,10 @@ static int mpegts_set_stream_info(AVStream *st, PESContext *pes, +@@ -811,6 +811,10 @@ static void reset_pes_packet_state(PESContext *pes) static void new_pes_packet(PESContext *pes, AVPacket *pkt) { @@ -542,7 +546,7 @@ index 7937c63..86bbf52 100644 av_init_packet(pkt); pkt->buf = pes->buffer; -@@ -2462,6 +2466,8 @@ static int mpegts_read_packet(AVFormatContext *s, +@@ -2565,6 +2569,8 @@ static int mpegts_read_packet(AVFormatContext *s, AVPacket *pkt) pkt->size = -1; ts->pkt = pkt; @@ -555,49 +559,50 @@ index 7937c63..86bbf52 100644 1.9.3 -From fce8e4a5e10c9b03243623a22e9969473b31b07d Mon Sep 17 00:00:00 2001 +From e621e2b83b43a5fade298251094458451eecad41 Mon Sep 17 00:00:00 2001 From: Joakim Plate Date: Mon, 28 Jun 2010 21:26:54 +0000 -Subject: [PATCH 12/19] Speed up mpegts av_find_stream_info +Subject: [PATCH 12/15] Speed up mpegts av_find_stream_info --- libavformat/mpegts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c -index 86bbf52..28d6a6b 100644 +index 66ea11c..5811d26 100644 --- a/libavformat/mpegts.c +++ b/libavformat/mpegts.c -@@ -927,7 +927,7 @@ static int mpegts_push_data(MpegTSFilter *filter, +@@ -969,7 +969,7 @@ static int mpegts_push_data(MpegTSFilter *filter, goto skip; /* stream not present in PMT */ - if (!pes->st) { + if (ts->auto_guess && !pes->st) { - pes->st = avformat_new_stream(ts->stream, NULL); - if (!pes->st) - return AVERROR(ENOMEM); + if (ts->skip_changes) + goto skip; + -- 1.9.3 -From f9e506d81f43fdc06abb6d4d85296fec51dbfeb9 Mon Sep 17 00:00:00 2001 +From 07a31ecbe3493cbc1d1a5b6dee7784257a70ca17 Mon Sep 17 00:00:00 2001 From: marc Date: Mon, 18 Feb 2013 17:18:18 +0000 -Subject: [PATCH 13/19] dxva-h264 Fix dxva playback of streams that don't start +Subject: [PATCH 13/15] dxva-h264 Fix dxva playback of streams that don't start with an I-Frame. --- libavcodec/dxva2_h264.c | 8 ++++++++ - libavcodec/h264.c | 2 ++ + libavcodec/h264.c | 1 + libavcodec/h264.h | 2 ++ - 3 files changed, 12 insertions(+) + libavcodec/h264_slice.c | 1 + + 4 files changed, 12 insertions(+) diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c -index 02d3f54..848fa35 100644 +index 1174188..263a272 100644 --- a/libavcodec/dxva2_h264.c +++ b/libavcodec/dxva2_h264.c -@@ -431,6 +431,14 @@ static int dxva2_h264_end_frame(AVCodecContext *avctx) +@@ -448,6 +448,14 @@ static int dxva2_h264_end_frame(AVCodecContext *avctx) if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0) return -1; @@ -609,34 +614,26 @@ index 02d3f54..848fa35 100644 + h->got_first_iframe = 1; + } + - ret = ff_dxva2_common_end_frame(avctx, h->cur_pic_ptr, + ret = ff_dxva2_common_end_frame(avctx, &h->cur_pic_ptr->f, &ctx_pic->pp, sizeof(ctx_pic->pp), &ctx_pic->qm, sizeof(ctx_pic->qm), diff --git a/libavcodec/h264.c b/libavcodec/h264.c -index d855b7b..c560fa9 100644 +index 1d91987..8b7b026 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c -@@ -2869,6 +2869,7 @@ static void flush_dpb(AVCodecContext *avctx) - h->delayed_pic[i]->reference = 0; - h->delayed_pic[i] = NULL; - } +@@ -1085,6 +1085,7 @@ void ff_h264_flush_change(H264Context *h) + h->list_count = 0; + h->current_slice = 0; + h->mmco_reset = 1; + h->got_first_iframe = 0; + } - flush_change(h); - -@@ -3312,6 +3313,7 @@ static int h264_slice_header_init(H264Context *h, int reinit) - free_tables(h, 0); - h->first_field = 0; - h->prev_interlaced_frame = 1; -+ h->got_first_iframe = 0; - - init_scan_tables(h); - ret = ff_h264_alloc_tables(h); + /* forget old pics after a seek */ diff --git a/libavcodec/h264.h b/libavcodec/h264.h -index 4a41fff..6b3ca1d 100644 +index 228558b..5e92043 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h -@@ -682,6 +682,8 @@ typedef struct H264Context { +@@ -740,6 +740,8 @@ typedef struct H264Context { int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag @@ -645,14 +642,26 @@ index 4a41fff..6b3ca1d 100644 // Timestamp stuff int sei_buffering_period_present; ///< Buffering period SEI flag int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs +diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c +index ded26f8..e20f2c8 100644 +--- a/libavcodec/h264_slice.c ++++ b/libavcodec/h264_slice.c +@@ -1166,6 +1166,7 @@ static int h264_slice_header_init(H264Context *h, int reinit) + ff_h264_free_tables(h, 0); + h->first_field = 0; + h->prev_interlaced_frame = 1; ++ h->got_first_iframe = 0; + + init_scan_tables(h); + ret = ff_h264_alloc_tables(h); -- 1.9.3 -From d499ee3d255c811b13d2749c2d4d5a013d5d8975 Mon Sep 17 00:00:00 2001 +From 67247a541dc1dfb547d35eb326ecf26b6c10b4d3 Mon Sep 17 00:00:00 2001 From: elupus Date: Tue, 1 Nov 2011 20:18:35 +0100 -Subject: [PATCH 14/19] add public version of ff_read_frame_flush +Subject: [PATCH 14/15] add public version of ff_read_frame_flush We need this since we sometimes seek on the input stream behind ffmpeg's back. After this @@ -663,10 +672,10 @@ all data need to be flushed completely. 2 files changed, 10 insertions(+) diff --git a/libavformat/avformat.h b/libavformat/avformat.h -index 7839c0a..33f10a8 100644 +index a9abfbd..ff19215 100644 --- a/libavformat/avformat.h +++ b/libavformat/avformat.h -@@ -1944,6 +1944,11 @@ int av_read_packet(AVFormatContext *s, AVPacket *pkt); +@@ -2074,6 +2074,11 @@ int av_read_packet(AVFormatContext *s, AVPacket *pkt); int av_read_frame(AVFormatContext *s, AVPacket *pkt); /** @@ -679,10 +688,10 @@ index 7839c0a..33f10a8 100644 * 'timestamp' in 'stream_index'. * diff --git a/libavformat/utils.c b/libavformat/utils.c -index 670e6ec..a61613f 100644 +index cbeaa9c..185706f 100644 --- a/libavformat/utils.c +++ b/libavformat/utils.c -@@ -1702,6 +1702,11 @@ void ff_read_frame_flush(AVFormatContext *s) +@@ -1748,6 +1748,11 @@ void ff_read_frame_flush(AVFormatContext *s) } } @@ -698,293 +707,10 @@ index 670e6ec..a61613f 100644 1.9.3 -From 951fff16d4a49571f54f3c212504d8e71aa537e7 Mon Sep 17 00:00:00 2001 -From: Hendrik Leppkes -Date: Tue, 4 Mar 2014 08:28:38 +0100 -Subject: [PATCH 15/19] dxva2_h264: set the correct ref frame index in the long - slice struct - -The latest H.264 DXVA specification states that the index in this -structure should refer to a valid entry in the RefFrameList of the picture -parameter structure, and not to the actual surface index. - -Fixes H.264 DXVA2 decoding on recent Intel GPUs (tested on Sandy and Ivy) - -Signed-off-by: Michael Niedermayer ---- - libavcodec/dxva2_h264.c | 18 ++++++++++++++---- - 1 file changed, 14 insertions(+), 4 deletions(-) - -diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c -index 848fa35..20dd64e 100644 ---- a/libavcodec/dxva2_h264.c -+++ b/libavcodec/dxva2_h264.c -@@ -194,8 +194,18 @@ static void fill_slice_short(DXVA_Slice_H264_Short *slice, - slice->wBadSliceChopping = 0; - } - -+static int get_refpic_index(const DXVA_PicParams_H264 *pp, int surface_index) -+{ -+ int i; -+ for (i = 0; i < FF_ARRAY_ELEMS(pp->RefFrameList); i++) { -+ if ((pp->RefFrameList[i].bPicEntry & 0x7f) == surface_index) -+ return i; -+ } -+ return 0x7f; -+} -+ - static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, -- unsigned position, unsigned size) -+ const DXVA_PicParams_H264 *pp, unsigned position, unsigned size) - { - const H264Context *h = avctx->priv_data; - struct dxva_context *ctx = avctx->hwaccel_context; -@@ -228,8 +238,8 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, - if (list < h->list_count && i < h->ref_count[list]) { - const Picture *r = &h->ref_list[list][i]; - unsigned plane; -- fill_picture_entry(&slice->RefPicList[list][i], -- ff_dxva2_get_surface_index(ctx, r), -+ unsigned index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, r)); -+ fill_picture_entry(&slice->RefPicList[list][i], index, - r->reference == PICT_BOTTOM_FIELD); - for (plane = 0; plane < 3; plane++) { - int w, o; -@@ -414,7 +424,7 @@ static int dxva2_h264_decode_slice(AVCodecContext *avctx, - position, size); - else - fill_slice_long(avctx, &ctx_pic->slice_long[ctx_pic->slice_count], -- position, size); -+ &ctx_pic->pp, position, size); - ctx_pic->slice_count++; - - if (h->slice_type != AV_PICTURE_TYPE_I && h->slice_type != AV_PICTURE_TYPE_SI) --- -1.9.3 - - -From b083afc1a02c143b9c7f6f3970da69f90cb42c18 Mon Sep 17 00:00:00 2001 -From: Hendrik Leppkes -Date: Tue, 4 Mar 2014 08:28:39 +0100 -Subject: [PATCH 16/19] dxva2_h264: add a workaround for old intel GPUs - -Old Intel GPUs expect the reference frame index to the actual surface, -instead of the index into RefFrameList as specified by the spec. - -This workaround should be set when using one of the "ClearVideo" decoder -devices. - -Signed-off-by: Michael Niedermayer ---- - libavcodec/dxva2.h | 1 + - libavcodec/dxva2_h264.c | 8 +++++++- - 2 files changed, 8 insertions(+), 1 deletion(-) - -diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h -index ac39e06..2639d89 100644 ---- a/libavcodec/dxva2.h -+++ b/libavcodec/dxva2.h -@@ -49,6 +49,7 @@ - */ - - #define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2 and old UVD/UVD+ ATI video cards -+#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for DXVA2 and old Intel GPUs with ClearVideo interface - - /** - * This structure is used to provides the necessary configurations and data -diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c -index 20dd64e..5833185 100644 ---- a/libavcodec/dxva2_h264.c -+++ b/libavcodec/dxva2_h264.c -@@ -115,6 +115,8 @@ static void fill_picture_parameters(struct dxva_context *ctx, const H264Context - pp->bit_depth_chroma_minus8 = h->sps.bit_depth_chroma - 8; - if (ctx->workaround & FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG) - pp->Reserved16Bits = 0; -+ else if (ctx->workaround & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO) -+ pp->Reserved16Bits = 0x34c; - else - pp->Reserved16Bits = 3; /* FIXME is there a way to detect the right mode ? */ - pp->StatusReportFeedbackNumber = 1 + ctx->report_id++; -@@ -238,7 +240,11 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice, - if (list < h->list_count && i < h->ref_count[list]) { - const Picture *r = &h->ref_list[list][i]; - unsigned plane; -- unsigned index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, r)); -+ unsigned index; -+ if (ctx->workaround & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO) -+ index = ff_dxva2_get_surface_index(ctx, r); -+ else -+ index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, r)); - fill_picture_entry(&slice->RefPicList[list][i], index, - r->reference == PICT_BOTTOM_FIELD); - for (plane = 0; plane < 3; plane++) { --- -1.9.3 - - -From f1d6d2e9186fde4ebf5b4a48b3f9b9474180dec4 Mon Sep 17 00:00:00 2001 -From: Rainer Hochecker -Date: Sat, 12 Apr 2014 18:13:32 +0200 -Subject: [PATCH 17/19] flac demuxer: improve seeking - ---- - libavcodec/flac_parser.c | 13 +++++++++++++ - libavformat/flacdec.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 58 insertions(+) - -diff --git a/libavcodec/flac_parser.c b/libavcodec/flac_parser.c -index ba1f060..6ff4d9c 100644 ---- a/libavcodec/flac_parser.c -+++ b/libavcodec/flac_parser.c -@@ -489,6 +489,14 @@ static int get_best_header(FLACParseContext* fpc, const uint8_t **poutbuf, - &fpc->wrap_buf, - &fpc->wrap_buf_allocated_size); - -+ -+ if (fpc->pc->flags & PARSER_FLAG_USE_CODEC_TS){ -+ if (header->fi.is_var_size) -+ fpc->pc->pts = header->fi.frame_or_sample_num; -+ else if (header->best_child) -+ fpc->pc->pts = header->fi.frame_or_sample_num * header->fi.blocksize; -+ } -+ - fpc->best_header_valid = 0; - fpc->last_fi_valid = 1; - fpc->last_fi = header->fi; -@@ -516,6 +524,11 @@ static int flac_parse(AVCodecParserContext *s, AVCodecContext *avctx, - s->duration = fi.blocksize; - if (!avctx->sample_rate) - avctx->sample_rate = fi.samplerate; -+ if (fpc->pc->flags & PARSER_FLAG_USE_CODEC_TS){ -+ fpc->pc->pts = fi.frame_or_sample_num; -+ if (!fi.is_var_size) -+ fpc->pc->pts *= fi.blocksize; -+ } - } - *poutbuf = buf; - *poutbuf_size = buf_size; -diff --git a/libavformat/flacdec.c b/libavformat/flacdec.c -index 29310b8..9386da0 100644 ---- a/libavformat/flacdec.c -+++ b/libavformat/flacdec.c -@@ -157,12 +157,57 @@ static int flac_probe(AVProbeData *p) - return AVPROBE_SCORE_EXTENSION; - } - -+static av_unused int64_t flac_read_timestamp(AVFormatContext *s, int stream_index, -+ int64_t *ppos, int64_t pos_limit) -+{ -+ AVPacket pkt, out_pkt; -+ AVStream *st = s->streams[stream_index]; -+ int ret; -+ -+ if (avio_seek(s->pb, *ppos, SEEK_SET) < 0) -+ return AV_NOPTS_VALUE; -+ -+ av_init_packet(&pkt); -+ st->parser = av_parser_init(st->codec->codec_id); -+ if (!st->parser){ -+ return AV_NOPTS_VALUE; -+ } -+ st->parser->flags |= PARSER_FLAG_USE_CODEC_TS; -+ -+ for (;;){ -+ ret = ff_raw_read_partial_packet(s, &pkt); -+ if (ret < 0){ -+ if (ret == AVERROR(EAGAIN)) -+ continue; -+ else -+ return AV_NOPTS_VALUE; -+ } -+ av_init_packet(&out_pkt); -+ ret = av_parser_parse2(st->parser, st->codec, -+ &out_pkt.data, &out_pkt.size, pkt.data, pkt.size, -+ pkt.pts, pkt.dts, *ppos); -+ -+ if (out_pkt.size){ -+ int size = out_pkt.size; -+ av_free_packet(&out_pkt); -+ if (st->parser->pts != AV_NOPTS_VALUE){ -+ // seeking may not have started from beginning of a frame -+ // calculate frame start position from next frame backwards -+ *ppos = st->parser->next_frame_offset - size; -+ return st->parser->pts; -+ } -+ } -+ } -+ return AV_NOPTS_VALUE; -+} -+ - AVInputFormat ff_flac_demuxer = { - .name = "flac", - .long_name = NULL_IF_CONFIG_SMALL("raw FLAC"), - .read_probe = flac_probe, - .read_header = flac_read_header, - .read_packet = ff_raw_read_partial_packet, -+ .read_timestamp = flac_read_timestamp, - .flags = AVFMT_GENERIC_INDEX, - .extensions = "flac", - .raw_codec_id = AV_CODEC_ID_FLAC, --- -1.9.3 - - -From ffe6e9d200eeba76b68ea57aa00d7cfe8d13eb93 Mon Sep 17 00:00:00 2001 -From: Rainer Hochecker -Date: Mon, 14 Apr 2014 16:06:55 +0200 -Subject: [PATCH 18/19] fate: update seeking reference for flac - ---- - tests/ref/seek/acodec-flac | 12 ++++++++---- - 1 file changed, 8 insertions(+), 4 deletions(-) - -diff --git a/tests/ref/seek/acodec-flac b/tests/ref/seek/acodec-flac -index ab31891..f6add9a 100644 ---- a/tests/ref/seek/acodec-flac -+++ b/tests/ref/seek/acodec-flac -@@ -5,14 +5,16 @@ ret: 0 st:-1 flags:1 ts: 1.894167 - ret: 0 st: 0 flags:1 dts: 1.880816 pts: 1.880816 pos: 86742 size: 2191 - ret: 0 st: 0 flags:0 ts: 0.788345 - ret: 0 st: 0 flags:1 dts: 0.809796 pts: 0.809796 pos: 27366 size: 615 --ret:-1 st: 0 flags:1 ts:-0.317506 -+ret: 0 st: 0 flags:1 ts:-0.317506 -+ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 8256 size: 614 - ret: 0 st:-1 flags:0 ts: 2.576668 - ret: 0 st: 0 flags:1 dts: 2.586122 pts: 2.586122 pos: 145606 size: 2384 - ret: 0 st:-1 flags:1 ts: 1.470835 - ret: 0 st: 0 flags:1 dts: 1.462857 pts: 1.462857 pos: 53388 size: 1851 - ret: 0 st: 0 flags:0 ts: 0.365011 - ret: 0 st: 0 flags:1 dts: 0.365714 pts: 0.365714 pos: 16890 size: 614 --ret:-1 st: 0 flags:1 ts:-0.740839 -+ret: 0 st: 0 flags:1 ts:-0.740839 -+ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 8256 size: 614 - ret: 0 st:-1 flags:0 ts: 2.153336 - ret: 0 st: 0 flags:1 dts: 2.168163 pts: 2.168163 pos: 110531 size: 2143 - ret: 0 st:-1 flags:1 ts: 1.047503 -@@ -39,11 +41,13 @@ ret: 0 st: 0 flags:1 ts: 1.989184 - ret: 0 st: 0 flags:1 dts: 1.985306 pts: 1.985306 pos: 95508 size: 2169 - ret: 0 st:-1 flags:0 ts: 0.883340 - ret: 0 st: 0 flags:1 dts: 0.888163 pts: 0.888163 pos: 29211 size: 620 --ret:-1 st:-1 flags:1 ts:-0.222493 -+ret: 0 st:-1 flags:1 ts:-0.222493 -+ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 8256 size: 614 - ret: 0 st: 0 flags:0 ts: 2.671678 - ret: 0 st: 0 flags:1 dts: 2.690612 pts: 2.690612 pos: 155154 size: 2394 - ret: 0 st: 0 flags:1 ts: 1.565850 - ret: 0 st: 0 flags:1 dts: 1.541224 pts: 1.541224 pos: 59082 size: 1974 - ret: 0 st:-1 flags:0 ts: 0.460008 - ret: 0 st: 0 flags:1 dts: 0.470204 pts: 0.470204 pos: 19353 size: 608 --ret:-1 st:-1 flags:1 ts:-0.645825 -+ret: 0 st:-1 flags:1 ts:-0.645825 -+ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 8256 size: 614 --- -1.9.3 - - -From d6104d92e0e7248585cee2da796e458ef5937f12 Mon Sep 17 00:00:00 2001 +From d028c907004e8a3c0f5161ce595331e4cc57c86c Mon Sep 17 00:00:00 2001 From: Memphiz Date: Mon, 12 May 2014 18:27:01 +0200 -Subject: [PATCH 19/19] fix --disable-ffplay should disable any needs to check +Subject: [PATCH 15/15] fix --disable-ffplay should disable any needs to check or add compile/link flags otherwise SDL gets spewed all over pkg-config files and generally causes a mess @@ -993,10 +719,10 @@ Subject: [PATCH 19/19] fix --disable-ffplay should disable any needs to check 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/configure b/configure -index f9dce4e..0b582f0 100755 +index 4ed43a0..4520e8c 100755 --- a/configure +++ b/configure -@@ -4571,22 +4571,24 @@ if enabled libdc1394; then +@@ -4827,22 +4827,24 @@ if enabled libdc1394; then die "ERROR: No version of libdc1394 found " fi diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-0001-h264-Move-search-code-search-functions-into-separate.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-0001-h264-Move-search-code-search-functions-into-separate.patch deleted file mode 100644 index 62e473d94e..0000000000 --- a/packages/multimedia/ffmpeg/patches/ffmpeg-0001-h264-Move-search-code-search-functions-into-separate.patch +++ /dev/null @@ -1,752 +0,0 @@ -From 8cdb3bf2837a3fb4fff3c6586316f81ae5f7b6cd Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Wed, 16 Apr 2014 01:51:31 +0100 -Subject: [PATCH 1/3] h264: Move search code search functions into separate - source files. - -This permits re-use with parsers for codecs which use similar start codes. - -Signed-off-by: Michael Niedermayer ---- - libavcodec/Makefile | 2 +- - libavcodec/arm/Makefile | 2 +- - libavcodec/arm/h264dsp_armv6.S | 253 -------------------------------------- - libavcodec/arm/h264dsp_init_arm.c | 4 +- - libavcodec/arm/startcode_armv6.S | 253 ++++++++++++++++++++++++++++++++++++++ - libavcodec/h264dsp.c | 31 +---- - libavcodec/startcode.c | 57 +++++++++ - libavcodec/startcode.h | 35 ++++++ - 8 files changed, 351 insertions(+), 286 deletions(-) - delete mode 100644 libavcodec/arm/h264dsp_armv6.S - create mode 100644 libavcodec/arm/startcode_armv6.S - create mode 100644 libavcodec/startcode.c - create mode 100644 libavcodec/startcode.h - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index b56ecd1..19caf11 100644 ---- a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -49,7 +49,7 @@ OBJS-$(CONFIG_FFT) += avfft.o fft_fixed.o fft_float.o \ - OBJS-$(CONFIG_GOLOMB) += golomb.o - OBJS-$(CONFIG_H263DSP) += h263dsp.o - OBJS-$(CONFIG_H264CHROMA) += h264chroma.o --OBJS-$(CONFIG_H264DSP) += h264dsp.o h264idct.o -+OBJS-$(CONFIG_H264DSP) += h264dsp.o h264idct.o startcode.o - OBJS-$(CONFIG_H264PRED) += h264pred.o - OBJS-$(CONFIG_H264QPEL) += h264qpel.o - OBJS-$(CONFIG_HPELDSP) += hpeldsp.o -diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile -index a8446b2..b6410b2 100644 ---- a/libavcodec/arm/Makefile -+++ b/libavcodec/arm/Makefile -@@ -47,7 +47,7 @@ ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \ - arm/simple_idct_armv6.o \ - - ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o --ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o -+ARMV6-OBJS-$(CONFIG_H264DSP) += arm/startcode_armv6.o - ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \ - arm/hpeldsp_armv6.o - ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o -diff --git a/libavcodec/arm/h264dsp_armv6.S b/libavcodec/arm/h264dsp_armv6.S -deleted file mode 100644 -index 2758262..0000000 ---- a/libavcodec/arm/h264dsp_armv6.S -+++ /dev/null -@@ -1,253 +0,0 @@ --/* -- * Copyright (c) 2013 RISC OS Open Ltd -- * Author: Ben Avison -- * -- * This file is part of FFmpeg. -- * -- * FFmpeg is free software; you can redistribute it and/or -- * modify it under the terms of the GNU Lesser General Public -- * License as published by the Free Software Foundation; either -- * version 2.1 of the License, or (at your option) any later version. -- * -- * FFmpeg is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -- * Lesser General Public License for more details. -- * -- * You should have received a copy of the GNU Lesser General Public -- * License along with FFmpeg; if not, write to the Free Software -- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -- */ -- --#include "libavutil/arm/asm.S" -- --RESULT .req a1 --BUF .req a1 --SIZE .req a2 --PATTERN .req a3 --PTR .req a4 --DAT0 .req v1 --DAT1 .req v2 --DAT2 .req v3 --DAT3 .req v4 --TMP0 .req v5 --TMP1 .req v6 --TMP2 .req ip --TMP3 .req lr -- --#define PRELOAD_DISTANCE 4 -- --.macro innerloop4 -- ldr DAT0, [PTR], #4 -- subs SIZE, SIZE, #4 @ C flag survives rest of macro -- sub TMP0, DAT0, PATTERN, lsr #14 -- bic TMP0, TMP0, DAT0 -- ands TMP0, TMP0, PATTERN --.endm -- --.macro innerloop16 decrement, do_preload -- ldmia PTR!, {DAT0,DAT1,DAT2,DAT3} -- .ifnc "\do_preload","" -- pld [PTR, #PRELOAD_DISTANCE*32] -- .endif -- .ifnc "\decrement","" -- subs SIZE, SIZE, #\decrement @ C flag survives rest of macro -- .endif -- sub TMP0, DAT0, PATTERN, lsr #14 -- sub TMP1, DAT1, PATTERN, lsr #14 -- bic TMP0, TMP0, DAT0 -- bic TMP1, TMP1, DAT1 -- sub TMP2, DAT2, PATTERN, lsr #14 -- sub TMP3, DAT3, PATTERN, lsr #14 -- ands TMP0, TMP0, PATTERN -- bic TMP2, TMP2, DAT2 -- it eq -- andseq TMP1, TMP1, PATTERN -- bic TMP3, TMP3, DAT3 -- itt eq -- andseq TMP2, TMP2, PATTERN -- andseq TMP3, TMP3, PATTERN --.endm -- --/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */ --function ff_h264_find_start_code_candidate_armv6, export=1 -- push {v1-v6,lr} -- mov PTR, BUF -- @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go -- @ before using code that does preloads -- cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1 -- blo 60f -- -- @ Get to word-alignment, 1 byte at a time -- tst PTR, #3 -- beq 2f --1: ldrb DAT0, [PTR], #1 -- sub SIZE, SIZE, #1 -- teq DAT0, #0 -- beq 90f -- tst PTR, #3 -- bne 1b --2: @ Get to 4-word alignment, 1 word at a time -- ldr PATTERN, =0x80008000 -- setend be -- tst PTR, #12 -- beq 4f --3: innerloop4 -- bne 91f -- tst PTR, #12 -- bne 3b --4: @ Get to cacheline (8-word) alignment -- tst PTR, #16 -- beq 5f -- innerloop16 16 -- bne 93f --5: @ Check complete cachelines, with preloading -- @ We need to stop when there are still (PRELOAD_DISTANCE+1) -- @ complete cachelines to go -- sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 --6: innerloop16 , do_preload -- bne 93f -- innerloop16 32 -- bne 93f -- bcs 6b -- @ Preload trailing part-cacheline, if any -- tst SIZE, #31 -- beq 7f -- pld [PTR, #(PRELOAD_DISTANCE+1)*32] -- @ Check remaining data without doing any more preloads. First -- @ do in chunks of 4 words: --7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16 -- bmi 9f --8: innerloop16 16 -- bne 93f -- bcs 8b -- @ Then in words: --9: adds SIZE, SIZE, #16 - 4 -- bmi 11f --10: innerloop4 -- bne 91f -- bcs 10b --11: setend le -- @ Check second byte of final halfword -- ldrb DAT0, [PTR, #-1] -- teq DAT0, #0 -- beq 90f -- @ Check any remaining bytes -- tst SIZE, #3 -- beq 13f --12: ldrb DAT0, [PTR], #1 -- sub SIZE, SIZE, #1 -- teq DAT0, #0 -- beq 90f -- tst SIZE, #3 -- bne 12b -- @ No candidate found --13: sub RESULT, PTR, BUF -- b 99f -- --60: @ Small buffer - simply check by looping over bytes -- subs SIZE, SIZE, #1 -- bcc 99f --61: ldrb DAT0, [PTR], #1 -- subs SIZE, SIZE, #1 -- teq DAT0, #0 -- beq 90f -- bcs 61b -- @ No candidate found -- sub RESULT, PTR, BUF -- b 99f -- --90: @ Found a candidate at the preceding byte -- sub RESULT, PTR, BUF -- sub RESULT, RESULT, #1 -- b 99f -- --91: @ Found a candidate somewhere in the preceding 4 bytes -- sub RESULT, PTR, BUF -- sub RESULT, RESULT, #4 -- sub TMP0, DAT0, #0x20000 -- bics TMP0, TMP0, DAT0 -- itt pl -- ldrbpl DAT0, [PTR, #-3] -- addpl RESULT, RESULT, #2 -- bpl 92f -- teq RESULT, #0 -- beq 98f @ don't look back a byte if found at first byte in buffer -- ldrb DAT0, [PTR, #-5] --92: teq DAT0, #0 -- it eq -- subeq RESULT, RESULT, #1 -- b 98f -- --93: @ Found a candidate somewhere in the preceding 16 bytes -- sub RESULT, PTR, BUF -- sub RESULT, RESULT, #16 -- teq TMP0, #0 -- beq 95f @ not in first 4 bytes -- sub TMP0, DAT0, #0x20000 -- bics TMP0, TMP0, DAT0 -- itt pl -- ldrbpl DAT0, [PTR, #-15] -- addpl RESULT, RESULT, #2 -- bpl 94f -- teq RESULT, #0 -- beq 98f @ don't look back a byte if found at first byte in buffer -- ldrb DAT0, [PTR, #-17] --94: teq DAT0, #0 -- it eq -- subeq RESULT, RESULT, #1 -- b 98f --95: add RESULT, RESULT, #4 -- teq TMP1, #0 -- beq 96f @ not in next 4 bytes -- sub TMP1, DAT1, #0x20000 -- bics TMP1, TMP1, DAT1 -- itee mi -- ldrbmi DAT0, [PTR, #-13] -- ldrbpl DAT0, [PTR, #-11] -- addpl RESULT, RESULT, #2 -- teq DAT0, #0 -- it eq -- subeq RESULT, RESULT, #1 -- b 98f --96: add RESULT, RESULT, #4 -- teq TMP2, #0 -- beq 97f @ not in next 4 bytes -- sub TMP2, DAT2, #0x20000 -- bics TMP2, TMP2, DAT2 -- itee mi -- ldrbmi DAT0, [PTR, #-9] -- ldrbpl DAT0, [PTR, #-7] -- addpl RESULT, RESULT, #2 -- teq DAT0, #0 -- it eq -- subeq RESULT, RESULT, #1 -- b 98f --97: add RESULT, RESULT, #4 -- sub TMP3, DAT3, #0x20000 -- bics TMP3, TMP3, DAT3 -- itee mi -- ldrbmi DAT0, [PTR, #-5] -- ldrbpl DAT0, [PTR, #-3] -- addpl RESULT, RESULT, #2 -- teq DAT0, #0 -- it eq -- subeq RESULT, RESULT, #1 -- @ drop through to 98f --98: setend le --99: pop {v1-v6,pc} --endfunc -- -- .unreq RESULT -- .unreq BUF -- .unreq SIZE -- .unreq PATTERN -- .unreq PTR -- .unreq DAT0 -- .unreq DAT1 -- .unreq DAT2 -- .unreq DAT3 -- .unreq TMP0 -- .unreq TMP1 -- .unreq TMP2 -- .unreq TMP3 -diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c -index a0418fd..eb6c514 100644 ---- a/libavcodec/arm/h264dsp_init_arm.c -+++ b/libavcodec/arm/h264dsp_init_arm.c -@@ -24,7 +24,7 @@ - #include "libavutil/arm/cpu.h" - #include "libavcodec/h264dsp.h" - --int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size); -+int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size); - - void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, - int beta, int8_t *tc0); -@@ -109,7 +109,7 @@ av_cold void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, - int cpu_flags = av_get_cpu_flags(); - - if (have_armv6(cpu_flags)) -- c->h264_find_start_code_candidate = ff_h264_find_start_code_candidate_armv6; -+ c->h264_find_start_code_candidate = ff_startcode_find_candidate_armv6; - if (have_neon(cpu_flags)) - h264dsp_init_neon(c, bit_depth, chroma_format_idc); - } -diff --git a/libavcodec/arm/startcode_armv6.S b/libavcodec/arm/startcode_armv6.S -new file mode 100644 -index 0000000..a46f009 ---- /dev/null -+++ b/libavcodec/arm/startcode_armv6.S -@@ -0,0 +1,253 @@ -+/* -+ * Copyright (c) 2013 RISC OS Open Ltd -+ * Author: Ben Avison -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "libavutil/arm/asm.S" -+ -+RESULT .req a1 -+BUF .req a1 -+SIZE .req a2 -+PATTERN .req a3 -+PTR .req a4 -+DAT0 .req v1 -+DAT1 .req v2 -+DAT2 .req v3 -+DAT3 .req v4 -+TMP0 .req v5 -+TMP1 .req v6 -+TMP2 .req ip -+TMP3 .req lr -+ -+#define PRELOAD_DISTANCE 4 -+ -+.macro innerloop4 -+ ldr DAT0, [PTR], #4 -+ subs SIZE, SIZE, #4 @ C flag survives rest of macro -+ sub TMP0, DAT0, PATTERN, lsr #14 -+ bic TMP0, TMP0, DAT0 -+ ands TMP0, TMP0, PATTERN -+.endm -+ -+.macro innerloop16 decrement, do_preload -+ ldmia PTR!, {DAT0,DAT1,DAT2,DAT3} -+ .ifnc "\do_preload","" -+ pld [PTR, #PRELOAD_DISTANCE*32] -+ .endif -+ .ifnc "\decrement","" -+ subs SIZE, SIZE, #\decrement @ C flag survives rest of macro -+ .endif -+ sub TMP0, DAT0, PATTERN, lsr #14 -+ sub TMP1, DAT1, PATTERN, lsr #14 -+ bic TMP0, TMP0, DAT0 -+ bic TMP1, TMP1, DAT1 -+ sub TMP2, DAT2, PATTERN, lsr #14 -+ sub TMP3, DAT3, PATTERN, lsr #14 -+ ands TMP0, TMP0, PATTERN -+ bic TMP2, TMP2, DAT2 -+ it eq -+ andseq TMP1, TMP1, PATTERN -+ bic TMP3, TMP3, DAT3 -+ itt eq -+ andseq TMP2, TMP2, PATTERN -+ andseq TMP3, TMP3, PATTERN -+.endm -+ -+/* int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size) */ -+function ff_startcode_find_candidate_armv6, export=1 -+ push {v1-v6,lr} -+ mov PTR, BUF -+ @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go -+ @ before using code that does preloads -+ cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1 -+ blo 60f -+ -+ @ Get to word-alignment, 1 byte at a time -+ tst PTR, #3 -+ beq 2f -+1: ldrb DAT0, [PTR], #1 -+ sub SIZE, SIZE, #1 -+ teq DAT0, #0 -+ beq 90f -+ tst PTR, #3 -+ bne 1b -+2: @ Get to 4-word alignment, 1 word at a time -+ ldr PATTERN, =0x80008000 -+ setend be -+ tst PTR, #12 -+ beq 4f -+3: innerloop4 -+ bne 91f -+ tst PTR, #12 -+ bne 3b -+4: @ Get to cacheline (8-word) alignment -+ tst PTR, #16 -+ beq 5f -+ innerloop16 16 -+ bne 93f -+5: @ Check complete cachelines, with preloading -+ @ We need to stop when there are still (PRELOAD_DISTANCE+1) -+ @ complete cachelines to go -+ sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 -+6: innerloop16 , do_preload -+ bne 93f -+ innerloop16 32 -+ bne 93f -+ bcs 6b -+ @ Preload trailing part-cacheline, if any -+ tst SIZE, #31 -+ beq 7f -+ pld [PTR, #(PRELOAD_DISTANCE+1)*32] -+ @ Check remaining data without doing any more preloads. First -+ @ do in chunks of 4 words: -+7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16 -+ bmi 9f -+8: innerloop16 16 -+ bne 93f -+ bcs 8b -+ @ Then in words: -+9: adds SIZE, SIZE, #16 - 4 -+ bmi 11f -+10: innerloop4 -+ bne 91f -+ bcs 10b -+11: setend le -+ @ Check second byte of final halfword -+ ldrb DAT0, [PTR, #-1] -+ teq DAT0, #0 -+ beq 90f -+ @ Check any remaining bytes -+ tst SIZE, #3 -+ beq 13f -+12: ldrb DAT0, [PTR], #1 -+ sub SIZE, SIZE, #1 -+ teq DAT0, #0 -+ beq 90f -+ tst SIZE, #3 -+ bne 12b -+ @ No candidate found -+13: sub RESULT, PTR, BUF -+ b 99f -+ -+60: @ Small buffer - simply check by looping over bytes -+ subs SIZE, SIZE, #1 -+ bcc 99f -+61: ldrb DAT0, [PTR], #1 -+ subs SIZE, SIZE, #1 -+ teq DAT0, #0 -+ beq 90f -+ bcs 61b -+ @ No candidate found -+ sub RESULT, PTR, BUF -+ b 99f -+ -+90: @ Found a candidate at the preceding byte -+ sub RESULT, PTR, BUF -+ sub RESULT, RESULT, #1 -+ b 99f -+ -+91: @ Found a candidate somewhere in the preceding 4 bytes -+ sub RESULT, PTR, BUF -+ sub RESULT, RESULT, #4 -+ sub TMP0, DAT0, #0x20000 -+ bics TMP0, TMP0, DAT0 -+ itt pl -+ ldrbpl DAT0, [PTR, #-3] -+ addpl RESULT, RESULT, #2 -+ bpl 92f -+ teq RESULT, #0 -+ beq 98f @ don't look back a byte if found at first byte in buffer -+ ldrb DAT0, [PTR, #-5] -+92: teq DAT0, #0 -+ it eq -+ subeq RESULT, RESULT, #1 -+ b 98f -+ -+93: @ Found a candidate somewhere in the preceding 16 bytes -+ sub RESULT, PTR, BUF -+ sub RESULT, RESULT, #16 -+ teq TMP0, #0 -+ beq 95f @ not in first 4 bytes -+ sub TMP0, DAT0, #0x20000 -+ bics TMP0, TMP0, DAT0 -+ itt pl -+ ldrbpl DAT0, [PTR, #-15] -+ addpl RESULT, RESULT, #2 -+ bpl 94f -+ teq RESULT, #0 -+ beq 98f @ don't look back a byte if found at first byte in buffer -+ ldrb DAT0, [PTR, #-17] -+94: teq DAT0, #0 -+ it eq -+ subeq RESULT, RESULT, #1 -+ b 98f -+95: add RESULT, RESULT, #4 -+ teq TMP1, #0 -+ beq 96f @ not in next 4 bytes -+ sub TMP1, DAT1, #0x20000 -+ bics TMP1, TMP1, DAT1 -+ itee mi -+ ldrbmi DAT0, [PTR, #-13] -+ ldrbpl DAT0, [PTR, #-11] -+ addpl RESULT, RESULT, #2 -+ teq DAT0, #0 -+ it eq -+ subeq RESULT, RESULT, #1 -+ b 98f -+96: add RESULT, RESULT, #4 -+ teq TMP2, #0 -+ beq 97f @ not in next 4 bytes -+ sub TMP2, DAT2, #0x20000 -+ bics TMP2, TMP2, DAT2 -+ itee mi -+ ldrbmi DAT0, [PTR, #-9] -+ ldrbpl DAT0, [PTR, #-7] -+ addpl RESULT, RESULT, #2 -+ teq DAT0, #0 -+ it eq -+ subeq RESULT, RESULT, #1 -+ b 98f -+97: add RESULT, RESULT, #4 -+ sub TMP3, DAT3, #0x20000 -+ bics TMP3, TMP3, DAT3 -+ itee mi -+ ldrbmi DAT0, [PTR, #-5] -+ ldrbpl DAT0, [PTR, #-3] -+ addpl RESULT, RESULT, #2 -+ teq DAT0, #0 -+ it eq -+ subeq RESULT, RESULT, #1 -+ @ drop through to 98f -+98: setend le -+99: pop {v1-v6,pc} -+endfunc -+ -+ .unreq RESULT -+ .unreq BUF -+ .unreq SIZE -+ .unreq PATTERN -+ .unreq PTR -+ .unreq DAT0 -+ .unreq DAT1 -+ .unreq DAT2 -+ .unreq DAT3 -+ .unreq TMP0 -+ .unreq TMP1 -+ .unreq TMP2 -+ .unreq TMP3 -diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c -index a2a4aba..a4da776 100644 ---- a/libavcodec/h264dsp.c -+++ b/libavcodec/h264dsp.c -@@ -33,6 +33,7 @@ - #include "avcodec.h" - #include "h264dsp.h" - #include "h264idct.h" -+#include "startcode.h" - #include "libavutil/common.h" - - #define BIT_DEPTH 8 -@@ -63,34 +64,6 @@ - #include "h264addpx_template.c" - #undef BIT_DEPTH - --static int h264_find_start_code_candidate_c(const uint8_t *buf, int size) --{ -- int i = 0; --#if HAVE_FAST_UNALIGNED -- /* we check i < size instead of i + 3 / 7 because it is -- * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE -- * bytes at the end. -- */ --# if HAVE_FAST_64BIT -- while (i < size && -- !((~*(const uint64_t *)(buf + i) & -- (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) & -- 0x8080808080808080ULL)) -- i += 8; --# else -- while (i < size && -- !((~*(const uint32_t *)(buf + i) & -- (*(const uint32_t *)(buf + i) - 0x01010101U)) & -- 0x80808080U)) -- i += 4; --# endif --#endif -- for (; i < size; i++) -- if (!buf[i]) -- break; -- return i; --} -- - av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, - const int chroma_format_idc) - { -@@ -178,7 +151,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, - H264_DSP(8); - break; - } -- c->h264_find_start_code_candidate = h264_find_start_code_candidate_c; -+ c->h264_find_start_code_candidate = ff_startcode_find_candidate_c; - - if (ARCH_AARCH64) ff_h264dsp_init_aarch64(c, bit_depth, chroma_format_idc); - if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc); -diff --git a/libavcodec/startcode.c b/libavcodec/startcode.c -new file mode 100644 -index 0000000..5df7695 ---- /dev/null -+++ b/libavcodec/startcode.c -@@ -0,0 +1,57 @@ -+/* -+ * Copyright (c) 2003-2010 Michael Niedermayer -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/** -+ * @file -+ * Accelerated start code search function for start codes common to -+ * MPEG-1/2/4 video, VC-1, H.264/5 -+ * @author Michael Niedermayer -+ */ -+ -+#include "startcode.h" -+#include "config.h" -+ -+int ff_startcode_find_candidate_c(const uint8_t *buf, int size) -+{ -+ int i = 0; -+#if HAVE_FAST_UNALIGNED -+ /* we check i < size instead of i + 3 / 7 because it is -+ * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE -+ * bytes at the end. -+ */ -+# if HAVE_FAST_64BIT -+ while (i < size && -+ !((~*(const uint64_t *)(buf + i) & -+ (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) & -+ 0x8080808080808080ULL)) -+ i += 8; -+# else -+ while (i < size && -+ !((~*(const uint32_t *)(buf + i) & -+ (*(const uint32_t *)(buf + i) - 0x01010101U)) & -+ 0x80808080U)) -+ i += 4; -+# endif -+#endif -+ for (; i < size; i++) -+ if (!buf[i]) -+ break; -+ return i; -+} -diff --git a/libavcodec/startcode.h b/libavcodec/startcode.h -new file mode 100644 -index 0000000..cc55d5f ---- /dev/null -+++ b/libavcodec/startcode.h -@@ -0,0 +1,35 @@ -+/* -+ * Copyright (c) 2003-2010 Michael Niedermayer -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+/** -+ * @file -+ * Accelerated start code search function for start codes common to -+ * MPEG-1/2/4 video, VC-1, H.264/5 -+ * @author Michael Niedermayer -+ */ -+ -+#ifndef AVCODEC_STARTCODE_H -+#define AVCODEC_STARTCODE_H -+ -+#include -+ -+int ff_startcode_find_candidate_c(const uint8_t *buf, int size); -+ -+#endif /* AVCODEC_STARTCODE_H */ --- -1.9.1 diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-0001-truehd-tune-VLC-decoding-for-ARM.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-0001-truehd-tune-VLC-decoding-for-ARM.patch deleted file mode 100644 index 29508437e5..0000000000 --- a/packages/multimedia/ffmpeg/patches/ffmpeg-0001-truehd-tune-VLC-decoding-for-ARM.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 425d69b993d25489e4830766507d9d8f6c819802 Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Wed, 19 Mar 2014 17:26:19 +0000 -Subject: [PATCH 1/6] truehd: tune VLC decoding for ARM. - -Profiling on a Raspberry Pi revealed the best performance to correspond -with VLC_BITS = 5. Results for overall audio decode and the get_vlc2 function -in particular are as follows: - - Before After - Mean StdDev Mean StdDev Confidence Change -6:2 total 348.8 20.1 339.6 15.1 88.8% +2.7% (insignificant) -6:2 function 38.1 8.1 26.4 4.1 100.0% +44.5% -8:2 total 339.1 15.4 324.5 15.5 99.4% +4.5% -8:2 function 33.8 7.0 27.3 5.6 99.7% +23.6% -6:6 total 604.6 20.8 572.8 20.6 100.0% +5.6% -6:6 function 95.8 8.4 68.9 8.2 100.0% +39.1% -8:8 total 766.4 17.6 741.5 21.2 100.0% +3.4% -8:8 function 106.0 11.4 86.1 9.9 100.0% +23.1% - -Signed-off-by: Michael Niedermayer ---- - libavcodec/mlpdec.c | 13 ++++++++++--- - 1 file changed, 10 insertions(+), 3 deletions(-) - -diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c -index 93ed552..cbd9000 100644 ---- a/libavcodec/mlpdec.c -+++ b/libavcodec/mlpdec.c -@@ -37,9 +37,16 @@ - #include "mlp_parser.h" - #include "mlpdsp.h" - #include "mlp.h" -+#include "config.h" - - /** number of bits used for VLC lookup - longest Huffman code is 9 */ -+#if ARCH_ARM == 1 -+#define VLC_BITS 5 -+#define VLC_STATIC_SIZE 64 -+#else - #define VLC_BITS 9 -+#define VLC_STATIC_SIZE 512 -+#endif - - typedef struct SubStream { - /// Set if a valid restart header has been read. Otherwise the substream cannot be decoded. -@@ -193,13 +200,13 @@ static av_cold void init_static(void) - if (!huff_vlc[0].bits) { - INIT_VLC_STATIC(&huff_vlc[0], VLC_BITS, 18, - &ff_mlp_huffman_tables[0][0][1], 2, 1, -- &ff_mlp_huffman_tables[0][0][0], 2, 1, 512); -+ &ff_mlp_huffman_tables[0][0][0], 2, 1, VLC_STATIC_SIZE); - INIT_VLC_STATIC(&huff_vlc[1], VLC_BITS, 16, - &ff_mlp_huffman_tables[1][0][1], 2, 1, -- &ff_mlp_huffman_tables[1][0][0], 2, 1, 512); -+ &ff_mlp_huffman_tables[1][0][0], 2, 1, VLC_STATIC_SIZE); - INIT_VLC_STATIC(&huff_vlc[2], VLC_BITS, 15, - &ff_mlp_huffman_tables[2][0][1], 2, 1, -- &ff_mlp_huffman_tables[2][0][0], 2, 1, 512); -+ &ff_mlp_huffman_tables[2][0][0], 2, 1, VLC_STATIC_SIZE); - } - - ff_mlp_init_crc(); --- -1.9.1 diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-0002-truehd-add-hand-scheduled-ARM-asm-version-of-mlp_fil.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-0002-truehd-add-hand-scheduled-ARM-asm-version-of-mlp_fil.patch deleted file mode 100644 index 4aea35f9fd..0000000000 --- a/packages/multimedia/ffmpeg/patches/ffmpeg-0002-truehd-add-hand-scheduled-ARM-asm-version-of-mlp_fil.patch +++ /dev/null @@ -1,557 +0,0 @@ -From bfe3d8c8e4e046163dc314aa16207413e377283f Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Mon, 3 Mar 2014 19:44:23 +0000 -Subject: [PATCH 2/6] truehd: add hand-scheduled ARM asm version of - mlp_filter_channel. - -Profiling results for overall audio decode and the mlp_filter_channel(_arm) -function in particular are as follows: - - Before After - Mean StdDev Mean StdDev Confidence Change -6:2 total 380.4 22.0 370.8 17.0 87.4% +2.6% (insignificant) -6:2 function 60.7 7.2 36.6 8.1 100.0% +65.8% -8:2 total 357.0 17.5 343.2 19.0 97.8% +4.0% (insignificant) -8:2 function 60.3 8.8 37.3 3.8 100.0% +61.8% -6:6 total 717.2 23.2 658.4 15.7 100.0% +8.9% -6:6 function 140.4 12.9 81.5 9.2 100.0% +72.4% -8:8 total 981.9 16.2 896.2 24.5 100.0% +9.6% -8:8 function 193.4 15.0 103.3 11.5 100.0% +87.2% - -Experiments with adding preload instructions to this function yielded no -useful benefit, so these have not been included. - -The assembly version has also been tested with a fuzz tester to ensure that -any combinations of inputs not exercised by my available test streams still -generate mathematically identical results to the C version. ---- - libavcodec/arm/Makefile | 2 + - libavcodec/arm/mlpdsp_arm.S | 433 +++++++++++++++++++++++++++++++++++++++ - libavcodec/arm/mlpdsp_init_arm.c | 36 ++++ - libavcodec/mlpdsp.c | 2 + - libavcodec/mlpdsp.h | 1 + - 5 files changed, 474 insertions(+) - create mode 100644 libavcodec/arm/mlpdsp_arm.S - create mode 100644 libavcodec/arm/mlpdsp_init_arm.c - -diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile -index a8446b2..ba673b1 100644 ---- a/libavcodec/arm/Makefile -+++ b/libavcodec/arm/Makefile -@@ -22,6 +22,8 @@ OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o - OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o - OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_arm.o \ - arm/hpeldsp_arm.o -+OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_init_arm.o \ -+ arm/mlpdsp_arm.o - OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o - OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o - OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o -diff --git a/libavcodec/arm/mlpdsp_arm.S b/libavcodec/arm/mlpdsp_arm.S -new file mode 100644 -index 0000000..615819d ---- /dev/null -+++ b/libavcodec/arm/mlpdsp_arm.S -@@ -0,0 +1,433 @@ -+/* -+ * Copyright (c) 2014 RISC OS Open Ltd -+ * Author: Ben Avison -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "libavutil/arm/asm.S" -+ -+#define MAX_CHANNELS 8 -+#define MAX_FIR_ORDER 8 -+#define MAX_IIR_ORDER 4 -+#define MAX_RATEFACTOR 4 -+#define MAX_BLOCKSIZE (40 * MAX_RATEFACTOR) -+ -+PST .req a1 -+PCO .req a2 -+AC0 .req a3 -+AC1 .req a4 -+CO0 .req v1 -+CO1 .req v2 -+CO2 .req v3 -+CO3 .req v4 -+ST0 .req v5 -+ST1 .req v6 -+ST2 .req sl -+ST3 .req fp -+I .req ip -+PSAMP .req lr -+ -+ -+// Some macros that do loads/multiplies where the register number is determined -+// from an assembly-time expression. Boy is GNU assembler's syntax ugly... -+ -+.macro load group, index, base, offset -+ .altmacro -+ load_ \group, %(\index), \base, \offset -+ .noaltmacro -+.endm -+ -+.macro load_ group, index, base, offset -+ ldr \group\index, [\base, #\offset] -+.endm -+ -+.macro loadd group, index, base, offset -+ .altmacro -+ loadd_ \group, %(\index), %(\index+1), \base, \offset -+ .noaltmacro -+.endm -+ -+.macro loadd_ group, index0, index1, base, offset -+A .if offset >= 256 -+A ldr \group\index0, [\base, #\offset] -+A ldr \group\index1, [\base, #(\offset) + 4] -+A .else -+ ldrd \group\index0, \group\index1, [\base, #\offset] -+A .endif -+.endm -+ -+.macro multiply index, accumulate, long -+ .altmacro -+ multiply_ %(\index), \accumulate, \long -+ .noaltmacro -+.endm -+ -+.macro multiply_ index, accumulate, long -+ .if \long -+ .if \accumulate -+ smlal AC0, AC1, CO\index, ST\index -+ .else -+ smull AC0, AC1, CO\index, ST\index -+ .endif -+ .else -+ .if \accumulate -+ mla AC0, CO\index, ST\index, AC0 -+ .else -+ mul AC0, CO\index, ST\index -+ .endif -+ .endif -+.endm -+ -+// A macro to update the load register number and load offsets -+ -+.macro inc howmany -+ .set LOAD_REG, (LOAD_REG + \howmany) & 3 -+ .set OFFSET_CO, OFFSET_CO + 4 * \howmany -+ .set OFFSET_ST, OFFSET_ST + 4 * \howmany -+ .if FIR_REMAIN > 0 -+ .set FIR_REMAIN, FIR_REMAIN - \howmany -+ .if FIR_REMAIN == 0 -+ .set OFFSET_CO, 4 * MAX_FIR_ORDER -+ .set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER) -+ .endif -+ .elseif IIR_REMAIN > 0 -+ .set IIR_REMAIN, IIR_REMAIN - \howmany -+ .endif -+.endm -+ -+// Macro to implement the inner loop for one specific combination of parameters -+ -+.macro implement_filter mask_minus1, shift_0, shift_8, iir_taps, fir_taps -+ .set TOTAL_TAPS, \iir_taps + \fir_taps -+ -+ // Deal with register allocation... -+ .set DEFINED_SHIFT, 0 -+ .set DEFINED_MASK, 0 -+ .set SHUFFLE_SHIFT, 0 -+ .set SHUFFLE_MASK, 0 -+ .set SPILL_SHIFT, 0 -+ .set SPILL_MASK, 0 -+ .if TOTAL_TAPS == 0 -+ // Little register pressure in this case - just keep MASK where it was -+ .if !\mask_minus1 -+ MASK .req ST1 -+ .set DEFINED_MASK, 1 -+ .endif -+ .else -+ .if \shift_0 -+ .if !\mask_minus1 -+ // AC1 is unused with shift 0 -+ MASK .req AC1 -+ .set DEFINED_MASK, 1 -+ .set SHUFFLE_MASK, 1 -+ .endif -+ .elseif \shift_8 -+ .if !\mask_minus1 -+ .if TOTAL_TAPS <= 4 -+ // All coefficients are preloaded (so pointer not needed) -+ MASK .req PCO -+ .set DEFINED_MASK, 1 -+ .set SHUFFLE_MASK, 1 -+ .else -+ .set SPILL_MASK, 1 -+ .endif -+ .endif -+ .else // shift not 0 or 8 -+ .if TOTAL_TAPS <= 3 -+ // All coefficients are preloaded, and at least one CO register is unused -+ .if \fir_taps & 1 -+ SHIFT .req CO0 -+ .set DEFINED_SHIFT, 1 -+ .set SHUFFLE_SHIFT, 1 -+ .else -+ SHIFT .req CO3 -+ .set DEFINED_SHIFT, 1 -+ .set SHUFFLE_SHIFT, 1 -+ .endif -+ .if !\mask_minus1 -+ MASK .req PCO -+ .set DEFINED_MASK, 1 -+ .set SHUFFLE_MASK, 1 -+ .endif -+ .elseif TOTAL_TAPS == 4 -+ // All coefficients are preloaded -+ SHIFT .req PCO -+ .set DEFINED_SHIFT, 1 -+ .set SHUFFLE_SHIFT, 1 -+ .if !\mask_minus1 -+ .set SPILL_MASK, 1 -+ .endif -+ .else -+ .set SPILL_SHIFT, 1 -+ .if !\mask_minus1 -+ .set SPILL_MASK, 1 -+ .endif -+ .endif -+ .endif -+ .endif -+ .if SPILL_SHIFT -+ SHIFT .req ST0 -+ .set DEFINED_SHIFT, 1 -+ .endif -+ .if SPILL_MASK -+ MASK .req ST1 -+ .set DEFINED_MASK, 1 -+ .endif -+ -+ // Preload coefficients if possible -+ .if TOTAL_TAPS <= 4 -+ .set OFFSET_CO, 0 -+ .if \fir_taps & 1 -+ .set LOAD_REG, 1 -+ .else -+ .set LOAD_REG, 0 -+ .endif -+ .rept \fir_taps -+ load CO, LOAD_REG, PCO, OFFSET_CO -+ .set LOAD_REG, (LOAD_REG + 1) & 3 -+ .set OFFSET_CO, OFFSET_CO + 4 -+ .endr -+ .set OFFSET_CO, 4 * MAX_FIR_ORDER -+ .rept \iir_taps -+ load CO, LOAD_REG, PCO, OFFSET_CO -+ .set LOAD_REG, (LOAD_REG + 1) & 3 -+ .set OFFSET_CO, OFFSET_CO + 4 -+ .endr -+ .endif -+ -+ // Move mask/shift to final positions if necessary -+ // Need to do this after preloading, because in some cases we -+ // reuse the coefficient pointer register -+ .if SHUFFLE_SHIFT -+ mov SHIFT, ST0 -+ .endif -+ .if SHUFFLE_MASK -+ mov MASK, ST1 -+ .endif -+ -+ // Begin loop -+01: -+ .if TOTAL_TAPS == 0 -+ // Things simplify a lot in this case -+ // In fact this could be pipelined further if it's worth it... -+ ldr ST0, [PSAMP] -+ subs I, I, #1 -+ .if !\mask_minus1 -+ and ST0, ST0, MASK -+ .endif -+ str ST0, [PST, #-4]! -+ str ST0, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)] -+ str ST0, [PSAMP], #4 * MAX_CHANNELS -+ bne 01b -+ .else -+ .if \fir_taps & 1 -+ .set LOAD_REG, 1 -+ .else -+ .set LOAD_REG, 0 -+ .endif -+ .set LOAD_BANK, 0 -+ .set FIR_REMAIN, \fir_taps -+ .set IIR_REMAIN, \iir_taps -+ .if FIR_REMAIN == 0 // only IIR terms -+ .set OFFSET_CO, 4 * MAX_FIR_ORDER -+ .set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER) -+ .else -+ .set OFFSET_CO, 0 -+ .set OFFSET_ST, 0 -+ .endif -+ .set MUL_REG, LOAD_REG -+ .set COUNTER, 0 -+ .rept TOTAL_TAPS + 2 -+ // Do load(s) -+ .if FIR_REMAIN != 0 || IIR_REMAIN != 0 -+ .if COUNTER == 0 -+ .if TOTAL_TAPS > 4 -+ load CO, LOAD_REG, PCO, OFFSET_CO -+ .endif -+ load ST, LOAD_REG, PST, OFFSET_ST -+ inc 1 -+ .elseif COUNTER == 1 && (\fir_taps & 1) == 0 -+ .if TOTAL_TAPS > 4 -+ load CO, LOAD_REG, PCO, OFFSET_CO -+ .endif -+ load ST, LOAD_REG, PST, OFFSET_ST -+ inc 1 -+ .elseif LOAD_BANK == 0 -+ .if TOTAL_TAPS > 4 -+ .if FIR_REMAIN == 0 && IIR_REMAIN == 1 -+ load CO, LOAD_REG, PCO, OFFSET_CO -+ .else -+ loadd CO, LOAD_REG, PCO, OFFSET_CO -+ .endif -+ .endif -+ .set LOAD_BANK, 1 -+ .else -+ .if FIR_REMAIN == 0 && IIR_REMAIN == 1 -+ load ST, LOAD_REG, PST, OFFSET_ST -+ inc 1 -+ .else -+ loadd ST, LOAD_REG, PST, OFFSET_ST -+ inc 2 -+ .endif -+ .set LOAD_BANK, 0 -+ .endif -+ .endif -+ -+ // Do interleaved multiplies, slightly delayed -+ .if COUNTER >= 2 -+ multiply MUL_REG, COUNTER > 2, !\shift_0 -+ .set MUL_REG, (MUL_REG + 1) & 3 -+ .endif -+ .set COUNTER, COUNTER + 1 -+ .endr -+ -+ // Post-process the result of the multiplies -+ .if SPILL_SHIFT -+ ldr SHIFT, [sp, #9*4 + 0*4] -+ .endif -+ .if SPILL_MASK -+ ldr MASK, [sp, #9*4 + 1*4] -+ .endif -+ ldr ST2, [PSAMP] -+ subs I, I, #1 -+ .if \shift_8 -+ mov AC0, AC0, lsr #8 -+ orr AC0, AC0, AC1, lsl #24 -+ .elseif !\shift_0 -+ rsb ST3, SHIFT, #32 -+ mov AC0, AC0, lsr SHIFT -+A orr AC0, AC0, AC1, lsl ST3 -+T mov AC1, AC1, lsl ST3 -+T orr AC0, AC0, AC1 -+ .endif -+ .if \mask_minus1 -+ add ST3, ST2, AC0 -+ .else -+ add ST2, ST2, AC0 -+ and ST3, ST2, MASK -+ sub ST2, ST3, AC0 -+ .endif -+ str ST3, [PST, #-4]! -+ str ST2, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)] -+ str ST3, [PSAMP], #4 * MAX_CHANNELS -+ bne 01b -+ .endif -+ b 99f -+ -+ .if DEFINED_SHIFT -+ .unreq SHIFT -+ .endif -+ .if DEFINED_MASK -+ .unreq MASK -+ .endif -+.endm -+ -+.macro switch_on_fir_taps mask_minus1, shift_0, shift_8, iir_taps -+A ldr pc, [pc, a3, LSL #2] // firorder is in range 0-(8-iir_taps) -+T tbh [pc, a3, lsl #1] -+0: -+A .word 0, 70f, 71f, 72f, 73f, 74f -+T .hword (70f - 0b) / 2, (71f - 0b) / 2, (72f - 0b) / 2, (73f - 0b) / 2, (74f - 0b) / 2 -+ .if \iir_taps <= 3 -+A .word 75f -+T .hword (75f - 0b) / 2 -+ .if \iir_taps <= 2 -+A .word 76f -+T .hword (76f - 0b) / 2 -+ .if \iir_taps <= 1 -+A .word 77f -+T .hword (77f - 0b) / 2 -+ .if \iir_taps == 0 -+A .word 78f -+T .hword (78f - 0b) / 2 -+ .endif -+ .endif -+ .endif -+ .endif -+70: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 0 -+71: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 1 -+72: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 2 -+73: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 3 -+74: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 4 -+ .if \iir_taps <= 3 -+75: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 5 -+ .if \iir_taps <= 2 -+76: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 6 -+ .if \iir_taps <= 1 -+77: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 7 -+ .if \iir_taps == 0 -+78: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 8 -+ .endif -+ .endif -+ .endif -+ .endif -+.endm -+ -+.macro switch_on_iir_taps mask_minus1, shift_0, shift_8 -+A ldr pc, [pc, a4, LSL #2] // irorder is in range 0-4 -+T tbh [pc, a4, lsl #1] -+0: -+A .word 0, 60f, 61f, 62f, 63f, 64f -+T .hword (60f - 0b) / 2, (61f - 0b) / 2, (62f - 0b) / 2, (63f - 0b) / 2, (64f - 0b) / 2 -+60: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 0 -+61: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 1 -+62: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 2 -+63: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 3 -+64: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 4 -+.endm -+ -+/* void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff, -+ * int firorder, int iirorder, -+ * unsigned int filter_shift, int32_t mask, -+ * int blocksize, int32_t *sample_buffer); -+ */ -+function ff_mlp_filter_channel_arm, export=1 -+ push {v1-fp,lr} -+ add v1, sp, #9*4 // point at arguments on stack -+ ldm v1, {ST0,ST1,I,PSAMP} -+ cmp ST1, #-1 -+ bne 30f -+ movs ST2, ST0, lsl #29 // shift is in range 0-15; we want to special-case 0 and 8 -+ bne 20f -+ bcs 10f -+ switch_on_iir_taps 1, 1, 0 -+10: switch_on_iir_taps 1, 0, 1 -+20: switch_on_iir_taps 1, 0, 0 -+30: movs ST2, ST0, lsl #29 // shift is in range 0-15; we want to special-case 0 and 8 -+ bne 50f -+ bcs 40f -+ switch_on_iir_taps 0, 1, 0 -+40: switch_on_iir_taps 0, 0, 1 -+50: switch_on_iir_taps 0, 0, 0 -+99: pop {v1-fp,pc} -+endfunc -+ -+ .unreq PST -+ .unreq PCO -+ .unreq AC0 -+ .unreq AC1 -+ .unreq CO0 -+ .unreq CO1 -+ .unreq CO2 -+ .unreq CO3 -+ .unreq ST0 -+ .unreq ST1 -+ .unreq ST2 -+ .unreq ST3 -+ .unreq I -+ .unreq PSAMP -diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c -new file mode 100644 -index 0000000..9a14815 ---- /dev/null -+++ b/libavcodec/arm/mlpdsp_init_arm.c -@@ -0,0 +1,36 @@ -+/* -+ * Copyright (c) 2014 RISC OS Open Ltd -+ * Author: Ben Avison -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include -+ -+#include "libavutil/arm/cpu.h" -+#include "libavutil/attributes.h" -+#include "libavcodec/mlpdsp.h" -+ -+void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff, -+ int firorder, int iirorder, -+ unsigned int filter_shift, int32_t mask, -+ int blocksize, int32_t *sample_buffer); -+ -+av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c) -+{ -+ c->mlp_filter_channel = ff_mlp_filter_channel_arm; -+} -diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c -index b413e86..4b403b8 100644 ---- a/libavcodec/mlpdsp.c -+++ b/libavcodec/mlpdsp.c -@@ -60,6 +60,8 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff, - av_cold void ff_mlpdsp_init(MLPDSPContext *c) - { - c->mlp_filter_channel = mlp_filter_channel; -+ if (ARCH_ARM) -+ ff_mlpdsp_init_arm(c); - if (ARCH_X86) - ff_mlpdsp_init_x86(c); - } -diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h -index 84a8aa3..129bcfe 100644 ---- a/libavcodec/mlpdsp.h -+++ b/libavcodec/mlpdsp.h -@@ -32,6 +32,7 @@ typedef struct MLPDSPContext { - } MLPDSPContext; - - void ff_mlpdsp_init(MLPDSPContext *c); -+void ff_mlpdsp_init_arm(MLPDSPContext *c); - void ff_mlpdsp_init_x86(MLPDSPContext *c); - - #endif /* AVCODEC_MLPDSP_H */ --- -1.9.1 diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-0002-vc-1-Add-platform-specific-start-code-search-routine.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-0002-vc-1-Add-platform-specific-start-code-search-routine.patch deleted file mode 100644 index e84ace6065..0000000000 --- a/packages/multimedia/ffmpeg/patches/ffmpeg-0002-vc-1-Add-platform-specific-start-code-search-routine.patch +++ /dev/null @@ -1,143 +0,0 @@ -From a60747132a1a6652ac0d18f3f110a20ea637ac30 Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Wed, 16 Apr 2014 01:51:32 +0100 -Subject: [PATCH 2/3] vc-1: Add platform-specific start code search routine to - VC1DSPContext. - -Initialise VC1DSPContext for parser as well as for decoder. -Note, the VC-1 code doesn't actually use the function pointer yet. - -Signed-off-by: Michael Niedermayer ---- - libavcodec/Makefile | 6 +++--- - libavcodec/arm/Makefile | 2 ++ - libavcodec/arm/vc1dsp_init_arm.c | 4 ++++ - libavcodec/vc1.c | 2 ++ - libavcodec/vc1dec.c | 1 - - libavcodec/vc1dsp.c | 3 +++ - libavcodec/vc1dsp.h | 8 ++++++++ - 7 files changed, 22 insertions(+), 4 deletions(-) - -diff --git a/libavcodec/Makefile b/libavcodec/Makefile -index 19caf11..120f85a 100644 ---- a/libavcodec/Makefile -+++ b/libavcodec/Makefile -@@ -458,7 +458,7 @@ OBJS-$(CONFIG_VB_DECODER) += vb.o - OBJS-$(CONFIG_VBLE_DECODER) += vble.o - OBJS-$(CONFIG_VC1_DECODER) += vc1dec.o vc1.o vc1data.o vc1dsp.o \ - msmpeg4dec.o msmpeg4.o msmpeg4data.o \ -- wmv2dsp.o -+ wmv2dsp.o startcode.o - OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o - OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdav.o - OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdav.o -@@ -783,9 +783,9 @@ OBJS-$(CONFIG_PNM_PARSER) += pnm_parser.o pnm.o - OBJS-$(CONFIG_RV30_PARSER) += rv34_parser.o - OBJS-$(CONFIG_RV40_PARSER) += rv34_parser.o - OBJS-$(CONFIG_TAK_PARSER) += tak_parser.o tak.o --OBJS-$(CONFIG_VC1_PARSER) += vc1_parser.o vc1.o vc1data.o \ -+OBJS-$(CONFIG_VC1_PARSER) += vc1_parser.o vc1.o vc1data.o vc1dsp.o \ - msmpeg4.o msmpeg4data.o mpeg4video.o \ -- h263.o -+ h263.o startcode.o - OBJS-$(CONFIG_VORBIS_PARSER) += vorbis_parser.o xiph.o - OBJS-$(CONFIG_VP3_PARSER) += vp3_parser.o - OBJS-$(CONFIG_VP8_PARSER) += vp8_parser.o -diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile -index b6410b2..fa2b18e 100644 ---- a/libavcodec/arm/Makefile -+++ b/libavcodec/arm/Makefile -@@ -51,6 +51,8 @@ ARMV6-OBJS-$(CONFIG_H264DSP) += arm/startcode_armv6.o - ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \ - arm/hpeldsp_armv6.o - ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o -+ARMV6-OBJS-$(CONFIG_VC1_DECODER) += arm/startcode_armv6.o -+ARMV6-OBJS-$(CONFIG_VC1_PARSER) += arm/startcode_armv6.o - ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \ - arm/vp8dsp_init_armv6.o \ - arm/vp8dsp_armv6.o -diff --git a/libavcodec/arm/vc1dsp_init_arm.c b/libavcodec/arm/vc1dsp_init_arm.c -index 47d4126..4a84848 100644 ---- a/libavcodec/arm/vc1dsp_init_arm.c -+++ b/libavcodec/arm/vc1dsp_init_arm.c -@@ -23,10 +23,14 @@ - #include "libavcodec/vc1dsp.h" - #include "vc1dsp.h" - -+int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size); -+ - av_cold void ff_vc1dsp_init_arm(VC1DSPContext *dsp) - { - int cpu_flags = av_get_cpu_flags(); - -+ if (have_armv6(cpu_flags)) -+ dsp->vc1_find_start_code_candidate = ff_startcode_find_candidate_armv6; - if (have_neon(cpu_flags)) - ff_vc1dsp_init_neon(dsp); - } -diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c -index 49d4885..cb941dd 100644 ---- a/libavcodec/vc1.c -+++ b/libavcodec/vc1.c -@@ -1706,5 +1706,7 @@ av_cold int ff_vc1_init_common(VC1Context *v) - v->pq = -1; - v->mvrange = 0; /* 7.1.1.18, p80 */ - -+ ff_vc1dsp_init(&v->vc1dsp); -+ - return 0; - } -diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c -index 30fee47..67cda42 100644 ---- a/libavcodec/vc1dec.c -+++ b/libavcodec/vc1dec.c -@@ -5631,7 +5631,6 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) - ff_vc1_decode_end(avctx); - - ff_h264chroma_init(&v->h264chroma, 8); -- ff_vc1dsp_init(&v->vc1dsp); - - if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) { - int count = 0; -diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c -index ec9c17b..09a9006 100644 ---- a/libavcodec/vc1dsp.c -+++ b/libavcodec/vc1dsp.c -@@ -30,6 +30,7 @@ - #include "h264chroma.h" - #include "rnd_avg.h" - #include "vc1dsp.h" -+#include "startcode.h" - - /* Apply overlap transform to horizontal edge */ - static void vc1_v_overlap_c(uint8_t *src, int stride) -@@ -947,6 +948,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp) - dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c; - #endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */ - -+ dsp->vc1_find_start_code_candidate = ff_startcode_find_candidate_c; -+ - if (ARCH_AARCH64) - ff_vc1dsp_init_aarch64(dsp); - if (ARCH_ARM) -diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h -index 990fbc3..6a90eed 100644 ---- a/libavcodec/vc1dsp.h -+++ b/libavcodec/vc1dsp.h -@@ -74,6 +74,14 @@ typedef struct VC1DSPContext { - void (*sprite_v_double_twoscale)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1, - const uint8_t *src2a, const uint8_t *src2b, int offset2, - int alpha, int width); -+ -+ /** -+ * Search buf from the start for up to size bytes. Return the index -+ * of a zero byte, or >= size if not found. Ideally, use lookahead -+ * to filter out any zero bytes that are known to not be followed by -+ * one or more further zero bytes and a one byte. -+ */ -+ int (*vc1_find_start_code_candidate)(const uint8_t *buf, int size); - } VC1DSPContext; - - void ff_vc1dsp_init(VC1DSPContext* c); --- -1.9.1 diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-0003-truehd-break-out-part-of-rematrix_channels-into-plat.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-0003-truehd-break-out-part-of-rematrix_channels-into-plat.patch deleted file mode 100644 index 9c06f8fe4e..0000000000 --- a/packages/multimedia/ffmpeg/patches/ffmpeg-0003-truehd-break-out-part-of-rematrix_channels-into-plat.patch +++ /dev/null @@ -1,158 +0,0 @@ -From bb74fc44081fb6d7923ce1b7ed3e3e6514695f3e Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Wed, 5 Mar 2014 21:01:28 +0000 -Subject: [PATCH 3/6] truehd: break out part of rematrix_channels into - platform-specific callback. - -Verified with profiling that this doesn't have a measurable effect upon -overall performance. ---- - libavcodec/mlpdec.c | 37 ++++++++++++------------------------- - libavcodec/mlpdsp.c | 33 +++++++++++++++++++++++++++++++++ - libavcodec/mlpdsp.h | 23 +++++++++++++++++++++++ - 3 files changed, 68 insertions(+), 25 deletions(-) - -diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c -index cbd9000..01ded5c 100644 ---- a/libavcodec/mlpdec.c -+++ b/libavcodec/mlpdec.c -@@ -1024,7 +1024,7 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr) - static void rematrix_channels(MLPDecodeContext *m, unsigned int substr) - { - SubStream *s = &m->substream[substr]; -- unsigned int mat, src_ch, i; -+ unsigned int mat; - unsigned int maxchan; - - maxchan = s->max_matrix_channel; -@@ -1036,31 +1036,18 @@ static void rematrix_channels(MLPDecodeContext *m, unsigned int substr) - } - - for (mat = 0; mat < s->num_primitive_matrices; mat++) { -- int matrix_noise_shift = s->matrix_noise_shift[mat]; - unsigned int dest_ch = s->matrix_out_ch[mat]; -- int32_t mask = MSB_MASK(s->quant_step_size[dest_ch]); -- int32_t *coeffs = s->matrix_coeff[mat]; -- int index = s->num_primitive_matrices - mat; -- int index2 = 2 * index + 1; -- -- /* TODO: DSPContext? */ -- -- for (i = 0; i < s->blockpos; i++) { -- int32_t bypassed_lsb = m->bypassed_lsbs[i][mat]; -- int32_t *samples = m->sample_buffer[i]; -- int64_t accum = 0; -- -- for (src_ch = 0; src_ch <= maxchan; src_ch++) -- accum += (int64_t) samples[src_ch] * coeffs[src_ch]; -- -- if (matrix_noise_shift) { -- index &= m->access_unit_size_pow2 - 1; -- accum += m->noise_buffer[index] << (matrix_noise_shift + 7); -- index += index2; -- } -- -- samples[dest_ch] = ((accum >> 14) & mask) + bypassed_lsb; -- } -+ m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0], -+ s->matrix_coeff[mat], -+ &m->bypassed_lsbs[0][mat], -+ m->noise_buffer, -+ s->num_primitive_matrices - mat, -+ dest_ch, -+ s->blockpos, -+ maxchan, -+ s->matrix_noise_shift[mat], -+ m->access_unit_size_pow2, -+ MSB_MASK(s->quant_step_size[dest_ch])); - } - } - -diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c -index 4b403b8..7a359b0 100644 ---- a/libavcodec/mlpdsp.c -+++ b/libavcodec/mlpdsp.c -@@ -57,9 +57,42 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff, - } - } - -+void ff_mlp_rematrix_channel(int32_t *samples, -+ const int32_t *coeffs, -+ const uint8_t *bypassed_lsbs, -+ const int8_t *noise_buffer, -+ int index, -+ unsigned int dest_ch, -+ uint16_t blockpos, -+ unsigned int maxchan, -+ int matrix_noise_shift, -+ int access_unit_size_pow2, -+ int32_t mask) -+{ -+ unsigned int src_ch, i; -+ int index2 = 2 * index + 1; -+ for (i = 0; i < blockpos; i++) { -+ int64_t accum = 0; -+ -+ for (src_ch = 0; src_ch <= maxchan; src_ch++) -+ accum += (int64_t) samples[src_ch] * coeffs[src_ch]; -+ -+ if (matrix_noise_shift) { -+ index &= access_unit_size_pow2 - 1; -+ accum += noise_buffer[index] << (matrix_noise_shift + 7); -+ index += index2; -+ } -+ -+ samples[dest_ch] = ((accum >> 14) & mask) + *bypassed_lsbs; -+ bypassed_lsbs += MAX_CHANNELS; -+ samples += MAX_CHANNELS; -+ } -+} -+ - av_cold void ff_mlpdsp_init(MLPDSPContext *c) - { - c->mlp_filter_channel = mlp_filter_channel; -+ c->mlp_rematrix_channel = ff_mlp_rematrix_channel; - if (ARCH_ARM) - ff_mlpdsp_init_arm(c); - if (ARCH_X86) -diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h -index 129bcfe..f98e9be 100644 ---- a/libavcodec/mlpdsp.h -+++ b/libavcodec/mlpdsp.h -@@ -24,11 +24,34 @@ - - #include - -+void ff_mlp_rematrix_channel(int32_t *samples, -+ const int32_t *coeffs, -+ const uint8_t *bypassed_lsbs, -+ const int8_t *noise_buffer, -+ int index, -+ unsigned int dest_ch, -+ uint16_t blockpos, -+ unsigned int maxchan, -+ int matrix_noise_shift, -+ int access_unit_size_pow2, -+ int32_t mask); -+ - typedef struct MLPDSPContext { - void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff, - int firorder, int iirorder, - unsigned int filter_shift, int32_t mask, - int blocksize, int32_t *sample_buffer); -+ void (*mlp_rematrix_channel)(int32_t *samples, -+ const int32_t *coeffs, -+ const uint8_t *bypassed_lsbs, -+ const int8_t *noise_buffer, -+ int index, -+ unsigned int dest_ch, -+ uint16_t blockpos, -+ unsigned int maxchan, -+ int matrix_noise_shift, -+ int access_unit_size_pow2, -+ int32_t mask); - } MLPDSPContext; - - void ff_mlpdsp_init(MLPDSPContext *c); --- -1.9.1 diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-0003-vc-1-Optimise-parser-with-special-attention-to-ARM.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-0003-vc-1-Optimise-parser-with-special-attention-to-ARM.patch deleted file mode 100644 index 1f0cf40951..0000000000 --- a/packages/multimedia/ffmpeg/patches/ffmpeg-0003-vc-1-Optimise-parser-with-special-attention-to-ARM.patch +++ /dev/null @@ -1,401 +0,0 @@ -From c39df43eae03768427243668c040de8437c4f79c Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Wed, 23 Apr 2014 01:41:04 +0100 -Subject: [PATCH 3/3] vc-1: Optimise parser (with special attention to ARM) - -The previous implementation of the parser made four passes over each input -buffer (reduced to two if the container format already guaranteed the input -buffer corresponded to frames, such as with MKV). But these buffers are -often 200K in size, certainly enough to flush the data out of L1 cache, and -for many CPUs, all the way out to main memory. The passes were: - -1) locate frame boundaries (not needed for MKV etc) -2) copy the data into a contiguous block (not needed for MKV etc) -3) locate the start codes within each frame -4) unescape the data between start codes - -After this, the unescaped data was parsed to extract certain header fields, -but because the unescape operation was so large, this was usually also -effectively operating on uncached memory. Most of the unescaped data was -simply thrown away and never processed further. Only step 2 - because it -used memcpy - was using prefetch, making things even worse. - -This patch reorganises these steps so that, aside from the copying, the -operations are performed in parallel, maximising cache utilisation. No more -than the worst-case number of bytes needed for header parsing is unescaped. -Most of the data is, in practice, only read in order to search for a start -code, for which optimised implementations already existed in the H264 codec -(notably the ARM version uses prefetch, so we end up doing both remaining -passes at maximum speed). For MKV files, we know when we've found the last -start code of interest in a given frame, so we are able to avoid doing even -that one remaining pass for most of the buffer. - -In some use-cases (such as the Raspberry Pi) video decode is handled by the -GPU, but the entire elementary stream is still fed through the parser to -pick out certain elements of the header which are necessary to manage the -decode process. As you might expect, in these cases, the performance of the -parser is significant. - -To measure parser performance, I used the same VC-1 elementary stream in -either an MPEG-2 transport stream or a MKV file, and fed it through ffmpeg -with -c:v copy -c:a copy -f null. These are the gperftools counts for -those streams, both filtered to only include vc1_parse() and its callees, -and unfiltered (to include the whole binary). Lower numbers are better: - - Before After -File Filtered Mean StdDev Mean StdDev Confidence Change -M2TS No 861.7 8.2 650.5 8.1 100.0% +32.5% -MKV No 868.9 7.4 731.7 9.0 100.0% +18.8% -M2TS Yes 250.0 11.2 27.2 3.4 100.0% +817.9% -MKV Yes 149.0 12.8 1.7 0.8 100.0% +8526.3% - -Yes, that last case shows vc1_parse() running 86 times faster! The M2TS -case does show a larger absolute improvement though, since it was worse -to begin with. - -This patch has been tested with the FATE suite (albeit on x86 for speed). - -Signed-off-by: Michael Niedermayer ---- - libavcodec/vc1_parser.c | 284 ++++++++++++++++++++++++++++++------------------ - 1 file changed, 180 insertions(+), 104 deletions(-) - -diff --git a/libavcodec/vc1_parser.c b/libavcodec/vc1_parser.c -index cc29ce1..4ed14bc 100644 ---- a/libavcodec/vc1_parser.c -+++ b/libavcodec/vc1_parser.c -@@ -30,122 +30,88 @@ - #include "vc1.h" - #include "get_bits.h" - -+/** The maximum number of bytes of a sequence, entry point or -+ * frame header whose values we pay any attention to */ -+#define UNESCAPED_THRESHOLD 37 -+ -+/** The maximum number of bytes of a sequence, entry point or -+ * frame header which must be valid memory (because they are -+ * used to update the bitstream cache in skip_bits() calls) -+ */ -+#define UNESCAPED_LIMIT 144 -+ -+typedef enum { -+ NO_MATCH, -+ ONE_ZERO, -+ TWO_ZEROS, -+ ONE -+} VC1ParseSearchState; -+ - typedef struct { - ParseContext pc; - VC1Context v; -+ uint8_t prev_start_code; -+ size_t bytes_to_skip; -+ uint8_t unesc_buffer[UNESCAPED_LIMIT]; -+ size_t unesc_index; -+ VC1ParseSearchState search_state; - } VC1ParseContext; - --static void vc1_extract_headers(AVCodecParserContext *s, AVCodecContext *avctx, -- const uint8_t *buf, int buf_size) -+static void vc1_extract_header(AVCodecParserContext *s, AVCodecContext *avctx, -+ const uint8_t *buf, int buf_size) - { -+ /* Parse the header we just finished unescaping */ - VC1ParseContext *vpc = s->priv_data; - GetBitContext gb; -- const uint8_t *start, *end, *next; -- uint8_t *buf2 = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE); -- -+ int ret; - vpc->v.s.avctx = avctx; - vpc->v.parse_only = 1; -- vpc->v.first_pic_header_flag = 1; -- next = buf; -- s->repeat_pict = 0; -- -- for(start = buf, end = buf + buf_size; next < end; start = next){ -- int buf2_size, size; -- int ret; -- -- next = find_next_marker(start + 4, end); -- size = next - start - 4; -- buf2_size = vc1_unescape_buffer(start + 4, size, buf2); -- init_get_bits(&gb, buf2, buf2_size * 8); -- if(size <= 0) continue; -- switch(AV_RB32(start)){ -- case VC1_CODE_SEQHDR: -- ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb); -- break; -- case VC1_CODE_ENTRYPOINT: -- ff_vc1_decode_entry_point(avctx, &vpc->v, &gb); -- break; -- case VC1_CODE_FRAME: -- if(vpc->v.profile < PROFILE_ADVANCED) -- ret = ff_vc1_parse_frame_header (&vpc->v, &gb); -- else -- ret = ff_vc1_parse_frame_header_adv(&vpc->v, &gb); -- -- if (ret < 0) -- break; -- -- /* keep AV_PICTURE_TYPE_BI internal to VC1 */ -- if (vpc->v.s.pict_type == AV_PICTURE_TYPE_BI) -- s->pict_type = AV_PICTURE_TYPE_B; -- else -- s->pict_type = vpc->v.s.pict_type; -- -- if (avctx->ticks_per_frame > 1){ -- // process pulldown flags -- s->repeat_pict = 1; -- // Pulldown flags are only valid when 'broadcast' has been set. -- // So ticks_per_frame will be 2 -- if (vpc->v.rff){ -- // repeat field -- s->repeat_pict = 2; -- }else if (vpc->v.rptfrm){ -- // repeat frames -- s->repeat_pict = vpc->v.rptfrm * 2 + 1; -- } -- } -- -- if (vpc->v.broadcast && vpc->v.interlace && !vpc->v.psf) -- s->field_order = vpc->v.tff ? AV_FIELD_TT : AV_FIELD_BB; -- else -- s->field_order = AV_FIELD_PROGRESSIVE; -+ init_get_bits(&gb, buf, buf_size * 8); -+ switch (vpc->prev_start_code) { -+ case VC1_CODE_SEQHDR & 0xFF: -+ ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb); -+ break; -+ case VC1_CODE_ENTRYPOINT & 0xFF: -+ ff_vc1_decode_entry_point(avctx, &vpc->v, &gb); -+ break; -+ case VC1_CODE_FRAME & 0xFF: -+ if(vpc->v.profile < PROFILE_ADVANCED) -+ ret = ff_vc1_parse_frame_header (&vpc->v, &gb); -+ else -+ ret = ff_vc1_parse_frame_header_adv(&vpc->v, &gb); - -+ if (ret < 0) - break; -- } -- } - -- av_free(buf2); --} -+ /* keep AV_PICTURE_TYPE_BI internal to VC1 */ -+ if (vpc->v.s.pict_type == AV_PICTURE_TYPE_BI) -+ s->pict_type = AV_PICTURE_TYPE_B; -+ else -+ s->pict_type = vpc->v.s.pict_type; - --/** -- * Find the end of the current frame in the bitstream. -- * @return the position of the first byte of the next frame, or -1 -- */ --static int vc1_find_frame_end(ParseContext *pc, const uint8_t *buf, -- int buf_size) { -- int pic_found, i; -- uint32_t state; -- -- pic_found= pc->frame_start_found; -- state= pc->state; -- -- i=0; -- if(!pic_found){ -- for(i=0; iticks_per_frame > 1){ -+ // process pulldown flags -+ s->repeat_pict = 1; -+ // Pulldown flags are only valid when 'broadcast' has been set. -+ // So ticks_per_frame will be 2 -+ if (vpc->v.rff){ -+ // repeat field -+ s->repeat_pict = 2; -+ }else if (vpc->v.rptfrm){ -+ // repeat frames -+ s->repeat_pict = vpc->v.rptfrm * 2 + 1; - } -+ }else{ -+ s->repeat_pict = 0; - } -- } - -- if(pic_found){ -- /* EOF considered as end of frame */ -- if (buf_size == 0) -- return 0; -- for(; iframe_start_found=0; -- pc->state=-1; -- return i-3; -- } -- } -+ if (vpc->v.broadcast && vpc->v.interlace && !vpc->v.psf) -+ s->field_order = vpc->v.tff ? AV_FIELD_TT : AV_FIELD_BB; -+ else -+ s->field_order = AV_FIELD_PROGRESSIVE; -+ -+ break; - } -- pc->frame_start_found= pic_found; -- pc->state= state; -- return END_NOT_FOUND; - } - - static int vc1_parse(AVCodecParserContext *s, -@@ -153,22 +119,127 @@ static int vc1_parse(AVCodecParserContext *s, - const uint8_t **poutbuf, int *poutbuf_size, - const uint8_t *buf, int buf_size) - { -+ /* Here we do the searching for frame boundaries and headers at -+ * the same time. Only a minimal amount at the start of each -+ * header is unescaped. */ - VC1ParseContext *vpc = s->priv_data; -- int next; -+ int pic_found = vpc->pc.frame_start_found; -+ uint8_t *unesc_buffer = vpc->unesc_buffer; -+ size_t unesc_index = vpc->unesc_index; -+ VC1ParseSearchState search_state = vpc->search_state; -+ int next = END_NOT_FOUND; -+ int i = vpc->bytes_to_skip; -+ -+ if (pic_found && buf_size == 0) { -+ /* EOF considered as end of frame */ -+ memset(unesc_buffer + unesc_index, 0, UNESCAPED_THRESHOLD - unesc_index); -+ vc1_extract_header(s, avctx, unesc_buffer, unesc_index); -+ next = 0; -+ } -+ while (i < buf_size) { -+ int start_code_found = 0; -+ uint8_t b; -+ while (i < buf_size && unesc_index < UNESCAPED_THRESHOLD) { -+ b = buf[i++]; -+ unesc_buffer[unesc_index++] = b; -+ if (search_state <= ONE_ZERO) -+ search_state = b ? NO_MATCH : search_state + 1; -+ else if (search_state == TWO_ZEROS) { -+ if (b == 1) -+ search_state = ONE; -+ else if (b > 1) { -+ if (b == 3) -+ unesc_index--; // swallow emulation prevention byte -+ search_state = NO_MATCH; -+ } -+ } -+ else { // search_state == ONE -+ // Header unescaping terminates early due to detection of next start code -+ search_state = NO_MATCH; -+ start_code_found = 1; -+ break; -+ } -+ } -+ if ((s->flags & PARSER_FLAG_COMPLETE_FRAMES) && -+ unesc_index >= UNESCAPED_THRESHOLD && -+ vpc->prev_start_code == (VC1_CODE_FRAME & 0xFF)) -+ { -+ // No need to keep scanning the rest of the buffer for -+ // start codes if we know it contains a complete frame and -+ // we've already unescaped all we need of the frame header -+ vc1_extract_header(s, avctx, unesc_buffer, unesc_index); -+ break; -+ } -+ if (unesc_index >= UNESCAPED_THRESHOLD && !start_code_found) { -+ while (i < buf_size) { -+ if (search_state == NO_MATCH) { -+ i += vpc->v.vc1dsp.vc1_find_start_code_candidate(buf + i, buf_size - i); -+ if (i < buf_size) { -+ search_state = ONE_ZERO; -+ } -+ i++; -+ } else { -+ b = buf[i++]; -+ if (search_state == ONE_ZERO) -+ search_state = b ? NO_MATCH : TWO_ZEROS; -+ else if (search_state == TWO_ZEROS) { -+ if (b >= 1) -+ search_state = b == 1 ? ONE : NO_MATCH; -+ } -+ else { // search_state == ONE -+ search_state = NO_MATCH; -+ start_code_found = 1; -+ break; -+ } -+ } -+ } -+ } -+ if (start_code_found) { -+ vc1_extract_header(s, avctx, unesc_buffer, unesc_index); -+ -+ vpc->prev_start_code = b; -+ unesc_index = 0; -+ -+ if (!(s->flags & PARSER_FLAG_COMPLETE_FRAMES)) { -+ if (!pic_found && (b == (VC1_CODE_FRAME & 0xFF) || b == (VC1_CODE_FIELD & 0xFF))) { -+ pic_found = 1; -+ } -+ else if (pic_found && b != (VC1_CODE_FIELD & 0xFF) && b != (VC1_CODE_SLICE & 0xFF)) { -+ next = i - 4; -+ pic_found = b == (VC1_CODE_FRAME & 0xFF); -+ break; -+ } -+ } -+ } -+ } - -- if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){ -- next= buf_size; -- }else{ -- next= vc1_find_frame_end(&vpc->pc, buf, buf_size); -+ vpc->pc.frame_start_found = pic_found; -+ vpc->unesc_index = unesc_index; -+ vpc->search_state = search_state; - -+ if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) { -+ next = buf_size; -+ } else { - if (ff_combine_frame(&vpc->pc, next, &buf, &buf_size) < 0) { -+ vpc->bytes_to_skip = 0; - *poutbuf = NULL; - *poutbuf_size = 0; - return buf_size; - } - } - -- vc1_extract_headers(s, avctx, buf, buf_size); -+ vpc->v.first_pic_header_flag = 1; -+ -+ /* If we return with a valid pointer to a combined frame buffer -+ * then on the next call then we'll have been unhelpfully rewound -+ * by up to 4 bytes (depending upon whether the start code -+ * overlapped the input buffer, and if so by how much). We don't -+ * want this: it will either cause spurious second detections of -+ * the start code we've already seen, or cause extra bytes to be -+ * inserted at the start of the unescaped buffer. */ -+ vpc->bytes_to_skip = 4; -+ if (next < 0) -+ vpc->bytes_to_skip += next; - - *poutbuf = buf; - *poutbuf_size = buf_size; -@@ -199,6 +270,11 @@ static av_cold int vc1_parse_init(AVCodecParserContext *s) - { - VC1ParseContext *vpc = s->priv_data; - vpc->v.s.slice_context_count = 1; -+ vpc->v.first_pic_header_flag = 1; -+ vpc->prev_start_code = 0; -+ vpc->bytes_to_skip = 0; -+ vpc->unesc_index = 0; -+ vpc->search_state = NO_MATCH; - return ff_vc1_init_common(&vpc->v); - } - --- -1.9.1 diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-0004-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-0004-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch deleted file mode 100644 index 575622e346..0000000000 --- a/packages/multimedia/ffmpeg/patches/ffmpeg-0004-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch +++ /dev/null @@ -1,285 +0,0 @@ -From 98428a8cf593587b403076bb54b46cc70ed17ff2 Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Mon, 10 Mar 2014 14:42:05 +0000 -Subject: [PATCH 4/6] truehd: add hand-scheduled ARM asm version of - ff_mlp_rematrix_channel. - -Profiling results for overall audio decode and the rematrix_channels function -in particular are as follows: - - Before After - Mean StdDev Mean StdDev Confidence Change -6:2 total 370.8 17.0 348.8 20.1 99.9% +6.3% -6:2 function 46.4 8.4 45.8 6.6 18.0% +1.2% (insignificant) -8:2 total 343.2 19.0 339.1 15.4 54.7% +1.2% (insignificant) -8:2 function 38.9 3.9 40.2 6.9 52.4% -3.2% (insignificant) -6:6 total 658.4 15.7 604.6 20.8 100.0% +8.9% -6:6 function 109.0 8.7 59.5 5.4 100.0% +83.3% -8:8 total 896.2 24.5 766.4 17.6 100.0% +16.9% -8:8 function 223.4 12.8 93.8 5.0 100.0% +138.3% - -The assembly version has also been tested with a fuzz tester to ensure that -any combinations of inputs not exercised by my available test streams still -generate mathematically identical results to the C version. ---- - libavcodec/arm/mlpdsp_arm.S | 222 +++++++++++++++++++++++++++++++++++++++ - libavcodec/arm/mlpdsp_init_arm.c | 12 +++ - 2 files changed, 234 insertions(+) - -diff --git a/libavcodec/arm/mlpdsp_arm.S b/libavcodec/arm/mlpdsp_arm.S -index 615819d..9b51d0c 100644 ---- a/libavcodec/arm/mlpdsp_arm.S -+++ b/libavcodec/arm/mlpdsp_arm.S -@@ -431,3 +431,225 @@ endfunc - .unreq ST3 - .unreq I - .unreq PSAMP -+ -+/********************************************************************/ -+ -+PSA .req a1 // samples -+PCO .req a2 // coeffs -+PBL .req a3 // bypassed_lsbs -+INDEX .req a4 -+CO0 .req v1 -+CO1 .req v2 -+CO2 .req v3 -+CO3 .req v4 -+SA0 .req v5 -+SA1 .req v6 -+SA2 .req sl -+SA3 .req fp -+AC0 .req ip -+AC1 .req lr -+NOISE .req SA0 -+LSB .req SA1 -+DCH .req SA2 // dest_ch -+MASK .req SA3 -+ -+ // INDEX is used as follows: -+ // bits 0..6 index2 (values up to 17, but wider so that we can -+ // add to index field without needing to mask) -+ // bits 7..14 i (values up to 160) -+ // bit 15 underflow detect for i -+ // bits 25..31 (if access_unit_size_pow2 == 128) \ index -+ // bits 26..31 (if access_unit_size_pow2 == 64) / -+ -+.macro implement_rematrix shift, index_mask, mask_minus1, maxchan -+ .if \maxchan == 1 -+ // We can just leave the coefficients in registers in this case -+ ldrd CO0, CO1, [PCO] -+ .endif -+1: -+ .if \maxchan == 1 -+ ldrd SA0, SA1, [PSA] -+ smull AC0, AC1, CO0, SA0 -+ .elseif \maxchan == 5 -+ ldr CO0, [PCO, #0] -+ ldr SA0, [PSA, #0] -+ ldr CO1, [PCO, #4] -+ ldr SA1, [PSA, #4] -+ ldrd CO2, CO3, [PCO, #8] -+ smull AC0, AC1, CO0, SA0 -+ ldrd SA2, SA3, [PSA, #8] -+ smlal AC0, AC1, CO1, SA1 -+ ldrd CO0, CO1, [PCO, #16] -+ smlal AC0, AC1, CO2, SA2 -+ ldrd SA0, SA1, [PSA, #16] -+ smlal AC0, AC1, CO3, SA3 -+ smlal AC0, AC1, CO0, SA0 -+ .else // \maxchan == 7 -+ ldr CO2, [PCO, #0] -+ ldr SA2, [PSA, #0] -+ ldr CO3, [PCO, #4] -+ ldr SA3, [PSA, #4] -+ ldrd CO0, CO1, [PCO, #8] -+ smull AC0, AC1, CO2, SA2 -+ ldrd SA0, SA1, [PSA, #8] -+ smlal AC0, AC1, CO3, SA3 -+ ldrd CO2, CO3, [PCO, #16] -+ smlal AC0, AC1, CO0, SA0 -+ ldrd SA2, SA3, [PSA, #16] -+ smlal AC0, AC1, CO1, SA1 -+ ldrd CO0, CO1, [PCO, #24] -+ smlal AC0, AC1, CO2, SA2 -+ ldrd SA0, SA1, [PSA, #24] -+ smlal AC0, AC1, CO3, SA3 -+ smlal AC0, AC1, CO0, SA0 -+ .endif -+ ldm sp, {NOISE, DCH, MASK} -+ smlal AC0, AC1, CO1, SA1 -+ .if \shift != 0 -+ .if \index_mask == 63 -+ add NOISE, NOISE, INDEX, lsr #32-6 -+ ldrb LSB, [PBL], #MAX_CHANNELS -+ ldrsb NOISE, [NOISE] -+ add INDEX, INDEX, INDEX, lsl #32-6 -+ .else // \index_mask == 127 -+ add NOISE, NOISE, INDEX, lsr #32-7 -+ ldrb LSB, [PBL], #MAX_CHANNELS -+ ldrsb NOISE, [NOISE] -+ add INDEX, INDEX, INDEX, lsl #32-7 -+ .endif -+ sub INDEX, INDEX, #1<<7 -+ adds AC0, AC0, NOISE, lsl #\shift + 7 -+ adc AC1, AC1, NOISE, asr #31 -+ .else -+ ldrb LSB, [PBL], #MAX_CHANNELS -+ sub INDEX, INDEX, #1<<7 -+ .endif -+ add PSA, PSA, #MAX_CHANNELS*4 -+ mov AC0, AC0, lsr #14 -+ orr AC0, AC0, AC1, lsl #18 -+ .if !\mask_minus1 -+ and AC0, AC0, MASK -+ .endif -+ add AC0, AC0, LSB -+ tst INDEX, #1<<15 -+ str AC0, [PSA, DCH, lsl #2] // DCH is precompensated for the early increment of PSA -+ beq 1b -+ b 98f -+.endm -+ -+.macro switch_on_maxchan shift, index_mask, mask_minus1 -+ cmp v4, #5 -+ blo 51f -+ beq 50f -+ implement_rematrix \shift, \index_mask, \mask_minus1, 7 -+50: implement_rematrix \shift, \index_mask, \mask_minus1, 5 -+51: implement_rematrix \shift, \index_mask, \mask_minus1, 1 -+.endm -+ -+.macro switch_on_mask shift, index_mask -+ cmp sl, #-1 -+ bne 40f -+ switch_on_maxchan \shift, \index_mask, 1 -+40: switch_on_maxchan \shift, \index_mask, 0 -+.endm -+ -+.macro switch_on_au_size shift -+ .if \shift == 0 -+ switch_on_mask \shift, undefined -+ .else -+ teq v6, #64 -+ bne 30f -+ orr INDEX, INDEX, v1, lsl #32-6 -+ switch_on_mask \shift, 63 -+30: orr INDEX, INDEX, v1, lsl #32-7 -+ switch_on_mask \shift, 127 -+ .endif -+.endm -+ -+/* void ff_mlp_rematrix_channel_arm(int32_t *samples, -+ * const int32_t *coeffs, -+ * const uint8_t *bypassed_lsbs, -+ * const int8_t *noise_buffer, -+ * int index, -+ * unsigned int dest_ch, -+ * uint16_t blockpos, -+ * unsigned int maxchan, -+ * int matrix_noise_shift, -+ * int access_unit_size_pow2, -+ * int32_t mask); -+ */ -+function ff_mlp_rematrix_channel_arm, export=1 -+ push {v1-fp,lr} -+ add v1, sp, #9*4 // point at arguments on stack -+ ldm v1, {v1-sl} -+ teq v4, #1 -+ itt ne -+ teqne v4, #5 -+ teqne v4, #7 -+ bne 99f -+ teq v6, #64 -+ it ne -+ teqne v6, #128 -+ bne 99f -+ sub v2, v2, #MAX_CHANNELS -+ push {a4,v2,sl} // initialise NOISE,DCH,MASK; make sp dword-aligned -+ movs INDEX, v3, lsl #7 -+ beq 98f // just in case, do nothing if blockpos = 0 -+ subs INDEX, INDEX, #1<<7 // offset by 1 so we borrow at the right time -+ adc lr, v1, v1 // calculate index2 (C was set by preceding subs) -+ orr INDEX, INDEX, lr -+ // Switch on matrix_noise_shift: values 0 and 1 are -+ // disproportionately common so do those in a form the branch -+ // predictor can accelerate. Values can only go up to 15. -+ cmp v5, #1 -+ beq 11f -+ blo 10f -+A ldr pc, [pc, v5, lsl #2] -+T tbh [pc, v5, lsl #1] -+0: -+A .word 0, 0, 0, 12f, 13f, 14f, 15f, 16f, 17f, 18f, 19f, 20f, 21f, 22f, 23f, 24f, 25f -+T .hword 0, 0, (12f - 0b) / 2, (13f - 0b) / 2, (14f - 0b) / 2, (15f - 0b) / 2 -+T .hword (16f - 0b) / 2, (17f - 0b) / 2, (18f - 0b) / 2, (19f - 0b) / 2 -+T .hword (20f - 0b) / 2, (21f - 0b) / 2, (22f - 0b) / 2, (23f - 0b) / 2, (24f - 0b) / 2, (25f - 0b) / 2 -+10: switch_on_au_size 0 -+11: switch_on_au_size 1 -+12: switch_on_au_size 2 -+13: switch_on_au_size 3 -+14: switch_on_au_size 4 -+15: switch_on_au_size 5 -+16: switch_on_au_size 6 -+17: switch_on_au_size 7 -+18: switch_on_au_size 8 -+19: switch_on_au_size 9 -+20: switch_on_au_size 10 -+21: switch_on_au_size 11 -+22: switch_on_au_size 12 -+23: switch_on_au_size 13 -+24: switch_on_au_size 14 -+25: switch_on_au_size 15 -+ -+98: add sp, sp, #3*4 -+ pop {v1-fp,pc} -+99: // Can't handle these parameters, drop back to C -+ pop {v1-fp,lr} -+ b X(ff_mlp_rematrix_channel) -+endfunc -+ -+ .unreq PSA -+ .unreq PCO -+ .unreq PBL -+ .unreq INDEX -+ .unreq CO0 -+ .unreq CO1 -+ .unreq CO2 -+ .unreq CO3 -+ .unreq SA0 -+ .unreq SA1 -+ .unreq SA2 -+ .unreq SA3 -+ .unreq AC0 -+ .unreq AC1 -+ .unreq NOISE -+ .unreq LSB -+ .unreq DCH -+ .unreq MASK -diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c -index 9a14815..1bb2276 100644 ---- a/libavcodec/arm/mlpdsp_init_arm.c -+++ b/libavcodec/arm/mlpdsp_init_arm.c -@@ -29,8 +29,20 @@ void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff, - int firorder, int iirorder, - unsigned int filter_shift, int32_t mask, - int blocksize, int32_t *sample_buffer); -+void ff_mlp_rematrix_channel_arm(int32_t *samples, -+ const int32_t *coeffs, -+ const uint8_t *bypassed_lsbs, -+ const int8_t *noise_buffer, -+ int index, -+ unsigned int dest_ch, -+ uint16_t blockpos, -+ unsigned int maxchan, -+ int matrix_noise_shift, -+ int access_unit_size_pow2, -+ int32_t mask); - - av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c) - { - c->mlp_filter_channel = ff_mlp_filter_channel_arm; -+ c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm; - } --- -1.9.1 diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-0005-truehd-break-out-part-of-output_data-into-platform-s.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-0005-truehd-break-out-part-of-output_data-into-platform-s.patch deleted file mode 100644 index c5880e909a..0000000000 --- a/packages/multimedia/ffmpeg/patches/ffmpeg-0005-truehd-break-out-part-of-output_data-into-platform-s.patch +++ /dev/null @@ -1,197 +0,0 @@ -From 5bfcb7a691eb63c56f1485b60f399d79ff943799 Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Wed, 12 Mar 2014 18:18:39 +0000 -Subject: [PATCH 5/6] truehd: break out part of output_data into - platform-specific callback. - -Verified with profiling that this doesn't have a measurable effect upon -overall performance. ---- - libavcodec/mlpdec.c | 40 +++++++++++++++++++++++----------------- - libavcodec/mlpdsp.c | 38 ++++++++++++++++++++++++++++++++++++++ - libavcodec/mlpdsp.h | 22 ++++++++++++++++++++++ - 3 files changed, 83 insertions(+), 17 deletions(-) - -diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c -index 01ded5c..061dabc 100644 ---- a/libavcodec/mlpdec.c -+++ b/libavcodec/mlpdec.c -@@ -363,6 +363,10 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb) - m->avctx->sample_fmt = AV_SAMPLE_FMT_S32; - else - m->avctx->sample_fmt = AV_SAMPLE_FMT_S16; -+ m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(m->substream[m->max_decoded_substream].ch_assign, -+ m->substream[m->max_decoded_substream].output_shift, -+ m->substream[m->max_decoded_substream].max_matrix_channel, -+ m->avctx->sample_fmt == AV_SAMPLE_FMT_S32); - - m->params_valid = 1; - for (substr = 0; substr < MAX_SUBSTREAMS; substr++) -@@ -612,6 +616,10 @@ FF_ENABLE_DEPRECATION_WARNINGS - if (substr == m->max_decoded_substream) { - m->avctx->channels = s->max_matrix_channel + 1; - m->avctx->channel_layout = s->ch_layout; -+ m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(s->ch_assign, -+ s->output_shift, -+ s->max_matrix_channel, -+ m->avctx->sample_fmt == AV_SAMPLE_FMT_S32); - - if (m->avctx->codec_id == AV_CODEC_ID_MLP && m->needs_reordering) { - if (m->avctx->channel_layout == (AV_CH_LAYOUT_QUAD|AV_CH_LOW_FREQUENCY) || -@@ -857,9 +865,15 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp, - return ret; - - if (s->param_presence_flags & PARAM_OUTSHIFT) -- if (get_bits1(gbp)) -+ if (get_bits1(gbp)) { - for (ch = 0; ch <= s->max_matrix_channel; ch++) - s->output_shift[ch] = get_sbits(gbp, 4); -+ if (substr == m->max_decoded_substream) -+ m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(s->ch_assign, -+ s->output_shift, -+ s->max_matrix_channel, -+ m->avctx->sample_fmt == AV_SAMPLE_FMT_S32); -+ } - - if (s->param_presence_flags & PARAM_QUANTSTEP) - if (get_bits1(gbp)) -@@ -1058,9 +1072,6 @@ static int output_data(MLPDecodeContext *m, unsigned int substr, - { - AVCodecContext *avctx = m->avctx; - SubStream *s = &m->substream[substr]; -- unsigned int i, out_ch = 0; -- int32_t *data_32; -- int16_t *data_16; - int ret; - int is32 = (m->avctx->sample_fmt == AV_SAMPLE_FMT_S32); - -@@ -1078,19 +1089,14 @@ static int output_data(MLPDecodeContext *m, unsigned int substr, - frame->nb_samples = s->blockpos; - if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) - return ret; -- data_32 = (int32_t *)frame->data[0]; -- data_16 = (int16_t *)frame->data[0]; -- -- for (i = 0; i < s->blockpos; i++) { -- for (out_ch = 0; out_ch <= s->max_matrix_channel; out_ch++) { -- int mat_ch = s->ch_assign[out_ch]; -- int32_t sample = m->sample_buffer[i][mat_ch] -- << s->output_shift[mat_ch]; -- s->lossless_check_data ^= (sample & 0xffffff) << mat_ch; -- if (is32) *data_32++ = sample << 8; -- else *data_16++ = sample >> 8; -- } -- } -+ s->lossless_check_data = m->dsp.mlp_pack_output(s->lossless_check_data, -+ s->blockpos, -+ m->sample_buffer, -+ frame->data[0], -+ s->ch_assign, -+ s->output_shift, -+ s->max_matrix_channel, -+ is32); - - /* Update matrix encoding side data */ - if ((ret = ff_side_data_update_matrix_encoding(frame, s->matrix_encoding)) < 0) -diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c -index 7a359b0..3ae8c37 100644 ---- a/libavcodec/mlpdsp.c -+++ b/libavcodec/mlpdsp.c -@@ -89,10 +89,48 @@ void ff_mlp_rematrix_channel(int32_t *samples, - } - } - -+static int32_t (*mlp_select_pack_output(uint8_t *ch_assign, -+ int8_t *output_shift, -+ uint8_t max_matrix_channel, -+ int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int) -+{ -+ return ff_mlp_pack_output; -+} -+ -+int32_t ff_mlp_pack_output(int32_t lossless_check_data, -+ uint16_t blockpos, -+ int32_t (*sample_buffer)[MAX_CHANNELS], -+ void *data, -+ uint8_t *ch_assign, -+ int8_t *output_shift, -+ uint8_t max_matrix_channel, -+ int is32) -+{ -+ unsigned int i, out_ch = 0; -+ int32_t *data_32 = data; -+ int16_t *data_16 = data; -+ -+ for (i = 0; i < blockpos; i++) { -+ for (out_ch = 0; out_ch <= max_matrix_channel; out_ch++) { -+ int mat_ch = ch_assign[out_ch]; -+ int32_t sample = sample_buffer[i][mat_ch] -+ << output_shift[mat_ch]; -+ lossless_check_data ^= (sample & 0xffffff) << mat_ch; -+ if (is32) -+ *data_32++ = sample << 8; -+ else -+ *data_16++ = sample >> 8; -+ } -+ } -+ return lossless_check_data; -+} -+ - av_cold void ff_mlpdsp_init(MLPDSPContext *c) - { - c->mlp_filter_channel = mlp_filter_channel; - c->mlp_rematrix_channel = ff_mlp_rematrix_channel; -+ c->mlp_select_pack_output = mlp_select_pack_output; -+ c->mlp_pack_output = ff_mlp_pack_output; - if (ARCH_ARM) - ff_mlpdsp_init_arm(c); - if (ARCH_X86) -diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h -index f98e9be..a0edeb7 100644 ---- a/libavcodec/mlpdsp.h -+++ b/libavcodec/mlpdsp.h -@@ -23,6 +23,7 @@ - #define AVCODEC_MLPDSP_H - - #include -+#include "mlp.h" - - void ff_mlp_rematrix_channel(int32_t *samples, - const int32_t *coeffs, -@@ -36,6 +37,15 @@ void ff_mlp_rematrix_channel(int32_t *samples, - int access_unit_size_pow2, - int32_t mask); - -+int32_t ff_mlp_pack_output(int32_t lossless_check_data, -+ uint16_t blockpos, -+ int32_t (*sample_buffer)[MAX_CHANNELS], -+ void *data, -+ uint8_t *ch_assign, -+ int8_t *output_shift, -+ uint8_t max_matrix_channel, -+ int is32); -+ - typedef struct MLPDSPContext { - void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff, - int firorder, int iirorder, -@@ -52,6 +62,18 @@ typedef struct MLPDSPContext { - int matrix_noise_shift, - int access_unit_size_pow2, - int32_t mask); -+ int32_t (*(*mlp_select_pack_output)(uint8_t *ch_assign, -+ int8_t *output_shift, -+ uint8_t max_matrix_channel, -+ int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int); -+ int32_t (*mlp_pack_output)(int32_t lossless_check_data, -+ uint16_t blockpos, -+ int32_t (*sample_buffer)[MAX_CHANNELS], -+ void *data, -+ uint8_t *ch_assign, -+ int8_t *output_shift, -+ uint8_t max_matrix_channel, -+ int is32); - } MLPDSPContext; - - void ff_mlpdsp_init(MLPDSPContext *c); --- -1.9.1 diff --git a/packages/multimedia/ffmpeg/patches/ffmpeg-0006-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch b/packages/multimedia/ffmpeg/patches/ffmpeg-0006-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch deleted file mode 100644 index 93add62da5..0000000000 --- a/packages/multimedia/ffmpeg/patches/ffmpeg-0006-truehd-add-hand-scheduled-ARM-asm-version-of-ff_mlp_.patch +++ /dev/null @@ -1,689 +0,0 @@ -From c647209386bd811cc1c33b4fc8ec17a00f8c8ded Mon Sep 17 00:00:00 2001 -From: Ben Avison -Date: Thu, 13 Mar 2014 00:21:55 +0000 -Subject: [PATCH 6/6] truehd: add hand-scheduled ARM asm version of - ff_mlp_pack_output. - -Profiling results for overall decode and the output_data function in -particular are as follows: - - Before After - Mean StdDev Mean StdDev Confidence Change -6:2 total 339.6 15.1 329.3 16.0 95.8% +3.1% (insignificant) -6:2 function 24.6 6.0 9.9 3.1 100.0% +148.5% -8:2 total 324.5 15.5 323.6 14.3 15.2% +0.3% (insignificant) -8:2 function 20.4 3.9 9.9 3.4 100.0% +104.7% -6:6 total 572.8 20.6 539.9 24.2 100.0% +6.1% -6:6 function 54.5 5.6 16.0 3.8 100.0% +240.9% -8:8 total 741.5 21.2 702.5 18.5 100.0% +5.6% -8:8 function 63.9 7.6 18.4 4.8 100.0% +247.3% - -The assembly version has also been tested with a fuzz tester to ensure that -any combinations of inputs not exercised by my available test streams still -generate mathematically identical results to the C version. ---- - libavcodec/arm/Makefile | 1 + - libavcodec/arm/mlpdsp_armv6.S | 530 +++++++++++++++++++++++++++++++++++++++ - libavcodec/arm/mlpdsp_init_arm.c | 96 +++++++ - 3 files changed, 627 insertions(+) - create mode 100644 libavcodec/arm/mlpdsp_armv6.S - -diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile -index ba673b1..7b2f923 100644 ---- a/libavcodec/arm/Makefile -+++ b/libavcodec/arm/Makefile -@@ -52,6 +52,7 @@ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o - ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o - ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \ - arm/hpeldsp_armv6.o -+ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o - ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o - ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \ - arm/vp8dsp_init_armv6.o \ -diff --git a/libavcodec/arm/mlpdsp_armv6.S b/libavcodec/arm/mlpdsp_armv6.S -new file mode 100644 -index 0000000..05a2c85 ---- /dev/null -+++ b/libavcodec/arm/mlpdsp_armv6.S -@@ -0,0 +1,530 @@ -+/* -+ * Copyright (c) 2014 RISC OS Open Ltd -+ * Author: Ben Avison -+ * -+ * This file is part of FFmpeg. -+ * -+ * FFmpeg is free software; you can redistribute it and/or -+ * modify it under the terms of the GNU Lesser General Public -+ * License as published by the Free Software Foundation; either -+ * version 2.1 of the License, or (at your option) any later version. -+ * -+ * FFmpeg is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -+ * Lesser General Public License for more details. -+ * -+ * You should have received a copy of the GNU Lesser General Public -+ * License along with FFmpeg; if not, write to the Free Software -+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -+ */ -+ -+#include "libavutil/arm/asm.S" -+ -+.macro loadregoffsh2 group, index, base, offgroup, offindex -+ .altmacro -+ loadregoffsh2_ \group, %(\index), \base, \offgroup, %(\offindex) -+ .noaltmacro -+.endm -+ -+.macro loadregoffsh2_ group, index, base, offgroup, offindex -+ ldr \group\index, [\base, \offgroup\offindex, lsl #2] -+.endm -+ -+.macro eorlslreg check, data, group, index -+ .altmacro -+ eorlslreg_ \check, \data, \group, %(\index) -+ .noaltmacro -+.endm -+ -+.macro eorlslreg_ check, data, group, index -+ eor \check, \check, \data, lsl \group\index -+.endm -+ -+.macro decr_modulo var, by, modulus -+ .set \var, \var - \by -+ .if \var == 0 -+ .set \var, \modulus -+ .endif -+.endm -+ -+ .macro load_group1 size, channels, r0, r1, r2, r3, pointer_dead=0 -+ .if \size == 2 -+ ldrd \r0, \r1, [IN], #(\size + 8 - \channels) * 4 -+ .else // size == 4 -+ .if IDX1 > 4 || \channels==8 -+ ldm IN!, {\r0, \r1, \r2, \r3} -+ .else -+ ldm IN, {\r0, \r1, \r2, \r3} -+ .if !\pointer_dead -+ add IN, IN, #(4 + 8 - \channels) * 4 -+ .endif -+ .endif -+ .endif -+ decr_modulo IDX1, \size, \channels -+ .endm -+ -+ .macro load_group2 size, channels, r0, r1, r2, r3, pointer_dead=0 -+ .if \size == 2 -+ .if IDX1 > 2 -+ ldm IN!, {\r2, \r3} -+ .else -+//A .ifc \r2, ip -+//A .if \pointer_dead -+//A ldm IN, {\r2, \r3} -+//A .else -+//A ldr \r2, [IN], #4 -+//A ldr \r3, [IN], #(\size - 1 + 8 - \channels) * 4 -+//A .endif -+//A .else -+ ldrd \r2, \r3, [IN], #(\size + 8 - \channels) * 4 -+//A .endif -+ .endif -+ .endif -+ decr_modulo IDX1, \size, \channels -+ .endm -+ -+.macro implement_pack inorder, channels, shift -+.if \inorder -+.ifc \shift, mixed -+ -+CHECK .req a1 -+COUNT .req a2 -+IN .req a3 -+OUT .req a4 -+DAT0 .req v1 -+DAT1 .req v2 -+DAT2 .req v3 -+DAT3 .req v4 -+SHIFT0 .req v5 -+SHIFT1 .req v6 -+SHIFT2 .req sl -+SHIFT3 .req fp -+SHIFT4 .req ip -+SHIFT5 .req lr -+ -+ .macro output4words -+ .set SIZE_GROUP1, IDX1 -+ .if SIZE_GROUP1 > 4 -+ .set SIZE_GROUP1, 4 -+ .endif -+ .set SIZE_GROUP2, 4 - SIZE_GROUP1 -+ load_group1 SIZE_GROUP1, \channels, DAT0, DAT1, DAT2, DAT3 -+ load_group2 SIZE_GROUP2, \channels, DAT0, DAT1, DAT2, DAT3 -+ .if \channels == 2 -+ lsl DAT0, SHIFT0 -+ lsl DAT1, SHIFT1 -+ lsl DAT2, SHIFT0 -+ lsl DAT3, SHIFT1 -+ .elseif \channels == 6 -+ .if IDX2 == 6 -+ lsl DAT0, SHIFT0 -+ lsl DAT1, SHIFT1 -+ lsl DAT2, SHIFT2 -+ lsl DAT3, SHIFT3 -+ .elseif IDX2 == 2 -+ lsl DAT0, SHIFT4 -+ lsl DAT1, SHIFT5 -+ lsl DAT2, SHIFT0 -+ lsl DAT3, SHIFT1 -+ .else // IDX2 == 4 -+ lsl DAT0, SHIFT2 -+ lsl DAT1, SHIFT3 -+ lsl DAT2, SHIFT4 -+ lsl DAT3, SHIFT5 -+ .endif -+ .elseif \channels == 8 -+ .if IDX2 == 8 -+ uxtb SHIFT0, SHIFT4, ror #0 -+ uxtb SHIFT1, SHIFT4, ror #8 -+ uxtb SHIFT2, SHIFT4, ror #16 -+ uxtb SHIFT3, SHIFT4, ror #24 -+ .else -+ uxtb SHIFT0, SHIFT5, ror #0 -+ uxtb SHIFT1, SHIFT5, ror #8 -+ uxtb SHIFT2, SHIFT5, ror #16 -+ uxtb SHIFT3, SHIFT5, ror #24 -+ .endif -+ lsl DAT0, SHIFT0 -+ lsl DAT1, SHIFT1 -+ lsl DAT2, SHIFT2 -+ lsl DAT3, SHIFT3 -+ .endif -+ eor CHECK, CHECK, DAT0, lsr #8 - (\channels - IDX2) -+ eor CHECK, CHECK, DAT1, lsr #7 - (\channels - IDX2) -+ decr_modulo IDX2, 2, \channels -+ eor CHECK, CHECK, DAT2, lsr #8 - (\channels - IDX2) -+ eor CHECK, CHECK, DAT3, lsr #7 - (\channels - IDX2) -+ decr_modulo IDX2, 2, \channels -+ stm OUT!, {DAT0 - DAT3} -+ .endm -+ -+ .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 4) -+ .if (WORDS_PER_LOOP % 2) == 0 -+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2 -+ .endif -+ .if (WORDS_PER_LOOP % 2) == 0 -+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2 -+ .endif -+ .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4 -+ .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels -+ -+function ff_mlp_pack_output_inorder_\channels\()ch_mixedshift_armv6, export=1 -+ .if SAMPLES_PER_LOOP > 1 -+ tst COUNT, #SAMPLES_PER_LOOP - 1 // always seems to be in practice -+ bne X(ff_mlp_pack_output) // but just in case, branch to C implementation if not -+ .endif -+ teq COUNT, #0 -+ it eq -+ bxeq lr -+ push {v1-v6,sl,fp,lr} -+ ldr SHIFT0, [sp, #(9+1)*4] // get output_shift from stack -+ ldr SHIFT1, =0x08080808 -+ ldr SHIFT4, [SHIFT0] -+ .if \channels == 2 -+ uadd8 SHIFT4, SHIFT4, SHIFT1 // increase all shifts by 8 -+ uxtb SHIFT0, SHIFT4, ror #0 -+ uxtb SHIFT1, SHIFT4, ror #8 -+ .else -+ ldr SHIFT5, [SHIFT0, #4] -+ uadd8 SHIFT4, SHIFT4, SHIFT1 // increase all shifts by 8 -+ uadd8 SHIFT5, SHIFT5, SHIFT1 -+ .if \channels == 6 -+ uxtb SHIFT0, SHIFT4, ror #0 -+ uxtb SHIFT1, SHIFT4, ror #8 -+ uxtb SHIFT2, SHIFT4, ror #16 -+ uxtb SHIFT3, SHIFT4, ror #24 -+ uxtb SHIFT4, SHIFT5, ror #0 -+ uxtb SHIFT5, SHIFT5, ror #8 -+ .endif -+ .endif -+ .set IDX1, \channels -+ .set IDX2, \channels -+0: -+ .rept WORDS_PER_LOOP / 4 -+ output4words -+ .endr -+ subs COUNT, COUNT, #SAMPLES_PER_LOOP -+ bne 0b -+ pop {v1-v6,sl,fp,pc} -+ .ltorg -+endfunc -+ .purgem output4words -+ -+ .unreq CHECK -+ .unreq COUNT -+ .unreq IN -+ .unreq OUT -+ .unreq DAT0 -+ .unreq DAT1 -+ .unreq DAT2 -+ .unreq DAT3 -+ .unreq SHIFT0 -+ .unreq SHIFT1 -+ .unreq SHIFT2 -+ .unreq SHIFT3 -+ .unreq SHIFT4 -+ .unreq SHIFT5 -+ -+.else // not mixed -+ -+CHECK .req a1 -+COUNT .req a2 -+IN .req a3 -+OUT .req a4 -+DAT0 .req v1 -+DAT1 .req v2 -+DAT2 .req v3 -+DAT3 .req v4 -+DAT4 .req v5 -+DAT5 .req v6 -+DAT6 .req sl // use these rather than the otherwise unused -+DAT7 .req fp // ip and lr so that we can load them usinf LDRD -+ -+ .macro output4words tail, head, r0, r1, r2, r3, r4, r5, r6, r7, pointer_dead=0 -+ .if \head -+ .set SIZE_GROUP1, IDX1 -+ .if SIZE_GROUP1 > 4 -+ .set SIZE_GROUP1, 4 -+ .endif -+ .set SIZE_GROUP2, 4 - SIZE_GROUP1 -+ load_group1 SIZE_GROUP1, \channels, \r0, \r1, \r2, \r3, \pointer_dead -+ .endif -+ .if \tail -+ eor CHECK, CHECK, \r4, lsr #8 - (\channels - IDX2) -+ eor CHECK, CHECK, \r5, lsr #7 - (\channels - IDX2) -+ decr_modulo IDX2, 2, \channels -+ .endif -+ .if \head -+ load_group2 SIZE_GROUP2, \channels, \r0, \r1, \r2, \r3, \pointer_dead -+ .endif -+ .if \tail -+ eor CHECK, CHECK, \r6, lsr #8 - (\channels - IDX2) -+ eor CHECK, CHECK, \r7, lsr #7 - (\channels - IDX2) -+ decr_modulo IDX2, 2, \channels -+ stm OUT!, {\r4, \r5, \r6, \r7} -+ .endif -+ .if \head -+ lsl \r0, #8 + \shift -+ lsl \r1, #8 + \shift -+ lsl \r2, #8 + \shift -+ lsl \r3, #8 + \shift -+ .endif -+ .endm -+ -+ .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 8) -+ .if (WORDS_PER_LOOP % 2) == 0 -+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2 -+ .endif -+ .if (WORDS_PER_LOOP % 2) == 0 -+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2 -+ .endif -+ .if (WORDS_PER_LOOP % 2) == 0 -+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2 -+ .endif -+ .set WORDS_PER_LOOP, WORDS_PER_LOOP * 8 -+ .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels -+ -+function ff_mlp_pack_output_inorder_\channels\()ch_\shift\()shift_armv6, export=1 -+ .if SAMPLES_PER_LOOP > 1 -+ tst COUNT, #SAMPLES_PER_LOOP - 1 // always seems to be in practice -+ bne X(ff_mlp_pack_output) // but just in case, branch to C implementation if not -+ .endif -+ subs COUNT, COUNT, #SAMPLES_PER_LOOP -+ it lo -+ bxlo lr -+ push {v1-v6,sl,fp,lr} -+ .set IDX1, \channels -+ .set IDX2, \channels -+ output4words 0, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7 -+0: beq 1f -+ .rept WORDS_PER_LOOP / 8 -+ output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3 -+ output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7 -+ .endr -+ subs COUNT, COUNT, #SAMPLES_PER_LOOP -+ bne 0b -+1: -+ .rept WORDS_PER_LOOP / 8 - 1 -+ output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3 -+ output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7 -+ .endr -+ output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3, pointer_dead=1 -+ output4words 1, 0, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7 -+ pop {v1-v6,sl,fp,pc} -+endfunc -+ .purgem output4words -+ -+ .unreq CHECK -+ .unreq COUNT -+ .unreq IN -+ .unreq OUT -+ .unreq DAT0 -+ .unreq DAT1 -+ .unreq DAT2 -+ .unreq DAT3 -+ .unreq DAT4 -+ .unreq DAT5 -+ .unreq DAT6 -+ .unreq DAT7 -+ -+.endif // mixed -+.else // not inorder -+.ifc \shift, mixed -+ -+// This case not currently handled -+ -+.else // not mixed -+ -+#if !CONFIG_THUMB -+ -+CHECK .req a1 -+COUNT .req a2 -+IN .req a3 -+OUT .req a4 -+DAT0 .req v1 -+DAT1 .req v2 -+DAT2 .req v3 -+DAT3 .req v4 -+CHAN0 .req v5 -+CHAN1 .req v6 -+CHAN2 .req sl -+CHAN3 .req fp -+CHAN4 .req ip -+CHAN5 .req lr -+ -+ .macro output4words -+ .if \channels == 8 -+ .if IDX1 == 8 -+ uxtb CHAN0, CHAN4, ror #0 -+ uxtb CHAN1, CHAN4, ror #8 -+ uxtb CHAN2, CHAN4, ror #16 -+ uxtb CHAN3, CHAN4, ror #24 -+ .else -+ uxtb CHAN0, CHAN5, ror #0 -+ uxtb CHAN1, CHAN5, ror #8 -+ uxtb CHAN2, CHAN5, ror #16 -+ uxtb CHAN3, CHAN5, ror #24 -+ .endif -+ ldr DAT0, [IN, CHAN0, lsl #2] -+ ldr DAT1, [IN, CHAN1, lsl #2] -+ ldr DAT2, [IN, CHAN2, lsl #2] -+ ldr DAT3, [IN, CHAN3, lsl #2] -+ .if IDX1 == 4 -+ add IN, IN, #8*4 -+ .endif -+ decr_modulo IDX1, 4, \channels -+ .else -+ .set SIZE_GROUP1, IDX1 -+ .if SIZE_GROUP1 > 4 -+ .set SIZE_GROUP1, 4 -+ .endif -+ .set SIZE_GROUP2, 4 - SIZE_GROUP1 -+ .if SIZE_GROUP1 == 2 -+ loadregoffsh2 DAT, 0, IN, CHAN, 0 + (\channels - IDX1) -+ loadregoffsh2 DAT, 1, IN, CHAN, 1 + (\channels - IDX1) -+ add IN, IN, #8*4 -+ .else // SIZE_GROUP1 == 4 -+ loadregoffsh2 DAT, 0, IN, CHAN, 0 + (\channels - IDX1) -+ loadregoffsh2 DAT, 1, IN, CHAN, 1 + (\channels - IDX1) -+ loadregoffsh2 DAT, 2, IN, CHAN, 2 + (\channels - IDX1) -+ loadregoffsh2 DAT, 3, IN, CHAN, 3 + (\channels - IDX1) -+ .if IDX1 == 4 -+ add IN, IN, #8*4 -+ .endif -+ .endif -+ decr_modulo IDX1, SIZE_GROUP1, \channels -+ .if SIZE_GROUP2 == 2 -+ loadregoffsh2 DAT, 2, IN, CHAN, 0 + (\channels - IDX1) -+ loadregoffsh2 DAT, 3, IN, CHAN, 1 + (\channels - IDX1) -+ .if IDX1 == 2 -+ add IN, IN, #8*4 -+ .endif -+ .endif -+ decr_modulo IDX1, SIZE_GROUP2, \channels -+ .endif -+ .if \channels == 8 // in this case we can corrupt CHAN0-3 -+ rsb CHAN0, CHAN0, #8 -+ rsb CHAN1, CHAN1, #8 -+ rsb CHAN2, CHAN2, #8 -+ rsb CHAN3, CHAN3, #8 -+ lsl DAT0, #8 + \shift -+ lsl DAT1, #8 + \shift -+ lsl DAT2, #8 + \shift -+ lsl DAT3, #8 + \shift -+ eor CHECK, CHECK, DAT0, lsr CHAN0 -+ eor CHECK, CHECK, DAT1, lsr CHAN1 -+ eor CHECK, CHECK, DAT2, lsr CHAN2 -+ eor CHECK, CHECK, DAT3, lsr CHAN3 -+ .else -+ .if \shift != 0 -+ lsl DAT0, #\shift -+ lsl DAT1, #\shift -+ lsl DAT2, #\shift -+ lsl DAT3, #\shift -+ .endif -+ bic DAT0, DAT0, #0xff000000 -+ bic DAT1, DAT1, #0xff000000 -+ bic DAT2, DAT2, #0xff000000 -+ bic DAT3, DAT3, #0xff000000 -+ eorlslreg CHECK, DAT0, CHAN, 0 + (\channels - IDX2) -+ eorlslreg CHECK, DAT1, CHAN, 1 + (\channels - IDX2) -+ decr_modulo IDX2, 2, \channels -+ eorlslreg CHECK, DAT2, CHAN, 0 + (\channels - IDX2) -+ eorlslreg CHECK, DAT3, CHAN, 1 + (\channels - IDX2) -+ decr_modulo IDX2, 2, \channels -+ lsl DAT0, #8 -+ lsl DAT1, #8 -+ lsl DAT2, #8 -+ lsl DAT3, #8 -+ .endif -+ stm OUT!, {DAT0 - DAT3} -+ .endm -+ -+ .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 4) -+ .if (WORDS_PER_LOOP % 2) == 0 -+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2 -+ .endif -+ .if (WORDS_PER_LOOP % 2) == 0 -+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2 -+ .endif -+ .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4 -+ .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels -+ -+function ff_mlp_pack_output_outoforder_\channels\()ch_\shift\()shift_armv6, export=1 -+ .if SAMPLES_PER_LOOP > 1 -+ tst COUNT, #SAMPLES_PER_LOOP - 1 // always seems to be in practice -+ bne X(ff_mlp_pack_output) // but just in case, branch to C implementation if not -+ .endif -+ teq COUNT, #0 -+ it eq -+ bxeq lr -+ push {v1-v6,sl,fp,lr} -+ ldr CHAN0, [sp, #(9+0)*4] // get ch_assign from stack -+ ldr CHAN4, [CHAN0] -+ .if \channels == 2 -+ uxtb CHAN0, CHAN4, ror #0 -+ uxtb CHAN1, CHAN4, ror #8 -+ .else -+ ldr CHAN5, [CHAN0, #4] -+ .if \channels == 6 -+ uxtb CHAN0, CHAN4, ror #0 -+ uxtb CHAN1, CHAN4, ror #8 -+ uxtb CHAN2, CHAN4, ror #16 -+ uxtb CHAN3, CHAN4, ror #24 -+ uxtb CHAN4, CHAN5, ror #0 -+ uxtb CHAN5, CHAN5, ror #8 -+ .endif -+ .endif -+ .set IDX1, \channels -+ .set IDX2, \channels -+0: -+ .rept WORDS_PER_LOOP / 4 -+ output4words -+ .endr -+ subs COUNT, COUNT, #SAMPLES_PER_LOOP -+ bne 0b -+ pop {v1-v6,sl,fp,pc} -+ .ltorg -+endfunc -+ .purgem output4words -+ -+ .unreq CHECK -+ .unreq COUNT -+ .unreq IN -+ .unreq OUT -+ .unreq DAT0 -+ .unreq DAT1 -+ .unreq DAT2 -+ .unreq DAT3 -+ .unreq CHAN0 -+ .unreq CHAN1 -+ .unreq CHAN2 -+ .unreq CHAN3 -+ .unreq CHAN4 -+ .unreq CHAN5 -+ -+#endif // !CONFIG_THUMB -+ -+.endif // mixed -+.endif // inorder -+.endm // implement_pack -+ -+.macro pack_channels inorder, channels -+ implement_pack \inorder, \channels, 0 -+ implement_pack \inorder, \channels, 1 -+ implement_pack \inorder, \channels, 2 -+ implement_pack \inorder, \channels, 3 -+ implement_pack \inorder, \channels, 4 -+ implement_pack \inorder, \channels, 5 -+ implement_pack \inorder, \channels, mixed -+.endm -+ -+.macro pack_order inorder -+ pack_channels \inorder, 2 -+ pack_channels \inorder, 6 -+ pack_channels \inorder, 8 -+.endm -+ -+ pack_order 0 -+ pack_order 1 -diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c -index 1bb2276..10ec316 100644 ---- a/libavcodec/arm/mlpdsp_init_arm.c -+++ b/libavcodec/arm/mlpdsp_init_arm.c -@@ -41,8 +41,104 @@ void ff_mlp_rematrix_channel_arm(int32_t *samples, - int access_unit_size_pow2, - int32_t mask); - -+#define DECLARE_PACK(order,channels,shift) \ -+ int32_t ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int); -+#define ENUMERATE_PACK(order,channels,shift) \ -+ ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6, -+#define PACK_CHANNELS(macro,order,channels) \ -+ macro(order,channels,0) \ -+ macro(order,channels,1) \ -+ macro(order,channels,2) \ -+ macro(order,channels,3) \ -+ macro(order,channels,4) \ -+ macro(order,channels,5) \ -+ macro(order,channels,mixed) -+#define PACK_ORDER(macro,order) \ -+ PACK_CHANNELS(macro,order,2) \ -+ PACK_CHANNELS(macro,order,6) \ -+ PACK_CHANNELS(macro,order,8) -+#define PACK_ALL(macro) \ -+ PACK_ORDER(macro,outof) \ -+ PACK_ORDER(macro,in) -+PACK_ALL(DECLARE_PACK) -+ -+#define ff_mlp_pack_output_outoforder_2ch_mixedshift_armv6 0 -+#define ff_mlp_pack_output_outoforder_6ch_mixedshift_armv6 0 -+#define ff_mlp_pack_output_outoforder_8ch_mixedshift_armv6 0 -+#if CONFIG_THUMB -+#define ff_mlp_pack_output_outoforder_2ch_0shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_2ch_1shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_2ch_2shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_2ch_3shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_2ch_4shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_2ch_5shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_6ch_0shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_6ch_1shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_6ch_2shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_6ch_3shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_6ch_4shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_6ch_5shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_8ch_0shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_8ch_1shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_8ch_2shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_8ch_3shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_8ch_4shift_armv6 0 -+#define ff_mlp_pack_output_outoforder_8ch_5shift_armv6 0 -+#endif -+ -+static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign, -+ int8_t *output_shift, -+ uint8_t max_matrix_channel, -+ int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int) -+{ -+ int ch_index; -+ int shift = output_shift[0] < 0 || output_shift[0] > 5 ? 6 : output_shift[0]; -+ int inorder = 1; -+ static int32_t (*const routine[2*3*7])(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int) = { -+ PACK_ALL(ENUMERATE_PACK) -+ }; -+ int i; -+ -+ if (!is32) // don't support 16-bit output (it's not used by TrueHD) -+ return ff_mlp_pack_output; -+ -+ switch (max_matrix_channel) { -+ case 1: -+ ch_index = 0; -+ break; -+ case 5: -+ ch_index = 1; -+ break; -+ case 7: -+ ch_index = 2; -+ break; -+ default: -+ return ff_mlp_pack_output; -+ } -+ -+ for (i = 0; i <= max_matrix_channel; i++) { -+ if (shift != 6 && output_shift[i] != shift) -+ shift = 6; // indicate mixed shifts -+ if (ch_assign[i] != i) -+ inorder = 0; -+ } -+#if CONFIG_THUMB -+ if (!inorder) -+ return ff_mlp_pack_output; // can't currently handle an order array except in ARM mode -+#else -+ if (shift == 6 && !inorder) -+ return ff_mlp_pack_output; // can't currently handle both an order array and a shift array -+#endif -+ -+ return routine[(inorder*3+ch_index)*7+shift]; -+} -+ - av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c) - { -+ int cpu_flags = av_get_cpu_flags(); -+ - c->mlp_filter_channel = ff_mlp_filter_channel_arm; - c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm; -+ if (cpu_flags & AV_CPU_FLAG_ARMV6) -+ c->mlp_select_pack_output = mlp_select_pack_output_armv6; - } --- -1.9.1