ffmpeg: update to ffmpeg-2.3

Signed-off-by: Stephan Raue <stephan@openelec.tv>
This commit is contained in:
Stephan Raue 2014-07-17 02:22:02 +02:00
parent abbe488d68
commit 473d04c43e
11 changed files with 134 additions and 3656 deletions

View File

@ -17,7 +17,7 @@
################################################################################
PKG_NAME="ffmpeg"
PKG_VERSION="2.2.4"
PKG_VERSION="2.3"
PKG_REV="1"
PKG_ARCH="any"
PKG_LICENSE="LGPL"
@ -224,7 +224,6 @@ configure_target() {
--disable-altivec \
$FFMPEG_CPU \
$FFMPEG_FPU \
--disable-vis \
--enable-yasm \
--disable-sram \
--disable-symver

View File

@ -1,7 +1,7 @@
From 35ed29f583447d1d323c0cbdcf629f02a3601a27 Mon Sep 17 00:00:00 2001
From f68c860bdc70e440f047ca60c8f9497a0e5a2122 Mon Sep 17 00:00:00 2001
From: Joakim Plate <elupus@ecce.se>
Date: Sun, 11 Sep 2011 19:04:51 +0200
Subject: [PATCH 01/19] Support raw dvdsub palette as stored on normal dvd's
Subject: [PATCH 01/15] Support raw dvdsub palette as stored on normal dvd's
This is how the palette is stored on dvd's. Currently
only xbmc passes the palette information to libavcodec
@ -11,7 +11,7 @@ this way.
1 file changed, 24 insertions(+)
diff --git a/libavcodec/dvdsubdec.c b/libavcodec/dvdsubdec.c
index 637f3e6..eb4a7b8 100644
index 39b0e25..a19086d 100644
--- a/libavcodec/dvdsubdec.c
+++ b/libavcodec/dvdsubdec.c
@@ -61,6 +61,24 @@ static void yuv_a_to_rgba(const uint8_t *ycbcr, const uint8_t *alpha, uint32_t *
@ -20,7 +20,7 @@ index 637f3e6..eb4a7b8 100644
+static void ayvu_to_argb(const uint8_t *ayvu, uint32_t *argb, int num_values)
+{
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+ uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
+ uint8_t r, g, b;
+ int i, y, cb, cr, a;
+ int r_add, g_add, b_add;
@ -39,7 +39,7 @@ index 637f3e6..eb4a7b8 100644
static int decode_run_2bit(GetBitContext *gb, int *color)
{
unsigned int v, t;
@@ -624,6 +642,12 @@ static av_cold int dvdsub_init(AVCodecContext *avctx)
@@ -628,6 +646,12 @@ static av_cold int dvdsub_init(AVCodecContext *avctx)
if (ctx->palette_str)
parse_palette(ctx, ctx->palette_str);
@ -56,33 +56,34 @@ index 637f3e6..eb4a7b8 100644
1.9.3
From e881447700f405bb702f91822eb576913675e9eb Mon Sep 17 00:00:00 2001
From d53ff2a91f95b2b6ef3974921228e90a4a765af6 Mon Sep 17 00:00:00 2001
From: Joakim Plate <elupus@ecce.se>
Date: Sat, 22 Oct 2011 18:33:45 +0200
Subject: [PATCH 02/19] Check return value of avio_seek and avoid modifying
Subject: [PATCH 02/15] Check return value of avio_seek and avoid modifying
state if it fails
The code still modifies state if the timestamp is not found. Not
sure exactly how to avoid that.
---
libavformat/matroskadec.c | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
libavformat/matroskadec.c | 23 ++++++++++++++---------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 4726e94..b04be90 100644
index ec43526..66d5e8e 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -2832,7 +2832,8 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
@@ -2992,8 +2992,8 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
timestamp = FFMAX(timestamp, st->index_entries[0].timestamp);
if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
- avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET);
- avio_seek(s->pb, st->index_entries[st->nb_index_entries - 1].pos,
- SEEK_SET);
+ if (avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET) < 0)
+ return -1;
matroska->current_id = 0;
while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
matroska_clear_queue(matroska);
@@ -2841,16 +2842,11 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
@@ -3002,16 +3002,11 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
}
}
@ -91,19 +92,20 @@ index 4726e94..b04be90 100644
goto err;
index_min = index;
for (i=0; i < matroska->tracks.nb_elem; i++) {
- tracks[i].audio.pkt_cnt = 0;
for (i = 0; i < matroska->tracks.nb_elem; i++) {
- tracks[i].audio.pkt_cnt = 0;
- tracks[i].audio.sub_packet_cnt = 0;
- tracks[i].audio.buf_timecode = AV_NOPTS_VALUE;
- tracks[i].end_timecode = 0;
if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE
&& tracks[i].stream->discard != AVDISCARD_ALL) {
index_sub = av_index_search_timestamp(tracks[i].stream, st->index_entries[index].timestamp, AVSEEK_FLAG_BACKWARD);
@@ -2862,7 +2858,16 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
- tracks[i].audio.buf_timecode = AV_NOPTS_VALUE;
- tracks[i].end_timecode = 0;
if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE &&
tracks[i].stream->discard != AVDISCARD_ALL) {
index_sub = av_index_search_timestamp(
@@ -3025,8 +3020,18 @@ static int matroska_read_seek(AVFormatContext *s, int stream_index,
}
}
- avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET);
- matroska->current_id = 0;
+ if (avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET) < 0)
+ return -1;
+
@ -114,27 +116,29 @@ index 4726e94..b04be90 100644
+ tracks[i].audio.buf_timecode = AV_NOPTS_VALUE;
+ tracks[i].end_timecode = 0;
+ }
matroska->current_id = 0;
+ matroska->current_id = 0;
+
if (flags & AVSEEK_FLAG_ANY) {
st->skip_to_keyframe = 0;
matroska->skip_to_timecode = timestamp;
--
1.9.3
From 24830f831e0e2dfb71aceb06050328b8bb9d2195 Mon Sep 17 00:00:00 2001
From d8c6b50095900bbc4f40dfb3c2d321a35361820a Mon Sep 17 00:00:00 2001
From: Joakim Plate <elupus@ecce.se>
Date: Mon, 12 Sep 2011 21:37:17 +0200
Subject: [PATCH 03/19] asf hacks
Subject: [PATCH 03/15] asf hacks
---
libavformat/asfdec.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/libavformat/asfdec.c b/libavformat/asfdec.c
index 9bbc704..387d77c 100644
index 978b956..30f099d 100644
--- a/libavformat/asfdec.c
+++ b/libavformat/asfdec.c
@@ -1537,9 +1537,20 @@ static int asf_read_seek(AVFormatContext *s, int stream_index,
@@ -1546,9 +1546,20 @@ static int asf_read_seek(AVFormatContext *s, int stream_index,
AVStream *st = s->streams[stream_index];
int ret = 0;
@ -159,10 +163,10 @@ index 9bbc704..387d77c 100644
1.9.3
From 52f1304731513b06690cab5821cfc3c15b5b1518 Mon Sep 17 00:00:00 2001
From bb32180f7e9fe2ff89888c26731dc043844b49e2 Mon Sep 17 00:00:00 2001
From: Cory Fields <theuni-nospam-@xbmc.org>
Date: Mon, 28 Jun 2010 01:55:31 -0400
Subject: [PATCH 04/19] if av_read_packet returns AVERROR_IO, we are done.
Subject: [PATCH 04/15] if av_read_packet returns AVERROR_IO, we are done.
ffmpeg's codecs might or might not handle returning any completed demuxed
packets correctly
@ -171,10 +175,10 @@ Subject: [PATCH 04/19] if av_read_packet returns AVERROR_IO, we are done.
1 file changed, 2 insertions(+)
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 27b37b2..2de79d7 100644
index e095d60..9fa0bb0 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1437,6 +1437,8 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
@@ -1460,6 +1460,8 @@ static int read_frame_internal(AVFormatContext *s, AVPacket *pkt)
if (ret < 0) {
if (ret == AVERROR(EAGAIN))
return ret;
@ -187,10 +191,10 @@ index 27b37b2..2de79d7 100644
1.9.3
From 6e2610a072dbf5c339a7f9053cd6d0760b03f3a7 Mon Sep 17 00:00:00 2001
From aae4de70cac340ed7e1b8db34125216c1e13cb00 Mon Sep 17 00:00:00 2001
From: Cory Fields <theuni-nospam-@xbmc.org>
Date: Mon, 28 Jun 2010 02:10:50 -0400
Subject: [PATCH 05/19] added: Ticket #7187, TV Teletext support for DVB EBU
Subject: [PATCH 05/15] added: Ticket #7187, TV Teletext support for DVB EBU
Teletext streams
---
@ -199,10 +203,10 @@ Subject: [PATCH 05/19] added: Ticket #7187, TV Teletext support for DVB EBU
2 files changed, 6 insertions(+)
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 5df717c..36c1bda 100644
index 93ba4d0..f3de33a 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -505,6 +505,10 @@ enum AVCodecID {
@@ -523,6 +523,10 @@ enum AVCodecID {
AV_CODEC_ID_PJS = MKBETAG('P','h','J','S'),
AV_CODEC_ID_ASS = MKBETAG('A','S','S',' '), ///< ASS as defined in Matroska
@ -214,11 +218,11 @@ index 5df717c..36c1bda 100644
AV_CODEC_ID_FIRST_UNKNOWN = 0x18000, ///< A dummy ID pointing at the start of various fake codecs.
AV_CODEC_ID_TTF = 0x18000,
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 286b30b..162033f 100644
index 7114088..e55193b 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -673,6 +673,8 @@ static const StreamType DESC_types[] = {
{ 0x7b, AVMEDIA_TYPE_AUDIO, AV_CODEC_ID_DTS },
@@ -708,6 +708,8 @@ static const StreamType DESC_types[] = {
{ 0x7b, AVMEDIA_TYPE_AUDIO, AV_CODEC_ID_DTS },
{ 0x56, AVMEDIA_TYPE_SUBTITLE, AV_CODEC_ID_DVB_TELETEXT },
{ 0x59, AVMEDIA_TYPE_SUBTITLE, AV_CODEC_ID_DVB_SUBTITLE }, /* subtitling descriptor */
+ { 0x45, AVMEDIA_TYPE_DATA, AV_CODEC_ID_VBI_DATA }, /* VBI Data descriptor */
@ -230,20 +234,20 @@ index 286b30b..162033f 100644
1.9.3
From 373aefe13f7941931a6b56ccb6c99cb19d81f169 Mon Sep 17 00:00:00 2001
From e71d4c1755bd4e23fe9b65fb6128a8b41cecfdb1 Mon Sep 17 00:00:00 2001
From: Joakim Plate <elupus@ecce.se>
Date: Sun, 18 Sep 2011 19:16:34 +0200
Subject: [PATCH 06/19] Don't accept mpegts PMT that isn't current
Subject: [PATCH 06/15] Don't accept mpegts PMT that isn't current
---
libavformat/mpegts.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 162033f..402d8b3 100644
index e55193b..9ec6220 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -517,6 +517,7 @@ typedef struct SectionHeader {
@@ -552,6 +552,7 @@ typedef struct SectionHeader {
uint8_t tid;
uint16_t id;
uint8_t version;
@ -251,17 +255,17 @@ index 162033f..402d8b3 100644
uint8_t sec_num;
uint8_t last_sec_num;
} SectionHeader;
@@ -588,6 +589,7 @@ static int parse_section_header(SectionHeader *h,
@@ -623,6 +624,7 @@ static int parse_section_header(SectionHeader *h,
val = get8(pp, p_end);
if (val < 0)
return -1;
return val;
+ h->current = val & 0x1;
h->version = (val >> 1) & 0x1f;
val = get8(pp, p_end);
if (val < 0)
@@ -1790,6 +1792,8 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
@@ -1891,6 +1893,8 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
return;
if (h->tid != PAT_TID)
if (ts->skip_changes)
return;
+ if (!h->current)
+ return;
@ -272,36 +276,36 @@ index 162033f..402d8b3 100644
1.9.3
From 2be1b4b7db563067a8b41d116a15f86fa1b8186c Mon Sep 17 00:00:00 2001
From 473091d11f4e3a0c1820054368a76074a0e239cb Mon Sep 17 00:00:00 2001
From: Joakim Plate <elupus@ecce.se>
Date: Sun, 18 Sep 2011 19:17:23 +0200
Subject: [PATCH 07/19] Don't reparse PMT unless it's version has changed
Subject: [PATCH 07/15] Don't reparse PMT unless it's version has changed
---
libavformat/mpegts.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 402d8b3..3625bfa 100644
index 9ec6220..ab03372 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -78,6 +78,7 @@ struct MpegTSFilter {
int pid;
@@ -87,6 +87,7 @@ struct MpegTSFilter {
int es_id;
int last_cc; /* last cc code (-1 if first packet) */
int64_t last_pcr;
+ int last_version; /* last version of data on this pid */
enum MpegTSFilterType type;
union {
MpegTSPESFilter pes_filter;
@@ -416,6 +417,7 @@ static MpegTSFilter *mpegts_open_section_filter(MpegTSContext *ts, unsigned int
filter->pid = pid;
filter->es_id = -1;
@@ -432,6 +433,7 @@ static MpegTSFilter *mpegts_open_filter(MpegTSContext *ts, unsigned int pid,
filter->es_id = -1;
filter->last_cc = -1;
filter->last_pcr= -1;
+ filter->last_version = -1;
sec = &filter->u.section_filter;
sec->section_cb = section_cb;
sec->opaque = opaque;
@@ -1794,6 +1796,10 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
return filter;
}
@@ -1895,6 +1897,10 @@ static void pat_cb(MpegTSFilter *filter, const uint8_t *section, int section_len
return;
if (!h->current)
return;
@ -316,10 +320,10 @@ index 402d8b3..3625bfa 100644
1.9.3
From 027d6070086fa0ca50be2bba1bac442ad8eb5def Mon Sep 17 00:00:00 2001
From aa357f84bcdb105910478aee74d5b675d65114bd Mon Sep 17 00:00:00 2001
From: Cory Fields <theuni-nospam-@xbmc.org>
Date: Fri, 9 Jul 2010 16:43:31 -0400
Subject: [PATCH 08/19] Read PID timestamps as well as PCR timestamps to find
Subject: [PATCH 08/15] Read PID timestamps as well as PCR timestamps to find
location in mpegts stream
---
@ -327,10 +331,10 @@ Subject: [PATCH 08/19] Read PID timestamps as well as PCR timestamps to find
1 file changed, 46 insertions(+), 2 deletions(-)
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 3625bfa..7937c63 100644
index ab03372..9962ccf 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -2269,6 +2269,44 @@ static void seek_back(AVFormatContext *s, AVIOContext *pb, int64_t pos) {
@@ -2375,6 +2375,44 @@ static void seek_back(AVFormatContext *s, AVIOContext *pb, int64_t pos) {
av_log(s, pb->seekable ? AV_LOG_ERROR : AV_LOG_INFO, "Unable to seek back to the start\n");
}
@ -375,15 +379,15 @@ index 3625bfa..7937c63 100644
static int mpegts_read_header(AVFormatContext *s)
{
MpegTSContext *ts = s->priv_data;
@@ -2470,6 +2508,7 @@ static av_unused int64_t mpegts_get_pcr(AVFormatContext *s, int stream_index,
int64_t pos, timestamp;
@@ -2574,6 +2612,7 @@ static av_unused int64_t mpegts_get_pcr(AVFormatContext *s, int stream_index,
uint8_t buf[TS_PACKET_SIZE];
int pcr_l, pcr_pid = ((PESContext*)s->streams[stream_index]->priv_data)->pcr_pid;
int pcr_l, pcr_pid =
((PESContext *)s->streams[stream_index]->priv_data)->pcr_pid;
+ int pid = ((PESContext*)s->streams[stream_index]->priv_data)->pid;
int pos47 = ts->pos47_full % ts->raw_packet_size;
pos = ((*ppos + ts->raw_packet_size - 1 - pos47) / ts->raw_packet_size) * ts->raw_packet_size + pos47;
while(pos < pos_limit) {
@@ -2489,6 +2528,11 @@ static av_unused int64_t mpegts_get_pcr(AVFormatContext *s, int stream_index,
pos =
((*ppos + ts->raw_packet_size - 1 - pos47) / ts->raw_packet_size) *
@@ -2595,6 +2634,11 @@ static av_unused int64_t mpegts_get_pcr(AVFormatContext *s, int stream_index,
*ppos = pos;
return timestamp;
}
@ -395,7 +399,7 @@ index 3625bfa..7937c63 100644
pos += ts->raw_packet_size;
}
@@ -2588,7 +2632,7 @@ AVInputFormat ff_mpegts_demuxer = {
@@ -2694,7 +2738,7 @@ AVInputFormat ff_mpegts_demuxer = {
.read_header = mpegts_read_header,
.read_packet = mpegts_read_packet,
.read_close = mpegts_read_close,
@ -404,7 +408,7 @@ index 3625bfa..7937c63 100644
.flags = AVFMT_SHOW_IDS | AVFMT_TS_DISCONT,
.priv_class = &mpegts_class,
};
@@ -2600,7 +2644,7 @@ AVInputFormat ff_mpegtsraw_demuxer = {
@@ -2706,7 +2750,7 @@ AVInputFormat ff_mpegtsraw_demuxer = {
.read_header = mpegts_read_header,
.read_packet = mpegts_raw_read_packet,
.read_close = mpegts_read_close,
@ -417,20 +421,20 @@ index 3625bfa..7937c63 100644
1.9.3
From 52325298742c77793489c36edf66cc3883d206b1 Mon Sep 17 00:00:00 2001
From 8deda04d599f1e248cba4d175257dea469feb719 Mon Sep 17 00:00:00 2001
From: Joakim Plate <elupus@ecce.se>
Date: Sat, 22 Oct 2011 19:01:38 +0200
Subject: [PATCH 09/19] Get stream durations using read_timestamp
Subject: [PATCH 09/15] Get stream durations using read_timestamp
---
libavformat/utils.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 2de79d7..670e6ec 100644
index 9fa0bb0..cbeaa9c 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -2434,6 +2434,41 @@ static void estimate_timings_from_bit_rate(AVFormatContext *ic)
@@ -2480,6 +2480,41 @@ static void estimate_timings_from_bit_rate(AVFormatContext *ic)
#define DURATION_MAX_READ_SIZE 250000LL
#define DURATION_MAX_RETRY 4
@ -472,7 +476,7 @@ index 2de79d7..670e6ec 100644
/* only usable for MPEG-PS streams */
static void estimate_timings_from_pts(AVFormatContext *ic, int64_t old_offset)
{
@@ -2542,6 +2577,10 @@ static void estimate_timings(AVFormatContext *ic, int64_t old_offset)
@@ -2630,6 +2665,10 @@ static void estimate_timings(AVFormatContext *ic, int64_t old_offset)
* the components */
fill_all_stream_timings(ic);
ic->duration_estimation_method = AVFMT_DURATION_FROM_STREAM;
@ -487,10 +491,10 @@ index 2de79d7..670e6ec 100644
1.9.3
From 2e20016a27a0cd1db61ad41add18b94e3acc9899 Mon Sep 17 00:00:00 2001
From 77caa1aab9b838a0085e2f4133d0e27eb6588f4b Mon Sep 17 00:00:00 2001
From: Joakim Plate <elupus@ecce.se>
Date: Wed, 8 Dec 2010 14:03:43 +0000
Subject: [PATCH 10/19] changed: allow 4 second skew between streams in mov
Subject: [PATCH 10/15] changed: allow 4 second skew between streams in mov
before attempting to seek
---
@ -498,10 +502,10 @@ Subject: [PATCH 10/19] changed: allow 4 second skew between streams in mov
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 3fb7747..50926e8 100644
index 9b4832f..41be8b7 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -3459,8 +3459,8 @@ static AVIndexEntry *mov_find_next_sample(AVFormatContext *s, AVStream **st)
@@ -3673,8 +3673,8 @@ static AVIndexEntry *mov_find_next_sample(AVFormatContext *s, AVStream **st)
if (!sample || (!s->pb->seekable && current_sample->pos < sample->pos) ||
(s->pb->seekable &&
((msc->pb != s->pb && dts < best_dts) || (msc->pb == s->pb &&
@ -516,10 +520,10 @@ index 3fb7747..50926e8 100644
1.9.3
From 4c32619d53473499ee382981d420930a8d36801f Mon Sep 17 00:00:00 2001
From c3d69fb6f71a674310fefb17aebab01a6744881c Mon Sep 17 00:00:00 2001
From: Joakim Plate <elupus@ecce.se>
Date: Fri, 26 Nov 2010 20:56:48 +0000
Subject: [PATCH 11/19] fixed: memleak in mpegts demuxer on some malformed (??)
Subject: [PATCH 11/15] fixed: memleak in mpegts demuxer on some malformed (??)
mpegts files with too large pes packets
at-visions sample file brokenStream.mpg
@ -528,10 +532,10 @@ at-visions sample file brokenStream.mpg
1 file changed, 6 insertions(+)
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 7937c63..86bbf52 100644
index 9962ccf..66ea11c 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -767,6 +767,10 @@ static int mpegts_set_stream_info(AVStream *st, PESContext *pes,
@@ -811,6 +811,10 @@ static void reset_pes_packet_state(PESContext *pes)
static void new_pes_packet(PESContext *pes, AVPacket *pkt)
{
@ -542,7 +546,7 @@ index 7937c63..86bbf52 100644
av_init_packet(pkt);
pkt->buf = pes->buffer;
@@ -2462,6 +2466,8 @@ static int mpegts_read_packet(AVFormatContext *s,
@@ -2565,6 +2569,8 @@ static int mpegts_read_packet(AVFormatContext *s, AVPacket *pkt)
pkt->size = -1;
ts->pkt = pkt;
@ -555,49 +559,50 @@ index 7937c63..86bbf52 100644
1.9.3
From fce8e4a5e10c9b03243623a22e9969473b31b07d Mon Sep 17 00:00:00 2001
From e621e2b83b43a5fade298251094458451eecad41 Mon Sep 17 00:00:00 2001
From: Joakim Plate <elupus@ecce.se>
Date: Mon, 28 Jun 2010 21:26:54 +0000
Subject: [PATCH 12/19] Speed up mpegts av_find_stream_info
Subject: [PATCH 12/15] Speed up mpegts av_find_stream_info
---
libavformat/mpegts.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 86bbf52..28d6a6b 100644
index 66ea11c..5811d26 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -927,7 +927,7 @@ static int mpegts_push_data(MpegTSFilter *filter,
@@ -969,7 +969,7 @@ static int mpegts_push_data(MpegTSFilter *filter,
goto skip;
/* stream not present in PMT */
- if (!pes->st) {
+ if (ts->auto_guess && !pes->st) {
pes->st = avformat_new_stream(ts->stream, NULL);
if (!pes->st)
return AVERROR(ENOMEM);
if (ts->skip_changes)
goto skip;
--
1.9.3
From f9e506d81f43fdc06abb6d4d85296fec51dbfeb9 Mon Sep 17 00:00:00 2001
From 07a31ecbe3493cbc1d1a5b6dee7784257a70ca17 Mon Sep 17 00:00:00 2001
From: marc <mhocking@ubuntu-desktop.(none)>
Date: Mon, 18 Feb 2013 17:18:18 +0000
Subject: [PATCH 13/19] dxva-h264 Fix dxva playback of streams that don't start
Subject: [PATCH 13/15] dxva-h264 Fix dxva playback of streams that don't start
with an I-Frame.
---
libavcodec/dxva2_h264.c | 8 ++++++++
libavcodec/h264.c | 2 ++
libavcodec/h264.c | 1 +
libavcodec/h264.h | 2 ++
3 files changed, 12 insertions(+)
libavcodec/h264_slice.c | 1 +
4 files changed, 12 insertions(+)
diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
index 02d3f54..848fa35 100644
index 1174188..263a272 100644
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c
@@ -431,6 +431,14 @@ static int dxva2_h264_end_frame(AVCodecContext *avctx)
@@ -448,6 +448,14 @@ static int dxva2_h264_end_frame(AVCodecContext *avctx)
if (ctx_pic->slice_count <= 0 || ctx_pic->bitstream_size <= 0)
return -1;
@ -609,34 +614,26 @@ index 02d3f54..848fa35 100644
+ h->got_first_iframe = 1;
+ }
+
ret = ff_dxva2_common_end_frame(avctx, h->cur_pic_ptr,
ret = ff_dxva2_common_end_frame(avctx, &h->cur_pic_ptr->f,
&ctx_pic->pp, sizeof(ctx_pic->pp),
&ctx_pic->qm, sizeof(ctx_pic->qm),
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index d855b7b..c560fa9 100644
index 1d91987..8b7b026 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -2869,6 +2869,7 @@ static void flush_dpb(AVCodecContext *avctx)
h->delayed_pic[i]->reference = 0;
h->delayed_pic[i] = NULL;
}
@@ -1085,6 +1085,7 @@ void ff_h264_flush_change(H264Context *h)
h->list_count = 0;
h->current_slice = 0;
h->mmco_reset = 1;
+ h->got_first_iframe = 0;
}
flush_change(h);
@@ -3312,6 +3313,7 @@ static int h264_slice_header_init(H264Context *h, int reinit)
free_tables(h, 0);
h->first_field = 0;
h->prev_interlaced_frame = 1;
+ h->got_first_iframe = 0;
init_scan_tables(h);
ret = ff_h264_alloc_tables(h);
/* forget old pics after a seek */
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 4a41fff..6b3ca1d 100644
index 228558b..5e92043 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -682,6 +682,8 @@ typedef struct H264Context {
@@ -740,6 +740,8 @@ typedef struct H264Context {
int luma_weight_flag[2]; ///< 7.4.3.2 luma_weight_lX_flag
int chroma_weight_flag[2]; ///< 7.4.3.2 chroma_weight_lX_flag
@ -645,14 +642,26 @@ index 4a41fff..6b3ca1d 100644
// Timestamp stuff
int sei_buffering_period_present; ///< Buffering period SEI flag
int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index ded26f8..e20f2c8 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -1166,6 +1166,7 @@ static int h264_slice_header_init(H264Context *h, int reinit)
ff_h264_free_tables(h, 0);
h->first_field = 0;
h->prev_interlaced_frame = 1;
+ h->got_first_iframe = 0;
init_scan_tables(h);
ret = ff_h264_alloc_tables(h);
--
1.9.3
From d499ee3d255c811b13d2749c2d4d5a013d5d8975 Mon Sep 17 00:00:00 2001
From 67247a541dc1dfb547d35eb326ecf26b6c10b4d3 Mon Sep 17 00:00:00 2001
From: elupus <elupus@xbmc.org>
Date: Tue, 1 Nov 2011 20:18:35 +0100
Subject: [PATCH 14/19] add public version of ff_read_frame_flush
Subject: [PATCH 14/15] add public version of ff_read_frame_flush
We need this since we sometimes seek on the
input stream behind ffmpeg's back. After this
@ -663,10 +672,10 @@ all data need to be flushed completely.
2 files changed, 10 insertions(+)
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 7839c0a..33f10a8 100644
index a9abfbd..ff19215 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1944,6 +1944,11 @@ int av_read_packet(AVFormatContext *s, AVPacket *pkt);
@@ -2074,6 +2074,11 @@ int av_read_packet(AVFormatContext *s, AVPacket *pkt);
int av_read_frame(AVFormatContext *s, AVPacket *pkt);
/**
@ -679,10 +688,10 @@ index 7839c0a..33f10a8 100644
* 'timestamp' in 'stream_index'.
*
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 670e6ec..a61613f 100644
index cbeaa9c..185706f 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -1702,6 +1702,11 @@ void ff_read_frame_flush(AVFormatContext *s)
@@ -1748,6 +1748,11 @@ void ff_read_frame_flush(AVFormatContext *s)
}
}
@ -698,293 +707,10 @@ index 670e6ec..a61613f 100644
1.9.3
From 951fff16d4a49571f54f3c212504d8e71aa537e7 Mon Sep 17 00:00:00 2001
From: Hendrik Leppkes <h.leppkes@gmail.com>
Date: Tue, 4 Mar 2014 08:28:38 +0100
Subject: [PATCH 15/19] dxva2_h264: set the correct ref frame index in the long
slice struct
The latest H.264 DXVA specification states that the index in this
structure should refer to a valid entry in the RefFrameList of the picture
parameter structure, and not to the actual surface index.
Fixes H.264 DXVA2 decoding on recent Intel GPUs (tested on Sandy and Ivy)
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
libavcodec/dxva2_h264.c | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
index 848fa35..20dd64e 100644
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c
@@ -194,8 +194,18 @@ static void fill_slice_short(DXVA_Slice_H264_Short *slice,
slice->wBadSliceChopping = 0;
}
+static int get_refpic_index(const DXVA_PicParams_H264 *pp, int surface_index)
+{
+ int i;
+ for (i = 0; i < FF_ARRAY_ELEMS(pp->RefFrameList); i++) {
+ if ((pp->RefFrameList[i].bPicEntry & 0x7f) == surface_index)
+ return i;
+ }
+ return 0x7f;
+}
+
static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
- unsigned position, unsigned size)
+ const DXVA_PicParams_H264 *pp, unsigned position, unsigned size)
{
const H264Context *h = avctx->priv_data;
struct dxva_context *ctx = avctx->hwaccel_context;
@@ -228,8 +238,8 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
if (list < h->list_count && i < h->ref_count[list]) {
const Picture *r = &h->ref_list[list][i];
unsigned plane;
- fill_picture_entry(&slice->RefPicList[list][i],
- ff_dxva2_get_surface_index(ctx, r),
+ unsigned index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, r));
+ fill_picture_entry(&slice->RefPicList[list][i], index,
r->reference == PICT_BOTTOM_FIELD);
for (plane = 0; plane < 3; plane++) {
int w, o;
@@ -414,7 +424,7 @@ static int dxva2_h264_decode_slice(AVCodecContext *avctx,
position, size);
else
fill_slice_long(avctx, &ctx_pic->slice_long[ctx_pic->slice_count],
- position, size);
+ &ctx_pic->pp, position, size);
ctx_pic->slice_count++;
if (h->slice_type != AV_PICTURE_TYPE_I && h->slice_type != AV_PICTURE_TYPE_SI)
--
1.9.3
From b083afc1a02c143b9c7f6f3970da69f90cb42c18 Mon Sep 17 00:00:00 2001
From: Hendrik Leppkes <h.leppkes@gmail.com>
Date: Tue, 4 Mar 2014 08:28:39 +0100
Subject: [PATCH 16/19] dxva2_h264: add a workaround for old intel GPUs
Old Intel GPUs expect the reference frame index to the actual surface,
instead of the index into RefFrameList as specified by the spec.
This workaround should be set when using one of the "ClearVideo" decoder
devices.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
libavcodec/dxva2.h | 1 +
libavcodec/dxva2_h264.c | 8 +++++++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/libavcodec/dxva2.h b/libavcodec/dxva2.h
index ac39e06..2639d89 100644
--- a/libavcodec/dxva2.h
+++ b/libavcodec/dxva2.h
@@ -49,6 +49,7 @@
*/
#define FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG 1 ///< Work around for DXVA2 and old UVD/UVD+ ATI video cards
+#define FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO 2 ///< Work around for DXVA2 and old Intel GPUs with ClearVideo interface
/**
* This structure is used to provides the necessary configurations and data
diff --git a/libavcodec/dxva2_h264.c b/libavcodec/dxva2_h264.c
index 20dd64e..5833185 100644
--- a/libavcodec/dxva2_h264.c
+++ b/libavcodec/dxva2_h264.c
@@ -115,6 +115,8 @@ static void fill_picture_parameters(struct dxva_context *ctx, const H264Context
pp->bit_depth_chroma_minus8 = h->sps.bit_depth_chroma - 8;
if (ctx->workaround & FF_DXVA2_WORKAROUND_SCALING_LIST_ZIGZAG)
pp->Reserved16Bits = 0;
+ else if (ctx->workaround & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO)
+ pp->Reserved16Bits = 0x34c;
else
pp->Reserved16Bits = 3; /* FIXME is there a way to detect the right mode ? */
pp->StatusReportFeedbackNumber = 1 + ctx->report_id++;
@@ -238,7 +240,11 @@ static void fill_slice_long(AVCodecContext *avctx, DXVA_Slice_H264_Long *slice,
if (list < h->list_count && i < h->ref_count[list]) {
const Picture *r = &h->ref_list[list][i];
unsigned plane;
- unsigned index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, r));
+ unsigned index;
+ if (ctx->workaround & FF_DXVA2_WORKAROUND_INTEL_CLEARVIDEO)
+ index = ff_dxva2_get_surface_index(ctx, r);
+ else
+ index = get_refpic_index(pp, ff_dxva2_get_surface_index(ctx, r));
fill_picture_entry(&slice->RefPicList[list][i], index,
r->reference == PICT_BOTTOM_FIELD);
for (plane = 0; plane < 3; plane++) {
--
1.9.3
From f1d6d2e9186fde4ebf5b4a48b3f9b9474180dec4 Mon Sep 17 00:00:00 2001
From: Rainer Hochecker <fernetmenta@online.de>
Date: Sat, 12 Apr 2014 18:13:32 +0200
Subject: [PATCH 17/19] flac demuxer: improve seeking
---
libavcodec/flac_parser.c | 13 +++++++++++++
libavformat/flacdec.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 58 insertions(+)
diff --git a/libavcodec/flac_parser.c b/libavcodec/flac_parser.c
index ba1f060..6ff4d9c 100644
--- a/libavcodec/flac_parser.c
+++ b/libavcodec/flac_parser.c
@@ -489,6 +489,14 @@ static int get_best_header(FLACParseContext* fpc, const uint8_t **poutbuf,
&fpc->wrap_buf,
&fpc->wrap_buf_allocated_size);
+
+ if (fpc->pc->flags & PARSER_FLAG_USE_CODEC_TS){
+ if (header->fi.is_var_size)
+ fpc->pc->pts = header->fi.frame_or_sample_num;
+ else if (header->best_child)
+ fpc->pc->pts = header->fi.frame_or_sample_num * header->fi.blocksize;
+ }
+
fpc->best_header_valid = 0;
fpc->last_fi_valid = 1;
fpc->last_fi = header->fi;
@@ -516,6 +524,11 @@ static int flac_parse(AVCodecParserContext *s, AVCodecContext *avctx,
s->duration = fi.blocksize;
if (!avctx->sample_rate)
avctx->sample_rate = fi.samplerate;
+ if (fpc->pc->flags & PARSER_FLAG_USE_CODEC_TS){
+ fpc->pc->pts = fi.frame_or_sample_num;
+ if (!fi.is_var_size)
+ fpc->pc->pts *= fi.blocksize;
+ }
}
*poutbuf = buf;
*poutbuf_size = buf_size;
diff --git a/libavformat/flacdec.c b/libavformat/flacdec.c
index 29310b8..9386da0 100644
--- a/libavformat/flacdec.c
+++ b/libavformat/flacdec.c
@@ -157,12 +157,57 @@ static int flac_probe(AVProbeData *p)
return AVPROBE_SCORE_EXTENSION;
}
+static av_unused int64_t flac_read_timestamp(AVFormatContext *s, int stream_index,
+ int64_t *ppos, int64_t pos_limit)
+{
+ AVPacket pkt, out_pkt;
+ AVStream *st = s->streams[stream_index];
+ int ret;
+
+ if (avio_seek(s->pb, *ppos, SEEK_SET) < 0)
+ return AV_NOPTS_VALUE;
+
+ av_init_packet(&pkt);
+ st->parser = av_parser_init(st->codec->codec_id);
+ if (!st->parser){
+ return AV_NOPTS_VALUE;
+ }
+ st->parser->flags |= PARSER_FLAG_USE_CODEC_TS;
+
+ for (;;){
+ ret = ff_raw_read_partial_packet(s, &pkt);
+ if (ret < 0){
+ if (ret == AVERROR(EAGAIN))
+ continue;
+ else
+ return AV_NOPTS_VALUE;
+ }
+ av_init_packet(&out_pkt);
+ ret = av_parser_parse2(st->parser, st->codec,
+ &out_pkt.data, &out_pkt.size, pkt.data, pkt.size,
+ pkt.pts, pkt.dts, *ppos);
+
+ if (out_pkt.size){
+ int size = out_pkt.size;
+ av_free_packet(&out_pkt);
+ if (st->parser->pts != AV_NOPTS_VALUE){
+ // seeking may not have started from beginning of a frame
+ // calculate frame start position from next frame backwards
+ *ppos = st->parser->next_frame_offset - size;
+ return st->parser->pts;
+ }
+ }
+ }
+ return AV_NOPTS_VALUE;
+}
+
AVInputFormat ff_flac_demuxer = {
.name = "flac",
.long_name = NULL_IF_CONFIG_SMALL("raw FLAC"),
.read_probe = flac_probe,
.read_header = flac_read_header,
.read_packet = ff_raw_read_partial_packet,
+ .read_timestamp = flac_read_timestamp,
.flags = AVFMT_GENERIC_INDEX,
.extensions = "flac",
.raw_codec_id = AV_CODEC_ID_FLAC,
--
1.9.3
From ffe6e9d200eeba76b68ea57aa00d7cfe8d13eb93 Mon Sep 17 00:00:00 2001
From: Rainer Hochecker <fernetmenta@online.de>
Date: Mon, 14 Apr 2014 16:06:55 +0200
Subject: [PATCH 18/19] fate: update seeking reference for flac
---
tests/ref/seek/acodec-flac | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/tests/ref/seek/acodec-flac b/tests/ref/seek/acodec-flac
index ab31891..f6add9a 100644
--- a/tests/ref/seek/acodec-flac
+++ b/tests/ref/seek/acodec-flac
@@ -5,14 +5,16 @@ ret: 0 st:-1 flags:1 ts: 1.894167
ret: 0 st: 0 flags:1 dts: 1.880816 pts: 1.880816 pos: 86742 size: 2191
ret: 0 st: 0 flags:0 ts: 0.788345
ret: 0 st: 0 flags:1 dts: 0.809796 pts: 0.809796 pos: 27366 size: 615
-ret:-1 st: 0 flags:1 ts:-0.317506
+ret: 0 st: 0 flags:1 ts:-0.317506
+ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 8256 size: 614
ret: 0 st:-1 flags:0 ts: 2.576668
ret: 0 st: 0 flags:1 dts: 2.586122 pts: 2.586122 pos: 145606 size: 2384
ret: 0 st:-1 flags:1 ts: 1.470835
ret: 0 st: 0 flags:1 dts: 1.462857 pts: 1.462857 pos: 53388 size: 1851
ret: 0 st: 0 flags:0 ts: 0.365011
ret: 0 st: 0 flags:1 dts: 0.365714 pts: 0.365714 pos: 16890 size: 614
-ret:-1 st: 0 flags:1 ts:-0.740839
+ret: 0 st: 0 flags:1 ts:-0.740839
+ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 8256 size: 614
ret: 0 st:-1 flags:0 ts: 2.153336
ret: 0 st: 0 flags:1 dts: 2.168163 pts: 2.168163 pos: 110531 size: 2143
ret: 0 st:-1 flags:1 ts: 1.047503
@@ -39,11 +41,13 @@ ret: 0 st: 0 flags:1 ts: 1.989184
ret: 0 st: 0 flags:1 dts: 1.985306 pts: 1.985306 pos: 95508 size: 2169
ret: 0 st:-1 flags:0 ts: 0.883340
ret: 0 st: 0 flags:1 dts: 0.888163 pts: 0.888163 pos: 29211 size: 620
-ret:-1 st:-1 flags:1 ts:-0.222493
+ret: 0 st:-1 flags:1 ts:-0.222493
+ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 8256 size: 614
ret: 0 st: 0 flags:0 ts: 2.671678
ret: 0 st: 0 flags:1 dts: 2.690612 pts: 2.690612 pos: 155154 size: 2394
ret: 0 st: 0 flags:1 ts: 1.565850
ret: 0 st: 0 flags:1 dts: 1.541224 pts: 1.541224 pos: 59082 size: 1974
ret: 0 st:-1 flags:0 ts: 0.460008
ret: 0 st: 0 flags:1 dts: 0.470204 pts: 0.470204 pos: 19353 size: 608
-ret:-1 st:-1 flags:1 ts:-0.645825
+ret: 0 st:-1 flags:1 ts:-0.645825
+ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 8256 size: 614
--
1.9.3
From d6104d92e0e7248585cee2da796e458ef5937f12 Mon Sep 17 00:00:00 2001
From d028c907004e8a3c0f5161ce595331e4cc57c86c Mon Sep 17 00:00:00 2001
From: Memphiz <memphis@machzwo.de>
Date: Mon, 12 May 2014 18:27:01 +0200
Subject: [PATCH 19/19] fix --disable-ffplay should disable any needs to check
Subject: [PATCH 15/15] fix --disable-ffplay should disable any needs to check
or add compile/link flags otherwise SDL gets spewed all over pkg-config files
and generally causes a mess
@ -993,10 +719,10 @@ Subject: [PATCH 19/19] fix --disable-ffplay should disable any needs to check
1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/configure b/configure
index f9dce4e..0b582f0 100755
index 4ed43a0..4520e8c 100755
--- a/configure
+++ b/configure
@@ -4571,22 +4571,24 @@ if enabled libdc1394; then
@@ -4827,22 +4827,24 @@ if enabled libdc1394; then
die "ERROR: No version of libdc1394 found "
fi

View File

@ -1,752 +0,0 @@
From 8cdb3bf2837a3fb4fff3c6586316f81ae5f7b6cd Mon Sep 17 00:00:00 2001
From: Ben Avison <bavison@riscosopen.org>
Date: Wed, 16 Apr 2014 01:51:31 +0100
Subject: [PATCH 1/3] h264: Move start code search functions into separate
source files.
This permits re-use with parsers for codecs which use similar start codes.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
libavcodec/Makefile | 2 +-
libavcodec/arm/Makefile | 2 +-
libavcodec/arm/h264dsp_armv6.S | 253 --------------------------------------
libavcodec/arm/h264dsp_init_arm.c | 4 +-
libavcodec/arm/startcode_armv6.S | 253 ++++++++++++++++++++++++++++++++++++++
libavcodec/h264dsp.c | 31 +----
libavcodec/startcode.c | 57 +++++++++
libavcodec/startcode.h | 35 ++++++
8 files changed, 351 insertions(+), 286 deletions(-)
delete mode 100644 libavcodec/arm/h264dsp_armv6.S
create mode 100644 libavcodec/arm/startcode_armv6.S
create mode 100644 libavcodec/startcode.c
create mode 100644 libavcodec/startcode.h
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index b56ecd1..19caf11 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -49,7 +49,7 @@ OBJS-$(CONFIG_FFT) += avfft.o fft_fixed.o fft_float.o \
OBJS-$(CONFIG_GOLOMB) += golomb.o
OBJS-$(CONFIG_H263DSP) += h263dsp.o
OBJS-$(CONFIG_H264CHROMA) += h264chroma.o
-OBJS-$(CONFIG_H264DSP) += h264dsp.o h264idct.o
+OBJS-$(CONFIG_H264DSP) += h264dsp.o h264idct.o startcode.o
OBJS-$(CONFIG_H264PRED) += h264pred.o
OBJS-$(CONFIG_H264QPEL) += h264qpel.o
OBJS-$(CONFIG_HPELDSP) += hpeldsp.o
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index a8446b2..b6410b2 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -47,7 +47,7 @@ ARMV6-OBJS-$(CONFIG_DSPUTIL) += arm/dsputil_init_armv6.o \
arm/simple_idct_armv6.o \
ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o
-ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o
+ARMV6-OBJS-$(CONFIG_H264DSP) += arm/startcode_armv6.o
ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \
arm/hpeldsp_armv6.o
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
diff --git a/libavcodec/arm/h264dsp_armv6.S b/libavcodec/arm/h264dsp_armv6.S
deleted file mode 100644
index 2758262..0000000
--- a/libavcodec/arm/h264dsp_armv6.S
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (c) 2013 RISC OS Open Ltd
- * Author: Ben Avison <bavison@riscosopen.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/arm/asm.S"
-
-RESULT .req a1
-BUF .req a1
-SIZE .req a2
-PATTERN .req a3
-PTR .req a4
-DAT0 .req v1
-DAT1 .req v2
-DAT2 .req v3
-DAT3 .req v4
-TMP0 .req v5
-TMP1 .req v6
-TMP2 .req ip
-TMP3 .req lr
-
-#define PRELOAD_DISTANCE 4
-
-.macro innerloop4
- ldr DAT0, [PTR], #4
- subs SIZE, SIZE, #4 @ C flag survives rest of macro
- sub TMP0, DAT0, PATTERN, lsr #14
- bic TMP0, TMP0, DAT0
- ands TMP0, TMP0, PATTERN
-.endm
-
-.macro innerloop16 decrement, do_preload
- ldmia PTR!, {DAT0,DAT1,DAT2,DAT3}
- .ifnc "\do_preload",""
- pld [PTR, #PRELOAD_DISTANCE*32]
- .endif
- .ifnc "\decrement",""
- subs SIZE, SIZE, #\decrement @ C flag survives rest of macro
- .endif
- sub TMP0, DAT0, PATTERN, lsr #14
- sub TMP1, DAT1, PATTERN, lsr #14
- bic TMP0, TMP0, DAT0
- bic TMP1, TMP1, DAT1
- sub TMP2, DAT2, PATTERN, lsr #14
- sub TMP3, DAT3, PATTERN, lsr #14
- ands TMP0, TMP0, PATTERN
- bic TMP2, TMP2, DAT2
- it eq
- andseq TMP1, TMP1, PATTERN
- bic TMP3, TMP3, DAT3
- itt eq
- andseq TMP2, TMP2, PATTERN
- andseq TMP3, TMP3, PATTERN
-.endm
-
-/* int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size) */
-function ff_h264_find_start_code_candidate_armv6, export=1
- push {v1-v6,lr}
- mov PTR, BUF
- @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
- @ before using code that does preloads
- cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
- blo 60f
-
- @ Get to word-alignment, 1 byte at a time
- tst PTR, #3
- beq 2f
-1: ldrb DAT0, [PTR], #1
- sub SIZE, SIZE, #1
- teq DAT0, #0
- beq 90f
- tst PTR, #3
- bne 1b
-2: @ Get to 4-word alignment, 1 word at a time
- ldr PATTERN, =0x80008000
- setend be
- tst PTR, #12
- beq 4f
-3: innerloop4
- bne 91f
- tst PTR, #12
- bne 3b
-4: @ Get to cacheline (8-word) alignment
- tst PTR, #16
- beq 5f
- innerloop16 16
- bne 93f
-5: @ Check complete cachelines, with preloading
- @ We need to stop when there are still (PRELOAD_DISTANCE+1)
- @ complete cachelines to go
- sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
-6: innerloop16 , do_preload
- bne 93f
- innerloop16 32
- bne 93f
- bcs 6b
- @ Preload trailing part-cacheline, if any
- tst SIZE, #31
- beq 7f
- pld [PTR, #(PRELOAD_DISTANCE+1)*32]
- @ Check remaining data without doing any more preloads. First
- @ do in chunks of 4 words:
-7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
- bmi 9f
-8: innerloop16 16
- bne 93f
- bcs 8b
- @ Then in words:
-9: adds SIZE, SIZE, #16 - 4
- bmi 11f
-10: innerloop4
- bne 91f
- bcs 10b
-11: setend le
- @ Check second byte of final halfword
- ldrb DAT0, [PTR, #-1]
- teq DAT0, #0
- beq 90f
- @ Check any remaining bytes
- tst SIZE, #3
- beq 13f
-12: ldrb DAT0, [PTR], #1
- sub SIZE, SIZE, #1
- teq DAT0, #0
- beq 90f
- tst SIZE, #3
- bne 12b
- @ No candidate found
-13: sub RESULT, PTR, BUF
- b 99f
-
-60: @ Small buffer - simply check by looping over bytes
- subs SIZE, SIZE, #1
- bcc 99f
-61: ldrb DAT0, [PTR], #1
- subs SIZE, SIZE, #1
- teq DAT0, #0
- beq 90f
- bcs 61b
- @ No candidate found
- sub RESULT, PTR, BUF
- b 99f
-
-90: @ Found a candidate at the preceding byte
- sub RESULT, PTR, BUF
- sub RESULT, RESULT, #1
- b 99f
-
-91: @ Found a candidate somewhere in the preceding 4 bytes
- sub RESULT, PTR, BUF
- sub RESULT, RESULT, #4
- sub TMP0, DAT0, #0x20000
- bics TMP0, TMP0, DAT0
- itt pl
- ldrbpl DAT0, [PTR, #-3]
- addpl RESULT, RESULT, #2
- bpl 92f
- teq RESULT, #0
- beq 98f @ don't look back a byte if found at first byte in buffer
- ldrb DAT0, [PTR, #-5]
-92: teq DAT0, #0
- it eq
- subeq RESULT, RESULT, #1
- b 98f
-
-93: @ Found a candidate somewhere in the preceding 16 bytes
- sub RESULT, PTR, BUF
- sub RESULT, RESULT, #16
- teq TMP0, #0
- beq 95f @ not in first 4 bytes
- sub TMP0, DAT0, #0x20000
- bics TMP0, TMP0, DAT0
- itt pl
- ldrbpl DAT0, [PTR, #-15]
- addpl RESULT, RESULT, #2
- bpl 94f
- teq RESULT, #0
- beq 98f @ don't look back a byte if found at first byte in buffer
- ldrb DAT0, [PTR, #-17]
-94: teq DAT0, #0
- it eq
- subeq RESULT, RESULT, #1
- b 98f
-95: add RESULT, RESULT, #4
- teq TMP1, #0
- beq 96f @ not in next 4 bytes
- sub TMP1, DAT1, #0x20000
- bics TMP1, TMP1, DAT1
- itee mi
- ldrbmi DAT0, [PTR, #-13]
- ldrbpl DAT0, [PTR, #-11]
- addpl RESULT, RESULT, #2
- teq DAT0, #0
- it eq
- subeq RESULT, RESULT, #1
- b 98f
-96: add RESULT, RESULT, #4
- teq TMP2, #0
- beq 97f @ not in next 4 bytes
- sub TMP2, DAT2, #0x20000
- bics TMP2, TMP2, DAT2
- itee mi
- ldrbmi DAT0, [PTR, #-9]
- ldrbpl DAT0, [PTR, #-7]
- addpl RESULT, RESULT, #2
- teq DAT0, #0
- it eq
- subeq RESULT, RESULT, #1
- b 98f
-97: add RESULT, RESULT, #4
- sub TMP3, DAT3, #0x20000
- bics TMP3, TMP3, DAT3
- itee mi
- ldrbmi DAT0, [PTR, #-5]
- ldrbpl DAT0, [PTR, #-3]
- addpl RESULT, RESULT, #2
- teq DAT0, #0
- it eq
- subeq RESULT, RESULT, #1
- @ drop through to 98f
-98: setend le
-99: pop {v1-v6,pc}
-endfunc
-
- .unreq RESULT
- .unreq BUF
- .unreq SIZE
- .unreq PATTERN
- .unreq PTR
- .unreq DAT0
- .unreq DAT1
- .unreq DAT2
- .unreq DAT3
- .unreq TMP0
- .unreq TMP1
- .unreq TMP2
- .unreq TMP3
diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c
index a0418fd..eb6c514 100644
--- a/libavcodec/arm/h264dsp_init_arm.c
+++ b/libavcodec/arm/h264dsp_init_arm.c
@@ -24,7 +24,7 @@
#include "libavutil/arm/cpu.h"
#include "libavcodec/h264dsp.h"
-int ff_h264_find_start_code_candidate_armv6(const uint8_t *buf, int size);
+int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size);
void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
@@ -109,7 +109,7 @@ av_cold void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth,
int cpu_flags = av_get_cpu_flags();
if (have_armv6(cpu_flags))
- c->h264_find_start_code_candidate = ff_h264_find_start_code_candidate_armv6;
+ c->h264_find_start_code_candidate = ff_startcode_find_candidate_armv6;
if (have_neon(cpu_flags))
h264dsp_init_neon(c, bit_depth, chroma_format_idc);
}
diff --git a/libavcodec/arm/startcode_armv6.S b/libavcodec/arm/startcode_armv6.S
new file mode 100644
index 0000000..a46f009
--- /dev/null
+++ b/libavcodec/arm/startcode_armv6.S
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2013 RISC OS Open Ltd
+ * Author: Ben Avison <bavison@riscosopen.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+RESULT .req a1
+BUF .req a1
+SIZE .req a2
+PATTERN .req a3
+PTR .req a4
+DAT0 .req v1
+DAT1 .req v2
+DAT2 .req v3
+DAT3 .req v4
+TMP0 .req v5
+TMP1 .req v6
+TMP2 .req ip
+TMP3 .req lr
+
+#define PRELOAD_DISTANCE 4
+
+.macro innerloop4
+ ldr DAT0, [PTR], #4
+ subs SIZE, SIZE, #4 @ C flag survives rest of macro
+ sub TMP0, DAT0, PATTERN, lsr #14
+ bic TMP0, TMP0, DAT0
+ ands TMP0, TMP0, PATTERN
+.endm
+
+.macro innerloop16 decrement, do_preload
+ ldmia PTR!, {DAT0,DAT1,DAT2,DAT3}
+ .ifnc "\do_preload",""
+ pld [PTR, #PRELOAD_DISTANCE*32]
+ .endif
+ .ifnc "\decrement",""
+ subs SIZE, SIZE, #\decrement @ C flag survives rest of macro
+ .endif
+ sub TMP0, DAT0, PATTERN, lsr #14
+ sub TMP1, DAT1, PATTERN, lsr #14
+ bic TMP0, TMP0, DAT0
+ bic TMP1, TMP1, DAT1
+ sub TMP2, DAT2, PATTERN, lsr #14
+ sub TMP3, DAT3, PATTERN, lsr #14
+ ands TMP0, TMP0, PATTERN
+ bic TMP2, TMP2, DAT2
+ it eq
+ andseq TMP1, TMP1, PATTERN
+ bic TMP3, TMP3, DAT3
+ itt eq
+ andseq TMP2, TMP2, PATTERN
+ andseq TMP3, TMP3, PATTERN
+.endm
+
+/* int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size) */
+function ff_startcode_find_candidate_armv6, export=1
+ push {v1-v6,lr}
+ mov PTR, BUF
+ @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
+ @ before using code that does preloads
+ cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
+ blo 60f
+
+ @ Get to word-alignment, 1 byte at a time
+ tst PTR, #3
+ beq 2f
+1: ldrb DAT0, [PTR], #1
+ sub SIZE, SIZE, #1
+ teq DAT0, #0
+ beq 90f
+ tst PTR, #3
+ bne 1b
+2: @ Get to 4-word alignment, 1 word at a time
+ ldr PATTERN, =0x80008000
+ setend be
+ tst PTR, #12
+ beq 4f
+3: innerloop4
+ bne 91f
+ tst PTR, #12
+ bne 3b
+4: @ Get to cacheline (8-word) alignment
+ tst PTR, #16
+ beq 5f
+ innerloop16 16
+ bne 93f
+5: @ Check complete cachelines, with preloading
+ @ We need to stop when there are still (PRELOAD_DISTANCE+1)
+ @ complete cachelines to go
+ sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
+6: innerloop16 , do_preload
+ bne 93f
+ innerloop16 32
+ bne 93f
+ bcs 6b
+ @ Preload trailing part-cacheline, if any
+ tst SIZE, #31
+ beq 7f
+ pld [PTR, #(PRELOAD_DISTANCE+1)*32]
+ @ Check remaining data without doing any more preloads. First
+ @ do in chunks of 4 words:
+7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
+ bmi 9f
+8: innerloop16 16
+ bne 93f
+ bcs 8b
+ @ Then in words:
+9: adds SIZE, SIZE, #16 - 4
+ bmi 11f
+10: innerloop4
+ bne 91f
+ bcs 10b
+11: setend le
+ @ Check second byte of final halfword
+ ldrb DAT0, [PTR, #-1]
+ teq DAT0, #0
+ beq 90f
+ @ Check any remaining bytes
+ tst SIZE, #3
+ beq 13f
+12: ldrb DAT0, [PTR], #1
+ sub SIZE, SIZE, #1
+ teq DAT0, #0
+ beq 90f
+ tst SIZE, #3
+ bne 12b
+ @ No candidate found
+13: sub RESULT, PTR, BUF
+ b 99f
+
+60: @ Small buffer - simply check by looping over bytes
+ subs SIZE, SIZE, #1
+ bcc 99f
+61: ldrb DAT0, [PTR], #1
+ subs SIZE, SIZE, #1
+ teq DAT0, #0
+ beq 90f
+ bcs 61b
+ @ No candidate found
+ sub RESULT, PTR, BUF
+ b 99f
+
+90: @ Found a candidate at the preceding byte
+ sub RESULT, PTR, BUF
+ sub RESULT, RESULT, #1
+ b 99f
+
+91: @ Found a candidate somewhere in the preceding 4 bytes
+ sub RESULT, PTR, BUF
+ sub RESULT, RESULT, #4
+ sub TMP0, DAT0, #0x20000
+ bics TMP0, TMP0, DAT0
+ itt pl
+ ldrbpl DAT0, [PTR, #-3]
+ addpl RESULT, RESULT, #2
+ bpl 92f
+ teq RESULT, #0
+ beq 98f @ don't look back a byte if found at first byte in buffer
+ ldrb DAT0, [PTR, #-5]
+92: teq DAT0, #0
+ it eq
+ subeq RESULT, RESULT, #1
+ b 98f
+
+93: @ Found a candidate somewhere in the preceding 16 bytes
+ sub RESULT, PTR, BUF
+ sub RESULT, RESULT, #16
+ teq TMP0, #0
+ beq 95f @ not in first 4 bytes
+ sub TMP0, DAT0, #0x20000
+ bics TMP0, TMP0, DAT0
+ itt pl
+ ldrbpl DAT0, [PTR, #-15]
+ addpl RESULT, RESULT, #2
+ bpl 94f
+ teq RESULT, #0
+ beq 98f @ don't look back a byte if found at first byte in buffer
+ ldrb DAT0, [PTR, #-17]
+94: teq DAT0, #0
+ it eq
+ subeq RESULT, RESULT, #1
+ b 98f
+95: add RESULT, RESULT, #4
+ teq TMP1, #0
+ beq 96f @ not in next 4 bytes
+ sub TMP1, DAT1, #0x20000
+ bics TMP1, TMP1, DAT1
+ itee mi
+ ldrbmi DAT0, [PTR, #-13]
+ ldrbpl DAT0, [PTR, #-11]
+ addpl RESULT, RESULT, #2
+ teq DAT0, #0
+ it eq
+ subeq RESULT, RESULT, #1
+ b 98f
+96: add RESULT, RESULT, #4
+ teq TMP2, #0
+ beq 97f @ not in next 4 bytes
+ sub TMP2, DAT2, #0x20000
+ bics TMP2, TMP2, DAT2
+ itee mi
+ ldrbmi DAT0, [PTR, #-9]
+ ldrbpl DAT0, [PTR, #-7]
+ addpl RESULT, RESULT, #2
+ teq DAT0, #0
+ it eq
+ subeq RESULT, RESULT, #1
+ b 98f
+97: add RESULT, RESULT, #4
+ sub TMP3, DAT3, #0x20000
+ bics TMP3, TMP3, DAT3
+ itee mi
+ ldrbmi DAT0, [PTR, #-5]
+ ldrbpl DAT0, [PTR, #-3]
+ addpl RESULT, RESULT, #2
+ teq DAT0, #0
+ it eq
+ subeq RESULT, RESULT, #1
+ @ drop through to 98f
+98: setend le
+99: pop {v1-v6,pc}
+endfunc
+
+ .unreq RESULT
+ .unreq BUF
+ .unreq SIZE
+ .unreq PATTERN
+ .unreq PTR
+ .unreq DAT0
+ .unreq DAT1
+ .unreq DAT2
+ .unreq DAT3
+ .unreq TMP0
+ .unreq TMP1
+ .unreq TMP2
+ .unreq TMP3
diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
index a2a4aba..a4da776 100644
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -33,6 +33,7 @@
#include "avcodec.h"
#include "h264dsp.h"
#include "h264idct.h"
+#include "startcode.h"
#include "libavutil/common.h"
#define BIT_DEPTH 8
@@ -63,34 +64,6 @@
#include "h264addpx_template.c"
#undef BIT_DEPTH
-static int h264_find_start_code_candidate_c(const uint8_t *buf, int size)
-{
- int i = 0;
-#if HAVE_FAST_UNALIGNED
- /* we check i < size instead of i + 3 / 7 because it is
- * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
- * bytes at the end.
- */
-# if HAVE_FAST_64BIT
- while (i < size &&
- !((~*(const uint64_t *)(buf + i) &
- (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
- 0x8080808080808080ULL))
- i += 8;
-# else
- while (i < size &&
- !((~*(const uint32_t *)(buf + i) &
- (*(const uint32_t *)(buf + i) - 0x01010101U)) &
- 0x80808080U))
- i += 4;
-# endif
-#endif
- for (; i < size; i++)
- if (!buf[i])
- break;
- return i;
-}
-
av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
const int chroma_format_idc)
{
@@ -178,7 +151,7 @@ av_cold void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,
H264_DSP(8);
break;
}
- c->h264_find_start_code_candidate = h264_find_start_code_candidate_c;
+ c->h264_find_start_code_candidate = ff_startcode_find_candidate_c;
if (ARCH_AARCH64) ff_h264dsp_init_aarch64(c, bit_depth, chroma_format_idc);
if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc);
diff --git a/libavcodec/startcode.c b/libavcodec/startcode.c
new file mode 100644
index 0000000..5df7695
--- /dev/null
+++ b/libavcodec/startcode.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Accelerated start code search function for start codes common to
+ * MPEG-1/2/4 video, VC-1, H.264/5
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "startcode.h"
+#include "config.h"
+
+int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
+{
+ int i = 0;
+#if HAVE_FAST_UNALIGNED
+ /* we check i < size instead of i + 3 / 7 because it is
+ * simpler and there must be FF_INPUT_BUFFER_PADDING_SIZE
+ * bytes at the end.
+ */
+# if HAVE_FAST_64BIT
+ while (i < size &&
+ !((~*(const uint64_t *)(buf + i) &
+ (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
+ 0x8080808080808080ULL))
+ i += 8;
+# else
+ while (i < size &&
+ !((~*(const uint32_t *)(buf + i) &
+ (*(const uint32_t *)(buf + i) - 0x01010101U)) &
+ 0x80808080U))
+ i += 4;
+# endif
+#endif
+ for (; i < size; i++)
+ if (!buf[i])
+ break;
+ return i;
+}
diff --git a/libavcodec/startcode.h b/libavcodec/startcode.h
new file mode 100644
index 0000000..cc55d5f
--- /dev/null
+++ b/libavcodec/startcode.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2003-2010 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Accelerated start code search function for start codes common to
+ * MPEG-1/2/4 video, VC-1, H.264/5
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#ifndef AVCODEC_STARTCODE_H
+#define AVCODEC_STARTCODE_H
+
+#include <stdint.h>
+
+int ff_startcode_find_candidate_c(const uint8_t *buf, int size);
+
+#endif /* AVCODEC_STARTCODE_H */
--
1.9.1

View File

@ -1,65 +0,0 @@
From 425d69b993d25489e4830766507d9d8f6c819802 Mon Sep 17 00:00:00 2001
From: Ben Avison <bavison@riscosopen.org>
Date: Wed, 19 Mar 2014 17:26:19 +0000
Subject: [PATCH 1/6] truehd: tune VLC decoding for ARM.
Profiling on a Raspberry Pi revealed the best performance to correspond
with VLC_BITS = 5. Results for overall audio decode and the get_vlc2 function
in particular are as follows:
Before After
Mean StdDev Mean StdDev Confidence Change
6:2 total 348.8 20.1 339.6 15.1 88.8% +2.7% (insignificant)
6:2 function 38.1 8.1 26.4 4.1 100.0% +44.5%
8:2 total 339.1 15.4 324.5 15.5 99.4% +4.5%
8:2 function 33.8 7.0 27.3 5.6 99.7% +23.6%
6:6 total 604.6 20.8 572.8 20.6 100.0% +5.6%
6:6 function 95.8 8.4 68.9 8.2 100.0% +39.1%
8:8 total 766.4 17.6 741.5 21.2 100.0% +3.4%
8:8 function 106.0 11.4 86.1 9.9 100.0% +23.1%
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
libavcodec/mlpdec.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index 93ed552..cbd9000 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -37,9 +37,16 @@
#include "mlp_parser.h"
#include "mlpdsp.h"
#include "mlp.h"
+#include "config.h"
/** number of bits used for VLC lookup - longest Huffman code is 9 */
+#if ARCH_ARM == 1
+#define VLC_BITS 5
+#define VLC_STATIC_SIZE 64
+#else
#define VLC_BITS 9
+#define VLC_STATIC_SIZE 512
+#endif
typedef struct SubStream {
/// Set if a valid restart header has been read. Otherwise the substream cannot be decoded.
@@ -193,13 +200,13 @@ static av_cold void init_static(void)
if (!huff_vlc[0].bits) {
INIT_VLC_STATIC(&huff_vlc[0], VLC_BITS, 18,
&ff_mlp_huffman_tables[0][0][1], 2, 1,
- &ff_mlp_huffman_tables[0][0][0], 2, 1, 512);
+ &ff_mlp_huffman_tables[0][0][0], 2, 1, VLC_STATIC_SIZE);
INIT_VLC_STATIC(&huff_vlc[1], VLC_BITS, 16,
&ff_mlp_huffman_tables[1][0][1], 2, 1,
- &ff_mlp_huffman_tables[1][0][0], 2, 1, 512);
+ &ff_mlp_huffman_tables[1][0][0], 2, 1, VLC_STATIC_SIZE);
INIT_VLC_STATIC(&huff_vlc[2], VLC_BITS, 15,
&ff_mlp_huffman_tables[2][0][1], 2, 1,
- &ff_mlp_huffman_tables[2][0][0], 2, 1, 512);
+ &ff_mlp_huffman_tables[2][0][0], 2, 1, VLC_STATIC_SIZE);
}
ff_mlp_init_crc();
--
1.9.1

View File

@ -1,557 +0,0 @@
From bfe3d8c8e4e046163dc314aa16207413e377283f Mon Sep 17 00:00:00 2001
From: Ben Avison <bavison@riscosopen.org>
Date: Mon, 3 Mar 2014 19:44:23 +0000
Subject: [PATCH 2/6] truehd: add hand-scheduled ARM asm version of
mlp_filter_channel.
Profiling results for overall audio decode and the mlp_filter_channel(_arm)
function in particular are as follows:
Before After
Mean StdDev Mean StdDev Confidence Change
6:2 total 380.4 22.0 370.8 17.0 87.4% +2.6% (insignificant)
6:2 function 60.7 7.2 36.6 8.1 100.0% +65.8%
8:2 total 357.0 17.5 343.2 19.0 97.8% +4.0% (insignificant)
8:2 function 60.3 8.8 37.3 3.8 100.0% +61.8%
6:6 total 717.2 23.2 658.4 15.7 100.0% +8.9%
6:6 function 140.4 12.9 81.5 9.2 100.0% +72.4%
8:8 total 981.9 16.2 896.2 24.5 100.0% +9.6%
8:8 function 193.4 15.0 103.3 11.5 100.0% +87.2%
Experiments with adding preload instructions to this function yielded no
useful benefit, so these have not been included.
The assembly version has also been tested with a fuzz tester to ensure that
any combinations of inputs not exercised by my available test streams still
generate mathematically identical results to the C version.
---
libavcodec/arm/Makefile | 2 +
libavcodec/arm/mlpdsp_arm.S | 433 +++++++++++++++++++++++++++++++++++++++
libavcodec/arm/mlpdsp_init_arm.c | 36 ++++
libavcodec/mlpdsp.c | 2 +
libavcodec/mlpdsp.h | 1 +
5 files changed, 474 insertions(+)
create mode 100644 libavcodec/arm/mlpdsp_arm.S
create mode 100644 libavcodec/arm/mlpdsp_init_arm.c
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index a8446b2..ba673b1 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -22,6 +22,8 @@ OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o
OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_arm.o \
arm/hpeldsp_arm.o
+OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_init_arm.o \
+ arm/mlpdsp_arm.o
OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += arm/neontest.o
diff --git a/libavcodec/arm/mlpdsp_arm.S b/libavcodec/arm/mlpdsp_arm.S
new file mode 100644
index 0000000..615819d
--- /dev/null
+++ b/libavcodec/arm/mlpdsp_arm.S
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2014 RISC OS Open Ltd
+ * Author: Ben Avison <bavison@riscosopen.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+#define MAX_CHANNELS 8
+#define MAX_FIR_ORDER 8
+#define MAX_IIR_ORDER 4
+#define MAX_RATEFACTOR 4
+#define MAX_BLOCKSIZE (40 * MAX_RATEFACTOR)
+
+PST .req a1
+PCO .req a2
+AC0 .req a3
+AC1 .req a4
+CO0 .req v1
+CO1 .req v2
+CO2 .req v3
+CO3 .req v4
+ST0 .req v5
+ST1 .req v6
+ST2 .req sl
+ST3 .req fp
+I .req ip
+PSAMP .req lr
+
+
+// Some macros that do loads/multiplies where the register number is determined
+// from an assembly-time expression. Boy is GNU assembler's syntax ugly...
+
+.macro load group, index, base, offset
+ .altmacro
+ load_ \group, %(\index), \base, \offset
+ .noaltmacro
+.endm
+
+.macro load_ group, index, base, offset
+ ldr \group\index, [\base, #\offset]
+.endm
+
+.macro loadd group, index, base, offset
+ .altmacro
+ loadd_ \group, %(\index), %(\index+1), \base, \offset
+ .noaltmacro
+.endm
+
+.macro loadd_ group, index0, index1, base, offset
+A .if \offset >= 256 // ARM-state ldrd only encodes an 8-bit immediate offset, so split into two ldr
+A       ldr     \group\index0, [\base, #\offset]
+A       ldr     \group\index1, [\base, #(\offset) + 4]
+A .else
+        ldrd    \group\index0, \group\index1, [\base, #\offset]
+A .endif
+.endm
+
+.macro multiply index, accumulate, long
+ .altmacro
+ multiply_ %(\index), \accumulate, \long
+ .noaltmacro
+.endm
+
+.macro multiply_ index, accumulate, long
+ .if \long
+ .if \accumulate
+ smlal AC0, AC1, CO\index, ST\index
+ .else
+ smull AC0, AC1, CO\index, ST\index
+ .endif
+ .else
+ .if \accumulate
+ mla AC0, CO\index, ST\index, AC0
+ .else
+ mul AC0, CO\index, ST\index
+ .endif
+ .endif
+.endm
+
+// A macro to update the load register number and load offsets
+
+.macro inc howmany
+ .set LOAD_REG, (LOAD_REG + \howmany) & 3
+ .set OFFSET_CO, OFFSET_CO + 4 * \howmany
+ .set OFFSET_ST, OFFSET_ST + 4 * \howmany
+ .if FIR_REMAIN > 0
+ .set FIR_REMAIN, FIR_REMAIN - \howmany
+ .if FIR_REMAIN == 0
+ .set OFFSET_CO, 4 * MAX_FIR_ORDER
+ .set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)
+ .endif
+ .elseif IIR_REMAIN > 0
+ .set IIR_REMAIN, IIR_REMAIN - \howmany
+ .endif
+.endm
+
+// Macro to implement the inner loop for one specific combination of parameters
+
+.macro implement_filter mask_minus1, shift_0, shift_8, iir_taps, fir_taps
+ .set TOTAL_TAPS, \iir_taps + \fir_taps
+
+ // Deal with register allocation...
+ .set DEFINED_SHIFT, 0
+ .set DEFINED_MASK, 0
+ .set SHUFFLE_SHIFT, 0
+ .set SHUFFLE_MASK, 0
+ .set SPILL_SHIFT, 0
+ .set SPILL_MASK, 0
+ .if TOTAL_TAPS == 0
+ // Little register pressure in this case - just keep MASK where it was
+ .if !\mask_minus1
+ MASK .req ST1
+ .set DEFINED_MASK, 1
+ .endif
+ .else
+ .if \shift_0
+ .if !\mask_minus1
+ // AC1 is unused with shift 0
+ MASK .req AC1
+ .set DEFINED_MASK, 1
+ .set SHUFFLE_MASK, 1
+ .endif
+ .elseif \shift_8
+ .if !\mask_minus1
+ .if TOTAL_TAPS <= 4
+ // All coefficients are preloaded (so pointer not needed)
+ MASK .req PCO
+ .set DEFINED_MASK, 1
+ .set SHUFFLE_MASK, 1
+ .else
+ .set SPILL_MASK, 1
+ .endif
+ .endif
+ .else // shift not 0 or 8
+ .if TOTAL_TAPS <= 3
+ // All coefficients are preloaded, and at least one CO register is unused
+ .if \fir_taps & 1
+ SHIFT .req CO0
+ .set DEFINED_SHIFT, 1
+ .set SHUFFLE_SHIFT, 1
+ .else
+ SHIFT .req CO3
+ .set DEFINED_SHIFT, 1
+ .set SHUFFLE_SHIFT, 1
+ .endif
+ .if !\mask_minus1
+ MASK .req PCO
+ .set DEFINED_MASK, 1
+ .set SHUFFLE_MASK, 1
+ .endif
+ .elseif TOTAL_TAPS == 4
+ // All coefficients are preloaded
+ SHIFT .req PCO
+ .set DEFINED_SHIFT, 1
+ .set SHUFFLE_SHIFT, 1
+ .if !\mask_minus1
+ .set SPILL_MASK, 1
+ .endif
+ .else
+ .set SPILL_SHIFT, 1
+ .if !\mask_minus1
+ .set SPILL_MASK, 1
+ .endif
+ .endif
+ .endif
+ .endif
+ .if SPILL_SHIFT
+ SHIFT .req ST0
+ .set DEFINED_SHIFT, 1
+ .endif
+ .if SPILL_MASK
+ MASK .req ST1
+ .set DEFINED_MASK, 1
+ .endif
+
+ // Preload coefficients if possible
+ .if TOTAL_TAPS <= 4
+ .set OFFSET_CO, 0
+ .if \fir_taps & 1
+ .set LOAD_REG, 1
+ .else
+ .set LOAD_REG, 0
+ .endif
+ .rept \fir_taps
+ load CO, LOAD_REG, PCO, OFFSET_CO
+ .set LOAD_REG, (LOAD_REG + 1) & 3
+ .set OFFSET_CO, OFFSET_CO + 4
+ .endr
+ .set OFFSET_CO, 4 * MAX_FIR_ORDER
+ .rept \iir_taps
+ load CO, LOAD_REG, PCO, OFFSET_CO
+ .set LOAD_REG, (LOAD_REG + 1) & 3
+ .set OFFSET_CO, OFFSET_CO + 4
+ .endr
+ .endif
+
+ // Move mask/shift to final positions if necessary
+ // Need to do this after preloading, because in some cases we
+ // reuse the coefficient pointer register
+ .if SHUFFLE_SHIFT
+ mov SHIFT, ST0
+ .endif
+ .if SHUFFLE_MASK
+ mov MASK, ST1
+ .endif
+
+ // Begin loop
+01:
+ .if TOTAL_TAPS == 0
+ // Things simplify a lot in this case
+ // In fact this could be pipelined further if it's worth it...
+ ldr ST0, [PSAMP]
+ subs I, I, #1
+ .if !\mask_minus1
+ and ST0, ST0, MASK
+ .endif
+ str ST0, [PST, #-4]!
+ str ST0, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
+ str ST0, [PSAMP], #4 * MAX_CHANNELS
+ bne 01b
+ .else
+ .if \fir_taps & 1
+ .set LOAD_REG, 1
+ .else
+ .set LOAD_REG, 0
+ .endif
+ .set LOAD_BANK, 0
+ .set FIR_REMAIN, \fir_taps
+ .set IIR_REMAIN, \iir_taps
+ .if FIR_REMAIN == 0 // only IIR terms
+ .set OFFSET_CO, 4 * MAX_FIR_ORDER
+ .set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)
+ .else
+ .set OFFSET_CO, 0
+ .set OFFSET_ST, 0
+ .endif
+ .set MUL_REG, LOAD_REG
+ .set COUNTER, 0
+ .rept TOTAL_TAPS + 2
+ // Do load(s)
+ .if FIR_REMAIN != 0 || IIR_REMAIN != 0
+ .if COUNTER == 0
+ .if TOTAL_TAPS > 4
+ load CO, LOAD_REG, PCO, OFFSET_CO
+ .endif
+ load ST, LOAD_REG, PST, OFFSET_ST
+ inc 1
+ .elseif COUNTER == 1 && (\fir_taps & 1) == 0
+ .if TOTAL_TAPS > 4
+ load CO, LOAD_REG, PCO, OFFSET_CO
+ .endif
+ load ST, LOAD_REG, PST, OFFSET_ST
+ inc 1
+ .elseif LOAD_BANK == 0
+ .if TOTAL_TAPS > 4
+ .if FIR_REMAIN == 0 && IIR_REMAIN == 1
+ load CO, LOAD_REG, PCO, OFFSET_CO
+ .else
+ loadd CO, LOAD_REG, PCO, OFFSET_CO
+ .endif
+ .endif
+ .set LOAD_BANK, 1
+ .else
+ .if FIR_REMAIN == 0 && IIR_REMAIN == 1
+ load ST, LOAD_REG, PST, OFFSET_ST
+ inc 1
+ .else
+ loadd ST, LOAD_REG, PST, OFFSET_ST
+ inc 2
+ .endif
+ .set LOAD_BANK, 0
+ .endif
+ .endif
+
+ // Do interleaved multiplies, slightly delayed
+ .if COUNTER >= 2
+ multiply MUL_REG, COUNTER > 2, !\shift_0
+ .set MUL_REG, (MUL_REG + 1) & 3
+ .endif
+ .set COUNTER, COUNTER + 1
+ .endr
+
+ // Post-process the result of the multiplies
+ .if SPILL_SHIFT
+ ldr SHIFT, [sp, #9*4 + 0*4]
+ .endif
+ .if SPILL_MASK
+ ldr MASK, [sp, #9*4 + 1*4]
+ .endif
+ ldr ST2, [PSAMP]
+ subs I, I, #1
+ .if \shift_8
+ mov AC0, AC0, lsr #8
+ orr AC0, AC0, AC1, lsl #24
+ .elseif !\shift_0
+ rsb ST3, SHIFT, #32
+ mov AC0, AC0, lsr SHIFT
+A orr AC0, AC0, AC1, lsl ST3
+T mov AC1, AC1, lsl ST3
+T orr AC0, AC0, AC1
+ .endif
+ .if \mask_minus1
+ add ST3, ST2, AC0
+ .else
+ add ST2, ST2, AC0
+ and ST3, ST2, MASK
+ sub ST2, ST3, AC0
+ .endif
+ str ST3, [PST, #-4]!
+ str ST2, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
+ str ST3, [PSAMP], #4 * MAX_CHANNELS
+ bne 01b
+ .endif
+ b 99f
+
+ .if DEFINED_SHIFT
+ .unreq SHIFT
+ .endif
+ .if DEFINED_MASK
+ .unreq MASK
+ .endif
+.endm
+
+.macro switch_on_fir_taps mask_minus1, shift_0, shift_8, iir_taps
+A ldr pc, [pc, a3, LSL #2] // firorder is in range 0-(8-iir_taps)
+T tbh [pc, a3, lsl #1]
+0:
+A .word 0, 70f, 71f, 72f, 73f, 74f
+T .hword (70f - 0b) / 2, (71f - 0b) / 2, (72f - 0b) / 2, (73f - 0b) / 2, (74f - 0b) / 2
+ .if \iir_taps <= 3
+A .word 75f
+T .hword (75f - 0b) / 2
+ .if \iir_taps <= 2
+A .word 76f
+T .hword (76f - 0b) / 2
+ .if \iir_taps <= 1
+A .word 77f
+T .hword (77f - 0b) / 2
+ .if \iir_taps == 0
+A .word 78f
+T .hword (78f - 0b) / 2
+ .endif
+ .endif
+ .endif
+ .endif
+70: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 0
+71: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 1
+72: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 2
+73: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 3
+74: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 4
+ .if \iir_taps <= 3
+75: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 5
+ .if \iir_taps <= 2
+76: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 6
+ .if \iir_taps <= 1
+77: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 7
+ .if \iir_taps == 0
+78: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 8
+ .endif
+ .endif
+ .endif
+ .endif
+.endm
+
+.macro switch_on_iir_taps mask_minus1, shift_0, shift_8
+A ldr pc, [pc, a4, LSL #2] // iirorder is in range 0-4
+T tbh [pc, a4, lsl #1]
+0:
+A .word 0, 60f, 61f, 62f, 63f, 64f
+T .hword (60f - 0b) / 2, (61f - 0b) / 2, (62f - 0b) / 2, (63f - 0b) / 2, (64f - 0b) / 2
+60: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 0
+61: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 1
+62: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 2
+63: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 3
+64: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 4
+.endm
+
+/* void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff,
+ * int firorder, int iirorder,
+ * unsigned int filter_shift, int32_t mask,
+ * int blocksize, int32_t *sample_buffer);
+ */
+function ff_mlp_filter_channel_arm, export=1
+ push {v1-fp,lr}
+ add v1, sp, #9*4 // point at arguments on stack
+ ldm v1, {ST0,ST1,I,PSAMP}
+ cmp ST1, #-1
+ bne 30f
+ movs ST2, ST0, lsl #29 // shift is in range 0-15; we want to special-case 0 and 8
+ bne 20f
+ bcs 10f
+ switch_on_iir_taps 1, 1, 0
+10: switch_on_iir_taps 1, 0, 1
+20: switch_on_iir_taps 1, 0, 0
+30: movs ST2, ST0, lsl #29 // shift is in range 0-15; we want to special-case 0 and 8
+ bne 50f
+ bcs 40f
+ switch_on_iir_taps 0, 1, 0
+40: switch_on_iir_taps 0, 0, 1
+50: switch_on_iir_taps 0, 0, 0
+99: pop {v1-fp,pc}
+endfunc
+
+ .unreq PST
+ .unreq PCO
+ .unreq AC0
+ .unreq AC1
+ .unreq CO0
+ .unreq CO1
+ .unreq CO2
+ .unreq CO3
+ .unreq ST0
+ .unreq ST1
+ .unreq ST2
+ .unreq ST3
+ .unreq I
+ .unreq PSAMP
diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
new file mode 100644
index 0000000..9a14815
--- /dev/null
+++ b/libavcodec/arm/mlpdsp_init_arm.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2014 RISC OS Open Ltd
+ * Author: Ben Avison <bavison@riscosopen.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/arm/cpu.h"
+#include "libavutil/attributes.h"
+#include "libavcodec/mlpdsp.h"
+
+void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff,
+ int firorder, int iirorder,
+ unsigned int filter_shift, int32_t mask,
+ int blocksize, int32_t *sample_buffer);
+
+av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
+{
+ c->mlp_filter_channel = ff_mlp_filter_channel_arm;
+}
diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
index b413e86..4b403b8 100644
--- a/libavcodec/mlpdsp.c
+++ b/libavcodec/mlpdsp.c
@@ -60,6 +60,8 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff,
av_cold void ff_mlpdsp_init(MLPDSPContext *c)
{
c->mlp_filter_channel = mlp_filter_channel;
+ if (ARCH_ARM)
+ ff_mlpdsp_init_arm(c);
if (ARCH_X86)
ff_mlpdsp_init_x86(c);
}
diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
index 84a8aa3..129bcfe 100644
--- a/libavcodec/mlpdsp.h
+++ b/libavcodec/mlpdsp.h
@@ -32,6 +32,7 @@ typedef struct MLPDSPContext {
} MLPDSPContext;
void ff_mlpdsp_init(MLPDSPContext *c);
+void ff_mlpdsp_init_arm(MLPDSPContext *c);
void ff_mlpdsp_init_x86(MLPDSPContext *c);
#endif /* AVCODEC_MLPDSP_H */
--
1.9.1

View File

@ -1,143 +0,0 @@
From a60747132a1a6652ac0d18f3f110a20ea637ac30 Mon Sep 17 00:00:00 2001
From: Ben Avison <bavison@riscosopen.org>
Date: Wed, 16 Apr 2014 01:51:32 +0100
Subject: [PATCH 2/3] vc-1: Add platform-specific start code search routine to
VC1DSPContext.
Initialise VC1DSPContext for parser as well as for decoder.
Note, the VC-1 code doesn't actually use the function pointer yet.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
libavcodec/Makefile | 6 +++---
libavcodec/arm/Makefile | 2 ++
libavcodec/arm/vc1dsp_init_arm.c | 4 ++++
libavcodec/vc1.c | 2 ++
libavcodec/vc1dec.c | 1 -
libavcodec/vc1dsp.c | 3 +++
libavcodec/vc1dsp.h | 8 ++++++++
7 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 19caf11..120f85a 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -458,7 +458,7 @@ OBJS-$(CONFIG_VB_DECODER) += vb.o
OBJS-$(CONFIG_VBLE_DECODER) += vble.o
OBJS-$(CONFIG_VC1_DECODER) += vc1dec.o vc1.o vc1data.o vc1dsp.o \
msmpeg4dec.o msmpeg4.o msmpeg4data.o \
- wmv2dsp.o
+ wmv2dsp.o startcode.o
OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o
OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdav.o
OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdav.o
@@ -783,9 +783,9 @@ OBJS-$(CONFIG_PNM_PARSER) += pnm_parser.o pnm.o
OBJS-$(CONFIG_RV30_PARSER) += rv34_parser.o
OBJS-$(CONFIG_RV40_PARSER) += rv34_parser.o
OBJS-$(CONFIG_TAK_PARSER) += tak_parser.o tak.o
-OBJS-$(CONFIG_VC1_PARSER) += vc1_parser.o vc1.o vc1data.o \
+OBJS-$(CONFIG_VC1_PARSER) += vc1_parser.o vc1.o vc1data.o vc1dsp.o \
msmpeg4.o msmpeg4data.o mpeg4video.o \
- h263.o
+ h263.o startcode.o
OBJS-$(CONFIG_VORBIS_PARSER) += vorbis_parser.o xiph.o
OBJS-$(CONFIG_VP3_PARSER) += vp3_parser.o
OBJS-$(CONFIG_VP8_PARSER) += vp8_parser.o
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index b6410b2..fa2b18e 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -51,6 +51,8 @@ ARMV6-OBJS-$(CONFIG_H264DSP) += arm/startcode_armv6.o
ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \
arm/hpeldsp_armv6.o
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
+ARMV6-OBJS-$(CONFIG_VC1_DECODER) += arm/startcode_armv6.o
+ARMV6-OBJS-$(CONFIG_VC1_PARSER) += arm/startcode_armv6.o
ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \
arm/vp8dsp_init_armv6.o \
arm/vp8dsp_armv6.o
diff --git a/libavcodec/arm/vc1dsp_init_arm.c b/libavcodec/arm/vc1dsp_init_arm.c
index 47d4126..4a84848 100644
--- a/libavcodec/arm/vc1dsp_init_arm.c
+++ b/libavcodec/arm/vc1dsp_init_arm.c
@@ -23,10 +23,14 @@
#include "libavcodec/vc1dsp.h"
#include "vc1dsp.h"
+int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size);
+
av_cold void ff_vc1dsp_init_arm(VC1DSPContext *dsp)
{
int cpu_flags = av_get_cpu_flags();
+ if (have_armv6(cpu_flags))
+ dsp->vc1_find_start_code_candidate = ff_startcode_find_candidate_armv6;
if (have_neon(cpu_flags))
ff_vc1dsp_init_neon(dsp);
}
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index 49d4885..cb941dd 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -1706,5 +1706,7 @@ av_cold int ff_vc1_init_common(VC1Context *v)
v->pq = -1;
v->mvrange = 0; /* 7.1.1.18, p80 */
+ ff_vc1dsp_init(&v->vc1dsp);
+
return 0;
}
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 30fee47..67cda42 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -5631,7 +5631,6 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
ff_vc1_decode_end(avctx);
ff_h264chroma_init(&v->h264chroma, 8);
- ff_vc1dsp_init(&v->vc1dsp);
if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) {
int count = 0;
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index ec9c17b..09a9006 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -30,6 +30,7 @@
#include "h264chroma.h"
#include "rnd_avg.h"
#include "vc1dsp.h"
+#include "startcode.h"
/* Apply overlap transform to horizontal edge */
static void vc1_v_overlap_c(uint8_t *src, int stride)
@@ -947,6 +948,8 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
#endif /* CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER */
+ dsp->vc1_find_start_code_candidate = ff_startcode_find_candidate_c;
+
if (ARCH_AARCH64)
ff_vc1dsp_init_aarch64(dsp);
if (ARCH_ARM)
diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h
index 990fbc3..6a90eed 100644
--- a/libavcodec/vc1dsp.h
+++ b/libavcodec/vc1dsp.h
@@ -74,6 +74,14 @@ typedef struct VC1DSPContext {
void (*sprite_v_double_twoscale)(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1,
const uint8_t *src2a, const uint8_t *src2b, int offset2,
int alpha, int width);
+
+ /**
+ * Search buf from the start for up to size bytes. Return the index
+ * of a zero byte, or >= size if not found. Ideally, use lookahead
+ * to filter out any zero bytes that are known to not be followed by
+ * one or more further zero bytes and a one byte.
+ */
+ int (*vc1_find_start_code_candidate)(const uint8_t *buf, int size);
} VC1DSPContext;
void ff_vc1dsp_init(VC1DSPContext* c);
--
1.9.1

View File

@ -1,158 +0,0 @@
From bb74fc44081fb6d7923ce1b7ed3e3e6514695f3e Mon Sep 17 00:00:00 2001
From: Ben Avison <bavison@riscosopen.org>
Date: Wed, 5 Mar 2014 21:01:28 +0000
Subject: [PATCH 3/6] truehd: break out part of rematrix_channels into
platform-specific callback.
Verified with profiling that this doesn't have a measurable effect upon
overall performance.
---
libavcodec/mlpdec.c | 37 ++++++++++++-------------------------
libavcodec/mlpdsp.c | 33 +++++++++++++++++++++++++++++++++
libavcodec/mlpdsp.h | 23 +++++++++++++++++++++++
3 files changed, 68 insertions(+), 25 deletions(-)
diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index cbd9000..01ded5c 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -1024,7 +1024,7 @@ static void fill_noise_buffer(MLPDecodeContext *m, unsigned int substr)
static void rematrix_channels(MLPDecodeContext *m, unsigned int substr)
{
SubStream *s = &m->substream[substr];
- unsigned int mat, src_ch, i;
+ unsigned int mat;
unsigned int maxchan;
maxchan = s->max_matrix_channel;
@@ -1036,31 +1036,18 @@ static void rematrix_channels(MLPDecodeContext *m, unsigned int substr)
}
for (mat = 0; mat < s->num_primitive_matrices; mat++) {
- int matrix_noise_shift = s->matrix_noise_shift[mat];
unsigned int dest_ch = s->matrix_out_ch[mat];
- int32_t mask = MSB_MASK(s->quant_step_size[dest_ch]);
- int32_t *coeffs = s->matrix_coeff[mat];
- int index = s->num_primitive_matrices - mat;
- int index2 = 2 * index + 1;
-
- /* TODO: DSPContext? */
-
- for (i = 0; i < s->blockpos; i++) {
- int32_t bypassed_lsb = m->bypassed_lsbs[i][mat];
- int32_t *samples = m->sample_buffer[i];
- int64_t accum = 0;
-
- for (src_ch = 0; src_ch <= maxchan; src_ch++)
- accum += (int64_t) samples[src_ch] * coeffs[src_ch];
-
- if (matrix_noise_shift) {
- index &= m->access_unit_size_pow2 - 1;
- accum += m->noise_buffer[index] << (matrix_noise_shift + 7);
- index += index2;
- }
-
- samples[dest_ch] = ((accum >> 14) & mask) + bypassed_lsb;
- }
+ m->dsp.mlp_rematrix_channel(&m->sample_buffer[0][0],
+ s->matrix_coeff[mat],
+ &m->bypassed_lsbs[0][mat],
+ m->noise_buffer,
+ s->num_primitive_matrices - mat,
+ dest_ch,
+ s->blockpos,
+ maxchan,
+ s->matrix_noise_shift[mat],
+ m->access_unit_size_pow2,
+ MSB_MASK(s->quant_step_size[dest_ch]));
}
}
diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
index 4b403b8..7a359b0 100644
--- a/libavcodec/mlpdsp.c
+++ b/libavcodec/mlpdsp.c
@@ -57,9 +57,42 @@ static void mlp_filter_channel(int32_t *state, const int32_t *coeff,
}
}
+void ff_mlp_rematrix_channel(int32_t *samples,
+                             const int32_t *coeffs,
+                             const uint8_t *bypassed_lsbs,
+                             const int8_t *noise_buffer,
+                             int index,
+                             unsigned int dest_ch,
+                             uint16_t blockpos,
+                             unsigned int maxchan,
+                             int matrix_noise_shift,
+                             int access_unit_size_pow2,
+                             int32_t mask)
+{
+    unsigned int src_ch, i;
+    int index2 = 2 * index + 1; /* amount added to the noise index after each sample */
+    for (i = 0; i < blockpos; i++) {
+        int64_t accum = 0;
+
+        for (src_ch = 0; src_ch <= maxchan; src_ch++)
+            accum += (int64_t) samples[src_ch] * coeffs[src_ch];
+
+        if (matrix_noise_shift) {
+            index &= access_unit_size_pow2 - 1;
+            accum += noise_buffer[index] * (1 << (matrix_noise_shift + 7)); /* multiply, not <<: noise is signed and shifting a negative value is undefined */
+            index += index2;
+        }
+
+        samples[dest_ch] = ((accum >> 14) & mask) + *bypassed_lsbs;
+        bypassed_lsbs += MAX_CHANNELS; /* both buffers are stepped by MAX_CHANNELS per sample */
+        samples += MAX_CHANNELS;
+    }
+}
+
av_cold void ff_mlpdsp_init(MLPDSPContext *c)
{
c->mlp_filter_channel = mlp_filter_channel;
+ c->mlp_rematrix_channel = ff_mlp_rematrix_channel;
if (ARCH_ARM)
ff_mlpdsp_init_arm(c);
if (ARCH_X86)
diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
index 129bcfe..f98e9be 100644
--- a/libavcodec/mlpdsp.h
+++ b/libavcodec/mlpdsp.h
@@ -24,11 +24,34 @@
#include <stdint.h>
+void ff_mlp_rematrix_channel(int32_t *samples,
+ const int32_t *coeffs,
+ const uint8_t *bypassed_lsbs,
+ const int8_t *noise_buffer,
+ int index,
+ unsigned int dest_ch,
+ uint16_t blockpos,
+ unsigned int maxchan,
+ int matrix_noise_shift,
+ int access_unit_size_pow2,
+ int32_t mask);
+
typedef struct MLPDSPContext {
void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
int firorder, int iirorder,
unsigned int filter_shift, int32_t mask,
int blocksize, int32_t *sample_buffer);
+ void (*mlp_rematrix_channel)(int32_t *samples,
+ const int32_t *coeffs,
+ const uint8_t *bypassed_lsbs,
+ const int8_t *noise_buffer,
+ int index,
+ unsigned int dest_ch,
+ uint16_t blockpos,
+ unsigned int maxchan,
+ int matrix_noise_shift,
+ int access_unit_size_pow2,
+ int32_t mask);
} MLPDSPContext;
void ff_mlpdsp_init(MLPDSPContext *c);
--
1.9.1

View File

@ -1,401 +0,0 @@
From c39df43eae03768427243668c040de8437c4f79c Mon Sep 17 00:00:00 2001
From: Ben Avison <bavison@riscosopen.org>
Date: Wed, 23 Apr 2014 01:41:04 +0100
Subject: [PATCH 3/3] vc-1: Optimise parser (with special attention to ARM)
The previous implementation of the parser made four passes over each input
buffer (reduced to two if the container format already guaranteed the input
buffer corresponded to frames, such as with MKV). But these buffers are
often 200K in size, certainly enough to flush the data out of L1 cache, and
for many CPUs, all the way out to main memory. The passes were:
1) locate frame boundaries (not needed for MKV etc)
2) copy the data into a contiguous block (not needed for MKV etc)
3) locate the start codes within each frame
4) unescape the data between start codes
After this, the unescaped data was parsed to extract certain header fields,
but because the unescape operation was so large, this was usually also
effectively operating on uncached memory. Most of the unescaped data was
simply thrown away and never processed further. Only step 2 - because it
used memcpy - was using prefetch, making things even worse.
This patch reorganises these steps so that, aside from the copying, the
operations are performed in parallel, maximising cache utilisation. No more
than the worst-case number of bytes needed for header parsing is unescaped.
Most of the data is, in practice, only read in order to search for a start
code, for which optimised implementations already existed in the H264 codec
(notably the ARM version uses prefetch, so we end up doing both remaining
passes at maximum speed). For MKV files, we know when we've found the last
start code of interest in a given frame, so we are able to avoid doing even
that one remaining pass for most of the buffer.
In some use-cases (such as the Raspberry Pi) video decode is handled by the
GPU, but the entire elementary stream is still fed through the parser to
pick out certain elements of the header which are necessary to manage the
decode process. As you might expect, in these cases, the performance of the
parser is significant.
To measure parser performance, I used the same VC-1 elementary stream in
either an MPEG-2 transport stream or an MKV file, and fed it through ffmpeg
with -c:v copy -c:a copy -f null. These are the gperftools counts for
those streams, both filtered to only include vc1_parse() and its callees,
and unfiltered (to include the whole binary). Lower numbers are better:
Before After
File Filtered Mean StdDev Mean StdDev Confidence Change
M2TS No 861.7 8.2 650.5 8.1 100.0% +32.5%
MKV No 868.9 7.4 731.7 9.0 100.0% +18.8%
M2TS Yes 250.0 11.2 27.2 3.4 100.0% +817.9%
MKV Yes 149.0 12.8 1.7 0.8 100.0% +8526.3%
Yes, that last case shows vc1_parse() running 86 times faster! The M2TS
case does show a larger absolute improvement though, since it was worse
to begin with.
This patch has been tested with the FATE suite (albeit on x86 for speed).
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
---
libavcodec/vc1_parser.c | 284 ++++++++++++++++++++++++++++++------------------
1 file changed, 180 insertions(+), 104 deletions(-)
diff --git a/libavcodec/vc1_parser.c b/libavcodec/vc1_parser.c
index cc29ce1..4ed14bc 100644
--- a/libavcodec/vc1_parser.c
+++ b/libavcodec/vc1_parser.c
@@ -30,122 +30,88 @@
#include "vc1.h"
#include "get_bits.h"
+/** The maximum number of bytes of a sequence, entry point or
+ * frame header whose values we pay any attention to */
+#define UNESCAPED_THRESHOLD 37
+
+/** The maximum number of bytes of a sequence, entry point or
+ * frame header which must be valid memory (because they are
+ * used to update the bitstream cache in skip_bits() calls)
+ */
+#define UNESCAPED_LIMIT 144
+
+typedef enum {
+ NO_MATCH,
+ ONE_ZERO,
+ TWO_ZEROS,
+ ONE
+} VC1ParseSearchState;
+
typedef struct {
ParseContext pc;
VC1Context v;
+ uint8_t prev_start_code;
+ size_t bytes_to_skip;
+ uint8_t unesc_buffer[UNESCAPED_LIMIT];
+ size_t unesc_index;
+ VC1ParseSearchState search_state;
} VC1ParseContext;
-static void vc1_extract_headers(AVCodecParserContext *s, AVCodecContext *avctx,
- const uint8_t *buf, int buf_size)
+static void vc1_extract_header(AVCodecParserContext *s, AVCodecContext *avctx,
+ const uint8_t *buf, int buf_size)
{
+ /* Parse the header we just finished unescaping */
VC1ParseContext *vpc = s->priv_data;
GetBitContext gb;
- const uint8_t *start, *end, *next;
- uint8_t *buf2 = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
-
+ int ret;
vpc->v.s.avctx = avctx;
vpc->v.parse_only = 1;
- vpc->v.first_pic_header_flag = 1;
- next = buf;
- s->repeat_pict = 0;
-
- for(start = buf, end = buf + buf_size; next < end; start = next){
- int buf2_size, size;
- int ret;
-
- next = find_next_marker(start + 4, end);
- size = next - start - 4;
- buf2_size = vc1_unescape_buffer(start + 4, size, buf2);
- init_get_bits(&gb, buf2, buf2_size * 8);
- if(size <= 0) continue;
- switch(AV_RB32(start)){
- case VC1_CODE_SEQHDR:
- ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb);
- break;
- case VC1_CODE_ENTRYPOINT:
- ff_vc1_decode_entry_point(avctx, &vpc->v, &gb);
- break;
- case VC1_CODE_FRAME:
- if(vpc->v.profile < PROFILE_ADVANCED)
- ret = ff_vc1_parse_frame_header (&vpc->v, &gb);
- else
- ret = ff_vc1_parse_frame_header_adv(&vpc->v, &gb);
-
- if (ret < 0)
- break;
-
- /* keep AV_PICTURE_TYPE_BI internal to VC1 */
- if (vpc->v.s.pict_type == AV_PICTURE_TYPE_BI)
- s->pict_type = AV_PICTURE_TYPE_B;
- else
- s->pict_type = vpc->v.s.pict_type;
-
- if (avctx->ticks_per_frame > 1){
- // process pulldown flags
- s->repeat_pict = 1;
- // Pulldown flags are only valid when 'broadcast' has been set.
- // So ticks_per_frame will be 2
- if (vpc->v.rff){
- // repeat field
- s->repeat_pict = 2;
- }else if (vpc->v.rptfrm){
- // repeat frames
- s->repeat_pict = vpc->v.rptfrm * 2 + 1;
- }
- }
-
- if (vpc->v.broadcast && vpc->v.interlace && !vpc->v.psf)
- s->field_order = vpc->v.tff ? AV_FIELD_TT : AV_FIELD_BB;
- else
- s->field_order = AV_FIELD_PROGRESSIVE;
+ init_get_bits(&gb, buf, buf_size * 8);
+ switch (vpc->prev_start_code) {
+ case VC1_CODE_SEQHDR & 0xFF:
+ ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb);
+ break;
+ case VC1_CODE_ENTRYPOINT & 0xFF:
+ ff_vc1_decode_entry_point(avctx, &vpc->v, &gb);
+ break;
+ case VC1_CODE_FRAME & 0xFF:
+ if(vpc->v.profile < PROFILE_ADVANCED)
+ ret = ff_vc1_parse_frame_header (&vpc->v, &gb);
+ else
+ ret = ff_vc1_parse_frame_header_adv(&vpc->v, &gb);
+ if (ret < 0)
break;
- }
- }
- av_free(buf2);
-}
+ /* keep AV_PICTURE_TYPE_BI internal to VC1 */
+ if (vpc->v.s.pict_type == AV_PICTURE_TYPE_BI)
+ s->pict_type = AV_PICTURE_TYPE_B;
+ else
+ s->pict_type = vpc->v.s.pict_type;
-/**
- * Find the end of the current frame in the bitstream.
- * @return the position of the first byte of the next frame, or -1
- */
-static int vc1_find_frame_end(ParseContext *pc, const uint8_t *buf,
- int buf_size) {
- int pic_found, i;
- uint32_t state;
-
- pic_found= pc->frame_start_found;
- state= pc->state;
-
- i=0;
- if(!pic_found){
- for(i=0; i<buf_size; i++){
- state= (state<<8) | buf[i];
- if(state == VC1_CODE_FRAME || state == VC1_CODE_FIELD){
- i++;
- pic_found=1;
- break;
+ if (avctx->ticks_per_frame > 1){
+ // process pulldown flags
+ s->repeat_pict = 1;
+ // Pulldown flags are only valid when 'broadcast' has been set.
+ // So ticks_per_frame will be 2
+ if (vpc->v.rff){
+ // repeat field
+ s->repeat_pict = 2;
+ }else if (vpc->v.rptfrm){
+ // repeat frames
+ s->repeat_pict = vpc->v.rptfrm * 2 + 1;
}
+ }else{
+ s->repeat_pict = 0;
}
- }
- if(pic_found){
- /* EOF considered as end of frame */
- if (buf_size == 0)
- return 0;
- for(; i<buf_size; i++){
- state= (state<<8) | buf[i];
- if(IS_MARKER(state) && state != VC1_CODE_FIELD && state != VC1_CODE_SLICE){
- pc->frame_start_found=0;
- pc->state=-1;
- return i-3;
- }
- }
+ if (vpc->v.broadcast && vpc->v.interlace && !vpc->v.psf)
+ s->field_order = vpc->v.tff ? AV_FIELD_TT : AV_FIELD_BB;
+ else
+ s->field_order = AV_FIELD_PROGRESSIVE;
+
+ break;
}
- pc->frame_start_found= pic_found;
- pc->state= state;
- return END_NOT_FOUND;
}
static int vc1_parse(AVCodecParserContext *s,
@@ -153,22 +119,127 @@ static int vc1_parse(AVCodecParserContext *s,
const uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size)
{
+ /* Here we do the searching for frame boundaries and headers at
+ * the same time. Only a minimal amount at the start of each
+ * header is unescaped. */
VC1ParseContext *vpc = s->priv_data;
- int next;
+ int pic_found = vpc->pc.frame_start_found;
+ uint8_t *unesc_buffer = vpc->unesc_buffer;
+ size_t unesc_index = vpc->unesc_index;
+ VC1ParseSearchState search_state = vpc->search_state;
+ int next = END_NOT_FOUND;
+ int i = vpc->bytes_to_skip;
+
+ if (pic_found && buf_size == 0) {
+ /* EOF considered as end of frame */
+ memset(unesc_buffer + unesc_index, 0, UNESCAPED_THRESHOLD - unesc_index);
+ vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
+ next = 0;
+ }
+ while (i < buf_size) {
+ int start_code_found = 0;
+ uint8_t b;
+ while (i < buf_size && unesc_index < UNESCAPED_THRESHOLD) {
+ b = buf[i++];
+ unesc_buffer[unesc_index++] = b;
+ if (search_state <= ONE_ZERO)
+ search_state = b ? NO_MATCH : search_state + 1;
+ else if (search_state == TWO_ZEROS) {
+ if (b == 1)
+ search_state = ONE;
+ else if (b > 1) {
+ if (b == 3)
+ unesc_index--; // swallow emulation prevention byte
+ search_state = NO_MATCH;
+ }
+ }
+ else { // search_state == ONE
+ // Header unescaping terminates early due to detection of next start code
+ search_state = NO_MATCH;
+ start_code_found = 1;
+ break;
+ }
+ }
+ if ((s->flags & PARSER_FLAG_COMPLETE_FRAMES) &&
+ unesc_index >= UNESCAPED_THRESHOLD &&
+ vpc->prev_start_code == (VC1_CODE_FRAME & 0xFF))
+ {
+ // No need to keep scanning the rest of the buffer for
+ // start codes if we know it contains a complete frame and
+ // we've already unescaped all we need of the frame header
+ vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
+ break;
+ }
+ if (unesc_index >= UNESCAPED_THRESHOLD && !start_code_found) {
+ while (i < buf_size) {
+ if (search_state == NO_MATCH) {
+ i += vpc->v.vc1dsp.vc1_find_start_code_candidate(buf + i, buf_size - i);
+ if (i < buf_size) {
+ search_state = ONE_ZERO;
+ }
+ i++;
+ } else {
+ b = buf[i++];
+ if (search_state == ONE_ZERO)
+ search_state = b ? NO_MATCH : TWO_ZEROS;
+ else if (search_state == TWO_ZEROS) {
+ if (b >= 1)
+ search_state = b == 1 ? ONE : NO_MATCH;
+ }
+ else { // search_state == ONE
+ search_state = NO_MATCH;
+ start_code_found = 1;
+ break;
+ }
+ }
+ }
+ }
+ if (start_code_found) {
+ vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
+
+ vpc->prev_start_code = b;
+ unesc_index = 0;
+
+ if (!(s->flags & PARSER_FLAG_COMPLETE_FRAMES)) {
+ if (!pic_found && (b == (VC1_CODE_FRAME & 0xFF) || b == (VC1_CODE_FIELD & 0xFF))) {
+ pic_found = 1;
+ }
+ else if (pic_found && b != (VC1_CODE_FIELD & 0xFF) && b != (VC1_CODE_SLICE & 0xFF)) {
+ next = i - 4;
+ pic_found = b == (VC1_CODE_FRAME & 0xFF);
+ break;
+ }
+ }
+ }
+ }
- if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
- next= buf_size;
- }else{
- next= vc1_find_frame_end(&vpc->pc, buf, buf_size);
+ vpc->pc.frame_start_found = pic_found;
+ vpc->unesc_index = unesc_index;
+ vpc->search_state = search_state;
+ if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
+ next = buf_size;
+ } else {
if (ff_combine_frame(&vpc->pc, next, &buf, &buf_size) < 0) {
+ vpc->bytes_to_skip = 0;
*poutbuf = NULL;
*poutbuf_size = 0;
return buf_size;
}
}
- vc1_extract_headers(s, avctx, buf, buf_size);
+ vpc->v.first_pic_header_flag = 1;
+
+ /* If we return with a valid pointer to a combined frame buffer
+ * then on the next call we'll have been unhelpfully rewound
+ * by up to 4 bytes (depending upon whether the start code
+ * overlapped the input buffer, and if so by how much). We don't
+ * want this: it will either cause spurious second detections of
+ * the start code we've already seen, or cause extra bytes to be
+ * inserted at the start of the unescaped buffer. */
+ vpc->bytes_to_skip = 4;
+ if (next < 0)
+ vpc->bytes_to_skip += next;
*poutbuf = buf;
*poutbuf_size = buf_size;
@@ -199,6 +270,11 @@ static av_cold int vc1_parse_init(AVCodecParserContext *s)
{
VC1ParseContext *vpc = s->priv_data;
vpc->v.s.slice_context_count = 1;
+ vpc->v.first_pic_header_flag = 1;
+ vpc->prev_start_code = 0;
+ vpc->bytes_to_skip = 0;
+ vpc->unesc_index = 0;
+ vpc->search_state = NO_MATCH;
return ff_vc1_init_common(&vpc->v);
}
--
1.9.1

View File

@ -1,285 +0,0 @@
From 98428a8cf593587b403076bb54b46cc70ed17ff2 Mon Sep 17 00:00:00 2001
From: Ben Avison <bavison@riscosopen.org>
Date: Mon, 10 Mar 2014 14:42:05 +0000
Subject: [PATCH 4/6] truehd: add hand-scheduled ARM asm version of
ff_mlp_rematrix_channel.
Profiling results for overall audio decode and the rematrix_channels function
in particular are as follows:
Before After
Mean StdDev Mean StdDev Confidence Change
6:2 total 370.8 17.0 348.8 20.1 99.9% +6.3%
6:2 function 46.4 8.4 45.8 6.6 18.0% +1.2% (insignificant)
8:2 total 343.2 19.0 339.1 15.4 54.7% +1.2% (insignificant)
8:2 function 38.9 3.9 40.2 6.9 52.4% -3.2% (insignificant)
6:6 total 658.4 15.7 604.6 20.8 100.0% +8.9%
6:6 function 109.0 8.7 59.5 5.4 100.0% +83.3%
8:8 total 896.2 24.5 766.4 17.6 100.0% +16.9%
8:8 function 223.4 12.8 93.8 5.0 100.0% +138.3%
The assembly version has also been tested with a fuzz tester to ensure that
any combinations of inputs not exercised by my available test streams still
generate mathematically identical results to the C version.
---
libavcodec/arm/mlpdsp_arm.S | 222 +++++++++++++++++++++++++++++++++++++++
libavcodec/arm/mlpdsp_init_arm.c | 12 +++
2 files changed, 234 insertions(+)
diff --git a/libavcodec/arm/mlpdsp_arm.S b/libavcodec/arm/mlpdsp_arm.S
index 615819d..9b51d0c 100644
--- a/libavcodec/arm/mlpdsp_arm.S
+++ b/libavcodec/arm/mlpdsp_arm.S
@@ -431,3 +431,225 @@ endfunc
.unreq ST3
.unreq I
.unreq PSAMP
+
+/********************************************************************/
+
+PSA .req a1 // samples
+PCO .req a2 // coeffs
+PBL .req a3 // bypassed_lsbs
+INDEX .req a4
+CO0 .req v1
+CO1 .req v2
+CO2 .req v3
+CO3 .req v4
+SA0 .req v5
+SA1 .req v6
+SA2 .req sl
+SA3 .req fp
+AC0 .req ip
+AC1 .req lr
+NOISE .req SA0
+LSB .req SA1
+DCH .req SA2 // dest_ch
+MASK .req SA3
+
+ // INDEX is used as follows:
+ // bits 0..6 index2 (values up to 17, but wider so that we can
+ // add to index field without needing to mask)
+ // bits 7..14 i (values up to 160)
+ // bit 15 underflow detect for i
+ // bits 25..31 (if access_unit_size_pow2 == 128) \ index
+ // bits 26..31 (if access_unit_size_pow2 == 64) /
+
+.macro implement_rematrix shift, index_mask, mask_minus1, maxchan
+ .if \maxchan == 1
+ // We can just leave the coefficients in registers in this case
+ ldrd CO0, CO1, [PCO]
+ .endif
+1:
+ .if \maxchan == 1
+ ldrd SA0, SA1, [PSA]
+ smull AC0, AC1, CO0, SA0
+ .elseif \maxchan == 5
+ ldr CO0, [PCO, #0]
+ ldr SA0, [PSA, #0]
+ ldr CO1, [PCO, #4]
+ ldr SA1, [PSA, #4]
+ ldrd CO2, CO3, [PCO, #8]
+ smull AC0, AC1, CO0, SA0
+ ldrd SA2, SA3, [PSA, #8]
+ smlal AC0, AC1, CO1, SA1
+ ldrd CO0, CO1, [PCO, #16]
+ smlal AC0, AC1, CO2, SA2
+ ldrd SA0, SA1, [PSA, #16]
+ smlal AC0, AC1, CO3, SA3
+ smlal AC0, AC1, CO0, SA0
+ .else // \maxchan == 7
+ ldr CO2, [PCO, #0]
+ ldr SA2, [PSA, #0]
+ ldr CO3, [PCO, #4]
+ ldr SA3, [PSA, #4]
+ ldrd CO0, CO1, [PCO, #8]
+ smull AC0, AC1, CO2, SA2
+ ldrd SA0, SA1, [PSA, #8]
+ smlal AC0, AC1, CO3, SA3
+ ldrd CO2, CO3, [PCO, #16]
+ smlal AC0, AC1, CO0, SA0
+ ldrd SA2, SA3, [PSA, #16]
+ smlal AC0, AC1, CO1, SA1
+ ldrd CO0, CO1, [PCO, #24]
+ smlal AC0, AC1, CO2, SA2
+ ldrd SA0, SA1, [PSA, #24]
+ smlal AC0, AC1, CO3, SA3
+ smlal AC0, AC1, CO0, SA0
+ .endif
+ ldm sp, {NOISE, DCH, MASK}
+ smlal AC0, AC1, CO1, SA1
+ .if \shift != 0
+ .if \index_mask == 63
+ add NOISE, NOISE, INDEX, lsr #32-6
+ ldrb LSB, [PBL], #MAX_CHANNELS
+ ldrsb NOISE, [NOISE]
+ add INDEX, INDEX, INDEX, lsl #32-6
+ .else // \index_mask == 127
+ add NOISE, NOISE, INDEX, lsr #32-7
+ ldrb LSB, [PBL], #MAX_CHANNELS
+ ldrsb NOISE, [NOISE]
+ add INDEX, INDEX, INDEX, lsl #32-7
+ .endif
+ sub INDEX, INDEX, #1<<7
+ adds AC0, AC0, NOISE, lsl #\shift + 7
+ adc AC1, AC1, NOISE, asr #31
+ .else
+ ldrb LSB, [PBL], #MAX_CHANNELS
+ sub INDEX, INDEX, #1<<7
+ .endif
+ add PSA, PSA, #MAX_CHANNELS*4
+ mov AC0, AC0, lsr #14
+ orr AC0, AC0, AC1, lsl #18
+ .if !\mask_minus1
+ and AC0, AC0, MASK
+ .endif
+ add AC0, AC0, LSB
+ tst INDEX, #1<<15
+ str AC0, [PSA, DCH, lsl #2] // DCH is precompensated for the early increment of PSA
+ beq 1b
+ b 98f
+.endm
+
+.macro switch_on_maxchan shift, index_mask, mask_minus1
+ cmp v4, #5
+ blo 51f
+ beq 50f
+ implement_rematrix \shift, \index_mask, \mask_minus1, 7
+50: implement_rematrix \shift, \index_mask, \mask_minus1, 5
+51: implement_rematrix \shift, \index_mask, \mask_minus1, 1
+.endm
+
+.macro switch_on_mask shift, index_mask
+ cmp sl, #-1
+ bne 40f
+ switch_on_maxchan \shift, \index_mask, 1
+40: switch_on_maxchan \shift, \index_mask, 0
+.endm
+
+.macro switch_on_au_size shift
+ .if \shift == 0
+ switch_on_mask \shift, undefined
+ .else
+ teq v6, #64
+ bne 30f
+ orr INDEX, INDEX, v1, lsl #32-6
+ switch_on_mask \shift, 63
+30: orr INDEX, INDEX, v1, lsl #32-7
+ switch_on_mask \shift, 127
+ .endif
+.endm
+
+/* void ff_mlp_rematrix_channel_arm(int32_t *samples,
+ * const int32_t *coeffs,
+ * const uint8_t *bypassed_lsbs,
+ * const int8_t *noise_buffer,
+ * int index,
+ * unsigned int dest_ch,
+ * uint16_t blockpos,
+ * unsigned int maxchan,
+ * int matrix_noise_shift,
+ * int access_unit_size_pow2,
+ * int32_t mask);
+ */
+function ff_mlp_rematrix_channel_arm, export=1
+ push {v1-fp,lr}
+ add v1, sp, #9*4 // point at arguments on stack
+ ldm v1, {v1-sl}
+ teq v4, #1
+ itt ne
+ teqne v4, #5
+ teqne v4, #7
+ bne 99f
+ teq v6, #64
+ it ne
+ teqne v6, #128
+ bne 99f
+ sub v2, v2, #MAX_CHANNELS
+ push {a4,v2,sl} // initialise NOISE,DCH,MASK; make sp dword-aligned
+ movs INDEX, v3, lsl #7
+ beq 98f // just in case, do nothing if blockpos = 0
+ subs INDEX, INDEX, #1<<7 // offset by 1 so we borrow at the right time
+ adc lr, v1, v1 // calculate index2 (C was set by preceding subs)
+ orr INDEX, INDEX, lr
+ // Switch on matrix_noise_shift: values 0 and 1 are
+ // disproportionately common so do those in a form the branch
+ // predictor can accelerate. Values can only go up to 15.
+ cmp v5, #1
+ beq 11f
+ blo 10f
+A ldr pc, [pc, v5, lsl #2]
+T tbh [pc, v5, lsl #1]
+0:
+A .word 0, 0, 0, 12f, 13f, 14f, 15f, 16f, 17f, 18f, 19f, 20f, 21f, 22f, 23f, 24f, 25f
+T .hword 0, 0, (12f - 0b) / 2, (13f - 0b) / 2, (14f - 0b) / 2, (15f - 0b) / 2
+T .hword (16f - 0b) / 2, (17f - 0b) / 2, (18f - 0b) / 2, (19f - 0b) / 2
+T .hword (20f - 0b) / 2, (21f - 0b) / 2, (22f - 0b) / 2, (23f - 0b) / 2, (24f - 0b) / 2, (25f - 0b) / 2
+10: switch_on_au_size 0
+11: switch_on_au_size 1
+12: switch_on_au_size 2
+13: switch_on_au_size 3
+14: switch_on_au_size 4
+15: switch_on_au_size 5
+16: switch_on_au_size 6
+17: switch_on_au_size 7
+18: switch_on_au_size 8
+19: switch_on_au_size 9
+20: switch_on_au_size 10
+21: switch_on_au_size 11
+22: switch_on_au_size 12
+23: switch_on_au_size 13
+24: switch_on_au_size 14
+25: switch_on_au_size 15
+
+98: add sp, sp, #3*4
+ pop {v1-fp,pc}
+99: // Can't handle these parameters, drop back to C
+ pop {v1-fp,lr}
+ b X(ff_mlp_rematrix_channel)
+endfunc
+
+ .unreq PSA
+ .unreq PCO
+ .unreq PBL
+ .unreq INDEX
+ .unreq CO0
+ .unreq CO1
+ .unreq CO2
+ .unreq CO3
+ .unreq SA0
+ .unreq SA1
+ .unreq SA2
+ .unreq SA3
+ .unreq AC0
+ .unreq AC1
+ .unreq NOISE
+ .unreq LSB
+ .unreq DCH
+ .unreq MASK
diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
index 9a14815..1bb2276 100644
--- a/libavcodec/arm/mlpdsp_init_arm.c
+++ b/libavcodec/arm/mlpdsp_init_arm.c
@@ -29,8 +29,20 @@ void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff,
int firorder, int iirorder,
unsigned int filter_shift, int32_t mask,
int blocksize, int32_t *sample_buffer);
+void ff_mlp_rematrix_channel_arm(int32_t *samples,
+ const int32_t *coeffs,
+ const uint8_t *bypassed_lsbs,
+ const int8_t *noise_buffer,
+ int index,
+ unsigned int dest_ch,
+ uint16_t blockpos,
+ unsigned int maxchan,
+ int matrix_noise_shift,
+ int access_unit_size_pow2,
+ int32_t mask);
av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
{
c->mlp_filter_channel = ff_mlp_filter_channel_arm;
+ c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
}
--
1.9.1

View File

@ -1,197 +0,0 @@
From 5bfcb7a691eb63c56f1485b60f399d79ff943799 Mon Sep 17 00:00:00 2001
From: Ben Avison <bavison@riscosopen.org>
Date: Wed, 12 Mar 2014 18:18:39 +0000
Subject: [PATCH 5/6] truehd: break out part of output_data into
platform-specific callback.
Verified with profiling that this doesn't have a measurable effect upon
overall performance.
---
libavcodec/mlpdec.c | 40 +++++++++++++++++++++++-----------------
libavcodec/mlpdsp.c | 38 ++++++++++++++++++++++++++++++++++++++
libavcodec/mlpdsp.h | 22 ++++++++++++++++++++++
3 files changed, 83 insertions(+), 17 deletions(-)
diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index 01ded5c..061dabc 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -363,6 +363,10 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
m->avctx->sample_fmt = AV_SAMPLE_FMT_S32;
else
m->avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+ m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(m->substream[m->max_decoded_substream].ch_assign,
+ m->substream[m->max_decoded_substream].output_shift,
+ m->substream[m->max_decoded_substream].max_matrix_channel,
+ m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
m->params_valid = 1;
for (substr = 0; substr < MAX_SUBSTREAMS; substr++)
@@ -612,6 +616,10 @@ FF_ENABLE_DEPRECATION_WARNINGS
if (substr == m->max_decoded_substream) {
m->avctx->channels = s->max_matrix_channel + 1;
m->avctx->channel_layout = s->ch_layout;
+ m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
+ s->output_shift,
+ s->max_matrix_channel,
+ m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
if (m->avctx->codec_id == AV_CODEC_ID_MLP && m->needs_reordering) {
if (m->avctx->channel_layout == (AV_CH_LAYOUT_QUAD|AV_CH_LOW_FREQUENCY) ||
@@ -857,9 +865,15 @@ static int read_decoding_params(MLPDecodeContext *m, GetBitContext *gbp,
return ret;
if (s->param_presence_flags & PARAM_OUTSHIFT)
- if (get_bits1(gbp))
+ if (get_bits1(gbp)) {
for (ch = 0; ch <= s->max_matrix_channel; ch++)
s->output_shift[ch] = get_sbits(gbp, 4);
+ if (substr == m->max_decoded_substream)
+ m->dsp.mlp_pack_output = m->dsp.mlp_select_pack_output(s->ch_assign,
+ s->output_shift,
+ s->max_matrix_channel,
+ m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
+ }
if (s->param_presence_flags & PARAM_QUANTSTEP)
if (get_bits1(gbp))
@@ -1058,9 +1072,6 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
{
AVCodecContext *avctx = m->avctx;
SubStream *s = &m->substream[substr];
- unsigned int i, out_ch = 0;
- int32_t *data_32;
- int16_t *data_16;
int ret;
int is32 = (m->avctx->sample_fmt == AV_SAMPLE_FMT_S32);
@@ -1078,19 +1089,14 @@ static int output_data(MLPDecodeContext *m, unsigned int substr,
frame->nb_samples = s->blockpos;
if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
return ret;
- data_32 = (int32_t *)frame->data[0];
- data_16 = (int16_t *)frame->data[0];
-
- for (i = 0; i < s->blockpos; i++) {
- for (out_ch = 0; out_ch <= s->max_matrix_channel; out_ch++) {
- int mat_ch = s->ch_assign[out_ch];
- int32_t sample = m->sample_buffer[i][mat_ch]
- << s->output_shift[mat_ch];
- s->lossless_check_data ^= (sample & 0xffffff) << mat_ch;
- if (is32) *data_32++ = sample << 8;
- else *data_16++ = sample >> 8;
- }
- }
+ s->lossless_check_data = m->dsp.mlp_pack_output(s->lossless_check_data,
+ s->blockpos,
+ m->sample_buffer,
+ frame->data[0],
+ s->ch_assign,
+ s->output_shift,
+ s->max_matrix_channel,
+ is32);
/* Update matrix encoding side data */
if ((ret = ff_side_data_update_matrix_encoding(frame, s->matrix_encoding)) < 0)
diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
index 7a359b0..3ae8c37 100644
--- a/libavcodec/mlpdsp.c
+++ b/libavcodec/mlpdsp.c
@@ -89,10 +89,48 @@ void ff_mlp_rematrix_channel(int32_t *samples,
}
}
+static int32_t (*mlp_select_pack_output(uint8_t *ch_assign,
+ int8_t *output_shift,
+ uint8_t max_matrix_channel,
+ int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int)
+{
+ return ff_mlp_pack_output;
+}
+
+int32_t ff_mlp_pack_output(int32_t lossless_check_data,
+ uint16_t blockpos,
+ int32_t (*sample_buffer)[MAX_CHANNELS],
+ void *data,
+ uint8_t *ch_assign,
+ int8_t *output_shift,
+ uint8_t max_matrix_channel,
+ int is32)
+{
+ unsigned int i, out_ch = 0;
+ int32_t *data_32 = data;
+ int16_t *data_16 = data;
+
+ for (i = 0; i < blockpos; i++) {
+ for (out_ch = 0; out_ch <= max_matrix_channel; out_ch++) {
+ int mat_ch = ch_assign[out_ch];
+ int32_t sample = sample_buffer[i][mat_ch]
+ << output_shift[mat_ch];
+ lossless_check_data ^= (sample & 0xffffff) << mat_ch;
+ if (is32)
+ *data_32++ = sample << 8;
+ else
+ *data_16++ = sample >> 8;
+ }
+ }
+ return lossless_check_data;
+}
+
av_cold void ff_mlpdsp_init(MLPDSPContext *c)
{
c->mlp_filter_channel = mlp_filter_channel;
c->mlp_rematrix_channel = ff_mlp_rematrix_channel;
+ c->mlp_select_pack_output = mlp_select_pack_output;
+ c->mlp_pack_output = ff_mlp_pack_output;
if (ARCH_ARM)
ff_mlpdsp_init_arm(c);
if (ARCH_X86)
diff --git a/libavcodec/mlpdsp.h b/libavcodec/mlpdsp.h
index f98e9be..a0edeb7 100644
--- a/libavcodec/mlpdsp.h
+++ b/libavcodec/mlpdsp.h
@@ -23,6 +23,7 @@
#define AVCODEC_MLPDSP_H
#include <stdint.h>
+#include "mlp.h"
void ff_mlp_rematrix_channel(int32_t *samples,
const int32_t *coeffs,
@@ -36,6 +37,15 @@ void ff_mlp_rematrix_channel(int32_t *samples,
int access_unit_size_pow2,
int32_t mask);
+int32_t ff_mlp_pack_output(int32_t lossless_check_data,
+ uint16_t blockpos,
+ int32_t (*sample_buffer)[MAX_CHANNELS],
+ void *data,
+ uint8_t *ch_assign,
+ int8_t *output_shift,
+ uint8_t max_matrix_channel,
+ int is32);
+
typedef struct MLPDSPContext {
void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
int firorder, int iirorder,
@@ -52,6 +62,18 @@ typedef struct MLPDSPContext {
int matrix_noise_shift,
int access_unit_size_pow2,
int32_t mask);
+ int32_t (*(*mlp_select_pack_output)(uint8_t *ch_assign,
+ int8_t *output_shift,
+ uint8_t max_matrix_channel,
+ int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int);
+ int32_t (*mlp_pack_output)(int32_t lossless_check_data,
+ uint16_t blockpos,
+ int32_t (*sample_buffer)[MAX_CHANNELS],
+ void *data,
+ uint8_t *ch_assign,
+ int8_t *output_shift,
+ uint8_t max_matrix_channel,
+ int is32);
} MLPDSPContext;
void ff_mlpdsp_init(MLPDSPContext *c);
--
1.9.1

View File

@ -1,689 +0,0 @@
From c647209386bd811cc1c33b4fc8ec17a00f8c8ded Mon Sep 17 00:00:00 2001
From: Ben Avison <bavison@riscosopen.org>
Date: Thu, 13 Mar 2014 00:21:55 +0000
Subject: [PATCH 6/6] truehd: add hand-scheduled ARM asm version of
ff_mlp_pack_output.
Profiling results for overall decode and the output_data function in
particular are as follows:
Before After
Mean StdDev Mean StdDev Confidence Change
6:2 total 339.6 15.1 329.3 16.0 95.8% +3.1% (insignificant)
6:2 function 24.6 6.0 9.9 3.1 100.0% +148.5%
8:2 total 324.5 15.5 323.6 14.3 15.2% +0.3% (insignificant)
8:2 function 20.4 3.9 9.9 3.4 100.0% +104.7%
6:6 total 572.8 20.6 539.9 24.2 100.0% +6.1%
6:6 function 54.5 5.6 16.0 3.8 100.0% +240.9%
8:8 total 741.5 21.2 702.5 18.5 100.0% +5.6%
8:8 function 63.9 7.6 18.4 4.8 100.0% +247.3%
The assembly version has also been tested with a fuzz tester to ensure that
any combinations of inputs not exercised by my available test streams still
generate mathematically identical results to the C version.
---
libavcodec/arm/Makefile | 1 +
libavcodec/arm/mlpdsp_armv6.S | 530 +++++++++++++++++++++++++++++++++++++++
libavcodec/arm/mlpdsp_init_arm.c | 96 +++++++
3 files changed, 627 insertions(+)
create mode 100644 libavcodec/arm/mlpdsp_armv6.S
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index ba673b1..7b2f923 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -52,6 +52,7 @@ ARMV6-OBJS-$(CONFIG_AC3DSP) += arm/ac3dsp_armv6.o
ARMV6-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_armv6.o
ARMV6-OBJS-$(CONFIG_HPELDSP) += arm/hpeldsp_init_armv6.o \
arm/hpeldsp_armv6.o
+ARMV6-OBJS-$(CONFIG_MLP_DECODER) += arm/mlpdsp_armv6.o
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o
ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \
arm/vp8dsp_init_armv6.o \
diff --git a/libavcodec/arm/mlpdsp_armv6.S b/libavcodec/arm/mlpdsp_armv6.S
new file mode 100644
index 0000000..05a2c85
--- /dev/null
+++ b/libavcodec/arm/mlpdsp_armv6.S
@@ -0,0 +1,530 @@
+/*
+ * Copyright (c) 2014 RISC OS Open Ltd
+ * Author: Ben Avison <bavison@riscosopen.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/arm/asm.S"
+
+.macro loadregoffsh2 group, index, base, offgroup, offindex
+ .altmacro
+ loadregoffsh2_ \group, %(\index), \base, \offgroup, %(\offindex)
+ .noaltmacro
+.endm
+
+.macro loadregoffsh2_ group, index, base, offgroup, offindex
+ ldr \group\index, [\base, \offgroup\offindex, lsl #2]
+.endm
+
+.macro eorlslreg check, data, group, index
+ .altmacro
+ eorlslreg_ \check, \data, \group, %(\index)
+ .noaltmacro
+.endm
+
+.macro eorlslreg_ check, data, group, index
+ eor \check, \check, \data, lsl \group\index
+.endm
+
+.macro decr_modulo var, by, modulus
+ .set \var, \var - \by
+ .if \var == 0
+ .set \var, \modulus
+ .endif
+.endm
+
+ .macro load_group1 size, channels, r0, r1, r2, r3, pointer_dead=0
+ .if \size == 2
+ ldrd \r0, \r1, [IN], #(\size + 8 - \channels) * 4
+ .else // size == 4
+ .if IDX1 > 4 || \channels==8
+ ldm IN!, {\r0, \r1, \r2, \r3}
+ .else
+ ldm IN, {\r0, \r1, \r2, \r3}
+ .if !\pointer_dead
+ add IN, IN, #(4 + 8 - \channels) * 4
+ .endif
+ .endif
+ .endif
+ decr_modulo IDX1, \size, \channels
+ .endm
+
+ .macro load_group2 size, channels, r0, r1, r2, r3, pointer_dead=0
+ .if \size == 2
+ .if IDX1 > 2
+ ldm IN!, {\r2, \r3}
+ .else
+//A .ifc \r2, ip
+//A .if \pointer_dead
+//A ldm IN, {\r2, \r3}
+//A .else
+//A ldr \r2, [IN], #4
+//A ldr \r3, [IN], #(\size - 1 + 8 - \channels) * 4
+//A .endif
+//A .else
+ ldrd \r2, \r3, [IN], #(\size + 8 - \channels) * 4
+//A .endif
+ .endif
+ .endif
+ decr_modulo IDX1, \size, \channels
+ .endm
+
+.macro implement_pack inorder, channels, shift
+.if \inorder
+.ifc \shift, mixed
+
+CHECK .req a1
+COUNT .req a2
+IN .req a3
+OUT .req a4
+DAT0 .req v1
+DAT1 .req v2
+DAT2 .req v3
+DAT3 .req v4
+SHIFT0 .req v5
+SHIFT1 .req v6
+SHIFT2 .req sl
+SHIFT3 .req fp
+SHIFT4 .req ip
+SHIFT5 .req lr
+
+ .macro output4words
+ .set SIZE_GROUP1, IDX1
+ .if SIZE_GROUP1 > 4
+ .set SIZE_GROUP1, 4
+ .endif
+ .set SIZE_GROUP2, 4 - SIZE_GROUP1
+ load_group1 SIZE_GROUP1, \channels, DAT0, DAT1, DAT2, DAT3
+ load_group2 SIZE_GROUP2, \channels, DAT0, DAT1, DAT2, DAT3
+ .if \channels == 2
+ lsl DAT0, SHIFT0
+ lsl DAT1, SHIFT1
+ lsl DAT2, SHIFT0
+ lsl DAT3, SHIFT1
+ .elseif \channels == 6
+ .if IDX2 == 6
+ lsl DAT0, SHIFT0
+ lsl DAT1, SHIFT1
+ lsl DAT2, SHIFT2
+ lsl DAT3, SHIFT3
+ .elseif IDX2 == 2
+ lsl DAT0, SHIFT4
+ lsl DAT1, SHIFT5
+ lsl DAT2, SHIFT0
+ lsl DAT3, SHIFT1
+ .else // IDX2 == 4
+ lsl DAT0, SHIFT2
+ lsl DAT1, SHIFT3
+ lsl DAT2, SHIFT4
+ lsl DAT3, SHIFT5
+ .endif
+ .elseif \channels == 8
+ .if IDX2 == 8
+ uxtb SHIFT0, SHIFT4, ror #0
+ uxtb SHIFT1, SHIFT4, ror #8
+ uxtb SHIFT2, SHIFT4, ror #16
+ uxtb SHIFT3, SHIFT4, ror #24
+ .else
+ uxtb SHIFT0, SHIFT5, ror #0
+ uxtb SHIFT1, SHIFT5, ror #8
+ uxtb SHIFT2, SHIFT5, ror #16
+ uxtb SHIFT3, SHIFT5, ror #24
+ .endif
+ lsl DAT0, SHIFT0
+ lsl DAT1, SHIFT1
+ lsl DAT2, SHIFT2
+ lsl DAT3, SHIFT3
+ .endif
+ eor CHECK, CHECK, DAT0, lsr #8 - (\channels - IDX2)
+ eor CHECK, CHECK, DAT1, lsr #7 - (\channels - IDX2)
+ decr_modulo IDX2, 2, \channels
+ eor CHECK, CHECK, DAT2, lsr #8 - (\channels - IDX2)
+ eor CHECK, CHECK, DAT3, lsr #7 - (\channels - IDX2)
+ decr_modulo IDX2, 2, \channels
+ stm OUT!, {DAT0 - DAT3}
+ .endm
+
+ .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 4)
+ .if (WORDS_PER_LOOP % 2) == 0
+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
+ .endif
+ .if (WORDS_PER_LOOP % 2) == 0
+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
+ .endif
+ .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
+ .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
+
+function ff_mlp_pack_output_inorder_\channels\()ch_mixedshift_armv6, export=1
+ .if SAMPLES_PER_LOOP > 1
+ tst COUNT, #SAMPLES_PER_LOOP - 1 // COUNT always seems to be a multiple of SAMPLES_PER_LOOP in practice
+ bne X(ff_mlp_pack_output) // but just in case, branch to C implementation if not
+ .endif
+ teq COUNT, #0
+ it eq
+ bxeq lr
+ push {v1-v6,sl,fp,lr}
+ ldr SHIFT0, [sp, #(9+1)*4] // get output_shift from stack
+ ldr SHIFT1, =0x08080808
+ ldr SHIFT4, [SHIFT0]
+ .if \channels == 2
+ uadd8 SHIFT4, SHIFT4, SHIFT1 // increase all shifts by 8
+ uxtb SHIFT0, SHIFT4, ror #0
+ uxtb SHIFT1, SHIFT4, ror #8
+ .else
+ ldr SHIFT5, [SHIFT0, #4]
+ uadd8 SHIFT4, SHIFT4, SHIFT1 // increase all shifts by 8
+ uadd8 SHIFT5, SHIFT5, SHIFT1
+ .if \channels == 6
+ uxtb SHIFT0, SHIFT4, ror #0
+ uxtb SHIFT1, SHIFT4, ror #8
+ uxtb SHIFT2, SHIFT4, ror #16
+ uxtb SHIFT3, SHIFT4, ror #24
+ uxtb SHIFT4, SHIFT5, ror #0
+ uxtb SHIFT5, SHIFT5, ror #8
+ .endif
+ .endif
+ .set IDX1, \channels
+ .set IDX2, \channels
+0:
+ .rept WORDS_PER_LOOP / 4
+ output4words
+ .endr
+ subs COUNT, COUNT, #SAMPLES_PER_LOOP
+ bne 0b
+ pop {v1-v6,sl,fp,pc}
+ .ltorg
+endfunc
+ .purgem output4words
+
+ .unreq CHECK
+ .unreq COUNT
+ .unreq IN
+ .unreq OUT
+ .unreq DAT0
+ .unreq DAT1
+ .unreq DAT2
+ .unreq DAT3
+ .unreq SHIFT0
+ .unreq SHIFT1
+ .unreq SHIFT2
+ .unreq SHIFT3
+ .unreq SHIFT4
+ .unreq SHIFT5
+
+.else // not mixed
+
+CHECK .req a1
+COUNT .req a2
+IN .req a3
+OUT .req a4
+DAT0 .req v1
+DAT1 .req v2
+DAT2 .req v3
+DAT3 .req v4
+DAT4 .req v5
+DAT5 .req v6
+DAT6 .req sl // use these rather than the otherwise unused
+DAT7 .req fp // ip and lr so that we can load them using LDRD
+
+ .macro output4words tail, head, r0, r1, r2, r3, r4, r5, r6, r7, pointer_dead=0
+ .if \head
+ .set SIZE_GROUP1, IDX1
+ .if SIZE_GROUP1 > 4
+ .set SIZE_GROUP1, 4
+ .endif
+ .set SIZE_GROUP2, 4 - SIZE_GROUP1
+ load_group1 SIZE_GROUP1, \channels, \r0, \r1, \r2, \r3, \pointer_dead
+ .endif
+ .if \tail
+ eor CHECK, CHECK, \r4, lsr #8 - (\channels - IDX2)
+ eor CHECK, CHECK, \r5, lsr #7 - (\channels - IDX2)
+ decr_modulo IDX2, 2, \channels
+ .endif
+ .if \head
+ load_group2 SIZE_GROUP2, \channels, \r0, \r1, \r2, \r3, \pointer_dead
+ .endif
+ .if \tail
+ eor CHECK, CHECK, \r6, lsr #8 - (\channels - IDX2)
+ eor CHECK, CHECK, \r7, lsr #7 - (\channels - IDX2)
+ decr_modulo IDX2, 2, \channels
+ stm OUT!, {\r4, \r5, \r6, \r7}
+ .endif
+ .if \head
+ lsl \r0, #8 + \shift
+ lsl \r1, #8 + \shift
+ lsl \r2, #8 + \shift
+ lsl \r3, #8 + \shift
+ .endif
+ .endm
+
+ .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 8)
+ .if (WORDS_PER_LOOP % 2) == 0
+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
+ .endif
+ .if (WORDS_PER_LOOP % 2) == 0
+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
+ .endif
+ .if (WORDS_PER_LOOP % 2) == 0
+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
+ .endif
+ .set WORDS_PER_LOOP, WORDS_PER_LOOP * 8
+ .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
+
+function ff_mlp_pack_output_inorder_\channels\()ch_\shift\()shift_armv6, export=1
+ .if SAMPLES_PER_LOOP > 1
+ tst COUNT, #SAMPLES_PER_LOOP - 1 // COUNT always seems to be a multiple of SAMPLES_PER_LOOP in practice
+ bne X(ff_mlp_pack_output) // but just in case, branch to C implementation if not
+ .endif
+ subs COUNT, COUNT, #SAMPLES_PER_LOOP
+ it lo
+ bxlo lr
+ push {v1-v6,sl,fp,lr}
+ .set IDX1, \channels
+ .set IDX2, \channels
+ output4words 0, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
+0: beq 1f
+ .rept WORDS_PER_LOOP / 8
+ output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
+ output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
+ .endr
+ subs COUNT, COUNT, #SAMPLES_PER_LOOP
+ bne 0b
+1:
+ .rept WORDS_PER_LOOP / 8 - 1
+ output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
+ output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
+ .endr
+ output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3, pointer_dead=1
+ output4words 1, 0, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
+ pop {v1-v6,sl,fp,pc}
+endfunc
+ .purgem output4words
+
+ .unreq CHECK
+ .unreq COUNT
+ .unreq IN
+ .unreq OUT
+ .unreq DAT0
+ .unreq DAT1
+ .unreq DAT2
+ .unreq DAT3
+ .unreq DAT4
+ .unreq DAT5
+ .unreq DAT6
+ .unreq DAT7
+
+.endif // mixed
+.else // not inorder
+.ifc \shift, mixed
+
+// This case is not currently handled
+
+.else // not mixed
+
+#if !CONFIG_THUMB
+
+CHECK .req a1
+COUNT .req a2
+IN .req a3
+OUT .req a4
+DAT0 .req v1
+DAT1 .req v2
+DAT2 .req v3
+DAT3 .req v4
+CHAN0 .req v5
+CHAN1 .req v6
+CHAN2 .req sl
+CHAN3 .req fp
+CHAN4 .req ip
+CHAN5 .req lr
+
+ .macro output4words
+ .if \channels == 8
+ .if IDX1 == 8
+ uxtb CHAN0, CHAN4, ror #0
+ uxtb CHAN1, CHAN4, ror #8
+ uxtb CHAN2, CHAN4, ror #16
+ uxtb CHAN3, CHAN4, ror #24
+ .else
+ uxtb CHAN0, CHAN5, ror #0
+ uxtb CHAN1, CHAN5, ror #8
+ uxtb CHAN2, CHAN5, ror #16
+ uxtb CHAN3, CHAN5, ror #24
+ .endif
+ ldr DAT0, [IN, CHAN0, lsl #2]
+ ldr DAT1, [IN, CHAN1, lsl #2]
+ ldr DAT2, [IN, CHAN2, lsl #2]
+ ldr DAT3, [IN, CHAN3, lsl #2]
+ .if IDX1 == 4
+ add IN, IN, #8*4
+ .endif
+ decr_modulo IDX1, 4, \channels
+ .else
+ .set SIZE_GROUP1, IDX1
+ .if SIZE_GROUP1 > 4
+ .set SIZE_GROUP1, 4
+ .endif
+ .set SIZE_GROUP2, 4 - SIZE_GROUP1
+ .if SIZE_GROUP1 == 2
+ loadregoffsh2 DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
+ loadregoffsh2 DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
+ add IN, IN, #8*4
+ .else // SIZE_GROUP1 == 4
+ loadregoffsh2 DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
+ loadregoffsh2 DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
+ loadregoffsh2 DAT, 2, IN, CHAN, 2 + (\channels - IDX1)
+ loadregoffsh2 DAT, 3, IN, CHAN, 3 + (\channels - IDX1)
+ .if IDX1 == 4
+ add IN, IN, #8*4
+ .endif
+ .endif
+ decr_modulo IDX1, SIZE_GROUP1, \channels
+ .if SIZE_GROUP2 == 2
+ loadregoffsh2 DAT, 2, IN, CHAN, 0 + (\channels - IDX1)
+ loadregoffsh2 DAT, 3, IN, CHAN, 1 + (\channels - IDX1)
+ .if IDX1 == 2
+ add IN, IN, #8*4
+ .endif
+ .endif
+ decr_modulo IDX1, SIZE_GROUP2, \channels
+ .endif
+ .if \channels == 8 // in this case we can corrupt CHAN0-3
+ rsb CHAN0, CHAN0, #8
+ rsb CHAN1, CHAN1, #8
+ rsb CHAN2, CHAN2, #8
+ rsb CHAN3, CHAN3, #8
+ lsl DAT0, #8 + \shift
+ lsl DAT1, #8 + \shift
+ lsl DAT2, #8 + \shift
+ lsl DAT3, #8 + \shift
+ eor CHECK, CHECK, DAT0, lsr CHAN0
+ eor CHECK, CHECK, DAT1, lsr CHAN1
+ eor CHECK, CHECK, DAT2, lsr CHAN2
+ eor CHECK, CHECK, DAT3, lsr CHAN3
+ .else
+ .if \shift != 0
+ lsl DAT0, #\shift
+ lsl DAT1, #\shift
+ lsl DAT2, #\shift
+ lsl DAT3, #\shift
+ .endif
+ bic DAT0, DAT0, #0xff000000
+ bic DAT1, DAT1, #0xff000000
+ bic DAT2, DAT2, #0xff000000
+ bic DAT3, DAT3, #0xff000000
+ eorlslreg CHECK, DAT0, CHAN, 0 + (\channels - IDX2)
+ eorlslreg CHECK, DAT1, CHAN, 1 + (\channels - IDX2)
+ decr_modulo IDX2, 2, \channels
+ eorlslreg CHECK, DAT2, CHAN, 0 + (\channels - IDX2)
+ eorlslreg CHECK, DAT3, CHAN, 1 + (\channels - IDX2)
+ decr_modulo IDX2, 2, \channels
+ lsl DAT0, #8
+ lsl DAT1, #8
+ lsl DAT2, #8
+ lsl DAT3, #8
+ .endif
+ stm OUT!, {DAT0 - DAT3}
+ .endm
+
+ .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 4)
+ .if (WORDS_PER_LOOP % 2) == 0
+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
+ .endif
+ .if (WORDS_PER_LOOP % 2) == 0
+ .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
+ .endif
+ .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
+ .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
+
+function ff_mlp_pack_output_outoforder_\channels\()ch_\shift\()shift_armv6, export=1
+ .if SAMPLES_PER_LOOP > 1
+ tst COUNT, #SAMPLES_PER_LOOP - 1 // COUNT always seems to be a multiple of SAMPLES_PER_LOOP in practice
+ bne X(ff_mlp_pack_output) // but just in case, branch to C implementation if not
+ .endif
+ teq COUNT, #0
+ it eq
+ bxeq lr
+ push {v1-v6,sl,fp,lr}
+ ldr CHAN0, [sp, #(9+0)*4] // get ch_assign from stack
+ ldr CHAN4, [CHAN0]
+ .if \channels == 2
+ uxtb CHAN0, CHAN4, ror #0
+ uxtb CHAN1, CHAN4, ror #8
+ .else
+ ldr CHAN5, [CHAN0, #4]
+ .if \channels == 6
+ uxtb CHAN0, CHAN4, ror #0
+ uxtb CHAN1, CHAN4, ror #8
+ uxtb CHAN2, CHAN4, ror #16
+ uxtb CHAN3, CHAN4, ror #24
+ uxtb CHAN4, CHAN5, ror #0
+ uxtb CHAN5, CHAN5, ror #8
+ .endif
+ .endif
+ .set IDX1, \channels
+ .set IDX2, \channels
+0:
+ .rept WORDS_PER_LOOP / 4
+ output4words
+ .endr
+ subs COUNT, COUNT, #SAMPLES_PER_LOOP
+ bne 0b
+ pop {v1-v6,sl,fp,pc}
+ .ltorg
+endfunc
+ .purgem output4words
+
+ .unreq CHECK
+ .unreq COUNT
+ .unreq IN
+ .unreq OUT
+ .unreq DAT0
+ .unreq DAT1
+ .unreq DAT2
+ .unreq DAT3
+ .unreq CHAN0
+ .unreq CHAN1
+ .unreq CHAN2
+ .unreq CHAN3
+ .unreq CHAN4
+ .unreq CHAN5
+
+#endif // !CONFIG_THUMB
+
+.endif // mixed
+.endif // inorder
+.endm // implement_pack
+
+.macro pack_channels inorder, channels
+ implement_pack \inorder, \channels, 0
+ implement_pack \inorder, \channels, 1
+ implement_pack \inorder, \channels, 2
+ implement_pack \inorder, \channels, 3
+ implement_pack \inorder, \channels, 4
+ implement_pack \inorder, \channels, 5
+ implement_pack \inorder, \channels, mixed
+.endm
+
+.macro pack_order inorder
+ pack_channels \inorder, 2
+ pack_channels \inorder, 6
+ pack_channels \inorder, 8
+.endm
+
+ pack_order 0
+ pack_order 1
diff --git a/libavcodec/arm/mlpdsp_init_arm.c b/libavcodec/arm/mlpdsp_init_arm.c
index 1bb2276..10ec316 100644
--- a/libavcodec/arm/mlpdsp_init_arm.c
+++ b/libavcodec/arm/mlpdsp_init_arm.c
@@ -41,8 +41,104 @@ void ff_mlp_rematrix_channel_arm(int32_t *samples,
int access_unit_size_pow2,
int32_t mask);
+#define DECLARE_PACK(order,channels,shift) /* expands to the prototype of one _armv6 pack variant */ \
+ int32_t ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int);
+#define ENUMERATE_PACK(order,channels,shift) /* expands to one variant name plus a comma, for use in an initializer list */ \
+ ff_mlp_pack_output_##order##order_##channels##ch_##shift##shift_armv6,
+#define PACK_CHANNELS(macro,order,channels) /* apply macro to the seven shift variants: 0-5, then mixed */ \
+ macro(order,channels,0) \
+ macro(order,channels,1) \
+ macro(order,channels,2) \
+ macro(order,channels,3) \
+ macro(order,channels,4) \
+ macro(order,channels,5) \
+ macro(order,channels,mixed)
+#define PACK_ORDER(macro,order) /* apply macro to the three channel counts: 2, 6, then 8 */ \
+ PACK_CHANNELS(macro,order,2) \
+ PACK_CHANNELS(macro,order,6) \
+ PACK_CHANNELS(macro,order,8)
+#define PACK_ALL(macro) /* apply macro to all 2*3*7 variants, outof-order group first, then in-order */ \
+ PACK_ORDER(macro,outof) \
+ PACK_ORDER(macro,in)
+PACK_ALL(DECLARE_PACK) // emit a prototype for every variant name
+
+#define ff_mlp_pack_output_outoforder_2ch_mixedshift_armv6 0 // name becomes 0 wherever it is expanded later, e.g. a null slot in a PACK_ALL(ENUMERATE_PACK) table
+#define ff_mlp_pack_output_outoforder_6ch_mixedshift_armv6 0 // same for 6 channels
+#define ff_mlp_pack_output_outoforder_8ch_mixedshift_armv6 0 // same for 8 channels
+#if CONFIG_THUMB // Thumb builds: every remaining out-of-order variant name becomes 0 as well
+#define ff_mlp_pack_output_outoforder_2ch_0shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_1shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_2shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_3shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_4shift_armv6 0
+#define ff_mlp_pack_output_outoforder_2ch_5shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_0shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_1shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_2shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_3shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_4shift_armv6 0
+#define ff_mlp_pack_output_outoforder_6ch_5shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_0shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_1shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_2shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_3shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_4shift_armv6 0
+#define ff_mlp_pack_output_outoforder_8ch_5shift_armv6 0
+#endif // CONFIG_THUMB
+
+static int32_t (*mlp_select_pack_output_armv6(uint8_t *ch_assign, // returns the pack routine to use for this channel setup
+ int8_t *output_shift,
+ uint8_t max_matrix_channel,
+ int is32))(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int)
+{
+ int ch_index; // 0, 1 or 2, set by the switch below
+ int shift = output_shift[0] < 0 || output_shift[0] > 5 ? 6 : output_shift[0]; // 6 is the table slot of the "mixed" variant
+ int inorder = 1; // stays 1 only while ch_assign[i] == i for every channel checked
+ static int32_t (*const routine[2*3*7])(int32_t, uint16_t, int32_t (*)[], void *, uint8_t*, int8_t *, uint8_t, int) = { // 2 orders x 3 channel counts x 7 shift variants
+ PACK_ALL(ENUMERATE_PACK)
+ };
+ int i;
+
+ if (!is32) // don't support 16-bit output (it's not used by TrueHD)
+ return ff_mlp_pack_output;
+
+ switch (max_matrix_channel) {
+ case 1: // channels 0-1, the 2ch routines
+ ch_index = 0;
+ break;
+ case 5: // channels 0-5, the 6ch routines
+ ch_index = 1;
+ break;
+ case 7: // channels 0-7, the 8ch routines
+ ch_index = 2;
+ break;
+ default: // no table entry for any other channel count
+ return ff_mlp_pack_output;
+ }
+
+ for (i = 0; i <= max_matrix_channel; i++) { // inclusive bound: max_matrix_channel is the highest channel index
+ if (shift != 6 && output_shift[i] != shift)
+ shift = 6; // indicate mixed shifts
+ if (ch_assign[i] != i)
+ inorder = 0;
+ }
+#if CONFIG_THUMB
+ if (!inorder)
+ return ff_mlp_pack_output; // can't currently handle an order array except in ARM mode
+#else
+ if (shift == 6 && !inorder)
+ return ff_mlp_pack_output; // can't currently handle both an order array and a shift array
+#endif
+
+ return routine[(inorder*3+ch_index)*7+shift]; // table order: outof/in, then 2/6/8 channels, then shift 0-5 and mixed
+}
+
av_cold void ff_mlpdsp_init_arm(MLPDSPContext *c)
{
+ int cpu_flags = av_get_cpu_flags(); // only the ARMv6 bit is tested below
+
c->mlp_filter_channel = ff_mlp_filter_channel_arm;
c->mlp_rematrix_channel = ff_mlp_rematrix_channel_arm;
+ if (cpu_flags & AV_CPU_FLAG_ARMV6) // install the _armv6 selector only when the ARMv6 flag is reported
+ c->mlp_select_pack_output = mlp_select_pack_output_armv6;
}
--
1.9.1