mirror of
https://github.com/LibreELEC/LibreELEC.tv.git
synced 2025-07-30 14:16:40 +00:00
Merge branch 'master' of github.com:OpenELEC/OpenELEC.tv into openelec-3.2
This commit is contained in:
commit
e009bd2206
@ -1,11 +1,14 @@
|
|||||||
## Questions about OpenELEC?
|
### Questions about OpenELEC?
|
||||||
|
|
||||||
To get your questions answered, please ask in the OpenELEC [Forum], on IRC:
|
To get your questions answered, please ask in the OpenELEC [Forum], on IRC:
|
||||||
#openelec on freenode.net, or [webchat].
|
\#openelec on freenode.net, or [webchat].
|
||||||
|
|
||||||
Do not open an issue.
|
Do not open an issue.
|
||||||
|
|
||||||
## Issue Reports
|
### Issue Reports
|
||||||
|
|
||||||
|
**BEFORE you report a bug make sure you got the latest testing version of
|
||||||
|
OpenELEC. Your bug might be already fixed.**
|
||||||
|
|
||||||
If you are at all unsure whether it's a bug in OpenELEC or a problem with
|
If you are at all unsure whether it's a bug in OpenELEC or a problem with
|
||||||
something else, post in the OpenELEC [Forum] instead. If it turns out that it is
|
something else, post in the OpenELEC [Forum] instead. If it turns out that it is
|
||||||
@ -17,8 +20,8 @@ and try to answer the following questions:
|
|||||||
- What did you expect to happen?
|
- What did you expect to happen?
|
||||||
- What happened instead?
|
- What happened instead?
|
||||||
|
|
||||||
It is also important to provide logs for debugging.
|
**It is also important to provide logs for debugging.
|
||||||
A zip file can be found in the [logfiles] samba share, this will contain all the logs needed.
|
A zip file can be found in the [logfiles] samba share, this will contain all the logs needed.**
|
||||||
|
|
||||||
Make sure to specify which version of OpenELEC you are using.
|
Make sure to specify which version of OpenELEC you are using.
|
||||||
- OpenELEC version
|
- OpenELEC version
|
||||||
@ -32,7 +35,7 @@ Feature requests are great, but they usually end up lying around the issue
|
|||||||
tracker indefinitely. Sending a pull request is a much better way of getting a
|
tracker indefinitely. Sending a pull request is a much better way of getting a
|
||||||
particular feature into OpenELEC.
|
particular feature into OpenELEC.
|
||||||
|
|
||||||
## Pull Requests
|
### Pull Requests
|
||||||
|
|
||||||
- **Create topic branches**. Don't ask us to pull from your master branch.
|
- **Create topic branches**. Don't ask us to pull from your master branch.
|
||||||
|
|
||||||
|
@ -1,3 +1,17 @@
|
|||||||
|
diff -Naur xbmc-pvr-addons-frodo-910d7e7/addons/Makefile.include.am xbmc-pvr-addons-frodo-910d7e7.patch/addons/Makefile.include.am
|
||||||
|
--- xbmc-pvr-addons-frodo-910d7e7/addons/Makefile.include.am 2013-09-09 01:56:00.000000000 +0200
|
||||||
|
+++ xbmc-pvr-addons-frodo-910d7e7.patch/addons/Makefile.include.am 2013-09-09 18:54:06.000000000 +0200
|
||||||
|
@@ -36,7 +36,9 @@
|
||||||
|
mkdir -m 755 -p $(DESTDIR)@LIBDIR@/$(ADDONNAME)
|
||||||
|
mkdir -m 755 -p $(DESTDIR)@DATADIR@/$(ADDONNAME)
|
||||||
|
cp -f @BINPREFIX@$(ADDONBINNAME)@BIN_EXT@ $(DESTDIR)@LIBDIR@/$(ADDONNAME) ; chmod 655 $(DESTDIR)@LIBDIR@/$(ADDONNAME)/@BINPREFIX@$(ADDONBINNAME)@BIN_EXT@
|
||||||
|
- cp -r -f @abs_top_srcdir@/addons/$(ADDONNAME)/addon/* $(DESTDIR)@DATADIR@/$(ADDONNAME) ; chmod -R o+rx $(DESTDIR)@DATADIR@/$(ADDONNAME)
|
||||||
|
+ cp -r -f @abs_top_srcdir@/addons/$(ADDONNAME)/addon/* $(DESTDIR)@DATADIR@/$(ADDONNAME)
|
||||||
|
+ cp -r -f @abs_top_builddir@/addons/$(ADDONNAME)/addon/* $(DESTDIR)@DATADIR@/$(ADDONNAME)
|
||||||
|
+ chmod -R o+rx $(DESTDIR)@DATADIR@/$(ADDONNAME)
|
||||||
|
endif
|
||||||
|
|
||||||
|
all: @BUILD_TYPE@
|
||||||
diff -Naur xbmc-pvr-addons-frodo-910d7e7/addons/pvr.argustv/Makefile.am xbmc-pvr-addons-frodo-910d7e7.patch/addons/pvr.argustv/Makefile.am
|
diff -Naur xbmc-pvr-addons-frodo-910d7e7/addons/pvr.argustv/Makefile.am xbmc-pvr-addons-frodo-910d7e7.patch/addons/pvr.argustv/Makefile.am
|
||||||
--- xbmc-pvr-addons-frodo-910d7e7/addons/pvr.argustv/Makefile.am 2013-09-09 01:56:00.000000000 +0200
|
--- xbmc-pvr-addons-frodo-910d7e7/addons/pvr.argustv/Makefile.am 2013-09-09 01:56:00.000000000 +0200
|
||||||
+++ xbmc-pvr-addons-frodo-910d7e7.patch/addons/pvr.argustv/Makefile.am 2013-09-09 03:48:49.756563520 +0200
|
+++ xbmc-pvr-addons-frodo-910d7e7.patch/addons/pvr.argustv/Makefile.am 2013-09-09 03:48:49.756563520 +0200
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
PKG_NAME="xbmc-theme-Confluence"
|
PKG_NAME="xbmc-theme-Confluence"
|
||||||
PKG_VERSION="12.2-58a9d9e"
|
PKG_VERSION="12.2-58a9d9e"
|
||||||
if [ "$XBMC" = "master" ]; then
|
if [ "$XBMC" = "master" ]; then
|
||||||
PKG_VERSION="13.alpha-dcd897b"
|
PKG_VERSION="13.alpha-2ef8929"
|
||||||
elif [ "$XBMC" = "xbmc-aml" ]; then
|
elif [ "$XBMC" = "xbmc-aml" ]; then
|
||||||
PKG_VERSION="aml-frodo-d9119f2"
|
PKG_VERSION="aml-frodo-d9119f2"
|
||||||
fi
|
fi
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
PKG_NAME="xbmc"
|
PKG_NAME="xbmc"
|
||||||
PKG_VERSION="12.2-58a9d9e"
|
PKG_VERSION="12.2-58a9d9e"
|
||||||
if [ "$XBMC" = "master" ]; then
|
if [ "$XBMC" = "master" ]; then
|
||||||
PKG_VERSION="13.alpha-dcd897b"
|
PKG_VERSION="13.alpha-2ef8929"
|
||||||
elif [ "$XBMC" = "xbmc-aml" ]; then
|
elif [ "$XBMC" = "xbmc-aml" ]; then
|
||||||
PKG_VERSION="aml-frodo-d9119f2"
|
PKG_VERSION="aml-frodo-d9119f2"
|
||||||
fi
|
fi
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,290 +0,0 @@
|
|||||||
From 4bdcd85735abd6e6948f2842501d2d949f57b405 Mon Sep 17 00:00:00 2001
|
|
||||||
From: popcornmix <popcornmix@gmail.com>
|
|
||||||
Date: Wed, 21 Aug 2013 23:48:56 +0100
|
|
||||||
Subject: [PATCH] [rbp/omxplayer] Avoid too many calls to GPU
|
|
||||||
|
|
||||||
We currently read the media time once per packet received from demuxer to determine gpu underrun.
|
|
||||||
We've found that TrueHD audio in particular produces ~1000 packets per second (whether it is the active track or not).
|
|
||||||
The cost of reading media time (from gpu) is high enough that 1000 calls per second makes us fail to keep up.
|
|
||||||
So, cache the media time, and only read it at most 50 times per second.
|
|
||||||
---
|
|
||||||
xbmc/cores/omxplayer/OMXPlayer.cpp | 209 +++++++++++++++++++------------------
|
|
||||||
xbmc/cores/omxplayer/OMXPlayer.h | 2 +
|
|
||||||
2 files changed, 110 insertions(+), 101 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/xbmc/cores/omxplayer/OMXPlayer.cpp b/xbmc/cores/omxplayer/OMXPlayer.cpp
|
|
||||||
index 6c13e35..12122e5 100644
|
|
||||||
--- a/xbmc/cores/omxplayer/OMXPlayer.cpp
|
|
||||||
+++ b/xbmc/cores/omxplayer/OMXPlayer.cpp
|
|
||||||
@@ -474,6 +474,8 @@ void COMXSelectionStreams::Update(CDVDInputStream* input, CDVDDemux* demuxer)
|
|
||||||
m_stepped = false;
|
|
||||||
m_video_fifo = 0;
|
|
||||||
m_audio_fifo = 0;
|
|
||||||
+ m_last_check_time = 0.0;
|
|
||||||
+ m_stamp = 0.0;
|
|
||||||
|
|
||||||
memset(&m_SpeedState, 0, sizeof(m_SpeedState));
|
|
||||||
|
|
||||||
@@ -1114,117 +1116,121 @@ void COMXPlayer::Process()
|
|
||||||
|
|
||||||
while (!m_bAbortRequest)
|
|
||||||
{
|
|
||||||
- const bool m_Pause = m_playSpeed == DVD_PLAYSPEED_PAUSE;
|
|
||||||
- const bool not_accepts_data = (!m_omxPlayerAudio.AcceptsData() && m_CurrentAudio.id >= 0) ||
|
|
||||||
- (!m_omxPlayerVideo.AcceptsData() && m_CurrentVideo.id >= 0);
|
|
||||||
- /* when the video/audio fifos are low, we pause clock, when high we resume */
|
|
||||||
- double stamp = m_av_clock.OMXMediaTime();
|
|
||||||
- double audio_pts = floor(m_omxPlayerAudio.GetCurrentPts());
|
|
||||||
- double video_pts = floor(m_omxPlayerVideo.GetCurrentPts());
|
|
||||||
-
|
|
||||||
- float audio_fifo = audio_pts / DVD_TIME_BASE - stamp * 1e-6;
|
|
||||||
- float video_fifo = video_pts / DVD_TIME_BASE - stamp * 1e-6;
|
|
||||||
- float threshold = 0.1f;
|
|
||||||
- bool audio_fifo_low = false, video_fifo_low = false, audio_fifo_high = false, video_fifo_high = false;
|
|
||||||
-
|
|
||||||
- // if deinterlace setting has changed, we should close and open video
|
|
||||||
- if (current_deinterlace != CMediaSettings::Get().GetCurrentVideoSettings().m_DeinterlaceMode)
|
|
||||||
+ double now = m_clock.GetAbsoluteClock();
|
|
||||||
+ if (m_last_check_time == 0.0 || m_last_check_time + DVD_MSEC_TO_TIME(20) <= now)
|
|
||||||
{
|
|
||||||
- int iStream = m_CurrentVideo.id, source = m_CurrentVideo.source;
|
|
||||||
- CloseVideoStream(false);
|
|
||||||
- OpenVideoStream(iStream, source);
|
|
||||||
- if (m_State.canseek)
|
|
||||||
- m_messenger.Put(new CDVDMsgPlayerSeek(GetTime(), true, true, true, true, true));
|
|
||||||
- current_deinterlace = CMediaSettings::Get().GetCurrentVideoSettings().m_DeinterlaceMode;
|
|
||||||
- }
|
|
||||||
+ m_last_check_time = now;
|
|
||||||
+ m_stamp = m_av_clock.OMXMediaTime();
|
|
||||||
+ const bool m_Pause = m_playSpeed == DVD_PLAYSPEED_PAUSE;
|
|
||||||
+ const bool not_accepts_data = (!m_omxPlayerAudio.AcceptsData() && m_CurrentAudio.id >= 0) ||
|
|
||||||
+ (!m_omxPlayerVideo.AcceptsData() && m_CurrentVideo.id >= 0);
|
|
||||||
+ /* when the video/audio fifos are low, we pause clock, when high we resume */
|
|
||||||
+ double audio_pts = floor(m_omxPlayerAudio.GetCurrentPts());
|
|
||||||
+ double video_pts = floor(m_omxPlayerVideo.GetCurrentPts());
|
|
||||||
+
|
|
||||||
+ float audio_fifo = audio_pts / DVD_TIME_BASE - m_stamp * 1e-6;
|
|
||||||
+ float video_fifo = video_pts / DVD_TIME_BASE - m_stamp * 1e-6;
|
|
||||||
+ float threshold = 0.1f;
|
|
||||||
+ bool audio_fifo_low = false, video_fifo_low = false, audio_fifo_high = false, video_fifo_high = false;
|
|
||||||
+
|
|
||||||
+ // if deinterlace setting has changed, we should close and open video
|
|
||||||
+ if (current_deinterlace != CMediaSettings::Get().GetCurrentVideoSettings().m_DeinterlaceMode)
|
|
||||||
+ {
|
|
||||||
+ int iStream = m_CurrentVideo.id, source = m_CurrentVideo.source;
|
|
||||||
+ CloseVideoStream(false);
|
|
||||||
+ OpenVideoStream(iStream, source);
|
|
||||||
+ if (m_State.canseek)
|
|
||||||
+ m_messenger.Put(new CDVDMsgPlayerSeek(GetTime(), true, true, true, true, true));
|
|
||||||
+ current_deinterlace = CMediaSettings::Get().GetCurrentVideoSettings().m_DeinterlaceMode;
|
|
||||||
+ }
|
|
||||||
|
|
||||||
- m_video_fifo = (int)(100.0*(m_omxPlayerVideo.GetDecoderBufferSize()-m_omxPlayerVideo.GetDecoderFreeSpace())/m_omxPlayerVideo.GetDecoderBufferSize());
|
|
||||||
- m_audio_fifo = (int)(100.0*audio_fifo/m_omxPlayerAudio.GetCacheTotal());
|
|
||||||
+ m_video_fifo = (int)(100.0*(m_omxPlayerVideo.GetDecoderBufferSize()-m_omxPlayerVideo.GetDecoderFreeSpace())/m_omxPlayerVideo.GetDecoderBufferSize());
|
|
||||||
+ m_audio_fifo = (int)(100.0*audio_fifo/m_omxPlayerAudio.GetCacheTotal());
|
|
||||||
|
|
||||||
- #ifdef _DEBUG
|
|
||||||
- static unsigned count;
|
|
||||||
- if ((count++ & 15) == 0)
|
|
||||||
- {
|
|
||||||
- char response[80];
|
|
||||||
- if (m_omxPlayerVideo.GetDecoderBufferSize() && m_omxPlayerAudio.GetCacheTotal())
|
|
||||||
- vc_gencmd(response, sizeof response, "render_bar 4 video_fifo %d %d %d %d",
|
|
||||||
- m_video_fifo,
|
|
||||||
- (int)(100.0*video_fifo/m_omxPlayerAudio.GetCacheTotal()),
|
|
||||||
- 0, 100);
|
|
||||||
- if (m_omxPlayerAudio.GetCacheTotal())
|
|
||||||
- vc_gencmd(response, sizeof response, "render_bar 5 audio_fifo %d %d %d %d",
|
|
||||||
- m_audio_fifo,
|
|
||||||
- (int)(100.0*m_omxPlayerAudio.GetDelay()/m_omxPlayerAudio.GetCacheTotal()),
|
|
||||||
- 0, 100);
|
|
||||||
- vc_gencmd(response, sizeof response, "render_bar 6 video_queue %d %d %d %d",
|
|
||||||
- m_omxPlayerVideo.GetLevel(), 0, 0, 100);
|
|
||||||
- vc_gencmd(response, sizeof response, "render_bar 7 audio_queue %d %d %d %d",
|
|
||||||
- m_omxPlayerAudio.GetLevel(), 0, 0, 100);
|
|
||||||
- }
|
|
||||||
- #endif
|
|
||||||
- if (audio_pts != DVD_NOPTS_VALUE)
|
|
||||||
- {
|
|
||||||
- audio_fifo_low = m_HasAudio && audio_fifo < threshold;
|
|
||||||
- audio_fifo_high = audio_pts != DVD_NOPTS_VALUE && audio_fifo >= m_threshold;
|
|
||||||
- }
|
|
||||||
- if (video_pts != DVD_NOPTS_VALUE)
|
|
||||||
- {
|
|
||||||
- video_fifo_low = m_HasVideo && video_fifo < threshold;
|
|
||||||
- video_fifo_high = video_pts != DVD_NOPTS_VALUE && video_fifo >= m_threshold;
|
|
||||||
- }
|
|
||||||
- if (!m_HasAudio && m_HasVideo)
|
|
||||||
- audio_fifo_high = true;
|
|
||||||
- if (!m_HasVideo && m_HasAudio)
|
|
||||||
- video_fifo_high = true;
|
|
||||||
-
|
|
||||||
- #ifdef _DEBUG
|
|
||||||
- CLog::Log(LOGDEBUG, "%s - M:%.6f-%.6f (A:%.6f V:%.6f) PEF:%d%d%d S:%.2f A:%.2f V:%.2f/T:%.2f (A:%d%d V:%d%d) A:%d%% V:%d%% (%.2f,%.2f)", __FUNCTION__,
|
|
||||||
- stamp*1e-6, m_av_clock.OMXClockAdjustment()*1e-6, audio_pts*1e-6, video_pts*1e-6, m_av_clock.OMXIsPaused(), bOmxSentEOFs, not_accepts_data, m_playSpeed * (1.0f/DVD_PLAYSPEED_NORMAL),
|
|
||||||
- audio_pts == DVD_NOPTS_VALUE ? 0.0:audio_fifo, video_pts == DVD_NOPTS_VALUE ? 0.0:video_fifo, m_threshold,
|
|
||||||
- audio_fifo_low, audio_fifo_high, video_fifo_low, video_fifo_high,
|
|
||||||
- m_omxPlayerAudio.GetLevel(), m_omxPlayerVideo.GetLevel(), m_omxPlayerAudio.GetDelay(), (float)m_omxPlayerAudio.GetCacheTotal());
|
|
||||||
- #endif
|
|
||||||
-
|
|
||||||
- if (TP(m_playSpeed))
|
|
||||||
- {
|
|
||||||
- if (m_CurrentVideo.started)
|
|
||||||
+ #ifdef _DEBUG
|
|
||||||
+ static unsigned count;
|
|
||||||
+ if ((count++ & 7) == 0)
|
|
||||||
{
|
|
||||||
- if (stamp == 0.0)
|
|
||||||
- {
|
|
||||||
- /* trickplay modes progress by stepping */
|
|
||||||
- CLog::Log(LOGDEBUG, "COMXPlayer::Process - Seeking step speed:%.2f last:%.2f v:%.2f", (double)m_playSpeed / DVD_PLAYSPEED_NORMAL, m_SpeedState.lastpts*1e-6, video_pts*1e-6);
|
|
||||||
- m_av_clock.OMXStep();
|
|
||||||
- }
|
|
||||||
- else
|
|
||||||
+ char response[80];
|
|
||||||
+ if (m_omxPlayerVideo.GetDecoderBufferSize() && m_omxPlayerAudio.GetCacheTotal())
|
|
||||||
+ vc_gencmd(response, sizeof response, "render_bar 4 video_fifo %d %d %d %d",
|
|
||||||
+ m_video_fifo,
|
|
||||||
+ (int)(100.0*video_fifo/m_omxPlayerAudio.GetCacheTotal()),
|
|
||||||
+ 0, 100);
|
|
||||||
+ if (m_omxPlayerAudio.GetCacheTotal())
|
|
||||||
+ vc_gencmd(response, sizeof response, "render_bar 5 audio_fifo %d %d %d %d",
|
|
||||||
+ m_audio_fifo,
|
|
||||||
+ (int)(100.0*m_omxPlayerAudio.GetDelay()/m_omxPlayerAudio.GetCacheTotal()),
|
|
||||||
+ 0, 100);
|
|
||||||
+ vc_gencmd(response, sizeof response, "render_bar 6 video_queue %d %d %d %d",
|
|
||||||
+ m_omxPlayerVideo.GetLevel(), 0, 0, 100);
|
|
||||||
+ vc_gencmd(response, sizeof response, "render_bar 7 audio_queue %d %d %d %d",
|
|
||||||
+ m_omxPlayerAudio.GetLevel(), 0, 0, 100);
|
|
||||||
+ }
|
|
||||||
+ #endif
|
|
||||||
+ if (audio_pts != DVD_NOPTS_VALUE)
|
|
||||||
+ {
|
|
||||||
+ audio_fifo_low = m_HasAudio && audio_fifo < threshold;
|
|
||||||
+ audio_fifo_high = audio_pts != DVD_NOPTS_VALUE && audio_fifo >= m_threshold;
|
|
||||||
+ }
|
|
||||||
+ if (video_pts != DVD_NOPTS_VALUE)
|
|
||||||
+ {
|
|
||||||
+ video_fifo_low = m_HasVideo && video_fifo < threshold;
|
|
||||||
+ video_fifo_high = video_pts != DVD_NOPTS_VALUE && video_fifo >= m_threshold;
|
|
||||||
+ }
|
|
||||||
+ if (!m_HasAudio && m_HasVideo)
|
|
||||||
+ audio_fifo_high = true;
|
|
||||||
+ if (!m_HasVideo && m_HasAudio)
|
|
||||||
+ video_fifo_high = true;
|
|
||||||
+
|
|
||||||
+ #ifdef _DEBUG
|
|
||||||
+ CLog::Log(LOGDEBUG, "%s - M:%.6f-%.6f (A:%.6f V:%.6f) PEF:%d%d%d S:%.2f A:%.2f V:%.2f/T:%.2f (A:%d%d V:%d%d) A:%d%% V:%d%% (%.2f,%.2f)", __FUNCTION__,
|
|
||||||
+ m_stamp*1e-6, m_av_clock.OMXClockAdjustment()*1e-6, audio_pts*1e-6, video_pts*1e-6, m_av_clock.OMXIsPaused(), bOmxSentEOFs, not_accepts_data, m_playSpeed * (1.0f/DVD_PLAYSPEED_NORMAL),
|
|
||||||
+ audio_pts == DVD_NOPTS_VALUE ? 0.0:audio_fifo, video_pts == DVD_NOPTS_VALUE ? 0.0:video_fifo, m_threshold,
|
|
||||||
+ audio_fifo_low, audio_fifo_high, video_fifo_low, video_fifo_high,
|
|
||||||
+ m_omxPlayerAudio.GetLevel(), m_omxPlayerVideo.GetLevel(), m_omxPlayerAudio.GetDelay(), (float)m_omxPlayerAudio.GetCacheTotal());
|
|
||||||
+ #endif
|
|
||||||
+
|
|
||||||
+ if (TP(m_playSpeed))
|
|
||||||
+ {
|
|
||||||
+ if (m_CurrentVideo.started)
|
|
||||||
{
|
|
||||||
- m_av_clock.OMXMediaTime(0.0);
|
|
||||||
- m_stepped = true;
|
|
||||||
+ if (m_stamp == 0.0 && (!m_stepped || m_playSpeed > 0))
|
|
||||||
+ {
|
|
||||||
+ /* trickplay modes progress by stepping */
|
|
||||||
+ CLog::Log(LOGDEBUG, "COMXPlayer::Process - Seeking step speed:%.2f last:%.2f v:%.2f", (double)m_playSpeed / DVD_PLAYSPEED_NORMAL, m_SpeedState.lastpts*1e-6, video_pts*1e-6);
|
|
||||||
+ m_av_clock.OMXStep();
|
|
||||||
+ }
|
|
||||||
+ else
|
|
||||||
+ {
|
|
||||||
+ m_av_clock.OMXMediaTime(0.0);
|
|
||||||
+ m_last_check_time = 0.0;
|
|
||||||
+ m_stepped = true;
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
- }
|
|
||||||
- else if(!m_Pause && (bOmxSentEOFs || not_accepts_data || (audio_fifo_high && video_fifo_high)))
|
|
||||||
- {
|
|
||||||
- if (m_av_clock.OMXIsPaused())
|
|
||||||
+ else if(!m_Pause && (bOmxSentEOFs || not_accepts_data || (audio_fifo_high && video_fifo_high)))
|
|
||||||
{
|
|
||||||
- CLog::Log(LOGDEBUG, "Resume %.2f,%.2f (A:%d%d V:%d%d) EOF:%d FULL:%d T:%.2f\n", audio_fifo, video_fifo,
|
|
||||||
- audio_fifo_low, audio_fifo_high, video_fifo_low, video_fifo_high, bOmxSentEOFs, not_accepts_data, m_threshold);
|
|
||||||
- m_av_clock.OMXStateExecute();
|
|
||||||
- m_av_clock.OMXResume();
|
|
||||||
+ if (m_av_clock.OMXIsPaused())
|
|
||||||
+ {
|
|
||||||
+ CLog::Log(LOGDEBUG, "Resume %.2f,%.2f (A:%d%d V:%d%d) EOF:%d FULL:%d T:%.2f\n", audio_fifo, video_fifo,
|
|
||||||
+ audio_fifo_low, audio_fifo_high, video_fifo_low, video_fifo_high, bOmxSentEOFs, not_accepts_data, m_threshold);
|
|
||||||
+ m_av_clock.OMXStateExecute();
|
|
||||||
+ m_av_clock.OMXResume();
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
- }
|
|
||||||
- else if (m_Pause || audio_fifo_low || video_fifo_low)
|
|
||||||
- {
|
|
||||||
- if (!m_av_clock.OMXIsPaused() && !TPA(m_playSpeed))
|
|
||||||
+ else if (m_Pause || audio_fifo_low || video_fifo_low)
|
|
||||||
{
|
|
||||||
- if (!m_Pause)
|
|
||||||
- m_threshold = std::min(2.0f*m_threshold, 16.0f);
|
|
||||||
- CLog::Log(LOGDEBUG, "Pause %.2f,%.2f (A:%d%d V:%d%d) EOF:%d FULL:%d T:%.2f\n", audio_fifo, video_fifo,
|
|
||||||
- audio_fifo_low, audio_fifo_high, video_fifo_low, video_fifo_high, bOmxSentEOFs, not_accepts_data, m_threshold);
|
|
||||||
- m_av_clock.OMXPause();
|
|
||||||
+ if (!m_av_clock.OMXIsPaused() && !TPA(m_playSpeed))
|
|
||||||
+ {
|
|
||||||
+ if (!m_Pause)
|
|
||||||
+ m_threshold = std::min(2.0f*m_threshold, 16.0f);
|
|
||||||
+ CLog::Log(LOGDEBUG, "Pause %.2f,%.2f (A:%d%d V:%d%d) EOF:%d FULL:%d T:%.2f\n", audio_fifo, video_fifo,
|
|
||||||
+ audio_fifo_low, audio_fifo_high, video_fifo_low, video_fifo_high, bOmxSentEOFs, not_accepts_data, m_threshold);
|
|
||||||
+ m_av_clock.OMXPause();
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
-
|
|
||||||
- // handle messages send to this thread, like seek or demuxer reset requests
|
|
||||||
HandleMessages();
|
|
||||||
|
|
||||||
if(m_bAbortRequest)
|
|
||||||
@@ -2294,6 +2300,7 @@ void COMXPlayer::HandleMessages()
|
|
||||||
FlushBuffers(!msg.GetFlush(), start, msg.GetAccurate());
|
|
||||||
// mark mediatime as invalid
|
|
||||||
m_av_clock.OMXMediaTime(0.0);
|
|
||||||
+ m_last_check_time = 0.0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
CLog::Log(LOGWARNING, "error while seeking");
|
|
||||||
@@ -3044,7 +3051,7 @@ int64_t COMXPlayer::GetTime()
|
|
||||||
if(offset > limit) offset = limit;
|
|
||||||
if(offset < -limit) offset = -limit;
|
|
||||||
}
|
|
||||||
- //{CLog::Log(LOGINFO, "%s: time:%.2f stamp:%.2f dts:%d m:%d (p:%d,c:%d) =%llu", __func__, (double)m_State.time, (double)m_State.timestamp, (int)DVD_TIME_TO_MSEC(m_State.dts + m_offset_pts), (int)DVD_TIME_TO_MSEC(m_av_clock.OMXMediaTime()), (int)m_playSpeed, (int)m_caching, llrint(m_State.time + DVD_TIME_TO_MSEC(offset)));}
|
|
||||||
+ //{CLog::Log(LOGINFO, "%s: time:%.2f stamp:%.2f dts:%d m:%d (p:%d,c:%d) =%llu", __func__, (double)m_State.time, (double)m_State.timestamp, (int)DVD_TIME_TO_MSEC(m_State.dts + m_offset_pts), (int)DVD_TIME_TO_MSEC(m_stamp), (int)m_playSpeed, (int)m_caching, llrint(m_State.time + DVD_TIME_TO_MSEC(offset)));}
|
|
||||||
return llrint(m_State.time + DVD_TIME_TO_MSEC(offset));
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -4300,7 +4307,7 @@ void COMXPlayer::UpdatePlayState(double timeout)
|
|
||||||
state.cache_bytes = 0;
|
|
||||||
|
|
||||||
state.timestamp = m_clock.GetAbsoluteClock();
|
|
||||||
- //{CLog::Log(LOGINFO, "%s: time:%.2f stamp:%.2f dts:%d m:%d (p:%d,c:%d) =%llu", __func__, (double)state.time, (double)state.timestamp, (int)DVD_TIME_TO_MSEC(state.dts + m_offset_pts), (int)DVD_TIME_TO_MSEC(m_av_clock.OMXMediaTime()), (int)m_playSpeed, (int)m_caching, llrint(state.time + DVD_TIME_TO_MSEC(offset)));}
|
|
||||||
+ //{CLog::Log(LOGINFO, "%s: time:%.2f stamp:%.2f dts:%d m:%d (p:%d,c:%d) =%llu", __func__, (double)state.time, (double)state.timestamp, (int)DVD_TIME_TO_MSEC(state.dts + m_offset_pts), (int)DVD_TIME_TO_MSEC(m_stamp), (int)m_playSpeed, (int)m_caching, llrint(state.time + DVD_TIME_TO_MSEC(offset)));}
|
|
||||||
|
|
||||||
CSingleLock lock(m_StateSection);
|
|
||||||
m_StateInput = state;
|
|
||||||
diff --git a/xbmc/cores/omxplayer/OMXPlayer.h b/xbmc/cores/omxplayer/OMXPlayer.h
|
|
||||||
index 6f0c148..f0e5216 100644
|
|
||||||
--- a/xbmc/cores/omxplayer/OMXPlayer.h
|
|
||||||
+++ b/xbmc/cores/omxplayer/OMXPlayer.h
|
|
||||||
@@ -378,6 +378,8 @@ class COMXPlayer : public IPlayer, public CThread, public IDVDPlayer
|
|
||||||
bool m_stepped;
|
|
||||||
int m_video_fifo;
|
|
||||||
int m_audio_fifo;
|
|
||||||
+ double m_last_check_time; // we periodically check for gpu underrun
|
|
||||||
+ double m_stamp; // last media stamp
|
|
||||||
|
|
||||||
CDVDOverlayContainer m_overlayContainer;
|
|
||||||
|
|
||||||
--
|
|
||||||
1.8.4
|
|
||||||
|
|
@ -1,256 +0,0 @@
|
|||||||
From 9d6e3524ec14401961f64376f01eaca596ae39da Mon Sep 17 00:00:00 2001
|
|
||||||
From: popcornmix <popcornmix@gmail.com>
|
|
||||||
Date: Wed, 28 Aug 2013 23:24:51 +0100
|
|
||||||
Subject: [PATCH] [rbp/omxplayer] Avoid audio codec when in passthrough modes
|
|
||||||
|
|
||||||
There is no need to open the ffmpeg audio codec when in passthrough mode.
|
|
||||||
Also there is no need to close/open the OMXAudio component when parameters (like number of channels) change in passthrough.
|
|
||||||
This avoids an unpleasant stutter
|
|
||||||
---
|
|
||||||
xbmc/cores/omxplayer/OMXPlayerAudio.cpp | 137 ++++++++++++--------------------
|
|
||||||
xbmc/cores/omxplayer/OMXPlayerAudio.h | 1 -
|
|
||||||
2 files changed, 53 insertions(+), 85 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp
|
|
||||||
index 4121300..ec60dce 100644
|
|
||||||
--- a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp
|
|
||||||
+++ b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp
|
|
||||||
@@ -104,22 +104,11 @@ bool OMXPlayerAudio::OpenStream(CDVDStreamInfo &hints)
|
|
||||||
if(!m_DllBcmHost.Load())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
- m_bad_state = false;
|
|
||||||
-
|
|
||||||
- COMXAudioCodecOMX *codec = new COMXAudioCodecOMX();
|
|
||||||
-
|
|
||||||
- if(!codec || !codec->Open(hints))
|
|
||||||
- {
|
|
||||||
- CLog::Log(LOGERROR, "Unsupported audio codec");
|
|
||||||
- delete codec; codec = NULL;
|
|
||||||
- return false;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
if(m_messageQueue.IsInited())
|
|
||||||
- m_messageQueue.Put(new COMXMsgAudioCodecChange(hints, codec), 0);
|
|
||||||
+ m_messageQueue.Put(new COMXMsgAudioCodecChange(hints, NULL), 0);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
- OpenStream(hints, codec);
|
|
||||||
+ OpenStream(hints, NULL);
|
|
||||||
m_messageQueue.Init();
|
|
||||||
CLog::Log(LOGNOTICE, "Creating audio thread");
|
|
||||||
Create();
|
|
||||||
@@ -128,30 +117,56 @@ bool OMXPlayerAudio::OpenStream(CDVDStreamInfo &hints)
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
-void OMXPlayerAudio::OpenStream(CDVDStreamInfo &hints, COMXAudioCodecOMX *codec)
|
|
||||||
+void OMXPlayerAudio::OpenStream(CDVDStreamInfo &hints, COMXAudioCodecOMX *dummy)
|
|
||||||
{
|
|
||||||
- SAFE_DELETE(m_pAudioCodec);
|
|
||||||
+ bool codec_change = false;
|
|
||||||
+
|
|
||||||
+ m_bad_state = false;
|
|
||||||
+ m_use_passthrough = (CSettings::Get().GetInt("audiooutput.mode") == AUDIO_HDMI &&
|
|
||||||
+ !CSettings::Get().GetBool("audiooutput.dualaudio")) ? true : false ;
|
|
||||||
+ m_use_hw_decode = g_advancedSettings.m_omxHWAudioDecode;
|
|
||||||
+ m_format.m_dataFormat = GetDataFormat(hints);
|
|
||||||
+
|
|
||||||
+ if (m_hints.codec != hints.codec || m_hints.samplerate != hints.samplerate || !m_passthrough )
|
|
||||||
+ codec_change = true;
|
|
||||||
+
|
|
||||||
+ if (codec_change)
|
|
||||||
+ {
|
|
||||||
+ delete m_pAudioCodec;
|
|
||||||
+ m_pAudioCodec = NULL;
|
|
||||||
+
|
|
||||||
+ m_format.m_sampleRate = 0;
|
|
||||||
+ m_format.m_channelLayout = 0;
|
|
||||||
+ m_speed = DVD_PLAYSPEED_NORMAL;
|
|
||||||
+ m_audioClock = DVD_NOPTS_VALUE;
|
|
||||||
+ m_hw_decode = false;
|
|
||||||
+ m_silence = false;
|
|
||||||
+ m_started = false;
|
|
||||||
+ m_flush = false;
|
|
||||||
+ m_nChannels = 0;
|
|
||||||
+ m_stalled = m_messageQueue.GetPacketCount(CDVDMsg::DEMUXER_PACKET) == 0;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (!m_passthrough && !m_pAudioCodec)
|
|
||||||
+ {
|
|
||||||
+ m_pAudioCodec = new COMXAudioCodecOMX();
|
|
||||||
+
|
|
||||||
+ if(!m_pAudioCodec || !m_pAudioCodec->Open(hints))
|
|
||||||
+ {
|
|
||||||
+ CLog::Log(LOGERROR, "Unsupported audio codec");
|
|
||||||
+ delete m_pAudioCodec; m_pAudioCodec = NULL;
|
|
||||||
+ m_bad_state = true;
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
|
|
||||||
m_hints = hints;
|
|
||||||
- m_pAudioCodec = codec;
|
|
||||||
|
|
||||||
if(m_hints.bitspersample == 0)
|
|
||||||
m_hints.bitspersample = 16;
|
|
||||||
|
|
||||||
- m_speed = DVD_PLAYSPEED_NORMAL;
|
|
||||||
- m_audioClock = DVD_NOPTS_VALUE;
|
|
||||||
- m_hw_decode = false;
|
|
||||||
- m_silence = false;
|
|
||||||
- m_started = false;
|
|
||||||
- m_flush = false;
|
|
||||||
- m_nChannels = 0;
|
|
||||||
- m_stalled = m_messageQueue.GetPacketCount(CDVDMsg::DEMUXER_PACKET) == 0;
|
|
||||||
- m_use_passthrough = (CSettings::Get().GetInt("audiooutput.mode") == AUDIO_HDMI &&
|
|
||||||
- !CSettings::Get().GetBool("audiooutput.dualaudio")) ? true : false ;
|
|
||||||
- m_use_hw_decode = g_advancedSettings.m_omxHWAudioDecode;
|
|
||||||
- m_format.m_dataFormat = GetDataFormat(m_hints);
|
|
||||||
- m_format.m_sampleRate = 0;
|
|
||||||
- m_format.m_channelLayout = 0;
|
|
||||||
+ if (codec_change)
|
|
||||||
+ m_DecoderOpen = OpenDecoder();
|
|
||||||
}
|
|
||||||
|
|
||||||
bool OMXPlayerAudio::CloseStream(bool bWaitForBuffers)
|
|
||||||
@@ -190,38 +205,9 @@ void OMXPlayerAudio::OnExit()
|
|
||||||
CLog::Log(LOGNOTICE, "thread end: OMXPlayerAudio::OnExit()");
|
|
||||||
}
|
|
||||||
|
|
||||||
-bool OMXPlayerAudio::CodecChange()
|
|
||||||
-{
|
|
||||||
- unsigned int old_bitrate = m_hints.bitrate;
|
|
||||||
- unsigned int new_bitrate = m_hints_current.bitrate;
|
|
||||||
-
|
|
||||||
- if(m_pAudioCodec)
|
|
||||||
- {
|
|
||||||
- m_hints.channels = m_pAudioCodec->GetChannels();
|
|
||||||
- m_hints.samplerate = m_pAudioCodec->GetSampleRate();
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- /* only check bitrate changes on AV_CODEC_ID_DTS, AV_CODEC_ID_AC3, AV_CODEC_ID_EAC3 */
|
|
||||||
- if(m_hints.codec != AV_CODEC_ID_DTS && m_hints.codec != AV_CODEC_ID_AC3 && m_hints.codec != AV_CODEC_ID_EAC3)
|
|
||||||
- new_bitrate = old_bitrate = 0;
|
|
||||||
-
|
|
||||||
- if(m_hints_current.codec != m_hints.codec ||
|
|
||||||
- m_hints_current.channels != m_hints.channels ||
|
|
||||||
- m_hints_current.samplerate != m_hints.samplerate ||
|
|
||||||
- m_hints_current.bitspersample != m_hints.bitspersample ||
|
|
||||||
- old_bitrate != new_bitrate ||
|
|
||||||
- !m_DecoderOpen)
|
|
||||||
- {
|
|
||||||
- m_hints_current = m_hints;
|
|
||||||
- return true;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- return false;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
bool OMXPlayerAudio::Decode(DemuxPacket *pkt, bool bDropPacket)
|
|
||||||
{
|
|
||||||
- if(!pkt || m_bad_state || !m_pAudioCodec)
|
|
||||||
+ if(!pkt || m_bad_state)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if(pkt->dts != DVD_NOPTS_VALUE)
|
|
||||||
@@ -230,7 +216,7 @@ bool OMXPlayerAudio::Decode(DemuxPacket *pkt, bool bDropPacket)
|
|
||||||
const uint8_t *data_dec = pkt->pData;
|
|
||||||
int data_len = pkt->iSize;
|
|
||||||
|
|
||||||
- if(!OMX_IS_RAW(m_format.m_dataFormat) && !bDropPacket)
|
|
||||||
+ if(m_pAudioCodec && !OMX_IS_RAW(m_format.m_dataFormat) && !bDropPacket)
|
|
||||||
{
|
|
||||||
while(!m_bStop && data_len > 0)
|
|
||||||
{
|
|
||||||
@@ -254,13 +240,6 @@ bool OMXPlayerAudio::Decode(DemuxPacket *pkt, bool bDropPacket)
|
|
||||||
|
|
||||||
m_audioStats.AddSampleBytes(decoded_size);
|
|
||||||
|
|
||||||
- if(CodecChange())
|
|
||||||
- {
|
|
||||||
- m_DecoderOpen = OpenDecoder();
|
|
||||||
- if(!m_DecoderOpen)
|
|
||||||
- return false;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
while(!m_bStop)
|
|
||||||
{
|
|
||||||
// discard if flushing as clocks may be stopped and we'll never submit it
|
|
||||||
@@ -292,15 +271,8 @@ bool OMXPlayerAudio::Decode(DemuxPacket *pkt, bool bDropPacket)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
- else if(!bDropPacket)
|
|
||||||
+ else if(OMX_IS_RAW(m_format.m_dataFormat) && !bDropPacket)
|
|
||||||
{
|
|
||||||
- if(CodecChange())
|
|
||||||
- {
|
|
||||||
- m_DecoderOpen = OpenDecoder();
|
|
||||||
- if(!m_DecoderOpen)
|
|
||||||
- return false;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
while(!m_bStop)
|
|
||||||
{
|
|
||||||
if(m_flush)
|
|
||||||
@@ -473,6 +445,7 @@ void OMXPlayerAudio::Process()
|
|
||||||
else if (pMsg->IsType(CDVDMsg::GENERAL_STREAMCHANGE))
|
|
||||||
{
|
|
||||||
COMXMsgAudioCodecChange* msg(static_cast<COMXMsgAudioCodecChange*>(pMsg));
|
|
||||||
+ CLog::Log(LOGDEBUG, "COMXPlayerAudio - CDVDMsg::GENERAL_STREAMCHANGE");
|
|
||||||
OpenStream(msg->m_hints, msg->m_codec);
|
|
||||||
msg->m_codec = NULL;
|
|
||||||
}
|
|
||||||
@@ -520,7 +493,6 @@ AEDataFormat OMXPlayerAudio::GetDataFormat(CDVDStreamInfo hints)
|
|
||||||
m_hw_decode = false;
|
|
||||||
|
|
||||||
/* check our audio capabilties */
|
|
||||||
-
|
|
||||||
/* pathrought is overriding hw decode*/
|
|
||||||
if(AUDIO_IS_BITSTREAM(CSettings::Get().GetInt("audiooutput.mode")) && m_use_passthrough)
|
|
||||||
{
|
|
||||||
@@ -565,10 +537,6 @@ AEDataFormat OMXPlayerAudio::GetDataFormat(CDVDStreamInfo hints)
|
|
||||||
|
|
||||||
bool OMXPlayerAudio::OpenDecoder()
|
|
||||||
{
|
|
||||||
- m_nChannels = m_hints.channels;
|
|
||||||
- m_passthrough = false;
|
|
||||||
- m_hw_decode = false;
|
|
||||||
-
|
|
||||||
if(m_DecoderOpen)
|
|
||||||
{
|
|
||||||
WaitCompletion();
|
|
||||||
@@ -576,13 +544,14 @@ bool OMXPlayerAudio::OpenDecoder()
|
|
||||||
m_DecoderOpen = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
+ m_nChannels = m_hints.channels;
|
|
||||||
+ m_format.m_dataFormat = GetDataFormat(m_hints);
|
|
||||||
+
|
|
||||||
/* setup audi format for audio render */
|
|
||||||
m_format.m_sampleRate = m_hints.samplerate;
|
|
||||||
- /* GetDataFormat is setting up evrything */
|
|
||||||
- m_format.m_dataFormat = GetDataFormat(m_hints);
|
|
||||||
|
|
||||||
m_format.m_channelLayout.Reset();
|
|
||||||
- if (m_pAudioCodec && !m_passthrough)
|
|
||||||
+ if (m_pAudioCodec)
|
|
||||||
m_format.m_channelLayout = m_pAudioCodec->GetChannelMap();
|
|
||||||
else if (m_passthrough)
|
|
||||||
{
|
|
||||||
diff --git a/xbmc/cores/omxplayer/OMXPlayerAudio.h b/xbmc/cores/omxplayer/OMXPlayerAudio.h
|
|
||||||
index 19f50f7..394f309 100644
|
|
||||||
--- a/xbmc/cores/omxplayer/OMXPlayerAudio.h
|
|
||||||
+++ b/xbmc/cores/omxplayer/OMXPlayerAudio.h
|
|
||||||
@@ -55,7 +55,6 @@ class OMXPlayerAudio : public CThread
|
|
||||||
bool m_use_hw_decode;
|
|
||||||
bool m_hw_decode;
|
|
||||||
AEAudioFormat m_format;
|
|
||||||
- CAEChannelInfo m_channelLayout;
|
|
||||||
COMXAudioCodecOMX *m_pAudioCodec;
|
|
||||||
int m_speed;
|
|
||||||
bool m_silence;
|
|
||||||
--
|
|
||||||
1.8.4
|
|
||||||
|
|
@ -21,7 +21,7 @@
|
|||||||
PKG_NAME="ffmpeg"
|
PKG_NAME="ffmpeg"
|
||||||
PKG_VERSION="0.10.7"
|
PKG_VERSION="0.10.7"
|
||||||
if [ "$XBMC" = "master" ]; then
|
if [ "$XBMC" = "master" ]; then
|
||||||
PKG_VERSION="1.2.1"
|
PKG_VERSION="1.2.3"
|
||||||
fi
|
fi
|
||||||
PKG_REV="1"
|
PKG_REV="1"
|
||||||
PKG_ARCH="any"
|
PKG_ARCH="any"
|
||||||
|
@ -0,0 +1,13 @@
|
|||||||
|
diff -Naur ffmpeg-0.10.7/libavcodec/h264.c ffmpeg-0.10.7.patch/libavcodec/h264.c
|
||||||
|
--- ffmpeg-0.10.7/libavcodec/h264.c 2013-04-10 03:52:33.000000000 +0200
|
||||||
|
+++ ffmpeg-0.10.7.patch/libavcodec/h264.c 2013-09-09 17:25:55.377208927 +0200
|
||||||
|
@@ -3962,8 +3962,7 @@
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
- //FIXME do not discard SEI id
|
||||||
|
- if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
|
||||||
|
+ if (avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0 && h->nal_unit_type != NAL_SEI)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
again:
|
@ -1,22 +0,0 @@
|
|||||||
Subject: [libav-devel] [PATCH 1/2] vaapi: return early from ff_vaapi_render_picture() without picture
|
|
||||||
From: Janne Grunau janne-libav at jannau.net
|
|
||||||
|
|
||||||
Fixes an assertion when called on uninitialized frame. Spotted after
|
|
||||||
seeking in vlc. (backported from libav mailing list)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
diff --git a/libavcodec/vaapi.c b/libavcodec/vaapi.c
|
|
||||||
index a220a9d..94959bf 100644
|
|
||||||
--- a/libavcodec/vaapi.c
|
|
||||||
+++ b/libavcodec/vaapi.c
|
|
||||||
@@ -46,6 +46,9 @@ int ff_vaapi_render_picture(struct vaapi_context *vactx, VASurfaceID surface)
|
|
||||||
VABufferID va_buffers[3];
|
|
||||||
unsigned int n_va_buffers = 0;
|
|
||||||
|
|
||||||
+ if (!vactx->pic_param_buf_id)
|
|
||||||
+ return 0;
|
|
||||||
+
|
|
||||||
vaUnmapBuffer(vactx->display, vactx->pic_param_buf_id);
|
|
||||||
va_buffers[n_va_buffers++] = vactx->pic_param_buf_id;
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,311 @@
|
|||||||
|
From 40daea3c1bafa9cea37b65f856c3c0432767d760 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Mon, 15 Jul 2013 18:28:09 +0100
|
||||||
|
Subject: [PATCH 39/49] [ffmpeg] - backport - arm: Add VFP-accelerated version
|
||||||
|
of synth_filter_float
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Before After
|
||||||
|
Mean StdDev Mean StdDev Change
|
||||||
|
This function 9295.0 114.9 4853.2 83.5 +91.5%
|
||||||
|
Overall 23699.8 397.6 19285.5 292.0 +22.9%
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/arm/Makefile | 1 +
|
||||||
|
libavcodec/arm/fft_init_arm.c | 8 +
|
||||||
|
libavcodec/arm/synth_filter_vfp.S | 243 ++++++++++++++++++++++++++
|
||||||
|
3 files changed, 252 insertions(+)
|
||||||
|
create mode 100644 libavcodec/arm/synth_filter_vfp.S
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
|
||||||
|
index 1c91d62..aee9d73 100644
|
||||||
|
--- a/libavcodec/arm/Makefile
|
||||||
|
+++ b/libavcodec/arm/Makefile
|
||||||
|
@@ -58,6 +58,7 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \
|
||||||
|
arm/dsputil_armv6.o \
|
||||||
|
arm/simple_idct_armv6.o \
|
||||||
|
|
||||||
|
+VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_vfp.o
|
||||||
|
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
|
||||||
|
|
||||||
|
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
|
||||||
|
diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c
|
||||||
|
index 8c98abc..fe0acc5 100644
|
||||||
|
--- a/libavcodec/arm/fft_init_arm.c
|
||||||
|
+++ b/libavcodec/arm/fft_init_arm.c
|
||||||
|
@@ -32,6 +32,12 @@ void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input)
|
||||||
|
|
||||||
|
void ff_rdft_calc_neon(struct RDFTContext *s, FFTSample *z);
|
||||||
|
|
||||||
|
+void ff_synth_filter_float_vfp(FFTContext *imdct,
|
||||||
|
+ float *synth_buf_ptr, int *synth_buf_offset,
|
||||||
|
+ float synth_buf2[32], const float window[512],
|
||||||
|
+ float out[32], const float in[32],
|
||||||
|
+ float scale);
|
||||||
|
+
|
||||||
|
void ff_synth_filter_float_neon(FFTContext *imdct,
|
||||||
|
float *synth_buf_ptr, int *synth_buf_offset,
|
||||||
|
float synth_buf2[32], const float window[512],
|
||||||
|
@@ -71,6 +77,8 @@ av_cold void ff_synth_filter_init_arm(SynthFilterContext *s)
|
||||||
|
{
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
+ if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags))
|
||||||
|
+ s->synth_filter_float = ff_synth_filter_float_vfp;
|
||||||
|
if (have_neon(cpu_flags))
|
||||||
|
s->synth_filter_float = ff_synth_filter_float_neon;
|
||||||
|
}
|
||||||
|
diff --git a/libavcodec/arm/synth_filter_vfp.S b/libavcodec/arm/synth_filter_vfp.S
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..c219c41
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/libavcodec/arm/synth_filter_vfp.S
|
||||||
|
@@ -0,0 +1,243 @@
|
||||||
|
+/*
|
||||||
|
+ * Copyright (c) 2013 RISC OS Open Ltd
|
||||||
|
+ * Author: Ben Avison <bavison@riscosopen.org>
|
||||||
|
+ *
|
||||||
|
+ * This file is part of Libav.
|
||||||
|
+ *
|
||||||
|
+ * Libav is free software; you can redistribute it and/or
|
||||||
|
+ * modify it under the terms of the GNU Lesser General Public
|
||||||
|
+ * License as published by the Free Software Foundation; either
|
||||||
|
+ * version 2.1 of the License, or (at your option) any later version.
|
||||||
|
+ *
|
||||||
|
+ * Libav is distributed in the hope that it will be useful,
|
||||||
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
+ * Lesser General Public License for more details.
|
||||||
|
+ *
|
||||||
|
+ * You should have received a copy of the GNU Lesser General Public
|
||||||
|
+ * License along with Libav; if not, write to the Free Software
|
||||||
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include "libavutil/arm/asm.S"
|
||||||
|
+
|
||||||
|
+IMDCT .req r0
|
||||||
|
+ORIG_P_SB .req r1
|
||||||
|
+P_SB_OFF .req r2
|
||||||
|
+I .req r0
|
||||||
|
+P_SB2_UP .req r1
|
||||||
|
+OLDFPSCR .req r2
|
||||||
|
+P_SB2_DN .req r3
|
||||||
|
+P_WIN_DN .req r4
|
||||||
|
+P_OUT_DN .req r5
|
||||||
|
+P_SB .req r6
|
||||||
|
+J_WRAP .req r7
|
||||||
|
+P_WIN_UP .req r12
|
||||||
|
+P_OUT_UP .req r14
|
||||||
|
+
|
||||||
|
+SCALE .req s0
|
||||||
|
+SBUF_DAT_REV0 .req s4
|
||||||
|
+SBUF_DAT_REV1 .req s5
|
||||||
|
+SBUF_DAT_REV2 .req s6
|
||||||
|
+SBUF_DAT_REV3 .req s7
|
||||||
|
+VA0 .req s8
|
||||||
|
+VA3 .req s11
|
||||||
|
+VB0 .req s12
|
||||||
|
+VB3 .req s15
|
||||||
|
+VC0 .req s8
|
||||||
|
+VC3 .req s11
|
||||||
|
+VD0 .req s12
|
||||||
|
+VD3 .req s15
|
||||||
|
+SBUF_DAT0 .req s16
|
||||||
|
+SBUF_DAT1 .req s17
|
||||||
|
+SBUF_DAT2 .req s18
|
||||||
|
+SBUF_DAT3 .req s19
|
||||||
|
+SBUF_DAT_ALT0 .req s20
|
||||||
|
+SBUF_DAT_ALT1 .req s21
|
||||||
|
+SBUF_DAT_ALT2 .req s22
|
||||||
|
+SBUF_DAT_ALT3 .req s23
|
||||||
|
+WIN_DN_DAT0 .req s24
|
||||||
|
+WIN_UP_DAT0 .req s28
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+.macro inner_loop half, tail, head
|
||||||
|
+ .if (OFFSET & (64*4)) == 0 @ even numbered call
|
||||||
|
+ SBUF_DAT_THIS0 .req SBUF_DAT0
|
||||||
|
+ SBUF_DAT_THIS1 .req SBUF_DAT1
|
||||||
|
+ SBUF_DAT_THIS2 .req SBUF_DAT2
|
||||||
|
+ SBUF_DAT_THIS3 .req SBUF_DAT3
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vldr d8, [P_SB, #OFFSET] @ d8 = SBUF_DAT
|
||||||
|
+ vldr d9, [P_SB, #OFFSET+8]
|
||||||
|
+ .endif
|
||||||
|
+ .else
|
||||||
|
+ SBUF_DAT_THIS0 .req SBUF_DAT_ALT0
|
||||||
|
+ SBUF_DAT_THIS1 .req SBUF_DAT_ALT1
|
||||||
|
+ SBUF_DAT_THIS2 .req SBUF_DAT_ALT2
|
||||||
|
+ SBUF_DAT_THIS3 .req SBUF_DAT_ALT3
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vldr d10, [P_SB, #OFFSET] @ d10 = SBUF_DAT_ALT
|
||||||
|
+ vldr d11, [P_SB, #OFFSET+8]
|
||||||
|
+ .endif
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\tail",""
|
||||||
|
+ .ifc "\half","ab"
|
||||||
|
+ vmls.f VA0, SBUF_DAT_REV0, WIN_DN_DAT0 @ all operands treated as vectors
|
||||||
|
+ .else
|
||||||
|
+ vmla.f VD0, SBUF_DAT_REV0, WIN_DN_DAT0 @ all operands treated as vectors
|
||||||
|
+ .endif
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vldr d14, [P_WIN_UP, #OFFSET] @ d14 = WIN_UP_DAT
|
||||||
|
+ vldr d15, [P_WIN_UP, #OFFSET+8]
|
||||||
|
+ vldr d12, [P_WIN_DN, #OFFSET] @ d12 = WIN_DN_DAT
|
||||||
|
+ vldr d13, [P_WIN_DN, #OFFSET+8]
|
||||||
|
+ vmov SBUF_DAT_REV3, SBUF_DAT_THIS0
|
||||||
|
+ vmov SBUF_DAT_REV2, SBUF_DAT_THIS1
|
||||||
|
+ vmov SBUF_DAT_REV1, SBUF_DAT_THIS2
|
||||||
|
+ vmov SBUF_DAT_REV0, SBUF_DAT_THIS3
|
||||||
|
+ .ifc "\half","ab"
|
||||||
|
+ vmla.f VB0, SBUF_DAT_THIS0, WIN_UP_DAT0
|
||||||
|
+ .else
|
||||||
|
+ vmla.f VC0, SBUF_DAT_THIS0, WIN_UP_DAT0
|
||||||
|
+ .endif
|
||||||
|
+ teq J_WRAP, #J
|
||||||
|
+ bne 2f @ strongly predictable, so better than cond exec in this case
|
||||||
|
+ sub P_SB, P_SB, #512*4
|
||||||
|
+2:
|
||||||
|
+ .set J, J - 64
|
||||||
|
+ .set OFFSET, OFFSET + 64*4
|
||||||
|
+ .endif
|
||||||
|
+ .unreq SBUF_DAT_THIS0
|
||||||
|
+ .unreq SBUF_DAT_THIS1
|
||||||
|
+ .unreq SBUF_DAT_THIS2
|
||||||
|
+ .unreq SBUF_DAT_THIS3
|
||||||
|
+.endm
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+/* void ff_synth_filter_float_vfp(FFTContext *imdct,
|
||||||
|
+ * float *synth_buf_ptr, int *synth_buf_offset,
|
||||||
|
+ * float synth_buf2[32], const float window[512],
|
||||||
|
+ * float out[32], const float in[32], float scale)
|
||||||
|
+ */
|
||||||
|
+function ff_synth_filter_float_vfp, export=1
|
||||||
|
+ push {r3-r7,lr}
|
||||||
|
+ vpush {s16-s31}
|
||||||
|
+ ldr lr, [P_SB_OFF]
|
||||||
|
+ add a2, ORIG_P_SB, lr, LSL #2 @ calculate synth_buf to pass to imdct_half
|
||||||
|
+ mov P_SB, a2 @ and keep a copy for ourselves
|
||||||
|
+ bic J_WRAP, lr, #63 @ mangled to make testing for wrap easier in inner loop
|
||||||
|
+ sub lr, lr, #32
|
||||||
|
+ and lr, lr, #512-32
|
||||||
|
+ str lr, [P_SB_OFF] @ rotate offset, modulo buffer size, ready for next call
|
||||||
|
+ ldr a3, [sp, #(16+6+2)*4] @ fetch in from stack, to pass to imdct_half
|
||||||
|
+VFP vmov s16, SCALE @ imdct_half is free to corrupt s0, but it contains one of our arguments in hardfp case
|
||||||
|
+ bl ff_imdct_half_vfp
|
||||||
|
+VFP vmov SCALE, s16
|
||||||
|
+
|
||||||
|
+ fmrx OLDFPSCR, FPSCR
|
||||||
|
+ ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
|
||||||
|
+ fmxr FPSCR, lr
|
||||||
|
+ ldr P_SB2_DN, [sp, #16*4]
|
||||||
|
+ ldr P_WIN_DN, [sp, #(16+6+0)*4]
|
||||||
|
+ ldr P_OUT_DN, [sp, #(16+6+1)*4]
|
||||||
|
+NOVFP vldr SCALE, [sp, #(16+6+3)*4]
|
||||||
|
+
|
||||||
|
+#define IMM_OFF_SKEW 956 /* also valid immediate constant when you add 16*4 */
|
||||||
|
+ add P_SB, P_SB, #IMM_OFF_SKEW @ so we can use -ve offsets to use full immediate offset range
|
||||||
|
+ add P_SB2_UP, P_SB2_DN, #16*4
|
||||||
|
+ add P_WIN_UP, P_WIN_DN, #16*4+IMM_OFF_SKEW
|
||||||
|
+ add P_OUT_UP, P_OUT_DN, #16*4
|
||||||
|
+ add P_SB2_DN, P_SB2_DN, #16*4
|
||||||
|
+ add P_WIN_DN, P_WIN_DN, #12*4+IMM_OFF_SKEW
|
||||||
|
+ add P_OUT_DN, P_OUT_DN, #16*4
|
||||||
|
+ mov I, #4
|
||||||
|
+1:
|
||||||
|
+ vldmia P_SB2_UP!, {VB0-VB3}
|
||||||
|
+ vldmdb P_SB2_DN!, {VA0-VA3}
|
||||||
|
+ .set J, 512 - 64
|
||||||
|
+ .set OFFSET, -IMM_OFF_SKEW
|
||||||
|
+ inner_loop ab,, head
|
||||||
|
+ .rept 7
|
||||||
|
+ inner_loop ab, tail, head
|
||||||
|
+ .endr
|
||||||
|
+ inner_loop ab, tail
|
||||||
|
+ add P_WIN_UP, P_WIN_UP, #4*4
|
||||||
|
+ sub P_WIN_DN, P_WIN_DN, #4*4
|
||||||
|
+ vmul.f VB0, VB0, SCALE @ SCALE treated as scalar
|
||||||
|
+ add P_SB, P_SB, #(512+4)*4
|
||||||
|
+ subs I, I, #1
|
||||||
|
+ vmul.f VA0, VA0, SCALE
|
||||||
|
+ vstmia P_OUT_UP!, {VB0-VB3}
|
||||||
|
+ vstmdb P_OUT_DN!, {VA0-VA3}
|
||||||
|
+ bne 1b
|
||||||
|
+
|
||||||
|
+ add P_SB2_DN, P_SB2_DN, #(16+28-12)*4
|
||||||
|
+ sub P_SB2_UP, P_SB2_UP, #(16+16)*4
|
||||||
|
+ add P_WIN_DN, P_WIN_DN, #(32+16+28-12)*4
|
||||||
|
+ mov I, #4
|
||||||
|
+1:
|
||||||
|
+ vldr.d d4, zero @ d4 = VC0
|
||||||
|
+ vldr.d d5, zero
|
||||||
|
+ vldr.d d6, zero @ d6 = VD0
|
||||||
|
+ vldr.d d7, zero
|
||||||
|
+ .set J, 512 - 64
|
||||||
|
+ .set OFFSET, -IMM_OFF_SKEW
|
||||||
|
+ inner_loop cd,, head
|
||||||
|
+ .rept 7
|
||||||
|
+ inner_loop cd, tail, head
|
||||||
|
+ .endr
|
||||||
|
+ inner_loop cd, tail
|
||||||
|
+ add P_WIN_UP, P_WIN_UP, #4*4
|
||||||
|
+ sub P_WIN_DN, P_WIN_DN, #4*4
|
||||||
|
+ add P_SB, P_SB, #(512+4)*4
|
||||||
|
+ subs I, I, #1
|
||||||
|
+ vstmia P_SB2_UP!, {VC0-VC3}
|
||||||
|
+ vstmdb P_SB2_DN!, {VD0-VD3}
|
||||||
|
+ bne 1b
|
||||||
|
+
|
||||||
|
+ fmxr FPSCR, OLDFPSCR
|
||||||
|
+ vpop {s16-s31}
|
||||||
|
+ pop {r3-r7,pc}
|
||||||
|
+endfunc
|
||||||
|
+
|
||||||
|
+ .unreq IMDCT
|
||||||
|
+ .unreq ORIG_P_SB
|
||||||
|
+ .unreq P_SB_OFF
|
||||||
|
+ .unreq I
|
||||||
|
+ .unreq P_SB2_UP
|
||||||
|
+ .unreq OLDFPSCR
|
||||||
|
+ .unreq P_SB2_DN
|
||||||
|
+ .unreq P_WIN_DN
|
||||||
|
+ .unreq P_OUT_DN
|
||||||
|
+ .unreq P_SB
|
||||||
|
+ .unreq J_WRAP
|
||||||
|
+ .unreq P_WIN_UP
|
||||||
|
+ .unreq P_OUT_UP
|
||||||
|
+
|
||||||
|
+ .unreq SCALE
|
||||||
|
+ .unreq SBUF_DAT_REV0
|
||||||
|
+ .unreq SBUF_DAT_REV1
|
||||||
|
+ .unreq SBUF_DAT_REV2
|
||||||
|
+ .unreq SBUF_DAT_REV3
|
||||||
|
+ .unreq VA0
|
||||||
|
+ .unreq VA3
|
||||||
|
+ .unreq VB0
|
||||||
|
+ .unreq VB3
|
||||||
|
+ .unreq VC0
|
||||||
|
+ .unreq VC3
|
||||||
|
+ .unreq VD0
|
||||||
|
+ .unreq VD3
|
||||||
|
+ .unreq SBUF_DAT0
|
||||||
|
+ .unreq SBUF_DAT1
|
||||||
|
+ .unreq SBUF_DAT2
|
||||||
|
+ .unreq SBUF_DAT3
|
||||||
|
+ .unreq SBUF_DAT_ALT0
|
||||||
|
+ .unreq SBUF_DAT_ALT1
|
||||||
|
+ .unreq SBUF_DAT_ALT2
|
||||||
|
+ .unreq SBUF_DAT_ALT3
|
||||||
|
+ .unreq WIN_DN_DAT0
|
||||||
|
+ .unreq WIN_UP_DAT0
|
||||||
|
+
|
||||||
|
+ .align 3
|
||||||
|
+zero: .word 0, 0
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,102 @@
|
|||||||
|
From 8ead63b22d31bf71976fc6964922b43d8e0d660b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Mon, 15 Jul 2013 18:28:10 +0100
|
||||||
|
Subject: [PATCH 40/49] [ffmpeg] - backport - arm: Add VFP-accelerated version
|
||||||
|
of int32_to_float_fmul_scalar
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Before After
|
||||||
|
Mean StdDev Mean StdDev Change
|
||||||
|
This function 1175.0 4.4 366.2 18.3 +220.8%
|
||||||
|
Overall 19285.5 292.0 18420.5 489.1 +4.7%
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/arm/fmtconvert_init_arm.c | 10 ++++++
|
||||||
|
libavcodec/arm/fmtconvert_vfp.S | 38 +++++++++++++++++++++++
|
||||||
|
2 files changed, 48 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c
|
||||||
|
index 1d99c97..de3b78b 100644
|
||||||
|
--- a/libavcodec/arm/fmtconvert_init_arm.c
|
||||||
|
+++ b/libavcodec/arm/fmtconvert_init_arm.c
|
||||||
|
@@ -28,6 +28,9 @@
|
||||||
|
void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src,
|
||||||
|
float mul, int len);
|
||||||
|
|
||||||
|
+void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src,
|
||||||
|
+ float mul, int len);
|
||||||
|
+
|
||||||
|
void ff_float_to_int16_neon(int16_t *dst, const float *src, long len);
|
||||||
|
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
|
||||||
|
|
||||||
|
@@ -38,6 +41,13 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) {
|
||||||
|
+ if (!have_vfpv3(cpu_flags)) {
|
||||||
|
+ // This function doesn't use anything armv6 specific in itself,
|
||||||
|
+ // but ff_float_to_int16_vfp which is in the same assembly source
|
||||||
|
+ // file does, thus the whole file requires armv6 to be built.
|
||||||
|
+ c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
c->float_to_int16 = ff_float_to_int16_vfp;
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S
|
||||||
|
index 7b012bc..3cc3e56 100644
|
||||||
|
--- a/libavcodec/arm/fmtconvert_vfp.S
|
||||||
|
+++ b/libavcodec/arm/fmtconvert_vfp.S
|
||||||
|
@@ -1,5 +1,6 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
|
||||||
|
+ * Copyright (c) 2013 RISC OS Open Ltd <bavison@riscosopen.org>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
@@ -76,3 +77,40 @@ function ff_float_to_int16_vfp, export=1
|
||||||
|
vpop {d8-d11}
|
||||||
|
pop {r4-r8,pc}
|
||||||
|
endfunc
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * ARM VFP optimised int32 to float conversion.
|
||||||
|
+ * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
|
||||||
|
+ * (16 bytes alignment is best for BCM2835), little-endian.
|
||||||
|
+ */
|
||||||
|
+@ void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, float mul, int len)
|
||||||
|
+function ff_int32_to_float_fmul_scalar_vfp, export=1
|
||||||
|
+VFP tmp .req a4
|
||||||
|
+VFP len .req a3
|
||||||
|
+NOVFP tmp .req a3
|
||||||
|
+NOVFP len .req a4
|
||||||
|
+NOVFP vmov s0, a3
|
||||||
|
+ ldr tmp, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1
|
||||||
|
+ fmrx ip, FPSCR
|
||||||
|
+ fmxr FPSCR, tmp
|
||||||
|
+1:
|
||||||
|
+ vldmia a2!, {s8-s15}
|
||||||
|
+ vcvt.f32.s32 s8, s8
|
||||||
|
+ vcvt.f32.s32 s9, s9
|
||||||
|
+ vcvt.f32.s32 s10, s10
|
||||||
|
+ vcvt.f32.s32 s11, s11
|
||||||
|
+ vcvt.f32.s32 s12, s12
|
||||||
|
+ vcvt.f32.s32 s13, s13
|
||||||
|
+ vcvt.f32.s32 s14, s14
|
||||||
|
+ vcvt.f32.s32 s15, s15
|
||||||
|
+ vmul.f32 s8, s8, s0
|
||||||
|
+ subs len, len, #8
|
||||||
|
+ vstmia a1!, {s8-s11}
|
||||||
|
+ vstmia a1!, {s12-s15}
|
||||||
|
+ bne 1b
|
||||||
|
+
|
||||||
|
+ fmxr FPSCR, ip
|
||||||
|
+ bx lr
|
||||||
|
+endfunc
|
||||||
|
+ .unreq tmp
|
||||||
|
+ .unreq len
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,78 @@
|
|||||||
|
From 7901e7216cf6406a2ea430c71af94ebee72f262b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Mon, 15 Jul 2013 18:28:11 +0100
|
||||||
|
Subject: [PATCH 41/49] [ffmpeg] - backport - fmtconvert: Add a new method,
|
||||||
|
int32_to_float_fmul_array8
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
This is similar to int32_to_float_fmul_scalar, but
|
||||||
|
loads a new scalar multiplier every 8 input samples.
|
||||||
|
This enables the use of much larger input arrays, which
|
||||||
|
is important for pipelining on some CPUs (such as
|
||||||
|
ARMv6).
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/fmtconvert.c | 10 ++++++++++
|
||||||
|
libavcodec/fmtconvert.h | 16 ++++++++++++++++
|
||||||
|
2 files changed, 26 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/libavcodec/fmtconvert.c b/libavcodec/fmtconvert.c
|
||||||
|
index 79e9645..1c45d35 100644
|
||||||
|
--- a/libavcodec/fmtconvert.c
|
||||||
|
+++ b/libavcodec/fmtconvert.c
|
||||||
|
@@ -30,6 +30,15 @@ static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul,
|
||||||
|
dst[i] = src[i] * mul;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void int32_to_float_fmul_array8_c(FmtConvertContext *c, float *dst,
|
||||||
|
+ const int32_t *src, const float *mul,
|
||||||
|
+ int len)
|
||||||
|
+{
|
||||||
|
+ int i;
|
||||||
|
+ for (i = 0; i < len; i += 8)
|
||||||
|
+ c->int32_to_float_fmul_scalar(&dst[i], &src[i], *mul++, 8);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static av_always_inline int float_to_int16_one(const float *src){
|
||||||
|
return av_clip_int16(lrintf(*src));
|
||||||
|
}
|
||||||
|
@@ -79,6 +88,7 @@ void ff_float_interleave_c(float *dst, const float **src, unsigned int len,
|
||||||
|
av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
|
||||||
|
{
|
||||||
|
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
|
||||||
|
+ c->int32_to_float_fmul_array8 = int32_to_float_fmul_array8_c;
|
||||||
|
c->float_to_int16 = float_to_int16_c;
|
||||||
|
c->float_to_int16_interleave = float_to_int16_interleave_c;
|
||||||
|
c->float_interleave = ff_float_interleave_c;
|
||||||
|
diff --git a/libavcodec/fmtconvert.h b/libavcodec/fmtconvert.h
|
||||||
|
index 3fb9f4e..02468dc 100644
|
||||||
|
--- a/libavcodec/fmtconvert.h
|
||||||
|
+++ b/libavcodec/fmtconvert.h
|
||||||
|
@@ -38,6 +38,22 @@ typedef struct FmtConvertContext {
|
||||||
|
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
+ * Convert an array of int32_t to float and multiply by a float value from another array,
|
||||||
|
+ * stepping along the float array once for each 8 integers.
|
||||||
|
+ * @param c pointer to FmtConvertContext.
|
||||||
|
+ * @param dst destination array of float.
|
||||||
|
+ * constraints: 16-byte aligned
|
||||||
|
+ * @param src source array of int32_t.
|
||||||
|
+ * constraints: 16-byte aligned
|
||||||
|
+ * @param mul source array of float multipliers.
|
||||||
|
+ * @param len number of elements to convert.
|
||||||
|
+ * constraints: multiple of 8
|
||||||
|
+ */
|
||||||
|
+ void (*int32_to_float_fmul_array8)(struct FmtConvertContext *c,
|
||||||
|
+ float *dst, const int32_t *src,
|
||||||
|
+ const float *mul, int len);
|
||||||
|
+
|
||||||
|
+ /**
|
||||||
|
* Convert an array of float to an array of int16_t.
|
||||||
|
*
|
||||||
|
* Convert floats from in the range [-32768.0,32767.0] to ints
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,90 @@
|
|||||||
|
From fa755fe82fe4cfbb85b7c57501912da2e1f316bc Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Tue, 16 Jul 2013 15:41:18 +0300
|
||||||
|
Subject: [PATCH 42/49] [ffmpeg] - backport - dcadec: Use
|
||||||
|
int32_to_float_fmul_array8
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/dcadec.c | 23 +++++++++++++++--------
|
||||||
|
1 file changed, 15 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
|
||||||
|
index 1b955e4..b648613 100644
|
||||||
|
--- a/libavcodec/dcadec.c
|
||||||
|
+++ b/libavcodec/dcadec.c
|
||||||
|
@@ -1302,7 +1302,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
|
||||||
|
|
||||||
|
/* FIXME */
|
||||||
|
float (*subband_samples)[DCA_SUBBANDS][8] = s->subband_samples[block_index];
|
||||||
|
- LOCAL_ALIGNED_16(int, block, [8]);
|
||||||
|
+ LOCAL_ALIGNED_16(int, block, [8 * DCA_SUBBANDS]);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Audio data
|
||||||
|
@@ -1315,6 +1315,8 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
|
||||||
|
quant_step_table = lossy_quant_d;
|
||||||
|
|
||||||
|
for (k = base_channel; k < s->prim_channels; k++) {
|
||||||
|
+ float rscale[DCA_SUBBANDS];
|
||||||
|
+
|
||||||
|
if (get_bits_left(&s->gb) < 0)
|
||||||
|
return AVERROR_INVALIDDATA;
|
||||||
|
|
||||||
|
@@ -1337,11 +1339,12 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
|
||||||
|
* Extract bits from the bit stream
|
||||||
|
*/
|
||||||
|
if (!abits) {
|
||||||
|
- memset(subband_samples[k][l], 0, 8 * sizeof(subband_samples[0][0][0]));
|
||||||
|
+ rscale[l] = 0;
|
||||||
|
+ memset(block + 8 * l, 0, 8 * sizeof(block[0]));
|
||||||
|
} else {
|
||||||
|
/* Deal with transients */
|
||||||
|
int sfi = s->transition_mode[k][l] && subsubframe >= s->transition_mode[k][l];
|
||||||
|
- float rscale = quant_step_size * s->scale_factor[k][l][sfi] *
|
||||||
|
+ rscale[l] = quant_step_size * s->scale_factor[k][l][sfi] *
|
||||||
|
s->scalefactor_adj[k][sel];
|
||||||
|
|
||||||
|
if (abits >= 11 || !dca_smpl_bitalloc[abits].vlc[sel].table) {
|
||||||
|
@@ -1355,7 +1358,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
|
||||||
|
block_code1 = get_bits(&s->gb, size);
|
||||||
|
block_code2 = get_bits(&s->gb, size);
|
||||||
|
err = decode_blockcodes(block_code1, block_code2,
|
||||||
|
- levels, block);
|
||||||
|
+ levels, block + 8 * l);
|
||||||
|
if (err) {
|
||||||
|
av_log(s->avctx, AV_LOG_ERROR,
|
||||||
|
"ERROR: block code look-up failed\n");
|
||||||
|
@@ -1364,19 +1367,23 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index)
|
||||||
|
} else {
|
||||||
|
/* no coding */
|
||||||
|
for (m = 0; m < 8; m++)
|
||||||
|
- block[m] = get_sbits(&s->gb, abits - 3);
|
||||||
|
+ block[8 * l + m] = get_sbits(&s->gb, abits - 3);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* Huffman coded */
|
||||||
|
for (m = 0; m < 8; m++)
|
||||||
|
- block[m] = get_bitalloc(&s->gb,
|
||||||
|
+ block[8 * l + m] = get_bitalloc(&s->gb,
|
||||||
|
&dca_smpl_bitalloc[abits], sel);
|
||||||
|
}
|
||||||
|
|
||||||
|
- s->fmt_conv.int32_to_float_fmul_scalar(subband_samples[k][l],
|
||||||
|
- block, rscale, 8);
|
||||||
|
}
|
||||||
|
+ }
|
||||||
|
|
||||||
|
+ s->fmt_conv.int32_to_float_fmul_array8(&s->fmt_conv, subband_samples[k][0],
|
||||||
|
+ block, rscale, 8 * s->vq_start_subband[k]);
|
||||||
|
+
|
||||||
|
+ for (l = 0; l < s->vq_start_subband[k]; l++) {
|
||||||
|
+ int m;
|
||||||
|
/*
|
||||||
|
* Inverse ADPCM if in prediction mode
|
||||||
|
*/
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,222 @@
|
|||||||
|
From c908a710261f33130569c4360175d8f19a282d67 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Mon, 15 Jul 2013 18:28:12 +0100
|
||||||
|
Subject: [PATCH 43/49] [ffmpeg] - backport - arm: Add VFP-accelerated version
|
||||||
|
of int32_to_float_fmul_array8
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Before After
|
||||||
|
Mean StdDev Mean StdDev Change
|
||||||
|
This function 366.2 18.3 277.8 13.7 +31.9%
|
||||||
|
Overall 18420.5 489.1 17049.5 408.2 +8.0%
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/arm/fmtconvert_init_arm.c | 6 +-
|
||||||
|
libavcodec/arm/fmtconvert_vfp.S | 162 +++++++++++++++++++++++
|
||||||
|
2 files changed, 167 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/fmtconvert_init_arm.c b/libavcodec/arm/fmtconvert_init_arm.c
|
||||||
|
index de3b78b..92d94a0 100644
|
||||||
|
--- a/libavcodec/arm/fmtconvert_init_arm.c
|
||||||
|
+++ b/libavcodec/arm/fmtconvert_init_arm.c
|
||||||
|
@@ -30,6 +30,9 @@ void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src,
|
||||||
|
|
||||||
|
void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src,
|
||||||
|
float mul, int len);
|
||||||
|
+void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst,
|
||||||
|
+ const int32_t *src, const float *mul,
|
||||||
|
+ int len);
|
||||||
|
|
||||||
|
void ff_float_to_int16_neon(int16_t *dst, const float *src, long len);
|
||||||
|
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
|
||||||
|
@@ -42,10 +45,11 @@ av_cold void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx
|
||||||
|
|
||||||
|
if (have_vfp(cpu_flags) && have_armv6(cpu_flags)) {
|
||||||
|
if (!have_vfpv3(cpu_flags)) {
|
||||||
|
- // This function doesn't use anything armv6 specific in itself,
|
||||||
|
+ // These functions don't use anything armv6 specific in themselves,
|
||||||
|
// but ff_float_to_int16_vfp which is in the same assembly source
|
||||||
|
// file does, thus the whole file requires armv6 to be built.
|
||||||
|
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_vfp;
|
||||||
|
+ c->int32_to_float_fmul_array8 = ff_int32_to_float_fmul_array8_vfp;
|
||||||
|
}
|
||||||
|
|
||||||
|
c->float_to_int16 = ff_float_to_int16_vfp;
|
||||||
|
diff --git a/libavcodec/arm/fmtconvert_vfp.S b/libavcodec/arm/fmtconvert_vfp.S
|
||||||
|
index 3cc3e56..a6d4ebd 100644
|
||||||
|
--- a/libavcodec/arm/fmtconvert_vfp.S
|
||||||
|
+++ b/libavcodec/arm/fmtconvert_vfp.S
|
||||||
|
@@ -83,6 +83,168 @@ endfunc
|
||||||
|
* Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
|
||||||
|
* (16 bytes alignment is best for BCM2835), little-endian.
|
||||||
|
*/
|
||||||
|
+@ void ff_int32_to_float_fmul_array8_vfp(FmtConvertContext *c, float *dst, const int32_t *src, const float *mul, int len)
|
||||||
|
+function ff_int32_to_float_fmul_array8_vfp, export=1
|
||||||
|
+ push {lr}
|
||||||
|
+ ldr a1, [sp, #4]
|
||||||
|
+ subs lr, a1, #3*8
|
||||||
|
+ bcc 50f @ too short to pipeline
|
||||||
|
+ @ Now need to find (len / 8) % 3. The approximation
|
||||||
|
+ @ x / 24 = (x * 0xAB) >> 12
|
||||||
|
+ @ is good for x < 4096, which is true for both AC3 and DCA.
|
||||||
|
+ mov a1, #0xAB
|
||||||
|
+ ldr ip, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1
|
||||||
|
+ mul a1, lr, a1
|
||||||
|
+ vpush {s16-s31}
|
||||||
|
+ mov a1, a1, lsr #12
|
||||||
|
+ add a1, a1, a1, lsl #1
|
||||||
|
+ rsb a1, a1, lr, lsr #3
|
||||||
|
+ cmp a1, #1
|
||||||
|
+ fmrx a1, FPSCR
|
||||||
|
+ fmxr FPSCR, ip
|
||||||
|
+ beq 11f
|
||||||
|
+ blo 10f
|
||||||
|
+ @ Array is (2 + multiple of 3) x 8 floats long
|
||||||
|
+ @ drop through...
|
||||||
|
+ vldmia a3!, {s16-s23}
|
||||||
|
+ vldmia a4!, {s2,s3}
|
||||||
|
+ vldmia a3!, {s24-s31}
|
||||||
|
+ vcvt.f32.s32 s16, s16
|
||||||
|
+ vcvt.f32.s32 s17, s17
|
||||||
|
+ vcvt.f32.s32 s18, s18
|
||||||
|
+ vcvt.f32.s32 s19, s19
|
||||||
|
+ vcvt.f32.s32 s20, s20
|
||||||
|
+ vcvt.f32.s32 s21, s21
|
||||||
|
+ vcvt.f32.s32 s22, s22
|
||||||
|
+ vcvt.f32.s32 s23, s23
|
||||||
|
+ vmul.f32 s16, s16, s2
|
||||||
|
+ @ drop through...
|
||||||
|
+3:
|
||||||
|
+ vldmia a3!, {s8-s15}
|
||||||
|
+ vldmia a4!, {s1}
|
||||||
|
+ vcvt.f32.s32 s24, s24
|
||||||
|
+ vcvt.f32.s32 s25, s25
|
||||||
|
+ vcvt.f32.s32 s26, s26
|
||||||
|
+ vcvt.f32.s32 s27, s27
|
||||||
|
+ vcvt.f32.s32 s28, s28
|
||||||
|
+ vcvt.f32.s32 s29, s29
|
||||||
|
+ vcvt.f32.s32 s30, s30
|
||||||
|
+ vcvt.f32.s32 s31, s31
|
||||||
|
+ vmul.f32 s24, s24, s3
|
||||||
|
+ vstmia a2!, {s16-s19}
|
||||||
|
+ vstmia a2!, {s20-s23}
|
||||||
|
+2:
|
||||||
|
+ vldmia a3!, {s16-s23}
|
||||||
|
+ vldmia a4!, {s2}
|
||||||
|
+ vcvt.f32.s32 s8, s8
|
||||||
|
+ vcvt.f32.s32 s9, s9
|
||||||
|
+ vcvt.f32.s32 s10, s10
|
||||||
|
+ vcvt.f32.s32 s11, s11
|
||||||
|
+ vcvt.f32.s32 s12, s12
|
||||||
|
+ vcvt.f32.s32 s13, s13
|
||||||
|
+ vcvt.f32.s32 s14, s14
|
||||||
|
+ vcvt.f32.s32 s15, s15
|
||||||
|
+ vmul.f32 s8, s8, s1
|
||||||
|
+ vstmia a2!, {s24-s27}
|
||||||
|
+ vstmia a2!, {s28-s31}
|
||||||
|
+1:
|
||||||
|
+ vldmia a3!, {s24-s31}
|
||||||
|
+ vldmia a4!, {s3}
|
||||||
|
+ vcvt.f32.s32 s16, s16
|
||||||
|
+ vcvt.f32.s32 s17, s17
|
||||||
|
+ vcvt.f32.s32 s18, s18
|
||||||
|
+ vcvt.f32.s32 s19, s19
|
||||||
|
+ vcvt.f32.s32 s20, s20
|
||||||
|
+ vcvt.f32.s32 s21, s21
|
||||||
|
+ vcvt.f32.s32 s22, s22
|
||||||
|
+ vcvt.f32.s32 s23, s23
|
||||||
|
+ vmul.f32 s16, s16, s2
|
||||||
|
+ vstmia a2!, {s8-s11}
|
||||||
|
+ vstmia a2!, {s12-s15}
|
||||||
|
+
|
||||||
|
+ subs lr, lr, #8*3
|
||||||
|
+ bpl 3b
|
||||||
|
+
|
||||||
|
+ vcvt.f32.s32 s24, s24
|
||||||
|
+ vcvt.f32.s32 s25, s25
|
||||||
|
+ vcvt.f32.s32 s26, s26
|
||||||
|
+ vcvt.f32.s32 s27, s27
|
||||||
|
+ vcvt.f32.s32 s28, s28
|
||||||
|
+ vcvt.f32.s32 s29, s29
|
||||||
|
+ vcvt.f32.s32 s30, s30
|
||||||
|
+ vcvt.f32.s32 s31, s31
|
||||||
|
+ vmul.f32 s24, s24, s3
|
||||||
|
+ vstmia a2!, {s16-s19}
|
||||||
|
+ vstmia a2!, {s20-s23}
|
||||||
|
+ vstmia a2!, {s24-s27}
|
||||||
|
+ vstmia a2!, {s28-s31}
|
||||||
|
+
|
||||||
|
+ fmxr FPSCR, a1
|
||||||
|
+ vpop {s16-s31}
|
||||||
|
+ pop {pc}
|
||||||
|
+
|
||||||
|
+10: @ Array is (multiple of 3) x 8 floats long
|
||||||
|
+ vldmia a3!, {s8-s15}
|
||||||
|
+ vldmia a4!, {s1,s2}
|
||||||
|
+ vldmia a3!, {s16-s23}
|
||||||
|
+ vcvt.f32.s32 s8, s8
|
||||||
|
+ vcvt.f32.s32 s9, s9
|
||||||
|
+ vcvt.f32.s32 s10, s10
|
||||||
|
+ vcvt.f32.s32 s11, s11
|
||||||
|
+ vcvt.f32.s32 s12, s12
|
||||||
|
+ vcvt.f32.s32 s13, s13
|
||||||
|
+ vcvt.f32.s32 s14, s14
|
||||||
|
+ vcvt.f32.s32 s15, s15
|
||||||
|
+ vmul.f32 s8, s8, s1
|
||||||
|
+ b 1b
|
||||||
|
+
|
||||||
|
+11: @ Array is (1 + multiple of 3) x 8 floats long
|
||||||
|
+ vldmia a3!, {s24-s31}
|
||||||
|
+ vldmia a4!, {s3}
|
||||||
|
+ vldmia a3!, {s8-s15}
|
||||||
|
+ vldmia a4!, {s1}
|
||||||
|
+ vcvt.f32.s32 s24, s24
|
||||||
|
+ vcvt.f32.s32 s25, s25
|
||||||
|
+ vcvt.f32.s32 s26, s26
|
||||||
|
+ vcvt.f32.s32 s27, s27
|
||||||
|
+ vcvt.f32.s32 s28, s28
|
||||||
|
+ vcvt.f32.s32 s29, s29
|
||||||
|
+ vcvt.f32.s32 s30, s30
|
||||||
|
+ vcvt.f32.s32 s31, s31
|
||||||
|
+ vmul.f32 s24, s24, s3
|
||||||
|
+ b 2b
|
||||||
|
+
|
||||||
|
+50:
|
||||||
|
+ ldr lr, =0x03070000 @ RunFast mode, short vectors of length 8, stride 1
|
||||||
|
+ fmrx ip, FPSCR
|
||||||
|
+ fmxr FPSCR, lr
|
||||||
|
+51:
|
||||||
|
+ vldmia a3!, {s8-s15}
|
||||||
|
+ vldmia a4!, {s0}
|
||||||
|
+ vcvt.f32.s32 s8, s8
|
||||||
|
+ vcvt.f32.s32 s9, s9
|
||||||
|
+ vcvt.f32.s32 s10, s10
|
||||||
|
+ vcvt.f32.s32 s11, s11
|
||||||
|
+ vcvt.f32.s32 s12, s12
|
||||||
|
+ vcvt.f32.s32 s13, s13
|
||||||
|
+ vcvt.f32.s32 s14, s14
|
||||||
|
+ vcvt.f32.s32 s15, s15
|
||||||
|
+ vmul.f32 s8, s8, s0
|
||||||
|
+ subs a1, a1, #8
|
||||||
|
+ vstmia a2!, {s8-s11}
|
||||||
|
+ vstmia a2!, {s12-s15}
|
||||||
|
+ bne 51b
|
||||||
|
+
|
||||||
|
+ fmxr FPSCR, ip
|
||||||
|
+ pop {pc}
|
||||||
|
+endfunc
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * ARM VFP optimised int32 to float conversion.
|
||||||
|
+ * Assume len is a multiple of 8, destination buffer is at least 4 bytes aligned
|
||||||
|
+ * (16 bytes alignment is best for BCM2835), little-endian.
|
||||||
|
+ * TODO: could be further optimised by unrolling and interleaving, as above
|
||||||
|
+ */
|
||||||
|
@ void ff_int32_to_float_fmul_scalar_vfp(float *dst, const int32_t *src, float mul, int len)
|
||||||
|
function ff_int32_to_float_fmul_scalar_vfp, export=1
|
||||||
|
VFP tmp .req a4
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,274 @@
|
|||||||
|
From 15520de67fc951213ab32661b8b368a9439e8b9a Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
|
||||||
|
Date: Fri, 19 Jul 2013 10:59:17 +0300
|
||||||
|
Subject: [PATCH 44/49] [ffmpeg] - backport - arm: Add VFP-accelerated version
|
||||||
|
of imdct_half
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Before After
|
||||||
|
Mean StdDev Mean StdDev Change
|
||||||
|
This function 2653.0 28.5 1108.8 51.4 +139.3%
|
||||||
|
Overall 17049.5 408.2 15973.0 223.2 +6.7%
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/arm/Makefile | 1 +
|
||||||
|
libavcodec/arm/fft_init_arm.c | 9 ++
|
||||||
|
libavcodec/arm/mdct_vfp.S | 205 ++++++++++++++++++++++++++++++
|
||||||
|
3 files changed, 215 insertions(+)
|
||||||
|
create mode 100644 libavcodec/arm/mdct_vfp.S
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
|
||||||
|
index aee9d73..27e80d5 100644
|
||||||
|
--- a/libavcodec/arm/Makefile
|
||||||
|
+++ b/libavcodec/arm/Makefile
|
||||||
|
@@ -59,6 +59,7 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \
|
||||||
|
arm/simple_idct_armv6.o \
|
||||||
|
|
||||||
|
VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_vfp.o
|
||||||
|
+VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o
|
||||||
|
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
|
||||||
|
|
||||||
|
NEON-OBJS-$(CONFIG_FFT) += arm/fft_neon.o \
|
||||||
|
diff --git a/libavcodec/arm/fft_init_arm.c b/libavcodec/arm/fft_init_arm.c
|
||||||
|
index fe0acc5..a000ea5 100644
|
||||||
|
--- a/libavcodec/arm/fft_init_arm.c
|
||||||
|
+++ b/libavcodec/arm/fft_init_arm.c
|
||||||
|
@@ -26,6 +26,8 @@
|
||||||
|
void ff_fft_permute_neon(FFTContext *s, FFTComplex *z);
|
||||||
|
void ff_fft_calc_neon(FFTContext *s, FFTComplex *z);
|
||||||
|
|
||||||
|
+void ff_imdct_half_vfp(FFTContext *s, FFTSample *output, const FFTSample *input);
|
||||||
|
+
|
||||||
|
void ff_imdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
|
||||||
|
void ff_imdct_half_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
|
||||||
|
void ff_mdct_calc_neon(FFTContext *s, FFTSample *output, const FFTSample *input);
|
||||||
|
@@ -48,6 +50,13 @@ av_cold void ff_fft_init_arm(FFTContext *s)
|
||||||
|
{
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
+ if (have_vfp(cpu_flags)) {
|
||||||
|
+#if CONFIG_MDCT
|
||||||
|
+ if (!have_vfpv3(cpu_flags))
|
||||||
|
+ s->imdct_half = ff_imdct_half_vfp;
|
||||||
|
+#endif
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (have_neon(cpu_flags)) {
|
||||||
|
#if CONFIG_FFT
|
||||||
|
s->fft_permute = ff_fft_permute_neon;
|
||||||
|
diff --git a/libavcodec/arm/mdct_vfp.S b/libavcodec/arm/mdct_vfp.S
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..0623e96
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/libavcodec/arm/mdct_vfp.S
|
||||||
|
@@ -0,0 +1,205 @@
|
||||||
|
+/*
|
||||||
|
+ * Copyright (c) 2013 RISC OS Open Ltd
|
||||||
|
+ * Author: Ben Avison <bavison@riscosopen.org>
|
||||||
|
+ *
|
||||||
|
+ * This file is part of Libav.
|
||||||
|
+ *
|
||||||
|
+ * Libav is free software; you can redistribute it and/or
|
||||||
|
+ * modify it under the terms of the GNU Lesser General Public
|
||||||
|
+ * License as published by the Free Software Foundation; either
|
||||||
|
+ * version 2.1 of the License, or (at your option) any later version.
|
||||||
|
+ *
|
||||||
|
+ * Libav is distributed in the hope that it will be useful,
|
||||||
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
+ * Lesser General Public License for more details.
|
||||||
|
+ *
|
||||||
|
+ * You should have received a copy of the GNU Lesser General Public
|
||||||
|
+ * License along with Libav; if not, write to the Free Software
|
||||||
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include "libavutil/arm/asm.S"
|
||||||
|
+
|
||||||
|
+CONTEXT .req a1
|
||||||
|
+ORIGOUT .req a2
|
||||||
|
+IN .req a3
|
||||||
|
+OUT .req v1
|
||||||
|
+REVTAB .req v2
|
||||||
|
+TCOS .req v3
|
||||||
|
+TSIN .req v4
|
||||||
|
+OLDFPSCR .req v5
|
||||||
|
+J0 .req a2
|
||||||
|
+J1 .req a4
|
||||||
|
+J2 .req ip
|
||||||
|
+J3 .req lr
|
||||||
|
+
|
||||||
|
+.macro prerotation_innerloop
|
||||||
|
+ .set trig_lo, k
|
||||||
|
+ .set trig_hi, n4 - k - 2
|
||||||
|
+ .set in_lo, trig_lo * 2
|
||||||
|
+ .set in_hi, trig_hi * 2
|
||||||
|
+ vldr d8, [TCOS, #trig_lo*4] @ s16,s17
|
||||||
|
+ vldr d9, [TCOS, #trig_hi*4] @ s18,s19
|
||||||
|
+ vldr s0, [IN, #in_hi*4 + 12]
|
||||||
|
+ vldr s1, [IN, #in_hi*4 + 4]
|
||||||
|
+ vldr s2, [IN, #in_lo*4 + 12]
|
||||||
|
+ vldr s3, [IN, #in_lo*4 + 4]
|
||||||
|
+ vmul.f s8, s0, s16 @ vector operation
|
||||||
|
+ vldr d10, [TSIN, #trig_lo*4] @ s20,s21
|
||||||
|
+ vldr d11, [TSIN, #trig_hi*4] @ s22,s23
|
||||||
|
+ vldr s4, [IN, #in_lo*4]
|
||||||
|
+ vldr s5, [IN, #in_lo*4 + 8]
|
||||||
|
+ vldr s6, [IN, #in_hi*4]
|
||||||
|
+ vldr s7, [IN, #in_hi*4 + 8]
|
||||||
|
+ ldr J0, [REVTAB, #trig_lo*2]
|
||||||
|
+ vmul.f s12, s0, s20 @ vector operation
|
||||||
|
+ ldr J2, [REVTAB, #trig_hi*2]
|
||||||
|
+ mov J1, J0, lsr #16
|
||||||
|
+ and J0, J0, #255 @ halfword value will be < n4
|
||||||
|
+ vmls.f s8, s4, s20 @ vector operation
|
||||||
|
+ mov J3, J2, lsr #16
|
||||||
|
+ and J2, J2, #255 @ halfword value will be < n4
|
||||||
|
+ add J0, OUT, J0, lsl #3
|
||||||
|
+ vmla.f s12, s4, s16 @ vector operation
|
||||||
|
+ add J1, OUT, J1, lsl #3
|
||||||
|
+ add J2, OUT, J2, lsl #3
|
||||||
|
+ add J3, OUT, J3, lsl #3
|
||||||
|
+ vstr s8, [J0]
|
||||||
|
+ vstr s9, [J1]
|
||||||
|
+ vstr s10, [J2]
|
||||||
|
+ vstr s11, [J3]
|
||||||
|
+ vstr s12, [J0, #4]
|
||||||
|
+ vstr s13, [J1, #4]
|
||||||
|
+ vstr s14, [J2, #4]
|
||||||
|
+ vstr s15, [J3, #4]
|
||||||
|
+ .set k, k + 2
|
||||||
|
+.endm
|
||||||
|
+
|
||||||
|
+.macro postrotation_innerloop tail, head
|
||||||
|
+ .set trig_lo_head, n8 - k - 2
|
||||||
|
+ .set trig_hi_head, n8 + k
|
||||||
|
+ .set out_lo_head, trig_lo_head * 2
|
||||||
|
+ .set out_hi_head, trig_hi_head * 2
|
||||||
|
+ .set trig_lo_tail, n8 - (k - 2) - 2
|
||||||
|
+ .set trig_hi_tail, n8 + (k - 2)
|
||||||
|
+ .set out_lo_tail, trig_lo_tail * 2
|
||||||
|
+ .set out_hi_tail, trig_hi_tail * 2
|
||||||
|
+ .if (k & 2) == 0
|
||||||
|
+ TCOS_D0_HEAD .req d10 @ s20,s21
|
||||||
|
+ TCOS_D1_HEAD .req d11 @ s22,s23
|
||||||
|
+ TCOS_S0_TAIL .req s24
|
||||||
|
+ .else
|
||||||
|
+ TCOS_D0_HEAD .req d12 @ s24,s25
|
||||||
|
+ TCOS_D1_HEAD .req d13 @ s26,s27
|
||||||
|
+ TCOS_S0_TAIL .req s20
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\tail",""
|
||||||
|
+ vmls.f s8, s0, TCOS_S0_TAIL @ vector operation
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vldr d8, [TSIN, #trig_lo_head*4] @ s16,s17
|
||||||
|
+ vldr d9, [TSIN, #trig_hi_head*4] @ s18,s19
|
||||||
|
+ vldr TCOS_D0_HEAD, [TCOS, #trig_lo_head*4]
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\tail",""
|
||||||
|
+ vmla.f s12, s4, TCOS_S0_TAIL @ vector operation
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vldr s0, [OUT, #out_lo_head*4]
|
||||||
|
+ vldr s1, [OUT, #out_lo_head*4 + 8]
|
||||||
|
+ vldr s2, [OUT, #out_hi_head*4]
|
||||||
|
+ vldr s3, [OUT, #out_hi_head*4 + 8]
|
||||||
|
+ vldr s4, [OUT, #out_lo_head*4 + 4]
|
||||||
|
+ vldr s5, [OUT, #out_lo_head*4 + 12]
|
||||||
|
+ vldr s6, [OUT, #out_hi_head*4 + 4]
|
||||||
|
+ vldr s7, [OUT, #out_hi_head*4 + 12]
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\tail",""
|
||||||
|
+ vstr s8, [OUT, #out_lo_tail*4]
|
||||||
|
+ vstr s9, [OUT, #out_lo_tail*4 + 8]
|
||||||
|
+ vstr s10, [OUT, #out_hi_tail*4]
|
||||||
|
+ vstr s11, [OUT, #out_hi_tail*4 + 8]
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vmul.f s8, s4, s16 @ vector operation
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\tail",""
|
||||||
|
+ vstr s12, [OUT, #out_hi_tail*4 + 12]
|
||||||
|
+ vstr s13, [OUT, #out_hi_tail*4 + 4]
|
||||||
|
+ vstr s14, [OUT, #out_lo_tail*4 + 12]
|
||||||
|
+ vstr s15, [OUT, #out_lo_tail*4 + 4]
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vmul.f s12, s0, s16 @ vector operation
|
||||||
|
+ vldr TCOS_D1_HEAD, [TCOS, #trig_hi_head*4]
|
||||||
|
+ .endif
|
||||||
|
+ .unreq TCOS_D0_HEAD
|
||||||
|
+ .unreq TCOS_D1_HEAD
|
||||||
|
+ .unreq TCOS_S0_TAIL
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ .set k, k + 2
|
||||||
|
+ .endif
|
||||||
|
+.endm
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+/* void ff_imdct_half_vfp(FFTContext *s,
|
||||||
|
+ * FFTSample *output,
|
||||||
|
+ * const FFTSample *input)
|
||||||
|
+ */
|
||||||
|
+function ff_imdct_half_vfp, export=1
|
||||||
|
+ ldr ip, [CONTEXT, #5*4] @ mdct_bits
|
||||||
|
+ teq ip, #6
|
||||||
|
+ it ne
|
||||||
|
+ bne ff_imdct_half_c @ only case currently accelerated is the one used by DCA
|
||||||
|
+
|
||||||
|
+ .set n, 1<<6
|
||||||
|
+ .set n2, n/2
|
||||||
|
+ .set n4, n/4
|
||||||
|
+ .set n8, n/8
|
||||||
|
+
|
||||||
|
+ push {v1-v5,lr}
|
||||||
|
+ vpush {s16-s27}
|
||||||
|
+ fmrx OLDFPSCR, FPSCR
|
||||||
|
+ ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
|
||||||
|
+ fmxr FPSCR, lr
|
||||||
|
+ mov OUT, ORIGOUT
|
||||||
|
+ ldr REVTAB, [CONTEXT, #2*4]
|
||||||
|
+ ldr TCOS, [CONTEXT, #6*4]
|
||||||
|
+ ldr TSIN, [CONTEXT, #7*4]
|
||||||
|
+
|
||||||
|
+ .set k, 0
|
||||||
|
+ .rept n8/2
|
||||||
|
+ prerotation_innerloop
|
||||||
|
+ .endr
|
||||||
|
+
|
||||||
|
+ fmxr FPSCR, OLDFPSCR
|
||||||
|
+ mov a1, OUT
|
||||||
|
+ bl ff_fft16_vfp
|
||||||
|
+ ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
|
||||||
|
+ fmxr FPSCR, lr
|
||||||
|
+
|
||||||
|
+ .set k, 0
|
||||||
|
+ postrotation_innerloop , head
|
||||||
|
+ .rept n8/2 - 1
|
||||||
|
+ postrotation_innerloop tail, head
|
||||||
|
+ .endr
|
||||||
|
+ postrotation_innerloop tail
|
||||||
|
+
|
||||||
|
+ fmxr FPSCR, OLDFPSCR
|
||||||
|
+ vpop {s16-s27}
|
||||||
|
+ pop {v1-v5,pc}
|
||||||
|
+endfunc
|
||||||
|
+
|
||||||
|
+ .unreq CONTEXT
|
||||||
|
+ .unreq ORIGOUT
|
||||||
|
+ .unreq IN
|
||||||
|
+ .unreq OUT
|
||||||
|
+ .unreq REVTAB
|
||||||
|
+ .unreq TCOS
|
||||||
|
+ .unreq TSIN
|
||||||
|
+ .unreq OLDFPSCR
|
||||||
|
+ .unreq J0
|
||||||
|
+ .unreq J1
|
||||||
|
+ .unreq J2
|
||||||
|
+ .unreq J3
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,58 @@
|
|||||||
|
From 8e0babd84c7e03cf678aab8bcf7e2106fe2b3de6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
|
||||||
|
Date: Fri, 19 Jul 2013 11:03:32 +0300
|
||||||
|
Subject: [PATCH 45/49] [ffmpeg] - backport - arm: Add VFP-accelerated version
|
||||||
|
of dca_lfe_fir
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Before After
|
||||||
|
Mean StdDev Mean StdDev Change
|
||||||
|
This function 868.2 33.5 436.0 27.0 +99.1%
|
||||||
|
Overall 15973.0 223.2 15577.5 83.2 +2.5%
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/arm/Makefile | 3 ++-
|
||||||
|
libavcodec/arm/dcadsp_init_arm.c | 4 ++++
|
||||||
|
2 files changed, 6 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
|
||||||
|
index 27e80d5..7fe5bb5 100644
|
||||||
|
--- a/libavcodec/arm/Makefile
|
||||||
|
+++ b/libavcodec/arm/Makefile
|
||||||
|
@@ -58,7 +58,8 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \
|
||||||
|
arm/dsputil_armv6.o \
|
||||||
|
arm/simple_idct_armv6.o \
|
||||||
|
|
||||||
|
-VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/synth_filter_vfp.o
|
||||||
|
+VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \
|
||||||
|
+ arm/synth_filter_vfp.o
|
||||||
|
VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o
|
||||||
|
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
|
||||||
|
index 56568e0..a1efbff 100644
|
||||||
|
--- a/libavcodec/arm/dcadsp_init_arm.c
|
||||||
|
+++ b/libavcodec/arm/dcadsp_init_arm.c
|
||||||
|
@@ -24,6 +24,8 @@
|
||||||
|
#include "libavutil/attributes.h"
|
||||||
|
#include "libavcodec/dcadsp.h"
|
||||||
|
|
||||||
|
+void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
|
||||||
|
+ int decifactor, float scale);
|
||||||
|
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
|
||||||
|
int decifactor, float scale);
|
||||||
|
|
||||||
|
@@ -31,6 +33,8 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
|
||||||
|
{
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
+ if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags))
|
||||||
|
+ s->lfe_fir = ff_dca_lfe_fir_vfp;
|
||||||
|
if (have_neon(cpu_flags))
|
||||||
|
s->lfe_fir = ff_dca_lfe_fir_neon;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,339 @@
|
|||||||
|
From 018b74ea9d8f52788db18ed40838afca05e7b4df Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
|
||||||
|
Date: Fri, 19 Jul 2013 11:23:57 +0300
|
||||||
|
Subject: [PATCH 46/49] [ffmpeg] - backport - arm: Add VFP-accelerated version
|
||||||
|
of fft16
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Before After
|
||||||
|
Mean StdDev Mean StdDev Change
|
||||||
|
This function 1389.3 4.2 967.8 35.1 +43.6%
|
||||||
|
Overall 15577.5 83.2 15400.0 336.4 +1.2%
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/arm/Makefile | 1 +
|
||||||
|
libavcodec/arm/fft_vfp.S | 298 +++++++++++++++++++++++++++++++++++
|
||||||
|
2 files changed, 299 insertions(+)
|
||||||
|
create mode 100644 libavcodec/arm/fft_vfp.S
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
|
||||||
|
index 7fe5bb5..7390a8b 100644
|
||||||
|
--- a/libavcodec/arm/Makefile
|
||||||
|
+++ b/libavcodec/arm/Makefile
|
||||||
|
@@ -60,6 +60,7 @@ ARMV6-OBJS += arm/dsputil_init_armv6.o \
|
||||||
|
|
||||||
|
VFP-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_vfp.o \
|
||||||
|
arm/synth_filter_vfp.o
|
||||||
|
+VFP-OBJS-$(CONFIG_FFT) += arm/fft_vfp.o
|
||||||
|
VFP-OBJS-$(CONFIG_MDCT) += arm/mdct_vfp.o
|
||||||
|
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/fft_vfp.S b/libavcodec/arm/fft_vfp.S
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..7845ebb
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/libavcodec/arm/fft_vfp.S
|
||||||
|
@@ -0,0 +1,298 @@
|
||||||
|
+/*
|
||||||
|
+ * Copyright (c) 2013 RISC OS Open Ltd
|
||||||
|
+ * Author: Ben Avison <bavison@riscosopen.org>
|
||||||
|
+ *
|
||||||
|
+ * This file is part of Libav.
|
||||||
|
+ *
|
||||||
|
+ * Libav is free software; you can redistribute it and/or
|
||||||
|
+ * modify it under the terms of the GNU Lesser General Public
|
||||||
|
+ * License as published by the Free Software Foundation; either
|
||||||
|
+ * version 2.1 of the License, or (at your option) any later version.
|
||||||
|
+ *
|
||||||
|
+ * Libav is distributed in the hope that it will be useful,
|
||||||
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
+ * Lesser General Public License for more details.
|
||||||
|
+ *
|
||||||
|
+ * You should have received a copy of the GNU Lesser General Public
|
||||||
|
+ * License along with Libav; if not, write to the Free Software
|
||||||
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include "libavutil/arm/asm.S"
|
||||||
|
+
|
||||||
|
+@ TODO: * FFTs wider than 16
|
||||||
|
+@ * dispatch code
|
||||||
|
+
|
||||||
|
+function fft4_vfp
|
||||||
|
+ vldr d0, [a1, #0*2*4] @ s0,s1 = z[0]
|
||||||
|
+ vldr d4, [a1, #1*2*4] @ s8,s9 = z[1]
|
||||||
|
+ vldr d1, [a1, #2*2*4] @ s2,s3 = z[2]
|
||||||
|
+ vldr d5, [a1, #3*2*4] @ s10,s11 = z[3]
|
||||||
|
+ @ stall
|
||||||
|
+ vadd.f s12, s0, s8 @ i0
|
||||||
|
+ vadd.f s13, s1, s9 @ i1
|
||||||
|
+ vadd.f s14, s2, s10 @ i2
|
||||||
|
+ vadd.f s15, s3, s11 @ i3
|
||||||
|
+ vsub.f s8, s0, s8 @ i4
|
||||||
|
+ vsub.f s9, s1, s9 @ i5
|
||||||
|
+ vsub.f s10, s2, s10 @ i6
|
||||||
|
+ vsub.f s11, s3, s11 @ i7
|
||||||
|
+ @ stall
|
||||||
|
+ @ stall
|
||||||
|
+ vadd.f s0, s12, s14 @ z[0].re
|
||||||
|
+ vsub.f s4, s12, s14 @ z[2].re
|
||||||
|
+ vadd.f s1, s13, s15 @ z[0].im
|
||||||
|
+ vsub.f s5, s13, s15 @ z[2].im
|
||||||
|
+ vadd.f s7, s9, s10 @ z[3].im
|
||||||
|
+ vsub.f s3, s9, s10 @ z[1].im
|
||||||
|
+ vadd.f s2, s8, s11 @ z[1].re
|
||||||
|
+ vsub.f s6, s8, s11 @ z[3].re
|
||||||
|
+ @ stall
|
||||||
|
+ @ stall
|
||||||
|
+ vstr d0, [a1, #0*2*4]
|
||||||
|
+ vstr d2, [a1, #2*2*4]
|
||||||
|
+ @ stall
|
||||||
|
+ @ stall
|
||||||
|
+ vstr d1, [a1, #1*2*4]
|
||||||
|
+ vstr d3, [a1, #3*2*4]
|
||||||
|
+
|
||||||
|
+ bx lr
|
||||||
|
+endfunc
|
||||||
|
+
|
||||||
|
+.macro macro_fft8_head
|
||||||
|
+ @ FFT4
|
||||||
|
+ vldr d4, [a1, #0 * 2*4]
|
||||||
|
+ vldr d6, [a1, #1 * 2*4]
|
||||||
|
+ vldr d5, [a1, #2 * 2*4]
|
||||||
|
+ vldr d7, [a1, #3 * 2*4]
|
||||||
|
+ @ BF
|
||||||
|
+ vldr d12, [a1, #4 * 2*4]
|
||||||
|
+ vadd.f s16, s8, s12 @ vector op
|
||||||
|
+ vldr d14, [a1, #5 * 2*4]
|
||||||
|
+ vldr d13, [a1, #6 * 2*4]
|
||||||
|
+ vldr d15, [a1, #7 * 2*4]
|
||||||
|
+ vsub.f s20, s8, s12 @ vector op
|
||||||
|
+ vadd.f s0, s16, s18
|
||||||
|
+ vsub.f s2, s16, s18
|
||||||
|
+ vadd.f s1, s17, s19
|
||||||
|
+ vsub.f s3, s17, s19
|
||||||
|
+ vadd.f s7, s21, s22
|
||||||
|
+ vsub.f s5, s21, s22
|
||||||
|
+ vadd.f s4, s20, s23
|
||||||
|
+ vsub.f s6, s20, s23
|
||||||
|
+ vsub.f s20, s24, s28 @ vector op
|
||||||
|
+ vstr d0, [a1, #0 * 2*4] @ transfer s0-s7 to s24-s31 via memory
|
||||||
|
+ vstr d1, [a1, #1 * 2*4]
|
||||||
|
+ vldr s0, cos1pi4
|
||||||
|
+ vadd.f s16, s24, s28 @ vector op
|
||||||
|
+ vstr d2, [a1, #2 * 2*4]
|
||||||
|
+ vstr d3, [a1, #3 * 2*4]
|
||||||
|
+ vldr d12, [a1, #0 * 2*4]
|
||||||
|
+ @ TRANSFORM
|
||||||
|
+ vmul.f s20, s20, s0 @ vector x scalar op
|
||||||
|
+ vldr d13, [a1, #1 * 2*4]
|
||||||
|
+ vldr d14, [a1, #2 * 2*4]
|
||||||
|
+ vldr d15, [a1, #3 * 2*4]
|
||||||
|
+ @ BUTTERFLIES
|
||||||
|
+ vadd.f s0, s18, s16
|
||||||
|
+ vadd.f s1, s17, s19
|
||||||
|
+ vsub.f s2, s17, s19
|
||||||
|
+ vsub.f s3, s18, s16
|
||||||
|
+ vadd.f s4, s21, s20
|
||||||
|
+ vsub.f s5, s21, s20
|
||||||
|
+ vadd.f s6, s22, s23
|
||||||
|
+ vsub.f s7, s22, s23
|
||||||
|
+ vadd.f s8, s0, s24 @ vector op
|
||||||
|
+ vstr d0, [a1, #0 * 2*4] @ transfer s0-s3 to s12-s15 via memory
|
||||||
|
+ vstr d1, [a1, #1 * 2*4]
|
||||||
|
+ vldr d6, [a1, #0 * 2*4]
|
||||||
|
+ vldr d7, [a1, #1 * 2*4]
|
||||||
|
+ vadd.f s1, s5, s6
|
||||||
|
+ vadd.f s0, s7, s4
|
||||||
|
+ vsub.f s2, s5, s6
|
||||||
|
+ vsub.f s3, s7, s4
|
||||||
|
+ vsub.f s12, s24, s12 @ vector op
|
||||||
|
+ vsub.f s5, s29, s1
|
||||||
|
+ vsub.f s4, s28, s0
|
||||||
|
+ vsub.f s6, s30, s2
|
||||||
|
+ vsub.f s7, s31, s3
|
||||||
|
+ vadd.f s16, s0, s28 @ vector op
|
||||||
|
+ vstr d6, [a1, #4 * 2*4]
|
||||||
|
+ vstr d7, [a1, #6 * 2*4]
|
||||||
|
+ vstr d4, [a1, #0 * 2*4]
|
||||||
|
+ vstr d5, [a1, #2 * 2*4]
|
||||||
|
+ vstr d2, [a1, #5 * 2*4]
|
||||||
|
+ vstr d3, [a1, #7 * 2*4]
|
||||||
|
+.endm
|
||||||
|
+
|
||||||
|
+.macro macro_fft8_tail
|
||||||
|
+ vstr d8, [a1, #1 * 2*4]
|
||||||
|
+ vstr d9, [a1, #3 * 2*4]
|
||||||
|
+.endm
|
||||||
|
+
|
||||||
|
+function fft8_vfp
|
||||||
|
+ ldr a3, =0x03030000 @ RunFast mode, vector length 4, stride 1
|
||||||
|
+ fmrx a2, FPSCR
|
||||||
|
+ fmxr FPSCR, a3
|
||||||
|
+ vpush {s16-s31}
|
||||||
|
+
|
||||||
|
+ macro_fft8_head
|
||||||
|
+ macro_fft8_tail
|
||||||
|
+
|
||||||
|
+ vpop {s16-s31}
|
||||||
|
+ fmxr FPSCR, a2
|
||||||
|
+ bx lr
|
||||||
|
+endfunc
|
||||||
|
+
|
||||||
|
+.align 3
|
||||||
|
+cos1pi4: @ cos(1*pi/4) = sqrt(2)/2
|
||||||
|
+ .float 0.707106769084930419921875
|
||||||
|
+cos1pi8: @ cos(1*pi/8) = sqrt(2+sqrt(2))/2
|
||||||
|
+ .float 0.92387950420379638671875
|
||||||
|
+cos3pi8: @ cos(3*pi/8) = sqrt(2-sqrt(2))/2
|
||||||
|
+ .float 0.3826834261417388916015625
|
||||||
|
+
|
||||||
|
+function ff_fft16_vfp, export=1
|
||||||
|
+ ldr a3, =0x03030000 @ RunFast mode, vector length 4, stride 1
|
||||||
|
+ fmrx a2, FPSCR
|
||||||
|
+ fmxr FPSCR, a3
|
||||||
|
+ vpush {s16-s31}
|
||||||
|
+
|
||||||
|
+ macro_fft8_head
|
||||||
|
+ @ FFT4(z+8)
|
||||||
|
+ vldr d10, [a1, #8 * 2*4]
|
||||||
|
+ vldr d12, [a1, #9 * 2*4]
|
||||||
|
+ vldr d11, [a1, #10 * 2*4]
|
||||||
|
+ vldr d13, [a1, #11 * 2*4]
|
||||||
|
+ macro_fft8_tail
|
||||||
|
+ vadd.f s16, s20, s24 @ vector op
|
||||||
|
+ @ FFT4(z+12)
|
||||||
|
+ vldr d4, [a1, #12 * 2*4]
|
||||||
|
+ vldr d6, [a1, #13 * 2*4]
|
||||||
|
+ vldr d5, [a1, #14 * 2*4]
|
||||||
|
+ vsub.f s20, s20, s24 @ vector op
|
||||||
|
+ vldr d7, [a1, #15 * 2*4]
|
||||||
|
+ vadd.f s0, s16, s18
|
||||||
|
+ vsub.f s4, s16, s18
|
||||||
|
+ vadd.f s1, s17, s19
|
||||||
|
+ vsub.f s5, s17, s19
|
||||||
|
+ vadd.f s7, s21, s22
|
||||||
|
+ vsub.f s3, s21, s22
|
||||||
|
+ vadd.f s2, s20, s23
|
||||||
|
+ vsub.f s6, s20, s23
|
||||||
|
+ vadd.f s16, s8, s12 @ vector op
|
||||||
|
+ vstr d0, [a1, #8 * 2*4]
|
||||||
|
+ vstr d2, [a1, #10 * 2*4]
|
||||||
|
+ vstr d1, [a1, #9 * 2*4]
|
||||||
|
+ vsub.f s20, s8, s12
|
||||||
|
+ vstr d3, [a1, #11 * 2*4]
|
||||||
|
+ @ TRANSFORM(z[2],z[6],z[10],z[14],cos1pi4,cos1pi4)
|
||||||
|
+ vldr d12, [a1, #10 * 2*4]
|
||||||
|
+ vadd.f s0, s16, s18
|
||||||
|
+ vadd.f s1, s17, s19
|
||||||
|
+ vsub.f s6, s16, s18
|
||||||
|
+ vsub.f s7, s17, s19
|
||||||
|
+ vsub.f s3, s21, s22
|
||||||
|
+ vadd.f s2, s20, s23
|
||||||
|
+ vadd.f s5, s21, s22
|
||||||
|
+ vsub.f s4, s20, s23
|
||||||
|
+ vstr d0, [a1, #12 * 2*4]
|
||||||
|
+ vmov s0, s6
|
||||||
|
+ @ TRANSFORM(z[1],z[5],z[9],z[13],cos1pi8,cos3pi8)
|
||||||
|
+ vldr d6, [a1, #9 * 2*4]
|
||||||
|
+ vstr d1, [a1, #13 * 2*4]
|
||||||
|
+ vldr d1, cos1pi4 @ s2 = cos1pi4, s3 = cos1pi8
|
||||||
|
+ vstr d2, [a1, #15 * 2*4]
|
||||||
|
+ vldr d7, [a1, #13 * 2*4]
|
||||||
|
+ vadd.f s4, s25, s24
|
||||||
|
+ vsub.f s5, s25, s24
|
||||||
|
+ vsub.f s6, s0, s7
|
||||||
|
+ vadd.f s7, s0, s7
|
||||||
|
+ vmul.f s20, s12, s3 @ vector op
|
||||||
|
+ @ TRANSFORM(z[3],z[7],z[11],z[15],cos3pi8,cos1pi8)
|
||||||
|
+ vldr d4, [a1, #11 * 2*4]
|
||||||
|
+ vldr d5, [a1, #15 * 2*4]
|
||||||
|
+ vldr s1, cos3pi8
|
||||||
|
+ vmul.f s24, s4, s2 @ vector * scalar op
|
||||||
|
+ vmul.f s28, s12, s1 @ vector * scalar op
|
||||||
|
+ vmul.f s12, s8, s1 @ vector * scalar op
|
||||||
|
+ vadd.f s4, s20, s29
|
||||||
|
+ vsub.f s5, s21, s28
|
||||||
|
+ vsub.f s6, s22, s31
|
||||||
|
+ vadd.f s7, s23, s30
|
||||||
|
+ vmul.f s8, s8, s3 @ vector * scalar op
|
||||||
|
+ vldr d8, [a1, #1 * 2*4]
|
||||||
|
+ vldr d9, [a1, #5 * 2*4]
|
||||||
|
+ vldr d10, [a1, #3 * 2*4]
|
||||||
|
+ vldr d11, [a1, #7 * 2*4]
|
||||||
|
+ vldr d14, [a1, #2 * 2*4]
|
||||||
|
+ vadd.f s0, s6, s4
|
||||||
|
+ vadd.f s1, s5, s7
|
||||||
|
+ vsub.f s2, s5, s7
|
||||||
|
+ vsub.f s3, s6, s4
|
||||||
|
+ vadd.f s4, s12, s9
|
||||||
|
+ vsub.f s5, s13, s8
|
||||||
|
+ vsub.f s6, s14, s11
|
||||||
|
+ vadd.f s7, s15, s10
|
||||||
|
+ vadd.f s12, s0, s16 @ vector op
|
||||||
|
+ vstr d0, [a1, #1 * 2*4]
|
||||||
|
+ vstr d1, [a1, #5 * 2*4]
|
||||||
|
+ vldr d4, [a1, #1 * 2*4]
|
||||||
|
+ vldr d5, [a1, #5 * 2*4]
|
||||||
|
+ vadd.f s0, s6, s4
|
||||||
|
+ vadd.f s1, s5, s7
|
||||||
|
+ vsub.f s2, s5, s7
|
||||||
|
+ vsub.f s3, s6, s4
|
||||||
|
+ vsub.f s8, s16, s8 @ vector op
|
||||||
|
+ vstr d6, [a1, #1 * 2*4]
|
||||||
|
+ vstr d7, [a1, #5 * 2*4]
|
||||||
|
+ vldr d15, [a1, #6 * 2*4]
|
||||||
|
+ vsub.f s4, s20, s0
|
||||||
|
+ vsub.f s5, s21, s1
|
||||||
|
+ vsub.f s6, s22, s2
|
||||||
|
+ vsub.f s7, s23, s3
|
||||||
|
+ vadd.f s20, s0, s20 @ vector op
|
||||||
|
+ vstr d4, [a1, #9 * 2*4]
|
||||||
|
+ @ TRANSFORM_ZERO(z[0],z[4],z[8],z[12])
|
||||||
|
+ vldr d6, [a1, #8 * 2*4]
|
||||||
|
+ vstr d5, [a1, #13 * 2*4]
|
||||||
|
+ vldr d7, [a1, #12 * 2*4]
|
||||||
|
+ vstr d2, [a1, #11 * 2*4]
|
||||||
|
+ vldr d8, [a1, #0 * 2*4]
|
||||||
|
+ vstr d3, [a1, #15 * 2*4]
|
||||||
|
+ vldr d9, [a1, #4 * 2*4]
|
||||||
|
+ vadd.f s0, s26, s24
|
||||||
|
+ vadd.f s1, s25, s27
|
||||||
|
+ vsub.f s2, s25, s27
|
||||||
|
+ vsub.f s3, s26, s24
|
||||||
|
+ vadd.f s4, s14, s12
|
||||||
|
+ vadd.f s5, s13, s15
|
||||||
|
+ vsub.f s6, s13, s15
|
||||||
|
+ vsub.f s7, s14, s12
|
||||||
|
+ vadd.f s8, s0, s28 @ vector op
|
||||||
|
+ vstr d0, [a1, #3 * 2*4]
|
||||||
|
+ vstr d1, [a1, #7 * 2*4]
|
||||||
|
+ vldr d6, [a1, #3 * 2*4]
|
||||||
|
+ vldr d7, [a1, #7 * 2*4]
|
||||||
|
+ vsub.f s0, s16, s4
|
||||||
|
+ vsub.f s1, s17, s5
|
||||||
|
+ vsub.f s2, s18, s6
|
||||||
|
+ vsub.f s3, s19, s7
|
||||||
|
+ vsub.f s12, s28, s12 @ vector op
|
||||||
|
+ vadd.f s16, s4, s16 @ vector op
|
||||||
|
+ vstr d10, [a1, #3 * 2*4]
|
||||||
|
+ vstr d11, [a1, #7 * 2*4]
|
||||||
|
+ vstr d4, [a1, #2 * 2*4]
|
||||||
|
+ vstr d5, [a1, #6 * 2*4]
|
||||||
|
+ vstr d0, [a1, #8 * 2*4]
|
||||||
|
+ vstr d1, [a1, #12 * 2*4]
|
||||||
|
+ vstr d6, [a1, #10 * 2*4]
|
||||||
|
+ vstr d7, [a1, #14 * 2*4]
|
||||||
|
+ vstr d8, [a1, #0 * 2*4]
|
||||||
|
+ vstr d9, [a1, #4 * 2*4]
|
||||||
|
+
|
||||||
|
+ vpop {s16-s31}
|
||||||
|
+ fmxr FPSCR, a2
|
||||||
|
+ bx lr
|
||||||
|
+endfunc
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,140 @@
|
|||||||
|
From ed16009b0a05fbd344832d5ad2e982c169aec42c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Mon, 15 Jul 2013 18:28:16 +0100
|
||||||
|
Subject: [PATCH 47/49] [ffmpeg] - backport - dcadsp: Add a new method,
|
||||||
|
qmf_32_subbands
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
This does most of the work formerly carried out by
|
||||||
|
the static function qmf_32_subbands() in dcadec.c.
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/dcadec.c | 26 +++++---------------------
|
||||||
|
libavcodec/dcadsp.c | 30 ++++++++++++++++++++++++++++++
|
||||||
|
libavcodec/dcadsp.h | 9 +++++++++
|
||||||
|
3 files changed, 44 insertions(+), 21 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c
|
||||||
|
index b648613..4054d63 100644
|
||||||
|
--- a/libavcodec/dcadec.c
|
||||||
|
+++ b/libavcodec/dcadec.c
|
||||||
|
@@ -1108,10 +1108,8 @@ static void qmf_32_subbands(DCAContext *s, int chans,
|
||||||
|
float scale)
|
||||||
|
{
|
||||||
|
const float *prCoeff;
|
||||||
|
- int i;
|
||||||
|
|
||||||
|
int sb_act = s->subband_activity[chans];
|
||||||
|
- int subindex;
|
||||||
|
|
||||||
|
scale *= sqrt(1 / 8.0);
|
||||||
|
|
||||||
|
@@ -1121,25 +1119,11 @@ static void qmf_32_subbands(DCAContext *s, int chans,
|
||||||
|
else /* Perfect reconstruction */
|
||||||
|
prCoeff = fir_32bands_perfect;
|
||||||
|
|
||||||
|
- for (i = sb_act; i < 32; i++)
|
||||||
|
- s->raXin[i] = 0.0;
|
||||||
|
-
|
||||||
|
- /* Reconstructed channel sample index */
|
||||||
|
- for (subindex = 0; subindex < 8; subindex++) {
|
||||||
|
- /* Load in one sample from each subband and clear inactive subbands */
|
||||||
|
- for (i = 0; i < sb_act; i++) {
|
||||||
|
- unsigned sign = (i - 1) & 2;
|
||||||
|
- uint32_t v = AV_RN32A(&samples_in[i][subindex]) ^ sign << 30;
|
||||||
|
- AV_WN32A(&s->raXin[i], v);
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- s->synth.synth_filter_float(&s->imdct,
|
||||||
|
- s->subband_fir_hist[chans],
|
||||||
|
- &s->hist_index[chans],
|
||||||
|
- s->subband_fir_noidea[chans], prCoeff,
|
||||||
|
- samples_out, s->raXin, scale);
|
||||||
|
- samples_out += 32;
|
||||||
|
- }
|
||||||
|
+ s->dcadsp.qmf_32_subbands(samples_in, sb_act, &s->synth, &s->imdct,
|
||||||
|
+ s->subband_fir_hist[chans],
|
||||||
|
+ &s->hist_index[chans],
|
||||||
|
+ s->subband_fir_noidea[chans], prCoeff,
|
||||||
|
+ samples_out, s->raXin, scale);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
|
||||||
|
diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c
|
||||||
|
index dd4994d..ab63f1b 100644
|
||||||
|
--- a/libavcodec/dcadsp.c
|
||||||
|
+++ b/libavcodec/dcadsp.c
|
||||||
|
@@ -20,6 +20,7 @@
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
+#include "libavutil/intreadwrite.h"
|
||||||
|
#include "dcadsp.h"
|
||||||
|
|
||||||
|
static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
|
||||||
|
@@ -44,8 +45,37 @@ static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void dca_qmf_32_subbands(float samples_in[32][8], int sb_act,
|
||||||
|
+ SynthFilterContext *synth, FFTContext *imdct,
|
||||||
|
+ float synth_buf_ptr[512],
|
||||||
|
+ int *synth_buf_offset, float synth_buf2[32],
|
||||||
|
+ const float window[512], float *samples_out,
|
||||||
|
+ float raXin[32], float scale)
|
||||||
|
+{
|
||||||
|
+ int i;
|
||||||
|
+ int subindex;
|
||||||
|
+
|
||||||
|
+ for (i = sb_act; i < 32; i++)
|
||||||
|
+ raXin[i] = 0.0;
|
||||||
|
+
|
||||||
|
+ /* Reconstructed channel sample index */
|
||||||
|
+ for (subindex = 0; subindex < 8; subindex++) {
|
||||||
|
+ /* Load in one sample from each subband and clear inactive subbands */
|
||||||
|
+ for (i = 0; i < sb_act; i++) {
|
||||||
|
+ unsigned sign = (i - 1) & 2;
|
||||||
|
+ uint32_t v = AV_RN32A(&samples_in[i][subindex]) ^ sign << 30;
|
||||||
|
+ AV_WN32A(&raXin[i], v);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ synth->synth_filter_float(imdct, synth_buf_ptr, synth_buf_offset,
|
||||||
|
+ synth_buf2, window, samples_out, raXin, scale);
|
||||||
|
+ samples_out += 32;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
void ff_dcadsp_init(DCADSPContext *s)
|
||||||
|
{
|
||||||
|
s->lfe_fir = dca_lfe_fir_c;
|
||||||
|
+ s->qmf_32_subbands = dca_qmf_32_subbands;
|
||||||
|
if (ARCH_ARM) ff_dcadsp_init_arm(s);
|
||||||
|
}
|
||||||
|
diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h
|
||||||
|
index bb157f7..d86c1f3 100644
|
||||||
|
--- a/libavcodec/dcadsp.h
|
||||||
|
+++ b/libavcodec/dcadsp.h
|
||||||
|
@@ -19,9 +19,18 @@
|
||||||
|
#ifndef AVCODEC_DCADSP_H
|
||||||
|
#define AVCODEC_DCADSP_H
|
||||||
|
|
||||||
|
+#include "avfft.h"
|
||||||
|
+#include "synth_filter.h"
|
||||||
|
+
|
||||||
|
typedef struct DCADSPContext {
|
||||||
|
void (*lfe_fir)(float *out, const float *in, const float *coefs,
|
||||||
|
int decifactor, float scale);
|
||||||
|
+ void (*qmf_32_subbands)(float samples_in[32][8], int sb_act,
|
||||||
|
+ SynthFilterContext *synth, FFTContext *imdct,
|
||||||
|
+ float synth_buf_ptr[512],
|
||||||
|
+ int *synth_buf_offset, float synth_buf2[32],
|
||||||
|
+ const float window[512], float *samples_out,
|
||||||
|
+ float raXin[32], float scale);
|
||||||
|
} DCADSPContext;
|
||||||
|
|
||||||
|
void ff_dcadsp_init(DCADSPContext *s);
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,551 @@
|
|||||||
|
From a6c273927c5bb212e806be6ae10c81dcd81b2152 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Mon, 15 Jul 2013 18:28:17 +0100
|
||||||
|
Subject: [PATCH 48/49] [ffmpeg] - backport - arm: Add VFP-accelerated version
|
||||||
|
of qmf_32_subbands
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
Before After
|
||||||
|
Mean StdDev Mean StdDev Change
|
||||||
|
This function 1323.0 98.0 746.2 60.6 +77.3%
|
||||||
|
Overall 15400.0 336.4 14147.5 288.4 +8.9%
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/arm/dcadsp_init_arm.c | 10 +-
|
||||||
|
libavcodec/arm/dcadsp_vfp.S | 493 +++++++++++++++++++++++++++
|
||||||
|
2 files changed, 502 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 libavcodec/arm/dcadsp_vfp.S
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/dcadsp_init_arm.c b/libavcodec/arm/dcadsp_init_arm.c
|
||||||
|
index a1efbff..58267a2 100644
|
||||||
|
--- a/libavcodec/arm/dcadsp_init_arm.c
|
||||||
|
+++ b/libavcodec/arm/dcadsp_init_arm.c
|
||||||
|
@@ -26,6 +26,12 @@
|
||||||
|
|
||||||
|
void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
|
||||||
|
int decifactor, float scale);
|
||||||
|
+void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
|
||||||
|
+ SynthFilterContext *synth, FFTContext *imdct,
|
||||||
|
+ float synth_buf_ptr[512],
|
||||||
|
+ int *synth_buf_offset, float synth_buf2[32],
|
||||||
|
+ const float window[512], float *samples_out,
|
||||||
|
+ float raXin[32], float scale);
|
||||||
|
void ff_dca_lfe_fir_neon(float *out, const float *in, const float *coefs,
|
||||||
|
int decifactor, float scale);
|
||||||
|
|
||||||
|
@@ -33,8 +39,10 @@ av_cold void ff_dcadsp_init_arm(DCADSPContext *s)
|
||||||
|
{
|
||||||
|
int cpu_flags = av_get_cpu_flags();
|
||||||
|
|
||||||
|
- if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags))
|
||||||
|
+ if (have_vfp(cpu_flags) && !have_vfpv3(cpu_flags)) {
|
||||||
|
s->lfe_fir = ff_dca_lfe_fir_vfp;
|
||||||
|
+ s->qmf_32_subbands = ff_dca_qmf_32_subbands_vfp;
|
||||||
|
+ }
|
||||||
|
if (have_neon(cpu_flags))
|
||||||
|
s->lfe_fir = ff_dca_lfe_fir_neon;
|
||||||
|
}
|
||||||
|
diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S
|
||||||
|
new file mode 100644
|
||||||
|
index 0000000..6039e87
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/libavcodec/arm/dcadsp_vfp.S
|
||||||
|
@@ -0,0 +1,493 @@
|
||||||
|
+/*
|
||||||
|
+ * Copyright (c) 2013 RISC OS Open Ltd
|
||||||
|
+ * Author: Ben Avison <bavison@riscosopen.org>
|
||||||
|
+ *
|
||||||
|
+ * This file is part of Libav.
|
||||||
|
+ *
|
||||||
|
+ * Libav is free software; you can redistribute it and/or
|
||||||
|
+ * modify it under the terms of the GNU Lesser General Public
|
||||||
|
+ * License as published by the Free Software Foundation; either
|
||||||
|
+ * version 2.1 of the License, or (at your option) any later version.
|
||||||
|
+ *
|
||||||
|
+ * Libav is distributed in the hope that it will be useful,
|
||||||
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
+ * Lesser General Public License for more details.
|
||||||
|
+ *
|
||||||
|
+ * You should have received a copy of the GNU Lesser General Public
|
||||||
|
+ * License along with Libav; if not, write to the Free Software
|
||||||
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#include "libavutil/arm/asm.S"
|
||||||
|
+
|
||||||
|
+POUT .req a1
|
||||||
|
+PIN .req a2
|
||||||
|
+PCOEF .req a3
|
||||||
|
+DECIFACTOR .req a4
|
||||||
|
+OLDFPSCR .req a4
|
||||||
|
+COUNTER .req ip
|
||||||
|
+
|
||||||
|
+SCALE32 .req s28 @ use vector of 4 in place of 9th scalar when decifactor=32 / JMAX=8
|
||||||
|
+SCALE64 .req s0 @ spare register in scalar bank when decifactor=64 / JMAX=4
|
||||||
|
+IN0 .req s4
|
||||||
|
+IN1 .req s5
|
||||||
|
+IN2 .req s6
|
||||||
|
+IN3 .req s7
|
||||||
|
+IN4 .req s0
|
||||||
|
+IN5 .req s1
|
||||||
|
+IN6 .req s2
|
||||||
|
+IN7 .req s3
|
||||||
|
+COEF0 .req s8 @ coefficient elements
|
||||||
|
+COEF1 .req s9
|
||||||
|
+COEF2 .req s10
|
||||||
|
+COEF3 .req s11
|
||||||
|
+COEF4 .req s12
|
||||||
|
+COEF5 .req s13
|
||||||
|
+COEF6 .req s14
|
||||||
|
+COEF7 .req s15
|
||||||
|
+ACCUM0 .req s16 @ double-buffered multiply-accumulate results
|
||||||
|
+ACCUM4 .req s20
|
||||||
|
+POST0 .req s24 @ do long-latency post-multiply in this vector in parallel
|
||||||
|
+POST1 .req s25
|
||||||
|
+POST2 .req s26
|
||||||
|
+POST3 .req s27
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+.macro inner_loop decifactor, dir, tail, head
|
||||||
|
+ .ifc "\dir","up"
|
||||||
|
+ .set X, 0
|
||||||
|
+ .set Y, 4
|
||||||
|
+ .else
|
||||||
|
+ .set X, 4*JMAX*4 - 4
|
||||||
|
+ .set Y, -4
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vldr COEF0, [PCOEF, #X + (0*JMAX + 0) * Y]
|
||||||
|
+ vldr COEF1, [PCOEF, #X + (1*JMAX + 0) * Y]
|
||||||
|
+ vldr COEF2, [PCOEF, #X + (2*JMAX + 0) * Y]
|
||||||
|
+ vldr COEF3, [PCOEF, #X + (3*JMAX + 0) * Y]
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\tail",""
|
||||||
|
+ vadd.f POST0, ACCUM0, ACCUM4 @ vector operation
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vmul.f ACCUM0, COEF0, IN0 @ vector = vector * scalar
|
||||||
|
+ vldr COEF4, [PCOEF, #X + (0*JMAX + 1) * Y]
|
||||||
|
+ vldr COEF5, [PCOEF, #X + (1*JMAX + 1) * Y]
|
||||||
|
+ vldr COEF6, [PCOEF, #X + (2*JMAX + 1) * Y]
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\tail",""
|
||||||
|
+ vmul.f POST0, POST0, SCALE\decifactor @ vector operation (SCALE may be scalar)
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vldr COEF7, [PCOEF, #X + (3*JMAX + 1) * Y]
|
||||||
|
+ .ifc "\tail",""
|
||||||
|
+ vmul.f ACCUM4, COEF4, IN1 @ vector operation
|
||||||
|
+ .endif
|
||||||
|
+ vldr COEF0, [PCOEF, #X + (0*JMAX + 2) * Y]
|
||||||
|
+ vldr COEF1, [PCOEF, #X + (1*JMAX + 2) * Y]
|
||||||
|
+ .ifnc "\tail",""
|
||||||
|
+ vmul.f ACCUM4, COEF4, IN1 @ vector operation
|
||||||
|
+ .endif
|
||||||
|
+ vldr COEF2, [PCOEF, #X + (2*JMAX + 2) * Y]
|
||||||
|
+ vldr COEF3, [PCOEF, #X + (3*JMAX + 2) * Y]
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\tail",""
|
||||||
|
+ vstmia POUT!, {POST0-POST3}
|
||||||
|
+ .endif
|
||||||
|
+ .ifnc "\head",""
|
||||||
|
+ vmla.f ACCUM0, COEF0, IN2 @ vector = vector * scalar
|
||||||
|
+ vldr COEF4, [PCOEF, #X + (0*JMAX + 3) * Y]
|
||||||
|
+ vldr COEF5, [PCOEF, #X + (1*JMAX + 3) * Y]
|
||||||
|
+ vldr COEF6, [PCOEF, #X + (2*JMAX + 3) * Y]
|
||||||
|
+ vldr COEF7, [PCOEF, #X + (3*JMAX + 3) * Y]
|
||||||
|
+ vmla.f ACCUM4, COEF4, IN3 @ vector = vector * scalar
|
||||||
|
+ .if \decifactor == 32
|
||||||
|
+ vldr COEF0, [PCOEF, #X + (0*JMAX + 4) * Y]
|
||||||
|
+ vldr COEF1, [PCOEF, #X + (1*JMAX + 4) * Y]
|
||||||
|
+ vldr COEF2, [PCOEF, #X + (2*JMAX + 4) * Y]
|
||||||
|
+ vldr COEF3, [PCOEF, #X + (3*JMAX + 4) * Y]
|
||||||
|
+ vmla.f ACCUM0, COEF0, IN4 @ vector = vector * scalar
|
||||||
|
+ vldr COEF4, [PCOEF, #X + (0*JMAX + 5) * Y]
|
||||||
|
+ vldr COEF5, [PCOEF, #X + (1*JMAX + 5) * Y]
|
||||||
|
+ vldr COEF6, [PCOEF, #X + (2*JMAX + 5) * Y]
|
||||||
|
+ vldr COEF7, [PCOEF, #X + (3*JMAX + 5) * Y]
|
||||||
|
+ vmla.f ACCUM4, COEF4, IN5 @ vector = vector * scalar
|
||||||
|
+ vldr COEF0, [PCOEF, #X + (0*JMAX + 6) * Y]
|
||||||
|
+ vldr COEF1, [PCOEF, #X + (1*JMAX + 6) * Y]
|
||||||
|
+ vldr COEF2, [PCOEF, #X + (2*JMAX + 6) * Y]
|
||||||
|
+ vldr COEF3, [PCOEF, #X + (3*JMAX + 6) * Y]
|
||||||
|
+ vmla.f ACCUM0, COEF0, IN6 @ vector = vector * scalar
|
||||||
|
+ vldr COEF4, [PCOEF, #X + (0*JMAX + 7) * Y]
|
||||||
|
+ vldr COEF5, [PCOEF, #X + (1*JMAX + 7) * Y]
|
||||||
|
+ vldr COEF6, [PCOEF, #X + (2*JMAX + 7) * Y]
|
||||||
|
+ vldr COEF7, [PCOEF, #X + (3*JMAX + 7) * Y]
|
||||||
|
+ vmla.f ACCUM4, COEF4, IN7 @ vector = vector * scalar
|
||||||
|
+ .endif
|
||||||
|
+ .endif
|
||||||
|
+.endm
|
||||||
|
+
|
||||||
|
+.macro dca_lfe_fir decifactor
|
||||||
|
+ .if \decifactor == 32
|
||||||
|
+ .set JMAX, 8
|
||||||
|
+ vpush {s16-s31}
|
||||||
|
+ vmov SCALE32, s0 @ duplicate scalar across vector
|
||||||
|
+ vldr IN4, [PIN, #-4*4]
|
||||||
|
+ vldr IN5, [PIN, #-5*4]
|
||||||
|
+ vldr IN6, [PIN, #-6*4]
|
||||||
|
+ vldr IN7, [PIN, #-7*4]
|
||||||
|
+ .else
|
||||||
|
+ .set JMAX, 4
|
||||||
|
+ vpush {s16-s27}
|
||||||
|
+ .endif
|
||||||
|
+
|
||||||
|
+ mov COUNTER, #\decifactor/4 - 1
|
||||||
|
+ inner_loop \decifactor, up,, head
|
||||||
|
+1: add PCOEF, PCOEF, #4*JMAX*4
|
||||||
|
+ subs COUNTER, COUNTER, #1
|
||||||
|
+ inner_loop \decifactor, up, tail, head
|
||||||
|
+ bne 1b
|
||||||
|
+ inner_loop \decifactor, up, tail
|
||||||
|
+
|
||||||
|
+ mov COUNTER, #\decifactor/4 - 1
|
||||||
|
+ inner_loop \decifactor, down,, head
|
||||||
|
+1: sub PCOEF, PCOEF, #4*JMAX*4
|
||||||
|
+ subs COUNTER, COUNTER, #1
|
||||||
|
+ inner_loop \decifactor, down, tail, head
|
||||||
|
+ bne 1b
|
||||||
|
+ inner_loop \decifactor, down, tail
|
||||||
|
+
|
||||||
|
+ .if \decifactor == 32
|
||||||
|
+ vpop {s16-s31}
|
||||||
|
+ .else
|
||||||
|
+ vpop {s16-s27}
|
||||||
|
+ .endif
|
||||||
|
+ fmxr FPSCR, OLDFPSCR
|
||||||
|
+ bx lr
|
||||||
|
+.endm
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+/* void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
|
||||||
|
+ * int decifactor, float scale)
|
||||||
|
+ */
|
||||||
|
+function ff_dca_lfe_fir_vfp, export=1
|
||||||
|
+ teq DECIFACTOR, #32
|
||||||
|
+ fmrx OLDFPSCR, FPSCR
|
||||||
|
+ ldr ip, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
|
||||||
|
+ fmxr FPSCR, ip
|
||||||
|
+NOVFP vldr s0, [sp]
|
||||||
|
+ vldr IN0, [PIN, #-0*4]
|
||||||
|
+ vldr IN1, [PIN, #-1*4]
|
||||||
|
+ vldr IN2, [PIN, #-2*4]
|
||||||
|
+ vldr IN3, [PIN, #-3*4]
|
||||||
|
+ beq 32f
|
||||||
|
+64: dca_lfe_fir 64
|
||||||
|
+ .ltorg
|
||||||
|
+32: dca_lfe_fir 32
|
||||||
|
+endfunc
|
||||||
|
+
|
||||||
|
+ .unreq POUT
|
||||||
|
+ .unreq PIN
|
||||||
|
+ .unreq PCOEF
|
||||||
|
+ .unreq DECIFACTOR
|
||||||
|
+ .unreq OLDFPSCR
|
||||||
|
+ .unreq COUNTER
|
||||||
|
+
|
||||||
|
+ .unreq SCALE32
|
||||||
|
+ .unreq SCALE64
|
||||||
|
+ .unreq IN0
|
||||||
|
+ .unreq IN1
|
||||||
|
+ .unreq IN2
|
||||||
|
+ .unreq IN3
|
||||||
|
+ .unreq IN4
|
||||||
|
+ .unreq IN5
|
||||||
|
+ .unreq IN6
|
||||||
|
+ .unreq IN7
|
||||||
|
+ .unreq COEF0
|
||||||
|
+ .unreq COEF1
|
||||||
|
+ .unreq COEF2
|
||||||
|
+ .unreq COEF3
|
||||||
|
+ .unreq COEF4
|
||||||
|
+ .unreq COEF5
|
||||||
|
+ .unreq COEF6
|
||||||
|
+ .unreq COEF7
|
||||||
|
+ .unreq ACCUM0
|
||||||
|
+ .unreq ACCUM4
|
||||||
|
+ .unreq POST0
|
||||||
|
+ .unreq POST1
|
||||||
|
+ .unreq POST2
|
||||||
|
+ .unreq POST3
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+IN .req a1
|
||||||
|
+SBACT .req a2
|
||||||
|
+OLDFPSCR .req a3
|
||||||
|
+IMDCT .req a4
|
||||||
|
+WINDOW .req v1
|
||||||
|
+OUT .req v2
|
||||||
|
+BUF .req v3
|
||||||
|
+SCALEINT .req v4 @ only used in softfp case
|
||||||
|
+COUNT .req v5
|
||||||
|
+
|
||||||
|
+SCALE .req s0
|
||||||
|
+
|
||||||
|
+/* Stack layout differs in softfp and hardfp cases:
|
||||||
|
+ *
|
||||||
|
+ * hardfp
|
||||||
|
+ * fp -> 6 arg words saved by caller
|
||||||
|
+ * a3,a4,v1-v3,v5,fp,lr on entry (a3 just to pad to 8 bytes)
|
||||||
|
+ * s16-s23 on entry
|
||||||
|
+ * align 16
|
||||||
|
+ * buf -> 8*32*4 bytes buffer
|
||||||
|
+ * s0 on entry
|
||||||
|
+ * sp -> 3 arg words for callee
|
||||||
|
+ *
|
||||||
|
+ * softfp
|
||||||
|
+ * fp -> 7 arg words saved by caller
|
||||||
|
+ * a4,v1-v5,fp,lr on entry
|
||||||
|
+ * s16-s23 on entry
|
||||||
|
+ * align 16
|
||||||
|
+ * buf -> 8*32*4 bytes buffer
|
||||||
|
+ * sp -> 4 arg words for callee
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+/* void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
|
||||||
|
+ * SynthFilterContext *synth, FFTContext *imdct,
|
||||||
|
+ * float synth_buf_ptr[512],
|
||||||
|
+ * int *synth_buf_offset, float synth_buf2[32],
|
||||||
|
+ * const float window[512], float *samples_out,
|
||||||
|
+ * float raXin[32], float scale);
|
||||||
|
+ */
|
||||||
|
+function ff_dca_qmf_32_subbands_vfp, export=1
|
||||||
|
+VFP push {a3-a4,v1-v3,v5,fp,lr}
|
||||||
|
+NOVFP push {a4,v1-v5,fp,lr}
|
||||||
|
+ add fp, sp, #8*4
|
||||||
|
+ vpush {s16-s23}
|
||||||
|
+ @ The buffer pointed at by raXin isn't big enough for us to do a
|
||||||
|
+ @ complete matrix transposition as we want to, so allocate an
|
||||||
|
+ @ alternative buffer from the stack. Align to 4 words for speed.
|
||||||
|
+ sub BUF, sp, #8*32*4
|
||||||
|
+ bic BUF, BUF, #15
|
||||||
|
+ mov sp, BUF
|
||||||
|
+ ldr lr, =0x03330000 @ RunFast mode, short vectors of length 4, stride 2
|
||||||
|
+ fmrx OLDFPSCR, FPSCR
|
||||||
|
+ fmxr FPSCR, lr
|
||||||
|
+ @ COUNT is used to count down 2 things at once:
|
||||||
|
+ @ bits 0-4 are the number of word pairs remaining in the output row
|
||||||
|
+ @ bits 5-31 are the number of words to copy (with possible negation)
|
||||||
|
+ @ from the source matrix before we start zeroing the remainder
|
||||||
|
+ mov COUNT, #(-4 << 5) + 16
|
||||||
|
+ adds COUNT, COUNT, SBACT, lsl #5
|
||||||
|
+ bmi 2f
|
||||||
|
+1:
|
||||||
|
+ vldr s8, [IN, #(0*8+0)*4]
|
||||||
|
+ vldr s10, [IN, #(0*8+1)*4]
|
||||||
|
+ vldr s12, [IN, #(0*8+2)*4]
|
||||||
|
+ vldr s14, [IN, #(0*8+3)*4]
|
||||||
|
+ vldr s16, [IN, #(0*8+4)*4]
|
||||||
|
+ vldr s18, [IN, #(0*8+5)*4]
|
||||||
|
+ vldr s20, [IN, #(0*8+6)*4]
|
||||||
|
+ vldr s22, [IN, #(0*8+7)*4]
|
||||||
|
+ vneg.f s8, s8
|
||||||
|
+ vldr s9, [IN, #(1*8+0)*4]
|
||||||
|
+ vldr s11, [IN, #(1*8+1)*4]
|
||||||
|
+ vldr s13, [IN, #(1*8+2)*4]
|
||||||
|
+ vldr s15, [IN, #(1*8+3)*4]
|
||||||
|
+ vneg.f s16, s16
|
||||||
|
+ vldr s17, [IN, #(1*8+4)*4]
|
||||||
|
+ vldr s19, [IN, #(1*8+5)*4]
|
||||||
|
+ vldr s21, [IN, #(1*8+6)*4]
|
||||||
|
+ vldr s23, [IN, #(1*8+7)*4]
|
||||||
|
+ vstr d4, [BUF, #(0*32+0)*4]
|
||||||
|
+ vstr d5, [BUF, #(1*32+0)*4]
|
||||||
|
+ vstr d6, [BUF, #(2*32+0)*4]
|
||||||
|
+ vstr d7, [BUF, #(3*32+0)*4]
|
||||||
|
+ vstr d8, [BUF, #(4*32+0)*4]
|
||||||
|
+ vstr d9, [BUF, #(5*32+0)*4]
|
||||||
|
+ vstr d10, [BUF, #(6*32+0)*4]
|
||||||
|
+ vstr d11, [BUF, #(7*32+0)*4]
|
||||||
|
+ vldr s9, [IN, #(3*8+0)*4]
|
||||||
|
+ vldr s11, [IN, #(3*8+1)*4]
|
||||||
|
+ vldr s13, [IN, #(3*8+2)*4]
|
||||||
|
+ vldr s15, [IN, #(3*8+3)*4]
|
||||||
|
+ vldr s17, [IN, #(3*8+4)*4]
|
||||||
|
+ vldr s19, [IN, #(3*8+5)*4]
|
||||||
|
+ vldr s21, [IN, #(3*8+6)*4]
|
||||||
|
+ vldr s23, [IN, #(3*8+7)*4]
|
||||||
|
+ vneg.f s9, s9
|
||||||
|
+ vldr s8, [IN, #(2*8+0)*4]
|
||||||
|
+ vldr s10, [IN, #(2*8+1)*4]
|
||||||
|
+ vldr s12, [IN, #(2*8+2)*4]
|
||||||
|
+ vldr s14, [IN, #(2*8+3)*4]
|
||||||
|
+ vneg.f s17, s17
|
||||||
|
+ vldr s16, [IN, #(2*8+4)*4]
|
||||||
|
+ vldr s18, [IN, #(2*8+5)*4]
|
||||||
|
+ vldr s20, [IN, #(2*8+6)*4]
|
||||||
|
+ vldr s22, [IN, #(2*8+7)*4]
|
||||||
|
+ vstr d4, [BUF, #(0*32+2)*4]
|
||||||
|
+ vstr d5, [BUF, #(1*32+2)*4]
|
||||||
|
+ vstr d6, [BUF, #(2*32+2)*4]
|
||||||
|
+ vstr d7, [BUF, #(3*32+2)*4]
|
||||||
|
+ vstr d8, [BUF, #(4*32+2)*4]
|
||||||
|
+ vstr d9, [BUF, #(5*32+2)*4]
|
||||||
|
+ vstr d10, [BUF, #(6*32+2)*4]
|
||||||
|
+ vstr d11, [BUF, #(7*32+2)*4]
|
||||||
|
+ add IN, IN, #4*8*4
|
||||||
|
+ add BUF, BUF, #4*4
|
||||||
|
+ subs COUNT, COUNT, #(4 << 5) + 2
|
||||||
|
+ bpl 1b
|
||||||
|
+2: @ Now deal with trailing < 4 samples
|
||||||
|
+ adds COUNT, COUNT, #3 << 5
|
||||||
|
+ bmi 4f @ sb_act was a multiple of 4
|
||||||
|
+ bics lr, COUNT, #0x1F
|
||||||
|
+ bne 3f
|
||||||
|
+ @ sb_act was n*4+1
|
||||||
|
+ vldr s8, [IN, #(0*8+0)*4]
|
||||||
|
+ vldr s10, [IN, #(0*8+1)*4]
|
||||||
|
+ vldr s12, [IN, #(0*8+2)*4]
|
||||||
|
+ vldr s14, [IN, #(0*8+3)*4]
|
||||||
|
+ vldr s16, [IN, #(0*8+4)*4]
|
||||||
|
+ vldr s18, [IN, #(0*8+5)*4]
|
||||||
|
+ vldr s20, [IN, #(0*8+6)*4]
|
||||||
|
+ vldr s22, [IN, #(0*8+7)*4]
|
||||||
|
+ vneg.f s8, s8
|
||||||
|
+ vldr s9, zero
|
||||||
|
+ vldr s11, zero
|
||||||
|
+ vldr s13, zero
|
||||||
|
+ vldr s15, zero
|
||||||
|
+ vneg.f s16, s16
|
||||||
|
+ vldr s17, zero
|
||||||
|
+ vldr s19, zero
|
||||||
|
+ vldr s21, zero
|
||||||
|
+ vldr s23, zero
|
||||||
|
+ vstr d4, [BUF, #(0*32+0)*4]
|
||||||
|
+ vstr d5, [BUF, #(1*32+0)*4]
|
||||||
|
+ vstr d6, [BUF, #(2*32+0)*4]
|
||||||
|
+ vstr d7, [BUF, #(3*32+0)*4]
|
||||||
|
+ vstr d8, [BUF, #(4*32+0)*4]
|
||||||
|
+ vstr d9, [BUF, #(5*32+0)*4]
|
||||||
|
+ vstr d10, [BUF, #(6*32+0)*4]
|
||||||
|
+ vstr d11, [BUF, #(7*32+0)*4]
|
||||||
|
+ add BUF, BUF, #2*4
|
||||||
|
+ sub COUNT, COUNT, #1
|
||||||
|
+ b 4f
|
||||||
|
+3: @ sb_act was n*4+2 or n*4+3, so do the first 2
|
||||||
|
+ vldr s8, [IN, #(0*8+0)*4]
|
||||||
|
+ vldr s10, [IN, #(0*8+1)*4]
|
||||||
|
+ vldr s12, [IN, #(0*8+2)*4]
|
||||||
|
+ vldr s14, [IN, #(0*8+3)*4]
|
||||||
|
+ vldr s16, [IN, #(0*8+4)*4]
|
||||||
|
+ vldr s18, [IN, #(0*8+5)*4]
|
||||||
|
+ vldr s20, [IN, #(0*8+6)*4]
|
||||||
|
+ vldr s22, [IN, #(0*8+7)*4]
|
||||||
|
+ vneg.f s8, s8
|
||||||
|
+ vldr s9, [IN, #(1*8+0)*4]
|
||||||
|
+ vldr s11, [IN, #(1*8+1)*4]
|
||||||
|
+ vldr s13, [IN, #(1*8+2)*4]
|
||||||
|
+ vldr s15, [IN, #(1*8+3)*4]
|
||||||
|
+ vneg.f s16, s16
|
||||||
|
+ vldr s17, [IN, #(1*8+4)*4]
|
||||||
|
+ vldr s19, [IN, #(1*8+5)*4]
|
||||||
|
+ vldr s21, [IN, #(1*8+6)*4]
|
||||||
|
+ vldr s23, [IN, #(1*8+7)*4]
|
||||||
|
+ vstr d4, [BUF, #(0*32+0)*4]
|
||||||
|
+ vstr d5, [BUF, #(1*32+0)*4]
|
||||||
|
+ vstr d6, [BUF, #(2*32+0)*4]
|
||||||
|
+ vstr d7, [BUF, #(3*32+0)*4]
|
||||||
|
+ vstr d8, [BUF, #(4*32+0)*4]
|
||||||
|
+ vstr d9, [BUF, #(5*32+0)*4]
|
||||||
|
+ vstr d10, [BUF, #(6*32+0)*4]
|
||||||
|
+ vstr d11, [BUF, #(7*32+0)*4]
|
||||||
|
+ add BUF, BUF, #2*4
|
||||||
|
+ sub COUNT, COUNT, #(2 << 5) + 1
|
||||||
|
+ bics lr, COUNT, #0x1F
|
||||||
|
+ bne 4f
|
||||||
|
+ @ sb_act was n*4+3
|
||||||
|
+ vldr s8, [IN, #(2*8+0)*4]
|
||||||
|
+ vldr s10, [IN, #(2*8+1)*4]
|
||||||
|
+ vldr s12, [IN, #(2*8+2)*4]
|
||||||
|
+ vldr s14, [IN, #(2*8+3)*4]
|
||||||
|
+ vldr s16, [IN, #(2*8+4)*4]
|
||||||
|
+ vldr s18, [IN, #(2*8+5)*4]
|
||||||
|
+ vldr s20, [IN, #(2*8+6)*4]
|
||||||
|
+ vldr s22, [IN, #(2*8+7)*4]
|
||||||
|
+ vldr s9, zero
|
||||||
|
+ vldr s11, zero
|
||||||
|
+ vldr s13, zero
|
||||||
|
+ vldr s15, zero
|
||||||
|
+ vldr s17, zero
|
||||||
|
+ vldr s19, zero
|
||||||
|
+ vldr s21, zero
|
||||||
|
+ vldr s23, zero
|
||||||
|
+ vstr d4, [BUF, #(0*32+0)*4]
|
||||||
|
+ vstr d5, [BUF, #(1*32+0)*4]
|
||||||
|
+ vstr d6, [BUF, #(2*32+0)*4]
|
||||||
|
+ vstr d7, [BUF, #(3*32+0)*4]
|
||||||
|
+ vstr d8, [BUF, #(4*32+0)*4]
|
||||||
|
+ vstr d9, [BUF, #(5*32+0)*4]
|
||||||
|
+ vstr d10, [BUF, #(6*32+0)*4]
|
||||||
|
+ vstr d11, [BUF, #(7*32+0)*4]
|
||||||
|
+ add BUF, BUF, #2*4
|
||||||
|
+ sub COUNT, COUNT, #1
|
||||||
|
+4: @ Now fill the remainder with 0
|
||||||
|
+ vldr s8, zero
|
||||||
|
+ vldr s9, zero
|
||||||
|
+ ands COUNT, COUNT, #0x1F
|
||||||
|
+ beq 6f
|
||||||
|
+5: vstr d4, [BUF, #(0*32+0)*4]
|
||||||
|
+ vstr d4, [BUF, #(1*32+0)*4]
|
||||||
|
+ vstr d4, [BUF, #(2*32+0)*4]
|
||||||
|
+ vstr d4, [BUF, #(3*32+0)*4]
|
||||||
|
+ vstr d4, [BUF, #(4*32+0)*4]
|
||||||
|
+ vstr d4, [BUF, #(5*32+0)*4]
|
||||||
|
+ vstr d4, [BUF, #(6*32+0)*4]
|
||||||
|
+ vstr d4, [BUF, #(7*32+0)*4]
|
||||||
|
+ add BUF, BUF, #2*4
|
||||||
|
+ subs COUNT, COUNT, #1
|
||||||
|
+ bne 5b
|
||||||
|
+6:
|
||||||
|
+ fmxr FPSCR, OLDFPSCR
|
||||||
|
+ ldr WINDOW, [fp, #3*4]
|
||||||
|
+ ldr OUT, [fp, #4*4]
|
||||||
|
+ sub BUF, BUF, #32*4
|
||||||
|
+NOVFP ldr SCALEINT, [fp, #6*4]
|
||||||
|
+ mov COUNT, #8
|
||||||
|
+VFP vpush {SCALE}
|
||||||
|
+VFP sub sp, sp, #3*4
|
||||||
|
+NOVFP sub sp, sp, #4*4
|
||||||
|
+7:
|
||||||
|
+VFP ldr a1, [fp, #-7*4] @ imdct
|
||||||
|
+NOVFP ldr a1, [fp, #-8*4]
|
||||||
|
+ ldmia fp, {a2-a4}
|
||||||
|
+VFP stmia sp, {WINDOW, OUT, BUF}
|
||||||
|
+NOVFP stmia sp, {WINDOW, OUT, BUF, SCALEINT}
|
||||||
|
+VFP vldr SCALE, [sp, #3*4]
|
||||||
|
+ bl ff_synth_filter_float_vfp
|
||||||
|
+ add OUT, OUT, #32*4
|
||||||
|
+ add BUF, BUF, #32*4
|
||||||
|
+ subs COUNT, COUNT, #1
|
||||||
|
+ bne 7b
|
||||||
|
+
|
||||||
|
+A sub sp, fp, #(8+8)*4
|
||||||
|
+T sub fp, fp, #(8+8)*4
|
||||||
|
+T mov sp, fp
|
||||||
|
+ vpop {s16-s23}
|
||||||
|
+VFP pop {a3-a4,v1-v3,v5,fp,pc}
|
||||||
|
+NOVFP pop {a4,v1-v5,fp,pc}
|
||||||
|
+endfunc
|
||||||
|
+
|
||||||
|
+ .unreq IN
|
||||||
|
+ .unreq SBACT
|
||||||
|
+ .unreq OLDFPSCR
|
||||||
|
+ .unreq IMDCT
|
||||||
|
+ .unreq WINDOW
|
||||||
|
+ .unreq OUT
|
||||||
|
+ .unreq BUF
|
||||||
|
+ .unreq SCALEINT
|
||||||
|
+ .unreq COUNT
|
||||||
|
+
|
||||||
|
+ .unreq SCALE
|
||||||
|
+
|
||||||
|
+ .align 2
|
||||||
|
+zero: .word 0
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,64 @@
|
|||||||
|
From 101f5a2c5db12605c24fe4aa41b3fabacfd3bad3 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
|
||||||
|
Date: Mon, 22 Jul 2013 12:33:22 +0300
|
||||||
|
Subject: [PATCH 49/49] [ffmpeg] - backport - arm: Mangle external symbols
|
||||||
|
properly in new vfp assembly files
|
||||||
|
|
||||||
|
Reviewed-by: Kostya Shishkov
|
||||||
|
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
|
||||||
|
---
|
||||||
|
libavcodec/arm/dcadsp_vfp.S | 2 +-
|
||||||
|
libavcodec/arm/mdct_vfp.S | 4 ++--
|
||||||
|
libavcodec/arm/synth_filter_vfp.S | 2 +-
|
||||||
|
3 files changed, 4 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/dcadsp_vfp.S b/libavcodec/arm/dcadsp_vfp.S
|
||||||
|
index 6039e87..5892a84 100644
|
||||||
|
--- a/libavcodec/arm/dcadsp_vfp.S
|
||||||
|
+++ b/libavcodec/arm/dcadsp_vfp.S
|
||||||
|
@@ -463,7 +463,7 @@ NOVFP ldr a1, [fp, #-8*4]
|
||||||
|
VFP stmia sp, {WINDOW, OUT, BUF}
|
||||||
|
NOVFP stmia sp, {WINDOW, OUT, BUF, SCALEINT}
|
||||||
|
VFP vldr SCALE, [sp, #3*4]
|
||||||
|
- bl ff_synth_filter_float_vfp
|
||||||
|
+ bl X(ff_synth_filter_float_vfp)
|
||||||
|
add OUT, OUT, #32*4
|
||||||
|
add BUF, BUF, #32*4
|
||||||
|
subs COUNT, COUNT, #1
|
||||||
|
diff --git a/libavcodec/arm/mdct_vfp.S b/libavcodec/arm/mdct_vfp.S
|
||||||
|
index 0623e96..94db24f 100644
|
||||||
|
--- a/libavcodec/arm/mdct_vfp.S
|
||||||
|
+++ b/libavcodec/arm/mdct_vfp.S
|
||||||
|
@@ -151,7 +151,7 @@ function ff_imdct_half_vfp, export=1
|
||||||
|
ldr ip, [CONTEXT, #5*4] @ mdct_bits
|
||||||
|
teq ip, #6
|
||||||
|
it ne
|
||||||
|
- bne ff_imdct_half_c @ only case currently accelerated is the one used by DCA
|
||||||
|
+ bne X(ff_imdct_half_c) @ only case currently accelerated is the one used by DCA
|
||||||
|
|
||||||
|
.set n, 1<<6
|
||||||
|
.set n2, n/2
|
||||||
|
@@ -175,7 +175,7 @@ function ff_imdct_half_vfp, export=1
|
||||||
|
|
||||||
|
fmxr FPSCR, OLDFPSCR
|
||||||
|
mov a1, OUT
|
||||||
|
- bl ff_fft16_vfp
|
||||||
|
+ bl X(ff_fft16_vfp)
|
||||||
|
ldr lr, =0x03030000 @ RunFast mode, short vectors of length 4, stride 1
|
||||||
|
fmxr FPSCR, lr
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/synth_filter_vfp.S b/libavcodec/arm/synth_filter_vfp.S
|
||||||
|
index c219c41..e6e6408 100644
|
||||||
|
--- a/libavcodec/arm/synth_filter_vfp.S
|
||||||
|
+++ b/libavcodec/arm/synth_filter_vfp.S
|
||||||
|
@@ -132,7 +132,7 @@ function ff_synth_filter_float_vfp, export=1
|
||||||
|
str lr, [P_SB_OFF] @ rotate offset, modulo buffer size, ready for next call
|
||||||
|
ldr a3, [sp, #(16+6+2)*4] @ fetch in from stack, to pass to imdct_half
|
||||||
|
VFP vmov s16, SCALE @ imdct_half is free to corrupt s0, but it contains one of our arguments in hardfp case
|
||||||
|
- bl ff_imdct_half_vfp
|
||||||
|
+ bl X(ff_imdct_half_vfp)
|
||||||
|
VFP vmov SCALE, s16
|
||||||
|
|
||||||
|
fmrx OLDFPSCR, FPSCR
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,72 @@
|
|||||||
|
From 5ce8f2bf354b7adf904ac3e1438915586c5a0bb1 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Wed, 31 Jul 2013 23:46:08 +0100
|
||||||
|
Subject: [PATCH 51/54] [ffmpeg] - backport - avio: Add an internal function
|
||||||
|
for reading without copying
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
As long as there is enough contiguous data in the avio buffer,
|
||||||
|
just return a pointer to it instead of copying it to the caller
|
||||||
|
provided buffer.
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavformat/avio_internal.h | 17 +++++++++++++++++
|
||||||
|
libavformat/aviobuf.c | 12 ++++++++++++
|
||||||
|
2 files changed, 29 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/libavformat/avio_internal.h b/libavformat/avio_internal.h
|
||||||
|
index cf36764..e9ece57 100644
|
||||||
|
--- a/libavformat/avio_internal.h
|
||||||
|
+++ b/libavformat/avio_internal.h
|
||||||
|
@@ -38,6 +38,23 @@ int ffio_init_context(AVIOContext *s,
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
+ * Read size bytes from AVIOContext, returning a pointer.
|
||||||
|
+ * Note that the data pointed at by the returned pointer is only
|
||||||
|
+ * valid until the next call that references the same IO context.
|
||||||
|
+ * @param s IO context
|
||||||
|
+ * @param buf pointer to buffer into which to assemble the requested
|
||||||
|
+ * data if it is not available in contiguous addresses in the
|
||||||
|
+ * underlying buffer
|
||||||
|
+ * @param size number of bytes requested
|
||||||
|
+ * @param data address at which to store pointer: this will be a
|
||||||
|
+ * direct pointer into the underlying buffer if the requested
|
||||||
|
+ * number of bytes are available at contiguous addresses, otherwise
|
||||||
|
+ * will be buf itself, with the data read into it
|
||||||
|
+ * @return number of bytes read or AVERROR
|
||||||
|
+ */
|
||||||
|
+int ffio_read_indirect(AVIOContext *s, unsigned char *buf, int size, unsigned char **data);
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
* Read size bytes from AVIOContext into buf.
|
||||||
|
* This reads at most 1 packet. If that is not enough fewer bytes will be
|
||||||
|
* returned.
|
||||||
|
diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
|
||||||
|
index 7a73a17..465c46d 100644
|
||||||
|
--- a/libavformat/aviobuf.c
|
||||||
|
+++ b/libavformat/aviobuf.c
|
||||||
|
@@ -522,6 +522,18 @@ int avio_read(AVIOContext *s, unsigned char *buf, int size)
|
||||||
|
return size1 - size;
|
||||||
|
}
|
||||||
|
|
||||||
|
+int ffio_read_indirect(AVIOContext *s, unsigned char *buf, int size, unsigned char **data)
|
||||||
|
+{
|
||||||
|
+ if (s->buf_end - s->buf_ptr >= size && !s->write_flag) {
|
||||||
|
+ *data = s->buf_ptr;
|
||||||
|
+ s->buf_ptr += size;
|
||||||
|
+ return size;
|
||||||
|
+ } else {
|
||||||
|
+ *data = buf;
|
||||||
|
+ return avio_read(s, buf, size);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int ffio_read_partial(AVIOContext *s, unsigned char *buf, int size)
|
||||||
|
{
|
||||||
|
int len;
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,149 @@
|
|||||||
|
From 1496d8c12075c0f3783e348a5d73fef9e3000b0f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Wed, 31 Jul 2013 23:46:08 +0100
|
||||||
|
Subject: [PATCH 52/54] [ffmpeg] - backport - mpegts: Remove one memcpy per
|
||||||
|
packet
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
This was being performed to ensure that a complete packet was held in
|
||||||
|
contiguous memory, prior to parsing the packet. However, the source buffer
|
||||||
|
is typically large enough that the packet was already contiguous, so it is
|
||||||
|
beneficial to return the packet by reference in most cases.
|
||||||
|
|
||||||
|
Before After
|
||||||
|
Mean StdDev Mean StdDev Change
|
||||||
|
memcpy 720.7 32.7 649.8 25.1 +10.9%
|
||||||
|
Overall 2372.7 46.1 2291.7 21.8 +3.5%
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavformat/mpegts.c | 41 ++++++++++++++++++++++++++-------------
|
||||||
|
1 file changed, 28 insertions(+), 13 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
|
||||||
|
index b5f5d63..5307521 100644
|
||||||
|
--- a/libavformat/mpegts.c
|
||||||
|
+++ b/libavformat/mpegts.c
|
||||||
|
@@ -1863,17 +1863,17 @@ static int mpegts_resync(AVFormatContext *s)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* return -1 if error or EOF. Return 0 if OK. */
|
||||||
|
-static int read_packet(AVFormatContext *s, uint8_t *buf, int raw_packet_size)
|
||||||
|
+static int read_packet(AVFormatContext *s, uint8_t *buf, int raw_packet_size, uint8_t **data)
|
||||||
|
{
|
||||||
|
AVIOContext *pb = s->pb;
|
||||||
|
- int skip, len;
|
||||||
|
+ int len;
|
||||||
|
|
||||||
|
for(;;) {
|
||||||
|
- len = avio_read(pb, buf, TS_PACKET_SIZE);
|
||||||
|
+ len = ffio_read_indirect(pb, buf, TS_PACKET_SIZE, data);
|
||||||
|
if (len != TS_PACKET_SIZE)
|
||||||
|
return len < 0 ? len : AVERROR_EOF;
|
||||||
|
/* check packet sync byte */
|
||||||
|
- if (buf[0] != 0x47) {
|
||||||
|
+ if ((*data)[0] != 0x47) {
|
||||||
|
/* find a new packet start */
|
||||||
|
avio_seek(pb, -TS_PACKET_SIZE, SEEK_CUR);
|
||||||
|
if (mpegts_resync(s) < 0)
|
||||||
|
@@ -1881,19 +1881,25 @@ static int read_packet(AVFormatContext *s, uint8_t *buf, int raw_packet_size)
|
||||||
|
else
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
- skip = raw_packet_size - TS_PACKET_SIZE;
|
||||||
|
- if (skip > 0)
|
||||||
|
- avio_skip(pb, skip);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void finished_reading_packet(AVFormatContext *s, int raw_packet_size)
|
||||||
|
+{
|
||||||
|
+ AVIOContext *pb = s->pb;
|
||||||
|
+ int skip = raw_packet_size - TS_PACKET_SIZE;
|
||||||
|
+ if (skip > 0)
|
||||||
|
+ avio_skip(pb, skip);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
static int handle_packets(MpegTSContext *ts, int nb_packets)
|
||||||
|
{
|
||||||
|
AVFormatContext *s = ts->stream;
|
||||||
|
uint8_t packet[TS_PACKET_SIZE + FF_INPUT_BUFFER_PADDING_SIZE];
|
||||||
|
+ uint8_t *data;
|
||||||
|
int packet_num, ret = 0;
|
||||||
|
|
||||||
|
if (avio_tell(s->pb) != ts->last_pos) {
|
||||||
|
@@ -1926,10 +1932,11 @@ static int handle_packets(MpegTSContext *ts, int nb_packets)
|
||||||
|
if (ts->stop_parse > 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
- ret = read_packet(s, packet, ts->raw_packet_size);
|
||||||
|
+ ret = read_packet(s, packet, ts->raw_packet_size, &data);
|
||||||
|
if (ret != 0)
|
||||||
|
break;
|
||||||
|
- ret = handle_packet(ts, packet);
|
||||||
|
+ ret = handle_packet(ts, data);
|
||||||
|
+ finished_reading_packet(s, ts->raw_packet_size);
|
||||||
|
if (ret != 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
@@ -2087,6 +2094,7 @@ static int mpegts_read_header(AVFormatContext *s)
|
||||||
|
int64_t pcrs[2], pcr_h;
|
||||||
|
int packet_count[2];
|
||||||
|
uint8_t packet[TS_PACKET_SIZE];
|
||||||
|
+ uint8_t *data;
|
||||||
|
|
||||||
|
/* only read packets */
|
||||||
|
|
||||||
|
@@ -2102,18 +2110,21 @@ static int mpegts_read_header(AVFormatContext *s)
|
||||||
|
nb_pcrs = 0;
|
||||||
|
nb_packets = 0;
|
||||||
|
for(;;) {
|
||||||
|
- ret = read_packet(s, packet, ts->raw_packet_size);
|
||||||
|
+ ret = read_packet(s, packet, ts->raw_packet_size, &data);
|
||||||
|
if (ret < 0)
|
||||||
|
return -1;
|
||||||
|
- pid = AV_RB16(packet + 1) & 0x1fff;
|
||||||
|
+ pid = AV_RB16(data + 1) & 0x1fff;
|
||||||
|
if ((pcr_pid == -1 || pcr_pid == pid) &&
|
||||||
|
- parse_pcr(&pcr_h, &pcr_l, packet) == 0) {
|
||||||
|
+ parse_pcr(&pcr_h, &pcr_l, data) == 0) {
|
||||||
|
+ finished_reading_packet(s, ts->raw_packet_size);
|
||||||
|
pcr_pid = pid;
|
||||||
|
packet_count[nb_pcrs] = nb_packets;
|
||||||
|
pcrs[nb_pcrs] = pcr_h * 300 + pcr_l;
|
||||||
|
nb_pcrs++;
|
||||||
|
if (nb_pcrs >= 2)
|
||||||
|
break;
|
||||||
|
+ } else {
|
||||||
|
+ finished_reading_packet(s, ts->raw_packet_size);
|
||||||
|
}
|
||||||
|
nb_packets++;
|
||||||
|
}
|
||||||
|
@@ -2145,15 +2156,19 @@ static int mpegts_raw_read_packet(AVFormatContext *s,
|
||||||
|
int64_t pcr_h, next_pcr_h, pos;
|
||||||
|
int pcr_l, next_pcr_l;
|
||||||
|
uint8_t pcr_buf[12];
|
||||||
|
+ uint8_t *data;
|
||||||
|
|
||||||
|
if (av_new_packet(pkt, TS_PACKET_SIZE) < 0)
|
||||||
|
return AVERROR(ENOMEM);
|
||||||
|
pkt->pos= avio_tell(s->pb);
|
||||||
|
- ret = read_packet(s, pkt->data, ts->raw_packet_size);
|
||||||
|
+ ret = read_packet(s, pkt->data, ts->raw_packet_size, &data);
|
||||||
|
if (ret < 0) {
|
||||||
|
av_free_packet(pkt);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
+ if (data != pkt->data)
|
||||||
|
+ memcpy(pkt->data, data, ts->raw_packet_size);
|
||||||
|
+ finished_reading_packet(s, ts->raw_packet_size);
|
||||||
|
if (ts->mpeg2ts_compute_pcr) {
|
||||||
|
/* compute exact PCR for each packet */
|
||||||
|
if (parse_pcr(&pcr_h, &pcr_l, pkt->data) == 0) {
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,47 @@
|
|||||||
|
From 6aec5772fd5331b3514f308ab0895f6234b60045 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Mon, 5 Aug 2013 13:12:51 +0100
|
||||||
|
Subject: [PATCH 53/54] [ffmpeg] - backport - mpegts: Make discard_pid()
|
||||||
|
faster for single-program streams
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
When a stream contains a single program, there's no point in doing a
|
||||||
|
PID -> program lookup. Normally the one and only program isn't disabled,
|
||||||
|
so no packets should be discarded.
|
||||||
|
|
||||||
|
Before After
|
||||||
|
Mean StdDev Mean StdDev Change
|
||||||
|
discard_pid() 73.8 9.4 20.2 1.5 +264.8%
|
||||||
|
Overall 2300.8 28.0 2253.1 20.6 +2.1%
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavformat/mpegts.c | 11 +++++++++++
|
||||||
|
1 file changed, 11 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
|
||||||
|
index 5307521..82dd209 100644
|
||||||
|
--- a/libavformat/mpegts.c
|
||||||
|
+++ b/libavformat/mpegts.c
|
||||||
|
@@ -268,6 +268,17 @@ static int discard_pid(MpegTSContext *ts, unsigned int pid)
|
||||||
|
int i, j, k;
|
||||||
|
int used = 0, discarded = 0;
|
||||||
|
struct Program *p;
|
||||||
|
+
|
||||||
|
+ /* If none of the programs have .discard=AVDISCARD_ALL then there's
|
||||||
|
+ * no way we have to discard this packet
|
||||||
|
+ */
|
||||||
|
+ for (k = 0; k < ts->stream->nb_programs; k++) {
|
||||||
|
+ if (ts->stream->programs[k]->discard == AVDISCARD_ALL)
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ if (k == ts->stream->nb_programs)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
for(i=0; i<ts->nb_prg; i++) {
|
||||||
|
p = &ts->prg[i];
|
||||||
|
for(j=0; j<p->nb_pids; j++) {
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,76 @@
|
|||||||
|
From b79aa2b89ed9027a72a10c1d26ccdf2bb385d57b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ben Avison <bavison@riscosopen.org>
|
||||||
|
Date: Mon, 5 Aug 2013 13:12:49 +0100
|
||||||
|
Subject: [PATCH 54/54] [ffmpeg] - backport - mpegts: Remove one 64-bit
|
||||||
|
integer modulus operation per packet
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
The common case of the pointer having increased by one packet (which results
|
||||||
|
in no change to the modulus) can be detected with a 64-bit subtraction,
|
||||||
|
which is far cheaper than a division on many platforms.
|
||||||
|
|
||||||
|
Before After
|
||||||
|
Mean StdDev Mean StdDev Change
|
||||||
|
Divisions 248.3 8.8 51.5 7.4 +381.7%
|
||||||
|
Overall 2773.2 25.6 2372.5 43.1 +16.9%
|
||||||
|
|
||||||
|
Signed-off-by: Martin Storsjö <martin@martin.st>
|
||||||
|
---
|
||||||
|
libavcodec/mathops.h | 9 +++++++++
|
||||||
|
libavformat/mpegts.c | 5 ++++-
|
||||||
|
2 files changed, 13 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
|
||||||
|
index 592f5a5..1d57342 100644
|
||||||
|
--- a/libavcodec/mathops.h
|
||||||
|
+++ b/libavcodec/mathops.h
|
||||||
|
@@ -195,6 +195,15 @@ if ((y) < (x)) {\
|
||||||
|
# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a) * ff_inverse[b]) >> 32))
|
||||||
|
#endif /* FASTDIV */
|
||||||
|
|
||||||
|
+#ifndef MOD_UNLIKELY
|
||||||
|
+# define MOD_UNLIKELY(modulus, dividend, divisor, prev_dividend) \
|
||||||
|
+ do { \
|
||||||
|
+ if ((prev_dividend) == 0 || (dividend) - (prev_dividend) != (divisor)) \
|
||||||
|
+ (modulus) = (dividend) % (divisor); \
|
||||||
|
+ (prev_dividend) = (dividend); \
|
||||||
|
+ } while (0)
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
static inline av_const unsigned int ff_sqrt(unsigned int a)
|
||||||
|
{
|
||||||
|
unsigned int b;
|
||||||
|
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
|
||||||
|
index 82dd209..b995f60 100644
|
||||||
|
--- a/libavformat/mpegts.c
|
||||||
|
+++ b/libavformat/mpegts.c
|
||||||
|
@@ -28,6 +28,7 @@
|
||||||
|
#include "libavutil/avassert.h"
|
||||||
|
#include "libavcodec/bytestream.h"
|
||||||
|
#include "libavcodec/get_bits.h"
|
||||||
|
+#include "libavcodec/mathops.h"
|
||||||
|
#include "avformat.h"
|
||||||
|
#include "mpegts.h"
|
||||||
|
#include "internal.h"
|
||||||
|
@@ -99,6 +100,8 @@ struct MpegTSContext {
|
||||||
|
int raw_packet_size;
|
||||||
|
|
||||||
|
int pos47;
|
||||||
|
+ /** position corresponding to pos47, or 0 if pos47 invalid */
|
||||||
|
+ int64_t pos;
|
||||||
|
|
||||||
|
/** if true, all pids are analyzed to find streams */
|
||||||
|
int auto_guess;
|
||||||
|
@@ -1814,7 +1817,7 @@ static int handle_packet(MpegTSContext *ts, const uint8_t *packet)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
pos = avio_tell(ts->stream->pb);
|
||||||
|
- ts->pos47= pos % ts->raw_packet_size;
|
||||||
|
+ MOD_UNLIKELY(ts->pos47, pos, ts->raw_packet_size, ts->pos);
|
||||||
|
|
||||||
|
if (tss->type == MPEGTS_SECTION) {
|
||||||
|
if (is_start) {
|
||||||
|
--
|
||||||
|
1.7.9.5
|
@ -0,0 +1,110 @@
|
|||||||
|
From 8067f55edf3719182aed6e5b57b7863889f80218 Mon Sep 17 00:00:00 2001
|
||||||
|
From: =?utf8?q?Reimar=20D=C3=B6ffinger?= <Reimar.Doeffinger@gmx.de>
|
||||||
|
Date: Sat, 16 Mar 2013 13:36:20 +0100
|
||||||
|
Subject: [PATCH] Fix compilation on ARM with android gcc 4.7
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=utf8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
With the current code it fails due to running out
|
||||||
|
of registers.
|
||||||
|
So code the store offsets manually into the assembler
|
||||||
|
instead.
|
||||||
|
Passes "make fate-dts".
|
||||||
|
|
||||||
|
Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
|
||||||
|
---
|
||||||
|
libavcodec/arm/dca.h | 74 ++++++++++++++++++++++++--------------------------
|
||||||
|
1 file changed, 36 insertions(+), 38 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h
|
||||||
|
index 2cfd18a..431b62e 100644
|
||||||
|
--- a/libavcodec/arm/dca.h
|
||||||
|
+++ b/libavcodec/arm/dca.h
|
||||||
|
@@ -34,46 +34,44 @@ static inline int decode_blockcodes(int code1, int code2, int levels,
|
||||||
|
{
|
||||||
|
int v0, v1, v2, v3, v4, v5;
|
||||||
|
|
||||||
|
- __asm__ ("smmul %8, %14, %18 \n"
|
||||||
|
- "smmul %11, %15, %18 \n"
|
||||||
|
- "smlabb %14, %8, %17, %14 \n"
|
||||||
|
- "smlabb %15, %11, %17, %15 \n"
|
||||||
|
- "smmul %9, %8, %18 \n"
|
||||||
|
- "smmul %12, %11, %18 \n"
|
||||||
|
- "sub %14, %14, %16, lsr #1 \n"
|
||||||
|
- "sub %15, %15, %16, lsr #1 \n"
|
||||||
|
- "smlabb %8, %9, %17, %8 \n"
|
||||||
|
- "smlabb %11, %12, %17, %11 \n"
|
||||||
|
- "smmul %10, %9, %18 \n"
|
||||||
|
- "smmul %13, %12, %18 \n"
|
||||||
|
- "str %14, %0 \n"
|
||||||
|
- "str %15, %4 \n"
|
||||||
|
- "sub %8, %8, %16, lsr #1 \n"
|
||||||
|
- "sub %11, %11, %16, lsr #1 \n"
|
||||||
|
- "smlabb %9, %10, %17, %9 \n"
|
||||||
|
- "smlabb %12, %13, %17, %12 \n"
|
||||||
|
- "smmul %14, %10, %18 \n"
|
||||||
|
- "smmul %15, %13, %18 \n"
|
||||||
|
- "str %8, %1 \n"
|
||||||
|
- "str %11, %5 \n"
|
||||||
|
- "sub %9, %9, %16, lsr #1 \n"
|
||||||
|
- "sub %12, %12, %16, lsr #1 \n"
|
||||||
|
- "smlabb %10, %14, %17, %10 \n"
|
||||||
|
- "smlabb %13, %15, %17, %13 \n"
|
||||||
|
- "str %9, %2 \n"
|
||||||
|
- "str %12, %6 \n"
|
||||||
|
- "sub %10, %10, %16, lsr #1 \n"
|
||||||
|
- "sub %13, %13, %16, lsr #1 \n"
|
||||||
|
- "str %10, %3 \n"
|
||||||
|
- "str %13, %7 \n"
|
||||||
|
- : "=m"(values[0]), "=m"(values[1]),
|
||||||
|
- "=m"(values[2]), "=m"(values[3]),
|
||||||
|
- "=m"(values[4]), "=m"(values[5]),
|
||||||
|
- "=m"(values[6]), "=m"(values[7]),
|
||||||
|
- "=&r"(v0), "=&r"(v1), "=&r"(v2),
|
||||||
|
+ __asm__ ("smmul %0, %6, %10 \n"
|
||||||
|
+ "smmul %3, %7, %10 \n"
|
||||||
|
+ "smlabb %6, %0, %9, %6 \n"
|
||||||
|
+ "smlabb %7, %3, %9, %7 \n"
|
||||||
|
+ "smmul %1, %0, %10 \n"
|
||||||
|
+ "smmul %4, %3, %10 \n"
|
||||||
|
+ "sub %6, %6, %8, lsr #1 \n"
|
||||||
|
+ "sub %7, %7, %8, lsr #1 \n"
|
||||||
|
+ "smlabb %0, %1, %9, %0 \n"
|
||||||
|
+ "smlabb %3, %4, %9, %3 \n"
|
||||||
|
+ "smmul %2, %1, %10 \n"
|
||||||
|
+ "smmul %5, %4, %10 \n"
|
||||||
|
+ "str %6, [%11, #0] \n"
|
||||||
|
+ "str %7, [%11, #16] \n"
|
||||||
|
+ "sub %0, %0, %8, lsr #1 \n"
|
||||||
|
+ "sub %3, %3, %8, lsr #1 \n"
|
||||||
|
+ "smlabb %1, %2, %9, %1 \n"
|
||||||
|
+ "smlabb %4, %5, %9, %4 \n"
|
||||||
|
+ "smmul %6, %2, %10 \n"
|
||||||
|
+ "smmul %7, %5, %10 \n"
|
||||||
|
+ "str %0, [%11, #4] \n"
|
||||||
|
+ "str %3, [%11, #20] \n"
|
||||||
|
+ "sub %1, %1, %8, lsr #1 \n"
|
||||||
|
+ "sub %4, %4, %8, lsr #1 \n"
|
||||||
|
+ "smlabb %2, %6, %9, %2 \n"
|
||||||
|
+ "smlabb %5, %7, %9, %5 \n"
|
||||||
|
+ "str %1, [%11, #8] \n"
|
||||||
|
+ "str %4, [%11, #24] \n"
|
||||||
|
+ "sub %2, %2, %8, lsr #1 \n"
|
||||||
|
+ "sub %5, %5, %8, lsr #1 \n"
|
||||||
|
+ "str %2, [%11, #12] \n"
|
||||||
|
+ "str %5, [%11, #28] \n"
|
||||||
|
+ : "=&r"(v0), "=&r"(v1), "=&r"(v2),
|
||||||
|
"=&r"(v3), "=&r"(v4), "=&r"(v5),
|
||||||
|
"+&r"(code1), "+&r"(code2)
|
||||||
|
- : "r"(levels - 1), "r"(-levels), "r"(ff_inverse[levels]));
|
||||||
|
+ : "r"(levels - 1), "r"(-levels),
|
||||||
|
+ "r"(ff_inverse[levels]), "r"(values)
|
||||||
|
+ : "memory");
|
||||||
|
|
||||||
|
return code1 | code2;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
1.7.10.4
|
||||||
|
|
||||||
|
|
@ -1,42 +1,15 @@
|
|||||||
From 67895a77c9e5f519166dd0ce4a2a98649194b11b Mon Sep 17 00:00:00 2001
|
diff -Naur ffmpeg-1.2.3/configure ffmpeg-1.2.3.patch/configure
|
||||||
From: Rainer Hochecker <fernetmenta@online.de>
|
--- ffmpeg-1.2.3/configure 2013-09-09 22:46:04.636832059 +0200
|
||||||
Date: Sat, 8 Oct 2011 16:45:13 +0200
|
+++ ffmpeg-1.2.3.patch/configure 2013-09-09 22:47:15.023872481 +0200
|
||||||
Subject: [PATCH] ffmpeg: add xvba hwaccel
|
@@ -144,6 +144,7 @@
|
||||||
|
--disable-vaapi disable VAAPI code [autodetect]
|
||||||
---
|
|
||||||
configure | 11 ++
|
|
||||||
libavcodec/Makefile | 6 ++
|
|
||||||
libavcodec/allcodecs.c | 4 +
|
|
||||||
libavcodec/h264.c | 3 +
|
|
||||||
libavcodec/xvba.c | 66 ++++++++++++
|
|
||||||
libavcodec/xvba.h | 71 +++++++++++++
|
|
||||||
libavcodec/xvba_h264.c | 192 ++++++++++++++++++++++++++++++++++
|
|
||||||
libavcodec/xvba_internal.h | 24 +++++
|
|
||||||
libavcodec/xvba_mpeg2.c | 52 +++++++++
|
|
||||||
libavcodec/xvba_vc1.c | 190 +++++++++++++++++++++++++++++++++
|
|
||||||
libavutil/pixdesc.c | 6 ++
|
|
||||||
libavutil/pixfmt.h | 1 +
|
|
||||||
12 files changed, 626 insertions(+)
|
|
||||||
create mode 100644 libavcodec/xvba.c
|
|
||||||
create mode 100644 libavcodec/xvba.h
|
|
||||||
create mode 100644 libavcodec/xvba_h264.c
|
|
||||||
create mode 100644 libavcodec/xvba_internal.h
|
|
||||||
create mode 100644 libavcodec/xvba_mpeg2.c
|
|
||||||
create mode 100644 libavcodec/xvba_vc1.c
|
|
||||||
|
|
||||||
diff --git a/configure b/configure
|
|
||||||
index 351611d..876a6ea 100755
|
|
||||||
--- a/configure
|
|
||||||
+++ b/configure
|
|
||||||
@@ -144,6 +144,7 @@ Hardware accelerators:
|
|
||||||
--enable-vaapi enable VAAPI code
|
|
||||||
--enable-vda enable VDA code
|
--enable-vda enable VDA code
|
||||||
--enable-vdpau enable VDPAU code
|
--disable-vdpau disable VDPAU code [autodetect]
|
||||||
+ --disable-xvba disable XVBA code
|
+ --disable-xvba disable XVBA code
|
||||||
|
|
||||||
Individual component options:
|
Individual component options:
|
||||||
--disable-everything disable all components listed below
|
--disable-everything disable all components listed below
|
||||||
@@ -1197,6 +1198,7 @@ HWACCEL_LIST="
|
@@ -1197,6 +1198,7 @@
|
||||||
vaapi
|
vaapi
|
||||||
vda
|
vda
|
||||||
vdpau
|
vdpau
|
||||||
@ -44,7 +17,7 @@ index 351611d..876a6ea 100755
|
|||||||
"
|
"
|
||||||
|
|
||||||
LIBRARY_LIST="
|
LIBRARY_LIST="
|
||||||
@@ -1827,6 +1829,7 @@ crystalhd_deps="libcrystalhd_libcrystalhd_if_h"
|
@@ -1827,6 +1829,7 @@
|
||||||
dxva2_deps="dxva2api_h"
|
dxva2_deps="dxva2api_h"
|
||||||
vaapi_deps="va_va_h"
|
vaapi_deps="va_va_h"
|
||||||
vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads"
|
vda_deps="VideoDecodeAcceleration_VDADecoder_h pthreads"
|
||||||
@ -52,7 +25,7 @@ index 351611d..876a6ea 100755
|
|||||||
vda_extralibs="-framework CoreFoundation -framework VideoDecodeAcceleration -framework QuartzCore"
|
vda_extralibs="-framework CoreFoundation -framework VideoDecodeAcceleration -framework QuartzCore"
|
||||||
vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h"
|
vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h"
|
||||||
|
|
||||||
@@ -1847,6 +1850,8 @@ h264_vdpau_decoder_deps="vdpau"
|
@@ -1847,6 +1850,8 @@
|
||||||
h264_vdpau_decoder_select="h264_decoder"
|
h264_vdpau_decoder_select="h264_decoder"
|
||||||
h264_vdpau_hwaccel_deps="vdpau"
|
h264_vdpau_hwaccel_deps="vdpau"
|
||||||
h264_vdpau_hwaccel_select="h264_decoder"
|
h264_vdpau_hwaccel_select="h264_decoder"
|
||||||
@ -61,7 +34,7 @@ index 351611d..876a6ea 100755
|
|||||||
mpeg_vdpau_decoder_deps="vdpau"
|
mpeg_vdpau_decoder_deps="vdpau"
|
||||||
mpeg_vdpau_decoder_select="mpegvideo_decoder"
|
mpeg_vdpau_decoder_select="mpegvideo_decoder"
|
||||||
mpeg1_vdpau_decoder_deps="vdpau"
|
mpeg1_vdpau_decoder_deps="vdpau"
|
||||||
@@ -1859,6 +1864,8 @@ mpeg2_dxva2_hwaccel_select="mpeg2video_decoder"
|
@@ -1859,6 +1864,8 @@
|
||||||
mpeg2_vaapi_hwaccel_deps="vaapi"
|
mpeg2_vaapi_hwaccel_deps="vaapi"
|
||||||
mpeg2_vaapi_hwaccel_select="mpeg2video_decoder"
|
mpeg2_vaapi_hwaccel_select="mpeg2video_decoder"
|
||||||
mpeg2_vdpau_hwaccel_deps="vdpau"
|
mpeg2_vdpau_hwaccel_deps="vdpau"
|
||||||
@ -70,7 +43,7 @@ index 351611d..876a6ea 100755
|
|||||||
mpeg2_vdpau_hwaccel_select="mpeg2video_decoder"
|
mpeg2_vdpau_hwaccel_select="mpeg2video_decoder"
|
||||||
mpeg4_crystalhd_decoder_select="crystalhd"
|
mpeg4_crystalhd_decoder_select="crystalhd"
|
||||||
mpeg4_vaapi_hwaccel_deps="vaapi"
|
mpeg4_vaapi_hwaccel_deps="vaapi"
|
||||||
@@ -1877,11 +1884,14 @@ vc1_vdpau_decoder_deps="vdpau"
|
@@ -1877,11 +1884,14 @@
|
||||||
vc1_vdpau_decoder_select="vc1_decoder"
|
vc1_vdpau_decoder_select="vc1_decoder"
|
||||||
vc1_vdpau_hwaccel_deps="vdpau"
|
vc1_vdpau_hwaccel_deps="vdpau"
|
||||||
vc1_vdpau_hwaccel_select="vc1_decoder"
|
vc1_vdpau_hwaccel_select="vc1_decoder"
|
||||||
@ -85,71 +58,18 @@ index 351611d..876a6ea 100755
|
|||||||
|
|
||||||
# parsers
|
# parsers
|
||||||
h264_parser_select="golomb h264chroma h264dsp h264pred h264qpel videodsp"
|
h264_parser_select="golomb h264chroma h264dsp h264pred h264qpel videodsp"
|
||||||
@@ -3832,6 +3842,7 @@ check_header termios.h
|
@@ -3836,6 +3846,7 @@
|
||||||
check_header unistd.h
|
|
||||||
check_header vdpau/vdpau.h
|
check_header vdpau/vdpau.h
|
||||||
check_header vdpau/vdpau_x11.h
|
check_header vdpau/vdpau_x11.h
|
||||||
+check_header amd/amdxvba.h
|
|
||||||
check_cpp_condition vdpau/vdpau.h "defined(VDP_DECODER_PROFILE_MPEG4_PART2_SP)" && enable vdpau_mpeg4_support
|
check_cpp_condition vdpau/vdpau.h "defined(VDP_DECODER_PROFILE_MPEG4_PART2_SP)" && enable vdpau_mpeg4_support
|
||||||
|
+check_header amd/amdxvba.h
|
||||||
|
|
||||||
check_header VideoDecodeAcceleration/VDADecoder.h
|
check_header VideoDecodeAcceleration/VDADecoder.h
|
||||||
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
|
check_header windows.h
|
||||||
index dc065a5..c386923 100644
|
diff -Naur ffmpeg-1.2.3/libavcodec/allcodecs.c ffmpeg-1.2.3.patch/libavcodec/allcodecs.c
|
||||||
--- a/libavcodec/Makefile
|
--- ffmpeg-1.2.3/libavcodec/allcodecs.c 2013-08-27 02:13:44.000000000 +0200
|
||||||
+++ b/libavcodec/Makefile
|
+++ ffmpeg-1.2.3.patch/libavcodec/allcodecs.c 2013-09-09 22:46:40.577852790 +0200
|
||||||
@@ -12,6 +12,7 @@ HEADERS = avcodec.h \
|
@@ -79,18 +79,22 @@
|
||||||
vdpau.h \
|
|
||||||
version.h \
|
|
||||||
xvmc.h \
|
|
||||||
+ xvba.h \
|
|
||||||
|
|
||||||
OBJS = allcodecs.o \
|
|
||||||
audioconvert.o \
|
|
||||||
@@ -73,6 +74,7 @@ OBJS-$(CONFIG_SHARED) += log2_tab.o
|
|
||||||
OBJS-$(CONFIG_SINEWIN) += sinewin.o
|
|
||||||
OBJS-$(CONFIG_VAAPI) += vaapi.o
|
|
||||||
OBJS-$(CONFIG_VDPAU) += vdpau.o
|
|
||||||
+OBJS-$(CONFIG_XVBA) += xvba.o
|
|
||||||
OBJS-$(CONFIG_VIDEODSP) += videodsp.o
|
|
||||||
OBJS-$(CONFIG_VP3DSP) += vp3dsp.o
|
|
||||||
|
|
||||||
@@ -232,6 +234,7 @@ OBJS-$(CONFIG_H264_VAAPI_HWACCEL) += vaapi_h264.o
|
|
||||||
OBJS-$(CONFIG_H264_VDA_HWACCEL) += vda_h264.o
|
|
||||||
OBJS-$(CONFIG_H264_VDA_DECODER) += vda_h264_dec.o
|
|
||||||
OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o
|
|
||||||
+OBJS-$(CONFIG_H264_XVBA_HWACCEL) += xvba_h264.o
|
|
||||||
OBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuv.o huffyuvdec.o
|
|
||||||
OBJS-$(CONFIG_HUFFYUV_ENCODER) += huffyuv.o huffyuvenc.o
|
|
||||||
OBJS-$(CONFIG_IAC_DECODER) += imc.o
|
|
||||||
@@ -295,6 +298,7 @@ OBJS-$(CONFIG_MPEG1VIDEO_ENCODER) += mpeg12enc.o mpeg12.o \
|
|
||||||
OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL) += dxva2_mpeg2.o
|
|
||||||
OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL) += vaapi_mpeg2.o
|
|
||||||
OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL) += vdpau_mpeg12.o
|
|
||||||
+OBJS-$(CONFIG_MPEG2_XVBA_HWACCEL) += xvba_mpeg2.o
|
|
||||||
OBJS-$(CONFIG_MPEG2VIDEO_DECODER) += mpeg12.o mpeg12data.o
|
|
||||||
OBJS-$(CONFIG_MPEG2VIDEO_ENCODER) += mpeg12enc.o mpeg12.o \
|
|
||||||
timecode.o
|
|
||||||
@@ -459,6 +463,7 @@ OBJS-$(CONFIG_VC1_DECODER) += vc1dec.o vc1.o vc1data.o vc1dsp.o \
|
|
||||||
OBJS-$(CONFIG_VC1_DXVA2_HWACCEL) += dxva2_vc1.o
|
|
||||||
OBJS-$(CONFIG_VC1_VAAPI_HWACCEL) += vaapi_vc1.o
|
|
||||||
OBJS-$(CONFIG_VC1_VDPAU_HWACCEL) += vdpau_vc1.o
|
|
||||||
+OBJS-$(CONFIG_VC1_XVBA_HWACCEL) += xvba_vc1.o
|
|
||||||
OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o
|
|
||||||
OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdav.o
|
|
||||||
OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdav.o
|
|
||||||
@@ -788,6 +793,7 @@ SKIPHEADERS-$(CONFIG_LIBSCHROEDINGER) += libschroedinger.h
|
|
||||||
SKIPHEADERS-$(CONFIG_LIBUTVIDEO) += libutvideo.h
|
|
||||||
SKIPHEADERS-$(CONFIG_MPEG_XVMC_DECODER) += xvmc.h
|
|
||||||
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_internal.h
|
|
||||||
+SKIPHEADERS-$(CONFIG_XVBA) += xvba_internal.h
|
|
||||||
SKIPHEADERS-$(CONFIG_VDA) += vda.h
|
|
||||||
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h
|
|
||||||
SKIPHEADERS-$(HAVE_OS2THREADS) += os2threads.h
|
|
||||||
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
|
|
||||||
index 584446f..7a8f61c 100644
|
|
||||||
--- a/libavcodec/allcodecs.c
|
|
||||||
+++ b/libavcodec/allcodecs.c
|
|
||||||
@@ -79,18 +79,22 @@ void avcodec_register_all(void)
|
|
||||||
REGISTER_HWACCEL(H264_VAAPI, h264_vaapi);
|
REGISTER_HWACCEL(H264_VAAPI, h264_vaapi);
|
||||||
REGISTER_HWACCEL(H264_VDA, h264_vda);
|
REGISTER_HWACCEL(H264_VDA, h264_vda);
|
||||||
REGISTER_HWACCEL(H264_VDPAU, h264_vdpau);
|
REGISTER_HWACCEL(H264_VDPAU, h264_vdpau);
|
||||||
@ -172,10 +92,9 @@ index 584446f..7a8f61c 100644
|
|||||||
|
|
||||||
/* video codecs */
|
/* video codecs */
|
||||||
REGISTER_ENCODER(A64MULTI, a64multi);
|
REGISTER_ENCODER(A64MULTI, a64multi);
|
||||||
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
|
diff -Naur ffmpeg-1.2.3/libavcodec/h264.c ffmpeg-1.2.3.patch/libavcodec/h264.c
|
||||||
index 937ad7a..299039f 100644
|
--- ffmpeg-1.2.3/libavcodec/h264.c 2013-09-09 22:46:04.639832061 +0200
|
||||||
--- a/libavcodec/h264.c
|
+++ ffmpeg-1.2.3.patch/libavcodec/h264.c 2013-09-09 22:46:40.579852792 +0200
|
||||||
+++ b/libavcodec/h264.c
|
|
||||||
@@ -81,6 +81,9 @@
|
@@ -81,6 +81,9 @@
|
||||||
#if CONFIG_H264_VDPAU_HWACCEL
|
#if CONFIG_H264_VDPAU_HWACCEL
|
||||||
AV_PIX_FMT_VDPAU,
|
AV_PIX_FMT_VDPAU,
|
||||||
@ -186,11 +105,60 @@ index 937ad7a..299039f 100644
|
|||||||
AV_PIX_FMT_YUV420P,
|
AV_PIX_FMT_YUV420P,
|
||||||
AV_PIX_FMT_NONE
|
AV_PIX_FMT_NONE
|
||||||
};
|
};
|
||||||
diff --git a/libavcodec/xvba.c b/libavcodec/xvba.c
|
diff -Naur ffmpeg-1.2.3/libavcodec/Makefile ffmpeg-1.2.3.patch/libavcodec/Makefile
|
||||||
new file mode 100644
|
--- ffmpeg-1.2.3/libavcodec/Makefile 2013-08-27 02:13:44.000000000 +0200
|
||||||
index 0000000..be29e5d
|
+++ ffmpeg-1.2.3.patch/libavcodec/Makefile 2013-09-09 22:46:40.580852793 +0200
|
||||||
--- /dev/null
|
@@ -12,6 +12,7 @@
|
||||||
+++ b/libavcodec/xvba.c
|
vdpau.h \
|
||||||
|
version.h \
|
||||||
|
xvmc.h \
|
||||||
|
+ xvba.h \
|
||||||
|
|
||||||
|
OBJS = allcodecs.o \
|
||||||
|
audioconvert.o \
|
||||||
|
@@ -73,6 +74,7 @@
|
||||||
|
OBJS-$(CONFIG_SINEWIN) += sinewin.o
|
||||||
|
OBJS-$(CONFIG_VAAPI) += vaapi.o
|
||||||
|
OBJS-$(CONFIG_VDPAU) += vdpau.o
|
||||||
|
+OBJS-$(CONFIG_XVBA) += xvba.o
|
||||||
|
OBJS-$(CONFIG_VIDEODSP) += videodsp.o
|
||||||
|
OBJS-$(CONFIG_VP3DSP) += vp3dsp.o
|
||||||
|
|
||||||
|
@@ -232,6 +234,7 @@
|
||||||
|
OBJS-$(CONFIG_H264_VDA_HWACCEL) += vda_h264.o
|
||||||
|
OBJS-$(CONFIG_H264_VDA_DECODER) += vda_h264_dec.o
|
||||||
|
OBJS-$(CONFIG_H264_VDPAU_HWACCEL) += vdpau_h264.o
|
||||||
|
+OBJS-$(CONFIG_H264_XVBA_HWACCEL) += xvba_h264.o
|
||||||
|
OBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuv.o huffyuvdec.o
|
||||||
|
OBJS-$(CONFIG_HUFFYUV_ENCODER) += huffyuv.o huffyuvenc.o
|
||||||
|
OBJS-$(CONFIG_IAC_DECODER) += imc.o
|
||||||
|
@@ -295,6 +298,7 @@
|
||||||
|
OBJS-$(CONFIG_MPEG2_DXVA2_HWACCEL) += dxva2_mpeg2.o
|
||||||
|
OBJS-$(CONFIG_MPEG2_VAAPI_HWACCEL) += vaapi_mpeg2.o
|
||||||
|
OBJS-$(CONFIG_MPEG2_VDPAU_HWACCEL) += vdpau_mpeg12.o
|
||||||
|
+OBJS-$(CONFIG_MPEG2_XVBA_HWACCEL) += xvba_mpeg2.o
|
||||||
|
OBJS-$(CONFIG_MPEG2VIDEO_DECODER) += mpeg12.o mpeg12data.o
|
||||||
|
OBJS-$(CONFIG_MPEG2VIDEO_ENCODER) += mpeg12enc.o mpeg12.o \
|
||||||
|
timecode.o
|
||||||
|
@@ -459,6 +463,7 @@
|
||||||
|
OBJS-$(CONFIG_VC1_DXVA2_HWACCEL) += dxva2_vc1.o
|
||||||
|
OBJS-$(CONFIG_VC1_VAAPI_HWACCEL) += vaapi_vc1.o
|
||||||
|
OBJS-$(CONFIG_VC1_VDPAU_HWACCEL) += vdpau_vc1.o
|
||||||
|
+OBJS-$(CONFIG_VC1_XVBA_HWACCEL) += xvba_vc1.o
|
||||||
|
OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o
|
||||||
|
OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdav.o
|
||||||
|
OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdav.o
|
||||||
|
@@ -788,6 +793,7 @@
|
||||||
|
SKIPHEADERS-$(CONFIG_LIBUTVIDEO) += libutvideo.h
|
||||||
|
SKIPHEADERS-$(CONFIG_MPEG_XVMC_DECODER) += xvmc.h
|
||||||
|
SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_internal.h
|
||||||
|
+SKIPHEADERS-$(CONFIG_XVBA) += xvba_internal.h
|
||||||
|
SKIPHEADERS-$(CONFIG_VDA) += vda.h
|
||||||
|
SKIPHEADERS-$(CONFIG_VDPAU) += vdpau.h
|
||||||
|
SKIPHEADERS-$(HAVE_OS2THREADS) += os2threads.h
|
||||||
|
diff -Naur ffmpeg-1.2.3/libavcodec/xvba.c ffmpeg-1.2.3.patch/libavcodec/xvba.c
|
||||||
|
--- ffmpeg-1.2.3/libavcodec/xvba.c 1970-01-01 01:00:00.000000000 +0100
|
||||||
|
+++ ffmpeg-1.2.3.patch/libavcodec/xvba.c 2013-09-09 22:46:40.580852793 +0200
|
||||||
@@ -0,0 +1,66 @@
|
@@ -0,0 +1,66 @@
|
||||||
+/*
|
+/*
|
||||||
+ * HW decode acceleration for MPEG-2, H.264 and VC-1
|
+ * HW decode acceleration for MPEG-2, H.264 and VC-1
|
||||||
@ -258,11 +226,9 @@ index 0000000..be29e5d
|
|||||||
+ render->num_slices++;
|
+ render->num_slices++;
|
||||||
+}
|
+}
|
||||||
+
|
+
|
||||||
diff --git a/libavcodec/xvba.h b/libavcodec/xvba.h
|
diff -Naur ffmpeg-1.2.3/libavcodec/xvba.h ffmpeg-1.2.3.patch/libavcodec/xvba.h
|
||||||
new file mode 100644
|
--- ffmpeg-1.2.3/libavcodec/xvba.h 1970-01-01 01:00:00.000000000 +0100
|
||||||
index 0000000..9f9ff0c
|
+++ ffmpeg-1.2.3.patch/libavcodec/xvba.h 2013-09-09 22:46:40.581852794 +0200
|
||||||
--- /dev/null
|
|
||||||
+++ b/libavcodec/xvba.h
|
|
||||||
@@ -0,0 +1,71 @@
|
@@ -0,0 +1,71 @@
|
||||||
+/*
|
+/*
|
||||||
+ * HW decode acceleration for MPEG-2, H.264 and VC-1
|
+ * HW decode acceleration for MPEG-2, H.264 and VC-1
|
||||||
@ -335,11 +301,9 @@ index 0000000..9f9ff0c
|
|||||||
+};
|
+};
|
||||||
+
|
+
|
||||||
+#endif /* AVCODEC_XVBA_H */
|
+#endif /* AVCODEC_XVBA_H */
|
||||||
diff --git a/libavcodec/xvba_h264.c b/libavcodec/xvba_h264.c
|
diff -Naur ffmpeg-1.2.3/libavcodec/xvba_h264.c ffmpeg-1.2.3.patch/libavcodec/xvba_h264.c
|
||||||
new file mode 100644
|
--- ffmpeg-1.2.3/libavcodec/xvba_h264.c 1970-01-01 01:00:00.000000000 +0100
|
||||||
index 0000000..ae45f3a
|
+++ ffmpeg-1.2.3.patch/libavcodec/xvba_h264.c 2013-09-09 22:46:40.582852794 +0200
|
||||||
--- /dev/null
|
|
||||||
+++ b/libavcodec/xvba_h264.c
|
|
||||||
@@ -0,0 +1,192 @@
|
@@ -0,0 +1,192 @@
|
||||||
+/*
|
+/*
|
||||||
+ * H.264 HW decode acceleration through XVBA
|
+ * H.264 HW decode acceleration through XVBA
|
||||||
@ -533,11 +497,9 @@ index 0000000..ae45f3a
|
|||||||
+ .end_frame = end_frame,
|
+ .end_frame = end_frame,
|
||||||
+ .decode_slice = decode_slice,
|
+ .decode_slice = decode_slice,
|
||||||
+};
|
+};
|
||||||
diff --git a/libavcodec/xvba_internal.h b/libavcodec/xvba_internal.h
|
diff -Naur ffmpeg-1.2.3/libavcodec/xvba_internal.h ffmpeg-1.2.3.patch/libavcodec/xvba_internal.h
|
||||||
new file mode 100644
|
--- ffmpeg-1.2.3/libavcodec/xvba_internal.h 1970-01-01 01:00:00.000000000 +0100
|
||||||
index 0000000..9653f85
|
+++ ffmpeg-1.2.3.patch/libavcodec/xvba_internal.h 2013-09-09 22:46:40.582852794 +0200
|
||||||
--- /dev/null
|
|
||||||
+++ b/libavcodec/xvba_internal.h
|
|
||||||
@@ -0,0 +1,24 @@
|
@@ -0,0 +1,24 @@
|
||||||
+/*
|
+/*
|
||||||
+ * HW decode acceleration for MPEG-2, H.264 and VC-1
|
+ * HW decode acceleration for MPEG-2, H.264 and VC-1
|
||||||
@ -563,11 +525,9 @@ index 0000000..9653f85
|
|||||||
+
|
+
|
||||||
+int ff_xvba_translate_profile(int profile);
|
+int ff_xvba_translate_profile(int profile);
|
||||||
+void ff_xvba_add_slice_data(struct xvba_render_state *render, const uint8_t *buffer, uint32_t size);
|
+void ff_xvba_add_slice_data(struct xvba_render_state *render, const uint8_t *buffer, uint32_t size);
|
||||||
diff --git a/libavcodec/xvba_mpeg2.c b/libavcodec/xvba_mpeg2.c
|
diff -Naur ffmpeg-1.2.3/libavcodec/xvba_mpeg2.c ffmpeg-1.2.3.patch/libavcodec/xvba_mpeg2.c
|
||||||
new file mode 100644
|
--- ffmpeg-1.2.3/libavcodec/xvba_mpeg2.c 1970-01-01 01:00:00.000000000 +0100
|
||||||
index 0000000..0fc7d78
|
+++ ffmpeg-1.2.3.patch/libavcodec/xvba_mpeg2.c 2013-09-09 22:46:40.582852794 +0200
|
||||||
--- /dev/null
|
|
||||||
+++ b/libavcodec/xvba_mpeg2.c
|
|
||||||
@@ -0,0 +1,52 @@
|
@@ -0,0 +1,52 @@
|
||||||
+/*
|
+/*
|
||||||
+ * MPEG-2 HW decode acceleration through XVBA
|
+ * MPEG-2 HW decode acceleration through XVBA
|
||||||
@ -621,11 +581,9 @@ index 0000000..0fc7d78
|
|||||||
+ .decode_slice = decode_slice,
|
+ .decode_slice = decode_slice,
|
||||||
+ .priv_data_size = 0,
|
+ .priv_data_size = 0,
|
||||||
+};
|
+};
|
||||||
diff --git a/libavcodec/xvba_vc1.c b/libavcodec/xvba_vc1.c
|
diff -Naur ffmpeg-1.2.3/libavcodec/xvba_vc1.c ffmpeg-1.2.3.patch/libavcodec/xvba_vc1.c
|
||||||
new file mode 100644
|
--- ffmpeg-1.2.3/libavcodec/xvba_vc1.c 1970-01-01 01:00:00.000000000 +0100
|
||||||
index 0000000..bf3d9c2
|
+++ ffmpeg-1.2.3.patch/libavcodec/xvba_vc1.c 2013-09-09 22:46:40.583852794 +0200
|
||||||
--- /dev/null
|
|
||||||
+++ b/libavcodec/xvba_vc1.c
|
|
||||||
@@ -0,0 +1,190 @@
|
@@ -0,0 +1,190 @@
|
||||||
+/*
|
+/*
|
||||||
+ * VC-1 HW decode acceleration through XVBA
|
+ * VC-1 HW decode acceleration through XVBA
|
||||||
@ -817,11 +775,10 @@ index 0000000..bf3d9c2
|
|||||||
+ .end_frame = end_frame,
|
+ .end_frame = end_frame,
|
||||||
+ .decode_slice = decode_slice,
|
+ .decode_slice = decode_slice,
|
||||||
+};
|
+};
|
||||||
diff --git a/libavutil/pixdesc.c b/libavutil/pixdesc.c
|
diff -Naur ffmpeg-1.2.3/libavutil/pixdesc.c ffmpeg-1.2.3.patch/libavutil/pixdesc.c
|
||||||
index 1016dba..53dfec1 100644
|
--- ffmpeg-1.2.3/libavutil/pixdesc.c 2013-08-27 02:13:47.000000000 +0200
|
||||||
--- a/libavutil/pixdesc.c
|
+++ ffmpeg-1.2.3.patch/libavutil/pixdesc.c 2013-09-09 22:46:40.584852795 +0200
|
||||||
+++ b/libavutil/pixdesc.c
|
@@ -1141,6 +1141,12 @@
|
||||||
@@ -1141,6 +1141,12 @@ void av_write_image_line(const uint16_t *src,
|
|
||||||
.log2_chroma_h = 1,
|
.log2_chroma_h = 1,
|
||||||
.flags = PIX_FMT_HWACCEL,
|
.flags = PIX_FMT_HWACCEL,
|
||||||
},
|
},
|
||||||
@ -834,11 +791,10 @@ index 1016dba..53dfec1 100644
|
|||||||
[AV_PIX_FMT_YUV420P9LE] = {
|
[AV_PIX_FMT_YUV420P9LE] = {
|
||||||
.name = "yuv420p9le",
|
.name = "yuv420p9le",
|
||||||
.nb_components = 3,
|
.nb_components = 3,
|
||||||
diff --git a/libavutil/pixfmt.h b/libavutil/pixfmt.h
|
diff -Naur ffmpeg-1.2.3/libavutil/pixfmt.h ffmpeg-1.2.3.patch/libavutil/pixfmt.h
|
||||||
index 1c00ac4..6437e29 100644
|
--- ffmpeg-1.2.3/libavutil/pixfmt.h 2013-08-27 02:13:47.000000000 +0200
|
||||||
--- a/libavutil/pixfmt.h
|
+++ ffmpeg-1.2.3.patch/libavutil/pixfmt.h 2013-09-09 22:46:40.585852796 +0200
|
||||||
+++ b/libavutil/pixfmt.h
|
@@ -124,6 +124,7 @@
|
||||||
@@ -124,6 +124,7 @@ enum AVPixelFormat {
|
|
||||||
AV_PIX_FMT_VAAPI_MOCO, ///< HW acceleration through VA API at motion compensation entry-point, Picture.data[3] contains a vaapi_render_state struct which contains macroblocks as well as various fields extracted from headers
|
AV_PIX_FMT_VAAPI_MOCO, ///< HW acceleration through VA API at motion compensation entry-point, Picture.data[3] contains a vaapi_render_state struct which contains macroblocks as well as various fields extracted from headers
|
||||||
AV_PIX_FMT_VAAPI_IDCT, ///< HW acceleration through VA API at IDCT entry-point, Picture.data[3] contains a vaapi_render_state struct which contains fields extracted from headers
|
AV_PIX_FMT_VAAPI_IDCT, ///< HW acceleration through VA API at IDCT entry-point, Picture.data[3] contains a vaapi_render_state struct which contains fields extracted from headers
|
||||||
AV_PIX_FMT_VAAPI_VLD, ///< HW decoding through VA API, Picture.data[3] contains a vaapi_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
|
AV_PIX_FMT_VAAPI_VLD, ///< HW decoding through VA API, Picture.data[3] contains a vaapi_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
|
||||||
@ -846,6 +802,3 @@ index 1c00ac4..6437e29 100644
|
|||||||
|
|
||||||
AV_PIX_FMT_YUV420P16LE, ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
|
AV_PIX_FMT_YUV420P16LE, ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), little-endian
|
||||||
AV_PIX_FMT_YUV420P16BE, ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
|
AV_PIX_FMT_YUV420P16BE, ///< planar YUV 4:2:0, 24bpp, (1 Cr & Cb sample per 2x2 Y samples), big-endian
|
||||||
--
|
|
||||||
1.8.1.5
|
|
||||||
|
|
@ -0,0 +1,27 @@
|
|||||||
|
From 615c1fe2ff904cdb9ebcd8edc0b2c42c3cd6218c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Rainer Hochecker <fernetmenta@online.de>
|
||||||
|
Date: Mon, 9 Sep 2013 12:13:15 +0200
|
||||||
|
Subject: [PATCH] ffmpeg backport: h264: do not discard NAL_SEI when skipping
|
||||||
|
frames
|
||||||
|
|
||||||
|
---
|
||||||
|
libavcodec/h264.c | 3 +--
|
||||||
|
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
|
||||||
|
index 4f09c67..62ac8fb 100644
|
||||||
|
--- a/libavcodec/h264.c
|
||||||
|
+++ b/libavcodec/h264.c
|
||||||
|
@@ -4529,8 +4529,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size,
|
||||||
|
first_slice = hx->nal_unit_type;
|
||||||
|
}
|
||||||
|
|
||||||
|
- // FIXME do not discard SEI id
|
||||||
|
- if (avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
|
||||||
|
+ if (avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0 && h->nal_unit_type != NAL_SEI)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
again:
|
||||||
|
--
|
||||||
|
1.8.4
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user