diff --git a/packages/mediacenter/xbmc/patches/xbmc-xbmc-999.80.010-omxplayer-Allow_small_audio_packets_to_be_concatenated_to_make_better_use_of_audio_fifo.patch b/packages/mediacenter/xbmc/patches/xbmc-xbmc-999.80.010-omxplayer-Allow_small_audio_packets_to_be_concatenated_to_make_better_use_of_audio_fifo.patch new file mode 100644 index 0000000000..58d6581d6a --- /dev/null +++ b/packages/mediacenter/xbmc/patches/xbmc-xbmc-999.80.010-omxplayer-Allow_small_audio_packets_to_be_concatenated_to_make_better_use_of_audio_fifo.patch @@ -0,0 +1,402 @@ +From 74b06eccc06a0b4bd00114b20c929dfa1b73c783 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Sat, 1 Mar 2014 14:24:08 +0000 +Subject: [PATCH] [omxplayer] Allow small audio packets to be concatenated to + make better use of audio fifo + +Some audio codecs produce small packets which causes a high overhead when submitting to GPU, and doesn't make full use of GPU side buffering. +TrueHD in particular can produce packets with 40 samples (so 1200 packets per second) which causes very high overhead. + +What this aims to do is to concatenate audio packets until they approach the ideal audio packet size, +and then deal with the awkwardness of concatenated planar formats. 
+--- + xbmc/cores/omxplayer/OMXAudio.cpp | 67 ++++++++++++++++++++++--------- + xbmc/cores/omxplayer/OMXAudio.h | 3 +- + xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp | 62 ++++++++++++++++++---------- + xbmc/cores/omxplayer/OMXAudioCodecOMX.h | 8 +++- + xbmc/cores/omxplayer/OMXPlayerAudio.cpp | 9 +++-- + 5 files changed, 100 insertions(+), 49 deletions(-) + +diff --git a/xbmc/cores/omxplayer/OMXAudio.cpp b/xbmc/cores/omxplayer/OMXAudio.cpp +index 3f91e89..70177db 100644 +--- a/xbmc/cores/omxplayer/OMXAudio.cpp ++++ b/xbmc/cores/omxplayer/OMXAudio.cpp +@@ -43,6 +43,10 @@ + + using namespace std; + ++// the size of the audio_render output port buffers ++#define AUDIO_DECODE_OUTPUT_BUFFER (32*1024) ++static const char rounded_up_channels_shift[] = {0,0,1,2,2,3,3,3,3}; ++ + static const uint16_t AC3Bitrates[] = {32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 576, 640}; + static const uint16_t AC3FSCod [] = {48000, 44100, 32000, 0}; + +@@ -61,6 +65,7 @@ + m_Passthrough (false ), + m_HWDecode (false ), + m_BytesPerSec (0 ), ++ m_InputBytesPerSec(0 ), + m_BufferLen (0 ), + m_ChunkLen (0 ), + m_InputChannels (0 ), +@@ -491,11 +496,15 @@ bool COMXAudio::Initialize(AEAudioFormat format, OMXClock *clock, CDVDStreamInfo + + m_SampleRate = m_format.m_sampleRate; + m_BitsPerSample = CAEUtil::DataFormatToBits(m_format.m_dataFormat); +- m_BufferLen = m_BytesPerSec = m_format.m_sampleRate * (16 >> 3) * m_InputChannels; +- m_BufferLen *= AUDIO_BUFFER_SECONDS; ++ m_BytesPerSec = m_SampleRate * 2 << rounded_up_channels_shift[m_InputChannels]; ++ m_BufferLen = m_BytesPerSec * AUDIO_BUFFER_SECONDS; ++ m_InputBytesPerSec = m_SampleRate * m_BitsPerSample * m_InputChannels >> 3; ++ ++ // should be big enough that common formats (e.g. 6 channel DTS) fit in a single packet. ++ // we don't mind less common formats being split (e.g. 
ape/wma output large frames) + // the audio_decode output buffer size is 32K, and typically we convert from +- // 6 channel 32bpp float to 8 channel 16bpp in, so a full 48K input buffer will fit the outbut buffer +- m_ChunkLen = 48*1024; ++ // 6 channel 32bpp float to 8 channel 16bpp in, so a full 48K input buffer will fit the output buffer ++ m_ChunkLen = AUDIO_DECODE_OUTPUT_BUFFER * (m_InputChannels * m_BitsPerSample) >> (rounded_up_channels_shift[m_InputChannels] + 4); + + m_wave_header.Samples.wSamplesPerBlock = 0; + m_wave_header.Format.nChannels = m_InputChannels; +@@ -683,7 +692,7 @@ bool COMXAudio::Initialize(AEAudioFormat format, OMXClock *clock, CDVDStreamInfo + m_maxLevel = 0.0f; + + CLog::Log(LOGDEBUG, "COMXAudio::Initialize Input bps %d samplerate %d channels %d buffer size %d bytes per second %d", +- (int)m_pcm_input.nBitPerSample, (int)m_pcm_input.nSamplingRate, (int)m_pcm_input.nChannels, m_BufferLen, m_BytesPerSec); ++ (int)m_pcm_input.nBitPerSample, (int)m_pcm_input.nSamplingRate, (int)m_pcm_input.nChannels, m_BufferLen, m_InputBytesPerSec); + PrintPCM(&m_pcm_input, std::string("input")); + CLog::Log(LOGDEBUG, "COMXAudio::Initialize device passthrough %d hwdecode %d", + m_Passthrough, m_HWDecode); +@@ -866,11 +875,11 @@ bool COMXAudio::ApplyVolume(void) + //*********************************************************************************************** + unsigned int COMXAudio::AddPackets(const void* data, unsigned int len) + { +- return AddPackets(data, len, 0, 0); ++ return AddPackets(data, len, 0, 0, 0); + } + + //*********************************************************************************************** +-unsigned int COMXAudio::AddPackets(const void* data, unsigned int len, double dts, double pts) ++unsigned int COMXAudio::AddPackets(const void* data, unsigned int len, double dts, double pts, unsigned int frame_size) + { + CSingleLock lock (m_critSection); + +@@ -917,24 +926,40 @@ unsigned int COMXAudio::AddPackets(const void* data, 
unsigned int len, double dt + omx_buffer->nOffset = 0; + omx_buffer->nFlags = 0; + ++ // we want audio_decode output buffer size to be no more than AUDIO_DECODE_OUTPUT_BUFFER. ++ // it will be 16-bit and rounded up to next power of 2 in channels ++ unsigned int max_buffer = AUDIO_DECODE_OUTPUT_BUFFER * (m_InputChannels * m_BitsPerSample) >> (rounded_up_channels_shift[m_InputChannels] + 4); ++ + unsigned int remaining = demuxer_samples-demuxer_samples_sent; +- unsigned int samples_space = omx_buffer->nAllocLen/pitch; ++ unsigned int samples_space = std::min(max_buffer, omx_buffer->nAllocLen)/pitch; + unsigned int samples = std::min(remaining, samples_space); + + omx_buffer->nFilledLen = samples * pitch; + +- if (samples < demuxer_samples && m_BitsPerSample==32 && !(m_Passthrough || m_HWDecode)) ++ unsigned int frames = frame_size ? len/frame_size:0; ++ if ((samples < demuxer_samples || frames > 1) && m_BitsPerSample==32 && !(m_Passthrough || m_HWDecode)) + { +- uint8_t *dst = omx_buffer->pBuffer; +- uint8_t *src = demuxer_content + demuxer_samples_sent * (m_BitsPerSample >> 3); +- // we need to extract samples from planar audio, so the copying needs to be done per plane +- for (int i=0; i<(int)m_InputChannels; i++) +- { +- memcpy(dst, src, omx_buffer->nFilledLen / m_InputChannels); +- dst += omx_buffer->nFilledLen / m_InputChannels; +- src += demuxer_samples * (m_BitsPerSample >> 3); +- } +- assert(dst <= omx_buffer->pBuffer + m_ChunkLen); ++ const unsigned int sample_pitch = m_BitsPerSample >> 3; ++ const unsigned int frame_samples = frame_size / pitch; ++ const unsigned int plane_size = frame_samples * sample_pitch; ++ const unsigned int out_plane_size = samples * sample_pitch; ++ //CLog::Log(LOGDEBUG, "%s::%s samples:%d/%d ps:%d ops:%d fs:%d pitch:%d filled:%d frames=%d", CLASSNAME, __func__, samples, demuxer_samples, plane_size, out_plane_size, frame_size, pitch, omx_buffer->nFilledLen, frames); ++ for (unsigned int sample = 0; sample < samples; ) ++ { ++ 
unsigned int frame = (demuxer_samples_sent + sample) / frame_samples; ++ unsigned int sample_in_frame = (demuxer_samples_sent + sample) - frame * frame_samples; ++ int out_remaining = std::min(std::min(frame_samples - sample_in_frame, samples), samples-sample); ++ uint8_t *src = demuxer_content + frame*frame_size + sample_in_frame * sample_pitch; ++ uint8_t *dst = (uint8_t *)omx_buffer->pBuffer + sample * sample_pitch; ++ for (unsigned int channel = 0; channel < m_InputChannels; channel++) ++ { ++ //CLog::Log(LOGDEBUG, "%s::%s copy(%d,%d,%d) (s:%d f:%d sin:%d c:%d)", CLASSNAME, __func__, dst-(uint8_t *)omx_buffer->pBuffer, src-demuxer_content, out_remaining, sample, frame, sample_in_frame, channel); ++ memcpy(dst, src, out_remaining * sample_pitch); ++ src += plane_size; ++ dst += out_plane_size; ++ } ++ sample += out_remaining; ++ } + } + else + { +@@ -1103,7 +1128,9 @@ float COMXAudio::GetCacheTime() + + float COMXAudio::GetCacheTotal() + { +- return m_BytesPerSec ? (float)m_BufferLen / (float)m_BytesPerSec : 0.0f; ++ float audioplus_buffer = m_SampleRate ? 
32.0f * 512.0f / m_SampleRate : 0.0f; // seconds of extra buffering; contributes nothing while the sample rate is still 0 ++ float input_buffer = m_InputBytesPerSec ? (float)m_omx_decoder.GetInputBufferSize() / (float)m_InputBytesPerSec : 0.0f; // m_InputBytesPerSec is 0 until Initialize() runs, so avoid dividing by it ++ return AUDIO_BUFFER_SECONDS + input_buffer + audioplus_buffer; // total cache in seconds: render buffer + decoder input buffer + audioplus buffer + } + + //*********************************************************************************************** +diff --git a/xbmc/cores/omxplayer/OMXAudio.h b/xbmc/cores/omxplayer/OMXAudio.h +index f6bc023..7eba151 100644 +--- a/xbmc/cores/omxplayer/OMXAudio.h ++++ b/xbmc/cores/omxplayer/OMXAudio.h +@@ -66,7 +66,7 @@ class COMXAudio + ~COMXAudio(); + + unsigned int AddPackets(const void* data, unsigned int len); +- unsigned int AddPackets(const void* data, unsigned int len, double dts, double pts); ++ unsigned int AddPackets(const void* data, unsigned int len, double dts, double pts, unsigned int frame_size); + unsigned int GetSpace(); + bool Deinitialize(); + +@@ -114,6 +114,7 @@ class COMXAudio + bool m_Passthrough; + bool m_HWDecode; + unsigned int m_BytesPerSec; ++ unsigned int m_InputBytesPerSec; + unsigned int m_BufferLen; + unsigned int m_ChunkLen; + unsigned int m_InputChannels; +diff --git a/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp b/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp +index 5503a0e..cc6558a 100644 +--- a/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp ++++ b/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp +@@ -26,10 +26,15 @@ + + #include "cores/AudioEngine/Utils/AEUtil.h" + ++// the size of the audio_render output port buffers ++#define AUDIO_DECODE_OUTPUT_BUFFER (32*1024) ++static const char rounded_up_channels_shift[] = {0,0,1,2,2,3,3,3,3}; ++ + COMXAudioCodecOMX::COMXAudioCodecOMX() + { + m_pBufferOutput = NULL; + m_iBufferOutputAlloced = 0; ++ m_iBufferOutputUsed = 0; + + m_pCodecContext = NULL; + 
m_pConvert = NULL; +@@ -37,6 +42,7 @@ + + m_channels = 0; + m_pFrame1 = NULL; ++ m_frameSize = 0; + m_bGotFrame = false; + m_iSampleFormat = AV_SAMPLE_FMT_NONE; + m_desiredSampleFormat = AV_SAMPLE_FMT_NONE; +@@ -47,6 +53,7 @@ + m_dllAvUtil.av_free(m_pBufferOutput); + 
m_pBufferOutput = NULL; + m_iBufferOutputAlloced = 0; ++ m_iBufferOutputUsed = 0; + Dispose(); + } + +@@ -132,7 +139,7 @@ void COMXAudioCodecOMX::Dispose() + m_bGotFrame = false; + } + +-int COMXAudioCodecOMX::Decode(BYTE* pData, int iSize) ++int COMXAudioCodecOMX::Decode(BYTE* pData, int iSize, double dts, double pts) + { + int iBytesUsed, got_frame; + if (!m_pCodecContext) return -1; +@@ -167,10 +174,15 @@ int COMXAudioCodecOMX::Decode(BYTE* pData, int iSize) + } + + m_bGotFrame = true; ++ if (!m_iBufferOutputUsed) ++ { ++ m_dts = dts; ++ m_pts = pts; ++ } + return iBytesUsed; + } + +-int COMXAudioCodecOMX::GetData(BYTE** dst) ++int COMXAudioCodecOMX::GetData(BYTE** dst, double &dts, double &pts) + { + if (!m_bGotFrame) + return 0; +@@ -179,13 +191,12 @@ int COMXAudioCodecOMX::GetData(BYTE** dst) + int inputSize = m_dllAvUtil.av_samples_get_buffer_size(&inLineSize, m_pCodecContext->channels, m_pFrame1->nb_samples, m_pCodecContext->sample_fmt, 0); + /* output audio will be packed */ + int outputSize = m_dllAvUtil.av_samples_get_buffer_size(&outLineSize, m_pCodecContext->channels, m_pFrame1->nb_samples, m_desiredSampleFormat, 1); +- bool cont = !m_pFrame1->data[1] || (m_pFrame1->data[1] == m_pFrame1->data[0] + inLineSize && inLineSize == outLineSize && inLineSize * m_pCodecContext->channels == inputSize); + +- if (m_iBufferOutputAlloced < outputSize) ++ if (m_iBufferOutputAlloced < m_iBufferOutputUsed + outputSize) + { + m_dllAvUtil.av_free(m_pBufferOutput); +- m_pBufferOutput = (BYTE*)m_dllAvUtil.av_malloc(outputSize + FF_INPUT_BUFFER_PADDING_SIZE); +- m_iBufferOutputAlloced = outputSize; ++ m_pBufferOutput = (BYTE*)m_dllAvUtil.av_malloc(m_iBufferOutputUsed + outputSize + FF_INPUT_BUFFER_PADDING_SIZE); ++ m_iBufferOutputAlloced = m_iBufferOutputUsed + outputSize; + } + *dst = m_pBufferOutput; + +@@ -217,7 +228,7 @@ int COMXAudioCodecOMX::GetData(BYTE** dst) + + /* use unaligned flag to keep output packed */ + uint8_t *out_planes[m_pCodecContext->channels]; +- 
if(m_dllAvUtil.av_samples_fill_arrays(out_planes, NULL, m_pBufferOutput, m_pCodecContext->channels, m_pFrame1->nb_samples, m_desiredSampleFormat, 1) < 0 || ++ if(m_dllAvUtil.av_samples_fill_arrays(out_planes, NULL, m_pBufferOutput + m_iBufferOutputUsed, m_pCodecContext->channels, m_pFrame1->nb_samples, m_desiredSampleFormat, 1) < 0 || + m_dllSwResample.swr_convert(m_pConvert, out_planes, m_pFrame1->nb_samples, (const uint8_t **)m_pFrame1->data, m_pFrame1->nb_samples) < 0) + { + CLog::Log(LOGERROR, "COMXAudioCodecOMX::Decode - Unable to convert format %d to %d", (int)m_pCodecContext->sample_fmt, m_desiredSampleFormat); +@@ -226,35 +237,42 @@ int COMXAudioCodecOMX::GetData(BYTE** dst) + } + else + { +- /* if it is already contiguous, just return decoded frame */ +- if (cont) +- { +- *dst = m_pFrame1->data[0]; +- } +- else ++ /* copy to a contiguous buffer */ ++ uint8_t *out_planes[m_pCodecContext->channels]; ++ if (m_dllAvUtil.av_samples_fill_arrays(out_planes, NULL, m_pBufferOutput + m_iBufferOutputUsed, m_pCodecContext->channels, m_pFrame1->nb_samples, m_desiredSampleFormat, 1) < 0 || ++ m_dllAvUtil.av_samples_copy(out_planes, m_pFrame1->data, 0, 0, m_pFrame1->nb_samples, m_pCodecContext->channels, m_desiredSampleFormat) < 0 ) + { +- /* copy to a contiguous buffer */ +- uint8_t *out_planes[m_pCodecContext->channels]; +- if (m_dllAvUtil.av_samples_fill_arrays(out_planes, NULL, m_pBufferOutput, m_pCodecContext->channels, m_pFrame1->nb_samples, m_desiredSampleFormat, 1) < 0 || +- m_dllAvUtil.av_samples_copy(out_planes, m_pFrame1->data, 0, 0, m_pFrame1->nb_samples, m_pCodecContext->channels, m_desiredSampleFormat) < 0 ) +- { +- outputSize = 0; +- } ++ outputSize = 0; + } + } ++ int desired_size = AUDIO_DECODE_OUTPUT_BUFFER * (m_pCodecContext->channels * GetBitsPerSample()) >> (rounded_up_channels_shift[m_pCodecContext->channels] + 4); + + if (m_bFirstFrame) + { +- CLog::Log(LOGDEBUG, "COMXAudioCodecOMX::GetData size=%d/%d line=%d/%d cont=%d buf=%p", inputSize, 
outputSize, inLineSize, outLineSize, cont, *dst); ++ CLog::Log(LOGDEBUG, "COMXAudioCodecOMX::GetData size=%d/%d line=%d/%d buf=%p, desired=%d", inputSize, outputSize, inLineSize, outLineSize, *dst, desired_size); + m_bFirstFrame = false; + } +- return outputSize; ++ m_iBufferOutputUsed += outputSize; ++ ++ // if next buffer submitted won't fit then flush it out ++ if (m_iBufferOutputUsed + outputSize > desired_size || (m_frameSize && (int)m_frameSize != outputSize)) ++ { ++ int ret = m_iBufferOutputUsed; ++ m_bGotFrame = false; ++ m_iBufferOutputUsed = 0; ++ m_frameSize = outputSize; ++ dts = m_dts; ++ pts = m_pts; ++ return ret; ++ } ++ return 0; + } + + void COMXAudioCodecOMX::Reset() + { + if (m_pCodecContext) m_dllAvCodec.avcodec_flush_buffers(m_pCodecContext); + m_bGotFrame = false; ++ m_iBufferOutputUsed = 0; + } + + int COMXAudioCodecOMX::GetChannels() +diff --git a/xbmc/cores/omxplayer/OMXAudioCodecOMX.h b/xbmc/cores/omxplayer/OMXAudioCodecOMX.h +index 343465c..4dc2371 100644 +--- a/xbmc/cores/omxplayer/OMXAudioCodecOMX.h ++++ b/xbmc/cores/omxplayer/OMXAudioCodecOMX.h +@@ -36,8 +36,8 @@ class COMXAudioCodecOMX + virtual ~COMXAudioCodecOMX(); + bool Open(CDVDStreamInfo &hints); + void Dispose(); +- int Decode(BYTE* pData, int iSize); +- int GetData(BYTE** dst); ++ int Decode(BYTE* pData, int iSize, double dts, double pts); ++ int GetData(BYTE** dst, double &dts, double &pts); + void Reset(); + int GetChannels(); + uint64_t GetChannelMap(); +@@ -45,6 +45,7 @@ class COMXAudioCodecOMX + int GetBitsPerSample(); + static const char* GetName() { return "FFmpeg"; } + int GetBitRate(); ++ unsigned int GetFrameSize() { return m_frameSize; } + + protected: + AVCodecContext* m_pCodecContext; +@@ -55,6 +56,7 @@ class COMXAudioCodecOMX + AVFrame* m_pFrame1; + + BYTE *m_pBufferOutput; ++ int m_iBufferOutputUsed; + int m_iBufferOutputAlloced; + + bool m_bOpenedCodec; +@@ -63,6 +65,8 @@ class COMXAudioCodecOMX + + bool m_bFirstFrame; + bool m_bGotFrame; ++ unsigned int 
m_frameSize; ++ double m_dts, m_pts; + DllAvCodec m_dllAvCodec; + DllAvUtil m_dllAvUtil; + DllSwResample m_dllSwResample; +diff --git a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +index 7d9081c..53336cb 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +@@ -228,9 +228,10 @@ bool OMXPlayerAudio::Decode(DemuxPacket *pkt, bool bDropPacket) + + if(!OMX_IS_RAW(m_format.m_dataFormat) && !bDropPacket) + { ++ double dts = pkt->dts, pts=pkt->pts; + while(!m_bStop && data_len > 0) + { +- int len = m_pAudioCodec->Decode((BYTE *)data_dec, data_len); ++ int len = m_pAudioCodec->Decode((BYTE *)data_dec, data_len, dts, pts); + if( (len < 0) || (len > data_len) ) + { + m_pAudioCodec->Reset(); +@@ -241,7 +242,7 @@ bool OMXPlayerAudio::Decode(DemuxPacket *pkt, bool bDropPacket) + data_len -= len; + + uint8_t *decoded; +- int decoded_size = m_pAudioCodec->GetData(&decoded); ++ int decoded_size = m_pAudioCodec->GetData(&decoded, dts, pts); + + if(decoded_size <=0) + continue; +@@ -275,7 +276,7 @@ bool OMXPlayerAudio::Decode(DemuxPacket *pkt, bool bDropPacket) + if(m_silence) + memset(decoded, 0x0, decoded_size); + +- ret = m_omxAudio.AddPackets(decoded, decoded_size, m_audioClock, m_audioClock); ++ ret = m_omxAudio.AddPackets(decoded, decoded_size, dts, pts, m_pAudioCodec->GetFrameSize()); + + if(ret != decoded_size) + { +@@ -313,7 +314,7 @@ bool OMXPlayerAudio::Decode(DemuxPacket *pkt, bool bDropPacket) + if(m_silence) + memset(pkt->pData, 0x0, pkt->iSize); + +- m_omxAudio.AddPackets(pkt->pData, pkt->iSize, m_audioClock, m_audioClock); ++ m_omxAudio.AddPackets(pkt->pData, pkt->iSize, m_audioClock, m_audioClock, 0); + } + + m_audioStats.AddSampleBytes(pkt->iSize); +-- +1.8.5.5 +