xbmc: update XVBA/VDPAU patch

Signed-off-by: Stephan Raue <stephan@openelec.tv>
This commit is contained in:
Stephan Raue 2013-05-25 19:43:53 +02:00
parent 52ea291c71
commit 622dbaea11
4 changed files with 845 additions and 693 deletions

View File

@ -1,107 +0,0 @@
From ca0ddf0673dea966af5bf0bc562f9ff69a551cd9 Mon Sep 17 00:00:00 2001
From: fritsch <peter.fruehberger@gmail.com>
Date: Sat, 12 Jan 2013 13:03:50 +0100
Subject: [PATCH] dvdplayer: Allow multithread decoding for hi10p content by
default
This allows decoding of some hi10p material on e.g. AMD Fusion with
both cores at the max. It also introduces a new advancedsetting named
disablehi10pmultithreading to disable multithreaded hi10p decoding.
---
.../DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp | 18 ++++++++++++++++--
.../dvdplayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h | 1 +
xbmc/settings/AdvancedSettings.cpp | 2 ++
xbmc/settings/AdvancedSettings.h | 1 +
4 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp b/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp
index 8f81637..77ac6b1 100644
--- a/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp
+++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp
@@ -138,6 +138,7 @@ enum PixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avctx
m_iScreenHeight = 0;
m_iOrientation = 0;
m_bSoftware = false;
+ m_isHi10p = false;
m_pHardware = NULL;
m_iLastKeyframe = 0;
m_dts = DVD_NOPTS_VALUE;
@@ -187,7 +188,10 @@ bool CDVDVideoCodecFFmpeg::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options
case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
case FF_PROFILE_H264_HIGH_444_INTRA:
case FF_PROFILE_H264_CAVLC_444:
+ // this is needed to not open the decoders
m_bSoftware = true;
+ // this we need to enable multithreading for hi10p via advancedsettings
+ m_isHi10p = true;
break;
}
}
@@ -247,8 +251,18 @@ bool CDVDVideoCodecFFmpeg::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options
m_pCodecContext->codec_tag = hints.codec_tag;
/* Only allow slice threading, since frame threading is more
* sensitive to changes in frame sizes, and it causes crashes
- * during HW accell */
- m_pCodecContext->thread_type = FF_THREAD_SLICE;
+ * during HW accell - so we unset it in this case.
+ *
+ * When we detect Hi10p and user did not disable hi10pmultithreading
+ * via advancedsettings.xml we keep the ffmpeg default thread type.
+ * */
+ if(m_isHi10p && !g_advancedSettings.m_videoDisableHi10pMultithreading)
+ {
+ CLog::Log(LOGDEBUG,"CDVDVideoCodecFFmpeg::Open() Keep default threading for Hi10p: %d",
+ m_pCodecContext->thread_type);
+ }
+ else
+ m_pCodecContext->thread_type = FF_THREAD_SLICE;
#if defined(TARGET_DARWIN_IOS)
// ffmpeg with enabled neon will crash and burn if this is enabled
diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h b/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h
index 61d0305..827b2d9 100644
--- a/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h
+++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h
@@ -114,6 +114,7 @@ class CDVDVideoCodecFFmpeg : public CDVDVideoCodec
std::string m_name;
bool m_bSoftware;
+ bool m_isHi10p;
IHardwareDecoder *m_pHardware;
int m_iLastKeyframe;
double m_dts;
diff --git a/xbmc/settings/AdvancedSettings.cpp b/xbmc/settings/AdvancedSettings.cpp
index 16800b7..1e0f3e0 100644
--- a/xbmc/settings/AdvancedSettings.cpp
+++ b/xbmc/settings/AdvancedSettings.cpp
@@ -112,6 +112,7 @@ void CAdvancedSettings::Initialize()
m_DXVANoDeintProcForProgressive = false;
m_videoFpsDetect = 1;
m_videoDefaultLatency = 0.0;
+ m_videoDisableHi10pMultithreading = false;
m_musicUseTimeSeeking = true;
m_musicTimeSeekForward = 10;
@@ -498,6 +499,7 @@ void CAdvancedSettings::ParseSettingsFile(const CStdString &file)
XMLUtils::GetBoolean(pElement,"enablehighqualityhwscalers", m_videoEnableHighQualityHwScalers);
XMLUtils::GetFloat(pElement,"autoscalemaxfps",m_videoAutoScaleMaxFps, 0.0f, 1000.0f);
XMLUtils::GetBoolean(pElement,"allowmpeg4vdpau",m_videoAllowMpeg4VDPAU);
+ XMLUtils::GetBoolean(pElement,"disablehi10pmultithreading",m_videoDisableHi10pMultithreading);
XMLUtils::GetBoolean(pElement,"allowmpeg4vaapi",m_videoAllowMpeg4VAAPI);
XMLUtils::GetBoolean(pElement, "disablebackgrounddeinterlace", m_videoDisableBackgroundDeinterlace);
XMLUtils::GetInt(pElement, "useocclusionquery", m_videoCaptureUseOcclusionQuery, -1, 1);
diff --git a/xbmc/settings/AdvancedSettings.h b/xbmc/settings/AdvancedSettings.h
index 27887d4..fc05e41 100644
--- a/xbmc/settings/AdvancedSettings.h
+++ b/xbmc/settings/AdvancedSettings.h
@@ -164,6 +164,7 @@ class CAdvancedSettings
bool m_DXVAForceProcessorRenderer;
bool m_DXVANoDeintProcForProgressive;
int m_videoFpsDetect;
+ bool m_videoDisableHi10pMultithreading;
CStdString m_videoDefaultPlayer;
CStdString m_videoDefaultDVDPlayer;
--
1.7.10

View File

@ -1,295 +0,0 @@
From 4a61baea4a936089b589f0b7a834aa180a1017ea Mon Sep 17 00:00:00 2001
From: Bob van Loosen <bob.loosen@gmail.com>
Date: Tue, 14 May 2013 12:28:45 +0200
Subject: [PATCH 1/2] fixed: when doing a format conversion from float to 32
bit int, multiply with INT32_MAX - 127, since this is the maximum value that
can be stored exactly in both 32 bit float and int; if INT32_MAX gets converted
to float, it gets rounded to INT32_MAX + 1, which can cause wrap around distortion
---
xbmc/cores/AudioEngine/Utils/AEConvert.cpp | 32 +++++++++++++++++-------------
1 file changed, 18 insertions(+), 14 deletions(-)
diff --git a/xbmc/cores/AudioEngine/Utils/AEConvert.cpp b/xbmc/cores/AudioEngine/Utils/AEConvert.cpp
index 97ec87b..5f0af63 100644
--- a/xbmc/cores/AudioEngine/Utils/AEConvert.cpp
+++ b/xbmc/cores/AudioEngine/Utils/AEConvert.cpp
@@ -922,17 +922,21 @@ unsigned int CAEConvert::Float_S24NE3(float *data, const unsigned int samples, u
return samples * 3;
}
+//float can't store INT32_MAX, it gets rounded up to INT32_MAX + 1
+//INT32_MAX - 127 is the maximum value that can exactly be stored in both 32 bit float and int
+#define MUL32 ((float)(INT32_MAX - 127))
+
unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, uint8_t *dest)
{
int32_t *dst = (int32_t*)dest;
#ifdef __SSE__
- const __m128 mul = _mm_set_ps1((float)INT32_MAX);
+ const __m128 mul = _mm_set_ps1(MUL32);
unsigned int count = samples;
/* work around invalid alignment */
while ((((uintptr_t)data & 0xF) || ((uintptr_t)dest & 0xF)) && count > 0)
{
- dst[0] = safeRound(data[0] * (float)INT32_MAX);
+ dst[0] = safeRound(data[0] * MUL32);
++data;
++dst;
--count;
@@ -955,7 +959,7 @@ unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, ui
const uint32_t odd = samples - even;
if (odd == 1)
{
- dst[0] = safeRound(data[0] * (float)INT32_MAX);
+ dst[0] = safeRound(data[0] * MUL32);
dst[0] = Endian_SwapLE32(dst[0]);
}
else
@@ -988,7 +992,7 @@ unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, ui
/* no SIMD */
for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
{
- dst[0] = safeRound(data[0] * (float)INT32_MAX);
+ dst[0] = safeRound(data[0] * MUL32);
dst[0] = Endian_SwapLE32(dst[0]);
}
#endif
@@ -1002,7 +1006,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample
int32_t *dst = (int32_t*)dest;
for (float *end = data + (samples & ~0x3); data < end; data += 4, dst += 4)
{
- float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), INT32_MAX);
+ float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), MUL32);
int32x4_t ret = vcvtq_s32_f32(val);
#ifdef __BIG_ENDIAN__
ret = vrev64q_s32(ret);
@@ -1012,7 +1016,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample
if (samples & 0x2)
{
- float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), INT32_MAX);
+ float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), MUL32);
int32x2_t ret = vcvt_s32_f32(val);
#ifdef __BIG_ENDIAN__
ret = vrev64_s32(ret);
@@ -1024,7 +1028,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample
if (samples & 0x1)
{
- dst[0] = safeRound(data[0] * (float)INT32_MAX);
+ dst[0] = safeRound(data[0] * MUL32);
dst[0] = Endian_SwapLE32(dst[0]);
}
#endif
@@ -1035,13 +1039,13 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
{
int32_t *dst = (int32_t*)dest;
#ifdef __SSE__
- const __m128 mul = _mm_set_ps1((float)INT32_MAX);
+ const __m128 mul = _mm_set_ps1(MUL32);
unsigned int count = samples;
/* work around invalid alignment */
while ((((uintptr_t)data & 0xF) || ((uintptr_t)dest & 0xF)) && count > 0)
{
- dst[0] = safeRound(data[0] * (float)INT32_MAX);
+ dst[0] = safeRound(data[0] * MUL32);
++data;
++dst;
--count;
@@ -1064,7 +1068,7 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
const uint32_t odd = samples - even;
if (odd == 1)
{
- dst[0] = safeRound(data[0] * (float)INT32_MAX);
+ dst[0] = safeRound(data[0] * MUL32);
dst[0] = Endian_SwapBE32(dst[0]);
}
else
@@ -1096,7 +1100,7 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
/* no SIMD */
for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
{
- dst[0] = safeRound(data[0] * (float)INT32_MAX);
+ dst[0] = safeRound(data[0] * MUL32);
dst[0] = Endian_SwapBE32(dst[0]);
}
#endif
@@ -1110,7 +1114,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample
int32_t *dst = (int32_t*)dest;
for (float *end = data + (samples & ~0x3); data < end; data += 4, dst += 4)
{
- float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), INT32_MAX);
+ float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), MUL32);
int32x4_t ret = vcvtq_s32_f32(val);
#ifndef __BIG_ENDIAN__
ret = vrev64q_s32(ret);
@@ -1120,7 +1124,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample
if (samples & 0x2)
{
- float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), INT32_MAX);
+ float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), MUL32);
int32x2_t ret = vcvt_s32_f32(val);
#ifndef __BIG_ENDIAN__
ret = vrev64_s32(ret);
@@ -1132,7 +1136,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample
if (samples & 0x1)
{
- dst[0] = safeRound(data[0] * (float)INT32_MAX);
+ dst[0] = safeRound(data[0] * MUL32);
dst[0] = Endian_SwapBE32(dst[0]);
}
#endif
--
1.8.1.6
From edf2e3d3c9b124446961a9b66aaf401cf1770500 Mon Sep 17 00:00:00 2001
From: Bob van Loosen <bob.loosen@gmail.com>
Date: Tue, 14 May 2013 18:43:01 +0200
Subject: [PATCH 2/2] rename: MUL32 -> AE_MUL32
---
xbmc/cores/AudioEngine/Utils/AEConvert.cpp | 30 +++++++++++++++---------------
1 file changed, 15 insertions(+), 15 deletions(-)
diff --git a/xbmc/cores/AudioEngine/Utils/AEConvert.cpp b/xbmc/cores/AudioEngine/Utils/AEConvert.cpp
index 5f0af63..0b0b646 100644
--- a/xbmc/cores/AudioEngine/Utils/AEConvert.cpp
+++ b/xbmc/cores/AudioEngine/Utils/AEConvert.cpp
@@ -924,19 +924,19 @@ unsigned int CAEConvert::Float_S24NE3(float *data, const unsigned int samples, u
//float can't store INT32_MAX, it gets rounded up to INT32_MAX + 1
//INT32_MAX - 127 is the maximum value that can exactly be stored in both 32 bit float and int
-#define MUL32 ((float)(INT32_MAX - 127))
+#define AE_MUL32 ((float)(INT32_MAX - 127))
unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, uint8_t *dest)
{
int32_t *dst = (int32_t*)dest;
#ifdef __SSE__
- const __m128 mul = _mm_set_ps1(MUL32);
+ const __m128 mul = _mm_set_ps1(AE_MUL32);
unsigned int count = samples;
/* work around invalid alignment */
while ((((uintptr_t)data & 0xF) || ((uintptr_t)dest & 0xF)) && count > 0)
{
- dst[0] = safeRound(data[0] * MUL32);
+ dst[0] = safeRound(data[0] * AE_MUL32);
++data;
++dst;
--count;
@@ -959,7 +959,7 @@ unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, ui
const uint32_t odd = samples - even;
if (odd == 1)
{
- dst[0] = safeRound(data[0] * MUL32);
+ dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapLE32(dst[0]);
}
else
@@ -992,7 +992,7 @@ unsigned int CAEConvert::Float_S32LE(float *data, const unsigned int samples, ui
/* no SIMD */
for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
{
- dst[0] = safeRound(data[0] * MUL32);
+ dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapLE32(dst[0]);
}
#endif
@@ -1006,7 +1006,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample
int32_t *dst = (int32_t*)dest;
for (float *end = data + (samples & ~0x3); data < end; data += 4, dst += 4)
{
- float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), MUL32);
+ float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), AE_MUL32);
int32x4_t ret = vcvtq_s32_f32(val);
#ifdef __BIG_ENDIAN__
ret = vrev64q_s32(ret);
@@ -1016,7 +1016,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample
if (samples & 0x2)
{
- float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), MUL32);
+ float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), AE_MUL32);
int32x2_t ret = vcvt_s32_f32(val);
#ifdef __BIG_ENDIAN__
ret = vrev64_s32(ret);
@@ -1028,7 +1028,7 @@ unsigned int CAEConvert::Float_S32LE_Neon(float *data, const unsigned int sample
if (samples & 0x1)
{
- dst[0] = safeRound(data[0] * MUL32);
+ dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapLE32(dst[0]);
}
#endif
@@ -1039,13 +1039,13 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
{
int32_t *dst = (int32_t*)dest;
#ifdef __SSE__
- const __m128 mul = _mm_set_ps1(MUL32);
+ const __m128 mul = _mm_set_ps1(AE_MUL32);
unsigned int count = samples;
/* work around invalid alignment */
while ((((uintptr_t)data & 0xF) || ((uintptr_t)dest & 0xF)) && count > 0)
{
- dst[0] = safeRound(data[0] * MUL32);
+ dst[0] = safeRound(data[0] * AE_MUL32);
++data;
++dst;
--count;
@@ -1068,7 +1068,7 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
const uint32_t odd = samples - even;
if (odd == 1)
{
- dst[0] = safeRound(data[0] * MUL32);
+ dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapBE32(dst[0]);
}
else
@@ -1100,7 +1100,7 @@ unsigned int CAEConvert::Float_S32BE(float *data, const unsigned int samples, ui
/* no SIMD */
for (uint32_t i = 0; i < samples; ++i, ++data, ++dst)
{
- dst[0] = safeRound(data[0] * MUL32);
+ dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapBE32(dst[0]);
}
#endif
@@ -1114,7 +1114,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample
int32_t *dst = (int32_t*)dest;
for (float *end = data + (samples & ~0x3); data < end; data += 4, dst += 4)
{
- float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), MUL32);
+ float32x4_t val = vmulq_n_f32(vld1q_f32((const float32_t *)data), AE_MUL32);
int32x4_t ret = vcvtq_s32_f32(val);
#ifndef __BIG_ENDIAN__
ret = vrev64q_s32(ret);
@@ -1124,7 +1124,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample
if (samples & 0x2)
{
- float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), MUL32);
+ float32x2_t val = vmul_n_f32(vld1_f32((const float32_t *)data), AE_MUL32);
int32x2_t ret = vcvt_s32_f32(val);
#ifndef __BIG_ENDIAN__
ret = vrev64_s32(ret);
@@ -1136,7 +1136,7 @@ unsigned int CAEConvert::Float_S32BE_Neon(float *data, const unsigned int sample
if (samples & 0x1)
{
- dst[0] = safeRound(data[0] * MUL32);
+ dst[0] = safeRound(data[0] * AE_MUL32);
dst[0] = Endian_SwapBE32(dst[0]);
}
#endif
--
1.8.1.6