diff --git a/packages/mediacenter/kodi-theme-Confluence/package.mk b/packages/mediacenter/kodi-theme-Confluence/package.mk index dc13f3adda..86f90fdc6e 100644 --- a/packages/mediacenter/kodi-theme-Confluence/package.mk +++ b/packages/mediacenter/kodi-theme-Confluence/package.mk @@ -17,7 +17,7 @@ ################################################################################ PKG_NAME="kodi-theme-Confluence" -PKG_VERSION="15.0-rc1-a248db2" +PKG_VERSION="15.0-rc1-8f081c2" PKG_REV="1" PKG_ARCH="any" PKG_LICENSE="GPL" diff --git a/packages/mediacenter/kodi/package.mk b/packages/mediacenter/kodi/package.mk index 890874a28a..0d72df1834 100644 --- a/packages/mediacenter/kodi/package.mk +++ b/packages/mediacenter/kodi/package.mk @@ -17,7 +17,7 @@ ################################################################################ PKG_NAME="kodi" -PKG_VERSION="15.0-rc1-a248db2" +PKG_VERSION="15.0-rc1-8f081c2" PKG_REV="1" PKG_ARCH="any" PKG_LICENSE="GPL" diff --git a/packages/mediacenter/kodi/patches/kodi-999.22-PR7280.patch b/packages/mediacenter/kodi/patches/kodi-999.22-PR7280.patch deleted file mode 100644 index 0cfe4bbd31..0000000000 --- a/packages/mediacenter/kodi/patches/kodi-999.22-PR7280.patch +++ /dev/null @@ -1,1364 +0,0 @@ -From 224c1919ad3f68e23e817f41036687343f34aaae Mon Sep 17 00:00:00 2001 -From: popcornmix -Date: Fri, 12 Jun 2015 17:27:47 +0100 -Subject: [PATCH] [utils] Disable fast_memcpy which is slower than memcpy - -The default glibc memcpy is likely to be better tuned than this code -which hasn't been touched for four years. - -In a test with software video decode on Pi2 the skipped frames went -from 189 to 172 when fast_memcpy was disabled. ---- - Kodi.xcodeproj/project.pbxproj | 6 - - project/VS2010Express/XBMC.vcxproj | 4 - - project/VS2010Express/XBMC.vcxproj.filters | 3 - - xbmc/cores/VideoRenderers/LinuxRendererGLES.cpp | 1 - - xbmc/cores/VideoRenderers/RenderCapture.cpp | 7 +- - xbmc/cores/dvdplayer/DVDCodecs/DVDCodecUtils.cpp | 33 +- - .../Video/libstagefrightICS/StageFrightVideo.cpp | 3 +- - xbmc/utils/Makefile.in | 2 - - xbmc/utils/fastmemcpy-arm.S | 528 --------------------- - xbmc/utils/fastmemcpy.c | 396 ---------------- - xbmc/utils/fastmemcpy.h | 35 -- - xbmc/utils/test/Makefile | 1 - - xbmc/utils/test/Testfastmemcpy.cpp | 39 -- - 13 files changed, 20 insertions(+), 1038 deletions(-) - delete mode 100644 xbmc/utils/fastmemcpy-arm.S - delete mode 100644 xbmc/utils/fastmemcpy.c - delete mode 100644 xbmc/utils/fastmemcpy.h - delete mode 100644 xbmc/utils/test/Testfastmemcpy.cpp - -diff --git a/Kodi.xcodeproj/project.pbxproj b/Kodi.xcodeproj/project.pbxproj -index 395c4ea..ce5a7f7 100644 ---- a/Kodi.xcodeproj/project.pbxproj -+++ b/Kodi.xcodeproj/project.pbxproj -@@ -3192,7 +3192,6 @@ - F5E55B5D10741272006E788A /* DVDPlayerTeletext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F5E55B5B10741272006E788A /* DVDPlayerTeletext.cpp */; }; - F5E55B66107412DE006E788A /* GUIDialogTeletext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F5E55B65107412DE006E788A /* GUIDialogTeletext.cpp */; }; - F5E55B7010741340006E788A /* Teletext.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F5E55B6E10741340006E788A /* Teletext.cpp */; }; -- F5E5697310803FC3006E788A /* fastmemcpy.c in Sources */ = {isa = PBXBuildFile; fileRef = F5E5697210803FC3006E788A /* fastmemcpy.c */; }; - F5E56BA61082A675006E788A /* PosixMountProvider.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F5E56BA51082A675006E788A /* PosixMountProvider.cpp */; }; - F5EA02260F6DA990005C2EC5 /* CocoaPowerSyscall.cpp in 
Sources */ = {isa = PBXBuildFile; fileRef = F5EA02200F6DA85C005C2EC5 /* CocoaPowerSyscall.cpp */; };
- F5EA02270F6DA9A5005C2EC5 /* PowerManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F5EA021A0F6DA7E8005C2EC5 /* PowerManager.cpp */; };
-@@ -3632,7 +3631,6 @@
- 43348AAB1077486D00F859CF /* PlayerSelectionRule.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PlayerSelectionRule.h; path = playercorefactory/PlayerSelectionRule.h; sourceTree = "<group>"; };
- 436721A612D66A09002508E6 /* IAnnouncer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = IAnnouncer.h; sourceTree = "<group>"; };
- 436B38F3106628850049AB3B /* EndianSwap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = EndianSwap.h; sourceTree = "<group>"; };
-- 43BF09DD1080D39300E25290 /* fastmemcpy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fastmemcpy.h; sourceTree = "<group>"; };
- 43FAC87112D6349400F67914 /* IStorageProvider.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = IStorageProvider.h; sourceTree = "<group>"; };
- 551C3A43175A12010051AAAD /* VDA.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = VDA.cpp; sourceTree = "<group>"; };
- 551C3A44175A12010051AAAD /* VDA.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = VDA.h; sourceTree = "<group>"; };
-@@ -5735,7 +5733,6 @@
- F5E55B6D10741340006E788A /* Teletext.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Teletext.h; sourceTree = "<group>"; };
- F5E55B6E10741340006E788A /* Teletext.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = Teletext.cpp; sourceTree = "<group>"; };
- F5E55B6F10741340006E788A /* TeletextDefines.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TeletextDefines.h; sourceTree = "<group>"; };
-- F5E5697210803FC3006E788A /* fastmemcpy.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = fastmemcpy.c; sourceTree = "<group>"; };
- F5E56BA41082A675006E788A /* PosixMountProvider.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PosixMountProvider.h; sourceTree = "<group>"; };
- F5E56BA51082A675006E788A /* PosixMountProvider.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PosixMountProvider.cpp; sourceTree = "<group>"; };
- F5EA021A0F6DA7E8005C2EC5 /* PowerManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PowerManager.cpp; sourceTree = "<group>"; };
-@@ -9202,8 +9199,6 @@
- DF529BAD1741697B00523FB4 /* Environment.h */,
- E36C29E90DA72486001F0C9D /* Fanart.cpp */,
- 6E97BDC30DA2B620003A2A89 /* Fanart.h */,
-- F5E5697210803FC3006E788A /* fastmemcpy.c */,
-- 43BF09DD1080D39300E25290 /* fastmemcpy.h */,
- F5F244641110DC6B009126C6 /* FileOperationJob.cpp */,
- F5F244631110DC6B009126C6 /* FileOperationJob.h */,
- F5F245EC1112C9AB009126C6 /* FileUtils.cpp */,
-@@ -10519,7 +10514,6 @@
- 43348AAE1077486D00F859CF /* PlayerCoreFactory.cpp in Sources */,
- 43348AAF1077486D00F859CF /* PlayerSelectionRule.cpp in Sources */,
- 7CAA20511079C8160096DE39 /* BaseRenderer.cpp in Sources */,
-- F5E5697310803FC3006E788A /* fastmemcpy.c in Sources */,
- 55D3604E1826CAB900DA66D2 /* OverlayRendererGUI.cpp in Sources */,
- F5E56BA61082A675006E788A
/* PosixMountProvider.cpp in Sources */, - 7CAA25351085963B0096DE39 /* PasswordManager.cpp in Sources */, -diff --git a/project/VS2010Express/XBMC.vcxproj b/project/VS2010Express/XBMC.vcxproj -index 2d37c57..e8e8dce 100644 ---- a/project/VS2010Express/XBMC.vcxproj -+++ b/project/VS2010Express/XBMC.vcxproj -@@ -1439,10 +1439,6 @@ - true - true - -- -- true -- true -- - - - -diff --git a/project/VS2010Express/XBMC.vcxproj.filters b/project/VS2010Express/XBMC.vcxproj.filters -index c858f32..cada31e 100644 ---- a/project/VS2010Express/XBMC.vcxproj.filters -+++ b/project/VS2010Express/XBMC.vcxproj.filters -@@ -2371,9 +2371,6 @@ - - utils\test - -- -- utils\test -- - - utils\test - -diff --git a/xbmc/cores/VideoRenderers/LinuxRendererGLES.cpp b/xbmc/cores/VideoRenderers/LinuxRendererGLES.cpp -index 2b64121..fdad7f0 100644 ---- a/xbmc/cores/VideoRenderers/LinuxRendererGLES.cpp -+++ b/xbmc/cores/VideoRenderers/LinuxRendererGLES.cpp -@@ -31,7 +31,6 @@ - #include - #include "guilib/MatrixGLES.h" - #include "LinuxRendererGLES.h" --#include "utils/fastmemcpy.h" - #include "utils/MathUtils.h" - #include "utils/GLUtils.h" - #include "utils/log.h" -diff --git a/xbmc/cores/VideoRenderers/RenderCapture.cpp b/xbmc/cores/VideoRenderers/RenderCapture.cpp -index 603b68d..0456a27 100644 ---- a/xbmc/cores/VideoRenderers/RenderCapture.cpp -+++ b/xbmc/cores/VideoRenderers/RenderCapture.cpp -@@ -21,7 +21,6 @@ - #include "RenderCapture.h" - #include "utils/log.h" - #include "windowing/WindowingFactory.h" --#include "utils/fastmemcpy.h" - #include "settings/AdvancedSettings.h" - - CRenderCaptureBase::CRenderCaptureBase() -@@ -297,7 +296,7 @@ void CRenderCaptureGL::PboToBuffer() - - if (pboPtr) - { -- fast_memcpy(m_pixels, pboPtr, m_bufferSize); -+ memcpy(m_pixels, pboPtr, m_bufferSize); - SetState(CAPTURESTATE_DONE); - } - else -@@ -491,12 +490,12 @@ void CRenderCaptureDX::SurfaceToBuffer() - //if pitch is same, do a direct copy, otherwise copy one line at a time - if (lockedRect.Pitch == m_width * 4) - { -- fast_memcpy(m_pixels, lockedRect.pBits, m_width * m_height * 4); -+ memcpy(m_pixels, lockedRect.pBits, m_width * m_height * 4); - } - else - { - for (unsigned int y = 0; y < m_height; y++) -- fast_memcpy(m_pixels + y * m_width * 4, (uint8_t*)lockedRect.pBits + y * lockedRect.Pitch, m_width * 4); -+ memcpy(m_pixels + y * m_width * 4, (uint8_t*)lockedRect.pBits + y * lockedRect.Pitch, m_width * 4); - } - m_copySurface->UnlockRect(); - SetState(CAPTURESTATE_DONE); -diff --git a/xbmc/cores/dvdplayer/DVDCodecs/DVDCodecUtils.cpp b/xbmc/cores/dvdplayer/DVDCodecs/DVDCodecUtils.cpp -index 56e68713..5f0e486 100644 ---- a/xbmc/cores/dvdplayer/DVDCodecs/DVDCodecUtils.cpp -+++ b/xbmc/cores/dvdplayer/DVDCodecs/DVDCodecUtils.cpp -@@ -22,7 +22,6 @@ - #include "DVDClock.h" - #include "cores/VideoRenderers/RenderManager.h" - #include "utils/log.h" --#include "utils/fastmemcpy.h" - #include "cores/FFmpeg.h" - #include "Util.h" - #ifdef HAS_DX -@@ -95,7 +94,7 @@ bool CDVDCodecUtils::CopyPicture(DVDVideoPicture* pDst, DVDVideoPicture* pSrc) - - for (int y = 0; y < h; y++) - { -- fast_memcpy(d, s, w); -+ memcpy(d, s, w); - s += pSrc->iLineSize[0]; - d += pDst->iLineSize[0]; - } -@@ -107,7 +106,7 @@ bool CDVDCodecUtils::CopyPicture(DVDVideoPicture* pDst, DVDVideoPicture* pSrc) - d = pDst->data[1]; - for (int y = 0; y < h; y++) - { -- fast_memcpy(d, s, w); -+ memcpy(d, s, w); - s += pSrc->iLineSize[1]; - d += pDst->iLineSize[1]; - } -@@ -116,7 +115,7 @@ bool CDVDCodecUtils::CopyPicture(DVDVideoPicture* pDst, DVDVideoPicture* pSrc) - d = 
pDst->data[2]; - for (int y = 0; y < h; y++) - { -- fast_memcpy(d, s, w); -+ memcpy(d, s, w); - s += pSrc->iLineSize[2]; - d += pDst->iLineSize[2]; - } -@@ -131,13 +130,13 @@ bool CDVDCodecUtils::CopyPicture(YV12Image* pImage, DVDVideoPicture *pSrc) - int h = pImage->height; - if ((w == pSrc->iLineSize[0]) && ((unsigned int) pSrc->iLineSize[0] == pImage->stride[0])) - { -- fast_memcpy(d, s, w*h); -+ memcpy(d, s, w*h); - } - else - { - for (int y = 0; y < h; y++) - { -- fast_memcpy(d, s, w); -+ memcpy(d, s, w); - s += pSrc->iLineSize[0]; - d += pImage->stride[0]; - } -@@ -148,13 +147,13 @@ bool CDVDCodecUtils::CopyPicture(YV12Image* pImage, DVDVideoPicture *pSrc) - h =(pImage->height >> pImage->cshift_y); - if ((w==pSrc->iLineSize[1]) && ((unsigned int) pSrc->iLineSize[1]==pImage->stride[1])) - { -- fast_memcpy(d, s, w*h); -+ memcpy(d, s, w*h); - } - else - { - for (int y = 0; y < h; y++) - { -- fast_memcpy(d, s, w); -+ memcpy(d, s, w); - s += pSrc->iLineSize[1]; - d += pImage->stride[1]; - } -@@ -163,13 +162,13 @@ bool CDVDCodecUtils::CopyPicture(YV12Image* pImage, DVDVideoPicture *pSrc) - d = pImage->plane[2]; - if ((w==pSrc->iLineSize[2]) && ((unsigned int) pSrc->iLineSize[2]==pImage->stride[2])) - { -- fast_memcpy(d, s, w*h); -+ memcpy(d, s, w*h); - } - else - { - for (int y = 0; y < h; y++) - { -- fast_memcpy(d, s, w); -+ memcpy(d, s, w); - s += pSrc->iLineSize[2]; - d += pImage->stride[2]; - } -@@ -207,7 +206,7 @@ DVDVideoPicture* CDVDCodecUtils::ConvertToNV12Picture(DVDVideoPicture *pSrc) - uint8_t *d = pPicture->data[0]; - for (int y = 0; y < (int)pSrc->iHeight; y++) - { -- fast_memcpy(d, s, pSrc->iWidth); -+ memcpy(d, s, pSrc->iWidth); - s += pSrc->iLineSize[0]; - d += pPicture->iLineSize[0]; - } -@@ -298,13 +297,13 @@ bool CDVDCodecUtils::CopyNV12Picture(YV12Image* pImage, DVDVideoPicture *pSrc) - // Copy Y - if ((w == pSrc->iLineSize[0]) && ((unsigned int) pSrc->iLineSize[0] == pImage->stride[0])) - { -- fast_memcpy(d, s, w*h); -+ memcpy(d, s, w*h); - } - else - { - for (int y = 0; y < h; y++) - { -- fast_memcpy(d, s, w); -+ memcpy(d, s, w); - s += pSrc->iLineSize[0]; - d += pImage->stride[0]; - } -@@ -317,13 +316,13 @@ bool CDVDCodecUtils::CopyNV12Picture(YV12Image* pImage, DVDVideoPicture *pSrc) - // Copy packed UV (width is same as for Y as it's both U and V components) - if ((w==pSrc->iLineSize[1]) && ((unsigned int) pSrc->iLineSize[1]==pImage->stride[1])) - { -- fast_memcpy(d, s, w*h); -+ memcpy(d, s, w*h); - } - else - { - for (int y = 0; y < h; y++) - { -- fast_memcpy(d, s, w); -+ memcpy(d, s, w); - s += pSrc->iLineSize[1]; - d += pImage->stride[1]; - } -@@ -342,13 +341,13 @@ bool CDVDCodecUtils::CopyYUV422PackedPicture(YV12Image* pImage, DVDVideoPicture - // Copy YUYV - if ((w * 2 == pSrc->iLineSize[0]) && ((unsigned int) pSrc->iLineSize[0] == pImage->stride[0])) - { -- fast_memcpy(d, s, w*h*2); -+ memcpy(d, s, w*h*2); - } - else - { - for (int y = 0; y < h; y++) - { -- fast_memcpy(d, s, w*2); -+ memcpy(d, s, w*2); - s += pSrc->iLineSize[0]; - d += pImage->stride[0]; - } -diff --git a/xbmc/cores/dvdplayer/DVDCodecs/Video/libstagefrightICS/StageFrightVideo.cpp b/xbmc/cores/dvdplayer/DVDCodecs/Video/libstagefrightICS/StageFrightVideo.cpp -index 019bc7a..d5ca74f 100644 ---- a/xbmc/cores/dvdplayer/DVDCodecs/Video/libstagefrightICS/StageFrightVideo.cpp -+++ b/xbmc/cores/dvdplayer/DVDCodecs/Video/libstagefrightICS/StageFrightVideo.cpp -@@ -30,7 +30,6 @@ - #include "guilib/GraphicContext.h" - #include "DVDClock.h" - #include "utils/log.h" --#include "utils/fastmemcpy.h" - 
#include "threads/Thread.h" - #include "threads/Event.h" - #include "Application.h" -@@ -620,7 +619,7 @@ int CStageFrightVideo::Decode(uint8_t *pData, int iSize, double dts, double pts - return VC_ERROR; - } - -- fast_memcpy(frame->medbuf->data(), demuxer_content, demuxer_bytes); -+ memcpy(frame->medbuf->data(), demuxer_content, demuxer_bytes); - frame->medbuf->set_range(0, demuxer_bytes); - frame->medbuf->meta_data()->clear(); - frame->medbuf->meta_data()->setInt64(kKeyTime, frame->pts); -diff --git a/xbmc/utils/Makefile.in b/xbmc/utils/Makefile.in -index 438f025..dbd3db9 100644 ---- a/xbmc/utils/Makefile.in -+++ b/xbmc/utils/Makefile.in -@@ -17,8 +17,6 @@ SRCS += DatabaseUtils.cpp - SRCS += EndianSwap.cpp - SRCS += Environment.cpp - SRCS += Fanart.cpp --SRCS += fastmemcpy.c --SRCS += fastmemcpy-arm.S - SRCS += FileOperationJob.cpp - SRCS += FileUtils.cpp - SRCS += fstrcmp.c -diff --git a/xbmc/utils/fastmemcpy-arm.S b/xbmc/utils/fastmemcpy-arm.S -deleted file mode 100644 -index 6cb8b0c..0000000 ---- a/xbmc/utils/fastmemcpy-arm.S -+++ /dev/null -@@ -1,528 +0,0 @@ --/* -- * Copyright (C) 2008 The Android Open Source Project -- * All rights reserved. -- * -- * Copyright (C) 2011-2013 Team XBMC -- * http://xbmc.org -- * -- * This Program is free software; you can redistribute it and/or modify -- * it under the terms of the GNU General Public License as published by -- * the Free Software Foundation; either version 2, or (at your option) -- * any later version. -- * -- * This Program is distributed in the hope that it will be useful, -- * but WITHOUT ANY WARRANTY; without even the implied warranty of -- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- * GNU General Public License for more details. -- * -- * You should have received a copy of the GNU General Public License -- * along with XBMC; see the file COPYING. If not, see -- * . -- * -- */ --#if defined(__arm__) && !defined(TARGET_ANDROID) && !defined(TARGET_DARWIN_IOS) --#if defined(__ARM_NEON__) -- -- .text --#ifndef __APPLE__ -- .fpu neon -- .global fast_memcpy -- .type fast_memcpy, %function --#else -- .globl _fast_memcpy --#endif -- .align 4 -- --/* a prefetch distance of 4 cache-lines works best experimentally */ --#define CACHE_LINE_SIZE 64 --#define PREFETCH_DISTANCE (CACHE_LINE_SIZE*4) -- --#ifndef __APPLE__ -- .fnstart -- .save {r0, lr} --fast_memcpy: --#else --_fast_memcpy: --#endif -- stmfd sp!, {r0, lr} -- -- /* start preloading as early as possible */ -- pld [r1, #(CACHE_LINE_SIZE*0)] -- pld [r1, #(CACHE_LINE_SIZE*1)] -- -- /* do we have at least 16-bytes to copy (needed for alignment below) */ -- cmp r2, #16 -- blo 5f -- -- /* align destination to half cache-line for the write-buffer */ -- rsb r3, r0, #0 -- ands r3, r3, #0xF -- beq 0f -- -- /* copy up to 15-bytes (count in r3) */ -- sub r2, r2, r3 -- movs ip, r3, lsl #31 -- ldrmib lr, [r1], #1 -- strmib lr, [r0], #1 -- ldrcsb ip, [r1], #1 -- ldrcsb lr, [r1], #1 -- strcsb ip, [r0], #1 -- strcsb lr, [r0], #1 -- movs ip, r3, lsl #29 -- bge 1f -- // copies 4 bytes, destination 32-bits aligned -- vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! -- vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0, :32]! --1: bcc 2f -- // copies 8 bytes, destination 64-bits aligned -- vld1.8 {d0}, [r1]! -- vst1.8 {d0}, [r0, :64]! 
--2: -- --0: /* preload immediately the next cache line, which we may need */ -- pld [r1, #(CACHE_LINE_SIZE*0)] -- pld [r1, #(CACHE_LINE_SIZE*1)] -- -- /* make sure we have at least 64 bytes to copy */ -- subs r2, r2, #64 -- blo 2f -- -- /* preload all the cache lines we need. -- * NOTE: the number of pld below depends on PREFETCH_DISTANCE, -- * ideally would would increase the distance in the main loop to -- * avoid the goofy code below. In practice this doesn't seem to make -- * a big difference. -- */ -- pld [r1, #(CACHE_LINE_SIZE*2)] -- pld [r1, #(CACHE_LINE_SIZE*3)] -- pld [r1, #(PREFETCH_DISTANCE)] -- --1: /* The main loop copies 64 bytes at a time */ -- vld1.8 {d0 - d3}, [r1]! -- vld1.8 {d4 - d7}, [r1]! -- pld [r1, #(PREFETCH_DISTANCE)] -- subs r2, r2, #64 -- vst1.8 {d0 - d3}, [r0, :128]! -- vst1.8 {d4 - d7}, [r0, :128]! -- bhs 1b -- --2: /* fix-up the remaining count and make sure we have >= 32 bytes left */ -- add r2, r2, #64 -- subs r2, r2, #32 -- blo 4f -- --3: /* 32 bytes at a time. These cache lines were already preloaded */ -- vld1.8 {d0 - d3}, [r1]! -- subs r2, r2, #32 -- vst1.8 {d0 - d3}, [r0, :128]! -- bhs 3b -- --4: /* less than 32 left */ -- add r2, r2, #32 -- tst r2, #0x10 -- beq 5f -- // copies 16 bytes, 128-bits aligned -- vld1.8 {d0, d1}, [r1]! -- vst1.8 {d0, d1}, [r0, :128]! -- --5: /* copy up to 15-bytes (count in r2) */ -- movs ip, r2, lsl #29 -- bcc 1f -- vld1.8 {d0}, [r1]! -- vst1.8 {d0}, [r0]! --1: bge 2f -- vld4.8 {d0[0], d1[0], d2[0], d3[0]}, [r1]! -- vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0]! --2: movs ip, r2, lsl #31 -- ldrmib r3, [r1], #1 -- ldrcsb ip, [r1], #1 -- ldrcsb lr, [r1], #1 -- strmib r3, [r0], #1 -- strcsb ip, [r0], #1 -- strcsb lr, [r0], #1 -- -- ldmfd sp!, {r0, lr} -- bx lr --#ifndef __APPLE__ -- .fnend --#endif -- --#else /* __ARM_ARCH__ < 7 */ -- -- -- .text -- --#ifndef __APPLE__ -- .global fast_memcpy -- .type fast_memcpy, %function --#else -- .globl _fast_memcpy --#endif -- .align 4 -- -- /* -- * Optimized memcpy() for ARM. -- * -- * note that memcpy() always returns the destination pointer, -- * so we have to preserve R0. -- */ -- --#ifndef __APPLE__ --fast_memcpy: --#else --_fast_memcpy: --#endif -- /* The stack must always be 64-bits aligned to be compliant with the -- * ARM ABI. Since we have to save R0, we might as well save R4 -- * which we can use for better pipelining of the reads below -- */ --#ifndef __APPLE__ -- .fnstart -- .save {r0, r4, lr} --#endif -- stmfd sp!, {r0, r4, lr} -- /* Making room for r5-r11 which will be spilled later */ -- .pad #28 -- sub sp, sp, #28 -- -- // preload the destination because we'll align it to a cache line -- // with small writes. Also start the source "pump". -- //PLD (r0, #0) -- //PLD (r1, #0) -- //PLD (r1, #32) -- -- /* it simplifies things to take care of len<4 early */ -- cmp r2, #4 -- blo copy_last_3_and_return -- -- /* compute the offset to align the source -- * offset = (4-(src&3))&3 = -src & 3 -- */ -- rsb r3, r1, #0 -- ands r3, r3, #3 -- beq src_aligned -- -- /* align source to 32 bits. We need to insert 2 instructions between -- * a ldr[b|h] and str[b|h] because byte and half-word instructions -- * stall 2 cycles. 
-- */ -- movs r12, r3, lsl #31 -- sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */ -- ldrmib r3, [r1], #1 -- ldrcsb r4, [r1], #1 -- ldrcsb r12,[r1], #1 -- strmib r3, [r0], #1 -- strcsb r4, [r0], #1 -- strcsb r12,[r0], #1 -- --src_aligned: -- -- /* see if src and dst are aligned together (congruent) */ -- eor r12, r0, r1 -- tst r12, #3 -- bne non_congruent -- -- /* Use post-incriment mode for stm to spill r5-r11 to reserved stack -- * frame. Don't update sp. -- */ -- stmea sp, {r5-r11} -- -- /* align the destination to a cache-line */ -- rsb r3, r0, #0 -- ands r3, r3, #0x1C -- beq congruent_aligned32 -- cmp r3, r2 -- andhi r3, r2, #0x1C -- -- /* conditionnaly copies 0 to 7 words (length in r3) */ -- movs r12, r3, lsl #28 -- ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ -- ldmmiia r1!, {r8, r9} /* 8 bytes */ -- stmcsia r0!, {r4, r5, r6, r7} -- stmmiia r0!, {r8, r9} -- tst r3, #0x4 -- ldrne r10,[r1], #4 /* 4 bytes */ -- strne r10,[r0], #4 -- sub r2, r2, r3 -- --congruent_aligned32: -- /* -- * here source is aligned to 32 bytes. -- */ -- --cached_aligned32: -- subs r2, r2, #32 -- blo less_than_32_left -- -- /* -- * We preload a cache-line up to 64 bytes ahead. On the 926, this will -- * stall only until the requested world is fetched, but the linefill -- * continues in the the background. -- * While the linefill is going, we write our previous cache-line -- * into the write-buffer (which should have some free space). -- * When the linefill is done, the writebuffer will -- * start dumping its content into memory -- * -- * While all this is going, we then load a full cache line into -- * 8 registers, this cache line should be in the cache by now -- * (or partly in the cache). -- * -- * This code should work well regardless of the source/dest alignment. -- * -- */ -- -- // Align the preload register to a cache-line because the cpu does -- // "critical word first" (the first word requested is loaded first). -- bic r12, r1, #0x1F -- add r12, r12, #64 -- --1: ldmia r1!, { r4-r11 } -- //PLD (r12, #64) -- subs r2, r2, #32 -- -- // NOTE: if r12 is more than 64 ahead of r1, the following ldrhi -- // for ARM9 preload will not be safely guarded by the preceding subs. -- // When it is safely guarded the only possibility to have SIGSEGV here -- // is because the caller overstates the length. -- ldrhi r3, [r12], #32 /* cheap ARM9 preload */ -- stmia r0!, { r4-r11 } -- bhs 1b -- -- add r2, r2, #32 -- -- -- -- --less_than_32_left: -- /* -- * less than 32 bytes left at this point (length in r2) -- */ -- -- /* skip all this if there is nothing to do, which should -- * be a common case (if not executed the code below takes -- * about 16 cycles) -- */ -- tst r2, #0x1F -- beq 1f -- -- /* conditionnaly copies 0 to 31 bytes */ -- movs r12, r2, lsl #28 -- ldmcsia r1!, {r4, r5, r6, r7} /* 16 bytes */ -- ldmmiia r1!, {r8, r9} /* 8 bytes */ -- stmcsia r0!, {r4, r5, r6, r7} -- stmmiia r0!, {r8, r9} -- movs r12, r2, lsl #30 -- ldrcs r3, [r1], #4 /* 4 bytes */ -- ldrmih r4, [r1], #2 /* 2 bytes */ -- strcs r3, [r0], #4 -- strmih r4, [r0], #2 -- tst r2, #0x1 -- ldrneb r3, [r1] /* last byte */ -- strneb r3, [r0] -- -- /* we're done! restore everything and return */ --1: ldmfd sp!, {r5-r11} -- ldmfd sp!, {r0, r4, lr} -- bx lr -- -- /********************************************************************/ -- --non_congruent: -- /* -- * here source is aligned to 4 bytes -- * but destination is not. 
-- * -- * in the code below r2 is the number of bytes read -- * (the number of bytes written is always smaller, because we have -- * partial words in the shift queue) -- */ -- cmp r2, #4 -- blo copy_last_3_and_return -- -- /* Use post-incriment mode for stm to spill r5-r11 to reserved stack -- * frame. Don't update sp. -- */ -- stmea sp, {r5-r11} -- -- /* compute shifts needed to align src to dest */ -- rsb r5, r0, #0 -- and r5, r5, #3 /* r5 = # bytes in partial words */ -- mov r12, r5, lsl #3 /* r12 = right */ -- rsb lr, r12, #32 /* lr = left */ -- -- /* read the first word */ -- ldr r3, [r1], #4 -- sub r2, r2, #4 -- -- /* write a partial word (0 to 3 bytes), such that destination -- * becomes aligned to 32 bits (r5 = nb of words to copy for alignment) -- */ -- movs r5, r5, lsl #31 -- strmib r3, [r0], #1 -- movmi r3, r3, lsr #8 -- strcsb r3, [r0], #1 -- movcs r3, r3, lsr #8 -- strcsb r3, [r0], #1 -- movcs r3, r3, lsr #8 -- -- cmp r2, #4 -- blo partial_word_tail -- -- /* Align destination to 32 bytes (cache line boundary) */ --1: tst r0, #0x1c -- beq 2f -- ldr r5, [r1], #4 -- sub r2, r2, #4 -- orr r4, r3, r5, lsl lr -- mov r3, r5, lsr r12 -- str r4, [r0], #4 -- cmp r2, #4 -- bhs 1b -- blo partial_word_tail -- -- /* copy 32 bytes at a time */ --2: subs r2, r2, #32 -- blo less_than_thirtytwo -- -- /* Use immediate mode for the shifts, because there is an extra cycle -- * for register shifts, which could account for up to 50% of -- * performance hit. -- */ -- -- cmp r12, #24 -- beq loop24 -- cmp r12, #8 -- beq loop8 -- --loop16: -- ldr r12, [r1], #4 --1: mov r4, r12 -- ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} -- //PLD (r1, #64) -- subs r2, r2, #32 -- ldrhs r12, [r1], #4 -- orr r3, r3, r4, lsl #16 -- mov r4, r4, lsr #16 -- orr r4, r4, r5, lsl #16 -- mov r5, r5, lsr #16 -- orr r5, r5, r6, lsl #16 -- mov r6, r6, lsr #16 -- orr r6, r6, r7, lsl #16 -- mov r7, r7, lsr #16 -- orr r7, r7, r8, lsl #16 -- mov r8, r8, lsr #16 -- orr r8, r8, r9, lsl #16 -- mov r9, r9, lsr #16 -- orr r9, r9, r10, lsl #16 -- mov r10, r10, lsr #16 -- orr r10, r10, r11, lsl #16 -- stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} -- mov r3, r11, lsr #16 -- bhs 1b -- b less_than_thirtytwo -- --loop8: -- ldr r12, [r1], #4 --1: mov r4, r12 -- ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} -- //PLD (r1, #64) -- subs r2, r2, #32 -- ldrhs r12, [r1], #4 -- orr r3, r3, r4, lsl #24 -- mov r4, r4, lsr #8 -- orr r4, r4, r5, lsl #24 -- mov r5, r5, lsr #8 -- orr r5, r5, r6, lsl #24 -- mov r6, r6, lsr #8 -- orr r6, r6, r7, lsl #24 -- mov r7, r7, lsr #8 -- orr r7, r7, r8, lsl #24 -- mov r8, r8, lsr #8 -- orr r8, r8, r9, lsl #24 -- mov r9, r9, lsr #8 -- orr r9, r9, r10, lsl #24 -- mov r10, r10, lsr #8 -- orr r10, r10, r11, lsl #24 -- stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} -- mov r3, r11, lsr #8 -- bhs 1b -- b less_than_thirtytwo -- --loop24: -- ldr r12, [r1], #4 --1: mov r4, r12 -- ldmia r1!, { r5,r6,r7, r8,r9,r10,r11} -- //PLD (r1, #64) -- subs r2, r2, #32 -- ldrhs r12, [r1], #4 -- orr r3, r3, r4, lsl #8 -- mov r4, r4, lsr #24 -- orr r4, r4, r5, lsl #8 -- mov r5, r5, lsr #24 -- orr r5, r5, r6, lsl #8 -- mov r6, r6, lsr #24 -- orr r6, r6, r7, lsl #8 -- mov r7, r7, lsr #24 -- orr r7, r7, r8, lsl #8 -- mov r8, r8, lsr #24 -- orr r8, r8, r9, lsl #8 -- mov r9, r9, lsr #24 -- orr r9, r9, r10, lsl #8 -- mov r10, r10, lsr #24 -- orr r10, r10, r11, lsl #8 -- stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10} -- mov r3, r11, lsr #24 -- bhs 1b -- -- --less_than_thirtytwo: -- /* copy the last 0 to 31 bytes of the source */ -- rsb r12, lr, #32 /* we corrupted r12, recompute it */ -- add 
r2, r2, #32
-- cmp r2, #4
-- blo partial_word_tail
--
--1: ldr r5, [r1], #4
-- sub r2, r2, #4
-- orr r4, r3, r5, lsl lr
-- mov r3, r5, lsr r12
-- str r4, [r0], #4
-- cmp r2, #4
-- bhs 1b
--
--partial_word_tail:
-- /* we have a partial word in the input buffer */
-- movs r5, lr, lsl #(31-3)
-- strmib r3, [r0], #1
-- movmi r3, r3, lsr #8
-- strcsb r3, [r0], #1
-- movcs r3, r3, lsr #8
-- strcsb r3, [r0], #1
--
-- /* Refill spilled registers from the stack. Don't update sp. */
-- ldmfd sp, {r5-r11}
--
--copy_last_3_and_return:
-- movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
-- ldrmib r2, [r1], #1
-- ldrcsb r3, [r1], #1
-- ldrcsb r12,[r1]
-- strmib r2, [r0], #1
-- strcsb r3, [r0], #1
-- strcsb r12,[r0]
--
-- /* we're done! restore sp and spilled registers and return */
-- add sp, sp, #28
-- ldmfd sp!, {r0, r4, lr}
-- bx lr
--#ifndef __APPLE__
-- .fnend
--#endif
--
--#endif /* __ARM_ARCH__ < 7 */
--#endif
--
--#if defined(__linux__) && defined(__ELF__)
--/* we don't need an executable stack */
--.section .note.GNU-stack,"",%progbits
--#endif
-diff --git a/xbmc/utils/fastmemcpy.c b/xbmc/utils/fastmemcpy.c
-deleted file mode 100644
-index ec9019a..0000000
---- a/xbmc/utils/fastmemcpy.c
-+++ /dev/null
-@@ -1,396 +0,0 @@
--/*
-- * fastmemcpy.h : fast memcpy routines
-- *****************************************************************************
-- * $Id: fastmemcpy.h 13905 2006-01-12 23:10:04Z dionoea $
-- *
-- * Authors: various Linux kernel hackers
-- * various MPlayer hackers
-- * Nick Kurshev <nickols_k@mail.ru>
-- *
-- * Copyright (C) 2011-2013 Team XBMC
-- * http://xbmc.org
-- *
-- * This Program is free software; you can redistribute it and/or modify
-- * it under the terms of the GNU General Public License as published by
-- * the Free Software Foundation; either version 2, or (at your option)
-- * any later version.
-- *
-- * This Program is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with XBMC; see the file COPYING. If not, see
-- * <http://www.gnu.org/licenses/>.
-- *
-- */
--#if !defined(TARGET_WINDOWS) && !defined(__ppc__) && !defined(__powerpc__) && !defined(__arm__) && !defined(__mips__)
--#define HAVE_MMX2
--#define HAVE_SSE
--
--/*
-- aclib - advanced C library ;)
-- This file contains functions which improve and expand standard C-library
--*/
--#include <stddef.h>
--
--#define BLOCK_SIZE 4096
--#define CONFUSION_FACTOR 0
--/*Feel free to fine-tune the above 2, it might be possible to get some speedup with them :)*/
--
--/*#define STATISTICS*/
--
--#ifndef HAVE_SSE2
--/*
-- P3 processor has only one SSE decoder so can execute only 1 sse insn per
-- cpu clock, but it has 3 mmx decoders (include load/store unit)
-- and executes 3 mmx insns per cpu clock.
-- P4 processor has some chances, but after reading:
-- http://www.emulators.com/pentium4.htm
-- I have doubts. Anyway SSE2 version of this code can be written better.
--*/
--#undef HAVE_SSE
--#endif
--
--
--/*
-- This part of code was taken by me from Linux-2.4.3 and slightly modified
--for MMX, MMX2, SSE instruction set. I have done it since linux uses page aligned
--blocks but mplayer uses weakly ordered data and original sources can not
--speedup them. Only using PREFETCHNTA and MOVNTQ together have effect!
-- -->From IA-32 Intel Architecture Software Developer's Manual Volume 1, -- --Order Number 245470: --"10.4.6. Cacheability Control, Prefetch, and Memory Ordering Instructions" -- --Data referenced by a program can be temporal (data will be used again) or --non-temporal (data will be referenced once and not reused in the immediate --future). To make efficient use of the processor's caches, it is generally --desirable to cache temporal data and not cache non-temporal data. Overloading --the processor's caches with non-temporal data is sometimes referred to as --"polluting the caches". --The non-temporal data is written to memory with Write-Combining semantics. -- --The PREFETCHh instructions permits a program to load data into the processor --at a suggested cache level, so that it is closer to the processors load and --store unit when it is needed. If the data is already present in a level of --the cache hierarchy that is closer to the processor, the PREFETCHh instruction --will not result in any data movement. --But we should you PREFETCHNTA: Non-temporal data fetch data into location --close to the processor, minimizing cache pollution. -- --The MOVNTQ (store quadword using non-temporal hint) instruction stores --packed integer data from an MMX register to memory, using a non-temporal hint. --The MOVNTPS (store packed single-precision floating-point values using --non-temporal hint) instruction stores packed floating-point data from an --XMM register to memory, using a non-temporal hint. -- --The SFENCE (Store Fence) instruction controls write ordering by creating a --fence for memory store operations. This instruction guarantees that the results --of every store instruction that precedes the store fence in program order is --globally visible before any store instruction that follows the fence. The --SFENCE instruction provides an efficient way of ensuring ordering between --procedures that produce weakly-ordered data and procedures that consume that --data. -- --If you have questions please contact with me: Nick Kurshev: nickols_k@mail.ru. --*/ -- --/* 3dnow memcpy support from kernel 2.4.2 */ --/* by Pontscho/fresh!mindworkz */ -- --#if defined( HAVE_MMX2 ) || defined( HAVE_3DNOW ) || defined( HAVE_MMX ) -- --#undef HAVE_MMX1 --#if defined(HAVE_MMX) && !defined(HAVE_MMX2) && !defined(HAVE_3DNOW) && !defined(HAVE_SSE) --/* means: mmx v.1. Note: Since we added alignment of destinition it speedups -- of memory copying on PentMMX, Celeron-1 and P2 upto 12% versus -- standard (non MMX-optimized) version. -- Note: on K6-2+ it speedups memory copying upto 25% and -- on K7 and P3 about 500% (5 times). */ --#define HAVE_MMX1 --#endif -- -- --#undef HAVE_K6_2PLUS --#if !defined( HAVE_MMX2) && defined( HAVE_3DNOW) --#define HAVE_K6_2PLUS --#endif -- --/* for small memory blocks (<256 bytes) this version is faster */ --#define small_memcpy(to,from,n)\ --{\ --register unsigned long int dummy;\ --__asm__ __volatile__(\ -- "rep; movsb"\ -- :"=&D"(to), "=&S"(from), "=&c"(dummy)\ --/* It's most portable way to notify compiler */\ --/* that edi, esi and ecx are clobbered in asm block. */\ --/* Thanks to A'rpi for hint!!! */\ -- :"0" (to), "1" (from),"2" (n)\ -- : "memory");\ --} -- --#ifdef HAVE_SSE --#define MMREG_SIZE 16 --#else --#define MMREG_SIZE 64 /*8*/ --#endif -- --/* Small defines (for readability only) ;) */ --#ifdef HAVE_K6_2PLUS --#define PREFETCH "prefetch" --/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. 
*/
--#define EMMS "femms"
--#else
--#define PREFETCH "prefetchnta"
--#define EMMS "emms"
--#endif
--
--#ifdef HAVE_MMX2
--#define MOVNTQ "movntq"
--#else
--#define MOVNTQ "movq"
--#endif
--
--#ifdef HAVE_MMX1
--#define MIN_LEN 0x800 /* 2K blocks */
--#else
--#define MIN_LEN 0x40 /* 64-byte blocks */
--#endif
--
--void * fast_memcpy(void * to, const void * from, size_t len)
--{
-- void *retval;
-- size_t i;
-- retval = to;
--#ifdef STATISTICS
-- {
-- static int freq[33];
-- static int t=0;
-- int i;
-- for(i=0; len>(1<<i); i++);
-- freq[i]++;
-- t++;
-- if(1024*1024*1024 % t == 0)
-- for(i=0; i<32; i++)
-- printf("freq < %8d %4d\n", 1<<i, freq[i]);
-- }
--#endif
--#ifndef HAVE_MMX1
-- /* PREFETCH has effect even for MOVSB instruction ;) */
-- __asm__ __volatile__ (
-- PREFETCH" (%0)\n"
-- PREFETCH" 64(%0)\n"
-- PREFETCH" 128(%0)\n"
-- PREFETCH" 192(%0)\n"
-- PREFETCH" 256(%0)\n"
-- : : "r" (from) );
--#endif
-- if(len >= MIN_LEN)
-- {
-- register unsigned long int delta;
-- /* Align destinition to MMREG_SIZE -boundary */
-- delta = ((unsigned long int)to)&(MMREG_SIZE-1);
-- if(delta)
-- {
-- delta=MMREG_SIZE-delta;
-- len -= delta;
-- small_memcpy(to, from, delta);
-- }
-- i = len >> 6; /* len/64 */
-- len&=63;
-- /*
-- This algorithm is top effective when the code consequently
-- reads and writes blocks which have size of cache line.
-- Size of cache line is processor-dependent.
-- It will, however, be a minimum of 32 bytes on any processors.
-- It would be better to have a number of instructions which
-- perform reading and writing to be multiple to a number of
-- processor's decoders, but it's not always possible.
-- */
--#ifdef HAVE_SSE /* Only P3 (may be Cyrix3) */
-- if(((unsigned long)from) & 15)
-- /* if SRC is misaligned */
-- for(; i>0; i--)
-- {
-- __asm__ __volatile__ (
-- PREFETCH" 320(%0)\n"
-- "movups (%0), %%xmm0\n"
-- "movups 16(%0), %%xmm1\n"
-- "movups 32(%0), %%xmm2\n"
-- "movups 48(%0), %%xmm3\n"
-- "movntps %%xmm0, (%1)\n"
-- "movntps %%xmm1, 16(%1)\n"
-- "movntps %%xmm2, 32(%1)\n"
-- "movntps %%xmm3, 48(%1)\n"
-- :: "r" (from), "r" (to) : "memory");
-- ((const unsigned char *)from)+=64;
-- ((unsigned char *)to)+=64;
-- }
-- else
-- /*
-- Only if SRC is aligned on 16-byte boundary.
-- It allows to use movaps instead of movups, which required data
-- to be aligned or a general-protection exception (#GP) is generated.
-- */ -- for(; i>0; i--) -- { -- __asm__ __volatile__ ( -- PREFETCH" 320(%0)\n" -- "movaps (%0), %%xmm0\n" -- "movaps 16(%0), %%xmm1\n" -- "movaps 32(%0), %%xmm2\n" -- "movaps 48(%0), %%xmm3\n" -- "movntps %%xmm0, (%1)\n" -- "movntps %%xmm1, 16(%1)\n" -- "movntps %%xmm2, 32(%1)\n" -- "movntps %%xmm3, 48(%1)\n" -- :: "r" (from), "r" (to) : "memory"); -- ((const unsigned char *)from)+=64; -- ((unsigned char *)to)+=64; -- } --#else -- /* Align destination at BLOCK_SIZE boundary */ -- for(; ((ptrdiff_t)to & (BLOCK_SIZE-1)) && i>0; i--) -- { -- __asm__ __volatile__ ( --#ifndef HAVE_MMX1 -- PREFETCH" 320(%0)\n" --#endif -- "movq (%0), %%mm0\n" -- "movq 8(%0), %%mm1\n" -- "movq 16(%0), %%mm2\n" -- "movq 24(%0), %%mm3\n" -- "movq 32(%0), %%mm4\n" -- "movq 40(%0), %%mm5\n" -- "movq 48(%0), %%mm6\n" -- "movq 56(%0), %%mm7\n" -- MOVNTQ" %%mm0, (%1)\n" -- MOVNTQ" %%mm1, 8(%1)\n" -- MOVNTQ" %%mm2, 16(%1)\n" -- MOVNTQ" %%mm3, 24(%1)\n" -- MOVNTQ" %%mm4, 32(%1)\n" -- MOVNTQ" %%mm5, 40(%1)\n" -- MOVNTQ" %%mm6, 48(%1)\n" -- MOVNTQ" %%mm7, 56(%1)\n" -- :: "r" (from), "r" (to) : "memory"); -- from = (const void *) (((const unsigned char *)from)+64); -- to = (void *) (((unsigned char *)to)+64); -- } -- --/* printf(" %p %p\n", (ptrdiff_t)from&1023, (ptrdiff_t)to&1023); */ -- /* Pure Assembly cuz gcc is a bit unpredictable ;) */ --# if 0 -- if(i>=BLOCK_SIZE/64) -- asm volatile( -- "xorl %%eax, %%eax \n\t" -- ".balign 16 \n\t" -- "1: \n\t" -- "movl (%0, %%eax), %%ebx \n\t" -- "movl 32(%0, %%eax), %%ebx \n\t" -- "movl 64(%0, %%eax), %%ebx \n\t" -- "movl 96(%0, %%eax), %%ebx \n\t" -- "addl $128, %%eax \n\t" -- "cmpl %3, %%eax \n\t" -- " jb 1b \n\t" -- -- "xorl %%eax, %%eax \n\t" -- -- ".balign 16 \n\t" -- "2: \n\t" -- "movq (%0, %%eax), %%mm0\n" -- "movq 8(%0, %%eax), %%mm1\n" -- "movq 16(%0, %%eax), %%mm2\n" -- "movq 24(%0, %%eax), %%mm3\n" -- "movq 32(%0, %%eax), %%mm4\n" -- "movq 40(%0, %%eax), %%mm5\n" -- "movq 48(%0, %%eax), %%mm6\n" -- "movq 56(%0, %%eax), %%mm7\n" -- MOVNTQ" %%mm0, (%1, %%eax)\n" -- MOVNTQ" %%mm1, 8(%1, %%eax)\n" -- MOVNTQ" %%mm2, 16(%1, %%eax)\n" -- MOVNTQ" %%mm3, 24(%1, %%eax)\n" -- MOVNTQ" %%mm4, 32(%1, %%eax)\n" -- MOVNTQ" %%mm5, 40(%1, %%eax)\n" -- MOVNTQ" %%mm6, 48(%1, %%eax)\n" -- MOVNTQ" %%mm7, 56(%1, %%eax)\n" -- "addl $64, %%eax \n\t" -- "cmpl %3, %%eax \n\t" -- "jb 2b \n\t" -- --#if CONFUSION_FACTOR > 0 -- /* a few percent speedup on out of order executing CPUs */ -- "movl %5, %%eax \n\t" -- "2: \n\t" -- "movl (%0), %%ebx \n\t" -- "movl (%0), %%ebx \n\t" -- "movl (%0), %%ebx \n\t" -- "movl (%0), %%ebx \n\t" -- "decl %%eax \n\t" -- " jnz 2b \n\t" --#endif -- -- "xorl %%eax, %%eax \n\t" -- "addl %3, %0 \n\t" -- "addl %3, %1 \n\t" -- "subl %4, %2 \n\t" -- "cmpl %4, %2 \n\t" -- " jae 1b \n\t" -- : "+r" (from), "+r" (to), "+r" (i) -- : "r" (BLOCK_SIZE), "i" (BLOCK_SIZE/64), "i" (CONFUSION_FACTOR) -- : "%eax", "%ebx" -- ); --#endif -- -- for(; i>0; i--) -- { -- __asm__ __volatile__ ( --#ifndef HAVE_MMX1 -- PREFETCH" 320(%0)\n" --#endif -- "movq (%0), %%mm0\n" -- "movq 8(%0), %%mm1\n" -- "movq 16(%0), %%mm2\n" -- "movq 24(%0), %%mm3\n" -- "movq 32(%0), %%mm4\n" -- "movq 40(%0), %%mm5\n" -- "movq 48(%0), %%mm6\n" -- "movq 56(%0), %%mm7\n" -- MOVNTQ" %%mm0, (%1)\n" -- MOVNTQ" %%mm1, 8(%1)\n" -- MOVNTQ" %%mm2, 16(%1)\n" -- MOVNTQ" %%mm3, 24(%1)\n" -- MOVNTQ" %%mm4, 32(%1)\n" -- MOVNTQ" %%mm5, 40(%1)\n" -- MOVNTQ" %%mm6, 48(%1)\n" -- MOVNTQ" %%mm7, 56(%1)\n" -- :: "r" (from), "r" (to) : "memory"); -- from = (const void *) (((const unsigned char *)from)+64); -- to = (void *) (((unsigned char 
*)to)+64);
-- }
--
--#endif /* Have SSE */
--#ifdef HAVE_MMX2
-- /* since movntq is weakly-ordered, a "sfence"
-- * is needed to become ordered again. */
-- __asm__ __volatile__ ("sfence":::"memory");
--#endif
--#ifndef HAVE_SSE
-- /* enables to use FPU */
-- __asm__ __volatile__ (EMMS:::"memory");
--#endif
-- }
-- /*
-- * Now do the tail of the block
-- */
-- if(len) small_memcpy(to, from, len);
-- return retval;
--}
--
--
--#endif /* #if defined( HAVE_MMX2 ) || defined( HAVE_3DNOW ) || defined( HAVE_MMX ) */
--
--#endif
-diff --git a/xbmc/utils/fastmemcpy.h b/xbmc/utils/fastmemcpy.h
-deleted file mode 100644
-index 43f5904..0000000
---- a/xbmc/utils/fastmemcpy.h
-+++ /dev/null
-@@ -1,35 +0,0 @@
--/*
-- * Copyright (C) 2005-2013 Team XBMC
-- * http://xbmc.org
-- *
-- * This Program is free software; you can redistribute it and/or modify
-- * it under the terms of the GNU General Public License as published by
-- * the Free Software Foundation; either version 2, or (at your option)
-- * any later version.
-- *
-- * This Program is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with XBMC; see the file COPYING. If not, see
-- * <http://www.gnu.org/licenses/>.
-- *
-- */
--#pragma once
--
--#ifdef __cplusplus
--extern "C" {
--#endif
--
--#if !defined(TARGET_WINDOWS) && !defined(__ppc__) && !defined(__powerpc__) && !defined(__mips__) && !defined(TARGET_ANDROID) && !defined(TARGET_DARWIN_IOS)
--void * fast_memcpy(void * to, const void * from, size_t len);
--//#define fast_memcpy memcpy
--#else
--#define fast_memcpy memcpy
--#endif
--
--#ifdef __cplusplus
--}
--#endif
-diff --git a/xbmc/utils/test/Makefile b/xbmc/utils/test/Makefile
-index 8fa0526..3a467ad 100644
---- a/xbmc/utils/test/Makefile
-+++ b/xbmc/utils/test/Makefile
-@@ -11,7 +11,6 @@ SRCS= \
- TestCryptThreading.cpp \
- TestDatabaseUtils.cpp \
- TestEndianSwap.cpp \
-- Testfastmemcpy.cpp \
- TestFileOperationJob.cpp \
- TestFileUtils.cpp \
- Testfstrcmp.cpp \
-diff --git a/xbmc/utils/test/Testfastmemcpy.cpp b/xbmc/utils/test/Testfastmemcpy.cpp
-deleted file mode 100644
-index 93a9bb0..0000000
---- a/xbmc/utils/test/Testfastmemcpy.cpp
-+++ /dev/null
-@@ -1,39 +0,0 @@
--/*
-- * Copyright (C) 2005-2013 Team XBMC
-- * http://xbmc.org
-- *
-- * This Program is free software; you can redistribute it and/or modify
-- * it under the terms of the GNU General Public License as published by
-- * the Free Software Foundation; either version 2, or (at your option)
-- * any later version.
-- *
-- * This Program is distributed in the hope that it will be useful,
-- * but WITHOUT ANY WARRANTY; without even the implied warranty of
-- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-- * GNU General Public License for more details.
-- *
-- * You should have received a copy of the GNU General Public License
-- * along with XBMC; see the file COPYING. If not, see
-- * <http://www.gnu.org/licenses/>.
-- *
-- */
--
--#include <stddef.h> // TODO: This should go in fastmemcpy.h instead.
--#include "utils/fastmemcpy.h" -- --#include "gtest/gtest.h" -- --static const char refdata[] = "\x01\x02\x03\x04\x05\x06\x07\x08" -- "\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" -- "\x11\x12\x13\x14\x15\x16\x17\x18" -- "\x19\x1a\x1b\x1c\x1d\x1e\x1f\x20" -- "\x21\x22\x23\x24\x25\x26\x27\x28" -- "\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30"; -- --TEST(Testfastmemcpy, General) --{ -- char vardata[sizeof(refdata)]; -- memset(vardata, 0, sizeof(vardata)); -- EXPECT_NE(nullptr, fast_memcpy(vardata, refdata, sizeof(refdata))); -- EXPECT_EQ(0, memcmp(refdata, vardata, sizeof(refdata))); --}