diff --git a/projects/RPi/patches/kodi/kodi-000-backports.patch b/projects/RPi/patches/kodi/kodi-000-backports.patch new file mode 100644 index 0000000000..e09ca0ff94 --- /dev/null +++ b/projects/RPi/patches/kodi/kodi-000-backports.patch @@ -0,0 +1,2187 @@ +From 618094ed6ad5b01165de2111410dafbe4160598c Mon Sep 17 00:00:00 2001 +From: Rainer Hochecker +Date: Tue, 31 May 2016 13:28:48 +0200 +Subject: [PATCH 1/3] VideoPlayer: expose stream player info to GUI + +--- + xbmc/GUIInfoManager.cpp | 60 +++++++ + xbmc/cores/DataCacheCore.cpp | 166 ++++++++++++++++- + xbmc/cores/DataCacheCore.h | 55 +++++- + .../VideoPlayer/DVDCodecs/Audio/DVDAudioCodec.h | 6 +- + .../DVDCodecs/Audio/DVDAudioCodecFFmpeg.cpp | 3 +- + .../DVDCodecs/Audio/DVDAudioCodecFFmpeg.h | 4 +- + .../DVDCodecs/Audio/DVDAudioCodecPassthrough.cpp | 9 +- + .../DVDCodecs/Audio/DVDAudioCodecPassthrough.h | 4 +- + .../VideoPlayer/DVDCodecs/DVDFactoryCodec.cpp | 6 +- + xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h | 3 +- + .../DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp | 30 +++- + .../DVDCodecs/Video/DVDVideoCodecFFmpeg.h | 1 + + xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp | 6 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h | 5 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp | 19 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.h | 5 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.cpp | 4 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.h | 6 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.cpp | 4 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.h | 6 +- + xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp | 199 +++++++++++++++++++++ + xbmc/cores/VideoPlayer/Process/ProcessInfo.h | 47 +++++ + xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp | 11 +- + xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp | 5 + + xbmc/cores/omxplayer/OMXPlayerAudio.cpp | 1 + + xbmc/cores/omxplayer/OMXPlayerVideo.cpp | 7 + + xbmc/cores/paplayer/VideoPlayerCodec.cpp | 4 +- + xbmc/cores/paplayer/VideoPlayerCodec.h | 2 + + 
xbmc/guiinfo/GUIInfoLabels.h | 14 ++ + 29 files changed, 662 insertions(+), 30 deletions(-) + +diff --git a/xbmc/GUIInfoManager.cpp b/xbmc/GUIInfoManager.cpp +index 0d37f1f..ab5cb12 100644 +--- a/xbmc/GUIInfoManager.cpp ++++ b/xbmc/GUIInfoManager.cpp +@@ -2108,6 +2108,22 @@ const infomap videoplayer[] = {{ "title", VIDEOPLAYER_TITLE }, + { "episodename", VIDEOPLAYER_EPISODENAME } + }; + ++const infomap player_process[] = ++{ ++ { "videodecoder", PLAYER_PROCESS_VIDEODECODER }, ++ { "deintmethod", PLAYER_PROCESS_DEINTMETHOD }, ++ { "pixformat", PLAYER_PROCESS_PIXELFORMAT }, ++ { "videowidth", PLAYER_PROCESS_VIDEOWIDTH }, ++ { "videoheight", PLAYER_PROCESS_VIDEOHEIGHT }, ++ { "videofps", PLAYER_PROCESS_VIDEOFPS }, ++ { "videodar", PLAYER_PROCESS_VIDEODAR }, ++ { "videohwdecoder", PLAYER_PROCESS_VIDEOHWDECODER }, ++ { "audiodecoder", PLAYER_PROCESS_AUDIODECODER }, ++ { "audiochannels", PLAYER_PROCESS_AUDIOCHANNELS }, ++ { "audiosamplerate", PLAYER_PROCESS_AUDIOSAMPLERATE }, ++ { "audiobitspersample", PLAYER_PROCESS_AUDIOBITSPERSAMPLE } ++}; ++ + /// \page modules__General__List_of_gui_access + /// \section modules__General__List_of_gui_access_Container Container + /// @{ +@@ -5320,6 +5336,14 @@ int CGUIInfoManager::TranslateSingleString(const std::string &strCondition, bool + return videoplayer[i].val; + } + } ++ else if (cat.name == "player_process") ++ { ++ for (size_t i = 0; i < sizeof(player_process) / sizeof(infomap); i++) ++ { ++ if (prop.name == player_process[i].str) ++ return player_process[i].val; ++ } ++ } + else if (cat.name == "slideshow") + { + for (size_t i = 0; i < sizeof(slideshow) / sizeof(infomap); i++) +@@ -5993,6 +6017,27 @@ std::string CGUIInfoManager::GetLabel(int info, int contextWindow, std::string * + strLabel = info.language; + } + break; ++ case PLAYER_PROCESS_VIDEODECODER: ++ strLabel = g_dataCacheCore.GetVideoDecoderName(); ++ break; ++ case PLAYER_PROCESS_DEINTMETHOD: ++ strLabel = g_dataCacheCore.GetVideoDeintMethod(); ++ break; ++ case
PLAYER_PROCESS_PIXELFORMAT: ++ strLabel = g_dataCacheCore.GetVideoPixelFormat(); ++ break; ++ case PLAYER_PROCESS_VIDEOFPS: ++ strLabel = StringUtils::FormatNumber(g_dataCacheCore.GetVideoFps()); ++ break; ++ case PLAYER_PROCESS_VIDEODAR: ++ strLabel = StringUtils::FormatNumber(g_dataCacheCore.GetVideoDAR()); ++ break; ++ case PLAYER_PROCESS_AUDIODECODER: ++ strLabel = g_dataCacheCore.GetAudioDecoderName(); ++ break; ++ case PLAYER_PROCESS_AUDIOCHANNELS: ++ strLabel = g_dataCacheCore.GetAudioChannels(); ++ break; + case RDS_AUDIO_LANG: + case RDS_CHANNEL_COUNTRY: + case RDS_TITLE: +@@ -6555,6 +6600,18 @@ bool CGUIInfoManager::GetInt(int &value, int info, int contextWindow, const CGUI + case SYSTEM_BATTERY_LEVEL: + value = g_powerManager.BatteryLevel(); + return true; ++ case PLAYER_PROCESS_VIDEOWIDTH: ++ value = g_dataCacheCore.GetVideoWidth(); ++ return true; ++ case PLAYER_PROCESS_VIDEOHEIGHT: ++ value = g_dataCacheCore.GetVideoHeight(); ++ return true; ++ case PLAYER_PROCESS_AUDIOSAMPLERATE: ++ value = g_dataCacheCore.GetAudioSampleRate(); ++ return true; ++ case PLAYER_PROCESS_AUDIOBITSPERSAMPLE: ++ value = g_dataCacheCore.GetAudioBitsPerSampe(); ++ return true; + } + return false; + } +@@ -7090,6 +7147,9 @@ bool CGUIInfoManager::GetBool(int condition1, int contextWindow, const CGUIListI + !m_currentFile->GetPVRRadioRDSInfoTag()->GetSMSStudio().empty() || + !m_currentFile->GetPVRRadioRDSInfoTag()->GetPhoneStudio().empty()); + break; ++ case PLAYER_PROCESS_VIDEOHWDECODER: ++ bReturn = g_dataCacheCore.IsVideoHwDecoder(); ++ break; + default: // default, use integer value different from 0 as true + { + int val; +diff --git a/xbmc/cores/DataCacheCore.cpp b/xbmc/cores/DataCacheCore.cpp +index 68cf2fb..cbb0a4f 100644 +--- a/xbmc/cores/DataCacheCore.cpp ++++ b/xbmc/cores/DataCacheCore.cpp +@@ -19,6 +19,12 @@ + */ + + #include "cores/DataCacheCore.h" ++#include "threads/SingleLock.h" ++ ++CDataCacheCore::CDataCacheCore() ++{ ++ m_hasAVInfoChanges =
false; ++} + + bool CDataCacheCore::HasAVInfoChanges() + { +@@ -35,4 +41,162 @@ void CDataCacheCore::SignalVideoInfoChange() + void CDataCacheCore::SignalAudioInfoChange() + { + m_hasAVInfoChanges = true; +-} +\ No newline at end of file ++} ++ ++void CDataCacheCore::SetVideoDecoderName(std::string name, bool isHw) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.decoderName = name; ++ m_playerVideoInfo.isHwDecoder = isHw; ++} ++ ++std::string CDataCacheCore::GetVideoDecoderName() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.decoderName; ++} ++ ++bool CDataCacheCore::IsVideoHwDecoder() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.isHwDecoder; ++} ++ ++ ++void CDataCacheCore::SetVideoDeintMethod(std::string method) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.deintMethod = method; ++} ++ ++std::string CDataCacheCore::GetVideoDeintMethod() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.deintMethod; ++} ++ ++void CDataCacheCore::SetVideoPixelFormat(std::string pixFormat) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.pixFormat = pixFormat; ++} ++ ++std::string CDataCacheCore::GetVideoPixelFormat() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.pixFormat; ++} ++ ++void CDataCacheCore::SetVideoDimensions(int width, int height) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.width = width; ++ m_playerVideoInfo.height = height; ++} ++ ++int CDataCacheCore::GetVideoWidth() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.width; ++} ++ ++int CDataCacheCore::GetVideoHeight() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.height; ++} ++ ++void CDataCacheCore::SetVideoFps(float fps) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.fps = fps; ++} ++ ++float 
CDataCacheCore::GetVideoFps() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.fps; ++} ++ ++void CDataCacheCore::SetVideoDAR(float dar) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.dar = dar; ++} ++ ++float CDataCacheCore::GetVideoDAR() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.dar; ++} ++ ++// player audio info ++void CDataCacheCore::SetAudioDecoderName(std::string name) ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ m_playerAudioInfo.decoderName = name; ++} ++ ++std::string CDataCacheCore::GetAudioDecoderName() ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ return m_playerAudioInfo.decoderName; ++} ++ ++void CDataCacheCore::SetAudioChannels(std::string channels) ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ m_playerAudioInfo.channels = channels; ++} ++ ++std::string CDataCacheCore::GetAudioChannels() ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ return m_playerAudioInfo.channels; ++} ++ ++void CDataCacheCore::SetAudioSampleRate(int sampleRate) ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ m_playerAudioInfo.sampleRate = sampleRate; ++} ++ ++int CDataCacheCore::GetAudioSampleRate() ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ return m_playerAudioInfo.sampleRate; ++} ++ ++void CDataCacheCore::SetAudioBitsPerSample(int bitsPerSample) ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ m_playerAudioInfo.bitsPerSample = bitsPerSample; ++} ++ ++int CDataCacheCore::GetAudioBitsPerSampe() ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ return m_playerAudioInfo.bitsPerSample; ++} +diff --git a/xbmc/cores/DataCacheCore.h b/xbmc/cores/DataCacheCore.h +index 0df013d..e16c81f 100644 +--- a/xbmc/cores/DataCacheCore.h ++++ b/xbmc/cores/DataCacheCore.h +@@ -20,15 +20,68 @@ + * + */ + ++#include <atomic> ++#include <string> ++#include "threads/CriticalSection.h" ++ + class CDataCacheCore + { + public: ++ CDataCacheCore(); + bool HasAVInfoChanges(); + void
SignalVideoInfoChange(); + void SignalAudioInfoChange(); + ++ // player video info ++ void SetVideoDecoderName(std::string name, bool isHw); ++ std::string GetVideoDecoderName(); ++ bool IsVideoHwDecoder(); ++ void SetVideoDeintMethod(std::string method); ++ std::string GetVideoDeintMethod(); ++ void SetVideoPixelFormat(std::string pixFormat); ++ std::string GetVideoPixelFormat(); ++ void SetVideoDimensions(int width, int height); ++ int GetVideoWidth(); ++ int GetVideoHeight(); ++ void SetVideoFps(float fps); ++ float GetVideoFps(); ++ void SetVideoDAR(float dar); ++ float GetVideoDAR(); ++ ++ // player audio info ++ void SetAudioDecoderName(std::string name); ++ std::string GetAudioDecoderName(); ++ void SetAudioChannels(std::string channels); ++ std::string GetAudioChannels(); ++ void SetAudioSampleRate(int sampleRate); ++ int GetAudioSampleRate(); ++ void SetAudioBitsPerSample(int bitsPerSample); ++ int GetAudioBitsPerSampe(); ++ + protected: +- volatile bool m_hasAVInfoChanges; ++ std::atomic_bool m_hasAVInfoChanges; ++ ++ CCriticalSection m_videoPlayerSection; ++ struct SPlayerVideoInfo ++ { ++ std::string decoderName; ++ bool isHwDecoder; ++ std::string deintMethod; ++ std::string pixFormat; ++ int width; ++ int height; ++ float fps; ++ float dar; ++ } m_playerVideoInfo; ++ ++ CCriticalSection m_audioPlayerSection; ++ struct SPlayerAudioInfo ++ { ++ std::string decoderName; ++ std::string channels; ++ int sampleRate; ++ int bitsPerSample; ++ } m_playerAudioInfo; + }; + + extern CDataCacheCore g_dataCacheCore; +\ No newline at end of file +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodec.h b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodec.h +index 7e0da61..bb698da 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodec.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodec.h +@@ -23,6 +23,7 @@ + #include "system.h" + #include "cores/AudioEngine/Utils/AEAudioFormat.h" + #include "cores/AudioEngine/Utils/AEUtil.h" 
++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include "DVDClock.h" + + +@@ -64,7 +65,7 @@ class CDVDAudioCodec + { + public: + +- CDVDAudioCodec() {} ++ CDVDAudioCodec(CProcessInfo &processInfo) : m_processInfo(processInfo) {} + virtual ~CDVDAudioCodec() {} + + /* +@@ -138,4 +139,7 @@ class CDVDAudioCodec + * should return the ffmpeg profile value + */ + virtual int GetProfile() { return 0; } ++ ++protected: ++ CProcessInfo &m_processInfo; + }; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.cpp +index a21894e..f5880cc 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.cpp +@@ -35,7 +35,7 @@ extern "C" { + #include "cores/AudioEngine/Utils/AEUtil.h" + #endif + +-CDVDAudioCodecFFmpeg::CDVDAudioCodecFFmpeg() : CDVDAudioCodec() ++CDVDAudioCodecFFmpeg::CDVDAudioCodecFFmpeg(CProcessInfo &processInfo) : CDVDAudioCodec(processInfo) + { + m_pCodecContext = NULL; + +@@ -126,6 +126,7 @@ bool CDVDAudioCodecFFmpeg::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options + m_iSampleFormat = AV_SAMPLE_FMT_NONE; + m_matrixEncoding = AV_MATRIX_ENCODING_NONE; + ++ m_processInfo.SetAudioDecoderName(m_pCodecContext->codec->name); + return true; + } + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.h b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.h +index a15317a..d5760bb 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.h +@@ -29,10 +29,12 @@ extern "C" { + #include "libswresample/swresample.h" + } + ++class CProcessInfo; ++ + class CDVDAudioCodecFFmpeg : public CDVDAudioCodec + { + public: +- CDVDAudioCodecFFmpeg(); ++ CDVDAudioCodecFFmpeg(CProcessInfo &processInfo); + virtual ~CDVDAudioCodecFFmpeg(); + virtual bool Open(CDVDStreamInfo &hints, CDVDCodecOptions 
&options); + virtual void Dispose(); +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.cpp +index 1fb00e1..8009297 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.cpp +@@ -29,7 +29,8 @@ + + #define TRUEHD_BUF_SIZE 61440 + +-CDVDAudioCodecPassthrough::CDVDAudioCodecPassthrough(void) : ++CDVDAudioCodecPassthrough::CDVDAudioCodecPassthrough(CProcessInfo &processInfo) : ++ CDVDAudioCodec(processInfo), + m_buffer(NULL), + m_bufferSize(0), + m_trueHDoffset(0) +@@ -51,22 +52,26 @@ bool CDVDAudioCodecPassthrough::Open(CDVDStreamInfo &hints, CDVDCodecOptions &op + case AV_CODEC_ID_AC3: + format.m_streamInfo.m_type = CAEStreamInfo::STREAM_TYPE_AC3; + format.m_streamInfo.m_sampleRate = hints.samplerate; ++ m_processInfo.SetAudioDecoderName("PT_AC3"); + break; + + case AV_CODEC_ID_EAC3: + format.m_streamInfo.m_type = CAEStreamInfo::STREAM_TYPE_EAC3; + format.m_streamInfo.m_sampleRate = hints.samplerate; ++ m_processInfo.SetAudioDecoderName("PT_EAC3"); + break; + + case AV_CODEC_ID_DTS: + format.m_streamInfo.m_type = CAEStreamInfo::STREAM_TYPE_DTSHD; + format.m_streamInfo.m_sampleRate = hints.samplerate; ++ m_processInfo.SetAudioDecoderName("PT_DTSHD"); + break; + + case AV_CODEC_ID_TRUEHD: + format.m_streamInfo.m_type = CAEStreamInfo::STREAM_TYPE_TRUEHD; + format.m_streamInfo.m_sampleRate = hints.samplerate; + m_trueHDBuffer.reset(new uint8_t[TRUEHD_BUF_SIZE]); ++ m_processInfo.SetAudioDecoderName("PT_TRUEHD"); + break; + + default: +@@ -83,6 +88,8 @@ bool CDVDAudioCodecPassthrough::Open(CDVDStreamInfo &hints, CDVDCodecOptions &op + + // only get the dts core from the parser if we don't support dtsHD + m_parser.SetCoreOnly(true); ++ ++ m_processInfo.SetAudioDecoderName("PT_DTS"); + } + + m_dataSize = 0; +diff --git 
a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.h b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.h +index a04e736..4005429 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.h +@@ -29,10 +29,12 @@ + #include "cores/AudioEngine/Utils/AEStreamInfo.h" + #include "cores/AudioEngine/Utils/AEBitstreamPacker.h" + ++class CProcessInfo; ++ + class CDVDAudioCodecPassthrough : public CDVDAudioCodec + { + public: +- CDVDAudioCodecPassthrough(); ++ CDVDAudioCodecPassthrough(CProcessInfo &processInfo); + virtual ~CDVDAudioCodecPassthrough(); + + virtual bool Open(CDVDStreamInfo &hints, CDVDCodecOptions &options); +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.cpp +index bb5bfe0..9717412 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.cpp +@@ -173,7 +173,7 @@ CDVDVideoCodec* CDVDFactoryCodec::CreateVideoCodec(CDVDStreamInfo &hint, CProces + return nullptr;; + } + +-CDVDAudioCodec* CDVDFactoryCodec::CreateAudioCodec(CDVDStreamInfo &hint, bool allowpassthrough, bool allowdtshddecode) ++CDVDAudioCodec* CDVDFactoryCodec::CreateAudioCodec(CDVDStreamInfo &hint, CProcessInfo &processInfo, bool allowpassthrough, bool allowdtshddecode) + { + CDVDAudioCodec* pCodec = NULL; + CDVDCodecOptions options; +@@ -184,12 +184,12 @@ CDVDAudioCodec* CDVDFactoryCodec::CreateAudioCodec(CDVDStreamInfo &hint, bool al + // we don't use passthrough if "sync playback to display" is enabled + if (allowpassthrough) + { +- pCodec = OpenCodec(new CDVDAudioCodecPassthrough(), hint, options); ++ pCodec = OpenCodec(new CDVDAudioCodecPassthrough(processInfo), hint, options); + if (pCodec) + return pCodec; + } + +- pCodec = OpenCodec(new CDVDAudioCodecFFmpeg(), hint, options); ++ pCodec = OpenCodec(new CDVDAudioCodecFFmpeg(processInfo), 
hint, options); + if (pCodec) + return pCodec; + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h b/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h +index 45e794b98..d11c700 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h +@@ -41,7 +41,8 @@ class CDVDFactoryCodec + static CDVDVideoCodec* CreateVideoCodec(CDVDStreamInfo &hint, + CProcessInfo &processInfo, + const CRenderInfo &info = CRenderInfo()); +- static CDVDAudioCodec* CreateAudioCodec(CDVDStreamInfo &hint, bool allowpassthrough = true, bool allowdtshddecode = true); ++ static CDVDAudioCodec* CreateAudioCodec(CDVDStreamInfo &hint, CProcessInfo &processInfo, ++ bool allowpassthrough = true, bool allowdtshddecode = true); + static CDVDOverlayCodec* CreateOverlayCodec(CDVDStreamInfo &hint ); + + static CDVDAudioCodec* OpenCodec(CDVDAudioCodec* pCodec, CDVDStreamInfo &hint, CDVDCodecOptions &options ); +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp +index 0414d85..967d518 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp +@@ -68,6 +68,7 @@ extern "C" { + #include "libavfilter/avfilter.h" + #include "libavfilter/buffersink.h" + #include "libavfilter/buffersrc.h" ++#include "libavutil/pixdesc.h" + } + + enum DecoderState +@@ -88,11 +89,12 @@ enum EFilterFlags { + FILTER_ROTATE = 0x40, //< rotate image according to the codec hints + }; + +-enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avctx +- , const AVPixelFormat * fmt ) ++enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avctx, const AVPixelFormat * fmt) + { + CDVDVideoCodecFFmpeg* ctx = (CDVDVideoCodecFFmpeg*)avctx->opaque; + ++ const char* pixFmtName = av_get_pix_fmt_name(*fmt); ++ + // if frame threading is enabled hw accel is 
not allowed + if(ctx->m_decoderState != STATE_HW_SINGLE) + { +@@ -122,9 +124,10 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + if(VDPAU::CDecoder::IsVDPAUFormat(*cur) && CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEVDPAU)) + { + CLog::Log(LOGNOTICE,"CDVDVideoCodecFFmpeg::GetFormat - Creating VDPAU(%ix%i)", avctx->width, avctx->height); +- VDPAU::CDecoder* vdp = new VDPAU::CDecoder(); ++ VDPAU::CDecoder* vdp = new VDPAU::CDecoder(ctx->m_processInfo); + if(vdp->Open(avctx, ctx->m_pCodecContext, *cur, ctx->m_uSurfacesCount)) + { ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? pixFmtName : ""); + ctx->SetHardware(vdp); + return *cur; + } +@@ -137,9 +140,10 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + !ctx->m_hints.dvd && !ctx->m_hints.stills) + { + CLog::Log(LOGNOTICE, "CDVDVideoCodecFFmpeg::GetFormat - Creating DXVA(%ix%i)", avctx->width, avctx->height); +- DXVA::CDecoder* dec = new DXVA::CDecoder(); ++ DXVA::CDecoder* dec = new DXVA::CDecoder(ctx->m_processInfo); + if(dec->Open(avctx, ctx->m_pCodecContext, *cur, ctx->m_uSurfacesCount)) + { ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? pixFmtName : ""); + ctx->SetHardware(dec); + return *cur; + } +@@ -151,9 +155,10 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + // mpeg4 vaapi decoding is disabled + if(*cur == AV_PIX_FMT_VAAPI_VLD && CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEVAAPI)) + { +- VAAPI::CDecoder* dec = new VAAPI::CDecoder(); ++ VAAPI::CDecoder* dec = new VAAPI::CDecoder(ctx->m_processInfo); + if(dec->Open(avctx, ctx->m_pCodecContext, *cur, ctx->m_uSurfacesCount) == true) + { ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? 
pixFmtName : ""); + ctx->SetHardware(dec); + return *cur; + } +@@ -165,9 +170,10 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + #ifdef TARGET_DARWIN + if (*cur == AV_PIX_FMT_VIDEOTOOLBOX && CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEVTB)) + { +- VTB::CDecoder* dec = new VTB::CDecoder(); ++ VTB::CDecoder* dec = new VTB::CDecoder(ctx->m_processInfo); + if(dec->Open(avctx, ctx->m_pCodecContext, *cur, ctx->m_uSurfacesCount)) + { ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? pixFmtName : ""); + ctx->SetHardware(dec); + return *cur; + } +@@ -183,6 +189,7 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + ctx->m_pCodecContext->hwaccel_context = (void *)ctx->m_options.m_opaque_pointer; + if(dec->Open(avctx, ctx->m_pCodecContext, *cur, ctx->m_uSurfacesCount)) + { ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? pixFmtName : ""); + ctx->SetHardware(dec); + return *cur; + } +@@ -193,6 +200,7 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + cur++; + } + ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? pixFmtName : ""); + ctx->m_decoderState = STATE_HW_FAILED; + return avcodec_default_get_format(avctx, fmt); + } +@@ -226,6 +234,7 @@ CDVDVideoCodecFFmpeg::CDVDVideoCodecFFmpeg(CProcessInfo &processInfo) : CDVDVide + m_skippedDeint = 0; + m_droppedFrames = 0; + m_interlaced = false; ++ m_DAR = 1.0; + } + + CDVDVideoCodecFFmpeg::~CDVDVideoCodecFFmpeg() +@@ -385,6 +394,9 @@ bool CDVDVideoCodecFFmpeg::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options + } + + UpdateName(); ++ ++ m_processInfo.SetVideoDecoderName(m_name, m_pHardware ? 
true : false); ++ m_processInfo.SetVideoDimensions(m_pCodecContext->coded_width, m_pCodecContext->coded_height); + return true; + } + +@@ -746,6 +758,12 @@ bool CDVDVideoCodecFFmpeg::GetPictureCommon(DVDVideoPicture* pDvdVideoPicture) + if (aspect_ratio <= 0.0) + aspect_ratio = (float)pDvdVideoPicture->iWidth / (float)pDvdVideoPicture->iHeight; + ++ if (m_DAR != aspect_ratio) ++ { ++ m_DAR = aspect_ratio; ++ m_processInfo.SetVideoDAR(m_DAR); ++ } ++ + /* XXX: we suppose the screen has a 1.0 pixel ratio */ // CDVDVideo will compensate it. + pDvdVideoPicture->iDisplayHeight = pDvdVideoPicture->iHeight; + pDvdVideoPicture->iDisplayWidth = ((int)RINT(pDvdVideoPicture->iHeight * aspect_ratio)) & -3; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h +index 4ef2982..20bc1ff 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h +@@ -119,6 +119,7 @@ class CDVDVideoCodecFFmpeg : public CDVDVideoCodec + bool m_requestSkipDeint; + int m_codecControlFlags; + bool m_interlaced; ++ double m_DAR; + CDVDStreamInfo m_hints; + CDVDCodecOptions m_options; + }; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp +index f8730c5..fb83b42 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include "cores/VideoPlayer/VideoRenderers/RenderManager.h" + #include "../DVDCodecUtils.h" + #include "DXVA.h" +@@ -689,8 +690,9 @@ CRenderPicture::~CRenderPicture() + // DXVA Decoder + //----------------------------------------------------------------------------- + +-CDecoder::CDecoder() +- : m_event(true) ++CDecoder::CDecoder(CProcessInfo& processInfo) ++ : m_event(true), ++ 
m_processInfo(processInfo) + { + m_event.Set(); + m_state = DXVA_OPEN; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h +index ab756f7..2170515 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h +@@ -28,6 +28,8 @@ + #include "libavcodec/d3d11va.h" + #include "threads/Event.h" + ++class CProcessInfo; ++ + namespace DXVA { + + #define CHECK(a) \ +@@ -114,7 +116,7 @@ class CDecoder + , public ID3DResource + { + public: +- CDecoder(); ++ CDecoder(CProcessInfo& processInfo); + ~CDecoder(); + + // IHardwareDecoder overrides +@@ -163,6 +165,7 @@ class CDecoder + unsigned int m_surface_alignment; + CCriticalSection m_section; + CEvent m_event; ++ CProcessInfo& m_processInfo; + }; + + }; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp +index c014ce2..1b4c8e8 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp +@@ -24,6 +24,7 @@ + #include "DVDVideoCodec.h" + #include "cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h" + #include "cores/VideoPlayer/DVDClock.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include "utils/log.h" + #include "utils/StringUtils.h" + #include "threads/SingleLock.h" +@@ -444,7 +445,9 @@ bool CVideoSurfaces::HasRefs() + // VAAPI + //----------------------------------------------------------------------------- + +-CDecoder::CDecoder() : m_vaapiOutput(&m_inMsgEvent) ++CDecoder::CDecoder(CProcessInfo& processInfo) : ++ m_vaapiOutput(&m_inMsgEvent), ++ m_processInfo(processInfo) + { + m_vaapiConfig.videoSurfaces = &m_videoSurfaces; + +@@ -453,6 +456,7 @@ CDecoder::CDecoder() : m_vaapiOutput(&m_inMsgEvent) + m_vaapiConfig.context = 0; + m_vaapiConfig.contextId = VA_INVALID_ID; + m_vaapiConfig.configId = VA_INVALID_ID; ++ m_vaapiConfig.processInfo = &m_processInfo; + m_avctx = NULL; + m_getBufferError 
= 0; + } +@@ -2016,6 +2020,7 @@ void COutput::InitCycle() + delete m_pp; + m_pp = NULL; + DropVppProcessedPictures(); ++ m_config.processInfo->SetVideoDeintMethod("unknown"); + } + if (!m_pp) + { +@@ -2034,6 +2039,17 @@ void COutput::InitCycle() + { + m_pp->Init(method); + m_currentDiMethod = method; ++ ++ if (method == VS_INTERLACEMETHOD_DEINTERLACE) ++ m_config.processInfo->SetVideoDeintMethod("yadif"); ++ else if (method == VS_INTERLACEMETHOD_RENDER_BOB) ++ m_config.processInfo->SetVideoDeintMethod("render-bob"); ++ else if (method == VS_INTERLACEMETHOD_VAAPI_BOB) ++ m_config.processInfo->SetVideoDeintMethod("vaapi-bob"); ++ else if (method == VS_INTERLACEMETHOD_VAAPI_MADI) ++ m_config.processInfo->SetVideoDeintMethod("vaapi-madi"); ++ else if (method == VS_INTERLACEMETHOD_VAAPI_MACI) ++ m_config.processInfo->SetVideoDeintMethod("vaapi-maci"); + } + else + { +@@ -2066,6 +2082,7 @@ void COutput::InitCycle() + { + m_pp->Init(method); + m_currentDiMethod = method; ++ m_config.processInfo->SetVideoDeintMethod("none"); + } + else + { +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.h +index cc744c7..08c5dfc 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.h +@@ -48,6 +48,7 @@ extern "C" { + + using namespace Actor; + ++class CProcessInfo; + + #define FULLHD_WIDTH 1920 + +@@ -125,6 +126,7 @@ struct CVaapiConfig + VAProfile profile; + VAConfigAttrib attrib; + Display *x11dsp; ++ CProcessInfo *processInfo; + }; + + /** +@@ -411,7 +413,7 @@ class CDecoder + + public: + +- CDecoder(); ++ CDecoder(CProcessInfo& processInfo); + virtual ~CDecoder(); + + virtual bool Open (AVCodecContext* avctx, AVCodecContext* mainctx, const enum AVPixelFormat, unsigned int surfaces = 0); +@@ -468,6 +470,7 @@ class CDecoder + + int m_codecControl; + std::vector m_diMethods; ++ CProcessInfo& m_processInfo; + }; + + 
//----------------------------------------------------------------------------- +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.cpp +index 331b719..377c72b 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.cpp +@@ -25,6 +25,7 @@ + #include "windowing/WindowingFactory.h" + #include "guilib/GraphicContext.h" + #include "guilib/TextureManager.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include "cores/VideoPlayer/VideoRenderers/RenderManager.h" + #include "DVDVideoCodecFFmpeg.h" + #include "DVDClock.h" +@@ -467,13 +468,14 @@ int CVideoSurfaces::Size() + // CVDPAU + //----------------------------------------------------------------------------- + +-CDecoder::CDecoder() : m_vdpauOutput(&m_inMsgEvent) ++CDecoder::CDecoder(CProcessInfo& processInfo) : m_vdpauOutput(&m_inMsgEvent), m_processInfo(processInfo) + { + m_vdpauConfig.videoSurfaces = &m_videoSurfaces; + + m_vdpauConfigured = false; + m_DisplayState = VDPAU_OPEN; + m_vdpauConfig.context = 0; ++ m_vdpauConfig.processInfo = &m_processInfo; + } + + bool CDecoder::Open(AVCodecContext* avctx, AVCodecContext* mainctx, const enum AVPixelFormat fmt, unsigned int surfaces) +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.h +index 56601a1..59432ad 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.h +@@ -70,6 +70,8 @@ extern "C" { + #define FULLHD_WIDTH 1920 + #define MAX_PIC_Q_LENGTH 20 //for non-interop_yuv this controls the max length of the decoded pic to render completion Q + ++class CProcessInfo; ++ + namespace VDPAU + { + +@@ -182,6 +184,7 @@ struct CVdpauConfig + uint32_t maxReferences; + bool useInteropYuv; + CVDPAUContext *context; ++ CProcessInfo *processInfo; + }; + + /** +@@ -556,7 +559,7 @@ class CDecoder + uint32_t aux; /* optional extra parameter... 
*/ + }; + +- CDecoder(); ++ CDecoder(CProcessInfo& processInfo); + virtual ~CDecoder(); + + virtual bool Open (AVCodecContext* avctx, AVCodecContext* mainctx, const enum AVPixelFormat, unsigned int surfaces = 0); +@@ -623,6 +626,7 @@ class CDecoder + CVdpauRenderPicture *m_presentPicture; + + int m_codecControl; ++ CProcessInfo& m_processInfo; + }; + + } +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.cpp +index 253aefd..287b7c1 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.cpp +@@ -21,6 +21,7 @@ + #ifdef TARGET_DARWIN + #include "platform/darwin/osx/CocoaInterface.h" + #include "platform/darwin/DarwinUtils.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include "DVDVideoCodec.h" + #include "DVDCodecs/DVDCodecUtils.h" + #include "utils/log.h" +@@ -34,7 +35,7 @@ extern "C" { + using namespace VTB; + + +-CDecoder::CDecoder() ++CDecoder::CDecoder(CProcessInfo& processInfo) : m_processInfo(processInfo) + { + m_avctx = nullptr; + } +@@ -86,6 +87,7 @@ bool CDecoder::Open(AVCodecContext *avctx, AVCodecContext* mainctx, enum AVPixel + mainctx->pix_fmt = fmt; + mainctx->hwaccel_context = avctx->hwaccel_context; + ++ m_processInfo.SetVideoDeintMethod("none"); + return true; + } + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.h +index 1e097d4..bad295b 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.h +@@ -23,6 +23,8 @@ + + #include "DVDVideoCodecFFmpeg.h" + ++class CProcessInfo; ++ + namespace VTB + { + +@@ -30,7 +32,7 @@ class CDecoder + : public CDVDVideoCodecFFmpeg::IHardwareDecoder + { + public: +- CDecoder(); ++ CDecoder(CProcessInfo& processInfo); + ~CDecoder(); + virtual bool Open(AVCodecContext* avctx, AVCodecContext* mainctx, const enum AVPixelFormat, unsigned int surfaces = 0); + virtual int Decode(AVCodecContext* 
avctx, AVFrame* frame); +@@ -43,7 +45,7 @@ class CDecoder + protected: + unsigned m_renderbuffers_count; + AVCodecContext *m_avctx; +- ++ CProcessInfo& m_processInfo; + }; + + } +diff --git a/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp b/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp +index ceaa256..fc1f5dd 100644 +--- a/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp ++++ b/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp +@@ -19,6 +19,8 @@ + */ + + #include "ProcessInfo.h" ++#include "cores/DataCacheCore.h" ++#include "threads/SingleLock.h" + + // Override for platform ports + #if !defined(PLATFORM_OVERRIDE) +@@ -51,3 +53,200 @@ bool CProcessInfo::AllowDTSHDDecode() + { + return true; + } ++ ++void CProcessInfo::ResetVideoCodecInfo() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoIsHWDecoder = false; ++ m_videoDecoderName = "unknown"; ++ m_videoDeintMethod = "unknown"; ++ m_videoPixelFormat = "unknown"; ++ m_videoWidth = 0; ++ m_videoHeight = 0; ++ m_videoFPS = 0.0; ++ ++ g_dataCacheCore.SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); ++ g_dataCacheCore.SetVideoDeintMethod(m_videoDeintMethod); ++ g_dataCacheCore.SetVideoPixelFormat(m_videoPixelFormat); ++ g_dataCacheCore.SetVideoDimensions(m_videoWidth, m_videoHeight); ++ g_dataCacheCore.SetVideoFps(m_videoFPS); ++} ++ ++void CProcessInfo::SetVideoDecoderName(std::string name, bool isHw) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoIsHWDecoder = isHw; ++ m_videoDecoderName = name; ++ ++ g_dataCacheCore.SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); ++} ++ ++std::string CProcessInfo::GetVideoDecoderName() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoDecoderName; ++} ++ ++bool CProcessInfo::IsVideoHwDecoder() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoIsHWDecoder; ++} ++ ++void CProcessInfo::SetVideoDeintMethod(std::string method) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoDeintMethod = method; ++ ++ 
g_dataCacheCore.SetVideoDeintMethod(m_videoDeintMethod); ++} ++ ++std::string CProcessInfo::GetVideoDeintMethod() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoDeintMethod; ++} ++ ++void CProcessInfo::SetVideoPixelFormat(std::string pixFormat) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoPixelFormat = pixFormat; ++ ++ g_dataCacheCore.SetVideoPixelFormat(m_videoPixelFormat); ++} ++ ++std::string CProcessInfo::GetVideoPixelFormat() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoPixelFormat; ++} ++ ++void CProcessInfo::SetVideoDimensions(int width, int height) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoWidth = width; ++ m_videoHeight = height; ++ ++ g_dataCacheCore.SetVideoDimensions(m_videoWidth, m_videoHeight); ++} ++ ++void CProcessInfo::GetVideoDimensions(int &width, int &height) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ width = m_videoWidth; ++ height = m_videoHeight; ++} ++ ++void CProcessInfo::SetVideoFps(float fps) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoFPS = fps; ++ ++ g_dataCacheCore.SetVideoFps(m_videoFPS); ++} ++ ++float CProcessInfo::GetVideoFps() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoFPS; ++} ++ ++void CProcessInfo::SetVideoDAR(float dar) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoDAR = dar; ++ ++ CServiceBroker::GetDataCacheCore().SetVideoDAR(m_videoDAR); ++} ++ ++float CProcessInfo::GetVideoDAR() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoDAR; ++} ++ ++// player audio info ++void CProcessInfo::ResetAudioCodecInfo() ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ m_audioDecoderName = "unknown"; ++ m_audioChannels = "unknown"; ++ m_audioSampleRate = 0;; ++ m_audioBitsPerSample = 0; ++ ++ g_dataCacheCore.SetAudioDecoderName(m_audioDecoderName); ++ g_dataCacheCore.SetAudioChannels(m_audioChannels); ++ g_dataCacheCore.SetAudioSampleRate(m_audioSampleRate); ++ 
g_dataCacheCore.SetAudioBitsPerSample(m_audioBitsPerSample); ++} ++ ++void CProcessInfo::SetAudioDecoderName(std::string name) ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ m_audioDecoderName = name; ++} ++ ++std::string CProcessInfo::GetAudioDecoderName() ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ return m_audioDecoderName; ++} ++ ++void CProcessInfo::SetAudioChannels(std::string channels) ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ m_audioChannels = channels; ++} ++ ++std::string CProcessInfo::GetAudioChannels() ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ return m_audioChannels; ++} ++ ++void CProcessInfo::SetAudioSampleRate(int sampleRate) ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ m_audioSampleRate = sampleRate; ++} ++ ++int CProcessInfo::GetAudioSampleRate() ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ return m_audioSampleRate; ++} ++ ++void CProcessInfo::SetAudioBitsPerSample(int bitsPerSample) ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ m_audioBitsPerSample = bitsPerSample; ++} ++ ++int CProcessInfo::GetAudioBitsPerSampe() ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ return m_audioBitsPerSample; ++} +diff --git a/xbmc/cores/VideoPlayer/Process/ProcessInfo.h b/xbmc/cores/VideoPlayer/Process/ProcessInfo.h +index b8a4e46..0ec9a2c 100644 +--- a/xbmc/cores/VideoPlayer/Process/ProcessInfo.h ++++ b/xbmc/cores/VideoPlayer/Process/ProcessInfo.h +@@ -20,6 +20,8 @@ + #pragma once + + #include "cores/IPlayer.h" ++#include "threads/CriticalSection.h" ++#include + + class CProcessInfo + { +@@ -29,6 +31,51 @@ class CProcessInfo + virtual EINTERLACEMETHOD GetFallbackDeintMethod(); + virtual bool AllowDTSHDDecode(); + ++ // player video info ++ void ResetVideoCodecInfo(); ++ void SetVideoDecoderName(std::string name, bool isHw); ++ std::string GetVideoDecoderName(); ++ bool IsVideoHwDecoder(); ++ void SetVideoDeintMethod(std::string method); ++ std::string GetVideoDeintMethod(); ++ void 
SetVideoPixelFormat(std::string pixFormat); ++ std::string GetVideoPixelFormat(); ++ void SetVideoDimensions(int width, int height); ++ void GetVideoDimensions(int &width, int &height); ++ void SetVideoFps(float fps); ++ float GetVideoFps(); ++ void SetVideoDAR(float dar); ++ float GetVideoDAR(); ++ ++ // player audio info ++ void ResetAudioCodecInfo(); ++ void SetAudioDecoderName(std::string name); ++ std::string GetAudioDecoderName(); ++ void SetAudioChannels(std::string channels); ++ std::string GetAudioChannels(); ++ void SetAudioSampleRate(int sampleRate); ++ int GetAudioSampleRate(); ++ void SetAudioBitsPerSample(int bitsPerSample); ++ int GetAudioBitsPerSampe(); ++ + protected: + CProcessInfo(); ++ ++ // player video info ++ bool m_videoIsHWDecoder; ++ std::string m_videoDecoderName; ++ std::string m_videoDeintMethod; ++ std::string m_videoPixelFormat; ++ int m_videoWidth; ++ int m_videoHeight; ++ float m_videoFPS; ++ float m_videoDAR; ++ CCriticalSection m_videoCodecSection; ++ ++ // player audio info ++ std::string m_audioDecoderName; ++ std::string m_audioChannels; ++ int m_audioSampleRate; ++ int m_audioBitsPerSample; ++ CCriticalSection m_audioCodecSection; + }; +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +index fb1d993..2422815 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +@@ -90,11 +90,13 @@ CVideoPlayerAudio::~CVideoPlayerAudio() + + bool CVideoPlayerAudio::OpenStream(CDVDStreamInfo &hints) + { ++ m_processInfo.ResetAudioCodecInfo(); ++ + CLog::Log(LOGNOTICE, "Finding audio codec for: %i", hints.codec); + bool allowpassthrough = !CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEDISPLAYASCLOCK); + if (hints.realtime) + allowpassthrough = false; +- CDVDAudioCodec* codec = CDVDFactoryCodec::CreateAudioCodec(hints, allowpassthrough, m_processInfo.AllowDTSHDDecode()); ++ CDVDAudioCodec* codec = 
CDVDFactoryCodec::CreateAudioCodec(hints, m_processInfo, allowpassthrough, m_processInfo.AllowDTSHDDecode()); + if(!codec) + { + CLog::Log(LOGERROR, "Unsupported audio codec"); +@@ -451,6 +453,11 @@ void CVideoPlayerAudio::Process() + + m_streaminfo.channels = audioframe.format.m_channelLayout.Count(); + ++ ++ m_processInfo.SetAudioChannels(audioframe.format.m_channelLayout); ++ m_processInfo.SetAudioSampleRate(audioframe.format.m_sampleRate); ++ m_processInfo.SetAudioBitsPerSample(audioframe.bits_per_sample); ++ + m_messageParent.Put(new CDVDMsg(CDVDMsg::PLAYER_AVCHANGE)); + } + +@@ -595,7 +602,7 @@ bool CVideoPlayerAudio::SwitchCodecIfNeeded() + bool allowpassthrough = !CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEDISPLAYASCLOCK); + if (m_streaminfo.realtime) + allowpassthrough = false; +- CDVDAudioCodec *codec = CDVDFactoryCodec::CreateAudioCodec(m_streaminfo, allowpassthrough, m_processInfo.AllowDTSHDDecode()); ++ CDVDAudioCodec *codec = CDVDFactoryCodec::CreateAudioCodec(m_streaminfo, m_processInfo, allowpassthrough, m_processInfo.AllowDTSHDDecode()); + if (!codec || codec->NeedPassthrough() == m_pAudioCodec->NeedPassthrough()) { + // passthrough state has not changed + delete codec; +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +index 8e5d33dc..fd260d43 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +@@ -120,6 +120,8 @@ double CVideoPlayerVideo::GetOutputDelay() + + bool CVideoPlayerVideo::OpenStream( CDVDStreamInfo &hint ) + { ++ m_processInfo.ResetVideoCodecInfo(); ++ + CRenderInfo info; + info = m_renderManager.GetRenderInfo(); + +@@ -156,11 +158,13 @@ void CVideoPlayerVideo::OpenStream(CDVDStreamInfo &hint, CDVDVideoCodec* codec) + { + m_fFrameRate = DVD_TIME_BASE / CDVDCodecUtils::NormalizeFrameduration((double)DVD_TIME_BASE * hint.fpsscale / hint.fpsrate); + m_bFpsInvalid = false; ++ 
m_processInfo.SetVideoFps(m_fFrameRate); + } + else + { + m_fFrameRate = 25; + m_bFpsInvalid = true; ++ m_processInfo.SetVideoFps(0); + } + + m_pullupCorrection.ResetVFRDetection(); +@@ -1023,6 +1027,7 @@ void CVideoPlayerVideo::CalcFrameRate() + CLog::Log(LOGDEBUG,"%s framerate was:%f calculated:%f", __FUNCTION__, m_fFrameRate, m_fStableFrameRate / m_iFrameRateCount); + m_fFrameRate = m_fStableFrameRate / m_iFrameRateCount; + m_bFpsInvalid = false; ++ m_processInfo.SetVideoFps(m_fFrameRate); + } + + //reset the stored framerates +diff --git a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +index 50a5b17..6161962 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +@@ -100,6 +100,7 @@ bool OMXPlayerAudio::OpenStream(CDVDStreamInfo &hints) + { + m_bad_state = false; + ++ m_processInfo.ResetAudioCodecInfo(); + COMXAudioCodecOMX *codec = new COMXAudioCodecOMX(); + + if(!codec || !codec->Open(hints)) +diff --git a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +index 236f1b3..0ec7f15 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +@@ -515,6 +515,8 @@ bool OMXPlayerVideo::OpenDecoder() + if(!m_av_clock) + return false; + ++ m_processInfo.ResetVideoCodecInfo(); ++ + if (m_hints.fpsrate && m_hints.fpsscale) + m_fFrameRate = DVD_TIME_BASE / CDVDCodecUtils::NormalizeFrameduration((double)DVD_TIME_BASE * m_hints.fpsscale / m_hints.fpsrate); + else +@@ -525,6 +527,8 @@ bool OMXPlayerVideo::OpenDecoder() + CLog::Log(LOGINFO, "OMXPlayerVideo::OpenDecoder : Invalid framerate %d, using forced 25fps and just trust timestamps\n", (int)m_fFrameRate); + m_fFrameRate = 25; + } ++ m_processInfo.SetVideoFps(m_fFrameRate); ++ + // use aspect in stream if available + if (m_hints.forced_aspect) + m_fForcedAspectRatio = m_hints.aspect; +@@ -705,6 +709,9 @@ void OMXPlayerVideo::ResolutionUpdateCallBack(uint32_t width, 
uint32_t height, f + m_bAllowFullscreen = false; // only allow on first configure + } + ++ m_processInfo.SetVideoDimensions(width, height); ++ m_processInfo.SetVideoAspectRatio(display_aspect); ++ + unsigned int iDisplayWidth = width; + unsigned int iDisplayHeight = height; + +diff --git a/xbmc/cores/paplayer/VideoPlayerCodec.cpp b/xbmc/cores/paplayer/VideoPlayerCodec.cpp +index 9056cf8..32add6c 100644 +--- a/xbmc/cores/paplayer/VideoPlayerCodec.cpp ++++ b/xbmc/cores/paplayer/VideoPlayerCodec.cpp +@@ -46,6 +46,8 @@ VideoPlayerCodec::VideoPlayerCodec() + m_pResampler = NULL; + m_needConvert = false; + m_channels = 0; ++ ++ m_processInfo.reset(CProcessInfo::CreateInstance()); + } + + VideoPlayerCodec::~VideoPlayerCodec() +@@ -165,7 +167,7 @@ bool VideoPlayerCodec::Init(const CFileItem &file, unsigned int filecache) + + CDVDStreamInfo hint(*pStream, true); + +- m_pAudioCodec = CDVDFactoryCodec::CreateAudioCodec(hint); ++ m_pAudioCodec = CDVDFactoryCodec::CreateAudioCodec(hint, *m_processInfo.get()); + if (!m_pAudioCodec) + { + CLog::Log(LOGERROR, "%s: Could not create audio codec", __FUNCTION__); +diff --git a/xbmc/cores/paplayer/VideoPlayerCodec.h b/xbmc/cores/paplayer/VideoPlayerCodec.h +index 81379bd..042f4f7 100644 +--- a/xbmc/cores/paplayer/VideoPlayerCodec.h ++++ b/xbmc/cores/paplayer/VideoPlayerCodec.h +@@ -73,6 +73,8 @@ class VideoPlayerCodec : public ICodec + bool m_needConvert; + AEAudioFormat m_srcFormat; + int m_channels; ++ ++ std::unique_ptr m_processInfo; + }; + + #endif +diff --git a/xbmc/guiinfo/GUIInfoLabels.h b/xbmc/guiinfo/GUIInfoLabels.h +index 27d6bc2..96edafa 100644 +--- a/xbmc/guiinfo/GUIInfoLabels.h ++++ b/xbmc/guiinfo/GUIInfoLabels.h +@@ -562,6 +562,20 @@ + #define RDS_CHANNEL_COUNTRY (RDS_DATA_START + 44) + #define RDS_DATA_END RDS_CHANNEL_COUNTRY + ++#define PLAYER_PROCESS 1500 ++#define PLAYER_PROCESS_VIDEODECODER (PLAYER_PROCESS) ++#define PLAYER_PROCESS_DEINTMETHOD (PLAYER_PROCESS + 1) ++#define PLAYER_PROCESS_PIXELFORMAT (PLAYER_PROCESS 
+ 2) ++#define PLAYER_PROCESS_VIDEOWIDTH (PLAYER_PROCESS + 3) ++#define PLAYER_PROCESS_VIDEOHEIGHT (PLAYER_PROCESS + 4) ++#define PLAYER_PROCESS_VIDEOFPS (PLAYER_PROCESS + 5) ++#define PLAYER_PROCESS_VIDEODAR (PLAYER_PROCESS + 6) ++#define PLAYER_PROCESS_VIDEOHWDECODER (PLAYER_PROCESS + 7) ++#define PLAYER_PROCESS_AUDIODECODER (PLAYER_PROCESS + 8) ++#define PLAYER_PROCESS_AUDIOCHANNELS (PLAYER_PROCESS + 9) ++#define PLAYER_PROCESS_AUDIOSAMPLERATE (PLAYER_PROCESS + 10) ++#define PLAYER_PROCESS_AUDIOBITSPERSAMPLE (PLAYER_PROCESS + 11) ++ + #define WINDOW_PROPERTY 9993 + #define WINDOW_IS_TOPMOST 9994 + #define WINDOW_IS_VISIBLE 9995 + +From 6855680f52c826aa2a2e2684c5607e00a6f8fff1 Mon Sep 17 00:00:00 2001 +From: Rainer Hochecker +Date: Wed, 22 Jun 2016 18:48:10 +0200 +Subject: [PATCH 2/3] remove DataCacheCore from systemGlobals + +--- + xbmc/Application.cpp | 1 + + xbmc/GUIInfoManager.cpp | 24 ++++++++++----------- + xbmc/ServiceBroker.cpp | 5 +++++ + xbmc/ServiceBroker.h | 2 ++ + xbmc/ServiceManager.cpp | 7 +++++++ + xbmc/ServiceManager.h | 3 +++ + xbmc/SystemGlobals.cpp | 4 ---- + xbmc/cores/DataCacheCore.cpp | 6 ++++++ + xbmc/cores/DataCacheCore.h | 5 ++--- + xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp | 29 +++++++++++++------------- + xbmc/cores/VideoPlayer/VideoPlayer.cpp | 14 +++++++------ + xbmc/cores/paplayer/PAPlayer.cpp | 3 ++- + 12 files changed, 63 insertions(+), 40 deletions(-) + +diff --git a/xbmc/Application.cpp b/xbmc/Application.cpp +index cdcf53e..baada48 100644 +--- a/xbmc/Application.cpp ++++ b/xbmc/Application.cpp +@@ -37,6 +37,7 @@ + #include "cores/AudioEngine/AEFactory.h" + #include "cores/AudioEngine/Engines/ActiveAE/AudioDSPAddons/ActiveAEDSP.h" + #include "cores/AudioEngine/Utils/AEUtil.h" ++#include "cores/DataCacheCore.h" + #include "cores/playercorefactory/PlayerCoreFactory.h" + #include "PlayListPlayer.h" + #include "Autorun.h" +diff --git a/xbmc/GUIInfoManager.cpp b/xbmc/GUIInfoManager.cpp +index ab5cb12..7d42106 100644 +--- 
a/xbmc/GUIInfoManager.cpp ++++ b/xbmc/GUIInfoManager.cpp +@@ -6018,25 +6018,25 @@ std::string CGUIInfoManager::GetLabel(int info, int contextWindow, std::string * + } + break; + case PLAYER_PROCESS_VIDEODECODER: +- strLabel = g_dataCacheCore.GetVideoDecoderName(); ++ strLabel = CServiceBroker::GetDataCacheCore().GetVideoDecoderName(); + break; + case PLAYER_PROCESS_DEINTMETHOD: +- strLabel = g_dataCacheCore.GetVideoDeintMethod(); ++ strLabel = CServiceBroker::GetDataCacheCore().GetVideoDeintMethod(); + break; + case PLAYER_PROCESS_PIXELFORMAT: +- strLabel = g_dataCacheCore.GetVideoPixelFormat(); ++ strLabel = CServiceBroker::GetDataCacheCore().GetVideoPixelFormat(); + break; + case PLAYER_PROCESS_VIDEOFPS: +- strLabel = StringUtils::FormatNumber(g_dataCacheCore.GetVideoFps()); ++ strLabel = StringUtils::FormatNumber(CServiceBroker::GetDataCacheCore().GetVideoFps()); + break; + case PLAYER_PROCESS_VIDEODAR: + strLabel = StringUtils::FormatNumber(CServiceBroker::GetDataCacheCore().GetVideoDAR()); + break; + case PLAYER_PROCESS_AUDIODECODER: +- strLabel = g_dataCacheCore.GetAudioDecoderName(); ++ strLabel = CServiceBroker::GetDataCacheCore().GetAudioDecoderName(); + break; + case PLAYER_PROCESS_AUDIOCHANNELS: +- strLabel = g_dataCacheCore.GetAudioChannels(); ++ strLabel = CServiceBroker::GetDataCacheCore().GetAudioChannels(); + break; + case RDS_AUDIO_LANG: + case RDS_CHANNEL_COUNTRY: +@@ -6601,16 +6601,16 @@ bool CGUIInfoManager::GetInt(int &value, int info, int contextWindow, const CGUI + value = g_powerManager.BatteryLevel(); + return true; + case PLAYER_PROCESS_VIDEOWIDTH: +- value = g_dataCacheCore.GetVideoWidth(); ++ value = CServiceBroker::GetDataCacheCore().GetVideoWidth(); + return true; + case PLAYER_PROCESS_VIDEOHEIGHT: +- value = g_dataCacheCore.GetVideoHeight(); ++ value = CServiceBroker::GetDataCacheCore().GetVideoHeight(); + return true; + case PLAYER_PROCESS_AUDIOSAMPLERATE: +- value = g_dataCacheCore.GetAudioSampleRate(); ++ value = 
CServiceBroker::GetDataCacheCore().GetAudioSampleRate(); + return true; + case PLAYER_PROCESS_AUDIOBITSPERSAMPLE: +- value = g_dataCacheCore.GetAudioBitsPerSampe(); ++ value = CServiceBroker::GetDataCacheCore().GetAudioBitsPerSampe(); + return true; + } + return false; +@@ -7148,7 +7148,7 @@ bool CGUIInfoManager::GetBool(int condition1, int contextWindow, const CGUIListI + !m_currentFile->GetPVRRadioRDSInfoTag()->GetPhoneStudio().empty()); + break; + case PLAYER_PROCESS_VIDEOHWDECODER: +- bReturn = g_dataCacheCore.IsVideoHwDecoder(); ++ bReturn = CServiceBroker::GetDataCacheCore().IsVideoHwDecoder(); + break; + default: // default, use integer value different from 0 as true + { +@@ -9150,7 +9150,7 @@ void CGUIInfoManager::UpdateAVInfo() + { + if(g_application.m_pPlayer->IsPlaying()) + { +- if (g_dataCacheCore.HasAVInfoChanges()) ++ if (CServiceBroker::GetDataCacheCore().HasAVInfoChanges()) + { + SPlayerVideoStreamInfo video; + SPlayerAudioStreamInfo audio; +diff --git a/xbmc/ServiceBroker.cpp b/xbmc/ServiceBroker.cpp +index fff03c3..dd5c640 100644 +--- a/xbmc/ServiceBroker.cpp ++++ b/xbmc/ServiceBroker.cpp +@@ -50,3 +50,8 @@ ActiveAE::CActiveAEDSP &CServiceBroker::GetADSP() + { + return g_application.m_ServiceManager->GetADSPManager(); + } ++ ++CDataCacheCore &CServiceBroker::GetDataCacheCore() ++{ ++ return g_application.m_ServiceManager->GetDataCacheCore(); ++} +diff --git a/xbmc/ServiceBroker.h b/xbmc/ServiceBroker.h +index 32add30..9f9de19 100644 +--- a/xbmc/ServiceBroker.h ++++ b/xbmc/ServiceBroker.h +@@ -40,6 +40,7 @@ namespace PVR + } + + class XBPython; ++class CDataCacheCore; + + class CServiceBroker + { +@@ -50,4 +51,5 @@ class CServiceBroker + static XBPython &GetXBPython(); + static PVR::CPVRManager &GetPVRManager(); + static ActiveAE::CActiveAEDSP& GetADSP(); ++ static CDataCacheCore& GetDataCacheCore(); + }; +diff --git a/xbmc/ServiceManager.cpp b/xbmc/ServiceManager.cpp +index 3cc188c..4cf4440 100644 +--- a/xbmc/ServiceManager.cpp ++++ 
b/xbmc/ServiceManager.cpp +@@ -21,6 +21,7 @@ + #include "ServiceManager.h" + #include "addons/BinaryAddonCache.h" + #include "cores/AudioEngine/Engines/ActiveAE/AudioDSPAddons/ActiveAEDSP.h" ++#include "cores/DataCacheCore.h" + #include "utils/log.h" + #include "interfaces/AnnouncementManager.h" + #include "interfaces/generic/ScriptInvocationManager.h" +@@ -49,6 +50,7 @@ bool CServiceManager::Init2() + + m_ADSPManager.reset(new ActiveAE::CActiveAEDSP()); + m_PVRManager.reset(new PVR::CPVRManager()); ++ m_dataCacheCore.reset(new CDataCacheCore()); + + m_binaryAddonCache.reset( new ADDON::CBinaryAddonCache()); + m_binaryAddonCache->Init(); +@@ -104,3 +106,8 @@ ActiveAE::CActiveAEDSP& CServiceManager::GetADSPManager() + { + return *m_ADSPManager; + } ++ ++CDataCacheCore& CServiceManager::GetDataCacheCore() ++{ ++ return *m_dataCacheCore; ++} +\ No newline at end of file +diff --git a/xbmc/ServiceManager.h b/xbmc/ServiceManager.h +index 9b7806f6..5c7a9a8 100644 +--- a/xbmc/ServiceManager.h ++++ b/xbmc/ServiceManager.h +@@ -42,6 +42,7 @@ class CPVRManager; + } + + class XBPython; ++class CDataCacheCore; + + class CServiceManager + { +@@ -56,6 +57,7 @@ class CServiceManager + XBPython& GetXBPython(); + PVR::CPVRManager& GetPVRManager(); + ActiveAE::CActiveAEDSP& GetADSPManager(); ++ CDataCacheCore& GetDataCacheCore(); + + protected: + std::unique_ptr m_addonMgr; +@@ -64,4 +66,5 @@ class CServiceManager + std::unique_ptr m_XBPython; + std::unique_ptr m_PVRManager; + std::unique_ptr m_ADSPManager; ++ std::unique_ptr m_dataCacheCore; + }; +diff --git a/xbmc/SystemGlobals.cpp b/xbmc/SystemGlobals.cpp +index 9354471..3d1cb55 100644 +--- a/xbmc/SystemGlobals.cpp ++++ b/xbmc/SystemGlobals.cpp +@@ -19,7 +19,6 @@ + */ + #include "system.h" + #include "SectionLoader.h" +-#include "cores/DataCacheCore.h" + #include "GUILargeTextureManager.h" + #include "guilib/TextureManager.h" + #include "utils/AlarmClock.h" +@@ -71,6 +70,3 @@ std::map CSpecialProtocol::m_pathMap; + #endif + + 
CZipManager g_ZipManager; +- +- CDataCacheCore g_dataCacheCore; +- +diff --git a/xbmc/cores/DataCacheCore.cpp b/xbmc/cores/DataCacheCore.cpp +index cbb0a4f..43a24f1 100644 +--- a/xbmc/cores/DataCacheCore.cpp ++++ b/xbmc/cores/DataCacheCore.cpp +@@ -20,12 +20,18 @@ + + #include "cores/DataCacheCore.h" + #include "threads/SingleLock.h" ++#include "ServiceBroker.h" + + CDataCacheCore::CDataCacheCore() + { + m_hasAVInfoChanges = false; + } + ++CDataCacheCore& GetInstance() ++{ ++ return CServiceBroker::GetDataCacheCore(); ++} ++ + bool CDataCacheCore::HasAVInfoChanges() + { + bool ret = m_hasAVInfoChanges; +diff --git a/xbmc/cores/DataCacheCore.h b/xbmc/cores/DataCacheCore.h +index e16c81f..646f512 100644 +--- a/xbmc/cores/DataCacheCore.h ++++ b/xbmc/cores/DataCacheCore.h +@@ -28,6 +28,7 @@ class CDataCacheCore + { + public: + CDataCacheCore(); ++ static CDataCacheCore& GetInstance(); + bool HasAVInfoChanges(); + void SignalVideoInfoChange(); + void SignalAudioInfoChange(); +@@ -82,6 +83,4 @@ class CDataCacheCore + int sampleRate; + int bitsPerSample; + } m_playerAudioInfo; +-}; +- +-extern CDataCacheCore g_dataCacheCore; +\ No newline at end of file ++}; +\ No newline at end of file +diff --git a/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp b/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp +index fc1f5dd..bfd7d58 100644 +--- a/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp ++++ b/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp +@@ -19,6 +19,7 @@ + */ + + #include "ProcessInfo.h" ++#include "ServiceBroker.h" + #include "cores/DataCacheCore.h" + #include "threads/SingleLock.h" + +@@ -66,11 +67,11 @@ void CProcessInfo::ResetVideoCodecInfo() + m_videoHeight = 0; + m_videoFPS = 0.0; + +- g_dataCacheCore.SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); +- g_dataCacheCore.SetVideoDeintMethod(m_videoDeintMethod); +- g_dataCacheCore.SetVideoPixelFormat(m_videoPixelFormat); +- g_dataCacheCore.SetVideoDimensions(m_videoWidth, m_videoHeight); +- 
g_dataCacheCore.SetVideoFps(m_videoFPS); ++ CServiceBroker::GetDataCacheCore().SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); ++ CServiceBroker::GetDataCacheCore().SetVideoDeintMethod(m_videoDeintMethod); ++ CServiceBroker::GetDataCacheCore().SetVideoPixelFormat(m_videoPixelFormat); ++ CServiceBroker::GetDataCacheCore().SetVideoDimensions(m_videoWidth, m_videoHeight); ++ CServiceBroker::GetDataCacheCore().SetVideoFps(m_videoFPS); + } + + void CProcessInfo::SetVideoDecoderName(std::string name, bool isHw) +@@ -80,7 +81,7 @@ void CProcessInfo::SetVideoDecoderName(std::string name, bool isHw) + m_videoIsHWDecoder = isHw; + m_videoDecoderName = name; + +- g_dataCacheCore.SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); ++ CServiceBroker::GetDataCacheCore().SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); + } + + std::string CProcessInfo::GetVideoDecoderName() +@@ -103,7 +104,7 @@ void CProcessInfo::SetVideoDeintMethod(std::string method) + + m_videoDeintMethod = method; + +- g_dataCacheCore.SetVideoDeintMethod(m_videoDeintMethod); ++ CServiceBroker::GetDataCacheCore().SetVideoDeintMethod(m_videoDeintMethod); + } + + std::string CProcessInfo::GetVideoDeintMethod() +@@ -119,7 +120,7 @@ void CProcessInfo::SetVideoPixelFormat(std::string pixFormat) + + m_videoPixelFormat = pixFormat; + +- g_dataCacheCore.SetVideoPixelFormat(m_videoPixelFormat); ++ CServiceBroker::GetDataCacheCore().SetVideoPixelFormat(m_videoPixelFormat); + } + + std::string CProcessInfo::GetVideoPixelFormat() +@@ -136,7 +137,7 @@ void CProcessInfo::SetVideoDimensions(int width, int height) + m_videoWidth = width; + m_videoHeight = height; + +- g_dataCacheCore.SetVideoDimensions(m_videoWidth, m_videoHeight); ++ CServiceBroker::GetDataCacheCore().SetVideoDimensions(m_videoWidth, m_videoHeight); + } + + void CProcessInfo::GetVideoDimensions(int &width, int &height) +@@ -153,7 +154,7 @@ void CProcessInfo::SetVideoFps(float fps) + + m_videoFPS = fps; + +- 
g_dataCacheCore.SetVideoFps(m_videoFPS); ++ CServiceBroker::GetDataCacheCore().SetVideoFps(m_videoFPS); + } + + float CProcessInfo::GetVideoFps() +@@ -189,10 +190,10 @@ void CProcessInfo::ResetAudioCodecInfo() + m_audioSampleRate = 0;; + m_audioBitsPerSample = 0; + +- g_dataCacheCore.SetAudioDecoderName(m_audioDecoderName); +- g_dataCacheCore.SetAudioChannels(m_audioChannels); +- g_dataCacheCore.SetAudioSampleRate(m_audioSampleRate); +- g_dataCacheCore.SetAudioBitsPerSample(m_audioBitsPerSample); ++ CServiceBroker::GetDataCacheCore().SetAudioDecoderName(m_audioDecoderName); ++ CServiceBroker::GetDataCacheCore().SetAudioChannels(m_audioChannels); ++ CServiceBroker::GetDataCacheCore().SetAudioSampleRate(m_audioSampleRate); ++ CServiceBroker::GetDataCacheCore().SetAudioBitsPerSample(m_audioBitsPerSample); + } + + void CProcessInfo::SetAudioDecoderName(std::string name) +diff --git a/xbmc/cores/VideoPlayer/VideoPlayer.cpp b/xbmc/cores/VideoPlayer/VideoPlayer.cpp +index 9ed9176..5205414 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayer.cpp +@@ -45,6 +45,7 @@ + #include "guilib/GUIWindowManager.h" + #include "guilib/StereoscopicsManager.h" + #include "Application.h" ++#include "ServiceBroker.h" + #include "messaging/ApplicationMessenger.h" + + #include "DVDDemuxers/DVDDemuxCC.h" +@@ -81,6 +82,7 @@ + #include "cores/omxplayer/OMXHelper.h" + #endif + #include "VideoPlayerAudio.h" ++#include "cores/DataCacheCore.h" + #include "windowing/WindowingFactory.h" + #include "DVDCodecs/DVDCodecUtils.h" + +@@ -545,8 +547,8 @@ void CSelectionStreams::Update(CDVDInputStream* input, CDVDDemux* demuxer, std:: + Update(s); + } + } +- g_dataCacheCore.SignalAudioInfoChange(); +- g_dataCacheCore.SignalVideoInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalVideoInfoChange(); + } + + int CSelectionStreams::CountSource(StreamType type, StreamSource source) const +@@ -2872,8 
+2874,8 @@ void CVideoPlayer::HandleMessages() + else if (pMsg->IsType(CDVDMsg::PLAYER_AVCHANGE)) + { + UpdateStreamInfos(); +- g_dataCacheCore.SignalAudioInfoChange(); +- g_dataCacheCore.SignalVideoInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalVideoInfoChange(); + } + + pMsg->Release(); +@@ -3596,8 +3598,8 @@ bool CVideoPlayer::OpenStream(CCurrentStream& current, int64_t demuxerId, int iS + } + } + +- g_dataCacheCore.SignalAudioInfoChange(); +- g_dataCacheCore.SignalVideoInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalVideoInfoChange(); + + return res; + } +diff --git a/xbmc/cores/paplayer/PAPlayer.cpp b/xbmc/cores/paplayer/PAPlayer.cpp +index 17e1cfc..6bd5180 100644 +--- a/xbmc/cores/paplayer/PAPlayer.cpp ++++ b/xbmc/cores/paplayer/PAPlayer.cpp +@@ -21,6 +21,7 @@ + #include "PAPlayer.h" + #include "CodecFactory.h" + #include "FileItem.h" ++#include "ServiceBroker.h" + #include "settings/AdvancedSettings.h" + #include "settings/Settings.h" + #include "music/tags/MusicInfoTag.h" +@@ -1130,7 +1131,7 @@ void PAPlayer::UpdateGUIData(StreamInfo *si) + total -= m_currentStream->m_startOffset; + m_playerGUIData.m_totalTime = total; + +- g_dataCacheCore.SignalAudioInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); + } + + void PAPlayer::OnJobComplete(unsigned int jobID, bool success, CJob *job) + +From 0337c933aaf3a438edba894780838f5c1fbb00f5 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 24 Jun 2016 19:37:32 +0100 +Subject: [PATCH 3/3] rbp: Update to use new processInfo data cache + +--- + .../VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 19 +++++++++++++++++ + xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp | 3 ++- + xbmc/cores/omxplayer/OMXAudioCodecOMX.h | 4 +++- + xbmc/cores/omxplayer/OMXPlayerAudio.cpp | 24 +++++++++++++++++++--- + xbmc/cores/omxplayer/OMXPlayerVideo.cpp | 10 ++++----- + 
xbmc/cores/omxplayer/OMXVideo.cpp | 20 +++++++++++++++++- + xbmc/cores/omxplayer/OMXVideo.h | 4 +++- + 7 files changed, 72 insertions(+), 12 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index 3d026cd..51ded6b2 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -177,6 +177,10 @@ void CMMALVideo::PortSettingsChanged(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *bu + m_decoded_height = m_es_format->es->video.crop.height; + m_decoded_aligned_width = m_es_format->es->video.width; + m_decoded_aligned_height = m_es_format->es->video.height; ++ ++ m_processInfo.SetVideoDimensions(m_decoded_width, m_decoded_height); ++ m_processInfo.SetVideoDAR(m_aspect_ratio); ++ + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) + CLog::Log(LOGDEBUG, "%s::%s format changed: %dx%d (%dx%d) %.2f", CLASSNAME, __func__, m_decoded_width, m_decoded_height, m_decoded_aligned_width, m_decoded_aligned_height, m_aspect_ratio); + } +@@ -360,6 +364,15 @@ bool CMMALVideo::CreateDeinterlace(EINTERLACEMETHOD interlace_method) + bool advanced_deinterlace = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED || interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF; + bool half_framerate = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF || interlace_method == VS_INTERLACEMETHOD_MMAL_BOB_HALF; + ++ if (advanced_deinterlace && !half_framerate) ++ m_processInfo.SetVideoDeintMethod("adv(x2)"); ++ else if (advanced_deinterlace && half_framerate) ++ m_processInfo.SetVideoDeintMethod("adv(x1)"); ++ else if (!advanced_deinterlace && !half_framerate) ++ m_processInfo.SetVideoDeintMethod("bob(x2)"); ++ else if (!advanced_deinterlace && half_framerate) ++ m_processInfo.SetVideoDeintMethod("bob(x1)"); ++ + MMAL_PARAMETER_IMAGEFX_PARAMETERS_T imfx_param = {{MMAL_PARAMETER_IMAGE_EFFECT_PARAMETERS, sizeof(imfx_param)}, + advanced_deinterlace 
? MMAL_PARAM_IMAGEFX_DEINTERLACE_ADV : MMAL_PARAM_IMAGEFX_DEINTERLACE_FAST, 4, {3, 0, half_framerate, 1 }}; + +@@ -437,6 +450,8 @@ bool CMMALVideo::DestroyDeinterlace() + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) + CLog::Log(LOGDEBUG, "%s::%s", CLASSNAME, __func__); + ++ m_processInfo.SetVideoDeintMethod("none"); ++ + assert(m_deint); + assert(m_dec_output == m_deint->output[0]); + +@@ -526,6 +541,8 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + if (!CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEMMAL) || hints.software) + return false; + ++ m_processInfo.SetVideoDeintMethod("none"); ++ + m_hints = hints; + m_renderer = (CMMALRenderer *)options.m_opaque_pointer; + MMAL_STATUS_T status; +@@ -732,6 +749,8 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + m_preroll = !m_hints.stills; + m_speed = DVD_PLAYSPEED_NORMAL; + ++ m_processInfo.SetVideoDecoderName(m_pFormatName, true); ++ + return true; + } + +diff --git a/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp b/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp +index 20f706c..d8cef9c 100644 +--- a/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp ++++ b/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp +@@ -33,7 +33,7 @@ + #define AUDIO_DECODE_OUTPUT_BUFFER (32*1024) + static const char rounded_up_channels_shift[] = {0,0,1,2,2,3,3,3,3}; + +-COMXAudioCodecOMX::COMXAudioCodecOMX() ++COMXAudioCodecOMX::COMXAudioCodecOMX(CProcessInfo &processInfo) : m_processInfo(processInfo) + { + m_pBufferOutput = NULL; + m_iBufferOutputAlloced = 0; +@@ -134,6 +134,7 @@ bool COMXAudioCodecOMX::Open(CDVDStreamInfo &hints) + + m_iSampleFormat = AV_SAMPLE_FMT_NONE; + m_desiredSampleFormat = m_pCodecContext->sample_fmt == AV_SAMPLE_FMT_S16 ? 
AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_FLTP; ++ m_processInfo.SetAudioDecoderName(m_pCodecContext->codec->name); + return true; + } + +diff --git a/xbmc/cores/omxplayer/OMXAudioCodecOMX.h b/xbmc/cores/omxplayer/OMXAudioCodecOMX.h +index c06a323..3b2a0f3 100644 +--- a/xbmc/cores/omxplayer/OMXAudioCodecOMX.h ++++ b/xbmc/cores/omxplayer/OMXAudioCodecOMX.h +@@ -31,11 +31,12 @@ extern "C" { + + #include "DVDStreamInfo.h" + #include "linux/PlatformDefs.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + + class COMXAudioCodecOMX + { + public: +- COMXAudioCodecOMX(); ++ COMXAudioCodecOMX(CProcessInfo &processInfo); + virtual ~COMXAudioCodecOMX(); + bool Open(CDVDStreamInfo &hints); + void Dispose(); +@@ -52,6 +53,7 @@ class COMXAudioCodecOMX + unsigned int GetFrameSize() { return m_frameSize; } + + protected: ++ CProcessInfo &m_processInfo; + AVCodecContext* m_pCodecContext; + SwrContext* m_pConvert; + enum AVSampleFormat m_iSampleFormat; +diff --git a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +index 6161962..1e5d2b9 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +@@ -43,6 +43,7 @@ + #include "linux/RBP.h" + #include "cores/AudioEngine/AEFactory.h" + #include "cores/DataCacheCore.h" ++#include "ServiceBroker.h" + + #include + #include +@@ -101,7 +102,7 @@ bool OMXPlayerAudio::OpenStream(CDVDStreamInfo &hints) + m_bad_state = false; + + m_processInfo.ResetAudioCodecInfo(); +- COMXAudioCodecOMX *codec = new COMXAudioCodecOMX(); ++ COMXAudioCodecOMX *codec = new COMXAudioCodecOMX(m_processInfo); + + if(!codec || !codec->Open(hints)) + { +@@ -143,7 +144,7 @@ void OMXPlayerAudio::OpenStream(CDVDStreamInfo &hints, COMXAudioCodecOMX *codec) + m_format.m_sampleRate = 0; + m_format.m_channelLayout = 0; + +- g_dataCacheCore.SignalAudioInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); + } + + void OMXPlayerAudio::CloseStream(bool bWaitForBuffers) +@@ -188,6 
+189,7 @@ bool OMXPlayerAudio::CodecChange() + { + m_hints.channels = m_pAudioCodec->GetChannels(); + m_hints.samplerate = m_pAudioCodec->GetSampleRate(); ++ m_hints.bitspersample = m_pAudioCodec->GetBitsPerSample(); + } + + /* only check bitrate changes on AV_CODEC_ID_DTS, AV_CODEC_ID_AC3, AV_CODEC_ID_EAC3 */ +@@ -204,7 +206,11 @@ bool OMXPlayerAudio::CodecChange() + (!m_passthrough && minor_change) || !m_DecoderOpen) + { + m_hints_current = m_hints; +- g_dataCacheCore.SignalAudioInfoChange(); ++ ++ m_processInfo.SetAudioSampleRate(m_hints.samplerate); ++ m_processInfo.SetAudioBitsPerSample(m_hints.bitspersample); ++ ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); + return true; + } + +@@ -562,11 +568,23 @@ bool OMXPlayerAudio::OpenDecoder() + + CAEChannelInfo channelMap; + if (m_pAudioCodec && !m_passthrough) ++ { + channelMap = m_pAudioCodec->GetChannelMap(); ++ } + else if (m_passthrough) ++ { + // we just want to get the channel count right to stop OMXAudio.cpp rejecting stream + // the actual layout is not used + channelMap = AE_CH_LAYOUT_5_1; ++ ++ if (m_hints.codec == AV_CODEC_ID_AC3) ++ m_processInfo.SetAudioDecoderName("PT_AC3"); ++ else if (m_hints.codec == AV_CODEC_ID_EAC3) ++ m_processInfo.SetAudioDecoderName("PT_EAC3"); ++ else ++ m_processInfo.SetAudioDecoderName("PT_DTS"); ++ } ++ m_processInfo.SetAudioChannels(channelMap); + bool bAudioRenderOpen = m_omxAudio.Initialize(m_format, m_av_clock, m_hints, channelMap, m_passthrough); + + m_codec_name = ""; +diff --git a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +index 0ec7f15..6efd0d5 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +@@ -73,8 +73,7 @@ OMXPlayerVideo::OMXPlayerVideo(OMXClock *av_clock, + : CThread("OMXPlayerVideo") + , IDVDStreamPlayerVideo(processInfo) + , m_messageQueue("video") +-, m_omxVideo(renderManager) +-, m_codecname("") ++, m_omxVideo(renderManager, processInfo) + , 
m_messageParent(parent) + , m_renderManager(renderManager) + { +@@ -471,7 +470,7 @@ void OMXPlayerVideo::Process() + + if (m_syncState == IDVDStreamPlayer::SYNC_STARTING && !bRequestDrop && settings_changed) + { +- m_codecname = m_omxVideo.GetDecoderName(); ++ m_processInfo.SetVideoDecoderName(m_omxVideo.GetDecoderName(), true); + m_syncState = IDVDStreamPlayer::SYNC_WAITSYNC; + SStartMsg msg; + msg.player = VideoPlayer_VIDEO; +@@ -548,7 +547,7 @@ bool OMXPlayerVideo::OpenDecoder() + CLog::Log(LOGINFO, "OMXPlayerVideo::OpenDecoder : Video codec %s width %d height %d profile %d fps %f\n", + m_omxVideo.GetDecoderName().c_str() , m_hints.width, m_hints.height, m_hints.profile, m_fFrameRate); + +- m_codecname = m_omxVideo.GetDecoderName(); ++ m_processInfo.SetVideoDecoderName(m_omxVideo.GetDecoderName(), true); + } + + return bVideoDecoderOpen; +@@ -710,7 +709,7 @@ void OMXPlayerVideo::ResolutionUpdateCallBack(uint32_t width, uint32_t height, f + } + + m_processInfo.SetVideoDimensions(width, height); +- m_processInfo.SetVideoAspectRatio(display_aspect); ++ m_processInfo.SetVideoDAR(display_aspect); + + unsigned int iDisplayWidth = width; + unsigned int iDisplayHeight = height; +@@ -722,6 +721,7 @@ void OMXPlayerVideo::ResolutionUpdateCallBack(uint32_t width, uint32_t height, f + iDisplayWidth = (int) (iDisplayHeight * display_aspect); + + m_fFrameRate = DVD_TIME_BASE / CDVDCodecUtils::NormalizeFrameduration((double)DVD_TIME_BASE / framerate); ++ m_processInfo.SetVideoFps(m_fFrameRate); + + CLog::Log(LOGDEBUG,"%s - change configuration. video:%dx%d. framerate: %4.2f. 
%dx%d format: BYPASS", + __FUNCTION__, video_width, video_height, m_fFrameRate, iDisplayWidth, iDisplayHeight); +diff --git a/xbmc/cores/omxplayer/OMXVideo.cpp b/xbmc/cores/omxplayer/OMXVideo.cpp +index 4c165bf..b2bb0a8 100644 +--- a/xbmc/cores/omxplayer/OMXVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXVideo.cpp +@@ -65,8 +65,9 @@ + + #define MAX_TEXT_LENGTH 1024 + +-COMXVideo::COMXVideo(CRenderManager& renderManager) : m_video_codec_name("") ++COMXVideo::COMXVideo(CRenderManager& renderManager, CProcessInfo &processInfo) : m_video_codec_name("") + , m_renderManager(renderManager) ++, m_processInfo(processInfo) + { + m_is_open = false; + m_extradata = NULL; +@@ -244,6 +245,19 @@ bool COMXVideo::PortSettingsChanged(ResolutionUpdateInfo &resinfo) + EINTERLACEMETHOD interlace_method = m_renderManager.AutoInterlaceMethod(CMediaSettings::GetInstance().GetCurrentVideoSettings().m_InterlaceMethod); + bool advanced_deinterlace = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED || interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF; + bool half_framerate = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF || interlace_method == VS_INTERLACEMETHOD_MMAL_BOB_HALF; ++ ++ if (advanced_deinterlace && !half_framerate) ++ m_processInfo.SetVideoDeintMethod("adv(x2)"); ++ else if (advanced_deinterlace && half_framerate) ++ m_processInfo.SetVideoDeintMethod("adv(x1)"); ++ else if (!advanced_deinterlace && !half_framerate) ++ m_processInfo.SetVideoDeintMethod("bob(x2)"); ++ else if (!advanced_deinterlace && half_framerate) ++ m_processInfo.SetVideoDeintMethod("bob(x1)"); ++ ++ if (!half_framerate) ++ resinfo.framerate *= 2.0f; ++ + if (!advanced_deinterlace) + { + // Image_fx assumed 3 frames of context. 
simple deinterlace doesn't require this +@@ -280,6 +294,10 @@ bool COMXVideo::PortSettingsChanged(ResolutionUpdateInfo &resinfo) + return false; + } + } ++ else ++ { ++ m_processInfo.SetVideoDeintMethod("none"); ++ } + + if(m_deinterlace) + { +diff --git a/xbmc/cores/omxplayer/OMXVideo.h b/xbmc/cores/omxplayer/OMXVideo.h +index 46e79cb..fd101e7 100644 +--- a/xbmc/cores/omxplayer/OMXVideo.h ++++ b/xbmc/cores/omxplayer/OMXVideo.h +@@ -34,6 +34,7 @@ + #include "threads/CriticalSection.h" + #include "xbmc/rendering/RenderSystem.h" + #include "cores/VideoPlayer/VideoRenderers/RenderManager.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include + + #define VIDEO_BUFFERS 60 +@@ -53,7 +54,7 @@ struct ResolutionUpdateInfo { + class COMXVideo + { + public: +- COMXVideo(CRenderManager& renderManager); ++ COMXVideo(CRenderManager& renderManager, CProcessInfo &processInfo); + ~COMXVideo(); + + // Required overrides +@@ -112,6 +113,7 @@ class COMXVideo + OMX_DISPLAYTRANSFORMTYPE m_transform; + bool m_settings_changed; + CRenderManager& m_renderManager; ++ CProcessInfo& m_processInfo; + static bool NaluFormatStartCodes(enum AVCodecID codec, uint8_t *in_extradata, int in_extrasize); + CCriticalSection m_critSection; + }; diff --git a/projects/RPi/patches/kodi/kodi-001-backport.patch b/projects/RPi/patches/kodi/kodi-001-backport.patch new file mode 100644 index 0000000000..1f88c2f0db --- /dev/null +++ b/projects/RPi/patches/kodi/kodi-001-backport.patch @@ -0,0 +1,80614 @@ +From 1c8dd52e7185c555335c927aa16102e7b758e54d Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Tue, 5 May 2015 17:27:39 +0100 +Subject: [PATCH 01/67] build: Allow installed links to be overwritten + +--- + tools/depends/target/Makefile | 72 +++++++++++++++++++-------------------- + tools/depends/xbmc-addons.include | 36 ++++++++++---------- + 2 files changed, 54 insertions(+), 54 deletions(-) + +diff --git a/tools/depends/target/Makefile b/tools/depends/target/Makefile +index 
2f0c83141034374ec5742a96a282391405ec8125..89f24641fd341336545fbdc6024e88eaacc805e7 100644 +--- a/tools/depends/target/Makefile ++++ b/tools/depends/target/Makefile +@@ -129,41 +129,41 @@ distclean:: + for d in $(DEPENDS); do $(MAKE) -C $$d distclean; done + + linux-system-libs-egl: +- [ -f $(PREFIX)/lib/pkgconfig/egl.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/egl.pc $(PREFIX)/lib/pkgconfig/egl.pc +- [ -f $(PREFIX)/lib/pkgconfig/damageproto.pc ] || ln -s /usr/share/pkgconfig/damageproto.pc $(PREFIX)/lib/pkgconfig/damageproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/fixesproto.pc ] || ln -s /usr/share/pkgconfig/fixesproto.pc $(PREFIX)/lib/pkgconfig/fixesproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/x11-xcb.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/x11-xcb.pc $(PREFIX)/lib/pkgconfig/x11-xcb.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-dri2.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-dri2.pc $(PREFIX)/lib/pkgconfig/xcb-dri2.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-dri3.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-dri3.pc $(PREFIX)/lib/pkgconfig/xcb-dri3.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-glx.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-glx.pc $(PREFIX)/lib/pkgconfig/xcb-glx.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-xfixes.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-xfixes.pc $(PREFIX)/lib/pkgconfig/xcb-xfixes.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-present.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-present.pc $(PREFIX)/lib/pkgconfig/xcb-present.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-randr.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-randr.pc $(PREFIX)/lib/pkgconfig/xcb-randr.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-render.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-render.pc $(PREFIX)/lib/pkgconfig/xcb-render.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-shape.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-shape.pc $(PREFIX)/lib/pkgconfig/xcb-shape.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-sync.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-sync.pc $(PREFIX)/lib/pkgconfig/xcb-sync.pc +- [ -f 
$(PREFIX)/lib/pkgconfig/xdamage.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xdamage.pc $(PREFIX)/lib/pkgconfig/xdamage.pc +- [ -f $(PREFIX)/lib/pkgconfig/xf86vidmodeproto.pc ] || ln -s /usr/share/pkgconfig/xf86vidmodeproto.pc $(PREFIX)/lib/pkgconfig/xf86vidmodeproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/xfixes.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xfixes.pc $(PREFIX)/lib/pkgconfig/xfixes.pc +- [ -f $(PREFIX)/lib/pkgconfig/xshmfence.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xshmfence.pc $(PREFIX)/lib/pkgconfig/xshmfence.pc +- [ -f $(PREFIX)/lib/pkgconfig/xxf86vm.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xxf86vm.pc $(PREFIX)/lib/pkgconfig/xxf86vm.pc ++ [ -f $(PREFIX)/lib/pkgconfig/egl.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/egl.pc $(PREFIX)/lib/pkgconfig/egl.pc ++ [ -f $(PREFIX)/lib/pkgconfig/damageproto.pc ] || ln -sf /usr/share/pkgconfig/damageproto.pc $(PREFIX)/lib/pkgconfig/damageproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/fixesproto.pc ] || ln -sf /usr/share/pkgconfig/fixesproto.pc $(PREFIX)/lib/pkgconfig/fixesproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/x11-xcb.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/x11-xcb.pc $(PREFIX)/lib/pkgconfig/x11-xcb.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-dri2.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-dri2.pc $(PREFIX)/lib/pkgconfig/xcb-dri2.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-dri3.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-dri3.pc $(PREFIX)/lib/pkgconfig/xcb-dri3.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-glx.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-glx.pc $(PREFIX)/lib/pkgconfig/xcb-glx.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-xfixes.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-xfixes.pc $(PREFIX)/lib/pkgconfig/xcb-xfixes.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-present.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-present.pc $(PREFIX)/lib/pkgconfig/xcb-present.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-randr.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-randr.pc $(PREFIX)/lib/pkgconfig/xcb-randr.pc ++ [ -f 
$(PREFIX)/lib/pkgconfig/xcb-render.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-render.pc $(PREFIX)/lib/pkgconfig/xcb-render.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-shape.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-shape.pc $(PREFIX)/lib/pkgconfig/xcb-shape.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-sync.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-sync.pc $(PREFIX)/lib/pkgconfig/xcb-sync.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xdamage.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xdamage.pc $(PREFIX)/lib/pkgconfig/xdamage.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xf86vidmodeproto.pc ] || ln -sf /usr/share/pkgconfig/xf86vidmodeproto.pc $(PREFIX)/lib/pkgconfig/xf86vidmodeproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xfixes.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xfixes.pc $(PREFIX)/lib/pkgconfig/xfixes.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xshmfence.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xshmfence.pc $(PREFIX)/lib/pkgconfig/xshmfence.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xxf86vm.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xxf86vm.pc $(PREFIX)/lib/pkgconfig/xxf86vm.pc + + linux-system-libs: linux-system-libs-egl +- [ -f $(PREFIX)/lib/pkgconfig/x11.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/x11.pc $(PREFIX)/lib/pkgconfig/x11.pc +- [ -f $(PREFIX)/lib/pkgconfig/xproto.pc ] || ln -s /usr/share/pkgconfig/xproto.pc $(PREFIX)/lib/pkgconfig/xproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/kbproto.pc ] || ln -s /usr/share/pkgconfig/kbproto.pc $(PREFIX)/lib/pkgconfig/kbproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb.pc $(PREFIX)/lib/pkgconfig/xcb.pc +- [ -f $(PREFIX)/lib/pkgconfig/pthread-stubs.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/pthread-stubs.pc $(PREFIX)/lib/pkgconfig/pthread-stubs.pc +- [ -f $(PREFIX)/lib/pkgconfig/xau.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xau.pc $(PREFIX)/lib/pkgconfig/xau.pc +- [ -f $(PREFIX)/lib/pkgconfig/xdmcp.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xdmcp.pc $(PREFIX)/lib/pkgconfig/xdmcp.pc +- [ -f $(PREFIX)/lib/pkgconfig/xext.pc ] || 
ln -s /usr/lib/$(HOST)/pkgconfig/xext.pc $(PREFIX)/lib/pkgconfig/xext.pc +- [ -f $(PREFIX)/lib/pkgconfig/xextproto.pc ] || ln -s /usr/share/pkgconfig/xextproto.pc $(PREFIX)/lib/pkgconfig/xextproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/xrandr.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xrandr.pc $(PREFIX)/lib/pkgconfig/xrandr.pc +- [ -f $(PREFIX)/lib/pkgconfig/xrender.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xrender.pc $(PREFIX)/lib/pkgconfig/xrender.pc +- [ -f $(PREFIX)/lib/pkgconfig/randrproto.pc ] || ln -s /usr/share/pkgconfig/randrproto.pc $(PREFIX)/lib/pkgconfig/randrproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/renderproto.pc ] || ln -s /usr/share/pkgconfig/renderproto.pc $(PREFIX)/lib/pkgconfig/renderproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/xt.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xt.pc $(PREFIX)/lib/pkgconfig/xt.pc +- [ -f $(PREFIX)/lib/pkgconfig/ice.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/ice.pc $(PREFIX)/lib/pkgconfig/ice.pc +- [ -f $(PREFIX)/lib/pkgconfig/sm.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/sm.pc $(PREFIX)/lib/pkgconfig/sm.pc +- [ -f $(PREFIX)/lib/pkgconfig/xmu.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xmu.pc $(PREFIX)/lib/pkgconfig/xmu.pc +- [ -f $(PREFIX)/lib/pkgconfig/libdrm.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/libdrm.pc $(PREFIX)/lib/pkgconfig/libdrm.pc ++ [ -f $(PREFIX)/lib/pkgconfig/x11.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/x11.pc $(PREFIX)/lib/pkgconfig/x11.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xproto.pc ] || ln -sf /usr/share/pkgconfig/xproto.pc $(PREFIX)/lib/pkgconfig/xproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/kbproto.pc ] || ln -sf /usr/share/pkgconfig/kbproto.pc $(PREFIX)/lib/pkgconfig/kbproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb.pc $(PREFIX)/lib/pkgconfig/xcb.pc ++ [ -f $(PREFIX)/lib/pkgconfig/pthread-stubs.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/pthread-stubs.pc $(PREFIX)/lib/pkgconfig/pthread-stubs.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xau.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xau.pc 
$(PREFIX)/lib/pkgconfig/xau.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xdmcp.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xdmcp.pc $(PREFIX)/lib/pkgconfig/xdmcp.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xext.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xext.pc $(PREFIX)/lib/pkgconfig/xext.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xextproto.pc ] || ln -sf /usr/share/pkgconfig/xextproto.pc $(PREFIX)/lib/pkgconfig/xextproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xrandr.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xrandr.pc $(PREFIX)/lib/pkgconfig/xrandr.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xrender.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xrender.pc $(PREFIX)/lib/pkgconfig/xrender.pc ++ [ -f $(PREFIX)/lib/pkgconfig/randrproto.pc ] || ln -sf /usr/share/pkgconfig/randrproto.pc $(PREFIX)/lib/pkgconfig/randrproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/renderproto.pc ] || ln -sf /usr/share/pkgconfig/renderproto.pc $(PREFIX)/lib/pkgconfig/renderproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xt.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xt.pc $(PREFIX)/lib/pkgconfig/xt.pc ++ [ -f $(PREFIX)/lib/pkgconfig/ice.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/ice.pc $(PREFIX)/lib/pkgconfig/ice.pc ++ [ -f $(PREFIX)/lib/pkgconfig/sm.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/sm.pc $(PREFIX)/lib/pkgconfig/sm.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xmu.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xmu.pc $(PREFIX)/lib/pkgconfig/xmu.pc ++ [ -f $(PREFIX)/lib/pkgconfig/libdrm.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/libdrm.pc $(PREFIX)/lib/pkgconfig/libdrm.pc +diff --git a/tools/depends/xbmc-addons.include b/tools/depends/xbmc-addons.include +index e5cb842d9f61578efe5df95dfa3a938cf5346663..3ddba3cefb1ca785f7a17c72f42aacbbaada7b6c 100644 +--- a/tools/depends/xbmc-addons.include ++++ b/tools/depends/xbmc-addons.include +@@ -77,23 +77,23 @@ $(TOOLCHAIN_FILE): $(abs_top_srcdir)/target/Toolchain_binaddons.cmake + + linux-system-libs: + mkdir -p $(ADDON_DEPS_DIR)/lib/pkgconfig $(ADDON_DEPS_DIR)/include +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/x11.pc ] || 
ln -s /usr/lib/$(HOST)/pkgconfig/x*.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ +- [ -f $(ADDON_DEPS_DIR)/lib/libX11.so ] || ln -s /usr/lib/$(HOST)/libX11.so* $(ADDON_DEPS_DIR)/lib/ +- [ -L $(ADDON_DEPS_DIR)/include/X11 ] || ln -s /usr/include/X11 $(ADDON_DEPS_DIR)/include/X11 +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/xproto.pc ] || ln -s /usr/share/pkgconfig/x*.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/kbproto.pc ] || ln -s /usr/share/pkgconfig/kbproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/kbproto.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/damageproto.pc ] || ln -s /usr/share/pkgconfig/damageproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/damageproto.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/fixesproto.pc ] || ln -s /usr/share/pkgconfig/fixesproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/fixesproto.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/pthread-stubs.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/pthread-stubs.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/pthread-stubs.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/ice.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/ice.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ice.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/sm.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/sm.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/sm.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/libdrm.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/libdrm.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/libdrm.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/gl.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/gl.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/gl.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/glu.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/glu.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/glu.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/glew.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/glew.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/glew.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/x11.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/x*.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ ++ [ -f $(ADDON_DEPS_DIR)/lib/libX11.so ] || ln -sf /usr/lib/$(HOST)/libX11.so* 
$(ADDON_DEPS_DIR)/lib/ ++ [ -L $(ADDON_DEPS_DIR)/include/X11 ] || ln -sf /usr/include/X11 $(ADDON_DEPS_DIR)/include/X11 ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/xproto.pc ] || ln -sf /usr/share/pkgconfig/x*.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/kbproto.pc ] || ln -sf /usr/share/pkgconfig/kbproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/kbproto.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/damageproto.pc ] || ln -sf /usr/share/pkgconfig/damageproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/damageproto.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/fixesproto.pc ] || ln -sf /usr/share/pkgconfig/fixesproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/fixesproto.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/pthread-stubs.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/pthread-stubs.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/pthread-stubs.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/ice.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/ice.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ice.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/sm.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/sm.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/sm.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/libdrm.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/libdrm.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/libdrm.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/gl.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/gl.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/gl.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/glu.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/glu.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/glu.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/glew.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/glew.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/glew.pc + [ -f $(ADDON_DEPS_DIR)/lib/libGL.so ] || \ +- (ln -s /usr/lib/$(HOST)/mesa $(ADDON_DEPS_DIR)/lib/mesa && ln -s $(ADDON_DEPS_DIR)/lib/mesa/libGL.so $(ADDON_DEPS_DIR)/lib/libGL.so) +- [ -f $(ADDON_DEPS_DIR)/lib/libGLEW.so ] || ln -s /usr/lib/$(HOST)/libGLEW.so* $(ADDON_DEPS_DIR)/lib/ +- [ -L $(ADDON_DEPS_DIR)/include/GL ] || ln -s /usr/include/GL 
$(ADDON_DEPS_DIR)/include/GL +- [ -f $(ADDON_DEPS_DIR)/lib/libm.so ] || ln -s /usr/lib/$(HOST)/libm.so $(ADDON_DEPS_DIR)/lib/ ++ (ln -sf /usr/lib/$(HOST)/mesa $(ADDON_DEPS_DIR)/lib/mesa && ln -sf $(ADDON_DEPS_DIR)/lib/mesa/libGL.so $(ADDON_DEPS_DIR)/lib/libGL.so) ++ [ -f $(ADDON_DEPS_DIR)/lib/libGLEW.so ] || ln -sf /usr/lib/$(HOST)/libGLEW.so* $(ADDON_DEPS_DIR)/lib/ ++ [ -L $(ADDON_DEPS_DIR)/include/GL ] || ln -sf /usr/include/GL $(ADDON_DEPS_DIR)/include/GL ++ [ -f $(ADDON_DEPS_DIR)/lib/libm.so ] || ln -sf /usr/lib/$(HOST)/libm.so $(ADDON_DEPS_DIR)/lib/ + + +From 9e113927dc8591c51d7cebc3e13d97c5db19f1d4 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Mon, 7 Apr 2014 18:19:32 +0100 +Subject: [PATCH 02/67] [rbp/omxplayer] When opening a stream don't try to + update gui so often + +--- + xbmc/dialogs/GUIDialogBusy.cpp | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/xbmc/dialogs/GUIDialogBusy.cpp b/xbmc/dialogs/GUIDialogBusy.cpp +index 8ea5161637b4e66ddd222859f058521dbc8922b9..811019a39a10acc21b83f0b0c70d5500055e7f98 100644 +--- a/xbmc/dialogs/GUIDialogBusy.cpp ++++ b/xbmc/dialogs/GUIDialogBusy.cpp +@@ -69,7 +69,11 @@ bool CGUIDialogBusy::WaitOnEvent(CEvent &event, unsigned int displaytime /* = 10 + { + dialog->Open(); + ++#ifdef TARGET_RASPBERRY_PI ++ while(!event.WaitMSec(100)) ++#else + while(!event.WaitMSec(1)) ++#endif + { + dialog->ProcessRenderLoop(false); + if (allowCancel && dialog->IsCanceled()) + +From 13bfba5171501299fc0d21ef4c5b1407807242e2 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Sat, 8 Mar 2014 15:36:06 +0000 +Subject: [PATCH 03/67] [hifiberry] Hack: force it to be recognised as IEC958 + capable to enable passthrough options + +--- + xbmc/cores/AudioEngine/Sinks/AESinkALSA.cpp | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/xbmc/cores/AudioEngine/Sinks/AESinkALSA.cpp b/xbmc/cores/AudioEngine/Sinks/AESinkALSA.cpp +index 6a9066b2dbe8d505d636b3638c1d35c7c8a698ed..9c6ac5d4cc9bf21b2d48619cc6fb5d274f1c3928 100644 +--- 
a/xbmc/cores/AudioEngine/Sinks/AESinkALSA.cpp ++++ b/xbmc/cores/AudioEngine/Sinks/AESinkALSA.cpp +@@ -1352,6 +1352,10 @@ void CAESinkALSA::EnumerateDevice(AEDeviceInfoList &list, const std::string &dev + if (snd_card_get_name(cardNr, &cardName) == 0) + info.m_displayName = cardName; + ++ // hack: hifiberry digi doesn't correctly report as iec958 device. Needs fixing in kernel driver ++ if (info.m_displayName == "snd_rpi_hifiberry_digi") ++ info.m_deviceType = AE_DEVTYPE_IEC958; ++ + if (info.m_deviceType == AE_DEVTYPE_HDMI && info.m_displayName.size() > 5 && + info.m_displayName.substr(info.m_displayName.size()-5) == " HDMI") + { + +From c89b8b2588ffc2fb3022bb2debc09648e66f01d1 Mon Sep 17 00:00:00 2001 +From: Ben Avison +Date: Thu, 1 May 2014 16:28:39 +0100 +Subject: [PATCH 04/67] Improved file buffering in CArchive + +Even though memcpy is typically inlined by the compiler into byte/word loads +and stores (at least for release builds), the frequency with which 1, 2 and 4 +byte loads/stores are encountered in cases where the size is *not* +determinable at compile time is still high enough that it's worth handling +these specially. On the ARM1176JZF-S in the Raspberry Pi, this improves the +total time to open a library (in the case where it's fetched from a CArchive) +by around 4%. + +It should be noted that this code uses 16-bit and 32-bit word loads and +stores that are not necessarily aligned to their respective widths. It is +possible that there are some architectures out there which do not support +this, although all ARMs since ARMv6 have supported it (and ARMs earlier than +that are probably not powerful enough to be good targets for XBMC). 
+--- + xbmc/utils/Archive.h | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/xbmc/utils/Archive.h b/xbmc/utils/Archive.h +index 6ed0f8fe37950306bb6ac369082dd024f032ab66..8506d9593de4c913a3c1469cf9cec89475d8dd30 100644 +--- a/xbmc/utils/Archive.h ++++ b/xbmc/utils/Archive.h +@@ -154,9 +154,17 @@ protected: + * than waiting until we attempt to put more data into an already full buffer */ + if (m_BufferRemain > size) + { ++ switch (size) ++ { ++ case 1: *m_BufferPos++ = *ptr; m_BufferRemain--; break; ++ case 2: *(uint16_t *) m_BufferPos = *(const uint16_t *) ptr; m_BufferPos += 2; m_BufferRemain -= 2; break; ++ case 4: *(uint32_t *) m_BufferPos = *(const uint32_t *) ptr; m_BufferPos += 4; m_BufferRemain -= 4; break; ++ default: + memcpy(m_BufferPos, ptr, size); + m_BufferPos += size; + m_BufferRemain -= size; ++ break; ++ } + return *this; + } + else +@@ -171,9 +179,17 @@ protected: + /* Note, refilling the buffer is deferred until we know we need to read more from it */ + if (m_BufferRemain >= size) + { ++ switch (size) ++ { ++ case 1: *ptr = *m_BufferPos++; m_BufferRemain--; break; ++ case 2: *(uint16_t *) ptr = *(const uint16_t *) m_BufferPos; m_BufferPos += 2; m_BufferRemain -= 2; break; ++ case 4: *(uint32_t *) ptr = *(const uint32_t *) m_BufferPos; m_BufferPos += 4; m_BufferRemain -= 4; break; ++ default: + memcpy(ptr, m_BufferPos, size); + m_BufferPos += size; + m_BufferRemain -= size; ++ break; ++ } + return *this; + } + else + +From afe3081bcf63939850a753200650570d04ed8aaa Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Sun, 10 Aug 2014 16:45:16 +0100 +Subject: [PATCH 05/67] filesystem: Make support of browsing into archives + optional + +The ability to browse, scan and play content in archives can cause problems on low powered/low memory devices. +It's quite common to see reports of a large rar file that causes xbmc to crash with an out-of-memory error when browsing or scanning. 
+It also can be slow as any archive in the directory is opened and extracted. + +This causes issues for people who scan library with archives disabled, then subsequently enable it. +The library has the .rar files in which don't play without removing and re-adding. + +We'll let people who don't use archives disable it manually +--- + addons/resource.language.en_gb/resources/strings.po | 9 +++++++++ + system/settings/rbp.xml | 11 +++++++++++ + xbmc/Util.cpp | 4 ++-- + xbmc/filesystem/FileDirectoryFactory.cpp | 4 ++++ + 4 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index f0cfe2bc13ec3f333af83df21d0185448896719b..8860129ce3d4fd3426f6ba65d0c8cb8df18be8b2 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -19131,6 +19131,15 @@ msgstr "" + #: system/settings/rbp.xml + msgctxt "#38010" + msgid "GPU accelerated" ++ ++#: system/settings/settings.xml ++msgctxt "#38040" ++msgid "Support browsing into archives" ++msgstr "" ++ ++#: system/settings/settings.xml ++msgctxt "#38041" ++msgid "Allow viewing and playing files in archives (e.g. zip, rar)" + msgstr "" + + #. Setting #38011 "Show All Items entry" +diff --git a/system/settings/rbp.xml b/system/settings/rbp.xml +index 806eadf44d73cea70fdbd8b723770a7f828e0633..7e6e52f82fde4c91fdc004c4b4b46e86091bcc87 100644 +--- a/system/settings/rbp.xml ++++ b/system/settings/rbp.xml +@@ -99,4 +99,15 @@ + + + ++
++ ++ ++ ++ 1 ++ true ++ ++ ++ ++ ++
+ +diff --git a/xbmc/Util.cpp b/xbmc/Util.cpp +index b0de1c8f1046e094191f19ecd52334ddc6d1b4d1..446d8df2993423a2f80d88f82fbb7f767b11cf1b 100644 +--- a/xbmc/Util.cpp ++++ b/xbmc/Util.cpp +@@ -1773,7 +1773,7 @@ void CUtil::ScanPathsForAssociatedItems(const std::string& videoName, + URIUtils::RemoveExtension(strCandidate); + if (StringUtils::StartsWithNoCase(strCandidate, videoName)) + { +- if (URIUtils::IsRAR(pItem->GetPath()) || URIUtils::IsZIP(pItem->GetPath())) ++ if (CSettings::GetInstance().GetBool("filelists.browsearchives") && (URIUtils::IsRAR(pItem->GetPath()) || URIUtils::IsZIP(pItem->GetPath()))) + CUtil::ScanArchiveForAssociatedItems(pItem->GetPath(), "", item_exts, associatedFiles); + else + { +@@ -1783,7 +1783,7 @@ void CUtil::ScanPathsForAssociatedItems(const std::string& videoName, + } + else + { +- if (URIUtils::IsRAR(pItem->GetPath()) || URIUtils::IsZIP(pItem->GetPath())) ++ if (CSettings::GetInstance().GetBool("filelists.browsearchives") && (URIUtils::IsRAR(pItem->GetPath()) || URIUtils::IsZIP(pItem->GetPath()))) + CUtil::ScanArchiveForAssociatedItems(pItem->GetPath(), videoName, item_exts, associatedFiles); + } + } +diff --git a/xbmc/filesystem/FileDirectoryFactory.cpp b/xbmc/filesystem/FileDirectoryFactory.cpp +index a0fd0a9011e71f4af1535110c696b6ea5c4b37db..688b71a297c7c617c6764bfe6be157d727eb49d3 100644 +--- a/xbmc/filesystem/FileDirectoryFactory.cpp ++++ b/xbmc/filesystem/FileDirectoryFactory.cpp +@@ -40,6 +40,7 @@ + #include "playlists/PlayListFactory.h" + #include "Directory.h" + #include "File.h" ++#include "settings/Settings.h" + #include "FileItem.h" + #include "utils/StringUtils.h" + #include "URL.h" +@@ -116,6 +117,8 @@ IFileDirectory* CFileDirectoryFactory::Create(const CURL& url, CFileItem* pItem, + return NULL; + } + #endif ++ if (CSettings::GetInstance().GetBool("filelists.browsearchives")) ++ { + if (url.IsFileType("zip")) + { + CURL zipURL = URIUtils::CreateArchivePath("zip", url); +@@ -189,6 +192,7 @@ IFileDirectory* 
CFileDirectoryFactory::Create(const CURL& url, CFileItem* pItem, + } + return NULL; + } ++ } + if (url.IsFileType("xbt")) + { + CURL xbtUrl = URIUtils::CreateArchivePath("xbt", url); + +From b38f7abd72691bb2eb87892e6619a7eba7ebea77 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Mon, 27 Oct 2014 13:06:57 +0000 +Subject: [PATCH 06/67] [rbp] Make cachemembuffersize default depend on memory + size + +--- + xbmc/linux/RBP.cpp | 10 ++++++++++ + xbmc/linux/RBP.h | 1 + + xbmc/settings/AdvancedSettings.cpp | 12 +++++++++++- + 3 files changed, 22 insertions(+), 1 deletion(-) + +diff --git a/xbmc/linux/RBP.cpp b/xbmc/linux/RBP.cpp +index d101638cc38468c3d9673bc48f6603d414bcb7f5..ddbe27061f8192b7f6c830a4c22652a731537079 100644 +--- a/xbmc/linux/RBP.cpp ++++ b/xbmc/linux/RBP.cpp +@@ -23,6 +23,7 @@ + + #include + #include "settings/Settings.h" ++#include "settings/AdvancedSettings.h" + #include "utils/log.h" + + #include "cores/omxplayer/OMXImage.h" +@@ -58,6 +59,12 @@ CRBP::~CRBP() + delete m_DllBcmHost; + } + ++void CRBP::InitializeSettings() ++{ ++ if (m_initialized && g_advancedSettings.m_cacheMemSize == ~0U) ++ g_advancedSettings.m_cacheMemSize = m_arm_mem < 256 ? 1024 * 1024 * 2 : 1024 * 1024 * 20; ++} ++ + bool CRBP::Initialize() + { + CSingleLock lock(m_critSection); +@@ -97,6 +104,8 @@ bool CRBP::Initialize() + if (!m_gui_resolution_limit) + m_gui_resolution_limit = m_gpu_mem < 128 ? 
720:1080; + ++ InitializeSettings(); ++ + g_OMXImage.Initialize(); + m_omx_image_init = true; + return true; +@@ -109,6 +118,7 @@ void CRBP::LogFirmwareVerison() + response[sizeof(response) - 1] = '\0'; + CLog::Log(LOGNOTICE, "Raspberry PI firmware version: %s", response); + CLog::Log(LOGNOTICE, "ARM mem: %dMB GPU mem: %dMB MPG2:%d WVC1:%d", m_arm_mem, m_gpu_mem, m_codec_mpg2_enabled, m_codec_wvc1_enabled); ++ CLog::Log(LOGNOTICE, "cache.memorysize: %dMB", g_advancedSettings.m_cacheMemSize >> 20); + m_DllBcmHost->vc_gencmd(response, sizeof response, "get_config int"); + response[sizeof(response) - 1] = '\0'; + CLog::Log(LOGNOTICE, "Config:\n%s", response); +diff --git a/xbmc/linux/RBP.h b/xbmc/linux/RBP.h +index a35a509a91483f13e2cf0e688fc7e9528f254290..fffa5182126159f6dfcf750b21fa0464e229e545 100644 +--- a/xbmc/linux/RBP.h ++++ b/xbmc/linux/RBP.h +@@ -62,6 +62,7 @@ public: + ~CRBP(); + + bool Initialize(); ++ void InitializeSettings(); + void LogFirmwareVerison(); + void Deinitialize(); + int GetArmMem() { return m_arm_mem; } +diff --git a/xbmc/settings/AdvancedSettings.cpp b/xbmc/settings/AdvancedSettings.cpp +index e7f13a73e5ce6d5fe9864fe76dccc9d3e1fdbc27..446293308010f3b8cd8d325fa6d0285fcc9f892d 100644 +--- a/xbmc/settings/AdvancedSettings.cpp ++++ b/xbmc/settings/AdvancedSettings.cpp +@@ -50,6 +50,9 @@ + #if defined(TARGET_DARWIN_IOS) + #include "platform/darwin/DarwinUtils.h" + #endif ++#if defined(TARGET_RASPBERRY_PI) ++#include "linux/RBP.h" ++#endif + + using namespace ADDON; + using namespace XFILE; +@@ -356,7 +359,12 @@ void CAdvancedSettings::Initialize() + m_bPVRAutoScanIconsUserSet = false; + m_iPVRNumericChannelSwitchTimeout = 1000; + ++#ifdef TARGET_RASPBERRY_PI ++ // want default to be memory dependent, but interface to gpu not available yet, so set in RBP.cpp ++ m_cacheMemSize = ~0; ++#else + m_cacheMemSize = 1024 * 1024 * 20; ++#endif + m_cacheBufferMode = CACHE_BUFFER_MODE_INTERNET; // Default (buffer all internet streams/filesystems) + // the 
following setting determines the readRate of a player data + // as multiply of the default data read rate +@@ -405,7 +413,9 @@ void CAdvancedSettings::Initialize() + m_extraLogLevels = 0; + + m_userAgent = g_sysinfo.GetUserAgent(); +- ++#ifdef TARGET_RASPBERRY_PI ++ g_RBP.InitializeSettings(); ++#endif + m_initialized = true; + } + + +From 444ff3630cfa2ff69f1f41150158175ed7d8a549 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 30 May 2014 14:58:43 +0100 +Subject: [PATCH 07/67] [settings] Experiment: Report DESKTOP resolution in + video settings + +--- + xbmc/settings/DisplaySettings.cpp | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xbmc/settings/DisplaySettings.cpp b/xbmc/settings/DisplaySettings.cpp +index c1cca7efdd5d119b07308b947c569911f2a9bdc9..e03f3c8ef21ba824c0d707042e5a735ac74a86b8 100644 +--- a/xbmc/settings/DisplaySettings.cpp ++++ b/xbmc/settings/DisplaySettings.cpp +@@ -704,6 +704,9 @@ void CDisplaySettings::SettingOptionsResolutionsFiller(const CSetting *setting, + std::vector resolutions = g_Windowing.ScreenResolutions(info.iScreen, info.fRefreshRate); + for (std::vector::const_iterator resolution = resolutions.begin(); resolution != resolutions.end(); ++resolution) + { ++if (resolution->ResInfo_Index == RES_DESKTOP) ++ list.push_back(std::make_pair(StringUtils::Format("DESKTOP"), resolution->ResInfo_Index)); ++else + list.push_back(std::make_pair( + StringUtils::Format("%dx%d%s", resolution->width, resolution->height, + ModeFlagsToString(resolution->flags, false).c_str()), + +From 03a66653809c1494b57bc1644af53c1c111a4765 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Wed, 24 Sep 2014 23:13:52 +0100 +Subject: [PATCH 08/67] [audio] Add settings option to boost centre channel + when downmixing + +This allows a dB volume increase to be added to centre channel. +This can help improve dialogue in the presence of background music/effects. +It can go up to 30dB for testing purposes, but value of 6 is probably more reasonable.
+It is recommended to ensure "Normalise levels on downmix" is enabled when boosting by large values to avoid clipping. + +Should work with Pi Sink (dvdplayer/paplayer) and omxplayer +--- + addons/resource.language.en_gb/resources/strings.po | 15 +++++++++++++++ + system/settings/settings.xml | 12 ++++++++++++ + .../Engines/ActiveAE/ActiveAEResampleFFMPEG.cpp | 7 +++++++ + .../AudioEngine/Engines/ActiveAE/ActiveAEResamplePi.cpp | 6 ++++++ + xbmc/cores/omxplayer/OMXAudio.cpp | 6 ++++++ + 5 files changed, 46 insertions(+) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index 8860129ce3d4fd3426f6ba65d0c8cb8df18be8b2..f646446b73b2e8a3a783b2e52b3257c6ad6da2bd 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -19305,6 +19305,21 @@ msgstr "" + + #empty strings from id 38043 to 38099 + ++#: system/settings/settings.xml ++msgctxt "#38007" ++msgid "Boost centre channel when downmixing" ++msgstr "" ++ ++#: system/settings/settings.xml ++msgctxt "#38008" ++msgid "Increase this value to make the dialogue louder compared to background sounds when downmixing multichannel audio" ++msgstr "" ++ ++#: system/settings/settings.xml ++msgctxt "#38009" ++msgid "%i dB" ++msgstr "" ++ + #. 
Description of section #14200 "Player"" + #: system/settings/settings.xml + msgctxt "#38100" +diff --git a/system/settings/settings.xml b/system/settings/settings.xml +index c9d3c9dbe5bc5d41c8eb54babf78f9fe4046dd5c..2fad528a2f7ad57db8476c1879f853b8485d08e4 100644 +--- a/system/settings/settings.xml ++++ b/system/settings/settings.xml +@@ -2261,6 +2261,18 @@ + + + ++ ++ 2 ++ 0 ++ ++ 0 ++ 1 ++ 30 ++ ++ ++ 38009 ++ ++ + + HAS_AE_QUALITY_LEVELS + 2 +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResampleFFMPEG.cpp b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResampleFFMPEG.cpp +index 0cef1c58fae68f5a74d9ca31073282eb13abb037..23cd1eb96c2515eb5022f5b0220e67785b8aa4de 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResampleFFMPEG.cpp ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResampleFFMPEG.cpp +@@ -20,6 +20,7 @@ + + #include "cores/AudioEngine/Utils/AEUtil.h" + #include "ActiveAEResampleFFMPEG.h" ++#include "settings/Settings.h" + #include "utils/log.h" + + extern "C" { +@@ -104,6 +105,12 @@ bool CActiveAEResampleFFMPEG::Init(uint64_t dst_chan_layout, int dst_channels, i + { + av_opt_set_double(m_pContext, "rematrix_maxval", 1.0, 0); + } ++ int boost_center = CSettings::GetInstance().GetInt("audiooutput.boostcenter"); ++ if (boost_center) ++ { ++ float gain = pow(10.0f, ((float)(-3 + boost_center))/20.0f); ++ av_opt_set_double(m_pContext, "center_mix_level", gain, 0); ++ } + + if (remapLayout) + { +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResamplePi.cpp b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResamplePi.cpp +index 78071493fca4756c6741d7085e35cbe2f27038e6..698a6ae1e2bc0cc9256caec42c0dcfb0893301b5 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResamplePi.cpp ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResamplePi.cpp +@@ -164,6 +164,12 @@ bool CActiveAEResamplePi::Init(uint64_t dst_chan_layout, int dst_channels, int d + { + av_opt_set_double(m_pContext, "rematrix_maxval", 
1.0, 0); + } ++ int boost_center = CSettings::GetInstance().GetInt("audiooutput.boostcenter"); ++ if (boost_center) ++ { ++ float gain = pow(10.0f, ((float)(-3 + boost_center))/20.0f); ++ av_opt_set_double(m_pContext, "center_mix_level", gain, 0); ++ } + + if (remapLayout) + { +diff --git a/xbmc/cores/omxplayer/OMXAudio.cpp b/xbmc/cores/omxplayer/OMXAudio.cpp +index f16b822ed7b4aebe18b5d339b3f71ee66e97c23f..993d4b33a294e88c2c004b7943895ba55558c2d0 100644 +--- a/xbmc/cores/omxplayer/OMXAudio.cpp ++++ b/xbmc/cores/omxplayer/OMXAudio.cpp +@@ -633,6 +633,12 @@ bool COMXAudio::Initialize(AEAudioFormat format, OMXClock *clock, CDVDStreamInfo + { + av_opt_set_double(m_pContext, "rematrix_maxval", 1.0, 0); + } ++ int boost_center = CSettings::GetInstance().GetInt("audiooutput.boostcenter"); ++ if (boost_center) ++ { ++ float gain = pow(10.0f, ((float)(-3 + boost_center))/20.0f); ++ av_opt_set_double(m_pContext, "center_mix_level", gain, 0); ++ } + + // stereo upmix + if (upmix && m_src_channels == 2 && m_dst_channels > 2) + +From db58404d482592303a170a3519ed43e552f3034a Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Mon, 27 Oct 2014 15:23:51 +0000 +Subject: [PATCH 09/67] [rbp] Default extract thumbnails to false + +It can take 80 seconds for a single file on a Pi. It can cause crashes with out-of-memory errors. +It generates a lot of support issues.
Best to default to disabled and let users enable it if they must +--- + system/settings/rbp.xml | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/system/settings/rbp.xml b/system/settings/rbp.xml +index 7e6e52f82fde4c91fdc004c4b4b46e86091bcc87..737ec4e0c7f0feb98a6dd008b53e238c41dde8af 100644 +--- a/system/settings/rbp.xml ++++ b/system/settings/rbp.xml +@@ -43,6 +43,12 @@ + + false + ++ ++ false ++ ++ ++ false ++ + + + + +From e2a04cad01c0fe85bec84480d05a58fe55f84bb2 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 27 Nov 2014 16:31:56 +0000 +Subject: [PATCH 10/67] [languageinvoker] Reduce priority of python threads + +--- + xbmc/interfaces/generic/LanguageInvokerThread.cpp | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/xbmc/interfaces/generic/LanguageInvokerThread.cpp b/xbmc/interfaces/generic/LanguageInvokerThread.cpp +index fcdd0633f30cd9595ae6cc4ed293677cdcb1f422..16f0c8916b5e0a9e90973d194cf2ebd12b5a81fd 100644 +--- a/xbmc/interfaces/generic/LanguageInvokerThread.cpp ++++ b/xbmc/interfaces/generic/LanguageInvokerThread.cpp +@@ -50,6 +50,11 @@ bool CLanguageInvokerThread::execute(const std::string &script, const std::vecto + m_args = arguments; + + Create(); ++ #ifdef TARGET_RASPBERRY_PI ++ /* low prio */ ++ SetPriority(GetPriority()-1); ++ #endif ++ + return true; + } + + +From e34bc9595b6b789d3b13165d7abcec3b25c83bfd Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Sat, 14 Dec 2013 16:55:05 +0000 +Subject: [PATCH 11/67] logging: Add microsecond timer to log messages + +--- + xbmc/utils/log.cpp | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +diff --git a/xbmc/utils/log.cpp b/xbmc/utils/log.cpp +index 3443f1293d86018830269ed992c90a4e69c0430c..d330320842243df6f5ff256e608dddfa946e8773 100644 +--- a/xbmc/utils/log.cpp ++++ b/xbmc/utils/log.cpp +@@ -24,6 +24,7 @@ + #include "threads/Thread.h" + #include "utils/StringUtils.h" + #include "CompileInfo.h" ++#include "utils/TimeUtils.h" + + static const char* 
const levelNames[] = + {"DEBUG", "INFO", "NOTICE", "WARNING", "ERROR", "SEVERE", "FATAL", "NONE"}; +@@ -198,19 +199,29 @@ void CLog::PrintDebugString(const std::string& line) + + bool CLog::WriteLogString(int logLevel, const std::string& logString) + { ++#if defined(TARGET_LINUX) ++ static const char* prefixFormat = "%02.2d:%02.2d:%02.2d %10.6f T:%" PRIu64" %7s: "; ++#else + static const char* prefixFormat = "%02.2d:%02.2d:%02.2d T:%" PRIu64" %7s: "; +- ++#endif + std::string strData(logString); + /* fixup newline alignment, number of spaces should equal prefix length */ + StringUtils::Replace(strData, "\n", "\n "); + + int hour, minute, second; + s_globals.m_platform.GetCurrentLocalTime(hour, minute, second); +- ++ ++#if defined(TARGET_LINUX) ++ double Now = CurrentHostCounter() * 1e-9; // double: a float's 24-bit mantissa loses microsecond resolution once the value exceeds ~16 s ++#endif ++ + strData = StringUtils::Format(prefixFormat, + hour, + minute, + second, ++#if defined(TARGET_LINUX) ++ Now, ++#endif + (uint64_t)CThread::GetCurrentThreadId(), + levelNames[logLevel]) + strData; + + +From 6a9154ceb989a8ca0f2c5f50c6746ade14125267 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Sat, 29 Nov 2014 15:25:16 +0000 +Subject: [PATCH 12/67] [rbp] hack: wait for splash to complete before changing + hdmi mode + +--- + xbmc/windowing/egl/EGLNativeTypeRaspberryPI.cpp | 52 +++++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/xbmc/windowing/egl/EGLNativeTypeRaspberryPI.cpp b/xbmc/windowing/egl/EGLNativeTypeRaspberryPI.cpp +index ee297700f8583dbb15cbe53baf8c887b36bd2ea0..bbe501d40c5e101f1d0d64b8b59b1928ae12d52f 100644 +--- a/xbmc/windowing/egl/EGLNativeTypeRaspberryPI.cpp ++++ b/xbmc/windowing/egl/EGLNativeTypeRaspberryPI.cpp +@@ -32,6 +32,9 @@ + #include "guilib/StereoscopicsManager.h" + #include "rendering/RenderSystem.h" + #include ++#ifdef TARGET_POSIX ++#include "linux/XTimeUtils.h" ++#endif + + #ifndef __VIDEOCORE4__ + #define __VIDEOCORE4__ +@@ -221,12 +224,61 @@ int CEGLNativeTypeRaspberryPI::AddUniqueResolution(RESOLUTION_INFO &res,
std::ve + } + #endif + ++#include <dirent.h> ++ ++pid_t proc_find(const char* name) ++{ ++ DIR* dir; ++ struct dirent* ent; ++ char buf[512]; ++ ++ long pid; ++ char pname[100] = {0,}; ++ char state; ++ FILE *fp=NULL; ++ ++ if (!(dir = opendir("/proc"))) { ++ perror("can't open /proc"); ++ return -1; ++ } ++ ++ while((ent = readdir(dir)) != NULL) { ++ long lpid = atol(ent->d_name); ++ if(lpid <= 0) // atol() yields 0 for non-numeric entries (".", "cpuinfo", ...): not a process directory ++ continue; ++ snprintf(buf, sizeof(buf), "/proc/%ld/stat", lpid); ++ fp = fopen(buf, "r"); ++ ++ if (fp) { ++ if ( (fscanf(fp, "%ld (%99[^)]) %c", &pid, pname, &state)) != 3 ){ ++ // process exited or entry is unparsable: skip it instead of ++ // aborting the scan and wrongly reporting "not running" ++ fclose(fp); ++ continue; ++ } ++ if (!strcmp(pname, name)) { ++ fclose(fp); ++ closedir(dir); ++ return (pid_t)lpid; ++ } ++ fclose(fp); ++ } ++ } ++ ++ closedir(dir); ++ return -1; ++} ++ ++ + bool CEGLNativeTypeRaspberryPI::SetNativeResolution(const RESOLUTION_INFO &res) + { + #if defined(TARGET_RASPBERRY_PI) + if(!m_DllBcmHost || !m_nativeWindow) + return false; + ++ while (proc_find("hello_video.bin") >= 0) ++ Sleep(100); ++ + DestroyDispmaxWindow(); + + RENDER_STEREO_MODE stereo_mode = g_graphicsContext.GetStereoMode(); + +From 6aa85041e715484b032f9e905db8c65388acfe17 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 11 Dec 2014 17:00:57 +0000 +Subject: [PATCH 13/67] Fix for UI not showing both extractflags and + extractthumb + +--- + addons/resource.language.en_gb/resources/strings.po | 10 +++++++--- + system/settings/settings.xml | 4 ++-- + 2 files changed, 9 insertions(+), 5 deletions(-) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index f646446b73b2e8a3a783b2e52b3257c6ad6da2bd..f1100b4238139b15799ddf1dba86265a1eaa53f3 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -12348,7 +12348,7 @@ msgstr "" + + #: system/settings/settings.xml + msgctxt "#20433" +-msgid "Extract thumbnails and video
information" ++msgid "Extract video information from files" + msgstr "" + + #: xbmc/dialogs/GUIDialogSmartPlaylistRule.cpp +@@ -16783,7 +16783,7 @@ msgstr "" + #. Description of setting with label #20433 "Extract thumbnails and video information" + #: system/settings/settings.xml + msgctxt "#36178" +-msgid "Extract thumbnails and metadata information such as codec and aspect ratio from videos." ++msgid "Extract metadata information such as codec and aspect ratio from videos." + msgstr "" + + #. Description of setting with label #20419 "Replace file names with library titles" +@@ -16795,7 +16795,7 @@ msgstr "" + #. Description of setting with label #20433 "Extract thumbnails and video information" + #: system/settings/settings.xml + msgctxt "#36180" +-msgid "Extract thumbnails and information, such as codecs and aspect ratio, to display in library mode." ++msgid "Extract thumbnails, to display in library Mode." + msgstr "" + + #: system/settings/settings.xml +@@ -19413,3 +19413,7 @@ msgstr "" + msgctxt "#39003" + msgid "Accelerate h264" + msgstr "" ++ ++msgctxt "#38190" ++msgid "Extract thumbnails from video files" ++msgstr "" +diff --git a/system/settings/settings.xml b/system/settings/settings.xml +index 2fad528a2f7ad57db8476c1879f853b8485d08e4..ca7e8892606782e54d4883c5b2f0e6686b1ae280 100644 +--- a/system/settings/settings.xml ++++ b/system/settings/settings.xml +@@ -919,8 +919,8 @@ + true + + +- +- 4 ++ ++ 1 + true + + + +From 2900f0dc9fa9b7271efc13dfd219ee62a8737f6c Mon Sep 17 00:00:00 2001 +From: anaconda +Date: Thu, 11 Sep 2014 21:30:43 +0200 +Subject: [PATCH 14/67] Disable autoscrolling while on screensaver and while + opening streams. 
+ +--- + xbmc/Application.cpp | 10 ++++++++++ + xbmc/Application.h | 2 ++ + xbmc/guilib/GUIFadeLabelControl.cpp | 4 +++- + xbmc/guilib/GUIFont.cpp | 4 ++++ + xbmc/guilib/GUILabel.cpp | 4 +++- + xbmc/guilib/GUITextBox.cpp | 3 ++- + 6 files changed, 24 insertions(+), 3 deletions(-) + +diff --git a/xbmc/Application.cpp b/xbmc/Application.cpp +index 39c5731cc13c028212c4776511ea978fa2cb6776..bf2f2d3e73cbc88ab9d89f91baa11f983f36ee10 100644 +--- a/xbmc/Application.cpp ++++ b/xbmc/Application.cpp +@@ -5198,3 +5198,13 @@ bool CApplication::NotifyActionListeners(const CAction &action) const + + return false; + } ++ ++bool CApplication::ScreenSaverDisablesAutoScrolling() ++{ ++ bool onBlackDimScreenSaver = IsInScreenSaver() && m_screenSaver && ++ (m_screenSaver->ID() == "screensaver.xbmc.builtin.black" || ++ m_screenSaver->ID() == "screensaver.xbmc.builtin.dim"); ++ bool openingStreams = m_pPlayer->IsPlaying() && g_windowManager.IsWindowActive(WINDOW_DIALOG_BUSY); ++ ++ return onBlackDimScreenSaver || openingStreams; ++} +diff --git a/xbmc/Application.h b/xbmc/Application.h +index 5d38663767a70875d9459a2f4a65979a203edc7b..1aca9fe67fea8436a15a5e2c07b6558b2bdf3ab7 100644 +--- a/xbmc/Application.h ++++ b/xbmc/Application.h +@@ -394,6 +394,8 @@ public: + */ + void UnregisterActionListener(IActionListener *listener); + ++ bool ScreenSaverDisablesAutoScrolling(); ++ + std::unique_ptr m_ServiceManager; + + /*! 
+diff --git a/xbmc/guilib/GUIFadeLabelControl.cpp b/xbmc/guilib/GUIFadeLabelControl.cpp +index 01826a5f7ca2ccb104f897ca0670571a9b04b83d..553a6458a71009dd592c8a843eeb3bc336864d61 100644 +--- a/xbmc/guilib/GUIFadeLabelControl.cpp ++++ b/xbmc/guilib/GUIFadeLabelControl.cpp +@@ -21,6 +21,8 @@ + #include "GUIFadeLabelControl.h" + #include "utils/Random.h" + ++#include "Application.h" ++ + CGUIFadeLabelControl::CGUIFadeLabelControl(int parentID, int controlID, float posX, float posY, float width, float height, const CLabelInfo& labelInfo, bool scrollOut, unsigned int timeToDelayAtEnd, bool resetOnLabelChange, bool randomized) + : CGUIControl(parentID, controlID, posX, posY, width, height), m_label(labelInfo), m_scrollInfo(50, labelInfo.offsetX, labelInfo.scrollSpeed) + , m_textLayout(labelInfo.font, false) +@@ -106,7 +108,7 @@ void CGUIFadeLabelControl::Process(unsigned int currentTime, CDirtyRegionList &d + m_lastLabel = m_currentLabel; + } + +- if (m_infoLabels.size() > 1 || !m_shortText) ++ if ((m_infoLabels.size() > 1 || !m_shortText) && !g_application.ScreenSaverDisablesAutoScrolling()) + { // have scrolling text + bool moveToNextLabel = false; + if (!m_scrollOut) +diff --git a/xbmc/guilib/GUIFont.cpp b/xbmc/guilib/GUIFont.cpp +index 7f1108939a63162024c7a055403a58e395f090b6..1192b74675b79d1a862de2949a60163abb916035 100644 +--- a/xbmc/guilib/GUIFont.cpp ++++ b/xbmc/guilib/GUIFont.cpp +@@ -22,6 +22,7 @@ + #include "GUIFontTTF.h" + #include "GraphicContext.h" + ++#include "Application.h" + #include "threads/SingleLock.h" + #include "utils/TimeUtils.h" + #include "utils/MathUtils.h" +@@ -128,6 +129,9 @@ bool CGUIFont::UpdateScrollInfo(const vecText &text, CScrollInfo &scrollInfo) + // If the string is smaller than the viewport, then it may be plotted even + // more times than that. 
+ // ++ if (g_application.ScreenSaverDisablesAutoScrolling()) ++ return false; ++ + if (scrollInfo.waitTime) + { + scrollInfo.waitTime--; +diff --git a/xbmc/guilib/GUILabel.cpp b/xbmc/guilib/GUILabel.cpp +index db201317a1f8d93dcf0641a28b7688cbd1a70734..1c8c30dcb9d55b7240af93b5e46c620320ef410a 100644 +--- a/xbmc/guilib/GUILabel.cpp ++++ b/xbmc/guilib/GUILabel.cpp +@@ -21,6 +21,8 @@ + #include "GUILabel.h" + #include + ++#include "Application.h" ++ + CGUILabel::CGUILabel(float posX, float posY, float width, float height, const CLabelInfo& labelInfo, CGUILabel::OVER_FLOW overflow) + : m_label(labelInfo) + , m_textLayout(labelInfo.font, overflow == OVER_FLOW_WRAP, height) +@@ -104,7 +106,7 @@ void CGUILabel::Render() + color_t color = GetColor(); + bool renderSolid = (m_color == COLOR_DISABLED); + bool overFlows = (m_renderRect.Width() + 0.5f < m_textLayout.GetTextWidth()); // 0.5f to deal with floating point rounding issues +- if (overFlows && m_scrolling && !renderSolid) ++ if (overFlows && m_scrolling && !renderSolid && !g_application.ScreenSaverDisablesAutoScrolling()) + m_textLayout.RenderScrolling(m_renderRect.x1, m_renderRect.y1, m_label.angle, color, m_label.shadowColor, 0, m_renderRect.Width(), m_scrollInfo); + else + { +diff --git a/xbmc/guilib/GUITextBox.cpp b/xbmc/guilib/GUITextBox.cpp +index d7bc1c5ba6067af9a460589920367288c640a915..ac766293f1c47c7f145cb46f6b152144b303f15f 100644 +--- a/xbmc/guilib/GUITextBox.cpp ++++ b/xbmc/guilib/GUITextBox.cpp +@@ -24,6 +24,7 @@ + #include "utils/MathUtils.h" + #include "utils/StringUtils.h" + #include "guiinfo/GUIInfoLabels.h" ++#include "Application.h" + + #include + +@@ -133,7 +134,7 @@ void CGUITextBox::Process(unsigned int currentTime, CDirtyRegionList &dirtyregio + // update our auto-scrolling as necessary + if (m_autoScrollTime && m_lines.size() > m_itemsPerPage) + { +- if (!m_autoScrollCondition || m_autoScrollCondition->Get()) ++ if ((!m_autoScrollCondition || m_autoScrollCondition->Get()) && 
!g_application.ScreenSaverDisablesAutoScrolling()) + { + if (m_lastRenderTime) + m_autoScrollDelayTime += currentTime - m_lastRenderTime; + +From 91f06fc770b8d9dee8086ab20a7111dc75664229 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Sat, 13 Dec 2014 18:35:20 +0000 +Subject: [PATCH 15/67] [demuxer] Avoid memcpy on every demuxer packet + +Avoids an unnecessary memcpy on every demuxer packet which for +high bitrate videos can be significant. +--- + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 17 ++++++++++++----- + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxPacket.h | 3 +++ + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxUtils.cpp | 7 ++++++- + 3 files changed, 21 insertions(+), 6 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 0b3643c70a9f0d18ccdbb04619d90f82e3b2f232..b9131402dff3a6d538a188794096bad5784dbb63 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -851,7 +851,7 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + { + if(m_pkt.pkt.stream_index == (int)m_pFormatContext->programs[m_program]->stream_index[i]) + { +- pPacket = CDVDDemuxUtils::AllocateDemuxPacket(m_pkt.pkt.size); ++ pPacket = CDVDDemuxUtils::AllocateDemuxPacket(0); + break; + } + } +@@ -860,7 +860,7 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + bReturnEmpty = true; + } + else +- pPacket = CDVDDemuxUtils::AllocateDemuxPacket(m_pkt.pkt.size); ++ pPacket = CDVDDemuxUtils::AllocateDemuxPacket(0); + } + else + bReturnEmpty = true; +@@ -890,9 +890,13 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + // copy contents into our own packet + pPacket->iSize = m_pkt.pkt.size; + +- // maybe we can avoid a memcpy here by detecting where pkt.destruct is pointing too? 
+ if (m_pkt.pkt.data) +- memcpy(pPacket->pData, m_pkt.pkt.data, pPacket->iSize); ++ { ++ pPacket->pData = m_pkt.pkt.data; ++ // so we can free AVPacket when DemuxPacket is freed ++ pPacket->pkt = new AVPacket(m_pkt.pkt); ++ } ++ + + pPacket->pts = ConvertTimestamp(m_pkt.pkt.pts, stream->time_base.den, stream->time_base.num); + pPacket->dts = ConvertTimestamp(m_pkt.pkt.dts, stream->time_base.den, stream->time_base.num); +@@ -946,7 +950,10 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + pPacket->iStreamId = m_pkt.pkt.stream_index; + } + m_pkt.result = -1; +- av_packet_unref(&m_pkt.pkt); ++ if (pPacket && pPacket->pkt) ++ memset(&m_pkt.pkt, 0, sizeof(AVPacket)); ++ else ++ av_packet_unref(&m_pkt.pkt); + } + } + } // end of lock scope +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxPacket.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxPacket.h +index 4f471188c133deb91516311f0082e8741d9dee79..22805781c4d5a957d10fdf74ffa34387f67a25e9 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxPacket.h ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxPacket.h +@@ -25,6 +25,8 @@ + #define DMX_SPECIALID_STREAMINFO -10 + #define DMX_SPECIALID_STREAMCHANGE -11 + ++struct AVPacket; ++ + typedef struct DemuxPacket + { + unsigned char* pData; // data +@@ -36,6 +38,7 @@ typedef struct DemuxPacket + double pts; // pts in DVD_TIME_BASE + double dts; // dts in DVD_TIME_BASE + double duration; // duration in DVD_TIME_BASE if available ++ AVPacket *pkt; // to allow packet to be freed + + int dispTime; + } DemuxPacket; +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxUtils.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxUtils.cpp +index df0f35bd49c65b302de4ccd110d859e8b881ea5f..b4b591ae4c4dd4fb0b36d4d00fedca966f86000f 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxUtils.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxUtils.cpp +@@ -39,7 +39,12 @@ void CDVDDemuxUtils::FreeDemuxPacket(DemuxPacket* pPacket) + if (pPacket) + { + try { +- if (pPacket->pData) 
_aligned_free(pPacket->pData); ++ if (pPacket->pkt) ++ { ++ av_free_packet(pPacket->pkt); ++ delete pPacket->pkt; ++ } ++ else if (pPacket->pData) _aligned_free(pPacket->pData); + delete pPacket; + } + catch(...) { + +From a1f9425d9d9417c7f83806f41b724554653a1be6 Mon Sep 17 00:00:00 2001 +From: anaconda +Date: Wed, 25 Feb 2015 18:22:21 +0100 +Subject: [PATCH 16/67] Load OSD dialogs on startup. + +Fixes skipped frames the first time they're loaded in memory on less powered +devices, like a Raspberry Pi, when using DVDPlayer. +See http://forum.kodi.tv/showthread.php?tid=211501&pid=1938811#pid1938811 +--- + xbmc/pvr/dialogs/GUIDialogPVRChannelsOSD.cpp | 1 + + xbmc/pvr/dialogs/GUIDialogPVRGuideOSD.cpp | 1 + + xbmc/video/dialogs/GUIDialogAudioSubtitleSettings.cpp | 4 +++- + xbmc/video/dialogs/GUIDialogSubtitles.cpp | 2 +- + xbmc/video/dialogs/GUIDialogVideoOSD.cpp | 2 +- + xbmc/video/dialogs/GUIDialogVideoSettings.cpp | 4 +++- + 6 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/xbmc/pvr/dialogs/GUIDialogPVRChannelsOSD.cpp b/xbmc/pvr/dialogs/GUIDialogPVRChannelsOSD.cpp +index 1f72db5b0726434505ee7f52296b909b98a5d133..bb2dd07f8c18e6e72c31feb6273b84b599265e0e 100644 +--- a/xbmc/pvr/dialogs/GUIDialogPVRChannelsOSD.cpp ++++ b/xbmc/pvr/dialogs/GUIDialogPVRChannelsOSD.cpp +@@ -50,6 +50,7 @@ CGUIDialogPVRChannelsOSD::CGUIDialogPVRChannelsOSD() : + CGUIDialog(WINDOW_DIALOG_PVR_OSD_CHANNELS, "DialogPVRChannelsOSD.xml"), + Observer() + { ++ m_loadType = LOAD_ON_GUI_INIT; + m_vecItems = new CFileItemList; + } + +diff --git a/xbmc/pvr/dialogs/GUIDialogPVRGuideOSD.cpp b/xbmc/pvr/dialogs/GUIDialogPVRGuideOSD.cpp +index 8b472435e26e455249637faf5120055b415fc49e..be1f64d552161f8a86a5c5d89c1bc23328574fb6 100644 +--- a/xbmc/pvr/dialogs/GUIDialogPVRGuideOSD.cpp ++++ b/xbmc/pvr/dialogs/GUIDialogPVRGuideOSD.cpp +@@ -36,6 +36,7 @@ using namespace PVR; + CGUIDialogPVRGuideOSD::CGUIDialogPVRGuideOSD() + : CGUIDialog(WINDOW_DIALOG_PVR_OSD_GUIDE, "DialogPVRGuideOSD.xml") + { ++ 
m_loadType = LOAD_ON_GUI_INIT; + m_vecItems = new CFileItemList; + } + +diff --git a/xbmc/video/dialogs/GUIDialogAudioSubtitleSettings.cpp b/xbmc/video/dialogs/GUIDialogAudioSubtitleSettings.cpp +index eb67552344f59b8857b16c882c29e3fa62bed75c..f31572b34d376e70a35003a8c2e175b45daf8070 100644 +--- a/xbmc/video/dialogs/GUIDialogAudioSubtitleSettings.cpp ++++ b/xbmc/video/dialogs/GUIDialogAudioSubtitleSettings.cpp +@@ -68,7 +68,9 @@ CGUIDialogAudioSubtitleSettings::CGUIDialogAudioSubtitleSettings() + : CGUIDialogSettingsManualBase(WINDOW_DIALOG_AUDIO_OSD_SETTINGS, "DialogSettings.xml"), + m_passthrough(false), + m_dspEnabled(false) +-{ } ++{ ++ m_loadType = LOAD_ON_GUI_INIT; ++} + + CGUIDialogAudioSubtitleSettings::~CGUIDialogAudioSubtitleSettings() + { } +diff --git a/xbmc/video/dialogs/GUIDialogSubtitles.cpp b/xbmc/video/dialogs/GUIDialogSubtitles.cpp +index 398558e4d5d0cae30ee1c73e2b70e3b2f787e8fc..4e8a9b1e307a89d3a7b68402e2ff11b57e7dccd4 100644 +--- a/xbmc/video/dialogs/GUIDialogSubtitles.cpp ++++ b/xbmc/video/dialogs/GUIDialogSubtitles.cpp +@@ -103,7 +103,7 @@ CGUIDialogSubtitles::CGUIDialogSubtitles(void) + , m_pausedOnRun(false) + , m_updateSubsList(false) + { +- m_loadType = KEEP_IN_MEMORY; ++ m_loadType = LOAD_ON_GUI_INIT; + } + + CGUIDialogSubtitles::~CGUIDialogSubtitles(void) +diff --git a/xbmc/video/dialogs/GUIDialogVideoOSD.cpp b/xbmc/video/dialogs/GUIDialogVideoOSD.cpp +index e498e1fd476d9ab5300bb00bc39946a22cfd93cb..a6648d016b07e2eb3e52f8d927697cc53a42fd7b 100644 +--- a/xbmc/video/dialogs/GUIDialogVideoOSD.cpp ++++ b/xbmc/video/dialogs/GUIDialogVideoOSD.cpp +@@ -30,7 +30,7 @@ using namespace PVR; + CGUIDialogVideoOSD::CGUIDialogVideoOSD(void) + : CGUIDialog(WINDOW_DIALOG_VIDEO_OSD, "VideoOSD.xml") + { +- m_loadType = KEEP_IN_MEMORY; ++ m_loadType = LOAD_ON_GUI_INIT; + } + + CGUIDialogVideoOSD::~CGUIDialogVideoOSD(void) +diff --git a/xbmc/video/dialogs/GUIDialogVideoSettings.cpp b/xbmc/video/dialogs/GUIDialogVideoSettings.cpp +index 
afbe2032b9b2235cd524263d8a730eb3402eb07f..89f685e5dc791a64dd74fa25356d62bbb74f5b58 100644 +--- a/xbmc/video/dialogs/GUIDialogVideoSettings.cpp ++++ b/xbmc/video/dialogs/GUIDialogVideoSettings.cpp +@@ -66,7 +66,9 @@ + CGUIDialogVideoSettings::CGUIDialogVideoSettings() + : CGUIDialogSettingsManualBase(WINDOW_DIALOG_VIDEO_OSD_SETTINGS, "DialogSettings.xml"), + m_viewModeChanged(false) +-{ } ++{ ++ m_loadType = LOAD_ON_GUI_INIT; ++} + + CGUIDialogVideoSettings::~CGUIDialogVideoSettings() + { } + +From be39b1d7f8f1c217bb78888b18f2a27acc793031 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Tue, 14 Apr 2015 20:51:14 +0100 +Subject: [PATCH 17/67] [gui] Also limit GUI updates when in non full-screen + video mode + +--- + xbmc/Application.cpp | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/xbmc/Application.cpp b/xbmc/Application.cpp +index bf2f2d3e73cbc88ab9d89f91baa11f983f36ee10..3ecce5b0ac4c1b9d3c4fc0dd759b31f1600ac7fa 100644 +--- a/xbmc/Application.cpp ++++ b/xbmc/Application.cpp +@@ -2707,7 +2707,7 @@ void CApplication::FrameMove(bool processEvents, bool processGUI) + #if defined(TARGET_RASPBERRY_PI) || defined(HAS_IMXVPU) + // This code reduces rendering fps of the GUI layer when playing videos in fullscreen mode + // it makes only sense on architectures with multiple layers +- if (g_graphicsContext.IsFullScreenVideo() && !m_pPlayer->IsPausedPlayback() && m_pPlayer->IsRenderingVideoLayer()) ++ if (m_pPlayer->IsPlayingVideo() && !m_pPlayer->IsPausedPlayback() && m_pPlayer->IsRenderingVideoLayer()) + fps = CSettings::GetInstance().GetInt(CSettings::SETTING_VIDEOPLAYER_LIMITGUIUPDATE); + #endif + +@@ -2720,6 +2720,8 @@ void CApplication::FrameMove(bool processEvents, bool processGUI) + { + if (!m_skipGuiRender) + g_windowManager.Process(CTimeUtils::GetFrameTime()); ++ else if (!g_graphicsContext.IsFullScreenVideo()) ++ g_windowManager.FrameMove(); + } + g_windowManager.FrameMove(); + } + +From 3dea2824fdcfe2448b5b6fd348569c34c5c12f84 Mon Sep 17 
00:00:00 2001 +From: popcornmix +Date: Tue, 21 Apr 2015 14:32:07 +0100 +Subject: [PATCH 18/67] [mmalrenderer] Add sharpness control + +--- + addons/resource.language.en_gb/resources/strings.po | 2 +- + .../VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp | 13 ++++++++++++- + .../VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h | 1 + + 3 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index f1100b4238139b15799ddf1dba86265a1eaa53f3..085e2a195d2e52ce6bea3ed791bf817f5be23b15 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -8631,7 +8631,7 @@ msgstr "" + + #: xbmc/video/dialogs/GUIDialogVideoSettings.cpp + msgctxt "#16313" +-msgid "VDPAU - Sharpness" ++msgid "Sharpness" + msgstr "" + + #: xbmc/video/dialogs/GUIDialogVideoSettings.cpp +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +index cd573128fdc7e24b5ecf19730b40ef35d1c67a14..d65857779628debfc85b47b8dd283513edb5a319 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +@@ -237,6 +237,7 @@ CMMALRenderer::CMMALRenderer() : CThread("MMALRenderer") + m_inflight = 0; + m_queue = nullptr; + m_error = 0.0; ++ m_sharpness = -2.0f; + } + + CMMALRenderer::~CMMALRenderer() +@@ -419,6 +420,15 @@ void CMMALRenderer::RenderUpdate(bool clear, DWORD flags, DWORD alpha) + + ManageRenderArea(); + ++ // if sharpness setting has changed, we should update it ++ if (m_sharpness != CMediaSettings::GetInstance().GetCurrentVideoSettings().m_Sharpness) ++ { ++ m_sharpness = CMediaSettings::GetInstance().GetCurrentVideoSettings().m_Sharpness; ++ char command[80], response[80]; ++ sprintf(command, "scaling_sharpness %d", ((int)(50.0f 
* (m_sharpness + 1.0f) + 0.5f))); ++ vc_gencmd(response, sizeof response, command); ++ } ++ + if (m_format != RENDER_FMT_MMAL) + { + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) +@@ -608,7 +618,8 @@ bool CMMALRenderer::Supports(ERENDERFEATURE feature) + feature == RENDERFEATURE_ZOOM || + feature == RENDERFEATURE_ROTATION || + feature == RENDERFEATURE_VERTICAL_SHIFT || +- feature == RENDERFEATURE_PIXEL_RATIO) ++ feature == RENDERFEATURE_PIXEL_RATIO || ++ feature == RENDERFEATURE_SHARPNESS) + return true; + + return false; +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h +index e2c0223836af4171715a3907a5f7ac2511930f5f..ae0ce625c619910530f0b62ea8921aca0a3a7f63 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h +@@ -116,6 +116,7 @@ protected: + bool m_StereoInvert; + int m_inflight; + bool m_opaque; ++ float m_sharpness; + AVPixelFormat m_pixfmt; + + CCriticalSection m_sharedSection; + +From 121a372d0e98284ede602670609158fc26f8a5be Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Tue, 5 May 2015 23:58:06 +0100 +Subject: [PATCH 19/67] [screensaver] Leave GUI contents available for + screensaver + +--- + xbmc/guilib/GUIWindowManager.cpp | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/xbmc/guilib/GUIWindowManager.cpp b/xbmc/guilib/GUIWindowManager.cpp +index 5835280d07f049329b05494cd30744c9c1f7a258..93f646e2b28efca6a4bdebbf458127ab597024eb 100644 +--- a/xbmc/guilib/GUIWindowManager.cpp ++++ b/xbmc/guilib/GUIWindowManager.cpp +@@ -789,7 +789,16 @@ void CGUIWindowManager::ActivateWindow_Internal(int iWindowID, const std::vector + int currentWindow = GetActiveWindow(); + CGUIWindow *pWindow = GetWindow(currentWindow); + if (pWindow) +- CloseWindowSync(pWindow, iWindowID); ++ { ++ if (iWindowID == WINDOW_SCREENSAVER) ++ { ++ 
pWindow->Close(true, iWindowID); ++ } ++ else ++ { ++ CloseWindowSync(pWindow, iWindowID); ++ } ++ } + g_infoManager.SetNextWindow(WINDOW_INVALID); + + // Add window to the history list (we must do this before we activate it, + +From d0dac94c4e36e2c8d60311137194573b49ca3c9a Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Sat, 6 Jun 2015 18:43:57 +0100 +Subject: [PATCH 20/67] ffmpeg: Automatic switch to software decode for GMC + with more than one warp point + +--- + ...Signal-unsupported-GMC-with-more-than-one.patch | 48 ++++++++++++++++++++++ + tools/depends/target/ffmpeg/Makefile | 4 +- + tools/depends/target/ffmpeg/autobuild.sh | 2 + + .../VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 6 +++ + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemux.h | 2 + + .../VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 2 +- + xbmc/cores/VideoPlayer/DVDStreamInfo.cpp | 3 ++ + xbmc/cores/VideoPlayer/DVDStreamInfo.h | 1 + + xbmc/cores/omxplayer/OMXHelper.cpp | 8 +++- + 9 files changed, 73 insertions(+), 3 deletions(-) + create mode 100644 tools/depends/target/ffmpeg/0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch + +diff --git a/tools/depends/target/ffmpeg/0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch b/tools/depends/target/ffmpeg/0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..4cb8dd8fc466220e5d2539120de79ab123e65713 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch +@@ -0,0 +1,48 @@ ++From 84e9a1784bbd3182b68cefa5e5feae8da8b9e184 Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Fri, 5 Jun 2015 22:48:33 +0100 ++Subject: [PATCH] mpeg4video: Signal unsupported GMC with more than one warp ++ point ++ ++--- ++ libavcodec/avcodec.h | 1 + ++ libavcodec/mpeg4videodec.c | 4 ++++ ++ 2 files changed, 5 insertions(+) ++ ++diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h ++index 8c7c420..e63dc2d 100644 ++--- 
a/libavcodec/avcodec.h +++++ b/libavcodec/avcodec.h ++@@ -2527,6 +2527,7 @@ typedef struct AVCodecContext { ++ #define FF_BUG_DC_CLIP 4096 ++ #define FF_BUG_MS 8192 ///< Work around various bugs in Microsoft's broken decoders. ++ #define FF_BUG_TRUNCATED 16384 +++#define FF_BUG_GMC_UNSUPPORTED 32768 ++ ++ /** ++ * strictly follow the standard (MPEG4, ...). ++diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c ++index 9bf33dd..0b5d3b9 100644 ++--- a/libavcodec/mpeg4videodec.c +++++ b/libavcodec/mpeg4videodec.c ++@@ -2179,6 +2179,9 @@ int ff_mpeg4_workaround_bugs(AVCodecContext *avctx) ++ ++ if (ctx->divx_version >= 0) ++ s->workaround_bugs |= FF_BUG_HPEL_CHROMA; +++ +++ if (ctx->num_sprite_warping_points > 1) +++ s->workaround_bugs |= FF_BUG_GMC_UNSUPPORTED; ++ } ++ ++ if (s->workaround_bugs & FF_BUG_STD_QPEL) { ++@@ -2203,6 +2206,7 @@ int ff_mpeg4_workaround_bugs(AVCodecContext *avctx) ++ s->workaround_bugs, ctx->lavc_build, ctx->xvid_build, ++ ctx->divx_version, ctx->divx_build, s->divx_packed ? 
"p" : ""); ++ +++ avctx->workaround_bugs = s->workaround_bugs; ++ if (CONFIG_MPEG4_DECODER && ctx->xvid_build >= 0 && ++ s->codec_id == AV_CODEC_ID_MPEG4 && ++ avctx->idct_algo == FF_IDCT_AUTO) { ++-- ++1.9.1 ++ +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index c3998be2f3a5f1dbde2498be624fa8b48de7339f..dffe2da1dfd09e06c5f15c362f7cbe3cf2a26f75 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -1,6 +1,7 @@ + include ../../Makefile.include + include FFMPEG-VERSION +-DEPS= ../../Makefile.include FFMPEG-VERSION Makefile ++DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ ++ 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -72,6 +73,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + rm -rf $(PLATFORM); mkdir -p $(PLATFORM) + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); sed -i".bak" -e "s%pkg_config_default=pkg-config%export PKG_CONFIG_LIBDIR=$(PREFIX)/lib/pkgconfig \&\& pkg_config_default=$(NATIVEPREFIX)/bin/pkg-config%" configure ++ cd $(PLATFORM); patch -p1 < ../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ + ./configure $(ffmpg_config) +diff --git a/tools/depends/target/ffmpeg/autobuild.sh b/tools/depends/target/ffmpeg/autobuild.sh +index 6bbebfca1c7189fec6650932d7292f17af60db62..9c26b239c2b2c1221bed7c4d99c46e909a4a5c5d 100755 +--- a/tools/depends/target/ffmpeg/autobuild.sh ++++ b/tools/depends/target/ffmpeg/autobuild.sh +@@ -127,6 +127,8 @@ mkdir -p "ffmpeg-${VERSION}" + cd "ffmpeg-${VERSION}" || exit 2 + tar --strip-components=1 -xf $MYDIR/${ARCHIVE} + ++patch -p1 < ../../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch ++ + 
CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" \ + ./configure --prefix=$FFMPEG_PREFIX \ + --extra-version="kodi-${VERSION}" \ +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index 51ded6b236418b7ff31b15b59e5da1b196f31fc2..c0e553ca060749edff28bcbb880ed3e149b9f751 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -47,6 +47,10 @@ + + #include "linux/RBP.h" + ++#ifndef FF_BUG_GMC_UNSUPPORTED ++#define FF_BUG_GMC_UNSUPPORTED 0 ++#endif ++ + using namespace KODI::MESSAGING; + + #define CLASSNAME "CMMALVideoBuffer" +@@ -540,6 +544,8 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + // we always qualify even if DVDFactoryCodec does this too. + if (!CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEMMAL) || hints.software) + return false; ++ if (hints.workaround_bugs & FF_BUG_GMC_UNSUPPORTED) ++ return false; + + m_processInfo.SetVideoDeintMethod("none"); + +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemux.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemux.h +index 23cd50ce4643d32fc8f97bc612e9e911169f32d1..86ac5175b0ff1481571beaf0617471e122ee05a1 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemux.h ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemux.h +@@ -157,6 +157,7 @@ public: + type = STREAM_VIDEO; + iOrientation = 0; + iBitsPerPixel = 0; ++ workaround_bugs = 0; + } + + virtual ~CDemuxStreamVideo() {} +@@ -171,6 +172,7 @@ public: + int iOrientation; // orientation of the video in degress counter clockwise + int iBitsPerPixel; + std::string stereo_mode; // expected stereo mode ++ int workaround_bugs; // info for decoder + }; + + class CDemuxStreamAudio : public CDemuxStream +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 
b9131402dff3a6d538a188794096bad5784dbb63..84310bbda6440dd10f9aa0711859f4dc0bb1fd1a 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -1310,7 +1310,7 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + if (!stereoMode.empty()) + st->stereo_mode = stereoMode; + +- ++ st->workaround_bugs = pStream->codec->workaround_bugs; + if ( m_pInput->IsStreamType(DVDSTREAM_TYPE_DVD) ) + { + if (pStream->codec->codec_id == AV_CODEC_ID_PROBE) +diff --git a/xbmc/cores/VideoPlayer/DVDStreamInfo.cpp b/xbmc/cores/VideoPlayer/DVDStreamInfo.cpp +index e59c84c32ff6f108b52955523321f37bd3885986..28dbdd344473338762927f5f2d01425243187a7c 100644 +--- a/xbmc/cores/VideoPlayer/DVDStreamInfo.cpp ++++ b/xbmc/cores/VideoPlayer/DVDStreamInfo.cpp +@@ -74,6 +74,7 @@ void CDVDStreamInfo::Clear() + channellayout = 0; + + orientation = 0; ++ workaround_bugs = 0; + } + + bool CDVDStreamInfo::Equal(const CDVDStreamInfo& right, bool withextradata) +@@ -175,6 +176,7 @@ void CDVDStreamInfo::Assign(const CDVDStreamInfo& right, bool withextradata) + vfr = right.vfr; + software = right.software; + stereo_mode = right.stereo_mode; ++ workaround_bugs = right.workaround_bugs; + + // AUDIO + channels = right.channels; +@@ -233,6 +235,7 @@ void CDVDStreamInfo::Assign(const CDemuxStream& right, bool withextradata) + orientation = stream->iOrientation; + bitsperpixel = stream->iBitsPerPixel; + stereo_mode = stream->stereo_mode; ++ workaround_bugs = stream->workaround_bugs; + } + else if( right.type == STREAM_SUBTITLE ) + { +diff --git a/xbmc/cores/VideoPlayer/DVDStreamInfo.h b/xbmc/cores/VideoPlayer/DVDStreamInfo.h +index f14170850673ebf746df0acf8f5cf5977feae684..85e402bb4e1ddd61bdb657802cc7347c95b9a302 100644 +--- a/xbmc/cores/VideoPlayer/DVDStreamInfo.h ++++ b/xbmc/cores/VideoPlayer/DVDStreamInfo.h +@@ -73,6 +73,7 @@ public: + int orientation; // orientation of the video in degress counter clockwise + int bitsperpixel; + 
std::string stereo_mode; // stereoscopic 3d mode ++ int workaround_bugs; // info for decoder + + // AUDIO + int channels; +diff --git a/xbmc/cores/omxplayer/OMXHelper.cpp b/xbmc/cores/omxplayer/OMXHelper.cpp +index b5db1c4ec03e4b5809a14c541329ee11aa7df04f..344f393cfa2230b21a8dba42ef3cf79ce428dac2 100644 +--- a/xbmc/cores/omxplayer/OMXHelper.cpp ++++ b/xbmc/cores/omxplayer/OMXHelper.cpp +@@ -30,6 +30,10 @@ + #include "cores/omxplayer/OMXPlayerVideo.h" + #include "threads/SystemClock.h" + ++#ifndef FF_BUG_GMC_UNSUPPORTED ++#define FF_BUG_GMC_UNSUPPORTED 0 ++#endif ++ + #define PREDICATE_RETURN(lh, rh) \ + do { \ + if((lh) != (rh)) \ +@@ -81,7 +85,9 @@ bool OMXPlayerUnsuitable(bool m_HasVideo, bool m_HasAudio, CDVDDemux* m_pDemuxer + CDVDStreamInfo hint(*stream, true); + + bool supported = false; +- if ((hint.codec == AV_CODEC_ID_MPEG1VIDEO || hint.codec == AV_CODEC_ID_MPEG2VIDEO) && g_RBP.GetCodecMpg2()) ++ if (hint.workaround_bugs & FF_BUG_GMC_UNSUPPORTED) ++ ; ++ else if ((hint.codec == AV_CODEC_ID_MPEG1VIDEO || hint.codec == AV_CODEC_ID_MPEG2VIDEO) && g_RBP.GetCodecMpg2()) + supported = true; + else if ((hint.codec == AV_CODEC_ID_VC1 || hint.codec == AV_CODEC_ID_WMV3) && g_RBP.GetCodecWvc1()) + supported = true; + +From e7ca15df03877b289fcaed9838e49758982ecacf Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 10 Mar 2016 17:56:11 +0000 +Subject: [PATCH 21/67] [rbp] HW mouse pointer + +Updating the mouse point provokes a complete screen update which can make it feel laggy +and results in high cpu. + +Render the mouse with an overlay to avoid redrawing the normal gui. 
+--- + xbmc/guilib/GUIWindowManager.cpp | 2 + + xbmc/linux/RBP.cpp | 135 ++++++++++++++++++++++++++++++++++++++ + xbmc/linux/RBP.h | 9 +++ + xbmc/windowing/WinEventsLinux.cpp | 125 +++++++++++++++++++++++++++++++++++ + xbmc/windowing/WinEventsLinux.h | 11 ++++ + 5 files changed, 282 insertions(+) + +diff --git a/xbmc/guilib/GUIWindowManager.cpp b/xbmc/guilib/GUIWindowManager.cpp +index 93f646e2b28efca6a4bdebbf458127ab597024eb..4bedbdde8c9b226e86a0c37378597bd524dbe66e 100644 +--- a/xbmc/guilib/GUIWindowManager.cpp ++++ b/xbmc/guilib/GUIWindowManager.cpp +@@ -198,7 +198,9 @@ void CGUIWindowManager::CreateWindows() + Add(new CGUIWindowAddonBrowser); + Add(new CGUIWindowScreensaverDim); + Add(new CGUIWindowDebugInfo); ++#ifndef TARGET_RASPBERRY_PI + Add(new CGUIWindowPointer); ++#endif + Add(new CGUIDialogYesNo); + Add(new CGUIDialogProgress); + Add(new CGUIDialogExtendedProgressBar); +diff --git a/xbmc/linux/RBP.cpp b/xbmc/linux/RBP.cpp +index ddbe27061f8192b7f6c830a4c22652a731537079..fbffa3a952d920cb41412f00f59d5c1c91f98740 100644 +--- a/xbmc/linux/RBP.cpp ++++ b/xbmc/linux/RBP.cpp +@@ -28,6 +28,9 @@ + + #include "cores/omxplayer/OMXImage.h" + ++#include "guilib/GraphicContext.h" ++#include "settings/DisplaySettings.h" ++ + #include + #include "rpi/rpi_user_vcsm.h" + #include "utils/TimeUtils.h" +@@ -46,6 +49,10 @@ CRBP::CRBP() + m_DllBcmHost = new DllBcmHost(); + m_OMX = new COMXCore(); + m_display = DISPMANX_NO_HANDLE; ++ m_p = NULL; ++ m_x = 0; ++ m_y = 0; ++ m_enabled = 0; + m_mb = mbox_open(); + vcsm_init(); + m_vsync_count = 0; +@@ -141,6 +148,7 @@ DISPMANX_DISPLAY_HANDLE_T CRBP::OpenDisplay(uint32_t device) + m_display = vc_dispmanx_display_open( 0 /*screen*/ ); + int s = vc_dispmanx_vsync_callback(m_display, vsync_callback_static, (void *)this); + assert(s == 0); ++ init_cursor(); + } + return m_display; + } +@@ -148,6 +156,7 @@ DISPMANX_DISPLAY_HANDLE_T CRBP::OpenDisplay(uint32_t device) + void CRBP::CloseDisplay(DISPMANX_DISPLAY_HANDLE_T display) + { + 
CSingleLock lock(m_critSection); ++ uninit_cursor(); + assert(display == m_display); + int s = vc_dispmanx_vsync_callback(m_display, NULL, NULL); + assert(s == 0); +@@ -266,6 +275,9 @@ void CRBP::Deinitialize() + m_omx_image_init = false; + m_initialized = false; + m_omx_initialized = false; ++ uninit_cursor(); ++ delete m_p; ++ m_p = NULL; + if (m_mb) + mbox_close(m_mb); + m_mb = 0; +@@ -338,6 +350,52 @@ unsigned mem_unlock(int file_desc, unsigned handle) + return p[5]; + } + ++unsigned int mailbox_set_cursor_info(int file_desc, int width, int height, int format, uint32_t buffer, int hotspotx, int hotspoty) ++{ ++ int i=0; ++ unsigned int p[32]; ++ p[i++] = 0; // size ++ p[i++] = 0x00000000; // process request ++ p[i++] = 0x00008010; // set cursor state ++ p[i++] = 24; // buffer size ++ p[i++] = 24; // data size ++ ++ p[i++] = width; ++ p[i++] = height; ++ p[i++] = format; ++ p[i++] = buffer; // ptr to VC memory buffer. Doesn't work in 64bit.... ++ p[i++] = hotspotx; ++ p[i++] = hotspoty; ++ ++ p[i++] = 0x00000000; // end tag ++ p[0] = i*sizeof(*p); // actual size ++ ++ mbox_property(file_desc, p); ++ return p[5]; ++ ++} ++ ++unsigned int mailbox_set_cursor_position(int file_desc, int enabled, int x, int y) ++{ ++ int i=0; ++ unsigned p[32]; ++ p[i++] = 0; // size ++ p[i++] = 0x00000000; // process request ++ p[i++] = 0x00008011; // set cursor state ++ p[i++] = 12; // buffer size ++ p[i++] = 12; // data size ++ ++ p[i++] = enabled; ++ p[i++] = x; ++ p[i++] = y; ++ ++ p[i++] = 0x00000000; // end tag ++ p[0] = i*sizeof *p; // actual size ++ ++ mbox_property(file_desc, p); ++ return p[5]; ++} ++ + CGPUMEM::CGPUMEM(unsigned int numbytes, bool cached) + { + m_numbytes = numbytes; +@@ -369,4 +427,81 @@ void CGPUMEM::Flush() + vcsm_clean_invalid( &iocache ); + } + ++#define T 0 ++#define W 0xffffffff ++#define B 0xff000000 ++ ++const static uint32_t default_cursor_pixels[] = ++{ ++ B,B,B,B,B,B,B,B,B,T,T,T,T,T,T,T, ++ B,W,W,W,W,W,W,B,T,T,T,T,T,T,T,T, ++ 
B,W,W,W,W,W,B,T,T,T,T,T,T,T,T,T, ++ B,W,W,W,W,B,T,T,T,T,T,T,T,T,T,T, ++ B,W,W,W,W,W,B,T,T,T,T,T,T,T,T,T, ++ B,W,W,B,W,W,W,B,T,T,T,T,T,T,T,T, ++ B,W,B,T,B,W,W,W,B,T,T,T,T,T,T,T, ++ B,B,T,T,T,B,W,W,W,B,T,T,T,T,T,T, ++ B,T,T,T,T,T,B,W,W,W,B,T,T,T,T,T, ++ T,T,T,T,T,T,T,B,W,W,W,B,T,T,T,T, ++ T,T,T,T,T,T,T,T,B,W,W,W,B,T,T,T, ++ T,T,T,T,T,T,T,T,T,B,W,W,W,B,T,T, ++ T,T,T,T,T,T,T,T,T,T,B,W,W,W,B,T, ++ T,T,T,T,T,T,T,T,T,T,T,B,W,W,W,B, ++ T,T,T,T,T,T,T,T,T,T,T,T,B,W,B,T, ++ T,T,T,T,T,T,T,T,T,T,T,T,T,B,T,T ++}; ++ ++#undef T ++#undef W ++#undef B ++ ++void CRBP::init_cursor() ++{ ++ if (!m_mb) ++ return; ++ if (!m_p) ++ m_p = new CGPUMEM(64 * 64 * 4, false); ++ if (m_p && m_p->m_arm && m_p->m_vc) ++ set_cursor(default_cursor_pixels, 16, 16, 0, 0); ++} ++ ++void CRBP::set_cursor(const void *pixels, int width, int height, int hotspot_x, int hotspot_y) ++{ ++ if (!m_mb || !m_p || !m_p->m_arm || !m_p->m_vc || !pixels || width * height > 64 * 64) ++ return; ++ memcpy(m_p->m_arm, pixels, width * height * 4); ++ unsigned int s = mailbox_set_cursor_info(m_mb, width, height, 0, m_p->m_vc, hotspot_x, hotspot_y); ++ assert(s == 0); ++} ++ ++void CRBP::update_cursor(int x, int y, bool enabled) ++{ ++ if (!m_mb || !m_p || !m_p->m_arm || !m_p->m_vc) ++ return; ++ ++ RESOLUTION res = g_graphicsContext.GetVideoResolution(); ++ CRect gui(0, 0, CDisplaySettings::GetInstance().GetResolutionInfo(res).iWidth, CDisplaySettings::GetInstance().GetResolutionInfo(res).iHeight); ++ CRect display(0, 0, CDisplaySettings::GetInstance().GetResolutionInfo(res).iScreenWidth, CDisplaySettings::GetInstance().GetResolutionInfo(res).iScreenHeight); ++ ++ int x2 = x * display.Width() / gui.Width(); ++ int y2 = y * display.Height() / gui.Height(); ++ ++ if (g_graphicsContext.GetStereoMode() == RENDER_STEREO_MODE_SPLIT_HORIZONTAL) ++ y2 *= 2; ++ else if (g_graphicsContext.GetStereoMode() == RENDER_STEREO_MODE_SPLIT_VERTICAL) ++ x2 *= 2; ++ if (m_x != x2 || m_y != y2 || m_enabled != enabled) ++ 
mailbox_set_cursor_position(m_mb, enabled, x2, y2); ++ m_x = x2; ++ m_y = y2; ++ m_enabled = enabled; ++} ++ ++void CRBP::uninit_cursor() ++{ ++ if (!m_mb || !m_p || !m_p->m_arm || !m_p->m_vc) ++ return; ++ mailbox_set_cursor_position(m_mb, 0, 0, 0); ++} ++ + #endif +diff --git a/xbmc/linux/RBP.h b/xbmc/linux/RBP.h +index fffa5182126159f6dfcf750b21fa0464e229e545..90b04db5405058be2ff20aeaa6af2d2ac651586f 100644 +--- a/xbmc/linux/RBP.h ++++ b/xbmc/linux/RBP.h +@@ -103,6 +103,15 @@ private: + CCriticalSection m_critSection; + + int m_mb; ++ CGPUMEM *m_p; ++ int m_x; ++ int m_y; ++ bool m_enabled; ++ public: ++ void init_cursor(); ++ void set_cursor(const void *pixels, int width, int height, int hotspot_x, int hotspot_y); ++ void update_cursor(int x, int y, bool enabled); ++ void uninit_cursor(); + }; + + extern CRBP g_RBP; +diff --git a/xbmc/windowing/WinEventsLinux.cpp b/xbmc/windowing/WinEventsLinux.cpp +index a958a23d7185a1dce59fc6c3f8854d177068ace4..70f0e4d3f2ac6c706e7c477d0a6e5ee2999dc88b 100644 +--- a/xbmc/windowing/WinEventsLinux.cpp ++++ b/xbmc/windowing/WinEventsLinux.cpp +@@ -30,11 +30,26 @@ + #include "utils/log.h" + #include "powermanagement/PowerManager.h" + ++#ifdef TARGET_RASPBERRY_PI ++#include "utils/TimeUtils.h" ++#include "guilib/Resolution.h" ++#include "addons/Skin.h" ++#include "utils/XMLUtils.h" ++#include "utils/StringUtils.h" ++#include "guilib/Texture.h" ++#include "linux/RBP.h" ++#include "input/InputManager.h" ++#endif ++ + bool CWinEventsLinux::m_initialized = false; + CLinuxInputDevices CWinEventsLinux::m_devices; + + CWinEventsLinux::CWinEventsLinux() + { ++#ifdef TARGET_RASPBERRY_PI ++ m_last_mouse_move_time = 0; ++ m_mouse_state = -1; ++#endif + } + + void CWinEventsLinux::RefreshDevices() +@@ -48,6 +63,72 @@ bool CWinEventsLinux::IsRemoteLowBattery() + return false; + } + ++#ifdef TARGET_RASPBERRY_PI ++bool CWinEventsLinux::LoadXML(const std::string strFileName) ++{ ++ RESOLUTION_INFO m_coordsRes; // resolution that the window 
coordinates are in.
++  // Find appropriate skin folder + resolution to load from
++  std::string strFileNameLower = strFileName;
++  StringUtils::ToLower(strFileNameLower);
++  std::string strLowerPath = g_SkinInfo->GetSkinPath(strFileNameLower, &m_coordsRes);
++  std::string strPath = g_SkinInfo->GetSkinPath(strFileName, &m_coordsRes);
++
++  TiXmlElement* pRootElement = NULL;
++  CXBMCTinyXML xmlDoc;
++  std::string strPathLower = strPath;
++  StringUtils::ToLower(strPathLower);
++  if (!xmlDoc.LoadFile(strPath) && !xmlDoc.LoadFile(strPathLower) && !xmlDoc.LoadFile(strLowerPath))
++  {
++    CLog::Log(LOGERROR, "unable to load:%s, Line %d\n%s", strPath.c_str(), xmlDoc.ErrorRow(), xmlDoc.ErrorDesc());
++    return false;
++  }
++  pRootElement = xmlDoc.RootElement(); // borrowed from xmlDoc (no Clone): nothing to leak on early return, and an empty document is caught by the null check below
++
++  if (!pRootElement)
++    return false;
++
++  if (strcmpi(pRootElement->Value(), "window"))
++  {
++    CLog::Log(LOGERROR, "file : XML file doesnt contain <window>");
++    return false;
++  }
++
++  TiXmlElement *pChild = pRootElement->FirstChildElement();
++  while (pChild)
++  {
++    if (strcmpi(pChild->Value(), "controls") == 0)
++    {
++      TiXmlElement *pControl = pChild->FirstChildElement();
++      while (pControl)
++      {
++        if (strcmpi(pControl->Value(), "control") == 0)
++        {
++          std::string strStringValue;
++          if (XMLUtils::GetString(pControl, "texture", strStringValue))
++          {
++            const char* idAttr = pControl->Attribute("id");
++            int index = idAttr ? atoi(idAttr)-1 : -1;
++            if (index >= 0 && index < (int)(sizeof m_cursors/sizeof *m_cursors))
++            {
++              if (m_cursors[index].m_filename.size())
++                g_TextureManager.ReleaseTexture(m_cursors[index].m_filename, true);
++              m_cursors[index].m_filename.clear();
++              m_cursors[index].m_texture = g_TextureManager.Load(strStringValue);
++              if (m_cursors[index].m_texture.size())
++                m_cursors[index].m_filename = strStringValue;
++            }
++          }
++        }
++        pControl = pControl->NextSiblingElement();
++      }
++    }
++    pChild = pChild->NextSiblingElement();
++  }
++  // pRootElement is owned by xmlDoc and released with it; no explicit delete
++  return true;
++}
++#endif
++
+ bool CWinEventsLinux::MessagePump()
+ {
+   if (!m_initialized)
+@@ -55,13 +136,50 @@ bool CWinEventsLinux::MessagePump()
+     m_devices.InitAvailable();
+     m_checkHotplug = std::unique_ptr(new CLinuxInputDevicesCheckHotplugged(m_devices));
+     m_initialized = true;
++#ifdef TARGET_RASPBERRY_PI
++    LoadXML("Pointer.xml");
++#endif
+   }
+
+   bool ret = false;
+   XBMC_Event event = {0};
++#ifdef TARGET_RASPBERRY_PI
++  bool active = CInputManager::GetInstance().IsMouseActive();
++  int64_t Now = CurrentHostCounter();
++  if (!active)
++  {
++    if (m_mouse_state != -1)
++    {
++      g_RBP.update_cursor(0, 0, 0);
++      m_mouse_state = -1;
++    }
++  }
++  else
++  {
++    int state = CInputManager::GetInstance().GetMouseState() - 1;
++    if (m_mouse_state != state)
++    {
++      if (state >= 0 && state < (int)(sizeof m_cursors/sizeof *m_cursors) && !m_cursors[state].m_texture.m_textures.empty())
++      {
++        CBaseTexture *t = (m_cursors[state].m_texture.m_textures)[0];
++        if (t)
++          g_RBP.set_cursor((const void *)t->GetPixels(), t->GetPitch()>>2, t->GetRows(), 0, 0);
++      }
++      m_mouse_state = state;
++    }
++  }
++#endif
+   while (1)
+   {
+     event = m_devices.ReadEvent();
++#ifdef TARGET_RASPBERRY_PI
++    if (active && (event.type == XBMC_MOUSEMOTION || event.type == XBMC_MOUSEBUTTONDOWN || event.type == XBMC_MOUSEBUTTONUP))
++    {
++      if (event.type == XBMC_MOUSEMOTION)
++        g_RBP.update_cursor(event.motion.x, event.motion.y, 1);
++
m_last_mouse_move_time = Now; ++ } ++#endif + if (event.type != XBMC_NOEVENT) + { + ret |= g_application.OnEvent(event); +@@ -72,6 +190,13 @@ bool CWinEventsLinux::MessagePump() + } + } + ++#ifdef TARGET_RASPBERRY_PI ++ if (active && Now - m_last_mouse_move_time > 5 * 1000000000LL) ++ { ++ g_RBP.update_cursor(0, 0, 0); ++ m_mouse_state = -1; ++ } ++#endif + return ret; + } + +diff --git a/xbmc/windowing/WinEventsLinux.h b/xbmc/windowing/WinEventsLinux.h +index 1b1d2f2e60334ed0f3a9964d106957f58e69f1b3..c82ba84625fe3556ff49764d40ceb3ec220114e1 100644 +--- a/xbmc/windowing/WinEventsLinux.h ++++ b/xbmc/windowing/WinEventsLinux.h +@@ -25,6 +25,7 @@ + #include + #include "windowing/WinEvents.h" + #include "input/linux/LinuxInputDevices.h" ++#include "guilib/TextureManager.h" + + class CWinEventsLinux : public IWinEvents + { +@@ -45,6 +46,16 @@ private: + static bool m_initialized; + static CLinuxInputDevices m_devices; + std::unique_ptr m_checkHotplug; ++#ifdef TARGET_RASPBERRY_PI ++ bool LoadXML(const std::string strFileName); ++ int64_t m_last_mouse_move_time; ++ struct ++ { ++ std::string m_filename; ++ CTextureArray m_texture; ++ } m_cursors[4]; ++ int m_mouse_state; ++#endif + }; + + #endif + +From f5e09c6ab9f5544d67f94305998b8a3b13f27b9a Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Tue, 10 Feb 2015 16:39:12 +0000 +Subject: [PATCH 22/67] [librtmp] Update to 15-Dec-2015 from + http://stream-recorder.com/forum/customized-rtmpdump-binaries-patch-file-t16103.html + +--- + tools/depends/target/librtmp/Makefile | 5 +- + tools/depends/target/librtmp/Patch.diff | 4066 ++++++++++++++++++++++ + tools/depends/target/librtmp/UpdateToLatest.diff | 257 ++ + tools/depends/target/librtmp/libm.patch | 11 - + 4 files changed, 4326 insertions(+), 13 deletions(-) + create mode 100644 tools/depends/target/librtmp/Patch.diff + create mode 100644 tools/depends/target/librtmp/UpdateToLatest.diff + delete mode 100644 tools/depends/target/librtmp/libm.patch + +diff --git 
a/tools/depends/target/librtmp/Makefile b/tools/depends/target/librtmp/Makefile +index e78d375b1284957036a549a65b8493582cea82e6..03fee99576ab943c72bfb1f5c5b1ccc88450a63a 100644 +--- a/tools/depends/target/librtmp/Makefile ++++ b/tools/depends/target/librtmp/Makefile +@@ -1,5 +1,5 @@ + include ../../Makefile.include +-DEPS= ../../Makefile.include Makefile prefix.patch ++DEPS= ../../Makefile.include Makefile prefix.patch UpdateToLatest.diff Patch.diff + + # lib name, version + LIBNAME=rtmpdump +@@ -27,7 +27,8 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + rm -rf $(PLATFORM)/*; mkdir -p $(PLATFORM) + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); patch -p0 < ../prefix.patch +- cd $(PLATFORM)/librtmp; patch -p0 < ../../libm.patch ++ cd $(PLATFORM); patch -p1 < ../UpdateToLatest.diff ++ cd $(PLATFORM); patch -p0 < ../Patch.diff + sed -i -e 's|CC=|#CC=|' $(PLATFORM)/librtmp/Makefile + sed -i -e 's|LD=|#LD=|' $(PLATFORM)/librtmp/Makefile + sed -i -e 's|AR=|#AR=|' $(PLATFORM)/librtmp/Makefile +diff --git a/tools/depends/target/librtmp/Patch.diff b/tools/depends/target/librtmp/Patch.diff +new file mode 100644 +index 0000000000000000000000000000000000000000..62c1e990e73f61dd205028c3acae0e57d5953f76 +--- /dev/null ++++ b/tools/depends/target/librtmp/Patch.diff +@@ -0,0 +1,4066 @@ ++diff --git Makefile Makefile ++index a1595a8..9fe7584 100644 ++--- Makefile +++++ Makefile ++@@ -32,7 +32,7 @@ BINDIR=$(DESTDIR)$(bindir) ++ SBINDIR=$(DESTDIR)$(sbindir) ++ MANDIR=$(DESTDIR)$(mandir) ++ ++-LIBS_posix= +++LIBS_posix=-lm ++ LIBS_darwin= ++ LIBS_mingw=-lws2_32 -lwinmm -lgdi32 ++ LIB_RTMP=-Llibrtmp -lrtmp ++diff --git librtmp/Makefile librtmp/Makefile ++index 2c1c790..e367535 100644 ++--- librtmp/Makefile +++++ librtmp/Makefile ++@@ -26,7 +26,7 @@ REQ_GNUTLS=gnutls,hogweed,nettle ++ REQ_OPENSSL=libssl,libcrypto ++ PUB_GNUTLS=-lgmp ++ LIBZ=-lz ++-LIBS_posix= +++LIBS_posix=-lm ++ LIBS_darwin= ++ LIBS_mingw=-lws2_32 
-lwinmm -lgdi32 ++ LIB_GNUTLS=-lgnutls -lhogweed -lnettle -lgmp $(LIBZ) ++diff --git librtmp/amf.c librtmp/amf.c ++index 1c5f99f..1310cbe 100644 ++--- librtmp/amf.c +++++ librtmp/amf.c ++@@ -319,6 +319,13 @@ AMFProp_SetName(AMFObjectProperty *prop, AVal *name) ++ prop->p_name = *name; ++ } ++ +++void +++AMFProp_SetString(AMFObjectProperty *prop, AVal *str) +++{ +++ prop->p_type = AMF_STRING; +++ prop->p_vu.p_aval = *str; +++} +++ ++ AMFDataType ++ AMFProp_GetType(AMFObjectProperty *prop) ++ { ++@@ -503,6 +510,9 @@ AMF3Prop_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ return -1; ++ } ++ +++ if (*pBuffer == AMF3_NULL) +++ bDecodeName = FALSE; +++ ++ /* decode name */ ++ if (bDecodeName) ++ { ++@@ -586,7 +596,7 @@ AMF3Prop_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ } ++ case AMF3_OBJECT: ++ { ++- int nRes = AMF3_Decode(&prop->p_vu.p_object, pBuffer, nSize, TRUE); +++ int nRes = AMF3_Decode(&prop->p_vu.p_object, pBuffer, nSize, FALSE); ++ if (nRes == -1) ++ return -1; ++ nSize -= nRes; ++@@ -620,6 +630,9 @@ AMFProp_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ return -1; ++ } ++ +++ if (*pBuffer == AMF_NULL) +++ bDecodeName = FALSE; +++ ++ if (bDecodeName && nSize < 4) ++ { /* at least name (length + at least 1 byte) and 1 byte of data */ ++ RTMP_Log(RTMP_LOGDEBUG, ++@@ -649,9 +662,8 @@ AMFProp_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ return -1; ++ } ++ ++- nSize--; ++- ++ prop->p_type = *pBuffer++; +++ nSize--; ++ switch (prop->p_type) ++ { ++ case AMF_NUMBER: ++@@ -697,9 +709,13 @@ AMFProp_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ break; ++ case AMF_REFERENCE: ++ { ++- RTMP_Log(RTMP_LOGERROR, "AMF_REFERENCE not supported!"); ++- return -1; ++- break; +++ RTMP_Log(RTMP_LOGDEBUG, "AMF_REFERENCE is not fully supported!"); +++ if (nSize < 2) +++ return -1; +++ prop->p_type = AMF_NUMBER; +++ prop->p_vu.p_number = AMF_DecodeInt16(pBuffer); +++ nSize -= 2; 
+++ break; ++ } ++ case AMF_ECMA_ARRAY: ++ { ++@@ -731,13 +747,13 @@ AMFProp_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ } ++ case AMF_DATE: ++ { ++- RTMP_Log(RTMP_LOGDEBUG, "AMF_DATE"); ++- ++ if (nSize < 10) ++ return -1; ++ ++ prop->p_vu.p_number = AMF_DecodeNumber(pBuffer); ++ prop->p_UTCoffset = AMF_DecodeInt16(pBuffer + 8); +++ RTMP_Log(RTMP_LOGDEBUG, "AMF_DATE: %f, UTC offset: %d", prop->p_vu.p_number, +++ prop->p_UTCoffset); ++ ++ nSize -= 10; ++ break; ++@@ -809,8 +825,8 @@ AMFProp_Dump(AMFObjectProperty *prop) ++ } ++ else ++ { ++- name.av_val = "no-name."; ++- name.av_len = sizeof("no-name.") - 1; +++ name.av_val = "no-name"; +++ name.av_len = sizeof ("no-name") - 1; ++ } ++ if (name.av_len > 18) ++ name.av_len = 18; ++@@ -1021,11 +1037,18 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ obj->o_props = NULL; ++ if (bAMFData) ++ { ++- if (*pBuffer != AMF3_OBJECT) ++- RTMP_Log(RTMP_LOGERROR, ++- "AMF3 Object encapsulated in AMF stream does not start with AMF3_OBJECT!"); ++- pBuffer++; ++- nSize--; +++ // Decode only if it's an AMF3 object +++ if (*pBuffer == AMF3_OBJECT) +++ { +++ pBuffer++; +++ nSize--; +++ } +++ else +++ { +++ RTMP_Log(RTMP_LOGERROR, "AMF3 Object encapsulated in AMF stream does not start with AMF3_OBJECT!"); +++ pBuffer += nOriginalSize; +++ return nOriginalSize; +++ } ++ } ++ ++ ref = 0; ++@@ -1043,8 +1066,12 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ { ++ int32_t classRef = (ref >> 1); ++ ++- AMF3ClassDef cd = { {0, 0} ++- }; +++ AMF3ClassDef cd; +++ cd.cd_name.av_len = 0; +++ cd.cd_name.av_val = 0; +++ cd.cd_externalizable = FALSE; +++ cd.cd_dynamic = TRUE; +++ cd.cd_num = 0; ++ AMFObjectProperty prop; ++ ++ if ((classRef & 0x1) == 0) ++@@ -1061,6 +1088,7 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ cd.cd_dynamic = ((classExtRef >> 1) & 0x1) == 1; ++ ++ cdnum = classExtRef >> 2; +++ cd.cd_num = cdnum; ++ ++ /* 
class name */ ++ ++@@ -1070,24 +1098,25 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ ++ /*std::string str = className; */ ++ ++- RTMP_Log(RTMP_LOGDEBUG, ++- "Class name: %s, externalizable: %d, dynamic: %d, classMembers: %d", ++- cd.cd_name.av_val, cd.cd_externalizable, cd.cd_dynamic, ++- cd.cd_num); +++ RTMP_Log(RTMP_LOGDEBUG, "Class name: %.*s, externalizable: %d, dynamic: %d, classMembers: %d", +++ cd.cd_name.av_len, cd.cd_name.av_val, cd.cd_externalizable, cd.cd_dynamic, cd.cd_num); ++ ++ for (i = 0; i < cdnum; i++) ++- { ++- AVal memberName; ++- if (nSize <=0) +++ { +++ AVal memberName = {NULL, 0}; +++ if (nSize <= 0) ++ { ++ invalid: ++ RTMP_Log(RTMP_LOGDEBUG, "%s, invalid class encoding!", ++ __FUNCTION__); ++ return nOriginalSize; ++- } ++- len = AMF3ReadString(pBuffer, &memberName); ++- RTMP_Log(RTMP_LOGDEBUG, "Member: %s", memberName.av_val); ++- AMF3CD_AddProp(&cd, &memberName); +++ } +++ len = AMF3ReadString(pBuffer, &memberName); +++ if (memberName.av_val) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "Member: %.*s", memberName.av_len, memberName.av_val); +++ AMF3CD_AddProp(&cd, &memberName); +++ } ++ nSize -= len; ++ pBuffer += len; ++ } ++@@ -1118,10 +1147,10 @@ invalid: ++ else ++ { ++ int nRes, i; ++- for (i = 0; i < cd.cd_num; i++) /* non-dynamic */ ++- { ++- if (nSize <=0) ++- goto invalid; +++ for (i = 0; i < cd.cd_num; i++) /* non-dynamic */ +++ { +++ if (nSize <= 0) +++ goto invalid; ++ nRes = AMF3Prop_Decode(&prop, pBuffer, nSize, FALSE); ++ if (nRes == -1) ++ RTMP_Log(RTMP_LOGDEBUG, "%s, failed to decode AMF3 property!", ++@@ -1138,9 +1167,9 @@ invalid: ++ int len = 0; ++ ++ do ++- { ++- if (nSize <=0) ++- goto invalid; +++ { +++ if (nSize <= 0) +++ goto invalid; ++ nRes = AMF3Prop_Decode(&prop, pBuffer, nSize, TRUE); ++ AMF_AddProp(obj, &prop); ++ ++@@ -1154,7 +1183,15 @@ invalid: ++ } ++ RTMP_Log(RTMP_LOGDEBUG, "class object!"); ++ } ++- return nOriginalSize - nSize; +++ +++ /** +++ * In case of switch to AMF3 
serialization consume rest of the unprocessed +++ * packet data to make sure it's not later processed as AMF0 data. +++ */ +++ if (bAMFData) +++ return nOriginalSize; +++ else +++ return nOriginalSize - nSize; ++ } ++ ++ int ++@@ -1272,7 +1309,8 @@ AMF3CD_AddProp(AMF3ClassDef *cd, AVal *prop) ++ { ++ if (!(cd->cd_num & 0x0f)) ++ cd->cd_props = realloc(cd->cd_props, (cd->cd_num + 16) * sizeof(AVal)); ++- cd->cd_props[cd->cd_num++] = *prop; +++ if (cd->cd_props) +++ cd->cd_props[cd->cd_num++] = *prop; ++ } ++ ++ AVal * ++diff --git librtmp/handshake.h librtmp/handshake.h ++index 0438486..104af28 100644 ++--- librtmp/handshake.h +++++ librtmp/handshake.h ++@@ -707,7 +707,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ uint32_t uptime; ++ ++ uint8_t clientbuf[RTMP_SIG_SIZE + 4], *clientsig=clientbuf+4; ++- uint8_t serversig[RTMP_SIG_SIZE], client2[RTMP_SIG_SIZE], *reply; +++ uint8_t serversig[RTMP_SIG_SIZE], serversig1[RTMP_SIG_SIZE], client2[RTMP_SIG_SIZE], *reply; ++ uint8_t type; ++ getoff *getdh = NULL, *getdig = NULL; ++ ++@@ -760,7 +760,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ #else ++ ip = (int32_t *)(clientsig+8); ++ for (i = 2; i < RTMP_SIG_SIZE/4; i++) ++- *ip++ = rand(); +++ *ip++ = ((rand() & 0xFFFF) << 16) | (rand() & 0xFFFF); ++ #endif ++ ++ /* set handshake digest */ ++@@ -825,6 +825,8 @@ HandShake(RTMP * r, int FP9HandShake) ++ ++ if (ReadN(r, (char *)serversig, RTMP_SIG_SIZE) != RTMP_SIG_SIZE) ++ return FALSE; +++ if (ReadN(r, (char *) serversig1, RTMP_SIG_SIZE) != RTMP_SIG_SIZE) +++ return FALSE; ++ ++ /* decode server response */ ++ memcpy(&uptime, serversig, 4); ++@@ -834,7 +836,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ RTMP_Log(RTMP_LOGDEBUG, "%s: FMS Version : %d.%d.%d.%d", __FUNCTION__, serversig[4], ++ serversig[5], serversig[6], serversig[7]); ++ ++- if (FP9HandShake && type == 3 && !serversig[4]) +++ if (FP9HandShake && type == 3 && (!serversig[4] || !serversig1[4])) ++ FP9HandShake = FALSE; ++ ++ #ifdef _DEBUG ++@@ -914,7 +916,7 @@ 
HandShake(RTMP * r, int FP9HandShake) ++ #else ++ ip = (int32_t *)reply; ++ for (i = 0; i < RTMP_SIG_SIZE/4; i++) ++- *ip++ = rand(); +++ *ip++ = ((rand() & 0xFFFF) << 16) | (rand() & 0xFFFF); ++ #endif ++ /* calculate response now */ ++ signatureResp = reply+RTMP_SIG_SIZE-SHA256_DIGEST_LENGTH; ++@@ -965,16 +967,22 @@ HandShake(RTMP * r, int FP9HandShake) ++ __FUNCTION__); ++ RTMP_LogHex(RTMP_LOGDEBUG, reply, RTMP_SIG_SIZE); ++ #endif ++- if (!WriteN(r, (char *)reply, RTMP_SIG_SIZE)) ++- return FALSE; ++- ++- /* 2nd part of handshake */ ++- if (ReadN(r, (char *)serversig, RTMP_SIG_SIZE) != RTMP_SIG_SIZE) ++- return FALSE; +++ if (r->Link.CombineConnectPacket) +++ { +++ char *HandshakeResponse = malloc(RTMP_SIG_SIZE); +++ memcpy(HandshakeResponse, (char *) reply, RTMP_SIG_SIZE); +++ r->Link.HandshakeResponse.av_val = HandshakeResponse; +++ r->Link.HandshakeResponse.av_len = RTMP_SIG_SIZE; +++ } +++ else +++ { +++ if (!WriteN(r, (char *) reply, RTMP_SIG_SIZE)) +++ return FALSE; +++ } ++ ++ #ifdef _DEBUG ++ RTMP_Log(RTMP_LOGDEBUG, "%s: 2nd handshake: ", __FUNCTION__); ++- RTMP_LogHex(RTMP_LOGDEBUG, serversig, RTMP_SIG_SIZE); +++ RTMP_LogHex(RTMP_LOGDEBUG, serversig1, RTMP_SIG_SIZE); ++ #endif ++ ++ if (FP9HandShake) ++@@ -982,21 +990,21 @@ HandShake(RTMP * r, int FP9HandShake) ++ uint8_t signature[SHA256_DIGEST_LENGTH]; ++ uint8_t digest[SHA256_DIGEST_LENGTH]; ++ ++- if (serversig[4] == 0 && serversig[5] == 0 && serversig[6] == 0 ++- && serversig[7] == 0) +++ if (serversig1[4] == 0 && serversig1[5] == 0 && serversig1[6] == 0 +++ && serversig1[7] == 0) ++ { ++ RTMP_Log(RTMP_LOGDEBUG, ++ "%s: Wait, did the server just refuse signed authentication?", ++ __FUNCTION__); ++ } ++ RTMP_Log(RTMP_LOGDEBUG, "%s: Server sent signature:", __FUNCTION__); ++- RTMP_LogHex(RTMP_LOGDEBUG, &serversig[RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH], +++ RTMP_LogHex(RTMP_LOGDEBUG, &serversig1[RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH], ++ SHA256_DIGEST_LENGTH); ++ ++ /* verify server response */ ++ 
HMACsha256(&clientsig[digestPosClient], SHA256_DIGEST_LENGTH, ++ GenuineFMSKey, sizeof(GenuineFMSKey), digest); ++- HMACsha256(serversig, RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH, digest, +++ HMACsha256(serversig1, RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH, digest, ++ SHA256_DIGEST_LENGTH, signature); ++ ++ /* show some information */ ++@@ -1024,7 +1032,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ RTMP_Log(RTMP_LOGDEBUG, "%s: Signature calculated:", __FUNCTION__); ++ RTMP_LogHex(RTMP_LOGDEBUG, signature, SHA256_DIGEST_LENGTH); ++ if (memcmp ++- (signature, &serversig[RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH], +++ (signature, &serversig1[RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH], ++ SHA256_DIGEST_LENGTH) != 0) ++ { ++ RTMP_Log(RTMP_LOGWARNING, "%s: Server not genuine Adobe!", __FUNCTION__); ++@@ -1057,7 +1065,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ } ++ else ++ { ++- if (memcmp(serversig, clientsig, RTMP_SIG_SIZE) != 0) +++ if (memcmp(serversig1, clientsig, RTMP_SIG_SIZE) != 0) ++ { ++ RTMP_Log(RTMP_LOGWARNING, "%s: client signature does not match!", ++ __FUNCTION__); ++@@ -1099,7 +1107,7 @@ SHandShake(RTMP * r) ++ { ++ encrypted = FALSE; ++ } ++- else if (type == 6 || type == 8) +++ else if (type == 6 || type == 8 || type == 9) ++ { ++ offalg = 1; ++ encrypted = TRUE; ++@@ -1148,7 +1156,7 @@ SHandShake(RTMP * r) ++ #else ++ ip = (int32_t *)(serversig+8); ++ for (i = 2; i < RTMP_SIG_SIZE/4; i++) ++- *ip++ = rand(); +++ *ip++ = ((rand() & 0xFFFF) << 16) | (rand() & 0xFFFF); ++ #endif ++ ++ /* set handshake digest */ ++diff --git librtmp/hashswf.c librtmp/hashswf.c ++index 9f4e2c0..01b97e2 100644 ++--- librtmp/hashswf.c +++++ librtmp/hashswf.c ++@@ -70,7 +70,7 @@ extern TLS_CTX RTMP_TLS_ctx; ++ ++ #endif /* CRYPTO */ ++ ++-#define AGENT "Mozilla/5.0" +++#define AGENT "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0" ++ ++ HTTPResult ++ HTTP_get(struct HTTP_ctx *http, const char *url, HTTP_read_callback *cb) ++@@ -116,6 +116,8 @@ HTTP_get(struct HTTP_ctx *http, 
const char *url, HTTP_read_callback *cb) ++ ++ host = p1 + 3; ++ path = strchr(host, '/'); +++ if (!path) +++ return HTTPRES_BAD_REQUEST; ++ hlen = path - host; ++ strncpy(hbuf, host, hlen); ++ hbuf[hlen] = '\0'; ++@@ -200,7 +202,7 @@ HTTP_get(struct HTTP_ctx *http, const char *url, HTTP_read_callback *cb) ++ } ++ ++ p1 = strchr(sb.sb_buf, ' '); ++- rc = atoi(p1 + 1); +++ rc = p1 ? atoi(p1 + 1) : 400; ++ http->status = rc; ++ ++ if (rc >= 300) ++@@ -379,13 +381,13 @@ make_unix_time(char *s) ++ if (fmt) ++ { ++ /* Day, DD-MMM-YYYY HH:MM:SS GMT */ ++- time.tm_mday = strtol(n + 1, &n, 0); +++ time.tm_mday = strtol(n + 1, &n, 10); ++ month = n + 1; ++ n = strchr(month, ' '); ++- time.tm_year = strtol(n + 1, &n, 0); ++- time.tm_hour = strtol(n + 1, &n, 0); ++- time.tm_min = strtol(n + 1, &n, 0); ++- time.tm_sec = strtol(n + 1, NULL, 0); +++ time.tm_year = strtol(n + 1, &n, 10); +++ time.tm_hour = strtol(n + 1, &n, 10); +++ time.tm_min = strtol(n + 1, &n, 10); +++ time.tm_sec = strtol(n + 1, NULL, 10); ++ } ++ else ++ { ++@@ -395,11 +397,11 @@ make_unix_time(char *s) ++ n = strchr(month, ' '); ++ while (isspace(*n)) ++ n++; ++- time.tm_mday = strtol(n, &n, 0); ++- time.tm_hour = strtol(n + 1, &n, 0); ++- time.tm_min = strtol(n + 1, &n, 0); ++- time.tm_sec = strtol(n + 1, &n, 0); ++- time.tm_year = strtol(n + 1, NULL, 0); +++ time.tm_mday = strtol(n, &n, 10); +++ time.tm_hour = strtol(n + 1, &n, 10); +++ time.tm_min = strtol(n + 1, &n, 10); +++ time.tm_sec = strtol(n + 1, &n, 10); +++ time.tm_year = strtol(n + 1, NULL, 10); ++ } ++ if (time.tm_year > 100) ++ time.tm_year -= ysub; ++@@ -528,9 +530,11 @@ RTMP_HashSWF(const char *url, unsigned int *size, unsigned char *hash, ++ ++ if (strncmp(buf, "url: ", 5)) ++ continue; ++- if (strncmp(buf + 5, url, hlen)) +++ if (strncmp(buf + 5, url, strlen(buf + 5) - 1)) ++ continue; ++ r1 = strrchr(buf, '/'); +++ if (!r1) +++ continue; ++ i = strlen(r1); ++ r1[--i] = '\0'; ++ if (strncmp(r1, file, i)) ++@@ -640,7 +644,7 @@ 
RTMP_HashSWF(const char *url, unsigned int *size, unsigned char *hash, ++ HMAC_finish(in.ctx, hash, hlen); ++ *size = in.size; ++ ++- fprintf(f, "date: %s\n", date); +++ fprintf(f, "date: %s\n", date[0] ? date : cctim); ++ fprintf(f, "size: %08x\n", in.size); ++ fprintf(f, "hash: "); ++ for (i = 0; i < SHA256_DIGEST_LENGTH; i++) ++diff --git librtmp/log.c librtmp/log.c ++index 1b52000..7564a15 100644 ++--- librtmp/log.c +++++ librtmp/log.c ++@@ -52,8 +52,8 @@ static void rtmp_log_default(int level, const char *format, va_list vl) ++ vsnprintf(str, MAX_PRINT_LEN-1, format, vl); ++ ++ /* Filter out 'no-name' */ ++- if ( RTMP_debuglevelav_val = p; ++ app->av_len = applen; ++ RTMP_Log(RTMP_LOGDEBUG, "Parsed app : %.*s", applen, p); ++diff --git librtmp/rtmp.c librtmp/rtmp.c ++index ca7db6a..c652cff 100644 ++--- librtmp/rtmp.c +++++ librtmp/rtmp.c ++@@ -28,6 +28,7 @@ ++ #include ++ #include ++ #include +++#include ++ ++ #include "rtmp_sys.h" ++ #include "log.h" ++@@ -68,6 +69,7 @@ TLS_CTX RTMP_TLS_ctx; ++ ++ #define RTMP_SIG_SIZE 1536 ++ #define RTMP_LARGE_HEADER_SIZE 12 +++#define HEX2BIN(a) (((a)&0x40)?((a)&0xf)+9:((a)&0xf)) ++ ++ static const int packetSize[] = { 12, 8, 4, 1 }; ++ ++@@ -108,18 +110,25 @@ typedef enum { ++ RTMPT_OPEN=0, RTMPT_SEND, RTMPT_IDLE, RTMPT_CLOSE ++ } RTMPTCmd; ++ +++static int ConnectSocket(RTMP *r); ++ static int DumpMetaData(AMFObject *obj); ++ static int HandShake(RTMP *r, int FP9HandShake); ++ static int SocksNegotiate(RTMP *r); ++ +++static int SendBytesReceived(RTMP *r); +++static int SendCommand(RTMP *r, char *method, int queue); ++ static int SendConnectPacket(RTMP *r, RTMPPacket *cp); ++ static int SendCheckBW(RTMP *r); ++ static int SendCheckBWResult(RTMP *r, double txn); ++ static int SendDeleteStream(RTMP *r, double dStreamId); ++ static int SendFCSubscribe(RTMP *r, AVal *subscribepath); +++static int SendGetStreamLength(RTMP *r); +++static int SendInvoke(RTMP *r, AVal *command, int queue); ++ static int SendPlay(RTMP *r); 
++-static int SendBytesReceived(RTMP *r); ++ static int SendUsherToken(RTMP *r, AVal *usherToken); +++static void TransformRot13(AMFObject *obj, AVal *rindex, AVal *r); +++static void __TeaCrypt(uint32_t *block, uint32_t len, uint32_t *key); +++static AVal TeaEncrypt(AVal *srcData, AVal *srcKey); ++ ++ #if 0 /* unused */ ++ static int SendBGHasStream(RTMP *r, double dId, AVal *playpath); ++@@ -338,10 +347,15 @@ RTMP_Init(RTMP *r) ++ r->m_nClientBW = 2500000; ++ r->m_nClientBW2 = 2; ++ r->m_nServerBW = 2500000; ++- r->m_fAudioCodecs = 3191.0; +++ r->m_fAudioCodecs = 3575.0; ++ r->m_fVideoCodecs = 252.0; +++ r->m_fEncoding = 3.0; ++ r->Link.timeout = 30; ++ r->Link.swfAge = 30; +++ r->Link.CombineConnectPacket = TRUE; +++ r->Link.ConnectPacket = FALSE; +++ r->Link.publishId = 0; +++ r->Link.dynamicPublish = FALSE; ++ } ++ ++ void ++@@ -359,6 +373,8 @@ RTMP_GetDuration(RTMP *r) ++ int ++ RTMP_IsConnected(RTMP *r) ++ { +++ if (r->m_sb.sb_size > 0) +++ return TRUE; ++ return r->m_sb.sb_socket != -1; ++ } ++ ++@@ -445,6 +461,8 @@ RTMP_SetupStream(RTMP *r, ++ AVal *flashVer, ++ AVal *subscribepath, ++ AVal *usherToken, +++ AVal *WeebToken, +++ AVal *ccomm, ++ int dStart, ++ int dStop, int bLiveStream, long int timeout) ++ { ++@@ -467,6 +485,8 @@ RTMP_SetupStream(RTMP *r, ++ RTMP_Log(RTMP_LOGDEBUG, "subscribepath : %s", subscribepath->av_val); ++ if (usherToken && usherToken->av_val) ++ RTMP_Log(RTMP_LOGDEBUG, "NetStream.Authenticate.UsherToken : %s", usherToken->av_val); +++ if (WeebToken && WeebToken->av_val) +++ RTMP_Log(RTMP_LOGDEBUG, "WeebToken: %s", WeebToken->av_val); ++ if (flashVer && flashVer->av_val) ++ RTMP_Log(RTMP_LOGDEBUG, "flashVer : %s", flashVer->av_val); ++ if (dStart > 0) ++@@ -515,6 +535,10 @@ RTMP_SetupStream(RTMP *r, ++ r->Link.subscribepath = *subscribepath; ++ if (usherToken && usherToken->av_len) ++ r->Link.usherToken = *usherToken; +++ if (WeebToken && WeebToken->av_len) +++ r->Link.WeebToken = *WeebToken; +++ if (ccomm && ccomm->av_len) +++ 
r->Link.ccomm = *ccomm; ++ r->Link.seekTime = dStart; ++ r->Link.stopTime = dStop; ++ if (bLiveStream) ++@@ -572,14 +596,24 @@ static struct urlopt { ++ "Stream is live, no seeking possible" }, ++ { AVC("subscribe"), OFF(Link.subscribepath), OPT_STR, 0, ++ "Stream to subscribe to" }, ++- { AVC("jtv"), OFF(Link.usherToken), OPT_STR, 0, ++- "Justin.tv authentication token" }, ++- { AVC("token"), OFF(Link.token), OPT_STR, 0, +++ { AVC("jtv"), OFF(Link.usherToken), OPT_STR, 0, +++ "Justin.tv authentication token"}, +++ { AVC("weeb"), OFF(Link.WeebToken), OPT_STR, 0, +++ "Weeb.tv authentication token"}, +++ { AVC("token"), OFF(Link.token), OPT_STR, 0, ++ "Key for SecureToken response" }, +++ { AVC("ccommand"), OFF(Link.ccomm), OPT_STR, 0, +++ "Send custom command before play" }, ++ { AVC("swfVfy"), OFF(Link.lFlags), OPT_BOOL, RTMP_LF_SWFV, ++ "Perform SWF Verification" }, ++ { AVC("swfAge"), OFF(Link.swfAge), OPT_INT, 0, ++ "Number of days to use cached SWF hash" }, +++#ifdef CRYPTO +++ { AVC("swfsize"), OFF(Link.swfSize), OPT_INT, 0, +++ "Size of the decompressed SWF file"}, +++ { AVC("swfhash"), OFF(Link.swfHash), OPT_STR, 0, +++ "SHA256 hash of the decompressed SWF file"}, +++#endif ++ { AVC("start"), OFF(Link.seekTime), OPT_INT, 0, ++ "Stream start position in milliseconds" }, ++ { AVC("stop"), OFF(Link.stopTime), OPT_INT, 0, ++@@ -685,6 +719,9 @@ parseAMF(AMFObject *obj, AVal *av, int *depth) ++ case 'O': ++ prop.p_type = AMF_OBJECT; ++ break; +++ case 'Z': +++ prop.p_type = AMF_NULL; +++ break; ++ default: ++ return -1; ++ } ++@@ -722,7 +759,7 @@ int RTMP_SetOpt(RTMP *r, const AVal *opt, AVal *arg) ++ *aptr = *arg; } ++ break; ++ case OPT_INT: { ++- long l = strtol(arg->av_val, NULL, 0); +++ long l = strtol(arg->av_val, NULL, 10); ++ *(int *)v = l; } ++ break; ++ case OPT_BOOL: { ++@@ -767,7 +804,7 @@ int RTMP_SetupURL(RTMP *r, char *url) ++ if (!ret) ++ return ret; ++ r->Link.port = port; ++- r->Link.playpath = r->Link.playpath0; +++ r->Link.playpath = 
AVcopy(r->Link.playpath0); ++ ++ while (ptr) { ++ *ptr++ = '\0'; ++@@ -844,9 +881,16 @@ int RTMP_SetupURL(RTMP *r, char *url) ++ } ++ ++ #ifdef CRYPTO ++- if ((r->Link.lFlags & RTMP_LF_SWFV) && r->Link.swfUrl.av_len) ++- RTMP_HashSWF(r->Link.swfUrl.av_val, &r->Link.SWFSize, ++- (unsigned char *)r->Link.SWFHash, r->Link.swfAge); +++ RTMP_Log(RTMP_LOGDEBUG, "Khalsa: %d %d %s", r->Link.swfSize, r->Link.swfHash.av_len, r->Link.swfHash.av_val); +++ if (r->Link.swfSize && r->Link.swfHash.av_len) +++ { +++ int i, j = 0; +++ for (i = 0; i < r->Link.swfHash.av_len; i += 2) +++ r->Link.SWFHash[j++] = (HEX2BIN(r->Link.swfHash.av_val[i]) << 4) | HEX2BIN(r->Link.swfHash.av_val[i + 1]); +++ r->Link.SWFSize = (uint32_t) r->Link.swfSize; +++ } +++ else if ((r->Link.lFlags & RTMP_LF_SWFV) && r->Link.swfUrl.av_len) +++ RTMP_HashSWF(r->Link.swfUrl.av_val, &r->Link.SWFSize, (unsigned char *) r->Link.SWFHash, r->Link.swfAge); ++ #endif ++ ++ SocksSetup(r, &r->Link.sockshost); ++@@ -949,6 +993,8 @@ RTMP_Connect0(RTMP *r, struct sockaddr * service) ++ } ++ ++ setsockopt(r->m_sb.sb_socket, IPPROTO_TCP, TCP_NODELAY, (char *) &on, sizeof(on)); +++ if (r->Link.protocol & RTMP_FEATURE_HTTP) +++ setsockopt(r->m_sb.sb_socket, SOL_SOCKET, SO_KEEPALIVE, (char *) &on, sizeof (on)); ++ ++ return TRUE; ++ } ++@@ -1399,41 +1445,96 @@ ReadN(RTMP *r, char *buffer, int n) ++ ptr = buffer; ++ while (n > 0) ++ { ++- int nBytes = 0, nRead; +++ int nBytes = 0, nRead, status = 0, retries = 0; ++ if (r->Link.protocol & RTMP_FEATURE_HTTP) ++ { ++- int refill = 0; ++- while (!r->m_resplen) ++- { ++- int ret; ++- if (r->m_sb.sb_size < 13 || refill) ++- { ++- if (!r->m_unackd) ++- HTTP_Post(r, RTMPT_IDLE, "", 1); ++- if (RTMPSockBuf_Fill(&r->m_sb) < 1) ++- { ++- if (!r->m_sb.sb_timedout) ++- RTMP_Close(r); ++- return 0; ++- } ++- } ++- if ((ret = HTTP_read(r, 0)) == -1) ++- { ++- RTMP_Log(RTMP_LOGDEBUG, "%s, No valid HTTP response found", __FUNCTION__); ++- RTMP_Close(r); ++- return 0; ++- } ++- else if (ret == 
-2) +++ while (!r->m_resplen) +++ { +++ /* Refill if socket buffer is empty */ +++ if (!r->m_sb.sb_size) ++ { ++- refill = 1; +++ if (retries > 30) +++ { +++ RTMP_Close(r); +++ return 0; +++ } +++ +++ if (!r->m_unackd) +++ { +++ if (retries > 0) +++ { +++ HTTP_Post(r, RTMPT_IDLE, "", 1); +++ r->m_unackd = TRUE; +++ } +++ retries++; +++ +++ if (!r->m_bPlaying) +++ sleep(.25); +++ } +++ +++ RTMP_Log(RTMP_LOGDEBUG, "Trying to fill HTTP buffer, Retries: %d", retries); +++ status = RTMPSockBuf_Fill(&r->m_sb); +++ /* Reconnect socket when closed by some moronic servers after +++ * every HTTP data packet */ +++ if (status < 1) +++ { +++ /* Close connection on connection reset */ +++ if (status == -1) +++ { +++ RTMP_Close(r); +++ return 0; +++ } +++ +++ RTMP_Log(RTMP_LOGDEBUG, "Reconnecting socket, Status: %d", status); +++ if (ConnectSocket(r)) +++ { +++ HTTP_Post(r, RTMPT_IDLE, "", 1); +++ r->m_unackd = TRUE; +++ retries++; +++ } +++ else +++ { +++ RTMP_Close(r); +++ return 0; +++ } +++ } ++ } ++- else +++ +++ RTMP_Log(RTMP_LOGDEBUG, "Trying to read HTTP response, Bytes Available: %d", r->m_sb.sb_size); +++ status = HTTP_read(r, 0); +++ if (status == -1) ++ { ++- refill = 0; +++ RTMP_Log(RTMP_LOGDEBUG, "%s, No valid HTTP response found", __FUNCTION__); +++ RTMP_Close(r); +++ return 0; ++ } ++- } ++- if (r->m_resplen && !r->m_sb.sb_size) ++- RTMPSockBuf_Fill(&r->m_sb); +++ else if (status == -2) +++ { +++ if (RTMPSockBuf_Fill(&r->m_sb) < 1) +++ if (!r->m_sb.sb_timedout) +++ { +++ RTMP_Close(r); +++ return 0; +++ } +++ } +++ else if (status == -3) +++ { +++ RTMP_Close(r); +++ return 0; +++ } +++ else +++ r->m_unackd = FALSE; +++ } +++ +++ /* Refill when there is still some data to be read and socket buffer +++ * is empty */ +++ if (r->m_resplen && (!r->m_sb.sb_size)) +++ { +++ if (RTMPSockBuf_Fill(&r->m_sb) < 1) +++ if (!r->m_sb.sb_timedout) +++ RTMP_Close(r); +++ } +++ ++ avail = r->m_sb.sb_size; ++ if (avail > r->m_resplen) ++ avail = r->m_resplen; ++@@ -1460,10 +1561,11 
@@ ReadN(RTMP *r, char *buffer, int n) ++ r->m_sb.sb_size -= nRead; ++ nBytes = nRead; ++ r->m_nBytesIn += nRead; ++- if (r->m_bSendCounter ++- && r->m_nBytesIn > ( r->m_nBytesInSent + r->m_nClientBW / 10)) ++- if (!SendBytesReceived(r)) ++- return FALSE; +++ if (r->m_nBytesIn > 0xF0000000) +++ r->m_nBytesIn -= 0xF0000000; +++ if (r->m_bSendCounter && (r->m_nBytesIn > (r->m_nBytesInSent + r->m_nClientBW / 10))) +++ if (!SendBytesReceived(r)) +++ return FALSE; ++ } ++ /*RTMP_Log(RTMP_LOGDEBUG, "%s: %d bytes\n", __FUNCTION__, nBytes); */ ++ #ifdef _DEBUG ++@@ -1474,7 +1576,8 @@ ReadN(RTMP *r, char *buffer, int n) ++ { ++ RTMP_Log(RTMP_LOGDEBUG, "%s, RTMP socket closed by peer", __FUNCTION__); ++ /*goto again; */ ++- RTMP_Close(r); +++ if (!r->m_sb.sb_timedout) +++ RTMP_Close(r); ++ break; ++ } ++ ++@@ -1499,6 +1602,7 @@ static int ++ WriteN(RTMP *r, const char *buffer, int n) ++ { ++ const char *ptr = buffer; +++ char *ConnectPacket = 0; ++ #ifdef CRYPTO ++ char *encrypted = 0; ++ char buf[RTMP_BUFFER_CACHE_SIZE]; ++@@ -1514,6 +1618,15 @@ WriteN(RTMP *r, const char *buffer, int n) ++ } ++ #endif ++ +++ if (r->Link.ConnectPacket) +++ { +++ char *ConnectPacket = malloc(r->Link.HandshakeResponse.av_len + n); +++ memcpy(ConnectPacket, r->Link.HandshakeResponse.av_val, r->Link.HandshakeResponse.av_len); +++ memcpy(ConnectPacket + r->Link.HandshakeResponse.av_len, ptr, n); +++ ptr = ConnectPacket; +++ n += r->Link.HandshakeResponse.av_len; +++ } +++ ++ while (n > 0) ++ { ++ int nBytes; ++@@ -1550,6 +1663,14 @@ WriteN(RTMP *r, const char *buffer, int n) ++ free(encrypted); ++ #endif ++ +++ if (r->Link.ConnectPacket) +++ { +++ if (r->Link.HandshakeResponse.av_val) +++ free(r->Link.HandshakeResponse.av_val); +++ free(ConnectPacket); +++ r->Link.ConnectPacket = FALSE; +++ } +++ ++ return n == 0; ++ } ++ ++@@ -1579,6 +1700,9 @@ SendConnectPacket(RTMP *r, RTMPPacket *cp) ++ char pbuf[4096], *pend = pbuf + sizeof(pbuf); ++ char *enc; ++ +++ if (r->Link.CombineConnectPacket) +++ 
r->Link.ConnectPacket = TRUE; +++ ++ if (cp) ++ return RTMP_SendPacket(r, cp, TRUE); ++ ++@@ -1627,7 +1751,7 @@ SendConnectPacket(RTMP *r, RTMPPacket *cp) ++ enc = AMF_EncodeNamedBoolean(enc, pend, &av_fpad, FALSE); ++ if (!enc) ++ return FALSE; ++- enc = AMF_EncodeNamedNumber(enc, pend, &av_capabilities, 15.0); +++ enc = AMF_EncodeNamedNumber(enc, pend, &av_capabilities, 239.0); ++ if (!enc) ++ return FALSE; ++ enc = AMF_EncodeNamedNumber(enc, pend, &av_audioCodecs, r->m_fAudioCodecs); ++@@ -1791,7 +1915,7 @@ SendUsherToken(RTMP *r, AVal *usherToken) ++ packet.m_hasAbsTimestamp = 0; ++ packet.m_body = pbuf + RTMP_MAX_HEADER_SIZE; ++ ++- RTMP_Log(RTMP_LOGDEBUG, "UsherToken: %s", usherToken->av_val); +++ RTMP_Log(RTMP_LOGDEBUG, "UsherToken: %.*s", usherToken->av_len, usherToken->av_val); ++ enc = packet.m_body; ++ enc = AMF_EncodeString(enc, pend, &av_NetStream_Authenticate_UsherToken); ++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); ++@@ -1934,6 +2058,26 @@ SendPublish(RTMP *r) ++ return RTMP_SendPacket(r, &packet, TRUE); ++ } ++ +++static int +++SendDynamicPublish(RTMP *r, double publishId) +++{ +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf), *enc; +++ AVal av_command, av_publishId; +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_publish); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ av_publishId.av_val = malloc(128 * sizeof (char)); +++ av_publishId.av_len = sprintf(av_publishId.av_val, "%.0f", publishId); +++ enc = AMF_EncodeString(enc, pend, &av_publishId); +++ enc = AMF_EncodeString(enc, pend, &av_live); +++ av_command.av_val = pbuf; +++ av_command.av_len = enc - pbuf; +++ +++ return SendInvoke(r, &av_command, FALSE); +++} +++ ++ SAVC(deleteStream); ++ ++ static int ++@@ -2097,6 +2241,7 @@ SendBytesReceived(RTMP *r) ++ } ++ ++ SAVC(_checkbw); +++SAVC(checkBandwidth); ++ ++ static int ++ SendCheckBW(RTMP *r) ++@@ -2114,7 +2259,7 @@ SendCheckBW(RTMP *r) ++ packet.m_body = pbuf + 
RTMP_MAX_HEADER_SIZE; ++ ++ enc = packet.m_body; ++- enc = AMF_EncodeString(enc, pend, &av__checkbw); +++ enc = AMF_EncodeString(enc, pend, &av_checkBandwidth); ++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); ++ *enc++ = AMF_NULL; ++ ++@@ -2221,10 +2366,8 @@ SendPlay(RTMP *r) ++ enc = AMF_EncodeNumber(enc, pend, -1000.0); ++ else ++ { ++- if (r->Link.seekTime > 0.0) ++- enc = AMF_EncodeNumber(enc, pend, r->Link.seekTime); /* resume from here */ ++- else ++- enc = AMF_EncodeNumber(enc, pend, 0.0); /*-2000.0);*/ /* recorded as default, -2000.0 is not reliable since that freezes the player if the stream is not found */ +++ if (r->Link.seekTime > 0.0 || r->Link.stopTime) +++ enc = AMF_EncodeNumber(enc, pend, r->Link.seekTime); /* resume from here */ ++ } ++ if (!enc) ++ return FALSE; ++@@ -2340,7 +2483,7 @@ RTMP_SendCtrl(RTMP *r, short nType, unsigned int nObject, unsigned int nTime) ++ int nSize; ++ char *buf; ++ ++- RTMP_Log(RTMP_LOGDEBUG, "sending ctrl. type: 0x%04x", (unsigned short)nType); +++ RTMP_Log(RTMP_LOGDEBUG, "sending ctrl, type: 0x%04x", (unsigned short)nType); ++ ++ packet.m_nChannel = 0x02; /* control channel (ping) */ ++ packet.m_headerType = RTMP_PACKET_SIZE_MEDIUM; ++@@ -2372,8 +2515,8 @@ RTMP_SendCtrl(RTMP *r, short nType, unsigned int nObject, unsigned int nTime) ++ } ++ else if (nType == 0x1A) ++ { ++- *buf = nObject & 0xff; ++- } +++ *buf = nObject & 0xff; +++ } ++ else ++ { ++ if (nSize > 2) ++@@ -2873,6 +3016,7 @@ PublisherAuth(RTMP *r, AVal *description) ++ #endif ++ ++ +++SAVC(onBWCheck); ++ SAVC(onBWDone); ++ SAVC(onFCSubscribe); ++ SAVC(onFCUnsubscribe); ++@@ -2885,24 +3029,25 @@ SAVC(level); ++ SAVC(description); ++ SAVC(onStatus); ++ SAVC(playlist_ready); +++SAVC(cps); +++SAVC(disneyToken); +++SAVC(getStreamLength); +++SAVC(sendStatus); +++SAVC(verifyClient); ++ static const AVal av_NetStream_Failed = AVC("NetStream.Failed"); ++ static const AVal av_NetStream_Play_Failed = AVC("NetStream.Play.Failed"); ++-static const AVal 
av_NetStream_Play_StreamNotFound = ++-AVC("NetStream.Play.StreamNotFound"); ++-static const AVal av_NetConnection_Connect_InvalidApp = ++-AVC("NetConnection.Connect.InvalidApp"); +++static const AVal av_NetStream_Play_StreamNotFound = AVC("NetStream.Play.StreamNotFound"); +++static const AVal av_NetConnection_Connect_InvalidApp = AVC("NetConnection.Connect.InvalidApp"); ++ static const AVal av_NetStream_Play_Start = AVC("NetStream.Play.Start"); ++ static const AVal av_NetStream_Play_Complete = AVC("NetStream.Play.Complete"); ++ static const AVal av_NetStream_Play_Stop = AVC("NetStream.Play.Stop"); ++ static const AVal av_NetStream_Seek_Notify = AVC("NetStream.Seek.Notify"); ++ static const AVal av_NetStream_Pause_Notify = AVC("NetStream.Pause.Notify"); ++-static const AVal av_NetStream_Play_PublishNotify = ++-AVC("NetStream.Play.PublishNotify"); ++-static const AVal av_NetStream_Play_UnpublishNotify = ++-AVC("NetStream.Play.UnpublishNotify"); +++static const AVal av_NetStream_Play_PublishNotify = AVC("NetStream.Play.PublishNotify"); +++static const AVal av_NetStream_Play_UnpublishNotify = AVC("NetStream.Play.UnpublishNotify"); ++ static const AVal av_NetStream_Publish_Start = AVC("NetStream.Publish.Start"); ++-static const AVal av_NetConnection_Connect_Rejected = ++-AVC("NetConnection.Connect.Rejected"); +++static const AVal av_NetConnection_Connect_Rejected = AVC("NetConnection.Connect.Rejected"); +++static const AVal av_NetConnection_confStream = AVC("NetConnection.confStream"); ++ ++ /* Returns 0 for OK/Failed/error, 1 for 'Stop or Complete' */ ++ static int ++@@ -2912,6 +3057,11 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ AVal method; ++ double txn; ++ int ret = 0, nRes; +++ char pbuf[512], *pend = pbuf + sizeof (pbuf), *enc, **params = NULL; +++ char *host = r->Link.hostname.av_len ? r->Link.hostname.av_val : ""; +++ char *pageUrl = r->Link.pageUrl.av_len ? 
r->Link.pageUrl.av_val : ""; +++ int param_count; +++ AVal av_Command, av_Response; ++ if (body[0] != 0x02) /* make sure it is a string method name we start with */ ++ { ++ RTMP_Log(RTMP_LOGWARNING, "%s, Sanity failed. no string method in invoke packet", ++@@ -2952,7 +3102,14 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ RTMP_Log(RTMP_LOGDEBUG, "%s, received result for method call <%s>", __FUNCTION__, ++ methodInvoked.av_val); ++ ++- if (AVMATCH(&methodInvoked, &av_connect)) +++ if ((r->Link.dynamicPublish == TRUE) && AVMATCH(&methodInvoked, &r->Link.dynamicCommand)) +++ { +++ r->Link.dynamicPublish = FALSE; +++ r->Link.publishId = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 3)); +++ RTMP_Log(RTMP_LOGDEBUG, "server returned dynamic publish id: %.0f", r->Link.publishId); +++ RTMP_SendCreateStream(r); +++ } +++ else if (AVMATCH(&methodInvoked, &av_connect)) ++ { ++ if (r->Link.token.av_len) ++ { ++@@ -2973,46 +3130,360 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ RTMP_SendServerBW(r); ++ RTMP_SendCtrl(r, 3, 0, 300); ++ } ++- RTMP_SendCreateStream(r); +++ if (r->Link.ccomm.av_len) +++ { +++ param_count = strsplit(r->Link.ccomm.av_val, FALSE, ';', ¶ms); +++ if ((param_count > 1) && (strcasecmp(params[1], "TRUE") == 0)) +++ SendCommand(r, params[0], TRUE); +++ else +++ SendCommand(r, params[0], FALSE); +++ if ((param_count > 2) && (strcasecmp(params[2], "TRUE") == 0)) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "overriding inbuilt dynamic publish command with -K (ccommand) switch"); +++ r->Link.dynamicPublish = TRUE; +++ r->Link.dynamicCommand.av_val = params[0]; +++ r->Link.dynamicCommand.av_len = strlen(params[0]); +++ } +++ else +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "overriding inbuilt site specific authentication with -K (ccommand) switch"); +++ r->Link.dynamicPublish = FALSE; +++ RTMP_SendCreateStream(r); +++ } +++ } +++ else if (strstr(host, "3dbuzz.com") || strstr(pageUrl, "3dbuzz.com")) +++ { +++ AVal r1, r3; +++ AVal av_r1 = 
AVC("r1"); +++ AVal av_r3 = AVC("r3"); +++ AVal r1_key = AVC("4V?c6k7Y`(6~rMjp6S6!xT04]8m$g2"); +++ AVal r3_key = AVC("aB`d^+8?9;36]Lw2#rg?PDMcX?lCw2"); +++ TransformRot13(&obj, &av_r1, &r1); +++ TransformRot13(&obj, &av_r3, &r3); +++ if (r1.av_val && r3.av_val) +++ { +++ AVal av_qq = AVC("qq"); +++ AVal av_tos = AVC("http://www.3dbuzz.com/home/tos"); +++ AVal av_warning = AVC("Stream capturing is a violation of our terms, and may result in immediate cancellation of your account without refund"); +++ AVal r1_response; +++ +++ RTMP_Log(RTMP_LOGDEBUG, "3DBuzz SecureToken r1 request - %.*s", r1.av_len, r1.av_val); +++ RTMP_Log(RTMP_LOGDEBUG, "3DBuzz SecureToken r3 request - %.*s", r3.av_len, r3.av_val); +++ DecodeTEA(&r1_key, &r1); +++ DecodeTEA(&r3_key, &r3); +++ r1_response = TeaEncrypt(&av_tos, &r1); +++ RTMP_Log(RTMP_LOGDEBUG, "3DBuzz SecureToken r1 response - %.*s", r1_response.av_len, r1_response.av_val); +++ RTMP_Log(RTMP_LOGDEBUG, "3DBuzz SecureToken r3 response - %.*s", r3.av_len, r3.av_val); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_qq); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &r3); +++ enc = AMF_EncodeString(enc, pend, &av_tos); +++ enc = AMF_EncodeString(enc, pend, &r1_response); +++ enc = AMF_EncodeString(enc, pend, &av_warning); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ SendInvoke(r, &av_Command, FALSE); +++ } ++ ++- if (!(r->Link.protocol & RTMP_FEATURE_WRITE)) ++- { ++- /* Authenticate on Justin.tv legacy servers before sending FCSubscribe */ ++- if (r->Link.usherToken.av_len) ++- SendUsherToken(r, &r->Link.usherToken); ++- /* Send the FCSubscribe if live stream or if subscribepath is set */ ++- if (r->Link.subscribepath.av_len) ++- SendFCSubscribe(r, &r->Link.subscribepath); ++- else if (r->Link.lFlags & RTMP_LF_LIVE) ++- SendFCSubscribe(r, &r->Link.playpath); ++- } ++- } +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, 
"cam4")) +++ { +++ AMFObject obj2, response; +++ AMFObjectProperty p; +++ AVal Host, ID, IP, av_ChallengeResponse; +++ AVal av_receiveRTMPResponse = AVC("receiveRTMPResponse"); +++ AVal av_client = AVC("client"); +++ AVal av_result = AVC("result"); +++ char ChallengeResponse[16] = {0}; +++ SAVC(application); +++ SAVC(Host); +++ SAVC(ID); +++ SAVC(IP); +++ +++ AMFProp_GetObject(AMF_GetProp(&obj, NULL, 3), &obj2); +++ if (RTMP_FindFirstMatchingProperty(&obj2, &av_application, &p)) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "sending cam4 authentication"); +++ AMFProp_GetObject(&p, &obj2); +++ RTMP_FindFirstMatchingProperty(&obj2, &av_Host, &p); +++ AMFProp_GetString(&p, &Host); +++ RTMP_FindFirstMatchingProperty(&obj2, &av_ID, &p); +++ AMFProp_GetString(&p, &ID); +++ RTMP_FindFirstMatchingProperty(&obj2, &av_IP, &p); +++ AMFProp_GetString(&p, &IP); +++ RTMP_Log(RTMP_LOGDEBUG, "Cam4 Host: %.*s", Host.av_len, Host.av_val); +++ RTMP_Log(RTMP_LOGDEBUG, "Cam4 ID : %.*s", ID.av_len, ID.av_val); +++ RTMP_Log(RTMP_LOGDEBUG, "Cam4 IP : %.*s", IP.av_len, IP.av_val); +++ snprintf(ChallengeResponse, 15, "%d", Host.av_len + ID.av_len + IP.av_len); +++ av_ChallengeResponse.av_val = ChallengeResponse; +++ av_ChallengeResponse.av_len = strlen(av_ChallengeResponse.av_val); +++ AMFProp_SetName(&p, &av_client); +++ AMFProp_SetString(&p, &ID); +++ AMF_AddProp(&response, &p); +++ AMFProp_SetName(&p, &av_result); +++ AMFProp_SetString(&p, &av_ChallengeResponse); +++ AMF_AddProp(&response, &p); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_receiveRTMPResponse); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_Encode(&response, enc, pend); +++ enc = AMF_EncodeBoolean(enc, pend, TRUE); +++ av_Response.av_val = pbuf; +++ av_Response.av_len = enc - pbuf; +++ +++ AMF_Decode(&obj, av_Response.av_val, av_Response.av_len, FALSE); +++ AMF_Dump(&obj); +++ SendInvoke(r, &av_Response, TRUE); +++ } +++ +++ RTMP_SendCreateStream(r); +++ } +++ else if 
((strstr(host, "highwebmedia.com") || strstr(pageUrl, "chaturbate.com")) +++ && (!strstr(host, "origin"))) +++ { +++ AVal av_ModelName; +++ SAVC(CheckPublicStatus); +++ +++ if (strlen(pageUrl) > 7) +++ { +++ strsplit(pageUrl + 7, FALSE, '/', ¶ms); +++ av_ModelName.av_val = params[1]; +++ av_ModelName.av_len = strlen(params[1]); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_CheckPublicStatus); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &av_ModelName); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ +++ SendInvoke(r, &av_Command, FALSE); +++ } +++ else +++ { +++ RTMP_Log(RTMP_LOGERROR, "you must specify the pageUrl"); +++ RTMP_Close(r); +++ } +++ } +++ else if (strstr(host, "featve.com") || strstr(pageUrl, "featve.com")) +++ { +++ AVal av_auth = AVC("yes"); +++ SAVC(youCannotPlayMe); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_youCannotPlayMe); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &av_auth); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ SendInvoke(r, &av_Command, FALSE); +++ +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(host, "tv-stream.to") || strstr(pageUrl, "tv-stream.to")) +++ { +++ static char auth[] = {'h', 0xC2, 0xA7, '4', 'j', 'h', 'H', '4', '3', 'd'}; +++ AVal av_auth; +++ SAVC(requestAccess); +++ av_auth.av_val = auth; +++ av_auth.av_len = sizeof (auth); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_requestAccess); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &av_auth); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ SendInvoke(r, &av_Command, FALSE); +++ +++ SendCommand(r, "getConnectionCount", FALSE); +++ SendGetStreamLength(r); +++ RTMP_SendCreateStream(r); +++ } +++ else if (r->Link.WeebToken.av_len) 
+++ { +++ AVal av_Token, av_Username, av_Password; +++ SAVC(determineAccess); +++ +++ param_count = strsplit(r->Link.WeebToken.av_val, FALSE, ';', ¶ms); +++ if (param_count >= 1) +++ { +++ av_Token.av_val = params[0]; +++ av_Token.av_len = strlen(params[0]); +++ } +++ if (param_count >= 2) +++ { +++ av_Username.av_val = params[1]; +++ av_Username.av_len = strlen(params[1]); +++ } +++ if (param_count >= 3) +++ { +++ av_Password.av_val = params[2]; +++ av_Password.av_len = strlen(params[2]); +++ } +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_determineAccess); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &av_Token); +++ enc = AMF_EncodeString(enc, pend, &av_Username); +++ enc = AMF_EncodeString(enc, pend, &av_Password); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ +++ RTMP_Log(RTMP_LOGDEBUG, "WeebToken: %s", r->Link.WeebToken.av_val); +++ SendInvoke(r, &av_Command, FALSE); +++ } +++ else if (strstr(host, "wfctv.com") || strstr(pageUrl, "wfctv.com")) +++ { +++ AVal av_auth1 = AVC("zoivid"); +++ AVal av_auth2 = AVC("yePi4jee"); +++ SAVC(stream_login); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_stream_login); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &av_auth1); +++ enc = AMF_EncodeString(enc, pend, &av_auth2); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ SendInvoke(r, &av_Command, FALSE); +++ +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(host, "pc3oot.us.to")) +++ { +++ SendCommand(r, "UIUIUINASOWAS", TRUE); +++ SendGetStreamLength(r); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(host, "streamscene.cc") || strstr(pageUrl, "streamscene.cc") +++ || strstr(host, "tsboard.tv") || strstr(pageUrl, "teamstream.in") +++ || strstr(host, "hdstreams.tv") || strstr(pageUrl, "teamstream.to") +++ || strstr(pageUrl, "istreams.to")) 
+++ { +++ SendCommand(r, "r", FALSE); +++ SendGetStreamLength(r); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "axcast.com")) +++ { +++ SendCommand(r, "requestData", FALSE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "dhmediahosting.com")) +++ { +++ SendCommand(r, "netStreamEnable", FALSE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "ezcast.tv")) +++ { +++ SendCommand(r, "iUsteJaSakamCarevataKerka", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "janjua.tv")) +++ { +++ SendCommand(r, "soLagaDaSeStoriAga", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "liveflash.tv")) +++ { +++ char *command = "kaskatijaEkonomista"; +++ r->Link.dynamicPublish = TRUE; +++ r->Link.dynamicCommand.av_val = command; +++ r->Link.dynamicCommand.av_len = strlen(command); +++ SendCommand(r, command, TRUE); +++ } +++ else if (strstr(pageUrl, "mips.tv") || strstr(pageUrl, "mipsplayer.com")) +++ { +++ char *command = "gaolVanusPobeleVoKosata"; +++ r->Link.dynamicPublish = TRUE; +++ r->Link.dynamicCommand.av_val = command; +++ r->Link.dynamicCommand.av_len = strlen(command); +++ SendCommand(r, command, TRUE); +++ } +++ else if (strstr(pageUrl, "streamify.tv")) +++ { +++ SendCommand(r, "keGoVidishStambolSoseBardovci", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "ucaster.eu")) +++ { +++ SendCommand(r, "vujkoMiLazarBarakovOdMonospitovo", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "yukons.net")) +++ { +++ SendCommand(r, "trxuwaaLahRKnaechb", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "yycast.com")) +++ { +++ SendCommand(r, "trajkoProkopiev", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "zenex.tv")) +++ { +++ SendCommand(r, "goVideStambolSoseBardovci", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else +++ RTMP_SendCreateStream(r); +++ } ++ else if (AVMATCH(&methodInvoked, 
&av_createStream)) ++- { ++- r->m_stream_id = (int)AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 3)); +++ { +++ r->m_stream_id = (int) AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 3)); ++ ++- if (r->Link.protocol & RTMP_FEATURE_WRITE) ++- { ++- SendPublish(r); ++- } ++- else ++- { ++- if (r->Link.lFlags & RTMP_LF_PLST) ++- SendPlaylist(r); ++- SendPlay(r); ++- RTMP_SendCtrl(r, 3, r->m_stream_id, r->m_nBufferMS); ++- } ++- } +++ if (!(r->Link.protocol & RTMP_FEATURE_WRITE)) +++ { +++ /* Authenticate on Justin.tv legacy servers before sending FCSubscribe */ +++ if (r->Link.usherToken.av_len) +++ SendUsherToken(r, &r->Link.usherToken); +++ if (r->Link.publishId > 0) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "sending dynamic publish id: %.0f", r->Link.publishId); +++ SendDynamicPublish(r, r->Link.publishId); +++ } +++ /* Send the FCSubscribe if live stream or if subscribepath is set */ +++ if (r->Link.subscribepath.av_len) +++ SendFCSubscribe(r, &r->Link.subscribepath); +++ else if ((r->Link.lFlags & RTMP_LF_LIVE) && (!r->Link.WeebToken.av_len)) +++ SendFCSubscribe(r, &r->Link.playpath); +++ } +++ +++ if (r->Link.protocol & RTMP_FEATURE_WRITE) +++ { +++ SendPublish(r); +++ } +++ else +++ { +++ if (r->Link.lFlags & RTMP_LF_PLST) +++ SendPlaylist(r); +++ SendPlay(r); +++ RTMP_SendCtrl(r, 3, r->m_stream_id, r->m_nBufferMS); +++ } +++ } ++ else if (AVMATCH(&methodInvoked, &av_play) || ++- AVMATCH(&methodInvoked, &av_publish)) ++- { ++- r->m_bPlaying = TRUE; ++- } +++ AVMATCH(&methodInvoked, &av_publish)) +++ { +++ r->m_bPlaying = TRUE; +++ } ++ free(methodInvoked.av_val); ++ } ++ else if (AVMATCH(&method, &av_onBWDone)) ++ { ++- if (!r->m_nBWCheckCounter) +++ if (!r->m_nBWCheckCounter) ++ SendCheckBW(r); ++ } ++ else if (AVMATCH(&method, &av_onFCSubscribe)) ++@@ -3036,21 +3507,22 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ { ++ int i; ++ for (i = 0; i < r->m_numCalls; i++) ++- if (AVMATCH(&r->m_methodCalls[i].name, &av__checkbw)) ++- { ++- 
AV_erase(r->m_methodCalls, &r->m_numCalls, i, TRUE); ++- break; ++- } +++ if (AVMATCH(&r->m_methodCalls[i].name, &av__checkbw)) +++ { +++ AV_erase(r->m_methodCalls, &r->m_numCalls, i, TRUE); +++ break; +++ } ++ } ++ else if (AVMATCH(&method, &av__error)) ++ { +++ int handled = FALSE; ++ #ifdef CRYPTO ++ AVal methodInvoked = {0}; ++ int i; ++ ++ if (r->Link.protocol & RTMP_FEATURE_WRITE) ++ { ++- for (i=0; im_numCalls; i++) +++ for (i = 0; i < r->m_numCalls; i++) ++ { ++ if (r->m_methodCalls[i].num == txn) ++ { ++@@ -3062,12 +3534,12 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ if (!methodInvoked.av_val) ++ { ++ RTMP_Log(RTMP_LOGDEBUG, "%s, received result id %f without matching request", ++- __FUNCTION__, txn); +++ __FUNCTION__, txn); ++ goto leave; ++ } ++ ++ RTMP_Log(RTMP_LOGDEBUG, "%s, received error for method call <%s>", __FUNCTION__, ++- methodInvoked.av_val); +++ methodInvoked.av_val); ++ ++ if (AVMATCH(&methodInvoked, &av_connect)) ++ { ++@@ -3086,34 +3558,96 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ goto leave; ++ } ++ } ++- } ++- else ++- { ++- RTMP_Log(RTMP_LOGERROR, "rtmp server sent error"); +++ handled = TRUE; ++ } ++ free(methodInvoked.av_val); ++-#else ++- RTMP_Log(RTMP_LOGERROR, "rtmp server sent error"); ++ #endif +++ double code = 0.0; +++ unsigned int parsedPort = 0; +++ AMFObject obj2; +++ AMFObjectProperty p; +++ AVal redirect; +++ SAVC(ex); +++ SAVC(redirect); +++ +++ AMFProp_GetObject(AMF_GetProp(&obj, NULL, 3), &obj2); +++ if (RTMP_FindFirstMatchingProperty(&obj2, &av_ex, &p)) +++ { +++ AMFProp_GetObject(&p, &obj2); +++ if (RTMP_FindFirstMatchingProperty(&obj2, &av_code, &p)) +++ code = AMFProp_GetNumber(&p); +++ if (code == 302 && RTMP_FindFirstMatchingProperty(&obj2, &av_redirect, &p)) +++ { +++ AMFProp_GetString(&p, &redirect); +++ r->Link.redirected = TRUE; +++ +++ char *playpath = "//playpath"; +++ int len = redirect.av_len + strlen(playpath); +++ char *url = malloc(len + 1); +++ 
memcpy(url, redirect.av_val, redirect.av_len); +++ memcpy(url + redirect.av_len, playpath, strlen(playpath)); +++ url[len] = '\0'; +++ r->Link.tcUrl.av_val = url; +++ r->Link.tcUrl.av_len = redirect.av_len; +++ if (r->Link.lFlags & RTMP_LF_FTCU) +++ r->Link.lFlags ^= RTMP_LF_FTCU; +++ RTMP_ParseURL(url, &r->Link.protocol, &r->Link.hostname, &parsedPort, &r->Link.playpath0, &r->Link.app); +++ if (parsedPort) +++ r->Link.port = parsedPort; +++ } +++ } +++ if (r->Link.redirected) +++ { +++ handled = TRUE; +++ RTMP_Log(RTMP_LOGINFO, "rtmp server sent redirect"); +++ } +++ +++ if (!handled) +++ RTMP_Log(RTMP_LOGERROR, "rtmp server sent error"); ++ } ++ else if (AVMATCH(&method, &av_close)) ++ { ++- RTMP_Log(RTMP_LOGERROR, "rtmp server requested close"); ++- RTMP_Close(r); +++ if (r->Link.redirected) +++ { +++ r->Link.redirected = FALSE; +++ RTMP_Close(r); +++ RTMP_Log(RTMP_LOGINFO, "trying to connect with redirected url"); +++ if (r->Link.port == 0) +++ { +++ if (r->Link.protocol & RTMP_FEATURE_SSL) +++ r->Link.port = 443; +++ else if (r->Link.protocol & RTMP_FEATURE_HTTP) +++ r->Link.port = 80; +++ else +++ r->Link.port = 1935; +++ } +++ RTMP_Connect(r, NULL); +++ } +++ else +++ { +++ +++ RTMP_Log(RTMP_LOGERROR, "rtmp server requested close"); +++ if (r->m_bPlaying && (strstr(pageUrl, "streamlive.to") || strstr(pageUrl, "uk-iptv.co.uk"))) +++ RTMP_Log(RTMP_LOGINFO, "ignoring close request"); +++ else +++ RTMP_Close(r); +++ } ++ } ++ else if (AVMATCH(&method, &av_onStatus)) ++ { ++ AMFObject obj2; ++- AVal code, level; +++ AVal code, level, description; ++ AMFProp_GetObject(AMF_GetProp(&obj, NULL, 3), &obj2); ++ AMFProp_GetString(AMF_GetProp(&obj2, &av_code, -1), &code); ++ AMFProp_GetString(AMF_GetProp(&obj2, &av_level, -1), &level); +++ AMFProp_GetString(AMF_GetProp(&obj2, &av_description, -1), &description); ++ ++ RTMP_Log(RTMP_LOGDEBUG, "%s, onStatus: %s", __FUNCTION__, code.av_val); ++ if (AVMATCH(&code, &av_NetStream_Failed) ++- || AVMATCH(&code, 
&av_NetStream_Play_Failed) ++- || AVMATCH(&code, &av_NetStream_Play_StreamNotFound) ++- || AVMATCH(&code, &av_NetConnection_Connect_InvalidApp)) +++ || AVMATCH(&code, &av_NetStream_Play_Failed) +++ || AVMATCH(&code, &av_NetStream_Play_StreamNotFound) +++ || AVMATCH(&code, &av_NetConnection_Connect_Rejected) +++ || AVMATCH(&code, &av_NetConnection_Connect_InvalidApp)) ++ { ++ r->m_stream_id = -1; ++ RTMP_Close(r); ++@@ -3171,6 +3705,46 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ r->m_pausing = 3; ++ } ++ } +++ +++ else if (AVMATCH(&code, &av_NetConnection_confStream)) +++ { +++#ifdef CRYPTO +++ static const char hexdig[] = "0123456789abcdef"; +++ AVal auth; +++ SAVC(cf_stream); +++ int i; +++ char hash_hex[33] = {0}; +++ unsigned char hash[16]; +++ +++ param_count = strsplit(description.av_val, description.av_len, ':', ¶ms); +++ if (param_count >= 3) +++ { +++ char *buf = malloc(strlen(params[0]) + r->Link.playpath.av_len + 1); +++ strcpy(buf, params[0]); +++ strncat(buf, r->Link.playpath.av_val, r->Link.playpath.av_len); +++ md5_hash((unsigned char *) buf, strlen(buf), hash); +++ for (i = 0; i < 16; i++) +++ { +++ hash_hex[i * 2] = hexdig[0x0f & (hash[i] >> 4)]; +++ hash_hex[i * 2 + 1] = hexdig[0x0f & (hash[i])]; +++ } +++ auth.av_val = &hash_hex[atoi(params[1]) - 1]; +++ auth.av_len = atoi(params[2]); +++ RTMP_Log(RTMP_LOGDEBUG, "Khalsa: %.*s", auth.av_len, auth.av_val); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_cf_stream); +++ enc = AMF_EncodeNumber(enc, pend, txn); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &auth); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ +++ SendInvoke(r, &av_Command, FALSE); +++ free(buf); +++ } +++#endif +++ } ++ } ++ else if (AVMATCH(&method, &av_playlist_ready)) ++ { ++@@ -3184,6 +3758,109 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ } ++ } ++ } +++ else if (AVMATCH(&method, &av_cps)) +++ { +++ if (obj.o_num >= 4) +++ { +++ 
int Status = AMFProp_GetBoolean(AMF_GetProp(&obj, NULL, 3)); +++ if (Status == FALSE) +++ { +++ AVal Message; +++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 4), &Message); +++ RTMP_Log(RTMP_LOGINFO, "Model status is %.*s", Message.av_len, Message.av_val); +++ RTMP_Close(r); +++ } +++ else +++ { +++ if (obj.o_num >= 7) +++ { +++ AVal Playpath, Server; +++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 5), &Playpath); +++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 6), &Server); +++ if (strncasecmp(&Playpath.av_val[Playpath.av_len - 4], ".mp4", 4) != 0) +++ { +++ char *playpath = calloc(Server.av_len + Playpath.av_len + 25, sizeof (char)); +++ strcat(playpath, "rtmp://"); +++ strncat(playpath, Server.av_val, Server.av_len); +++ strcat(playpath, "/live-origin/"); +++ strncat(playpath, Playpath.av_val, Playpath.av_len); +++ strcat(playpath, ".mp4"); +++ Playpath.av_val = playpath; +++ Playpath.av_len = strlen(playpath); +++ } +++ RTMP_ParsePlaypath(&Playpath, &r->Link.playpath); +++ RTMP_SendCreateStream(r); +++ } +++ } +++ } +++ } +++ else if (AVMATCH(&method, &av_disneyToken)) +++ { +++ double FirstNumber = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 3)); +++ double SecondNumber = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 4)); +++ RTMP_Log(RTMP_LOGDEBUG, "FirstNumber: %.2f, SecondNumber: %.2f", FirstNumber, SecondNumber); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av__result); +++ enc = AMF_EncodeNumber(enc, pend, txn); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeNumber(enc, pend, FirstNumber * SecondNumber); +++ av_Response.av_val = pbuf; +++ av_Response.av_len = enc - pbuf; +++ +++ AMF_Decode(&obj, av_Response.av_val, av_Response.av_len, FALSE); +++ AMF_Dump(&obj); +++ SendInvoke(r, &av_Response, FALSE); +++ } +++ else if (AVMATCH(&method, &av_verifyClient)) +++ { +++ double VerificationNumber = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 3)); +++ RTMP_Log(RTMP_LOGDEBUG, "VerificationNumber: %.2f", VerificationNumber); +++ +++ enc = pbuf; +++ enc = 
AMF_EncodeString(enc, pend, &av__result); +++ enc = AMF_EncodeNumber(enc, pend, txn); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeNumber(enc, pend, exp(atan(sqrt(VerificationNumber))) + 1); +++ av_Response.av_val = pbuf; +++ av_Response.av_len = enc - pbuf; +++ +++ AMF_Decode(&obj, av_Response.av_val, av_Response.av_len, FALSE); +++ AMF_Dump(&obj); +++ SendInvoke(r, &av_Response, FALSE); +++ } +++ else if (AVMATCH(&method, &av_sendStatus)) +++ { +++ if (r->Link.WeebToken.av_len) +++ { +++ AVal av_Authorized = AVC("User.hasAccess"); +++ AVal av_TransferLimit = AVC("User.noPremium.limited"); +++ AVal av_UserLimit = AVC("User.noPremium.tooManyUsers"); +++ AVal av_TimeLeft = AVC("timeLeft"); +++ AVal av_Status, av_ReconnectionTime; +++ +++ AMFObject Status; +++ AMFProp_GetObject(AMF_GetProp(&obj, NULL, 3), &Status); +++ AMFProp_GetString(AMF_GetProp(&Status, &av_code, -1), &av_Status); +++ RTMP_Log(RTMP_LOGINFO, "%.*s", av_Status.av_len, av_Status.av_val); +++ if (AVMATCH(&av_Status, &av_Authorized)) +++ { +++ RTMP_Log(RTMP_LOGINFO, "Weeb.tv authentication successful"); +++ RTMP_SendCreateStream(r); +++ } +++ else if (AVMATCH(&av_Status, &av_UserLimit)) +++ { +++ RTMP_Log(RTMP_LOGINFO, "No free slots available"); +++ RTMP_Close(r); +++ } +++ else if (AVMATCH(&av_Status, &av_TransferLimit)) +++ { +++ AMFProp_GetString(AMF_GetProp(&Status, &av_TimeLeft, -1), &av_ReconnectionTime); +++ RTMP_Log(RTMP_LOGINFO, "Viewing limit exceeded. 
try again in %.*s minutes.", av_ReconnectionTime.av_len, av_ReconnectionTime.av_val); +++ RTMP_Close(r); +++ } +++ } +++ } ++ else ++ { ++ ++@@ -3209,7 +3886,8 @@ RTMP_FindFirstMatchingProperty(AMFObject *obj, const AVal *name, ++ return TRUE; ++ } ++ ++- if (prop->p_type == AMF_OBJECT || prop->p_type == AMF_ECMA_ARRAY) +++ if (prop->p_type == AMF_OBJECT || prop->p_type == AMF_ECMA_ARRAY +++ || prop->p_type == AMF_STRICT_ARRAY) ++ { ++ if (RTMP_FindFirstMatchingProperty(&prop->p_vu.p_object, name, p)) ++ return TRUE; ++@@ -3235,7 +3913,8 @@ RTMP_FindPrefixProperty(AMFObject *obj, const AVal *name, ++ return TRUE; ++ } ++ ++- if (prop->p_type == AMF_OBJECT) +++ if (prop->p_type == AMF_OBJECT || prop->p_type == AMF_ECMA_ARRAY +++ || prop->p_type == AMF_STRICT_ARRAY) ++ { ++ if (RTMP_FindPrefixProperty(&prop->p_vu.p_object, name, p)) ++ return TRUE; ++@@ -3269,6 +3948,7 @@ DumpMetaData(AMFObject *obj) ++ snprintf(str, 255, "%s", ++ prop->p_vu.p_number != 0. ? "TRUE" : "FALSE"); ++ break; +++ case AMF_NULL: ++ case AMF_STRING: ++ len = snprintf(str, 255, "%.*s", prop->p_vu.p_aval.av_len, ++ prop->p_vu.p_aval.av_val); ++@@ -3284,7 +3964,7 @@ DumpMetaData(AMFObject *obj) ++ } ++ if (str[0] && prop->p_name.av_len) ++ { ++- RTMP_Log(RTMP_LOGINFO, " %-22.*s%s", prop->p_name.av_len, +++ RTMP_Log(RTMP_LOGINFO, " %-24.*s%s", prop->p_name.av_len, ++ prop->p_name.av_val, str); ++ } ++ } ++@@ -3366,7 +4046,7 @@ HandleCtrl(RTMP *r, const RTMPPacket *packet) ++ unsigned int tmp; ++ if (packet->m_body && packet->m_nBodySize >= 2) ++ nType = AMF_DecodeInt16(packet->m_body); ++- RTMP_Log(RTMP_LOGDEBUG, "%s, received ctrl. 
type: %d, len: %d", __FUNCTION__, nType, +++ RTMP_Log(RTMP_LOGDEBUG, "%s, received ctrl, type: %d, len: %d", __FUNCTION__, nType, ++ packet->m_nBodySize); ++ /*RTMP_LogHex(packet.m_body, packet.m_nBodySize); */ ++ ++@@ -3475,15 +4155,15 @@ HandleCtrl(RTMP *r, const RTMPPacket *packet) ++ RTMP_Log(RTMP_LOGDEBUG, "%s, SWFVerification ping received: ", __FUNCTION__); ++ if (packet->m_nBodySize > 2 && packet->m_body[2] > 0x01) ++ { ++- RTMP_Log(RTMP_LOGERROR, ++- "%s: SWFVerification Type %d request not supported! Patches welcome...", ++- __FUNCTION__, packet->m_body[2]); +++ RTMP_Log(RTMP_LOGERROR, +++ "%s: SWFVerification Type %d request not supported, attempting to use SWFVerification Type 1! Patches welcome...", +++ __FUNCTION__, packet->m_body[2]); ++ } ++ #ifdef CRYPTO ++ /*RTMP_LogHex(packet.m_body, packet.m_nBodySize); */ ++ ++ /* respond with HMAC SHA256 of decompressed SWF, key is the 30byte player key, also the last 30 bytes of the server handshake are applied */ ++- else if (r->Link.SWFSize) +++ if (r->Link.SWFSize) ++ { ++ RTMP_SendCtrl(r, 0x1B, 0, 0); ++ } ++@@ -3788,8 +4468,18 @@ HandShake(RTMP *r, int FP9HandShake) ++ serversig[4], serversig[5], serversig[6], serversig[7]); ++ ++ /* 2nd part of handshake */ ++- if (!WriteN(r, serversig, RTMP_SIG_SIZE)) ++- return FALSE; +++ if (r->Link.CombineConnectPacket) +++ { +++ char *HandshakeResponse = malloc(RTMP_SIG_SIZE); +++ memcpy(HandshakeResponse, (char *) serversig, RTMP_SIG_SIZE); +++ r->Link.HandshakeResponse.av_val = HandshakeResponse; +++ r->Link.HandshakeResponse.av_len = RTMP_SIG_SIZE; +++ } +++ else +++ { +++ if (!WriteN(r, (char *) serversig, RTMP_SIG_SIZE)) +++ return FALSE; +++ } ++ ++ if (ReadN(r, serversig, RTMP_SIG_SIZE) != RTMP_SIG_SIZE) ++ return FALSE; ++@@ -3942,7 +4632,7 @@ RTMP_SendPacket(RTMP *r, RTMPPacket *packet, int queue) ++ ++ nSize = packetSize[packet->m_headerType]; ++ hSize = nSize; cSize = 0; ++- t = packet->m_nTimeStamp - last; +++ t = packet->m_nTimeStamp ? 
packet->m_nTimeStamp - last : 0; ++ ++ if (packet->m_body) ++ { ++@@ -4251,8 +4941,13 @@ RTMPSockBuf_Fill(RTMPSockBuf *sb) ++ { ++ int nBytes; ++ ++- if (!sb->sb_size) ++- sb->sb_start = sb->sb_buf; +++ /* Copy unprocessed bytes to the start of buffer to make optimum use of +++ * available buffer */ +++ if (sb->sb_start != sb->sb_buf) +++ { +++ memcpy(sb->sb_buf, sb->sb_start, sb->sb_size); +++ sb->sb_start = sb->sb_buf; +++ } ++ ++ while (1) ++ { ++@@ -4266,8 +4961,10 @@ RTMPSockBuf_Fill(RTMPSockBuf *sb) ++ #endif ++ { ++ nBytes = recv(sb->sb_socket, sb->sb_start + sb->sb_size, nBytes, 0); ++- } ++- if (nBytes != -1) +++ if (!nBytes) +++ RTMP_Log(RTMP_LOGDEBUG, "Socket closed by server, nBytes: %d", nBytes); +++ } +++ if (nBytes >= 0) ++ { ++ sb->sb_size += nBytes; ++ } ++@@ -4405,21 +5102,19 @@ static int ++ HTTP_Post(RTMP *r, RTMPTCmd cmd, const char *buf, int len) ++ { ++ char hbuf[512]; ++- int hlen = snprintf(hbuf, sizeof(hbuf), "POST /%s%s/%d HTTP/1.1\r\n" ++- "Host: %.*s:%d\r\n" ++- "Accept: */*\r\n" ++- "User-Agent: Shockwave Flash\r\n" ++- "Connection: Keep-Alive\r\n" ++- "Cache-Control: no-cache\r\n" ++- "Content-type: application/x-fcs\r\n" ++- "Content-length: %d\r\n\r\n", RTMPT_cmds[cmd], ++- r->m_clientID.av_val ? r->m_clientID.av_val : "", ++- r->m_msgCounter, r->Link.hostname.av_len, r->Link.hostname.av_val, ++- r->Link.port, len); +++ int hlen = snprintf(hbuf, sizeof (hbuf), "POST /%s%s/%d HTTP/1.1\r\n" +++ "Content-Type: application/x-fcs\r\n" +++ "User-Agent: Shockwave Flash\r\n" +++ "Host: %.*s:%d\r\n" +++ "Content-Length: %d\r\n" +++ "Connection: Keep-Alive\r\n" +++ "Cache-Control: no-cache\r\n\r\n", RTMPT_cmds[cmd], +++ r->m_clientID.av_val ? 
r->m_clientID.av_val : "", +++ r->m_msgCounter, r->Link.hostname.av_len, r->Link.hostname.av_val, +++ r->Link.port, len); ++ RTMPSockBuf_Send(&r->m_sb, hbuf, hlen); ++ hlen = RTMPSockBuf_Send(&r->m_sb, buf, len); ++ r->m_msgCounter++; ++- r->m_unackd++; ++ return hlen; ++ } ++ ++@@ -4429,22 +5124,17 @@ HTTP_read(RTMP *r, int fill) ++ char *ptr; ++ int hlen; ++ ++-restart: ++ if (fill) ++ RTMPSockBuf_Fill(&r->m_sb); ++- if (r->m_sb.sb_size < 13) { ++- if (fill) ++- goto restart; +++ +++ /* Check if socket buffer is empty or HTTP header isn't completely received */ +++ memset(r->m_sb.sb_start + r->m_sb.sb_size, '\0', 1); +++ if ((!r->m_sb.sb_size) || (!strstr(r->m_sb.sb_start, "\r\n\r\n"))) ++ return -2; ++- } +++ ++ if (strncmp(r->m_sb.sb_start, "HTTP/1.1 200 ", 13)) ++ return -1; ++ r->m_sb.sb_start[r->m_sb.sb_size] = '\0'; ++- if (!strstr(r->m_sb.sb_start, "\r\n\r\n")) { ++- if (fill) ++- goto restart; ++- return -2; ++- } ++ ++ ptr = r->m_sb.sb_start + sizeof("HTTP/1.1 200"); ++ while ((ptr = strstr(ptr, "Content-"))) { ++@@ -4452,21 +5142,31 @@ restart: ++ ptr += 8; ++ } ++ if (!ptr) ++- return -1; ++- hlen = atoi(ptr+16); +++ { +++ ptr = r->m_sb.sb_start + sizeof ("HTTP/1.1 200"); +++ RTMP_Log(RTMP_LOGDEBUG, "No Content-Length header found, assuming continuous stream"); +++ hlen = 2147483648UL; // 2 GB +++ } +++ else +++ hlen = atoi(ptr + 16); ++ ptr = strstr(ptr+16, "\r\n\r\n"); ++ if (!ptr) ++ return -1; ++ ptr += 4; ++- if (ptr + (r->m_clientID.av_val ? 
1 : hlen) > r->m_sb.sb_start + r->m_sb.sb_size) ++- { ++- if (fill) ++- goto restart; ++- return -2; ++- } ++ r->m_sb.sb_size -= ptr - r->m_sb.sb_start; ++ r->m_sb.sb_start = ptr; ++- r->m_unackd--; +++ +++ /* Stop processing if content length is 0 */ +++ if (!hlen) +++ return -3; +++ +++ /* Refill buffer if no payload is received */ +++ if (hlen && (!r->m_sb.sb_size)) +++ { +++ RTMPSockBuf_Fill(&r->m_sb); +++ ptr = r->m_sb.sb_buf; +++ r->m_sb.sb_start = ptr; +++ } ++ ++ if (!r->m_clientID.av_val) ++ { ++@@ -4486,10 +5186,17 @@ restart: ++ r->m_sb.sb_start++; ++ r->m_sb.sb_size--; ++ } +++ +++ /* Following values shouldn't be negative in any case */ +++ if (r->m_resplen < 0) +++ r->m_resplen = 0; +++ if (r->m_sb.sb_size < 0) +++ r->m_sb.sb_size = 0; +++ ++ return 0; ++ } ++ ++-#define MAX_IGNORED_FRAMES 50 +++#define MAX_IGNORED_FRAMES 100 ++ ++ /* Read from the stream until we get a media packet. ++ * Returns -3 if Play.Close/Stop, -2 if fatal error, -1 if no more media ++@@ -4557,162 +5264,156 @@ Read_1_Packet(RTMP *r, char *buf, unsigned int buflen) ++ #endif ++ ++ if (r->m_read.flags & RTMP_READ_RESUME) ++- { ++- /* check the header if we get one */ ++- if (packet.m_nTimeStamp == 0) ++- { ++- if (r->m_read.nMetaHeaderSize > 0 ++- && packet.m_packetType == RTMP_PACKET_TYPE_INFO) ++- { ++- AMFObject metaObj; ++- int nRes = ++- AMF_Decode(&metaObj, packetBody, nPacketLen, FALSE); ++- if (nRes >= 0) ++- { ++- AVal metastring; ++- AMFProp_GetString(AMF_GetProp(&metaObj, NULL, 0), ++- &metastring); ++- ++- if (AVMATCH(&metastring, &av_onMetaData)) ++- { ++- /* compare */ ++- if ((r->m_read.nMetaHeaderSize != nPacketLen) || ++- (memcmp ++- (r->m_read.metaHeader, packetBody, ++- r->m_read.nMetaHeaderSize) != 0)) ++- { ++- ret = RTMP_READ_ERROR; ++- } ++- } ++- AMF_Reset(&metaObj); ++- if (ret == RTMP_READ_ERROR) ++- break; ++- } ++- } +++ { +++ RTMP_Log(RTMP_LOGDEBUG2, "Received timestamp: %d, type %d", +++ packet.m_nTimeStamp, packet.m_packetType); +++ if 
(packet.m_nTimeStamp > 0 && r->m_read.nResumeDriftTS > 0) +++ packet.m_nTimeStamp -= r->m_read.nResumeDriftTS; +++ RTMP_Log(RTMP_LOGDEBUG2, "Adjusted timestamp: %d", packet.m_nTimeStamp); +++ +++ /* check the header if we get one */ +++ if (r->m_read.nMetaHeaderSize > 0 +++ && packet.m_packetType == RTMP_PACKET_TYPE_INFO) +++ { +++ AMFObject metaObj; +++ int nRes = AMF_Decode(&metaObj, packetBody, nPacketLen, FALSE); +++ if (nRes >= 0) +++ { +++ AVal metastring; +++ AMFProp_GetString(AMF_GetProp(&metaObj, NULL, 0), &metastring); +++ +++ if (AVMATCH(&metastring, &av_onMetaData)) +++ { +++ /* compare */ +++ if ((r->m_read.nMetaHeaderSize != nPacketLen) || +++ (memcmp(r->m_read.metaHeader, packetBody, r->m_read.nMetaHeaderSize) != 0)) +++ { +++ ret = RTMP_READ_ERROR; +++ } +++ } +++ AMF_Reset(&metaObj); +++ if (ret == RTMP_READ_ERROR) +++ break; +++ } +++ } ++ ++- /* check first keyframe to make sure we got the right position ++- * in the stream! (the first non ignored frame) ++- */ ++- if (r->m_read.nInitialFrameSize > 0) ++- { ++- /* video or audio data */ ++- if (packet.m_packetType == r->m_read.initialFrameType ++- && r->m_read.nInitialFrameSize == nPacketLen) ++- { ++- /* we don't compare the sizes since the packet can ++- * contain several FLV packets, just make sure the ++- * first frame is our keyframe (which we are going ++- * to rewrite) ++- */ ++- if (memcmp ++- (r->m_read.initialFrame, packetBody, ++- r->m_read.nInitialFrameSize) == 0) ++- { ++- RTMP_Log(RTMP_LOGDEBUG, "Checked keyframe successfully!"); ++- r->m_read.flags |= RTMP_READ_GOTKF; ++- /* ignore it! (what about audio data after it? it is ++- * handled by ignoring all 0ms frames, see below) ++- */ ++- ret = RTMP_READ_IGNORE; ++- break; ++- } ++- } +++ /* check first keyframe to make sure we got the right position +++ * in the stream! 
(the first non ignored frame) +++ */ +++ RTMP_Log(RTMP_LOGDEBUG2, "Required packet length: %d, Packet length: %d", +++ r->m_read.nInitialFrameSize, nPacketLen); +++ if (r->m_read.nInitialFrameSize > 0) +++ { +++ /* video or audio data */ +++ if (packet.m_packetType == r->m_read.initialFrameType +++ && r->m_read.nInitialFrameSize == nPacketLen) +++ { +++ /* we don't compare the sizes since the packet can +++ * contain several FLV packets, just make sure the +++ * first frame is our keyframe (which we are going +++ * to rewrite) +++ */ +++ RTMP_Log(RTMP_LOGDEBUG2, "Comparing keyframe data"); +++ if (memcmp(r->m_read.initialFrame, packetBody, +++ r->m_read.nInitialFrameSize) == 0) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "Checked keyframe successfully!"); +++ r->m_read.flags |= RTMP_READ_GOTKF; +++ r->m_read.nResumeDriftTS = packet.m_nTimeStamp; +++ /* ignore it! (what about audio data after it? it is +++ * handled by ignoring all 0ms frames, see below) +++ */ +++ ret = RTMP_READ_IGNORE; +++ break; +++ } +++ } ++ ++- /* hande FLV streams, even though the server resends the ++- * keyframe as an extra video packet it is also included ++- * in the first FLV stream chunk and we have to compare ++- * it and filter it out !! ++- */ ++- if (packet.m_packetType == RTMP_PACKET_TYPE_FLASH_VIDEO) ++- { ++- /* basically we have to find the keyframe with the ++- * correct TS being nResumeTS ++- */ ++- unsigned int pos = 0; ++- uint32_t ts = 0; ++- ++- while (pos + 11 < nPacketLen) ++- { ++- /* size without header (11) and prevTagSize (4) */ ++- uint32_t dataSize = ++- AMF_DecodeInt24(packetBody + pos + 1); ++- ts = AMF_DecodeInt24(packetBody + pos + 4); ++- ts |= (packetBody[pos + 7] << 24); +++ /* hande FLV streams, even though the server resends the +++ * keyframe as an extra video packet it is also included +++ * in the first FLV stream chunk and we have to compare +++ * it and filter it out !! 
+++ */ +++ if (packet.m_packetType == RTMP_PACKET_TYPE_FLASH_VIDEO) +++ { +++ /* basically we have to find the keyframe with the +++ * correct TS being nResumeTS +++ */ +++ unsigned int pos = 0; +++ uint32_t ts = 0; +++ +++ while (pos + 11 < nPacketLen) +++ { +++ /* size without header (11) and prevTagSize (4) */ +++ uint32_t dataSize = AMF_DecodeInt24(packetBody + pos + 1); +++ ts = AMF_DecodeInt24(packetBody + pos + 4); +++ ts |= (packetBody[pos + 7] << 24); ++ ++ #ifdef _DEBUG ++- RTMP_Log(RTMP_LOGDEBUG, ++- "keyframe search: FLV Packet: type %02X, dataSize: %d, timeStamp: %d ms", ++- packetBody[pos], dataSize, ts); +++ RTMP_Log(RTMP_LOGDEBUG, +++ "keyframe search: FLV Packet: type %02X, dataSize: %d, timeStamp: %d ms", +++ packetBody[pos], dataSize, ts); ++ #endif ++- /* ok, is it a keyframe?: ++- * well doesn't work for audio! ++- */ ++- if (packetBody[pos /*6928, test 0 */ ] == ++- r->m_read.initialFrameType ++- /* && (packetBody[11]&0xf0) == 0x10 */ ) ++- { ++- if (ts == r->m_read.nResumeTS) ++- { ++- RTMP_Log(RTMP_LOGDEBUG, ++- "Found keyframe with resume-keyframe timestamp!"); ++- if (r->m_read.nInitialFrameSize != dataSize ++- || memcmp(r->m_read.initialFrame, ++- packetBody + pos + 11, ++- r->m_read. ++- nInitialFrameSize) != 0) ++- { ++- RTMP_Log(RTMP_LOGERROR, ++- "FLV Stream: Keyframe doesn't match!"); ++- ret = RTMP_READ_ERROR; ++- break; ++- } ++- r->m_read.flags |= RTMP_READ_GOTFLVK; ++- ++- /* skip this packet? 
++- * check whether skippable: ++- */ ++- if (pos + 11 + dataSize + 4 > nPacketLen) ++- { ++- RTMP_Log(RTMP_LOGWARNING, ++- "Non skipable packet since it doesn't end with chunk, stream corrupt!"); ++- ret = RTMP_READ_ERROR; ++- break; ++- } ++- packetBody += (pos + 11 + dataSize + 4); ++- nPacketLen -= (pos + 11 + dataSize + 4); ++- ++- goto stopKeyframeSearch; ++- ++- } ++- else if (r->m_read.nResumeTS < ts) ++- { ++- /* the timestamp ts will only increase with ++- * further packets, wait for seek ++- */ ++- goto stopKeyframeSearch; ++- } ++- } ++- pos += (11 + dataSize + 4); ++- } ++- if (ts < r->m_read.nResumeTS) ++- { ++- RTMP_Log(RTMP_LOGERROR, ++- "First packet does not contain keyframe, all " ++- "timestamps are smaller than the keyframe " ++- "timestamp; probably the resume seek failed?"); ++- } ++- stopKeyframeSearch: ++- ; ++- if (!(r->m_read.flags & RTMP_READ_GOTFLVK)) ++- { ++- RTMP_Log(RTMP_LOGERROR, ++- "Couldn't find the seeked keyframe in this chunk!"); ++- ret = RTMP_READ_IGNORE; ++- break; ++- } ++- } ++- } ++- } +++ /* ok, is it a keyframe?: +++ * well doesn't work for audio! +++ */ +++ if (packetBody[pos /*6928, test 0 */ ] == r->m_read.initialFrameType +++ /* && (packetBody[11]&0xf0) == 0x10 */) +++ { +++ if (ts == r->m_read.nResumeTS) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "Found keyframe with resume-keyframe timestamp!"); +++ if (r->m_read.nInitialFrameSize != dataSize || +++ memcmp(r->m_read.initialFrame, packetBody + pos + 11, +++ r->m_read.nInitialFrameSize) != 0) +++ { +++ RTMP_Log(RTMP_LOGERROR, "FLV Stream: Keyframe doesn't match!"); +++ ret = RTMP_READ_ERROR; +++ break; +++ } +++ r->m_read.flags |= RTMP_READ_GOTFLVK; +++ +++ /* skip this packet? 
+++ * check whether skippable: +++ */ +++ if (pos + 11 + dataSize + 4 > nPacketLen) +++ { +++ RTMP_Log(RTMP_LOGWARNING, "Non skipable packet since it doesn't " +++ "end with chunk, stream corrupt!"); +++ ret = RTMP_READ_ERROR; +++ break; +++ } +++ packetBody += (pos + 11 + dataSize + 4); +++ nPacketLen -= (pos + 11 + dataSize + 4); +++ +++ goto stopKeyframeSearch; +++ +++ } +++ else if (r->m_read.nResumeTS < ts) +++ { +++ /* the timestamp ts will only increase with +++ * further packets, wait for seek +++ */ +++ goto stopKeyframeSearch; +++ } +++ } +++ pos += (11 + dataSize + 4); +++ } +++ if (ts < r->m_read.nResumeTS) +++ { +++ RTMP_Log(RTMP_LOGERROR, +++ "First packet does not contain keyframe, all " +++ "timestamps are smaller than the keyframe " +++ "timestamp; probably the resume seek failed?"); +++ } +++ stopKeyframeSearch: +++ if (!(r->m_read.flags & RTMP_READ_GOTFLVK)) +++ { +++ RTMP_Log(RTMP_LOGERROR, "Couldn't find the seeked keyframe in this chunk!"); +++ ret = RTMP_READ_IGNORE; +++ break; +++ } +++ } +++ } ++ ++ if (packet.m_nTimeStamp > 0 ++ && (r->m_read.flags & (RTMP_READ_GOTKF|RTMP_READ_GOTFLVK))) ++@@ -4972,7 +5673,7 @@ static const char flvHeader[] = { 'F', 'L', 'V', 0x01, ++ 0x00, 0x00, 0x00, 0x00 ++ }; ++ ++-#define HEADERBUF (128*1024) +++#define HEADERBUF (1024*1024) ++ int ++ RTMP_Read(RTMP *r, char *buf, int size) ++ { ++@@ -5175,3 +5876,395 @@ RTMP_Write(RTMP *r, const char *buf, int size) ++ } ++ return size+s2; ++ } +++ +++AVal +++AVcopy(AVal src) +++{ +++ AVal dst; +++ if (src.av_len) +++ { +++ dst.av_val = malloc(src.av_len + 1); +++ memcpy(dst.av_val, src.av_val, src.av_len); +++ dst.av_val[src.av_len] = '\0'; +++ dst.av_len = src.av_len; +++ } +++ else +++ { +++ dst.av_val = NULL; +++ dst.av_len = 0; +++ } +++ return dst; +++} +++ +++static int +++ConnectSocket(RTMP *r) +++{ +++ int on = 1; +++ struct sockaddr_in service; +++ if (!r->Link.hostname.av_len) +++ return FALSE; +++ +++ memset(&service, 0, sizeof (struct sockaddr_in)); +++ 
service.sin_family = AF_INET; +++ +++ if (r->Link.socksport) +++ { +++ /* Connect via SOCKS */ +++ if (!add_addr_info(&service, &r->Link.sockshost, r->Link.socksport)) +++ return FALSE; +++ } +++ else +++ { +++ /* Connect directly */ +++ if (!add_addr_info(&service, &r->Link.hostname, r->Link.port)) +++ return FALSE; +++ } +++ +++ r->m_sb.sb_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); +++ if (r->m_sb.sb_socket != -1) +++ { +++ if (connect(r->m_sb.sb_socket, (struct sockaddr *) &service, sizeof (struct sockaddr)) < 0) +++ { +++ int err = GetSockError(); +++ RTMP_Log(RTMP_LOGERROR, "%s, failed to connect socket. %d (%s)", +++ __FUNCTION__, err, strerror(err)); +++ RTMP_Close(r); +++ return FALSE; +++ } +++ +++ if (r->Link.socksport) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "%s ... SOCKS negotiation", __FUNCTION__); +++ if (!SocksNegotiate(r)) +++ { +++ RTMP_Log(RTMP_LOGERROR, "%s, SOCKS negotiation failed.", __FUNCTION__); +++ RTMP_Close(r); +++ return FALSE; +++ } +++ } +++ } +++ else +++ { +++ RTMP_Log(RTMP_LOGERROR, "%s, failed to create socket. 
Error: %d", +++ __FUNCTION__, GetSockError()); +++ return FALSE; +++ } +++ +++ /* set timeout */ +++ SET_RCVTIMEO(tv, r->Link.timeout); +++ if (setsockopt(r->m_sb.sb_socket, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof (tv))) +++ { +++ RTMP_Log(RTMP_LOGERROR, "%s, Setting socket timeout to %d failed!", +++ __FUNCTION__, r->Link.timeout); +++ } +++ +++ setsockopt(r->m_sb.sb_socket, IPPROTO_TCP, TCP_NODELAY, (char *) &on, sizeof (on)); +++ if (r->Link.protocol & RTMP_FEATURE_HTTP) +++ setsockopt(r->m_sb.sb_socket, SOL_SOCKET, SO_KEEPALIVE, (char *) &on, sizeof (on)); +++ +++ return TRUE; +++} +++ +++static int +++SendCommand(RTMP *r, char *method, int queue) +++{ +++ char pbuf[256], *pend = pbuf + sizeof (pbuf), *enc; +++ AVal av_command, methodName; +++ +++ enc = pbuf; +++ methodName.av_val = method; +++ methodName.av_len = strlen(method); +++ enc = AMF_EncodeString(enc, pend, &methodName); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ av_command.av_val = pbuf; +++ av_command.av_len = enc - pbuf; +++ +++ return SendInvoke(r, &av_command, queue); +++} +++ +++static int +++SendGetStreamLength(RTMP *r) +++{ +++ char pbuf[256], *pend = pbuf + sizeof (pbuf), *enc; +++ AVal av_Command; +++ SAVC(getStreamLength); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_getStreamLength); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &r->Link.playpath); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ +++ return SendInvoke(r, &av_Command, TRUE); +++} +++ +++static int +++SendInvoke(RTMP *r, AVal *command, int queue) +++{ +++ RTMPPacket packet; +++ char pbuf[512], *enc; +++ +++ packet.m_nChannel = 0x03; /* control channel (invoke) */ +++ packet.m_headerType = RTMP_PACKET_SIZE_MEDIUM; +++ packet.m_packetType = RTMP_PACKET_TYPE_INVOKE; +++ packet.m_nTimeStamp = 0; +++ packet.m_nInfoField2 = 0; +++ packet.m_hasAbsTimestamp = 0; +++ 
packet.m_body = pbuf + RTMP_MAX_HEADER_SIZE; +++ +++ enc = packet.m_body; +++ if (command->av_len) +++ { +++ memcpy(enc, command->av_val, command->av_len); +++ enc += command->av_len; +++ } +++ else +++ return FALSE; +++ packet.m_nBodySize = enc - packet.m_body; +++ +++ return RTMP_SendPacket(r, &packet, queue); +++} +++ +++AVal +++StripParams(AVal *src) +++{ +++ AVal str; +++ if (src->av_val) +++ { +++ str.av_val = calloc(src->av_len + 1, sizeof (char)); +++ strncpy(str.av_val, src->av_val, src->av_len); +++ str.av_len = src->av_len; +++ char *start = str.av_val; +++ char *end = start + str.av_len; +++ char *ptr = start; +++ +++ while (ptr < end) +++ { +++ if (*ptr == '?') +++ { +++ str.av_len = ptr - start; +++ break; +++ } +++ ptr++; +++ } +++ memset(start + str.av_len, 0, 1); +++ +++ char *dynamic = strstr(start, "[[DYNAMIC]]"); +++ if (dynamic) +++ { +++ dynamic -= 1; +++ memset(dynamic, 0, 1); +++ str.av_len = dynamic - start; +++ end = start + str.av_len; +++ } +++ +++ char *import = strstr(start, "[[IMPORT]]"); +++ if (import) +++ { +++ str.av_val = import + 11; +++ strcpy(start, "http://"); +++ str.av_val = strcat(start, str.av_val); +++ str.av_len = strlen(str.av_val); +++ } +++ return str; +++ } +++ str = *src; +++ return str; +++} +++ +++char * +++strreplace(char *srcstr, int srclen, char *orig, char *repl, int didAlloc) +++{ +++ char *ptr = NULL, *sptr = srcstr; +++ int origlen = strlen(orig); +++ int repllen = strlen(repl); +++ if (!srclen) +++ srclen = strlen(srcstr); +++ char *srcend = srcstr + srclen; +++ int dstbuffer = srclen / origlen * repllen; +++ if (dstbuffer < srclen) +++ dstbuffer = srclen; +++ char *dststr = calloc(dstbuffer + 1, sizeof (char)); +++ char *dptr = dststr; +++ +++ if ((ptr = strstr(srcstr, orig))) +++ { +++ while (ptr < srcend && (ptr = strstr(sptr, orig))) +++ { +++ int len = ptr - sptr; +++ memcpy(dptr, sptr, len); +++ sptr += len + origlen; +++ dptr += len; +++ memcpy(dptr, repl, repllen); +++ dptr += repllen; +++ } +++ 
memcpy(dptr, sptr, srcend - sptr); +++ if (didAlloc) +++ free(srcstr); +++ return dststr; +++ } +++ +++ memcpy(dststr, srcstr, srclen); +++ if (didAlloc) +++ free(srcstr); +++ return dststr; +++} +++ +++int +++strsplit(char *src, int srclen, char delim, char ***params) +++{ +++ char *sptr, *srcbeg, *srcend, *dstr; +++ int count = 1, i = 0, len = 0; +++ +++ if (src == NULL) +++ return 0; +++ if (!srclen) +++ srclen = strlen(src); +++ srcbeg = src; +++ srcend = srcbeg + srclen; +++ sptr = srcbeg; +++ +++ /* count the delimiters */ +++ while (sptr < srcend) +++ { +++ if (*sptr++ == delim) +++ count++; +++ } +++ sptr = srcbeg; +++ *params = malloc(count * sizeof (size_t)); +++ char **param = *params; +++ +++ for (i = 0; i < (count - 1); i++) +++ { +++ dstr = strchr(sptr, delim); +++ len = dstr - sptr; +++ param[i] = malloc((len + 1) * sizeof (char)); +++ memcpy(param[i], sptr, len); +++ *(param[i] + len) = '\0'; +++ sptr += len + 1; +++ } +++ +++ /* copy the last string */ +++ if (sptr <= srcend) +++ { +++ len = srclen - (sptr - srcbeg); +++ param[i] = malloc((len + 1) * sizeof (char)); +++ memcpy(param[i], sptr, len); +++ *(param[i] + len) = '\0'; +++ } +++ return count; +++} +++ +++void +++TransformRot13(AMFObject *obj, AVal *rindex, AVal *r) +++{ +++ char *chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMabcdefghijklmnopqrstuvwxyzabcdefghijklm"; +++ int i = 0, pos = 0; +++ AMFObject obj2; +++ +++ AMFProp_GetObject(AMF_GetProp(obj, NULL, 3), &obj2); +++ AMFProp_GetString(AMF_GetProp(&obj2, rindex, -1), r); +++ +++ for (i = 0; i < r->av_len; i++) +++ { +++ char *chr = &r->av_val[i]; +++ chr = strchr(chars, *chr); +++ pos = chr ? 
chr - chars : -1; +++ if (pos > -1) +++ r->av_val[i] = chars[pos + 13]; +++ } +++} +++ +++void +++__TeaCrypt(uint32_t *block, uint32_t len, uint32_t *key) +++{ +++ uint32_t z = block[len - 1], y = block[0], sum = 0, e, DELTA = 0x9e3779b9; +++ int32_t p, q; +++ +++ q = 6 + 52 / len; +++ while (q-- > 0) +++ { +++ sum += DELTA; +++ e = (sum >> 2) & 3; +++ for (p = 0; p < len - 1; p++) +++ { +++ y = block[p + 1]; +++ block[p] += ((z >> 5^y << 2) + (y >> 3^z << 4)) ^ ((sum^y) + (key[(p & 3)^e] ^ z)); +++ z = block[p]; +++ } +++ y = block[0]; +++ block[len - 1] += ((z >> 5^y << 2) + (y >> 3^z << 4)) ^ ((sum^y) + (key[(p & 3)^e] ^ z)); +++ z = block[len - 1]; +++ } +++} +++ +++AVal +++TeaEncrypt(AVal *srcData, AVal *srcKey) +++{ +++ int i, reqPadding, longKeyBlocks, longDataBlocks; +++ unsigned char *key, *data; +++ +++ // Prepare key +++ int srcKeyLen = srcKey->av_len; +++ int reqKeyLen = 16; +++ reqPadding = reqKeyLen - srcKeyLen; +++ if (reqPadding < 0) +++ { +++ reqPadding = 0; +++ srcKeyLen = reqKeyLen; +++ } +++ key = calloc((srcKeyLen + reqPadding + 1), sizeof (char)); +++ memcpy(key, srcKey->av_val, srcKeyLen); +++ longKeyBlocks = reqKeyLen / 4; +++ uint32_t *longKeyBuf = (uint32_t *) malloc(longKeyBlocks * sizeof (uint32_t)); +++ for (i = 0; i < longKeyBlocks; i++) +++ { +++ longKeyBuf[i] = 0; +++ longKeyBuf[i] |= (key[i * 4 + 0]) | (key[i * 4 + 1] << 8) | (key[i * 4 + 2] << 16) | (key[i * 4 + 3] << 24); +++ } +++ +++ // Prepare data +++ int srcDataLen = srcData->av_len; +++ reqPadding = ((int) ((srcDataLen + 3) / 4))*4 - srcDataLen; +++ if ((srcDataLen + reqPadding) < 8) +++ reqPadding = 8 - srcDataLen; +++ data = calloc((srcDataLen + reqPadding + 1), sizeof (char)); +++ memcpy(data, srcData->av_val, srcDataLen); +++ longDataBlocks = (srcDataLen + reqPadding) / 4; +++ uint32_t *longDataBuf = malloc(longDataBlocks * sizeof (uint32_t)); +++ for (i = 0; i < longDataBlocks; i++) +++ { +++ longDataBuf[i] = 0; +++ longDataBuf[i] |= (data[i * 4 + 0]) | (data[i * 4 + 1] 
<< 8) | (data[i * 4 + 2] << 16) | (data[i * 4 + 3] << 24); +++ } +++ +++ // Encrypt data +++ __TeaCrypt(longDataBuf, longDataBlocks, longKeyBuf); +++ +++ // Convert data back to char array +++ for (i = 0; i < longDataBlocks; i++) +++ { +++ data[i * 4 + 0] = longDataBuf[i] & 0xFF; +++ data[i * 4 + 1] = (longDataBuf[i] >> 8) & 0xFF; +++ data[i * 4 + 2] = (longDataBuf[i] >> 16) & 0xFF; +++ data[i * 4 + 3] = (longDataBuf[i] >> 24) & 0xFF; +++ } +++ +++ // Convert to hex string +++ AVal hexData; +++ hexData.av_val = calloc((longDataBlocks * 4 * 2) + 1, sizeof (char)); +++ for (i = 0; i < (longDataBlocks * 4); i++) +++ sprintf(&hexData.av_val[i * 2], "%.2X", data[i]); +++ hexData.av_len = strlen(hexData.av_val); +++ +++ // Free allocated resources +++ free(key); +++ free(longKeyBuf); +++ free(data); +++ free(longDataBuf); +++ +++ return hexData; +++} ++diff --git librtmp/rtmp.h librtmp/rtmp.h ++index 0248913..3e573da 100644 ++--- librtmp/rtmp.h +++++ librtmp/rtmp.h ++@@ -150,12 +150,15 @@ extern "C" ++ AVal playpath; /* passed in explicitly */ ++ AVal tcUrl; ++ AVal swfUrl; +++ AVal swfHash; ++ AVal pageUrl; ++ AVal app; ++ AVal auth; ++ AVal flashVer; ++ AVal subscribepath; +++ AVal ccomm; ++ AVal usherToken; +++ AVal WeebToken; ++ AVal token; ++ AVal pubUser; ++ AVal pubPasswd; ++@@ -175,9 +178,18 @@ extern "C" ++ int lFlags; ++ ++ int swfAge; +++ int swfSize; ++ ++ int protocol; +++ int ConnectPacket; +++ int CombineConnectPacket; +++ int redirected; ++ int timeout; /* connection timeout in seconds */ +++ int dynamicPublish; +++ AVal dynamicCommand; +++ AVal Extras; +++ AVal HandshakeResponse; +++ double publishId; ++ ++ int pFlags; /* unused, but kept to avoid breaking ABI */ ++ ++@@ -220,6 +232,7 @@ extern "C" ++ /* if bResume == TRUE */ ++ uint8_t initialFrameType; ++ uint32_t nResumeTS; +++ uint32_t nResumeDriftTS; ++ char *metaHeader; ++ char *initialFrame; ++ uint32_t nMetaHeaderSize; ++@@ -306,6 +319,8 @@ extern "C" ++ AVal *flashVer, ++ AVal *subscribepath, ++ 
AVal *usherToken, +++ AVal *WeebToken, +++ AVal *ccomm, ++ int dStart, ++ int dStop, int bLiveStream, long int timeout); ++ ++@@ -371,6 +386,11 @@ extern "C" ++ int RTMP_HashSWF(const char *url, unsigned int *size, unsigned char *hash, ++ int age); ++ +++ AVal AVcopy(AVal src); +++ AVal StripParams(AVal *src); +++ char *strreplace(char *srcstr, int srclen, char *orig, char *repl, int didAlloc); +++ int strsplit(char *src, int srclen, char delim, char ***params); +++ ++ #ifdef __cplusplus ++ }; ++ #endif ++diff --git librtmp/rtmp_sys.h librtmp/rtmp_sys.h ++index 85d7e53..b2a3438 100644 ++--- librtmp/rtmp_sys.h +++++ librtmp/rtmp_sys.h ++@@ -65,6 +65,7 @@ ++ #include ++ #include ++ #include +++#include ++ #if POLARSSL_VERSION_NUMBER < 0x01010000 ++ #define havege_random havege_rand ++ #endif ++@@ -105,6 +106,7 @@ typedef struct tls_server_ctx { ++ #define TLS_write(s,b,l) ssl_write(s,(unsigned char *)b,l) ++ #define TLS_shutdown(s) ssl_close_notify(s) ++ #define TLS_close(s) ssl_free(s); free(s) +++#define md5_hash(i, ilen, o) md5(i, ilen, o) ++ ++ #elif defined(USE_GNUTLS) ++ #include ++@@ -122,6 +124,8 @@ typedef struct tls_ctx { ++ #define TLS_write(s,b,l) gnutls_record_send(s,b,l) ++ #define TLS_shutdown(s) gnutls_bye(s, GNUTLS_SHUT_RDWR) ++ #define TLS_close(s) gnutls_deinit(s) +++#define md5_hash(i, ilen, o) gnutls_digest_algorithm_t algorithm = GNUTLS_DIG_MD5;\ +++ gnutls_hash_fast(algorithm, i, ilen, o); ++ ++ #else /* USE_OPENSSL */ ++ #define TLS_CTX SSL_CTX * ++@@ -134,6 +138,7 @@ typedef struct tls_ctx { ++ #define TLS_write(s,b,l) SSL_write(s,b,l) ++ #define TLS_shutdown(s) SSL_shutdown(s) ++ #define TLS_close(s) SSL_free(s) +++#define md5_hash(i, ilen, o) MD5(i, ilen, o) ++ ++ #endif ++ #endif ++diff --git rtmpdump.c rtmpdump.c ++index 13741a7..b3ae33f 100644 ++--- rtmpdump.c +++++ rtmpdump.c ++@@ -36,6 +36,9 @@ ++ #ifdef WIN32 ++ #define fseeko fseeko64 ++ #define ftello ftello64 +++#ifdef __MINGW32__ +++#define off_t off64_t +++#endif ++ #include ++ 
#include ++ #define SET_BINMODE(f) setmode(fileno(f), O_BINARY) ++@@ -67,7 +70,7 @@ InitSockets() ++ #endif ++ } ++ ++-inline void +++static inline void ++ CleanupSockets() ++ { ++ #ifdef WIN32 ++@@ -148,9 +151,9 @@ OpenResumeFile(const char *flvFile, // file name [in] ++ if (!*file) ++ return RD_SUCCESS; // RD_SUCCESS, because we go to fresh file mode instead of quiting ++ ++- fseek(*file, 0, SEEK_END); +++ fseeko(*file, 0, SEEK_END); ++ *size = ftello(*file); ++- fseek(*file, 0, SEEK_SET); +++ fseeko(*file, 0, SEEK_SET); ++ ++ if (*size > 0) ++ { ++@@ -178,7 +181,7 @@ OpenResumeFile(const char *flvFile, // file name [in] ++ } ++ ++ uint32_t dataOffset = AMF_DecodeInt32(hbuf + 5); ++- fseek(*file, dataOffset, SEEK_SET); +++ fseeko(*file, dataOffset, SEEK_SET); ++ ++ if (fread(hbuf, 1, 4, *file) != 4) ++ { ++@@ -283,18 +286,24 @@ GetLastKeyframe(FILE * file, // output file [in] ++ uint8_t dataType; ++ int bAudioOnly; ++ off_t size; +++ char *syncbuf, *p; ++ ++- fseek(file, 0, SEEK_END); +++ fseeko(file, 0, SEEK_END); ++ size = ftello(file); +++ if (size <= 0) +++ { +++ dSeek = 0; +++ return RD_SUCCESS; +++ } ++ ++- fseek(file, 4, SEEK_SET); +++ fseeko(file, 4, SEEK_SET); ++ if (fread(&dataType, sizeof(uint8_t), 1, file) != 1) ++ return RD_FAILED; ++ ++ bAudioOnly = (dataType & 0x4) && !(dataType & 0x1); ++ ++- RTMP_Log(RTMP_LOGDEBUG, "bAudioOnly: %d, size: %llu", bAudioOnly, ++- (unsigned long long) size); +++ RTMP_Log(RTMP_LOGDEBUG, "bAudioOnly: %d, size: %lu", bAudioOnly, +++ (unsigned long) size); ++ ++ // ok, we have to get the timestamp of the last keyframe (only keyframes are seekable) / last audio frame (audio only streams) ++ ++@@ -326,6 +335,51 @@ GetLastKeyframe(FILE * file, // output file [in] ++ prevTagSize = AMF_DecodeInt32(buffer); ++ //RTMP_Log(RTMP_LOGDEBUG, "Last packet: prevTagSize: %d", prevTagSize); ++ +++ if (prevTagSize <= 0 || prevTagSize > size - 4 - 13) +++ { +++ /* Last packet was not fully received - try to sync to last tag */ +++ 
prevTagSize = 0; +++ tsize = size > 0x100000 ? 0x100000 : size; /* 1MB should be enough for 3500K bitrates */ +++ if (tsize > 13 + 15) +++ { +++ tsize -= 13; // do not read header +++ syncbuf = (char *) malloc(tsize); +++ if (syncbuf) +++ { +++ fseeko(file, size - tsize, SEEK_SET); +++ if (fread(syncbuf, 1, tsize, file) == tsize) +++ { +++ p = syncbuf + tsize; +++ while (p >= syncbuf + 15) +++ { +++ /* Check for StreamID */ +++ if (AMF_DecodeInt24(p - 7) == 0) +++ { +++ /* Check for Audio/Video/Script */ +++ dataType = p[-15] & 0x1F; +++ if (dataType == 8 || dataType == 9 || dataType == 18) +++ { +++ prevTagSize = AMF_DecodeInt24(p - 14); +++ if ((prevTagSize < tsize) && (p + prevTagSize + 11 <= syncbuf + tsize - 4) +++ && (AMF_DecodeInt32(p - 4 + prevTagSize) == prevTagSize + 11)) +++ { +++ prevTagSize = syncbuf + tsize - p + 15; +++ RTMP_Log(RTMP_LOGDEBUG, "Sync success - found last tag at 0x%x", (uint32_t) (size - prevTagSize)); +++ prevTagSize -= 4; +++ tsize = 0; +++ break; +++ } +++ else +++ prevTagSize = 0; +++ } +++ } +++ --p; +++ } +++ } +++ free(syncbuf); +++ } +++ } +++ } ++ if (prevTagSize == 0) ++ { ++ RTMP_Log(RTMP_LOGERROR, "Couldn't find keyframe to resume from!"); ++@@ -703,8 +757,12 @@ void usage(char *prog) ++ RTMP_LogPrintf ++ ("--token|-T key Key for SecureToken response\n"); ++ RTMP_LogPrintf +++ ("--ccommand|-K key Send custom command before play\n"); +++ RTMP_LogPrintf ++ ("--jtv|-j JSON Authentication token for Justin.tv legacy servers\n"); ++ RTMP_LogPrintf +++ ("--weeb|-J string Authentication token for weeb.tv servers\n"); +++ RTMP_LogPrintf ++ ("--hashes|-# Display progress with hashes, not with the byte counter\n"); ++ RTMP_LogPrintf ++ ("--buffer|-b Buffer time in milliseconds (default: %u)\n", ++@@ -751,7 +809,9 @@ main(int argc, char **argv) ++ AVal hostname = { 0, 0 }; ++ AVal playpath = { 0, 0 }; ++ AVal subscribepath = { 0, 0 }; ++- AVal usherToken = { 0, 0 }; //Justin.tv auth token +++ AVal usherToken = { 0, 0 }; // Justin.tv 
auth token +++ AVal WeebToken = { 0, 0 }; // Weeb.tv auth token +++ AVal ccomm = { 0, 0 }; ++ int port = -1; ++ int protocol = RTMP_PROTOCOL_UNDEFINED; ++ int retries = 0; ++@@ -853,17 +913,19 @@ main(int argc, char **argv) ++ {"start", 1, NULL, 'A'}, ++ {"stop", 1, NULL, 'B'}, ++ {"token", 1, NULL, 'T'}, +++ {"ccommand", 1, NULL, 'K'}, ++ {"hashes", 0, NULL, '#'}, ++ {"debug", 0, NULL, 'z'}, ++ {"quiet", 0, NULL, 'q'}, ++ {"verbose", 0, NULL, 'V'}, ++ {"jtv", 1, NULL, 'j'}, +++ {"weeb", 1, NULL, 'J'}, ++ {0, 0, 0, 0} ++ }; ++ ++ while ((opt = ++ getopt_long(argc, argv, ++- "hVveqzRr:s:t:i:p:a:b:f:o:u:C:n:c:l:y:Ym:k:d:A:B:T:w:x:W:X:S:#j:", +++ "hVveqzRr:s:t:i:p:a:b:f:o:u:C:n:c:l:y:Ym:k:d:A:B:T:K:w:x:W:X:S:#j:J:", ++ longopts, NULL)) != -1) ++ { ++ switch (opt) ++@@ -995,7 +1057,7 @@ main(int argc, char **argv) ++ port = parsedPort; ++ if (playpath.av_len == 0 && parsedPlaypath.av_len) ++ { ++- playpath = parsedPlaypath; +++ playpath = AVcopy(parsedPlaypath); ++ } ++ if (protocol == RTMP_PROTOCOL_UNDEFINED) ++ protocol = parsedProtocol; ++@@ -1061,6 +1123,9 @@ main(int argc, char **argv) ++ RTMP_SetOpt(&rtmp, &av_token, &token); ++ } ++ break; +++ case 'K': +++ STR2AVAL(ccomm, optarg); +++ break; ++ case '#': ++ bHashes = TRUE; ++ break; ++@@ -1079,6 +1144,9 @@ main(int argc, char **argv) ++ case 'j': ++ STR2AVAL(usherToken, optarg); ++ break; +++ case 'J': +++ STR2AVAL(WeebToken, optarg); +++ break; ++ default: ++ RTMP_LogPrintf("unknown option: %c\n", opt); ++ usage(argv[0]); ++@@ -1170,14 +1238,14 @@ main(int argc, char **argv) ++ ++ if (tcUrl.av_len == 0) ++ { ++- tcUrl.av_len = strlen(RTMPProtocolStringsLower[protocol]) + ++- hostname.av_len + app.av_len + sizeof("://:65535/"); +++ tcUrl.av_len = strlen(RTMPProtocolStringsLower[protocol]) + +++ hostname.av_len + app.av_len + sizeof ("://:65535/"); ++ tcUrl.av_val = (char *) malloc(tcUrl.av_len); ++- if (!tcUrl.av_val) ++- return RD_FAILED; +++ if (!tcUrl.av_val) +++ return RD_FAILED; ++ tcUrl.av_len = 
snprintf(tcUrl.av_val, tcUrl.av_len, "%s://%.*s:%d/%.*s", ++- RTMPProtocolStringsLower[protocol], hostname.av_len, ++- hostname.av_val, port, app.av_len, app.av_val); +++ RTMPProtocolStringsLower[protocol], hostname.av_len, +++ hostname.av_val, port, app.av_len, app.av_val); ++ } ++ ++ int first = 1; ++@@ -1197,8 +1265,9 @@ main(int argc, char **argv) ++ if (!fullUrl.av_len) ++ { ++ RTMP_SetupStream(&rtmp, protocol, &hostname, port, &sockshost, &playpath, ++- &tcUrl, &swfUrl, &pageUrl, &app, &auth, &swfHash, swfSize, ++- &flashVer, &subscribepath, &usherToken, dSeek, dStopOffset, bLiveStream, timeout); +++ &tcUrl, &swfUrl, &pageUrl, &app, &auth, &swfHash, swfSize, +++ &flashVer, &subscribepath, &usherToken, &WeebToken, &ccomm, +++ dSeek, dStopOffset, bLiveStream, timeout); ++ } ++ else ++ { ++diff --git rtmpgw.c rtmpgw.c ++index 3e47602..e56b855 100644 ++--- rtmpgw.c +++++ rtmpgw.c ++@@ -96,7 +96,9 @@ typedef struct ++ AVal flashVer; ++ AVal token; ++ AVal subscribepath; ++- AVal usherToken; //Justin.tv auth token +++ AVal ccomm; +++ AVal usherToken; // Justin.tv auth token +++ AVal WeebToken; // Weeb.tv auth token ++ AVal sockshost; ++ AMFObject extras; ++ int edepth; ++@@ -556,8 +558,8 @@ void processTCPrequest(STREAMING_SERVER * server, // server socket and state (ou ++ if (!req.fullUrl.av_len) ++ { ++ RTMP_SetupStream(&rtmp, req.protocol, &req.hostname, req.rtmpport, &req.sockshost, ++- &req.playpath, &req.tcUrl, &req.swfUrl, &req.pageUrl, &req.app, &req.auth, &req.swfHash, req.swfSize, &req.flashVer, &req.subscribepath, &req.usherToken, dSeek, req.dStopOffset, ++- req.bLiveStream, req.timeout); +++ &req.playpath, &req.tcUrl, &req.swfUrl, &req.pageUrl, &req.app, &req.auth, &req.swfHash, req.swfSize, &req.flashVer, &req.subscribepath, +++ &req.usherToken, &req.WeebToken, &req.ccomm, dSeek, req.dStopOffset, req.bLiveStream, req.timeout); ++ } ++ else ++ { ++@@ -972,6 +974,12 @@ ParseOption(char opt, char *arg, RTMP_REQUEST * req) ++ case 'j': ++ 
STR2AVAL(req->usherToken, arg); ++ break; +++ case 'J': +++ STR2AVAL(req->WeebToken, arg); +++ break; +++ case 'K': +++ STR2AVAL(req->ccomm, arg); +++ break; ++ default: ++ RTMP_LogPrintf("unknown option: %c, arg: %s\n", opt, arg); ++ return FALSE; ++@@ -1044,6 +1052,8 @@ main(int argc, char **argv) ++ {"quiet", 0, NULL, 'q'}, ++ {"verbose", 0, NULL, 'V'}, ++ {"jtv", 1, NULL, 'j'}, +++ {"weeb", 1, NULL, 'J'}, +++ {"ccommand", 1, NULL, 'K'}, ++ {0, 0, 0, 0} ++ }; ++ ++@@ -1056,7 +1066,7 @@ main(int argc, char **argv) ++ ++ while ((opt = ++ getopt_long(argc, argv, ++- "hvqVzr:s:t:i:p:a:f:u:n:c:l:y:m:d:D:A:B:T:g:w:x:W:X:S:j:", longopts, +++ "hvqVzr:s:t:i:p:a:f:u:n:c:l:y:m:d:D:A:B:T:g:w:x:W:X:S:j:J:", longopts, ++ NULL)) != -1) ++ { ++ switch (opt) ++@@ -1119,8 +1129,12 @@ main(int argc, char **argv) ++ RTMP_LogPrintf ++ ("--token|-T key Key for SecureToken response\n"); ++ RTMP_LogPrintf +++ ("--ccommand|-K key Send custom command before play\n"); +++ RTMP_LogPrintf ++ ("--jtv|-j JSON Authentication token for Justin.tv legacy servers\n"); ++ RTMP_LogPrintf +++ ("--weeb|-J string Authentication token for weeb.tv servers\n"); +++ RTMP_LogPrintf ++ ("--buffer|-b Buffer time in milliseconds (default: %u)\n\n", ++ defaultRTMPRequest.bufferTime); ++ ++diff --git rtmpsrv.c rtmpsrv.c ++index 5df4d3a..eccaa9c 100644 ++--- rtmpsrv.c +++++ rtmpsrv.c ++@@ -25,9 +25,13 @@ ++ */ ++ ++ #include +++#ifdef __MINGW_H +++#include +++#endif ++ #include ++ #include ++ #include +++#include ++ ++ #include ++ #include ++@@ -94,12 +98,19 @@ typedef struct ++ STREAMING_SERVER *rtmpServer = 0; // server structure pointer ++ void *sslCtx = NULL; ++ +++int file_exists(const char *fname); ++ STREAMING_SERVER *startStreaming(const char *address, int port); ++ void stopStreaming(STREAMING_SERVER * server); ++ void AVreplace(AVal *src, const AVal *orig, const AVal *repl); ++ ++ static const AVal av_dquote = AVC("\""); ++ static const AVal av_escdquote = AVC("\\\""); +++#ifdef WIN32 +++static const 
AVal av_caret = AVC("^"); +++static const AVal av_esccaret = AVC("^^"); +++static const AVal av_pipe = AVC("|"); +++static const AVal av_escpipe = AVC("^|"); +++#endif ++ ++ typedef struct ++ { ++@@ -168,6 +179,12 @@ SAVC(level); ++ SAVC(code); ++ SAVC(description); ++ SAVC(secureToken); +++SAVC(_checkbw); +++SAVC(_onbwdone); +++SAVC(checkBandwidth); +++SAVC(onBWDone); +++SAVC(FCSubscribe); +++SAVC(onFCSubscribe); ++ ++ static int ++ SendConnectResult(RTMP *r, double txn) ++@@ -191,7 +208,7 @@ SendConnectResult(RTMP *r, double txn) ++ enc = AMF_EncodeNumber(enc, pend, txn); ++ *enc++ = AMF_OBJECT; ++ ++- STR2AVAL(av, "FMS/3,5,1,525"); +++ STR2AVAL(av, "FMS/3,5,7,7009"); ++ enc = AMF_EncodeNamedString(enc, pend, &av_fmsVer, &av); ++ enc = AMF_EncodeNamedNumber(enc, pend, &av_capabilities, 31.0); ++ enc = AMF_EncodeNamedNumber(enc, pend, &av_mode, 1.0); ++@@ -213,7 +230,7 @@ SendConnectResult(RTMP *r, double txn) ++ enc = AMF_EncodeNamedString(enc, pend, &av_secureToken, &av); ++ #endif ++ STR2AVAL(p.p_name, "version"); ++- STR2AVAL(p.p_vu.p_aval, "3,5,1,525"); +++ STR2AVAL(p.p_vu.p_aval, "3,5,7,7009"); ++ p.p_type = AMF_STRING; ++ obj.o_num = 1; ++ obj.o_props = &p; ++@@ -234,7 +251,7 @@ static int ++ SendResultNumber(RTMP *r, double txn, double ID) ++ { ++ RTMPPacket packet; ++- char pbuf[256], *pend = pbuf+sizeof(pbuf); +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf); ++ ++ packet.m_nChannel = 0x03; // control channel (invoke) ++ packet.m_headerType = 1; /* RTMP_PACKET_SIZE_MEDIUM; */ ++@@ -264,12 +281,13 @@ static const AVal av_Stopped_playing = AVC("Stopped playing"); ++ SAVC(details); ++ SAVC(clientid); ++ static const AVal av_NetStream_Authenticate_UsherToken = AVC("NetStream.Authenticate.UsherToken"); +++static const AVal av_FCSubscribe_message = AVC("FCSubscribe to stream"); ++ ++ static int ++ SendPlayStart(RTMP *r) ++ { ++ RTMPPacket packet; ++- char pbuf[512], *pend = pbuf+sizeof(pbuf); +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf); ++ ++ 
packet.m_nChannel = 0x03; // control channel (invoke) ++ packet.m_headerType = 1; /* RTMP_PACKET_SIZE_MEDIUM; */ ++@@ -301,7 +319,7 @@ static int ++ SendPlayStop(RTMP *r) ++ { ++ RTMPPacket packet; ++- char pbuf[512], *pend = pbuf+sizeof(pbuf); +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf); ++ ++ packet.m_nChannel = 0x03; // control channel (invoke) ++ packet.m_headerType = 1; /* RTMP_PACKET_SIZE_MEDIUM; */ ++@@ -329,6 +347,83 @@ SendPlayStop(RTMP *r) ++ return RTMP_SendPacket(r, &packet, FALSE); ++ } ++ +++static int +++SendCheckBWResponse(RTMP *r, int oldMethodType, int onBWDoneInit) +++{ +++ RTMPPacket packet; +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf); +++ char *enc; +++ +++ packet.m_nChannel = 0x03; /* control channel (invoke) */ +++ packet.m_headerType = RTMP_PACKET_SIZE_MEDIUM; +++ packet.m_packetType = RTMP_PACKET_TYPE_INVOKE; +++ packet.m_nTimeStamp = 0; +++ packet.m_nInfoField2 = 0; +++ packet.m_hasAbsTimestamp = 0; +++ packet.m_body = pbuf + RTMP_MAX_HEADER_SIZE; +++ +++ enc = packet.m_body; +++ if (oldMethodType) +++ { +++ enc = AMF_EncodeString(enc, pend, &av__onbwdone); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeNumber(enc, pend, 10240); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ } +++ else +++ { +++ enc = AMF_EncodeString(enc, pend, &av_onBWDone); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ *enc++ = AMF_NULL; +++ if (!onBWDoneInit) +++ { +++ enc = AMF_EncodeNumber(enc, pend, 10240); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ enc = AMF_EncodeNumber(enc, pend, 20); +++ } +++ } +++ +++ packet.m_nBodySize = enc - packet.m_body; +++ +++ return RTMP_SendPacket(r, &packet, FALSE); +++} +++ +++static int +++SendOnFCSubscribe(RTMP *r) +++{ +++ RTMPPacket packet; +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf); +++ char *enc; +++ +++ packet.m_nChannel = 0x03; /* control channel (invoke) */ +++ packet.m_headerType = RTMP_PACKET_SIZE_MEDIUM; +++ 
packet.m_packetType = RTMP_PACKET_TYPE_INVOKE; +++ packet.m_nTimeStamp = 0; +++ packet.m_nInfoField2 = 0; +++ packet.m_hasAbsTimestamp = 0; +++ packet.m_body = pbuf + RTMP_MAX_HEADER_SIZE; +++ +++ enc = packet.m_body; +++ enc = AMF_EncodeString(enc, pend, &av_onFCSubscribe); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ *enc++ = AMF_NULL; +++ +++ *enc++ = AMF_OBJECT; +++ enc = AMF_EncodeNamedString(enc, pend, &av_level, &av_status); +++ enc = AMF_EncodeNamedString(enc, pend, &av_code, &av_NetStream_Play_Start); +++ enc = AMF_EncodeNamedString(enc, pend, &av_description, &av_FCSubscribe_message); +++ enc = AMF_EncodeNamedNumber(enc, pend, &av_clientid, 0); +++ *enc++ = 0; +++ *enc++ = 0; +++ *enc++ = AMF_OBJECT_END; +++ +++ packet.m_nBodySize = enc - packet.m_body; +++ +++ return RTMP_SendPacket(r, &packet, FALSE); +++} +++ ++ static void ++ spawn_dumper(int argc, AVal *av, char *cmd) ++ { ++@@ -389,6 +484,8 @@ countAMF(AMFObject *obj, int *argc) ++ len += 40; ++ break; ++ case AMF_OBJECT: +++ case AMF_ECMA_ARRAY: +++ case AMF_STRICT_ARRAY: ++ len += 9; ++ len += countAMF(&p->p_vu.p_object, argc); ++ (*argc) += 2; ++@@ -407,9 +504,11 @@ dumpAMF(AMFObject *obj, char *ptr, AVal *argv, int *argc) ++ int i, ac = *argc; ++ const char opt[] = "NBSO Z"; ++ ++- for (i=0; i < obj->o_num; i++) +++ for (i = 0; i < obj->o_num; i++) ++ { ++ AMFObjectProperty *p = &obj->o_props[i]; +++ if ((p->p_type == AMF_ECMA_ARRAY) || (p->p_type == AMF_STRICT_ARRAY)) +++ p->p_type = AMF_OBJECT; ++ argv[ac].av_val = ptr+1; ++ argv[ac++].av_len = 2; ++ ptr += sprintf(ptr, " -C "); ++@@ -569,6 +668,7 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ server->arglen += countAMF(&r->Link.extras, &server->argc); ++ } ++ SendConnectResult(r, txn); +++ SendCheckBWResponse(r, FALSE, TRUE); ++ } ++ else if (AVMATCH(&method, &av_createStream)) ++ { ++@@ -583,10 +683,26 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ AVal 
usherToken; ++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 3), &usherToken); ++ AVreplace(&usherToken, &av_dquote, &av_escdquote); +++#ifdef WIN32 +++ AVreplace(&usherToken, &av_caret, &av_esccaret); +++ AVreplace(&usherToken, &av_pipe, &av_escpipe); +++#endif ++ server->arglen += 6 + usherToken.av_len; ++ server->argc += 2; ++ r->Link.usherToken = usherToken; ++ } +++ else if (AVMATCH(&method, &av__checkbw)) +++ { +++ SendCheckBWResponse(r, TRUE, FALSE); +++ } +++ else if (AVMATCH(&method, &av_checkBandwidth)) +++ { +++ SendCheckBWResponse(r, FALSE, FALSE); +++ } +++ else if (AVMATCH(&method, &av_FCSubscribe)) +++ { +++ SendOnFCSubscribe(r); +++ } ++ else if (AVMATCH(&method, &av_play)) ++ { ++ char *file, *p, *q, *cmd, *ptr; ++@@ -602,6 +718,17 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ if (obj.o_num > 5) ++ r->Link.length = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 5)); ++ */ +++ double StartFlag = 0; +++ AMFObjectProperty *Start = AMF_GetProp(&obj, NULL, 4); +++ if (!(Start->p_type == AMF_INVALID)) +++ StartFlag = AMFProp_GetNumber(Start); +++ r->Link.app = AVcopy(r->Link.app); +++ if (StartFlag == -1000 || (r->Link.app.av_val && strstr(r->Link.app.av_val, "live"))) +++ { +++ StartFlag = -1000; +++ server->arglen += 7; +++ server->argc += 1; +++ } ++ if (r->Link.tcUrl.av_len) ++ { ++ len = server->arglen + r->Link.playpath.av_len + 4 + ++@@ -619,6 +746,7 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ argv[argc].av_val = ptr + 1; ++ argv[argc++].av_len = 2; ++ argv[argc].av_val = ptr + 5; +++ r->Link.tcUrl = StripParams(&r->Link.tcUrl); ++ ptr += sprintf(ptr," -r \"%s\"", r->Link.tcUrl.av_val); ++ argv[argc++].av_len = r->Link.tcUrl.av_len; ++ ++@@ -643,6 +771,7 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ argv[argc].av_val = ptr + 1; ++ argv[argc++].av_len = 2; ++ argv[argc].av_val = ptr + 5; +++ r->Link.swfUrl = 
StripParams(&r->Link.swfUrl); ++ ptr += sprintf(ptr, " -W \"%s\"", r->Link.swfUrl.av_val); ++ argv[argc++].av_len = r->Link.swfUrl.av_len; ++ } ++@@ -665,10 +794,17 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ r->Link.usherToken.av_val = NULL; ++ r->Link.usherToken.av_len = 0; ++ } ++- if (r->Link.extras.o_num) { ++- ptr = dumpAMF(&r->Link.extras, ptr, argv, &argc); ++- AMF_Reset(&r->Link.extras); ++- } +++ if (StartFlag == -1000) +++ { +++ argv[argc].av_val = ptr + 1; +++ argv[argc++].av_len = 6; +++ ptr += sprintf(ptr, " --live"); +++ } +++ if (r->Link.extras.o_num) +++ { +++ ptr = dumpAMF(&r->Link.extras, ptr, argv, &argc); +++ AMF_Reset(&r->Link.extras); +++ } ++ argv[argc].av_val = ptr + 1; ++ argv[argc++].av_len = 2; ++ argv[argc].av_val = ptr + 5; ++@@ -676,7 +812,13 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ r->Link.playpath.av_len, r->Link.playpath.av_val); ++ argv[argc++].av_len = r->Link.playpath.av_len; ++ ++- av = r->Link.playpath; +++ if (r->Link.playpath.av_len) +++ av = r->Link.playpath; +++ else +++ { +++ av.av_val = "file"; +++ av.av_len = 4; +++ } ++ /* strip trailing URL parameters */ ++ q = memchr(av.av_val, '?', av.av_len); ++ if (q) ++@@ -710,25 +852,82 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ ++ memcpy(file, av.av_val, av.av_len); ++ file[av.av_len] = '\0'; ++- for (p=file; *p; p++) ++- if (*p == ':') ++- *p = '_'; ++ ++- /* Add extension if none present */ ++- if (file[av.av_len - 4] != '.') ++- { ++- av.av_len += 4; ++- } ++- /* Always use flv extension, regardless of original */ ++- if (strcmp(file+av.av_len-4, ".flv")) ++- { ++- strcpy(file+av.av_len-4, ".flv"); ++- } +++ if (strlen(file) < 128) +++ { +++ /* Add extension if none present */ +++ if (file[av.av_len - 4] != '.') +++ { +++ av.av_len += 4; +++ } +++ +++ /* Always use flv extension, regardless of original */ +++ if (strcmp(file + av.av_len - 4, 
".flv")) +++ { +++ strcpy(file + av.av_len - 4, ".flv"); +++ } +++ +++ /* Remove invalid characters from filename */ +++ file = strreplace(file, 0, ":", "_", TRUE); +++ file = strreplace(file, 0, "&", "_", TRUE); +++ file = strreplace(file, 0, "^", "_", TRUE); +++ file = strreplace(file, 0, "|", "_", TRUE); +++ } +++ else +++ { +++ /* Filename too long - generate unique name */ +++ strcpy(file, "vXXXXXX"); +++ mkstemp(file); +++ strcat(file, ".flv"); +++ } +++ +++ /* Add timestamp to the filename */ +++ char *filename, *pfilename, timestamp[21]; +++ int filename_len, timestamp_len; +++ time_t current_time; +++ +++ time(&current_time); +++ timestamp_len = strftime(&timestamp[0], sizeof (timestamp), "%Y-%m-%d_%I-%M-%S_", localtime(&current_time)); +++ timestamp[timestamp_len] = '\0'; +++ filename_len = strlen(file); +++ filename = malloc(timestamp_len + filename_len + 1); +++ pfilename = filename; +++ memcpy(pfilename, timestamp, timestamp_len); +++ pfilename += timestamp_len; +++ memcpy(pfilename, file, filename_len); +++ pfilename += filename_len; +++ *pfilename++ = '\0'; +++ file = filename; +++ ++ argv[argc].av_val = ptr + 1; ++ argv[argc++].av_len = 2; ++ argv[argc].av_val = file; ++ argv[argc].av_len = av.av_len; ++- ptr += sprintf(ptr, " -o %s", file); +++#ifdef VLC +++ char *vlc; +++ int didAlloc = FALSE; +++ +++ if (getenv("VLC")) +++ vlc = getenv("VLC"); +++ else if (getenv("ProgramFiles")) +++ { +++ vlc = malloc(512 * sizeof (char)); +++ didAlloc = TRUE; +++ char *ProgramFiles = getenv("ProgramFiles"); +++ sprintf(vlc, "\"%s%s", ProgramFiles, " (x86)\\VideoLAN\\VLC\\vlc.exe"); +++ if (!file_exists(vlc + 1)) +++ sprintf(vlc + 1, "%s%s", ProgramFiles, "\\VideoLAN\\VLC\\vlc.exe"); +++ strcpy(vlc + strlen(vlc), "\" -"); +++ } +++ else +++ vlc = "vlc -"; +++ +++ ptr += sprintf(ptr, " | %s", vlc); +++ if (didAlloc) +++ free(vlc); +++#else +++ ptr += sprintf(ptr, " -o \"%s\"", file); +++#endif ++ now = RTMP_GetTime(); ++ if (now - server->filetime < DUPTIME && 
AVMATCH(&argv[argc], &server->filename)) ++ { ++@@ -742,7 +941,21 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ server->filetime = now; ++ free(server->filename.av_val); ++ server->filename = argv[argc++]; ++- spawn_dumper(argc, argv, cmd); +++#ifdef VLC +++ FILE *vlc_cmdfile = fopen("VLC.bat", "w"); +++ char *vlc_batchcmd = strreplace(cmd, 0, "%", "%%", FALSE); +++ fprintf(vlc_cmdfile, "%s\n", vlc_batchcmd); +++ fclose(vlc_cmdfile); +++ free(vlc_batchcmd); +++ spawn_dumper(argc, argv, "VLC.bat"); +++#else +++ spawn_dumper(argc, argv, cmd); +++#endif +++ +++ /* Save command to text file */ +++ FILE *cmdfile = fopen("Command.txt", "a"); +++ fprintf(cmdfile, "%s\n", cmd); +++ fclose(cmdfile); ++ } ++ ++ free(cmd); ++@@ -861,12 +1074,18 @@ controlServerThread(void *unused) ++ { ++ case 'q': ++ RTMP_LogPrintf("Exiting\n"); ++- stopStreaming(rtmpServer); ++- exit(0); +++ if (rtmpServer) +++ stopStreaming(rtmpServer); ++ break; ++ default: ++ RTMP_LogPrintf("Unknown command \'%c\', ignoring\n", ich); ++ } +++ sleep(1); +++ if (rtmpServer && (rtmpServer->state == STREAMING_STOPPED)) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "Exiting text UI thread"); +++ break; +++ } ++ } ++ TFRET(); ++ } ++@@ -1054,7 +1273,6 @@ stopStreaming(STREAMING_SERVER * server) ++ } ++ } ++ ++- ++ void ++ sigIntHandler(int sig) ++ { ++@@ -1191,3 +1409,15 @@ AVreplace(AVal *src, const AVal *orig, const AVal *repl) ++ src->av_val = dest; ++ src->av_len = dptr - dest; ++ } +++ +++int +++file_exists(const char *fname) +++{ +++ FILE *file; +++ if ((file = fopen(fname, "r"))) +++ { +++ fclose(file); +++ return TRUE; +++ } +++ return FALSE; +++} ++diff --git rtmpsuck.c rtmpsuck.c ++index e886179..0abdba4 100644 ++--- rtmpsuck.c +++++ rtmpsuck.c ++@@ -25,10 +25,13 @@ ++ */ ++ ++ #include +++#ifdef __MINGW_H +++#include +++#endif ++ #include ++ #include ++ #include ++- +++#include ++ #include ++ #include ++ ++@@ -141,18 +144,21 @@ SAVC(code); ++ SAVC(secureToken); ++ 
SAVC(onStatus); ++ SAVC(close); +++SAVC(play2); ++ static const AVal av_NetStream_Failed = AVC("NetStream.Failed"); ++ static const AVal av_NetStream_Play_Failed = AVC("NetStream.Play.Failed"); ++-static const AVal av_NetStream_Play_StreamNotFound = ++-AVC("NetStream.Play.StreamNotFound"); ++-static const AVal av_NetConnection_Connect_InvalidApp = ++-AVC("NetConnection.Connect.InvalidApp"); +++static const AVal av_NetStream_Play_StreamNotFound = AVC("NetStream.Play.StreamNotFound"); +++static const AVal av_NetConnection_Connect_InvalidApp = AVC("NetConnection.Connect.InvalidApp"); +++static const AVal av_NetConnection_Connect_Rejected = AVC("NetConnection.Connect.Rejected"); ++ static const AVal av_NetStream_Play_Start = AVC("NetStream.Play.Start"); ++ static const AVal av_NetStream_Play_Complete = AVC("NetStream.Play.Complete"); ++ static const AVal av_NetStream_Play_Stop = AVC("NetStream.Play.Stop"); +++static const AVal av_NetStream_Authenticate_UsherToken = AVC("NetStream.Authenticate.UsherToken"); ++ ++ static const char *cst[] = { "client", "server" }; ++ +++char *dumpAMF(AMFObject *obj, char *ptr); +++ ++ // Returns 0 for OK/Failed/error, 1 for 'Stop or Complete' ++ int ++ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *body) ++@@ -198,26 +204,28 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ if (cobj.o_props[i].p_type == AMF_STRING) ++ { ++ pval = cobj.o_props[i].p_vu.p_aval; ++- RTMP_LogPrintf("%.*s: %.*s\n", pname.av_len, pname.av_val, pval.av_len, pval.av_val); +++ RTMP_LogPrintf("%10.*s : %.*s\n", pname.av_len, pname.av_val, pval.av_len, pval.av_val); ++ } ++ if (AVMATCH(&pname, &av_app)) ++ { ++- server->rc.Link.app = pval; +++ server->rc.Link.app = AVcopy(pval); ++ pval.av_val = NULL; ++ } ++ else if (AVMATCH(&pname, &av_flashVer)) ++ { ++- server->rc.Link.flashVer = pval; +++ server->rc.Link.flashVer = AVcopy(pval); ++ pval.av_val = NULL; ++ } ++ else if (AVMATCH(&pname, 
&av_swfUrl)) ++ { ++ #ifdef CRYPTO ++ if (pval.av_val) ++- RTMP_HashSWF(pval.av_val, &server->rc.Link.SWFSize, ++- (unsigned char *)server->rc.Link.SWFHash, 30); +++ { +++ AVal swfUrl = StripParams(&pval); +++ RTMP_HashSWF(swfUrl.av_val, &server->rc.Link.SWFSize, (unsigned char *) server->rc.Link.SWFHash, 30); +++ } ++ #endif ++- server->rc.Link.swfUrl = pval; +++ server->rc.Link.swfUrl = AVcopy(pval); ++ pval.av_val = NULL; ++ } ++ else if (AVMATCH(&pname, &av_tcUrl)) ++@@ -225,7 +233,7 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ char *r1 = NULL, *r2; ++ int len; ++ ++- server->rc.Link.tcUrl = pval; +++ server->rc.Link.tcUrl = AVcopy(pval); ++ if ((pval.av_val[0] | 0x40) == 'r' && ++ (pval.av_val[1] | 0x40) == 't' && ++ (pval.av_val[2] | 0x40) == 'm' && ++@@ -267,7 +275,7 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ } ++ else if (AVMATCH(&pname, &av_pageUrl)) ++ { ++- server->rc.Link.pageUrl = pval; +++ server->rc.Link.pageUrl = AVcopy(pval); ++ pval.av_val = NULL; ++ } ++ else if (AVMATCH(&pname, &av_audioCodecs)) ++@@ -287,14 +295,21 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ if (pval.av_val) ++ free(pval.av_val); ++ } +++ ++ if (obj.o_num > 3) ++ { ++- if (AMFProp_GetBoolean(&obj.o_props[3])) ++- server->rc.Link.lFlags |= RTMP_LF_AUTH; ++- if (obj.o_num > 4) ++- { ++- AMFProp_GetString(&obj.o_props[4], &server->rc.Link.auth); ++- } +++ int i = obj.o_num - 3; +++ server->rc.Link.extras.o_num = i; +++ server->rc.Link.extras.o_props = malloc(i * sizeof (AMFObjectProperty)); +++ memcpy(server->rc.Link.extras.o_props, obj.o_props + 3, i * sizeof (AMFObjectProperty)); +++ obj.o_num = 3; +++ } +++ +++ if (server->rc.Link.extras.o_num) +++ { +++ server->rc.Link.Extras.av_val = calloc(2048, sizeof (char)); +++ dumpAMF(&server->rc.Link.extras, server->rc.Link.Extras.av_val); +++ server->rc.Link.Extras.av_len = 
strlen(server->rc.Link.Extras.av_val); ++ } ++ ++ if (!RTMP_Connect(&server->rc, pack)) ++@@ -303,6 +318,37 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ return 1; ++ } ++ server->rc.m_bSendCounter = FALSE; +++ +++ if (server->rc.Link.extras.o_props) +++ { +++ AMF_Reset(&server->rc.Link.extras); +++ } +++ } +++ else if (AVMATCH(&method, &av_NetStream_Authenticate_UsherToken)) +++ { +++ AVal usherToken = {0}; +++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 3), &usherToken); +++ server->rc.Link.usherToken = AVcopy(usherToken); +++ RTMP_LogPrintf("%10s : %.*s\n", "usherToken", server->rc.Link.usherToken.av_len, server->rc.Link.usherToken.av_val); +++ } +++ else if (AVMATCH(&method, &av_play2)) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "%s: Detected play2 request\n", __FUNCTION__); +++ if (body && nBodySize > 0) +++ { +++ char* pCmd = (char*) body; +++ char* pEnd = pCmd + nBodySize - 4; +++ while (pCmd < pEnd) +++ { +++ if (pCmd[0] == 'p' && pCmd[1] == 'l' && pCmd[2] == 'a' && pCmd[3] == 'y' && pCmd[4] == '2') +++ { +++ /* Disable bitrate transition by sending invalid command */ +++ pCmd[4] = 'z'; +++ break; +++ } +++ ++pCmd; +++ } +++ } ++ } ++ else if (AVMATCH(&method, &av_play)) ++ { ++@@ -323,6 +369,14 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ if (!av.av_val) ++ goto out; ++ +++ double StartFlag = 0; +++ AMFObjectProperty *Start = AMF_GetProp(&obj, NULL, 4); +++ if (!(Start->p_type == AMF_INVALID)) +++ StartFlag = AMFProp_GetNumber(Start); +++ if (StartFlag == -1000 || (server->rc.Link.app.av_val && strstr(server->rc.Link.app.av_val, "live"))) +++ StartFlag = -1000; +++ RTMP_LogPrintf("%10s : %s\n", "live", (StartFlag == -1000) ? 
"yes" : "no"); +++ ++ /* check for duplicates */ ++ for (fl = server->f_head; fl; fl=fl->f_next) ++ { ++@@ -362,19 +416,104 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ /* hope there aren't more than 255 dups */ ++ if (count) ++ flen += 2; ++- file = malloc(flen+1); +++ file = malloc(flen + 5); ++ ++ memcpy(file, av.av_val, av.av_len); ++ if (count) ++ sprintf(file+av.av_len, "%02x", count); ++ else ++ file[av.av_len] = '\0'; ++- for (p=file; *p; p++) ++- if (*p == ':') ++- *p = '_'; ++- RTMP_LogPrintf("Playpath: %.*s\nSaving as: %s\n", ++- server->rc.Link.playpath.av_len, server->rc.Link.playpath.av_val, ++- file); +++ +++ if (strlen(file) < 128) +++ { +++ /* Add extension if none present */ +++ if (file[av.av_len - 4] != '.') +++ { +++ av.av_len += 4; +++ } +++ +++ /* Always use flv extension, regardless of original */ +++ if (strcmp(file + av.av_len - 4, ".flv")) +++ { +++ strcpy(file + av.av_len - 4, ".flv"); +++ } +++ +++ /* Remove invalid characters from filename */ +++ file = strreplace(file, 0, ":", "_", TRUE); +++ file = strreplace(file, 0, "&", "_", TRUE); +++ file = strreplace(file, 0, "^", "_", TRUE); +++ file = strreplace(file, 0, "|", "_", TRUE); +++ } +++ else +++ { +++ /* Filename too long - generate unique name */ +++ strcpy(file, "vXXXXXX"); +++ mkstemp(file); +++ strcat(file, ".flv"); +++ } +++ +++ /* Add timestamp to the filename */ +++ char *filename, *pfilename, timestamp[21]; +++ int filename_len, timestamp_len; +++ time_t current_time; +++ +++ time(&current_time); +++ timestamp_len = strftime(&timestamp[0], sizeof (timestamp), "%Y-%m-%d_%I-%M-%S_", localtime(&current_time)); +++ timestamp[timestamp_len] = '\0'; +++ filename_len = strlen(file); +++ filename = malloc(timestamp_len + filename_len + 1); +++ pfilename = filename; +++ memcpy(pfilename, timestamp, timestamp_len); +++ pfilename += timestamp_len; +++ memcpy(pfilename, file, filename_len); +++ pfilename += filename_len; +++ *pfilename++ = '\0'; +++ file = 
filename; +++ +++ RTMP_LogPrintf("%10s : %.*s\n%10s : %s\n", "Playpath", server->rc.Link.playpath.av_len, +++ server->rc.Link.playpath.av_val, "Saving as", file); +++ +++ /* Save command to text file */ +++ char *cmd = NULL, *ptr = NULL; +++ AVal swfUrl, tcUrl; +++ +++ cmd = calloc(4096, sizeof (char)); +++ ptr = cmd; +++ tcUrl = StripParams(&server->rc.Link.tcUrl); +++ swfUrl = StripParams(&server->rc.Link.swfUrl); +++ ptr += sprintf(ptr, "rtmpdump -r \"%.*s\" -a \"%.*s\" -f \"%.*s\" -W \"%.*s\" -p \"%.*s\"", +++ tcUrl.av_len, tcUrl.av_val, +++ server->rc.Link.app.av_len, server->rc.Link.app.av_val, +++ server->rc.Link.flashVer.av_len, server->rc.Link.flashVer.av_val, +++ swfUrl.av_len, swfUrl.av_val, +++ server->rc.Link.pageUrl.av_len, server->rc.Link.pageUrl.av_val); +++ +++ if (server->rc.Link.usherToken.av_val) +++ { +++ char *usherToken = strreplace(server->rc.Link.usherToken.av_val, server->rc.Link.usherToken.av_len, "\"", "\\\"", TRUE); +++#ifdef WIN32 +++ usherToken = strreplace(usherToken, 0, "^", "^^", TRUE); +++ usherToken = strreplace(usherToken, 0, "|", "^|", TRUE); +++#endif +++ ptr += sprintf(ptr, " --jtv \"%s\"", usherToken); +++ free(usherToken); +++ } +++ +++ if (server->rc.Link.Extras.av_len) +++ { +++ ptr += sprintf(ptr, "%.*s", server->rc.Link.Extras.av_len, server->rc.Link.Extras.av_val); +++ } +++ +++ if (StartFlag == -1000) +++ ptr += sprintf(ptr, "%s", " --live"); +++ ptr += sprintf(ptr, " -y \"%.*s\"", server->rc.Link.playpath.av_len, server->rc.Link.playpath.av_val); +++ ptr += sprintf(ptr, " -o \"%s\"\n", file); +++ +++ FILE *cmdfile = fopen("Command.txt", "a"); +++ fprintf(cmdfile, "%s", cmd); +++ fclose(cmdfile); +++ free(cmd); +++ ++ out = fopen(file, "wb"); ++ free(file); ++ if (!out) ++@@ -407,9 +546,10 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ ++ RTMP_Log(RTMP_LOGDEBUG, "%s, onStatus: %s", __FUNCTION__, code.av_val); ++ if (AVMATCH(&code, &av_NetStream_Failed) ++- || AVMATCH(&code, 
&av_NetStream_Play_Failed) ++- || AVMATCH(&code, &av_NetStream_Play_StreamNotFound) ++- || AVMATCH(&code, &av_NetConnection_Connect_InvalidApp)) +++ || AVMATCH(&code, &av_NetStream_Play_Failed) +++ || AVMATCH(&code, &av_NetStream_Play_StreamNotFound) +++ || AVMATCH(&code, &av_NetConnection_Connect_Rejected) +++ || AVMATCH(&code, &av_NetConnection_Connect_InvalidApp)) ++ { ++ ret = 1; ++ } ++@@ -719,13 +859,18 @@ controlServerThread(void *unused) ++ { ++ case 'q': ++ RTMP_LogPrintf("Exiting\n"); ++- stopStreaming(rtmpServer); ++- free(rtmpServer); ++- exit(0); +++ if (rtmpServer) +++ stopStreaming(rtmpServer); ++ break; ++ default: ++ RTMP_LogPrintf("Unknown command \'%c\', ignoring\n", ich); ++ } +++ sleep(1); +++ if (rtmpServer && (rtmpServer->state == STREAMING_STOPPED)) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "Exiting text UI thread"); +++ break; +++ } ++ } ++ TFRET(); ++ } ++@@ -815,7 +960,7 @@ TFTYPE doServe(void *arg) // server socket and state (our listening socket) ++ ++ if (select(n + 1, &rfds, NULL, NULL, &tv) <= 0) ++ { ++- if (server->f_cur && server->rc.m_mediaChannel && !paused) +++ if (server->f_cur && server->rc.m_mediaChannel && !paused && server->rc.m_channelTimestamp) ++ { ++ server->rc.m_pauseStamp = server->rc.m_channelTimestamp[server->rc.m_mediaChannel]; ++ if (RTMP_ToggleStream(&server->rc)) ++@@ -1123,7 +1268,6 @@ stopStreaming(STREAMING_SERVER * server) ++ } ++ } ++ ++- ++ void ++ sigIntHandler(int sig) ++ { ++@@ -1196,3 +1340,48 @@ main(int argc, char **argv) ++ #endif ++ return nStatus; ++ } +++ +++char * +++dumpAMF(AMFObject *obj, char *ptr) +++{ +++ int i; +++ const char opt[] = "NBSO Z"; +++ +++ for (i = 0; i < obj->o_num; i++) +++ { +++ AMFObjectProperty *p = &obj->o_props[i]; +++ if ((p->p_type == AMF_ECMA_ARRAY) || (p->p_type == AMF_STRICT_ARRAY)) +++ p->p_type = AMF_OBJECT; +++ if (p->p_type > 5) +++ continue; +++ ptr += sprintf(ptr, " -C "); +++ if (p->p_name.av_val) +++ *ptr++ = 'N'; +++ *ptr++ = opt[p->p_type]; +++ *ptr++ = ':'; +++ 
if (p->p_name.av_val) +++ ptr += sprintf(ptr, "%.*s:", p->p_name.av_len, p->p_name.av_val); +++ switch (p->p_type) +++ { +++ case AMF_BOOLEAN: +++ *ptr++ = p->p_vu.p_number != 0 ? '1' : '0'; +++ break; +++ case AMF_STRING: +++ memcpy(ptr, p->p_vu.p_aval.av_val, p->p_vu.p_aval.av_len); +++ ptr += p->p_vu.p_aval.av_len; +++ break; +++ case AMF_NUMBER: +++ ptr += sprintf(ptr, "%f", p->p_vu.p_number); +++ break; +++ case AMF_OBJECT: +++ *ptr++ = '1'; +++ ptr = dumpAMF(&p->p_vu.p_object, ptr); +++ ptr += sprintf(ptr, " -C O:0"); +++ break; +++ case AMF_NULL: +++ default: +++ break; +++ } +++ } +++ return ptr; +++} ++diff --git thread.c thread.c ++index 0913c98..13d624a 100644 ++--- thread.c +++++ thread.c ++@@ -32,7 +32,7 @@ ThreadCreate(thrfunc *routine, void *args) ++ HANDLE thd; ++ ++ thd = (HANDLE) _beginthread(routine, 0, args); ++- if (thd == -1L) +++ if (thd == INVALID_HANDLE_VALUE) ++ RTMP_LogPrintf("%s, _beginthread failed with %d\n", __FUNCTION__, errno); ++ ++ return thd; +diff --git a/tools/depends/target/librtmp/UpdateToLatest.diff b/tools/depends/target/librtmp/UpdateToLatest.diff +new file mode 100644 +index 0000000000000000000000000000000000000000..d9d5f6b8e4869efaba4b03abef4ccb534c4e8beb +--- /dev/null ++++ b/tools/depends/target/librtmp/UpdateToLatest.diff +@@ -0,0 +1,257 @@ ++diff --git b/ChangeLog a/ChangeLog ++index c3b1a14..b027e31 100644 ++--- b/ChangeLog +++++ a/ChangeLog ++@@ -1,6 +1,6 @@ ++ RTMPDump ++ Copyright 2008-2009 Andrej Stepanchuk; Distributed under the GPL v2 ++-Copyright 2009-2011 Howard Chu +++Copyright 2009-2015 Howard Chu ++ Copyright 2009 The Flvstreamer Team ++ http://rtmpdump.mplayerhq.hu/ ++ ++diff --git b/librtmp/amf.c a/librtmp/amf.c ++index 73d1486..7954144 100644 ++--- b/librtmp/amf.c +++++ a/librtmp/amf.c ++@@ -33,6 +33,7 @@ ++ #include "bytes.h" ++ ++ static const AMFObjectProperty AMFProp_Invalid = { {0, 0}, AMF_INVALID }; +++static const AMFObject AMFObj_Invalid = { 0, 0 }; ++ static const AVal AV_empty = { 0, 0 }; ++ 
++ /* Data is Big-Endian */ ++@@ -340,13 +341,19 @@ AMFProp_GetBoolean(AMFObjectProperty *prop) ++ void ++ AMFProp_GetString(AMFObjectProperty *prop, AVal *str) ++ { ++- *str = prop->p_vu.p_aval; +++ if (prop->p_type == AMF_STRING) +++ *str = prop->p_vu.p_aval; +++ else +++ *str = AV_empty; ++ } ++ ++ void ++ AMFProp_GetObject(AMFObjectProperty *prop, AMFObject *obj) ++ { ++- *obj = prop->p_vu.p_object; +++ if (prop->p_type == AMF_OBJECT) +++ *obj = prop->p_vu.p_object; +++ else +++ *obj = AMFObj_Invalid; ++ } ++ ++ int ++@@ -471,6 +478,8 @@ AMF3ReadString(const char *data, AVal *str) ++ RTMP_Log(RTMP_LOGDEBUG, ++ "%s, string reference, index: %d, not supported, ignoring!", ++ __FUNCTION__, refIndex); +++ str->av_val = NULL; +++ str->av_len = 0; ++ return len; ++ } ++ else ++@@ -510,9 +519,11 @@ AMF3Prop_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ if (name.av_len <= 0) ++ return nRes; ++ +++ nSize -= nRes; +++ if (nSize <= 0) +++ return -1; ++ prop->p_name = name; ++ pBuffer += nRes; ++- nSize -= nRes; ++ } ++ ++ /* decode */ ++@@ -598,6 +609,8 @@ AMF3Prop_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ __FUNCTION__, (unsigned char)(*pBuffer), pBuffer); ++ return -1; ++ } +++ if (nSize < 0) +++ return -1; ++ ++ return nOriginalSize - nSize; ++ } ++@@ -992,9 +1005,17 @@ AMF_DecodeArray(AMFObject *obj, const char *pBuffer, int nSize, ++ int nRes; ++ nArrayLen--; ++ +++ if (nSize <= 0) +++ { +++ bError = TRUE; +++ break; +++ } ++ nRes = AMFProp_Decode(&prop, pBuffer, nSize, bDecodeName); ++ if (nRes == -1) ++- bError = TRUE; +++ { +++ bError = TRUE; +++ break; +++ } ++ else ++ { ++ nSize -= nRes; ++@@ -1053,12 +1074,12 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ else ++ { ++ int32_t classExtRef = (classRef >> 1); ++- int i; +++ int i, cdnum; ++ ++ cd.cd_externalizable = (classExtRef & 0x1) == 1; ++ cd.cd_dynamic = ((classExtRef >> 1) & 0x1) == 1; ++ ++- cd.cd_num = classExtRef >> 2; +++ 
cdnum = classExtRef >> 2; ++ ++ /* class name */ ++ ++@@ -1073,9 +1094,16 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ cd.cd_name.av_val, cd.cd_externalizable, cd.cd_dynamic, ++ cd.cd_num); ++ ++- for (i = 0; i < cd.cd_num; i++) +++ for (i = 0; i < cdnum; i++) ++ { ++ AVal memberName; +++ if (nSize <=0) +++ { +++invalid: +++ RTMP_Log(RTMP_LOGDEBUG, "%s, invalid class encoding!", +++ __FUNCTION__); +++ return nOriginalSize; +++ } ++ len = AMF3ReadString(pBuffer, &memberName); ++ RTMP_Log(RTMP_LOGDEBUG, "Member: %s", memberName.av_val); ++ AMF3CD_AddProp(&cd, &memberName); ++@@ -1111,6 +1139,8 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ int nRes, i; ++ for (i = 0; i < cd.cd_num; i++) /* non-dynamic */ ++ { +++ if (nSize <=0) +++ goto invalid; ++ nRes = AMF3Prop_Decode(&prop, pBuffer, nSize, FALSE); ++ if (nRes == -1) ++ RTMP_Log(RTMP_LOGDEBUG, "%s, failed to decode AMF3 property!", ++@@ -1128,6 +1158,8 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ ++ do ++ { +++ if (nSize <=0) +++ goto invalid; ++ nRes = AMF3Prop_Decode(&prop, pBuffer, nSize, TRUE); ++ AMF_AddProp(obj, &prop); ++ ++@@ -1175,10 +1207,18 @@ AMF_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bDecodeName) ++ ++ nRes = AMFProp_Decode(&prop, pBuffer, nSize, bDecodeName); ++ if (nRes == -1) ++- bError = TRUE; +++ { +++ bError = TRUE; +++ break; +++ } ++ else ++ { ++ nSize -= nRes; +++ if (nSize < 0) +++ { +++ bError = TRUE; +++ break; +++ } ++ pBuffer += nRes; ++ AMF_AddProp(obj, &prop); ++ } ++diff --git b/librtmp/log.c a/librtmp/log.c ++index 0012985..1b52000 100644 ++--- b/librtmp/log.c +++++ a/librtmp/log.c ++@@ -92,6 +92,10 @@ RTMP_LogLevel RTMP_LogGetLevel() ++ void RTMP_Log(int level, const char *format, ...) 
++ { ++ va_list args; +++ +++ if ( level > RTMP_debuglevel ) +++ return; +++ ++ va_start(args, format); ++ cb(level, format, args); ++ va_end(args); ++diff --git b/librtmp/rtmp.c a/librtmp/rtmp.c ++index ca7db6a..a2863b0 100644 ++--- b/librtmp/rtmp.c +++++ a/librtmp/rtmp.c ++@@ -186,9 +186,12 @@ RTMPPacket_Reset(RTMPPacket *p) ++ } ++ ++ int ++-RTMPPacket_Alloc(RTMPPacket *p, int nSize) +++RTMPPacket_Alloc(RTMPPacket *p, uint32_t nSize) ++ { ++- char *ptr = calloc(1, nSize + RTMP_MAX_HEADER_SIZE); +++ char *ptr; +++ if (nSize > SIZE_MAX - RTMP_MAX_HEADER_SIZE) +++ return FALSE; +++ ptr = calloc(1, nSize + RTMP_MAX_HEADER_SIZE); ++ if (!ptr) ++ return FALSE; ++ p->m_body = ptr + RTMP_MAX_HEADER_SIZE; ++@@ -1180,7 +1183,7 @@ RTMP_GetNextMediaPacket(RTMP *r, RTMPPacket *packet) ++ while (!bHasMediaPacket && RTMP_IsConnected(r) ++ && RTMP_ReadPacket(r, packet)) ++ { ++- if (!RTMPPacket_IsReady(packet)) +++ if (!RTMPPacket_IsReady(packet) || !packet->m_nBodySize) ++ { ++ continue; ++ } ++@@ -3643,7 +3646,6 @@ RTMP_ReadPacket(RTMP *r, RTMPPacket *packet) ++ { ++ packet->m_nBodySize = AMF_DecodeInt24(header + 3); ++ packet->m_nBytesRead = 0; ++- RTMPPacket_Free(packet); ++ ++ if (nSize > 6) ++ { ++diff --git b/librtmp/rtmp.h a/librtmp/rtmp.h ++index 0248913..6d7dd89 100644 ++--- b/librtmp/rtmp.h +++++ a/librtmp/rtmp.h ++@@ -136,7 +136,7 @@ extern "C" ++ ++ void RTMPPacket_Reset(RTMPPacket *p); ++ void RTMPPacket_Dump(RTMPPacket *p); ++- int RTMPPacket_Alloc(RTMPPacket *p, int nSize); +++ int RTMPPacket_Alloc(RTMPPacket *p, uint32_t nSize); ++ void RTMPPacket_Free(RTMPPacket *p); ++ ++ #define RTMPPacket_IsReady(a) ((a)->m_nBytesRead == (a)->m_nBodySize) ++diff --git b/rtmpsrv.c a/rtmpsrv.c ++index a9e9045..5df4d3a 100644 ++--- b/rtmpsrv.c +++++ a/rtmpsrv.c ++@@ -404,10 +404,10 @@ countAMF(AMFObject *obj, int *argc) ++ static char * ++ dumpAMF(AMFObject *obj, char *ptr, AVal *argv, int *argc) ++ { ++- int i, len, ac = *argc; +++ int i, ac = *argc; ++ const char opt[] = 
"NBSO Z"; ++ ++- for (i=0, len=0; i < obj->o_num; i++) +++ for (i=0; i < obj->o_num; i++) ++ { ++ AMFObjectProperty *p = &obj->o_props[i]; ++ argv[ac].av_val = ptr+1; ++@@ -595,6 +595,8 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ uint32_t now; ++ RTMPPacket pc = {0}; ++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 3), &r->Link.playpath); +++ if (!r->Link.playpath.av_len) +++ return 0; ++ /* ++ r->Link.seekTime = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 4)); ++ if (obj.o_num > 5) +diff --git a/tools/depends/target/librtmp/libm.patch b/tools/depends/target/librtmp/libm.patch +deleted file mode 100644 +index d86485b584920d3b8e7d775196e50f9b7fe3b297..0000000000000000000000000000000000000000 +--- a/tools/depends/target/librtmp/libm.patch ++++ /dev/null +@@ -1,11 +0,0 @@ +---- Makefile.old 2013-06-04 17:35:58.000000000 +0200 +-+++ Makefile 2013-06-04 17:36:13.000000000 +0200 +-@@ -25,7 +25,7 @@ +- REQ_GNUTLS=gnutls +- REQ_OPENSSL=libssl,libcrypto +- LIBZ=-lz +--LIBS_posix= +-+LIBS_posix=-lm +- LIBS_darwin= +- LIBS_mingw=-lws2_32 -lwinmm -lgdi32 +- LIB_GNUTLS=-lgnutls -lhogweed -lnettle -lgmp $(LIBZ) + +From 2c9b195f2c8cf3559bba7d7b21d35ee0d0bca59c Mon Sep 17 00:00:00 2001 +From: Claudio-Sjo +Date: Mon, 16 Feb 2015 14:51:26 +0100 +Subject: [PATCH 23/67] - allow reads < CDIO_CD_FRAMESIZE_RAW by using a buffer + - fixes #15794 + +--- + xbmc/filesystem/CDDAFile.cpp | 120 ++++++++++++++++++++++++++++++++----------- + xbmc/filesystem/CDDAFile.h | 3 ++ + 2 files changed, 92 insertions(+), 31 deletions(-) + +diff --git a/xbmc/filesystem/CDDAFile.cpp b/xbmc/filesystem/CDDAFile.cpp +index 722e62602084923bd040803f0e5a5c336a42fa3b..b0f53e5d44e108d88d7af0e46913a7f29328cd31 100644 +--- a/xbmc/filesystem/CDDAFile.cpp ++++ b/xbmc/filesystem/CDDAFile.cpp +@@ -42,10 +42,14 @@ CFileCDDA::CFileCDDA(void) + m_lsnEnd = CDIO_INVALID_LSN; + m_cdio = CLibcdio::GetInstance(); + m_iSectorCount = 52; ++ m_TrackBuf = (uint8_t *) malloc(CDIO_CD_FRAMESIZE_RAW); 
++ p_TrackBuf = 0; ++ f_TrackBuf = 0; + } + + CFileCDDA::~CFileCDDA(void) + { ++ free(m_TrackBuf); + Close(); + } + +@@ -53,6 +57,9 @@ bool CFileCDDA::Open(const CURL& url) + { + std::string strURL = url.GetWithoutFilename(); + ++ // Flag TrackBuffer = FALSE, TrackBuffer is empty ++ f_TrackBuf = 0; ++ + if (!g_mediaManager.IsDiscInDrive(strURL) || !IsValidFile(url)) + return false; + +@@ -117,50 +124,98 @@ int CFileCDDA::Stat(const CURL& url, struct __stat64* buffer) + + ssize_t CFileCDDA::Read(void* lpBuf, size_t uiBufSize) + { +- if (!m_pCdIo || !g_mediaManager.IsDiscInDrive()) +- return -1; + +- if (uiBufSize > SSIZE_MAX) +- uiBufSize = SSIZE_MAX; ++ ssize_t returnValue; ++ int iSectorCount; ++ void *destBuf; + +- // limit number of sectors that fits in buffer by m_iSectorCount +- int iSectorCount = std::min((int)uiBufSize / CDIO_CD_FRAMESIZE_RAW, m_iSectorCount); + +- if (iSectorCount <= 0) ++ if (!m_pCdIo || !g_mediaManager.IsDiscInDrive()) ++ { ++ CLog::Log(LOGERROR, "file cdda: Aborted because no disc in drive or no m_pCdIo"); + return -1; ++ } + +- // Are there enough sectors left to read +- if (m_lsnCurrent + iSectorCount > m_lsnEnd) +- iSectorCount = m_lsnEnd - m_lsnCurrent; ++ uiBufSize = std::min( uiBufSize, (size_t)SSIZE_MAX ); + +- // The loop tries to solve read error problem by lowering number of sectors to read (iSectorCount). +- // When problem is solved the proper number of sectors is stored in m_iSectorCount +- int big_iSectorCount = iSectorCount; +- while (iSectorCount > 0) ++ // If we have data in the TrackBuffer, they must be used first ++ if (f_TrackBuf) + { +- int iret = m_cdio->cdio_read_audio_sectors(m_pCdIo, lpBuf, m_lsnCurrent, iSectorCount); ++ // Get at most the remaining data in m_TrackBuf ++ uiBufSize = std::min(uiBufSize, CDIO_CD_FRAMESIZE_RAW - p_TrackBuf); ++ memcpy(lpBuf, m_TrackBuf + p_TrackBuf, uiBufSize); ++ // Update the data offset ++ p_TrackBuf += uiBufSize; ++ // Keep the flag set only while m_TrackBuf still holds unread data
++ f_TrackBuf = (CDIO_CD_FRAMESIZE_RAW != p_TrackBuf); ++ // All done, return read bytes ++ return uiBufSize; ++ } ++ ++ // No data left in buffer ++ ++ // Is this a short read? ++ if (uiBufSize < CDIO_CD_FRAMESIZE_RAW) ++ { ++ // short request, buffer one full sector ++ iSectorCount = 1; ++ destBuf = m_TrackBuf; ++ } ++ else // normal request ++ { ++ // limit number of sectors that fits in buffer by m_iSectorCount ++ iSectorCount = std::min((int)uiBufSize / CDIO_CD_FRAMESIZE_RAW, m_iSectorCount); ++ destBuf = lpBuf; ++ } + ++ // Are there enough sectors left to read? ++ iSectorCount = std::min(iSectorCount, m_lsnEnd - m_lsnCurrent); ++ ++ // Have we reached EOF? ++ if (iSectorCount == 0) ++ { ++ CLog::Log(LOGNOTICE, "file cdda: Read EoF"); ++ return 0; // Success, but nothing read ++ } // Reached EoF ++ ++ // At least one sector to read ++ int retries; ++ int iret; ++ // Try reading a decreasing number of sectors, then 3 times with 1 sector ++ for (retries = 3; retries > 0; iSectorCount>1 ?
iSectorCount-- : retries--) ++ { ++ iret = m_cdio->cdio_read_audio_sectors(m_pCdIo, destBuf, m_lsnCurrent, iSectorCount); + if (iret == DRIVER_OP_SUCCESS) ++ break; // Get out from the loop ++ else + { +- // If lower iSectorCount solved the problem limit it's value +- if (iSectorCount < big_iSectorCount) +- { +- m_iSectorCount = iSectorCount; +- } +- break; +- } +- +- // iSectorCount is low so it cannot solve read problem +- if (iSectorCount <= 10) +- { +- CLog::Log(LOGERROR, "file cdda: Reading %d sectors of audio data starting at lsn %d failed with error code %i", iSectorCount, m_lsnCurrent, iret); +- return -1; +- } +- +- iSectorCount = 10; ++ CLog::Log(LOGERROR, "file cdda: Read cdio error when reading track "); ++ } // Errors when reading file + } ++ // retries == 0 only if failed reading at least one sector ++ if (retries == 0) ++ { ++ CLog::Log(LOGERROR, "file cdda: Reading %d sectors of audio data starting at lsn %d failed with error code %i", iSectorCount, m_lsnCurrent, iret); ++ return -1; ++ } ++ ++ // Update position in file + m_lsnCurrent += iSectorCount; + ++ // Was it a short request?
++ if (uiBufSize < CDIO_CD_FRAMESIZE_RAW) ++ { ++ // We copy the amount of requested data into the destination buffer ++ memcpy(lpBuf, m_TrackBuf, uiBufSize); ++ // and keep track of the first available data ++ p_TrackBuf = uiBufSize; ++ // Finally, we set the buffer flag as TRUE ++ f_TrackBuf = true; ++ // We will return uiBufSize ++ return uiBufSize; ++ } ++ ++ // Otherwise, just return the size of read data + return iSectorCount*CDIO_CD_FRAMESIZE_RAW; + } + +@@ -194,6 +249,9 @@ int64_t CFileCDDA::Seek(int64_t iFilePosition, int iWhence /*=SEEK_SET*/) + + void CFileCDDA::Close() + { ++ // Flag TrackBuffer = FALSE, TrackBuffer is empty ++ f_TrackBuf = 0; ++ + if (m_pCdIo) + { + m_cdio->cdio_destroy(m_pCdIo); +diff --git a/xbmc/filesystem/CDDAFile.h b/xbmc/filesystem/CDDAFile.h +index 0427af4534bfe59a343f0518c7f4242d93299836..e99236294fa8b9b613e465a8ecaf3ad3ba8b5a6f 100644 +--- a/xbmc/filesystem/CDDAFile.h ++++ b/xbmc/filesystem/CDDAFile.h +@@ -50,6 +50,9 @@ protected: + + protected: + CdIo_t* m_pCdIo; ++ uint8_t *m_TrackBuf; ++ size_t p_TrackBuf; ++ int f_TrackBuf; + lsn_t m_lsnStart; // Start of m_iTrack in logical sector number + lsn_t m_lsnCurrent; // Position inside the track in logical sector number + lsn_t m_lsnEnd; // End of m_iTrack in logical sector number + +From d83b2890c3b4135d505f202f9081f59a1fd7b065 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 24 Jun 2016 19:38:13 +0100 +Subject: [PATCH 24/67] codecoverlay: Include codec name in overlay + +--- + xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp | 4 ++++ + xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp | 5 ++++- + xbmc/cores/omxplayer/OMXPlayerAudio.cpp | 4 ++++ + xbmc/cores/omxplayer/OMXPlayerVideo.cpp | 8 +++++--- + xbmc/cores/omxplayer/OMXPlayerVideo.h | 1 - + 5 files changed, 17 insertions(+), 5 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +index
24228154ecf99911f74407d73d280778e6f98fcd..188b85b12b86f887324cdcfda3c3aa4cd90d3a11 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +@@ -210,6 +210,10 @@ void CVideoPlayerAudio::UpdatePlayerInfo() + std::ostringstream s; + s << "aq:" << std::setw(2) << std::min(99,m_messageQueue.GetLevel()) << "%"; + s << ", Kb/s:" << std::fixed << std::setprecision(2) << (double)GetAudioBitrate() / 1024.0; ++ s << ", ac:" << m_processInfo.GetAudioDecoderName().c_str(); ++ if (!m_info.passthrough) ++ s << ", chan:" << m_processInfo.GetAudioChannels().c_str(); ++ s << ", " << m_streaminfo.samplerate/1000 << " kHz"; + + //print the inverse of the resample ratio, since that makes more sense + //if the resample ratio is 0.5, then we're playing twice as fast +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +index fd260d4378f6b13a158a57a2493e59cbab1f7d9d..f6d1b8572c6a4a8b4a193ebfc9d36d85ccd2d819 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +@@ -909,10 +909,13 @@ int CVideoPlayerVideo::OutputPicture(const DVDVideoPicture* src, double pts) + + std::string CVideoPlayerVideo::GetPlayerInfo() + { ++ int width, height; ++ m_processInfo.GetVideoDimensions(width, height); + std::ostringstream s; + s << "vq:" << std::setw(2) << std::min(99,GetLevel()) << "%"; + s << ", Mb/s:" << std::fixed << std::setprecision(2) << (double)GetVideoBitrate() / (1024.0*1024.0); +- s << ", fr:" << std::fixed << std::setprecision(3) << m_fFrameRate; ++ s << ", dc:" << m_processInfo.GetVideoDecoderName().c_str(); ++ s << ", " << width << "x" << height << "[" << std::setprecision(2) << m_processInfo.GetVideoDAR() << "]@" << std::fixed << std::setprecision(3) << m_processInfo.GetVideoFps() << ", deint:" << m_processInfo.GetVideoDeintMethod(); + s << ", drop:" << m_iDroppedFrames; + s << ", skip:" << m_renderManager.GetSkippedFrames(); + +diff 
--git a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +index 1e5d2b98bbef15b47994c3e4735873a9946b58c7..d43350fa0eefb5960475a02c1327efc24d138e0f 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +@@ -659,6 +659,10 @@ std::string OMXPlayerAudio::GetPlayerInfo() + std::ostringstream s; + s << "aq:" << std::setw(2) << std::min(99,m_messageQueue.GetLevel() + MathUtils::round_int(100.0/8.0*GetCacheTime())) << "%"; + s << ", Kb/s:" << std::fixed << std::setprecision(2) << (double)GetAudioBitrate() / 1024.0; ++ s << ", ac:" << m_processInfo.GetAudioDecoderName().c_str(); ++ if (!m_passthrough) ++ s << ", chan:" << m_processInfo.GetAudioChannels().c_str(); ++ s << ", " << m_processInfo.GetAudioSampleRate()/1000 << " kHz"; + + return s.str(); + } +diff --git a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +index 6efd0d51df46a530dd05b3add639f38a939cf92d..d61dc4f2668f8aca91bce79cfb631034061c491c 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +@@ -583,12 +583,14 @@ void OMXPlayerVideo::SetSpeed(int speed) + + std::string OMXPlayerVideo::GetPlayerInfo() + { ++ int width, height; ++ m_processInfo.GetVideoDimensions(width, height); + double match = 0.0f, phase = 0.0f, pll = 0.0f; + std::ostringstream s; +- s << "fr:" << std::fixed << std::setprecision(3) << m_fFrameRate; +- s << ", vq:" << std::setw(2) << std::min(99,GetLevel()) << "%"; +- s << ", dc:" << m_codecname; ++ s << "vq:" << std::setw(2) << std::min(99,GetLevel()) << "%"; + s << ", Mb/s:" << std::fixed << std::setprecision(2) << (double)GetVideoBitrate() / (1024.0*1024.0); ++ s << ", dc:" << m_processInfo.GetVideoDecoderName().c_str(); ++ s << ", " << width << "x" << height << "[" << std::setprecision(2) << m_processInfo.GetVideoDAR() << "]@" << std::fixed << std::setprecision(3) << m_processInfo.GetVideoFps() << ", deint:" << 
m_processInfo.GetVideoDeintMethod(); + if (m_omxVideo.GetPlayerInfo(match, phase, pll)) + { + s << ", match:" << std::fixed << std::setprecision(2) << match; +diff --git a/xbmc/cores/omxplayer/OMXPlayerVideo.h b/xbmc/cores/omxplayer/OMXPlayerVideo.h +index 0df7e72cc9d1947173c2bac5e72eb09976b51aa5..b5050081c360d29b1b478c27e6b88291e20ecdac 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerVideo.h ++++ b/xbmc/cores/omxplayer/OMXPlayerVideo.h +@@ -55,7 +55,6 @@ protected: + bool m_stalled; + IDVDStreamPlayer::ESyncState m_syncState; + bool m_flush; +- std::string m_codecname; + std::atomic_bool m_bAbortOutput; + double m_iSubtitleDelay; + bool m_bRenderSubs; + +From e46e93d403ab9f6cb6f61a5b7ac39f347bcf6089 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin +Date: Tue, 8 Mar 2016 21:20:58 +0300 +Subject: [PATCH 25/67] [DebugInfo] Add cpu usage info. + +--- + .../VideoPlayer/VideoRenderers/DebugRenderer.cpp | 56 ++++++++-------------- + .../VideoPlayer/VideoRenderers/DebugRenderer.h | 9 ++-- + .../VideoPlayer/VideoRenderers/RenderManager.cpp | 7 ++- + 3 files changed, 30 insertions(+), 42 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.cpp +index 846868967e15309c22b4ee46795a914230dc65d1..36be6ce222cfae9680af6834e934d88495a55950 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.cpp +@@ -27,7 +27,7 @@ using namespace OVERLAY; + + CDebugRenderer::CDebugRenderer() + { +- for (int i=0; i<4; i++) ++ for (int i = 0; iRelease(); + } + } + +-void CDebugRenderer::SetInfo(std::string &info1, std::string &info2, std::string &info3, std::string &info4) ++void CDebugRenderer::SetInfo(std::vector &infos) + { + m_overlayRenderer.Release(0); + +- if (info1 != m_strDebug[0]) ++ for (size_t i = 0; i < std::min(infos.size(), (size_t)DEBUG_OVERLAY_COUNT_MAX); i++) + { +- m_strDebug[0] = info1; +- if (m_overlay[0]) +- 
m_overlay[0]->Release(); +- m_overlay[0] = new CDVDOverlayText(); +- m_overlay[0]->AddElement(new CDVDOverlayText::CElementText(m_strDebug[0])); +- } +- if (info2 != m_strDebug[1]) +- { +- m_strDebug[1] = info2; +- if (m_overlay[1]) +- m_overlay[1]->Release(); +- m_overlay[1] = new CDVDOverlayText(); +- m_overlay[1]->AddElement(new CDVDOverlayText::CElementText(m_strDebug[1])); +- } +- if (info3 != m_strDebug[2]) +- { +- m_strDebug[2] = info3; +- if (m_overlay[2]) +- m_overlay[2]->Release(); +- m_overlay[2] = new CDVDOverlayText(); +- m_overlay[2]->AddElement(new CDVDOverlayText::CElementText(m_strDebug[2])); ++ if (infos[i] != m_strDebug[i]) ++ { ++ if (infos[i].empty()) ++ continue; ++ m_strDebug[i] = infos[i]; ++ if (m_overlay[i]) ++ m_overlay[i]->Release(); ++ m_overlay[i] = new CDVDOverlayText(); ++ m_overlay[i]->AddElement(new CDVDOverlayText::CElementText(m_strDebug[i])); ++ } ++ + } +- if (info4 != m_strDebug[3]) ++ for (size_t i = 0; i < DEBUG_OVERLAY_COUNT_MAX; i++) + { +- m_strDebug[3] = info4; +- if (m_overlay[3]) +- m_overlay[3]->Release(); +- m_overlay[3] = new CDVDOverlayText(); +- m_overlay[3]->AddElement(new CDVDOverlayText::CElementText(m_strDebug[3])); ++ if (m_overlay[i]) ++ m_overlayRenderer.AddOverlay(m_overlay[i], 0, 0); + } +- +- m_overlayRenderer.AddOverlay(m_overlay[0], 0, 0); +- m_overlayRenderer.AddOverlay(m_overlay[1], 0, 0); +- m_overlayRenderer.AddOverlay(m_overlay[2], 0, 0); +- m_overlayRenderer.AddOverlay(m_overlay[3], 0, 0); + } + + void CDebugRenderer::Render(CRect &src, CRect &dst, CRect &view) +@@ -120,7 +102,7 @@ void CDebugRenderer::CRenderer::Render(int idx) + + COverlayText *text = dynamic_cast(o); + if (text) +- text->PrepareRender("arial.ttf", 1, 16, 0, m_font, m_fontBorder); ++ text->PrepareRender("arial.ttf", 1, 12, 0, m_font, m_fontBorder); + + o->m_pos = COverlay::POSITION_ABSOLUTE; + o->m_align = COverlay::ALIGN_SCREEN; +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.h 
b/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.h +index 85aefaace73994730f7d2bdff9de85c79e99b2a2..8005a13bc220be0c5c596d276197c11ed938ffb0 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.h ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.h +@@ -22,6 +22,9 @@ + + #include "OverlayRenderer.h" + #include ++#include ++ ++#define DEBUG_OVERLAY_COUNT_MAX 6 + + class CDVDOverlayText; + +@@ -30,7 +33,7 @@ class CDebugRenderer + public: + CDebugRenderer(); + virtual ~CDebugRenderer(); +- void SetInfo(std::string &info1, std::string &info2, std::string &info3, std::string &info4); ++ void SetInfo(std::vector &infos); + void Render(CRect &src, CRect &dst, CRect &view); + void Flush(); + +@@ -43,7 +46,7 @@ protected: + void Render(int idx) override; + }; + +- std::string m_strDebug[4]; +- CDVDOverlayText *m_overlay[4]; ++ std::string m_strDebug[DEBUG_OVERLAY_COUNT_MAX]; ++ CDVDOverlayText *m_overlay[DEBUG_OVERLAY_COUNT_MAX]; + CRenderer m_overlayRenderer; + }; +\ No newline at end of file +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp +index 93f8d6f292accf34e153fa4d3dd982e5a4b4fded..db537d33a5d55fc856bbd3ec0a7846df3bb060be 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp +@@ -24,6 +24,7 @@ + #include "guilib/GraphicContext.h" + #include "utils/MathUtils.h" + #include "threads/SingleLock.h" ++#include "utils/CPUInfo.h" + #include "utils/log.h" + #include "utils/StringUtils.h" + #include "windowing/WindowingFactory.h" +@@ -926,7 +927,7 @@ void CRenderManager::Render(bool clear, DWORD flags, DWORD alpha, bool gui) + + if (m_renderDebug) + { +- std::string audio, video, player, vsync; ++ std::string audio, video, player, vsync, cpu; + + m_playerPort->GetDebugInfo(audio, video, player); + +@@ -940,8 +941,10 @@ void CRenderManager::Render(bool clear, DWORD flags, DWORD alpha, bool gui) 
+ missedvblanks, + clockspeed - 100.0); + } ++ cpu = g_cpuInfo.GetCoresUsageString(); + +- m_debugRenderer.SetInfo(audio, video, player, vsync); ++ std::vector infos = { audio, video, player, vsync, cpu }; ++ m_debugRenderer.SetInfo(infos); + m_debugRenderer.Render(src, dst, view); + + m_debugTimer.Set(1000); + +From e674b4137eb3ffe70a0bae619e24862ceae51b25 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 22 May 2015 13:56:29 +0100 +Subject: [PATCH 26/67] ffmpeg: Allow neon to be enabled in unified builds + +--- + tools/depends/target/ffmpeg/Makefile | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index dffe2da1dfd09e06c5f15c362f7cbe3cf2a26f75..4081dddb6bc2db53559d35506cad6af4cd668362 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -23,7 +23,11 @@ ffmpg_config += --enable-gnutls + ffmpg_config += --enable-encoder=png --enable-encoder=mjpeg + + ifeq ($(CROSS_COMPILING), yes) ++ ifeq ($(CPU), cortex-a7) ++ ffmpg_config += --arch=arm --enable-cross-compile ++ else + ffmpg_config += --arch=$(CPU) --enable-cross-compile ++ endif + endif + ifeq ($(OS), linux) + ffmpg_config += --target-os=$(OS) --cpu=$(CPU) + +From c575384e34c27d1ffb70e0181e45bc4078b1d2ac Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 27 Feb 2015 14:37:27 +0000 +Subject: [PATCH 27/67] ffmpeg: Add some upstream HEVC optimisations + +--- + tools/depends/target/ffmpeg/Makefile | 6 +- + .../added_ARM_NEON_optimized_SAO_patches.patch | 3328 ++++++++++++++++++++ + tools/depends/target/ffmpeg/autobuild.sh | 3 + + ...hevcdsp_ARM_NEON_optimized_epel_functions.patch | 409 +++ + 4 files changed, 3745 insertions(+), 1 deletion(-) + create mode 100644 tools/depends/target/ffmpeg/added_ARM_NEON_optimized_SAO_patches.patch + create mode 100644 tools/depends/target/ffmpeg/hevcdsp_ARM_NEON_optimized_epel_functions.patch + +diff --git 
a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index 4081dddb6bc2db53559d35506cad6af4cd668362..d9db534dd8c59a4993a3509737d901fbb3923de8 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -1,7 +1,8 @@ + include ../../Makefile.include + include FFMPEG-VERSION + DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ +- 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch ++ 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch \ ++ hevcdsp_ARM_NEON_optimized_epel_functions.patch added_ARM_NEON_optimized_SAO_patches.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -78,6 +79,9 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); sed -i".bak" -e "s%pkg_config_default=pkg-config%export PKG_CONFIG_LIBDIR=$(PREFIX)/lib/pkgconfig \&\& pkg_config_default=$(NATIVEPREFIX)/bin/pkg-config%" configure + cd $(PLATFORM); patch -p1 < ../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch ++ cd $(PLATFORM); patch -p1 < ../hevcdsp_ARM_NEON_optimized_epel_functions.patch ++ cd $(PLATFORM); patch -p1 < ../added_ARM_NEON_optimized_SAO_patches.patch ++ + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ + ./configure $(ffmpg_config) +diff --git a/tools/depends/target/ffmpeg/added_ARM_NEON_optimized_SAO_patches.patch b/tools/depends/target/ffmpeg/added_ARM_NEON_optimized_SAO_patches.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..792b5fea581613a6fe9108443357f97518f4b4db +--- /dev/null ++++ b/tools/depends/target/ffmpeg/added_ARM_NEON_optimized_SAO_patches.patch +@@ -0,0 +1,3328 @@ ++From b0cb307c253d2c9f4b94a54dfc74ddb83af984cc Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi ++Date: Mon, 8 Dec 2014 13:24:40 +0200 
++Subject: [PATCH 1/9] added ARM NEON optimized SAO band offset ++ ++--- ++ libavcodec/arm/Makefile | 3 +- ++ libavcodec/arm/hevcdsp_init_neon.c | 47 +++++++++ ++ libavcodec/arm/hevcdsp_sao_neon.S | 204 +++++++++++++++++++++++++++++++++++++ ++ 3 files changed, 253 insertions(+), 1 deletion(-) ++ create mode 100644 libavcodec/arm/hevcdsp_sao_neon.S ++ ++diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile ++index 6051ec8..093a2e8 100644 ++--- a/libavcodec/arm/Makefile +++++ b/libavcodec/arm/Makefile ++@@ -133,7 +133,8 @@ NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_neon.o \ ++ arm/hevcdsp_deblock_neon.o \ ++ arm/hevcdsp_epel_neon.o \ ++ arm/hevcdsp_idct_neon.o \ ++- arm/hevcdsp_qpel_neon.o +++ arm/hevcdsp_qpel_neon.o \ +++ arm/hevcdsp_sao_neon.o ++ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o ++ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \ ++ arm/rv40dsp_neon.o ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 733ff08..69e2b2c 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -22,6 +22,7 @@ ++ #include "libavutil/arm/cpu.h" ++ #include "libavcodec/hevcdsp.h" ++ #include "hevcdsp_arm.h" +++#include "../bit_depth_template.c" ++ ++ void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ++ void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ++@@ -43,6 +44,11 @@ void ff_hevc_transform_add_16x16_neon_8(uint8_t *_dst, int16_t *coeffs, ++ void ff_hevc_transform_add_32x32_neon_8(uint8_t *_dst, int16_t *coeffs, ++ ptrdiff_t stride); ++ +++void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t 
* offset_table); +++void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++ ++ #define PUT_PIXELS(name) \ ++ void name(int16_t *dst, uint8_t *src, \ ++ ptrdiff_t srcstride, int height, \ ++@@ -151,6 +157,44 @@ void ff_hevc_put_qpel_bi_neon_wrapper(uint8_t *dst, ptrdiff_t dststride, uint8_t ++ put_hevc_qpel_uw_neon[my][mx](dst, dststride, src, srcstride, width, height, src2, MAX_PB_SIZE); ++ } ++ +++static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, +++ int16_t *sao_offset_val, int sao_left_class, int width, int height) +++{ +++ pixel *dst = (pixel *)_dst; +++ pixel *src = (pixel *)_src; +++ int8_t offset_table[32] = { 0 }; +++ int k, y, x; +++ int shift = 3; // BIT_DEPTH - 5 +++ +++ stride_src /= sizeof(pixel); +++ stride_dst /= sizeof(pixel); +++ +++ for (k = 0; k < 4; k++) +++ offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1]; +++ +++ switch(width){ +++ case 8: +++ ff_hevc_sao_band_w8_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ break; +++ case 16: +++ ff_hevc_sao_band_w16_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ break; +++ case 32: +++ ff_hevc_sao_band_w32_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ break; +++ case 64: +++ ff_hevc_sao_band_w64_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ break; +++ default: +++ for (y = 0; y < height; y++) { +++ for (x = 0; x < width; x++) +++ dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]); +++ dst += stride_dst; +++ src += stride_src; +++ } +++ } +++} +++ ++ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ { ++ if (bit_depth == 8) { ++@@ -170,6 +214,9 @@ av_cold void 
ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ c->transform_add[2] = ff_hevc_transform_add_16x16_neon_8; ++ c->transform_add[3] = ff_hevc_transform_add_32x32_neon_8; ++ c->idct_4x4_luma = ff_hevc_transform_luma_4x4_neon_8; +++ for (x = 0; x < sizeof c->sao_band_filter / sizeof *c->sao_band_filter; x++) { +++ c->sao_band_filter[x] = ff_hevc_sao_band_neon_wrapper; +++ } ++ put_hevc_qpel_neon[1][0] = ff_hevc_put_qpel_v1_neon_8; ++ put_hevc_qpel_neon[2][0] = ff_hevc_put_qpel_v2_neon_8; ++ put_hevc_qpel_neon[3][0] = ff_hevc_put_qpel_v3_neon_8; ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++new file mode 100644 ++index 0000000..1f0ad64 ++--- /dev/null +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -0,0 +1,204 @@ +++/* +++ * Copyright (c) 2014 Seppo Tomperi +++ * +++ * This file is part of FFmpeg. +++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. 
+++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++#include "libavutil/arm/asm.S" +++#include "neon.S" +++ +++function ff_hevc_sao_band_w8_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // offset_table +++ vpush {d8-d15} +++ vld1.8 {q0, q1}, [r5] // offset table +++ +++1: subs r4, #1 +++ vld1.8 {d24}, [r1], r3 +++ vshr.u8 d16, d24, #3 +++ vtbl.8 d16, {q0, q1}, d16 +++ vmovl.s8 q2, d16 +++ vmovl.u8 q6, d24 +++ vadd.s16 q2, q6 +++ vqmovun.s16 d4, q2 +++ vst1.8 {d4}, [r0], r2 +++ bne 1b +++ +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_band_w16_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // offset_table +++ vpush {d8-d15} +++ vld1.8 {q0, q1}, [r5] // offset table +++ +++1: subs r4, #1 +++ vld1.8 {q12}, [r1], r3 +++ +++ vshr.u8 q8, q12, #3 +++ +++ vtbl.8 d16, {q0, q1}, d16 +++ vtbl.8 d17, {q0, q1}, d17 +++ +++ vmovl.s8 q2, d16 +++ vmovl.s8 q3, d17 +++ +++ vmovl.u8 q6, d24 +++ vmovl.u8 q7, d25 +++ +++ vadd.s16 q2, q6 +++ vadd.s16 q3, q7 +++ +++ vqmovun.s16 d4, q2 +++ vqmovun.s16 d5, q3 +++ +++ vstm.8 r0, {q2} +++ add r0, r2 +++ bne 1b +++ +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_band_w32_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // offset_table +++ vpush {d8-d15} +++ vld1.8 {q0, q1}, [r5] // offset table +++ +++1: subs r4, #1 +++ vld1.8 {q12-q13}, [r1], r3 +++ +++ vshr.u8 q8, q12, #3 +++ vshr.u8 q9, q13, #3 +++ +++ vtbl.8 d16, {q0, q1}, d16 +++ vtbl.8 d17, {q0, q1}, d17 +++ vtbl.8 d18, {q0, q1}, d18 +++ vtbl.8 d19, {q0, q1}, d19 +++ +++ vmovl.s8 q2, d16 +++ vmovl.s8 q3, d17 // q8 free +++ vmovl.s8 q4, d18 +++ vmovl.s8 q5, d19 // q9 free +++ +++ vmovl.u8 q6, d24 +++ vmovl.u8 
q7, d25 // q12 free +++ vmovl.u8 q8, d26 +++ vmovl.u8 q9, d27 // q13 free +++ +++ vadd.s16 q2, q6 +++ vadd.s16 q3, q7 +++ vadd.s16 q4, q8 +++ vadd.s16 q5, q9 +++ +++ vqmovun.s16 d4, q2 +++ vqmovun.s16 d5, q3 +++ vqmovun.s16 d6, q4 // q4 free +++ vqmovun.s16 d7, q5 // q5 free +++ +++ vst1.8 {q2-q3}, [r0], r2 +++ bne 1b +++ +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_band_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // offset_table +++ vpush {d8-d15} +++ vld1.8 {q0, q1}, [r5] // offset table +++ +++1: subs r4, #1 +++ vld1.8 {q12-q13}, [r1]! +++ vld1.8 {q14-q15}, [r1], r3 +++ sub r1, #32 +++ +++ vshr.u8 q8, q12, #3 +++ vshr.u8 q9, q13, #3 +++ vshr.u8 q10, q14, #3 +++ vshr.u8 q11, q15, #3 +++ +++ vtbl.8 d16, {q0, q1}, d16 +++ vtbl.8 d17, {q0, q1}, d17 +++ vtbl.8 d18, {q0, q1}, d18 +++ vtbl.8 d19, {q0, q1}, d19 +++ vtbl.8 d20, {q0, q1}, d20 +++ vtbl.8 d21, {q0, q1}, d21 +++ vtbl.8 d22, {q0, q1}, d22 +++ vtbl.8 d23, {q0, q1}, d23 +++ +++ vmovl.s8 q2, d16 +++ vmovl.s8 q3, d17 // q8 free +++ vmovl.s8 q4, d18 +++ vmovl.s8 q5, d19 // q9 free +++ +++ vmovl.u8 q6, d24 +++ vmovl.u8 q7, d25 // q12 free +++ vmovl.u8 q8, d26 +++ vmovl.u8 q9, d27 // q13 free +++ +++ vadd.s16 q2, q6 +++ vadd.s16 q3, q7 +++ vadd.s16 q4, q8 +++ vadd.s16 q5, q9 +++ +++ vqmovun.s16 d4, q2 +++ vqmovun.s16 d5, q3 +++ vqmovun.s16 d6, q4 // q4 free +++ vqmovun.s16 d7, q5 // q5 free +++ +++ // free q4 -q9, q12 - q13 +++ vmovl.s8 q4, d20 +++ vmovl.s8 q5, d21 // q10 free +++ vmovl.s8 q6, d22 +++ vmovl.s8 q7, d23 // q11 free +++ +++ vmovl.u8 q8, d28 +++ vmovl.u8 q9, d29 // q14 free +++ vmovl.u8 q10, d30 +++ vmovl.u8 q11, d31 // q15 free +++ +++ vadd.s16 q4, q8 +++ vadd.s16 q5, q9 +++ vadd.s16 q6, q10 +++ vadd.s16 q7, q11 +++ +++ vqmovun.s16 d8, q4 +++ vqmovun.s16 d9, q5 +++ vqmovun.s16 d10, q6 +++ vqmovun.s16 d11, q7 +++ +++ vstm.8 r0, {q2-q5} +++ add r0, r2 +++ bne 1b +++ +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc 
+++ ++-- ++2.5.0 ++ ++ ++From 8429b1de64bb871d57651ecfe3b084e2dfe0af51 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi ++Date: Wed, 27 May 2015 18:10:20 +0100 ++Subject: [PATCH 2/9] added NEON optimized sao edge for eo1 width 64 ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 47 ++++++++++++ ++ libavcodec/arm/hevcdsp_sao_neon.S | 147 +++++++++++++++++++++++++++++++++++++ ++ 2 files changed, 194 insertions(+) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 69e2b2c..c7b5404 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -22,6 +22,7 @@ ++ #include "libavutil/arm/cpu.h" ++ #include "libavcodec/hevcdsp.h" ++ #include "hevcdsp_arm.h" +++#include "libavcodec/avcodec.h" ++ #include "../bit_depth_template.c" ++ ++ void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ++@@ -48,6 +49,7 @@ void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_d ++ void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++ void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++ void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++void ff_hevc_sao_edge_eo1_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++ ++ #define PUT_PIXELS(name) \ ++ void name(int16_t *dst, uint8_t *src, \ ++@@ -195,6 +197,50 @@ static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_ ++ } ++ } ++ +++#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 
0 : -1)) +++static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t *_src /* align 32 */, ptrdiff_t stride_dst, +++ int16_t *_sao_offset_val, int eo, int width, int height) +++{ +++ static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; +++ static const int8_t pos[4][2][2] = { +++ { { -1, 0 }, { 1, 0 } }, // horizontal +++ { { 0, -1 }, { 0, 1 } }, // vertical +++ { { -1, -1 }, { 1, 1 } }, // 45 degree +++ { { 1, -1 }, { -1, 1 } }, // 135 degree +++ }; +++ int8_t sao_offset_val[8]; // padding of 3 for vld +++ ptrdiff_t stride_src = (2*MAX_PB_SIZE + FF_INPUT_BUFFER_PADDING_SIZE); +++ pixel *dst = (pixel *)_dst; +++ pixel *src = (pixel *)_src; +++ int a_stride, b_stride; +++ int x, y; +++ +++ for (x = 0; x < 5; x++) { +++ sao_offset_val[x] = _sao_offset_val[x]; +++ } +++ +++ stride_src /= sizeof(pixel); +++ stride_dst /= sizeof(pixel); +++ +++ if (eo == 1 && width == 64) { +++ ff_hevc_sao_edge_eo1_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ } else { +++ a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src; +++ b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src; +++ for (y = 0; y < height; y++) { +++ for (x = 0; x < width; x++) { +++ int diff0 = CMP(src[x], src[x + a_stride]); +++ int diff1 = CMP(src[x], src[x + b_stride]); +++ int offset_val = edge_idx[2 + diff0 + diff1]; +++ dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]); +++ } +++ src += stride_src; +++ dst += stride_dst; +++ } +++ } +++} +++#undef CMP +++ ++ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ { ++ if (bit_depth == 8) { ++@@ -216,6 +262,7 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ c->idct_4x4_luma = ff_hevc_transform_luma_4x4_neon_8; ++ for (x = 0; x < sizeof c->sao_band_filter / sizeof *c->sao_band_filter; x++) { ++ c->sao_band_filter[x] = ff_hevc_sao_band_neon_wrapper; +++ c->sao_edge_filter[x] = ff_hevc_sao_edge_neon_wrapper; ++ } ++ put_hevc_qpel_neon[1][0] = 
ff_hevc_put_qpel_v1_neon_8; ++ put_hevc_qpel_neon[2][0] = ff_hevc_put_qpel_v2_neon_8; ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 1f0ad64..5ec2de9 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -202,3 +202,150 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ bx lr ++ endfunc ++ +++function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x02 +++ vpush {d8-d15} +++1: subs r4, #1 +++ // load a +++ sub r1, r3 +++ vld1.8 {q0-q1}, [r1]! +++ vld1.8 {q2-q3}, [r1], r3 +++ sub r1, #32 +++ // load c +++ vld1.8 {q4-q5}, [r1]! +++ vld1.8 {q6-q7}, [r1], r3 +++ sub r1, #32 +++ // load b +++ vld1.8 {q8-q9}, [r1]! +++ vld1.8 {q10-q11}, [r1], r3 +++ sub r1, #32 +++ +++ vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 +++ vcgt.u8 q13, q5, q1 +++ vcgt.u8 q1, q1, q5 +++ vcgt.u8 q14, q6, q2 +++ vcgt.u8 q2, q2, q6 +++ vcgt.u8 q15, q7, q3 +++ vcgt.u8 q3, q3, q7 +++ +++ vsub.s8 q12, q0, q12 // diff0 +++ vsub.s8 q13, q1, q13 +++ vsub.s8 q14, q2, q14 +++ vsub.s8 q15, q3, q15 +++ +++ vcgt.u8 q0, q4, q8 // c > b +++ vcgt.u8 q8, q8, q4 // b > c +++ vcgt.u8 q1, q5, q9 +++ vcgt.u8 q9, q9, q5 +++ vcgt.u8 q2, q6, q10 +++ vcgt.u8 q10, q10, q6 +++ vcgt.u8 q3, q7, q11 +++ vcgt.u8 q11, q11, q7 +++ +++ vsub.s8 q0, q8, q0 // diff1 +++ vsub.s8 q1, q9, q1 +++ vsub.s8 q2, q10, q2 +++ vsub.s8 q3, q11, q3 +++ +++ veor.u8 q8, q8 // zero register +++ vdup.s8 q9, r6 // 2 to all elements +++ add r6, #1 +++ vdup.s8 q10, r6 // 3 to all elements +++ sub r6, #1 +++ +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 +++ vadd.s8 q2, q14 +++ vadd.s8 q3, q15 +++ +++ vcgt.s8 q4, q0, q8 // diff0 + diff1 > 0 +++ vcgt.s8 q5, q1, q8 +++ vcgt.s8 q6, q2, q8 +++ vcgt.s8 q7, q3, q8 +++ +++ vclt.s8 q11, q0, q8 // diff0 + diff1 < 0 +++ vclt.s8 q12, q1, q8 +++ vclt.s8 q13, 
q2, q8 +++ vclt.s8 q14, q3, q8 +++ +++ vadd.s8 q8, q0, q9 // diff0 + diff1 + 2 +++ vand.8 q15, q8, q4 +++ vadd.s8 q8, q0, q10 // diff0 + diff1 + 3 +++ vand.8 q8, q8, q11 +++ vadd.s8 q0, q15, q8 // offset_idx +++ +++ vadd.s8 q8, q1, q9 // diff0 + diff1 + 2 +++ vand.8 q15, q8, q5 +++ vadd.s8 q8, q1, q10 // diff0 + diff1 + 3 +++ vand.8 q8, q8, q12 +++ vadd.s8 q1, q15, q8 // offset_idx +++ +++ vadd.s8 q8, q2, q9 // diff0 + diff1 + 2 + 2 +++ vand.8 q15, q8, q6 +++ vadd.s8 q8, q2, q10 // diff0 + diff1 + 2 + 3 +++ vand.8 q8, q8, q13 +++ vadd.s8 q2, q15, q8 // offset_idx +++ +++ vadd.s8 q8, q3, q9 // diff0 + diff1 + 2 + 2 +++ vand.8 q15, q8, q7 +++ vadd.s8 q8, q3, q10 // diff0 + diff1 + 2 + 3 +++ vand.8 q8, q8, q14 +++ vadd.s8 q3, q15, q8 // offset_idx +++ // TODO: load only once +++ vld1.8 d16, [r5] +++ +++ vtbl.8 d0, {d16}, d0 +++ vtbl.8 d1, {d16}, d1 +++ vtbl.8 d2, {d16}, d2 +++ vtbl.8 d3, {d16}, d3 +++ vtbl.8 d4, {d16}, d4 +++ vtbl.8 d5, {d16}, d5 +++ vtbl.8 d6, {d16}, d6 +++ vtbl.8 d7, {d16}, d7 +++ +++ // TODO: load only once +++ // load c again +++ sub r1, r3 +++ sub r1, r3 +++ vld1.8 {q4-q5}, [r1]! 
+++ vld1.8 {q6-q7}, [r1], r3 +++ sub r1, #32 +++ +++ vmovl.u8 q8, d8 +++ vmovl.u8 q9, d9 +++ vmovl.u8 q10, d10 +++ vmovl.u8 q11, d11 +++ vmovl.u8 q12, d12 +++ vmovl.u8 q13, d13 +++ vmovl.u8 q14, d14 +++ vmovl.u8 q15, d15 +++ +++ vaddw.s8 q8, d0 +++ vaddw.s8 q9, d1 +++ vaddw.s8 q10, d2 +++ vaddw.s8 q11, d3 +++ vaddw.s8 q12, d4 +++ vaddw.s8 q13, d5 +++ vaddw.s8 q14, d6 +++ vaddw.s8 q15, d7 +++ +++ vqmovun.s16 d0, q8 +++ vqmovun.s16 d1, q9 +++ vqmovun.s16 d2, q10 +++ vqmovun.s16 d3, q11 +++ vqmovun.s16 d4, q12 +++ vqmovun.s16 d5, q13 +++ vqmovun.s16 d6, q14 +++ vqmovun.s16 d7, q15 +++ +++ vstm r0, {q0-q3} +++ add r0, r2 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc ++-- ++2.5.0 ++ ++ ++From 402e2bd1c5ad659c757bf9734abe6331904fb9e2 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi ++Date: Tue, 16 Dec 2014 16:28:25 +0200 ++Subject: [PATCH 3/9] Added SAO edge offset for ARM NEON w32 and w64 ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 46 +++- ++ libavcodec/arm/hevcdsp_sao_neon.S | 510 +++++++++++++++++++++++++++++++------ ++ 2 files changed, 474 insertions(+), 82 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index c7b5404..c32940e 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -49,7 +49,16 @@ void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_d ++ void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++ void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++ void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++ +++void ff_hevc_sao_edge_eo0_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t 
*sao_offset_table); +++void ff_hevc_sao_edge_eo1_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); +++void ff_hevc_sao_edge_eo2_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); +++void ff_hevc_sao_edge_eo3_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); +++ +++void ff_hevc_sao_edge_eo0_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++ void ff_hevc_sao_edge_eo1_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); +++void ff_hevc_sao_edge_eo2_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); +++void ff_hevc_sao_edge_eo3_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++ ++ #define PUT_PIXELS(name) \ ++ void name(int16_t *dst, uint8_t *src, \ ++@@ -222,9 +231,40 @@ static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t ++ stride_src /= sizeof(pixel); ++ stride_dst /= sizeof(pixel); ++ ++- if (eo == 1 && width == 64) { ++- ff_hevc_sao_edge_eo1_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); ++- } else { +++ switch (width) { +++ case 32: +++ switch(eo) { +++ case 0: +++ ff_hevc_sao_edge_eo0_w32_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 1: +++ ff_hevc_sao_edge_eo1_w32_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 2: +++ ff_hevc_sao_edge_eo2_w32_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 3: +++ ff_hevc_sao_edge_eo3_w32_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ 
break; +++ } +++ break; +++ case 64: +++ switch(eo) { +++ case 0: +++ ff_hevc_sao_edge_eo0_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 1: +++ ff_hevc_sao_edge_eo1_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 2: +++ ff_hevc_sao_edge_eo2_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 3: +++ ff_hevc_sao_edge_eo3_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ } +++ break; +++ default: ++ a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src; ++ b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src; ++ for (y = 0; y < height; y++) { ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 5ec2de9..4687012 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -202,27 +202,7 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ bx lr ++ endfunc ++ ++-function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x02 ++- vpush {d8-d15} ++-1: subs r4, #1 ++- // load a ++- sub r1, r3 ++- vld1.8 {q0-q1}, [r1]! ++- vld1.8 {q2-q3}, [r1], r3 ++- sub r1, #32 ++- // load c ++- vld1.8 {q4-q5}, [r1]! ++- vld1.8 {q6-q7}, [r1], r3 ++- sub r1, #32 ++- // load b ++- vld1.8 {q8-q9}, [r1]! 
++- vld1.8 {q10-q11}, [r1], r3 ++- sub r1, #32 ++- +++.macro edge_w64_body ++ vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 ++ vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 ++ vcgt.u8 q13, q5, q1 ++@@ -251,69 +231,61 @@ function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 ++ vsub.s8 q2, q10, q2 ++ vsub.s8 q3, q11, q3 ++ ++- veor.u8 q8, q8 // zero register ++- vdup.s8 q9, r6 // 2 to all elements ++- add r6, #1 ++- vdup.s8 q10, r6 // 3 to all elements ++- sub r6, #1 ++- ++ vadd.s8 q0, q12 //diff0 + diff1 ++ vadd.s8 q1, q13 ++ vadd.s8 q2, q14 ++ vadd.s8 q3, q15 ++ ++- vcgt.s8 q4, q0, q8 // diff0 + diff1 > 0 ++- vcgt.s8 q5, q1, q8 ++- vcgt.s8 q6, q2, q8 ++- vcgt.s8 q7, q3, q8 ++- ++- vclt.s8 q11, q0, q8 // diff0 + diff1 < 0 ++- vclt.s8 q12, q1, q8 ++- vclt.s8 q13, q2, q8 ++- vclt.s8 q14, q3, q8 ++- ++- vadd.s8 q8, q0, q9 // diff0 + diff1 + 2 ++- vand.8 q15, q8, q4 ++- vadd.s8 q8, q0, q10 // diff0 + diff1 + 3 ++- vand.8 q8, q8, q11 ++- vadd.s8 q0, q15, q8 // offset_idx ++- ++- vadd.s8 q8, q1, q9 // diff0 + diff1 + 2 ++- vand.8 q15, q8, q5 ++- vadd.s8 q8, q1, q10 // diff0 + diff1 + 3 ++- vand.8 q8, q8, q12 ++- vadd.s8 q1, q15, q8 // offset_idx ++- ++- vadd.s8 q8, q2, q9 // diff0 + diff1 + 2 + 2 ++- vand.8 q15, q8, q6 ++- vadd.s8 q8, q2, q10 // diff0 + diff1 + 2 + 3 ++- vand.8 q8, q8, q13 ++- vadd.s8 q2, q15, q8 // offset_idx ++- ++- vadd.s8 q8, q3, q9 // diff0 + diff1 + 2 + 2 ++- vand.8 q15, q8, q7 ++- vadd.s8 q8, q3, q10 // diff0 + diff1 + 2 + 3 ++- vand.8 q8, q8, q14 ++- vadd.s8 q3, q15, q8 // offset_idx ++- // TODO: load only once ++- vld1.8 d16, [r5] ++- ++- vtbl.8 d0, {d16}, d0 ++- vtbl.8 d1, {d16}, d1 ++- vtbl.8 d2, {d16}, d2 ++- vtbl.8 d3, {d16}, d3 ++- vtbl.8 d4, {d16}, d4 ++- vtbl.8 d5, {d16}, d5 ++- vtbl.8 d6, {d16}, d6 ++- vtbl.8 d7, {d16}, d7 ++- ++- // TODO: load only once ++- // load c again ++- sub r1, r3 ++- sub r1, r3 ++- vld1.8 {q4-q5}, [r1]! 
++- vld1.8 {q6-q7}, [r1], r3 ++- sub r1, #32 +++ vdup.s8 q9, r6 // 3 to all elements +++ sub r6, #1 +++ +++ vclt.s8 q12, q0, #0 // diff0 + diff1 < 0 +++ vclt.s8 q13, q1, #0 +++ vclt.s8 q14, q2, #0 +++ vclt.s8 q15, q3, #0 +++ +++ vadd.s8 q8, q0, q9 // diff0 + diff1 + 3 +++ vadd.s8 q10, q1, q9 +++ vand.8 q12, q8, q12 // if (diff0 + diff1 < 0) then (diff0 + diff1 + 3) else 0 +++ vand.8 q13, q10, q13 +++ vadd.s8 q8, q2, q9 +++ vadd.s8 q10, q3, q9 +++ vand.8 q14, q8, q14 +++ vand.8 q15, q10, q15 +++ +++ vdup.s8 q9, r6 // 2 to all elements +++ add r6, #1 +++ +++ vcgt.s8 q10, q0, #0 // diff0 + diff1 > 0 +++ vadd.s8 q8, q0, q9 // diff0 + diff1 + 2 +++ vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vcgt.s8 q10, q1, #0 +++ vadd.s8 q0, q11, q12 // offset_idx +++ +++ vadd.s8 q8, q1, q9 // diff0 + diff1 + 2 +++ vcgt.s8 q12, q2, #0 +++ vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vadd.s8 q8, q2, q9 // diff0 + diff1 + 2 +++ vadd.s8 q1, q11, q13 +++ +++ vand.8 q11, q8, q12 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vcgt.s8 q10, q3, #0 +++ vadd.s8 q2, q11, q14 +++ +++ vadd.s8 q8, q3, q9 // diff0 + diff1 + 2 +++ vmov.32 d18[0], r7 // load offset table from general registers +++ vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vmov.32 d18[1], r5 // load rest of offset table +++ vadd.s8 q3, q11, q15 +++ +++ vtbl.8 d0, {d18}, d0 +++ vtbl.8 d1, {d18}, d1 +++ vtbl.8 d2, {d18}, d2 +++ vtbl.8 d3, {d18}, d3 +++ vtbl.8 d4, {d18}, d4 +++ vtbl.8 d5, {d18}, d5 +++ vtbl.8 d6, {d18}, d6 +++ vtbl.8 d7, {d18}, d7 ++ ++ vmovl.u8 q8, d8 ++ vmovl.u8 q9, d9 ++@@ -344,8 +316,388 @@ function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 ++ ++ vstm r0, {q0-q3} ++ add r0, r2 +++.endm +++ +++.macro edge_w32_body +++ vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 +++ vcgt.u8 q13, q5, q1 +++ vcgt.u8 q1, q1, q5 +++ +++ vsub.s8 q12, q0, q12 
// diff0 +++ vcgt.u8 q0, q4, q8 // c > b +++ vsub.s8 q13, q1, q13 // diff0 part 2 +++ +++ vcgt.u8 q6, q8, q4 // b > c +++ vcgt.u8 q1, q5, q9 +++ vcgt.u8 q7, q9, q5 +++ +++ vsub.s8 q0, q6, q0 // diff1 +++ vsub.s8 q1, q7, q1 // diff1 part 2 +++ vadd.s8 q0, q12 //diff0 + diff1 +++ +++ vdup.s8 q7, r6 // 3 to all elements +++ sub r6, #1 +++ vadd.s8 q1, q13 +++ +++ vclt.s8 q12, q0, #0 // diff0 + diff1 < 0 +++ vclt.s8 q13, q1, #0 +++ +++ vadd.s8 q6, q0, q7 // diff0 + diff1 + 3 +++ vadd.s8 q10, q1, q7 +++ vdup.s8 q7, r6 // 2 to all elements +++ add r6, #1 +++ vand.8 q12, q6, q12 // if (diff0 + diff1 < 0) then (diff0 + diff1 + 3) else 0 +++ vand.8 q13, q10, q13 +++ +++ +++ vcgt.s8 q10, q0, #0 // diff0 + diff1 > 0 +++ vadd.s8 q6, q0, q7 // diff0 + diff1 + 2 +++ vand.8 q11, q6, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vcgt.s8 q10, q1, #0 +++ vadd.s8 q0, q11, q12 // offset_idx +++ +++ vadd.s8 q6, q1, q7 // diff0 + diff1 + 2 +++ vmov.32 d14[0], r7 // load offset table from general registers +++ vand.8 q11, q6, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vmov.32 d14[1], r5 // load rest of offset table +++ vadd.s8 q1, q11, q13 +++ +++ vtbl.8 d0, {d14}, d0 +++ vtbl.8 d1, {d14}, d1 +++ vtbl.8 d2, {d14}, d2 +++ vtbl.8 d3, {d14}, d3 +++ +++ vmovl.u8 q6, d8 +++ vmovl.u8 q7, d9 +++ vmovl.u8 q10, d10 +++ vmovl.u8 q11, d11 +++ +++ vaddw.s8 q6, d0 +++ vaddw.s8 q7, d1 +++ vaddw.s8 q10, d2 +++ vaddw.s8 q11, d3 +++ +++ vqmovun.s16 d0, q6 +++ vqmovun.s16 d1, q7 +++ vqmovun.s16 d2, q10 +++ vqmovun.s16 d3, q11 +++ +++ vstm r0, {q0-q1} +++ add r0, r2 +++.endm +++ +++function ff_hevc_sao_edge_eo0_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++ sub r1, #8 +++1: subs r4, #1 +++ vld1.64 {q10-q11}, [r1]! +++ vld1.64 {q12-q13}, [r1]! 
+++ vld1.64 {q14}, [r1], r3 +++ sub r1, #64 +++ // load a +++ vext.8 q0, q10, q11, #7 +++ vext.8 q1, q11, q12, #7 +++ vext.8 q2, q12, q13, #7 +++ vext.8 q3, q13, q14, #7 +++ // load c +++ vext.8 q4, q10, q11, #8 +++ vext.8 q5, q11, q12, #8 +++ vext.8 q6, q12, q13, #8 +++ vext.8 q7, q13, q14, #8 +++ // load b +++ vext.8 q8, q10, q11, #9 +++ vext.8 q9, q11, q12, #9 +++ vext.8 q10, q12, q13, #9 +++ vext.8 q11, q13, q14, #9 +++ edge_w64_body +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++ sub r1, r3 +++ // load a +++ vld1.8 {q0-q1}, [r1]! +++ vld1.8 {q2-q3}, [r1], r3 +++ sub r1, #32 +++1: subs r4, #1 +++ // load c +++ vld1.8 {q4-q5}, [r1]! +++ vld1.8 {q6-q7}, [r1], r3 +++ sub r1, #32 +++ // load b +++ vld1.8 {q8-q9}, [r1]! +++ vld1.8 {q10-q11}, [r1] +++ sub r1, #32 +++ edge_w64_body +++ // copy c to a +++ vmov.64 q0, q4 +++ vmov.64 q1, q5 +++ vmov.64 q2, q6 +++ vmov.64 q3, q7 ++ bne 1b ++ vpop {d8-d15} ++ pop {r4-r8} ++ bx lr ++ endfunc +++ +++function ff_hevc_sao_edge_eo2_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++1: sub r1, r3 +++ // load a +++ // TODO: fix unaligned load +++ // don't reload a like in eo1 +++ sub r1, #1 +++ vld1.8 {q0-q1}, [r1]! +++ vld1.8 {q2-q3}, [r1], r3 +++ sub r1, #31 +++ subs r4, #1 +++ // load c +++ vld1.8 {q4-q5}, [r1]! +++ vld1.8 {q6-q7}, [r1], r3 +++ sub r1, #32 +++ // load b +++ add r1, #1 +++ vld1.8 {q8-q9}, [r1]! 
+++ vld1.8 {q10-q11}, [r1] +++ sub r1, #33 +++ edge_w64_body +++ // copy c to a +++ vmov.64 q0, q4 +++ vmov.64 q1, q5 +++ vmov.64 q2, q6 +++ vmov.64 q3, q7 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo3_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++1: sub r1, r3 +++ // load a +++ // TODO: fix unaligned load +++ // don't reload a like in eo1 +++ add r1, #1 +++ vld1.8 {q0-q1}, [r1]! +++ vld1.8 {q2-q3}, [r1], r3 +++ sub r1, #33 +++ subs r4, #1 +++ // load c +++ vld1.8 {q4-q5}, [r1]! +++ vld1.8 {q6-q7}, [r1], r3 +++ sub r1, #32 +++ // load b +++ sub r1, #1 +++ vld1.8 {q8-q9}, [r1]! +++ vld1.8 {q10-q11}, [r1] +++ sub r1, #31 +++ edge_w64_body +++ // copy c to a +++ vmov.64 q0, q4 +++ vmov.64 q1, q5 +++ vmov.64 q2, q6 +++ vmov.64 q3, q7 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo0_w32_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++ sub r1, #8 // load 8 extra bytes +++1: subs r4, #1 +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 // only first 9 bytes are used +++ sub r1, #32 +++ // a +++ vext.8 q0, q10, q11, #7 +++ vext.8 q1, q11, q12, #7 +++ // c +++ vext.8 q4, q10, q11, #8 +++ vext.8 q5, q11, q12, #8 +++ // b +++ vext.8 q8, q10, q11, #9 +++ vext.8 q9, q11, q12, #9 +++ edge_w32_body +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo1_w32_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++ // load a +++ sub r1, r3 +++ vld1.8 {q0-q1}, 
[r1], r3 +++ // load c +++ vld1.8 {q4-q5}, [r1], r3 +++1: subs r4, #1 +++ // load b +++ vld1.8 {q8-q9}, [r1], r3 +++ edge_w32_body +++ // inputs for next loop iteration +++ // a +++ vmov.64 q0, q4 +++ vmov.64 q1, q5 +++ // c +++ vmov.64 q4, q8 +++ vmov.64 q5, q9 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo2_w32_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++ // load a +++ sub r1, r3 +++ sub r1, #8 +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q0, q10, q11, #7 +++ vext.8 q1, q11, q12, #7 +++ // load c +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q4, q10, q11, #8 +++ vext.8 q5, q11, q12, #8 +++ vext.8 q2, q10, q11, #7 +++1: subs r4, #1 +++ // load b +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q8, q10, q11, #9 +++ vext.8 q9, q11, q12, #9 +++ vext.8 q14, q10, q11, #8 +++ vext.8 q15, q11, q12, #8 +++ vext.8 q3, q10, q11, #7 +++ edge_w32_body +++ // inputs for next loop iteration +++ // a +++ vmov.8 q0, q2 +++ vext.8 q1, q4, q5, #15 +++ // c +++ vmov.8 q4, q14 +++ vmov.8 q5, q15 +++ vmov.8 q2, q3 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo3_w32_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ sub r1, r3 +++ ldr r5, [r5] +++ sub r1, #8 +++ vpush {d8-d15} +++ // load a +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q0, q10, q11, #9 +++ vext.8 q1, q11, q12, #9 +++ // load c +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q4, q10, q11, #8 +++ 
vext.8 q5, q11, q12, #8 +++ vext.8 q2, q12, q11, #8 +++1: subs r4, #1 +++ // load b +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q8, q10, q11, #7 +++ vext.8 q9, q11, q12, #7 +++ vext.8 q3, q12, q10, #7 +++ edge_w32_body +++ // inputs for next loop iteration +++ // a +++ vext.8 q0, q4, q5, #1 +++ vext.8 q1, q5, q2, #1 +++ // c +++ vext.8 q4, q8, q9, #1 +++ vext.8 q5, q9, q3, #1 +++ vext.8 q2, q3, q1, #1 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ ++-- ++2.5.0 ++ ++ ++From 1898d052a73370166d57e17cc7c52b7275887df3 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi ++Date: Fri, 19 Dec 2014 09:44:10 +0200 ++Subject: [PATCH 4/9] Improved SAO band NEON optimizations made SAO buffer 16 ++ byte aligned added alignment hints to loads and stores optimized register ++ usage in SAO band neon assembly ++ ++--- ++ libavcodec/arm/hevcdsp_sao_neon.S | 212 +++++++++++++++----------------------- ++ 1 file changed, 82 insertions(+), 130 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 4687012..ac21013 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -22,120 +22,84 @@ ++ #include "neon.S" ++ ++ function ff_hevc_sao_band_w8_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // offset_table ++- vpush {d8-d15} ++- vld1.8 {q0, q1}, [r5] // offset table +++ ldr r12, [sp, #4] // offset_table address +++ vld1.8 {q0, q1}, [r12] // offset table +++ ldr r12, [sp, #0] // height ++ ++-1: subs r4, #1 ++- vld1.8 {d24}, [r1], r3 +++1: subs r12, #1 +++ vld1.8 {d24}, [r1,:64], r3 ++ vshr.u8 d16, d24, #3 ++ vtbl.8 d16, {q0, q1}, d16 ++- vmovl.s8 q2, d16 ++ vmovl.u8 q6, d24 ++- vadd.s16 q2, q6 +++ vaddw.s8 q6, d16 ++ vqmovun.s16 d4, q2 ++- vst1.8 {d4}, [r0], r2 +++ vst1.8 {d4}, [r0,:64], r2 ++ bne 1b ++ ++- vpop {d8-d15} ++- pop {r4-r8} ++ bx lr ++ endfunc ++ ++ function
ff_hevc_sao_band_w16_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // offset_table ++- vpush {d8-d15} ++- vld1.8 {q0, q1}, [r5] // offset table ++- ++-1: subs r4, #1 ++- vld1.8 {q12}, [r1], r3 +++ ldr r12, [sp, #4] // offset_table address +++ vld1.8 {q0, q1}, [r12] // offset table +++ ldr r12, [sp, #0] // height ++ +++1: subs r12, #1 +++ vld1.8 {q12}, [r1,:128], r3 ++ vshr.u8 q8, q12, #3 ++- ++ vtbl.8 d16, {q0, q1}, d16 ++ vtbl.8 d17, {q0, q1}, d17 ++- ++- vmovl.s8 q2, d16 ++- vmovl.s8 q3, d17 ++- ++- vmovl.u8 q6, d24 ++- vmovl.u8 q7, d25 ++- ++- vadd.s16 q2, q6 ++- vadd.s16 q3, q7 ++- ++- vqmovun.s16 d4, q2 ++- vqmovun.s16 d5, q3 ++- ++- vstm.8 r0, {q2} ++- add r0, r2 +++ vmovl.u8 q10, d24 +++ vmovl.u8 q11, d25 +++ vaddw.s8 q10, d16 +++ vaddw.s8 q11, d17 +++ vqmovun.s16 d4, q10 +++ vqmovun.s16 d5, q11 +++ vst1.8 {q2}, [r0,:128], r2 ++ bne 1b ++ ++- vpop {d8-d15} ++- pop {r4-r8} ++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_band_w32_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // offset_table ++- vpush {d8-d15} ++- vld1.8 {q0, q1}, [r5] // offset table ++- ++-1: subs r4, #1 ++- vld1.8 {q12-q13}, [r1], r3 ++- ++- vshr.u8 q8, q12, #3 ++- vshr.u8 q9, q13, #3 ++- ++- vtbl.8 d16, {q0, q1}, d16 ++- vtbl.8 d17, {q0, q1}, d17 ++- vtbl.8 d18, {q0, q1}, d18 ++- vtbl.8 d19, {q0, q1}, d19 ++- ++- vmovl.s8 q2, d16 ++- vmovl.s8 q3, d17 // q8 free ++- vmovl.s8 q4, d18 ++- vmovl.s8 q5, d19 // q9 free ++- ++- vmovl.u8 q6, d24 ++- vmovl.u8 q7, d25 // q12 free ++- vmovl.u8 q8, d26 ++- vmovl.u8 q9, d27 // q13 free ++- ++- vadd.s16 q2, q6 ++- vadd.s16 q3, q7 ++- vadd.s16 q4, q8 ++- vadd.s16 q5, q9 ++- ++- vqmovun.s16 d4, q2 ++- vqmovun.s16 d5, q3 ++- vqmovun.s16 d6, q4 // q4 free ++- vqmovun.s16 d7, q5 // q5 free ++- ++- vst1.8 {q2-q3}, [r0], r2 ++- bne 1b ++- ++- vpop {d8-d15} ++- pop {r4-r8} ++- bx lr +++ ldr r12, [sp, #4] // offset_table address +++ vld1.8 {q0, q1}, [r12] // offset table +++ ldr 
r12, [sp, #0] // height +++ +++1: subs r12, #1 +++ vld1.8 {q2-q3}, [r1,:128], r3 +++ vshr.u8 q8, q2, #3 +++ vshr.u8 q9, q3, #3 +++ vtbl.8 d16, {q0, q1}, d16 +++ vtbl.8 d17, {q0, q1}, d17 +++ vtbl.8 d18, {q0, q1}, d18 +++ vtbl.8 d19, {q0, q1}, d19 +++ vmovl.u8 q12, d4 +++ vmovl.u8 q13, d5 +++ vmovl.u8 q14, d6 +++ vmovl.u8 q15, d7 +++ vaddw.s8 q12, d16 +++ vaddw.s8 q13, d17 +++ vaddw.s8 q14, d18 +++ vaddw.s8 q15, d19 +++ vqmovun.s16 d4, q12 +++ vqmovun.s16 d5, q13 +++ vqmovun.s16 d6, q14 +++ vqmovun.s16 d7, q15 +++ vst1.8 {q2-q3}, [r0,:128], r2 +++ bne 1b +++ +++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_band_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // offset_table ++- vpush {d8-d15} ++- vld1.8 {q0, q1}, [r5] // offset table +++ ldr r12, [sp, #4] // offset_table address +++ vld1.8 {q0, q1}, [r12] // offset table +++ ldr r12, [sp, #0] // height ++ ++-1: subs r4, #1 ++- vld1.8 {q12-q13}, [r1]! ++- vld1.8 {q14-q15}, [r1], r3 +++1: subs r12, #1 +++ vld1.8 {q12-q13}, [r1,:128]! 
+++ vld1.8 {q14-q15}, [r1,:128], r3 ++ sub r1, #32 ++ ++ vshr.u8 q8, q12, #3 ++@@ -152,53 +116,41 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ vtbl.8 d22, {q0, q1}, d22 ++ vtbl.8 d23, {q0, q1}, d23 ++ ++- vmovl.s8 q2, d16 ++- vmovl.s8 q3, d17 // q8 free ++- vmovl.s8 q4, d18 ++- vmovl.s8 q5, d19 // q9 free +++ vmovl.u8 q2, d24 +++ vmovl.u8 q3, d25 +++ vmovl.u8 q12, d26 +++ vmovl.u8 q13, d27 ++ ++- vmovl.u8 q6, d24 ++- vmovl.u8 q7, d25 // q12 free ++- vmovl.u8 q8, d26 ++- vmovl.u8 q9, d27 // q13 free ++- ++- vadd.s16 q2, q6 ++- vadd.s16 q3, q7 ++- vadd.s16 q4, q8 ++- vadd.s16 q5, q9 +++ vaddw.s8 q2, d16 +++ vaddw.s8 q3, d17 +++ vaddw.s8 q12, d18 +++ vaddw.s8 q13, d19 ++ ++ vqmovun.s16 d4, q2 ++ vqmovun.s16 d5, q3 ++- vqmovun.s16 d6, q4 // q4 free ++- vqmovun.s16 d7, q5 // q5 free ++- ++- // free q4 -q9, q12 - q13 ++- vmovl.s8 q4, d20 ++- vmovl.s8 q5, d21 // q10 free ++- vmovl.s8 q6, d22 ++- vmovl.s8 q7, d23 // q11 free ++- ++- vmovl.u8 q8, d28 ++- vmovl.u8 q9, d29 // q14 free ++- vmovl.u8 q10, d30 ++- vmovl.u8 q11, d31 // q15 free ++- ++- vadd.s16 q4, q8 ++- vadd.s16 q5, q9 ++- vadd.s16 q6, q10 ++- vadd.s16 q7, q11 ++- ++- vqmovun.s16 d8, q4 ++- vqmovun.s16 d9, q5 ++- vqmovun.s16 d10, q6 ++- vqmovun.s16 d11, q7 ++- ++- vstm.8 r0, {q2-q5} ++- add r0, r2 +++ vqmovun.s16 d6, q12 +++ vqmovun.s16 d7, q13 +++ +++ vmovl.u8 q12, d28 +++ vmovl.u8 q13, d29 +++ vmovl.u8 q14, d30 +++ vmovl.u8 q15, d31 +++ +++ vaddw.s8 q12, d20 +++ vaddw.s8 q13, d21 +++ vaddw.s8 q14, d22 +++ vaddw.s8 q15, d23 +++ +++ vqmovun.s16 d8, q12 +++ vqmovun.s16 d9, q13 +++ vqmovun.s16 d10, q14 +++ vqmovun.s16 d11, q15 +++ +++ vst1.8 {q2-q3}, [r0,:128]! 
+++ vst1.8 {q4-q5}, [r0,:128], r2 +++ sub r0, #32 ++ bne 1b ++ ++- vpop {d8-d15} ++- pop {r4-r8} ++ bx lr ++ endfunc ++ ++-- ++2.5.0 ++ ++ ++From 26bd536800db2f50ff6a021e1fda0d0394d1ea01 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi ++Date: Mon, 29 Dec 2014 15:00:49 +0200 ++Subject: [PATCH 5/9] better code reuse in NEON SAO band ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 16 ++-- ++ libavcodec/arm/hevcdsp_sao_neon.S | 155 +++++++++++++------------------------ ++ 2 files changed, 61 insertions(+), 110 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index c32940e..6379810 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -45,10 +45,10 @@ void ff_hevc_transform_add_16x16_neon_8(uint8_t *_dst, int16_t *coeffs, ++ void ff_hevc_transform_add_32x32_neon_8(uint8_t *_dst, int16_t *coeffs, ++ ptrdiff_t stride); ++ ++-void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++-void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++-void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++-void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); +++void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); +++void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); +++void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, 
uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); ++ ++ void ff_hevc_sao_edge_eo0_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++ void ff_hevc_sao_edge_eo1_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++@@ -185,16 +185,16 @@ static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_ ++ ++ switch(width){ ++ case 8: ++- ff_hevc_sao_band_w8_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ ff_hevc_sao_band_w8_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); ++ break; ++ case 16: ++- ff_hevc_sao_band_w16_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ ff_hevc_sao_band_w16_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); ++ break; ++ case 32: ++- ff_hevc_sao_band_w32_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ ff_hevc_sao_band_w32_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); ++ break; ++ case 64: ++- ff_hevc_sao_band_w64_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ ff_hevc_sao_band_w64_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); ++ break; ++ default: ++ for (y = 0; y < height; y++) { ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index ac21013..8852550 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -21,53 +21,13 @@ ++ #include "libavutil/arm/asm.S" ++ #include "neon.S" ++ ++-function ff_hevc_sao_band_w8_neon_8, export=1 ++- ldr r12, [sp, #4] // offset_table address +++.macro init_sao_band +++ ldr r12, [sp, #0] // offset_table address ++ vld1.8 {q0, q1}, [r12] // offset table ++- ldr r12, [sp, #0] // height ++- ++-1: subs r12, #1 ++- vld1.8 {d24}, [r1,:64], r3 ++- vshr.u8 d16, d24, #3 ++- vtbl.8 
d16, {q0, q1}, d16 ++- vmovl.u8 q6, d24 ++- vaddw.s8 q6, d16 ++- vqmovun.s16 d4, q2 ++- vst1.8 {d4}, [r0,:64], r2 ++- bne 1b ++- ++- bx lr ++-endfunc ++- ++-function ff_hevc_sao_band_w16_neon_8, export=1 ++- ldr r12, [sp, #4] // offset_table address ++- vld1.8 {q0, q1}, [r12] // offset table ++- ldr r12, [sp, #0] // height ++- ++-1: subs r12, #1 ++- vld1.8 {q12}, [r1,:128], r3 ++- vshr.u8 q8, q12, #3 ++- vtbl.8 d16, {q0, q1}, d16 ++- vtbl.8 d17, {q0, q1}, d17 ++- vmovl.u8 q10, d24 ++- vmovl.u8 q11, d25 ++- vaddw.s8 q10, d16 ++- vaddw.s8 q11, d17 ++- vqmovun.s16 d4, q10 ++- vqmovun.s16 d5, q11 ++- vst1.8 {q2}, [r0,:128], r2 ++- bne 1b ++- ++- bx lr ++-endfunc ++- ++-function ff_hevc_sao_band_w32_neon_8, export=1 ++- ldr r12, [sp, #4] // offset_table address ++- vld1.8 {q0, q1}, [r12] // offset table ++- ldr r12, [sp, #0] // height +++ ldr r12, [sp, #4] // height +++.endm ++ ++-1: subs r12, #1 ++- vld1.8 {q2-q3}, [r1,:128], r3 +++.macro sao_band_32 ++ vshr.u8 q8, q2, #3 ++ vshr.u8 q9, q3, #3 ++ vtbl.8 d16, {q0, q1}, d16 ++@@ -86,6 +46,43 @@ function ff_hevc_sao_band_w32_neon_8, export=1 ++ vqmovun.s16 d5, q13 ++ vqmovun.s16 d6, q14 ++ vqmovun.s16 d7, q15 +++.endm +++ +++function ff_hevc_sao_band_w8_neon_8, export=1 +++ init_sao_band +++1: subs r12, #4 +++ vld1.8 {d4}, [r1,:64], r3 +++ vld1.8 {d5}, [r1,:64], r3 +++ vld1.8 {d6}, [r1,:64], r3 +++ vld1.8 {d7}, [r1,:64], r3 +++ sao_band_32 +++ vst1.8 {d4}, [r0,:64], r2 +++ vst1.8 {d5}, [r0,:64], r2 +++ vst1.8 {d6}, [r0,:64], r2 +++ vst1.8 {d7}, [r0,:64], r2 +++ bne 1b +++ +++ bx lr +++endfunc +++ +++function ff_hevc_sao_band_w16_neon_8, export=1 +++ init_sao_band +++1: subs r12, #2 +++ vld1.8 {q2}, [r1,:128], r3 +++ vld1.8 {q3}, [r1,:128], r3 +++ sao_band_32 +++ vst1.8 {q2}, [r0,:128], r2 +++ vst1.8 {q3}, [r0,:128], r2 +++ bne 1b +++ +++ bx lr +++endfunc +++ +++function ff_hevc_sao_band_w32_neon_8, export=1 +++ init_sao_band +++1: subs r12, #1 +++ vld1.8 {q2-q3}, [r1,:128], r3 +++ sao_band_32 ++ vst1.8 {q2-q3}, [r0,:128], 
r2 ++ bne 1b ++ ++@@ -93,63 +90,17 @@ function ff_hevc_sao_band_w32_neon_8, export=1 ++ endfunc ++ ++ function ff_hevc_sao_band_w64_neon_8, export=1 ++- ldr r12, [sp, #4] // offset_table address ++- vld1.8 {q0, q1}, [r12] // offset table ++- ldr r12, [sp, #0] // height ++- ++-1: subs r12, #1 ++- vld1.8 {q12-q13}, [r1,:128]! ++- vld1.8 {q14-q15}, [r1,:128], r3 ++- sub r1, #32 ++- ++- vshr.u8 q8, q12, #3 ++- vshr.u8 q9, q13, #3 ++- vshr.u8 q10, q14, #3 ++- vshr.u8 q11, q15, #3 ++- ++- vtbl.8 d16, {q0, q1}, d16 ++- vtbl.8 d17, {q0, q1}, d17 ++- vtbl.8 d18, {q0, q1}, d18 ++- vtbl.8 d19, {q0, q1}, d19 ++- vtbl.8 d20, {q0, q1}, d20 ++- vtbl.8 d21, {q0, q1}, d21 ++- vtbl.8 d22, {q0, q1}, d22 ++- vtbl.8 d23, {q0, q1}, d23 ++- ++- vmovl.u8 q2, d24 ++- vmovl.u8 q3, d25 ++- vmovl.u8 q12, d26 ++- vmovl.u8 q13, d27 ++- ++- vaddw.s8 q2, d16 ++- vaddw.s8 q3, d17 ++- vaddw.s8 q12, d18 ++- vaddw.s8 q13, d19 ++- ++- vqmovun.s16 d4, q2 ++- vqmovun.s16 d5, q3 ++- vqmovun.s16 d6, q12 ++- vqmovun.s16 d7, q13 ++- ++- vmovl.u8 q12, d28 ++- vmovl.u8 q13, d29 ++- vmovl.u8 q14, d30 ++- vmovl.u8 q15, d31 ++- ++- vaddw.s8 q12, d20 ++- vaddw.s8 q13, d21 ++- vaddw.s8 q14, d22 ++- vaddw.s8 q15, d23 ++- ++- vqmovun.s16 d8, q12 ++- vqmovun.s16 d9, q13 ++- vqmovun.s16 d10, q14 ++- vqmovun.s16 d11, q15 ++- ++- vst1.8 {q2-q3}, [r0,:128]! ++- vst1.8 {q4-q5}, [r0,:128], r2 ++- sub r0, #32 ++- bne 1b +++ init_sao_band +++1: subs r12, #1 +++ vld1.8 {q2-q3}, [r1,:128]! +++ sao_band_32 +++ vst1.8 {q2-q3}, [r0,:128]! 
+++ vld1.8 {q2-q3}, [r1,:128], r3 +++ sub r1, #32 +++ sao_band_32 +++ vst1.8 {q2-q3}, [r0,:128], r2 +++ sub r0, #32 +++ bne 1b ++ ++ bx lr ++ endfunc ++-- ++2.5.0 ++ ++ ++From f93646a97bc885b81759e774d04be3781916a3e7 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi ++Date: Wed, 7 Jan 2015 15:27:38 +0200 ++Subject: [PATCH 6/9] More SAO NEON optimizations Now uses only 8 bit integers ++ for SAO calculations ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 7 +- ++ libavcodec/arm/hevcdsp_sao_neon.S | 664 +++++++++++++++---------------------- ++ 2 files changed, 272 insertions(+), 399 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 6379810..8d6e863 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -225,7 +225,7 @@ static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t ++ int x, y; ++ ++ for (x = 0; x < 5; x++) { ++- sao_offset_val[x] = _sao_offset_val[x]; +++ sao_offset_val[x] = _sao_offset_val[edge_idx[x]]; ++ } ++ ++ stride_src /= sizeof(pixel); ++@@ -271,8 +271,9 @@ static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t ++ for (x = 0; x < width; x++) { ++ int diff0 = CMP(src[x], src[x + a_stride]); ++ int diff1 = CMP(src[x], src[x + b_stride]); ++- int offset_val = edge_idx[2 + diff0 + diff1]; ++- dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]); +++ int idx = diff0 + diff1; +++ if (idx) +++ dst[x] = av_clip_pixel(src[x] + sao_offset_val[idx+2]); ++ } ++ src += stride_src; ++ dst += stride_dst; ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 8852550..5fc482b 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -1,5 +1,5 @@ ++ /* ++- * Copyright (c) 2014 Seppo Tomperi +++ * Copyright (c) 2014 - 2015 Seppo Tomperi ++ * ++ * This file is part of FFmpeg. 
++ * ++@@ -23,6 +23,7 @@ ++ ++ .macro init_sao_band ++ ldr r12, [sp, #0] // offset_table address +++ pld [r1] ++ vld1.8 {q0, q1}, [r12] // offset table ++ ldr r12, [sp, #4] // height ++ .endm ++@@ -30,36 +31,31 @@ ++ .macro sao_band_32 ++ vshr.u8 q8, q2, #3 ++ vshr.u8 q9, q3, #3 +++ vmov.u8 q14, #128 ++ vtbl.8 d16, {q0, q1}, d16 ++ vtbl.8 d17, {q0, q1}, d17 ++ vtbl.8 d18, {q0, q1}, d18 ++ vtbl.8 d19, {q0, q1}, d19 ++- vmovl.u8 q12, d4 ++- vmovl.u8 q13, d5 ++- vmovl.u8 q14, d6 ++- vmovl.u8 q15, d7 ++- vaddw.s8 q12, d16 ++- vaddw.s8 q13, d17 ++- vaddw.s8 q14, d18 ++- vaddw.s8 q15, d19 ++- vqmovun.s16 d4, q12 ++- vqmovun.s16 d5, q13 ++- vqmovun.s16 d6, q14 ++- vqmovun.s16 d7, q15 +++ vadd.s8 q2, q14 +++ vadd.s8 q3, q14 +++ vqadd.s8 q2, q8 +++ vqadd.s8 q3, q9 +++ vsub.s8 q2, q14 +++ vsub.s8 q3, q14 ++ .endm ++ ++ function ff_hevc_sao_band_w8_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #4 ++- vld1.8 {d4}, [r1,:64], r3 ++- vld1.8 {d5}, [r1,:64], r3 ++- vld1.8 {d6}, [r1,:64], r3 ++- vld1.8 {d7}, [r1,:64], r3 +++ vld1.8 {d4}, [r1, :64], r3 +++ vld1.8 {d5}, [r1, :64], r3 +++ vld1.8 {d6}, [r1, :64], r3 +++ vld1.8 {d7}, [r1, :64], r3 ++ sao_band_32 ++- vst1.8 {d4}, [r0,:64], r2 ++- vst1.8 {d5}, [r0,:64], r2 ++- vst1.8 {d6}, [r0,:64], r2 ++- vst1.8 {d7}, [r0,:64], r2 +++ vst1.8 {d4}, [r0, :64], r2 +++ vst1.8 {d5}, [r0, :64], r2 +++ vst1.8 {d6}, [r0, :64], r2 +++ vst1.8 {d7}, [r0, :64], r2 ++ bne 1b ++ ++ bx lr ++@@ -68,11 +64,11 @@ endfunc ++ function ff_hevc_sao_band_w16_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #2 ++- vld1.8 {q2}, [r1,:128], r3 ++- vld1.8 {q3}, [r1,:128], r3 +++ vld1.8 {q2}, [r1, :128], r3 +++ vld1.8 {q3}, [r1, :128], r3 ++ sao_band_32 ++- vst1.8 {q2}, [r0,:128], r2 ++- vst1.8 {q3}, [r0,:128], r2 +++ vst1.8 {q2}, [r0, :128], r2 +++ vst1.8 {q3}, [r0, :128], r2 ++ bne 1b ++ ++ bx lr ++@@ -81,9 +77,9 @@ endfunc ++ function ff_hevc_sao_band_w32_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #1 ++- vld1.8 {q2-q3}, [r1,:128], r3 +++ vld1.8 
{q2-q3}, [r1, :128], r3 ++ sao_band_32 ++- vst1.8 {q2-q3}, [r0,:128], r2 +++ vst1.8 {q2-q3}, [r0, :128], r2 ++ bne 1b ++ ++ bx lr ++@@ -92,263 +88,153 @@ endfunc ++ function ff_hevc_sao_band_w64_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #1 ++- vld1.8 {q2-q3}, [r1,:128]! +++ pld [r1, r3] +++ vld1.8 {q2-q3}, [r1, :128]! ++ sao_band_32 ++- vst1.8 {q2-q3}, [r0,:128]! ++- vld1.8 {q2-q3}, [r1,:128], r3 +++ vst1.8 {q2-q3}, [r0, :128]! +++ vld1.8 {q2-q3}, [r1, :128], r3 ++ sub r1, #32 ++ sao_band_32 ++- vst1.8 {q2-q3}, [r0,:128], r2 +++ vst1.8 {q2-q3}, [r0, :128], r2 ++ sub r0, #32 ++ bne 1b ++ ++ bx lr ++ endfunc ++- +++// input +++// a in q0 - q3 +++// c in q4 - q7 +++// b in q8 - q11 +++// offset table in r7 and r5 +++// output in q0 - q3 +++// clobbers q12 - q15 ++ .macro edge_w64_body ++- vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 ++- vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 ++- vcgt.u8 q13, q5, q1 ++- vcgt.u8 q1, q1, q5 ++- vcgt.u8 q14, q6, q2 ++- vcgt.u8 q2, q2, q6 ++- vcgt.u8 q15, q7, q3 ++- vcgt.u8 q3, q3, q7 ++- ++- vsub.s8 q12, q0, q12 // diff0 ++- vsub.s8 q13, q1, q13 ++- vsub.s8 q14, q2, q14 ++- vsub.s8 q15, q3, q15 ++- +++ vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 +++ vcgt.u8 q13, q5, q1 +++ vcgt.u8 q1, q1, q5 +++ vsub.s8 q12, q0, q12 // diff0 ++ vcgt.u8 q0, q4, q8 // c > b ++- vcgt.u8 q8, q8, q4 // b > c +++ vsub.s8 q13, q1, q13 +++ +++ vcgt.u8 q14, q8, q4 // b > c ++ vcgt.u8 q1, q5, q9 ++- vcgt.u8 q9, q9, q5 ++- vcgt.u8 q2, q6, q10 ++- vcgt.u8 q10, q10, q6 ++- vcgt.u8 q3, q7, q11 ++- vcgt.u8 q11, q11, q7 +++ vcgt.u8 q15, q9, q5 +++ vsub.s8 q0, q14, q0 // diff1 ++ ++- vsub.s8 q0, q8, q0 // diff1 ++- vsub.s8 q1, q9, q1 ++- vsub.s8 q2, q10, q2 ++- vsub.s8 q3, q11, q3 +++ vsub.s8 q1, q15, q1 ++ ++- vadd.s8 q0, q12 //diff0 + diff1 ++- vadd.s8 q1, q13 ++- vadd.s8 q2, q14 ++- vadd.s8 q3, q15 ++- ++- vdup.s8 q9, r6 // 3 to all elements ++- sub r6, #1 ++- ++- vclt.s8 q12, q0, #0 // diff0 
+ diff1 < 0 ++- vclt.s8 q13, q1, #0 ++- vclt.s8 q14, q2, #0 ++- vclt.s8 q15, q3, #0 ++- ++- vadd.s8 q8, q0, q9 // diff0 + diff1 + 3 ++- vadd.s8 q10, q1, q9 ++- vand.8 q12, q8, q12 // if (diff0 + diff1 < 0) then (diff0 + diff1 + 3) else 0 ++- vand.8 q13, q10, q13 ++- vadd.s8 q8, q2, q9 ++- vadd.s8 q10, q3, q9 ++- vand.8 q14, q8, q14 ++- vand.8 q15, q10, q15 ++- ++- vdup.s8 q9, r6 // 2 to all elements ++- add r6, #1 ++- ++- vcgt.s8 q10, q0, #0 // diff0 + diff1 > 0 ++- vadd.s8 q8, q0, q9 // diff0 + diff1 + 2 ++- vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vcgt.s8 q10, q1, #0 ++- vadd.s8 q0, q11, q12 // offset_idx ++- ++- vadd.s8 q8, q1, q9 // diff0 + diff1 + 2 ++- vcgt.s8 q12, q2, #0 ++- vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vadd.s8 q8, q2, q9 // diff0 + diff1 + 2 ++- vadd.s8 q1, q11, q13 ++- ++- vand.8 q11, q8, q12 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vcgt.s8 q10, q3, #0 ++- vadd.s8 q2, q11, q14 ++- ++- vadd.s8 q8, q3, q9 // diff0 + diff1 + 2 ++- vmov.32 d18[0], r7 // load offset table from general registers ++- vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vmov.32 d18[1], r5 // load rest of offset table ++- vadd.s8 q3, q11, q15 ++- ++- vtbl.8 d0, {d18}, d0 ++- vtbl.8 d1, {d18}, d1 ++- vtbl.8 d2, {d18}, d2 ++- vtbl.8 d3, {d18}, d3 ++- vtbl.8 d4, {d18}, d4 ++- vtbl.8 d5, {d18}, d5 ++- vtbl.8 d6, {d18}, d6 ++- vtbl.8 d7, {d18}, d7 ++- ++- vmovl.u8 q8, d8 ++- vmovl.u8 q9, d9 ++- vmovl.u8 q10, d10 ++- vmovl.u8 q11, d11 ++- vmovl.u8 q12, d12 ++- vmovl.u8 q13, d13 ++- vmovl.u8 q14, d14 ++- vmovl.u8 q15, d15 ++- ++- vaddw.s8 q8, d0 ++- vaddw.s8 q9, d1 ++- vaddw.s8 q10, d2 ++- vaddw.s8 q11, d3 ++- vaddw.s8 q12, d4 ++- vaddw.s8 q13, d5 ++- vaddw.s8 q14, d6 ++- vaddw.s8 q15, d7 ++- ++- vqmovun.s16 d0, q8 ++- vqmovun.s16 d1, q9 ++- vqmovun.s16 d2, q10 ++- vqmovun.s16 d3, q11 ++- vqmovun.s16 d4, q12 ++- vqmovun.s16 d5, q13 ++- vqmovun.s16 
d6, q14 ++- vqmovun.s16 d7, q15 ++- ++- vstm r0, {q0-q3} ++- add r0, r2 ++-.endm +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 ++ ++-.macro edge_w32_body ++- vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 ++- vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 ++- vcgt.u8 q13, q5, q1 ++- vcgt.u8 q1, q1, q5 +++ vcgt.u8 q14, q6, q2 +++ vcgt.u8 q2, q2, q6 +++ vcgt.u8 q15, q7, q3 +++ vcgt.u8 q3, q3, q7 ++ ++- vsub.s8 q12, q0, q12 // diff0 ++- vcgt.u8 q0, q4, q8 // c > b ++- vsub.s8 q13, q1, q13 // diff0 part 2 +++ vsub.s8 q14, q2, q14 +++ vcgt.u8 q2, q6, q10 +++ vsub.s8 q15, q3, q15 ++ ++- vcgt.u8 q6, q8, q4 // b > c ++- vcgt.u8 q1, q5, q9 ++- vcgt.u8 q7, q9, q5 +++ vcgt.u8 q12, q10, q6 +++ vcgt.u8 q3, q7, q11 +++ vcgt.u8 q13, q11, q7 +++ vsub.s8 q2, q12, q2 +++ vsub.s8 q3, q13, q3 ++ ++- vsub.s8 q0, q6, q0 // diff1 ++- vsub.s8 q1, q7, q1 // diff1 part 2 ++- vadd.s8 q0, q12 //diff0 + diff1 +++ vmov.s8 q13, #2 // 2 to all elements ++ ++- vdup.s8 q7, r6 // 3 to all elements ++- sub r6, #1 ++- vadd.s8 q1, q13 +++ vadd.s8 q2, q14 +++ vadd.s8 q3, q15 +++ +++ vmov.32 d24[0], r4 // load offset table from general registers +++ vmov.32 d24[1], r5 // load rest of offset table ++ ++- vclt.s8 q12, q0, #0 // diff0 + diff1 < 0 ++- vclt.s8 q13, q1, #0 ++- ++- vadd.s8 q6, q0, q7 // diff0 + diff1 + 3 ++- vadd.s8 q10, q1, q7 ++- vdup.s8 q7, r6 // 2 to all elements ++- add r6, #1 ++- vand.8 q12, q6, q12 // if (diff0 + diff1 < 0) then (diff0 + diff1 + 3) else 0 ++- vand.8 q13, q10, q13 ++- ++- ++- vcgt.s8 q10, q0, #0 // diff0 + diff1 > 0 ++- vadd.s8 q6, q0, q7 // diff0 + diff1 + 2 ++- vand.8 q11, q6, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vcgt.s8 q10, q1, #0 ++- vadd.s8 q0, q11, q12 // offset_idx ++- ++- vadd.s8 q6, q1, q7 // diff0 + diff1 + 2 ++- vmov.32 d14[0], r7 // load offset table from general registers ++- vand.8 q11, q6, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vmov.32 d14[1], r5 // load rest of offset table ++- vadd.s8 
q1, q11, q13 ++- ++- vtbl.8 d0, {d14}, d0 ++- vtbl.8 d1, {d14}, d1 ++- vtbl.8 d2, {d14}, d2 ++- vtbl.8 d3, {d14}, d3 ++- ++- vmovl.u8 q6, d8 ++- vmovl.u8 q7, d9 ++- vmovl.u8 q10, d10 ++- vmovl.u8 q11, d11 ++- ++- vaddw.s8 q6, d0 ++- vaddw.s8 q7, d1 ++- vaddw.s8 q10, d2 ++- vaddw.s8 q11, d3 ++- ++- vqmovun.s16 d0, q6 ++- vqmovun.s16 d1, q7 ++- vqmovun.s16 d2, q10 ++- vqmovun.s16 d3, q11 ++- ++- vstm r0, {q0-q1} ++- add r0, r2 +++ vadd.s8 q0, q13 +++ vadd.s8 q1, q13 +++ vadd.s8 q2, q13 +++ vadd.s8 q3, q13 +++ +++ vmov.u8 q15, #128 // s8 #-128 +++ vtbl.8 d0, {d24}, d0 +++ vtbl.8 d1, {d24}, d1 +++ vtbl.8 d2, {d24}, d2 +++ vtbl.8 d3, {d24}, d3 +++ vtbl.8 d4, {d24}, d4 +++ vtbl.8 d5, {d24}, d5 +++ vtbl.8 d6, {d24}, d6 +++ vtbl.8 d7, {d24}, d7 +++ +++ vadd.s8 q12, q4, q15 +++ vadd.s8 q13, q5, q15 +++ vadd.s8 q14, q6, q15 +++ vadd.s8 q15, q7, q15 +++ vqadd.s8 q12, q0 +++ vqadd.s8 q15, q3 +++ vmov.u8 q3, #128 // s8 #-128 +++ vqadd.s8 q13, q1 +++ vqadd.s8 q14, q2 +++ vsub.s8 q0, q12, q3 +++ vsub.s8 q1, q13, q3 +++ vsub.s8 q2, q14, q3 +++ vsub.s8 q3, q15, q3 +++ vst1.8 {q0-q1}, [r0, :128]! +++ vst1.8 {q2-q3}, [r0, :128], r2 +++ sub r0, #32 ++ .endm ++ ++-function ff_hevc_sao_edge_eo0_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] +++.macro init_edge_64 +++ push {r4-r5} +++ ldr r12, [sp, #8] // height +++ ldr r5, [sp, #12] // sao_offset_val_table +++ ldr r4, [r5] ++ add r5, #4 ++ ldr r5, [r5] +++.endm +++ +++function ff_hevc_sao_edge_eo0_w64_neon_8, export=1 +++ init_edge_64 ++ vpush {d8-d15} ++ sub r1, #8 ++-1: subs r4, #1 ++- vld1.64 {q10-q11}, [r1]! ++- vld1.64 {q12-q13}, [r1]! ++- vld1.64 {q14}, [r1], r3 ++- sub r1, #64 +++1: subs r12, #1 +++ vld1.64 {d7}, [r1, :64]! +++ vld1.64 {q4-q5}, [r1, :128]! // load c +++ vld1.64 {q6-q7}, [r1, :128]! 
+++ vld1.64 {d24}, [r1, :64], r3 +++ sub r1, #72 ++ // load a ++- vext.8 q0, q10, q11, #7 ++- vext.8 q1, q11, q12, #7 ++- vext.8 q2, q12, q13, #7 ++- vext.8 q3, q13, q14, #7 ++- // load c ++- vext.8 q4, q10, q11, #8 ++- vext.8 q5, q11, q12, #8 ++- vext.8 q6, q12, q13, #8 ++- vext.8 q7, q13, q14, #8 +++ vext.8 q0, q3, q4, #15 +++ vext.8 q1, q4, q5, #15 +++ vext.8 q2, q5, q6, #15 +++ vext.8 q3, q6, q7, #15 ++ // load b ++- vext.8 q8, q10, q11, #9 ++- vext.8 q9, q11, q12, #9 ++- vext.8 q10, q12, q13, #9 ++- vext.8 q11, q13, q14, #9 +++ vext.8 q8, q4, q5, #1 +++ vext.8 q9, q5, q6, #1 +++ vext.8 q10, q6, q7, #1 +++ vext.8 q11, q7, q12, #1 ++ edge_w64_body ++ bne 1b ++ vpop {d8-d15} ++- pop {r4-r8} +++ pop {r4-r5} ++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] +++ init_edge_64 ++ vpush {d8-d15} ++ sub r1, r3 ++ // load a ++- vld1.8 {q0-q1}, [r1]! ++- vld1.8 {q2-q3}, [r1], r3 +++ vld1.8 {q0-q1}, [r1, :128]! +++ vld1.8 {q2-q3}, [r1, :128], r3 ++ sub r1, #32 ++-1: subs r4, #1 ++ // load c ++- vld1.8 {q4-q5}, [r1]! ++- vld1.8 {q6-q7}, [r1], r3 +++ vld1.8 {q4-q5}, [r1, :128]! +++ vld1.8 {q6-q7}, [r1, :128], r3 ++ sub r1, #32 +++1: subs r12, #1 ++ // load b ++- vld1.8 {q8-q9}, [r1]! ++- vld1.8 {q10-q11}, [r1] +++ vld1.8 {q8-q9}, [r1, :128]! 
+++ vld1.8 {q10-q11}, [r1, :128], r3 ++ sub r1, #32 ++ edge_w64_body ++ // copy c to a ++@@ -356,20 +242,19 @@ function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 ++ vmov.64 q1, q5 ++ vmov.64 q2, q6 ++ vmov.64 q3, q7 +++ // copy b to c +++ vmov.64 q4, q8 +++ vmov.64 q5, q9 +++ vmov.64 q6, q10 +++ vmov.64 q7, q11 ++ bne 1b ++ vpop {d8-d15} ++- pop {r4-r8} +++ pop {r4-r5} ++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo2_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] +++ init_edge_64 ++ vpush {d8-d15} ++ 1: sub r1, r3 ++ // load a ++@@ -379,10 +264,10 @@ function ff_hevc_sao_edge_eo2_w64_neon_8, export=1 ++ vld1.8 {q0-q1}, [r1]! ++ vld1.8 {q2-q3}, [r1], r3 ++ sub r1, #31 ++- subs r4, #1 +++ subs r12, #1 ++ // load c ++- vld1.8 {q4-q5}, [r1]! ++- vld1.8 {q6-q7}, [r1], r3 +++ vld1.8 {q4-q5}, [r1, :128]! +++ vld1.8 {q6-q7}, [r1, :128], r3 ++ sub r1, #32 ++ // load b ++ add r1, #1 ++@@ -390,25 +275,14 @@ function ff_hevc_sao_edge_eo2_w64_neon_8, export=1 ++ vld1.8 {q10-q11}, [r1] ++ sub r1, #33 ++ edge_w64_body ++- // copy c to a ++- vmov.64 q0, q4 ++- vmov.64 q1, q5 ++- vmov.64 q2, q6 ++- vmov.64 q3, q7 ++ bne 1b ++ vpop {d8-d15} ++- pop {r4-r8} +++ pop {r4-r5} ++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo3_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] +++ init_edge_64 ++ vpush {d8-d15} ++ 1: sub r1, r3 ++ // load a ++@@ -418,10 +292,10 @@ function ff_hevc_sao_edge_eo3_w64_neon_8, export=1 ++ vld1.8 {q0-q1}, [r1]! ++ vld1.8 {q2-q3}, [r1], r3 ++ sub r1, #33 ++- subs r4, #1 +++ subs r12, #1 ++ // load c ++- vld1.8 {q4-q5}, [r1]! ++- vld1.8 {q6-q7}, [r1], r3 +++ vld1.8 {q4-q5}, [r1, :128]! 
+++ vld1.8 {q6-q7}, [r1, :128], r3 ++ sub r1, #32 ++ // load b ++ sub r1, #1 ++@@ -429,178 +303,176 @@ function ff_hevc_sao_edge_eo3_w64_neon_8, export=1 ++ vld1.8 {q10-q11}, [r1] ++ sub r1, #31 ++ edge_w64_body ++- // copy c to a ++- vmov.64 q0, q4 ++- vmov.64 q1, q5 ++- vmov.64 q2, q6 ++- vmov.64 q3, q7 ++ bne 1b ++ vpop {d8-d15} ++- pop {r4-r8} +++ pop {r4-r5} ++ bx lr ++ endfunc ++ +++// inputs: +++// a in q0, q1 +++// c in q2, q3 +++// b in q8, q9 +++// offset table in d31 +++// clobbered registers q0, q1, q10, q11, q12, q13 +++// output q0, q1 +++.macro edge_w32_body +++ vcgt.u8 q12, q2, q0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 q0, q0, q2 // a > c -> -1 , otherwise 0 +++ vcgt.u8 q13, q3, q1 +++ vcgt.u8 q1, q1, q3 +++ +++ vsub.s8 q12, q0, q12 // diff0 +++ vcgt.u8 q0, q2, q8 // c > b +++ vsub.s8 q13, q1, q13 // diff0 part 2 +++ +++ vcgt.u8 q10, q8, q2 // b > c +++ vcgt.u8 q1, q3, q9 +++ vcgt.u8 q11, q9, q3 +++ +++ vsub.s8 q0, q10, q0 // diff1 +++ +++ vmov.s8 q10, #2 // 2 to all elements +++ vsub.s8 q1, q11, q1 // diff1 part 2 +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 +++ +++ vadd.s8 q0, q10 +++ vadd.s8 q1, q10 +++ +++ vmov.u8 q10, #128 +++ vtbl.8 d0, {d31}, d0 +++ vtbl.8 d1, {d31}, d1 +++ vtbl.8 d2, {d31}, d2 +++ vtbl.8 d3, {d31}, d3 +++ +++ vadd.s8 q11, q2, q10 +++ vadd.s8 q12, q3, q10 +++ vqadd.s8 q11, q0 +++ vqadd.s8 q12, q1 +++ vsub.s8 q0, q11, q10 +++ vsub.s8 q1, q12, q10 +++ vst1.8 {q0-q1}, [r0, :128], r2 +++.endm +++ +++.macro init_edge_32 +++ ldr r12, [sp, #4] // sao_offset_val_table +++ vld1.32 {d31}, [r12] +++ ldr r12, [sp] // height +++.endm +++ ++ function ff_hevc_sao_edge_eo0_w32_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] ++- vpush {d8-d15} ++- sub r1, #8 // load 8 extra bytes ++-1: subs r4, #1 ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 // only first 9 bytes are used ++- 
sub r1, #32 +++ init_edge_32 +++ sub r1, #4 // load 4 extra bytes +++1: subs r12, #1 +++ vld1.32 d3[1], [r1]! +++ vld1.8 {q2-q3}, [r1, :128]! // c +++ vld1.32 d20[0], [r1], r3 +++ sub r1, #36 ++ // a ++- vext.8 q0, q10, q11, #7 ++- vext.8 q1, q11, q12, #7 ++- // c ++- vext.8 q4, q10, q11, #8 ++- vext.8 q5, q11, q12, #8 +++ vext.8 q0, q1, q2, #15 +++ vext.8 q1, q2, q3, #15 ++ // b ++- vext.8 q8, q10, q11, #9 ++- vext.8 q9, q11, q12, #9 +++ vext.8 q8, q2, q3, #1 +++ vext.8 q9, q3, q10, #1 ++ edge_w32_body ++- bne 1b ++- vpop {d8-d15} ++- pop {r4-r8} ++- bx lr +++ bne 1b +++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo1_w32_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] ++- vpush {d8-d15} +++ init_edge_32 ++ // load a ++ sub r1, r3 ++- vld1.8 {q0-q1}, [r1], r3 +++ vld1.8 {q0-q1}, [r1, :128], r3 ++ // load c ++- vld1.8 {q4-q5}, [r1], r3 ++-1: subs r4, #1 +++ vld1.8 {q2-q3}, [r1, :128], r3 +++1: subs r12, #1 ++ // load b ++- vld1.8 {q8-q9}, [r1], r3 +++ vld1.8 {q8-q9}, [r1, :128], r3 ++ edge_w32_body ++ // inputs for next loop iteration ++ // a ++- vmov.64 q0, q4 ++- vmov.64 q1, q5 +++ vmov.64 q0, q2 +++ vmov.64 q1, q3 ++ // c ++- vmov.64 q4, q8 ++- vmov.64 q5, q9 ++- bne 1b ++- vpop {d8-d15} ++- pop {r4-r8} ++- bx lr +++ vmov.64 q2, q8 +++ vmov.64 q3, q9 +++ bne 1b +++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo2_w32_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] ++- vpush {d8-d15} +++ init_edge_32 +++ vpush {d8-d15} ++ // load a ++ sub r1, r3 ++- sub r1, #8 ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 +++ sub r1, #8 +++ vld1.8 {q10-q11}, [r1, :64]! 
+++ vld1.8 {d24}, [r1, :64], r3 +++ sub r1, #32 ++ vext.8 q0, q10, q11, #7 ++ vext.8 q1, q11, q12, #7 ++ // load c ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 ++- vext.8 q4, q10, q11, #8 ++- vext.8 q5, q11, q12, #8 ++- vext.8 q2, q10, q11, #7 ++-1: subs r4, #1 +++ vld1.8 {d9}, [r1, :64]! +++ vld1.8 {q2-q3}, [r1, :64], r3 +++ sub r1, #8 +++ vext.8 q4, q4, q2, #15 +++1: subs r12, #1 ++ // load b ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 +++ vld1.8 {q10-q11}, [r1, :64]! +++ vld1.8 {q12}, [r1, :64], r3 +++ sub r1, #32 ++ vext.8 q8, q10, q11, #9 ++ vext.8 q9, q11, q12, #9 ++- vext.8 q14, q10, q11, #8 ++- vext.8 q15, q11, q12, #8 ++- vext.8 q3, q10, q11, #7 +++ vext.8 q6, q10, q11, #8 +++ vext.8 q7, q11, q12, #8 +++ vext.8 q5, q10, q11, #7 ++ edge_w32_body ++ // inputs for next loop iteration ++ // a ++- vmov.8 q0, q2 ++- vext.8 q1, q4, q5, #15 +++ vmov.8 q0, q4 +++ vext.8 q1, q2, q3, #15 ++ // c ++- vmov.8 q4, q14 ++- vmov.8 q5, q15 ++- vmov.8 q2, q3 ++- bne 1b ++- vpop {d8-d15} ++- pop {r4-r8} ++- bx lr +++ vmov.8 q2, q6 +++ vmov.8 q3, q7 +++ vmov.8 q4, q5 +++ bne 1b +++ vpop {d8-d15} +++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo3_w32_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- sub r1, r3 ++- ldr r5, [r5] ++- sub r1, #8 ++- vpush {d8-d15} +++ init_edge_32 +++ sub r1, r3 ++ // load a ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 ++- vext.8 q0, q10, q11, #9 ++- vext.8 q1, q11, q12, #9 +++ vld1.8 {q10-q11}, [r1, :64]! 
+++ vld1.8 {d24}, [r1, :64], r3 +++ sub r1, #32 +++ vext.8 q0, q10, q11, #1 +++ vext.8 q1, q11, q12, #1 ++ // load c ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 ++- vext.8 q4, q10, q11, #8 ++- vext.8 q5, q11, q12, #8 ++- vext.8 q2, q12, q11, #8 ++-1: subs r4, #1 +++ vld1.8 {q2-q3}, [r1, :64]! +++ vld1.8 {d30}, [r1, :64], r3 +++ sub r1, #40 +++1: subs r12, #1 ++ // load b ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 +++ vld1.8 {q10-q11}, [r1, :64]! +++ vld1.8 {q12}, [r1, :64], r3 +++ sub r1, #32 ++ vext.8 q8, q10, q11, #7 ++ vext.8 q9, q11, q12, #7 ++- vext.8 q3, q12, q10, #7 +++ vext.8 q14, q12, q10, #7 ++ edge_w32_body ++ // inputs for next loop iteration ++ // a ++- vext.8 q0, q4, q5, #1 ++- vext.8 q1, q5, q2, #1 +++ vext.8 q0, q2, q3, #1 +++ vext.8 q1, q3, q15, #1 ++ // c ++- vext.8 q4, q8, q9, #1 ++- vext.8 q5, q9, q3, #1 ++- vext.8 q2, q3, q1, #1 ++- bne 1b ++- vpop {d8-d15} ++- pop {r4-r8} ++- bx lr +++ vext.8 q2, q8, q9, #1 +++ vext.8 q3, q9, q14, #1 +++ vext.8 d30, d28, d2, #1 +++ bne 1b +++ bx lr ++ endfunc ++ ++-- ++2.5.0 ++ ++ ++From 016c39d46b86830204a4519590332d2a38f7ee51 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi ++Date: Thu, 8 Jan 2015 09:58:55 +0200 ++Subject: [PATCH 7/9] small optimization to SAO BAND. 
correct path for ++ bit_depth_template.c ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 2 +- ++ libavcodec/arm/hevcdsp_sao_neon.S | 2 +- ++ 2 files changed, 2 insertions(+), 2 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 8d6e863..385c35d 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -23,7 +23,7 @@ ++ #include "libavcodec/hevcdsp.h" ++ #include "hevcdsp_arm.h" ++ #include "libavcodec/avcodec.h" ++-#include "../bit_depth_template.c" +++#include "libavcodec/bit_depth_template.c" ++ ++ void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ++ void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 5fc482b..710b32b 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -26,12 +26,12 @@ ++ pld [r1] ++ vld1.8 {q0, q1}, [r12] // offset table ++ ldr r12, [sp, #4] // height +++ vmov.u8 q14, #128 ++ .endm ++ ++ .macro sao_band_32 ++ vshr.u8 q8, q2, #3 ++ vshr.u8 q9, q3, #3 ++- vmov.u8 q14, #128 ++ vtbl.8 d16, {q0, q1}, d16 ++ vtbl.8 d17, {q0, q1}, d17 ++ vtbl.8 d18, {q0, q1}, d18 ++-- ++2.5.0 ++ ++ ++From 579f1584d688e1ac24fb7d22697e2a7b64f62e8e Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi ++Date: Fri, 9 Jan 2015 10:28:52 +0200 ++Subject: [PATCH 8/9] Added height check for SAO NEON optimizations. 
Faster SAO ++ band NEON Some reordering to use NEON pipelines more efficiently ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 12 +++- ++ libavcodec/arm/hevcdsp_sao_neon.S | 142 ++++++++++++++++++++++--------------- ++ 2 files changed, 93 insertions(+), 61 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 385c35d..6d0689c 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -176,6 +176,7 @@ static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_ ++ int8_t offset_table[32] = { 0 }; ++ int k, y, x; ++ int shift = 3; // BIT_DEPTH - 5 +++ int cwidth = 0; ++ ++ stride_src /= sizeof(pixel); ++ stride_dst /= sizeof(pixel); ++@@ -183,7 +184,10 @@ static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_ ++ for (k = 0; k < 4; k++) ++ offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1]; ++ ++- switch(width){ +++ if (height % 8 == 0) +++ cwidth = width; +++ +++ switch(cwidth){ ++ case 8: ++ ff_hevc_sao_band_w8_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); ++ break; ++@@ -223,15 +227,19 @@ static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t ++ pixel *src = (pixel *)_src; ++ int a_stride, b_stride; ++ int x, y; +++ int cwidth = 0; ++ ++ for (x = 0; x < 5; x++) { ++ sao_offset_val[x] = _sao_offset_val[edge_idx[x]]; ++ } ++ +++ if (height % 8 == 0) +++ cwidth = width; +++ ++ stride_src /= sizeof(pixel); ++ stride_dst /= sizeof(pixel); ++ ++- switch (width) { +++ switch (cwidth) { ++ case 32: ++ switch(eo) { ++ case 0: ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 710b32b..08f50b8 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -26,36 +26,59 @@ ++ pld [r1] ++ vld1.8 {q0, q1}, [r12] // offset table ++ ldr r12, [sp, #4] // height ++- vmov.u8 q14, #128 +++ vmov.u8 q3, #128 ++ 
.endm ++ ++-.macro sao_band_32 ++- vshr.u8 q8, q2, #3 ++- vshr.u8 q9, q3, #3 ++- vtbl.8 d16, {q0, q1}, d16 ++- vtbl.8 d17, {q0, q1}, d17 ++- vtbl.8 d18, {q0, q1}, d18 ++- vtbl.8 d19, {q0, q1}, d19 ++- vadd.s8 q2, q14 ++- vadd.s8 q3, q14 ++- vqadd.s8 q2, q8 ++- vqadd.s8 q3, q9 ++- vsub.s8 q2, q14 ++- vsub.s8 q3, q14 +++// 128 in q3 +++// input q8 - q11 +++// 32 cycles +++.macro sao_band_64 +++ vshr.u8 q12, q8, #3 +++ vshr.u8 q13, q9, #3 +++ vshr.u8 q14, q10, #3 +++ vshr.u8 q15, q11, #3 +++ vtbl.8 d24, {d0, d1, d2, d3}, d24 +++ vadd.s8 q8, q3 +++ vtbl.8 d25, {d0, d1, d2, d3}, d25 +++ vadd.s8 q9, q3 +++ vtbl.8 d26, {d0, d1, d2, d3}, d26 +++ vadd.s8 q10, q3 +++ vtbl.8 d27, {d0, d1, d2, d3}, d27 +++ vadd.s8 q11, q3 +++ vtbl.8 d28, {d0, d1, d2, d3}, d28 +++ vqadd.s8 q8, q12 +++ vtbl.8 d29, {d0, d1, d2, d3}, d29 +++ vqadd.s8 q9, q13 +++ vtbl.8 d30, {d0, d1, d2, d3}, d30 +++ vqadd.s8 q10, q14 +++ vtbl.8 d31, {d0, d1, d2, d3}, d31 +++ vqadd.s8 q11, q15 +++ vsub.s8 q8, q3 +++ vsub.s8 q9, q3 +++ vsub.s8 q10, q3 +++ vsub.s8 q11, q3 ++ .endm ++ ++ function ff_hevc_sao_band_w8_neon_8, export=1 ++ init_sao_band ++-1: subs r12, #4 ++- vld1.8 {d4}, [r1, :64], r3 ++- vld1.8 {d5}, [r1, :64], r3 ++- vld1.8 {d6}, [r1, :64], r3 ++- vld1.8 {d7}, [r1, :64], r3 ++- sao_band_32 ++- vst1.8 {d4}, [r0, :64], r2 ++- vst1.8 {d5}, [r0, :64], r2 ++- vst1.8 {d6}, [r0, :64], r2 ++- vst1.8 {d7}, [r0, :64], r2 +++1: subs r12, #8 +++ vld1.8 {d16}, [r1, :64], r3 +++ vld1.8 {d17}, [r1, :64], r3 +++ vld1.8 {d18}, [r1, :64], r3 +++ vld1.8 {d19}, [r1, :64], r3 +++ vld1.8 {d20}, [r1, :64], r3 +++ vld1.8 {d21}, [r1, :64], r3 +++ vld1.8 {d22}, [r1, :64], r3 +++ vld1.8 {d23}, [r1, :64], r3 +++ sao_band_64 +++ vst1.8 {d16}, [r0, :64], r2 +++ vst1.8 {d17}, [r0, :64], r2 +++ vst1.8 {d18}, [r0, :64], r2 +++ vst1.8 {d19}, [r0, :64], r2 +++ vst1.8 {d20}, [r0, :64], r2 +++ vst1.8 {d21}, [r0, :64], r2 +++ vst1.8 {d22}, [r0, :64], r2 +++ vst1.8 {d23}, [r0, :64], r2 ++ bne 1b ++ ++ bx lr ++@@ -63,12 +86,16 @@ endfunc ++ 
++ function ff_hevc_sao_band_w16_neon_8, export=1 ++ init_sao_band ++-1: subs r12, #2 ++- vld1.8 {q2}, [r1, :128], r3 ++- vld1.8 {q3}, [r1, :128], r3 ++- sao_band_32 ++- vst1.8 {q2}, [r0, :128], r2 ++- vst1.8 {q3}, [r0, :128], r2 +++1: subs r12, #4 +++ vld1.8 {q8}, [r1, :128], r3 +++ vld1.8 {q9}, [r1, :128], r3 +++ vld1.8 {q10}, [r1, :128], r3 +++ vld1.8 {q11}, [r1, :128], r3 +++ sao_band_64 +++ vst1.8 {q8}, [r0, :128], r2 +++ vst1.8 {q9}, [r0, :128], r2 +++ vst1.8 {q10}, [r0, :128], r2 +++ vst1.8 {q11}, [r0, :128], r2 ++ bne 1b ++ ++ bx lr ++@@ -76,10 +103,12 @@ endfunc ++ ++ function ff_hevc_sao_band_w32_neon_8, export=1 ++ init_sao_band ++-1: subs r12, #1 ++- vld1.8 {q2-q3}, [r1, :128], r3 ++- sao_band_32 ++- vst1.8 {q2-q3}, [r0, :128], r2 +++1: subs r12, #2 +++ vld1.8 {q8-q9}, [r1, :128], r3 +++ vld1.8 {q10-q11}, [r1, :128], r3 +++ sao_band_64 +++ vst1.8 {q8-q9}, [r0, :128], r2 +++ vst1.8 {q10-q11}, [r0, :128], r2 ++ bne 1b ++ ++ bx lr ++@@ -89,13 +118,12 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #1 ++ pld [r1, r3] ++- vld1.8 {q2-q3}, [r1, :128]! ++- sao_band_32 ++- vst1.8 {q2-q3}, [r0, :128]! ++- vld1.8 {q2-q3}, [r1, :128], r3 +++ vld1.8 {q8-q9}, [r1, :128]! +++ vld1.8 {q10-q11}, [r1, :128], r3 ++ sub r1, #32 ++- sao_band_32 ++- vst1.8 {q2-q3}, [r0, :128], r2 +++ sao_band_64 +++ vst1.8 {q8-q9}, [r0, :128]! 
+++ vst1.8 {q10-q11}, [r0, :128], r2 ++ sub r0, #32 ++ bne 1b ++ ++@@ -121,7 +149,6 @@ endfunc ++ vcgt.u8 q1, q5, q9 ++ vcgt.u8 q15, q9, q5 ++ vsub.s8 q0, q14, q0 // diff1 ++- ++ vsub.s8 q1, q15, q1 ++ ++ vadd.s8 q0, q12 //diff0 + diff1 ++@@ -157,27 +184,25 @@ endfunc ++ ++ vmov.u8 q15, #128 // s8 #-128 ++ vtbl.8 d0, {d24}, d0 +++ vadd.s8 q13, q4, q15 ++ vtbl.8 d1, {d24}, d1 +++ vadd.s8 q14, q5, q15 ++ vtbl.8 d2, {d24}, d2 +++ vqadd.s8 q0, q13 ++ vtbl.8 d3, {d24}, d3 +++ vqadd.s8 q1, q14 ++ vtbl.8 d4, {d24}, d4 +++ vadd.s8 q13, q6, q15 ++ vtbl.8 d5, {d24}, d5 +++ vadd.s8 q14, q7, q15 ++ vtbl.8 d6, {d24}, d6 +++ vqadd.s8 q2, q13 ++ vtbl.8 d7, {d24}, d7 ++- ++- vadd.s8 q12, q4, q15 ++- vadd.s8 q13, q5, q15 ++- vadd.s8 q14, q6, q15 ++- vadd.s8 q15, q7, q15 ++- vqadd.s8 q12, q0 ++- vqadd.s8 q15, q3 ++- vmov.u8 q3, #128 // s8 #-128 ++- vqadd.s8 q13, q1 ++- vqadd.s8 q14, q2 ++- vsub.s8 q0, q12, q3 ++- vsub.s8 q1, q13, q3 ++- vsub.s8 q2, q14, q3 ++- vsub.s8 q3, q15, q3 +++ vqadd.s8 q3, q14 +++ vsub.s8 q0, q15 +++ vsub.s8 q1, q15 +++ vsub.s8 q2, q15 +++ vsub.s8 q3, q15 ++ vst1.8 {q0-q1}, [r0, :128]! 
++ vst1.8 {q2-q3}, [r0, :128], r2 ++ sub r0, #32 ++@@ -342,13 +367,12 @@ endfunc ++ ++ vmov.u8 q10, #128 ++ vtbl.8 d0, {d31}, d0 +++ vadd.s8 q11, q2, q10 ++ vtbl.8 d1, {d31}, d1 +++ vadd.s8 q12, q3, q10 ++ vtbl.8 d2, {d31}, d2 +++ vqadd.s8 q11, q0 ++ vtbl.8 d3, {d31}, d3 ++- ++- vadd.s8 q11, q2, q10 ++- vadd.s8 q12, q3, q10 ++- vqadd.s8 q11, q0 ++ vqadd.s8 q12, q1 ++ vsub.s8 q0, q11, q10 ++ vsub.s8 q1, q12, q10 ++-- ++2.5.0 ++ ++ ++From 026bac1824e4936e948e6b1efec82868c520ea66 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi ++Date: Mon, 2 Feb 2015 16:08:27 +0200 ++Subject: [PATCH 9/9] Further SAO NEON optimisations ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 16 +-- ++ libavcodec/arm/hevcdsp_sao_neon.S | 224 +++++++++++++++++++------------------ ++ 2 files changed, 124 insertions(+), 116 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 6d0689c..e5da7e9 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -45,10 +45,10 @@ void ff_hevc_transform_add_16x16_neon_8(uint8_t *_dst, int16_t *coeffs, ++ void ff_hevc_transform_add_32x32_neon_8(uint8_t *_dst, int16_t *coeffs, ++ ptrdiff_t stride); ++ ++-void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); ++-void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); ++-void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); ++-void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); +++void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, int8_t * offset_table, ptrdiff_t stride_src, ptrdiff_t stride_dst, int height); +++void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, 
uint8_t *_src, int8_t * offset_table, ptrdiff_t stride_src, ptrdiff_t stride_dst, int height); +++void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, int8_t * offset_table, ptrdiff_t stride_src, ptrdiff_t stride_dst, int height); +++void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, int8_t * offset_table, ptrdiff_t stride_src, ptrdiff_t stride_dst, int height); ++ ++ void ff_hevc_sao_edge_eo0_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++ void ff_hevc_sao_edge_eo1_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++@@ -189,16 +189,16 @@ static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_ ++ ++ switch(cwidth){ ++ case 8: ++- ff_hevc_sao_band_w8_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); +++ ff_hevc_sao_band_w8_neon_8(_dst, _src, offset_table, stride_src, stride_dst, height); ++ break; ++ case 16: ++- ff_hevc_sao_band_w16_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); +++ ff_hevc_sao_band_w16_neon_8(_dst, _src, offset_table, stride_src, stride_dst, height); ++ break; ++ case 32: ++- ff_hevc_sao_band_w32_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); +++ ff_hevc_sao_band_w32_neon_8(_dst, _src, offset_table, stride_src, stride_dst, height); ++ break; ++ case 64: ++- ff_hevc_sao_band_w64_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); +++ ff_hevc_sao_band_w64_neon_8(_dst, _src, offset_table, stride_src, stride_dst, height); ++ break; ++ default: ++ for (y = 0; y < height; y++) { ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 08f50b8..9c7808d 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -22,21 +22,16 @@ ++ #include "neon.S" ++ ++ .macro init_sao_band ++- ldr r12, [sp, #0] // offset_table address 
++ pld [r1] ++- vld1.8 {q0, q1}, [r12] // offset table ++- ldr r12, [sp, #4] // height +++ vld1.8 {q0, q1}, [r2] // offset table +++ ldr r2, [sp, #0] // stride_dst +++ ldr r12, [sp, #4] // height ++ vmov.u8 q3, #128 ++ .endm ++ ++ // 128 in q3 ++ // input q8 - q11 ++-// 32 cycles ++ .macro sao_band_64 ++- vshr.u8 q12, q8, #3 ++- vshr.u8 q13, q9, #3 ++- vshr.u8 q14, q10, #3 ++- vshr.u8 q15, q11, #3 ++ vtbl.8 d24, {d0, d1, d2, d3}, d24 ++ vadd.s8 q8, q3 ++ vtbl.8 d25, {d0, d1, d2, d3}, d25 ++@@ -52,8 +47,8 @@ ++ vtbl.8 d30, {d0, d1, d2, d3}, d30 ++ vqadd.s8 q10, q14 ++ vtbl.8 d31, {d0, d1, d2, d3}, d31 ++- vqadd.s8 q11, q15 ++ vsub.s8 q8, q3 +++ vqadd.s8 q11, q15 ++ vsub.s8 q9, q3 ++ vsub.s8 q10, q3 ++ vsub.s8 q11, q3 ++@@ -64,12 +59,16 @@ function ff_hevc_sao_band_w8_neon_8, export=1 ++ 1: subs r12, #8 ++ vld1.8 {d16}, [r1, :64], r3 ++ vld1.8 {d17}, [r1, :64], r3 +++ vshr.u8 q12, q8, #3 ++ vld1.8 {d18}, [r1, :64], r3 ++ vld1.8 {d19}, [r1, :64], r3 +++ vshr.u8 q13, q9, #3 ++ vld1.8 {d20}, [r1, :64], r3 ++ vld1.8 {d21}, [r1, :64], r3 +++ vshr.u8 q14, q10, #3 ++ vld1.8 {d22}, [r1, :64], r3 ++ vld1.8 {d23}, [r1, :64], r3 +++ vshr.u8 q15, q11, #3 ++ sao_band_64 ++ vst1.8 {d16}, [r0, :64], r2 ++ vst1.8 {d17}, [r0, :64], r2 ++@@ -88,9 +87,13 @@ function ff_hevc_sao_band_w16_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #4 ++ vld1.8 {q8}, [r1, :128], r3 +++ vshr.u8 q12, q8, #3 ++ vld1.8 {q9}, [r1, :128], r3 +++ vshr.u8 q13, q9, #3 ++ vld1.8 {q10}, [r1, :128], r3 +++ vshr.u8 q14, q10, #3 ++ vld1.8 {q11}, [r1, :128], r3 +++ vshr.u8 q15, q11, #3 ++ sao_band_64 ++ vst1.8 {q8}, [r0, :128], r2 ++ vst1.8 {q9}, [r0, :128], r2 ++@@ -105,7 +108,11 @@ function ff_hevc_sao_band_w32_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #2 ++ vld1.8 {q8-q9}, [r1, :128], r3 +++ vshr.u8 q12, q8, #3 +++ vshr.u8 q13, q9, #3 ++ vld1.8 {q10-q11}, [r1, :128], r3 +++ vshr.u8 q14, q10, #3 +++ vshr.u8 q15, q11, #3 ++ sao_band_64 ++ vst1.8 {q8-q9}, [r0, :128], r2 ++ vst1.8 {q10-q11}, [r0, :128], r2 
++@@ -119,7 +126,11 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ 1: subs r12, #1 ++ pld [r1, r3] ++ vld1.8 {q8-q9}, [r1, :128]! +++ vshr.u8 q12, q8, #3 +++ vshr.u8 q13, q9, #3 ++ vld1.8 {q10-q11}, [r1, :128], r3 +++ vshr.u8 q14, q10, #3 +++ vshr.u8 q15, q11, #3 ++ sub r1, #32 ++ sao_band_64 ++ vst1.8 {q8-q9}, [r0, :128]! ++@@ -129,51 +140,18 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ ++ bx lr ++ endfunc ++-// input ++-// a in q0 - q3 ++-// c in q4 - q7 ++-// b in q8 - q11 ++-// offset table in r7 and r5 ++-// output in q0 - q3 ++-// clobbers q12 - q15 ++-.macro edge_w64_body ++- vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 ++- vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 ++- vcgt.u8 q13, q5, q1 ++- vcgt.u8 q1, q1, q5 ++- vsub.s8 q12, q0, q12 // diff0 ++- vcgt.u8 q0, q4, q8 // c > b ++- vsub.s8 q13, q1, q13 ++- ++- vcgt.u8 q14, q8, q4 // b > c ++- vcgt.u8 q1, q5, q9 ++- vcgt.u8 q15, q9, q5 ++- vsub.s8 q0, q14, q0 // diff1 ++- vsub.s8 q1, q15, q1 ++ ++- vadd.s8 q0, q12 //diff0 + diff1 ++- vadd.s8 q1, q13 ++- ++- vcgt.u8 q14, q6, q2 ++- vcgt.u8 q2, q2, q6 ++- vcgt.u8 q15, q7, q3 ++- vcgt.u8 q3, q3, q7 ++- ++- vsub.s8 q14, q2, q14 ++- vcgt.u8 q2, q6, q10 ++- vsub.s8 q15, q3, q15 ++- ++- vcgt.u8 q12, q10, q6 ++- vcgt.u8 q3, q7, q11 ++- vcgt.u8 q13, q11, q7 ++- vsub.s8 q2, q12, q2 ++- vsub.s8 q3, q13, q3 +++.macro diff32 out0, out1, tmp0, tmp1, in0, in1, in2, in3 +++ vcgt.u8 \out0, \in2, \in0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 \tmp0, \in0, \in2 // a > c -> -1 , otherwise 0 +++ vcgt.u8 \out1, \in3, \in1 // c > a -> -1 , otherwise 0 part 2 +++ vcgt.u8 \tmp1, \in1, \in3 // a > c -> -1 , otherwise 0 part 2 +++ vsub.s8 \out0, \tmp0, \out0 // diff0 +++ vsub.s8 \out1, \tmp1, \out1 // diff0 part 2 +++.endm ++ +++.macro table64 ++ vmov.s8 q13, #2 // 2 to all elements ++- ++- vadd.s8 q2, q14 ++- vadd.s8 q3, q15 ++- ++ vmov.32 d24[0], r4 // load offset table from general registers ++ vmov.32 d24[1], r5 // load rest of offset table ++ ++@@ -208,6 
+186,28 @@ endfunc ++ sub r0, #32 ++ .endm ++ +++// input +++// a in q0 - q3 +++// c in q4 - q7 +++// b in q8 - q11 +++// offset table in r7 and r5 +++// output in q0 - q3 +++// clobbers q12 - q15 +++.macro edge_w64_body +++ diff32 q12, q13, q0, q1, q0, q1, q4, q5 +++ diff32 q0, q1, q14, q15, q8, q9, q4, q5 +++ +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 +++ +++ diff32 q14, q15, q2, q3, q2, q3, q6, q7 +++ diff32 q2, q3, q12, q13, q10, q11, q6, q7 +++ +++ vadd.s8 q2, q14 +++ vadd.s8 q3, q15 +++ table64 +++.endm +++ ++ .macro init_edge_64 ++ push {r4-r5} ++ ldr r12, [sp, #8] // height ++@@ -334,38 +334,23 @@ function ff_hevc_sao_edge_eo3_w64_neon_8, export=1 ++ bx lr ++ endfunc ++ ++-// inputs: ++-// a in q0, q1 ++-// c in q2, q3 ++-// b in q8, q9 ++-// offset table in d31 ++-// clobbered registers q0, q1, q10, q11, q12, q13 ++-// output q0, q1 ++-.macro edge_w32_body ++- vcgt.u8 q12, q2, q0 // c > a -> -1 , otherwise 0 ++- vcgt.u8 q0, q0, q2 // a > c -> -1 , otherwise 0 ++- vcgt.u8 q13, q3, q1 ++- vcgt.u8 q1, q1, q3 ++- ++- vsub.s8 q12, q0, q12 // diff0 ++- vcgt.u8 q0, q2, q8 // c > b ++- vsub.s8 q13, q1, q13 // diff0 part 2 ++- ++- vcgt.u8 q10, q8, q2 // b > c ++- vcgt.u8 q1, q3, q9 ++- vcgt.u8 q11, q9, q3 ++- ++- vsub.s8 q0, q10, q0 // diff1 ++- ++- vmov.s8 q10, #2 // 2 to all elements ++- vsub.s8 q1, q11, q1 // diff1 part 2 ++- vadd.s8 q0, q12 //diff0 + diff1 ++- vadd.s8 q1, q13 +++.macro init_edge_32 +++ ldr r12, [sp, #4] // sao_offset_val_table +++ vld1.32 {d31}, [r12] +++ ldr r12, [sp] // height +++.endm ++ ++- vadd.s8 q0, q10 ++- vadd.s8 q1, q10 +++.macro diff out0, tmp0, in0, in1 +++ vcgt.u8 \out0, \in1, \in0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 \tmp0, \in0, \in1 // a > c -> -1 , otherwise 0 +++ vsub.s8 \out0, \tmp0, \out0 // diff0 +++.endm ++ ++- vmov.u8 q10, #128 +++.macro table32 +++ vmov.s8 q10, #2 +++ vadd.s8 q0, q10 +++ vadd.s8 q1, q10 +++ vmov.s8 q10, #128 ++ vtbl.8 d0, {d31}, d0 ++ vadd.s8 q11, q2, q10 ++ vtbl.8 d1, {d31}, d1 ++@@ 
-373,56 +358,68 @@ endfunc ++ vtbl.8 d2, {d31}, d2 ++ vqadd.s8 q11, q0 ++ vtbl.8 d3, {d31}, d3 ++- vqadd.s8 q12, q1 ++- vsub.s8 q0, q11, q10 ++- vsub.s8 q1, q12, q10 +++ vqadd.s8 q12, q1 +++ vsub.s8 q0, q11, q10 +++ vsub.s8 q1, q12, q10 ++ vst1.8 {q0-q1}, [r0, :128], r2 ++ .endm ++ ++-.macro init_edge_32 ++- ldr r12, [sp, #4] // sao_offset_val_table ++- vld1.32 {d31}, [r12] ++- ldr r12, [sp] // height ++-.endm ++- ++ function ff_hevc_sao_edge_eo0_w32_neon_8, export=1 ++ init_edge_32 ++- sub r1, #4 // load 4 extra bytes +++ vpush {q4-q7} +++ sub r1, #4 ++ 1: subs r12, #1 ++- vld1.32 d3[1], [r1]! ++- vld1.8 {q2-q3}, [r1, :128]! // c ++- vld1.32 d20[0], [r1], r3 ++- sub r1, #36 +++ vld1.8 {q13-q14}, [r1]! +++ vld1.32 d30, [r1], r3 +++ sub r1, #32 ++ // a ++- vext.8 q0, q1, q2, #15 ++- vext.8 q1, q2, q3, #15 ++- // b ++- vext.8 q8, q2, q3, #1 ++- vext.8 q9, q3, q10, #1 ++- edge_w32_body +++ vext.8 q0, q13, q14, #3 +++ vext.8 q1, q14, q15, #3 +++ vshr.u64 d24, d30, #24 +++ // c +++ vext.8 q2, q13, q14, #4 +++ vext.8 q3, q14, q15, #4 +++ vshr.u64 d16, d30, #32 +++ // diff0 +++ diff32 q13, q14, q4, q5, q0, q1, q2, q3 +++ diff d18, d25, d24, d16 +++ // -diff1 +++ vext.s8 q0, q13, q14, #1 +++ vext.s8 q1, q14, q9, #1 +++ +++ vsub.s8 q0, q13, q0 //diff0 + diff1 +++ vsub.s8 q1, q14, q1 +++ table32 ++ bne 1b +++ vpop {q4-q7} +++ ++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo1_w32_neon_8, export=1 ++ init_edge_32 +++ vpush {q4-q7} ++ // load a ++ sub r1, r3 ++ vld1.8 {q0-q1}, [r1, :128], r3 ++ // load c ++ vld1.8 {q2-q3}, [r1, :128], r3 +++ diff32 q12, q13, q0, q1, q0, q1, q2, q3 // CMP ( c, a ) ++ 1: subs r12, #1 ++ // load b ++ vld1.8 {q8-q9}, [r1, :128], r3 ++- edge_w32_body ++- // inputs for next loop iteration ++- // a ++- vmov.64 q0, q2 ++- vmov.64 q1, q3 +++ diff32 q4, q5, q10, q11, q8, q9, q2, q3 // CMP ( c, b ) +++ vadd.s8 q0, q4, q12 //diff0 + diff1 +++ vadd.s8 q1, q5, q13 +++ table32 +++ // CMP ( c, a ) +++ vneg.s8 q12, q4 +++ vneg.s8 q13, q5 ++ // c ++ 
vmov.64 q2, q8 ++ vmov.64 q3, q9 ++ bne 1b +++ vpop {q4-q7} ++ bx lr ++ endfunc ++ ++@@ -452,7 +449,11 @@ function ff_hevc_sao_edge_eo2_w32_neon_8, export=1 ++ vext.8 q6, q10, q11, #8 ++ vext.8 q7, q11, q12, #8 ++ vext.8 q5, q10, q11, #7 ++- edge_w32_body +++ diff32 q12, q13, q0, q1, q0, q1, q2, q3 +++ diff32 q0, q1, q10, q11, q8, q9, q2, q3 +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 +++ table32 ++ // inputs for next loop iteration ++ // a ++ vmov.8 q0, q4 ++@@ -487,7 +488,14 @@ function ff_hevc_sao_edge_eo3_w32_neon_8, export=1 ++ vext.8 q8, q10, q11, #7 ++ vext.8 q9, q11, q12, #7 ++ vext.8 q14, q12, q10, #7 ++- edge_w32_body +++ +++ diff32 q12, q13, q0, q1, q0, q1, q2, q3 +++ diff32 q0, q1, q10, q11, q8, q9, q2, q3 +++ +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 +++ table32 +++ ++ // inputs for next loop iteration ++ // a ++ vext.8 q0, q2, q3, #1 ++-- ++2.5.0 ++ +diff --git a/tools/depends/target/ffmpeg/autobuild.sh b/tools/depends/target/ffmpeg/autobuild.sh +index 9c26b239c2b2c1221bed7c4d99c46e909a4a5c5d..b9590d7b200a2ccf0fe3aa660e3b08b82d2133fc 100755 +--- a/tools/depends/target/ffmpeg/autobuild.sh ++++ b/tools/depends/target/ffmpeg/autobuild.sh +@@ -128,6 +128,9 @@ cd "ffmpeg-${VERSION}" || exit 2 + tar --strip-components=1 -xf $MYDIR/${ARCHIVE} + + patch -p1 < ../../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch ++patch -p1 < ../../0001-Discard-data-before-VO-VOL-in-mpeg-4-over-mpegts.patch ++patch -p1 < ../../hevcdsp_ARM_NEON_optimized_epel_functions.patch ++patch -p1 < ../../added_ARM_NEON_optimized_SAO_patches.patch + + CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" \ + ./configure --prefix=$FFMPEG_PREFIX \ +diff --git a/tools/depends/target/ffmpeg/hevcdsp_ARM_NEON_optimized_epel_functions.patch b/tools/depends/target/ffmpeg/hevcdsp_ARM_NEON_optimized_epel_functions.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..5e8e07d407f045fc99554f0f061d1e818716ac62 +--- /dev/null ++++ 
b/tools/depends/target/ffmpeg/hevcdsp_ARM_NEON_optimized_epel_functions.patch +@@ -0,0 +1,409 @@ ++From 29c3327a0d72a7e872ff170363cfe5ed13bca5d0 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi ++Date: Tue, 22 Dec 2015 18:10:24 +0000 ++Subject: [PATCH] hevcdsp: ARM NEON optimized epel functions ++ ++--- ++ libavcodec/arm/Makefile | 1 + ++ libavcodec/arm/hevcdsp_epel_neon.S | 334 +++++++++++++++++++++++++++++++++++++ ++ libavcodec/arm/hevcdsp_init_neon.c | 23 +++ ++ 3 files changed, 358 insertions(+) ++ create mode 100644 libavcodec/arm/hevcdsp_epel_neon.S ++ ++diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile ++index cdd35b0..6051ec8 100644 ++--- a/libavcodec/arm/Makefile +++++ b/libavcodec/arm/Makefile ++@@ -131,6 +131,7 @@ NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \ ++ arm/synth_filter_neon.o ++ NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_neon.o \ ++ arm/hevcdsp_deblock_neon.o \ +++ arm/hevcdsp_epel_neon.o \ ++ arm/hevcdsp_idct_neon.o \ ++ arm/hevcdsp_qpel_neon.o ++ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o ++diff --git a/libavcodec/arm/hevcdsp_epel_neon.S b/libavcodec/arm/hevcdsp_epel_neon.S ++new file mode 100644 ++index 0000000..516ae5b ++--- /dev/null +++++ b/libavcodec/arm/hevcdsp_epel_neon.S ++@@ -0,0 +1,334 @@ +++/* +++ * Copyright (c) 2014 - 2015 Seppo Tomperi +++ * +++ * This file is part of FFmpeg. +++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. 
+++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++#include "libavutil/arm/asm.S" +++#include "neon.S" +++ +++#define MAX_PB_SIZE #64 +++ +++.macro vextin_d4 +++ vld1.8 {q10}, [r1], r2 +++ vmov d16, d20 +++ vext.8 d17, d20, d21, #1 +++ vext.8 d18, d20, d21, #2 +++ vext.8 d19, d20, d21, #3 +++.endm +++ +++.macro vextin_d4_8 +++ vld1.8 d16, [r1], r2 +++ vext.8 d17, d16, d16, #1 +++ vext.8 d18, d16, d16, #2 +++ vext.8 d19, d16, d16, #3 +++.endm +++ +++.macro load_coeffs_16b coeffs +++ ldr \coeffs, [\coeffs] +++ vdup.i8 d0, \coeffs +++ lsr \coeffs, #8 +++ vdup.i8 d1, \coeffs +++ lsr \coeffs, #8 +++ vdup.i8 d2, \coeffs +++ lsr \coeffs, #8 +++ vdup.i8 d3, \coeffs +++.endm +++ +++.macro epel_filter_16b out=q12 +++ vmull.u8 q3, d16, d0 +++ vmull.u8 q11, d19, d3 +++ vmull.u8 \out, d17, d1 +++ vmull.u8 q10, d18, d2 +++ vadd.s16 q3, q11 +++ vadd.s16 \out, q10 +++ vsub.s16 \out, q3 +++.endm +++ +++.macro load_coeffs_32b coeffs +++ ldr \coeffs, [\coeffs] +++ vmov.i64 d4, #0 +++ vmov.8 d4[0], \coeffs +++ lsr \coeffs, #8 +++ vmov.8 d4[2], \coeffs +++ lsr \coeffs, #8 +++ vmov.8 d4[4], \coeffs +++ lsr \coeffs, #8 +++ vmov.8 d4[6], \coeffs +++.endm +++ +++.macro epel_filter_32b +++ vmull.s16 q3, d24, d4[0] //q12 +++ vmull.s16 q4, d25, d4[0] +++ vmull.s16 q5, d30, d4[3] //q15 +++ vmull.s16 q6, d31, d4[3] +++ +++ vmull.s16 q7, d26, d4[1] // q13 +++ vmull.s16 q8, d27, d4[1] +++ vmull.s16 q9, d28, d4[2] // q14 +++ vmull.s16 q10, d29, d4[2] +++ vadd.s32 q3, q5 +++ vadd.s32 q4, q6 +++ vadd.s32 q7, q9 +++ vadd.s32 q8, q10 +++ vsub.s32 q7, q3 +++ vsub.s32 q8, q4 +++ vqshrn.s32 d6, q7, #6 +++ vqshrn.s32 d7, q8, #6 +++.endm +++ +++.macro epel_filter_32b_4 +++ vmull.s16 q3, d24, d4[0] //q12 +++ vmull.s16 q5, d30, d4[3] //q15 +++ vmull.s16 q7, d26, d4[1] // q13 +++ vmull.s16 q9, d28, d4[2] // q14 +++ 
vadd.s32 q3, q5 +++ vadd.s32 q7, q9 +++ vsub.s32 q7, q3 +++ vqshrn.s32 d6, q7, #6 +++.endm +++ +++function ff_hevc_put_epel_h_neon_8, export=1 +++ push {r4-r7} +++ mov r4, MAX_PB_SIZE +++ ldr r7, [sp, #16] // mx +++ ldr r5, [sp, #24] // width +++ sub r7, #1 +++ lsl r7, #2 +++ vpush {d8-d15} +++ adrl r12, epel_coeffs +++ add r7, r12 +++ sub r1, #1 +++ lsl r4, #1 +++ load_coeffs_16b r7 +++ mov r12, r3 +++ mov r6, r0 +++ mov r7, r1 +++ cmp r5, #6 +++ bgt 8f +++ cmp r5, #4 +++ blt 2f +++ b 4f +++8: subs r3, #1 +++ pld [r1] +++ vextin_d4 +++ epel_filter_16b +++ vst1.16 {q12}, [r0], r4 +++ bne 8b +++ subs r5, #8 +++ beq 99f +++ mov r3, r12 +++ add r6, #16 +++ mov r0, r6 +++ add r7, #8 +++ mov r1, r7 +++ cmp r5, #4 +++ bgt 8b +++4: subs r3, #1 +++ pld [r1] +++ vextin_d4_8 +++ epel_filter_16b +++ vst1.16 d24, [r0], r4 +++ bne 4b +++ subs r5, #4 +++ beq 99f +++ mov r3, r12 +++ add r6, #8 +++ mov r0, r6 +++ add r7, #4 +++ mov r1, r7 +++2: subs r3, #1 +++ pld [r1] +++ vextin_d4_8 +++ epel_filter_16b +++ vst1.32 d24[0], [r0], r4 +++ bne 2b +++99: vpop {d8-d15} +++ pop {r4-r7} +++ bx lr +++endfunc +++ +++function ff_hevc_put_epel_v_neon_8, export=1 +++ push {r4-r7} +++ mov r4, MAX_PB_SIZE +++ ldr r7, [sp, #20] // my +++ ldr r5, [sp, #24] // width +++ sub r7, #1 +++ lsl r7, #2 +++ vpush {d8-d15} +++ adrl r12, epel_coeffs +++ add r7, r12 +++ load_coeffs_16b r7 +++ sub r1, r2 +++ lsl r4, #1 +++ mov r12, r3 +++ mov r6, r0 +++ mov r7, r1 +++0: pld [r1] +++ vld1.8 {d16}, [r1], r2 +++ pld [r1] +++ vld1.8 {d17}, [r1], r2 +++ pld [r1] +++ vld1.8 {d18}, [r1], r2 +++ cmp r5, #6 +++ bgt 8f +++ cmp r5, #4 +++ blt 2f +++ b 4f +++8: pld [r1] +++ vld1.8 {d19}, [r1], r2 +++ subs r3, #1 +++ epel_filter_16b +++ vst1.16 {q12}, [r0], r4 +++ vmov d16, d17 +++ vmov d17, d18 +++ vmov d18, d19 +++ bne 8b +++ subs r5, #8 +++ beq 99f +++ mov r3, r12 +++ add r6, #16 +++ mov r0, r6 +++ add r7, #8 +++ mov r1, r7 +++ b 0b +++4: pld [r1] +++ vld1.8 {d19}, [r1], r2 +++ subs r3, #1 +++ epel_filter_16b +++ 
vst1.16 d24, [r0], r4 +++ vmov d16, d17 +++ vmov d17, d18 +++ vmov d18, d19 +++ bne 4b +++ subs r5, #4 +++ beq 99f +++ mov r3, r12 +++ add r6, #8 +++ mov r0, r6 +++ add r7, #4 +++ mov r1, r7 +++ b 0b +++2: pld [r1] +++ vld1.8 {d19}, [r1], r2 +++ subs r3, #1 +++ epel_filter_16b +++ vst1.32 d24[0], [r0], r4 +++ vmov d16, d17 +++ vmov d17, d18 +++ vmov d18, d19 +++ bne 2b +++99: vpop {d8-d15} +++ pop {r4-r7} +++ bx lr +++endfunc +++ +++function ff_hevc_put_epel_hv_neon_8, export=1 +++ push {r4-r7} +++ mov r4, MAX_PB_SIZE +++ ldr r6, [sp, #16] // mx +++ ldr r7, [sp, #20] // my +++ ldr r5, [sp, #24] // width +++ sub r7, #1 +++ lsl r7, #2 +++ vpush {d8-d15} +++ adrl r12, epel_coeffs +++ sub r6, #1 +++ lsl r6, #2 +++ add r6, r12 // mx epel coeff offset +++ add r7, r12 +++ sub r1, #1 +++ sub r1, r2 +++ lsl r4, #1 +++ load_coeffs_16b r6 +++ load_coeffs_32b r7 +++ mov r12, r3 +++ mov r6, r0 +++ mov r7, r1 +++0: pld [r1] +++ vextin_d4 +++ epel_filter_16b q12 +++ pld [r1] +++ vextin_d4 +++ epel_filter_16b q13 +++ pld [r1] +++ vextin_d4 +++ epel_filter_16b q14 +++ cmp r5, #6 +++ bgt 8f +++ cmp r5, #4 +++ blt 2f +++ b 4f +++8: pld [r1] +++ vextin_d4 +++ epel_filter_16b q15 +++ subs r3, #1 +++ epel_filter_32b +++ vst1.16 {q3}, [r0], r4 +++ vmov q12, q13 +++ vmov q13, q14 +++ vmov q14, q15 +++ bne 8b +++ subs r5, #8 +++ beq 99f +++ mov r3, r12 +++ add r6, #16 +++ mov r0, r6 +++ add r7, #8 +++ mov r1, r7 +++ b 0b +++4: pld [r1] +++ vextin_d4_8 +++ epel_filter_16b q15 +++ subs r3, #1 +++ epel_filter_32b_4 +++ vst1.16 d6, [r0], r4 +++ vmov q12, q13 +++ vmov q13, q14 +++ vmov q14, q15 +++ bne 4b +++ subs r5, #4 +++ beq 99f +++ mov r3, r12 +++ add r6, #8 +++ mov r0, r6 +++ add r7, #4 +++ mov r1, r7 +++ b 0b +++2: pld [r1] +++ vextin_d4_8 +++ epel_filter_16b q15 +++ subs r3, #1 +++ epel_filter_32b_4 +++ vst1.32 d6[0], [r0], r4 +++ vmov q12, q13 +++ vmov q13, q14 +++ vmov q14, q15 +++ bne 2b +++99: vpop {d8-d15} +++ pop {r4-r7} +++ bx lr +++endfunc +++ +++epel_coeffs: +++ .byte 2, 58, 
10, 2 +++ .byte 4, 54, 16, 2 +++ .byte 6, 46, 28, 4 +++ .byte 4, 36, 36, 4 +++ .byte 4, 28, 46, 6 +++ .byte 2, 16, 54, 4 +++ .byte 2, 10, 58, 2 ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 5591807..733ff08 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -58,6 +58,15 @@ PUT_PIXELS(ff_hevc_put_pixels_w32_neon_8); ++ PUT_PIXELS(ff_hevc_put_pixels_w48_neon_8); ++ PUT_PIXELS(ff_hevc_put_pixels_w64_neon_8); ++ #undef PUT_PIXELS +++void ff_hevc_put_epel_h_neon_8(int16_t *dst, uint8_t *src, +++ ptrdiff_t srcstride, int height, +++ intptr_t mx, intptr_t my, int width); +++void ff_hevc_put_epel_v_neon_8(int16_t *dst, uint8_t *src, +++ ptrdiff_t srcstride, int height, +++ intptr_t mx, intptr_t my, int width); +++void ff_hevc_put_epel_hv_neon_8(int16_t *dst, uint8_t *src, +++ ptrdiff_t srcstride, int height, +++ intptr_t mx, intptr_t my, int width); ++ ++ static void (*put_hevc_qpel_neon[4][4])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, ++ int height, int width); ++@@ -201,7 +210,21 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ c->put_hevc_qpel_bi[x][1][0] = ff_hevc_put_qpel_bi_neon_wrapper; ++ c->put_hevc_qpel_bi[x][0][1] = ff_hevc_put_qpel_bi_neon_wrapper; ++ c->put_hevc_qpel_bi[x][1][1] = ff_hevc_put_qpel_bi_neon_wrapper; +++ c->put_hevc_epel[x][1][0] = ff_hevc_put_epel_v_neon_8; +++ c->put_hevc_epel[x][0][1] = ff_hevc_put_epel_h_neon_8; +++ c->put_hevc_epel[x][1][1] = ff_hevc_put_epel_hv_neon_8; ++ } +++ c->put_hevc_epel[0][0][0] = ff_hevc_put_pixels_w2_neon_8; +++ c->put_hevc_epel[1][0][0] = ff_hevc_put_pixels_w4_neon_8; +++ c->put_hevc_epel[2][0][0] = ff_hevc_put_pixels_w6_neon_8; +++ c->put_hevc_epel[3][0][0] = ff_hevc_put_pixels_w8_neon_8; +++ c->put_hevc_epel[4][0][0] = ff_hevc_put_pixels_w12_neon_8; +++ c->put_hevc_epel[5][0][0] = ff_hevc_put_pixels_w16_neon_8; +++ c->put_hevc_epel[6][0][0] = 
ff_hevc_put_pixels_w24_neon_8; +++ c->put_hevc_epel[7][0][0] = ff_hevc_put_pixels_w32_neon_8; +++ c->put_hevc_epel[8][0][0] = ff_hevc_put_pixels_w48_neon_8; +++ c->put_hevc_epel[9][0][0] = ff_hevc_put_pixels_w64_neon_8; +++ ++ c->put_hevc_qpel[0][0][0] = ff_hevc_put_pixels_w2_neon_8; ++ c->put_hevc_qpel[1][0][0] = ff_hevc_put_pixels_w4_neon_8; ++ c->put_hevc_qpel[2][0][0] = ff_hevc_put_pixels_w6_neon_8; ++-- ++2.5.0 ++ + +From 641013389142290475c0c053cf2cbd3a4866eae0 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 7 May 2015 14:04:18 +0100 +Subject: [PATCH 28/67] [ffmpeg] Add GPU acceleration to hevc + +--- + tools/depends/target/ffmpeg/Makefile | 4 +- + tools/depends/target/ffmpeg/autobuild.sh | 1 + + .../target/ffmpeg/pfcd_hevc_optimisations.patch | 38136 +++++++++++++++++++ + 3 files changed, 38140 insertions(+), 1 deletion(-) + create mode 100644 tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch + +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index d9db534dd8c59a4993a3509737d901fbb3923de8..2dc4addea504d142eb74385653584bf39b253156 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -2,7 +2,8 @@ include ../../Makefile.include + include FFMPEG-VERSION + DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ + 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch \ +- hevcdsp_ARM_NEON_optimized_epel_functions.patch added_ARM_NEON_optimized_SAO_patches.patch ++ hevcdsp_ARM_NEON_optimized_epel_functions.patch added_ARM_NEON_optimized_SAO_patches.patch \ ++ pfcd_hevc_optimisations.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -81,6 +82,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); patch -p1 < ../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch + cd $(PLATFORM); patch -p1 < ../hevcdsp_ARM_NEON_optimized_epel_functions.patch + cd 
$(PLATFORM); patch -p1 < ../added_ARM_NEON_optimized_SAO_patches.patch ++ cd $(PLATFORM); patch -p1 < ../pfcd_hevc_optimisations.patch + + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ +diff --git a/tools/depends/target/ffmpeg/autobuild.sh b/tools/depends/target/ffmpeg/autobuild.sh +index b9590d7b200a2ccf0fe3aa660e3b08b82d2133fc..b6bd57731bca6dfe5f814a4043b3e08d1bb08318 100755 +--- a/tools/depends/target/ffmpeg/autobuild.sh ++++ b/tools/depends/target/ffmpeg/autobuild.sh +@@ -131,6 +131,7 @@ patch -p1 < ../../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patc + patch -p1 < ../../0001-Discard-data-before-VO-VOL-in-mpeg-4-over-mpegts.patch + patch -p1 < ../../hevcdsp_ARM_NEON_optimized_epel_functions.patch + patch -p1 < ../../added_ARM_NEON_optimized_SAO_patches.patch ++patch -p1 < ../../pfcd_hevc_optimisations.patch + + CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" \ + ./configure --prefix=$FFMPEG_PREFIX \ +diff --git a/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch b/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..e172ebf157aebffe1ae50b4a2b25fd71bc708c93 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch +@@ -0,0 +1,38136 @@ ++From b9b5434c61afd492a54dad5158b4d56ecbf7f01d Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Tue, 28 Apr 2015 16:18:40 +0100 ++Subject: [PATCH 01/68] Added display output ++ ++--- ++ ffmpeg.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ++ 1 file changed, 159 insertions(+) ++ ++diff --git a/ffmpeg.c b/ffmpeg.c ++index 9ffd833..50c6e86 100644 ++--- a/ffmpeg.c +++++ b/ffmpeg.c ++@@ -23,6 +23,11 @@ ++ * multimedia converter based on the FFmpeg libraries ++ */ ++ +++#ifdef RPI +++#define RPI_DISPLAY +++//#define RPI_ZERO_COPY +++#endif +++ ++ #include "config.h" ++ #include ++ #include ++@@ -66,6 +71,20 @@ ++ # 
include "libavfilter/buffersrc.h" ++ # include "libavfilter/buffersink.h" ++ +++#ifdef RPI_DISPLAY +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#ifdef RPI_ZERO_COPY +++#include "libavcodec/rpi_qpu.h" +++#endif +++#endif +++ ++ #if HAVE_SYS_RESOURCE_H ++ #include ++ #include ++@@ -158,6 +177,134 @@ static int restore_tty; ++ static void free_input_threads(void); ++ #endif ++ +++#ifdef RPI_DISPLAY +++ +++#define NUM_BUFFERS 4 +++ +++static MMAL_COMPONENT_T* rpi_display = NULL; +++static MMAL_POOL_T *rpi_pool = NULL; +++ +++#ifdef RPI_ZERO_COPY +++static uint8_t *get_vc_handle(AVBufferRef *bref) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); +++ return (uint8_t *)p->vc_handle; +++} +++#endif +++ +++static MMAL_POOL_T* display_alloc_pool(MMAL_PORT_T* port, size_t w, size_t h) +++{ +++ MMAL_POOL_T* pool; +++ size_t i; +++ size_t size = (w*h*3)/2; +++#ifdef RPI_ZERO_COPY +++ mmal_port_parameter_set_boolean(port, MMAL_PARAMETER_ZERO_COPY, MMAL_TRUE); // Does this mark that the buffer contains a vc_handle? Would have expected a vc_image? 
+++ pool = mmal_port_pool_create(port, NUM_BUFFERS, 0); +++ assert(pool); +++#else +++ pool = mmal_port_pool_create(port, NUM_BUFFERS, size); +++ +++ for (i = 0; i < NUM_BUFFERS; ++i) +++ { +++ MMAL_BUFFER_HEADER_T* buffer = pool->header[i]; +++ void* bufPtr = buffer->data; +++ memset(bufPtr, i*30, w*h); +++ memset(bufPtr+w*h, 128, (w*h)/2); +++ } +++#endif +++ +++ return pool; +++} +++ +++static void display_cb_input(MMAL_PORT_T *port,MMAL_BUFFER_HEADER_T *buffer) { +++ mmal_buffer_header_release(buffer); +++} +++ +++static MMAL_COMPONENT_T* display_init(size_t x, size_t y, size_t w, size_t h) +++{ +++ MMAL_COMPONENT_T* display; +++ int w2 = (w+31)&~31; +++ int h2 = (h+15)&~15; +++ MMAL_DISPLAYREGION_T region = +++ { +++ {MMAL_PARAMETER_DISPLAYREGION, sizeof(region)}, +++ .set = MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_FULLSCREEN | MMAL_DISPLAY_SET_DEST_RECT, +++ .layer = 2, +++ .fullscreen = 0, +++ .dest_rect = {x, y, w, h} +++ }; +++ bcm_host_init(); // TODO is this needed? +++ mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &display); +++ assert(display); +++ +++ mmal_port_parameter_set(display->input[0], &region.hdr); // apply layer / dest rect to the renderer input +++ +++ MMAL_ES_FORMAT_T* format = display->input[0]->format; +++ format->encoding = MMAL_ENCODING_I420; +++ format->es->video.width = w2; +++ format->es->video.height = h2; +++ format->es->video.crop.x = 0; +++ format->es->video.crop.y = 0; +++ format->es->video.crop.width = w; +++ format->es->video.crop.height = h; +++ mmal_port_format_commit(display->input[0]); +++ +++ mmal_component_enable(display); +++ +++ rpi_pool = display_alloc_pool(display->input[0], w2, h2); +++ +++ mmal_port_enable(display->input[0],display_cb_input); +++ mmal_port_enable(display->control,display_cb_input); +++ +++ printf("Allocated display %zu %zu\n",w,h); // w and h are size_t +++ +++ return display; +++} +++ +++static void display_frame(MMAL_COMPONENT_T* display,AVFrame* fr) +++{ +++ int w = fr->width; +++ int h = fr->height; +++ int w2 = (w+31)&~31; +++ int h2 = (h+15)&~15; +++
if (!display || !rpi_pool) +++ return; +++ MMAL_BUFFER_HEADER_T* buf = mmal_queue_get(rpi_pool->queue); +++ if (!buf) { +++ // Running too fast so drop the frame +++ return; +++ } +++ assert(buf); +++ buf->cmd = 0; +++ buf->length = (w2 * h2 * 3)/2; +++ buf->offset = 0; // Offset to valid data +++ buf->flags = 0; +++#ifdef RPI_ZERO_COPY +++ buf->data = get_vc_handle(fr->buf[0]); +++ buf->alloc_size = (w2*h2*3)/2; +++#else +++ //mmal_buffer_header_mem_lock(buf); +++ memcpy(buf->data, fr->data[0], w2 * h); +++ memcpy(buf->data+w2*h2, fr->data[1], w2 * h / 4); +++ memcpy(buf->data+w2*h2*5/4, fr->data[2], w2 * h / 4); +++ //mmal_buffer_header_mem_unlock(buf); +++#endif +++ +++ mmal_port_send_buffer(display->input[0], buf); // I assume this will automatically get released +++} +++ +++static void display_exit(MMAL_COMPONENT_T* display) +++{ +++ if (display && rpi_pool) { // pool first: display->input[0] is read through the component +++ mmal_port_pool_destroy(display->input[0], rpi_pool); +++ } +++ if (display) { // destroy the component only after its port is no longer needed +++ mmal_component_destroy(display); +++ } +++} +++ +++#endif +++ +++ ++ /* sub2video hack: ++ Convert subtitles to video with alpha to insert them in filter graphs. ++ This is a temporary solution until libavfilter gets real subtitles support.
++@@ -581,6 +728,10 @@ static void ffmpeg_cleanup(int ret) ++ } ++ term_exit(); ++ ffmpeg_exited = 1; +++ +++#ifdef RPI_DISPLAY +++ display_exit(rpi_display); +++#endif ++ } ++ ++ void remove_avoptions(AVDictionary **a, AVDictionary *b) ++@@ -940,6 +1091,14 @@ static void do_video_out(AVFormatContext *s, ++ int frame_size = 0; ++ InputStream *ist = NULL; ++ AVFilterContext *filter = ost->filter->filter; +++#ifdef RPI_DISPLAY +++ if (next_picture) +++ { +++ if (!rpi_display) +++ rpi_display = display_init(0,0,next_picture->width,next_picture->height); +++ display_frame(rpi_display,next_picture); +++ } +++#endif ++ ++ if (ost->source_index >= 0) ++ ist = input_streams[ost->source_index]; ++-- ++2.7.4 ++ ++ ++From b90a5aff7bf9112ebd2a07949c8d79a49fcafe48 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 29 Apr 2015 16:49:43 +0100 ++Subject: [PATCH 02/68] Split transform and intra prediction into commands ++ ++--- ++ libavcodec/hevc.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++- ++ libavcodec/hevc.h | 58 +++++++++++++++++++++++ ++ libavcodec/hevc_cabac.c | 15 ++++++ ++ 3 files changed, 191 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index b478065..aa45dd6 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -931,6 +931,25 @@ static int hls_cross_component_pred(HEVCContext *s, int idx) { ++ return 0; ++ } ++ +++#ifdef RPI +++static void rpi_intra_pred(HEVCContext *s, int log2_trafo_size, int x0, int y0, int c_idx) +++{ +++ if (s->enable_rpi) { +++ HEVCLocalContext *lc = s->HEVClc; +++ HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; +++ cmd->type = RPI_PRED_INTRA; +++ cmd->size = log2_trafo_size; +++ cmd->c_idx = c_idx; +++ cmd->x = x0; +++ cmd->y = y0; +++ cmd->na = (lc->na.cand_bottom_left<<4) + (lc->na.cand_left<<3) + (lc->na.cand_up_left<<2) + (lc->na.cand_up<<1) + lc->na.cand_up_right; +++ cmd->mode = c_idx ? 
lc->tu.intra_pred_mode_c : lc->tu.intra_pred_mode; +++ } else { +++ s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, c_idx); +++ } +++} +++#endif +++ ++ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ int xBase, int yBase, int cb_xBase, int cb_yBase, ++ int log2_cb_size, int log2_trafo_size, ++@@ -943,8 +962,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ if (lc->cu.pred_mode == MODE_INTRA) { ++ int trafo_size = 1 << log2_trafo_size; ++ ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size); ++- +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size, x0, y0, 0); +++#else ++ s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0); +++#endif ++ } ++ ++ if (cbf_luma || cbf_cb[0] || cbf_cr[0] || ++@@ -1030,7 +1052,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) { ++ if (lc->cu.pred_mode == MODE_INTRA) { ++ ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0 + (i << log2_trafo_size_c), 1); +++#else ++ s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1); +++#endif ++ } ++ if (cbf_cb[i]) ++ ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c), ++@@ -1059,7 +1085,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 
2 : 1); i++) { ++ if (lc->cu.pred_mode == MODE_INTRA) { ++ ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0 + (i << log2_trafo_size_c), 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2); +++#endif ++ } ++ if (cbf_cr[i]) ++ ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c), ++@@ -1088,7 +1118,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ if (lc->cu.pred_mode == MODE_INTRA) { ++ ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size), ++ trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase + (i << log2_trafo_size), 1); +++#else ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1); +++#endif ++ } ++ if (cbf_cb[i]) ++ ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size), ++@@ -1098,7 +1132,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ if (lc->cu.pred_mode == MODE_INTRA) { ++ ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size), ++ trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase + (i << log2_trafo_size), 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2); +++#endif ++ } ++ if (cbf_cr[i]) ++ ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size), ++@@ -1110,26 +1148,46 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]); ++ int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]); ++ ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0, 1); +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0, 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size_c - 
2](s, x0, y0, 1); ++ s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2); +++#endif ++ if (s->ps.sps->chroma_format_idc == 2) { ++ ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c), ++ trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0 + (1 << log2_trafo_size_c), 1); +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0 + (1 << log2_trafo_size_c), 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1); ++ s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2); +++#endif ++ } ++ } else if (blk_idx == 3) { ++ int trafo_size_h = 1 << (log2_trafo_size + 1); ++ int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]); ++ ff_hevc_set_neighbour_available(s, xBase, yBase, ++ trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase, 1); +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase, 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1); ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2); +++#endif ++ if (s->ps.sps->chroma_format_idc == 2) { ++ ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)), ++ trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase + (1 << (log2_trafo_size)), 1); +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase + (1 << (log2_trafo_size)), 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1); ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2); +++#endif ++ } ++ } ++ } ++@@ -2304,6 +2362,31 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, ++ lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]])); ++ } ++ 
+++#ifdef RPI +++static void rpi_execute_pred_cmds(HEVCContext *s) +++{ +++ int i; +++ HEVCPredCmd *cmd = s->univ_pred_cmds; +++ HEVCLocalContext *lc = s->HEVClc; +++ +++ for(i = s->num_pred_cmds; i > 0; i--, cmd++) { +++ if (cmd->type == RPI_PRED_INTRA) { +++ lc->tu.intra_pred_mode_c = lc->tu.intra_pred_mode = cmd->mode; +++ lc->na.cand_bottom_left = (cmd->na >> 4) & 1; +++ lc->na.cand_left = (cmd->na >> 3) & 1; +++ lc->na.cand_up_left = (cmd->na >> 2) & 1; +++ lc->na.cand_up = (cmd->na >> 1) & 1; +++ lc->na.cand_up_right = (cmd->na >> 0) & 1; +++ s->hpc.intra_pred[cmd->size - 2](s, cmd->x, cmd->y, cmd->c_idx); +++ } else { +++ s->hevcdsp.transform_add[cmd->size-2](cmd->dst, cmd->buf, cmd->stride); +++ } +++ } +++ s->num_pred_cmds = 0; +++ s->num_coeffs = 0; +++} +++#endif +++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ { ++ HEVCContext *s = avctxt->priv_data; ++@@ -2313,6 +2396,10 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ int y_ctb = 0; ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; ++ +++#ifdef RPI +++ s->enable_rpi = 1; // TODO this should depend on cross component and frame width etc. 
+++#endif +++ ++ if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { ++ av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n"); ++ return AVERROR_INVALIDDATA; ++@@ -2342,6 +2429,9 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag; ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); +++#ifdef RPI +++ rpi_execute_pred_cmds(s); +++#endif ++ if (more_data < 0) { ++ s->tab_slice_address[ctb_addr_rs] = -1; ++ return more_data; ++@@ -2387,6 +2477,10 @@ static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int ++ s = s1->sList[self_id]; ++ lc = s->HEVClc; ++ +++#ifdef RPI +++ s->enable_rpi = 0; +++#endif +++ ++ if(ctb_row) { ++ ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]); ++ ++@@ -3075,6 +3169,13 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ ++ av_freep(&s->cabac_state); ++ +++#ifdef RPI +++ av_freep(&s->unif_mv_cmds); +++ av_freep(&s->unif_xfm_cmds); +++ av_freep(&s->univ_pred_cmds); +++ av_freep(&s->coeffs_buf); +++#endif +++ ++ for (i = 0; i < 3; i++) { ++ av_freep(&s->sao_pixel_buffer_h[i]); ++ av_freep(&s->sao_pixel_buffer_v[i]); ++@@ -3129,6 +3230,22 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->HEVClcList[0] = s->HEVClc; ++ s->sList[0] = s; ++ +++#ifdef RPI +++ s->unif_mv_cmds = av_mallocz(sizeof(HEVCMvCmd)*RPI_MAX_MV_CMDS); +++ if (!s->unif_mv_cmds) +++ goto fail; +++ s->unif_xfm_cmds = av_mallocz(sizeof(HEVCXfmCmd)*RPI_MAX_XFM_CMDS); +++ if (!s->unif_xfm_cmds) +++ goto fail; +++ s->univ_pred_cmds = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); +++ if (!s->univ_pred_cmds) +++ goto fail; +++ s->coeffs_buf = av_mallocz(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16); +++ if (!s->coeffs_buf) +++ goto fail; +++ s->enable_rpi = 0; +++#endif +++ ++ s->cabac_state = av_malloc(HEVC_CONTEXTS); ++ if 
(!s->cabac_state) ++ goto fail; ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index be91010..7a1c35f 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -23,6 +23,9 @@ ++ #ifndef AVCODEC_HEVC_H ++ #define AVCODEC_HEVC_H ++ +++// define RPI to split the CABAC/prediction/transform into separate stages +++#include "config.h" +++ ++ #include "libavutil/buffer.h" ++ #include "libavutil/md5.h" ++ ++@@ -790,6 +793,49 @@ typedef struct HEVCLocalContext { ++ int boundary_flags; ++ } HEVCLocalContext; ++ +++#ifdef RPI +++ +++// RPI_MAX_WIDTH is maximum width in pixels supported by the accelerated code +++#define RPI_MAX_WIDTH 2048 +++ +++// Worst case is for 4:4:4 4x4 blocks with 64 high coding tree blocks, so 16 MV cmds per 4 pixels across for each colour plane +++#define RPI_MAX_MV_CMDS (16*3*(RPI_MAX_WIDTH/4)) +++#define RPI_MAX_XFM_CMDS (16*3*(RPI_MAX_WIDTH/4)) +++// Each block can have an intra prediction and a transform_add command +++#define RPI_MAX_PRED_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) +++ +++// Command for inter prediction +++typedef struct HEVCMvCmd { +++} HEVCMvCmd; +++ +++// Command for transform to process a block of coefficients +++typedef struct HEVCXfmCmd { +++} HEVCXfmCmd; +++ +++// Command for intra prediction and transform_add of predictions to coefficients +++#define RPI_PRED_TRANSFORM_ADD 0 +++#define RPI_PRED_INTRA 1 +++typedef struct HEVCPredCmd { +++ uint8_t size; // log2 of the transform block size +++ uint8_t type; // RPI_PRED_TRANSFORM_ADD or RPI_PRED_INTRA; selects the active union members +++ uint8_t na; // RPI_PRED_INTRA: packed neighbour-availability bits +++ uint8_t c_idx; // RPI_PRED_INTRA: colour component index +++ union { +++ uint8_t *dst; // RPI_PRED_TRANSFORM_ADD +++ uint32_t x; // RPI_PRED_INTRA +++ }; +++ union { +++ int16_t *buf; // RPI_PRED_TRANSFORM_ADD +++ uint32_t y; // RPI_PRED_INTRA +++ }; +++ union { +++ enum IntraPredMode mode; // RPI_PRED_INTRA +++ uint32_t stride; // RPI_PRED_TRANSFORM_ADD +++ }; +++} HEVCPredCmd; +++ +++#endif +++ ++ typedef struct HEVCContext { ++ const AVClass *c; // needed by private avoptions ++ AVCodecContext *avctx; ++@@ -805,6 +851,18 @@ typedef struct HEVCContext { ++ int width; ++ int 
height; ++ +++#ifdef RPI +++ int enable_rpi; +++ HEVCMvCmd *unif_mv_cmds; +++ HEVCXfmCmd *unif_xfm_cmds; +++ HEVCPredCmd *univ_pred_cmds; +++ int16_t *coeffs_buf; +++ int num_mv_cmds; +++ int num_xfm_cmds; +++ int num_pred_cmds; +++ int num_coeffs; +++#endif +++ ++ uint8_t *cabac_state; ++ ++ /** 1 if the independent slice segment header was successfully parsed */ ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 05b2821..4e97f06 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1510,6 +1510,21 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ coeffs[i] = coeffs[i] + ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); ++ } ++ } +++#ifdef RPI +++ if (s->enable_rpi) { +++ int16_t *c = s->coeffs_buf + s->num_coeffs; +++ int n = trafo_size * trafo_size; +++ HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; +++ memcpy(c, coeffs, n * sizeof(int16_t)); // TODO change pointer earlier and we can avoid this copy +++ s->num_coeffs += n; +++ cmd->type = RPI_PRED_TRANSFORM_ADD; +++ cmd->size = log2_trafo_size; +++ cmd->buf = c; +++ cmd->dst = dst; +++ cmd->stride = stride; +++ return; +++ } +++#endif ++ s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride); ++ } ++ ++-- ++2.7.4 ++ ++ ++From f8293de11dc040d9fa2a558762a357c0c353d2c9 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 30 Apr 2015 15:23:22 +0100 ++Subject: [PATCH 03/68] Added simple VPU test code ++ ++--- ++ libavcodec/Makefile | 7 + ++ libavcodec/hevc.c | 33 +- ++ libavcodec/rpi_hevc_transform.h | 212 ++++++ ++ libavcodec/rpi_hevc_transform.s | 147 ++++ ++ libavcodec/rpi_mailbox.c | 293 ++++++++ ++ libavcodec/rpi_mailbox.h | 20 + ++ libavcodec/rpi_qpu.c | 652 ++++++++++++++++++ ++ libavcodec/rpi_qpu.h | 45 ++ ++ libavcodec/rpi_shader.c | 818 ++++++++++++++++++++++ ++ libavcodec/rpi_shader.h | 20 + ++ libavcodec/rpi_shader.qasm | 1413 +++++++++++++++++++++++++++++++++++++++ ++ libavcodec/rpi_user_vcsm.h | 425 ++++++++++++ 
++ 12 files changed, 4084 insertions(+), 1 deletion(-) ++ create mode 100644 libavcodec/rpi_hevc_transform.h ++ create mode 100644 libavcodec/rpi_hevc_transform.s ++ create mode 100644 libavcodec/rpi_mailbox.c ++ create mode 100644 libavcodec/rpi_mailbox.h ++ create mode 100644 libavcodec/rpi_qpu.c ++ create mode 100644 libavcodec/rpi_qpu.h ++ create mode 100644 libavcodec/rpi_shader.c ++ create mode 100644 libavcodec/rpi_shader.h ++ create mode 100644 libavcodec/rpi_shader.qasm ++ create mode 100644 libavcodec/rpi_user_vcsm.h ++ ++diff --git a/libavcodec/Makefile b/libavcodec/Makefile ++index fd0d1f0..03065cd 100644 ++--- a/libavcodec/Makefile +++++ b/libavcodec/Makefile ++@@ -5,6 +5,10 @@ NAME = avcodec ++ HEADERS = avcodec.h \ ++ avdct.h \ ++ avfft.h \ +++ rpi_qpu.h \ +++ rpi_shader.h \ +++ rpi_mailbox.h \ +++ rpi_hevc_transform.h \ ++ d3d11va.h \ ++ dirac.h \ ++ dv_profile.h \ ++@@ -43,6 +47,9 @@ OBJS = allcodecs.o \ ++ resample.o \ ++ resample2.o \ ++ utils.o \ +++ rpi_qpu.o \ +++ rpi_shader.o \ +++ rpi_mailbox.o \ ++ vorbis_parser.o \ ++ xiph.o \ ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index aa45dd6..ab55df1 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -41,6 +41,10 @@ ++ #include "hevc.h" ++ #include "profiles.h" ++ +++#ifdef RPI +++#include "rpi_qpu.h" +++#endif +++ ++ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 }; ++ ++ /** ++@@ -2430,7 +2434,9 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ #ifdef RPI ++- rpi_execute_pred_cmds(s); +++ if (x_ctb + ctb_size >= s->ps.sps->width) { +++ rpi_execute_pred_cmds(s); +++ } ++ #endif ++ if (more_data < 0) { ++ s->tab_slice_address[ctb_addr_rs] = -1; ++@@ -3244,6 +3250,31 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ if (!s->coeffs_buf) ++ goto fail; ++ 
s->enable_rpi = 0; +++ +++ // A little test program +++ { +++ GPU_MEM_PTR_T p; +++ int err = gpu_malloc_cached(16, &p); +++ short *q = (short *)p.arm; +++ int i; +++ int r; +++ printf("Allocated memory %d ARM 0x%x, VC 0x%x, Code 0x%x\n",err,(int)p.arm,p.vc,(int)vpu_get_fn()); +++ printf("Allocated memory %d ARM 0x%x, VC 0x%x\n",err,(int)p.arm,p.vc); +++ printf("Preparing data %p\n",q); +++ for(i=0;i<16;i++) +++ q[i] = i; +++ printf("Flush cache\n"); +++ gpu_cache_flush(&p); +++ printf("Executing code\n"); +++ r = vpu_execute_code( vpu_get_fn(), p.vc, 0, 0, 0, 0, 0); +++ printf("Return value %d (",r); +++ for(i=0;i<16;i++) +++ printf("%d ",q[i]); +++ printf(")\n"); +++ gpu_free(&p); +++ goto fail; // Early out +++ } +++ ++ #endif ++ ++ s->cabac_state = av_malloc(HEVC_CONTEXTS); ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++new file mode 100644 ++index 0000000..85a9102 ++--- /dev/null +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -0,0 +1,212 @@ +++unsigned char rpi_hevc_transform [] = { +++169, +++3, +++3, +++232, +++128, +++0, +++0, +++0, +++20, +++248, +++0, +++136, +++0, +++0, +++192, +++248, +++0, +++0, +++0, +++96, +++3, +++232, +++32, +++0, +++0, +++0, +++7, +++232, +++0, +++2, +++0, +++0, +++8, +++232, +++0, +++4, +++0, +++0, +++12, +++248, +++0, +++128, +++0, +++0, +++192, +++8, +++4, +++0, +++4, +++232, +++64, +++0, +++0, +++0, +++5, +++232, +++0, +++0, +++8, +++0, +++128, +++69, +++113, +++66, +++12, +++248, +++0, +++128, +++0, +++0, +++192, +++8, +++4, +++0, +++128, +++69, +++113, +++70, +++128, +++144, +++39, +++0, +++4, +++255, +++48, +++192, +++128, +++3, +++32, +++8, +++16, +++0, +++76, +++254, +++48, +++192, +++9, +++4, +++32, +++8, +++0, +++0, +++4, +++254, +++0, +++144, +++128, +++2, +++0, +++248, +++62, +++0, +++128, +++144, +++22, +++0, +++4, +++255, +++48, +++192, +++128, +++3, +++32, +++8, +++16, +++0, +++76, +++254, +++48, +++192, +++9, +++4, +++32, +++8, +++0, +++0, +++140, +++248, +++44, +++0, +++0, +++0, 
+++32, +++48, +++4, +++0, +++128, +++69, +++113, +++66, +++242, +++140, +++211, +++192, +++41, +++3, +++68, +++192, +++80, +++7, +++164, +++255, +++36, +++220, +++96, +++2, +++0, +++248, +++62, +++0, +++3, +++255, +++55, +++208, +++120, +++3, +++224, +++3, +++190, +++11, +++16, +++139, +++246, +++83, +++0, +++103, +++90, +++0, +++8, +++240, +++0, +++128, +++128, +++3, +++0, +++247, +++32, +++128, +++10, +++4, +++136, +++240, +++32, +++0, +++128, +++3, +++112, +++96, +++90, +++0, +++}; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++new file mode 100644 ++index 0000000..5e2728d ++--- /dev/null +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -0,0 +1,147 @@ +++# ****************************************************************************** +++# Argon Design Ltd. +++# (c) Copyright 2015 Argon Design Ltd. All rights reserved. +++# +++# Module : HEVC +++# Author : Peter de Rivaz +++# ****************************************************************************** +++ +++# HEVC VPU Transform +++# +++# Transform matrix can be thought of as +++# output row vector = input row vector * transMatrix2 +++# +++# The even rows of the matrix are symmetric +++# The odd rows of the matrix are antisymmetric +++# +++# So only need to compute the first half of the results, then can compute the remainder with a butterfly +++# +++# EXAMPLE +++# (a b c d) (1 2 2 1) +++# (3 4 -4 -3) +++# (5 6 6 5) +++# (7 8 -8 -7) +++# +++# x=(a c)(1 2) = 1a+5c 2a+6c +++# (5 6) +++# +++# y=(b d)(3 4) = 3b+7d 4b+8d +++# (7 8) +++# +++# u=x+y = 1a+5c+3b+7d 2a+4b+6c+8d +++# v=x-y = 1a+5c-3b-7d 2a+6c-4b-8d +++# +++# Final results are (u , v[::-1]) +++# +++# +++# For 32x1 input, load even rows into HX(0++,0), odd rows into HX(16++,0) +++# Apply the even matrix first and stop before rounding +++# Then apply the odd matrix in a full manner: +++# +++# First step is to compute partial products with the first input (16 cycles) +++# 1a 3b 5c 7d 16x1 input coefficients produce 16x16 output 
+++# 2a 4b 6c 8d +++# 2a -4b 6c -8d +++# 1a -3b 5c -7d +++# +++# Second step is to sum partial products into final position (8 cycles) +++# 1a+3b+5c+7d +++# 2a+4b+6c+8d +++# 2a-4b+6c-8d +++# 1a-3b+5c-7d +++# +++# Then can apply butterfly to combine even results and odd results + rounding to produce 16 rows of output at a time (need to save in transposed format) +++# +++# For 16x16 no butterfly is required and can store final results in original location (Could do 2 16x16s in parallel to make use of the trick - saves on the adds) +++# +++# For 8x8 we could compute two in parallel. +++# +++# +++ +++test_add: +++ vldh HX(0,0),(r0) +++ vadd HX(0,0),HX(0,0),10 +++ vsth HX(0,0),(r0) +++ mov r0,7 # return value +++ b lr +++ +++# Columns are transformed first +++# +++# Store top left half of transMatrix2 in +++# Store bottom left half of transMatrix2 in HX(32,32) +++# +++# For 16x16 +++# HX(0:15,0) contains input data before transform +++# HY(0:15,0) contains 32bit output data after transform +++# HX(32,0) contains even rows of left half of transMatrix2 +++# HX(32,32) contains odd rows of left half of transMatrix2 +++# HY(48,0) contains partial products ready for summing +++# +++ +++ +++# hevc_trans_16x16(short *transMatrix2, short *coeffs, int num) +++# transMatrix2: address of the constant matrix (must be at 32 byte aligned address in Videocore memory) +++# coeffs: address of the transform coefficients (must be at 32 byte aligned address in Videocore memory) +++# num: number of 16x16 transforms to be done +++# +++hevc_trans_16x16: +++ push r6-r15, lr # TODO cut down number of used registers +++ +++ mov r3, 2*32*2 # Twice Stride of transMatrix2 in bytes +++ vld HX(32++,0),(r0 += r3) REP 16 # This is the 16x16 matrix, a transform is equivalent to multiplying input row vector * matrix +++ # Now use r0 to describe which matrix we are working on. +++ # Allows us to prefetch the next block of coefficients for efficiency. 
+++ mov r0,0 # This describes the location where we read our coefficients from +++ mov r3,16*2 # Stride of coefficients in bytes +++ mov r7,16*16*2 # Total block size +++ mov r8,64*16 # Value used to swap from current to next VRF location +++ vldh HX(0++,0)+r0,(r1 += r3) REP 16 +++ mov r4,64 # Constant used for rounding first pass +++ mov r5,1<<19 # Constant used for rounding second pass +++ +++ # At start of block r0,r1 point to the current block (that has already been loaded) +++block_loop: +++ eor r0,r8 +++ add r1,r7 +++ # Prefetch the next block +++ vldh HX(0++,0)+r0,(r1 += r3) REP 16 +++ eor r0,r8 +++ sub r1,r7 +++ +++ # Transform the current block +++ bl col_trans_16 +++ vadd HY(0++,0)+r0,HY(0++,0)+r0,r4 REP 16 # Now add on rounding, shift down by 7, and saturate +++ #vsasls HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # 9+7=16 so this ends up with the output saturated and in the top half of the word. +++ vasl HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # This should be saturating, but the instruction above does not assemble? +++ vmov VX(0,0++), HX(0++,32) REP 16 # For simplicity transpose this back to the original position +++ +++ bl col_trans_16 +++ vadd HY(0++,0)+r0,HY(0++,0)+r0,r4 REP 16 # Now add on rounding, shift down by 7, and saturate +++ #vsasls HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # 9+7=16 so this ends up with the output saturated and in the top half of the word. +++ vasl HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # This should be saturating, but the instruction above does not assemble? 
+++ +++ # Save results - note there has been a transposition during the processing so we save columns +++ vsth VX(0,32++)+r0, (r1 += r3) REP 16 +++ +++ # Move onto next block +++ eor r0,r8 +++ add r1,r7 +++ +++ addcmpbgt r2,-1,0,block_loop +++ pop r6-r15, pc +++ +++# r1,r2,r3 r7,r8 should be preserved +++# HX(0++,0)+r0 is the block to be transformed +++# HX(32++,0) is the 16x16 matrix of transform coefficients +++# Use HY(48,0) for intermediate results +++# r0 can be used, but should be returned to its original value at the end +++col_trans_16: +++ add r4,r0,16 # Final value for this loop +++col_trans_16_loop: +++ # First compute partial products for a single column +++ vmul32s VY(48,0++), VX(0,0)+r0, VX(32,0++) REP 16 +++ # Then sum up the results and place back +++ vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC +++ addcmpblt r0,1,r4,col_trans_16_loop +++ sub r0,16 # but r0 back to its original value +++ b lr ++diff --git a/libavcodec/rpi_mailbox.c b/libavcodec/rpi_mailbox.c ++new file mode 100644 ++index 0000000..536896f ++--- /dev/null +++++ b/libavcodec/rpi_mailbox.c ++@@ -0,0 +1,293 @@ +++/* +++Copyright (c) 2012, Broadcom Europe Ltd. +++All rights reserved. +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. +++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. 
+++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +++*/ +++ +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++#include +++ +++#include +++ +++#define MAJOR_NUM 100 +++#define IOCTL_MBOX_PROPERTY _IOWR(MAJOR_NUM, 0, char *) +++#define DEVICE_FILE_NAME "/dev/char_dev" +++ +++#include "rpi_mailbox.h" +++ +++#define PAGE_SIZE (4*1024) +++ +++// Shared memory will not be cached in ARM cache +++void *mapmem_shared(unsigned base, unsigned size) +++{ +++ int mem_fd; +++ unsigned offset = base % PAGE_SIZE; +++ base = base - offset; +++ /* open /dev/mem */ +++ if ((mem_fd = open("/dev/mem", O_RDWR|O_SYNC) ) < 0) { +++ printf("can't open /dev/mem\nThis program should be run as root. 
Try prefixing command with: sudo\n"); +++ return NULL; +++ } +++ void *mem = mmap( +++ 0, +++ size, +++ PROT_READ|PROT_WRITE, +++ MAP_SHARED/*|MAP_FIXED*/, +++ mem_fd, +++ base); +++#ifdef DEBUG +++ printf("base=0x%x, mem=%p\n", base, mem); +++#endif +++ close(mem_fd); // the mapping outlives the descriptor; closing here also covers the failure path +++ if (mem == MAP_FAILED) { +++ printf("mmap error %d\n", (int)mem); +++ return NULL; +++ } +++ return (char *)mem + offset; +++} +++ +++// Unshared memory will be faster as lives in ARM cache, but requires cache flushing +++void *mapmem_private(unsigned base, unsigned size) +++{ +++ int mem_fd; +++ unsigned offset = base % PAGE_SIZE; +++ base = base - offset; +++ /* open /dev/mem */ +++ if ((mem_fd = open("/dev/mem", O_RDWR|O_SYNC) ) < 0) { +++ printf("can't open /dev/mem\nThis program should be run as root. Try prefixing command with: sudo\n"); +++ return NULL; +++ } +++ void *mem = mmap( +++ 0, +++ size, +++ PROT_READ|PROT_WRITE, +++ MAP_PRIVATE/*|MAP_FIXED*/, +++ mem_fd, +++ base); +++#ifdef DEBUG +++ printf("base=0x%x, mem=%p\n", base, mem); +++#endif +++ close(mem_fd); // the mapping outlives the descriptor; closing here also covers the failure path +++ if (mem == MAP_FAILED) { +++ printf("mmap error %d\n", (int)mem); +++ return NULL; +++ } +++ return (char *)mem + offset; +++} +++ +++void unmapmem(void *addr, unsigned size) +++{ +++ int s = munmap(addr, size); +++ if (s != 0) { +++ printf("munmap error %d\n", s); +++ exit (-1); +++ } +++} +++ +++/* +++ * use ioctl to send mbox property message +++ */ +++ +++static int mbox_property(int file_desc, void *buf) +++{ +++ int ret_val = ioctl(file_desc, IOCTL_MBOX_PROPERTY, buf); +++ +++ if (ret_val < 0) { +++ printf("ioctl_set_msg failed:%d\n", ret_val); +++ } +++ +++#ifdef DEBUG +++ unsigned *p = buf; int i; unsigned size = *(unsigned *)buf; +++ for (i=0; i +++#include +++#include +++#include +++#include +++ +++#include "config.h" +++ +++#include +++#include +++ +++#include "rpi_mailbox.h" +++#include "rpi_qpu.h" +++#include "rpi_shader.h" +++#include "rpi_hevc_transform.h" +++ +++#ifdef RPI_USE_VCSM +++#include 
"rpi_user_vcsm.h" +++#endif +++ +++// On Pi2 there is no way to access the VPU L2 cache +++// GPU_MEM_FLG should be 4 for uncached memory. +++// However, if using VCSM allocated buffers, need to use C at the moment because VCSM does not allocate uncached memory correctly +++// The QPU crashes if we mix L2 cached and L2 uncached accesses due to a HW bug. +++#define GPU_MEM_FLG 0xC +++#define GPU_MEM_MAP 0x0 +++ +++#define vcos_verify(x) ((x)>=0) +++ +++typedef unsigned char uint8_t; +++typedef signed char int8_t; +++typedef unsigned short uint16_t; +++typedef unsigned int uint32_t; +++typedef int int32_t; +++ +++/*static const unsigned code[] = +++{ +++ #include "rpi_shader.hex" +++};*/ +++ +++// Size in 32bit words +++#define QPU_CODE_SIZE 2048 +++#define VPU_CODE_SIZE 2048 +++ +++struct GPU +++{ +++ unsigned int qpu_code[QPU_CODE_SIZE]; +++ unsigned int vpu_code[VPU_CODE_SIZE]; +++ int open_count; // Number of allocated video buffers +++ unsigned int vc_handle; // Handle of this memory +++ int mb; // Mailbox handle +++ int vc; // Address in GPU memory +++ int mail[12]; // These are used to pass pairs of code/unifs to the QPUs +++}; +++ +++// Stop more than one thread trying to allocate memory or use the processing resources at once +++static pthread_mutex_t gpu_mutex = PTHREAD_MUTEX_INITIALIZER; +++static volatile struct GPU* gpu = NULL; +++ +++#ifdef RPI_TIME_TOTAL_QPU +++static unsigned int Microseconds(void) { +++ struct timespec ts; +++ unsigned int x; +++ static unsigned int base = 0; +++ clock_gettime(CLOCK_REALTIME, &ts); +++ x = ts.tv_sec*1000000 + ts.tv_nsec/1000; +++ if (base==0) base=x; +++ return x-base; +++} +++#endif +++ +++// Connect to QPU, returns 0 on success. 
+++static int gpu_init(volatile struct GPU **gpu) { +++ int mb = mbox_open(); +++ int vc; +++ int handle; +++ volatile struct GPU* ptr; +++ if (mb < 0) +++ return -1; +++ +++ if (qpu_enable(mb, 1)) return -2; +++ +++#ifdef RPI_USE_VCSM +++ vcsm_init(); +++#endif +++ +++ handle = mem_alloc(mb, sizeof(struct GPU), 4096, GPU_MEM_FLG); +++ if (!handle) +++ { +++ qpu_enable(mb, 0); +++ return -3; +++ } +++ vc = mem_lock(mb, handle); +++ ptr = mapmem_shared((vc+GPU_MEM_MAP)&~0xc0000000, sizeof(struct GPU)); +++ if (ptr == NULL) +++ { mem_unlock(mb, handle); // unlock before free, same order as gpu_term +++ mem_free(mb, handle); +++ qpu_enable(mb, 0); +++ return -4; +++ } +++ +++ ptr->mb = mb; +++ ptr->vc_handle = handle; +++ ptr->vc = vc; +++ ptr->open_count = 0; // GPU_MEM_FLG does not request zeroed memory, so start the buffer count explicitly +++ *gpu = ptr; +++ +++ // Now copy over the QPU code into GPU memory +++ { +++ int num_bytes = qpu_get_fn(QPU_MC_END) - qpu_get_fn(QPU_MC_SETUP); +++ assert(num_bytes<=QPU_CODE_SIZE*sizeof(unsigned int)); +++ memcpy((void*)ptr->qpu_code, rpi_shader, num_bytes); +++ } +++ // And the VPU code +++ { +++ int num_bytes = sizeof(rpi_hevc_transform); +++ assert(num_bytes<=VPU_CODE_SIZE*sizeof(unsigned int)); +++ memcpy((void*)ptr->vpu_code, rpi_hevc_transform, num_bytes); +++ } +++ +++ return 0; +++} +++ +++// Make sure we have exclusive access to the mailbox, and enable qpu if necessary. +++static void gpu_lock(void) { +++ pthread_mutex_lock(&gpu_mutex); +++ if (gpu==NULL) { +++ gpu_init(&gpu); +++ } +++} +++ +++static void gpu_unlock(void) { +++ pthread_mutex_unlock(&gpu_mutex); +++} +++ +++// Allocate memory on GPU +++// Fills in structure

containing ARM pointer, videocore handle, videocore memory address, numbytes +++// Returns 0 on success. +++// This allocates memory that will not be cached in ARM's data cache. +++// Therefore safe to use without data cache flushing. +++int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) { +++ gpu_lock(); +++ p->vc_handle = mem_alloc(gpu->mb, numbytes, 4096, GPU_MEM_FLG); +++ p->vcsm_handle = 0; +++ if (!p->vc_handle) +++ { +++ qpu_enable(gpu->mb, 0); +++ gpu_unlock(); return -3; // every exit must release gpu_mutex or the next caller deadlocks +++ } +++ p->vc = mem_lock(gpu->mb, p->vc_handle); +++ p->arm = mapmem_shared((p->vc+GPU_MEM_MAP)&~0xc0000000,numbytes); +++ p->numbytes = numbytes; +++ if (p->arm == NULL) +++ { +++ mem_unlock(gpu->mb, p->vc_handle); // unlock before free, same order as gpu_term +++ mem_free(gpu->mb, p->vc_handle); +++ qpu_enable(gpu->mb, 0); // still under the mutex: gpu may be torn down once it is released +++ gpu_unlock(); +++ return -4; +++ } +++ gpu->open_count++; +++ gpu_unlock(); +++ return 0; +++} +++ +++void gpu_cache_flush(GPU_MEM_PTR_T *p) +++{ +++ // This only works when using RPI_USE_VCSM +++ void *tmp = vcsm_lock(p->vcsm_handle); +++ vcsm_unlock_ptr(tmp); +++} +++ +++// This allocates data that will be +++// Cached in ARM L2 +++// Uncached in VPU L2 +++int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p) { +++ gpu_lock(); +++#ifdef RPI_USE_VCSM +++ { +++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST, (char *)"Video Frame" ); // f....... locks up for VP9 - retest this? +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); // 3b...... works +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); //fb...... 
locks up +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST_AND_VC, (char *)"Video Frame" ); // 3b works (but corrupted due to caching) +++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); +++ p->arm = vcsm_lock(p->vcsm_handle); +++ p->vc = mem_lock(gpu->mb, p->vc_handle); +++ } +++#else +++ p->vc_handle = mem_alloc(gpu->mb, numbytes, 4096, GPU_MEM_FLG); +++ p->vcsm_handle = 0; +++ if (!p->handle) +++ { +++ qpu_enable(gpu->mb, 0); +++ return -3; +++ } +++ p->vc = mem_lock(gpu->mb, p->vc_handle); +++ printf("This mapmem_private does not seem to work\n"); +++ exit(-1); +++ p->arm = mapmem_private((p->vc+GPU_MEM_MAP)&~0xc0000000,numbytes); +++ p->numbytes = numbytes; +++ if (p->arm == NULL) +++ { +++ mem_free(gpu->mb, p->handle); +++ mem_unlock(gpu->mb, p->handle); +++ gpu_unlock(); +++ qpu_enable(gpu->mb, 0); +++ return -4; +++ } +++#endif +++ gpu->open_count++; +++ gpu_unlock(); +++ return 0; +++} +++ +++static void gpu_term(void) +++{ +++ int mb = gpu->mb; +++ unsigned handle = gpu->vc_handle; +++ if (gpu==NULL) +++ return; +++ unmapmem((void*)gpu, sizeof(struct GPU)); +++ mem_unlock(mb, handle); +++ mem_free(mb, handle); +++ qpu_enable(mb, 0); +++#ifdef RPI_USE_VCSM +++ vcsm_exit(); +++#endif +++ mbox_close(mb); +++ gpu = NULL; +++} +++ +++void gpu_free(GPU_MEM_PTR_T *p) { +++ int mb = gpu->mb; +++ unsigned handle = p->vc_handle; +++ gpu_lock(); +++#ifdef RPI_USE_VCSM +++ if (p->vcsm_handle) { +++ mem_unlock(mb,p->vc_handle); +++ vcsm_unlock_ptr(p->arm); +++ vcsm_free(p->vcsm_handle); +++ } else { +++ unmapmem((void*)p->arm, sizeof(struct GPU)); +++ mem_unlock(mb, handle); +++ mem_free(mb, handle); +++ } +++#else +++ unmapmem((void*)p->arm, sizeof(struct GPU)); +++ mem_unlock(mb, handle); +++ mem_free(mb, handle); +++#endif +++ +++ gpu->open_count--; +++ if (gpu->open_count==0) { +++ printf("Closing GPU\n"); +++ gpu_term(); +++ gpu = NULL; +++ } +++ gpu_unlock(); +++} +++ +++unsigned int vpu_get_fn(void) { +++ // Make sure that the gpu is 
initialized
+++  if (gpu==NULL) {
+++    printf("Preparing gpu\n");
+++    gpu_lock();
+++    gpu_unlock();
+++  }
+++  // GPU-side address of the VPU code stored inside the shared context.
+++  return gpu->vc + offsetof(struct GPU,vpu_code);
+++}
+++
+++// Run code on the VPU through the shared mailbox, passing r0..r5 through to
+++// execute_code(), while holding the GPU lock. Returns execute_code()'s result.
+++unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5)
+++{
+++  unsigned r;
+++  gpu_lock();
+++  r = execute_code(gpu->mb, code, r0, r1, r2, r3, r4, r5);
+++  gpu_unlock();
+++  return r;
+++}
+++
+++// Run a program on a QPU with the given code and uniform stream (given in GPU addresses)
+++// The first num QPUs will start at code, the next num2 QPUs will start at code2
+++// The mail array holds one (uniforms, code) pair per QPU: even slots take
+++// unifs1..unifs12, odd slots take the code address. Always launches 12 QPUs.
+++// NOTE(review): slots for QPUs at index num+num2 and above keep their previous
+++// code address - callers presumably pass num+num2 == 12; confirm.
+++void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, int unifs9, int unifs10, int unifs11, int unifs12)
+++{
+++  int i;
+++#ifdef RPI_TIME_TOTAL_QPU
+++  static int last_time=0;
+++  static long long on_time=0;
+++  static long long off_time=0;
+++  int start_time;
+++  int end_time;
+++  static int count=0;
+++#endif
+++
+++  gpu_lock();
+++#ifdef RPI_TIME_TOTAL_QPU
+++  start_time = Microseconds();
+++  if (last_time==0)
+++    last_time = start_time;
+++  off_time += start_time-last_time;
+++#endif
+++  // Code address for the first num QPUs...
+++  for(i=0;i<num;i++) {
+++    gpu->mail[i*2 + 1] = code;
+++  }
+++  // ...and for the following num2 QPUs.
+++  for(;i<num+num2;i++) {
+++    gpu->mail[i*2 + 1] = code2;
+++  }
+++  gpu->mail[0 ] = unifs1;
+++  gpu->mail[2 ] = unifs2;
+++  gpu->mail[4 ] = unifs3;
+++  gpu->mail[6 ] = unifs4;
+++  gpu->mail[8 ] = unifs5;
+++  gpu->mail[10] = unifs6;
+++  gpu->mail[12] = unifs7;
+++  gpu->mail[14] = unifs8;
+++  gpu->mail[16] = unifs9;
+++  gpu->mail[18] = unifs10;
+++  gpu->mail[20] = unifs11;
+++  gpu->mail[22] = unifs12;
+++  execute_qpu(
+++    gpu->mb,
+++    12 /* Number of QPUs */,
+++    gpu->vc + offsetof(struct GPU, mail),
+++    1 /* no flush */,  // Don't flush VPU L1 cache
+++    5000 /* timeout ms */);
+++#ifdef RPI_TIME_TOTAL_QPU
+++  end_time = Microseconds();
+++  last_time = end_time;
+++  on_time += end_time - start_time;
+++  count++;
+++  // Report accumulated on/off time every 128 runs.
+++  if ((count&0x7f)==0)
+++    printf("On=%dms, Off=%dms\n",(int)(on_time/1000),(int)(off_time/1000));
+++#endif
+++  gpu_unlock();
+++}
+++
+++// Return the GPU address of one of the QPU shader entry points (QPU_MC_*).
+++// Prints "Unknown function" and exits the process for any other value.
+++unsigned int qpu_get_fn(int num) {
+++  // Make sure that the gpu is initialized
+++  unsigned int *fn;
+++  if (gpu==NULL) {
+++    printf("Preparing gpu\n");
+++    gpu_lock();
+++    gpu_unlock();
+++  }
+++  switch(num) {
+++  case QPU_MC_SETUP:
+++    fn = mc_setup;
+++    break;
+++  case QPU_MC_FILTER:
+++    fn = mc_filter;
+++    break;
+++  case QPU_MC_EXIT:
+++    fn = mc_exit;
+++    break;
+++  case QPU_MC_INTERRUPT_EXIT:
+++    fn = mc_interrupt_exit;
+++    break;
+++  case QPU_MC_FILTER_B:
+++    fn = mc_filter_b;
+++    break;
+++  case QPU_MC_FILTER_HONLY:
+++    fn = mc_filter_honly;
+++    break;
+++  case QPU_MC_SETUP_UV:
+++    fn = mc_setup_uv;
+++    break;
+++  case QPU_MC_FILTER_UV:
+++    fn = mc_filter_uv;
+++    break;
+++  case QPU_MC_FILTER_UV_B:
+++    fn = mc_filter_uv_b;
+++    break;
+++  case QPU_MC_END:
+++    fn = mc_end;
+++    break;
+++  default:
+++    printf("Unknown function\n");
+++    exit(-1);
+++  }
+++  // fn - rpi_shader is an offset in 32-bit words; scale to bytes and rebase
+++  // onto the GPU address of the shared context.
+++  return gpu->vc + 4*(int)(fn-rpi_shader);
+++  //return code[num] + gpu->vc;
+++}
+++
+++#if 0
+++
+++int32_t hcoeffs[] = {-4, 10, -21, 70, 90, -24, 11, -4};
+++//int32_t hcoeffs[] = {1, 1, 1, 1, 1, 1, 1, 1};
+++int32_t vcoeffs[] = {-2, 6, -13, 37, 115, -20, 9, -4};
+++//int32_t vcoeffs[] = {1, 1, 1, 1, 1, 1, 1, 1};
+++
+++#define ENCODE_COEFFS(c0, c1, c2, c3) (((c0-1) & 0xff) | ((c1-1) & 0xff) << 8 | ((c2-1) & 0xff) << 16 | ((c3-1) & 0xff) << 24);
+++
+++static uint8_t av_clip_uint8(int32_t a)
+++{
+++    if (a&(~255)) return (-a)>>31;
+++    else          return a;
+++}
+++
+++static int32_t filter8(const uint8_t *data, int pitch)
+++{
+++   int32_t vsum = 0;
+++   int x, y;
+++
+++   for (y = 0; y < 8; y++) {
+++     int32_t hsum = 0;
+++
+++     for (x = 0; x < 8; x++)
+++       hsum += hcoeffs[x]*data[x + y * pitch];
+++
+++     vsum += vcoeffs[y]*av_clip_uint8( (hsum + 64) >> 7); // Added brackets to stop compiler warning
+++   }
+++
+++   return av_clip_uint8( (vsum + 64) >> 7);
+++}
+++
+++// Note regression changes coefficients so is not thread safe
+++//#define REGRESSION +++#ifdef REGRESSION +++#define CMAX 100 +++#else +++#define CMAX 2 +++#endif +++#define YMAX 16 +++ +++int rpi_test_shader(void) +++{ +++ int i, c; +++ +++ uint32_t *unifs; +++ +++ uint8_t *in_buffer; +++ uint8_t *out_buffer[2]; +++ +++ GPU_MEM_PTR_T unifs_ptr; +++ GPU_MEM_PTR_T in_buffer_ptr; +++ GPU_MEM_PTR_T out_buffer_ptr[2]; +++ +++ // Addresses in GPU memory of filter programs +++ uint32_t mc_setup = 0; +++ uint32_t mc_filter = 0; +++ uint32_t mc_exit = 0; +++ +++ int pitch = 0x500; +++ +++ if (gpu==NULL) { +++ gpu_lock(); +++ gpu_unlock(); +++ } +++ +++ printf("This needs to change to reflect new assembler\n"); +++ // Use table to compute locations of program start points +++ mc_setup = code[0] + gpu->vc; +++ mc_filter = code[1] + gpu->vc; +++ mc_exit = code[2] + gpu->vc; +++ +++ if (!vcos_verify(gpu_malloc_uncached(4*64,&unifs_ptr))) { +++ return -2; +++ } +++ unifs = (uint32_t*)unifs_ptr.arm; +++ +++ if (!vcos_verify(gpu_malloc_uncached(64*23,&in_buffer_ptr))) { +++ return -3; +++ } +++ in_buffer = (uint8_t*)in_buffer_ptr.arm; +++ +++ if (!vcos_verify(gpu_malloc_uncached(16*pitch,&out_buffer_ptr[0])) || !vcos_verify(gpu_malloc_uncached(16*pitch,&out_buffer_ptr[1]))) { +++ return -4; +++ } +++ out_buffer[0] = (uint8_t*)out_buffer_ptr[0].arm; +++ out_buffer[1] = (uint8_t*)out_buffer_ptr[1].arm; +++ +++ for (c = 0; c < CMAX; c++) { +++ int xo[] = {rand()&31, rand()&31}; +++ +++#ifdef REGRESSION +++ for (i = 0; i < 8; i++) { +++ hcoeffs[i] = (int8_t)rand(); +++ vcoeffs[i] = (int8_t)rand(); +++ if (hcoeffs[i]==-128) +++ hcoeffs[i]++; +++ if (vcoeffs[i]==-128) +++ vcoeffs[i]++; +++ } +++#endif +++ +++ for (i = 0; i < 64*23; i++) { +++ //printf("%d %d %p\n",i,gpu->mb,&in_buffer[i]); +++ in_buffer[i] = rand(); +++ } +++ +++ // Clear output array +++ { +++ int b; +++ for(b=0;b<2;b++) { +++ for(i=0;i<16*16;i++) { +++ out_buffer[b][i] = 3; +++ } +++ } +++ } +++ +++ unifs[0] = mc_filter; +++ unifs[1] = in_buffer_ptr.vc+xo[0]+16; +++ unifs[2] = 
64; // src pitch +++ unifs[3] = pitch; // dst pitch +++ unifs[4] = 0; // Padding +++ unifs[5] = 0; +++ unifs[6] = 0; +++ unifs[7 ] = mc_filter; +++ unifs[8 ] = in_buffer_ptr.vc+xo[1]+16; +++ unifs[9 ] = ENCODE_COEFFS(hcoeffs[0], hcoeffs[1], hcoeffs[2], hcoeffs[3]); +++ unifs[10] = ENCODE_COEFFS(hcoeffs[4], hcoeffs[5], hcoeffs[6], hcoeffs[7]); +++ unifs[11] = ENCODE_COEFFS(vcoeffs[0], vcoeffs[1], vcoeffs[2], vcoeffs[3]); +++ unifs[12] = ENCODE_COEFFS(vcoeffs[4], vcoeffs[5], vcoeffs[6], vcoeffs[7]); +++ unifs[13] = out_buffer_ptr[0].vc; +++ unifs[14] = mc_exit; +++ unifs[15] = in_buffer_ptr.vc+xo[1]+16; // dummy +++ unifs[16] = ENCODE_COEFFS(hcoeffs[0], hcoeffs[1], hcoeffs[2], hcoeffs[3]); +++ unifs[17] = ENCODE_COEFFS(hcoeffs[4], hcoeffs[5], hcoeffs[6], hcoeffs[7]); +++ unifs[18] = ENCODE_COEFFS(vcoeffs[0], vcoeffs[1], vcoeffs[2], vcoeffs[3]); +++ unifs[19] = ENCODE_COEFFS(vcoeffs[4], vcoeffs[5], vcoeffs[6], vcoeffs[7]); +++ unifs[20] = out_buffer_ptr[1].vc; +++ +++ printf("Gpu->vc=%x Code=%x dst=%x\n",gpu->vc, mc_filter,out_buffer_ptr[1].vc); +++ +++ // flush_dcache(); TODO is this needed on ARM side? 
- tried to use the direct alias to avoid this problem +++ +++ //qpu_run_shader(mc_setup, unifs_ptr.vc); +++ //qpu_run_shader(gpu, gpu->vc, unifs_ptr.vc); +++ rpi_do_block(in_buffer_ptr.vc+xo[0]+16, 64, out_buffer_ptr[0].vc, pitch,out_buffer[0]); +++ rpi_do_block(in_buffer_ptr.vc+xo[1]+16, 64, out_buffer_ptr[1].vc, pitch,out_buffer[1]); +++ +++ if (1) +++ { +++ int x, y, b; +++ int bad = 0; +++ +++ for (b=0; b<2; ++b) +++ for (y=0; yvc; +++ mc_filter = code[1] + gpu->vc; +++ mc_exit = code[2] + gpu->vc; +++ +++ if (!vcos_verify(gpu_malloc_uncached(4*64,&unifs_ptr))) { +++ return; +++ } +++ //gpu_malloc_uncached(16*dst_pitch,&out_buffer_ptr); +++ //out_buffer = (uint8_t*)out_buffer_ptr.arm; +++ +++ /*for (y=0; y<16; ++y) { +++ for (x=0; x<16; ++x) { +++ out_buffer[x+y*dst_pitch] = 7; +++ } +++ }*/ +++ +++ unifs = (uint32_t*)unifs_ptr.arm; +++ +++ unifs[0] = mc_filter; +++ unifs[1] = (int)in_buffer_vc; +++ unifs[2] = src_pitch; // src pitch +++ unifs[3] = dst_pitch; // dst pitch +++ unifs[4] = 0; // Padding +++ unifs[5] = 0; +++ unifs[6] = 0; +++ unifs[7 ] = mc_exit; +++ unifs[8 ] = (int)in_buffer_vc; +++ unifs[9 ] = ENCODE_COEFFS(hcoeffs[0], hcoeffs[1], hcoeffs[2], hcoeffs[3]); +++ unifs[10] = ENCODE_COEFFS(hcoeffs[4], hcoeffs[5], hcoeffs[6], hcoeffs[7]); +++ unifs[11] = ENCODE_COEFFS(vcoeffs[0], vcoeffs[1], vcoeffs[2], vcoeffs[3]); +++ unifs[12] = ENCODE_COEFFS(vcoeffs[4], vcoeffs[5], vcoeffs[6], vcoeffs[7]); +++ unifs[13] = (int)dst_vc; +++ //unifs[13] = (int)out_buffer_ptr.vc; +++ +++ //printf("Gpu->vc=%x Code=%x dst=%x\n",gpu->vc, mc_filter,out_buffer_ptr[1].vc); +++ +++ qpu_run_shader(mc_setup, unifs_ptr.vc); +++ +++ /*for (y=0; y<16; ++y) { +++ for (x=0; x<16; ++x) { +++ dst[x+y*dst_pitch] = out_buffer[x+y*dst_pitch]; +++ } +++ }*/ +++ +++ gpu_free(&unifs_ptr); +++ //gpu_free(&out_buffer_ptr); +++} +++ +++ +++#endif +++ +++#endif // RPI ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++new file mode 100644 ++index 0000000..4e3c35c ++--- /dev/null 
+++++ b/libavcodec/rpi_qpu.h
++@@ -0,0 +1,45 @@
+++#ifndef RPI_QPU_H
+++#define RPI_QPU_H
+++
+++typedef struct gpu_mem_ptr_s { // One block of GPU memory as seen from both sides
+++  unsigned char *arm; // Pointer to memory mapped on ARM side
+++  int vc_handle;   // Videocore handle of relocatable memory
+++  int vcsm_handle; // Handle for use by VCSM
+++  int vc;       // Address for use in GPU code
+++  int numbytes; // Size of memory block
+++} GPU_MEM_PTR_T;
+++
+++// General GPU functions
+++extern int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p); // ARM-cached block; returns 0 on success
+++extern int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p); // block not cached on the ARM; returns 0 on success
+++extern void gpu_free(GPU_MEM_PTR_T *p); // releases a block from either allocator
+++extern void gpu_cache_flush(GPU_MEM_PTR_T *p); // only works when built with RPI_USE_VCSM
+++
+++// QPU specific functions
+++extern void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, int unifs9, int unifs10, int unifs11, int unifs12);
+++
+++enum { // Selectors accepted by qpu_get_fn()
+++  QPU_MC_SETUP,
+++  QPU_MC_FILTER,
+++  QPU_MC_EXIT,
+++  QPU_MC_INTERRUPT_EXIT,
+++  QPU_MC_FILTER_B,
+++  QPU_MC_FILTER_HONLY,
+++  QPU_MC_SETUP_UV,
+++  QPU_MC_FILTER_UV,
+++  QPU_MC_FILTER_UV_B,
+++  QPU_MC_END
+++  };
+++extern unsigned int qpu_get_fn(int num); // GPU address of the given QPU_MC_* entry point
+++
+++// VPU specific functions
+++extern unsigned int vpu_get_fn(void); // GPU address of the VPU code
+++extern unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5);
+++
+++// Simple test of shader code
+++extern int rpi_test_shader(void);
+++
+++extern void rpi_do_block(const unsigned char *in_buffer_vc, int src_pitch, unsigned char *dst_vc, int dst_pitch, unsigned char *dst);
+++extern void rpi_do_block_arm(const unsigned char *in_buffer, int src_pitch, unsigned char *dst, int dst_pitch);
+++
+++#endif
++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c
++new file mode 100644
++index 0000000..41cc2e1
++--- /dev/null
+++++ b/libavcodec/rpi_shader.c
++@@ -0,0 +1,818 @@
+++#include "rpi_shader.h"
+++
+++#ifdef _MSC_VER
+++   #include <stdint.h>
+++   /* cast
through uintptr_t to avoid warnings */ +++ #define POINTER_TO_UINT(X) ((unsigned int)(uintptr_t)(X)) +++#else +++ #define POINTER_TO_UINT(X) ((unsigned int)(X)) +++#endif +++ +++#ifdef __cplusplus +++extern "C" { /* the types are probably wrong... */ +++#endif +++#ifdef __cplusplus +++} +++#endif +++ +++#ifdef _MSC_VER +++__declspec(align(8)) +++#elif defined(__GNUC__) +++__attribute__((aligned(8))) +++#endif +++unsigned int rpi_shader[] = { +++// ::mc_setup +++/* [0x00000000] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000008] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num +++/* [0x00000010] */ 0x15827d80, 0x10020767, // mov ra_y, unif +++/* [0x00000018] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif +++/* [0x00000020] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00000028] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 +++/* [0x00000030] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x00000038] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000040] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x00000048] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x00000050] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x00000058] */ 0x00000040, 0xe0020567, // mov ra21, 64 +++/* [0x00000060] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000068] */ 0x00000008, 0xe00205e7, // mov ra23, 8 +++/* [0x00000070] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000078] */ 0x00000040, 0xe0021567, // mov rb21, 64 +++/* [0x00000080] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000088] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000090] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00000098] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x000000a0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x000000a8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x000000b0] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x000000b8] */ 0x00000000, 0xe0020367, // mov ra13, 
0 +++/* [0x000000c0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x000000c8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x000000d0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000000d8] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x000000e0] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x000000e8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x000000f0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x000000f8] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000100] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000108] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000110] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000118] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000120] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000128] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000130] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000138] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000140] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000148] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000150] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000158] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000160] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000168] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000170] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000178] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x00000180] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x00000188] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x00000190] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x00000198] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000001a0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x000001a8] */ 0x0c9c13c0, 0xd0020767, // 
add ra_y, r1, 1 +++/* [0x000001b0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x000001b8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x000001c0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x000001c8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x000001d0] */ 0x4c9d00cf, 0x10024821, // add r0, r0, r3; mul24 r1, r1, rb_pitch +++/* [0x000001d8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x000001e0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001e8] */ 0x949dc5c0, 0xd0025890, // and r2, r2, ~3; mov ra_x_base, r0 +++/* [0x000001f0] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x000001f8] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x00000200] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000208] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000210] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000218] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000220] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000228] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000230] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000238] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000240] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++// ::mc_filter_uv +++/* [0x00000248] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000250] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000258] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000260] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000268] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000270] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000278] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000280] */ 0x0c9e70c0, 0x10020827, 
// add r0, r0, r3 +++/* [0x00000288] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000290] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000298] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000002a0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002b0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002d0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000002d8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000002e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002e8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002f0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000002f8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000300] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000330] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000340] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000350] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; 
mul24 r0, r0, ra22 +++/* [0x00000360] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000368] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000370] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000378] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000380] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000388] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000390] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000398] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003a0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :uvloop +++/* [0x000003a8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003b0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000003b8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003c0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003c8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003d0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003d8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003e0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000400] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000408] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++/* [0x00000410] */ 0x4d07f4f0, 0xd00248a3, // 
sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000420] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000428] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000430] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000438] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000440] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000448] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000450] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000458] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000460] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000468] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000470] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000478] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000480] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000488] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000490] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000498] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x000004a0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x000004a8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000004b0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000004b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000004c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000004c8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004d0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* 
[0x000004d8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x000004e0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x000004e8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x000004f0] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x000004f8] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000500] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000508] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000510] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000518] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000520] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000528] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000530] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000538] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000540] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x00000548] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000550] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000558] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000560] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000568] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000570] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000578] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000580] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000588] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000590] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000598] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter +++/* [0x000005a0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000005a8] */ 0x154e7d80, 0x10020467, // mov 
ra_xshift, ra_xshift_next +++/* [0x000005b0] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x000005b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000005c0] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x000005c8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000005d0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x000005d8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000005e0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x000005e8] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x000005f0] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x000005f8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000600] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x00000608] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000610] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000618] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000620] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000628] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000630] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000638] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000640] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000648] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000650] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000658] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000660] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000668] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000670] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000678] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000680] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000688] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000690] */ 
0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000698] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000006b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000006d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000006f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000700] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000708] */ 0x000001d0, 0xf07809e7, // brr.anynn -, r:fast_path +++/* [0x00000710] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000718] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000720] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000728] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :loop +++/* [0x00000730] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000738] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000740] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000748] */ 
0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000750] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000758] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000760] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000768] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000770] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000778] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000780] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000788] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000790] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000798] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000007a0] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000007a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000007b0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000007b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000007c0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000007c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000007d0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000007d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000007e0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000007e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000007f0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000007f8] */ 0x401f1031, 
0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000800] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000808] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000810] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000818] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000820] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000828] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000830] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000838] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000840] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000848] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:loop +++/* [0x00000850] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00000858] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00000860] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00000868] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000870] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000878] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000880] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000888] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000890] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000898] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x000008a0] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000008a8] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x000008b0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000008b8] */ 0xfffffe58, 0xf06809e7, // brr.anyn -, r:loop +++/* [0x000008c0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x000008c8] */ 0x129d63c0, 0x10020867, // min r1, 
r1, rb22 +++/* [0x000008d0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000008d8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000008e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000008e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000008f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// :fast_path +++/* [0x000008f8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :fast_loop +++/* [0x00000900] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000908] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000910] */ 0x95727d9b, 0x1004475f, // mov.ifz ra_y, ra_y_next ; mov rb31, r3 +++/* [0x00000918] */ 0x95690dbf, 0x10044623, // mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch +++/* [0x00000920] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000928] */ 0x929de5e4, 0x100248a1, // min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 +++/* [0x00000930] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000938] */ 0xec414c87, 0x10024e20, // add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 +++/* [0x00000940] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000948] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000950] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000958] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000960] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000968] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000970] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000978] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000980] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 
+++/* [0x00000988] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 +++/* [0x00000990] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000998] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x000009a0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x000009a8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000009b0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000009b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000009c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000009c8] */ 0xffffff18, 0xf06809e7, // brr.anyn -, r:fast_loop +++/* [0x000009d0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x000009d8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x000009e0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x000009e8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x000009f0] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x000009f8] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000a00] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000a08] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000a10] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000a18] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000a20] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000a28] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000a30] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000a38] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:fast_loop +++/* [0x00000a40] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x00000a48] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a50] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a58] */ 
0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a60] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter_b +++/* [0x00000a78] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000a80] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000a88] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x00000a90] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000a98] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x00000aa0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000aa8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x00000ab0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000ab8] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x00000ac0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x00000ac8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x00000ad0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000ad8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x00000ae0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000ae8] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000af0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000af8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000b00] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000b08] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000b10] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000b18] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000b20] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000b28] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000b30] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000b38] */ 0x119c71c0, 
0xd0020827, // shl r0, r0, 7 +++/* [0x00000b40] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000b48] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000b50] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000b58] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000b60] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000b68] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000b70] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00000b78] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000b80] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000b88] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000b90] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000b98] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000ba0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ba8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bb0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bb8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000bc0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bc8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bd0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bd8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000be0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000be8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bf0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bf8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000c00] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000c08] */ 
0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000c10] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :bloop +++/* [0x00000c18] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000c20] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000c28] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000c30] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000c38] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000c40] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000c48] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000c50] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000c58] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000c60] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000c68] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000c70] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000c78] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000c80] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000c88] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000c90] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000c98] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000ca0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000ca8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000cb0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* 
[0x00000cb8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000cc0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000cc8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000cd0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000cd8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000ce0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000ce8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000cf0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000cf8] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000d00] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000d08] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000d10] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000d18] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000d20] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000d28] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000d30] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:bloop +++/* [0x00000d38] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00000d40] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00000d48] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00000d50] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000d58] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000d60] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000d68] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000d70] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000d78] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000d80] */ 0x4d208237, 0x10024860, // 
sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000d88] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000d90] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000d98] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000da0] */ 0x8fc8f3f6, 0xd0020867, // asr r1, r1, 15 ; mov -, vr_wait +++/* [0x00000da8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000db0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x00000db8] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:bloop +++/* [0x00000dc0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000dc8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00000dd0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x00000dd8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000de0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000de8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000df0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter_honly +++/* [0x00000df8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000e00] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000e08] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x00000e10] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000e18] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x00000e20] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000e28] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x00000e30] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000e38] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x00000e40] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x00000e48] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x00000e50] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000e58] */ 0x0c9e74c0, 
0x100208a7, // add r2, r2, r3 +++/* [0x00000e60] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000e68] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000e70] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000e78] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000e80] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000e88] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e90] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000e98] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000ea0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000ea8] */ 0x0c9de1c0, 0xd0021467, // add rb17, r0, -2 +++/* [0x00000eb0] */ 0x919c71c0, 0xd0024812, // shl r0, r0, 7 ; mov rb18,r0 +++/* [0x00000eb8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000ec0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000ec8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000ed0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000ed8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ee0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ee8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ef0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000ef8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f00] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f08] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f10] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000f18] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000f20] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000f28] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000f30] */ 0x00000000, 
0xe00208e7, // mov r3, 0 +++// :loop_honly +++/* [0x00000f38] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000f40] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000f48] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000f50] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000f58] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000f60] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000f68] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000f70] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000f78] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000f80] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000f88] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000f90] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000f98] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000fa0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000fa8] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000fb0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000fb8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000fc0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000fc8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000fd0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000fd8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000fe0] */ 
0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000fe8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000ff0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000ff8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001000] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001008] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 +++/* [0x00001010] */ 0x8d5927f6, 0x100269e1, // sub.setf -, r3, rb18 ; mov r1, ra22 +++/* [0x00001018] */ 0x559f2fc1, 0x100049e0, // mov -, vw_wait ; mul24 r0, r0, r1 +++/* [0x00001020] */ 0xfffffef8, 0xf06809e7, // brr.anyn -, r:loop_honly +++/* [0x00001028] */ 0x0f9cf1c0, 0xd0020827, // asr r0, r0, 15 +++/* [0x00001030] */ 0x129d61c0, 0x10020827, // min r0, r0, rb22 +++/* [0x00001038] */ 0x139c01c0, 0xd0020c27, // max vpm, r0, 0 +++/* [0x00001040] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001048] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001050] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001058] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_exit +++/* [0x00001060] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001068] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00001070] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001078] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001080] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001088] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001090] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001098] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000010a0] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_exit1 +++/* [0x000010a8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000010b0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010b8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010c0] */ 0x009e7000, 
0xa00009e7, // ldtmu0 +++/* [0x000010c8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000010d8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000010e0] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_interrupt_exit +++/* [0x000010e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000010f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001100] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001108] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001110] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001118] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001120] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001128] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001130] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001138] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001140] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001148] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001150] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001158] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001160] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001168] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001170] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00001178] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_interrupt_exit4 +++/* [0x00001180] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001188] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001190] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001198] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011a0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* 
[0x000011c0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000011c8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000011d0] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_interrupt_exit8 +++/* [0x000011d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000011e0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011e8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001200] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001208] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001210] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001218] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001220] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001228] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001230] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001238] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001240] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00001248] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_setup_uv +++/* [0x00001250] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00001258] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num +++/* [0x00001260] */ 0x15827d80, 0x10020767, // mov ra_y, unif +++/* [0x00001268] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif +++/* [0x00001270] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00001278] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base +++/* [0x00001280] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00001288] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 +++/* [0x00001290] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x00001298] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000012a0] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x000012a8] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x000012b0] 
*/ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x000012b8] */ 0x00000040, 0xe0020567, // mov ra21, 64 +++/* [0x000012c0] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x000012c8] */ 0x00000008, 0xe00205e7, // mov ra23, 8 +++/* [0x000012d0] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x000012d8] */ 0x00000040, 0xe0021567, // mov rb21, 64 +++/* [0x000012e0] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x000012e8] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x000012f0] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x000012f8] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x00001300] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x00001308] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x00001310] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x00001318] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x00001320] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x00001328] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x00001330] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00001338] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00001340] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00001348] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00001350] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00001358] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00001360] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00001368] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001370] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00001378] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00001380] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00001388] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00001390] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00001398] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x000013a0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x000013a8] */ 0x119c63c0, 0xd0020867, // shl 
r1, r1, 6 +++/* [0x000013b0] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x000013b8] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x000013c0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000013c8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x000013d0] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x000013d8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x000013e0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000013e8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x000013f0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000013f8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x00001400] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00001408] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00001410] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 +++/* [0x00001418] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00001420] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x00001428] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x00001430] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x00001438] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001440] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001448] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001450] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00001458] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00001460] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00001468] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001470] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00001478] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00001480] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++// 
::mc_filter_uv_b +++/* [0x00001488] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00001490] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00001498] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000014a0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000014a8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000014b0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000014b8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000014c0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000014c8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000014d0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000014d8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000014e0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000014e8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000014f0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000014f8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00001500] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00001508] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00001510] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00001518] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00001520] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00001528] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00001530] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00001538] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00001540] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001548] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001550] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00001558] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00001560] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00001568] */ 0x15827d80, 0x10020827, // 
mov r0, unif +++/* [0x00001570] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001578] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001580] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001588] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00001590] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001598] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015a0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015a8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000015b0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015b8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015c0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015c8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000015d0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015d8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015e0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015e8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x000015f0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000015f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001600] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :uvloop_b +++/* [0x00001608] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001610] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00001618] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00001620] */ 0x95710dbf, 0x10044763, // mov.ifz 
ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001628] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00001630] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001638] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00001640] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00001648] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00001650] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00001658] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001660] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00001668] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++/* [0x00001670] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00001678] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00001680] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00001688] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00001690] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00001698] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000016a0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000016a8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000016b0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000016b8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000016c0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000016c8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000016d0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, 
r0 << 7 +++/* [0x000016d8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x000016e0] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x000016e8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x000016f0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x000016f8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001700] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001708] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001710] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001718] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001720] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00001728] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00001730] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00001738] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00001740] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00001748] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00001750] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00001758] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00001760] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00001768] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00001770] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00001778] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00001780] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00001788] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00001790] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00001798] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x000017a0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* 
[0x000017a8] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x000017b0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000017b8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x000017c0] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x000017c8] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x000017d0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000017d8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000017e0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000017e8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000017f0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000017f8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001800] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00001808] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001810] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_end +++}; +++#ifdef __HIGHC__ +++#pragma Align_to(8, rpi_shader) +++#endif ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++new file mode 100644 ++index 0000000..db971f4 ++--- /dev/null +++++ b/libavcodec/rpi_shader.h ++@@ -0,0 +1,20 @@ +++#ifndef rpi_shader_H +++#define rpi_shader_H +++ +++extern unsigned int rpi_shader[]; +++ +++#define mc_setup (rpi_shader + 0) +++#define mc_filter_uv (rpi_shader + 146) +++#define mc_filter (rpi_shader + 360) +++#define mc_filter_b (rpi_shader + 670) +++#define mc_filter_honly (rpi_shader + 894) +++#define mc_exit (rpi_shader + 1048) +++#define mc_exit1 (rpi_shader + 1066) +++#define mc_interrupt_exit (rpi_shader + 1082) +++#define mc_interrupt_exit4 (rpi_shader + 1120) +++#define mc_interrupt_exit8 (rpi_shader + 1142) +++#define mc_setup_uv (rpi_shader + 1172) +++#define mc_filter_uv_b (rpi_shader + 1314) +++#define mc_end (rpi_shader + 1542) +++ +++#endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++new file mode 100644 ++index 0000000..6851e83 ++--- /dev/null +++++ 
b/libavcodec/rpi_shader.qasm ++@@ -0,0 +1,1413 @@ +++# register allocation +++# +++# ra0...ra7 eight horizontal filter coefficients +++# +++# rb1...rb7 seven shifted copies of the current unfiltered row +++# +++# ra8...ra15 eight filtered rows of context (ra15 == most recent) +++# +++# (ra15 isn't clamped to zero - this happens during the +++# copy to ra14, and during its use in the vertical filter) +++# +++# rb8...rb15 eight vertical filter coefficients +++# +++# ra16 clipped(row start address+elem_num)&~3 +++# ra17 per-channel shifts +++# ra19 next ra17 +++# +++# rb16 pitch +++# rb17 height + 5 +++# rb18 height + 7 +++# rb19 next ra16 +++# +++# ra20 1 +++# ra21 64 +++# ra22 256 +++# ra23 8 +++# +++# rb20 0xffffff00 +++# rb21 64 +++# rb22 255 +++# rb23 24 +++# +++# rb24 vdw_setup_1(dst_pitch) +++# rb25 frame width-1 +++# rb26 height<<23 + width<<16 + vdw_setup_0 +++# rb27 vdw_setup_0 (depends on QPU number) +++# rb28 vpm_setup (depends on QPU number) +++# rb29 vdw_setup_1(dst_pitch-width) +++# rb30 frame height-1 +++# rb31 used as temp to count loop iterations +++# +++# ra24...ra30 15, 14, 13, 12, 11, 10, 9 +++# ra24 clipped(row start address+8+elem_num)&~3 +++# ra25 per-channel shifts 2 +++# ra26 next ra24 +++# ra27 next ra25 +++# ra28 next y +++# ra29 y for next texture access +++# +++# ra31 next kernel address +++ +++.set rb_frame_width_minus_1, rb25 +++.set rb_frame_height_minus_1, rb30 +++.set rb_pitch, rb16 +++.set ra_x_base, ra16 +++.set rb_x_base_next, rb19 +++.set ra_x2_base, ra24 +++.set ra_x2_base_next, ra26 +++.set ra_xshift, ra17 +++ +++.set ra_x2shift, ra25 +++.set ra_u2v_ref_offset, ra25 +++ +++.set ra_xshift_next, ra19 +++ +++.set ra_x2shift_next, ra27 +++.set ra_u2v_dst_offset, ra27 +++ +++.set ra_y_next, ra28 +++.set ra_y, ra29 +++ +++.set rb_const_64, rb21 +++ +++# mc_setup(next_kernel, x, y, ref_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1) +++::mc_setup +++ +++# Read starting kernel +++mov ra31, unif +++ +++# Load first
request location +++add ra_x_base, unif, elem_num # Store x +++mov ra_y, unif # Store y +++mov ra_x2_base, unif # Store frame base +++ +++# Read image dimensions +++sub rb25,unif,1 +++sub rb30,unif,1 +++ +++# get source pitch +++mov rb16, unif +++ +++# get destination pitch +++mov r0, unif +++mov r1, vdw_setup_1(0) +++add rb24, r1, r0 +++ +++# load constants +++ +++mov ra20, 1 +++mov ra21, 64 +++mov ra22, 256 +++mov ra23, 8 +++ +++mov rb20, 0xffffff00 +++mov rb21, 64 +++mov rb22, 255 +++mov rb23, 24 +++ +++# touch vertical context to keep simulator happy +++ +++mov ra8, 0 +++mov ra9, 0 +++mov ra10, 0 +++mov ra11, 0 +++mov ra12, 0 +++mov ra13, 0 +++mov ra14, 0 +++mov ra15, 0 +++ +++# Compute part of VPM to use for DMA output +++mov r2, qpu_num +++and r2, r2, 15 +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later +++shl r0, r0, 5 +++add rb27, r0, r1 +++ +++# Compute part of VPM to save data into +++mov r2, qpu_num +++and r2, r2, 15 +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vpm_setup(0, 4, h8p(0, 0)) +++add rb28, r0, r1 +++ +++# Compute base address for first and second access +++#add r0, unif, elem_num # x +++mov r0, ra_x_base # Load x +++add r2, r0, 8 # x+8 +++max r0, r0, 0; mov r1, ra_y # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base # Load the frame base +++shl ra_xshift_next, r0, 3 +++max r2, r2, 0 +++add ra_y, r1, 1 +++min r2, r2, rb_frame_width_minus_1 +++shl ra_x2shift_next, r2, 3 +++max r1, r1, 0 # y +++min r1, r1, rb_frame_height_minus_1 +++add r0, r0, r3; mul24 r1, r1, rb_pitch +++add r2, r2, r3 +++and r0, r0, ~3 +++and r2, r2, ~3; mov ra_x_base, r0 +++# submit texture requests for first line +++add t0s, r0, r1 ; mov ra_x2_base, r2 +++add t0s, r2, r1 +++ +++# Dump padding words +++mov r0, unif +++mov r0, unif +++ +++# submit texture requests for second line 
+++max r1, ra_y, 0 +++min r1, r1, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 +++bra -, ra31 +++nop ; mul24 r1, r1, rb_pitch +++add t0s, r1, ra_x_base +++add t0s, r1, ra_x2_base +++ +++################################################################################ +++ +++# mc_filter_uv(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) +++ +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x_base, ra_x2_base point to the current coordinates for this block +++::mc_filter_uv +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base +++shl ra_xshift_next, r0, 3 +++sub r2, unif, r3 # compute offset from frame base u to frame base v +++add r0, r0, r3 +++and rb_x_base_next, r0, ~3 +++mov ra_y_next, r1 +++add ra_x2_base_next, rb_x_base_next, r2 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5,
r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++asr rb15, r0, rb23; mul24 r0, r0, ra22 +++asr rb14, r0, rb23; mul24 r0, r0, ra22 +++asr rb13, r0, rb23; mul24 r0, r0, ra22 +++asr rb12, r0, rb23 +++ +++# r2 is elem_num +++# r3 is loop counter +++ +++mov r5rep, -8 +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++mov r3, 0 +++ +++:uvloop +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; 
mul24.ifnz r3, ra6 << 14, r1 << 14 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++sub r0, r2, r3 +++ +++mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter +++brr.anyn -, r:uvloop +++max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr r0, r0, 15 ; mov r1, ra21 +++min.setf ra15, r0, rb22 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r0, ra14, rb14 +++sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++sub.ifnn r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++brr.anyn -, r:uvloop +++asr r1, r1, 15 +++min r1, r1, rb22 +++max vpm, r1, 0 +++ +++# DMA out for U +++ +++mov vw_setup, rb26 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++# DMA out for V +++# We need to wait for the U to complete first, but have nothing useful to compute while we wait. +++# Could potentially push this write into the start of the next pipeline stage. 
+++mov r0, 16 +++mov -, vw_wait +++ +++bra -, ra31 +++add vw_setup, rb26, r0 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++################################################################################ +++ +++ +++# mc_filter(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) +++ +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x_base, ra_x2_base point to the current coordinates for this block +++::mc_filter +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++mov ra_x2shift, ra_x2shift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++add r2, r0, 8 # x+8 +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base +++shl ra_xshift_next, r0, 3 +++max r2, r2, 0 +++min r2, r2, rb_frame_width_minus_1 +++shl ra_x2shift_next, r2, 3 +++add r0, r0, r3 +++add r2, r2, r3 +++and rb_x_base_next, r0, ~3 +++and ra_x2_base_next, r2, ~3 +++mov ra_y_next, r1 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24
r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++asr rb15, r0, rb23; mul24 r0, r0, ra22 +++asr rb14, r0, rb23; mul24 r0, r0, ra22 +++asr rb13, r0, rb23; mul24 r0, r0, ra22 +++brr.anynn -, r:fast_path +++asr rb12, r0, rb23 # delay slot 1 +++ +++# r2 is elem_num +++# r3 is loop counter +++ +++mov r5rep, -8 # delay slot 2 +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] # delay slot 3 +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++## nop ; ldtmu0 # loop counter increment +++## shr r0, r4, ra17 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, rb16 ; mov t0s, ra16 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 ; mul24 r3, r0, ra0 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++## sub r2, r2, r3 ; ldtmu0 +++## +++## mov r0, ra22 +++## shr r0, r4, ra17 ; mul24 r2, r2, r0 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, rb16 ; mov t0s, ra16 +++## +++## # 
apply horizontal filter +++## +++## asr r2, r2, 15 ; mul24 r3, r0, ra0 +++## min r2, r2, rb22 +++## max ra13, r2, 0 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++## sub r0, r2, r3 +++## +++## # apply horizontal filter +++## +++## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero +++## asr r0, r0, 15 +++## min r0, r0, rb22 +++## max ra14, r0, 0 +++## +++## +++## +++## +++## nop ; ldtmu0 # loop counter increment +++## shr r0, r4, ra17 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, rb16 ; mov t0s, ra16 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 ; mul24 r3, r0, ra0 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## nop ; mul24.ifnz r3, ra5 << 13, r1 << 
13 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++## sub r0, r2, r3 +++## +++## # apply horizontal filter +++## +++## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero +++## asr r0, r0, 15 +++## min r0, r0, rb22 +++## max ra15, r0, 0 +++ +++ +++ +++ +++mov r3, 0 +++ +++:loop +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++sub r0, r2, r3 +++ +++mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, 
ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter +++brr.anyn -, r:loop +++max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr r0, r0, 15 ; mov r1, ra21 +++min.setf ra15, r0, rb22 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r0, ra14, rb14 +++sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++sub.ifnn r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++brr.anyn -, r:loop +++asr r1, r1, 15 +++min r1, r1, rb22 +++max vpm, r1, 0 +++ +++# DMA out +++ +++bra -, ra31 +++mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long +++mov vw_setup, rb29 +++mov vw_addr, unif # start the VDW +++ +++#################################################### +++ +++:fast_path +++## nop ; ldtmu0 # loop counter increment +++## shr r0, r4, ra17 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, rb16 ; mov t0s, ra16 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 ; mul24 r3, r0, ra0 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## sub r2, r2, r3 ; ldtmu0 +++## +++## mov r0, ra22 +++## shr r0, r4, ra17 ; mul24 r2, r2, r0 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, 
rb16 ; mov t0s, ra16 +++## +++## # apply horizontal filter +++## +++## asr r2, r2, 15 ; mul24 r3, r0, ra0 +++## min r2, r2, rb22 +++## max ra13, r2, 0 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## sub r0, r2, r3 +++## +++## # apply horizontal filter +++## +++## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero +++## asr r0, r0, 15 +++## min r0, r0, rb22 +++## max ra14, r0, 0 +++## +++## +++## +++## +++## nop ; ldtmu0 # loop counter increment +++## shr r0, r4, ra17 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, rb16 ; mov t0s, ra16 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 ; mul24 r3, r0, ra0 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## sub r0, r2, r3 +++## +++## # apply horizontal filter +++## +++## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero +++## asr r0, r0, 15 +++## min r0, r0, rb22 +++## max ra15, r0, 0 +++ +++ +++mov r3, 0 # This signifies the amount of unrolling +++ +++:fast_loop +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++# Due to pipelining we 
can only skip second pipeline instructions related to the fetched pixels +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_y, ra_y_next ; mov rb31, r3 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch +++ +++max r2, ra_y, 0 +++min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 # discard texture read +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++sub r0, r2, r3 ; mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter +++ +++brr.anyn -, r:fast_loop +++max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr r0, r0, 15 ; mov r1, ra21 +++min.setf ra15, r0, rb22 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r0, ra14, rb14 +++sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++sub.ifnn r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++brr.anyn -, r:fast_loop +++asr r1, r1, 15 +++min r1, r1, rb22 +++max vpm, r1, 0 +++ +++# DMA out +++ +++bra -, ra31 +++mov vw_setup, rb26 # VDW: 
height rows, 16 8-bit units long +++mov vw_setup, rb29 +++mov vw_addr, unif # start the VDW +++ +++################################################################################ +++ +++# mc_filter_b(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) +++ +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x_base, ra_x2_base point to the current coordinates for this block +++::mc_filter_b +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++mov ra_x2shift, ra_x2shift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++add r2, r0, 8 # x+8 +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base +++shl ra_xshift_next, r0, 3 +++max r2, r2, 0 +++min r2, r2, rb_frame_width_minus_1 +++shl ra_x2shift_next, r2, 3 +++add r0, r0, r3 +++add r2, r2, r3 +++and rb_x_base_next, r0, ~3 +++and ra_x2_base_next, r2, ~3 +++mov ra_y_next, r1 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++# r0 is currently height<<7 +++# For vr_setup we want height<<20 (so 20-7=13 additional bits) +++shl r3, r0, 13 +++shl r3, r3, 8 # Mask off top 8 bits +++shr r3, r3, 8 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++# In a B frame, so also set up VPM read +++add vr_setup, r3, rb28 +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0,
r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++asr rb15, r0, rb23; mul24 r0, r0, ra22 +++asr rb14, r0, rb23; mul24 r0, r0, ra22 +++asr rb13, r0, rb23; mul24 r0, r0, ra22 +++asr rb12, r0, rb23 +++ +++# r2 is elem_num +++# r3 is loop counter +++ +++mov r5rep, -8 +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++mov r3, 0 +++ +++:bloop +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 
14, r1 << 14 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++sub r0, r2, r3 +++ +++mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter +++brr.anyn -, r:bloop +++max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr r0, r0, 15 ; mov r1, ra21 +++min.setf ra15, r0, rb22 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r0, ra14, rb14 +++sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++sub.ifnn r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++asr r1, r1, 15 ; mov -, vr_wait +++min r1, r1, rb22 +++add r0, vpm, 1 # Blend in previous VPM contents at this location +++brr.anyn -, r:bloop +++max r1, r1, 0 +++add r1, r1, r0 +++shr vpm, r1, 1 +++ +++# DMA out +++ +++bra -, ra31 +++mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long +++mov vw_setup, rb29 +++mov vw_addr, unif # start the VDW +++ +++################################################################################ +++ +++# mc_filter_honly(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) +++# This filter only does horizontal filtering. +++# It is assumed that the region to fetch does not include extra rows above. 
+++ +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x_base, ra_x16_base point to the current coordinates for this block +++::mc_filter_honly +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++mov ra_x2shift, ra_x2shift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++add r2, r0, 8 # x+8 +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base +++shl ra_xshift_next, r0, 3 +++max r2, r2, 0 +++min r2, r2, rb_frame_width_minus_1 +++shl ra_x2shift_next, r2, 3 +++add r0, r0, r3 +++add r2, r2, r3 +++and rb_x_base_next, r0, ~3 +++and ra_x2_base_next, r2, ~3 +++mov ra_y_next, r1 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, -2 # Pipelining means we move data across 2 iterations early +++shl r0, r0, 7 ; mov rb18,r0 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++mov r0, unif +++ +++# r2 is elem_num +++# r3 is loop counter +++mov r5rep, -8 +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] # delay slot 3 +++mov r3, 0 +++ +++:loop_honly +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf 
-, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++sub r0, r2, r3 ; mov r3, rb31 +++ +++sub.setf -, r3, rb18 ; mov r1, ra22 +++ +++mov -, vw_wait ; mul24 r0, r0, r1 +++brr.anyn -, r:loop_honly +++asr r0, r0, 15 # delay 1 +++min r0, r0, rb22 # delay 2 +++max vpm, r0, 0 # delay 3 +++ +++# DMA out +++bra -, ra31 +++mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long +++mov vw_setup, rb29 +++mov vw_addr, unif # start the VDW +++ +++ +++################################################################################ +++ +++# mc_exit() +++ +++::mc_exit +++mov -, vw_wait # wait on the VDW +++ +++mov -,srel(0) +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++nop ; nop ; thrend +++nop ; nop # 
delay slot 1 +++nop ; nop # delay slot 2 +++ +++::mc_exit1 +++mov -, vw_wait # wait on the VDW +++ +++#mov -,srel(1) +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++nop ; nop ; thrend +++mov interrupt, 1; nop # delay slot 1 +++nop ; nop # delay slot 2 +++ +++# mc_interrupt_exit() +++::mc_interrupt_exit +++mov -, vw_wait # wait on the VDW +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++mov -,sacq(0) # 1 +++mov -,sacq(0) # 2 +++mov -,sacq(0) # 3 +++mov -,sacq(0) # 4 +++mov -,sacq(0) # 5 +++mov -,sacq(0) # 6 +++mov -,sacq(0) # 7 +++mov -,sacq(0) # 8 +++mov -,sacq(0) # 9 +++mov -,sacq(0) # 10 +++mov -,sacq(0) # 11 +++ +++nop ; nop ; thrend +++mov interrupt, 1; nop # delay slot 1 +++nop ; nop # delay slot 2 +++ +++# mc_interrupt_exit4() +++::mc_interrupt_exit4 +++mov -, vw_wait # wait on the VDW +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++mov -,sacq(0) # 1 +++mov -,sacq(0) # 2 +++mov -,sacq(0) # 3 +++ +++nop ; nop ; thrend +++mov interrupt, 1; nop # delay slot 1 +++nop ; nop # delay slot 2 +++ +++# mc_interrupt_exit8() +++::mc_interrupt_exit8 +++mov -, vw_wait # wait on the VDW +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++mov -,sacq(0) # 1 +++mov -,sacq(0) # 2 +++mov -,sacq(0) # 3 +++mov -,sacq(0) # 4 +++mov -,sacq(0) # 5 +++mov -,sacq(0) # 6 +++mov -,sacq(0) # 7 +++ +++nop ; nop ; thrend +++mov interrupt, 1; nop # delay slot 1 +++nop ; nop # delay slot 2 +++ +++################################################################################ +++# mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1, pad2) +++::mc_setup_uv +++ +++# Read starting kernel +++mov ra31, unif +++ +++# Load first request location +++add ra_x_base, unif, elem_num # Store x +++mov ra_y, unif # Store y +++mov ra_x2_base, unif # Store frame u base +++nop +++sub ra_u2v_ref_offset, unif, ra_x2_base # Store offset to add to move from u to v in reference frame +++ +++# Read image dimensions +++sub rb25,unif,1 +++sub 
rb30,unif,1 +++ +++# get source pitch +++mov rb16, unif +++ +++# get destination pitch +++mov r0, unif +++mov r1, vdw_setup_1(0) +++add rb24, r1, r0 +++ +++# load constants +++ +++mov ra20, 1 +++mov ra21, 64 +++mov ra22, 256 +++mov ra23, 8 +++ +++mov rb20, 0xffffff00 +++mov rb21, 64 +++mov rb22, 255 +++mov rb23, 24 +++ +++# touch vertical context to keep simulator happy +++ +++mov ra8, 0 +++mov ra9, 0 +++mov ra10, 0 +++mov ra11, 0 +++mov ra12, 0 +++mov ra13, 0 +++mov ra14, 0 +++mov ra15, 0 +++ +++# Compute part of VPM to use for DMA output +++mov r2, qpu_num +++and r2, r2, 15 +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later +++shl r0, r0, 5 +++add rb27, r0, r1 +++ +++# Compute part of VPM to save data into +++mov r2, qpu_num +++and r2, r2, 15 +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vpm_setup(0, 4, h8p(0, 0)) +++add rb28, r0, r1 +++ +++# Compute base address for first and second access +++mov r0, ra_x_base # Load x +++max r0, r0, 0; mov r1, ra_y # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base # Load the frame base +++shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++add ra_y, r1, 1 +++add r0, r0, r3 +++and r0, r0, ~3 +++max r1, r1, 0 ; mov ra_x_base, r0 # y +++min r1, r1, rb_frame_height_minus_1 +++# submit texture requests for first line +++add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++add t0s, r0, r1 ; mov ra_x2_base, r2 +++add t0s, r2, r1 +++ +++# Dump padding words +++mov r0, unif +++mov r0, unif +++mov r0, unif +++ +++# submit texture requests for second line +++max r1, ra_y, 0 +++min r1, r1, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 +++bra -, ra31 +++nop ; mul24 r1, r1, rb_pitch +++add t0s, r1, ra_x_base +++add t0s, r1, ra_x2_base +++ +++ +++ +++################################################################################ +++ +++::mc_filter_uv_b 
+++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base +++shl ra_xshift_next, r0, 3 +++sub r2, unif, r3 # compute offset from frame base u to frame base v +++add r0, r0, r3 +++and rb_x_base_next, r0, ~3 +++mov ra_y_next, r1 +++add ra_x2_base_next, rb_x_base_next, r2 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++ +++# r0 is currently height<<7 +++# For vr_setup we want height<<20 (so 20-7=13 additional bits) +++shl r3, r0, 13 +++shl r3, r3, 8 # Mask off top 8 bits +++shr r3, r3, 8 +++ +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++# In a B frame, so also set up VPM read +++add vr_setup, r3, rb28 +++ +++sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++asr rb15, r0, rb23; mul24 r0, r0, ra22 +++asr rb14, r0, rb23; mul24 r0, r0, ra22 +++asr rb13, r0, rb23; mul24 r0, r0, ra22 +++asr 
rb12, r0, rb23 +++ +++# r2 is elem_num +++# r3 is loop counter +++ +++mov r5rep, -8 +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++mov r3, 0 +++ +++:uvloop_b +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++sub r0, r2, r3 +++ +++mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter 
+++brr.anyn -, r:uvloop_b +++max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr r0, r0, 15 ; mov r1, ra21 +++min.setf ra15, r0, rb22 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r0, ra14, rb14 +++sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++sub.ifnn r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++asr r1, r1, 15 +++min r1, r1, rb22 +++add r0, vpm, 1 # Blend in previous VPM contents at this location +++brr.anyn -, r:uvloop_b +++max r1, r1, 0 +++add r1, r1, r0 +++shr vpm, r1, 1 +++ +++ +++# DMA out for U +++ +++mov vw_setup, rb26 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++# DMA out for V +++# We need to wait for the U to complete first, but have nothing useful to compute while we wait. +++# Could potentially push this write into the start of the next pipeline stage. +++mov r0, 16 +++mov -, vw_wait +++ +++bra -, ra31 +++add vw_setup, rb26, r0 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++::mc_end ++diff --git a/libavcodec/rpi_user_vcsm.h b/libavcodec/rpi_user_vcsm.h ++new file mode 100644 ++index 0000000..fbebbbe ++--- /dev/null +++++ b/libavcodec/rpi_user_vcsm.h ++@@ -0,0 +1,425 @@ +++/* +++Copyright (c) 2012, Broadcom Europe Ltd +++All rights reserved. +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. 
+++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. +++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +++*/ +++ +++#ifndef __USER_VCSM__H__INCLUDED__ +++#define __USER_VCSM__H__INCLUDED__ +++ +++/* VideoCore Shared Memory - user interface library. +++** +++** This library provides all the necessary abstraction for any application to +++** make use of the shared memory service which is distributed across a kernel +++** driver and a videocore service. +++** +++** It is an application design decision to choose or not to use this service. +++** +++** The logical flow of operations that a user application needs to follow when +++** using this service is: +++** +++** 1) Initialize the service. +++** 2) Allocate shared memory blocks. +++** 3) Start using the allocated blocks.
+++** - In order to gain ownership on a block, lock the allocated block, +++** locking a block returns a valid address that the user application +++** can access. +++** - When finished with using the block for the current execution cycle +++** or function, and so when giving up the ownership, unlock the block. +++** 4) A block can be locked/unlocked as many times required - within or outside +++** of - a specific execution context. +++** 5) To completely release an allocated block, free it. +++** 6) If the service is no longer required, terminate it. +++** +++** +++** Some generic considerations: +++ +++** Allocating memory blocks. +++** +++** Memory blocks can be allocated in different manners depending on the cache +++** behavior desired. A given block can either be: +++ +++** - Allocated in a non cached fashion all the way through host and videocore. +++** - Allocated in a cached fashion on host OR videocore. +++** - Allocated in a cached fashion on host AND videocore. +++** +++** It is an application decision to determine how to allocate a block. Evidently +++** if the application will be doing substantial read/write accesses to a given block, +++** it is recommended to allocate the block at least in a 'host cached' fashion for +++** better results. +++** +++** +++** Locking memory blocks. +++** +++** When the memory block has been allocated in a host cached fashion, locking the +++** memory block (and so taking ownership of it) will trigger a cache invalidation. +++** +++** For the above reason and when using host cached allocation, it is important that +++** an application properly implements the lock/unlock mechanism to ensure cache will +++** stay coherent, otherwise there is no guarantee it will at all be. +++** +++** It is possible to dynamically change the host cache behavior (ie cached or non +++** cached) of a given allocation without needing to free and re-allocate the block. 
+++** This feature can be useful for such application which requires access to the block +++** only at certain times and not otherwise. By changing the cache behavior dynamically +++** the application can optimize performances for a given duration of use. +++** Such dynamic cache behavior remapping only applies to host cache and not videocore +++** cache. If one requires to change the videocore cache behavior, then a new block +++** must be created to replace the old one. +++** +++** On successful locking, a valid pointer is returned that the application can use +++** to access data inside the block. There is no guarantee that the pointer will +++** stay valid following the unlock action corresponding to this lock. +++** +++** +++** Unlocking memory blocks. +++** +++** When the memory block has been allocated in a host cached fashion, unlocking the +++** memory block (and so giving up its ownership) will trigger a cache flush unless +++** explicitly asked not to flush the cache for performance reasons. +++** +++** For the above reason and when using host cached allocation, it is important that +++** an application properly implements the lock/unlock mechanism to ensure cache will +++** stay coherent, otherwise there is no guarantee it will at all be. +++** +++** +++** A complete API is defined below. +++*/ +++ +++#ifdef __cplusplus +++extern "C" +++{ +++#endif +++ +++/* Different status that can be dumped. +++*/ +++typedef enum +++{ +++ VCSM_STATUS_VC_WALK_ALLOC = 0, // Walks *all* the allocation on videocore. +++ // Result of the walk is seen in the videocore +++ // log. +++ VCSM_STATUS_HOST_WALK_MAP, // Walks the *full* mapping allocation on host +++ // driver (ie for all processes). Result of +++ // the walk is seen in the kernel log. +++ VCSM_STATUS_HOST_WALK_PID_MAP, // Walks the per process mapping allocation on host +++ // driver (for current process). Result of +++ // the walk is seen in the kernel log.
+++ VCSM_STATUS_HOST_WALK_PID_ALLOC, // Walks the per process host allocation on host +++ // driver (for current process). Result of +++ // the walk is seen in the kernel log. +++ VCSM_STATUS_VC_MAP_ALL, // Equivalent to both VCSM_STATUS_VC_WALK_ALLOC and +++ // VCSM_STATUS_HOST_WALK_MAP. +++ // +++ VCSM_STATUS_NONE, // Must be last - invalid. +++ +++} VCSM_STATUS_T; +++ +++/* Different kind of cache behavior. +++*/ +++typedef enum +++{ +++ VCSM_CACHE_TYPE_NONE = 0, // No caching applies. +++ VCSM_CACHE_TYPE_HOST, // Allocation is cached on host (user space). +++ VCSM_CACHE_TYPE_VC, // Allocation is cached on videocore. +++ VCSM_CACHE_TYPE_HOST_AND_VC, // Allocation is cached on both host and videocore. +++ +++} VCSM_CACHE_TYPE_T; +++ +++/* Initialize the vcsm processing. +++** +++** Must be called once before attempting to do anything else. +++** +++** Returns 0 on success, -1 on error. +++*/ +++int vcsm_init( void ); +++ +++ +++/* Terminates the vcsm processing. +++** +++** Must be called when vcsm services are no longer needed, it will +++** take care of removing any allocation under the current process +++** control if deemed necessary. +++*/ +++void vcsm_exit( void ); +++ +++ +++/* Queries the status of the vcsm. +++** +++** Triggers dump of various kind of information, see the +++** different variants specified in VCSM_STATUS_T. +++** +++** Pid is optional. +++*/ +++void vcsm_status( VCSM_STATUS_T status, int pid ); +++ +++ +++/* Allocates a non-cached block of memory of size 'size' via the vcsm memory +++** allocator. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success. +++** +++** On success, the user must invoke vcsm_lock with the returned opaque +++** handle to gain access to the memory associated with the opaque handle. +++** When finished using the memory, the user calls vcsm_unlock_xx (see those +++** function definition for more details on the one that can be used).
+++** +++** A well behaved application should make every attempt to lock/unlock +++** only for the duration it needs to access the memory data associated with +++** the opaque handle. +++*/ +++unsigned int vcsm_malloc( unsigned int size, char *name ); +++ +++ +++/* Allocates a cached block of memory of size 'size' via the vcsm memory +++** allocator, the type of caching requested is passed as argument of the +++** function call. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success. +++** +++** On success, the user must invoke vcsm_lock with the returned opaque +++** handle to gain access to the memory associated with the opaque handle. +++** When finished using the memory, the user calls vcsm_unlock_xx (see those +++** function definition for more details on the one that can be used). +++** +++** A well behaved application should make every attempt to lock/unlock +++** only for the duration it needs to access the memory data associated with +++** the opaque handle. +++*/ +++unsigned int vcsm_malloc_cache( unsigned int size, VCSM_CACHE_TYPE_T cache, char *name ); +++ +++ +++/* Shares an allocated block of memory via the vcsm memory allocator. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success. +++** +++** On success, the user must invoke vcsm_lock with the returned opaque +++** handle to gain access to the memory associated with the opaque handle. +++** When finished using the memory, the user calls vcsm_unlock_xx (see those +++** function definition for more details on the one that can be used). +++** +++** A well behaved application should make every attempt to lock/unlock +++** only for the duration it needs to access the memory data associated with +++** the opaque handle. +++*/ +++unsigned int vcsm_malloc_share( unsigned int handle ); +++ +++ +++/* Resizes a block of memory allocated previously by vcsm_alloc. +++** +++** Returns: 0 on success +++** -errno on error. 
+++** +++** The handle must be unlocked by user prior to attempting any +++** resize action. +++** +++** On error, the original size allocated against the handle +++** remains available the same way it would be following a +++** successful vcsm_malloc. +++*/ +++int vcsm_resize( unsigned int handle, unsigned int new_size ); +++ +++ +++/* Frees a block of memory that was successfully allocated by +++** a prior call to vcsm_malloc. +++** +++** The handle should be considered invalid upon return from this +++** call. +++** +++** Whether any memory is actually freed up or not as the result of +++** this call will depend on many factors, if all goes well it will +++** be freed. If something goes wrong, the memory will likely end up +++** being freed up as part of the vcsm_exit process. In the end the +++** memory is guaranteed to be freed one way or another. +++*/ +++void vcsm_free( unsigned int handle ); +++ +++ +++/* Retrieves a videocore opaque handle from a mapped user address +++** pointer. The videocore handle will correspond to the actual +++** memory mapped in videocore. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success. +++** +++** Note: the videocore opaque handle is distinct from the user +++** opaque handle (allocated via vcsm_malloc) and it is only +++** significant for such application which knows what to do +++** with it, for the others it is just a number with little +++** use since nothing can be done with it (in particular +++** for safety reason it cannot be used to map anything). +++*/ +++unsigned int vcsm_vc_hdl_from_ptr( void *usr_ptr ); +++ +++ +++/* Retrieves a videocore opaque handle from an opaque handle +++** pointer. The videocore handle will correspond to the actual +++** memory mapped in videocore. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success.
+++** +++** Note: the videocore opaque handle is distinct from the user +++** opaque handle (allocated via vcsm_malloc) and it is only +++** significant for such application which knows what to do +++** with it, for the others it is just a number with little +++** use since nothing can be done with it (in particular +++** for safety reason it cannot be used to map anything). +++*/ +++unsigned int vcsm_vc_hdl_from_hdl( unsigned int handle ); +++ +++ +++/* Retrieves a user opaque handle from a mapped user address +++** pointer. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success. +++*/ +++unsigned int vcsm_usr_handle( void *usr_ptr ); +++ +++ +++/* Retrieves a mapped user address from an opaque user +++** handle. +++** +++** Returns: 0 on error +++** a non-zero address on success. +++** +++** On success, the address corresponds to the pointer +++** which can access the data allocated via the vcsm_malloc +++** call. +++*/ +++void *vcsm_usr_address( unsigned int handle ); +++ +++ +++/* Locks the memory associated with this opaque handle. +++** +++** Returns: NULL on error +++** a valid pointer on success. +++** +++** A user MUST lock the handle received from vcsm_malloc +++** in order to be able to use the memory associated with it. +++** +++** On success, the pointer returned is only valid within +++** the lock content (ie until a corresponding vcsm_unlock_xx +++** is invoked). +++*/ +++void *vcsm_lock( unsigned int handle ); +++ +++ +++/* Locks the memory associated with this opaque handle. The lock +++** also gives a chance to update the *host* cache behavior of the +++** allocated buffer if so desired. The *videocore* cache behavior +++** of the allocated buffer cannot be changed by this call and such +++** attempt will be ignored. +++** +++** The system will attempt to honour the cache_update mode request, +++** the cache_result mode will provide the final answer on which cache +++** mode is really in use. 
Failing to change the cache mode will not +++** result in a failure to lock the buffer as it is an application +++** decision to choose what to do if (cache_result != cache_update) +++** +++** The value returned in cache_result can only be considered valid if +++** the returned pointer is non NULL. The cache_result pointer may be +++** NULL if the application does not care about the actual outcome of +++** its action with regards to the cache behavior change. +++** +++** Returns: NULL on error +++** a valid pointer on success. +++** +++** A user MUST lock the handle received from vcsm_malloc +++** in order to be able to use the memory associated with it. +++** +++** On success, the pointer returned is only valid within +++** the lock content (ie until a corresponding vcsm_unlock_xx +++** is invoked). +++*/ +++void *vcsm_lock_cache( unsigned int handle, +++ VCSM_CACHE_TYPE_T cache_update, +++ VCSM_CACHE_TYPE_T *cache_result ); +++ +++ +++/* Unlocks the memory associated with this user mapped address. +++** +++** Returns: 0 on success +++** -errno on error. +++** +++** After unlocking a mapped address, the user should no longer +++** attempt to reference it. +++*/ +++int vcsm_unlock_ptr( void *usr_ptr ); +++ +++ +++/* Unlocks the memory associated with this user mapped address. +++** Apply special processing that would override the otherwise +++** default behavior. +++** +++** If 'cache_no_flush' is specified: +++** Do not flush cache as the result of the unlock (if cache +++** flush was otherwise applicable in this case). +++** +++** Returns: 0 on success +++** -errno on error. +++** +++** After unlocking a mapped address, the user should no longer +++** attempt to reference it. +++*/ +++int vcsm_unlock_ptr_sp( void *usr_ptr, int cache_no_flush ); +++ +++ +++/* Unlocks the memory associated with this user opaque handle. +++** +++** Returns: 0 on success +++** -errno on error. 
+++** +++** After unlocking an opaque handle, the user should no longer +++** attempt to reference the mapped addressed once associated +++** with it. +++*/ +++int vcsm_unlock_hdl( unsigned int handle ); +++ +++ +++/* Unlocks the memory associated with this user opaque handle. +++** Apply special processing that would override the otherwise +++** default behavior. +++** +++** If 'cache_no_flush' is specified: +++** Do not flush cache as the result of the unlock (if cache +++** flush was otherwise applicable in this case). +++** +++** Returns: 0 on success +++** -errno on error. +++** +++** After unlocking an opaque handle, the user should no longer +++** attempt to reference the mapped addressed once associated +++** with it. +++*/ +++int vcsm_unlock_hdl_sp( unsigned int handle, int cache_no_flush ); +++ +++#ifdef __cplusplus +++} +++#endif +++ +++#endif /* __USER_VCSM__H__INCLUDED__ */ ++-- ++2.7.4 ++ ++ ++From 6cfa5910be47865aaaf58c185587189c332765a6 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Sat, 2 May 2015 21:15:37 +0100 ++Subject: [PATCH 04/68] First working version with uncached memory ++ ++--- ++ libavcodec/hevc.c | 61 +++++- ++ libavcodec/hevc.h | 12 +- ++ libavcodec/hevc_cabac.c | 39 +++- ++ libavcodec/hevc_filter.c | 16 ++ ++ libavcodec/hevcpred_template.c | 6 + ++ libavcodec/rpi_hevc_transform.h | 422 +++++++++++++++++++++++++++++++++++++++- ++ libavcodec/rpi_hevc_transform.s | 153 +++++++++++++-- ++ libavcodec/rpi_qpu.c | 72 +++++++ ++ libavcodec/rpi_qpu.h | 1 + ++ 9 files changed, 736 insertions(+), 46 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index ab55df1..94ff709 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -45,6 +45,8 @@ ++ #include "rpi_qpu.h" ++ #endif ++ +++// #define DISABLE_MC +++ ++ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 }; ++ ++ /** ++@@ -1079,11 +1081,15 @@ static int 
hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (size * size); i++) { ++ coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); ++ } +++ printf("Cross component not supported\n"); // TODO +++ exit(-1); ++ s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride); ++ } ++ } ++ ++ if (lc->tu.cross_pf) { +++ printf("Cross component not supported\n"); // TODO +++ exit(-1); ++ hls_cross_component_pred(s, 1); ++ } ++ for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) { ++@@ -1112,6 +1118,8 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (size * size); i++) { ++ coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); ++ } +++ printf("Cross component not supported\n"); // TODO +++ exit(-1); ++ s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride); ++ } ++ } ++@@ -1409,6 +1417,10 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ int idx = ff_hevc_pel_weight[block_w]; ++ +++#ifdef DISABLE_MC +++ return; +++#endif +++ ++ x_off += mv->x >> 2; ++ y_off += mv->y >> 2; ++ src += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift)); ++@@ -1479,6 +1491,10 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift); ++ uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift); ++ +++#ifdef DISABLE_MC +++ return; +++#endif +++ ++ if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER || ++ x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER || ++ y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) { ++@@ -1564,6 +1580,10 @@ static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0, ++ intptr_t _mx = mx << (1 - hshift); ++ intptr_t _my = my << (1 - vshift); ++ +++#ifdef DISABLE_MC +++ return; +++#endif +++ ++ x_off 
+= mv->x >> (2 + hshift); ++ y_off += mv->y >> (2 + vshift); ++ src0 += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift)); ++@@ -1628,6 +1648,10 @@ static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVF ++ int hshift = s->ps.sps->hshift[1]; ++ int vshift = s->ps.sps->vshift[1]; ++ +++#ifdef DISABLE_MC +++ return; +++#endif +++ ++ intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift); ++ intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift); ++ intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift); ++@@ -2367,6 +2391,22 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, ++ } ++ ++ #ifdef RPI +++static void rpi_execute_transform(HEVCContext *s) +++{ +++ int i=2; +++ //int j; +++ //int16_t *coeffs = s->coeffs_buf_arm[i]; +++ //for(j=s->num_coeffs[i]; j > 0; j-= 16*16, coeffs+=16*16) { +++ // s->hevcdsp.idct[4-2](coeffs, 16); +++ //} +++ +++ //gpu_cache_flush(&s->coeffs_buf[i]); +++ vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf[i].vc, s->num_coeffs[i] >> 8, 0, 0, 0); +++ +++ for(i=0;i<4;i++) +++ s->num_coeffs[i] = 0; +++} +++ ++ static void rpi_execute_pred_cmds(HEVCContext *s) ++ { ++ int i; ++@@ -2387,7 +2427,6 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ } ++ } ++ s->num_pred_cmds = 0; ++- s->num_coeffs = 0; ++ } ++ #endif ++ ++@@ -2434,7 +2473,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ #ifdef RPI ++- if (x_ctb + ctb_size >= s->ps.sps->width) { +++ if (1 || x_ctb + ctb_size >= s->ps.sps->width) { // TODO watch out for deblocking! 
+++ rpi_execute_transform(s); ++ rpi_execute_pred_cmds(s); ++ } ++ #endif ++@@ -3179,7 +3219,9 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ av_freep(&s->unif_mv_cmds); ++ av_freep(&s->unif_xfm_cmds); ++ av_freep(&s->univ_pred_cmds); ++- av_freep(&s->coeffs_buf); +++ for(i = 0; i < 4; i++) { +++ gpu_free(&s->coeffs_buf[i]); +++ } ++ #endif ++ ++ for (i = 0; i < 3; i++) { ++@@ -3246,13 +3288,16 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->univ_pred_cmds = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); ++ if (!s->univ_pred_cmds) ++ goto fail; ++- s->coeffs_buf = av_mallocz(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16); ++- if (!s->coeffs_buf) ++- goto fail; +++ for(i = 0; i < 4; i++) { +++ gpu_malloc_uncached(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16, &s->coeffs_buf[i]); // TODO slim this down and share across sizes +++ s->coeffs_buf_arm[i] = (int16_t*) s->coeffs_buf[i].arm; +++ if (!s->coeffs_buf_arm[i]) +++ goto fail; +++ } ++ s->enable_rpi = 0; ++ ++ // A little test program ++- { +++ /*{ ++ GPU_MEM_PTR_T p; ++ int err = gpu_malloc_cached(16, &p); ++ short *q = (short *)p.arm; ++@@ -3273,7 +3318,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ printf(")\n"); ++ gpu_free(&p); ++ goto fail; // Early out ++- } +++ }*/ ++ ++ #endif ++ ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 7a1c35f..4167985 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -40,6 +40,11 @@ ++ #include "thread.h" ++ #include "videodsp.h" ++ +++// define RPI to split the CABAC/prediction/transform into separate stages +++#ifdef RPI +++#include "rpi_qpu.h" +++#endif +++ ++ #define MAX_DPB_SIZE 16 // A.4.1 ++ #define MAX_REFS 16 ++ ++@@ -856,11 +861,12 @@ typedef struct HEVCContext { ++ HEVCMvCmd *unif_mv_cmds; ++ HEVCXfmCmd *unif_xfm_cmds; ++ HEVCPredCmd *univ_pred_cmds; ++- int16_t *coeffs_buf; ++- int num_mv_cmds; +++ GPU_MEM_PTR_T coeffs_buf[4]; +++ int16_t *coeffs_buf_arm[4]; +++ int num_coeffs[4]; ++ int 
num_xfm_cmds; +++ int num_mv_cmds; ++ int num_pred_cmds; ++- int num_coeffs; ++ #endif ++ ++ uint8_t *cabac_state; ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 4e97f06..d1cba86 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1031,6 +1031,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ int vshift = s->ps.sps->vshift[c_idx]; ++ uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride + ++ ((x0 >> hshift) << s->ps.sps->pixel_shift)]; +++ int use_vpu = s->enable_rpi && !lc->cu.cu_transquant_bypass_flag && !transform_skip_flag && !lc->tu.cross_pf && log2_trafo_size==4; ++ int16_t *coeffs = (int16_t*)(c_idx ? lc->edge_emu_buffer2 : lc->edge_emu_buffer); ++ uint8_t significant_coeff_group_flag[8][8] = {{0}}; ++ int explicit_rdpcm_flag = 0; ++@@ -1044,6 +1045,18 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ uint8_t dc_scale; ++ int pred_mode_intra = (c_idx == 0) ? lc->tu.intra_pred_mode : ++ lc->tu.intra_pred_mode_c; +++#ifdef RPI +++ if (s->enable_rpi) { +++ int n = trafo_size * trafo_size; +++ if (use_vpu) { +++ coeffs = s->coeffs_buf_arm[log2_trafo_size - 2] + s->num_coeffs[log2_trafo_size - 2]; +++ s->num_coeffs[log2_trafo_size - 2] += n; +++ } else { +++ coeffs = s->coeffs_buf_arm[0] + s->num_coeffs[0]; +++ s->num_coeffs[0] += n; +++ } +++ } +++#endif ++ ++ memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); ++ ++@@ -1488,6 +1501,24 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && log2_trafo_size == 2) { ++ s->hevcdsp.idct_4x4_luma(coeffs); ++ } else { +++#ifdef RPI +++ if (!use_vpu) { +++ int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y); +++ if (max_xy == 0) +++ s->hevcdsp.idct_dc[log2_trafo_size-2](coeffs); +++ else { +++ int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4; +++ if (max_xy < 4) +++ col_limit = FFMIN(4, 
col_limit); +++ else if (max_xy < 8) +++ col_limit = FFMIN(8, col_limit); +++ else if (max_xy < 12) +++ col_limit = FFMIN(24, col_limit); +++ +++ s->hevcdsp.idct[log2_trafo_size-2](coeffs, col_limit); +++ } +++ } +++#else ++ int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y); ++ if (max_xy == 0) ++ s->hevcdsp.idct_dc[log2_trafo_size-2](coeffs); ++@@ -1501,6 +1532,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ col_limit = FFMIN(24, col_limit); ++ s->hevcdsp.idct[log2_trafo_size-2](coeffs, col_limit); ++ } +++#endif ++ } ++ } ++ if (lc->tu.cross_pf) { ++@@ -1512,14 +1544,11 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } ++ #ifdef RPI ++ if (s->enable_rpi) { ++- int16_t *c = s->coeffs_buf + s->num_coeffs; ++- int n = trafo_size * trafo_size; ++ HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; ++- memcpy(c, coeffs, n * sizeof(int16_t)); // TODO change pointer earlier and we can avoid this copy ++- s->num_coeffs += n; +++ //memcpy(coeffs2, coeffs, sizeof(int16_t) * trafo_size * trafo_size); // TODO ++ cmd->type = RPI_PRED_TRANSFORM_ADD; ++ cmd->size = log2_trafo_size; ++- cmd->buf = c; +++ cmd->buf = coeffs; ++ cmd->dst = dst; ++ cmd->stride = stride; ++ return; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 1f33b0c..e4c3da7 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -22,6 +22,10 @@ ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +++//#define DISABLE_SAO +++//#define DISABLE_DEBLOCK +++//#define DISABLE_STRENGTHS +++ ++ #include "libavutil/common.h" ++ #include "libavutil/internal.h" ++ ++@@ -273,6 +277,10 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) ++ edges[2] = x_ctb == s->ps.sps->ctb_width - 1; ++ edges[3] = y_ctb == s->ps.sps->ctb_height - 1; ++ +++#ifdef DISABLE_SAO +++ return; +++#endif +++ ++ if (restore) { ++ if (!edges[0]) { ++ left_tile_edge = no_tile_filter 
&& s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]]; ++@@ -496,6 +504,10 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->ps.sps->pcm.loop_filter_disable_flag) || ++ s->ps.pps->transquant_bypass_enable_flag; ++ +++#ifdef DISABLE_DEBLOCK +++ return; +++#endif +++ ++ if (x0) { ++ left_tc_offset = s->deblock[ctb - 1].tc_offset; ++ left_beta_offset = s->deblock[ctb - 1].beta_offset; ++@@ -726,6 +738,10 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ int boundary_upper, boundary_left; ++ int i, j, bs; ++ +++#ifdef DISABLE_STRENGTHS +++ return; +++#endif +++ ++ boundary_upper = y0 > 0 && !(y0 & 7); ++ if (boundary_upper && ++ ((!s->sh.slice_loop_filter_across_slices_enabled_flag && ++diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c ++index 6ae87cc..71c6d52 100644 ++--- a/libavcodec/hevcpred_template.c +++++ b/libavcodec/hevcpred_template.c ++@@ -20,6 +20,8 @@ ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +++//#define DISABLE_INTRA +++ ++ #include "libavutil/pixdesc.h" ++ ++ #include "bit_depth_template.c" ++@@ -114,6 +116,10 @@ do { \ ++ int top_right_size = (FFMIN(x0 + 2 * size_in_luma_h, s->ps.sps->width) - ++ (x0 + size_in_luma_h)) >> hshift; ++ +++#ifdef DISABLE_INTRA +++ return; +++#endif +++ ++ if (s->ps.pps->constrained_intra_pred_flag == 1) { ++ int size_in_luma_pu_v = PU(size_in_luma_v); ++ int size_in_luma_pu_h = PU(size_in_luma_h); ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++index 85a9102..c0c279f 100644 ++--- a/libavcodec/rpi_hevc_transform.h +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -3,11 +3,11 @@ unsigned char rpi_hevc_transform [] = { ++ 3, ++ 3, ++ 232, ++-128, +++32, ++ 0, ++ 0, ++ 0, ++-20, +++12, ++ 248, ++ 0, ++ 136, ++@@ -56,9 +56,9 @@ unsigned char rpi_hevc_transform [] = { ++ 5, ++ 232, ++ 0, ++-0, ++ 8, ++ 0, +++0, ++ 128, ++ 
69, ++ 113, ++@@ -108,8 +108,8 @@ unsigned char rpi_hevc_transform [] = { ++ 128, ++ 2, ++ 0, ++-248, ++-62, +++8, +++2, ++ 0, ++ 128, ++ 144, ++@@ -123,13 +123,13 @@ unsigned char rpi_hevc_transform [] = { ++ 3, ++ 32, ++ 8, ++-16, +++20, ++ 0, ++ 76, ++ 254, ++ 48, ++ 192, ++-9, +++4, ++ 4, ++ 32, ++ 8, ++@@ -155,14 +155,46 @@ unsigned char rpi_hevc_transform [] = { ++ 192, ++ 41, ++ 3, ++-68, +++70, +++192, +++80, +++7, +++164, +++255, +++36, +++204, +++96, +++2, +++0, +++248, +++62, +++0, +++3, +++255, +++55, +++208, +++120, +++3, +++224, +++3, +++190, +++11, +++16, +++139, +++246, +++91, +++0, +++103, +++90, +++0, +++70, ++ 192, ++ 80, ++ 7, ++ 164, ++ 255, ++ 36, ++-220, +++204, ++ 96, ++ 2, ++ 0, ++@@ -182,7 +214,7 @@ unsigned char rpi_hevc_transform [] = { ++ 16, ++ 139, ++ 246, ++-83, +++91, ++ 0, ++ 103, ++ 90, ++@@ -209,4 +241,374 @@ unsigned char rpi_hevc_transform [] = { ++ 96, ++ 90, ++ 0, +++169, +++3, +++3, +++232, +++32, +++0, +++0, +++0, +++12, +++248, +++0, +++136, +++0, +++0, +++192, +++248, +++0, +++0, +++64, +++232, +++0, +++2, +++0, +++0, +++12, +++248, +++0, +++168, +++0, +++0, +++192, +++248, +++0, +++0, +++3, +++232, +++128, +++0, +++0, +++0, +++7, +++232, +++0, +++2, +++0, +++0, +++4, +++232, +++64, +++0, +++0, +++0, +++5, +++232, +++0, +++8, +++0, +++0, +++57, +++239, +++224, +++247, +++255, +++255, +++72, +++192, +++95, +++207, +++88, +++122, +++88, +++124, +++137, +++64, +++26, +++64, +++161, +++64, +++152, +++64, +++128, +++144, +++31, +++0, +++72, +++232, +++32, +++0, +++0, +++0, +++65, +++232, +++32, +++0, +++0, +++0, +++128, +++144, +++23, +++0, +++145, +++64, +++168, +++64, +++128, +++144, +++19, +++0, +++72, +++232, +++32, +++0, +++0, +++0, +++65, +++232, +++32, +++0, +++0, +++0, +++128, +++144, +++11, +++0, +++74, +++232, +++0, +++8, +++0, +++0, +++242, +++140, +++229, +++192, +++57, +++239, +++32, +++8, +++0, +++0, +++41, +++3, +++12, +++248, +++0, +++128, +++0, +++0, +++192, +++8, +++4, +++0, +++12, +++248, +++0, +++132, 
+++64, +++0, +++192, +++8, +++4, +++0, +++0, +++96, +++255, +++159, +++131, +++255, +++0, +++232, +++0, +++4, +++0, +++0, +++255, +++159, +++142, +++255, +++4, +++255, +++48, +++204, +++16, +++3, +++224, +++251, +++62, +++0, +++5, +++255, +++51, +++204, +++128, +++3, +++224, +++251, +++16, +++0, +++77, +++254, +++51, +++204, +++9, +++4, +++224, +++251, +++0, +++0, +++128, +++64, +++6, +++232, +++64, +++0, +++0, +++0, +++140, +++248, +++47, +++0, +++0, +++0, +++224, +++99, +++0, +++0, +++4, +++254, +++0, +++144, +++128, +++2, +++0, +++8, +++2, +++0, +++32, +++247, +++240, +++207, +++16, +++3, +++32, +++247, +++176, +++207, +++17, +++3, +++32, +++247, +++112, +++207, +++18, +++3, +++32, +++247, +++48, +++207, +++19, +++3, +++32, +++247, +++240, +++206, +++20, +++3, +++32, +++247, +++176, +++206, +++21, +++3, +++32, +++247, +++112, +++206, +++22, +++3, +++32, +++247, +++48, +++206, +++23, +++3, +++32, +++247, +++240, +++205, +++24, +++3, +++32, +++247, +++176, +++205, +++25, +++3, +++32, +++247, +++112, +++205, +++26, +++3, +++32, +++247, +++48, +++205, +++27, +++3, +++32, +++247, +++240, +++204, +++28, +++3, +++32, +++247, +++176, +++204, +++29, +++3, +++32, +++247, +++112, +++204, +++30, +++3, +++32, +++247, +++48, +++204, +++31, +++3, +++5, +++255, +++51, +++204, +++128, +++3, +++224, +++251, +++16, +++0, +++77, +++254, +++51, +++204, +++9, +++4, +++224, +++251, +++0, +++0, +++0, +++237, +++0, +++4, +++0, +++0, +++140, +++248, +++47, +++0, +++0, +++0, +++224, +++99, +++0, +++0, +++90, +++0, ++ }; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++index 5e2728d..1e389c7 100644 ++--- a/libavcodec/rpi_hevc_transform.s +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -58,13 +58,6 @@ ++ # ++ # ++ ++-test_add: ++- vldh HX(0,0),(r0) ++- vadd HX(0,0),HX(0,0),10 ++- vsth HX(0,0),(r0) ++- mov r0,7 # return value ++- b lr ++- ++ # Columns are transformed first ++ # ++ # Store top left half of transMatrix2 in ++@@ -79,7 +72,7 @@ test_add: ++ # ++ ++ 
++-# hevc_trans_16x16(short *transMatrix2, short *coeffs, int num) +++# hevc_trans_16x16(short *transMatrix2, short *coeffs, int num) # TODO add size so we can branch to correct implementation (or perhaps have coeffs32 and num32 as secondary inputs!) ++ # transMatrix2: address of the constant matrix (must be at 32 byte aligned address in Videocore memory) ++ # coeffs: address of the transform coefficients (must be at 32 byte aligned address in Videocore memory) ++ # num: number of 16x16 transforms to be done ++@@ -87,17 +80,17 @@ test_add: ++ hevc_trans_16x16: ++ push r6-r15, lr # TODO cut down number of used registers ++ ++- mov r3, 2*32*2 # Twice Stride of transMatrix2 in bytes ++- vld HX(32++,0),(r0 += r3) REP 16 # This is the 16x16 matrix, a transform is equivalent to multiplying input row vector * matrix +++ mov r3, 16*2 # Stride of transMatrix2 in bytes +++ vldh HX(32++,0),(r0 += r3) REP 16 # This is the 16x16 matrix, a transform is equivalent to multiplying input row vector * matrix ++ # Now use r0 to describe which matrix we are working on. ++ # Allows us to prefetch the next block of coefficients for efficiency. 
++ mov r0,0 # This describes the location where we read our coefficients from ++- mov r3,16*2 # Stride of coefficients in bytes +++ mov r3,16*2 # Stride of coefficients in bytes (TODO remove) ++ mov r7,16*16*2 # Total block size ++ mov r8,64*16 # Value used to swap from current to next VRF location ++ vldh HX(0++,0)+r0,(r1 += r3) REP 16 ++ mov r4,64 # Constant used for rounding first pass ++- mov r5,1<<19 # Constant used for rounding second pass +++ mov r5,1<<11 # Constant used for rounding second pass ++ ++ # At start of block r0,r1 point to the current block (that has already been loaded) ++ block_loop: ++@@ -113,12 +106,12 @@ block_loop: ++ vadd HY(0++,0)+r0,HY(0++,0)+r0,r4 REP 16 # Now add on rounding, shift down by 7, and saturate ++ #vsasls HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # 9+7=16 so this ends up with the output saturated and in the top half of the word. ++ vasl HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # This should be saturating, but the instruction above does not assemble? ++- vmov VX(0,0++), HX(0++,32) REP 16 # For simplicity transpose this back to the original position +++ vmov VX(0,0++)+r0, HX(0++,32)+r0 REP 16 # For simplicity transpose this back to the original position ++ ++ bl col_trans_16 ++- vadd HY(0++,0)+r0,HY(0++,0)+r0,r4 REP 16 # Now add on rounding, shift down by 7, and saturate ++- #vsasls HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # 9+7=16 so this ends up with the output saturated and in the top half of the word. ++- vasl HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # This should be saturating, but the instruction above does not assemble? +++ vadd HY(0++,0)+r0,HY(0++,0)+r0,r5 REP 16 # Now add on rounding, shift down by 7, and saturate +++ #vsasls HY(0++,0)+r0,HY(0++,0)+r0,4 REP 16 # 4+12=16 so this ends up with the output saturated and in the top half of the word. +++ vasl HY(0++,0)+r0,HY(0++,0)+r0,4 REP 16 # This should be saturating, but the instruction above does not assemble? 
(Probably because it ends with ls which is interpreted as a condition flag) ++ ++ # Save results - note there has been a transposition during the processing so we save columns ++ vsth VX(0,32++)+r0, (r1 += r3) REP 16 ++@@ -132,16 +125,136 @@ block_loop: ++ ++ # r1,r2,r3 r7,r8 should be preserved ++ # HX(0++,0)+r0 is the block to be transformed ++-# HX(32++,0) is the 16x16 matrix of transform coefficients +++# HX(32++,0)+r6 is the 16x16 matrix of transform coefficients ++ # Use HY(48,0) for intermediate results ++ # r0 can be used, but should be returned to its original value at the end ++ col_trans_16: ++- add r4,r0,16 # Final value for this loop +++ add r6,r0,16 # Final value for this loop ++ col_trans_16_loop: ++ # First compute partial products for a single column ++- vmul32s VY(48,0++), VX(0,0)+r0, VX(32,0++) REP 16 +++ vmul32s HY(48++,0), VX(0,0)+r0, VX(32,0++) REP 16 ++ # Then sum up the results and place back ++ vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC ++- addcmpblt r0,1,r4,col_trans_16_loop +++ addcmpblt r0,1,r6,col_trans_16_loop ++ sub r0,16 # but r0 back to its original value ++ b lr +++ +++col_trans_odd_16: +++ add r6,r0,16 # Final value for this loop +++col_trans_odd_16_loop: +++ # First compute partial products for a single column +++ vmul32s HY(48++,0), VX(0,0)+r0, VX(32,0++) REP 16 +++ # Then sum up the results and place back +++ vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC +++ addcmpblt r0,1,r6,col_trans_odd_16_loop +++ sub r0,16 # but r0 back to its original value +++ b lr +++ +++ +++test_add: +++ vldh HX(0,0),(r0) +++ vadd HX(0,0),HX(0,0),10 +++ vsth HX(0,0),(r0) +++ mov r0,7 # return value +++ b lr +++ +++# hevc_trans_32x32(short *transMatrix2, short *coeffs, int num) +++# transMatrix2: address of the constant matrix (must be at 32 byte aligned address in Videocore memory) Even followed by odd +++# coeffs: address of the transform coefficients (must be at 32 byte aligned address in Videocore memory) +++# num: number of 
16x16 transforms to be done +++# +++hevc_trans_32x32: +++ push r6-r15, lr # TODO cut down number of used registers +++ +++ # Fetch transform matrices +++ mov r3, 16*2 # Stride of transMatrix2 in bytes (and of coefficients) +++ vldh HX(32++,0),(r0 += r3) REP 16 # This is the even 16x16 matrix +++ add r0, 16*16*2 +++ vldh HX(32++,32),(r0 += r3) REP 16 # This is the odd 16x16 matrix +++ +++ mov r3, 32*2*2 # Stride used to fetch alternate rows of our input coefficient buffer +++ mov r7, 16*16*2 # Total block size +++ mov r4, 64 # Constant used for rounding first pass +++ mov r5, 1<<11 # Constant used for rounding second pass +++ sub sp,sp,32*32*2+32 # Allocate some space on the stack for us to store 32*32 shorts as temporary results (needs to be aligned) +++ # set r8 to 32byte aligned stack pointer +++ add r8,sp,31 +++ lsr r8,5 +++ lsl r8,5 +++ mov r9,r8 # Backup of the temporary storage +++ mov r10,r1 # Backup of the coefficient buffer +++block_loop32: +++ +++ # COLUMN TRANSFORM +++ # Transform the first 16 columns +++ mov r1,r10 # Input Coefficient buffer +++ mov r8,r9 # Output temporary storage +++ bl trans32 +++ # Transform the second 16 columns +++ add r8,32 +++ add r1,32 +++ bl trans32 +++ +++ # ROW TRANSFORM +++ mov r1,r9 # Input temporary storage +++ mov r8,r10 # Output Coefficient buffer +++ bl trans32 +++ # Transform the second 16 columns +++ add r8,32 +++ add r1,32 +++ bl trans32 +++ +++ add r10, 32*32*2 # move onto next block of coefficients +++ addcmpbgt r2,-1,0,block_loop32 +++ +++ add sp,sp,32*32*2+32 # Restore stack +++ +++ pop r6-r15, pc +++ +++trans32: +++ # We can no longer afford the VRF space to do prefetching when doing 32x32 +++ # Fetch the even rows +++ vldh HX(0++,0)+r0,(r1 += r3) REP 16 +++ # Fetch the odd rows +++ vldh HX(16++,0)+r0,64(r1 += r3) REP 16 # First odd row is 32 shorts ahead of r1 +++ +++ # Transform the even rows using even matrix +++ mov r0, 0 # Even rows +++ bl col_trans_16 +++ +++ # Now transform the odd rows using odd matrix 
+++ mov r0, 64*16 # Odd rows +++ bl col_trans_odd_16 +++ +++ # Now apply butterfly to compute the first 16 results +++ vadd HY(48++,0),HY(0++,0),HY(16++,0) REP 16 +++ vadd HY(48++,0),HY(48++,0),r4 REP 32 # add on rounding, +++ vasl HY(48++,0),HY(48++,0),9 REP 32 # shift down by 7, and saturate +++ # 16bit results now in HX(48,32) +++ mov r0,r8 +++ mov r6,32*2 +++ vsth VX(48,32++),(r0+=r6) REP 16 +++ vmov VX(0,0++)+r0, HX(0++,32)+r0 REP 16 # Store transposed +++ +++ # Now apply butterfly to compute the second 16 results (in reverse order) +++ vsub HY(63,0),HY(0,0),HY(16,0) +++ vsub HY(62,0),HY(0,0),HY(17,0) +++ vsub HY(61,0),HY(0,0),HY(18,0) +++ vsub HY(60,0),HY(0,0),HY(19,0) +++ vsub HY(59,0),HY(0,0),HY(20,0) +++ vsub HY(58,0),HY(0,0),HY(21,0) +++ vsub HY(57,0),HY(0,0),HY(22,0) +++ vsub HY(56,0),HY(0,0),HY(23,0) +++ vsub HY(55,0),HY(0,0),HY(24,0) +++ vsub HY(54,0),HY(0,0),HY(25,0) +++ vsub HY(53,0),HY(0,0),HY(26,0) +++ vsub HY(52,0),HY(0,0),HY(27,0) +++ vsub HY(51,0),HY(0,0),HY(28,0) +++ vsub HY(50,0),HY(0,0),HY(29,0) +++ vsub HY(49,0),HY(0,0),HY(30,0) +++ vsub HY(48,0),HY(0,0),HY(31,0) +++ vadd HY(48++,0),HY(48++,0),r4 REP 32 # add on rounding, +++ vasl HY(48++,0),HY(48++,0),9 REP 32 # shift down by 7, and saturate +++ add r0,r8,16*32*2 # Move to 16th row +++ vsth VX(48,32++),(r0+=r6) REP 16 +++ b lr ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index b1f50ee..d720546 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -3,6 +3,7 @@ ++ // This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. 
++ #define RPI_USE_VCSM ++ #define RPI_TIME_TOTAL_QPU +++#define RPI_TIME_TOTAL_VPU ++ ++ #include ++ #include ++@@ -48,10 +49,47 @@ typedef int int32_t; ++ #define QPU_CODE_SIZE 2048 ++ #define VPU_CODE_SIZE 2048 ++ +++const short rpi_transMatrix2even[32][16] = { // Even rows first +++{64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64}, +++{90, 87, 80, 70, 57, 43, 25, 9, -9, -25, -43, -57, -70, -80, -87, -90}, +++{89, 75, 50, 18, -18, -50, -75, -89, -89, -75, -50, -18, 18, 50, 75, 89}, +++{87, 57, 9, -43, -80, -90, -70, -25, 25, 70, 90, 80, 43, -9, -57, -87}, +++{83, 36, -36, -83, -83, -36, 36, 83, 83, 36, -36, -83, -83, -36, 36, 83}, +++{80, 9, -70, -87, -25, 57, 90, 43, -43, -90, -57, 25, 87, 70, -9, -80}, +++{75, -18, -89, -50, 50, 89, 18, -75, -75, 18, 89, 50, -50, -89, -18, 75}, +++{70, -43, -87, 9, 90, 25, -80, -57, 57, 80, -25, -90, -9, 87, 43, -70}, +++{64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64}, +++{57, -80, -25, 90, -9, -87, 43, 70, -70, -43, 87, 9, -90, 25, 80, -57}, +++{50, -89, 18, 75, -75, -18, 89, -50, -50, 89, -18, -75, 75, 18, -89, 50}, +++{43, -90, 57, 25, -87, 70, 9, -80, 80, -9, -70, 87, -25, -57, 90, -43}, +++{36, -83, 83, -36, -36, 83, -83, 36, 36, -83, 83, -36, -36, 83, -83, 36}, +++{25, -70, 90, -80, 43, 9, -57, 87, -87, 57, -9, -43, 80, -90, 70, -25}, +++{18, -50, 75, -89, 89, -75, 50, -18, -18, 50, -75, 89, -89, 75, -50, 18}, +++{ 9, -25, 43, -57, 70, -80, 87, -90, 90, -87, 80, -70, 57, -43, 25, -9}, +++// Odd rows +++{90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4}, +++{90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13}, +++{88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22}, +++{85, 46, -13, -67, -90, -73, -22, 38, 82, 88, 54, -4, -61, -90, -78, -31}, +++{82, 22, -54, -90, -61, 13, 78, 85, 31, -46, -90, -67, 4, 73, 88, 38}, +++{78, -4, -82, -73, 13, 85, 67, -22, -88, -61, 31, 90, 54, -38, -90, -46}, +++{73, -31, -90, -22, 78, 67, -38, -90, 
-13, 82, 61, -46, -88, -4, 85, 54}, +++{67, -54, -78, 38, 85, -22, -90, 4, 90, 13, -88, -31, 82, 46, -73, -61}, +++{61, -73, -46, 82, 31, -88, -13, 90, -4, -90, 22, 85, -38, -78, 54, 67}, +++{54, -85, -4, 88, -46, -61, 82, 13, -90, 38, 67, -78, -22, 90, -31, -73}, +++{46, -90, 38, 54, -90, 31, 61, -88, 22, 67, -85, 13, 73, -82, 4, 78}, +++{38, -88, 73, -4, -67, 90, -46, -31, 85, -78, 13, 61, -90, 54, 22, -82}, +++{31, -78, 90, -61, 4, 54, -88, 82, -38, -22, 73, -90, 67, -13, -46, 85}, +++{22, -61, 85, -90, 73, -38, -4, 46, -78, 90, -82, 54, -13, -31, 67, -88}, +++{13, -38, 61, -78, 88, -90, 85, -73, 54, -31, 4, 22, -46, 67, -82, 90}, +++{ 4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90} +++}; +++ ++ struct GPU ++ { ++ unsigned int qpu_code[QPU_CODE_SIZE]; ++ unsigned int vpu_code[VPU_CODE_SIZE]; +++ short transMatrix2even[16*16]; ++ int open_count; // Number of allocated video buffers ++ unsigned int vc_handle; // Handle of this memory ++ int mb; // Mailbox handle ++@@ -123,6 +161,8 @@ static int gpu_init(volatile struct GPU **gpu) { ++ assert(num_bytes<=VPU_CODE_SIZE*sizeof(unsigned int)); ++ memcpy((void*)ptr->vpu_code, rpi_hevc_transform, num_bytes); ++ } +++ // And the transform coefficients +++ memcpy((void*)ptr->transMatrix2even, rpi_transMatrix2even, 16*16*sizeof(short)); ++ ++ return 0; ++ } ++@@ -274,11 +314,43 @@ unsigned int vpu_get_fn(void) { ++ return gpu->vc + offsetof(struct GPU,vpu_code); ++ } ++ +++unsigned int vpu_get_constants(void) { +++ if (gpu==NULL) { +++ gpu_lock(); +++ gpu_unlock(); +++ } +++ return gpu->vc + offsetof(struct GPU,transMatrix2even); +++} +++ ++ unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5) ++ { ++ unsigned r; +++#ifdef RPI_TIME_TOTAL_VPU +++ static int last_time=0; +++ static long long on_time=0; +++ static long long off_time=0; +++ int start_time; +++ int end_time; +++ static int count=0; +++ static long long countr2=0; +++#endif 
++ gpu_lock(); +++#ifdef RPI_TIME_TOTAL_VPU +++ start_time = Microseconds(); +++ if (last_time==0) +++ last_time = start_time; +++ off_time += start_time-last_time; +++#endif ++ r = execute_code(gpu->mb, code, r0, r1, r2, r3, r4, r5); +++#ifdef RPI_TIME_TOTAL_VPU +++ end_time = Microseconds(); +++ last_time = end_time; +++ on_time += end_time - start_time; +++ count++; +++ countr2 += r2; +++ if ((count&0x7f)==0) +++ printf("VPU %d %lld On=%dms, Off=%dms\n",count,countr2,(int)(on_time/1000),(int)(off_time/1000)); +++#endif ++ gpu_unlock(); ++ return r; ++ } ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 4e3c35c..814fc3c 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -34,6 +34,7 @@ extern unsigned int qpu_get_fn(int num); ++ ++ // VPU specific functions ++ extern unsigned int vpu_get_fn(void); +++extern unsigned int vpu_get_constants(void); ++ extern unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); ++ ++ // Simple test of shader code ++-- ++2.7.4 ++ ++ ++From 4bb0a7ba6723650e74d63cec2123f76da4c3eb0e Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Tue, 5 May 2015 09:41:23 +0100 ++Subject: [PATCH 05/68] Fixed deblocking ++ ++--- ++ libavcodec/hevc.c | 20 +++++++++++++++++--- ++ 1 file changed, 17 insertions(+), 3 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 94ff709..391c57a 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2400,8 +2400,9 @@ static void rpi_execute_transform(HEVCContext *s) ++ // s->hevcdsp.idct[4-2](coeffs, 16); ++ //} ++ ++- //gpu_cache_flush(&s->coeffs_buf[i]); +++ gpu_cache_flush(&s->coeffs_buf[i]); ++ vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf[i].vc, s->num_coeffs[i] >> 8, 0, 0, 0); +++ gpu_cache_flush(&s->coeffs_buf[i]); ++ ++ for(i=0;i<4;i++) ++ s->num_coeffs[i] = 0; ++@@ -2440,6 +2441,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void 
*isFilterThread) ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; ++ ++ #ifdef RPI +++ int start_ctb_x = (s->sh.slice_ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size; ++ s->enable_rpi = 1; // TODO this should depend on cross component and frame width etc. ++ #endif ++ ++@@ -2473,9 +2475,17 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ #ifdef RPI ++- if (1 || x_ctb + ctb_size >= s->ps.sps->width) { // TODO watch out for deblocking! +++ if (s->enable_rpi && x_ctb + ctb_size >= s->ps.sps->width) { +++ int x; +++ // Transform all blocks ++ rpi_execute_transform(s); +++ // Perform intra prediction and residual reconstruction ++ rpi_execute_pred_cmds(s); +++ // Perform deblocking for CTBs in this row +++ for(x = start_ctb_x; x <= x_ctb; x += ctb_size) { // TODO this will fail for tiles +++ ff_hevc_hls_filters(s, x, y_ctb, ctb_size); +++ } +++ start_ctb_x = 0; ++ } ++ #endif ++ if (more_data < 0) { ++@@ -2486,6 +2496,10 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ ctb_addr_ts++; ++ ff_hevc_save_states(s, ctb_addr_ts); +++#ifdef RPI +++ if (s->enable_rpi) +++ continue; +++#endif ++ ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size); ++ } ++ ++@@ -3289,7 +3303,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ if (!s->univ_pred_cmds) ++ goto fail; ++ for(i = 0; i < 4; i++) { ++- gpu_malloc_uncached(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16, &s->coeffs_buf[i]); // TODO slim this down and share across sizes +++ gpu_malloc_cached(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16, &s->coeffs_buf[i]); // TODO slim this down and share across sizes ++ s->coeffs_buf_arm[i] = (int16_t*) s->coeffs_buf[i].arm; ++ if (!s->coeffs_buf_arm[i]) ++ goto fail; ++-- ++2.7.4 ++ ++ ++From 9079ef888e3d81a69f3c802ddc3c5134679e74a6 Mon Sep 17 00:00:00 2001 ++From: 
Peter de Rivaz ++Date: Tue, 5 May 2015 11:32:30 +0100 ++Subject: [PATCH 06/68] Added 32x32 transform ++ ++--- ++ libavcodec/hevc.c | 8 +- ++ libavcodec/hevc_cabac.c | 4 +- ++ libavcodec/rpi_hevc_transform.h | 200 +++++++++++++++++----------------------- ++ libavcodec/rpi_hevc_transform.s | 102 ++++++++++---------- ++ libavcodec/rpi_qpu.c | 4 +- ++ 5 files changed, 148 insertions(+), 170 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 391c57a..0dde6f2 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2400,9 +2400,11 @@ static void rpi_execute_transform(HEVCContext *s) ++ // s->hevcdsp.idct[4-2](coeffs, 16); ++ //} ++ ++- gpu_cache_flush(&s->coeffs_buf[i]); ++- vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf[i].vc, s->num_coeffs[i] >> 8, 0, 0, 0); ++- gpu_cache_flush(&s->coeffs_buf[i]); +++ gpu_cache_flush(&s->coeffs_buf[2]); +++ gpu_cache_flush(&s->coeffs_buf[3]); +++ vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf[2].vc, s->num_coeffs[2] >> 8, s->coeffs_buf[3].vc, s->num_coeffs[3] >> 10, 0); +++ gpu_cache_flush(&s->coeffs_buf[2]); +++ gpu_cache_flush(&s->coeffs_buf[3]); ++ ++ for(i=0;i<4;i++) ++ s->num_coeffs[i] = 0; ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index d1cba86..88aa959 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1031,7 +1031,9 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ int vshift = s->ps.sps->vshift[c_idx]; ++ uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride + ++ ((x0 >> hshift) << s->ps.sps->pixel_shift)]; ++- int use_vpu = s->enable_rpi && !lc->cu.cu_transquant_bypass_flag && !transform_skip_flag && !lc->tu.cross_pf && log2_trafo_size==4; +++#ifdef RPI +++ int use_vpu = s->enable_rpi && !lc->cu.cu_transquant_bypass_flag && !transform_skip_flag && !lc->tu.cross_pf && log2_trafo_size>=4; +++#endif ++ int16_t *coeffs = (int16_t*)(c_idx ? 
lc->edge_emu_buffer2 : lc->edge_emu_buffer); ++ uint8_t significant_coeff_group_flag[8][8] = {{0}}; ++ int explicit_rdpcm_flag = 0; ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++index c0c279f..6d772d7 100644 ++--- a/libavcodec/rpi_hevc_transform.h +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -1,6 +1,10 @@ ++ unsigned char rpi_hevc_transform [] = { ++ 169, ++ 3, +++62, +++64, +++79, +++64, ++ 3, ++ 232, ++ 32, ++@@ -17,6 +21,22 @@ unsigned char rpi_hevc_transform [] = { ++ 248, ++ 0, ++ 0, +++64, +++232, +++0, +++2, +++0, +++0, +++12, +++248, +++0, +++168, +++0, +++0, +++192, +++248, +++0, +++0, ++ 0, ++ 96, ++ 3, ++@@ -79,7 +99,7 @@ unsigned char rpi_hevc_transform [] = { ++ 70, ++ 128, ++ 144, ++-39, +++40, ++ 0, ++ 4, ++ 255, ++@@ -113,7 +133,7 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 128, ++ 144, ++-22, +++23, ++ 0, ++ 4, ++ 255, ++@@ -153,6 +173,8 @@ unsigned char rpi_hevc_transform [] = { ++ 140, ++ 211, ++ 192, +++34, +++31, ++ 41, ++ 3, ++ 70, ++@@ -195,7 +217,7 @@ unsigned char rpi_hevc_transform [] = { ++ 255, ++ 36, ++ 204, ++-96, +++224, ++ 2, ++ 0, ++ 248, ++@@ -219,62 +241,10 @@ unsigned char rpi_hevc_transform [] = { ++ 103, ++ 90, ++ 0, ++-8, ++-240, ++-0, ++-128, ++-128, ++-3, ++-0, ++-247, ++-32, ++-128, ++-10, ++-4, ++-136, ++-240, ++-32, ++-0, ++-128, ++-3, ++-112, ++-96, ++-90, ++-0, ++-169, ++-3, ++-3, ++-232, ++-32, ++-0, ++-0, ++-0, ++-12, ++-248, ++-0, ++-136, ++-0, ++-0, ++-192, ++-248, ++-0, ++-0, +++225, +++64, +++242, ++ 64, ++-232, ++-0, ++-2, ++-0, ++-0, ++-12, ++-248, ++-0, ++-168, ++-0, ++-0, ++-192, ++-248, ++-0, ++-0, ++ 3, ++ 232, ++ 128, ++@@ -287,18 +257,6 @@ unsigned char rpi_hevc_transform [] = { ++ 2, ++ 0, ++ 0, ++-4, ++-232, ++-64, ++-0, ++-0, ++-0, ++-5, ++-232, ++-0, ++-8, ++-0, ++-0, ++ 57, ++ 239, ++ 224, ++@@ -317,18 +275,26 @@ unsigned char rpi_hevc_transform [] = { ++ 64, ++ 26, ++ 64, +++4, +++232, +++64, +++0, +++0, +++0, +++149, +++96, ++ 161, ++ 64, ++ 152, ++ 64, 
++ 128, ++ 144, ++-31, +++35, ++ 0, ++ 72, ++ 232, ++-32, ++ 0, +++4, ++ 0, ++ 0, ++ 65, ++@@ -339,8 +305,16 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 128, ++ 144, ++-23, +++27, +++0, +++4, +++232, +++0, +++8, ++ 0, +++0, +++69, +++96, ++ 145, ++ 64, ++ 168, ++@@ -351,8 +325,8 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 72, ++ 232, ++-32, ++ 0, +++4, ++ 0, ++ 0, ++ 65, ++@@ -373,7 +347,7 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 242, ++ 140, ++-229, +++221, ++ 192, ++ 57, ++ 239, ++@@ -383,6 +357,8 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 41, ++ 3, +++239, +++3, ++ 12, ++ 248, ++ 0, ++@@ -390,7 +366,7 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 0, ++ 192, ++-8, +++248, ++ 4, ++ 0, ++ 12, ++@@ -400,14 +376,14 @@ unsigned char rpi_hevc_transform [] = { ++ 64, ++ 0, ++ 192, ++-8, +++248, ++ 4, ++ 0, ++ 0, ++ 96, ++ 255, ++ 159, ++-131, +++154, ++ 255, ++ 0, ++ 232, ++@@ -417,7 +393,7 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 255, ++ 159, ++-142, +++165, ++ 255, ++ 4, ++ 255, ++@@ -429,7 +405,7 @@ unsigned char rpi_hevc_transform [] = { ++ 251, ++ 62, ++ 0, ++-5, +++4, ++ 255, ++ 51, ++ 204, ++@@ -439,15 +415,15 @@ unsigned char rpi_hevc_transform [] = { ++ 251, ++ 16, ++ 0, ++-77, +++76, ++ 254, ++ 51, ++ 204, ++-9, ++-4, +++128, +++3, ++ 224, ++ 251, ++-0, +++20, ++ 0, ++ 128, ++ 64, ++@@ -467,16 +443,6 @@ unsigned char rpi_hevc_transform [] = { ++ 99, ++ 0, ++ 0, ++-4, ++-254, ++-0, ++-144, ++-128, ++-2, ++-0, ++-8, ++-2, ++-0, ++ 32, ++ 247, ++ 240, ++@@ -488,92 +454,92 @@ unsigned char rpi_hevc_transform [] = { ++ 176, ++ 207, ++ 17, ++-3, +++19, ++ 32, ++ 247, ++ 112, ++ 207, ++ 18, ++-3, +++35, ++ 32, ++ 247, ++ 48, ++ 207, ++ 19, ++-3, +++51, ++ 32, ++ 247, ++ 240, ++ 206, ++ 20, ++-3, +++67, ++ 32, ++ 247, ++ 176, ++ 206, ++ 21, ++-3, +++83, ++ 32, ++ 247, ++ 112, ++ 206, ++ 22, ++-3, +++99, ++ 32, ++ 247, ++ 48, ++ 206, ++ 23, ++-3, +++115, ++ 32, ++ 247, ++ 240, ++ 205, ++ 24, ++-3, +++131, ++ 32, ++ 
247, ++ 176, ++ 205, ++ 25, ++-3, +++147, ++ 32, ++ 247, ++ 112, ++ 205, ++ 26, ++-3, +++163, ++ 32, ++ 247, ++ 48, ++ 205, ++ 27, ++-3, +++179, ++ 32, ++ 247, ++ 240, ++ 204, ++ 28, ++-3, +++195, ++ 32, ++ 247, ++ 176, ++ 204, ++ 29, ++-3, +++211, ++ 32, ++ 247, ++ 112, ++ 204, ++ 30, ++-3, +++227, ++ 32, ++ 247, ++ 48, ++ 204, ++ 31, ++-3, ++-5, +++243, +++4, ++ 255, ++ 51, ++ 204, ++@@ -583,20 +549,20 @@ unsigned char rpi_hevc_transform [] = { ++ 251, ++ 16, ++ 0, ++-77, +++76, ++ 254, ++ 51, ++ 204, ++-9, ++-4, +++128, +++3, ++ 224, ++ 251, ++-0, +++20, ++ 0, ++ 0, ++ 237, +++32, ++ 0, ++-4, ++ 0, ++ 0, ++ 140, ++@@ -609,6 +575,6 @@ unsigned char rpi_hevc_transform [] = { ++ 99, ++ 0, ++ 0, ++-90, ++-0, +++111, +++3, ++ }; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++index 1e389c7..afdb32a 100644 ++--- a/libavcodec/rpi_hevc_transform.s +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -76,12 +76,19 @@ ++ # transMatrix2: address of the constant matrix (must be at 32 byte aligned address in Videocore memory) ++ # coeffs: address of the transform coefficients (must be at 32 byte aligned address in Videocore memory) ++ # num: number of 16x16 transforms to be done +++# coeffs32 +++# num32: number of 32x32 transforms ++ # ++ hevc_trans_16x16: ++ push r6-r15, lr # TODO cut down number of used registers ++- +++ mov r14,r3 # coeffs32 +++ mov r15,r4 # num32 ++ mov r3, 16*2 # Stride of transMatrix2 in bytes ++ vldh HX(32++,0),(r0 += r3) REP 16 # This is the 16x16 matrix, a transform is equivalent to multiplying input row vector * matrix +++ +++ add r0, 16*16*2 # For 32x32 transforms we also need this matrix +++ vldh HX(32++,32),(r0 += r3) REP 16 # This is the odd 16x16 matrix +++ ++ # Now use r0 to describe which matrix we are working on. ++ # Allows us to prefetch the next block of coefficients for efficiency. 
++ mov r0,0 # This describes the location where we read our coefficients from ++@@ -121,6 +128,10 @@ block_loop: ++ add r1,r7 ++ ++ addcmpbgt r2,-1,0,block_loop +++ +++ # Now go and do any 32x32 transforms +++ b hevc_trans_32x32 +++ ++ pop r6-r15, pc ++ ++ # r1,r2,r3 r7,r8 should be preserved ++@@ -136,26 +147,18 @@ col_trans_16_loop: ++ # Then sum up the results and place back ++ vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC ++ addcmpblt r0,1,r6,col_trans_16_loop ++- sub r0,16 # but r0 back to its original value +++ sub r0,16 # put r0 back to its original value ++ b lr ++ ++ col_trans_odd_16: ++ add r6,r0,16 # Final value for this loop ++ col_trans_odd_16_loop: ++ # First compute partial products for a single column ++- vmul32s HY(48++,0), VX(0,0)+r0, VX(32,0++) REP 16 +++ vmul32s HY(48++,0), VX(0,0)+r0, VX(32,32++) REP 16 ++ # Then sum up the results and place back ++ vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC ++ addcmpblt r0,1,r6,col_trans_odd_16_loop ++- sub r0,16 # but r0 back to its original value ++- b lr ++- ++- ++-test_add: ++- vldh HX(0,0),(r0) ++- vadd HX(0,0),HX(0,0),10 ++- vsth HX(0,0),(r0) ++- mov r0,7 # return value +++ sub r0,16 # put r0 back to its original value ++ b lr ++ ++ # hevc_trans_32x32(short *transMatrix2, short *coeffs, int num) ++@@ -164,18 +167,17 @@ test_add: ++ # num: number of 16x16 transforms to be done ++ # ++ hevc_trans_32x32: ++- push r6-r15, lr # TODO cut down number of used registers +++ mov r1,r14 # coeffs +++ mov r2,r15 # num ++ ++- # Fetch transform matrices ++- mov r3, 16*2 # Stride of transMatrix2 in bytes (and of coefficients) ++- vldh HX(32++,0),(r0 += r3) REP 16 # This is the even 16x16 matrix ++- add r0, 16*16*2 ++- vldh HX(32++,32),(r0 += r3) REP 16 # This is the odd 16x16 matrix +++ # Fetch odd transform matrix +++ #mov r3, 16*2 # Stride of transMatrix2 in bytes (and of coefficients) +++ #vldh HX(32++,0),(r0 += r3) REP 16 # This is the even 16x16 matrix +++ #add r0, 16*16*2 +++ #vldh 
HX(32++,32),(r0 += r3) REP 16 # This is the odd 16x16 matrix ++ ++ mov r3, 32*2*2 # Stride used to fetch alternate rows of our input coefficient buffer ++ mov r7, 16*16*2 # Total block size ++- mov r4, 64 # Constant used for rounding first pass ++- mov r5, 1<<11 # Constant used for rounding second pass ++ sub sp,sp,32*32*2+32 # Allocate some space on the stack for us to store 32*32 shorts as temporary results (needs to be aligned) ++ # set r8 to 32byte aligned stack pointer ++ add r8,sp,31 ++@@ -186,21 +188,27 @@ hevc_trans_32x32: ++ block_loop32: ++ ++ # COLUMN TRANSFORM +++ mov r4, 64 # Constant used for rounding first pass +++ mov r5, 9 # left shift used for rounding first pass +++ ++ # Transform the first 16 columns ++ mov r1,r10 # Input Coefficient buffer ++ mov r8,r9 # Output temporary storage ++ bl trans32 ++ # Transform the second 16 columns ++- add r8,32 +++ add r8,32*16*2 ++ add r1,32 ++ bl trans32 ++ ++ # ROW TRANSFORM +++ mov r4, 1<<11 # Constant used for rounding second pass +++ mov r5, 4 # left shift used for rounding second pass +++ ++ mov r1,r9 # Input temporary storage ++ mov r8,r10 # Output Coefficient buffer ++ bl trans32 ++ # Transform the second 16 columns ++- add r8,32 +++ add r8,32*16*2 ++ add r1,32 ++ bl trans32 ++ ++@@ -212,11 +220,12 @@ block_loop32: ++ pop r6-r15, pc ++ ++ trans32: +++ push lr ++ # We can no longer afford the VRF space to do prefetching when doing 32x32 ++ # Fetch the even rows ++- vldh HX(0++,0)+r0,(r1 += r3) REP 16 +++ vldh HX(0++,0),(r1 += r3) REP 16 ++ # Fetch the odd rows ++- vldh HX(16++,0)+r0,64(r1 += r3) REP 16 # First odd row is 32 shorts ahead of r1 +++ vldh HX(16++,0),64(r1 += r3) REP 16 # First odd row is 32 shorts ahead of r1 ++ ++ # Transform the even rows using even matrix ++ mov r0, 0 # Even rows ++@@ -228,33 +237,32 @@ trans32: ++ ++ # Now apply butterfly to compute the first 16 results ++ vadd HY(48++,0),HY(0++,0),HY(16++,0) REP 16 ++- vadd HY(48++,0),HY(48++,0),r4 REP 32 # add on rounding, ++- vasl 
HY(48++,0),HY(48++,0),9 REP 32 # shift down by 7, and saturate +++ vadd HY(48++,0),HY(48++,0),r4 REP 16 # add on rounding, +++ vasl HY(48++,0),HY(48++,0),r5 REP 16 # shift down by 7, and saturate ++ # 16bit results now in HX(48,32) ++ mov r0,r8 ++ mov r6,32*2 ++ vsth VX(48,32++),(r0+=r6) REP 16 ++- vmov VX(0,0++)+r0, HX(0++,32)+r0 REP 16 # Store transposed ++ ++ # Now apply butterfly to compute the second 16 results (in reverse order) ++- vsub HY(63,0),HY(0,0),HY(16,0) ++- vsub HY(62,0),HY(0,0),HY(17,0) ++- vsub HY(61,0),HY(0,0),HY(18,0) ++- vsub HY(60,0),HY(0,0),HY(19,0) ++- vsub HY(59,0),HY(0,0),HY(20,0) ++- vsub HY(58,0),HY(0,0),HY(21,0) ++- vsub HY(57,0),HY(0,0),HY(22,0) ++- vsub HY(56,0),HY(0,0),HY(23,0) ++- vsub HY(55,0),HY(0,0),HY(24,0) ++- vsub HY(54,0),HY(0,0),HY(25,0) ++- vsub HY(53,0),HY(0,0),HY(26,0) ++- vsub HY(52,0),HY(0,0),HY(27,0) ++- vsub HY(51,0),HY(0,0),HY(28,0) ++- vsub HY(50,0),HY(0,0),HY(29,0) ++- vsub HY(49,0),HY(0,0),HY(30,0) ++- vsub HY(48,0),HY(0,0),HY(31,0) ++- vadd HY(48++,0),HY(48++,0),r4 REP 32 # add on rounding, ++- vasl HY(48++,0),HY(48++,0),9 REP 32 # shift down by 7, and saturate ++- add r0,r8,16*32*2 # Move to 16th row +++ vsub HY(63,0),HY(0 ,0),HY(16,0) +++ vsub HY(62,0),HY(1 ,0),HY(17,0) +++ vsub HY(61,0),HY(2 ,0),HY(18,0) +++ vsub HY(60,0),HY(3 ,0),HY(19,0) +++ vsub HY(59,0),HY(4 ,0),HY(20,0) +++ vsub HY(58,0),HY(5 ,0),HY(21,0) +++ vsub HY(57,0),HY(6 ,0),HY(22,0) +++ vsub HY(56,0),HY(7 ,0),HY(23,0) +++ vsub HY(55,0),HY(8 ,0),HY(24,0) +++ vsub HY(54,0),HY(9 ,0),HY(25,0) +++ vsub HY(53,0),HY(10,0),HY(26,0) +++ vsub HY(52,0),HY(11,0),HY(27,0) +++ vsub HY(51,0),HY(12,0),HY(28,0) +++ vsub HY(50,0),HY(13,0),HY(29,0) +++ vsub HY(49,0),HY(14,0),HY(30,0) +++ vsub HY(48,0),HY(15,0),HY(31,0) +++ vadd HY(48++,0),HY(48++,0),r4 REP 16 # add on rounding, +++ vasl HY(48++,0),HY(48++,0),r5 REP 16 # shift down by 7, and saturate +++ add r0,r8,32 ++ vsth VX(48,32++),(r0+=r6) REP 16 ++- b lr +++ pop pc ++diff --git a/libavcodec/rpi_qpu.c 
b/libavcodec/rpi_qpu.c ++index d720546..12ad5fb 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -89,7 +89,7 @@ struct GPU ++ { ++ unsigned int qpu_code[QPU_CODE_SIZE]; ++ unsigned int vpu_code[VPU_CODE_SIZE]; ++- short transMatrix2even[16*16]; +++ short transMatrix2even[16*16*2]; ++ int open_count; // Number of allocated video buffers ++ unsigned int vc_handle; // Handle of this memory ++ int mb; // Mailbox handle ++@@ -162,7 +162,7 @@ static int gpu_init(volatile struct GPU **gpu) { ++ memcpy((void*)ptr->vpu_code, rpi_hevc_transform, num_bytes); ++ } ++ // And the transform coefficients ++- memcpy((void*)ptr->transMatrix2even, rpi_transMatrix2even, 16*16*sizeof(short)); +++ memcpy((void*)ptr->transMatrix2even, rpi_transMatrix2even, sizeof(rpi_transMatrix2even)); ++ ++ return 0; ++ } ++-- ++2.7.4 ++ ++ ++From 6c2ed6109c4dd5c8ab16bf16e0ae3be6ae166e50 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Tue, 5 May 2015 16:57:03 +0100 ++Subject: [PATCH 07/68] Clear coefficients in advance ++ ++--- ++ libavcodec/hevc.c | 129 ++++++++++++++++++++++++++++------------ ++ libavcodec/hevc.h | 6 +- ++ libavcodec/hevc_cabac.c | 7 ++- ++ libavcodec/rpi_hevc_transform.h | 50 ++++++++++++++++ ++ libavcodec/rpi_hevc_transform.s | 16 +++++ ++ 5 files changed, 168 insertions(+), 40 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 0dde6f2..1424007 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -43,6 +43,8 @@ ++ ++ #ifdef RPI ++ #include "rpi_qpu.h" +++// For some unknown reason, the code seems to crash if I do a late malloc +++#define EARLY_MALLOC ++ #endif ++ ++ // #define DISABLE_MC ++@@ -61,6 +63,20 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ /* free everything allocated by pic_arrays_init() */ ++ static void pic_arrays_free(HEVCContext *s) ++ { +++#ifdef RPI +++#ifdef EARLY_MALLOC +++#else +++ printf("pic_arrays_free\n"); +++ if (s->coeffs_buf_arm[0]) { +++ 
gpu_free(&s->coeffs_buf_default); +++ s->coeffs_buf_arm[0] = 0; +++ } +++ if (s->coeffs_buf_arm[2]) { +++ gpu_free(&s->coeffs_buf_accelerated); +++ s->coeffs_buf_arm[2] = 0; +++ } +++#endif +++#endif ++ av_freep(&s->sao); ++ av_freep(&s->deblock); ++ ++@@ -97,6 +113,28 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ int ctb_count = sps->ctb_width * sps->ctb_height; ++ int min_pu_size = sps->min_pu_width * sps->min_pu_height; ++ +++#ifdef RPI +++#ifdef EARLY_MALLOC +++#else +++ int coeffs_in_ctb = (1 << s->ps.sps->log2_ctb_size) * (1 << s->ps.sps->log2_ctb_size); +++ int coefs_per_row = sps->ctb_width * coeffs_in_ctb * 3; // Allow space for chroma +++ printf("pic_arrays_init\n"); +++ printf("Allocated %d\n",coefs_per_row); +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); +++ s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; +++ if (!s->coeffs_buf_arm[0]) +++ goto fail; +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); +++ s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; +++ s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; +++ if (!s->coeffs_buf_arm[2]) +++ goto fail; +++ s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; +++ s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; +++ printf("Done\n"); +++#endif +++#endif +++ ++ s->bs_width = (width >> 2) + 1; ++ s->bs_height = (height >> 2) + 1; ++ ++@@ -2400,11 +2438,10 @@ static void rpi_execute_transform(HEVCContext *s) ++ // s->hevcdsp.idct[4-2](coeffs, 16); ++ //} ++ ++- gpu_cache_flush(&s->coeffs_buf[2]); ++- gpu_cache_flush(&s->coeffs_buf[3]); ++- vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf[2].vc, s->num_coeffs[2] >> 8, s->coeffs_buf[3].vc, s->num_coeffs[3] >> 10, 0); ++- gpu_cache_flush(&s->coeffs_buf[2]); ++- gpu_cache_flush(&s->coeffs_buf[3]); +++ +++ gpu_cache_flush(&s->coeffs_buf_accelerated); +++ vpu_execute_code( vpu_get_fn(), 
vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); +++ //gpu_cache_flush(&s->coeffs_buf_accelerated); ++ ++ for(i=0;i<4;i++) ++ s->num_coeffs[i] = 0; ++@@ -2426,7 +2463,9 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ lc->na.cand_up_right = (cmd->na >> 0) & 1; ++ s->hpc.intra_pred[cmd->size - 2](s, cmd->x, cmd->y, cmd->c_idx); ++ } else { +++ int trafo_size = 1 << cmd->size; ++ s->hevcdsp.transform_add[cmd->size-2](cmd->dst, cmd->buf, cmd->stride); +++ memset(cmd->buf, 0, trafo_size * trafo_size * sizeof(int16_t)); // Clear coefficients here while they are in the cache ++ } ++ } ++ s->num_pred_cmds = 0; ++@@ -3235,10 +3274,18 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ av_freep(&s->unif_mv_cmds); ++ av_freep(&s->unif_xfm_cmds); ++ av_freep(&s->univ_pred_cmds); ++- for(i = 0; i < 4; i++) { ++- gpu_free(&s->coeffs_buf[i]); +++ +++#ifdef EARLY_MALLOC +++ if (s->coeffs_buf_arm[0]) { +++ gpu_free(&s->coeffs_buf_default); +++ s->coeffs_buf_arm[0] = 0; +++ } +++ if (s->coeffs_buf_arm[2]) { +++ gpu_free(&s->coeffs_buf_accelerated); +++ s->coeffs_buf_arm[2] = 0; ++ } ++ #endif +++#endif ++ ++ for (i = 0; i < 3; i++) { ++ av_freep(&s->sao_pixel_buffer_h[i]); ++@@ -3281,6 +3328,16 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ return 0; ++ } ++ +++#ifdef RPI +++static av_cold void memclear16(int16_t *p, int n) +++{ +++ vpu_execute_code( vpu_get_fn(), p, n, 0, 0, 0, 1); +++ //int i; +++ //for(i=0;i<n;i++) +++ // p[i] = 0; +++} +++#endif +++ ++ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ { ++ HEVCContext *s = avctx->priv_data; ++@@ -3304,37 +3361,35 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->univ_pred_cmds = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); ++ if (!s->univ_pred_cmds) ++ goto fail; ++- for(i = 0; i < 4; i++) { ++- gpu_malloc_cached(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16, &s->coeffs_buf[i]); // TODO slim this down and share across sizes ++- s->coeffs_buf_arm[i] = (int16_t*) s->coeffs_buf[i].arm; ++- if (!s->coeffs_buf_arm[i]) ++- goto fail; ++- }
++- s->enable_rpi = 0; ++ ++- // A little test program ++- /*{ ++- GPU_MEM_PTR_T p; ++- int err = gpu_malloc_cached(16, &p); ++- short *q = (short *)p.arm; ++- int i; ++- int r; ++- printf("Allocated memory %d ARM 0x%x, VC 0x%x, Code 0x%x\n",err,(int)p.arm,p.vc,(int)vpu_get_fn()); ++- printf("Allocated memory %d ARM 0x%x, VC 0x%x\n",err,(int)p.arm,p.vc); ++- printf("Preparing data %p\n",q); ++- for(i=0;i<16;i++) ++- q[i] = i; ++- printf("Flush cache\n"); ++- gpu_cache_flush(&p); ++- printf("Executing code\n"); ++- r = vpu_execute_code( vpu_get_fn(), p.vc, 0, 0, 0, 0, 0); ++- printf("Return value %d (",r); ++- for(i=0;i<16;i++) ++- printf("%d ",q[i]); ++- printf(")\n"); ++- gpu_free(&p); ++- goto fail; // Early out ++- }*/ +++ s->coeffs_buf_arm[0] = 0; +++ s->coeffs_buf_arm[2] = 0; +++ +++#ifdef EARLY_MALLOC +++ int coeffs_in_ctb = 64*64; +++ int coefs_per_row = (2048/64) * coeffs_in_ctb * 3; // Allow space for chroma +++ printf("Allocated %d\n",coefs_per_row); +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); +++ s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; +++ if (!s->coeffs_buf_arm[0]) +++ goto fail; +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); +++ s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; +++ s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; +++ if (!s->coeffs_buf_arm[2]) +++ goto fail; +++ s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; +++ s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; +++ printf("Done\n"); +++ //memset(s->coeffs_buf_arm[0],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[0], coefs_per_row); +++ //memset(s->coeffs_buf_arm[2],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[2], coefs_per_row); +++ //memset(s->coeffs_buf_arm[3],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[3], coefs_per_row); +++#endif +++ +++ s->enable_rpi = 
0; ++ ++ #endif ++ ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 4167985..9a228f6 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -861,8 +861,12 @@ typedef struct HEVCContext { ++ HEVCMvCmd *unif_mv_cmds; ++ HEVCXfmCmd *unif_xfm_cmds; ++ HEVCPredCmd *univ_pred_cmds; ++- GPU_MEM_PTR_T coeffs_buf[4]; +++ int buf_width; +++ GPU_MEM_PTR_T coeffs_buf_default; +++ GPU_MEM_PTR_T coeffs_buf_accelerated; ++ int16_t *coeffs_buf_arm[4]; +++ unsigned int coeffs_buf_vc[4]; +++ ++ int num_coeffs[4]; ++ int num_xfm_cmds; ++ int num_mv_cmds; ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 88aa959..dbfee85 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1058,9 +1058,13 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ s->num_coeffs[0] += n; ++ } ++ } +++ // We now do the memset after transform_add while we know the data is cached. +++ //memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); +++#else +++ memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); ++ #endif ++ ++- memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); +++ ++ ++ // Derive QP for dequant ++ if (!lc->cu.cu_transquant_bypass_flag) { ++@@ -1547,7 +1551,6 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ #ifdef RPI ++ if (s->enable_rpi) { ++ HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; ++- //memcpy(coeffs2, coeffs, sizeof(int16_t) * trafo_size * trafo_size); // TODO ++ cmd->type = RPI_PRED_TRANSFORM_ADD; ++ cmd->size = log2_trafo_size; ++ cmd->buf = coeffs; ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++index 6d772d7..4f13622 100644 ++--- a/libavcodec/rpi_hevc_transform.h +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -1,4 +1,10 @@ ++ unsigned char rpi_hevc_transform [] = { +++21, +++106, +++0, +++144, +++35, +++1, ++ 169, ++ 3, ++ 62, ++@@ -577,4 +583,48 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 111, ++ 3, 
+++4, +++254, +++0, +++128, +++0, +++4, +++0, +++248, +++0, +++0, +++2, +++232, +++32, +++0, +++0, +++0, +++140, +++248, +++32, +++0, +++0, +++0, +++224, +++35, +++0, +++0, +++64, +++232, +++0, +++2, +++0, +++0, +++193, +++232, +++0, +++1, +++0, +++0, +++1, +++106, +++116, +++30, +++90, +++0, ++ }; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++index afdb32a..fd159bc 100644 ++--- a/libavcodec/rpi_hevc_transform.s +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -78,8 +78,11 @@ ++ # num: number of 16x16 transforms to be done ++ # coeffs32 ++ # num32: number of 32x32 transforms +++# command 0 for transform, 1 for memclear16(int16_t *dst,num16) ++ # ++ hevc_trans_16x16: +++ cmp r5,1 +++ beq memclear16 ++ push r6-r15, lr # TODO cut down number of used registers ++ mov r14,r3 # coeffs32 ++ mov r15,r4 # num32 ++@@ -266,3 +269,16 @@ trans32: ++ add r0,r8,32 ++ vsth VX(48,32++),(r0+=r6) REP 16 ++ pop pc +++ +++memclear16: +++ # r0 is address +++ # r1 is number of 16bits values to set to 0 (may overrun past end and clear more than specified) +++ vmov HX(0++,0),0 REP 16 +++ mov r2,32 +++loop: +++ vsth HX(0++,0),(r0+=r2) REP 16 +++ add r0,16*16*2 +++ sub r1,16*16 +++ cmp r1,0 +++ bgt loop +++ b lr ++-- ++2.7.4 ++ ++ ++From 48282c2fb55c0d9a72222f384c03c432f78a3016 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 6 May 2015 09:56:43 +0100 ++Subject: [PATCH 08/68] Prepared inter offload ++ ++--- ++ libavcodec/hevc.c | 116 +++++++++++++++++++++++++++++++++++++++++++----- ++ libavcodec/hevc.h | 29 +++++++++++- ++ libavcodec/hevc_cabac.c | 5 ++- ++ 3 files changed, 137 insertions(+), 13 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 1424007..8215201 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -45,6 +45,8 @@ ++ #include "rpi_qpu.h" ++ // For some unknown reason, the code seems to crash if I do a late malloc ++ #define EARLY_MALLOC +++// Move Inter prediction into separate pass +++//#define 
RPI_INTER ++ #endif ++ ++ // #define DISABLE_MC ++@@ -1440,6 +1442,95 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) ++ * @param luma_offset additive offset applied to the luma prediction value ++ */ ++ +++#ifdef RPI_INTER +++#define RPI_REDIRECT(fn) rpi_ ## fn +++static void rpi_luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, +++ AVFrame *ref, const Mv *mv, int x_off, int y_off, +++ int block_w, int block_h, int luma_weight, int luma_offset) +++{ +++ HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ cmd->cmd = RPI_CMD_LUMA_UNI; +++ cmd->dst = dst; +++ cmd->dststride = dststride; +++ cmd->src = ref->data[0]; +++ cmd->srcstride = ref->linesize[0]; +++ cmd->mv = *mv; +++ cmd->x_off = x_off; +++ cmd->y_off = y_off; +++ cmd->block_w = block_w; +++ cmd->block_h = block_h; +++ cmd->weight = luma_weight; +++ cmd->offset = luma_offset; +++} +++ +++static void rpi_luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, +++ AVFrame *ref0, const Mv *mv0, int x_off, int y_off, +++ int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) +++{ +++ HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ cmd->cmd = RPI_CMD_LUMA_BI; +++ cmd->dst = dst; +++ cmd->dststride = dststride; +++ cmd->src = ref->data[0]; +++ cmd->srcstride = ref->linesize[0]; +++ cmd->mv = *mv; +++ cmd->x_off = x_off; +++ cmd->y_off = y_off; +++ cmd->block_w = block_w; +++ cmd->block_h = block_h; +++ cmd->weight = luma_weight; +++ cmd->offset = luma_offset; +++ cmd->src1 = ref1->data[]; +++ cmd->srcstride1 = ref1->linesize[0]; +++ cmd->mv1 = *mv1; +++ cmd->ref_idx[0] = current_mv->ref_idx[0]; +++ cmd->ref_idx[1] = current_mv->ref_idx[1]; +++} +++ +++static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0, +++ ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist, +++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset) +++{ +++ HEVCMvCmd *cmd = 
unif_mv_cmds + s->num_mv_cmds++; +++ cmd->cmd = RPI_CMD_CHROMA_UNI; +++ cmd->dst = dst0; +++ cmd->dststride = dststride; +++ cmd->src = src0; +++ cmd->srcstride = srcstride; +++ cmd->mv = current_mv->mv[reflist]; +++ cmd->x_off = x_off; +++ cmd->y_off = y_off; +++ cmd->block_w = block_w; +++ cmd->block_h = block_h; +++ cmd->weight = chroma_weight; +++ cmd->offset = chroma_offset; +++} +++ +++static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1, +++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx) +++{ +++ HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ cmd->cmd = RPI_CMD_CHROMA_BI+cidx; +++ cmd->dst = dst0; +++ cmd->dststride = dststride; +++ cmd->src = ref0->data[cidx+1]; +++ cmd->srcstride = ref0->linesize[cidx+1]; +++ cmd->mv = current_mv->mv[reflist]; +++ cmd->x_off = x_off; +++ cmd->y_off = y_off; +++ cmd->block_w = block_w; +++ cmd->block_h = block_h; +++ cmd->weight = chroma_weight; +++ cmd->offset = chroma_offset; +++ cmd->src = ref1->data[cidx+1]; +++ cmd->srcstride1 = ref1->linesize[cidx+1]; +++ cmd->ref_idx[0] = current_mv->ref_idx[0]; +++ cmd->ref_idx[1] = current_mv->ref_idx[1]; +++} +++#else +++#define RPI_REDIRECT(fn) fn +++#endif +++ ++ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref, const Mv *mv, int x_off, int y_off, ++ int block_w, int block_h, int luma_weight, int luma_offset) ++@@ -1505,7 +1596,7 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ * @param mv1 motion vector1 (relative to block position) to get pixel data from ++ * @param current_mv current motion vector structure ++ */ ++- static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, +++static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref0, const Mv *mv0, int x_off, int y_off, ++ int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) 
++ { ++@@ -1887,16 +1978,16 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame, +++ RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref0->frame, ++ ¤t_mv.mv[0], x0, y0, nPbW, nPbH, ++ s->sh.luma_weight_l0[current_mv.ref_idx[0]], ++ s->sh.luma_offset_l0[current_mv.ref_idx[0]]); ++ ++ if (s->ps.sps->chroma_format_idc) { ++- chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1], +++ RPI_REDIRECT(chroma_mc_uni)(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1], ++ 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, ++ s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]); ++- chroma_mc_uni(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2], +++ RPI_REDIRECT(chroma_mc_uni)(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2], ++ 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, ++ s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]); ++ } ++@@ -1906,17 +1997,17 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame, +++ RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref1->frame, ++ ¤t_mv.mv[1], x0, y0, nPbW, nPbH, ++ s->sh.luma_weight_l1[current_mv.ref_idx[1]], ++ s->sh.luma_offset_l1[current_mv.ref_idx[1]]); ++ ++ if (s->ps.sps->chroma_format_idc) { ++- chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1], +++ RPI_REDIRECT(chroma_mc_uni)(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1], ++ 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, ++ 
s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]); ++ ++- chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2], +++ RPI_REDIRECT(chroma_mc_uni)(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2], ++ 1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, ++ s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]); ++ } ++@@ -1926,15 +2017,15 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame, +++ RPI_REDIRECT(luma_mc_bi)(s, dst0, s->frame->linesize[0], ref0->frame, ++ &current_mv.mv[0], x0, y0, nPbW, nPbH, ++ ref1->frame, &current_mv.mv[1], &current_mv); ++ ++ if (s->ps.sps->chroma_format_idc) { ++- chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame, +++ RPI_REDIRECT(chroma_mc_bi)(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame, ++ x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0); ++ ++- chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame, +++ RPI_REDIRECT(chroma_mc_bi)(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame, ++ x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 1); ++ } ++ } ++@@ -2465,7 +2556,9 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ } else { ++ int trafo_size = 1 << cmd->size; ++ s->hevcdsp.transform_add[cmd->size-2](cmd->dst, cmd->buf, cmd->stride); +++#ifdef RPI_PRECLEAR ++ memset(cmd->buf, 0, trafo_size * trafo_size * sizeof(int16_t)); // Clear coefficients here while they are in the cache +++#endif ++ } ++ } ++ s->num_pred_cmds = 0; ++@@ -3381,6 +3474,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; ++ s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; ++ printf("Done\n"); +++#ifdef RPI_PRECLEAR ++
//memset(s->coeffs_buf_arm[0],0, sizeof(int16_t) * coefs_per_row); ++ memclear16(s->coeffs_buf_arm[0], coefs_per_row); ++ //memset(s->coeffs_buf_arm[2],0, sizeof(int16_t) * coefs_per_row); ++@@ -3389,6 +3483,8 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ memclear16(s->coeffs_buf_arm[3], coefs_per_row); ++ #endif ++ +++#endif +++ ++ s->enable_rpi = 0; ++ ++ #endif ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 9a228f6..1ac119a 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -803,14 +803,39 @@ typedef struct HEVCLocalContext { ++ // RPI_MAX_WIDTH is maximum width in pixels supported by the accelerated code ++ #define RPI_MAX_WIDTH 2048 ++ ++-// Worst case is for 4:4:4 4x4 blocks with 64 high coding tree blocks, so 16 MV cmds per 4 pixels across for each colour plane ++-#define RPI_MAX_MV_CMDS (16*3*(RPI_MAX_WIDTH/4)) +++// Worst case is for 4:4:4 4x4 blocks with 64 high coding tree blocks, so 16 MV cmds per 4 pixels across for each colour plane, * 2 for bi +++#define RPI_MAX_MV_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) ++ #define RPI_MAX_XFM_CMDS (16*3*(RPI_MAX_WIDTH/4)) ++ // Each block can have an intra prediction and a transform_add command ++ #define RPI_MAX_PRED_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) ++ +++#define RPI_CMD_LUMA_UNI 0 +++#define RPI_CMD_CHROMA_UNI 1 +++#define RPI_CMD_LUMA_BI 2 +++#define RPI_CMD_U_BI 3 +++#define RPI_CMD_V_BI 4 +++ +++// RPI_PRECLEAR is not working yet - perhaps clearing on VPUs is flawed? 
+++// #define RPI_PRECLEAR +++ ++ // Command for inter prediction ++ typedef struct HEVCMvCmd { +++ int cmd; +++ uint8_t *dst; +++ ptrdiff_t dststride; +++ uint8_t *src; +++ ptrdiff_t srcstride; +++ Mv mv; +++ int x_off; +++ int y_off; +++ int block_w; +++ int block_h; +++ int weight; +++ int offset; +++ uint8_t *src1; +++ ptrdiff_t srcstride1; +++ Mv mv1; +++ int8_t ref_idx[2]; ++ } HEVCMvCmd; ++ ++ // Command for transform to process a block of coefficients ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index dbfee85..4f072be 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1059,7 +1059,10 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } ++ } ++ // We now do the memset after transform_add while we know the data is cached. ++- //memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); +++ #ifdef RPI_PRECLEAR +++ #else +++ memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); +++ #endif ++ #else ++ memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); ++ #endif ++-- ++2.7.4 ++ ++ ++From 25d3b4e876febe08302a01abd85d5009160ead3e Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 6 May 2015 11:08:50 +0100 ++Subject: [PATCH 09/68] Inter prediction in separate pass ++ ++--- ++ libavcodec/hevc.c | 93 +++++++++++++++++++++++++++++++++++++++++++++---------- ++ libavcodec/hevc.h | 2 +- ++ 2 files changed, 77 insertions(+), 18 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 8215201..b7bc6ad 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -46,7 +46,7 @@ ++ // For some unknown reason, the code seems to crash if I do a late malloc ++ #define EARLY_MALLOC ++ // Move Inter prediction into separate pass ++-//#define RPI_INTER +++#define RPI_INTER ++ #endif ++ ++ // #define DISABLE_MC ++@@ -1448,7 +1448,7 @@ static void rpi_luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref, const Mv *mv, int x_off, int 
y_off, ++ int block_w, int block_h, int luma_weight, int luma_offset) ++ { ++- HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; ++ cmd->cmd = RPI_CMD_LUMA_UNI; ++ cmd->dst = dst; ++ cmd->dststride = dststride; ++@@ -1467,31 +1467,29 @@ static void rpi_luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref0, const Mv *mv0, int x_off, int y_off, ++ int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) ++ { ++- HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; ++ cmd->cmd = RPI_CMD_LUMA_BI; ++ cmd->dst = dst; ++ cmd->dststride = dststride; ++- cmd->src = ref->data[0]; ++- cmd->srcstride = ref->linesize[0]; ++- cmd->mv = *mv; +++ cmd->src = ref0->data[0]; +++ cmd->srcstride = ref0->linesize[0]; +++ cmd->mv = *mv0; ++ cmd->x_off = x_off; ++ cmd->y_off = y_off; ++ cmd->block_w = block_w; ++ cmd->block_h = block_h; ++- cmd->weight = luma_weight; ++- cmd->offset = luma_offset; ++- cmd->src1 = ref1->data[]; +++ cmd->src1 = ref1->data[0]; ++ cmd->srcstride1 = ref1->linesize[0]; ++ cmd->mv1 = *mv1; ++ cmd->ref_idx[0] = current_mv->ref_idx[0]; ++ cmd->ref_idx[1] = current_mv->ref_idx[1]; ++ } ++ ++-static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0, +++static void rpi_chroma_mc_uni(HEVCContext *s, uint8_t *dst0, ++ ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist, ++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset) ++ { ++- HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; ++ cmd->cmd = RPI_CMD_CHROMA_UNI; ++ cmd->dst = dst0; ++ cmd->dststride = dststride; ++@@ -1506,27 +1504,27 @@ static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0, ++ cmd->offset = chroma_offset; ++ } ++ ++-static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame 
*ref0, AVFrame *ref1, +++static void rpi_chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1, ++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx) ++ { ++- HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; ++ cmd->cmd = RPI_CMD_CHROMA_BI+cidx; ++ cmd->dst = dst0; ++ cmd->dststride = dststride; ++ cmd->src = ref0->data[cidx+1]; ++ cmd->srcstride = ref0->linesize[cidx+1]; ++- cmd->mv = current_mv->mv[reflist]; +++ cmd->mv = current_mv->mv[0]; +++ cmd->mv1 = current_mv->mv[1]; ++ cmd->x_off = x_off; ++ cmd->y_off = y_off; ++ cmd->block_w = block_w; ++ cmd->block_h = block_h; ++- cmd->weight = chroma_weight; ++- cmd->offset = chroma_offset; ++- cmd->src = ref1->data[cidx+1]; +++ cmd->src1 = ref1->data[cidx+1]; ++ cmd->srcstride1 = ref1->linesize[cidx+1]; ++ cmd->ref_idx[0] = current_mv->ref_idx[0]; ++ cmd->ref_idx[1] = current_mv->ref_idx[1]; ++ } +++ ++ #else ++ #define RPI_REDIRECT(fn) fn ++ #endif ++@@ -2554,7 +2552,9 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ lc->na.cand_up_right = (cmd->na >> 0) & 1; ++ s->hpc.intra_pred[cmd->size - 2](s, cmd->x, cmd->y, cmd->c_idx); ++ } else { +++#ifdef RPI_PRECLEAR ++ int trafo_size = 1 << cmd->size; +++#endif ++ s->hevcdsp.transform_add[cmd->size-2](cmd->dst, cmd->buf, cmd->stride); ++ #ifdef RPI_PRECLEAR ++ memset(cmd->buf, 0, trafo_size * trafo_size * sizeof(int16_t)); // Clear coefficients here while they are in the cache ++@@ -2563,6 +2563,61 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ } ++ s->num_pred_cmds = 0; ++ } +++ +++static void rpi_execute_inter_cmds(HEVCContext *s) +++{ +++ HEVCMvCmd *cmd = s->unif_mv_cmds; +++ int n,cidx; +++ AVFrame myref; +++ AVFrame myref1; +++ struct MvField mymv; +++ if (s->num_mv_cmds > RPI_MAX_MV_CMDS) { +++ printf("Overflow inter_cmds\n"); +++ exit(-1); +++ } +++ for(n = s->num_mv_cmds; n>0 ; n--, cmd++) { +++ switch(cmd->cmd) { 
+++ case RPI_CMD_LUMA_UNI: +++ myref.data[0] = cmd->src; +++ myref.linesize[0] = cmd->srcstride; +++ luma_mc_uni(s, cmd->dst, cmd->dststride, &myref, &cmd->mv, cmd->x_off, cmd->y_off, cmd->block_w, cmd->block_h, cmd->weight, cmd->offset); +++ break; +++ case RPI_CMD_LUMA_BI: +++ myref.data[0] = cmd->src; +++ myref.linesize[0] = cmd->srcstride; +++ myref1.data[0] = cmd->src1; +++ myref1.linesize[0] = cmd->srcstride1; +++ mymv.ref_idx[0] = cmd->ref_idx[0]; +++ mymv.ref_idx[1] = cmd->ref_idx[1]; +++ luma_mc_bi(s, cmd->dst, cmd->dststride, +++ &myref, &cmd->mv, cmd->x_off, cmd->y_off, cmd->block_w, cmd->block_h, +++ &myref1, &cmd->mv1, &mymv); +++ break; +++ case RPI_CMD_CHROMA_UNI: +++ mymv.mv[0] = cmd->mv; +++ chroma_mc_uni(s, cmd->dst, +++ cmd->dststride, cmd->src, cmd->srcstride, 0, +++ cmd->x_off, cmd->y_off, cmd->block_w, cmd->block_h, &mymv, cmd->weight, cmd->offset); +++ break; +++ case RPI_CMD_CHROMA_BI: +++ case RPI_CMD_CHROMA_BI+1: +++ cidx = cmd->cmd - RPI_CMD_CHROMA_BI; +++ myref.data[cidx+1] = cmd->src; +++ myref.linesize[cidx+1] = cmd->srcstride; +++ myref1.data[cidx+1] = cmd->src1; +++ myref1.linesize[cidx+1] = cmd->srcstride1; +++ mymv.ref_idx[0] = cmd->ref_idx[0]; +++ mymv.ref_idx[1] = cmd->ref_idx[1]; +++ mymv.mv[0] = cmd->mv; +++ mymv.mv[1] = cmd->mv1; +++ chroma_mc_bi(s, cmd->dst, cmd->dststride, &myref, &myref1, +++ cmd->x_off, cmd->y_off, cmd->block_w, cmd->block_h, &mymv, cidx); +++ break; +++ } +++ } +++ s->num_mv_cmds = 0; +++} +++ ++ #endif ++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++@@ -2611,6 +2666,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI ++ if (s->enable_rpi && x_ctb + ctb_size >= s->ps.sps->width) { ++ int x; +++ // Perform inter prediction +++ rpi_execute_inter_cmds(s); ++ // Transform all blocks ++ rpi_execute_transform(s); ++ // Perform intra prediction and residual reconstruction ++@@ -3422,6 +3479,7 @@ static av_cold int 
hevc_decode_free(AVCodecContext *avctx) ++ } ++ ++ #ifdef RPI +++#ifdef RPI_PRECLEAR ++ static av_cold void memclear16(int16_t *p, int n) ++ { ++ vpu_execute_code( vpu_get_fn(), p, n, 0, 0, 0, 1); ++@@ -3430,6 +3488,7 @@ static av_cold void memclear16(int16_t *p, int n) ++ // p[i] = 0; ++ } ++ #endif +++#endif ++ ++ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ { ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 1ac119a..a0eb71b 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -812,7 +812,7 @@ typedef struct HEVCLocalContext { ++ #define RPI_CMD_LUMA_UNI 0 ++ #define RPI_CMD_CHROMA_UNI 1 ++ #define RPI_CMD_LUMA_BI 2 ++-#define RPI_CMD_U_BI 3 +++#define RPI_CMD_CHROMA_BI 3 ++ #define RPI_CMD_V_BI 4 ++ ++ // RPI_PRECLEAR is not working yet - perhaps clearing on VPUs is flawed? ++-- ++2.7.4 ++ ++ ++From 8af0a0a036e4bb3883f144d0567bc527772dd65b Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 6 May 2015 13:03:50 +0100 ++Subject: [PATCH 10/68] Added VPU thread ++ ++--- ++ libavcodec/hevc.c | 11 +++-- ++ libavcodec/hevc.h | 1 + ++ libavcodec/rpi_qpu.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++-- ++ libavcodec/rpi_qpu.h | 2 + ++ 4 files changed, 133 insertions(+), 6 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index b7bc6ad..98dbd69 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2529,8 +2529,10 @@ static void rpi_execute_transform(HEVCContext *s) ++ ++ ++ gpu_cache_flush(&s->coeffs_buf_accelerated); ++- vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); +++ s->vpu_id = vpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, &s->coeffs_buf_accelerated); +++ //vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], 
s->num_coeffs[3] >> 10, 0); ++ //gpu_cache_flush(&s->coeffs_buf_accelerated); +++ //vpu_wait(s->vpu_id); ++ ++ for(i=0;i<4;i++) ++ s->num_coeffs[i] = 0; ++@@ -2666,10 +2668,12 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI ++ if (s->enable_rpi && x_ctb + ctb_size >= s->ps.sps->width) { ++ int x; ++- // Perform inter prediction ++- rpi_execute_inter_cmds(s); ++ // Transform all blocks ++ rpi_execute_transform(s); +++ // Perform inter prediction +++ rpi_execute_inter_cmds(s); +++ // Wait for transform completion +++ vpu_wait(s->vpu_id); ++ // Perform intra prediction and residual reconstruction ++ rpi_execute_pred_cmds(s); ++ // Perform deblocking for CTBs in this row ++@@ -3426,6 +3430,7 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ av_freep(&s->univ_pred_cmds); ++ ++ #ifdef EARLY_MALLOC +++ printf("hevc_decode_free\n"); ++ if (s->coeffs_buf_arm[0]) { ++ gpu_free(&s->coeffs_buf_default); ++ s->coeffs_buf_arm[0] = 0; ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index a0eb71b..0d8dfe9 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -896,6 +896,7 @@ typedef struct HEVCContext { ++ int num_xfm_cmds; ++ int num_mv_cmds; ++ int num_pred_cmds; +++ int vpu_id; ++ #endif ++ ++ uint8_t *cabac_state; ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 12ad5fb..378dd74 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -1,9 +1,13 @@ ++ #ifdef RPI ++-// Use the vcsm device for shared memory +++// define RPI_USE_VCSM to use the vcsm device for shared memory ++ // This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. 
++ #define RPI_USE_VCSM ++-#define RPI_TIME_TOTAL_QPU ++-#define RPI_TIME_TOTAL_VPU +++// define RPI_TIME_TOTAL_QPU to print out how much time is spent in the QPU code +++//#define RPI_TIME_TOTAL_QPU +++// define RPI_TIME_TOTAL_VPU to print out how much time is spent in the VPI code +++//#define RPI_TIME_TOTAL_VPU +++// define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion +++#define RPI_ASYNC ++ ++ #include ++ #include ++@@ -113,6 +117,19 @@ static unsigned int Microseconds(void) { ++ } ++ #endif ++ +++#ifdef RPI_ASYNC +++pthread_t vpu_thread; +++static void *vpu_start(void *arg); +++ +++#define MAXCMDS 128 +++static pthread_cond_t post_cond = PTHREAD_COND_INITIALIZER; +++static pthread_mutex_t post_mutex = PTHREAD_MUTEX_INITIALIZER; +++ +++static int vpu_cmds[MAXCMDS][8]; +++static volatile int vpu_async_tail=0; // Contains the number of posted jobs +++static volatile int vpu_async_head=0; +++#endif +++ ++ // Connect to QPU, returns 0 on success. ++ static int gpu_init(volatile struct GPU **gpu) { ++ int mb = mbox_open(); ++@@ -164,12 +181,27 @@ static int gpu_init(volatile struct GPU **gpu) { ++ // And the transform coefficients ++ memcpy((void*)ptr->transMatrix2even, rpi_transMatrix2even, sizeof(rpi_transMatrix2even)); ++ +++#ifdef RPI_ASYNC +++ { +++ int err; +++ vpu_async_tail = 0; +++ vpu_async_head = 0; +++ err = pthread_create(&vpu_thread, NULL, vpu_start, NULL); +++ //printf("Created thread\n"); +++ if (err) { +++ printf("Failed to create vpu thread\n"); +++ return -4; +++ } +++ } +++#endif +++ ++ return 0; ++ } ++ ++ // Make sure we have exclusive access to the mailbox, and enable qpu if necessary. 
++ static void gpu_lock(void) { ++ pthread_mutex_lock(&gpu_mutex); +++ ++ if (gpu==NULL) { ++ gpu_init(&gpu); ++ } ++@@ -264,6 +296,16 @@ static void gpu_term(void) ++ unsigned handle = gpu->vc_handle; ++ if (gpu==NULL) ++ return; +++ +++#ifdef RPI_ASYNC +++ { +++ void *res; +++ vpu_post_code(0, 0, 0, 0, 0, 0, -1, NULL); +++ pthread_join(vpu_thread, &res); +++ } +++#endif +++ +++ ++ unmapmem((void*)gpu, sizeof(struct GPU)); ++ mem_unlock(mb, handle); ++ mem_free(mb, handle); ++@@ -322,6 +364,79 @@ unsigned int vpu_get_constants(void) { ++ return gpu->vc + offsetof(struct GPU,transMatrix2even); ++ } ++ +++#ifdef RPI_ASYNC +++ +++static void *vpu_start(void *arg) { +++ while(1) { +++ pthread_mutex_lock(&post_mutex); +++ while( vpu_async_tail - vpu_async_head <= 0) +++ { +++ //printf("Checking number %d %d\n",vpu_async_head,vpu_async_tail); +++ pthread_cond_wait(&post_cond, &post_mutex); +++ } +++ int *p = vpu_cmds[vpu_async_head%MAXCMDS]; +++ pthread_mutex_unlock(&post_mutex); +++ +++ if (p[6] == -1) { +++ break; // Last job +++ } +++ if (p[7]) { +++ GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; +++ //gpu_cache_flush(buf); +++ } +++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); +++ +++ pthread_mutex_lock(&post_mutex); +++ vpu_async_head++; +++ pthread_cond_broadcast(&post_cond); +++ pthread_mutex_unlock(&post_mutex); +++ } +++ +++ return NULL; +++} +++ +++// Post a command to the queue +++// Returns an id which we can use to wait for completion +++int vpu_post_code(unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, GPU_MEM_PTR_T *buf) +++{ +++ pthread_mutex_lock(&post_mutex); +++ { +++ int id = vpu_async_tail++; +++ int *p = vpu_cmds[id%MAXCMDS]; +++ int num = vpu_async_tail - vpu_async_head; +++ if (num>MAXCMDS) { +++ printf("Too many commands submitted\n"); +++ exit(-1); +++ } +++ p[0] = code; +++ p[1] = r0; +++ p[2] = r1; +++ p[3] = r2; +++ p[4] = r3; +++ p[5] = r4; +++ p[6] = r5; +++ p[7] = (int) buf; +++ if 
(num<=1) +++ pthread_cond_broadcast(&post_cond); // Otherwise the vpu thread must already be awake +++ pthread_mutex_unlock(&post_mutex); +++ return id; +++ } +++} +++ +++// Wait for completion of the given command +++void vpu_wait(int id) +++{ +++ pthread_mutex_lock(&post_mutex); +++ while( id + 1 - vpu_async_head > 0) +++ { +++ pthread_cond_wait(&post_cond, &post_mutex); +++ } +++ pthread_mutex_unlock(&post_mutex); +++} +++ +++#endif +++ +++ ++ unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5) ++ { ++ unsigned r; ++@@ -334,7 +449,9 @@ unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, ++ static int count=0; ++ static long long countr2=0; ++ #endif +++#ifndef RPI_ASYNC ++ gpu_lock(); +++#endif ++ #ifdef RPI_TIME_TOTAL_VPU ++ start_time = Microseconds(); ++ if (last_time==0) ++@@ -351,7 +468,9 @@ unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, ++ if ((count&0x7f)==0) ++ printf("VPU %d %lld On=%dms, Off=%dms\n",count,countr2,(int)(on_time/1000),(int)(off_time/1000)); ++ #endif +++#ifndef RPI_ASYNC ++ gpu_unlock(); +++#endif ++ return r; ++ } ++ ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 814fc3c..3526fce 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -36,6 +36,8 @@ extern unsigned int qpu_get_fn(int num); ++ extern unsigned int vpu_get_fn(void); ++ extern unsigned int vpu_get_constants(void); ++ extern unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); +++extern int vpu_post_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, GPU_MEM_PTR_T *buf); +++extern void vpu_wait( int id); ++ ++ // Simple test of shader code ++ extern int rpi_test_shader(void); ++-- ++2.7.4 ++ ++ ++From 016d3db644e60fbe272bfcf1d7c3670c82422317 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: 
Wed, 6 May 2015 15:03:37 +0100 ++Subject: [PATCH 11/68] Added different signal when tail moves ++ ++--- ++ libavcodec/rpi_qpu.c | 11 ++++++----- ++ 1 file changed, 6 insertions(+), 5 deletions(-) ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 378dd74..d1c3e20 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -122,7 +122,8 @@ pthread_t vpu_thread; ++ static void *vpu_start(void *arg); ++ ++ #define MAXCMDS 128 ++-static pthread_cond_t post_cond = PTHREAD_COND_INITIALIZER; +++static pthread_cond_t post_cond_head = PTHREAD_COND_INITIALIZER; +++static pthread_cond_t post_cond_tail = PTHREAD_COND_INITIALIZER; ++ static pthread_mutex_t post_mutex = PTHREAD_MUTEX_INITIALIZER; ++ ++ static int vpu_cmds[MAXCMDS][8]; ++@@ -372,7 +373,7 @@ static void *vpu_start(void *arg) { ++ while( vpu_async_tail - vpu_async_head <= 0) ++ { ++ //printf("Checking number %d %d\n",vpu_async_head,vpu_async_tail); ++- pthread_cond_wait(&post_cond, &post_mutex); +++ pthread_cond_wait(&post_cond_tail, &post_mutex); ++ } ++ int *p = vpu_cmds[vpu_async_head%MAXCMDS]; ++ pthread_mutex_unlock(&post_mutex); ++@@ -388,7 +389,7 @@ static void *vpu_start(void *arg) { ++ ++ pthread_mutex_lock(&post_mutex); ++ vpu_async_head++; ++- pthread_cond_broadcast(&post_cond); +++ pthread_cond_broadcast(&post_cond_head); ++ pthread_mutex_unlock(&post_mutex); ++ } ++ ++@@ -417,7 +418,7 @@ int vpu_post_code(unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned ++ p[6] = r5; ++ p[7] = (int) buf; ++ if (num<=1) ++- pthread_cond_broadcast(&post_cond); // Otherwise the vpu thread must already be awake +++ pthread_cond_broadcast(&post_cond_tail); // Otherwise the vpu thread must already be awake ++ pthread_mutex_unlock(&post_mutex); ++ return id; ++ } ++@@ -429,7 +430,7 @@ void vpu_wait(int id) ++ pthread_mutex_lock(&post_mutex); ++ while( id + 1 - vpu_async_head > 0) ++ { ++- pthread_cond_wait(&post_cond, &post_mutex); +++ pthread_cond_wait(&post_cond_head, &post_mutex); 
++ } ++ pthread_mutex_unlock(&post_mutex); ++ } ++-- ++2.7.4 ++ ++ ++From b04a72641253dc89fd1ec688035c3e2a946aa370 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 7 May 2015 08:57:11 +0100 ++Subject: [PATCH 12/68] Add option to test for gpu_idle ++ ++--- ++ libavcodec/hevc.c | 3 ++- ++ libavcodec/rpi_qpu.c | 18 ++++++++++++++++++ ++ 2 files changed, 20 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 98dbd69..2e269b6 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2527,7 +2527,6 @@ static void rpi_execute_transform(HEVCContext *s) ++ // s->hevcdsp.idct[4-2](coeffs, 16); ++ //} ++ ++- ++ gpu_cache_flush(&s->coeffs_buf_accelerated); ++ s->vpu_id = vpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, &s->coeffs_buf_accelerated); ++ //vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); ++@@ -2669,6 +2668,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ if (s->enable_rpi && x_ctb + ctb_size >= s->ps.sps->width) { ++ int x; ++ // Transform all blocks +++ //printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); +++ ++ rpi_execute_transform(s); ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index d1c3e20..85f49db 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -199,6 +199,17 @@ static int gpu_init(volatile struct GPU **gpu) { ++ return 0; ++ } ++ +++// Returns 1 if the gpu is currently idle +++static int gpu_idle(void) +++{ +++ int ret = pthread_mutex_trylock(&gpu_mutex); +++ if (ret==0) { +++ pthread_mutex_unlock(&gpu_mutex); +++ return 1; +++ } +++ return 0; +++} +++ ++ // Make sure we have exclusive access to 
the mailbox, and enable qpu if necessary. ++ static void gpu_lock(void) { ++ pthread_mutex_lock(&gpu_mutex); ++@@ -400,6 +411,13 @@ static void *vpu_start(void *arg) { ++ // Returns an id which we can use to wait for completion ++ int vpu_post_code(unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, GPU_MEM_PTR_T *buf) ++ { +++ // If the gpu is idle then just run the command immediately +++ // This works, but doesn't seem to give any benefit +++ // if (gpu_idle()) { +++ // vpu_execute_code( code, r0, r1, r2, r3, r4, r5); +++ // return -1; // TODO perhaps a wraparound bug here? +++ // } +++ ++ pthread_mutex_lock(&post_mutex); ++ { ++ int id = vpu_async_tail++; ++-- ++2.7.4 ++ ++ ++From e7b457e683d4ca92bf2677b69708fbfc3849847b Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 7 May 2015 11:01:35 +0100 ++Subject: [PATCH 13/68] Added deblocking pass ++ ++--- ++ libavcodec/hevc.c | 33 +++++++++++++++++++++++++++------ ++ libavcodec/hevc.h | 7 ++++++- ++ libavcodec/hevc_filter.c | 6 +++++- ++ libavcodec/rpi_qpu.c | 2 +- ++ 4 files changed, 39 insertions(+), 9 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 2e269b6..29f8415 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2518,6 +2518,17 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, ++ } ++ ++ #ifdef RPI +++static void rpi_execute_dblk_cmds(HEVCContext *s) +++{ +++ int n; +++ int ctb_size = 1 << s->ps.sps->log2_ctb_size; +++ int (*p)[2] = s->dblk_cmds; +++ for(n = s->num_dblk_cmds; n>0 ;n--,p++) { +++ ff_hevc_hls_filters(s, (*p)[0], (*p)[1], ctb_size); +++ } +++ s->num_dblk_cmds = 0; +++} +++ ++ static void rpi_execute_transform(HEVCContext *s) ++ { ++ int i=2; ++@@ -2631,7 +2642,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; ++ ++ #ifdef RPI ++- int start_ctb_x = (s->sh.slice_ctb_addr_rs % 
((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size; ++ s->enable_rpi = 1; // TODO this should depend on cross component and frame width etc. ++ #endif ++ ++@@ -2665,7 +2675,10 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ #ifdef RPI ++- if (s->enable_rpi && x_ctb + ctb_size >= s->ps.sps->width) { +++ if (s->enable_rpi) { +++ s->dblk_cmds[s->num_dblk_cmds][0] = x_ctb; +++ s->dblk_cmds[s->num_dblk_cmds++][1] = y_ctb; +++ if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= s->ps.sps->width) { ++ int x; ++ // Transform all blocks ++ //printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++@@ -2678,10 +2691,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ // Perform intra prediction and residual reconstruction ++ rpi_execute_pred_cmds(s); ++ // Perform deblocking for CTBs in this row ++- for(x = start_ctb_x; x <= x_ctb; x += ctb_size) { // TODO this will fail for tiles ++- ff_hevc_hls_filters(s, x, y_ctb, ctb_size); ++- } ++- start_ctb_x = 0; +++ rpi_execute_dblk_cmds(s); +++ } ++ } ++ #endif ++ if (more_data < 0) { ++@@ -2699,6 +2710,16 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size); ++ } ++ +++#ifdef RPI +++ if (s->enable_rpi && s->num_dblk_cmds) { +++ rpi_execute_transform(s); +++ rpi_execute_inter_cmds(s); +++ vpu_wait(s->vpu_id); +++ rpi_execute_pred_cmds(s); +++ rpi_execute_dblk_cmds(s); +++ } +++#endif +++ ++ if (x_ctb + ctb_size >= s->ps.sps->width && ++ y_ctb + ctb_size >= s->ps.sps->height) ++ ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size); ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 0d8dfe9..990bd8c 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -808,6 +808,8 @@ typedef 
struct HEVCLocalContext { ++ #define RPI_MAX_XFM_CMDS (16*3*(RPI_MAX_WIDTH/4)) ++ // Each block can have an intra prediction and a transform_add command ++ #define RPI_MAX_PRED_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) +++// Worst case is 16x16 CTUs +++#define RPI_MAX_DEBLOCK_CMDS (RPI_MAX_WIDTH*4/16) ++ ++ #define RPI_CMD_LUMA_UNI 0 ++ #define RPI_CMD_CHROMA_UNI 1 ++@@ -867,6 +869,9 @@ typedef struct HEVCPredCmd { ++ #endif ++ ++ typedef struct HEVCContext { +++#ifdef RPI +++ int dblk_cmds[RPI_MAX_DEBLOCK_CMDS][2]; +++#endif ++ const AVClass *c; // needed by private avoptions ++ AVCodecContext *avctx; ++ ++@@ -891,11 +896,11 @@ typedef struct HEVCContext { ++ GPU_MEM_PTR_T coeffs_buf_accelerated; ++ int16_t *coeffs_buf_arm[4]; ++ unsigned int coeffs_buf_vc[4]; ++- ++ int num_coeffs[4]; ++ int num_xfm_cmds; ++ int num_mv_cmds; ++ int num_pred_cmds; +++ int num_dblk_cmds; ++ int vpu_id; ++ #endif ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index e4c3da7..ea0af91 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -877,8 +877,12 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ if (s->threads_type & FF_THREAD_FRAME ) ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0); ++ } ++- } else if (s->threads_type & FF_THREAD_FRAME && x_end) +++ } else if (s->threads_type & FF_THREAD_FRAME && x_end) { +++ int newh = y + ctb_size - 4; +++ //int currh = s->ref->tf.progress->data[0]; +++ //if (((y + ctb_size)&63)==0) ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); +++ } ++ } ++ ++ void ff_hevc_hls_filters(HEVCContext *s, int x_ctb, int y_ctb, int ctb_size) ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 85f49db..3b6dae7 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -105,7 +105,7 @@ struct GPU ++ static pthread_mutex_t gpu_mutex = PTHREAD_MUTEX_INITIALIZER; ++ static volatile struct GPU* gpu = NULL; ++ ++-#ifdef RPI_TIME_TOTAL_QPU +++#if 
defined(RPI_TIME_TOTAL_QPU) || defined(RPI_TIME_TOTAL_VPU) ++ static unsigned int Microseconds(void) { ++ struct timespec ts; ++ unsigned int x; ++-- ++2.7.4 ++ ++ ++From 7a443df9115f21b4428de378bd146dcdba3dd42a Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 7 May 2015 16:47:47 +0100 ++Subject: [PATCH 14/68] Added option to disable deblocking for non-ref frames ++ ++--- ++ libavcodec/hevc_filter.c | 10 ++++++++++ ++ 1 file changed, 10 insertions(+) ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index ea0af91..2cdd621 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -25,6 +25,8 @@ ++ //#define DISABLE_SAO ++ //#define DISABLE_DEBLOCK ++ //#define DISABLE_STRENGTHS +++// define DISABLE_DEBLOCK_NONREF for a 6% speed boost (by skipping deblocking on unimportant frames) +++//#define DISABLE_DEBLOCK_NONREF ++ ++ #include "libavutil/common.h" ++ #include "libavutil/internal.h" ++@@ -504,6 +506,14 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->ps.sps->pcm.loop_filter_disable_flag) || ++ s->ps.pps->transquant_bypass_enable_flag; ++ +++#ifdef DISABLE_DEBLOCK_NONREF +++ if ( s->nal_unit_type == NAL_TRAIL_N || +++ s->nal_unit_type == NAL_TSA_N || +++ s->nal_unit_type == NAL_STSA_N || +++ s->nal_unit_type == NAL_RADL_N || +++ s->nal_unit_type == NAL_RASL_N ) +++ return; // Don't deblock non-reference frames +++#endif ++ #ifdef DISABLE_DEBLOCK ++ return; ++ #endif ++-- ++2.7.4 ++ ++ ++From 9606e160a582db64ccf981d971cdc258d8cc02f7 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Mon, 11 May 2015 10:00:27 +0100 ++Subject: [PATCH 15/68] Moved buffers to VPU memory ++ ++--- ++ libavcodec/hevc_filter.c | 17 +++++++++++++- ++ libavcodec/utils.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ ++ libavutil/buffer.c | 6 +++++ ++ libavutil/buffer.h | 3 +++ ++ 4 files changed, 84 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c 
++index 2cdd621..e1b32d4 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -866,6 +866,13 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ #undef CB ++ #undef CR ++ +++#ifdef RPI_INTER_QPU +++static void flush_buffer(AVBufferRef *bref) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); +++ gpu_cache_flush(p); +++} +++#endif +++ ++ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ { ++ int x_end = x >= s->ps.sps->width - ctb_size; ++@@ -888,9 +895,17 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0); ++ } ++ } else if (s->threads_type & FF_THREAD_FRAME && x_end) { ++- int newh = y + ctb_size - 4; +++ //int newh = y + ctb_size - 4; ++ //int currh = s->ref->tf.progress->data[0]; ++ //if (((y + ctb_size)&63)==0) +++ if (!( s->nal_unit_type == NAL_TRAIL_N || +++ s->nal_unit_type == NAL_TSA_N || +++ s->nal_unit_type == NAL_STSA_N || +++ s->nal_unit_type == NAL_RADL_N || +++ s->nal_unit_type == NAL_RASL_N )) { +++ flush_buffer(s->frame->buf[1]); +++ flush_buffer(s->frame->buf[2]); +++ } ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++ } ++diff --git a/libavcodec/utils.c b/libavcodec/utils.c ++index f7adb52..708526e 100644 ++--- a/libavcodec/utils.c +++++ b/libavcodec/utils.c ++@@ -26,6 +26,12 @@ ++ */ ++ ++ #include "config.h" +++ +++#ifdef RPI +++// Move video buffers to GPU memory +++#define RPI_GPU_BUFFERS +++#endif +++ ++ #include "libavutil/atomic.h" ++ #include "libavutil/attributes.h" ++ #include "libavutil/avassert.h" ++@@ -64,6 +70,10 @@ ++ #include "libavutil/ffversion.h" ++ const char av_codec_ffversion[] = "FFmpeg version " FFMPEG_VERSION; ++ +++#ifdef RPI_GPU_BUFFERS +++#include "rpi_qpu.h" +++#endif +++ ++ #if HAVE_PTHREADS || HAVE_W32THREADS || HAVE_OS2THREADS ++ static int default_lockmgr_cb(void **arg, enum AVLockOp op) ++ { ++@@ -503,6 +513,47 @@ int 
avcodec_fill_audio_frame(AVFrame *frame, int nb_channels, ++ return ret; ++ } ++ +++#ifdef RPI_GPU_BUFFERS +++static void rpi_buffer_default_free(void *opaque, uint8_t *data) +++{ +++ GPU_MEM_PTR_T *p = opaque; +++ gpu_free(p); +++ av_free(p); +++} +++ +++static AVBufferRef *rpi_buffer_alloc(int size) +++{ +++ AVBufferRef *ret = NULL; +++ uint8_t *data = NULL; +++ GPU_MEM_PTR_T *p; +++ +++ static int total=0; +++ total+=size; +++ +++ p = av_malloc(sizeof *p); +++ if (!p) +++ return NULL; +++ +++ if (gpu_malloc_cached(size,p)<0) // Change this line to choose cached or uncached memory. The caching here refers to the ARM data cache. +++ return NULL; +++ +++ data = p->arm; +++ printf("Rpi alloc %d/%d ARM=%p VC=%x->%x\n",size,total,p->arm,p->vc,p->vc+size); +++ //memset(data, 64, size); +++ +++ if (!data) +++ return NULL; +++ +++ ret = av_buffer_create(data, size, rpi_buffer_default_free, p, 0); +++ if (!ret) { +++ gpu_free(p); +++ av_freep(&p); +++ } +++ +++ return ret; +++} +++#endif +++ ++ static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame) ++ { ++ FramePool *pool = avctx->internal->pool; ++@@ -550,6 +601,14 @@ static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame) ++ av_buffer_pool_uninit(&pool->pools[i]); ++ pool->linesize[i] = linesize[i]; ++ if (size[i]) { +++#ifdef RPI_GPU_BUFFERS +++ if (avctx->codec_id == AV_CODEC_ID_HEVC) +++ pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1, +++ CONFIG_MEMORY_POISONING ? +++ NULL : +++ rpi_buffer_alloc); +++ else +++#endif ++ pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1, ++ CONFIG_MEMORY_POISONING ? 
++ NULL : ++diff --git a/libavutil/buffer.c b/libavutil/buffer.c ++index 694e116..203ca7b 100644 ++--- a/libavutil/buffer.c +++++ b/libavutil/buffer.c ++@@ -425,3 +425,9 @@ AVBufferRef *av_buffer_pool_get(AVBufferPool *pool) ++ ++ return ret; ++ } +++ +++// Return the opaque for the underlying frame (gives us a GPU_MEM_PTR_T) +++void *av_buffer_pool_opaque(AVBufferRef *ref) { +++ BufferPoolEntry *buf = av_buffer_get_opaque(ref); +++ return buf->opaque; +++} ++diff --git a/libavutil/buffer.h b/libavutil/buffer.h ++index 0c0ce12..82e0bc3 100644 ++--- a/libavutil/buffer.h +++++ b/libavutil/buffer.h ++@@ -283,6 +283,9 @@ void av_buffer_pool_uninit(AVBufferPool **pool); ++ */ ++ AVBufferRef *av_buffer_pool_get(AVBufferPool *pool); ++ +++// Return the opaque for the underlying frame +++void *av_buffer_pool_opaque(AVBufferRef *ref); +++ ++ /** ++ * @} ++ */ ++-- ++2.7.4 ++ ++ ++From f56515b9a720c829ba3ddf6da4232a91b13e0f03 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Mon, 11 May 2015 14:04:37 +0100 ++Subject: [PATCH 16/68] Prepared QPU execute code ++ ++--- ++ libavcodec/hevc.c | 227 ++++++++++++++++++++++++++++++++++++++++------- ++ libavcodec/hevc.h | 22 ++++- ++ libavcodec/hevc_filter.c | 7 +- ++ libavcodec/rpi_qpu.c | 55 +++++++++++- ++ libavcodec/rpi_qpu.h | 2 + ++ 5 files changed, 276 insertions(+), 37 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 29f8415..66ed37a 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -42,17 +42,45 @@ ++ #include "profiles.h" ++ ++ #ifdef RPI ++-#include "rpi_qpu.h" ++-// For some unknown reason, the code seems to crash if I do a late malloc ++-#define EARLY_MALLOC ++-// Move Inter prediction into separate pass ++-#define RPI_INTER +++ #include "rpi_qpu.h" +++ // For some unknown reason, the code seems to crash if I do a late malloc +++ #define EARLY_MALLOC +++ // Move Inter prediction into separate pass +++ #define RPI_INTER ++ #endif ++ ++ // #define DISABLE_MC ++ ++ const uint8_t 
ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 }; ++ +++ +++#ifdef RPI_INTER_QPU +++ +++#define RPI_CHROMA_COMMAND_WORDS 12 +++// The QPU code for UV blocks only works up to a block width of 8 +++#define RPI_CHROMA_BLOCK_WIDTH 8 +++ +++#define ENCODE_COEFFS(c0, c1, c2, c3) (((-c0) & 0xff) | ((-c1) & 0xff) << 8 | ((-c2) & 0xff) << 16 | ((-c3) & 0xff) << 24) +++ +++// TODO Chroma only needs 4 taps +++static uint32_t rpi_filter_coefs[8][2] = { +++ { ENCODE_COEFFS( 0, 0, 0, 128), ENCODE_COEFFS( 0, 0, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -2, 58), ENCODE_COEFFS( 10, -2, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -4, 54), ENCODE_COEFFS( 16, -2, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -6, 46), ENCODE_COEFFS( 28, -4, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -4, 36), ENCODE_COEFFS( 36, -4, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -4, 28), ENCODE_COEFFS( 46, -6, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -2, 16), ENCODE_COEFFS( 54, -4, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -2, 10), ENCODE_COEFFS( 58, -2, 0, 0 ) } +++}; +++ +++static uint32_t get_vc_address(AVBufferRef *bref) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); +++ return p->vc; +++} +++ +++#endif +++ ++ /** ++ * NOTE: Each function hls_foo correspond to the function foo in the ++ * specification (HLS stands for High Level Syntax). 
++@@ -66,6 +94,7 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ static void pic_arrays_free(HEVCContext *s) ++ { ++ #ifdef RPI +++ ++ #ifdef EARLY_MALLOC ++ #else ++ printf("pic_arrays_free\n"); ++@@ -1982,6 +2011,43 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ s->sh.luma_offset_l0[current_mv.ref_idx[0]]); ++ ++ if (s->ps.sps->chroma_format_idc) { +++#ifdef RPI_INTER_QPU +++ if (s->enable_rpi) { +++ int reflist = 0; +++ int hshift = s->ps.sps->hshift[1]; +++ int vshift = s->ps.sps->vshift[1]; +++ const Mv *mv = ¤t_mv.mv[reflist]; +++ intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift); +++ intptr_t my = av_mod_uintp2(mv->y, 2 + vshift); +++ intptr_t _mx = mx << (1 - hshift); +++ intptr_t _my = my << (1 - vshift); // Fractional part of motion vector +++ +++ int x1_c = x0_c + (mv->x >> (2 + hshift)); +++ int y1_c = y0_c + (mv->y >> (2 + hshift)); +++ int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width +++ +++ uint32_t *u = s->u_mvs[chan & 7]; +++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { +++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); +++ *u++ = ( (nPbW_csh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] +++ *u++ = rpi_filter_coefs[_mx][0]; +++ *u++ = rpi_filter_coefs[_mx][1]; +++ *u++ = rpi_filter_coefs[_my][0]; +++ *u++ = rpi_filter_coefs[_my][1]; +++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ } +++ } +++ s->u_mvs[chan & 7] = u; +++ return; +++ } +++#endif ++ RPI_REDIRECT(chroma_mc_uni)(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1], ++ 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, ++ s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]); ++@@ -2632,6 +2698,54 @@ static void rpi_execute_inter_cmds(HEVCContext *s) ++ ++ #endif ++ +++#ifdef RPI_INTER_QPU +++static void rpi_inter_clear(HEVCContext *s) +++{ +++ int i; +++ int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1]; +++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1]; +++ for(i=0;i<8;i++) { +++ s->u_mvs[i] = s->mvs_base[i]; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = pic_width; +++ *s->u_mvs[i]++ = pic_height; +++ *s->u_mvs[i]++ = s->frame->linesize[1]; +++ *s->u_mvs[i]++ = s->frame->linesize[2]; +++ s->u_mvs[i] += 3; // 
Padding words +++ } +++} +++ +++static void rpi_execute_inter_qpu(HEVCContext *s) +++{ +++ int k; +++ uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr.vc; +++ +++ if (s->sh.slice_type == I_SLICE) +++ return; +++ for(k=0;k<8;k++) { +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ } +++ +++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore +++ +++ qpu_run_shader8(qpu_get_fn(QPU_MC_SETUP_UV), +++ (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[2 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[3 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) +++ ); +++} +++#endif +++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ { ++ HEVCContext *s = avctxt->priv_data; ++@@ -2658,6 +2772,10 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ } ++ } ++ +++#ifdef RPI_INTER_QPU +++ rpi_inter_clear(s); +++#endif +++ ++ while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) { ++ int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts]; ++ ++@@ -2679,19 +2797,30 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->dblk_cmds[s->num_dblk_cmds][0] = x_ctb; ++ s->dblk_cmds[s->num_dblk_cmds++][1] = y_ctb; ++ if ( (((y_ctb + 
ctb_size)&63) == 0) && x_ctb + ctb_size >= s->ps.sps->width) { ++- int x; +++#ifdef RPI_INTER_QPU +++ // Kick off inter prediction on QPUs +++ rpi_execute_inter_qpu(s); +++#endif ++ // Transform all blocks ++ //printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++- ++ rpi_execute_transform(s); ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); ++ // Wait for transform completion ++ vpu_wait(s->vpu_id); +++ +++ // Copy back reconstructed data +++ //memcpy(s->frame->data[0],s->dummy.arm,2048*64); +++ //memcpy(s->frame->data[1],s->dummy.arm,1024*32); +++ //memcpy(s->frame->data[2],s->dummy.arm,1024*32); +++ ++ // Perform intra prediction and residual reconstruction ++ rpi_execute_pred_cmds(s); ++ // Perform deblocking for CTBs in this row ++ rpi_execute_dblk_cmds(s); +++#ifdef RPI_INTER_QPU +++ rpi_inter_clear(s); +++#endif ++ } ++ } ++ #endif ++@@ -2712,6 +2841,9 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ #ifdef RPI ++ if (s->enable_rpi && s->num_dblk_cmds) { +++#ifdef RPI_INTER_QPU +++ rpi_execute_inter_qpu(s); +++#endif ++ rpi_execute_transform(s); ++ rpi_execute_inter_cmds(s); ++ vpu_wait(s->vpu_id); ++@@ -3451,6 +3583,14 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ av_freep(&s->unif_xfm_cmds); ++ av_freep(&s->univ_pred_cmds); ++ +++#ifdef RPI_INTER_QPU +++ if (s->unif_mvs) { +++ gpu_free( &s->unif_mvs_ptr ); +++ s->unif_mvs = 0; +++ } +++#endif +++ //gpu_free(&s->dummy); +++ ++ #ifdef EARLY_MALLOC ++ printf("hevc_decode_free\n"); ++ if (s->coeffs_buf_arm[0]) { ++@@ -3541,34 +3681,59 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ if (!s->univ_pred_cmds) ++ goto fail; ++ ++- s->coeffs_buf_arm[0] = 0; ++- s->coeffs_buf_arm[2] = 0; +++#ifdef RPI_INTER_QPU +++ // We divide the image into blocks 256 wide and 64 high +++ // We support up to 2048 widths +++ // We compute the number of chroma motion 
vector commands for 4:4:4 format and 4x4 chroma blocks - assuming all blocks are B predicted +++ // Also add space for the startup command for each stream. +++ +++ { +++ int uv_commands_per_qpu = (1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS; +++ uint32_t *p; +++ gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++ s->unif_mvs = (uint32_t *) s->unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC +++ +++ // Set up initial locations for uniform streams +++ p = s->unif_mvs; +++ for(i = 0; i < 8; i++) { +++ s->mvs_base[i] = p; +++ p += uv_commands_per_qpu; +++ } +++ s->mc_filter_uv = qpu_get_fn(QPU_MC_FILTER_UV); +++ s->mc_filter_uv_b = qpu_get_fn(QPU_MC_FILTER_UV_B); +++ +++ } +++#endif +++ //gpu_malloc_uncached(2048*64,&s->dummy); ++ ++ #ifdef EARLY_MALLOC ++- int coeffs_in_ctb = 64*64; ++- int coefs_per_row = (2048/64) * coeffs_in_ctb * 3; // Allow space for chroma ++- printf("Allocated %d\n",coefs_per_row); ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); ++- s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; ++- if (!s->coeffs_buf_arm[0]) ++- goto fail; ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); ++- s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; ++- s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; ++- if (!s->coeffs_buf_arm[2]) ++- goto fail; ++- s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; ++- s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; ++- printf("Done\n"); +++ { +++ int coeffs_in_ctb = 64*64; +++ int coefs_per_row = (2048/64) * coeffs_in_ctb * 3; // Allow space for chroma +++ s->coeffs_buf_arm[0] = 0; +++ s->coeffs_buf_arm[2] = 0; +++ printf("Allocated %d\n",coefs_per_row); +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); +++ s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; +++ if (!s->coeffs_buf_arm[0]) +++ 
goto fail; +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); +++ s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; +++ s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; +++ if (!s->coeffs_buf_arm[2]) +++ goto fail; +++ s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; +++ s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; +++ printf("Done\n"); ++ #ifdef RPI_PRECLEAR ++- //memset(s->coeffs_buf_arm[0],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[0], coefs_per_row); ++- //memset(s->coeffs_buf_arm[2],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[2], coefs_per_row); ++- //memset(s->coeffs_buf_arm[3],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[3], coefs_per_row); +++ //memset(s->coeffs_buf_arm[0],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[0], coefs_per_row); +++ //memset(s->coeffs_buf_arm[2],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[2], coefs_per_row); +++ //memset(s->coeffs_buf_arm[3],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[3], coefs_per_row); ++ #endif ++- +++ } ++ #endif ++ ++ s->enable_rpi = 0; ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 990bd8c..da345f6 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -42,7 +42,11 @@ ++ ++ // define RPI to split the CABAC/prediction/transform into separate stages ++ #ifdef RPI ++-#include "rpi_qpu.h" +++ +++ #include "rpi_qpu.h" +++ // Use QPU for inter prediction +++ //#define RPI_INTER_QPU +++ ++ #endif ++ ++ #define MAX_DPB_SIZE 16 // A.4.1 ++@@ -888,7 +892,7 @@ typedef struct HEVCContext { ++ ++ #ifdef RPI ++ int enable_rpi; ++- HEVCMvCmd *unif_mv_cmds; +++ HEVCMvCmd *unif_mv_cmds; // TODO rename ++ HEVCXfmCmd *unif_xfm_cmds; ++ HEVCPredCmd *univ_pred_cmds; ++ int buf_width; ++@@ -902,6 +906,20 @@ typedef struct HEVCContext { ++ int num_pred_cmds; ++ 
int num_dblk_cmds; ++ int vpu_id; +++ //GPU_MEM_PTR_T dummy; +++#ifdef RPI_INTER_QPU +++ GPU_MEM_PTR_T unif_mvs_ptr; +++ uint32_t *unif_mvs; // Base of memory for motion vector commands +++ +++ // _base pointers are to the start of the row +++ uint32_t *mvs_base[8]; +++ // these pointers are to the next free space +++ uint32_t *u_mvs[8]; +++ // Function pointers +++ uint32_t mc_filter_uv; +++ uint32_t mc_filter_uv_b; +++#endif +++ ++ #endif ++ ++ uint8_t *cabac_state; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index e1b32d4..5b3d759 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -903,8 +903,11 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ s->nal_unit_type == NAL_STSA_N || ++ s->nal_unit_type == NAL_RADL_N || ++ s->nal_unit_type == NAL_RASL_N )) { ++- flush_buffer(s->frame->buf[1]); ++- flush_buffer(s->frame->buf[2]); +++ //flush_buffer(s->frame->buf[1]); +++ //flush_buffer(s->frame->buf[2]); +++ //memcpy(s->dummy.arm,s->frame->data[0],2048*64); +++ //memcpy(s->dummy.arm,s->frame->data[1],1024*32); +++ //memcpy(s->dummy.arm,s->frame->data[2],1024*32); ++ } ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 3b6dae7..e4dd58a 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -3,7 +3,7 @@ ++ // This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. 
++ #define RPI_USE_VCSM ++ // define RPI_TIME_TOTAL_QPU to print out how much time is spent in the QPU code ++-//#define RPI_TIME_TOTAL_QPU +++#define RPI_TIME_TOTAL_QPU ++ // define RPI_TIME_TOTAL_VPU to print out how much time is spent in the VPI code ++ //#define RPI_TIME_TOTAL_VPU ++ // define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion ++@@ -30,7 +30,7 @@ ++ #endif ++ ++ // On Pi2 there is no way to access the VPU L2 cache ++-// GPU_MEM_FLG should be 4 for uncached memory. +++// GPU_MEM_FLG should be 4 for uncached memory. (Or C for alias to allocate in the VPU L2 cache) ++ // However, if using VCSM allocated buffers, need to use C at the moment because VCSM does not allocate uncached memory correctly ++ // The QPU crashes if we mix L2 cached and L2 uncached accesses due to a HW bug. ++ #define GPU_MEM_FLG 0xC ++@@ -549,6 +549,54 @@ void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int un ++ gpu_unlock(); ++ } ++ +++// Run a program on 8 QPUs with the given code and uniform stream (given in GPU addresses) +++void qpu_run_shader8(int code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8) +++{ +++ int i; +++#ifdef RPI_TIME_TOTAL_QPU +++ static int last_time=0; +++ static long long on_time=0; +++ static long long off_time=0; +++ int start_time; +++ int end_time; +++ static int count=0; +++#endif +++ +++ gpu_lock(); +++#ifdef RPI_TIME_TOTAL_QPU +++ start_time = Microseconds(); +++ if (last_time==0) +++ last_time = start_time; +++ off_time += start_time-last_time; +++#endif +++ for(i=0;i<8;i++) { +++ gpu->mail[i*2 + 1] = code; +++ } +++ gpu->mail[0 ] = unifs1; +++ gpu->mail[2 ] = unifs2; +++ gpu->mail[4 ] = unifs3; +++ gpu->mail[6 ] = unifs4; +++ gpu->mail[8 ] = unifs5; +++ gpu->mail[10] = unifs6; +++ gpu->mail[12] = unifs7; +++ gpu->mail[14] = unifs8; +++ execute_qpu( +++ gpu->mb, +++ 8 /* Number of QPUs */, +++ gpu->vc + offsetof(struct 
GPU, mail), +++ 1 /* no flush */, // Don't flush VPU L1 cache +++ 5000 /* timeout ms */); +++#ifdef RPI_TIME_TOTAL_QPU +++ end_time = Microseconds(); +++ last_time = end_time; +++ on_time += end_time - start_time; +++ count++; +++ if ((count&0x7f)==0) +++ printf("On=%dms, Off=%dms\n",(int)(on_time/1000),(int)(off_time/1000)); +++#endif +++ gpu_unlock(); +++} +++ ++ unsigned int qpu_get_fn(int num) { ++ // Make sure that the gpu is initialized ++ unsigned int *fn; ++@@ -585,6 +633,9 @@ unsigned int qpu_get_fn(int num) { ++ case QPU_MC_FILTER_UV_B: ++ fn = mc_filter_uv_b; ++ break; +++ case QPU_MC_INTERRUPT_EXIT8: +++ fn = mc_interrupt_exit8; +++ break; ++ case QPU_MC_END: ++ fn = mc_end; ++ break; ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 3526fce..2b22d98 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -16,6 +16,7 @@ extern void gpu_free(GPU_MEM_PTR_T *p); ++ extern void gpu_cache_flush(GPU_MEM_PTR_T *p); ++ ++ // QPU specific functions +++extern void qpu_run_shader8(int code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); ++ extern void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, int unifs9, int unifs10, int unifs11, int unifs12); ++ ++ enum { ++@@ -28,6 +29,7 @@ enum { ++ QPU_MC_SETUP_UV, ++ QPU_MC_FILTER_UV, ++ QPU_MC_FILTER_UV_B, +++ QPU_MC_INTERRUPT_EXIT8, ++ QPU_MC_END ++ }; ++ extern unsigned int qpu_get_fn(int num); ++-- ++2.7.4 ++ ++ ++From bd651e1569ebe0cdc41a6be169e139758cce069d Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 13 May 2015 11:47:23 +0100 ++Subject: [PATCH 17/68] Drafted chroma interpolation on QPUs ++ ++--- ++ libavcodec/hevc.c | 5 ++- ++ libavcodec/hevc.h | 2 +- ++ libavcodec/hevc_filter.c | 6 ++- ++ libavcodec/rpi_qpu.c | 101 +++++++++++++++++++++++++++++++++++++++++++-- ++ libavcodec/rpi_qpu.h | 1 + ++ 
libavcodec/rpi_shader.c | 42 +++++++++---------- ++ libavcodec/rpi_shader.qasm | 42 +++++++++---------- ++ 7 files changed, 149 insertions(+), 50 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 66ed37a..d5ea45e 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -60,11 +60,11 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++ ++-#define ENCODE_COEFFS(c0, c1, c2, c3) (((-c0) & 0xff) | ((-c1) & 0xff) << 8 | ((-c2) & 0xff) << 16 | ((-c3) & 0xff) << 24) +++#define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++ // TODO Chroma only needs 4 taps ++ static uint32_t rpi_filter_coefs[8][2] = { ++- { ENCODE_COEFFS( 0, 0, 0, 128), ENCODE_COEFFS( 0, 0, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, 0, 64), ENCODE_COEFFS( 0, 0, 0, 0 ) }, ++ { ENCODE_COEFFS( 0, 0, -2, 58), ENCODE_COEFFS( 10, -2, 0, 0 ) }, ++ { ENCODE_COEFFS( 0, 0, -4, 54), ENCODE_COEFFS( 16, -2, 0, 0 ) }, ++ { ENCODE_COEFFS( 0, 0, -6, 46), ENCODE_COEFFS( 28, -4, 0, 0 ) }, ++@@ -2729,6 +2729,7 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ for(k=0;k<8;k++) { ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP); // Also need a dummy for V ++ } ++ ++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index da345f6..2497c47 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -45,7 +45,7 @@ ++ ++ #include 
"rpi_qpu.h" ++ // Use QPU for inter prediction ++- //#define RPI_INTER_QPU +++ // #define RPI_INTER_QPU ++ ++ #endif ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 5b3d759..9b6e26d 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -903,8 +903,10 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ s->nal_unit_type == NAL_STSA_N || ++ s->nal_unit_type == NAL_RADL_N || ++ s->nal_unit_type == NAL_RASL_N )) { ++- //flush_buffer(s->frame->buf[1]); ++- //flush_buffer(s->frame->buf[2]); +++#ifdef RPI_INTER_QPU +++ flush_buffer(s->frame->buf[1]); +++ flush_buffer(s->frame->buf[2]); +++#endif ++ //memcpy(s->dummy.arm,s->frame->data[0],2048*64); ++ //memcpy(s->dummy.arm,s->frame->data[1],1024*32); ++ //memcpy(s->dummy.arm,s->frame->data[2],1024*32); ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index e4dd58a..4d9eda8 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -33,7 +33,8 @@ ++ // GPU_MEM_FLG should be 4 for uncached memory. (Or C for alias to allocate in the VPU L2 cache) ++ // However, if using VCSM allocated buffers, need to use C at the moment because VCSM does not allocate uncached memory correctly ++ // The QPU crashes if we mix L2 cached and L2 uncached accesses due to a HW bug. 
++-#define GPU_MEM_FLG 0xC +++#define GPU_MEM_FLG 0x4 +++// GPU_MEM_MAP is meaningless on the Pi2 and should be left at 0 (On Pi1 it allows ARM to access VPU L2 cache) ++ #define GPU_MEM_MAP 0x0 ++ ++ #define vcos_verify(x) ((x)>=0) ++@@ -165,6 +166,8 @@ static int gpu_init(volatile struct GPU **gpu) { ++ ptr->vc_handle = handle; ++ ptr->vc = vc; ++ +++ printf("GPU allocated at 0x%x\n",vc); +++ ++ *gpu = ptr; ++ ++ // Now copy over the QPU code into GPU memory ++@@ -304,10 +307,13 @@ int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p) { ++ ++ static void gpu_term(void) ++ { ++- int mb = gpu->mb; ++- unsigned handle = gpu->vc_handle; +++ int mb; +++ unsigned handle; +++ ++ if (gpu==NULL) ++ return; +++ mb = gpu->mb; +++ handle = gpu->vc_handle; ++ ++ #ifdef RPI_ASYNC ++ { ++@@ -648,6 +654,95 @@ unsigned int qpu_get_fn(int num) { ++ } ++ ++ #if 0 +++typedef unsigned int uint32_t; +++ +++typedef struct mvs_s { +++ GPU_MEM_PTR_T unif_mvs_ptr; +++ uint32_t *unif_mvs; // Base of memory for motion vector commands +++ +++ // _base pointers are to the start of the row +++ uint32_t *mvs_base[8]; +++ // these pointers are to the next free space +++ uint32_t *u_mvs[8]; +++ +++} HEVCContext; +++ +++#define RPI_CHROMA_COMMAND_WORDS 12 +++ +++static void rpi_inter_clear(HEVCContext *s) +++{ +++ int i; +++ for(i=0;i<8;i++) { +++ s->u_mvs[i] = s->mvs_base[i]; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 128; // w +++ *s->u_mvs[i]++ = 128; // h +++ *s->u_mvs[i]++ = 128; // stride u +++ *s->u_mvs[i]++ = 128; // stride v +++ s->u_mvs[i] += 3; // Padding words +++ } +++} +++ +++static void rpi_execute_inter_qpu(HEVCContext *s) +++{ +++ int k; +++ uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr.vc; +++ +++ for(k=0;k<8;k++) { +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP); // A 
dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP); // dummy location for V +++ } +++ +++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore +++ +++ qpu_run_shader8(qpu_get_fn(QPU_MC_SETUP_UV), +++ (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[2 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[3 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) +++ ); +++} +++ +++void rpi_test_qpu(void) +++{ +++ HEVCContext mvs; +++ HEVCContext *s = &mvs; +++ int i; +++ int uv_commands_per_qpu = (1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS; +++ uint32_t *p; +++ printf("Allocate memory\n"); +++ gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++ s->unif_mvs = (uint32_t *) s->unif_mvs_ptr.arm; +++ +++ // Set up initial locations for uniform streams +++ p = s->unif_mvs; +++ for(i = 0; i < 8; i++) { +++ s->mvs_base[i] = p; +++ p += uv_commands_per_qpu; +++ } +++ // Now run a simple program that should just quit immediately after a single texture fetch +++ rpi_inter_clear(s); +++ for(i=0;i<4;i++) { +++ printf("Launch QPUs\n"); +++ rpi_execute_inter_qpu(s); +++ printf("Done\n"); +++ } +++ printf("Free memory\n"); +++ gpu_free(&s->unif_mvs_ptr); +++ return; +++} +++#endif +++ +++#if 0 ++ ++ int32_t hcoeffs[] = {-4, 10, -21, 70, 90, -24, 11, -4}; ++ //int32_t hcoeffs[] = 
{1, 1, 1, 1, 1, 1, 1, 1}; ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 2b22d98..f9ad333 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -18,6 +18,7 @@ extern void gpu_cache_flush(GPU_MEM_PTR_T *p); ++ // QPU specific functions ++ extern void qpu_run_shader8(int code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); ++ extern void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, int unifs9, int unifs10, int unifs11, int unifs12); +++extern void rpi_test_qpu(void); ++ ++ enum { ++ QPU_MC_SETUP, ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 41cc2e1..d7ed297 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -152,23 +152,23 @@ unsigned int rpi_shader[] = { ++ /* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++ /* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++ /* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000400] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000408] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-/* [0x00000410] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000400] */ 0x55015fc6, 0x100248a2, // mov r2, rb21 ; mul24 r2, r0, ra0 +++/* [0x00000408] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000410] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++ /* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000420] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000420] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ /* [0x00000428] 
*/ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000430] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000430] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ /* [0x00000438] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000440] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000440] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++ /* [0x00000448] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000450] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000450] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++ /* [0x00000458] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000460] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000460] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++ /* [0x00000468] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000470] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000470] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++ /* [0x00000478] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000480] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000480] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++ /* [0x00000488] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++ /* [0x00000490] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++ /* [0x00000498] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++@@ -179,20 +179,20 @@ unsigned int rpi_shader[] = { ++ /* [0x000004c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++ /* [0x000004c8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++ /* 
[0x000004d0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x000004d8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x000004d8] */ 0x8f54e1f6, 0xd0024821, // asr r0, r0, 14 ; mov r1, ra21 ++ /* [0x000004e0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x000004e8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x000004f0] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x000004f8] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000500] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000508] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000510] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000518] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000520] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000528] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x000004e8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x000004f0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x000004f8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000500] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000508] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000510] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000518] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000520] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000528] */ 0x8c9f223f, 0x100a0867, // add.ifnn r1, r1, r0 ; mov -, vw_wait ++ /* [0x00000530] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ /* [0x00000538] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop 
++-/* [0x00000540] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x00000540] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++ /* [0x00000548] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++ /* [0x00000550] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++ /* [0x00000558] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 6851e83..02fdcb2 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -270,23 +270,23 @@ add t0s, ra_x2_base, r2 ++ ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++mov r2, rb21 ; mul24 r2, r0, ra0 +++nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++nop ; mul24 r3, ra1 << 1, r0 << 1 ++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++ nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++ nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++ nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++ nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-sub r0, r2, r3 +++add r0, r2, r3 ++ ++ mov r3, rb31 ++ ++@@ -302,23 +302,23 @@ sub.setf -, r3, 8 ; mov r1, ra22 ++ # apply horizontal filter ++ brr.anyn -, r:uvloop ++ max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to 
zero ++-asr r0, r0, 15 ; mov r1, ra21 +++asr r0, r0, 14 ; mov r1, ra21 ++ min.setf ra15, r0, rb22 ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r0, ra14, rb14 ++-sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-sub.ifnn r1, r1, r0 ; mov -, vw_wait +++nop ; mul24 r1, ra14, rb14 +++nop ; mul24 r0, ra13, rb13 +++add r1, r1, r0 ; mul24 r0, ra12, rb12 +++add r1, r1, r0 ; mul24 r0, ra11, rb11 +++add r1, r1, r0 ; mul24 r0, ra10, rb10 +++add r1, r1, r0 ; mul24 r0, ra9, rb9 +++add r1, r1, r0 ; mul24 r0, ra8, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb15 +++add.ifnn r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ brr.anyn -, r:uvloop ++-asr r1, r1, 15 +++asr r1, r1, 14 ++ min r1, r1, rb22 ++ max vpm, r1, 0 ++ ++-- ++2.7.4 ++ ++ ++From 61628063461ee5d891af6dbedfd495efcf464012 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 13 May 2015 13:54:11 +0100 ++Subject: [PATCH 18/68] Fixed chroma inter prediction ++ ++--- ++ libavcodec/hevc.c | 8 +- ++ libavcodec/hevc.h | 2 +- ++ libavcodec/rpi_shader.c | 1170 ++++++++++++++++++++++---------------------- ++ libavcodec/rpi_shader.h | 22 +- ++ libavcodec/rpi_shader.qasm | 24 +- ++ 5 files changed, 617 insertions(+), 609 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index d5ea45e..d6d78ee 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -57,9 +57,11 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ #ifdef RPI_INTER_QPU ++ ++ #define RPI_CHROMA_COMMAND_WORDS 12 +++#define UV_COMMANDS_PER_QPU ((1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS) ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++ +++ ++ #define 
ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++ // TODO Chroma only needs 4 taps ++@@ -2024,7 +2026,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ ++ int x1_c = x0_c + (mv->x >> (2 + hshift)); ++ int y1_c = y0_c + (mv->y >> (2 + hshift)); ++- int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width +++ //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width +++ int chan = x0>>8; ++ ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++@@ -2730,6 +2733,7 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP); // Also need a dummy for V +++ assert(s->u_mvs[k] - s->mvs_base[k] < UV_COMMANDS_PER_QPU); ++ } ++ ++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++@@ -3689,7 +3693,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ // Also add space for the startup command for each stream. 
++ ++ { ++- int uv_commands_per_qpu = (1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS; +++ int uv_commands_per_qpu = UV_COMMANDS_PER_QPU; ++ uint32_t *p; ++ gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); ++ s->unif_mvs = (uint32_t *) s->unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 2497c47..d513579 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -45,7 +45,7 @@ ++ ++ #include "rpi_qpu.h" ++ // Use QPU for inter prediction ++- // #define RPI_INTER_QPU +++ #define RPI_INTER_QPU ++ ++ #endif ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index d7ed297..831633b 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -33,7 +33,7 @@ unsigned int rpi_shader[] = { ++ /* [0x00000040] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++ /* [0x00000048] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++ /* [0x00000050] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x00000058] */ 0x00000040, 0xe0020567, // mov ra21, 64 +++/* [0x00000058] */ 0x00000020, 0xe0020567, // mov ra21, 32 ++ /* [0x00000060] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++ /* [0x00000068] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++ /* [0x00000070] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++@@ -152,7 +152,7 @@ unsigned int rpi_shader[] = { ++ /* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++ /* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++ /* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000400] */ 0x55015fc6, 0x100248a2, // mov r2, rb21 ; mul24 r2, r0, ra0 +++/* [0x00000400] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++ /* [0x00000408] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++ /* [0x00000410] */ 0x4007f030, 0xd00049e3, // nop ; mul24 
r3, ra1 << 1, r0 << 1 ++ /* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++@@ -178,9 +178,9 @@ unsigned int rpi_shader[] = { ++ /* [0x000004b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++ /* [0x000004c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++ /* [0x000004c8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004d0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x000004d8] */ 0x8f54e1f6, 0xd0024821, // asr r0, r0, 14 ; mov r1, ra21 ++-/* [0x000004e0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x000004d0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000004d8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000004e0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ /* [0x000004e8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++ /* [0x000004f0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++ /* [0x000004f8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++@@ -189,400 +189,400 @@ unsigned int rpi_shader[] = { ++ /* [0x00000510] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++ /* [0x00000518] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++ /* [0x00000520] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000528] */ 0x8c9f223f, 0x100a0867, // add.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000528] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++ /* [0x00000530] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000538] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000540] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000548] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000550] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000558] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000560] */ 
0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000568] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000570] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000578] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000580] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000588] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000590] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000598] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000538] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000540] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000548] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000550] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000558] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000560] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000568] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000570] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000578] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000580] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000588] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000590] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000598] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000005a0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000005a8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter ++-/* [0x000005a0] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000005a8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000005b0] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x000005b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000005c0] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x000005c8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000005d0] */ 0x928191f6, 0x10024823, // min 
r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x000005d8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000005e0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x000005e8] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x000005f0] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x000005f8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000600] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000608] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000610] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000618] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000620] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000628] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000630] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000638] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000640] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000648] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000650] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000658] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000660] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000668] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000670] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000678] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000680] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000688] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000690] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000698] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000006b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, 
rb23; mul24 r0, r0, ra22 ++-/* [0x000006b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000006d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000006f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000700] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000708] */ 0x000001d0, 0xf07809e7, // brr.anynn -, r:fast_path ++-/* [0x00000710] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000718] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000720] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000728] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000005b0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000005b8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000005c0] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x000005c8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000005d0] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x000005d8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000005e0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x000005e8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000005f0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x000005f8] */ 0x129d95c0, 0x100208a7, // min r2, r2, 
rb_frame_width_minus_1 +++/* [0x00000600] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x00000608] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000610] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x00000618] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000620] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000628] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000630] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000638] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000640] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000648] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000650] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000658] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000660] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000668] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000670] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000678] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000680] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000688] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000690] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000698] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000006a0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000006c0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d8] */ 0x8f8171f6, 0x10024120, 
// asr ra4, r0, rb23; mov r0, unif +++/* [0x000006e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000700] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000708] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000710] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000718] */ 0x000001d0, 0xf07809e7, // brr.anynn -, r:fast_path +++/* [0x00000720] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000728] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000730] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000738] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :loop ++-/* [0x00000730] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000738] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000740] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000748] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000750] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000758] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000760] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000768] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000770] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000778] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000780] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 
0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000788] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000790] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000798] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000007a0] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000007a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000007b0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000007b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000007c0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000007c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000007d0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000007d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000007e0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000007e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000007f0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000007f8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000800] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x00000808] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000810] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000818] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000820] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000828] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000830] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000838] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000840] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, 
ra22 ++-/* [0x00000848] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:loop ++-/* [0x00000850] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00000858] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00000860] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00000868] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000870] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000878] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000880] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000888] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000890] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000898] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x000008a0] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000008a8] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x000008b0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000008b8] */ 0xfffffe58, 0xf06809e7, // brr.anyn -, r:loop ++-/* [0x000008c0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x000008c8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000008d0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000008d8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000008e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000008e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000008f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000740] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000748] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000750] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; 
mov rb31, r3 +++/* [0x00000758] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000760] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000768] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000770] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000778] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000780] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000788] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000790] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000798] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x000007a0] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000007a8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000007b0] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000007b8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000007c0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000007c8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000007d0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000007d8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000007e0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000007e8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000007f0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000007f8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000800] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* 
[0x00000808] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000810] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000818] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000820] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000828] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000830] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000838] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000840] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000848] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000850] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000858] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:loop +++/* [0x00000860] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00000868] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00000870] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00000878] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000880] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000888] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000890] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000898] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x000008a0] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x000008a8] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x000008b0] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000008b8] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x000008c0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000008c8] */ 0xfffffe58, 0xf06809e7, // brr.anyn -, r:loop +++/* [0x000008d0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x000008d8] */ 
0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000008e0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000008e8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000008f0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000008f8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000900] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // :fast_path ++-/* [0x000008f8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000908] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :fast_loop ++-/* [0x00000900] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000908] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000910] */ 0x95727d9b, 0x1004475f, // mov.ifz ra_y, ra_y_next ; mov rb31, r3 ++-/* [0x00000918] */ 0x95690dbf, 0x10044623, // mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch ++-/* [0x00000920] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000928] */ 0x929de5e4, 0x100248a1, // min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 ++-/* [0x00000930] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000938] */ 0xec414c87, 0x10024e20, // add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000940] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000948] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000950] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000958] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000960] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000968] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000970] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000978] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 
++-/* [0x00000980] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000988] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000990] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000998] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x000009a0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x000009a8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000009b0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000009b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000009c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000009c8] */ 0xffffff18, 0xf06809e7, // brr.anyn -, r:fast_loop ++-/* [0x000009d0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x000009d8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x000009e0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x000009e8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x000009f0] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x000009f8] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000a00] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000a08] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000a10] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000a18] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000a20] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000a28] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000a30] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000a38] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:fast_loop ++-/* [0x00000a40] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x00000a48] */ 0x129d63c0, 0x10020867, // min r1, 
r1, rb22 ++-/* [0x00000a50] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a60] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000910] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000918] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000920] */ 0x95727d9b, 0x1004475f, // mov.ifz ra_y, ra_y_next ; mov rb31, r3 +++/* [0x00000928] */ 0x95690dbf, 0x10044623, // mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch +++/* [0x00000930] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000938] */ 0x929de5e4, 0x100248a1, // min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 +++/* [0x00000940] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000948] */ 0xec414c87, 0x10024e20, // add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 +++/* [0x00000950] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000958] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000960] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000968] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000970] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000978] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000980] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000988] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000990] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000998] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 +++/* [0x000009a0] 
*/ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x000009a8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x000009b0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x000009b8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000009c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000009c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000009d0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000009d8] */ 0xffffff18, 0xf06809e7, // brr.anyn -, r:fast_loop +++/* [0x000009e0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x000009e8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x000009f0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x000009f8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000a00] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000a08] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000a10] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000a18] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000a20] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000a28] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000a30] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000a38] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000a40] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000a48] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:fast_loop +++/* [0x00000a50] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x00000a58] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a60] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a68] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a70] */ 0x159dafc0, 0x10021c67, // mov 
vw_setup, rb26 +++/* [0x00000a78] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a80] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_b ++-/* [0x00000a78] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000a80] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000a88] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x00000a90] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000a98] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x00000aa0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000aa8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x00000ab0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000ab8] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x00000ac0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x00000ac8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x00000ad0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000ad8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000ae0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000ae8] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000af0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000af8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000b00] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000b08] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000b10] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000b18] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000b20] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000b28] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000b30] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000b38] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000b40] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 
++-/* [0x00000b48] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000b50] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000b58] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000b60] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000b68] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000b70] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 ++-/* [0x00000b78] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000b80] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000b88] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000b90] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000b98] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000ba0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ba8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bb0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bb8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000bc0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bc8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bd0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bd8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000be0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000be8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bf0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bf8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000c00] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000c08] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* 
[0x00000c10] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000a88] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000a90] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000a98] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x00000aa0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000aa8] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x00000ab0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000ab8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x00000ac0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000ac8] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x00000ad0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x00000ad8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x00000ae0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000ae8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x00000af0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000af8] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000b00] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000b08] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000b10] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000b18] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000b20] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000b28] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000b30] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000b38] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000b40] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000b48] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000b50] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000b58] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000b60] */ 0x0e9c87c0, 0xd00208e7, // shr r3, 
r3, 8 +++/* [0x00000b68] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000b70] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000b78] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000b80] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00000b88] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000b90] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000b98] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ba0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ba8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000bb0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bb8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bc0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bc8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000bd0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bd8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000be0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000be8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000bf0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bf8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000c00] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000c08] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000c10] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000c18] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000c20] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :bloop ++-/* [0x00000c18] */ 0xcd5117de, 0xa00269e3, // sub.setf 
-, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000c20] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000c28] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000c30] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000c38] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000c40] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000c48] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000c50] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000c58] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000c60] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000c68] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000c70] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000c78] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000c80] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000c88] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000c90] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000c98] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000ca0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000ca8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000cb0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000cb8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000cc0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000cc8] */ 0x4d1ba4f0, 
0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000cd0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000cd8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000ce0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000ce8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x00000cf0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000cf8] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000d00] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000d08] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000d10] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000d18] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000d20] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000d28] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000d30] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:bloop ++-/* [0x00000d38] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00000d40] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00000d48] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00000d50] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000d58] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000d60] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000d68] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000d70] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000d78] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000d80] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000d88] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000d90] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait 
++-/* [0x00000d98] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000da0] */ 0x8fc8f3f6, 0xd0020867, // asr r1, r1, 15 ; mov -, vr_wait ++-/* [0x00000da8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000db0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x00000db8] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:bloop ++-/* [0x00000dc0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000dc8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x00000dd0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x00000dd8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000de0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000de8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000df0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000c28] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000c30] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000c38] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000c40] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000c48] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000c50] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000c58] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000c60] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000c68] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000c70] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000c78] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000c80] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000c88] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, 
ra1 << 1, r0 << 1 +++/* [0x00000c90] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000c98] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000ca0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000ca8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000cb0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000cb8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000cc0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000cc8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000cd0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000cd8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000ce0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000ce8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000cf0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000cf8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000d00] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000d08] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000d10] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000d18] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000d20] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000d28] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000d30] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000d38] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000d40] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:bloop +++/* [0x00000d48] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00000d50] */ 0x8f54f1f6, 0xd0024821, 
// asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00000d58] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00000d60] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000d68] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000d70] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000d78] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000d80] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000d88] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000d90] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000d98] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000da0] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000da8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000db0] */ 0x8fc8f3f6, 0xd0020867, // asr r1, r1, 15 ; mov -, vr_wait +++/* [0x00000db8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000dc0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x00000dc8] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:bloop +++/* [0x00000dd0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000dd8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00000de0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x00000de8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000df0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000df8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000e00] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_honly ++-/* [0x00000df8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000e00] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000e08] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x00000e10] */ 0x0c9a0f80, 0x10020827, // add 
r0, unif, elem_num ++-/* [0x00000e18] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x00000e20] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000e28] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x00000e30] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000e38] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x00000e40] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x00000e48] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x00000e50] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000e58] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000e60] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000e68] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000e70] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000e78] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000e80] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000e88] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000e90] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000e98] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000ea0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000ea8] */ 0x0c9de1c0, 0xd0021467, // add rb17, r0, -2 ++-/* [0x00000eb0] */ 0x919c71c0, 0xd0024812, // shl r0, r0, 7 ; mov rb18,r0 ++-/* [0x00000eb8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000ec0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000ec8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000ed0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000ed8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ee0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ee8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ef0] */ 0x8f8171f6, 0x10024020, 
// asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000ef8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f00] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f08] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f10] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000f18] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000f20] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000f28] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000f30] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000e08] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000e10] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000e18] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x00000e20] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000e28] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x00000e30] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000e38] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x00000e40] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000e48] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x00000e50] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x00000e58] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x00000e60] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000e68] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x00000e70] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000e78] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000e80] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000e88] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000e90] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000e98] 
*/ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000ea0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000ea8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000eb0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000eb8] */ 0x0c9de1c0, 0xd0021467, // add rb17, r0, -2 +++/* [0x00000ec0] */ 0x919c71c0, 0xd0024812, // shl r0, r0, 7 ; mov rb18,r0 +++/* [0x00000ec8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000ed0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000ed8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000ee0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000ee8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ef0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ef8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f00] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000f08] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f10] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f18] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f20] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000f28] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000f30] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000f38] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000f40] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :loop_honly ++-/* [0x00000f38] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000f40] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000f48] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000f50] */ 0x95710dbf, 
0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000f58] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000f60] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000f68] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000f70] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000f78] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000f80] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000f88] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000f90] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000f98] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000fa0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000fa8] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000fb0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000fb8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000fc0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000fc8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000fd0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000fd8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000fe0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000fe8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000ff0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000ff8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001000] */ 0x401f1031, 0xd000c9e3, // nop ; 
mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001008] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001010] */ 0x8d5927f6, 0x100269e1, // sub.setf -, r3, rb18 ; mov r1, ra22 ++-/* [0x00001018] */ 0x559f2fc1, 0x100049e0, // mov -, vw_wait ; mul24 r0, r0, r1 ++-/* [0x00001020] */ 0xfffffef8, 0xf06809e7, // brr.anyn -, r:loop_honly ++-/* [0x00001028] */ 0x0f9cf1c0, 0xd0020827, // asr r0, r0, 15 ++-/* [0x00001030] */ 0x129d61c0, 0x10020827, // min r0, r0, rb22 ++-/* [0x00001038] */ 0x139c01c0, 0xd0020c27, // max vpm, r0, 0 ++-/* [0x00001040] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001048] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001050] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001058] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000f48] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000f50] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000f58] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000f60] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000f68] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000f70] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000f78] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000f80] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000f88] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000f90] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000f98] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000fa0] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000fa8] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, 
r0 << 1 +++/* [0x00000fb0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000fb8] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000fc0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000fc8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000fd0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000fd8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000fe0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000fe8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000ff0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000ff8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001000] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001008] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001010] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001018] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 +++/* [0x00001020] */ 0x8d5927f6, 0x100269e1, // sub.setf -, r3, rb18 ; mov r1, ra22 +++/* [0x00001028] */ 0x559f2fc1, 0x100049e0, // mov -, vw_wait ; mul24 r0, r0, r1 +++/* [0x00001030] */ 0xfffffef8, 0xf06809e7, // brr.anyn -, r:loop_honly +++/* [0x00001038] */ 0x0f9cf1c0, 0xd0020827, // asr r0, r0, 15 +++/* [0x00001040] */ 0x129d61c0, 0x10020827, // min r0, r0, rb22 +++/* [0x00001048] */ 0x139c01c0, 0xd0020c27, // max vpm, r0, 0 +++/* [0x00001050] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001058] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001060] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001068] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit 
++-/* [0x00001060] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001068] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00001070] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001078] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001070] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001078] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++ /* [0x00001080] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00001088] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001090] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00001098] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000010a0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00001090] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001098] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010a0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000010a8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000010b0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_exit1 ++-/* [0x000010a8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000010b0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010b8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010b8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x000010c0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x000010c8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000010d8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000010e0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000010d0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010d8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010e0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000010e8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000010f0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit ++-/* [0x000010e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000010f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* 
[0x000010f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010f8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x00001100] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00001108] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001110] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001118] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001110] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001118] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00001120] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001128] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001130] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++@@ -592,225 +592,227 @@ unsigned int rpi_shader[] = { ++ /* [0x00001150] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001158] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001160] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001168] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00001170] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00001178] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00001168] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001170] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001178] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001180] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00001188] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit4 ++-/* [0x00001180] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001188] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001190] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001190] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x00001198] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x000011a0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000011b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011a8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011b0] 
*/ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x000011b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000011c0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000011c8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000011d0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000011c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000011d8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000011e0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x000011d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000011e0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011e8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x000011f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x000011f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001200] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001208] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001200] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001208] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00001210] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001218] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001220] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001228] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001230] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001238] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00001240] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00001248] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00001238] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001240] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001248] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001250] */ 0x00000001, 0xe00209a7, // mov 
interrupt, 1; nop +++/* [0x00001258] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_setup_uv ++-/* [0x00001250] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00001258] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num ++-/* [0x00001260] */ 0x15827d80, 0x10020767, // mov ra_y, unif ++-/* [0x00001268] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif ++-/* [0x00001270] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00001278] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base ++-/* [0x00001280] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 ++-/* [0x00001288] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++-/* [0x00001290] */ 0x15827d80, 0x10021427, // mov rb16, unif ++-/* [0x00001298] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000012a0] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++-/* [0x000012a8] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++-/* [0x000012b0] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x000012b8] */ 0x00000040, 0xe0020567, // mov ra21, 64 ++-/* [0x000012c0] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x000012c8] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++-/* [0x000012d0] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x000012d8] */ 0x00000040, 0xe0021567, // mov rb21, 64 ++-/* [0x000012e0] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x000012e8] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x000012f0] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x000012f8] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x00001300] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x00001308] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x00001310] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x00001318] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x00001320] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x00001328] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x00001330] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00001338] */ 0x149cf5c0, 
0xd00208a7, // and r2, r2, 15 ++-/* [0x00001340] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00001348] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00001350] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00001358] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00001360] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00001368] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001370] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00001378] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00001380] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00001388] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00001390] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00001398] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x000013a0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x000013a8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x000013b0] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x000013b8] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x000013c0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000013c8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x000013d0] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x000013d8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x000013e0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000013e8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x000013f0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000013f8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x00001400] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00001408] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00001410] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++-/* [0x00001418] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* 
[0x00001420] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x00001428] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x00001430] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00001438] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001440] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001260] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00001268] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num +++/* [0x00001270] */ 0x15827d80, 0x10020767, // mov ra_y, unif +++/* [0x00001278] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif +++/* [0x00001280] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00001288] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base +++/* [0x00001290] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00001298] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 +++/* [0x000012a0] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x000012a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000012b0] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x000012b8] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x000012c0] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x000012c8] */ 0x00000020, 0xe0020567, // mov ra21, 32 +++/* [0x000012d0] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x000012d8] */ 0x00000008, 0xe00205e7, // mov ra23, 8 +++/* [0x000012e0] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x000012e8] */ 0x00000040, 0xe0021567, // mov rb21, 64 +++/* [0x000012f0] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x000012f8] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00001300] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00001308] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x00001310] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x00001318] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x00001320] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x00001328] */ 
0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x00001330] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x00001338] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x00001340] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00001348] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00001350] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00001358] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00001360] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00001368] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00001370] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00001378] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001380] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00001388] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00001390] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00001398] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000013a0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x000013a8] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x000013b0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x000013b8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x000013c0] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x000013c8] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x000013d0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000013d8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x000013e0] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x000013e8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x000013f0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000013f8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x00001400] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x00001408] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x00001410] */ 0x0c9e70c0, 0x10020827, 
// add r0, r0, r3 +++/* [0x00001418] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00001420] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 +++/* [0x00001428] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00001430] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x00001438] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x00001440] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++ /* [0x00001448] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001450] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00001458] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00001460] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00001468] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001470] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00001478] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00001480] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00001450] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001458] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001460] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00001468] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00001470] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00001478] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001480] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00001488] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00001490] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv_b ++-/* [0x00001488] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00001490] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00001498] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000014a0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* 
[0x000014a8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000014b0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000014b8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000014c0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000014c8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000014d0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000014d8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000014e0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000014e8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000014f0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000014f8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00001500] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00001508] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00001510] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00001518] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00001520] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00001528] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00001530] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00001538] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00001540] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001548] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001550] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00001558] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 ++-/* [0x00001560] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00001568] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001570] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001578] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001580] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001588] */ 0x8f8171f6, 
0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00001590] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001598] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015a0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015a8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000015b0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015b8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015c0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015c8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000015d0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015d8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015e0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015e8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x000015f0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000015f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001600] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00001498] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000014a0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000014a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000014b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000014b8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000014c0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000014c8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000014d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000014d8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000014e0] */ 
0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000014e8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000014f0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000014f8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00001500] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001508] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00001510] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00001518] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00001520] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00001528] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00001530] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00001538] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00001540] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00001548] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00001550] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001558] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001560] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00001568] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00001570] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00001578] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001580] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001588] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001590] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001598] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000015a0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015a8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015b0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015b8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* 
[0x000015c0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015c8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015d0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015d8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000015e0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015e8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015f0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015f8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00001600] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00001608] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001610] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00001608] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00001610] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00001618] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00001620] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00001628] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00001630] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00001638] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00001640] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00001648] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00001650] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00001658] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001660] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; 
mul24 r3, r0, ra0 ++-/* [0x00001668] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-/* [0x00001670] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00001678] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00001680] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00001688] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00001690] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00001698] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000016a0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000016a8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000016b0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000016b8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000016c0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000016c8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000016d0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000016d8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x000016e0] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x000016e8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x000016f0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x000016f8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001700] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001708] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001710] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001718] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001720] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00001728] 
*/ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00001730] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00001738] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00001740] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00001748] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00001750] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00001758] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00001760] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00001768] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00001770] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00001778] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00001780] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00001788] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x00001790] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00001798] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x000017a0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000017a8] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x000017b0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000017b8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x000017c0] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x000017c8] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x000017d0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000017d8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000017e0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000017e8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000017f0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000017f8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* 
[0x00001800] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00001808] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001810] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00001618] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001620] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00001628] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00001630] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001638] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00001640] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001648] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00001650] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00001658] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00001660] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00001668] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001670] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00001678] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++/* [0x00001680] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00001688] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00001690] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00001698] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000016a0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000016a8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000016b0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 
; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000016b8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000016c0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000016c8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000016d0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000016d8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000016e0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000016e8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x000016f0] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x000016f8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00001700] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00001708] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001710] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001718] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001720] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001728] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001730] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00001738] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00001740] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00001748] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00001750] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00001758] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00001760] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00001768] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00001770] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00001778] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 
+++/* [0x00001780] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00001788] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00001790] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00001798] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x000017a0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000017a8] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x000017b0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000017b8] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x000017c0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000017c8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x000017d0] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x000017d8] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x000017e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000017e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000017f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000017f8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00001800] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001808] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001810] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00001818] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001820] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index db971f4..3464cdb 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -5,16 +5,16 @@ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 146) ++-#define mc_filter (rpi_shader + 360) ++-#define mc_filter_b (rpi_shader + 670) ++-#define mc_filter_honly (rpi_shader + 894) ++-#define mc_exit (rpi_shader + 1048) 
++-#define mc_exit1 (rpi_shader + 1066) ++-#define mc_interrupt_exit (rpi_shader + 1082) ++-#define mc_interrupt_exit4 (rpi_shader + 1120) ++-#define mc_interrupt_exit8 (rpi_shader + 1142) ++-#define mc_setup_uv (rpi_shader + 1172) ++-#define mc_filter_uv_b (rpi_shader + 1314) ++-#define mc_end (rpi_shader + 1542) +++#define mc_filter (rpi_shader + 364) +++#define mc_filter_b (rpi_shader + 674) +++#define mc_filter_honly (rpi_shader + 898) +++#define mc_exit (rpi_shader + 1052) +++#define mc_exit1 (rpi_shader + 1070) +++#define mc_interrupt_exit (rpi_shader + 1086) +++#define mc_interrupt_exit4 (rpi_shader + 1124) +++#define mc_interrupt_exit8 (rpi_shader + 1146) +++#define mc_setup_uv (rpi_shader + 1176) +++#define mc_filter_uv_b (rpi_shader + 1318) +++#define mc_end (rpi_shader + 1546) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 02fdcb2..4809e1d 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -21,7 +21,7 @@ ++ # rb19 next ra16 ++ # ++ # ra20 1 ++-# ra21 64 +++# ra21 32 ++ # ra22 256 ++ # ra23 8 ++ # ++@@ -97,7 +97,7 @@ add rb24, r1, r0 ++ # load constants ++ ++ mov ra20, 1 ++-mov ra21, 64 +++mov ra21, 32 ++ mov ra22, 256 ++ mov ra23, 8 ++ ++@@ -270,7 +270,7 @@ add t0s, ra_x2_base, r2 ++ ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++-mov r2, rb21 ; mul24 r2, r0, ra0 +++nop ; mul24 r2, r0, ra0 ++ nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++ nop ; mul24 r3, ra1 << 1, r0 << 1 ++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++@@ -301,9 +301,9 @@ sub.setf -, r3, 8 ; mov r1, ra22 ++ ++ # apply horizontal filter ++ brr.anyn -, r:uvloop ++-max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++-asr r0, r0, 14 ; mov r1, ra21 ++-min.setf ra15, r0, rb22 +++mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll +++asr ra15, r0, 8 ; nop +++nop ; nop # Delay slot 3 (TODO move more of the context scroll into here) ++ ++ # apply vertical 
filter and write to VPM ++ ++@@ -315,12 +315,14 @@ add r1, r1, r0 ; mul24 r0, ra10, rb10 ++ add r1, r1, r0 ; mul24 r0, ra9, rb9 ++ add r1, r1, r0 ; mul24 r0, ra8, rb8 ++ add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-add.ifnn r1, r1, r0 ; mov -, vw_wait +++add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-brr.anyn -, r:uvloop ++ asr r1, r1, 14 ++-min r1, r1, rb22 ++-max vpm, r1, 0 +++add r1, r1, ra21 +++brr.anyn -, r:uvloop +++asr r1, r1, 6 # Delay 1 +++min r1, r1, rb22 # Delay 2 +++max vpm, r1, 0 # Delay 3 ++ ++ # DMA out for U ++ ++@@ -1161,7 +1163,7 @@ add rb24, r1, r0 ++ # load constants ++ ++ mov ra20, 1 ++-mov ra21, 64 +++mov ra21, 32 ++ mov ra22, 256 ++ mov ra23, 8 ++ ++-- ++2.7.4 ++ ++ ++From b7321192751956ed7deceeb3dabe22ccedb8e08d Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 13 May 2015 14:37:32 +0100 ++Subject: [PATCH 19/68] Removed unused luma functions ++ ++--- ++ libavcodec/hevc.c | 4 +- ++ libavcodec/rpi_qpu.c | 32 +- ++ libavcodec/rpi_shader.c | 1097 +++++++++++++------------------------------- ++ libavcodec/rpi_shader.h | 19 +- ++ libavcodec/rpi_shader.qasm | 970 +++------------------------------------ ++ 5 files changed, 396 insertions(+), 1726 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index d6d78ee..31b8b2f 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2731,8 +2731,8 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ return; ++ for(k=0;k<8;k++) { ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++- s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++- s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP); // Also need a dummy for V +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture 
requests are pipelined +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for V ++ assert(s->u_mvs[k] - s->mvs_base[k] < UV_COMMANDS_PER_QPU); ++ } ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 4d9eda8..4e90cc1 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -172,7 +172,7 @@ static int gpu_init(volatile struct GPU **gpu) { ++ ++ // Now copy over the QPU code into GPU memory ++ { ++- int num_bytes = qpu_get_fn(QPU_MC_END) - qpu_get_fn(QPU_MC_SETUP); +++ int num_bytes = qpu_get_fn(QPU_MC_END) - qpu_get_fn(QPU_MC_SETUP_UV); ++ assert(num_bytes<=QPU_CODE_SIZE*sizeof(unsigned int)); ++ memcpy((void*)ptr->qpu_code, rpi_shader, num_bytes); ++ } ++@@ -612,24 +612,24 @@ unsigned int qpu_get_fn(int num) { ++ gpu_unlock(); ++ } ++ switch(num) { ++- case QPU_MC_SETUP: ++- fn = mc_setup; ++- break; ++- case QPU_MC_FILTER: ++- fn = mc_filter; ++- break; +++ //case QPU_MC_SETUP: +++ // fn = mc_setup; +++ // break; +++ //case QPU_MC_FILTER: +++ // fn = mc_filter; +++ // break; ++ case QPU_MC_EXIT: ++ fn = mc_exit; ++ break; ++- case QPU_MC_INTERRUPT_EXIT: ++- fn = mc_interrupt_exit; ++- break; ++- case QPU_MC_FILTER_B: ++- fn = mc_filter_b; ++- break; ++- case QPU_MC_FILTER_HONLY: ++- fn = mc_filter_honly; ++- break; +++ //case QPU_MC_INTERRUPT_EXIT: +++ // fn = mc_interrupt_exit; +++ // break; +++ //case QPU_MC_FILTER_B: +++ // fn = mc_filter_b; +++ // break; +++ //case QPU_MC_FILTER_HONLY: +++ // fn = mc_filter_honly; +++ // break; ++ case QPU_MC_SETUP_UV: ++ fn = mc_setup_uv; ++ break; ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 831633b..170e8ac 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -21,798 +21,331 @@ __declspec(align(8)) ++ __attribute__((aligned(8))) ++ #endif ++ unsigned int rpi_shader[] = { ++-// ::mc_setup +++// ::mc_setup_uv ++ /* [0x00000000] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++ /* [0x00000008] */ 
0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num ++ /* [0x00000010] */ 0x15827d80, 0x10020767, // mov ra_y, unif ++ /* [0x00000018] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif ++-/* [0x00000020] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 ++-/* [0x00000028] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++-/* [0x00000030] */ 0x15827d80, 0x10021427, // mov rb16, unif ++-/* [0x00000038] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000040] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++-/* [0x00000048] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++-/* [0x00000050] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x00000058] */ 0x00000020, 0xe0020567, // mov ra21, 32 ++-/* [0x00000060] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x00000068] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++-/* [0x00000070] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000078] */ 0x00000040, 0xe0021567, // mov rb21, 64 ++-/* [0x00000080] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000088] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x00000090] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x00000098] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x000000a0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x000000a8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x000000b0] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x000000b8] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x000000c0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x000000c8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000d0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x000000d8] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x000000e0] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x000000e8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x000000f0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x000000f8] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000100] */ 0x149c31c0, 0xd0020827, // 
and r0, r0, 3 ++-/* [0x00000108] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000110] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000118] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000120] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000128] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000130] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000138] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000140] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000148] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000150] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000158] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000160] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000168] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000170] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000178] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x00000180] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x00000188] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x00000190] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x00000198] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000001a0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x00000020] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000028] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base +++/* [0x00000030] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00000038] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 +++/* [0x00000040] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x00000048] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000050] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x00000058] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x00000060] */ 0x00000001, 0xe0020527, // mov ra20, 1 
+++/* [0x00000068] */ 0x00000020, 0xe0020567, // mov ra21, 32 +++/* [0x00000070] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000078] */ 0x00000008, 0xe00205e7, // mov ra23, 8 +++/* [0x00000080] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000088] */ 0x00000040, 0xe0021567, // mov rb21, 64 +++/* [0x00000090] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000098] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x000000a0] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x000000a8] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x000000b0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x000000b8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x000000c0] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x000000c8] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x000000d0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x000000d8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x000000e0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000000e8] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x000000f0] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x000000f8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000100] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000108] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000110] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000118] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000120] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000128] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000130] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000138] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000140] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000148] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000150] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000158] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000160] */ 0x159e7480, 
0x10020827, // mov r0, r2 +++/* [0x00000168] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000170] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000178] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000180] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000188] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x00000190] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x00000198] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x000001a0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++ /* [0x000001a8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001b0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x000001b8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x000001c0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x000001b0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000001b8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001c0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++ /* [0x000001c8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001d0] */ 0x4c9d00cf, 0x10024821, // add r0, r0, r3; mul24 r1, r1, rb_pitch ++-/* [0x000001d8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x000001e0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001e8] */ 0x949dc5c0, 0xd0025890, // and r2, r2, ~3; mov ra_x_base, r0 ++-/* [0x000001f0] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x000001f8] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00000200] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000208] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000210] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000218] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000220] */ 0x0c741dc0, 
0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000228] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000230] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000238] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000240] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x000001d0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x000001d8] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x000001e0] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x000001e8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000001f0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000001f8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000200] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000208] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000210] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000218] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000220] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000228] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000230] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv ++-/* [0x00000248] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000250] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000258] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000260] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000268] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000270] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000278] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000280] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000288] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000290] */ 0x159e7240, 0x10020727, // mov 
ra_y_next, r1 ++-/* [0x00000298] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000002a0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002b0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002d0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000002d8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x000002e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002e8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000002f0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000002f8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000300] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000330] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000340] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000350] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000360] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000368] */ 0x8f8171f6, 0x10025220, 
// asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000370] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000378] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000380] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000388] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000390] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000398] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003a0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000238] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000240] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000248] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000250] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000258] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000260] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000268] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000270] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000278] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000280] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000288] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000290] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000298] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002a0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002a8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002b0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002b8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002c0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000002c8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000002d0] */ 0x119c71c0, 0xd0020827, // shl 
r0, r0, 7 +++/* [0x000002d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000002e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000002f0] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x000002f8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000300] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000308] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000310] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000318] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000320] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000340] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000350] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000360] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000368] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000370] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000378] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000380] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000388] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000390] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x000003a8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, 
r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003b0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000003b8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003c0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003c8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003d0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003d8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003e0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000400] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000408] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000410] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000420] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000428] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000430] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000438] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000440] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000448] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000450] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000458] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz 
r3, ra5 << 13, r1 << 13 ++-/* [0x00000460] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000468] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000470] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000478] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000480] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000488] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000490] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000498] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x000004a0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x000004a8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000004b0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000004b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000004c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000004c8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004d0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000004d8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000004e0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000004e8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x000004f0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x000004f8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000500] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000508] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000510] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000518] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000520] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000528] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; 
mov -, vw_wait ++-/* [0x00000530] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000538] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000540] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000548] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000550] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000558] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000560] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000568] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000570] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000578] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000580] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000588] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000590] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000598] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000005a0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000005a8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// ::mc_filter ++-/* [0x000005b0] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000005b8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000005c0] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x000005c8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000005d0] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x000005d8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000005e0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x000005e8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000005f0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x000005f8] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x00000600] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x00000608] */ 0x0c9e70c0, 0x10020827, // add 
r0, r0, r3 ++-/* [0x00000610] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000618] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000620] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000628] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000630] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000638] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000640] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000648] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000650] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000658] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000660] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000668] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000670] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000678] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000680] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000688] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000690] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000698] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000006a0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006b0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006b8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000006c0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000006e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e8] */ 
0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000700] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000708] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000710] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000718] */ 0x000001d0, 0xf07809e7, // brr.anynn -, r:fast_path ++-/* [0x00000720] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000728] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000730] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000738] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++-// :loop ++-/* [0x00000740] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000748] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000750] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000758] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000760] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000768] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000770] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000778] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000780] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000788] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000790] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000798] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* 
[0x000007a0] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000007a8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000007b0] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000007b8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000007c0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000007c8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000007d0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000007d8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000007e0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000007e8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000007f0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000007f8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000800] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000808] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000810] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x00000818] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000820] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000828] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000830] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000838] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000840] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000848] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000850] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000858] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:loop ++-/* [0x00000860] */ 0x533c0dc1, 0xd00243a0, // max ra14, 
ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00000868] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00000870] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00000878] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000880] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000888] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000890] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000898] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x000008a0] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x000008a8] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x000008b0] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000008b8] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x000008c0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000008c8] */ 0xfffffe58, 0xf06809e7, // brr.anyn -, r:loop ++-/* [0x000008d0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x000008d8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000008e0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000008e8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000008f0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000008f8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000900] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// :fast_path ++-/* [0x00000908] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++-// :fast_loop ++-/* [0x00000910] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000918] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000920] */ 0x95727d9b, 0x1004475f, // mov.ifz ra_y, ra_y_next ; mov rb31, r3 ++-/* [0x00000928] */ 0x95690dbf, 
0x10044623, // mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch ++-/* [0x00000930] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000938] */ 0x929de5e4, 0x100248a1, // min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 ++-/* [0x00000940] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000948] */ 0xec414c87, 0x10024e20, // add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000950] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000958] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000960] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000968] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000970] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000978] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000980] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000988] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000990] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000998] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 ++-/* [0x000009a0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x000009a8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x000009b0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x000009b8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000009c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000009c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000009d0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000009d8] */ 0xffffff18, 0xf06809e7, // brr.anyn -, r:fast_loop ++-/* [0x000009e0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x000009e8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov 
r1, ra21 ++-/* [0x000009f0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x000009f8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000a00] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000a08] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000a10] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000a18] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000a20] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000a28] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000a30] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000a38] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000a40] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000a48] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:fast_loop ++-/* [0x00000a50] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x00000a58] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a60] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a68] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a70] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a78] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a80] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// ::mc_filter_b ++-/* [0x00000a88] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000a90] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000a98] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x00000aa0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000aa8] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x00000ab0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000ab8] */ 0x928191f6, 0x10024823, // min r0, r0, 
rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x00000ac0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000ac8] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x00000ad0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x00000ad8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x00000ae0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000ae8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000af0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000af8] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000b00] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000b08] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000b10] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000b18] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000b20] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000b28] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000b30] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000b38] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000b40] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000b48] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000b50] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000b58] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000b60] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000b68] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000b70] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000b78] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000b80] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 ++-/* [0x00000b88] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000b90] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000b98] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ba0] */ 0x4f5971c6, 
0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ba8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000bb0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bb8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bc0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bc8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000bd0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bd8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000be0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000be8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000bf0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bf8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000c00] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000c08] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000c10] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000c18] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000c20] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++-// :bloop ++-/* [0x00000c28] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000c30] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000c38] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000c40] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000c48] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000c50] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000c58] */ 0x129de5c0, 
0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000c60] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000c68] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000c70] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000c78] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000c80] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000c88] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000c90] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000c98] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000ca0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000ca8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000cb0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000cb8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000cc0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000cc8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000cd0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000cd8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000ce0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000ce8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000cf0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000cf8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x00000d00] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000d08] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000d10] */ 
0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000d18] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000d20] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000d28] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000d30] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000d38] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000d40] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:bloop ++-/* [0x00000d48] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00000d50] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00000d58] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00000d60] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000d68] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000d70] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000d78] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000d80] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000d88] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000d90] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000d98] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000da0] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000da8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000db0] */ 0x8fc8f3f6, 0xd0020867, // asr r1, r1, 15 ; mov -, vr_wait ++-/* [0x00000db8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000dc0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x00000dc8] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:bloop ++-/* [0x00000dd0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000dd8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x00000de0] */ 0x0e9c13c0, 0xd0020c27, // 
shr vpm, r1, 1 ++-/* [0x00000de8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000df0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000df8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000e00] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// ::mc_filter_honly ++-/* [0x00000e08] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000e10] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000e18] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x00000e20] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000e28] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x00000e30] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000e38] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x00000e40] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000e48] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x00000e50] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x00000e58] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x00000e60] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000e68] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000e70] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000e78] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000e80] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000e88] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000e90] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000e98] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000ea0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000ea8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000eb0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000eb8] */ 0x0c9de1c0, 0xd0021467, // add rb17, r0, -2 ++-/* [0x00000ec0] */ 0x919c71c0, 0xd0024812, // shl r0, r0, 7 
; mov rb18,r0 ++-/* [0x00000ec8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000ed0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000ed8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000ee0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000ee8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ef0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ef8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f00] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000f08] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f10] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f18] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f20] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000f28] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000f30] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000f38] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000f40] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++-// :loop_honly ++-/* [0x00000f48] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000f50] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000f58] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000f60] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000f68] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000f70] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000f78] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000f80] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, 
r2, r3 ++-/* [0x00000f88] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000f90] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000f98] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000fa0] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000fa8] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000fb0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000fb8] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000fc0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000fc8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000fd0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000fd8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000fe0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000fe8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000ff0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000ff8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00001000] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00001008] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001010] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001018] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001020] */ 0x8d5927f6, 0x100269e1, // sub.setf -, r3, rb18 ; mov r1, ra22 ++-/* [0x00001028] */ 0x559f2fc1, 0x100049e0, // mov -, vw_wait ; mul24 r0, r0, r1 ++-/* [0x00001030] */ 0xfffffef8, 0xf06809e7, // brr.anyn -, r:loop_honly ++-/* [0x00001038] */ 
0x0f9cf1c0, 0xd0020827, // asr r0, r0, 15 ++-/* [0x00001040] */ 0x129d61c0, 0x10020827, // min r0, r0, rb22 ++-/* [0x00001048] */ 0x139c01c0, 0xd0020c27, // max vpm, r0, 0 ++-/* [0x00001050] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001058] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001060] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001068] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// ::mc_exit ++-/* [0x00001070] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001078] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00001080] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001088] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001090] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001098] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010a0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000010a8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000010b0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_exit1 ++-/* [0x000010b8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000010c0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010c8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010d0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010d8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010e0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000010e8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000010f0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_interrupt_exit ++-/* [0x000010f8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001100] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001108] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001110] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001118] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001120] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001128] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001130] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) 
++-/* [0x00001138] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001140] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001148] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001150] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001158] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001160] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001168] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001170] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001178] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00001180] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00001188] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_interrupt_exit4 ++-/* [0x00001190] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001198] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011a0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011a8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011b0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000011c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000011c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000011d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000011d8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000011e0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_interrupt_exit8 ++-/* [0x000011e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000011f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001200] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001208] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001210] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001218] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001220] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001228] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001230] */ 
0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001238] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001240] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001248] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00001250] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00001258] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_setup_uv ++-/* [0x00001260] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00001268] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num ++-/* [0x00001270] */ 0x15827d80, 0x10020767, // mov ra_y, unif ++-/* [0x00001278] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif ++-/* [0x00001280] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00001288] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base ++-/* [0x00001290] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 ++-/* [0x00001298] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++-/* [0x000012a0] */ 0x15827d80, 0x10021427, // mov rb16, unif ++-/* [0x000012a8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000012b0] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++-/* [0x000012b8] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++-/* [0x000012c0] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x000012c8] */ 0x00000020, 0xe0020567, // mov ra21, 32 ++-/* [0x000012d0] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x000012d8] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++-/* [0x000012e0] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x000012e8] */ 0x00000040, 0xe0021567, // mov rb21, 64 ++-/* [0x000012f0] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x000012f8] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x00001300] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x00001308] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x00001310] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x00001318] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x00001320] */ 0x00000000, 0xe0020327, // mov 
ra12, 0 ++-/* [0x00001328] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x00001330] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x00001338] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x00001340] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00001348] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00001350] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00001358] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00001360] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00001368] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00001370] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00001378] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001380] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00001388] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00001390] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00001398] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x000013a0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x000013a8] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x000013b0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x000013b8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x000013c0] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x000013c8] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x000013d0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000013d8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x000013e0] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x000013e8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x000013f0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000013f8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x00001400] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x00001408] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* 
[0x00001410] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00001418] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00001420] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++-/* [0x00001428] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00001430] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x00001438] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x00001440] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00001448] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001450] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001458] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001460] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00001468] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00001470] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00001478] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001480] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00001488] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00001490] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000398] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003a0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000003a8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003b0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003b8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003c0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003c8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003d0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003d8] */ 0xec414c8f, 
0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000003e0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000003e8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003f0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003f8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000400] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000408] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000410] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000418] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000420] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000428] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000430] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000438] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000440] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000448] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000450] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000458] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000460] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000468] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000470] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000478] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000480] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000488] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000490] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* 
[0x00000498] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000004a0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000004a8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000004b0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000004b8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004c0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000004c8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000004d0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000004d8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x000004e0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x000004e8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x000004f0] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x000004f8] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000500] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000508] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000510] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000518] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000520] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000528] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000530] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000538] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000540] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000548] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000550] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000558] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000560] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000568] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* 
[0x00000570] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000578] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000580] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000588] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000590] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000598] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x00001498] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000014a0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000014a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000014b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000014b8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000014c0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000014c8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000014d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000014d8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000014e0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000014e8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000014f0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000014f8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00001500] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001508] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00001510] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00001518] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00001520] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00001528] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00001530] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00001538] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00001540] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00001548] */ 0x0e9c87c0, 0xd00208e7, // shr 
r3, r3, 8 ++-/* [0x00001550] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001558] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001560] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00001568] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 ++-/* [0x00001570] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00001578] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001580] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001588] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001590] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001598] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000015a0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015a8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015b0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015b8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000015c0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015c8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015d0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015d8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000015e0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015e8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015f0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015f8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00001600] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00001608] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001610] */ 0x00000000, 0xe00208e7, // mov r3, 0 
+++/* [0x000005a0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000005a8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000005b0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000005b8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000005c0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000005c8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000005d0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000005d8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000005e0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000005e8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000005f0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000005f8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000600] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000608] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000610] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000618] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000620] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000628] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000630] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000638] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000640] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000648] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000650] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000658] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000660] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000668] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000670] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00000678] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000680] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* 
[0x00000688] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000690] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000698] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000006a8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000006c8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000006e8] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f0] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f8] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000700] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000708] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000710] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000718] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00001618] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00001620] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00001628] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00001630] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov 
r3, rb_pitch ++-/* [0x00001638] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00001640] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00001648] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00001650] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00001658] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00001660] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00001668] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001670] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00001678] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-/* [0x00001680] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00001688] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00001690] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00001698] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000016a0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000016a8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000016b0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000016b8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000016c0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000016c8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000016d0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000016d8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000016e0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* 
[0x000016e8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x000016f0] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x000016f8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00001700] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00001708] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001710] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001718] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001720] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001728] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001730] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00001738] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00001740] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00001748] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00001750] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00001758] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00001760] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00001768] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00001770] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00001778] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00001780] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00001788] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00001790] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00001798] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x000017a0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000017a8] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x000017b0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000017b8] */ 
0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x000017c0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000017c8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x000017d0] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x000017d8] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x000017e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000017e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000017f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000017f8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00001800] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001808] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001810] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00001818] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001820] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000720] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000728] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000730] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000738] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000740] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000748] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000750] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000758] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000760] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000768] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000770] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000778] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 
+++/* [0x00000780] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++/* [0x00000788] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000790] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000798] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000007a0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000007a8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000007b0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000007b8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000007c0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000007c8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000007d0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000007d8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000007e0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000007e8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000007f0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x000007f8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000800] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000808] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000810] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000818] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000820] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000828] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000830] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000838] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000840] */ 0xfffffec0, 
0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000848] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00000850] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00000858] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00000860] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000868] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000870] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000878] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000880] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000888] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000890] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000898] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000008a0] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x000008a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000008b0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x000008b8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000008c0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x000008c8] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000008d0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x000008d8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x000008e0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x000008e8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000008f0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000008f8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000900] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000908] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000910] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000918] 
*/ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000920] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000928] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_exit +++/* [0x00000930] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000938] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000940] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000948] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000950] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000958] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000960] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000968] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000970] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_interrupt_exit8 +++/* [0x00000978] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000980] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000988] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000990] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000998] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000009a0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009d0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009d8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000009e0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000009e8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 3464cdb..9de4535 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -3,18 +3,11 @@ ++ ++ extern unsigned int rpi_shader[]; ++ ++-#define mc_setup (rpi_shader + 0) 
++-#define mc_filter_uv (rpi_shader + 146) ++-#define mc_filter (rpi_shader + 364) ++-#define mc_filter_b (rpi_shader + 674) ++-#define mc_filter_honly (rpi_shader + 898) ++-#define mc_exit (rpi_shader + 1052) ++-#define mc_exit1 (rpi_shader + 1070) ++-#define mc_interrupt_exit (rpi_shader + 1086) ++-#define mc_interrupt_exit4 (rpi_shader + 1124) ++-#define mc_interrupt_exit8 (rpi_shader + 1146) ++-#define mc_setup_uv (rpi_shader + 1176) ++-#define mc_filter_uv_b (rpi_shader + 1318) ++-#define mc_end (rpi_shader + 1546) +++#define mc_setup_uv (rpi_shader + 0) +++#define mc_filter_uv (rpi_shader + 142) +++#define mc_filter_uv_b (rpi_shader + 360) +++#define mc_exit (rpi_shader + 588) +++#define mc_interrupt_exit8 (rpi_shader + 606) +++#define mc_end (rpi_shader + 636) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 4809e1d..cd7346d 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -71,8 +71,10 @@ ++ ++ .set rb_const_64, rb21 ++ ++-# mc_setup(next_kernel, x, y, ref_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1) ++-::mc_setup +++ +++################################################################################ +++# mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1, pad2) +++::mc_setup_uv ++ ++ # Read starting kernel ++ mov ra31, unif ++@@ -80,7 +82,9 @@ mov ra31, unif ++ # Load first request location ++ add ra_x_base, unif, elem_num # Store x ++ mov ra_y, unif # Store y ++-mov ra_x2_base, unif # Store frame base +++mov ra_x2_base, unif # Store frame u base +++nop +++sub ra_u2v_ref_offset, unif, ra_x2_base # Store offset to add to move from u to v in reference frame ++ ++ # Read image dimensions ++ sub rb25,unif,1 ++@@ -143,29 +147,24 @@ mov r1, vpm_setup(0, 4, h8p(0, 0)) ++ add rb28, r0, r1 ++ ++ # Compute base address for first and second access ++-#add r0, unif, elem_num # x ++ mov r0, ra_x_base # Load x ++-add 
r2, r0, 8 # x+8 ++ max r0, r0, 0; mov r1, ra_y # Load y ++ min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base # Load the frame base ++-shl ra_xshift_next, r0, 3 ++-max r2, r2, 0 +++shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++ add ra_y, r1, 1 ++-min r2, r2, rb_frame_width_minus_1 ++-shl ra_x2shift_next, r2, 3 ++-max r1, r1, 0 # y ++-min r1, r1, rb_frame_height_minus_1 ++-add r0, r0, r3; mul24 r1, r1, rb_pitch ++-add r2, r2, r3 +++add r0, r0, r3 ++ and r0, r0, ~3 ++-and r2, r2, ~3; mov ra_x_base, r0 +++max r1, r1, 0 ; mov ra_x_base, r0 # y +++min r1, r1, rb_frame_height_minus_1 ++ # submit texture requests for first line +++add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++ add t0s, r0, r1 ; mov ra_x2_base, r2 ++ add t0s, r2, r1 ++ ++ # Dump padding words ++ mov r0, unif ++ mov r0, unif +++mov r0, unif ++ ++ # submit texture requests for second line ++ max r1, ra_y, 0 ++@@ -176,6 +175,8 @@ nop ; mul24 r1, r1, rb_pitch ++ add t0s, r1, ra_x_base ++ add t0s, r1, ra_x2_base ++ +++ +++ ++ ################################################################################ ++ ++ # mc_filter_uv(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) ++@@ -341,453 +342,26 @@ add vw_setup, rb26, r0 # VDW setup 0 ++ mov vw_setup, rb29 # Stride ++ mov vw_addr, unif # start the VDW ++ ++-################################################################################ ++- ++- ++-# mc_filter(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) ++- ++-# At this point we have already issued two pairs of texture requests for the current block ++-# ra_x_base, ra_x16_base point to the current coordinates for this block ++-::mc_filter ++-mov ra31, unif ++- ++-# per-channel shifts were calculated on the *previous* invocation ++- ++-mov ra_xshift, ra_xshift_next ++-mov ra_x2shift, ra_x2shift_next ++- ++-# get base addresses and per-channel shifts for *next* invocation 
++-add r0, unif, elem_num # x ++-add r2, r0, 8 # x+8 ++-max r0, r0, 0; mov r1, unif # y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base ++-shl ra_xshift_next, r0, 3 ++-max r2, r2, 0 ++-min r2, r2, rb_frame_width_minus_1 ++-shl ra_x2shift_next, r2, 3 ++-add r0, r0, r3 ++-add r2, r2, r3 ++-and rb_x_base_next, r0, ~3 ++-and ra_x2_base_next, r2, ~3 ++-mov ra_y_next, r1 ++- ++-# set up VPM write ++-mov vw_setup, rb28 ++- ++-# get width,height of block ++-mov r2, 16 ++-mov r0, unif ++-shr r1, r0, r2 # Extract width ++-sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++-and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 ++-shl r0, r0, 7 ++-add r0, r0, r1 # Combine width and height of destination area ++-shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++-add rb26, r0, rb27 ++- ++-sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++- ++-# get filter coefficients ++- ++-mov r0, unif ++-asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif ++-asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif ++-asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-brr.anynn -, r:fast_path ++-asr rb12, r0, rb23 # delay slot 1 ++- ++-# r2 is elem_num ++-# r3 is loop counter ++- ++-mov r5rep, -8 # delay slot 2 ++- ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] # delay slot 3 ++- ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-## nop ; ldtmu0 # loop counter increment ++-## shr r0, r4, ra17 ; ldtmu0 ++-## shr 
r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ; mul24 r3, r0, ra0 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-## sub r2, r2, r3 ; ldtmu0 ++-## ++-## mov r0, ra22 ++-## shr r0, r4, ra17 ; mul24 r2, r2, r0 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # apply horizontal filter ++-## ++-## asr r2, r2, 15 ; mul24 r3, r0, ra0 ++-## min r2, r2, rb22 ++-## max ra13, r2, 0 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## nop ; 
mul24.ifnz r3, ra6 << 14, r1 << 14 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-## sub r0, r2, r3 ++-## ++-## # apply horizontal filter ++-## ++-## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero ++-## asr r0, r0, 15 ++-## min r0, r0, rb22 ++-## max ra14, r0, 0 ++-## ++-## ++-## ++-## ++-## nop ; ldtmu0 # loop counter increment ++-## shr r0, r4, ra17 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ; mul24 r3, r0, ra0 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-## sub r0, r2, r3 ++-## ++-## # apply horizontal filter ++-## ++-## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero ++-## asr r0, r0, 15 ++-## min r0, r0, rb22 ++-## max ra15, r0, 0 ++- ++- ++- ++- ++-mov r3, 0 ++- ++-:loop ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-mov.ifz ra_y, ra_y_next ; mov 
r3, rb_pitch ++-shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++- ++-max r2, ra_y, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 ++- ++-# generate seven shifted versions ++-# interleave with scroll of vertical context ++- ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++- ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-sub r0, r2, r3 ++- ++-mov r3, rb31 ++- ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 8 ; mov r1, ra22 ++- ++-# apply horizontal filter ++-brr.anyn -, r:loop ++-max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++-asr r0, r0, 15 ; mov r1, ra21 ++-min.setf ra15, r0, rb22 ++- ++-# apply vertical filter and write to VPM ++- ++-nop ; mul24 r0, ra14, rb14 ++-sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-brr.anyn -, r:loop ++-asr r1, 
r1, 15 ++-min r1, r1, rb22 ++-max vpm, r1, 0 ++- ++-# DMA out ++- ++-bra -, ra31 ++-mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long ++-mov vw_setup, rb29 ++-mov vw_addr, unif # start the VDW ++- ++-#################################################### ++- ++-:fast_path ++-## nop ; ldtmu0 # loop counter increment ++-## shr r0, r4, ra17 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ; mul24 r3, r0, ra0 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## sub r2, r2, r3 ; ldtmu0 ++-## ++-## mov r0, ra22 ++-## shr r0, r4, ra17 ; mul24 r2, r2, r0 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # apply horizontal filter ++-## ++-## asr r2, r2, 15 ; mul24 r3, r0, ra0 ++-## min r2, r2, rb22 ++-## max ra13, r2, 0 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## sub r0, r2, r3 ++-## ++-## # apply horizontal filter ++-## ++-## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero ++-## asr 
r0, r0, 15 ++-## min r0, r0, rb22 ++-## max ra14, r0, 0 ++-## ++-## ++-## ++-## ++-## nop ; ldtmu0 # loop counter increment ++-## shr r0, r4, ra17 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ; mul24 r3, r0, ra0 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## sub r0, r2, r3 ++-## ++-## # apply horizontal filter ++-## ++-## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero ++-## asr r0, r0, 15 ++-## min r0, r0, rb22 ++-## max ra15, r0, 0 ++- ++- ++-mov r3, 0 # This signifies the amount of unrolling ++- ++-:fast_loop ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-# Due to pipelining we can only skip second pipeline instructions related to the fetched pixels ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_y, ra_y_next ; mov rb31, r3 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch ++- ++-max r2, ra_y, 0 ++-min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 # discard texture read ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 ++-add t0s, ra_x2_base, r2 ++- ++-# generate seven shifted versions ++-# interleave with scroll of vertical context ++- ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-sub r2, r2, r3 ; mul24 r3, 
ra3 << 3, r0 << 3 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-sub r0, r2, r3 ; mov r3, rb31 ++- ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 8 ; mov r1, ra22 ++- ++-# apply horizontal filter ++- ++-brr.anyn -, r:fast_loop ++-max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++-asr r0, r0, 15 ; mov r1, ra21 ++-min.setf ra15, r0, rb22 ++- ++-# apply vertical filter and write to VPM ++- ++-nop ; mul24 r0, ra14, rb14 ++-sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-brr.anyn -, r:fast_loop ++-asr r1, r1, 15 ++-min r1, r1, rb22 ++-max vpm, r1, 0 ++- ++-# DMA out ++- ++-bra -, ra31 ++-mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long ++-mov vw_setup, rb29 ++-mov vw_addr, unif # start the VDW ++ ++ ################################################################################ ++ ++-# mc_filter_b(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) ++- ++-# At this point we have already issued two pairs of texture requests for the current block ++-# ra_x_base, ra_x16_base point to the current coordinates for this block ++-::mc_filter_b +++::mc_filter_uv_b ++ mov ra31, unif ++ ++ # per-channel shifts were calculated on the *previous* invocation ++ ++ mov ra_xshift, ra_xshift_next ++-mov ra_x2shift, ra_x2shift_next ++ ++ # get base addresses and per-channel shifts for *next* invocation ++ add r0, unif, elem_num # x ++-add r2, r0, 8 # 
x+8 ++ max r0, r0, 0; mov r1, unif # y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base +++min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base ++ shl ra_xshift_next, r0, 3 ++-max r2, r2, 0 ++-min r2, r2, rb_frame_width_minus_1 ++-shl ra_x2shift_next, r2, 3 +++sub r2, unif, r3 # compute offset from frame base u to frame base v ++ add r0, r0, r3 ++-add r2, r2, r3 ++ and rb_x_base_next, r0, ~3 ++-and ra_x2_base_next, r2, ~3 ++ mov ra_y_next, r1 +++add ra_x2_base_next, rb_x_base_next, r2 ++ ++ # set up VPM write ++ mov vw_setup, rb28 ++@@ -801,17 +375,22 @@ and r0, r0, rb22 # Extract height ++ add rb17, r0, 5 ++ add rb18, r0, 7 ++ shl r0, r0, 7 +++ ++ # r0 is currently height<<7 ++ # For vr_setup we want height<<20 (so 20-7=13 additional bits) ++ shl r3, r0, 13 ++ shl r3, r3, 8 # Mask off top 8 bits ++ shr r3, r3, 8 +++ ++ add r0, r0, r1 # Combine width and height of destination area ++ shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++ add rb26, r0, rb27 +++ ++ # In a B frame, so also set up VPM read ++ add vr_setup, r3, rb28 ++ +++sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code +++ ++ # get filter coefficients ++ ++ mov r0, unif ++@@ -837,9 +416,13 @@ asr rb12, r0, rb23 ++ ++ mov r5rep, -8 ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ ++ mov r3, 0 ++ ++-:bloop +++:uvloop_b ++ # retrieve texture results and pick out bytes ++ # then submit two more texture requests ++ ++@@ -847,7 +430,7 @@ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++ shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++ mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ 
++ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++@@ -861,6 +444,7 @@ add t0s, ra_x2_base, r2 ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++ mov r2, rb21 ; mul24 r3, r0, ra0 +++nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++ sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++ sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++@@ -889,7 +473,7 @@ mov ra13, ra14 ++ sub.setf -, r3, 8 ; mov r1, ra22 ++ ++ # apply horizontal filter ++-brr.anyn -, r:bloop +++brr.anyn -, r:uvloop_b ++ max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++ asr r0, r0, 15 ; mov r1, ra21 ++ min.setf ra15, r0, rb22 ++@@ -906,213 +490,50 @@ sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++ sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++ sub.ifnn r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 15 ; mov -, vr_wait +++asr r1, r1, 15 ++ min r1, r1, rb22 ++ add r0, vpm, 1 # Blend in previous VPM contents at this location ++-brr.anyn -, r:bloop +++brr.anyn -, r:uvloop_b ++ max r1, r1, 0 ++ add r1, r1, r0 ++ shr vpm, r1, 1 ++ ++-# DMA out +++ +++# DMA out for U +++ +++mov vw_setup, rb26 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++# DMA out for V +++# We need to wait for the U to complete first, but have nothing useful to compute while we wait. +++# Could potentially push this write into the start of the next pipeline stage. +++mov r0, 16 +++mov -, vw_wait ++ ++ bra -, ra31 ++-mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long ++-mov vw_setup, rb29 +++add vw_setup, rb26, r0 # VDW setup 0 +++mov vw_setup, rb29 # Stride ++ mov vw_addr, unif # start the VDW ++ ++ ################################################################################ ++ ++-# mc_filter_honly(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) ++-# This filter only does horizontal filtering. 
++-# It is assumed that the region to fetch does not include extra rows above. +++# mc_exit() ++ ++-# At this point we have already issued two pairs of texture requests for the current block ++-# ra_x_base, ra_x16_base point to the current coordinates for this block ++-::mc_filter_honly ++-mov ra31, unif +++::mc_exit +++mov -, vw_wait # wait on the VDW ++ ++-# per-channel shifts were calculated on the *previous* invocation +++mov -,srel(0) ++ ++-mov ra_xshift, ra_xshift_next ++-mov ra_x2shift, ra_x2shift_next ++- ++-# get base addresses and per-channel shifts for *next* invocation ++-add r0, unif, elem_num # x ++-add r2, r0, 8 # x+8 ++-max r0, r0, 0; mov r1, unif # y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base ++-shl ra_xshift_next, r0, 3 ++-max r2, r2, 0 ++-min r2, r2, rb_frame_width_minus_1 ++-shl ra_x2shift_next, r2, 3 ++-add r0, r0, r3 ++-add r2, r2, r3 ++-and rb_x_base_next, r0, ~3 ++-and ra_x2_base_next, r2, ~3 ++-mov ra_y_next, r1 ++- ++-# set up VPM write ++-mov vw_setup, rb28 ++- ++-# get width,height of block ++-mov r2, 16 ++-mov r0, unif ++-shr r1, r0, r2 # Extract width ++-sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++-and r0, r0, rb22 # Extract height ++-add rb17, r0, -2 # Pipelining means we move data across 2 iterations early ++-shl r0, r0, 7 ; mov rb18,r0 ++-add r0, r0, r1 # Combine width and height of destination area ++-shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++-add rb26, r0, rb27 ++- ++-# get filter coefficients ++- ++-mov r0, unif ++-asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif ++-mov r0, unif ++- ++-# r2 is elem_num ++-# r3 is loop counter ++-mov r5rep, -8 ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 
1] # delay slot 3 ++-mov r3, 0 ++- ++-:loop_honly ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++- ++-max r2, ra_y, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 ++- ++-# generate seven shifted versions ++-# interleave with scroll of vertical context ++- ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++- ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-sub r0, r2, r3 ; mov r3, rb31 ++- ++-sub.setf -, r3, rb18 ; mov r1, ra22 ++- ++-mov -, vw_wait ; mul24 r0, r0, r1 ++-brr.anyn -, r:loop_honly ++-asr r0, r0, 15 # delay 1 ++-min r0, r0, rb22 # delay 2 ++-max vpm, r0, 0 # delay 3 ++- ++-# DMA out ++-bra -, ra31 ++-mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long ++-mov vw_setup, rb29 ++-mov vw_addr, unif # start the VDW ++- ++- ++-################################################################################ ++- ++-# mc_exit() 
++- ++-::mc_exit ++-mov -, vw_wait # wait on the VDW ++- ++-mov -,srel(0) ++- ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 ++ ++ nop ; nop ; thrend ++ nop ; nop # delay slot 1 ++ nop ; nop # delay slot 2 ++ ++-::mc_exit1 ++-mov -, vw_wait # wait on the VDW ++- ++-#mov -,srel(1) ++- ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++- ++-nop ; nop ; thrend ++-mov interrupt, 1; nop # delay slot 1 ++-nop ; nop # delay slot 2 ++- ++-# mc_interrupt_exit() ++-::mc_interrupt_exit ++-mov -, vw_wait # wait on the VDW ++- ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++- ++-mov -,sacq(0) # 1 ++-mov -,sacq(0) # 2 ++-mov -,sacq(0) # 3 ++-mov -,sacq(0) # 4 ++-mov -,sacq(0) # 5 ++-mov -,sacq(0) # 6 ++-mov -,sacq(0) # 7 ++-mov -,sacq(0) # 8 ++-mov -,sacq(0) # 9 ++-mov -,sacq(0) # 10 ++-mov -,sacq(0) # 11 ++- ++-nop ; nop ; thrend ++-mov interrupt, 1; nop # delay slot 1 ++-nop ; nop # delay slot 2 ++- ++-# mc_interrupt_exit4() ++-::mc_interrupt_exit4 ++-mov -, vw_wait # wait on the VDW ++- ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++- ++-mov -,sacq(0) # 1 ++-mov -,sacq(0) # 2 ++-mov -,sacq(0) # 3 ++- ++-nop ; nop ; thrend ++-mov interrupt, 1; nop # delay slot 1 ++-nop ; nop # delay slot 2 ++- ++ # mc_interrupt_exit8() ++ ::mc_interrupt_exit8 ++ mov -, vw_wait # wait on the VDW ++@@ -1134,282 +555,5 @@ nop ; nop ; thrend ++ mov interrupt, 1; nop # delay slot 1 ++ nop ; nop # delay slot 2 ++ ++-################################################################################ ++-# mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1, pad2) ++-::mc_setup_uv ++- ++-# Read starting kernel ++-mov ra31, unif ++- ++-# Load first request location ++-add ra_x_base, unif, elem_num # Store x ++-mov ra_y, unif # Store y ++-mov ra_x2_base, unif # Store frame u base ++-nop ++-sub ra_u2v_ref_offset, unif, ra_x2_base # Store offset to add to move from u to v in reference frame ++- ++-# Read image dimensions ++-sub 
rb25,unif,1 ++-sub rb30,unif,1 ++- ++-# get source pitch ++-mov rb16, unif ++- ++-# get destination pitch ++-mov r0, unif ++-mov r1, vdw_setup_1(0) ++-add rb24, r1, r0 ++- ++-# load constants ++- ++-mov ra20, 1 ++-mov ra21, 32 ++-mov ra22, 256 ++-mov ra23, 8 ++- ++-mov rb20, 0xffffff00 ++-mov rb21, 64 ++-mov rb22, 255 ++-mov rb23, 24 ++- ++-# touch vertical context to keep simulator happy ++- ++-mov ra8, 0 ++-mov ra9, 0 ++-mov ra10, 0 ++-mov ra11, 0 ++-mov ra12, 0 ++-mov ra13, 0 ++-mov ra14, 0 ++-mov ra15, 0 ++- ++-# Compute part of VPM to use for DMA output ++-mov r2, qpu_num ++-and r2, r2, 15 ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later ++-shl r0, r0, 5 ++-add rb27, r0, r1 ++- ++-# Compute part of VPM to save data into ++-mov r2, qpu_num ++-and r2, r2, 15 ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-add rb28, r0, r1 ++- ++-# Compute base address for first and second access ++-mov r0, ra_x_base # Load x ++-max r0, r0, 0; mov r1, ra_y # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base # Load the frame base ++-shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-add ra_y, r1, 1 ++-add r0, r0, r3 ++-and r0, r0, ~3 ++-max r1, r1, 0 ; mov ra_x_base, r0 # y ++-min r1, r1, rb_frame_height_minus_1 ++-# submit texture requests for first line ++-add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-add t0s, r0, r1 ; mov ra_x2_base, r2 ++-add t0s, r2, r1 ++- ++-# Dump padding words ++-mov r0, unif ++-mov r0, unif ++-mov r0, unif ++- ++-# submit texture requests for second line ++-max r1, ra_y, 0 ++-min r1, r1, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ++-bra -, ra31 ++-nop ; mul24 r1, r1, rb_pitch ++-add t0s, r1, ra_x_base ++-add t0s, r1, ra_x2_base ++- ++- ++- ++-################################################################################ ++- 
++-::mc_filter_uv_b ++-mov ra31, unif ++- ++-# per-channel shifts were calculated on the *previous* invocation ++- ++-mov ra_xshift, ra_xshift_next ++- ++-# get base addresses and per-channel shifts for *next* invocation ++-add r0, unif, elem_num # x ++-max r0, r0, 0; mov r1, unif # y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base ++-shl ra_xshift_next, r0, 3 ++-sub r2, unif, r3 # compute offset from frame base u to frame base v ++-add r0, r0, r3 ++-and rb_x_base_next, r0, ~3 ++-mov ra_y_next, r1 ++-add ra_x2_base_next, rb_x_base_next, r2 ++- ++-# set up VPM write ++-mov vw_setup, rb28 ++- ++-# get width,height of block ++-mov r2, 16 ++-mov r0, unif ++-shr r1, r0, r2 # Extract width ++-sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++-and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 ++-shl r0, r0, 7 ++- ++-# r0 is currently height<<7 ++-# For vr_setup we want height<<20 (so 20-7=13 additional bits) ++-shl r3, r0, 13 ++-shl r3, r3, 8 # Mask off top 8 bits ++-shr r3, r3, 8 ++- ++-add r0, r0, r1 # Combine width and height of destination area ++-shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++-add rb26, r0, rb27 ++- ++-# In a B frame, so also set up VPM read ++-add vr_setup, r3, rb28 ++- ++-sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++- ++-# get filter coefficients ++- ++-mov r0, unif ++-asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif ++-asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif ++-asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-asr rb13, r0, rb23; mul24 
r0, r0, ra22 ++-asr rb12, r0, rb23 ++- ++-# r2 is elem_num ++-# r3 is loop counter ++- ++-mov r5rep, -8 ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++- ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-mov r3, 0 ++- ++-:uvloop_b ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++- ++-max r2, ra_y, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 ++- ++-# generate seven shifted versions ++-# interleave with scroll of vertical context ++- ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++- ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-sub r0, r2, r3 ++- ++-mov r3, rb31 ++- ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 8 ; mov r1, ra22 ++- ++-# apply 
horizontal filter ++-brr.anyn -, r:uvloop_b ++-max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++-asr r0, r0, 15 ; mov r1, ra21 ++-min.setf ra15, r0, rb22 ++- ++-# apply vertical filter and write to VPM ++- ++-nop ; mul24 r0, ra14, rb14 ++-sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 15 ++-min r1, r1, rb22 ++-add r0, vpm, 1 # Blend in previous VPM contents at this location ++-brr.anyn -, r:uvloop_b ++-max r1, r1, 0 ++-add r1, r1, r0 ++-shr vpm, r1, 1 ++- ++- ++-# DMA out for U ++- ++-mov vw_setup, rb26 # VDW setup 0 ++-mov vw_setup, rb29 # Stride ++-mov vw_addr, unif # start the VDW ++- ++-# DMA out for V ++-# We need to wait for the U to complete first, but have nothing useful to compute while we wait. ++-# Could potentially push this write into the start of the next pipeline stage. ++-mov r0, 16 ++-mov -, vw_wait ++- ++-bra -, ra31 ++-add vw_setup, rb26, r0 # VDW setup 0 ++-mov vw_setup, rb29 # Stride ++-mov vw_addr, unif # start the VDW ++- ++ ::mc_end +++# Do not add code here because mc_end must appear after all other code. 
++-- ++2.7.4 ++ ++ ++From d40d59de0f09fd1a6e7146532418b63d8e2711b7 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 13 May 2015 14:54:25 +0100 ++Subject: [PATCH 20/68] Moved chroma P1 to QPUs ++ ++--- ++ libavcodec/hevc.c | 38 ++++++++++++++++++++++++++++++++++++++ ++ 1 file changed, 38 insertions(+) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 31b8b2f..391d139 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2070,6 +2070,44 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ s->sh.luma_offset_l1[current_mv.ref_idx[1]]); ++ ++ if (s->ps.sps->chroma_format_idc) { +++#ifdef RPI_INTER_QPU +++ if (s->enable_rpi) { +++ int reflist = 1; +++ int hshift = s->ps.sps->hshift[1]; +++ int vshift = s->ps.sps->vshift[1]; +++ const Mv *mv = ¤t_mv.mv[reflist]; +++ intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift); +++ intptr_t my = av_mod_uintp2(mv->y, 2 + vshift); +++ intptr_t _mx = mx << (1 - hshift); +++ intptr_t _my = my << (1 - vshift); // Fractional part of motion vector +++ +++ int x1_c = x0_c + (mv->x >> (2 + hshift)); +++ int y1_c = y0_c + (mv->y >> (2 + hshift)); +++ //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width +++ int chan = x0>>8; +++ +++ uint32_t *u = s->u_mvs[chan & 7]; +++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { +++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); +++ *u++ = ( (nPbW_csh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] +++ *u++ = rpi_filter_coefs[_mx][0]; +++ *u++ = rpi_filter_coefs[_mx][1]; +++ *u++ = rpi_filter_coefs[_my][0]; +++ *u++ = rpi_filter_coefs[_my][1]; +++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ } +++ } +++ s->u_mvs[chan & 7] = u; +++ return; +++ } +++#endif ++ RPI_REDIRECT(chroma_mc_uni)(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1], ++ 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, ++ s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]); ++-- ++2.7.4 ++ ++ ++From 75777ba7927086e862104b14f6446e81bc789611 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 13 May 2015 15:13:47 +0100 ++Subject: [PATCH 21/68] Added B prediction - not quite right ++ ++--- ++ libavcodec/hevc.c | 58 ++++++++++++++++++++++++ ++ libavcodec/rpi_shader.c | 108 +++++++++++++++++++++++---------------------- ++ libavcodec/rpi_shader.h | 6 +-- ++ libavcodec/rpi_shader.qasm | 48 ++++++++++---------- ++ 4 files changed, 141 insertions(+), 79 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 391d139..47ddfff 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c 
++@@ -2127,6 +2127,64 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ ref1->frame, ¤t_mv.mv[1], ¤t_mv); ++ ++ if (s->ps.sps->chroma_format_idc) { +++#ifdef RPI_INTER_QPU +++ if (s->enable_rpi) { +++ int hshift = s->ps.sps->hshift[1]; +++ int vshift = s->ps.sps->vshift[1]; +++ const Mv *mv = ¤t_mv.mv[0]; +++ intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift); +++ intptr_t my = av_mod_uintp2(mv->y, 2 + vshift); +++ intptr_t _mx = mx << (1 - hshift); +++ intptr_t _my = my << (1 - vshift); // Fractional part of motion vector +++ int x1_c = x0_c + (mv->x >> (2 + hshift)); +++ int y1_c = y0_c + (mv->y >> (2 + hshift)); +++ +++ const Mv *mv2 = ¤t_mv.mv[1]; +++ intptr_t mx2 = av_mod_uintp2(mv2->x, 2 + hshift); +++ intptr_t my2 = av_mod_uintp2(mv2->y, 2 + vshift); +++ intptr_t _mx2 = mx2 << (1 - hshift); +++ intptr_t _my2 = my2 << (1 - vshift); // Fractional part of motion vector +++ +++ int x2_c = x0_c + (mv2->x >> (2 + hshift)); +++ int y2_c = y0_c + (mv2->y >> (2 + hshift)); +++ +++ int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width +++ +++ uint32_t *u = s->u_mvs[chan & 7]; +++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { +++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); +++ *u++ = ( (nPbW_cframe->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); // TODO this will become unused once we have a dedicated pass0 filter +++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ +++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x2_c - 3 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y2_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); +++ *u++ = ( (nPbW_csh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] +++ *u++ = rpi_filter_coefs[_mx2][0]; +++ *u++ = rpi_filter_coefs[_mx2][1]; +++ *u++ = rpi_filter_coefs[_my2][0]; +++ *u++ = rpi_filter_coefs[_my2][1]; +++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ } +++ } +++ s->u_mvs[chan & 7] = u; +++ return; +++ } +++#endif ++ RPI_REDIRECT(chroma_mc_bi)(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame, ++ x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0); ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 170e8ac..5d00cb2 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -265,23 +265,23 @@ 
unsigned int rpi_shader[] = { ++ /* [0x00000760] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++ /* [0x00000768] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++ /* [0x00000770] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000778] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000780] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-/* [0x00000788] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000778] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000780] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000788] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++ /* [0x00000790] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000798] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000798] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ /* [0x000007a0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000007a8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000007a8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ /* [0x000007b0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000007b8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000007b8] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++ /* [0x000007c0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000007c8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000007c8] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++ /* [0x000007d0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 
<< 13, r1 << 13 ++-/* [0x000007d8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000007d8] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++ /* [0x000007e0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000007e8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000007e8] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++ /* [0x000007f0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x000007f8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x000007f8] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++ /* [0x00000800] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++ /* [0x00000808] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++ /* [0x00000810] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++@@ -291,61 +291,63 @@ unsigned int rpi_shader[] = { ++ /* [0x00000830] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++ /* [0x00000838] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++ /* [0x00000840] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000848] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00000850] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00000858] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00000860] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000868] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000870] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000878] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000880] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000888] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000890] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* 
[0x00000898] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000008a0] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000848] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000850] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000858] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000860] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000868] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000870] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000878] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000880] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000888] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000890] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000898] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000008a0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++ /* [0x000008a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000008b0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x000008b8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000008c0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x000008c8] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000008d0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x000008d8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x000008e0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x000008e8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000008f0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000008f8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000900] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000908] */ 0x159f2fc0, 0x100009e7, // mov -, 
vw_wait ++-/* [0x00000910] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000918] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000920] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000928] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008b0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000008b8] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x000008c0] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x000008c8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000008d0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x000008d8] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000008e0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x000008e8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x000008f0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x000008f8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000900] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000908] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000910] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000918] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000920] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000928] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000930] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000938] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000930] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000938] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000940] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000948] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000940] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000948] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++ /* [0x00000950] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000958] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000960] */ 0x009e7000, 0x300009e7, // 
nop ; nop ; thrend ++-/* [0x00000968] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000970] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000960] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000968] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000970] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000978] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000980] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000978] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000980] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000988] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000988] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x00000990] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000998] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009a0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009a0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000009a8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x000009b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000009b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000009c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000009c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000009d0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009d8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000009e0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000009e8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000009d8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009e0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009e8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000009f0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000009f8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h 
++index 9de4535..e36c4ae 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -6,8 +6,8 @@ extern unsigned int rpi_shader[]; ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 142) ++ #define mc_filter_uv_b (rpi_shader + 360) ++-#define mc_exit (rpi_shader + 588) ++-#define mc_interrupt_exit8 (rpi_shader + 606) ++-#define mc_end (rpi_shader + 636) +++#define mc_exit (rpi_shader + 592) +++#define mc_interrupt_exit8 (rpi_shader + 610) +++#define mc_end (rpi_shader + 640) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index cd7346d..870437d2 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -443,23 +443,23 @@ add t0s, ra_x2_base, r2 ++ ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24 r2, r0, ra0 +++nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++nop ; mul24 r3, ra1 << 1, r0 << 1 ++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++ nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++ nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++ nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++ nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-sub r0, r2, r3 +++add r0, r2, r3 ++ ++ mov r3, rb31 
++ ++@@ -474,23 +474,25 @@ sub.setf -, r3, 8 ; mov r1, ra22 ++ ++ # apply horizontal filter ++ brr.anyn -, r:uvloop_b ++-max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++-asr r0, r0, 15 ; mov r1, ra21 ++-min.setf ra15, r0, rb22 +++mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr ra15, r0, 8 ; nop +++nop ; nop ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r0, ra14, rb14 ++-sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-sub.ifnn r1, r1, r0 ; mov -, vw_wait +++nop ; mul24 r1, ra14, rb14 +++nop ; mul24 r0, ra13, rb13 +++add r1, r1, r0 ; mul24 r0, ra12, rb12 +++add r1, r1, r0 ; mul24 r0, ra11, rb11 +++add r1, r1, r0 ; mul24 r0, ra10, rb10 +++add r1, r1, r0 ; mul24 r0, ra9, rb9 +++add r1, r1, r0 ; mul24 r0, ra8, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb15 +++add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 15 +++asr r1, r1, 14 +++add r1, r1, ra21 +++asr r1, r1, 6 ++ min r1, r1, rb22 ++ add r0, vpm, 1 # Blend in previous VPM contents at this location ++ brr.anyn -, r:uvloop_b ++-- ++2.7.4 ++ ++ ++From 3d4e94b8f0b08fe4c0b582fc7f1dbe9d1d9d60ed Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 08:15:55 +0100 ++Subject: [PATCH 22/68] Added flush for SAO ++ ++--- ++ libavcodec/hevc.c | 2 +- ++ libavcodec/hevc_filter.c | 39 ++++++++++++++++++++++++++------------- ++ 2 files changed, 27 insertions(+), 14 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 47ddfff..93e1eba 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2903,7 +2903,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ 
rpi_execute_inter_qpu(s); ++ #endif ++ // Transform all blocks ++- //printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); +++ // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++ rpi_execute_transform(s); ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 9b6e26d..92a8271 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -871,6 +871,21 @@ static void flush_buffer(AVBufferRef *bref) { ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++ gpu_cache_flush(p); ++ } +++ +++static void ff_hevc_flush_chroma(HEVCContext *s) +++{ +++ if (s->enable_rpi && !( s->nal_unit_type == NAL_TRAIL_N || +++ s->nal_unit_type == NAL_TSA_N || +++ s->nal_unit_type == NAL_STSA_N || +++ s->nal_unit_type == NAL_RADL_N || +++ s->nal_unit_type == NAL_RASL_N )) { +++ flush_buffer(s->frame->buf[1]); +++ flush_buffer(s->frame->buf[2]); +++ //memcpy(s->dummy.arm,s->frame->data[0],2048*64); +++ //memcpy(s->dummy.arm,s->frame->data[1],1024*32); +++ //memcpy(s->dummy.arm,s->frame->data[2],1024*32); +++ } +++} ++ #endif ++ ++ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++@@ -886,31 +901,29 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ sao_filter_CTB(s, x - ctb_size, y); ++ if (y && x_end) { ++ sao_filter_CTB(s, x, y - ctb_size); ++- if (s->threads_type & FF_THREAD_FRAME ) +++ if (s->threads_type & FF_THREAD_FRAME ) { +++#ifdef RPI_INTER_QPU +++ ff_hevc_flush_chroma(s); +++#endif ++ ff_thread_report_progress(&s->ref->tf, y, 0); +++ } ++ } ++ if (x_end && y_end) { ++ sao_filter_CTB(s, x , y); ++- if (s->threads_type & FF_THREAD_FRAME ) +++ if (s->threads_type & FF_THREAD_FRAME ) { +++#ifdef RPI_INTER_QPU +++ ff_hevc_flush_chroma(s); +++#endif ++ 
ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0); +++ } ++ } ++ } else if (s->threads_type & FF_THREAD_FRAME && x_end) { ++ //int newh = y + ctb_size - 4; ++ //int currh = s->ref->tf.progress->data[0]; ++ //if (((y + ctb_size)&63)==0) ++- if (!( s->nal_unit_type == NAL_TRAIL_N || ++- s->nal_unit_type == NAL_TSA_N || ++- s->nal_unit_type == NAL_STSA_N || ++- s->nal_unit_type == NAL_RADL_N || ++- s->nal_unit_type == NAL_RASL_N )) { ++ #ifdef RPI_INTER_QPU ++- flush_buffer(s->frame->buf[1]); ++- flush_buffer(s->frame->buf[2]); +++ ff_hevc_flush_chroma(s); ++ #endif ++- //memcpy(s->dummy.arm,s->frame->data[0],2048*64); ++- //memcpy(s->dummy.arm,s->frame->data[1],1024*32); ++- //memcpy(s->dummy.arm,s->frame->data[2],1024*32); ++- } ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++ } ++-- ++2.7.4 ++ ++ ++From 3e337b9c4ef0c356a0259be2254ad1bc4d5bbe29 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 09:17:28 +0100 ++Subject: [PATCH 23/68] Stopped using acceleration in unsupported cases ++ ++--- ++ libavcodec/hevc.c | 14 +++++++------- ++ libavcodec/hevc_cabac.c | 4 ++-- ++ 2 files changed, 9 insertions(+), 9 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 93e1eba..bfd5a55 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -1152,15 +1152,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (size * size); i++) { ++ coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); ++ } ++- printf("Cross component not supported\n"); // TODO ++- exit(-1); ++ s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride); ++ } ++ } ++ ++ if (lc->tu.cross_pf) { ++- printf("Cross component not supported\n"); // TODO ++- exit(-1); ++ hls_cross_component_pred(s, 1); ++ } ++ for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 
2 : 1); i++) { ++@@ -1189,8 +1185,6 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (size * size); i++) { ++ coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); ++ } ++- printf("Cross component not supported\n"); // TODO ++- exit(-1); ++ s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride); ++ } ++ } ++@@ -2857,7 +2851,13 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; ++ ++ #ifdef RPI ++- s->enable_rpi = 1; // TODO this should depend on cross component and frame width etc. +++ s->enable_rpi = s->ps.sps->bit_depth == 8 +++ && s->ps.sps->width <= RPI_MAX_WIDTH +++ && !s->ps.pps->cross_component_prediction_enabled_flag +++ && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1 +++ && !(s->ps.pps->weighted_pred_flag && s->sh.slice_type == P_SLICE) +++ && !(s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE); +++ ++ #endif ++ ++ if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 4f072be..38f53de 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1513,9 +1513,9 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ #ifdef RPI ++ if (!use_vpu) { ++ int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y); ++- if (max_xy == 0) +++ if (max_xy == 0) { ++ s->hevcdsp.idct_dc[log2_trafo_size-2](coeffs); ++- else { +++ } else { ++ int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4; ++ if (max_xy < 4) ++ col_limit = FFMIN(4, col_limit); ++-- ++2.7.4 ++ ++ ++From 3941d3e4c2305fa037e8aba5a14cf698ac8673db Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 09:42:16 +0100 ++Subject: [PATCH 24/68] Split B prediction into two passes ++ ++--- ++ libavcodec/hevc.c | 1 + ++ libavcodec/hevc.h | 1 + ++ libavcodec/rpi_qpu.c | 3 + ++ 
libavcodec/rpi_qpu.h | 1 + ++ libavcodec/rpi_shader.c | 559 +++++++++++++++++++++++++++------------------ ++ libavcodec/rpi_shader.h | 11 +- ++ libavcodec/rpi_shader.qasm | 196 ++++++++++++++-- ++ 7 files changed, 531 insertions(+), 241 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index bfd5a55..4b133d2 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3801,6 +3801,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ p += uv_commands_per_qpu; ++ } ++ s->mc_filter_uv = qpu_get_fn(QPU_MC_FILTER_UV); +++ s->mc_filter_uv_b0 = qpu_get_fn(QPU_MC_FILTER_UV_B0); ++ s->mc_filter_uv_b = qpu_get_fn(QPU_MC_FILTER_UV_B); ++ ++ } ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index d513579..4a39e39 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -917,6 +917,7 @@ typedef struct HEVCContext { ++ uint32_t *u_mvs[8]; ++ // Function pointers ++ uint32_t mc_filter_uv; +++ uint32_t mc_filter_uv_b0; ++ uint32_t mc_filter_uv_b; ++ #endif ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 4e90cc1..60bf079 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -636,6 +636,9 @@ unsigned int qpu_get_fn(int num) { ++ case QPU_MC_FILTER_UV: ++ fn = mc_filter_uv; ++ break; +++ case QPU_MC_FILTER_UV_B0: +++ fn = mc_filter_uv_b0; +++ break; ++ case QPU_MC_FILTER_UV_B: ++ fn = mc_filter_uv_b; ++ break; ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index f9ad333..543c84b 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -29,6 +29,7 @@ enum { ++ QPU_MC_FILTER_HONLY, ++ QPU_MC_SETUP_UV, ++ QPU_MC_FILTER_UV, +++ QPU_MC_FILTER_UV_B0, ++ QPU_MC_FILTER_UV_B, ++ QPU_MC_INTERRUPT_EXIT8, ++ QPU_MC_END ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 5d00cb2..88ad20b 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -39,18 +39,18 @@ unsigned int rpi_shader[] = { ++ /* [0x00000070] */ 0x00000100, 0xe00205a7, // 
mov ra22, 256 ++ /* [0x00000078] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++ /* [0x00000080] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000088] */ 0x00000040, 0xe0021567, // mov rb21, 64 ++-/* [0x00000090] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000098] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x000000a0] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x000000a8] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x000000b0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x000000b8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x000000c0] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x000000c8] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x000000d0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x000000d8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000e0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000088] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000090] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000098] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x000000a0] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x000000a8] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x000000b0] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x000000b8] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x000000c0] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x000000c8] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x000000d0] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x000000d8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000000e0] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++ /* [0x000000e8] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++ /* [0x000000f0] */ 0x159e7480, 0x10020867, // mov r1, r2 ++ /* [0x000000f8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++@@ -62,176 +62,176 @@ unsigned int rpi_shader[] = { ++ /* [0x00000128] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++ /* [0x00000130] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++ /* 
[0x00000138] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000140] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000148] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000150] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000158] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000160] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000168] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000170] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000178] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000180] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000188] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x00000190] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x00000198] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x000001a0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000001a8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001b0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000001b8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001c0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++-/* [0x000001c8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001d0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x000001d8] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x000001e0] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x000001e8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000001f0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000001f8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000200] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000208] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000210] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 
1 ++-/* [0x00000218] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000220] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000228] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000230] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000140] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x00000148] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000150] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000158] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000160] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000168] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000170] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000178] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000180] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000188] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000190] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 +++/* [0x00000198] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) +++/* [0x000001a0] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 +++/* [0x000001a8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x000001b0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000001b8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x000001c0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000001c8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000001d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000001d8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001e0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 +++/* [0x000001e8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x000001f0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x000001f8] */ 0x8c9e7052, 0x10025e18, 
// add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x00000200] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x00000208] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000218] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000220] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000228] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000230] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000238] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000240] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000248] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000250] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv ++-/* [0x00000238] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000240] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000248] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000250] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000258] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000260] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000268] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000270] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000278] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000280] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000288] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000290] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000298] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002a0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002a8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002b0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002b8] */ 0x149d61c0, 
0x10020827, // and r0, r0, rb22 ++-/* [0x000002c0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000002c8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x000002d0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000002e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000002e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000002f0] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x000002f8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000300] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000308] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000310] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000318] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000320] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000340] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000350] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000360] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000368] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000370] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000378] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000380] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000388] */ 
0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000390] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000258] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000260] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000268] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000270] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000278] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000280] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000288] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000290] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000298] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000002a0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000002a8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000002b0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002b8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002c0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002c8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002d0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002d8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002e0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000002e8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000002f0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002f8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000300] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000308] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000310] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x4f5971c6, 
0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000340] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000350] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000360] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000368] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000370] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000378] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000380] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000388] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000390] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000398] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x000003a0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000003a8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003b0] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x00000398] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003a0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000003a8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003b0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003b8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* 
[0x000003c0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003c8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003d0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003d8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003e0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000003e8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003f0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000003f8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000400] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000408] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000410] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000418] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000420] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000428] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000430] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000438] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000440] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000448] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000450] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000458] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000460] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000468] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000470] */ 0x0c9e74c0, 0x10020827, // add r0, 
r2, r3 ++-/* [0x00000478] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000480] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000488] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000490] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000498] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000004a0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000004a8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000004b0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000004b8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004c0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000004c8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000004d0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000004d8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x000004e0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x000004e8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x000004f0] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x000004f8] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000500] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000508] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000510] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000518] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000520] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000528] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000530] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000538] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000540] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000548] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* 
[0x00000550] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000558] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000560] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000568] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000570] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000578] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000580] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000588] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000590] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000598] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// ::mc_filter_uv_b ++-/* [0x000005a0] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000005a8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000005b0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000005b8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000005c0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000005c8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000005d0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000005d8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000005e0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000005e8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000005f0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000005f8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000600] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000608] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000610] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000618] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000620] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000628] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000630] */ 0x0c9c71c0, 
0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000638] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000640] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000648] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000650] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000658] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000660] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000668] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000670] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x000003b8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003c0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000003c8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003d0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003d8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003e0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003e8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003f0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003f8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000400] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000408] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000410] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000418] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000420] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000428] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000430] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* 
[0x00000438] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000440] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000448] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000450] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000458] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000460] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000468] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000470] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000478] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000480] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000488] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000490] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000498] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x000004a0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x000004a8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x000004b0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x000004b8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000004c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000004c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000004d0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000004d8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004e0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000004e8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000004f0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000004f8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000500] */ 0x4034d037, 0x100049e0, 
// nop ; mul24 r0, ra13, rb13 +++/* [0x00000508] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000510] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000518] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000520] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000528] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000530] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000538] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000540] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000548] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000550] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000558] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000560] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000568] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000570] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000578] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000580] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000588] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000590] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000598] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000005a0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000005a8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000005b0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000005b8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter_uv_b0 +++/* [0x000005c0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000005c8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000005d0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000005d8] */ 0x938001f6, 0xd0024821, // max 
r0, r0, 0; mov r1, unif +++/* [0x000005e0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000005e8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000005f0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000005f8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000600] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000608] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000610] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000618] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000620] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000628] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000630] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000638] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000640] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000648] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000650] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000658] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000660] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000668] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000670] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++ /* [0x00000678] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++ /* [0x00000680] */ 0x15827d80, 0x10020827, // mov r0, unif ++ /* [0x00000688] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++@@ -253,7 +253,7 @@ unsigned int rpi_shader[] = { ++ /* [0x00000708] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++ /* [0x00000710] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ /* [0x00000718] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++-// :uvloop_b +++// :uvloop_b0 ++ /* [0x00000720] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++ /* [0x00000728] */ 
0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++ /* [0x00000730] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++@@ -290,7 +290,7 @@ unsigned int rpi_shader[] = { ++ /* [0x00000828] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++ /* [0x00000830] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++ /* [0x00000838] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000840] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000840] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++ /* [0x00000848] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++ /* [0x00000850] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++ /* [0x00000858] */ 0x009e7000, 0x100009e7, // nop ; nop ++@@ -306,48 +306,163 @@ unsigned int rpi_shader[] = { ++ /* [0x000008a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ /* [0x000008b0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++ /* [0x000008b8] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x000008c0] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x000008c8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000008d0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x000008d8] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000008e0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x000008e8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x000008f0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x000008f8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000900] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000908] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000910] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000918] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000920] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000928] */ 0x0c9dae00, 0x10021c67, // add vw_setup, 
rb26, r0 ++-/* [0x00000930] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000938] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008c0] */ 0xfffffad8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000008c8] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x000008d0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000008d8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000008e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000008e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000008f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008f8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000900] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000908] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000910] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000918] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000920] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter_uv_b +++/* [0x00000928] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000930] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000938] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000940] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000948] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000950] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000958] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000960] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000968] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000970] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000978] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000980] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000988] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000990] 
*/ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000998] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000009a0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000009a8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000009b0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000009b8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000009c0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000009c8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x000009d0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x000009d8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x000009e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000009e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000009f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000009f8] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00000a00] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000a08] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000a10] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a18] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a20] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a28] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000a30] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a38] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a40] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a48] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000a50] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a58] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a60] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a68] */ 0x8f8171f6, 
0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000a70] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a78] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a80] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a88] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000a90] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000a98] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000aa0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :uvloop_b +++/* [0x00000aa8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000ab0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000ab8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000ac0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000ac8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000ad0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000ad8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000ae0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000ae8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000af0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000af8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000b00] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000b08] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000b10] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000b18] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000b20] */ 
0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000b28] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000b30] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000b38] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000b40] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000b48] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000b50] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000b58] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000b60] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000b68] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000b70] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000b78] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000b80] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000b88] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000b90] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000b98] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000ba0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000ba8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000bb0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000bb8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000bc0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000bc8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000bd0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000bd8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000be0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000be8] */ 0x4038e037, 
0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000bf0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000bf8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000c00] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000c08] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000c10] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000c18] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000c20] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000c28] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000c30] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000c38] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000c40] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000c48] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000c50] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000c58] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x00000c60] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000c68] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000c70] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00000c78] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x00000c80] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000c88] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000c90] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000c98] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000ca0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ca8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000cb0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000cb8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000cc0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // 
::mc_exit ++-/* [0x00000940] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000948] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000950] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000958] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000960] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000968] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000970] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000978] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000980] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000cc8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000cd0] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000cd8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ce0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ce8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000cf0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000cf8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000d00] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000d08] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000988] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000990] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000998] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009a0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009a8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009d0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009d8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009e0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009e8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000009f0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000009f8] 
*/ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000d10] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000d18] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d20] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d28] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d30] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d38] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d40] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d48] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d50] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d58] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d60] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d70] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000d78] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000d80] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index e36c4ae..809e582 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,10 +4,11 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 142) ++-#define mc_filter_uv_b (rpi_shader + 360) ++-#define mc_exit (rpi_shader + 592) ++-#define mc_interrupt_exit8 (rpi_shader + 610) ++-#define mc_end (rpi_shader + 640) +++#define mc_filter_uv (rpi_shader + 150) +++#define mc_filter_uv_b0 (rpi_shader + 368) +++#define mc_filter_uv_b (rpi_shader + 586) +++#define mc_exit (rpi_shader + 818) +++#define mc_interrupt_exit8 (rpi_shader + 836) +++#define mc_end (rpi_shader + 866) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 870437d2..635b894 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -26,7 +26,7 @@ ++ # ra23 8 ++ # ++ # rb20 0xffffff00 ++-# rb21 64 
+++# rb21 vpm_setup for writing 16bit results into VPM ++ # rb22 255 ++ # rb23 24 ++ # ++@@ -34,7 +34,7 @@ ++ # rb25 frame width-1 ++ # rb26 height<<23 + width<<16 + vdw_setup_0 ++ # rb27 vdw_setup_0 (depends on QPU number) ++-# rb28 vpm_setup (depends on QPU number) +++# rb28 vpm_setup (depends on QPU number) for writing 8bit results into VPM ++ # rb29 vdw_setup_1(dst_pitch-width) ++ # rb30 frame height-1 ++ # rb31 used as temp to count loop iterations ++@@ -69,8 +69,6 @@ ++ .set ra_y_next, ra28 ++ .set ra_y, ra29 ++ ++-.set rb_const_64, rb21 ++- ++ ++ ################################################################################ ++ # mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1, pad2) ++@@ -106,7 +104,6 @@ mov ra22, 256 ++ mov ra23, 8 ++ ++ mov rb20, 0xffffff00 ++-mov rb21, 64 ++ mov rb22, 255 ++ mov rb23, 24 ++ ++@@ -123,6 +120,7 @@ mov ra15, 0 ++ ++ # Compute part of VPM to use for DMA output ++ mov r2, qpu_num +++shl r2, r2, 1 # Convert QPU numbers to be even (this means we can only use 8 QPUs, but is necessary as we need to save 16bit intermediate results) ++ and r2, r2, 15 ++ mov r1, r2 ++ asr r1, r1, 2 ++@@ -135,16 +133,21 @@ shl r0, r0, 5 ++ add rb27, r0, r1 ++ ++ # Compute part of VPM to save data into ++-mov r2, qpu_num ++-and r2, r2, 15 ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vpm_setup(0, 4, h8p(0, 0)) +++mov r2, qpu_num # qpu_num = abcd +++shl r2, r2, 1 +++and r2, r2, 15 # r2 = bcd0 +++mov r1, r2 # r1 = bcd0 +++asr r1, r1, 2 # r1 = bc +++shl r1, r1, 6 # r1 = bc000000 +++mov r0, r2 # r0 = bcd0 +++and r0, r0, 3 # r0 = d0 +++add r0, r0, r1 # r0 = bc0000d0 +++mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit ++ add rb28, r0, r1 +++asr r0, r0, 1 # r0 = bc0000d +++# Prepare VPM command for 16bit intermediates +++mov r1, vpm_setup(0, 2, h16p(0, 0)) # 2 is stride - stride 
acts on ADDR which is Y[5:0],H[0] for 16 bit +++add rb21, r0, r1 ++ ++ # Compute base address for first and second access ++ mov r0, ra_x_base # Load x ++@@ -345,6 +348,171 @@ mov vw_addr, unif # start the VDW ++ ++ ################################################################################ ++ +++# mc_filter_uv_b0(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) +++ +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x_base, ra_x16_base point to the current coordinates for this block +++::mc_filter_uv_b0 +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base +++shl ra_xshift_next, r0, 3 +++sub r2, unif, r3 # compute offset from frame base u to frame base v +++add r0, r0, r3 +++and rb_x_base_next, r0, ~3 +++mov ra_y_next, r1 +++add ra_x2_base_next, rb_x_base_next, r2 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, 
rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++asr rb15, r0, rb23; mul24 r0, r0, ra22 +++asr rb14, r0, rb23; mul24 r0, r0, ra22 +++asr rb13, r0, rb23; mul24 r0, r0, ra22 +++asr rb12, r0, rb23 +++ +++# r2 is elem_num +++# r3 is loop counter +++ +++mov r5rep, -8 +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++mov r3, 0 +++ +++:uvloop_b0 +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++nop ; mul24 r2, r0, ra0 +++nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++nop ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++add r2, r2, r3 ; mul24 r3, ra6 << 
6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++add r0, r2, r3 +++ +++mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter +++brr.anyn -, r:uvloop_b0 +++mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll +++asr ra15, r0, 8 ; nop +++nop ; nop # Delay slot 3 (TODO move more of the context scroll into here) +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r1, ra14, rb14 +++nop ; mul24 r0, ra13, rb13 +++add r1, r1, r0 ; mul24 r0, ra12, rb12 +++add r1, r1, r0 ; mul24 r0, ra11, rb11 +++add r1, r1, r0 ; mul24 r0, ra10, rb10 +++add r1, r1, r0 ; mul24 r0, ra9, rb9 +++add r1, r1, r0 ; mul24 r0, ra8, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb15 +++add r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++asr r1, r1, 14 +++add r1, r1, ra21 +++brr.anyn -, r:uvloop +++asr r1, r1, 6 # Delay 1 +++min r1, r1, rb22 # Delay 2 +++max vpm, r1, 0 # Delay 3 +++ +++# DMA out for U +++ +++mov vw_setup, rb26 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++# DMA out for V +++# We need to wait for the U to complete first, but have nothing useful to compute while we wait. +++# Could potentially push this write into the start of the next pipeline stage. 
+++mov r0, 16 +++mov -, vw_wait +++ +++bra -, ra31 +++add vw_setup, rb26, r0 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++################################################################################ +++ ++ ::mc_filter_uv_b ++ mov ra31, unif ++ ++-- ++2.7.4 ++ ++ ++From 85d0ffa2bcf6a2b94c1a0c8f84241cda9ac92ce2 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 10:04:55 +0100 ++Subject: [PATCH 25/68] Switch to using 16bit temp buffers ++ ++--- ++ libavcodec/hevc.c | 2 +- ++ libavcodec/rpi_shader.c | 4 ++-- ++ libavcodec/rpi_shader.qasm | 10 +++++----- ++ 3 files changed, 8 insertions(+), 8 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 4b133d2..28a6660 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2147,7 +2147,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++- u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b0; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 88ad20b..ffd3a07 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -220,7 +220,7 @@ unsigned int rpi_shader[] = { ++ /* [0x00000600] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++ /* [0x00000608] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++ /* [0x00000610] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000618] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000618] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++ /* [0x00000620] */ 0x00000010, 0xe00208a7, // mov 
r2, 16 ++ /* [0x00000628] */ 0x15827d80, 0x10020827, // mov r0, unif ++ /* [0x00000630] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++@@ -346,7 +346,7 @@ unsigned int rpi_shader[] = { ++ /* [0x000009e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++ /* [0x000009e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++ /* [0x000009f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000009f8] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x000009f8] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++ /* [0x00000a00] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++ /* [0x00000a08] */ 0x15827d80, 0x10020827, // mov r0, unif ++ /* [0x00000a10] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 635b894..9577121 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -26,7 +26,7 @@ ++ # ra23 8 ++ # ++ # rb20 0xffffff00 ++-# rb21 vpm_setup for writing 16bit results into VPM +++# rb21 vpm_setup for reading/writing 16bit results into VPM ++ # rb22 255 ++ # rb23 24 ++ # ++@@ -370,8 +370,8 @@ and rb_x_base_next, r0, ~3 ++ mov ra_y_next, r1 ++ add ra_x2_base_next, rb_x_base_next, r2 ++ ++-# set up VPM write ++-mov vw_setup, rb28 +++# set up VPM write, we need to save 16bit precision +++mov vw_setup, rb21 ++ ++ # get width,height of block ++ mov r2, 16 ++@@ -554,8 +554,8 @@ add r0, r0, r1 # Combine width and height of destination area ++ shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++ add rb26, r0, rb27 ++ ++-# In a B frame, so also set up VPM read ++-add vr_setup, r3, rb28 +++# In a B frame, so also set up VPM read (reading back 16bit precision) +++add vr_setup, r3, rb21 ++ ++ sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++ ++-- ++2.7.4 ++ ++ ++From abc51bf61df597082fbd7cf1bba5031e4d44318b Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 10:30:44 +0100 
++Subject: [PATCH 26/68] Corrected B prediction: matching md5 sum for hobbit50 ++ ++--- ++ libavcodec/rpi_shader.c | 815 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 12 +- ++ libavcodec/rpi_shader.qasm | 36 +- ++ 3 files changed, 429 insertions(+), 434 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index ffd3a07..77cca46 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -38,431 +38,428 @@ unsigned int rpi_shader[] = { ++ /* [0x00000068] */ 0x00000020, 0xe0020567, // mov ra21, 32 ++ /* [0x00000070] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++ /* [0x00000078] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++-/* [0x00000080] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000088] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000090] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x00000098] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x000000a0] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x000000a8] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x000000b0] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x000000b8] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x000000c0] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x000000c8] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x000000d0] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000d8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x000000e0] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-/* [0x000000e8] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x000000f0] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x000000f8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000100] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000108] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000110] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000118] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000120] */ 0x80004004, 0xe0020867, // mov 
r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000128] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000130] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000138] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000140] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-/* [0x00000148] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000150] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000158] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000160] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000168] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000170] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000178] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000180] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000188] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000190] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 ++-/* [0x00000198] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) ++-/* [0x000001a0] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 ++-/* [0x000001a8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x000001b0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000001b8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x000001c0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000001c8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000001d8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001e0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++-/* [0x000001e8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001f0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x000001f8] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* 
[0x00000200] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00000208] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000080] */ 0x00000040, 0xe00207a7, // mov ra30, 64 +++/* [0x00000088] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000090] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000098] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x000000a0] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x000000a8] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x000000b0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x000000b8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x000000c0] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x000000c8] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x000000d0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x000000d8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x000000e0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000000e8] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x000000f0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x000000f8] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000100] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000108] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000110] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000118] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000120] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000128] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000130] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000138] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000140] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000148] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x00000150] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000158] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000160] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000168] */ 0x119c63c0, 
0xd0020867, // shl r1, r1, 6 +++/* [0x00000170] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000178] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000180] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000188] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000190] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000198] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 +++/* [0x000001a0] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) +++/* [0x000001a8] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 +++/* [0x000001b0] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x000001b8] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000001c0] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x000001c8] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000001d0] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000001d8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000001e0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001e8] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 +++/* [0x000001f0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x000001f8] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x00000200] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x00000208] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++ /* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif ++ /* [0x00000218] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000220] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000228] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000230] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000238] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000240] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, 
r1, rb_pitch ++-/* [0x00000248] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000250] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000220] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000228] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000230] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000238] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000240] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000248] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000250] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000258] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv ++-/* [0x00000258] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000260] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000268] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000270] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000278] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000280] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000288] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000290] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000298] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000002a0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000002a8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000002b0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002b8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002c0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002c8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002d0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002d8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002e0] */ 0x0c9c51c0, 
0xd0021467, // add rb17, r0, 5 ++-/* [0x000002e8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x000002f0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002f8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000300] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000308] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000310] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000340] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000350] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000360] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000368] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000370] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000378] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000380] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000388] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000390] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000398] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x000003a0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000003a8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 
1, 1, 1, 1, 1] ++-/* [0x000003b0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000260] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000268] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000270] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000278] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000280] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000288] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000290] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000298] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000002a0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000002a8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000002b0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000002b8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002c0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002c8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002d0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002d8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002e0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002e8] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000002f0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000002f8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000310] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000318] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000320] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000328] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] 
*/ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000340] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000348] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000350] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000360] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000368] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000370] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000378] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000380] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000388] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000390] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000398] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000003a0] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x000003a8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000003b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003b8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x000003b8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003c0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000003c8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003d0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003d8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003e0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003e8] 
*/ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003f0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003f8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000400] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000408] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000410] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000418] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000420] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000428] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000430] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000438] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000440] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000448] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000450] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000458] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000460] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000468] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000470] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000478] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000480] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000488] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000490] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000498] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* 
[0x000004a0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x000004a8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x000004b0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x000004b8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000004c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000004c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000004d0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000004d8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004e0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000004e8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000004f0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000004f8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000500] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000508] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000510] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000518] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000520] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000528] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000530] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000538] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000540] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000548] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000550] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000558] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000560] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000568] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000570] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000578] */ 
0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000580] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000588] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000590] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000598] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000005a0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000005a8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000005b0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000005b8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000003c0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003c8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000003d0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003d8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003e0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003e8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003f0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003f8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000400] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000408] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000410] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000418] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000420] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000428] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000430] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000438] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 
<< 2, r0 << 2 +++/* [0x00000440] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000448] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000450] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000458] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000460] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000468] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000470] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000478] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000480] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000488] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000490] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000498] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x000004a0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x000004a8] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x000004b0] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x000004b8] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x000004c0] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000004c8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000004d0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000004d8] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000004e0] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004e8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000004f0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000004f8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000500] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000508] */ 
0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000510] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000518] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000520] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000528] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000530] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000538] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000540] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000548] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000550] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000558] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000560] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000568] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000570] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000578] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000580] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000588] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000590] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000598] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000005a0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000005a8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000005b0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000005b8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000005c0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x000005c0] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000005c8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000005d0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000005d8] */ 
0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000005e0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000005e8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000005f0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000005f8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000600] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000608] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000610] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000618] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000620] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000628] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000630] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000638] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000640] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000648] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000650] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000658] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000660] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000668] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000670] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000678] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000680] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000688] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000690] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000698] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000006a8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006b0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 
r0, r0, ra22 ++-/* [0x000006b8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000006c8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000006e8] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f0] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f8] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000700] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000708] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000710] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000718] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000005c8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000005d0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000005d8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000005e0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000005e8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000005f0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000005f8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000600] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000608] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000610] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000618] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000620] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* 
[0x00000628] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000630] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000638] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000640] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000648] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000650] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000658] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000660] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000668] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000670] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000678] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000680] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000688] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000690] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000698] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000006b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000006d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000006f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f8] */ 0x4f5971c6, 0x100253a0, // asr 
rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000700] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000708] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000710] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000718] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000720] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000720] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000728] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000730] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000738] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000740] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000748] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000750] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000758] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000760] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000768] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000770] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000778] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000780] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000788] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000790] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000798] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000007a0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000007a8] */ 
0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000007b0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000007b8] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000007c0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000007c8] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000007d0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000007d8] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000007e0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000007e8] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000007f0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x000007f8] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000800] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000808] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000810] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000818] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000820] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000828] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000830] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000838] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000840] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000848] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000850] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000858] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000860] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000868] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000870] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 
r0, ra12, rb12 ++-/* [0x00000878] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000880] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000888] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000890] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000898] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000008a0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000008a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000008b0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000008b8] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x000008c0] */ 0xfffffad8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000008c8] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x000008d0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000008d8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000008e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000008e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000008f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000008f8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000900] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000908] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000910] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000918] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000920] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000728] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000730] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000738] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000740] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; 
mov r3, rb_pitch +++/* [0x00000748] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000750] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000758] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000760] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000768] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000770] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000778] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000780] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000788] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000790] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000798] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000007a0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000007a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000007b0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000007b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000007c0] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000007c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000007d0] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000007d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000007e0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000007e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000007f0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000007f8] */ 
0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000800] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000808] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000810] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000818] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000820] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000828] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000830] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000838] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000840] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000848] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000850] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000858] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000860] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000868] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000870] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000878] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000880] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000888] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000890] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000898] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x000008a0] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000008a8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000008b0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000008b8] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000008c0] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 +++/* [0x000008c8] */ 0x009e7000, 0x100009e7, // nop +++/* [0x000008d0] */ 
0x009e7000, 0x100009e7, // nop +++/* [0x000008d8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000008e0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000008e8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008f0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000008f8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000900] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000908] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000910] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000918] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x00000928] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000930] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000938] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000940] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000948] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000950] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000958] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000960] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000968] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000970] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000978] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000980] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000988] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000990] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000998] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000009a0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000009a8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000009b0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000009b8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 
++-/* [0x000009c0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000009c8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x000009d0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x000009d8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x000009e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000009e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000009f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000009f8] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000a00] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000a08] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000a10] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a18] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a20] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a28] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000a30] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a38] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a40] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a48] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000a50] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a58] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a60] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a68] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000a70] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a78] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a80] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a88] */ 0x0f9d71c0, 0x10021327, // asr rb12, 
r0, rb23 ++-/* [0x00000a90] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000a98] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000aa0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000920] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000928] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000930] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000938] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000940] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000948] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000950] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000958] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000960] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000968] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000970] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000978] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000980] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000988] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000990] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000998] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000009a0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000009a8] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000009b0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000009b8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000009c0] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x000009c8] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x000009d0] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x000009d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000009e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000009e8] */ 
0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000009f0] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x000009f8] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000a00] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000a08] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a10] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a18] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a20] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000a28] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a30] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a38] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a40] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000a48] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a50] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a58] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a60] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000a68] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a70] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a78] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a80] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000a88] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000a90] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000a98] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000aa8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000ab0] */ 0x8e4539bf, 0xa0029810, 
// shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000ab8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000ac0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000ac8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000ad0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000ad8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000ae0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000ae8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000af0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000af8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000b00] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000b08] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000b10] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000b18] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000b20] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000b28] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000b30] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000b38] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000b40] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000b48] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000b50] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000b58] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000b60] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 
; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000b68] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000b70] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000b78] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000b80] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000b88] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000b90] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000b98] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000ba0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000ba8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000bb0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000bb8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000bc0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000bc8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000bd0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000bd8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000be0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000be8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000bf0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000bf8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000c00] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000c08] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000c10] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000c18] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000c20] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000c28] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000c30] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 
; mul24 r1, r1, ra22 ++-/* [0x00000c38] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000c40] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000c48] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000c50] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000c58] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x00000c60] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000c68] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000c70] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x00000c78] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x00000c80] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000c88] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000c90] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000c98] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000ca0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000ca8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000cb0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000cb8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000cc0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000aa0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000aa8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000ab0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000ab8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000ac0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000ac8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000ad0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000ad8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000ae0] */ 0xec414c8f, 0x10024e21, // add t0s, 
ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000ae8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000af0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000af8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000b00] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000b08] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000b10] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000b18] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000b20] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000b28] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000b30] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000b38] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000b40] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000b48] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000b50] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000b58] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000b60] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000b68] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000b70] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000b78] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000b80] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000b88] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000b90] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000b98] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000ba0] */ 
0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000ba8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000bb0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000bb8] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000bc0] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000bc8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000bd0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000bd8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000be0] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000be8] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000bf0] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000bf8] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000c00] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000c08] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000c10] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000c18] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000c20] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000c28] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000c30] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000c38] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000c40] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000c48] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000c50] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000c58] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000c60] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000c68] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000c70] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000c78] */ 
0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000c80] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000c88] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000c90] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000c98] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000ca0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000ca8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000cc8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000cd0] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000cb0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000cb8] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000cc0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000cc8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000cd0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000cd8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ce0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ce8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000cf0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000cf8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000d00] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000d08] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000ce0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ce8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000cf0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000d10] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000cf8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000d00] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d08] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d10] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000d18] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d20] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d28] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d30] */ 0x009e7000, 
0xa00009e7, // ldtmu0 +++/* [0x00000d20] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d28] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d30] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000d38] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000d40] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000d48] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000d50] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d58] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d60] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d70] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000d78] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000d80] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000d58] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000d60] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000d68] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 809e582..6562fa9 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,11 +4,11 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 150) ++-#define mc_filter_uv_b0 (rpi_shader + 368) ++-#define mc_filter_uv_b (rpi_shader + 586) ++-#define mc_exit (rpi_shader + 818) ++-#define mc_interrupt_exit8 (rpi_shader + 836) ++-#define mc_end (rpi_shader + 866) +++#define mc_filter_uv (rpi_shader + 152) +++#define mc_filter_uv_b0 (rpi_shader + 370) +++#define mc_filter_uv_b (rpi_shader + 584) +++#define mc_exit (rpi_shader + 812) +++#define mc_interrupt_exit8 (rpi_shader + 830) +++#define mc_end (rpi_shader + 860) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 9577121..562dc35 100644 ++--- 
a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -39,13 +39,13 @@ ++ # rb30 frame height-1 ++ # rb31 used as temp to count loop iterations ++ # ++-# ra24...ra30 15, 14, 13, 12, 11, 10, 9 ++ # ra24 clipped(row start address+8+elem_num)&~3 ++ # ra25 per-channel shifts 2 ++ # ra26 next ra24 ++ # ra27 next ra25 ++ # ra28 next y ++ # ra29 y for next texture access +++# ra30 64 ++ # ++ # ra31 next kernel address ++ ++@@ -102,6 +102,7 @@ mov ra20, 1 ++ mov ra21, 32 ++ mov ra22, 256 ++ mov ra23, 8 +++mov ra30, 64 ++ ++ mov rb20, 0xffffff00 ++ mov rb22, 255 ++@@ -472,7 +473,7 @@ sub.setf -, r3, 8 ; mov r1, ra22 ++ # apply horizontal filter ++ brr.anyn -, r:uvloop_b0 ++ mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll ++-asr ra15, r0, 8 ; nop +++asr ra15, r0, 8 ; nop # TODO isn't ra15 already in 24bit precision, may not need the sign extension here? ++ nop ; nop # Delay slot 3 (TODO move more of the context scroll into here) ++ ++ # apply vertical filter and write to VPM ++@@ -487,18 +488,18 @@ add r1, r1, r0 ; mul24 r0, ra8, rb8 ++ add r1, r1, r0 ; mul24 r0, ra15, rb15 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 14 ++-add r1, r1, ra21 ++-brr.anyn -, r:uvloop ++-asr r1, r1, 6 # Delay 1 ++-min r1, r1, rb22 # Delay 2 ++-max vpm, r1, 0 # Delay 3 +++#asr r1, r1, 14 +++#add r1, r1, ra21 +++brr.anyn -, r:uvloop_b0 +++asr vpm, r1, 14 # Delay 1 shifts down by shift2=6, but results are still in 16bit precision TODO may be able to avoid the mul24 and use more delay slots +++nop # Delay 2 +++nop # Delay 3 ++ ++ # DMA out for U ++ ++ mov vw_setup, rb26 # VDW setup 0 ++ mov vw_setup, rb29 # Stride ++-mov vw_addr, unif # start the VDW +++mov vw_addr, unif # start the VDW # TODO in pass0 we don't need to save any results ++ ++ # DMA out for V ++ # We need to wait for the U to complete first, but have nothing useful to compute while we wait. 
++@@ -639,12 +640,11 @@ mov ra12, ra13 ++ mov ra13, ra14 ++ ++ sub.setf -, r3, 8 ; mov r1, ra22 ++- ++ # apply horizontal filter ++ brr.anyn -, r:uvloop_b ++ mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++ asr ra15, r0, 8 ; nop ++-nop ; nop +++nop ; nop # TODO improve use of delay slots ++ ++ # apply vertical filter and write to VPM ++ ++@@ -658,15 +658,13 @@ add r1, r1, r0 ; mul24 r0, ra8, rb8 ++ add r1, r1, r0 ; mul24 r0, ra15, rb15 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 14 ++-add r1, r1, ra21 ++-asr r1, r1, 6 ++-min r1, r1, rb22 ++-add r0, vpm, 1 # Blend in previous VPM contents at this location +++asr r1, r1, 14 # shift2=6 +++add r1, r1, vpm # Blend in previous VPM contents at this location +++add r1, r1, ra30 ++ brr.anyn -, r:uvloop_b ++-max r1, r1, 0 ++-add r1, r1, r0 ++-shr vpm, r1, 1 +++asr r1, r1, 7 # Delay 1 +++min r1, r1, rb22 # Delay 2 +++max vpm, r1, 0 # Delay 3 ++ ++ ++ # DMA out for U ++-- ++2.7.4 ++ ++ ++From ea60373134f98099c4ebaf0d23cca666008b4bba Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 10:55:07 +0100 ++Subject: [PATCH 27/68] P prediction uses 4 tap filters ++ ++--- ++ libavcodec/hevc.c | 50 ++-- ++ libavcodec/rpi_shader.c | 631 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 10 +- ++ libavcodec/rpi_shader.qasm | 43 +-- ++ 4 files changed, 344 insertions(+), 390 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 28a6660..a47ebc5 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -65,15 +65,15 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ #define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++ // TODO Chroma only needs 4 taps ++-static uint32_t rpi_filter_coefs[8][2] = { ++- { ENCODE_COEFFS( 0, 0, 0, 64), ENCODE_COEFFS( 0, 0, 0, 0 ) }, ++- { 
ENCODE_COEFFS( 0, 0, -2, 58), ENCODE_COEFFS( 10, -2, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -4, 54), ENCODE_COEFFS( 16, -2, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -6, 46), ENCODE_COEFFS( 28, -4, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -4, 36), ENCODE_COEFFS( 36, -4, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -4, 28), ENCODE_COEFFS( 46, -6, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -2, 16), ENCODE_COEFFS( 54, -4, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -2, 10), ENCODE_COEFFS( 58, -2, 0, 0 ) } +++static uint32_t rpi_filter_coefs[8][1] = { +++ { ENCODE_COEFFS( 0, 64, 0, 0) }, +++ { ENCODE_COEFFS( -2, 58, 10, -2) }, +++ { ENCODE_COEFFS( -4, 54, 16, -2) }, +++ { ENCODE_COEFFS( -6, 46, 28, -4) }, +++ { ENCODE_COEFFS( -4, 36, 36, -4) }, +++ { ENCODE_COEFFS( -4, 28, 46, -6) }, +++ { ENCODE_COEFFS( -2, 16, 54, -4) }, +++ { ENCODE_COEFFS( -2, 10, 58, -2) } ++ }; ++ ++ static uint32_t get_vc_address(AVBufferRef *bref) { ++@@ -2027,16 +2027,16 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++ *u++ = ( (nPbW_csh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++- *u++ = rpi_filter_coefs[_mx][1]; +++ u++; ++ *u++ = rpi_filter_coefs[_my][0]; ++- *u++ = rpi_filter_coefs[_my][1]; +++ u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + 
(start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2084,16 +2084,16 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); ++ *u++ = ( (nPbW_csh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++- *u++ = rpi_filter_coefs[_mx][1]; +++ u++; ++ *u++ = rpi_filter_coefs[_my][0]; ++- *u++ = rpi_filter_coefs[_my][1]; +++ u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2148,29 +2148,29 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b0; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++ *u++ = ( (nPbW_cframe->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); // TODO this will become unused once we have a dedicated pass0 filter 
++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = x2_c - 3 + start_x; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = y2_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x2_c - 1 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y2_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); ++ *u++ = ( (nPbW_csh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx2][0]; ++- *u++ = rpi_filter_coefs[_mx2][1]; +++ u++; ++ *u++ = rpi_filter_coefs[_my2][0]; ++- *u++ = rpi_filter_coefs[_my2][1]; +++ u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 77cca46..c8d0728 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -116,8 +116,8 @@ unsigned int rpi_shader[] = { ++ /* [0x000002d0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++ /* [0x000002d8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++ /* [0x000002e0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002e8] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000002f0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000002e8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000002f0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++ /* [0x000002f8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++ /* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++ /* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++@@ -128,338 +128,315 @@ unsigned int rpi_shader[] = { ++ /* [0x00000330] */ 0x4f5971c6, 
0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ /* [0x00000338] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ /* [0x00000340] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000348] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000350] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000360] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000368] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000370] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000378] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000380] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000388] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000390] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000398] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000003a0] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x000003a8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000003b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003b8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000348] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000350] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000360] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000368] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000370] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000378] */ 0x0000ff00, 0xe20229e7, // mov.setf 
-, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000380] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x000003c0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003c8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000003d0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003d8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003e0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003e8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003f0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003f8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000400] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000408] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000410] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000418] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000420] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000428] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000430] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000438] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000440] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000448] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000450] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000458] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000460] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz 
r3, ra4 << 12, r1 << 12 ++-/* [0x00000468] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000470] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000478] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000480] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000488] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000490] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000498] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x000004a0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x000004a8] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x000004b0] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x000004b8] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x000004c0] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000004c8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000004d0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000004d8] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000004e0] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004e8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000004f0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000004f8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000500] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000508] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000510] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000518] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000520] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000528] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000530] */ 0x4c208237, 0x10024860, // add 
r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000538] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000540] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000548] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000550] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000558] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000560] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000568] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000570] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000578] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000580] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000588] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000590] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000598] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000005a0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000005a8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000005b0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000005b8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000005c0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000388] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000390] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000398] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003a0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003a8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003b0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003c0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; 
mul24 r2, r2, r3 +++/* [0x000003c8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000003d0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000003d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003e0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003e8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000003f0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000003f8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000400] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000408] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000410] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000418] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000420] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000428] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000430] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000438] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000440] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 +++/* [0x00000448] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000450] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000458] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000460] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000468] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000470] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000478] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000480] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000488] */ 0x8c9f223f, 0x10020867, // add r1, r1, 
r0 ; mov -, vw_wait +++/* [0x00000490] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000498] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000004a0] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x000004a8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004b0] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x000004b8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000004c0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000004c8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000004d0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004d8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000004e0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000004e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000004f0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000004f8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000500] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000508] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x000005c8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000005d0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000005d8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000005e0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000005e8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000005f0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000005f8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000600] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000608] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000610] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000618] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000620] */ 0x159d5fc0, 0x10021c67, // 
mov vw_setup, rb21 ++-/* [0x00000628] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000630] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000638] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000640] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000648] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000650] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000658] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000660] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000668] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000670] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000678] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000680] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000688] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000690] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000698] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000006b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000006d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000006f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f8] */ 
0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000700] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000708] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000710] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000718] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000720] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000510] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000518] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000520] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000528] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000530] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000538] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000540] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000548] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000550] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000558] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000560] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000568] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000570] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000578] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000580] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000588] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000590] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000598] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000005a0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000005a8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000005b0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000005b8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* 
[0x000005c0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000005c8] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x000005d0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005d8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005f0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005f8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000600] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000608] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000610] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000618] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000620] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000628] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000630] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000638] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000640] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000648] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000650] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000658] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000668] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000728] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000730] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, 
rb_x_base_next ; ldtmu0 ++-/* [0x00000738] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000740] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000748] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000750] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000758] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000760] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000768] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000770] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000778] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000780] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000788] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000790] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000798] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000007a0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000007a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000007b0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000007b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000007c0] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000007c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000007d0] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000007d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000007e0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* 
[0x000007e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000007f0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000007f8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000800] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000808] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000810] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000818] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000820] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000828] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000830] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000838] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000840] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000848] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000850] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000858] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000860] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000868] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000870] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000878] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000880] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000888] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000890] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000898] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x000008a0] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000008a8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000008b0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* 
[0x000008b8] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000008c0] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 ++-/* [0x000008c8] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x000008d0] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x000008d8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000008e0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000008e8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000008f0] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000008f8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000900] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000908] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000910] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000918] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000670] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000678] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000680] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000688] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000690] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000698] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000006a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000006a8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000006b0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000006b8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000006c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000006c8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000006d0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 
8, r1 << 8 +++/* [0x000006d8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000006e0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000006e8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000006f0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000006f8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000700] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000708] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000710] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000718] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000720] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000728] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000730] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000738] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000740] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000748] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000750] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000758] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000760] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000768] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000770] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000778] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000780] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000788] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000790] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000798] */ 0x553e7d81, 0x100243a0, // 
mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000007a0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000007a8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000007b0] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x000007b8] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x000007c0] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x000007c8] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x000007d0] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x000007d8] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x000007e0] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x000007e8] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000007f0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000007f8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000800] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000808] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 +++/* [0x00000810] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000818] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000820] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000828] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000830] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000838] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000840] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000848] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000850] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000858] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000860] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x00000920] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000928] */ 0x154e7d80, 0x10020467, // mov ra_xshift, 
ra_xshift_next ++-/* [0x00000930] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000938] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000940] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000948] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000950] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000958] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000960] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000968] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000970] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000978] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000980] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000988] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000990] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000998] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000009a0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000009a8] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000009b0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x000009b8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000009c0] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x000009c8] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x000009d0] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x000009d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000009e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000009e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000009f0] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x000009f8] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000a00] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000a08] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a10] */ 0x4f5971c6, 0x100240a0, 
// asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a18] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a20] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000a28] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a30] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a38] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a40] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000a48] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a50] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a58] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a60] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000a68] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a70] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a78] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a80] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000a88] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000a90] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000a98] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000868] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000870] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000878] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000880] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000888] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000890] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000898] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* 
[0x000008a0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000008a8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000008b0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000008b8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000008c0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000008c8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000008d0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008d8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000008e0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000008e8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000008f0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000008f8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000900] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000908] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000910] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000918] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000920] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000928] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000930] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000938] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000940] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000948] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000950] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000958] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000960] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000968] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000970] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000978] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000980] */ 
0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000988] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000990] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000998] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000009a0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000009a8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000009b0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000009b8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000009c0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000009c8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x000009d0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000009d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000009e0] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000aa0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000aa8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000ab0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000ab8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000ac0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000ac8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000ad0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000ad8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000ae0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000ae8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000af0] */ 
0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000af8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000b00] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000b08] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000b10] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000b18] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000b20] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000b28] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000b30] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000b38] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000b40] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000b48] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000b50] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000b58] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000b60] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000b68] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000b70] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000b78] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000b80] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000b88] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000b90] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000b98] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000ba0] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000ba8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000bb0] */ 
0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000bb8] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000bc0] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000bc8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000bd0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000bd8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000be0] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000be8] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000bf0] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000bf8] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000c00] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000c08] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000c10] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000c18] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000c20] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000c28] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000c30] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000c38] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000c40] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000c48] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000c50] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000c58] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000c60] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000c68] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000c70] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000c78] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000c80] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000c88] */ 
0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000c90] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000c98] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000ca0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000ca8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000009e8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000009f0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000009f8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000a00] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000a08] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000a10] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000a18] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000a20] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000a28] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000a30] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000a38] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000a40] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000a48] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000a50] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000a58] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000a60] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000a68] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000a70] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000a78] */ 0x400f5031, 0xd000c9e3, // nop ; 
mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000a80] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000a88] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000a90] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000a98] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000aa0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000aa8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000ab0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000ab8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000ac0] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000ac8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000ad0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000ad8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000ae0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000ae8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000af0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000af8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000b00] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000b08] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000b10] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000b18] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000b20] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b28] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000b30] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000b38] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000b40] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000b48] */ 
0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000b50] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000b58] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000b60] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000b68] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000b70] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000b78] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000b80] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000b88] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000b90] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000b98] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000ba0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000ba8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000bb0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000bb8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000bc0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000bc8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000bd0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000bd8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000be0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000be8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000bf0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000cb0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000cb8] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000cc0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000cc8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000cd0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000cd8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ce0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* 
[0x00000ce8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000cf0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000bf8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000c00] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000c08] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c10] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c18] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c20] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c28] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000c30] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000c38] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000cf8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000d00] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d08] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d10] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d18] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d20] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d28] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d30] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d38] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d40] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d48] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d50] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d58] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000d60] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000d68] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000c40] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000c48] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c50] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c58] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c60] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c70] */ 0x00000010, 0xe80009e7, 
// mov -,sacq(0) +++/* [0x00000c78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c80] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c88] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c90] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ca0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ca8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000cb0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 6562fa9..1bf7a68 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -5,10 +5,10 @@ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 152) ++-#define mc_filter_uv_b0 (rpi_shader + 370) ++-#define mc_filter_uv_b (rpi_shader + 584) ++-#define mc_exit (rpi_shader + 812) ++-#define mc_interrupt_exit8 (rpi_shader + 830) ++-#define mc_end (rpi_shader + 860) +++#define mc_filter_uv_b0 (rpi_shader + 324) +++#define mc_filter_uv_b (rpi_shader + 538) +++#define mc_exit (rpi_shader + 766) +++#define mc_interrupt_exit8 (rpi_shader + 784) +++#define mc_end (rpi_shader + 814) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 562dc35..8e4f18f 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -16,8 +16,8 @@ ++ # ra19 next ra17 ++ # ++ # rb16 pitch ++-# rb17 height + 5 ++-# rb18 height + 7 +++# rb17 height + 1 +++# rb18 height + 3 ++ # rb19 next ra16 ++ # ++ # ra20 1 ++@@ -214,8 +214,8 @@ mov r0, unif ++ shr r1, r0, r2 # Extract width ++ sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++ and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 +++add rb17, r0, 1 +++add rb18, r0, 3 ++ shl r0, r0, 7 ++ add r0, r0, r1 # Combine width and height of destination area ++ 
shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++@@ -230,18 +230,11 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif +++ mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23; mov r0, unif ++-asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-asr rb12, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++@@ -283,26 +276,14 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++ add r0, r2, r3 ++ ++ mov r3, rb31 ++ ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++ mov ra12, ra13 ++ mov ra13, ra14 ++ ++-sub.setf -, r3, 8 ; mov r1, ra22 +++sub.setf -, r3, 4 ; mov r1, ra22 ++ ++ # apply horizontal filter ++ brr.anyn -, r:uvloop ++@@ -312,14 +293,10 @@ nop ; nop # Delay slot 3 (TODO move more of the context scr ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r1, ra14, rb14 ++-nop ; mul24 r0, ra13, rb13 ++-add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-add r1, r1, 
r0 ; mul24 r0, ra8, rb8 ++-add r1, r1, r0 ; mul24 r0, ra15, rb15 +++nop ; mul24 r1, ra14, rb10 +++nop ; mul24 r0, ra13, rb9 +++add r1, r1, r0 ; mul24 r0, ra12, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb11 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ asr r1, r1, 14 ++-- ++2.7.4 ++ ++ ++From e4bdd110d4640519b751ab428e7976a1e9a15802 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 11:03:51 +0100 ++Subject: [PATCH 28/68] Optimised B0 pass ++ ++--- ++ libavcodec/rpi_shader.c | 424 +++++++++++++++++++++------------------------ ++ libavcodec/rpi_shader.h | 8 +- ++ libavcodec/rpi_shader.qasm | 43 +---- ++ 3 files changed, 212 insertions(+), 263 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index c8d0728..1f63ee0 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -204,239 +204,215 @@ unsigned int rpi_shader[] = { ++ /* [0x00000580] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++ /* [0x00000588] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++ /* [0x00000590] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000598] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000005a0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000598] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000005a0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++ /* [0x000005a8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++ /* [0x000005b0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++ /* [0x000005b8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++ /* [0x000005c0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000005c8] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x000005d0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005d8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e8] */ 0x4f5971c6, 
0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005f0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000005f8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000600] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000608] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000610] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000618] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000620] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000628] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000630] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000638] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000640] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000648] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000650] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000658] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000668] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000005c8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005d0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005d8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005f0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005f8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000600] */ 0x4f5971c6, 0x100252a0, // asr rb10, 
r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000608] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000610] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000618] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000620] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000628] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000670] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000678] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000680] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000688] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000690] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000698] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000006a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000006a8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000006b0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000006b8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000006c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000006c8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000006d0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000006d8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000006e0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000006e8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000006f0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000006f8] */ 
0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000700] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000708] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000710] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000718] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000720] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000728] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000730] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000738] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000740] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000748] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000750] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000758] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000760] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000768] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000770] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000778] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000780] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000788] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000790] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000798] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000007a0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000007a8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000007b0] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x000007b8] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x000007c0] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 
r0, ra12, rb12 ++-/* [0x000007c8] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x000007d0] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x000007d8] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x000007e0] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x000007e8] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000007f0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000007f8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000800] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000808] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 ++-/* [0x00000810] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000818] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000820] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000828] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000830] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000838] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000840] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000848] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000850] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000858] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000860] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000630] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000638] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000640] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000648] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000650] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000658] */ 0x13740dc0, 
0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000660] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000668] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000670] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000678] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000680] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000688] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000690] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000698] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000006a0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000006a8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000006b0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000006b8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000006c0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000006c8] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x000006d0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x000006d8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000006e0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000006e8] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 +++/* [0x000006f0] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006f8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000700] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000708] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000710] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000718] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000720] */ 0x4c308237, 0x10024860, 
// add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000728] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000730] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000738] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000740] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000748] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 +++/* [0x00000750] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000758] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000760] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000768] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000770] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000778] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000780] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000788] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000790] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000798] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000007a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x00000868] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000870] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000878] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000880] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000888] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000890] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000898] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000008a0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000008a8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000008b0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000008b8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* 
[0x000008c0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000008c8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000008d0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008d8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000008e0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000008e8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000008f0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000008f8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000900] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000908] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000910] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000918] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000920] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000928] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000930] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000938] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000940] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000948] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000950] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000958] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000960] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000968] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000970] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000978] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000980] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000988] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000990] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000998] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, 
rb23; mul24 r0, r0, ra22 ++-/* [0x000009a0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000009a8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000009b0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000009b8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000009c0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000009c8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x000009d0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000009d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000009e0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000007a8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000007b0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000007b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000007c0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000007c8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000007d0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007d8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007e0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007e8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000007f0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000007f8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000800] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000808] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000810] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000818] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000820] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000828] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000830] */ 
0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000838] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000840] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000848] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000850] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000858] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000860] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000868] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000870] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000878] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000880] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000888] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000890] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000898] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000008b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000008d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000008f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008f8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000900] */ 0x4f5971c6, 
0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000908] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000910] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000918] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000920] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000009e8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000009f0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000009f8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000a00] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000a08] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000a10] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000a18] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000a20] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000a28] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000a30] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000a38] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000a40] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000a48] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000a50] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000a58] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000a60] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000a68] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000a70] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 
++-/* [0x00000a78] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000a80] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000a88] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000a90] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000a98] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000aa0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000aa8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000ab0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000ab8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000ac0] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000ac8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000ad0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000ad8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000ae0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000ae8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000af0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000af8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000b00] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000b08] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000b10] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000b18] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000b20] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000b28] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000b30] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000b38] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000b40] */ 0x4c2cb237, 0x10024860, // add r1, r1, 
r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000b48] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000b50] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000b58] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000b60] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000b68] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000b70] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000b78] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000b80] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000b88] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000b90] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000b98] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000ba0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000ba8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000bb0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000bb8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000bc0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000bc8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000bd0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000bd8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000be0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000be8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000bf0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000928] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000930] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000938] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000940] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, 
rb_pitch +++/* [0x00000948] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000950] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000958] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000960] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000968] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000970] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000978] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000980] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000988] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000990] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000998] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000009a0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000009a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000009b0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000009b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000009c0] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000009c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000009d0] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000009d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000009e0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000009e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000009f0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000009f8] */ 0x401f1031, 
0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000a00] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000a08] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000a10] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000a18] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000a20] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000a28] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000a30] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000a38] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000a40] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000a48] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000a50] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000a58] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000a60] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a68] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000a70] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000a78] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000a80] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000a88] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000a90] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000a98] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000aa0] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000aa8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000ab0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000ab8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000ac0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000ac8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000ad0] */ 
0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000ad8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000ae0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000ae8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000af0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000af8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000b00] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000b08] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000b10] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000b18] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000b20] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000b28] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000b30] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000bf8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000c00] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000c08] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c10] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c18] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c20] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c28] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000c30] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000c38] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b38] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000b40] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000b48] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b50] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b58] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b60] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b68] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b70] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b78] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000c40] */ 0x159f2fc0, 
0x100009e7, // mov -, vw_wait ++-/* [0x00000c48] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c50] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c58] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c60] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c80] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c88] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c90] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ca0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ca8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000cb0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b80] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000b88] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b90] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b98] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ba0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ba8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bb0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bb8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bc0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bc8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bd0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bd8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000be0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000be8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000bf0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 1bf7a68..cb74887 100644 ++--- a/libavcodec/rpi_shader.h +++++ 
b/libavcodec/rpi_shader.h ++@@ -6,9 +6,9 @@ extern unsigned int rpi_shader[]; ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 152) ++ #define mc_filter_uv_b0 (rpi_shader + 324) ++-#define mc_filter_uv_b (rpi_shader + 538) ++-#define mc_exit (rpi_shader + 766) ++-#define mc_interrupt_exit8 (rpi_shader + 784) ++-#define mc_end (rpi_shader + 814) +++#define mc_filter_uv_b (rpi_shader + 490) +++#define mc_exit (rpi_shader + 718) +++#define mc_interrupt_exit8 (rpi_shader + 736) +++#define mc_end (rpi_shader + 766) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 8e4f18f..faa5755 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -357,15 +357,13 @@ mov r0, unif ++ shr r1, r0, r2 # Extract width ++ sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++ and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 +++add rb17, r0, 1 +++add rb18, r0, 3 ++ shl r0, r0, 7 ++ add r0, r0, r1 # Combine width and height of destination area ++ shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++ add rb26, r0, rb27 ++ ++-sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++- ++ # get filter coefficients ++ ++ mov r0, unif ++@@ -373,18 +371,11 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif +++ mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23; mov r0, unif ++-asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-asr rb12, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++@@ 
-426,26 +417,14 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++ add r0, r2, r3 ++ ++ mov r3, rb31 ++ ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++ mov ra12, ra13 ++ mov ra13, ra14 ++ ++-sub.setf -, r3, 8 ; mov r1, ra22 +++sub.setf -, r3, 4 ; mov r1, ra22 ++ ++ # apply horizontal filter ++ brr.anyn -, r:uvloop_b0 ++@@ -455,18 +434,12 @@ nop ; nop # Delay slot 3 (TODO move more of the context scr ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r1, ra14, rb14 ++-nop ; mul24 r0, ra13, rb13 ++-add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-add r1, r1, r0 ; mul24 r0, ra15, rb15 +++nop ; mul24 r1, ra14, rb10 +++nop ; mul24 r0, ra13, rb9 +++add r1, r1, r0 ; mul24 r0, ra12, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb11 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-#asr r1, r1, 14 ++-#add r1, r1, ra21 ++ brr.anyn -, r:uvloop_b0 ++ asr vpm, r1, 14 # Delay 1 shifts down by shift2=6, but results are still in 16bit precision TODO may be able to avoid the mul24 and use more delay slots ++ nop # Delay 2 ++-- ++2.7.4 ++ ++ ++From 93805e78a13d36e28ed84a0e8456da2eac45be89 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 11:12:43 +0100 ++Subject: [PATCH 29/68] Optimised B pass ++ ++--- ++ libavcodec/rpi_shader.c | 202 
++++++++++++++++++++------------------------- ++ libavcodec/rpi_shader.h | 6 +- ++ libavcodec/rpi_shader.qasm | 41 ++------- ++ 3 files changed, 100 insertions(+), 149 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 1f63ee0..4e6c5ea 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -289,8 +289,8 @@ unsigned int rpi_shader[] = { ++ /* [0x00000818] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++ /* [0x00000820] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++ /* [0x00000828] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000830] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000838] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000830] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000838] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++ /* [0x00000840] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++ /* [0x00000848] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++ /* [0x00000850] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++@@ -299,120 +299,96 @@ unsigned int rpi_shader[] = { ++ /* [0x00000868] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++ /* [0x00000870] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++ /* [0x00000878] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000880] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000888] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000890] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000898] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000008b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c0] */ 0x4f5971c6, 0x10024160, // asr 
ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000008d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000008f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008f8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000900] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000908] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000910] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000918] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000920] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000880] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000888] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000890] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000898] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000008a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008b0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000008d0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000008d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 
1, 1] +++/* [0x000008e0] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000928] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000930] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000938] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000940] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000948] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000950] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000958] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000960] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000968] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000970] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000978] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000980] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000988] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000990] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000998] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000009a0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000009a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000009b0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000009b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000009c0] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000009c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000009d0] 
*/ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000009d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000009e0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000009e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000009f0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000009f8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000a00] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000a08] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000a10] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000a18] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000a20] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000a28] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000a30] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000a38] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000a40] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000a48] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000a50] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000a58] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000a60] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000a68] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000a70] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000a78] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000a80] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000a88] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000a90] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000a98] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* 
[0x00000aa0] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000aa8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000ab0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000ab8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000ac0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000ac8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000ad0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000ad8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000ae0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000ae8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000af0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000af8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000b00] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000b08] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000b10] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000b18] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000b20] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000b28] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000b30] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008e8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000008f0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000008f8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000900] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000908] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000910] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000918] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000920] */ 0x4c741dd3, 
0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000928] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000930] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000938] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000940] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000948] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000950] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000958] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000960] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000968] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000970] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000978] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000980] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000988] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000990] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000998] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000009a0] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 +++/* [0x000009a8] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009b0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000009b8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000009c0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000009c8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000009d0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000009d8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000009e0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000009e8] */ 
0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009f0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009f8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000a00] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000a08] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000a10] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000a18] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000a20] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a28] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a30] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a38] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a40] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000a48] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a50] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a60] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000b38] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000b40] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000b48] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b50] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b58] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b60] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b68] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000b70] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000b78] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a78] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a80] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 
+++/* [0x00000a98] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000aa0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ab0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000b80] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000b88] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b90] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b98] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ba0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ba8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bb0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bb8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bc0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bc8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bd0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bd8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000be0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000be8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000bf0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000ac0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ae0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b08] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b10] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b18] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b20] 
*/ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b28] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000b30] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index cb74887..53da629 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -7,8 +7,8 @@ extern unsigned int rpi_shader[]; ++ #define mc_filter_uv (rpi_shader + 152) ++ #define mc_filter_uv_b0 (rpi_shader + 324) ++ #define mc_filter_uv_b (rpi_shader + 490) ++-#define mc_exit (rpi_shader + 718) ++-#define mc_interrupt_exit8 (rpi_shader + 736) ++-#define mc_end (rpi_shader + 766) +++#define mc_exit (rpi_shader + 670) +++#define mc_interrupt_exit8 (rpi_shader + 688) +++#define mc_end (rpi_shader + 718) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index faa5755..f38c926 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -491,8 +491,8 @@ mov r0, unif ++ shr r1, r0, r2 # Extract width ++ sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++ and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 +++add rb17, r0, 1 +++add rb18, r0, 3 ++ shl r0, r0, 7 ++ ++ # r0 is currently height<<7 ++@@ -508,8 +508,6 @@ add rb26, r0, rb27 ++ # In a B frame, so also set up VPM read (reading back 16bit precision) ++ add vr_setup, r3, rb21 ++ ++-sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++- ++ # get filter coefficients ++ ++ mov r0, unif ++@@ -517,18 +515,11 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif +++ mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, 
ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23; mov r0, unif ++-asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-asr rb12, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++@@ -570,26 +561,14 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++ add r0, r2, r3 ++ ++ mov r3, rb31 ++ ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++ mov ra12, ra13 ++ mov ra13, ra14 ++ ++-sub.setf -, r3, 8 ; mov r1, ra22 +++sub.setf -, r3, 4 ; mov r1, ra22 ++ # apply horizontal filter ++ brr.anyn -, r:uvloop_b ++ mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++@@ -598,14 +577,10 @@ nop ; nop # TODO improve use of delay slots ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r1, ra14, rb14 ++-nop ; mul24 r0, ra13, rb13 ++-add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-add r1, r1, r0 ; mul24 r0, ra15, rb15 +++nop ; mul24 r1, ra14, rb10 +++nop ; mul24 r0, ra13, rb9 +++add r1, r1, r0 ; mul24 r0, ra12, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb11 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ asr r1, r1, 14 # shift2=6 ++-- ++2.7.4 ++ ++ ++From e48df43c16de74dddbc7c702d64dd01eaf8e6b39 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz 
++Date: Thu, 14 May 2015 11:17:09 +0100 ++Subject: [PATCH 30/68] Used P delay slots more efficiently ++ ++--- ++ libavcodec/rpi_shader.c | 437 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 10 +- ++ libavcodec/rpi_shader.qasm | 19 +- ++ 3 files changed, 228 insertions(+), 238 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 4e6c5ea..a1af4e3 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -156,239 +156,236 @@ unsigned int rpi_shader[] = { ++ /* [0x00000408] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ /* [0x00000410] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ /* [0x00000418] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000420] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000428] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000430] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000420] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000428] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000430] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++ /* [0x00000438] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000440] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 ++-/* [0x00000448] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000450] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000458] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000460] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000468] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000470] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000478] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000480] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* 
[0x00000488] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000490] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000498] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000004a0] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x000004a8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004b0] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x000004b8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000004c0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000004c8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000004d0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004d8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000004e0] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000004e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000004f0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000004f8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000500] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000508] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000440] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000448] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000450] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000458] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000460] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000468] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000470] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000478] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000480] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000488] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000490] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000498] */ 
0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x000004a0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000004a8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000004b0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000004b8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004c0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000004c8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000004d0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000004d8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000004e0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000004e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x00000510] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000518] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000520] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000528] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000530] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000538] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000540] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000548] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000550] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000558] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000560] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000568] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000570] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000578] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000580] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000588] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000590] */ 0x149d61c0, 0x10020827, // and r0, 
r0, rb22 ++-/* [0x00000598] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000005a0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000005a8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000005b0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000005b8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000005c0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000005c8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005d0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005d8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000005f0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005f8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000600] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000608] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000610] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000618] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000620] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000628] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000004f8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000500] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000508] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000510] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000518] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000520] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000528] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000530] */ 
0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000538] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000540] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000548] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000550] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000558] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000560] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000568] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000570] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000578] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000580] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000588] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000590] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000598] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000005a0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000005a8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000005b0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005b8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005d0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005d8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000600] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000608] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 
0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000610] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000630] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000638] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000640] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000648] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000650] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000658] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000660] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000668] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000670] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000678] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000680] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000688] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000690] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000698] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000006a0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000006a8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000006b0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000006b8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000006c0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000006c8] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x000006d0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x000006d8] */ 0x15367d80, 0x10020327, 
// mov ra12, ra13 ++-/* [0x000006e0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000006e8] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 ++-/* [0x000006f0] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000006f8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000700] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000708] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000710] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000718] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000720] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000728] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000730] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000738] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000740] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000748] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 ++-/* [0x00000750] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000758] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000760] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000768] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000770] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000778] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000780] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000788] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000790] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000798] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000007a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000618] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000620] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; 
ldtmu0 +++/* [0x00000628] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000630] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000638] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000640] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000648] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000650] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000658] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000660] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000668] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000670] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000678] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000680] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000688] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000690] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000698] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000006a0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000006a8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000006b0] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x000006b8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x000006c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000006c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000006d0] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 +++/* [0x000006d8] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006e0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 
+++/* [0x000006e8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000006f0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000006f8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000700] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000708] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000710] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000718] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000720] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000728] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000730] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 +++/* [0x00000738] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000740] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000748] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000750] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000758] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000760] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000768] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000770] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000778] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000780] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000788] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x000007a8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000007b0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000007b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000007c0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000007c8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000007d0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007d8] */ 0x0d827cc0, 0x100208a7, 
// sub r2, unif, r3 ++-/* [0x000007e0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007e8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000007f0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000007f8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000800] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000808] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000810] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000818] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000820] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000828] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000830] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000838] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000840] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000848] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000850] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000858] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000860] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000868] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000870] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000878] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000880] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000888] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000890] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000898] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000008a8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008b0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 
++-/* [0x000008c0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000008d0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008e0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000790] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000798] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000007a0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000007a8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000007b0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000007b8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007c0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007c8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007d0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000007d8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000007e0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000007e8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000007f0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000007f8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000800] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000808] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000810] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000818] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000820] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000828] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000830] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000838] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000840] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 
+++/* [0x00000848] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000850] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000858] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000860] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000868] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000870] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000878] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000880] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000888] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000890] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000898] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000008b8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000008c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008c8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000008e8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000008f0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000008f8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000900] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000908] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000910] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000918] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000920] */ 
0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000928] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000930] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000938] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000940] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000948] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000950] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000958] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000960] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000968] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000970] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000978] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000980] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000988] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000990] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000998] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000009a0] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 ++-/* [0x000009a8] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009b0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000009b8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000009c0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000009c8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000009d0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000009d8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000009e0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* 
[0x000009e8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009f0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009f8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000a00] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000a08] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000a10] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000a18] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000a20] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a28] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a30] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a38] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a40] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000a48] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a50] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a60] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008d0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000008d8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000008e0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000008e8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000008f0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000008f8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000900] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000908] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000910] */ 0xec414c8f, 
0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000918] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000920] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000928] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000930] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000938] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000940] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000948] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000950] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000958] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000960] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000968] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000970] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000978] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000980] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000988] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 +++/* [0x00000990] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000998] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000009a0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000009a8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000009b0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000009b8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000009c0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000009c8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000009d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009d8] */ 
0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000009e8] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x000009f0] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x000009f8] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000a00] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000a08] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a10] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a18] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a20] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a28] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000a30] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a38] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a40] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a48] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a50] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a58] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a78] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a80] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a60] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a68] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a70] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a98] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000aa0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ab0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a90] */ 
0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a98] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000aa0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000ac0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000aa8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ab0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ab8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ac0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ad0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ad8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ae0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ad8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ae0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b08] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b10] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b18] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b20] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000b28] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000b30] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b08] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b10] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000b18] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 53da629..1fb3e37 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -5,10 +5,10 @@ extern unsigned int rpi_shader[]; ++ ++ 
#define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 152) ++-#define mc_filter_uv_b0 (rpi_shader + 324) ++-#define mc_filter_uv_b (rpi_shader + 490) ++-#define mc_exit (rpi_shader + 670) ++-#define mc_interrupt_exit8 (rpi_shader + 688) ++-#define mc_end (rpi_shader + 718) +++#define mc_filter_uv_b0 (rpi_shader + 318) +++#define mc_filter_uv_b (rpi_shader + 484) +++#define mc_exit (rpi_shader + 664) +++#define mc_interrupt_exit8 (rpi_shader + 682) +++#define mc_end (rpi_shader + 712) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index f38c926..02e95dd 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -268,6 +268,7 @@ add t0s, ra_x2_base, r2 ++ ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ +++# apply horizontal filter ++ nop ; mul24 r2, r0, ra0 ++ nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++ nop ; mul24 r3, ra1 << 1, r0 << 1 ++@@ -276,20 +277,12 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r0, r2, r3 ++- ++-mov r3, rb31 ++- ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 4 ; mov r1, ra22 ++- ++-# apply horizontal filter +++add r0, r2, r3 ; mov r3, rb31 +++sub.setf -, r3, 4 ; mov ra12, ra13 ++ brr.anyn -, r:uvloop ++-mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll ++-asr ra15, r0, 8 ; nop ++-nop ; nop # Delay slot 3 (TODO move more of the context scroll into here) +++mov ra13, ra14 # Delay slot 1 +++mov ra14, ra15 # Delay slot 2 +++mov ra15, r0 # Delay slot 3 ++ ++ # apply vertical filter and write to VPM ++ ++-- ++2.7.4 ++ ++ ++From b33dfc243ff5509299685add3c532ab7f207fd73 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 11:22:25 +0100 ++Subject: [PATCH 31/68] Improved use of delay slots ++ ++--- ++ libavcodec/rpi_shader.c | 503 
++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 10 +- ++ libavcodec/rpi_shader.qasm | 41 ++-- ++ 3 files changed, 265 insertions(+), 289 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index a1af4e3..c498f28 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -122,270 +122,263 @@ unsigned int rpi_shader[] = { ++ /* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++ /* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++ /* [0x00000310] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000318] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000320] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000328] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000340] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000348] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000350] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000360] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000368] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000370] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000378] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000380] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, 
rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000340] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000348] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000350] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000360] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000368] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000370] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000378] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x00000388] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000390] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000398] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003a0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003a8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003b0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003c0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003c8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003d0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000003d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003e0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000003e8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000003f0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, 
ra1 << 1, r0 << 1 ++-/* [0x000003f8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000400] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000408] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000410] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000418] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000420] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000428] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000430] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000438] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000440] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000448] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000450] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000458] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000460] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000468] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000470] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000478] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000480] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000488] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000490] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000498] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x000004a0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000004a8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000004b0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000004b8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004c0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* 
[0x000004c8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000004d0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000004d8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000004e0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000004e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000380] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000388] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000390] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000398] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003a0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003a8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003b8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003c0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000003c8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000003d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003d8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003e0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000003e8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000003f0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000003f8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000400] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000408] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 
3, r0 << 3 +++/* [0x00000410] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000418] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000420] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000428] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000430] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000438] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000440] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000448] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000450] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000458] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000460] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000468] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000470] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000478] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000480] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000488] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000490] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000498] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000004a0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000004a8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000004b0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004b8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000004c0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000004c8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000004d0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000004d8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000004e0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004e8] */ 0x15827d80, 0x10021ca7, // mov 
vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x000004f8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000500] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000508] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000510] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000518] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000520] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000528] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000530] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000538] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000540] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000548] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000550] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000558] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000560] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000568] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000570] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000578] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000580] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000588] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000590] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000598] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000005a0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000005a8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000005b0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005b8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005c0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005c8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005d0] */ 0x8f8171f6, 
0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000005d8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000600] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000608] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000610] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000004f0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000004f8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000500] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000508] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000510] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000518] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000520] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000528] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000530] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000538] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000540] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000548] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000550] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000558] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000560] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000568] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000570] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000578] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000580] */ 0x0c9c31c0, 
0xd00214a7, // add rb18, r0, 3 +++/* [0x00000588] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000590] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000598] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000005a0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000005a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005b0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005b8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005d0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005d8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005f0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000005f8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000600] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000608] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000618] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000620] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000628] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000630] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000638] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000640] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000648] */ 0x129de5c0, 0x100208a7, // min r2, r2, 
rb_frame_height_minus_1 ++-/* [0x00000650] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000658] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000660] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000668] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000670] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000678] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000680] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000688] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000690] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000698] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000006a0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000006a8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000006b0] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x000006b8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x000006c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000006c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000006d0] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 ++-/* [0x000006d8] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000006e0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000006e8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000006f0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000006f8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000700] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000708] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000710] */ 0x4c3cb237, 0x10024860, // add 
r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000718] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000720] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000728] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000730] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 ++-/* [0x00000738] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000740] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000748] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000750] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000758] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000760] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000768] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000770] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000778] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000780] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000788] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000610] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000618] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000620] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000628] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000630] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000638] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000640] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000648] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000650] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000658] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // 
mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000668] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000670] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000678] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000680] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000688] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000690] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000698] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000006a0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000006a8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x000006b0] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x000006b8] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006c0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000006c8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000006d0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000006d8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000006e0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000006e8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000006f0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000006f8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000700] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x00000708] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000710] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* [0x00000718] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000720] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000728] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000730] */ 
0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000738] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000740] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000748] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000750] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000758] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000760] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000768] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x00000790] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000798] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000007a0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000007a8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000007b0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000007b8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007c0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000007c8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007d0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000007d8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000007e0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000007e8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000007f0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000007f8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000800] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000808] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000810] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000818] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000820] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000828] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000830] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 
13 ++-/* [0x00000838] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000840] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000848] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000850] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000858] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000860] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000868] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000870] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000878] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000880] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000888] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000890] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000898] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000008b8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008c8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000770] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000778] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000780] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000788] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000790] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000798] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007a0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007a8] */ 
0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007b0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000007b8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000007c0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000007c8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000007d0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000007d8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007e0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000007e8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000007f0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000007f8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000800] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000808] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000810] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000818] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000820] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000828] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000830] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000838] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000840] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000848] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000850] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000858] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000860] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000868] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000870] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000878] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000880] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000888] */ 0x4f5971c6, 
0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000890] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000898] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000008a0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008a8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000008d0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000008d8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000008e0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000008e8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000008f0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000008f8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000900] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000908] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000910] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000918] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000920] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000928] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000930] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000938] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000940] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000948] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000950] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000958] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, 
r0 << 3 ++-/* [0x00000960] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000968] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000970] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000978] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000980] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000988] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 ++-/* [0x00000990] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000998] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000009a0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000009a8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000009b0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000009b8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000009c0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000009c8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000009d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000009e8] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x000009f0] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x000009f8] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000a00] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000a08] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a10] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a18] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a20] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a28] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000a30] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a38] */ 0x159f2fc0, 0x100009e7, // mov -, 
vw_wait ++-/* [0x00000a40] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a48] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a50] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a58] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008b0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000008b8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000008c0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000008c8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000008d0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000008d8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000008e0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000008e8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000008f0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000008f8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000900] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000908] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000910] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000918] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000920] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000928] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000930] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000938] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000940] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* 
[0x00000948] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000950] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000958] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000960] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000968] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000970] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000978] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000980] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000988] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000990] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000998] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009a0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009a8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000009b0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x000009b8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x000009c0] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009c8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x000009d0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000009d8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000009e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000009e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000009f8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a00] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a08] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a10] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a18] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a20] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* 
[0x00000a60] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a68] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000a70] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a28] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a30] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a38] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a40] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a48] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a50] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a58] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a60] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a68] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_interrupt_exit8 +++/* [0x00000a70] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a90] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a98] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000aa0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_interrupt_exit8 ++-/* [0x00000aa8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000ab0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ab8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ac0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ad0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ad8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ae0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b08] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* 
[0x00000b10] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000b18] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000aa0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000aa8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ab0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ab8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ac0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ac8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ad0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ad8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000ae0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 1fb3e37..3fac45f 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -5,10 +5,10 @@ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 152) ++-#define mc_filter_uv_b0 (rpi_shader + 318) ++-#define mc_filter_uv_b (rpi_shader + 484) ++-#define mc_exit (rpi_shader + 664) ++-#define mc_interrupt_exit8 (rpi_shader + 682) ++-#define mc_end (rpi_shader + 712) +++#define mc_filter_uv_b0 (rpi_shader + 316) +++#define mc_filter_uv_b (rpi_shader + 476) +++#define mc_exit (rpi_shader + 650) +++#define mc_interrupt_exit8 (rpi_shader + 668) +++#define mc_end (rpi_shader + 698) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 02e95dd..10f5113 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -221,8 +221,6 @@ add r0, r0, r1 # Combine width and height of destination area ++ shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++ add rb26, r0, rb27 ++ ++-sub.setf -,8,r1 # 8-r1, so if <0 
(negative) we need to use the full code ++- ++ # get filter coefficients ++ ++ mov r0, unif ++@@ -410,20 +408,12 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r0, r2, r3 ++- ++-mov r3, rb31 ++- ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 4 ; mov r1, ra22 ++- ++-# apply horizontal filter +++add r0, r2, r3 ; mov r3, rb31 +++sub.setf -, r3, 4 ; mov ra12, ra13 ++ brr.anyn -, r:uvloop_b0 ++-mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll ++-asr ra15, r0, 8 ; nop # TODO isn't ra15 already in 24bit precision, may not need the sign extension here? ++-nop ; nop # Delay slot 3 (TODO move more of the context scroll into here) +++mov ra13, ra14 # Delay slot 1 +++mov ra14, ra15 # Delay slot 2 +++mov ra15, r0 # Delay slot 3 ++ ++ # apply vertical filter and write to VPM ++ ++@@ -432,9 +422,9 @@ nop ; mul24 r0, ra13, rb9 ++ add r1, r1, r0 ; mul24 r0, ra12, rb8 ++ add r1, r1, r0 ; mul24 r0, ra15, rb11 ++ add r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++sub.setf -, r3, rb18 ++ brr.anyn -, r:uvloop_b0 ++-asr vpm, r1, 14 # Delay 1 shifts down by shift2=6, but results are still in 16bit precision TODO may be able to avoid the mul24 and use more delay slots +++asr vpm, r1, 6 # Delay 1 shifts down by shift2=6, but results are still in 16bit precision ++ nop # Delay 2 ++ nop # Delay 3 ++ ++@@ -554,19 +544,12 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r0, r2, r3 ++- ++-mov r3, rb31 ++- ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 4 ; mov r1, ra22 ++-# apply horizontal filter +++add r0, r2, r3 ; mov r3, rb31 +++sub.setf -, r3, 4 ; mov ra12, ra13 ++ brr.anyn -, r:uvloop_b ++-mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context 
scroll, including clamp to zero ++-asr ra15, r0, 8 ; nop ++-nop ; nop # TODO improve use of delay slots +++mov ra13, ra14 # Delay slot 1 +++mov ra14, ra15 # Delay slot 2 +++mov ra15, r0 # Delay slot 3 ++ ++ # apply vertical filter and write to VPM ++ ++-- ++2.7.4 ++ ++ ++From af59f8e00eb977e97debc5e72ba47e0077db1787 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 11:31:23 +0100 ++Subject: [PATCH 32/68] Avoid writeback of first B results ++ ++--- ++ libavcodec/rpi_shader.c | 229 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 8 +- ++ libavcodec/rpi_shader.qasm | 18 +--- ++ 3 files changed, 121 insertions(+), 134 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index c498f28..ba453a2 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -255,130 +255,125 @@ unsigned int rpi_shader[] = { ++ /* [0x00000710] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++ /* [0x00000718] */ 0x009e7000, 0x100009e7, // nop ++ /* [0x00000720] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000728] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000730] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000738] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000740] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000748] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000750] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000758] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000760] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000768] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000728] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000730] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000738] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000740] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x00000770] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* 
[0x00000778] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000780] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000788] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000790] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000798] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007a0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000007a8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007b0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000007b8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000007c0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000007c8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000007d0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000007d8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000007e0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000007e8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000007f0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000007f8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000800] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000808] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000810] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000818] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000820] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000828] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000830] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000838] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000840] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000748] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000750] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000758] */ 0x0c9a0f80, 0x10020827, // add r0, 
unif, elem_num +++/* [0x00000760] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000768] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000770] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000778] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000780] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000788] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000790] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000798] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000007a0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000007a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000007b0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000007c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000007c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000007d0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000007d8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000007e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000007e8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x000007f0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x000007f8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000800] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000808] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000810] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000818] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000820] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000828] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000830] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000838] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* 
[0x00000840] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++ /* [0x00000848] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000850] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000858] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000860] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000868] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000870] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000878] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000880] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000888] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000890] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000898] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008a0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008a8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000850] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000858] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000860] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000868] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000870] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000878] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000880] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000008b0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000008b8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000008c0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, 
ra_x2_base_next ; mov rb31, r3 ++-/* [0x000008c8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000008d0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000008d8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000008e0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000008e8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000008f0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000008f8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000900] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000908] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000910] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000918] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000920] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000928] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000930] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000938] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000940] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000948] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000950] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000958] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000960] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000968] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000970] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000978] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000980] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, 
ra13, rb9 ++-/* [0x00000988] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000990] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000998] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009a0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009a8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000009b0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x000009b8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x000009c0] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009c8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x000009d0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000009d8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000009e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000009e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000009f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000009f8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a00] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a08] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a10] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a18] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a20] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000888] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000890] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000898] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000008a0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000008a8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000008b0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000008b8] */ 
0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000008c0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000008c8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000008d0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000008d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008e0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000008e8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000008f0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000008f8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000900] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000908] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000910] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000918] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000920] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000928] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000930] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000938] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000940] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000948] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000950] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000958] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000960] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000968] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000970] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000978] */ 0x4d5927ce, 
0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000980] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000988] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000990] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000998] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009a0] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x000009a8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000009b0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000009b8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000009c0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009c8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000009d0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000009d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000009e0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000009e8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000009f0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009f8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a28] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a30] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000a38] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a40] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a48] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a50] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a58] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a60] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000a68] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a00] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a08] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a10] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a18] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a20] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a28] */ 0x009e7000, 
0xa00009e7, // ldtmu0 +++/* [0x00000a30] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a38] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a40] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000a70] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a48] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a50] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a58] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a60] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a68] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a80] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a88] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a90] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000a98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000aa0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000aa8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ab0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ab8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ac0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ac8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ad0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ad8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000ae0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ab0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef 
__HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 3fac45f..45dbe0e 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -6,9 +6,9 @@ extern unsigned int rpi_shader[]; ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 152) ++ #define mc_filter_uv_b0 (rpi_shader + 316) ++-#define mc_filter_uv_b (rpi_shader + 476) ++-#define mc_exit (rpi_shader + 650) ++-#define mc_interrupt_exit8 (rpi_shader + 668) ++-#define mc_end (rpi_shader + 698) +++#define mc_filter_uv_b (rpi_shader + 466) +++#define mc_exit (rpi_shader + 640) +++#define mc_interrupt_exit8 (rpi_shader + 658) +++#define mc_end (rpi_shader + 688) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 10f5113..e138c95 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -428,22 +428,14 @@ asr vpm, r1, 6 # Delay 1 shifts down by shift2=6, but results are still ++ nop # Delay 2 ++ nop # Delay 3 ++ +++# in pass0 we don't really need to save any results, but need to discard the uniforms ++ # DMA out for U ++ ++-mov vw_setup, rb26 # VDW setup 0 ++-mov vw_setup, rb29 # Stride ++-mov vw_addr, unif # start the VDW # TODO in pass0 we don't need to save any results ++- ++-# DMA out for V ++-# We need to wait for the U to complete first, but have nothing useful to compute while we wait. ++-# Could potentially push this write into the start of the next pipeline stage. 
++-mov r0, 16 ++-mov -, vw_wait ++- ++ bra -, ra31 ++-add vw_setup, rb26, r0 # VDW setup 0 ++-mov vw_setup, rb29 # Stride ++-mov vw_addr, unif # start the VDW +++mov r0, unif # Delay 1 +++mov r0, unif # Delay 2 +++nop # Delay 3 +++ ++ ++ ################################################################################ ++ ++-- ++2.7.4 ++ ++ ++From 12e57278cb19a769d2e1488e8e94003027493d09 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 14 May 2015 11:36:24 +0100 ++Subject: [PATCH 33/68] Cutdown size of chroma prediction commands ++ ++--- ++ libavcodec/hevc.c | 17 +- ++ libavcodec/rpi_shader.c | 543 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 12 +- ++ libavcodec/rpi_shader.qasm | 11 +- ++ 4 files changed, 281 insertions(+), 302 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index a47ebc5..32b89d5 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -56,7 +56,7 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ ++ #ifdef RPI_INTER_QPU ++ ++-#define RPI_CHROMA_COMMAND_WORDS 12 +++#define RPI_CHROMA_COMMAND_WORDS 10 ++ #define UV_COMMANDS_PER_QPU ((1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS) ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++@@ -2032,11 +2032,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++ *u++ = ( (nPbW_csh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++- u++; ++ *u++ = rpi_filter_coefs[_my][0]; ++- u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2091,9 
+2088,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = ( (nPbW_csh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++- u++; ++ *u++ = rpi_filter_coefs[_my][0]; ++- u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2154,11 +2149,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++ *u++ = ( (nPbW_cframe->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); // TODO this will become unused once we have a dedicated pass0 filter ++- *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ u+=2; // Intermediate results are not written back in first pass of B filtering ++ ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x2_c - 1 + start_x; ++@@ -2166,11 +2158,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); ++ *u++ = ( (nPbW_csh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx2][0]; ++- u++; ++ *u++ = rpi_filter_coefs[_my2][0]; ++- u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2808,7 +2797,7 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = pic_height; ++ *s->u_mvs[i]++ = s->frame->linesize[1]; ++ *s->u_mvs[i]++ = s->frame->linesize[2]; ++- s->u_mvs[i] += 3; // 
Padding words +++ s->u_mvs[i] += 1; // Padding words ++ } ++ } ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index ba453a2..b0b93b5 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -89,291 +89,286 @@ unsigned int rpi_shader[] = { ++ /* [0x00000200] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++ /* [0x00000208] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++ /* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000218] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000220] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000228] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000230] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000238] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000240] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000248] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000250] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000258] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000218] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000220] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000228] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000230] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000238] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000240] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000248] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv ++-/* [0x00000260] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000268] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000270] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000278] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000280] */ 0x928191f6, 0x10024823, // min r0, r0, 
rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000288] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000290] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000298] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000002a0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000002a8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000002b0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000002b8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002c0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002c8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002d0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002d8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002e0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002e8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000002f0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000002f8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000310] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000340] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000348] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000350] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000360] 
*/ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000368] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000370] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000378] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000250] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000258] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000260] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000268] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000270] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000278] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000280] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000288] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000290] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000298] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000002a0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000002a8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002b0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002b8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002c0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002c8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002d0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002d8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000002e0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000002e8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002f0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002f8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000300] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000310] */ 0x4f5971c6, 
0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000330] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000340] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000350] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000358] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000360] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x00000380] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000388] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000390] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000398] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003a0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003a8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003b8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003c0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003c8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000003d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003d8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000003e0] */ 
0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000003e8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000003f0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000003f8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000400] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000408] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000410] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000418] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000420] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000428] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000430] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000438] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000440] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000448] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000450] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000458] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000460] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000468] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000470] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000478] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000480] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000488] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000490] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000498] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000004a0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000004a8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* 
[0x000004b0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004b8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000004c0] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000004c8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000004d0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000004d8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000004e0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004e8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000368] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000370] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000378] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000380] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000388] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000390] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000398] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003a0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003a8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000003b0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000003b8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003c0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003c8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000003d0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000003d8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000003e0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000003e8] */ 0x400b6031, 
0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000003f0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000003f8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000400] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000408] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000410] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000418] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000420] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000428] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000430] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000438] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000440] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000448] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000450] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000458] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000460] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000468] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000470] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000478] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000480] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000488] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000490] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000498] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000004a8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000004b0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000004b8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000004c0] */ 0x0c9dae00, 0x10021c67, // add 
vw_setup, rb26, r0 +++/* [0x000004c8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004d0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x000004f0] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000004f8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000500] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000508] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000510] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000518] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000520] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000528] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000530] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000538] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000540] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000548] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000550] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000558] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000560] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000568] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000570] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000578] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000580] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000588] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000590] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000598] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000005a0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000005a8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005b0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005b8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; 
mul24 r0, r0, ra22 ++-/* [0x000005c0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005c8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000005d0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005d8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005f0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000005f8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000600] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000608] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000004d8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000004e0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000004e8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000004f0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000004f8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000500] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000508] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000510] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000518] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000520] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000528] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000530] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000538] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000540] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000548] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000550] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000558] */ 
0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000560] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000568] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000570] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000578] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000580] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000588] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000590] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000598] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005a0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005a8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005b0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005b8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005d0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x000005d8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000005e0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000005e8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000610] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000618] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000620] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000628] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000630] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000638] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* 
[0x00000640] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000648] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000650] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000658] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000668] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000670] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000678] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000680] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000688] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000690] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000698] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000006a0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000006a8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x000006b0] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x000006b8] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000006c0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000006c8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x000006d0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000006d8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000006e0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000006e8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000006f0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000006f8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000700] */ 
0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 ++-/* [0x00000708] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000710] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++-/* [0x00000718] */ 0x009e7000, 0x100009e7, // nop +++/* [0x000005f0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000005f8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000600] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000608] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000610] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000618] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000620] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000628] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000630] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000638] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000640] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000648] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000650] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000658] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000660] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000668] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000670] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000678] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000680] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000688] */ 0x8c9df4ff, 0x10024823, 
// add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000690] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000698] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006a0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000006a8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000006b0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000006b8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000006c0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000006c8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000006d0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000006d8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000006e0] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x000006e8] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006f0] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* [0x000006f8] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000700] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000708] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000710] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000718] */ 0x15827d80, 0x10020827, // mov r0, unif ++ /* [0x00000720] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000728] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000730] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000738] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000740] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x00000748] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000750] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000758] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000760] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000768] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* 
[0x00000770] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000778] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000780] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000788] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000790] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000798] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000007a0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000007a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000007b0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000007b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000007c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000007c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000007d0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000007d8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000007e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000007e8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x000007f0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x000007f8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000800] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000808] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000810] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000818] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000820] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000828] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000830] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000838] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000840] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000848] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000850] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, 
rb23; mul24 r0, r0, ra22 ++-/* [0x00000858] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000860] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000868] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000870] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000878] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000880] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000728] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000730] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000738] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000740] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000748] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000750] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000758] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000760] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000768] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000770] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000778] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000780] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000788] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000790] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000798] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000007a0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000007a8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000007b0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000007b8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000007c0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000007c8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* 
[0x000007d0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x000007d8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x000007e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000007e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000007f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000007f8] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000800] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000808] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000810] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000818] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000820] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000828] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000830] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000838] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000840] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000848] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000850] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000858] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000888] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000890] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000898] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000008a0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000008a8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000008b0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000008b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, 
rb_frame_height_minus_1 ++-/* [0x000008c0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000008c8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000008d0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000008d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008e0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000008e8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000008f0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000008f8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000900] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000908] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000910] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000918] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000920] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000928] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000930] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000938] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000940] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000948] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000950] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000958] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000960] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000968] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000970] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000978] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, 
r1, ra22 ++-/* [0x00000980] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000988] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000990] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000998] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009a0] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x000009a8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000009b0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000009b8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000009c0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000009c8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000009d0] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000009d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000009e0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000009e8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000009f0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000009f8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000860] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000868] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000870] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000878] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000880] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000888] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000890] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000898] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000008a0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000008a8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000008b0] */ 
0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008b8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000008c0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000008c8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000008d0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000008d8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000008e0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000008e8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000008f0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000008f8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000900] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000908] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000910] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000918] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000920] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000928] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000930] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000938] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000940] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000948] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000950] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000958] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000960] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000968] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000970] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000978] */ 
0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000980] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000988] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000990] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000998] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000009a8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000009b0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000009b8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000009c0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000009c8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009d0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a00] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a08] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000a10] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a18] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a20] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a28] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a30] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a38] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000a40] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000009d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000009e0] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x000009e8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000009f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000009f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a00] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a08] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a10] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a18] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000a48] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a50] */ 0x009e7000, 0xa00009e7, // 
ldtmu0 ++-/* [0x00000a58] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a60] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a68] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a20] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a28] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a30] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a38] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a40] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a48] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a50] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a58] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a60] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000a70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000a78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a80] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a88] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a90] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000aa0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ab0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a80] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a88] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000a90] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 45dbe0e..99927c4 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,11 +4,11 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 152) ++-#define mc_filter_uv_b0 (rpi_shader 
+ 316) ++-#define mc_filter_uv_b (rpi_shader + 466) ++-#define mc_exit (rpi_shader + 640) ++-#define mc_interrupt_exit8 (rpi_shader + 658) ++-#define mc_end (rpi_shader + 688) +++#define mc_filter_uv (rpi_shader + 148) +++#define mc_filter_uv_b0 (rpi_shader + 310) +++#define mc_filter_uv_b (rpi_shader + 458) +++#define mc_exit (rpi_shader + 630) +++#define mc_interrupt_exit8 (rpi_shader + 648) +++#define mc_end (rpi_shader + 678) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index e138c95..d9ffcda 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -167,8 +167,6 @@ add t0s, r2, r1 ++ ++ # Dump padding words ++ mov r0, unif ++-mov r0, unif ++-mov r0, unif ++ ++ # submit texture requests for second line ++ max r1, ra_y, 0 ++@@ -228,11 +226,10 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++- mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif +++asr rb8, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++@@ -362,11 +359,10 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++- mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif +++asr rb8, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++@@ -490,11 +486,10 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++- mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, 
unif +++asr rb8, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++-- ++2.7.4 ++ ++ ++From 3e8f02cf9d3e4bfcd07a5fcf321ace07c4f2e6f3 Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Thu, 14 May 2015 15:21:49 +0100 ++Subject: [PATCH 34/68] hevc: don't redirect when not rpi_enabled ++ ++--- ++ libavcodec/hevc.c | 2 +- ++ 1 file changed, 1 insertion(+), 1 deletion(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 32b89d5..2459e34 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -1468,7 +1468,7 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) ++ */ ++ ++ #ifdef RPI_INTER ++-#define RPI_REDIRECT(fn) rpi_ ## fn +++#define RPI_REDIRECT(fn) (s->enable_rpi ? rpi_ ## fn : fn) ++ static void rpi_luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref, const Mv *mv, int x_off, int y_off, ++ int block_w, int block_h, int luma_weight, int luma_offset) ++-- ++2.7.4 ++ ++ ++From 6da455b382b28c3c1f4e98c1703a695cdb946ad3 Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Thu, 14 May 2015 15:22:02 +0100 ++Subject: [PATCH 35/68] Use /dev/vcio for mailbox access ++ ++--- ++ libavcodec/rpi_mailbox.c | 2 +- ++ 1 file changed, 1 insertion(+), 1 deletion(-) ++ ++diff --git a/libavcodec/rpi_mailbox.c b/libavcodec/rpi_mailbox.c ++index 536896f..77a56dd 100644 ++--- a/libavcodec/rpi_mailbox.c +++++ b/libavcodec/rpi_mailbox.c ++@@ -39,7 +39,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++ ++ #define MAJOR_NUM 100 ++ #define IOCTL_MBOX_PROPERTY _IOWR(MAJOR_NUM, 0, char *) ++-#define DEVICE_FILE_NAME "/dev/char_dev" +++#define DEVICE_FILE_NAME "/dev/vcio" ++ ++ #include "rpi_mailbox.h" ++ ++-- ++2.7.4 ++ ++ ++From f96ef6131f16a4c03b8e2882bdf7319c3b646a6c Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Thu, 14 May 2015 15:25:25 +0100 ++Subject: [PATCH 36/68] Use vcsm for all memory allocations ++ ++--- ++ libavcodec/rpi_qpu.c | 174 +++++++++++++++++++-------------------------------- ++ 1 file changed, 64 insertions(+), 110 deletions(-) ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 60bf079..f62051f 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -1,7 +1,5 @@ ++ #ifdef RPI ++-// define RPI_USE_VCSM to use the vcsm device for shared memory ++ // This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. ++-#define RPI_USE_VCSM ++ // define RPI_TIME_TOTAL_QPU to print out how much time is spent in the QPU code ++ #define RPI_TIME_TOTAL_QPU ++ // define RPI_TIME_TOTAL_VPU to print out how much time is spent in the VPI code ++@@ -25,9 +23,7 @@ ++ #include "rpi_shader.h" ++ #include "rpi_hevc_transform.h" ++ ++-#ifdef RPI_USE_VCSM ++ #include "rpi_user_vcsm.h" ++-#endif ++ ++ // On Pi2 there is no way to access the VPU L2 cache ++ // GPU_MEM_FLG should be 4 for uncached memory. 
(Or C for alias to allocate in the VPU L2 cache) ++@@ -96,7 +92,6 @@ struct GPU ++ unsigned int vpu_code[VPU_CODE_SIZE]; ++ short transMatrix2even[16*16*2]; ++ int open_count; // Number of allocated video buffers ++- unsigned int vc_handle; // Handle of this memory ++ int mb; // Mailbox handle ++ int vc; // Address in GPU memory ++ int mail[12]; // These are used to pass pairs of code/unifs to the QPUs ++@@ -105,6 +100,7 @@ struct GPU ++ // Stop more than one thread trying to allocate memory or use the processing resources at once ++ static pthread_mutex_t gpu_mutex = PTHREAD_MUTEX_INITIALIZER; ++ static volatile struct GPU* gpu = NULL; +++static GPU_MEM_PTR_T gpu_mem_ptr; ++ ++ #if defined(RPI_TIME_TOTAL_QPU) || defined(RPI_TIME_TOTAL_VPU) ++ static unsigned int Microseconds(void) { ++@@ -132,39 +128,27 @@ static volatile int vpu_async_tail=0; // Contains the number of posted jobs ++ static volatile int vpu_async_head=0; ++ #endif ++ +++static int gpu_malloc_uncached_internal(int numbytes, GPU_MEM_PTR_T *p, int mb); +++static void gpu_free_internal(GPU_MEM_PTR_T *p); +++ ++ // Connect to QPU, returns 0 on success. 
++ static int gpu_init(volatile struct GPU **gpu) { ++ int mb = mbox_open(); ++ int vc; ++- int handle; ++ volatile struct GPU* ptr; ++ if (mb < 0) ++ return -1; ++ ++ if (qpu_enable(mb, 1)) return -2; ++ ++-#ifdef RPI_USE_VCSM ++ vcsm_init(); ++-#endif +++ gpu_malloc_uncached_internal(sizeof(struct GPU), &gpu_mem_ptr, mb); +++ ptr = (volatile struct GPU*)gpu_mem_ptr.arm; +++ memset(ptr, 0, sizeof *ptr); +++ vc = gpu_mem_ptr.vc; ++ ++- handle = mem_alloc(mb, sizeof(struct GPU), 4096, GPU_MEM_FLG); ++- if (!handle) ++- { ++- qpu_enable(mb, 0); ++- return -3; ++- } ++- vc = mem_lock(mb, handle); ++- ptr = mapmem_shared((vc+GPU_MEM_MAP)&~0xc0000000, sizeof(struct GPU)); ++- if (ptr == NULL) ++- { mem_free(mb, handle); ++- mem_unlock(mb, handle); ++- qpu_enable(mb, 0); ++- return -4; ++- } ++- ++- ptr->mb = mb; ++- ptr->vc_handle = handle; ++- ptr->vc = vc; +++ ptr->mb = mb; +++ ptr->vc = vc; ++ ++ printf("GPU allocated at 0x%x\n",vc); ++ ++@@ -226,94 +210,74 @@ static void gpu_unlock(void) { ++ pthread_mutex_unlock(&gpu_mutex); ++ } ++ +++static int gpu_malloc_uncached_internal(int numbytes, GPU_MEM_PTR_T *p, int mb) { +++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); +++ assert(p->vcsm_handle); +++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); +++ assert(p->vc_handle); +++ p->arm = vcsm_lock(p->vcsm_handle); +++ assert(p->arm); +++ p->vc = mem_lock(mb, p->vc_handle); +++ assert(p->vc); +++ return 0; +++} +++ ++ // Allocate memory on GPU ++ // Fills in structure

containing ARM pointer, videocore handle, videocore memory address, numbytes ++ // Returns 0 on success. ++ // This allocates memory that will not be cached in ARM's data cache. ++ // Therefore safe to use without data cache flushing. ++-int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) { +++int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) +++{ +++ int r; ++ gpu_lock(); ++- p->vc_handle = mem_alloc(gpu->mb, numbytes, 4096, GPU_MEM_FLG); ++- p->vcsm_handle = 0; ++- if (!p->vc_handle) ++- { ++- qpu_enable(gpu->mb, 0); ++- return -3; ++- } ++- p->vc = mem_lock(gpu->mb, p->vc_handle); ++- p->arm = mapmem_shared((p->vc+GPU_MEM_MAP)&~0xc0000000,numbytes); ++- p->numbytes = numbytes; ++- if (p->arm == NULL) ++- { ++- mem_free(gpu->mb, p->vc_handle); ++- mem_unlock(gpu->mb, p->vc_handle); ++- gpu_unlock(); ++- qpu_enable(gpu->mb, 0); ++- return -4; ++- } +++ r = gpu_malloc_uncached_internal(numbytes, p, gpu->mb); ++ gpu->open_count++; ++ gpu_unlock(); ++- return 0; +++ return r; ++ } ++ ++ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ { ++- // This only works when using RPI_USE_VCSM ++ void *tmp = vcsm_lock(p->vcsm_handle); ++ vcsm_unlock_ptr(tmp); ++ } ++ +++static int gpu_malloc_cached_internal(int numbytes, GPU_MEM_PTR_T *p) { +++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST, (char *)"Video Frame" ); +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST_AND_VC, (char *)"Video Frame" ); +++ assert(p->vcsm_handle); +++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); +++ assert(p->vc_handle); +++ p->arm = vcsm_lock(p->vcsm_handle); +++ assert(p->arm); +++ p->vc = mem_lock(gpu->mb, p->vc_handle); +++ assert(p->vc); +++ return 0; +++} +++ ++ // This allocates data that will be ++ // Cached in ARM L2 ++ // Uncached in VPU L2 ++-int 
gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p) { +++int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p) +++{ +++ int r; ++ gpu_lock(); ++-#ifdef RPI_USE_VCSM ++- { ++- p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST, (char *)"Video Frame" ); // f....... locks up for VP9 - retest this? ++- //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); // 3b...... works ++- //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); //fb...... locks up ++- //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST_AND_VC, (char *)"Video Frame" ); // 3b works (but corrupted due to caching) ++- p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); ++- p->arm = vcsm_lock(p->vcsm_handle); ++- p->vc = mem_lock(gpu->mb, p->vc_handle); ++- } ++-#else ++- p->vc_handle = mem_alloc(gpu->mb, numbytes, 4096, GPU_MEM_FLG); ++- p->vcsm_handle = 0; ++- if (!p->handle) ++- { ++- qpu_enable(gpu->mb, 0); ++- return -3; ++- } ++- p->vc = mem_lock(gpu->mb, p->vc_handle); ++- printf("This mapmem_private does not seem to work\n"); ++- exit(-1); ++- p->arm = mapmem_private((p->vc+GPU_MEM_MAP)&~0xc0000000,numbytes); ++- p->numbytes = numbytes; ++- if (p->arm == NULL) ++- { ++- mem_free(gpu->mb, p->handle); ++- mem_unlock(gpu->mb, p->handle); ++- gpu_unlock(); ++- qpu_enable(gpu->mb, 0); ++- return -4; ++- } ++-#endif +++ r = gpu_malloc_cached_internal(numbytes, p); ++ gpu->open_count++; ++ gpu_unlock(); ++- return 0; +++ return r; ++ } ++ ++ static void gpu_term(void) ++ { ++- int mb; ++- unsigned handle; +++ int mb; ++ ++ if (gpu==NULL) ++ return; ++ mb = gpu->mb; ++- handle = gpu->vc_handle; ++ ++ #ifdef RPI_ASYNC ++ { ++@@ -323,37 +287,26 @@ static void gpu_term(void) ++ } ++ #endif ++ +++ qpu_enable(mb, 0); +++ gpu_free_internal(&gpu_mem_ptr); ++ ++- unmapmem((void*)gpu, sizeof(struct GPU)); ++- mem_unlock(mb, handle); ++- mem_free(mb, handle); ++- qpu_enable(mb, 0); ++-#ifdef RPI_USE_VCSM ++ 
vcsm_exit(); ++-#endif ++- mbox_close(mb); +++ +++ mbox_close(mb); ++ gpu = NULL; ++ } ++ ++-void gpu_free(GPU_MEM_PTR_T *p) { +++void gpu_free_internal(GPU_MEM_PTR_T *p) { ++ int mb = gpu->mb; ++- unsigned handle = p->vc_handle; +++ mem_unlock(mb,p->vc_handle); +++ vcsm_unlock_ptr(p->arm); +++ vcsm_free(p->vcsm_handle); +++} +++ +++void gpu_free(GPU_MEM_PTR_T *p) { ++ gpu_lock(); ++-#ifdef RPI_USE_VCSM ++- if (p->vcsm_handle) { ++- mem_unlock(mb,p->vc_handle); ++- vcsm_unlock_ptr(p->arm); ++- vcsm_free(p->vcsm_handle); ++- } else { ++- unmapmem((void*)p->arm, sizeof(struct GPU)); ++- mem_unlock(mb, handle); ++- mem_free(mb, handle); ++- } ++-#else ++- unmapmem((void*)p->arm, sizeof(struct GPU)); ++- mem_unlock(mb, handle); ++- mem_free(mb, handle); ++-#endif +++ +++ gpu_free_internal(p); ++ ++ gpu->open_count--; ++ if (gpu->open_count==0) { ++@@ -386,20 +339,21 @@ unsigned int vpu_get_constants(void) { ++ ++ static void *vpu_start(void *arg) { ++ while(1) { +++ int *p; ++ pthread_mutex_lock(&post_mutex); ++ while( vpu_async_tail - vpu_async_head <= 0) ++ { ++ //printf("Checking number %d %d\n",vpu_async_head,vpu_async_tail); ++ pthread_cond_wait(&post_cond_tail, &post_mutex); ++ } ++- int *p = vpu_cmds[vpu_async_head%MAXCMDS]; +++ p = vpu_cmds[vpu_async_head%MAXCMDS]; ++ pthread_mutex_unlock(&post_mutex); ++ ++ if (p[6] == -1) { ++ break; // Last job ++ } ++ if (p[7]) { ++- GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; +++ //GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; ++ //gpu_cache_flush(buf); ++ } ++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); ++-- ++2.7.4 ++ ++ ++From 7c94b833b48a455d27d82eb2ca1b53a162705caf Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Thu, 14 May 2015 15:43:17 +0100 ++Subject: [PATCH 37/68] Enable EARLY_MALLOC and fix sps access bug ++ ++--- ++ libavcodec/hevc.c | 5 +++-- ++ 1 file changed, 3 insertions(+), 2 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 2459e34..4e82a15 100644 ++--- 
a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -44,7 +44,7 @@ ++ #ifdef RPI ++ #include "rpi_qpu.h" ++ // For some unknown reason, the code seems to crash if I do a late malloc ++- #define EARLY_MALLOC +++ //#define EARLY_MALLOC ++ // Move Inter prediction into separate pass ++ #define RPI_INTER ++ #endif ++@@ -149,7 +149,8 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ #ifdef RPI ++ #ifdef EARLY_MALLOC ++ #else ++- int coeffs_in_ctb = (1 << s->ps.sps->log2_ctb_size) * (1 << s->ps.sps->log2_ctb_size); +++ assert(sps); +++ int coeffs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); ++ int coefs_per_row = sps->ctb_width * coeffs_in_ctb * 3; // Allow space for chroma ++ printf("pic_arrays_init\n"); ++ printf("Allocated %d\n",coefs_per_row); ++-- ++2.7.4 ++ ++ ++From 0a0a92817a7959d213dca9c75a242b6ad88d6b80 Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Thu, 14 May 2015 16:40:51 +0100 ++Subject: [PATCH 38/68] Add copy of av_mod_uintp2 for use with stable ffmpeg ++ ++--- ++ libavcodec/hevc.c | 8 ++++++++ ++ 1 file changed, 8 insertions(+) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 4e82a15..80db603 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -51,6 +51,14 @@ ++ ++ // #define DISABLE_MC ++ +++#ifndef av_mod_uintp2 +++static av_always_inline av_const unsigned av_mod_uintp2_c(unsigned a, unsigned p) +++{ +++ return a & ((1 << p) - 1); +++} +++# define av_mod_uintp2 av_mod_uintp2_c +++#endif +++ ++ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 }; ++ ++ ++-- ++2.7.4 ++ ++ ++From c48d08e968b24c2e260b0cc76c7901a1b4d75bbf Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Mon, 18 May 2015 11:11:02 +0100 ++Subject: [PATCH 39/68] Added support for weighted prediction in P frames ++ ++--- ++ libavcodec/hevc.c | 52 ++++- ++ libavcodec/rpi_shader.c | 566 +++++++++++++++++++++++---------------------- ++ 
libavcodec/rpi_shader.h | 12 +- ++ libavcodec/rpi_shader.qasm | 39 +++- ++ 4 files changed, 384 insertions(+), 285 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 80db603..9668ef8 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -64,7 +64,7 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ ++ #ifdef RPI_INTER_QPU ++ ++-#define RPI_CHROMA_COMMAND_WORDS 10 +++#define RPI_CHROMA_COMMAND_WORDS 12 ++ #define UV_COMMANDS_PER_QPU ((1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS) ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++@@ -2031,6 +2031,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int y1_c = y0_c + (mv->y >> (2 + hshift)); ++ //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width ++ int chan = x0>>8; +++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || +++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++@@ -2043,6 +2045,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = ( (nPbW_csh.chroma_offset_l0[current_mv.ref_idx[0]][0] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0] & 0xffff); +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1] & 0xffff); +++ } else { +++ *u++ = 1; // Weight of 1 and offset of 0 +++ *u++ = 1; +++ } ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2085,6 +2094,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int y1_c = y0_c + (mv->y >> (2 + 
hshift)); ++ //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width ++ int chan = x0>>8; +++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || +++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++@@ -2098,6 +2109,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ // TODO chroma weight and offset... s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; +++ if (weight_flag) { +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[1]][0] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[1]][0] & 0xffff); +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[1]][1] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[1]][1] & 0xffff); +++ } else { +++ *u++ = 1; // Weight of 1 and offset of 0 +++ *u++ = 1; +++ } ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2159,6 +2177,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = ( (nPbW_cmc_filter_uv_b; ++@@ -2169,6 +2188,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = ( (nPbW_cframe->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2795,6 +2815,9 @@ static void rpi_inter_clear(HEVCContext *s) ++ int i; ++ int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1]; ++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1]; +++ int weight_flag = (s->sh.slice_type == P_SLICE && 
s->ps.pps->weighted_pred_flag) || +++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); +++ ++ for(i=0;i<8;i++) { ++ s->u_mvs[i] = s->mvs_base[i]; ++ *s->u_mvs[i]++ = 0; ++@@ -2806,6 +2829,13 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = pic_height; ++ *s->u_mvs[i]++ = s->frame->linesize[1]; ++ *s->u_mvs[i]++ = s->frame->linesize[2]; +++ if (weight_flag) { +++ *s->u_mvs[i]++ = 1 << (s->sh.chroma_log2_weight_denom + 6 - 1); +++ *s->u_mvs[i]++ = s->sh.chroma_log2_weight_denom + 6; +++ } else { +++ *s->u_mvs[i]++ = 1 << 5; +++ *s->u_mvs[i]++ = 6; +++ } ++ s->u_mvs[i] += 1; // Padding words ++ } ++ } ++@@ -2849,12 +2879,29 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; ++ ++ #ifdef RPI +++#ifdef RPI_INTER_QPU ++ s->enable_rpi = s->ps.sps->bit_depth == 8 ++ && s->ps.sps->width <= RPI_MAX_WIDTH ++ && !s->ps.pps->cross_component_prediction_enabled_flag ++ && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1 ++- && !(s->ps.pps->weighted_pred_flag && s->sh.slice_type == P_SLICE) ++ && !(s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE); +++#else +++ s->enable_rpi = s->ps.sps->bit_depth == 8 +++ && s->ps.sps->width <= RPI_MAX_WIDTH +++ && !s->ps.pps->cross_component_prediction_enabled_flag +++ && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1; +++#endif +++ +++ /*if (!s->enable_rpi) { +++ if (s->ps.pps->cross_component_prediction_enabled_flag) +++ printf("Cross component\n"); +++ if (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1) +++ printf("Tiles\n"); +++ if (s->ps.pps->weighted_pred_flag && s->sh.slice_type == P_SLICE) +++ printf("Weighted P slice\n"); +++ if (s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE) +++ printf("Weighted B slice\n"); +++ }*/ ++ ++ #endif ++ ++@@ -2987,6 +3034,7 @@ static int hls_decode_entry_wpp(AVCodecContext *avctxt, void 
*input_ctb_row, int ++ ++ #ifdef RPI ++ s->enable_rpi = 0; +++ //printf("Wavefront\n"); ++ #endif ++ ++ if(ctb_row) { ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index b0b93b5..3f04d80 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -88,287 +88,307 @@ unsigned int rpi_shader[] = { ++ /* [0x000001f8] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++ /* [0x00000200] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++ /* [0x00000208] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000218] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000220] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000228] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000230] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000238] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000240] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000248] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000210] */ 0x15827d80, 0x10021327, // mov rb12,unif +++/* [0x00000218] */ 0x15827d80, 0x10021367, // mov rb13,unif +++/* [0x00000220] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000228] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000230] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000238] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000240] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000248] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000250] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000258] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv ++-/* [0x00000250] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000258] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* 
[0x00000260] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000268] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000270] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000278] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000280] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000288] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000290] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000298] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000002a0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000002a8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002b0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002b8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002c0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002c8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002d0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002d8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000002e0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000002e8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002f0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000002f8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000300] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000330] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x4f5971c6, 0x100252a0, // 
asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000340] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000350] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000358] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000360] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000260] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000268] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000270] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000278] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000280] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000288] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000290] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000298] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000002a0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000002a8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000002b0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000002b8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002c0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002c8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002d0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002d8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002e0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002e8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000002f0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000002f8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000310] */ 0x0c9db1c0, 
0x100216a7, // add rb26, r0, rb27 +++/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000340] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000350] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000360] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000368] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000370] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000378] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000380] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00000388] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000390] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 +++/* [0x00000398] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000003a0] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 +++/* [0x000003a8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000003b0] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x00000368] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000370] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000378] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000380] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000388] */ 0xee454987, 0x10024860, // 
shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000390] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000398] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003a0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003a8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003b0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000003b8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003c0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000003c8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000003d0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000003d8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000003e0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000003e8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000003f0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000003f8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000400] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000408] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000410] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000418] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000420] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000428] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000430] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000438] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000440] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000448] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, 
rb11 ++-/* [0x00000450] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000458] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000460] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000468] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000470] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000478] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000480] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000488] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000490] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000498] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000004a8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000004b0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000004b8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000004c0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000004c8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004d0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000003b8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003c0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000003c8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003d0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003d8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003e0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003e8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003f0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003f8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 
+++/* [0x00000400] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000408] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000410] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000418] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000420] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000428] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000430] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000438] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000440] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000448] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000450] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000458] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000460] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000468] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000470] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000478] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000480] */ 0x00000020, 0xe0021327, // mov rb12,32 +++/* [0x00000488] */ 0x00000006, 0xe0021367, // mov rb13,6 +++/* [0x00000490] */ 0x00000001, 0xe00213a7, // mov rb14,1 +++/* [0x00000498] */ 0x00000000, 0xe00213e7, // mov rb15,0 +++/* [0x000004a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000004a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000004b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000004b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000004c0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000004c8] */ 0x4d5927ce, 0x100269e1, 
// sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000004d0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000004d8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x000004e0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x000004e8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x000004f0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004f8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x00000500] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000508] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000510] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000518] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000520] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000528] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000530] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000538] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000540] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000548] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000550] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x000004d8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000004e0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000004e8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000004f0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000004f8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000500] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000508] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000510] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000518] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000520] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000528] */ 0x0c9d3e80, 0x100206a7, // add 
ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000530] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000538] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000540] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000548] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000550] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000558] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000560] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000568] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000570] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000578] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000580] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000588] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000590] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000598] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005a0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005a8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005b0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000005b8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005c0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005c8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005d0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x000005d8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000005e0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000005e8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000558] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000560] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000568] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* 
[0x00000570] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000578] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000580] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000588] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000590] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000598] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000005a0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000005a8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000005b0] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x000005b8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000005c0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005c8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000005d0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000005d8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000005e0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000005e8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000005f0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000005f8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000600] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000608] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000610] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000618] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000620] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000628] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000630] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000638] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000640] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000648] */ 0x4f5971c6, 
0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000650] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000658] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000660] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000668] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000670] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000678] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x000005f0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000005f8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000600] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000608] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000610] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000618] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000620] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000628] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000630] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000638] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000640] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000648] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000650] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000658] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000660] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000668] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000670] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz 
r3, ra2 << 10, r1 << 10 ++-/* [0x00000678] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000680] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000688] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000690] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000698] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000006a0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000006a8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x000006b0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000006b8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000006c0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000006c8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000006d0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000006d8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000006e0] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 ++-/* [0x000006e8] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000006f0] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++-/* [0x000006f8] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000700] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000708] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000710] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000718] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000720] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000680] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000688] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000690] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000698] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch 
+++/* [0x000006a0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000006a8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000006b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000006b8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000006c0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000006c8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000006d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000006d8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000006e0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000006e8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000006f0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000006f8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000700] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000708] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000710] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000718] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000720] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000728] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000730] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000738] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000740] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000748] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000750] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000758] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000760] */ 
0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000768] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000770] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x00000778] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000780] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* [0x00000788] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000790] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000798] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000007a0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007b0] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x00000728] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000730] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000738] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000740] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000748] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000750] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000758] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000760] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000768] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000770] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000778] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000780] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000788] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000790] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000798] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000007a0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000007a8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000007b0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 
1 ++-/* [0x000007b8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000007c0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000007c8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x000007d0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x000007d8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x000007e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000007e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000007f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000007f8] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000800] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000808] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000810] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000818] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000820] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000828] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000830] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000838] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000840] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000848] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000850] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000858] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000007b8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000007c0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000007c8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000007d0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000007d8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000007e0] */ 0x119c31c0, 0xd00204e7, // shl 
ra_xshift_next, r0, 3 +++/* [0x000007e8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007f0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007f8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000800] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000808] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000810] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000818] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000820] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000828] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000830] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000838] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000840] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000848] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000850] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000858] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000860] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000868] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000870] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000878] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000880] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000888] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000890] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000898] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000008b8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, 
ra22 +++/* [0x000008c8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008d0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x000008d8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008e0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008e8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000008f0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008f8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000860] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000868] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000870] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000878] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000880] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000888] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000890] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000898] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000008a0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000008a8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000008b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008b8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000008c0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000008c8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000008d0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000008d8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000008e0] */ 
0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000008e8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000008f0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000008f8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000900] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000908] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000910] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000918] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000920] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000928] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000930] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000938] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000940] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000948] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000950] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000958] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000960] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000968] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000970] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000978] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000980] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000988] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000990] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000998] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000009a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000009a8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000009b0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000009b8] */ 0x00000000, 
0xf0f7e9e7, // bra -, ra31 ++-/* [0x000009c0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000009c8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000009d0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000900] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000908] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000910] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000918] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000920] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000928] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000930] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000938] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000940] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000948] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000950] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000958] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000960] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000968] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000970] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000978] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000980] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000988] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000990] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000998] */ 0x8c9df4ff, 0x10024823, // 
add r0, r2, r3 ; mov r3, rb31 +++/* [0x000009a0] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x000009a8] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009b0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000009b8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000009c0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000009c8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000009d0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000009d8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000009e0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000009e8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009f0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009f8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000a00] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000a08] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000a10] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000a18] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000a20] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a28] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a30] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a38] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a40] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000a48] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a50] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a60] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x000009d8] */ 0x159f2fc0, 0x100009e7, // mov -, 
vw_wait ++-/* [0x000009e0] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x000009e8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a00] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a08] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a10] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000a18] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a78] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a80] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a98] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000aa0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ab0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000a20] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a28] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a30] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a38] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a40] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a48] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a50] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a58] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a60] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a80] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a88] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000a90] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000ac0] */ 
0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ae0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b08] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b10] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b18] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b20] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b28] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000b30] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 99927c4..cec9901 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,11 +4,11 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 148) ++-#define mc_filter_uv_b0 (rpi_shader + 310) ++-#define mc_filter_uv_b (rpi_shader + 458) ++-#define mc_exit (rpi_shader + 630) ++-#define mc_interrupt_exit8 (rpi_shader + 648) ++-#define mc_end (rpi_shader + 678) +++#define mc_filter_uv (rpi_shader + 152) +++#define mc_filter_uv_b0 (rpi_shader + 342) +++#define mc_filter_uv_b (rpi_shader + 494) +++#define mc_exit (rpi_shader + 670) +++#define mc_interrupt_exit8 (rpi_shader + 688) +++#define mc_end (rpi_shader + 718) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index d9ffcda..97c4c02 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -9,7 +9,12 @@ ++ # (ra15 isn't clamped to zero - this happens during the ++ # copy to 
ra14, and during its use in the vertical filter) ++ # ++-# rb8...rb15 eight vertical filter coefficients +++# rb8...rb11 eight vertical filter coefficients +++ +++# rb12 offset to add before shift +++# rb13 shift +++# rb14 weight (U on left, V on right) +++# rb15 offset (U on left, V on right) ++ # ++ # ra16 clipped(row start address+elem_num)&~3 ++ # ra17 per-channel shifts ++@@ -165,6 +170,9 @@ add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++ add t0s, r0, r1 ; mov ra_x2_base, r2 ++ add t0s, r2, r1 ++ +++mov rb12,unif # offset before shift +++mov rb13,unif # offset after shift +++ ++ # Dump padding words ++ mov r0, unif ++ ++@@ -231,11 +239,21 @@ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23 ++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r0, unif # U offset/weight +++asr rb15, r0, r2 # Compute offset from MSBs +++shl r0, r0, r2 +++asr rb14, r0, r2 # Compute weight from LSBs +++mov r0, unif # V offset/weight +++asr.ifnz rb15, r0, r2 +++shl r0, r0, r2 +++asr.ifnz rb14, r0, r2 +++ ++ # r2 is elem_num ++ # r3 is loop counter ++ ++ mov r5rep, -8 ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++ # retrieve texture results and pick out bytes ++ # then submit two more texture requests ++@@ -279,6 +297,11 @@ mov ra13, ra14 # Delay slot 1 ++ mov ra14, ra15 # Delay slot 2 ++ mov ra15, r0 # Delay slot 3 ++ +++mov rb12,32 +++mov rb13,6 +++mov rb14,1 +++mov rb15,0 +++ ++ # apply vertical filter and write to VPM ++ ++ nop ; mul24 r1, ra14, rb10 ++@@ -288,9 +311,11 @@ add r1, r1, r0 ; mul24 r0, ra15, rb11 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ asr r1, r1, 14 ++-add r1, r1, ra21 +++nop ; mul24 r1, r1, rb14 +++add r1, r1, rb12 +++asr r1, r1, rb13 ++ brr.anyn -, r:uvloop ++-asr r1, r1, 6 # Delay 1 +++add r1, r1, rb15 # Delay 1 ++ min r1, r1, rb22 # Delay 2 ++ max vpm, r1, 0 # Delay 3 ++ ++@@ -364,6 +389,9 @@ asr rb10, r0, rb23; mul24 r0, r0, 
ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23 ++ +++mov r0, unif # U offset/weight +++mov r0, unif # V offset/weight +++ ++ # r2 is elem_num ++ # r3 is loop counter ++ ++@@ -491,6 +519,9 @@ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23 ++ +++mov r0, unif # U offset/weight +++mov r0, unif # V offset/weight +++ ++ # r2 is elem_num ++ # r3 is loop counter ++ ++-- ++2.7.4 ++ ++ ++From 310d994ea39e29b41a6a013abc4d94e6b90487b2 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Tue, 19 May 2015 08:43:30 +0100 ++Subject: [PATCH 40/68] Improved ordering of tasks ++ ++--- ++ libavcodec/hevc.c | 8 ++++---- ++ 1 file changed, 4 insertions(+), 4 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 9668ef8..951e2d3 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2943,15 +2943,15 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->dblk_cmds[s->num_dblk_cmds][0] = x_ctb; ++ s->dblk_cmds[s->num_dblk_cmds++][1] = y_ctb; ++ if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= s->ps.sps->width) { ++-#ifdef RPI_INTER_QPU ++- // Kick off inter prediction on QPUs ++- rpi_execute_inter_qpu(s); ++-#endif ++ // Transform all blocks ++ // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++ rpi_execute_transform(s); ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); +++#ifdef RPI_INTER_QPU +++ // Kick off inter prediction on QPUs +++ rpi_execute_inter_qpu(s); +++#endif ++ // Wait for transform completion ++ vpu_wait(s->vpu_id); ++ ++-- ++2.7.4 ++ ++ ++From d6e1ce7898196e49e52a6223c12979b3d0014588 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 20 May 2015 19:58:19 +0100 ++Subject: [PATCH 41/68] Drafted Luma inter prediction ++ ++--- ++ libavcodec/rpi_shader.qasm | 594 ++++++++++++++++++++++++++++++++++++++++++--- ++ 1 file 
changed, 554 insertions(+), 40 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 97c4c02..9cfc0d9 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -2,7 +2,10 @@ ++ # ++ # ra0...ra7 eight horizontal filter coefficients ++ # ++-# rb1...rb7 seven shifted copies of the current unfiltered row +++# rb0 rx_shift2 +++# rb1 ra_y2_next +++# +++# rb4...rb7 ++ # ++ # ra8...ra15 eight filtered rows of context (rb15 == most recent) ++ # ++@@ -26,9 +29,9 @@ ++ # rb19 next ra16 ++ # ++ # ra20 1 ++-# ra21 32 +++# ra21 ra_21 ++ # ra22 256 ++-# ra23 8 +++# ra23 rx_shift2_next ++ # ++ # rb20 0xffffff00 ++ # rb21 vpm_setup for reading/writing 16bit results into VPM ++@@ -57,16 +60,23 @@ ++ .set rb_frame_width_minus_1, rb25 ++ .set rb_frame_height_minus_1, rb30 ++ .set rb_pitch, rb16 ++-.set ra_x_base, ra16 ++-.set rb_x_base_next, rb19 ++-.set ra_x2_base, ra24 ++-.set ra_x2_base_next, ra26 +++.set ra_x, ra16 +++.set ra_y2, ra21 +++.set ra_y2_next, rb1 +++ +++.set rb_x_next, rb19 +++.set rx_frame_base2_next, rb19 +++ +++.set ra_frame_base, ra24 +++.set ra_frame_base_next, ra26 ++ .set ra_xshift, ra17 ++ ++-.set ra_x2shift, ra25 ++ .set ra_u2v_ref_offset, ra25 +++.set ra_frame_base2, ra25 ++ ++ .set ra_xshift_next, ra19 +++.set rx_xshift2, rb0 +++.set rx_xshift2_next, ra23 ++ ++ .set ra_x2shift_next, ra27 ++ .set ra_u2v_dst_offset, ra27 ++@@ -83,11 +93,11 @@ ++ mov ra31, unif ++ ++ # Load first request location ++-add ra_x_base, unif, elem_num # Store x +++add ra_x, unif, elem_num # Store x ++ mov ra_y, unif # Store y ++-mov ra_x2_base, unif # Store frame u base +++mov ra_frame_base, unif # Store frame u base ++ nop ++-sub ra_u2v_ref_offset, unif, ra_x2_base # Store offset to add to move from u to v in reference frame +++sub ra_u2v_ref_offset, unif, ra_frame_base # Store offset to add to move from u to v in reference frame ++ ++ # Read image dimensions ++ sub rb25,unif,1 ++@@ -104,9 +114,7 @@ add rb24, r1, r0 ++ 
# load constants ++ ++ mov ra20, 1 ++-mov ra21, 32 ++ mov ra22, 256 ++-mov ra23, 8 ++ mov ra30, 64 ++ ++ mov rb20, 0xffffff00 ++@@ -156,18 +164,18 @@ mov r1, vpm_setup(0, 2, h16p(0, 0)) # 2 is stride - stride acts on ADDR which i ++ add rb21, r0, r1 ++ ++ # Compute base address for first and second access ++-mov r0, ra_x_base # Load x +++mov r0, ra_x # Load x ++ max r0, r0, 0; mov r1, ra_y # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base # Load the frame base +++min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base # Load the frame base ++ shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++ add ra_y, r1, 1 ++ add r0, r0, r3 ++ and r0, r0, ~3 ++-max r1, r1, 0 ; mov ra_x_base, r0 # y +++max r1, r1, 0 ; mov ra_x, r0 # y ++ min r1, r1, rb_frame_height_minus_1 ++ # submit texture requests for first line ++ add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-add t0s, r0, r1 ; mov ra_x2_base, r2 +++add t0s, r0, r1 ; mov ra_frame_base, r2 ++ add t0s, r2, r1 ++ ++ mov rb12,unif # offset before shift ++@@ -182,8 +190,8 @@ min r1, r1, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ++ bra -, ra31 ++ nop ; mul24 r1, r1, rb_pitch ++-add t0s, r1, ra_x_base ++-add t0s, r1, ra_x2_base +++add t0s, r1, ra_x +++add t0s, r1, ra_frame_base ++ ++ ++ ++@@ -192,7 +200,7 @@ add t0s, r1, ra_x2_base ++ # mc_filter_uv(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) ++ ++ # At this point we have already issued two pairs of texture requests for the current block ++-# ra_x_base, ra_x16_base point to the current coordinates for this block +++# ra_x, ra_x16_base point to the current coordinates for this block ++ ::mc_filter_uv ++ mov ra31, unif ++ ++@@ -207,9 +215,9 @@ min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base ++ shl ra_xshift_next, r0, 3 ++ sub r2, unif, r3 # compute offset from frame base u to frame base v ++ add r0, r0, r3 ++-and rb_x_base_next, r0, ~3 +++and rb_x_next, r0, ~3 ++ 
mov ra_y_next, r1 ++-add ra_x2_base_next, rb_x_base_next, r2 +++add ra_frame_base_next, rb_x_next, r2 ++ ++ # set up VPM write ++ mov vw_setup, rb28 ++@@ -265,16 +273,16 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ ++ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 +++add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -297,7 +305,7 @@ mov ra13, ra14 # Delay slot 1 ++ mov ra14, ra15 # Delay slot 2 ++ mov ra15, r0 # Delay slot 3 ++ ++-mov rb12,32 +++mov rb12,32 # TODO remove these to make P weighted prediction work properly ++ mov rb13,6 ++ mov rb14,1 ++ mov rb15,0 ++@@ -342,7 +350,7 @@ mov vw_addr, unif # start the VDW ++ # mc_filter_uv_b0(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) ++ ++ # At this point we have already issued two pairs of texture requests for the current block ++-# ra_x_base, ra_x16_base point to the current coordinates for this block +++# ra_x, ra_x16_base point to the current coordinates for this block ++ ::mc_filter_uv_b0 ++ mov ra31, unif ++ ++@@ -357,9 +365,9 @@ min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base ++ shl ra_xshift_next, r0, 3 ++ sub r2, unif, r3 # compute offset from frame base u to frame base v ++ add r0, r0, r3 ++-and rb_x_base_next, 
r0, ~3 +++and rb_x_next, r0, ~3 ++ mov ra_y_next, r1 ++-add ra_x2_base_next, rb_x_base_next, r2 +++add ra_frame_base_next, rb_x_next, r2 ++ ++ # set up VPM write, we need to save 16bit precision ++ mov vw_setup, rb21 ++@@ -408,16 +416,16 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ ++ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 +++add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -477,9 +485,9 @@ min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base ++ shl ra_xshift_next, r0, 3 ++ sub r2, unif, r3 # compute offset from frame base u to frame base v ++ add r0, r0, r3 ++-and rb_x_base_next, r0, ~3 +++and rb_x_next, r0, ~3 ++ mov ra_y_next, r1 ++-add ra_x2_base_next, rb_x_base_next, r2 +++add ra_frame_base_next, rb_x_next, r2 ++ ++ # set up VPM write ++ mov vw_setup, rb28 ++@@ -538,16 +546,16 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, 
rb20 # v8subs masks out all but bottom byte ++ ++ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 +++add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -642,5 +650,511 @@ nop ; nop ; thrend ++ mov interrupt, 1; nop # delay slot 1 ++ nop ; nop # delay slot 2 ++ +++ +++ +++ +++ +++# LUMA CODE +++ +++# The idea is to form B predictions by doing 8 pixels from ref0 in parallel with 8 pixels from ref1. +++# For P frames we make the second x,y coordinates offset by +8 +++ +++################################################################################ +++# mc_setup(next_kernel, x, y, ref_y_base, x2, y2, ref_y2_base, frame_width, frame_height, pitch, dst_pitch, offset, shift, pad2) +++::mc_setup +++ +++# Read starting kernel +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++mov ra31, unif +++ +++# Compute base address for first and second access +++add r0, unif, elem_num # Load x +++max r0, r0, 0; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl ra_xshift_next, r0, 3 # Compute shifts +++add ra_y, r1, 1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add r2, r2, r0 # r2 is address for frame0 (not including y offset) +++max r1, r1, 0 +++min r1, r1, rb_frame_height_minus_1 +++nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 +++add t0s, r2, r1 ; mov ra_frame_base, r2 +++ +++add r0, unif, elem_num # Load x +++max r0, r0, 0; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl rx_xshift2_next, r0, 3 # Compute shifts +++add ra_y2, r1, 1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add r2, r2, r0 # r2 is address for frame1 (not 
including y offset) +++max r1, r1, 0 +++min r1, r1, rb_frame_height_minus_1 +++nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 +++add t0s, r2, r1 ; mov ra_frame_base2, r2 +++ +++ +++# Read image dimensions +++sub rb25,unif,1 +++sub rb30,unif,1 +++ +++# get source pitch +++mov rb16, unif +++ +++# get destination pitch +++mov r0, unif +++mov r1, vdw_setup_1(0) +++add rb24, r1, r0 +++ +++# load constants +++ +++mov ra20, 1 +++mov ra22, 256 +++mov ra30, 64 +++ +++mov rb20, 0xffffff00 +++mov rb22, 255 +++mov rb23, 24 +++ +++# touch vertical context to keep simulator happy +++ +++mov ra8, 0 +++mov ra9, 0 +++mov ra10, 0 +++mov ra11, 0 +++mov ra12, 0 +++mov ra13, 0 +++mov ra14, 0 +++mov ra15, 0 +++ +++# Compute part of VPM to use for DMA output +++mov r2, qpu_num +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later +++shl r0, r0, 5 +++add rb27, r0, r1 +++ +++# Compute part of VPM to save data into +++mov r2, qpu_num # qpu_num = abcd +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit +++add rb28, r0, r1 +++ +++mov rb12,unif # offset before shift +++mov rb13,unif # shift +++ +++# Dump padding words +++mov r0, unif +++ +++# submit texture requests for second line +++max r1, ra_y, 0 +++min r1, r1, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 +++nop ; mul24 r1, r1, rb_pitch +++add t0s, r1, ra_frame_base +++ +++max r1, ra_y2, 0 +++min r1, r1, rb_frame_height_minus_1 +++bra -, ra31 +++add ra_y2, ra_y2, 1 # Delay 1 +++nop ; mul24 r1, r1, rb_pitch # Delay 2 +++add t0s, r1, ra_frame_base2 # Delay 3 +++ +++ +++################################################################################ +++ +++# mc_filter(next_kernel, x, y, frame_base, x2, y2, frame_base2, height, 
hcoeffs[0], hcoeffs2[0], hcoeffs[1], hcoeffs2[1], vcoeffs[0], vcoeffs2[0], vcoeffs[1], vcoeffs2[1], offsetweight0, offsetweight1, this_dst) +++# In a P block, only the first half of coefficients contain used information. +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x, ra_x16_base point to the current coordinates for this block +++::mc_filter +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++mov rx_xshift2, rx_xshift2_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # Load x +++max r0, r0, 0; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl ra_xshift_next, r0, 3 # Compute shifts +++mov ra_y_next, r1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add ra_frame_base_next, r2, r0 # r2 is address for frame0 (not including y offset) +++ +++add r0, unif, elem_num # Load x +++max r0, r0, 0 ; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl rx_xshift2_next, r0, 3 # Compute shifts +++add ra_y2_next, r1, 1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add rx_frame_base2_next, r2, r0 # r2 is address for frame1 (not including y offset) +++ +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++# get filter coefficients and discard unused B frame values +++mov r0, unif 
+++mov.ifnz -, unif # Alternate coefficients are unused for P frames +++asr ra3, r0, rb23; mul24 r0, r0, ra22 # These may need some pre-rotation to be used in B frames correctly +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++mov.ifnz -, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++mov.ifnz -, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++mov.ifnz -, unif +++asr rb7, r0, rb23; mul24 r0, r0, ra22 +++asr rb6, r0, rb23; mul24 r0, r0, ra22 +++asr rb5, r0, rb23; mul24 r0, r0, ra22 +++asr rb4, r0, rb23 +++ +++mov r0, unif # Frame0 offset/weight +++mov.ifnz -, unif # Frame1 offset/weight unused +++asr rb15, r0, r2 # Compute offset from MSBs +++shl r0, r0, r2 +++asr rb14, r0, r2 # Compute weight from LSBs +++ +++# r3 is loop counter +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++mov r3, 0 +++ +++:yloop +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++# If we knew there was no clipping then this code would get simpler. +++# Perhaps we could add on the pitch and clip using larger values? 
+++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, rx_xshift2 +++mov.ifz ra_y2, ra_y2_next +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y2, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++ +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# apply horizontal filter +++nop ; mul24 r2, r0, ra0 +++nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++nop ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++add r0, r2, r3 ; mov r3, rb31 +++sub.setf -, r3, 8 ; mov ra12, ra13 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++brr.anyn -, r:yloop +++mov ra13, ra14 # Delay slot 1 +++mov ra14, ra15 # Delay slot 2 +++mov ra15, r0 # Delay slot 3 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r1, ra14, rb10 +++nop ; mul24 r0, ra13, rb9 +++add r1, r1, r0 ; mul24 r0, ra12, rb8 +++add r1, r1, r0 ; mul24 r0, 
ra15, rb11 +++add r1, r1, r0 ; mul24 r0, ra8, rb4 +++add r1, r1, r0 ; mul24 r0, ra9, rb5 +++add r1, r1, r0 ; mul24 r0, ra10, rb6 +++add r1, r1, r0 ; mul24 r0, ra11, rb7 +++ +++add r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++asr r1, r1, 14 +++nop ; mul24 r1, r1, rb14 +++add r1, r1, rb12 +++asr r1, r1, rb13 +++brr.anyn -, r:yloop +++add r1, r1, rb15 # Delay 1 +++min r1, r1, rb22 # Delay 2 +++max vpm, r1, 0 # Delay 3 +++ +++# DMA out +++ +++bra -, ra31 +++mov vw_setup, rb26 # VDW setup 0 Delay 1 +++mov vw_setup, rb29 # Stride Delay 2 +++mov vw_addr, unif # start the VDW Delay 3 +++ +++ +++ +++################################################################################ +++ +++# mc_filter_b(next_kernel, x, y, frame_base, x2, y2, frame_base2, width_height, hcoeffs[0], hcoeffs2[0], hcoeffs[1], hcoeffs2[1], vcoeffs[0], vcoeffs2[0], vcoeffs[1], vcoeffs2[1], offsetweight0, offsetweight1, this_dst) +++# In a P block, only the first half of coefficients contain used information. +++# At this point we have already issued two pairs of texture requests for the current block +++# May be better to just send 16.16 motion vector and figure out the coefficients inside this block (only 4 cases so can compute hcoeffs in around 24 cycles?) +++# Can fill in the coefficients so only +++# Can also assume default weighted prediction for B frames. +++# Perhaps can unpack coefficients in a more efficient manner by doing H/V for a and b at the same time? +++# Or possibly by taking advantage of symmetry? +++# From 19->7 32bits per command. 
+++::mc_filter_b +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++mov rx_xshift2, rx_xshift2_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # Load x +++max r0, r0, 0; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl ra_xshift_next, r0, 3 # Compute shifts +++mov ra_y_next, r1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add ra_frame_base_next, r2, r0 # r2 is address for frame0 (not including y offset) +++ +++add r0, unif, elem_num # Load x +++max r0, r0, 0 ; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl rx_xshift2_next, r0, 3 # Compute shifts +++add ra_y2_next, r1, 1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add rx_frame_base2_next, r2, r0 # r2 is address for frame1 (not including y offset) +++ +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++# get filter coefficients and discard unused B frame values +++mov r0, unif +++mov r1, 1 +++mov.ifnz r0, unif # Alternate coefficients are unused for P frames +++nop ; mul24 r0, r0 << 13, r1 << 13 +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++nop ; mul24 r0, r0 << 14, r1 << 14 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++nop ; mul24 r0, r0 << 15, r1 << 15 # Adjust such that a rotate of 1 will produce the values with first 8 on left, second 8 on right +++asr 
ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++mov.ifnz r0, unif +++nop ; mul24 r0, r0 << 9, r1 << 9 +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++nop ; mul24 r0, r0 << 10, r1 << 10 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++nop ; mul24 r0, r0 << 11, r1 << 11 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++nop ; mul24 r0, r0 << 12, r1 << 12 +++asr ra4, r0, rb23; mov r0, unif +++mov.ifnz r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++mov.ifnz r0, unif +++asr rb7, r0, rb23; mul24 r0, r0, ra22 +++asr rb6, r0, rb23; mul24 r0, r0, ra22 +++asr rb5, r0, rb23; mul24 r0, r0, ra22 +++asr rb4, r0, rb23 +++ +++mov r0, unif # Frame0 offset/weight +++mov.ifnz r0, unif # Frame1 offset/weight unused +++asr rb15, r0, r2 # Compute offset from MSBs +++shl r0, r0, r2 +++asr rb14, r0, r2 # Compute weight from LSBs +++ +++# r3 is loop counter +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++mov r3, 0 +++ +++:yloopb +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++# If we knew there was no clipping then this code would get simpler. +++# Perhaps we could add on the pitch and clip using larger values? 
+++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, rx_xshift2 +++mov.ifz ra_y2, ra_y2_next +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y2, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++ +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# apply horizontal filter +++nop ; mul24 r2, r0, ra0 +++nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++nop ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++add r0, r2, r3 ; mov r3, rb31 +++sub.setf -, r3, 8 ; mov ra12, ra13 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++brr.anyn -, r:yloopb +++mov ra13, ra14 # Delay slot 1 +++mov ra14, ra15 # Delay slot 2 +++mov ra15, r0 # Delay slot 3 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r1, ra14, rb10 +++nop ; mul24 r0, ra13, rb9 +++add r1, r1, r0 ; mul24 r0, ra12, rb8 +++add r1, r1, r0 ; mul24 r0, 
ra15, rb11 +++add r1, r1, r0 ; mul24 r0, ra8, rb4 +++add r1, r1, r0 ; mul24 r0, ra9, rb5 +++add r1, r1, r0 ; mul24 r0, ra10, rb6 +++add r1, r1, r0 ; mul24 r0, ra11, rb7 +++ +++add r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++asr r1, r1, 14 +++nop ; mul24 r1, r1 << 8, ra20 << 8 # Rotate to align left and right halves +++add r1, r1, ra30 ; mul24 r0, r1, rb14 +++add r1, r1, r0 +++brr.anyn -, r:yloopb +++asr r1, r1, 7 # Delay 1 +++min r1, r1, rb22 # Delay 2 +++max vpm, r1, 0 # Delay 3 +++ +++# DMA out +++bra -, ra31 +++mov vw_setup, rb26 # VDW setup 0 Delay 1 +++mov vw_setup, rb29 # Stride Delay 2 +++mov vw_addr, unif # start the VDW Delay 3 +++ +++################################################################################ +++ +++# mc_interrupt_exit12() +++::mc_interrupt_exit12 +++mov -, vw_wait # wait on the VDW +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++mov -,sacq(0) # 1 +++mov -,sacq(0) # 2 +++mov -,sacq(0) # 3 +++mov -,sacq(0) # 4 +++mov -,sacq(0) # 5 +++mov -,sacq(0) # 6 +++mov -,sacq(0) # 7 +++mov -,sacq(0) # 8 +++mov -,sacq(0) # 9 +++mov -,sacq(0) # 10 +++mov -,sacq(0) # 11 +++ +++nop ; nop ; thrend +++mov interrupt, 1; nop # delay slot 1 +++nop ; nop # delay slot 2 +++ +++ ++ ::mc_end ++ # Do not add code here because mc_end must appear after all other code. 
++-- ++2.7.4 ++ ++ ++From f2ffe4186fa49cb27579953c276b51728a08a8b5 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 20 May 2015 19:58:30 +0100 ++Subject: [PATCH 42/68] Added support for fast cache flush in deblocker ++ ++--- ++ libavcodec/hevc_filter.c | 44 +- ++ libavcodec/rpi_qpu.c | 6 + ++ libavcodec/rpi_qpu.h | 2 + ++ libavcodec/rpi_shader.c | 1028 +++++++++++++++++++++++++++++--------------- ++ libavcodec/rpi_shader.h | 16 +- ++ libavcodec/rpi_user_vcsm.h | 22 + ++ 6 files changed, 768 insertions(+), 350 deletions(-) ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 92a8271..186317a 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -37,6 +37,11 @@ ++ ++ #include "bit_depth_template.c" ++ +++#ifdef RPI +++#include "rpi_user_vcsm.h" +++#include "rpi_qpu.h" +++#endif +++ ++ #define LUMA 0 ++ #define CB 1 ++ #define CR 2 ++@@ -872,15 +877,46 @@ static void flush_buffer(AVBufferRef *bref) { ++ gpu_cache_flush(p); ++ } ++ ++-static void ff_hevc_flush_chroma(HEVCContext *s) +++// Return Physical address for this image +++static int ff_hevc_buf_base(AVBufferRef *bref) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); +++ return p->vc & 0x3fffffff; +++} +++ +++static void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n) ++ { ++ if (s->enable_rpi && !( s->nal_unit_type == NAL_TRAIL_N || ++ s->nal_unit_type == NAL_TSA_N || ++ s->nal_unit_type == NAL_STSA_N || ++ s->nal_unit_type == NAL_RADL_N || ++ s->nal_unit_type == NAL_RASL_N )) { +++#define RPI_FAST_CACHEFLUSH +++#ifdef RPI_FAST_CACHEFLUSH +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ int curr_y = f->progress->data[0]; +++ int sz,base; +++ if (curr_y < 0) curr_y = 0; +++ if (n<=curr_y) return; // Should not happen +++ sz = s->frame->linesize[1] * (n-curr_y); +++ base = s->frame->linesize[1] * curr_y; +++ iocache.s[0].cmd = 3; // Flush L1 cache +++ iocache.s[0].addr = 0; +++ iocache.s[0].size = 0; +++ +++ iocache.s[1].cmd = 2; 
+++ iocache.s[1].addr = ff_hevc_buf_base(s->frame->buf[1]) + base; +++ iocache.s[1].size = sz; +++ +++ iocache.s[2].cmd = 2; +++ iocache.s[2].addr = ff_hevc_buf_base(s->frame->buf[2]) + base; +++ iocache.s[2].size = sz; +++ +++ vcsm_clean_invalid( gpu_get_mailbox(), &iocache ); +++ +++#else ++ flush_buffer(s->frame->buf[1]); ++ flush_buffer(s->frame->buf[2]); +++#endif ++ //memcpy(s->dummy.arm,s->frame->data[0],2048*64); ++ //memcpy(s->dummy.arm,s->frame->data[1],1024*32); ++ //memcpy(s->dummy.arm,s->frame->data[2],1024*32); ++@@ -903,7 +939,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ sao_filter_CTB(s, x, y - ctb_size); ++ if (s->threads_type & FF_THREAD_FRAME ) { ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s); +++ ff_hevc_flush_chroma(s,&s->ref->tf, y); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y, 0); ++ } ++@@ -912,7 +948,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ sao_filter_CTB(s, x , y); ++ if (s->threads_type & FF_THREAD_FRAME ) { ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s); +++ ff_hevc_flush_chroma(s, &s->ref->tf, y + ctb_size); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0); ++ } ++@@ -922,7 +958,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ //int currh = s->ref->tf.progress->data[0]; ++ //if (((y + ctb_size)&63)==0) ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s); +++ ff_hevc_flush_chroma(s, &s->ref->tf, y + ctb_size - 4); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index f62051f..fd8a276 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -237,6 +237,12 @@ int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) ++ return r; ++ } ++ +++int gpu_get_mailbox(void) +++{ +++ assert(gpu); +++ return gpu->mb; +++} +++ ++ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ { ++ void *tmp = vcsm_lock(p->vcsm_handle); 
++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 543c84b..88965e5 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -49,4 +49,6 @@ extern int rpi_test_shader(void); ++ extern void rpi_do_block(const unsigned char *in_buffer_vc, int src_pitch, unsigned char *dst_vc, int dst_pitch, unsigned char *dst); ++ extern void rpi_do_block_arm(const unsigned char *in_buffer, int src_pitch, unsigned char *dst, int dst_pitch); ++ +++extern int gpu_get_mailbox(void); +++ ++ #endif ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 3f04d80..9c30e32 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -23,11 +23,11 @@ __attribute__((aligned(8))) ++ unsigned int rpi_shader[] = { ++ // ::mc_setup_uv ++ /* [0x00000000] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000008] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num +++/* [0x00000008] */ 0x0c9a0f80, 0x10020427, // add ra_x, unif, elem_num ++ /* [0x00000010] */ 0x15827d80, 0x10020767, // mov ra_y, unif ++-/* [0x00000018] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif +++/* [0x00000018] */ 0x15827d80, 0x10020627, // mov ra_frame_base, unif ++ /* [0x00000020] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000028] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base +++/* [0x00000028] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_frame_base ++ /* [0x00000030] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 ++ /* [0x00000038] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++ /* [0x00000040] */ 0x15827d80, 0x10021427, // mov rb16, unif ++@@ -35,360 +35,708 @@ unsigned int rpi_shader[] = { ++ /* [0x00000050] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++ /* [0x00000058] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++ /* [0x00000060] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x00000068] */ 0x00000020, 0xe0020567, // mov ra21, 32 ++-/* [0x00000070] */ 0x00000100, 0xe00205a7, // mov ra22, 
256 ++-/* [0x00000078] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++-/* [0x00000080] */ 0x00000040, 0xe00207a7, // mov ra30, 64 ++-/* [0x00000088] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000090] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000098] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x000000a0] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x000000a8] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x000000b0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x000000b8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x000000c0] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x000000c8] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x000000d0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x000000d8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000e0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x000000e8] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-/* [0x000000f0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x000000f8] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000100] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000108] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000110] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000118] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000120] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000128] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000130] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000138] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000140] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000148] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-/* [0x00000150] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000158] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000160] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000168] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000170] */ 
0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000178] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000180] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000188] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000190] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000198] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 ++-/* [0x000001a0] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) ++-/* [0x000001a8] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 ++-/* [0x000001b0] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x000001b8] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000001c0] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x000001c8] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000001d0] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001d8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000001e0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001e8] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++-/* [0x000001f0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001f8] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x00000200] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x00000208] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00000210] */ 0x15827d80, 0x10021327, // mov rb12,unif ++-/* [0x00000218] */ 0x15827d80, 0x10021367, // mov rb13,unif ++-/* [0x00000220] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000228] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000230] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000238] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000240] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000248] */ 0x409d000f, 0x100049e1, // 
nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000250] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000258] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000068] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000070] */ 0x00000040, 0xe00207a7, // mov ra30, 64 +++/* [0x00000078] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000080] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000088] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000090] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00000098] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x000000a0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x000000a8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x000000b0] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x000000b8] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x000000c0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x000000c8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x000000d0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000000d8] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x000000e0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x000000e8] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x000000f0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x000000f8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000100] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000108] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000110] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000118] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000120] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000128] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000130] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000138] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x00000140] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000148] */ 0x159e7480, 
0x10020867, // mov r1, r2 +++/* [0x00000150] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000158] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000160] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000168] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000170] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000178] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000180] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000188] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 +++/* [0x00000190] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) +++/* [0x00000198] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 +++/* [0x000001a0] */ 0x15427d80, 0x10020827, // mov r0, ra_x +++/* [0x000001a8] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000001b0] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base +++/* [0x000001b8] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000001c0] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000001c8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000001d0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001d8] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x, r0 +++/* [0x000001e0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x000001e8] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x000001f0] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 +++/* [0x000001f8] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x00000200] */ 0x15827d80, 0x10021327, // mov rb12,unif +++/* [0x00000208] */ 0x15827d80, 0x10021367, // mov rb13,unif +++/* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000218] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000220] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* 
[0x00000228] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000230] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000238] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000240] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x +++/* [0x00000248] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base ++ // ::mc_filter_uv ++-/* [0x00000260] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000268] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000270] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000278] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000280] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000288] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000290] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000298] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000002a0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000002a8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000002b0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000002b8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002c0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002c8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002d0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002d8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002e0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002e8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000002f0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000002f8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000310] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000318] */ 
0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000340] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000350] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000360] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000368] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000370] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00000378] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000380] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00000388] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000390] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 ++-/* [0x00000398] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000003a0] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 ++-/* [0x000003a8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000003b0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000250] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000258] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000260] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000268] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000270] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000278] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000280] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, 
r3 +++/* [0x00000288] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000290] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000298] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000002a0] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x000002a8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002b0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002b8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002c0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002c8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002d0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002d8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000002e0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000002e8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002f0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002f8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000300] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000330] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000340] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000350] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000358] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000360] */ 
0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000368] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000370] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00000378] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000380] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 +++/* [0x00000388] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000390] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 +++/* [0x00000398] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000003a0] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x000003b8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003c0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000003c8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003d0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003d8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003e0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003e8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003f0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003f8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000400] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000408] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000410] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000418] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000420] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000428] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000430] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 
r3, ra2 << 2, r0 << 2 ++-/* [0x00000438] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000440] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000448] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000450] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000458] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000460] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000468] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000470] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000478] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000480] */ 0x00000020, 0xe0021327, // mov rb12,32 ++-/* [0x00000488] */ 0x00000006, 0xe0021367, // mov rb13,6 ++-/* [0x00000490] */ 0x00000001, 0xe00213a7, // mov rb14,1 ++-/* [0x00000498] */ 0x00000000, 0xe00213e7, // mov rb15,0 ++-/* [0x000004a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000004a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000004b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000004b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000004c0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000004c8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000004d0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000004d8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 ++-/* [0x000004e0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 ++-/* [0x000004e8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 ++-/* [0x000004f0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004f8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 ++-/* [0x00000500] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000508] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* 
[0x00000510] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000518] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000520] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000528] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000530] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000538] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000540] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000548] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000550] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000003a8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003b0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x000003b8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x000003c0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003c8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003d0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003d8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003e0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000400] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000408] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000410] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000420] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; 
mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000428] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000430] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000438] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000440] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000448] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000450] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000458] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000460] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000468] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000470] */ 0x00000020, 0xe0021327, // mov rb12,32 +++/* [0x00000478] */ 0x00000006, 0xe0021367, // mov rb13,6 +++/* [0x00000480] */ 0x00000001, 0xe00213a7, // mov rb14,1 +++/* [0x00000488] */ 0x00000000, 0xe00213e7, // mov rb15,0 +++/* [0x00000490] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000498] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000004a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000004a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000004b0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000004b8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000004c0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000004c8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x000004d0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x000004d8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x000004e0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004e8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x000004f0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000004f8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 
+++/* [0x00000500] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000508] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000510] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000518] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000520] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000528] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000530] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000538] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000540] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x00000558] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000560] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000568] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000570] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000578] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000580] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000588] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000590] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000598] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000005a0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000005a8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000005b0] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x000005b8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000005c0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005c8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000005d0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000005d8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000005e0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000005e8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000005f0] */ 0x119c71c0, 
0xd0020827, // shl r0, r0, 7 ++-/* [0x000005f8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000600] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000608] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000610] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000618] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000620] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000628] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000630] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000638] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000640] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000648] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000650] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000658] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000660] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000668] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000670] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000678] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000548] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000550] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000558] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000560] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000568] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000570] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000578] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000580] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000588] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* 
[0x00000590] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000598] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x000005a0] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x000005a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000005b0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000005c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000005c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000005d0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000005d8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000005e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000005e8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000005f0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000005f8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000600] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000608] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000610] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000618] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000620] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000628] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000630] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000638] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000640] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000648] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000650] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000658] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000668] */ 0x00000000, 
0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000680] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000688] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000690] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000698] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000006a0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000006a8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000006b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000006b8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000006c0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000006c8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000006d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000006d8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000006e0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000006e8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000006f0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000006f8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000700] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000708] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000710] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000718] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000720] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000728] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* 
[0x00000730] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000738] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000740] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000748] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000750] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000758] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000760] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000768] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000770] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 ++-/* [0x00000778] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000780] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++-/* [0x00000788] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000790] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000798] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000007a0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000007a8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000007b0] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000670] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000678] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x00000680] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000688] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000690] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000698] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000006a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000006a8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000006b0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000006b8] */ 0x0c627c80, 
0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x000006c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000006c8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000006d0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000006d8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000006e0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000006e8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000006f0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000006f8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000700] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000708] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000710] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000718] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000720] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000728] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000730] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000738] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000740] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000748] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000750] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000758] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000760] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x00000768] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000770] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* [0x00000778] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000780] */ 0x009e7000, 0x100009e7, // nop 
+++/* [0x00000788] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000790] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000798] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007a0] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x000007b8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000007c0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000007c8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000007d0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000007d8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000007e0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007e8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000007f0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007f8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000800] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000808] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000810] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000818] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000820] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000828] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000830] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000838] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000840] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000848] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000850] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000858] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000860] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000868] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000870] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000878] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* 
[0x00000880] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000888] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000890] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000898] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000008b8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008d0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x000008d8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008e0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008e8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008f0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008f8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000007a8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000007b0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000007b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000007c0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000007c8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000007d0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007d8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007e0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007e8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x000007f0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000007f8] */ 0x0c9d3e80, 
0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000800] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000808] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000810] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000818] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000820] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000828] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000830] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000838] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000840] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000848] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000850] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000858] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000860] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000868] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000870] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000878] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000880] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000888] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000890] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000898] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000008a8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x000008c8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008d0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008d8] */ 0xfffffff8, 0xe0021967, // 
mov r5rep, -8 +++/* [0x000008e0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008e8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000900] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000908] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000910] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000918] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000920] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000928] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000930] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000938] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000940] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000948] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000950] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000958] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000960] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000968] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000970] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000978] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000980] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000988] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000990] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000998] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* 
[0x000009a0] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x000009a8] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009b0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000009b8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x000009c0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000009c8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000009d0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000009d8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000009e0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000009e8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009f0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009f8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000a00] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000a08] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000a10] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000a18] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000a20] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a28] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a30] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a38] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a40] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000a48] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a50] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a60] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008f0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* 
[0x000008f8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x00000900] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000908] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000910] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000918] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000920] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000928] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000930] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x00000938] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x00000940] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000948] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000950] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000958] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000960] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000968] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000970] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000978] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000980] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000988] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000990] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000998] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009a0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000009a8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000009b0] */ 0x159e7000, 
0x100203e7, // mov ra15, r0 +++/* [0x000009b8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000009c0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000009c8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000009d0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000009d8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009e0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009e8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000009f0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x000009f8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000a00] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000a08] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000a10] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a18] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a20] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a28] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a30] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000a38] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a40] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a48] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a50] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a58] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a60] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a78] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a80] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a68] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a70] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a88] */ 0x009e7000, 
0xa00009e7, // ldtmu0 ++ /* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a98] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000aa0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ab0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a98] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000aa0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000aa8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000ac0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ab0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ab8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ac0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000ad0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ad8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ae0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ae0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000b08] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b10] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b18] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b20] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000b28] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000b30] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b10] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b18] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000b20] */ 0x009e7000, 
0x100009e7, // nop ; nop +++// ::mc_setup +++/* [0x00000b28] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000b30] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000b38] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000b40] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000b48] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000b50] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000b58] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x00000b60] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000b68] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000b70] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000b78] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000b80] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000b88] */ 0x8c9e7452, 0x10025e18, // add t0s, r2, r1 ; mov ra_frame_base, r2 +++/* [0x00000b90] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000b98] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000ba0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000ba8] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000bb0] */ 0x0c9c13c0, 0xd0020567, // add ra_y2, r1, 1 +++/* [0x00000bb8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000bc0] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000bc8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000bd0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000bd8] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000be0] */ 0x8c9e7452, 0x10025e19, // add t0s, r2, r1 ; mov ra_frame_base2, r2 +++/* [0x00000be8] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00000bf0] */ 0x0d801dc0, 0xd00217a7, // sub 
rb30,unif,1 +++/* [0x00000bf8] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x00000c00] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000c08] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x00000c10] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x00000c18] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x00000c20] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000c28] */ 0x00000040, 0xe00207a7, // mov ra30, 64 +++/* [0x00000c30] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000c38] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000c40] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000c48] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00000c50] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x00000c58] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x00000c60] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x00000c68] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x00000c70] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x00000c78] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x00000c80] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x00000c88] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000c90] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000c98] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000ca0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000ca8] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000cb0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000cb8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000cc0] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000cc8] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000cd0] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000cd8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000ce0] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000ce8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* 
[0x00000cf0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000cf8] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000d00] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000d08] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000d10] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000d18] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000d20] */ 0x15827d80, 0x10021327, // mov rb12,unif +++/* [0x00000d28] */ 0x15827d80, 0x10021367, // mov rb13,unif +++/* [0x00000d30] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000d38] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000d40] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000d48] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000d50] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000d58] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base +++/* [0x00000d60] */ 0x13540dc0, 0xd0020867, // max r1, ra_y2, 0 +++/* [0x00000d68] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000d70] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000d78] */ 0x0c541dc0, 0xd0020567, // add ra_y2, ra_y2, 1 +++/* [0x00000d80] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000d88] */ 0x0c667380, 0x10020e27, // add t0s, r1, ra_frame_base2 +++// ::mc_filter +++/* [0x00000d90] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000d98] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000da0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000da8] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next +++/* [0x00000db0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000db8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000dc0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* 
[0x00000dc8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000dd0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000dd8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000de0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 +++/* [0x00000de8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000df0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif +++/* [0x00000df8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000e00] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000e08] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 +++/* [0x00000e10] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000e18] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 +++/* [0x00000e20] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000e28] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000e30] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e38] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000e40] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000e48] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000e50] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000e58] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000e60] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000e68] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000e70] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000e78] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000e80] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e88] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000e90] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000e98] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ea0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* 
[0x00000ea8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000eb0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000eb8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ec0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ec8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ed0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000ed8] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000ee0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ee8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ef0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ef8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000f00] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000f08] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f10] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f18] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f20] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 +++/* [0x00000f28] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000f30] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000f38] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000f40] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000f48] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00000f50] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :yloop +++/* [0x00000f58] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000f60] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++/* [0x00000f68] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, 
ra_frame_base_next ; mov rb31, r3 +++/* [0x00000f70] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000f78] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00000f80] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00000f88] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000f90] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000f98] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000fa0] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x00000fa8] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x00000fb0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000fb8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x00000fc0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++/* [0x00000fc8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000fd0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000fd8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000fe0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000fe8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000ff0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000ff8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00001000] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00001008] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001010] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001018] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00001020] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, 
r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001028] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001030] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001038] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001040] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001048] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001050] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001058] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 +++/* [0x00001060] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001068] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001070] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001078] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001080] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001088] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001090] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00001098] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000010a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000010a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000010b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000010b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000010c0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000010c8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000010d0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000010d8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000010e0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000010e8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, 
r1, ra22 +++/* [0x000010f0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000010f8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x00001100] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x00001108] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x00001110] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001118] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x00001120] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001128] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001130] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001138] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001140] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001148] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter_b +++/* [0x00001150] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001158] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00001160] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00001168] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next +++/* [0x00001170] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00001178] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00001180] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00001188] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00001190] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00001198] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000011a0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 +++/* [0x000011a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000011b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif +++/* [0x000011b8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x000011c0] */ 0x119c31c0, 0xd00205e7, // shl 
rx_xshift2_next, r0, 3 +++/* [0x000011c8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 +++/* [0x000011d0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000011d8] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 +++/* [0x000011e0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000011e8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000011f0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000011f8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00001200] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00001208] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00001210] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00001218] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00001220] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00001228] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001230] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001238] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00001240] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001248] */ 0x00000001, 0xe0020867, // mov r1, 1 +++/* [0x00001250] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001258] */ 0x409f3001, 0xd00049e0, // nop ; mul24 r0, r0 << 13, r1 << 13 +++/* [0x00001260] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001268] */ 0x409f2001, 0xd00049e0, // nop ; mul24 r0, r0 << 14, r1 << 14 +++/* [0x00001270] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001278] */ 0x409f1001, 0xd00049e0, // nop ; mul24 r0, r0 << 15, r1 << 15 +++/* [0x00001280] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001288] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00001290] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001298] */ 0x409f7001, 0xd00049e0, // nop ; mul24 r0, r0 << 9, r1 << 9 +++/* [0x000012a0] */ 0x4f5971c6, 0x100241e0, // asr 
ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012a8] */ 0x409f6001, 0xd00049e0, // nop ; mul24 r0, r0 << 10, r1 << 10 +++/* [0x000012b0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012b8] */ 0x409f5001, 0xd00049e0, // nop ; mul24 r0, r0 << 11, r1 << 11 +++/* [0x000012c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012c8] */ 0x409f4001, 0xd00049e0, // nop ; mul24 r0, r0 << 12, r1 << 12 +++/* [0x000012d0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000012d8] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x000012e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00001300] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001308] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001310] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001318] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001320] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 +++/* [0x00001328] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001330] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001338] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00001340] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001348] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00001350] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :yloopb +++/* [0x00001358] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001360] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++/* [0x00001368] */ 
0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00001370] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001378] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00001380] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00001388] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001390] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00001398] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000013a0] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x000013a8] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x000013b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000013b8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x000013c0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++/* [0x000013c8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000013d0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000013d8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000013e0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000013e8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000013f0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000013f8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00001400] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00001408] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001410] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001418] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* 
[0x00001420] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001428] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001430] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001438] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001440] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001448] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001450] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001458] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 +++/* [0x00001460] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001468] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001470] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001478] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001480] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001488] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001490] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00001498] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000014a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000014a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000014b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000014b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000014c0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000014c8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000014d0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000014d8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000014e0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000014e8] */ 
0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000014f0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000014f8] */ 0x4053800e, 0xd00049e1, // nop ; mul24 r1, r1 << 8, ra20 << 8 +++/* [0x00001500] */ 0x4c78e38f, 0x10024860, // add r1, r1, ra30 ; mul24 r0, r1, rb14 +++/* [0x00001508] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00001510] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001518] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00001520] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001528] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001530] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001538] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001540] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001548] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_interrupt_exit12 +++/* [0x00001550] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001558] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001560] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001568] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001570] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001578] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001580] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001588] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001590] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001598] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015a0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000015d8] */ 
0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000015e0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index cec9901..3fa8531 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,11 +4,15 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 152) ++-#define mc_filter_uv_b0 (rpi_shader + 342) ++-#define mc_filter_uv_b (rpi_shader + 494) ++-#define mc_exit (rpi_shader + 670) ++-#define mc_interrupt_exit8 (rpi_shader + 688) ++-#define mc_end (rpi_shader + 718) +++#define mc_filter_uv (rpi_shader + 148) +++#define mc_filter_uv_b0 (rpi_shader + 338) +++#define mc_filter_uv_b (rpi_shader + 490) +++#define mc_exit (rpi_shader + 666) +++#define mc_interrupt_exit8 (rpi_shader + 684) +++#define mc_setup (rpi_shader + 714) +++#define mc_filter (rpi_shader + 868) +++#define mc_filter_b (rpi_shader + 1108) +++#define mc_interrupt_exit12 (rpi_shader + 1364) +++#define mc_end (rpi_shader + 1402) ++ ++ #endif ++diff --git a/libavcodec/rpi_user_vcsm.h b/libavcodec/rpi_user_vcsm.h ++index fbebbbe..95e6de1 100644 ++--- a/libavcodec/rpi_user_vcsm.h +++++ b/libavcodec/rpi_user_vcsm.h ++@@ -418,6 +418,28 @@ int vcsm_unlock_hdl( unsigned int handle ); ++ */ ++ int vcsm_unlock_hdl_sp( unsigned int handle, int cache_no_flush ); ++ +++/* Clean and/or invalidate the memory associated with this user opaque handle +++** +++** Returns: non-zero on error +++** +++** structure contains a list of flush/invalidate commands. 
Commands are: +++** 0: nop +++** 1: invalidate given physical range in L2 +++** 2: clean given physical range in L2 +++** 3: clean+invalidate all of L1 +++** 4: flush all of L2 and all of L1 +++*/ +++struct vcsm_user_clean_invalid_s { +++ struct { +++ unsigned int cmd; +++ unsigned int addr; +++ unsigned int size; +++ } s[8]; +++}; +++ +++int vcsm_clean_invalid( unsigned int handle, struct vcsm_user_clean_invalid_s *s ); +++ +++ ++ #ifdef __cplusplus ++ } ++ #endif ++-- ++2.7.4 ++ ++ ++From 09685ab55aecb9400e354522894e0fbbb6381ca9 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 20 May 2015 21:12:55 +0100 ++Subject: [PATCH 43/68] Added multi mailbox - not working ++ ++--- ++ libavcodec/hevc.c | 40 ++++++++++++++++++++++++++++--- ++ libavcodec/rpi_mailbox.c | 47 +++++++++++++++++++++++++++++++++++++ ++ libavcodec/rpi_mailbox.h | 5 ++++ ++ libavcodec/rpi_qpu.c | 61 ++++++++++++++++++++++++++++++++++++++++++++---- ++ libavcodec/rpi_qpu.h | 2 ++ ++ 5 files changed, 147 insertions(+), 8 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 951e2d3..ab63efd 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -47,6 +47,11 @@ ++ //#define EARLY_MALLOC ++ // Move Inter prediction into separate pass ++ #define RPI_INTER +++ +++ #ifdef RPI_INTER_QPU +++ // Define RPI_MULTI_MAILBOX to use the updated mailbox that can launch both QPU and VPU +++ #define RPI_MULTI_MAILBOX +++ #endif ++ #endif ++ ++ // #define DISABLE_MC ++@@ -2843,10 +2848,14 @@ static void rpi_inter_clear(HEVCContext *s) ++ static void rpi_execute_inter_qpu(HEVCContext *s) ++ { ++ int k; +++ int i; ++ uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr.vc; ++- ++- if (s->sh.slice_type == I_SLICE) ++- return; +++ if (s->sh.slice_type == I_SLICE) { +++#ifdef RPI_MULTI_MAILBOX +++ rpi_execute_transform(s); +++ return; +++#endif +++ } ++ for(k=0;k<8;k++) { ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++ 
s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++@@ -2856,6 +2865,22 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ ++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++ +++#ifdef RPI_MULTI_MAILBOX +++ gpu_cache_flush(&s->coeffs_buf_accelerated); +++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, +++ qpu_get_fn(QPU_MC_SETUP_UV), +++ (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[2 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[3 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) +++ ); +++ for(i=0;i<4;i++) +++ s->num_coeffs[i] = 0; +++#else ++ qpu_run_shader8(qpu_get_fn(QPU_MC_SETUP_UV), ++ (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++ (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++@@ -2866,6 +2891,7 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) ++ ); +++#endif ++ } ++ #endif ++ ++@@ -2945,6 +2971,12 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= 
s->ps.sps->width) { ++ // Transform all blocks ++ // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); +++#ifdef RPI_MULTI_MAILBOX +++ // Kick off inter prediction on QPUs +++ rpi_execute_inter_qpu(s); +++ // Perform luma inter prediction +++ rpi_execute_inter_cmds(s); +++#else ++ rpi_execute_transform(s); ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); ++@@ -2952,6 +2984,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ // Kick off inter prediction on QPUs ++ rpi_execute_inter_qpu(s); ++ #endif +++#endif +++ ++ // Wait for transform completion ++ vpu_wait(s->vpu_id); ++ ++diff --git a/libavcodec/rpi_mailbox.c b/libavcodec/rpi_mailbox.c ++index 77a56dd..3904efc 100644 ++--- a/libavcodec/rpi_mailbox.c +++++ b/libavcodec/rpi_mailbox.c ++@@ -276,6 +276,53 @@ unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigne ++ return p[5]; ++ } ++ +++void execute_multi(int file_desc, +++ unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout, +++ unsigned num_qpus_2, unsigned control_2, unsigned noflush_2, unsigned timeout_2, +++ unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, +++ unsigned code_2, unsigned r0_2, unsigned r1_2, unsigned r2_2, unsigned r3_2, unsigned r4_2, unsigned r5_2) { +++ int i=0; +++ unsigned p[32]; +++ +++ p[i++] = 0; // size +++ p[i++] = 0x00000000; // process request +++ p[i++] = 0x30018; // (the tag id) +++ p[i++] = 88; // (size of the buffer) +++ p[i++] = 88; // (size of the data) +++ +++ p[i++] = num_qpus; +++ p[i++] = control; +++ p[i++] = noflush; +++ p[i++] = timeout; // ms +++ +++ p[i++] = num_qpus_2; +++ p[i++] = control_2; +++ p[i++] = noflush_2; +++ p[i++] = timeout_2; // ms +++ +++ p[i++] = code; +++ p[i++] = r0; +++ p[i++] = r1; +++ p[i++] = r2; +++ p[i++] = r3; +++ p[i++] = r4; +++ p[i++] = r5; +++ +++ p[i++] = code_2; +++ p[i++] 
= r0_2; +++ p[i++] = r1_2; +++ p[i++] = r2_2; +++ p[i++] = r3_2; +++ p[i++] = r4_2; +++ p[i++] = r5_2; +++ +++ p[i++] = 0x00000000; // end tag +++ p[0] = i*sizeof *p; // actual size +++ +++ mbox_property(file_desc, p); +++ return; +++} +++ ++ int mbox_open() { ++ int file_desc; ++ ++diff --git a/libavcodec/rpi_mailbox.h b/libavcodec/rpi_mailbox.h ++index c264d2e..5898102 100644 ++--- a/libavcodec/rpi_mailbox.h +++++ b/libavcodec/rpi_mailbox.h ++@@ -15,6 +15,11 @@ extern void unmapmem(void *addr, unsigned size); ++ ++ extern unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); ++ extern unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout); +++extern void execute_multi(int file_desc, +++ unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout, +++ unsigned num_qpus_2, unsigned control_2, unsigned noflush_2, unsigned timeout_2, +++ unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, +++ unsigned code_2, unsigned r0_2, unsigned r1_2, unsigned r2_2, unsigned r3_2, unsigned r4_2, unsigned r5_2); ++ extern unsigned qpu_enable(int file_desc, unsigned enable); ++ ++ #endif ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index fd8a276..feb3284 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -123,7 +123,7 @@ static pthread_cond_t post_cond_head = PTHREAD_COND_INITIALIZER; ++ static pthread_cond_t post_cond_tail = PTHREAD_COND_INITIALIZER; ++ static pthread_mutex_t post_mutex = PTHREAD_MUTEX_INITIALIZER; ++ ++-static int vpu_cmds[MAXCMDS][8]; +++static int vpu_cmds[MAXCMDS][16]; ++ static volatile int vpu_async_tail=0; // Contains the number of posted jobs ++ static volatile int vpu_async_head=0; ++ #endif ++@@ -346,6 +346,7 @@ unsigned int vpu_get_constants(void) { ++ static void *vpu_start(void *arg) { ++ while(1) { ++ int *p; +++ int qpu_code; ++ 
pthread_mutex_lock(&post_mutex); ++ while( vpu_async_tail - vpu_async_head <= 0) ++ { ++@@ -358,12 +359,25 @@ static void *vpu_start(void *arg) { ++ if (p[6] == -1) { ++ break; // Last job ++ } ++- if (p[7]) { +++ qpu_code = p[7]; +++ //if (p[7]) { ++ //GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; ++ //gpu_cache_flush(buf); ++- } ++- vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); +++ //} +++ if (!qpu_code) { +++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); +++ } else { +++ int i; +++ for(i=0;i<8;i++) { +++ gpu->mail[i*2] = p[8+i]; +++ gpu->mail[i*2 + 1] = qpu_code; +++ } ++ +++ execute_multi(gpu->mb,8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, +++ 0, 0, 0, 0, +++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 +++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 +++ } ++ pthread_mutex_lock(&post_mutex); ++ vpu_async_head++; ++ pthread_cond_broadcast(&post_cond_head); ++@@ -400,7 +414,43 @@ int vpu_post_code(unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned ++ p[4] = r3; ++ p[5] = r4; ++ p[6] = r5; ++- p[7] = (int) buf; +++ p[7] = 0; +++ if (num<=1) +++ pthread_cond_broadcast(&post_cond_tail); // Otherwise the vpu thread must already be awake +++ pthread_mutex_unlock(&post_mutex); +++ return id; +++ } +++} +++ +++int vpu_qpu_post_code(unsigned vpu_code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, +++ int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8) +++{ +++ +++ pthread_mutex_lock(&post_mutex); +++ { +++ int id = vpu_async_tail++; +++ int *p = vpu_cmds[id%MAXCMDS]; +++ int num = vpu_async_tail - vpu_async_head; +++ if (num>MAXCMDS) { +++ printf("Too many commands submitted\n"); +++ exit(-1); +++ } +++ p[0] = vpu_code; +++ p[1] = r0; +++ p[2] = r1; +++ p[3] = r2; +++ p[4] = r3; +++ p[5] = r4; +++ p[6] = r5; +++ p[7] = qpu_code; +++ p[8 ] = unifs1; +++ p[9 ] = unifs2; +++ p[10] = unifs3; +++ p[11] = unifs4; +++ 
p[12] = unifs5; +++ p[13] = unifs6; +++ p[14] = unifs7; +++ p[15] = unifs8; ++ if (num<=1) ++ pthread_cond_broadcast(&post_cond_tail); // Otherwise the vpu thread must already be awake ++ pthread_mutex_unlock(&post_mutex); ++@@ -966,6 +1016,7 @@ void rpi_do_block(const uint8_t *in_buffer_vc, int src_pitch, uint8_t *dst_vc, i ++ } ++ ++ +++ ++ #endif ++ ++ #endif // RPI ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 88965e5..2f08f03 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -41,6 +41,8 @@ extern unsigned int vpu_get_fn(void); ++ extern unsigned int vpu_get_constants(void); ++ extern unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); ++ extern int vpu_post_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, GPU_MEM_PTR_T *buf); +++int vpu_qpu_post_code(unsigned vpu_code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, +++ int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); ++ extern void vpu_wait( int id); ++ ++ // Simple test of shader code ++-- ++2.7.4 ++ ++ ++From 311f2da06d13a98d9bdda2df8684d7cf55b9a08e Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Thu, 21 May 2015 16:50:02 +0100 ++Subject: [PATCH 44/68] Pass qpu number in as uniform ++ ++--- ++ libavcodec/hevc.c | 2 +- ++ libavcodec/rpi_shader.c | 1288 ++++++++++++++++++++++---------------------- ++ libavcodec/rpi_shader.h | 20 +- ++ libavcodec/rpi_shader.qasm | 10 +- ++ 4 files changed, 657 insertions(+), 663 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index ab63efd..caadfaa 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2834,6 +2834,7 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = pic_height; ++ *s->u_mvs[i]++ = s->frame->linesize[1]; ++ *s->u_mvs[i]++ = s->frame->linesize[2]; +++ 
*s->u_mvs[i]++ = i; ++ if (weight_flag) { ++ *s->u_mvs[i]++ = 1 << (s->sh.chroma_log2_weight_denom + 6 - 1); ++ *s->u_mvs[i]++ = s->sh.chroma_log2_weight_denom + 6; ++@@ -2841,7 +2842,6 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = 1 << 5; ++ *s->u_mvs[i]++ = 6; ++ } ++- s->u_mvs[i] += 1; // Padding words ++ } ++ } ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 9c30e32..a0f0282 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -48,8 +48,8 @@ unsigned int rpi_shader[] = { ++ /* [0x000000b8] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++ /* [0x000000c0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++ /* [0x000000c8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000d0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x000000d8] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x000000d0] */ 0x15827d80, 0x100208e7, // mov r3, unif +++/* [0x000000d8] */ 0x119c17c0, 0xd00208a7, // shl r2, r3, 1 ++ /* [0x000000e0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++ /* [0x000000e8] */ 0x159e7480, 0x10020867, // mov r1, r2 ++ /* [0x000000f0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++@@ -60,669 +60,669 @@ unsigned int rpi_shader[] = { ++ /* [0x00000118] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++ /* [0x00000120] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++ /* [0x00000128] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000130] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000138] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-/* [0x00000140] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000148] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000150] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000158] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000160] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000168] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000170] */ 0x0c9e7040, 0x10020827, // 
add r0, r0, r1 ++-/* [0x00000178] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000180] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000188] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 ++-/* [0x00000190] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) ++-/* [0x00000198] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 ++-/* [0x000001a0] */ 0x15427d80, 0x10020827, // mov r0, ra_x ++-/* [0x000001a8] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000001b0] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base ++-/* [0x000001b8] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000001c0] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001c8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000001d0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001d8] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x, r0 ++-/* [0x000001e0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001e8] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x000001f0] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 ++-/* [0x000001f8] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00000200] */ 0x15827d80, 0x10021327, // mov rb12,unif ++-/* [0x00000208] */ 0x15827d80, 0x10021367, // mov rb13,unif ++-/* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000218] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000220] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000228] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000230] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000238] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000240] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x ++-/* [0x00000248] */ 0x0c627380, 0x10020e27, // add t0s, 
r1, ra_frame_base +++/* [0x00000130] */ 0x119c17c0, 0xd00208a7, // shl r2, r3, 1 +++/* [0x00000138] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000140] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000148] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000150] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000158] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000160] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000168] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000170] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000178] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000180] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 +++/* [0x00000188] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) +++/* [0x00000190] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 +++/* [0x00000198] */ 0x15427d80, 0x10020827, // mov r0, ra_x +++/* [0x000001a0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000001a8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base +++/* [0x000001b0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000001b8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000001c0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000001c8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001d0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x, r0 +++/* [0x000001d8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x000001e0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x000001e8] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 +++/* [0x000001f0] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x000001f8] */ 0x15827d80, 0x10021327, // mov rb12,unif +++/* [0x00000200] */ 0x15827d80, 0x10021367, // mov rb13,unif +++/* [0x00000208] */ 0x13740dc0, 
0xd0020867, // max r1, ra_y, 0 +++/* [0x00000210] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000218] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000220] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000228] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000230] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x +++/* [0x00000238] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base ++ // ::mc_filter_uv ++-/* [0x00000250] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000258] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000260] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000268] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000270] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000278] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000280] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000288] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000290] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x00000298] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000002a0] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x000002a8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002b0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002b8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002c0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002c8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002d0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002d8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000002e0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000002e8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002f0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000002f8] */ 0x119e7080, 0x10020827, 
// shl r0, r0, r2 ++-/* [0x00000300] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000330] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000340] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000350] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000358] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000360] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00000368] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000370] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00000378] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000380] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 ++-/* [0x00000388] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000390] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 ++-/* [0x00000398] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000003a0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000240] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000248] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000250] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000258] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000260] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000268] */ 
0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000270] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000278] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000280] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000288] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000290] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000298] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002a0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002b0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002b8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002c0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002c8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000002d0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000002d8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000002f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000002f8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000300] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000308] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000310] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000318] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000320] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000340] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 
0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000348] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000350] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000358] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000360] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00000368] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000370] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 +++/* [0x00000378] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000380] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 +++/* [0x00000388] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000390] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x000003a8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003b0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x000003b8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x000003c0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003c8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003d0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003d8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003e0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000400] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000408] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000410] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; 
mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000420] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000428] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000430] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000438] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000440] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000448] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000450] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000458] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000460] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000468] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000470] */ 0x00000020, 0xe0021327, // mov rb12,32 ++-/* [0x00000478] */ 0x00000006, 0xe0021367, // mov rb13,6 ++-/* [0x00000480] */ 0x00000001, 0xe00213a7, // mov rb14,1 ++-/* [0x00000488] */ 0x00000000, 0xe00213e7, // mov rb15,0 ++-/* [0x00000490] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000498] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000004a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000004a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000004b0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000004b8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000004c0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000004c8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 ++-/* [0x000004d0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 ++-/* [0x000004d8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 ++-/* [0x000004e0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004e8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 ++-/* [0x000004f0] */ 
0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000004f8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000500] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000508] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000510] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000518] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000520] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000528] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000530] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000538] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000540] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000398] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003a0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x000003a8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x000003b0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003b8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003c0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003c8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003d0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003d8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000003e0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x000003e8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003f0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003f8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000400] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000408] */ 0x40077031, 
0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000410] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000418] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000420] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000428] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000430] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000438] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000440] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000448] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000450] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000458] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000460] */ 0x00000020, 0xe0021327, // mov rb12,32 +++/* [0x00000468] */ 0x00000006, 0xe0021367, // mov rb13,6 +++/* [0x00000470] */ 0x00000001, 0xe00213a7, // mov rb14,1 +++/* [0x00000478] */ 0x00000000, 0xe00213e7, // mov rb15,0 +++/* [0x00000480] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000488] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000490] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000498] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000004a0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000004a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000004b0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000004b8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x000004c0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x000004c8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x000004d0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004d8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* 
[0x000004e0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000004e8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000004f0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000004f8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000500] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000508] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000510] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000518] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000520] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000528] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000530] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x00000548] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000550] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000558] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000560] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000568] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000570] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000578] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000580] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000588] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x00000590] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000598] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x000005a0] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x000005a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000005b0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000005c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000005c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000005d0] */ 0x0c9c11c0, 0xd0021467, 
// add rb17, r0, 1 ++-/* [0x000005d8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000005e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000005e8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000005f0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000005f8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000600] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000608] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000610] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000618] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000620] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000628] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000630] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000638] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000640] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000648] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000650] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000658] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000668] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000538] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000540] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000548] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000550] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000558] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000560] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000568] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000570] */ 0x0c9e70c0, 
0x10020827, // add r0, r0, r3 +++/* [0x00000578] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000580] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000588] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000590] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000598] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000005a0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005a8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000005b0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000005b8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000005c0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000005c8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000005d0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000005d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000005e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000005e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000005f0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005f8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000600] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000608] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000610] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000618] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000620] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000628] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000630] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000638] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000640] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000648] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000650] */ 0x0000ff00, 0xe20229e7, 
// mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000658] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000670] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000678] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x00000680] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00000688] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000690] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000698] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000006a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000006a8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000006b0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x000006b8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x000006c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000006c8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000006d0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000006d8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000006e0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000006e8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000006f0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000006f8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000700] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000708] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000710] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; 
mov ra12, ra13 ++-/* [0x00000718] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000720] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000728] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000730] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000738] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000740] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000748] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000750] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000758] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000760] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 ++-/* [0x00000768] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000770] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++-/* [0x00000778] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000780] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000788] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000790] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000798] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000007a0] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000660] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000668] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x00000670] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000678] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000680] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000688] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000690] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000698] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000006a0] */ 
0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000006a8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x000006b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000006b8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000006c0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000006c8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000006d0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000006d8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000006e0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000006e8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000006f0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000006f8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000700] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000708] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000710] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000718] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000720] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000728] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000730] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000738] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000740] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000748] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000750] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x00000758] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000760] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* 
[0x00000768] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000770] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000778] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000780] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000788] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000790] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x000007a8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000007b0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000007b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000007c0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000007c8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000007d0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007d8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000007e0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007e8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x000007f0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000007f8] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x00000800] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000808] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000810] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000818] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000820] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000828] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000830] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000838] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000840] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000848] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000850] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000858] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000860] */ 0x0c9e7040, 0x10020827, 
// add r0, r0, r1 ++-/* [0x00000868] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000870] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000878] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000880] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000888] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000890] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000898] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000008a8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x000008c8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008d0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008d8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008e0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008e8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000798] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000007a0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000007a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000007b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000007b8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000007c0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007c8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007d8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x000007e0] 
*/ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000007e8] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x000007f0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000007f8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000800] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000808] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000810] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000818] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000820] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000828] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000830] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000838] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000840] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000848] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000850] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000858] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000860] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000868] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000870] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000878] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000880] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000888] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000890] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000898] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x000008b8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008c0] */ 
0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008c8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000008d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008d8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000008f0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000008f8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x00000900] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00000908] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000910] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000918] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000920] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000928] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000930] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000938] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x00000940] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000948] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000950] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000958] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000960] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000968] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000970] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000978] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000980] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 
++-/* [0x00000988] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000990] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000998] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009a0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000009a8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x000009b0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000009b8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000009c0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000009c8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000009d0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000009d8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009e0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009e8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000009f0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x000009f8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000a00] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000a08] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000a10] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a18] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a20] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a28] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a30] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000a38] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a40] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a48] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a50] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a58] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a60] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008e0] */ 
0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000008e8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x000008f0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x000008f8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000900] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000908] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000910] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000918] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000920] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x00000928] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x00000930] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000938] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000940] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000948] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000950] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000958] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000960] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000968] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000970] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000978] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000980] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000988] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000990] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* 
[0x00000998] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000009a0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000009a8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000009b0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000009b8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000009c0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000009c8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009d0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009d8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000009e0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x000009e8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x000009f0] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009f8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000a00] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a08] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a10] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a18] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a20] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000a28] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a30] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a38] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a40] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a48] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a50] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a68] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a70] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a58] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a60] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a68] */ 0x009e7000, 0xa00009e7, // ldtmu0 
+++/* [0x00000a70] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a98] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000aa0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000aa8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a88] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a90] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a98] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000ab0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000aa0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000aa8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ab0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000ab8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000ac0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ad0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ac8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ad0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000ad8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000ae0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b08] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b10] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000b18] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000b20] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b00] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b08] */ 
0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000b10] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_setup ++-/* [0x00000b28] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000b30] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000b38] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000b40] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000b48] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000b50] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000b58] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x00000b60] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000b68] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 ++-/* [0x00000b70] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000b78] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000b80] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000b88] */ 0x8c9e7452, 0x10025e18, // add t0s, r2, r1 ; mov ra_frame_base, r2 ++-/* [0x00000b90] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000b98] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000ba0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000ba8] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x00000bb0] */ 0x0c9c13c0, 0xd0020567, // add ra_y2, r1, 1 ++-/* [0x00000bb8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000bc0] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 ++-/* [0x00000bc8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000bd0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000bd8] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000be0] */ 0x8c9e7452, 0x10025e19, // add t0s, r2, r1 ; mov ra_frame_base2, r2 ++-/* [0x00000be8] */ 0x0d801dc0, 
0xd0021667, // sub rb25,unif,1 ++-/* [0x00000bf0] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++-/* [0x00000bf8] */ 0x15827d80, 0x10021427, // mov rb16, unif ++-/* [0x00000c00] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000c08] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++-/* [0x00000c10] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++-/* [0x00000c18] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x00000c20] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x00000c28] */ 0x00000040, 0xe00207a7, // mov ra30, 64 ++-/* [0x00000c30] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000c38] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000c40] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x00000c48] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x00000c50] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x00000c58] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x00000c60] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x00000c68] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x00000c70] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x00000c78] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x00000c80] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x00000c88] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000c90] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000c98] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000ca0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000ca8] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000cb0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000cb8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000cc0] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000cc8] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000cd0] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000cd8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000ce0] */ 0x159e7480, 0x10020867, // 
mov r1, r2 ++-/* [0x00000ce8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000cf0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000cf8] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000d00] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000d08] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000d10] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000d18] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000d20] */ 0x15827d80, 0x10021327, // mov rb12,unif ++-/* [0x00000d28] */ 0x15827d80, 0x10021367, // mov rb13,unif ++-/* [0x00000d30] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000d38] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000d40] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000d48] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000d50] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000d58] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base ++-/* [0x00000d60] */ 0x13540dc0, 0xd0020867, // max r1, ra_y2, 0 ++-/* [0x00000d68] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000d70] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000d78] */ 0x0c541dc0, 0xd0020567, // add ra_y2, ra_y2, 1 ++-/* [0x00000d80] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000d88] */ 0x0c667380, 0x10020e27, // add t0s, r1, ra_frame_base2 +++/* [0x00000b18] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000b20] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000b28] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000b30] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000b38] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000b40] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000b48] */ 0x0c9c13c0, 
0xd0020767, // add ra_y, r1, 1 +++/* [0x00000b50] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000b58] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000b60] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000b68] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000b70] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000b78] */ 0x8c9e7452, 0x10025e18, // add t0s, r2, r1 ; mov ra_frame_base, r2 +++/* [0x00000b80] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000b88] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000b90] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000b98] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000ba0] */ 0x0c9c13c0, 0xd0020567, // add ra_y2, r1, 1 +++/* [0x00000ba8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000bb0] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000bb8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000bc0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000bc8] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000bd0] */ 0x8c9e7452, 0x10025e19, // add t0s, r2, r1 ; mov ra_frame_base2, r2 +++/* [0x00000bd8] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00000be0] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 +++/* [0x00000be8] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x00000bf0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000bf8] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x00000c00] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x00000c08] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x00000c10] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000c18] */ 0x00000040, 0xe00207a7, // mov ra30, 64 +++/* [0x00000c20] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000c28] */ 0x000000ff, 
0xe00215a7, // mov rb22, 255 +++/* [0x00000c30] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000c38] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00000c40] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x00000c48] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x00000c50] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x00000c58] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x00000c60] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x00000c68] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x00000c70] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x00000c78] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000c80] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000c88] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000c90] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000c98] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000ca0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000ca8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000cb0] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000cb8] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000cc0] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000cc8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000cd0] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000cd8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000ce0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000ce8] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000cf0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000cf8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000d00] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000d08] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000d10] */ 0x15827d80, 0x10021327, // mov rb12,unif +++/* [0x00000d18] */ 0x15827d80, 0x10021367, // mov rb13,unif +++/* [0x00000d20] */ 0x15827d80, 0x10020827, // 
mov r0, unif +++/* [0x00000d28] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000d30] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000d38] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000d40] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000d48] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base +++/* [0x00000d50] */ 0x13540dc0, 0xd0020867, // max r1, ra_y2, 0 +++/* [0x00000d58] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000d60] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000d68] */ 0x0c541dc0, 0xd0020567, // add ra_y2, ra_y2, 1 +++/* [0x00000d70] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000d78] */ 0x0c667380, 0x10020e27, // add t0s, r1, ra_frame_base2 ++ // ::mc_filter ++-/* [0x00000d90] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000d98] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000da0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000da8] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next ++-/* [0x00000db0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000db8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000dc0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000dc8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000dd0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000dd8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000de0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 ++-/* [0x00000de8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000df0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif ++-/* [0x00000df8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000e00] */ 0x119c31c0, 
0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x00000e08] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 ++-/* [0x00000e10] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000e18] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 ++-/* [0x00000e20] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000e28] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000e30] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000e38] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000e40] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000e48] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000e50] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000e58] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000e60] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000e68] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000e70] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000e78] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000e80] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000e88] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000e90] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000e98] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ea0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ea8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000eb0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000eb8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ec0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ec8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ed0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000ed8] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000ee0] 
*/ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ee8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ef0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ef8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000f00] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000f08] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f10] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f18] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f20] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 ++-/* [0x00000f28] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000f30] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000f38] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00000f40] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000f48] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00000f50] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000d80] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000d88] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000d90] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000d98] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next +++/* [0x00000da0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000da8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000db0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000db8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000dc0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000dc8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000dd0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 +++/* [0x00000dd8] */ 
0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000de0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif +++/* [0x00000de8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000df0] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000df8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 +++/* [0x00000e00] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000e08] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 +++/* [0x00000e10] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000e18] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000e20] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e28] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000e30] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000e38] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000e40] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000e48] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000e50] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000e58] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000e60] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000e68] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000e70] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e78] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000e80] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000e88] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000e90] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000e98] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000ea0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000ea8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000eb0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, 
r0, ra22 +++/* [0x00000eb8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ec0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000ec8] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000ed0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ed8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ee0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ee8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000ef0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000ef8] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f00] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f08] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f10] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 +++/* [0x00000f18] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000f20] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000f28] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000f30] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000f38] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00000f40] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :yloop ++-/* [0x00000f58] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000f60] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-/* [0x00000f68] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00000f70] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000f78] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 ++-/* [0x00000f80] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next ++-/* [0x00000f88] */ 0x13740dc0, 0xd00208a7, // 
max r2, ra_y, 0 ++-/* [0x00000f90] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000f98] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000fa0] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000fa8] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 ++-/* [0x00000fb0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000fb8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-/* [0x00000fc0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000fc8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000fd0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000fd8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000fe0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000fe8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000ff0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000ff8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00001000] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00001008] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00001010] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00001018] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00001020] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00001028] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00001030] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00001038] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* 
[0x00001040] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001048] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001050] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001058] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 ++-/* [0x00001060] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001068] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001070] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001078] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001080] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloop ++-/* [0x00001088] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001090] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00001098] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000010a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000010a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000010b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000010b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000010c0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-/* [0x000010c8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-/* [0x000010d0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-/* [0x000010d8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 ++-/* [0x000010e0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000010e8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000010f0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000010f8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 ++-/* [0x00001100] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 ++-/* [0x00001108] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 ++-/* [0x00001110] */ 0xfffffe28, 
0xf06809e7, // brr.anyn -, r:yloop ++-/* [0x00001118] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 ++-/* [0x00001120] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00001128] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00001130] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001138] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001140] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001148] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000f48] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000f50] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++/* [0x00000f58] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000f60] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000f68] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00000f70] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00000f78] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000f80] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000f88] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000f90] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x00000f98] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x00000fa0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000fa8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x00000fb0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++/* [0x00000fb8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000fc0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000fc8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 
8, r1 << 8 +++/* [0x00000fd0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000fd8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000fe0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000fe8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000ff0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000ff8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001000] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001008] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00001010] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001018] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001020] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001028] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001030] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001038] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001040] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001048] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 +++/* [0x00001050] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001058] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001060] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001068] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001070] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001078] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001080] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00001088] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00001090] */ 0x4038a037, 
0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00001098] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000010a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000010a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000010b0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000010b8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000010c0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000010c8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000010d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000010d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000010e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000010e8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x000010f0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x000010f8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x00001100] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001108] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x00001110] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001118] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001120] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001128] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001130] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001138] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_b ++-/* [0x00001150] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001158] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00001160] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00001168] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next ++-/* [0x00001170] */ 0x0c9a0f80, 
0x10020827, // add r0, unif, elem_num ++-/* [0x00001178] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00001180] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00001188] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00001190] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00001198] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000011a0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 ++-/* [0x000011a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000011b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif ++-/* [0x000011b8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x000011c0] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x000011c8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 ++-/* [0x000011d0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000011d8] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 ++-/* [0x000011e0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000011e8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000011f0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000011f8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00001200] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00001208] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00001210] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00001218] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00001220] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00001228] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001230] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001238] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00001240] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001248] */ 0x00000001, 0xe0020867, // mov r1, 1 ++-/* [0x00001250] */ 0x15827d80, 0x10060827, // mov.ifnz 
r0, unif ++-/* [0x00001258] */ 0x409f3001, 0xd00049e0, // nop ; mul24 r0, r0 << 13, r1 << 13 ++-/* [0x00001260] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001268] */ 0x409f2001, 0xd00049e0, // nop ; mul24 r0, r0 << 14, r1 << 14 ++-/* [0x00001270] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001278] */ 0x409f1001, 0xd00049e0, // nop ; mul24 r0, r0 << 15, r1 << 15 ++-/* [0x00001280] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001288] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00001290] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001298] */ 0x409f7001, 0xd00049e0, // nop ; mul24 r0, r0 << 9, r1 << 9 ++-/* [0x000012a0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012a8] */ 0x409f6001, 0xd00049e0, // nop ; mul24 r0, r0 << 10, r1 << 10 ++-/* [0x000012b0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012b8] */ 0x409f5001, 0xd00049e0, // nop ; mul24 r0, r0 << 11, r1 << 11 ++-/* [0x000012c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012c8] */ 0x409f4001, 0xd00049e0, // nop ; mul24 r0, r0 << 12, r1 << 12 ++-/* [0x000012d0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000012d8] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x000012e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00001300] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001308] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001310] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 
++-/* [0x00001318] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001320] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 ++-/* [0x00001328] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001330] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001338] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00001340] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001348] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00001350] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00001140] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001148] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00001150] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00001158] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next +++/* [0x00001160] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00001168] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00001170] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00001178] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00001180] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00001188] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00001190] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 +++/* [0x00001198] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000011a0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif +++/* [0x000011a8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x000011b0] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x000011b8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 +++/* [0x000011c0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000011c8] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 +++/* [0x000011d0] */ 0x159dcfc0, 0x10021c67, // mov 
vw_setup, rb28 +++/* [0x000011d8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000011e0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000011e8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000011f0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000011f8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00001200] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00001208] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00001210] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00001218] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001220] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001228] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00001230] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001238] */ 0x00000001, 0xe0020867, // mov r1, 1 +++/* [0x00001240] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001248] */ 0x409f3001, 0xd00049e0, // nop ; mul24 r0, r0 << 13, r1 << 13 +++/* [0x00001250] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001258] */ 0x409f2001, 0xd00049e0, // nop ; mul24 r0, r0 << 14, r1 << 14 +++/* [0x00001260] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001268] */ 0x409f1001, 0xd00049e0, // nop ; mul24 r0, r0 << 15, r1 << 15 +++/* [0x00001270] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001278] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00001280] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001288] */ 0x409f7001, 0xd00049e0, // nop ; mul24 r0, r0 << 9, r1 << 9 +++/* [0x00001290] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001298] */ 0x409f6001, 0xd00049e0, // nop ; mul24 r0, r0 << 10, r1 << 10 +++/* [0x000012a0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012a8] */ 0x409f5001, 0xd00049e0, // nop ; mul24 r0, r0 << 11, r1 << 11 
+++/* [0x000012b0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012b8] */ 0x409f4001, 0xd00049e0, // nop ; mul24 r0, r0 << 12, r1 << 12 +++/* [0x000012c0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000012c8] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x000012d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000012f0] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x000012f8] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001300] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001308] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001310] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 +++/* [0x00001318] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001320] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001328] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00001330] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001338] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00001340] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :yloopb ++-/* [0x00001358] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00001360] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-/* [0x00001368] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00001370] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00001378] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 ++-/* [0x00001380] */ 0x159c1fc0, 0x10040567, 
// mov.ifz ra_y2, ra_y2_next ++-/* [0x00001388] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00001390] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00001398] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000013a0] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x000013a8] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 ++-/* [0x000013b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000013b8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-/* [0x000013c0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 ++-/* [0x000013c8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000013d0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000013d8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000013e0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000013e8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000013f0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000013f8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00001400] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00001408] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00001410] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00001418] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00001420] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00001428] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00001430] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00001438] */ 
0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00001440] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001448] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001450] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001458] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 ++-/* [0x00001460] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001468] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001470] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001478] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001480] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloopb ++-/* [0x00001488] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001490] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00001498] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000014a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000014a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000014b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000014b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000014c0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-/* [0x000014c8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-/* [0x000014d0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-/* [0x000014d8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 ++-/* [0x000014e0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000014e8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000014f0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000014f8] */ 0x4053800e, 0xd00049e1, // nop ; mul24 r1, r1 << 8, ra20 << 8 ++-/* [0x00001500] */ 0x4c78e38f, 0x10024860, // add r1, r1, ra30 ; mul24 r0, r1, 
rb14 ++-/* [0x00001508] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x00001510] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloopb ++-/* [0x00001518] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00001520] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00001528] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00001530] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001538] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001540] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001548] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00001348] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001350] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++/* [0x00001358] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00001360] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001368] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00001370] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00001378] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001380] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00001388] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00001390] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x00001398] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x000013a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000013a8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x000013b0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++/* [0x000013b8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000013c0] */ 0x40027006, 0x100049e2, // 
nop ; mul24 r2, r0, ra0 +++/* [0x000013c8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000013d0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000013d8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000013e0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000013e8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000013f0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000013f8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001400] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001408] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00001410] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001418] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001420] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001428] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001430] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001438] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001440] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001448] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 +++/* [0x00001450] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001458] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001460] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001468] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001470] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001478] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001480] */ 0x153e7d80, 0x100203a7, // mov ra14, 
ra15 +++/* [0x00001488] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00001490] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00001498] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000014a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000014a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000014b0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000014b8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000014c0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000014c8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000014d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000014d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000014e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000014e8] */ 0x4053800e, 0xd00049e1, // nop ; mul24 r1, r1 << 8, ra20 << 8 +++/* [0x000014f0] */ 0x4c78e38f, 0x10024860, // add r1, r1, ra30 ; mul24 r0, r1, rb14 +++/* [0x000014f8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00001500] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001508] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00001510] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001518] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001520] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001528] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001530] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001538] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_interrupt_exit12 ++-/* [0x00001550] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001540] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001548] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001550] */ 0x009e7000, 0xa00009e7, // ldtmu0 
++ /* [0x00001558] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00001560] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001568] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001570] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001568] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001570] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001578] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001580] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001588] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++@@ -732,11 +732,9 @@ unsigned int rpi_shader[] = { ++ /* [0x000015a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000015b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000015b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000015d8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000015e0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000015c0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000015c8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000015d0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 3fa8531..6e552d9 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,15 +4,15 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 148) ++-#define mc_filter_uv_b0 (rpi_shader + 338) ++-#define mc_filter_uv_b (rpi_shader + 490) ++-#define mc_exit (rpi_shader + 666) ++-#define mc_interrupt_exit8 (rpi_shader + 684) ++-#define mc_setup (rpi_shader + 714) ++-#define mc_filter (rpi_shader + 868) ++-#define mc_filter_b (rpi_shader + 1108) ++-#define mc_interrupt_exit12 (rpi_shader + 1364) 
++-#define mc_end (rpi_shader + 1402) +++#define mc_filter_uv (rpi_shader + 144) +++#define mc_filter_uv_b0 (rpi_shader + 334) +++#define mc_filter_uv_b (rpi_shader + 486) +++#define mc_exit (rpi_shader + 662) +++#define mc_interrupt_exit8 (rpi_shader + 680) +++#define mc_setup (rpi_shader + 710) +++#define mc_filter (rpi_shader + 864) +++#define mc_filter_b (rpi_shader + 1104) +++#define mc_interrupt_exit12 (rpi_shader + 1360) +++#define mc_end (rpi_shader + 1398) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 9cfc0d9..a0b8e5a 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -133,8 +133,8 @@ mov ra14, 0 ++ mov ra15, 0 ++ ++ # Compute part of VPM to use for DMA output ++-mov r2, qpu_num ++-shl r2, r2, 1 # Convert QPU numbers to be even (this means we can only use 8 QPUs, but is necessary as we need to save 16bit intermediate results) +++mov r3, unif +++shl r2, r3, 1 # Convert QPU numbers to be even (this means we can only use 8 QPUs, but is necessary as we need to save 16bit intermediate results) ++ and r2, r2, 15 ++ mov r1, r2 ++ asr r1, r1, 2 ++@@ -147,8 +147,7 @@ shl r0, r0, 5 ++ add rb27, r0, r1 ++ ++ # Compute part of VPM to save data into ++-mov r2, qpu_num # qpu_num = abcd ++-shl r2, r2, 1 +++shl r2, r3, 1 ++ and r2, r2, 15 # r2 = bcd0 ++ mov r1, r2 # r1 = bcd0 ++ asr r1, r1, 2 # r1 = bc ++@@ -181,9 +180,6 @@ add t0s, r2, r1 ++ mov rb12,unif # offset before shift ++ mov rb13,unif # offset after shift ++ ++-# Dump padding words ++-mov r0, unif ++- ++ # submit texture requests for second line ++ max r1, ra_y, 0 ++ min r1, r1, rb_frame_height_minus_1 ++-- ++2.7.4 ++ ++ ++From db6fe49d50e42c444b5833acc6206c0bbfaacef4 Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Sat, 23 May 2015 13:20:21 +0100 ++Subject: [PATCH 45/68] Add new cache flushing routine ++ ++--- ++ libavcodec/hevc.c | 8 +++-- ++ libavcodec/hevc_filter.c | 39 ++++++++++----------- ++ libavcodec/rpi_qpu.c | 17 +++++++-- 
++ libavcodec/rpi_qpu.h | 2 ++ ++ libavcodec/rpi_user_vcsm.h | 86 ++++++++++++++++++++++++++-------------------- ++ 5 files changed, 91 insertions(+), 61 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index caadfaa..9d12583 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3575,9 +3575,13 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length) ++ } ++ ++ fail: ++- if (s->ref && s->threads_type == FF_THREAD_FRAME) +++ if (s->ref && s->threads_type == FF_THREAD_FRAME) { +++#ifdef RPI_INTER_QPU +++ void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n); +++ ff_hevc_flush_chroma(s, &s->ref->tf, s->ps.sps->height); +++#endif ++ ff_thread_report_progress(&s->ref->tf, INT_MAX, 0); ++- +++ } ++ return ret; ++ } ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 186317a..ec84e8a 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -883,36 +883,35 @@ static int ff_hevc_buf_base(AVBufferRef *bref) { ++ return p->vc & 0x3fffffff; ++ } ++ ++-static void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n) +++void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n); +++void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n) ++ { ++ if (s->enable_rpi && !( s->nal_unit_type == NAL_TRAIL_N || ++ s->nal_unit_type == NAL_TSA_N || ++ s->nal_unit_type == NAL_STSA_N || ++ s->nal_unit_type == NAL_RADL_N || ++ s->nal_unit_type == NAL_RASL_N )) { ++-#define RPI_FAST_CACHEFLUSH ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++- int curr_y = f->progress->data[0]; +++ int curr_y = ((int *)f->progress->data)[0]; +++ int curr_uv = curr_y >> s->ps.sps->vshift[1]; +++ int n_uv = n >> s->ps.sps->vshift[1]; ++ int sz,base; ++- if (curr_y < 0) curr_y = 0; ++- if (n<=curr_y) return; // Should not happen ++- sz = s->frame->linesize[1] * (n-curr_y); ++- base = s->frame->linesize[1] * curr_y; ++- iocache.s[0].cmd = 3; // 
Flush L1 cache ++- iocache.s[0].addr = 0; ++- iocache.s[0].size = 0; ++- ++- iocache.s[1].cmd = 2; ++- iocache.s[1].addr = ff_hevc_buf_base(s->frame->buf[1]) + base; +++ if (curr_uv < 0) curr_uv = 0; +++ if (n_uv<=curr_uv) { assert(0); return; } // Should not happen +++ sz = s->frame->linesize[1] * (n_uv-curr_uv); +++ base = s->frame->linesize[1] * curr_uv; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(s->frame->buf[1]); +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = p->arm + base; +++ iocache.s[0].size = sz; +++ p = av_buffer_pool_opaque(s->frame->buf[2]); +++ iocache.s[1].handle = p->vcsm_handle; +++ iocache.s[1].cmd = 3; // clean+invalidate +++ iocache.s[1].addr = p->arm + base; ++ iocache.s[1].size = sz; ++- ++- iocache.s[2].cmd = 2; ++- iocache.s[2].addr = ff_hevc_buf_base(s->frame->buf[2]) + base; ++- iocache.s[2].size = sz; ++- ++- vcsm_clean_invalid( gpu_get_mailbox(), &iocache ); ++- +++ vcsm_clean_invalid( &iocache ); ++ #else ++ flush_buffer(s->frame->buf[1]); ++ flush_buffer(s->frame->buf[2]); ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index feb3284..aa65a77 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -211,6 +211,7 @@ static void gpu_unlock(void) { ++ } ++ ++ static int gpu_malloc_uncached_internal(int numbytes, GPU_MEM_PTR_T *p, int mb) { +++ p->numbytes = numbytes; ++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); ++ assert(p->vcsm_handle); ++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); ++@@ -243,13 +244,25 @@ int gpu_get_mailbox(void) ++ return gpu->mb; ++ } ++ +++// Call this to clean and invalidate a region of memory ++ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ { ++- void *tmp = vcsm_lock(p->vcsm_handle); ++- vcsm_unlock_ptr(tmp); +++#define RPI_FAST_CACHEFLUSH +++#ifdef RPI_FAST_CACHEFLUSH +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ iocache.s[0].handle = p->vcsm_handle; +++ 
iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = p->arm; +++ iocache.s[0].size = p->numbytes; +++ vcsm_clean_invalid( &iocache ); +++#else +++ void *tmp = vcsm_lock(p->vcsm_handle); +++ vcsm_unlock_ptr(tmp); +++#endif ++ } ++ ++ static int gpu_malloc_cached_internal(int numbytes, GPU_MEM_PTR_T *p) { +++ p->numbytes = numbytes; ++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST, (char *)"Video Frame" ); ++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); ++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 2f08f03..0565a60 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -1,6 +1,8 @@ ++ #ifndef RPI_QPU_H ++ #define RPI_QPU_H ++ +++#define RPI_FAST_CACHEFLUSH +++ ++ typedef struct gpu_mem_ptr_s { ++ unsigned char *arm; // Pointer to memory mapped on ARM side ++ int vc_handle; // Videocore handle of relocatable memory ++diff --git a/libavcodec/rpi_user_vcsm.h b/libavcodec/rpi_user_vcsm.h ++index 95e6de1..db41a4d 100644 ++--- a/libavcodec/rpi_user_vcsm.h +++++ b/libavcodec/rpi_user_vcsm.h ++@@ -1,29 +1,41 @@ ++-/* ++-Copyright (c) 2012, Broadcom Europe Ltd ++-All rights reserved. ++- ++-Redistribution and use in source and binary forms, with or without ++-modification, are permitted provided that the following conditions are met: ++- * Redistributions of source code must retain the above copyright ++- notice, this list of conditions and the following disclaimer. ++- * Redistributions in binary form must reproduce the above copyright ++- notice, this list of conditions and the following disclaimer in the ++- documentation and/or other materials provided with the distribution. 
++- * Neither the name of the copyright holder nor the ++- names of its contributors may be used to endorse or promote products ++- derived from this software without specific prior written permission. ++- ++-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++-*/ +++/***************************************************************************** +++* Copyright 2001 - 2011 Broadcom Corporation. All rights reserved. +++* +++* This program is the proprietary software of Broadcom Corporation and/or +++* its licensors, and may only be used, duplicated, modified or distributed +++* pursuant to the terms and conditions of a separate, written license +++* agreement executed between you and Broadcom (an "Authorized License"). +++* Except as set forth in an Authorized License, Broadcom grants no license +++* (express or implied), right to use, or waiver of any kind with respect to +++* the Software, and Broadcom expressly reserves all rights in and to the +++* Software and all intellectual property rights therein. IF YOU HAVE NO +++* AUTHORIZED LICENSE, THEN YOU HAVE NO RIGHT TO USE THIS SOFTWARE IN ANY +++* WAY, AND SHOULD IMMEDIATELY NOTIFY BROADCOM AND DISCONTINUE ALL USE OF +++* THE SOFTWARE. 
+++* +++* Except as expressly set forth in the Authorized License, +++* 1. This program, including its structure, sequence and organization, +++* constitutes the valuable trade secrets of Broadcom, and you shall use +++* all reasonable efforts to protect the confidentiality thereof, and to +++* use this information only in connection with your use of Broadcom +++* integrated circuit products. +++* 2. TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +++* AND WITH ALL FAULTS AND BROADCOM MAKES NO PROMISES, REPRESENTATIONS OR +++* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH +++* RESPECT TO THE SOFTWARE. BROADCOM SPECIFICALLY DISCLAIMS ANY AND ALL +++* IMPLIED WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS +++* FOR A PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, +++* QUIET ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. YOU +++* ASSUME THE ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE. +++* 3. TO THE MAXIMUM EXTENT PERMITTED BY LAW, IN NO EVENT SHALL BROADCOM OR ITS +++* LICENSORS BE LIABLE FOR (i) CONSEQUENTIAL, INCIDENTAL, SPECIAL, INDIRECT, +++* OR EXEMPLARY DAMAGES WHATSOEVER ARISING OUT OF OR IN ANY WAY RELATING TO +++* YOUR USE OF OR INABILITY TO USE THE SOFTWARE EVEN IF BROADCOM HAS BEEN +++* ADVISED OF THE POSSIBILITY OF SUCH DAMAGES; OR (ii) ANY AMOUNT IN EXCESS +++* OF THE AMOUNT ACTUALLY PAID FOR THE SOFTWARE ITSELF OR U.S. $1, WHICHEVER +++* IS GREATER. THESE LIMITATIONS SHALL APPLY NOTWITHSTANDING ANY FAILURE OF +++* ESSENTIAL PURPOSE OF ANY LIMITED REMEDY. +++*****************************************************************************/ ++ ++ #ifndef __USER_VCSM__H__INCLUDED__ ++ #define __USER_VCSM__H__INCLUDED__ ++@@ -424,21 +436,21 @@ int vcsm_unlock_hdl_sp( unsigned int handle, int cache_no_flush ); ++ ** ++ ** structure contains a list of flush/invalidate commands. 
Commands are: ++ ** 0: nop ++-** 1: invalidate given physical range in L2 ++-** 2: clean given physical range in L2 ++-** 3: clean+invalidate all of L1 ++-** 4: flush all of L2 and all of L1 +++** 1: invalidate given virtual range in L1/L2 +++** 2: clean given virtual range in L1/L2 +++** 3: clean+invalidate given virtual range in L1/L2 +++** 4: flush all L1/L2 ++ */ ++ struct vcsm_user_clean_invalid_s { ++- struct { ++- unsigned int cmd; ++- unsigned int addr; ++- unsigned int size; ++- } s[8]; +++ struct { +++ unsigned int cmd; +++ unsigned int handle; +++ unsigned int addr; +++ unsigned int size; +++ } s[8]; ++ }; ++ ++-int vcsm_clean_invalid( unsigned int handle, struct vcsm_user_clean_invalid_s *s ); ++- +++int vcsm_clean_invalid( struct vcsm_user_clean_invalid_s *s ); ++ ++ #ifdef __cplusplus ++ } ++-- ++2.7.4 ++ ++ ++From 87a6cb3a4f7189e711c85de6d20077b6453b2ebe Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Sat, 23 May 2015 21:10:10 +0100 ++Subject: [PATCH 46/68] Fix multi mailbox extra transform call ++ ++--- ++ libavcodec/hevc.c | 2 ++ ++ 1 file changed, 2 insertions(+) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 9d12583..30f5834 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3024,7 +3024,9 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI_INTER_QPU ++ rpi_execute_inter_qpu(s); ++ #endif +++#ifndef RPI_MULTI_MAILBOX ++ rpi_execute_transform(s); +++#endif ++ rpi_execute_inter_cmds(s); ++ vpu_wait(s->vpu_id); ++ rpi_execute_pred_cmds(s); ++-- ++2.7.4 ++ ++ ++From 2a3672a1bda0296453953bebe8b17d69445260b4 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 27 May 2015 16:44:29 +0100 ++Subject: [PATCH 47/68] Added support for running luma prediction on QPUs ++ ++--- ++ libavcodec/hevc.c | 237 +++++++- ++ libavcodec/hevc.h | 26 +- ++ libavcodec/hevc_filter.c | 23 +- ++ libavcodec/rpi_qpu.c | 156 ++++-- ++ libavcodec/rpi_qpu.h | 8 +- ++ libavcodec/rpi_shader.c | 1313 
++++++++++++++++++++++---------------------- ++ libavcodec/rpi_shader.h | 21 +- ++ libavcodec/rpi_shader.qasm | 883 ++++++++++++++--------------- ++ 8 files changed, 1464 insertions(+), 1203 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 30f5834..2da88ec 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -52,6 +52,11 @@ ++ // Define RPI_MULTI_MAILBOX to use the updated mailbox that can launch both QPU and VPU ++ #define RPI_MULTI_MAILBOX ++ #endif +++ +++ // Define RPI_CACHE_UNIF_MVS to write motion vector uniform stream to cached memory +++ // RPI_CACHE_UNIF_MVS doesn't seem to make much difference, so left undefined. +++ +++ ++ #endif ++ ++ // #define DISABLE_MC ++@@ -74,6 +79,13 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++ +++// Split image of 2048 into parts 64 wide +++// So some QPUs will have 3 blocks of 64 to do, and others 2 blocks for an image 2048 wide with 32 blocks across +++// Each block of 64*64 +++// Smallest CTU size is 16x16, so smallest block is 8x8 +++// Corresponds to a total of 83kbytes over all 12 QPUs +++#define RPI_LUMA_COMMAND_WORDS 9 +++#define Y_COMMANDS_PER_QPU ((1+3*(64*64)/(8*8)) * RPI_LUMA_COMMAND_WORDS) ++ ++ #define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++@@ -2015,10 +2027,46 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref0->frame, +++#ifdef RPI_LUMA_QPU +++ if (s->enable_rpi) { +++ int reflist = 0; +++ const Mv *mv = ¤t_mv.mv[reflist]; +++ int mx = mv->x & 3; +++ int my = mv->y & 3; +++ int my_mx = (my<<8) + mx; +++ int my2_mx2_my_mx = (my_mx << 16) + my_mx; +++ int x1 = x0 + (mv->x >> 2); +++ 
int y1 = y0 + (mv->y >> 2); +++ int chan = x0>>6; // 64 wide blocks per QPU +++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || +++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); +++ uint32_t *y = s->y_mvs[chan % 12]; +++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go +++ for(int start_x=0;start_x < nPbW;start_x+=16) { +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ *y++ = ( (nPbW<16 ? nPbW : 16) << 16 ) + (nPbH<16 ? nPbH : 16); +++ *y++ = my2_mx2_my_mx; +++ if (weight_flag) { +++ *y++ = (s->sh.luma_offset_l0[current_mv.ref_idx[reflist]] << 16) + (s->sh.luma_weight_l0[current_mv.ref_idx[reflist]] & 0xffff); +++ } else { +++ *y++ = 1; // Weight of 1 and offset of 0 +++ } +++ *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; +++ } +++ } +++ s->y_mvs[chan % 12] = y; +++ } else +++#endif +++ { +++ RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref0->frame, ++ &current_mv.mv[0], x0, y0, nPbW, nPbH, ++ s->sh.luma_weight_l0[current_mv.ref_idx[0]], ++ s->sh.luma_offset_l0[current_mv.ref_idx[0]]); +++ } ++ ++ if (s->ps.sps->chroma_format_idc) { ++ #ifdef RPI_INTER_QPU ++@@ -2078,10 +2126,47 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref1->frame, +++#ifdef RPI_LUMA_QPU +++ if (s->enable_rpi) { +++ int reflist = 1; +++ const Mv *mv = &current_mv.mv[reflist]; +++ int mx = mv->x 
& 3; +++ int my = mv->y & 3; +++ int my_mx = (my<<8) + mx; +++ int my2_mx2_my_mx = (my_mx << 16) + my_mx; +++ int x1 = x0 + (mv->x >> 2); +++ int y1 = y0 + (mv->y >> 2); +++ int chan = x0>>6; // 64 wide blocks per QPU +++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || +++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); +++ uint32_t *y = s->y_mvs[chan % 12]; +++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go +++ for(int start_x=0;start_x < nPbW;start_x+=16) { +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ *y++ = ( (nPbW<16 ? nPbW : 16) << 16 ) + (nPbH<16 ? nPbH : 16); +++ *y++ = my2_mx2_my_mx; +++ if (weight_flag) { +++ *y++ = (s->sh.luma_offset_l1[current_mv.ref_idx[reflist]] << 16) + (s->sh.luma_weight_l1[current_mv.ref_idx[reflist]] & 0xffff); // reflist is 1 here, so use the L1 tables like the CPU fallback below +++ } else { +++ *y++ = 1; // Weight of 1 and offset of 0 +++ } +++ *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; +++ } +++ } +++ s->y_mvs[chan % 12] = y; +++ } else +++#endif +++ +++ { +++ RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref1->frame, ++ &current_mv.mv[1], x0, y0, nPbW, nPbH, ++ s->sh.luma_weight_l1[current_mv.ref_idx[1]], ++ s->sh.luma_offset_l1[current_mv.ref_idx[1]]); +++ } ++ ++ if (s->ps.sps->chroma_format_idc) { ++ #ifdef RPI_INTER_QPU ++@@ -2115,8 +2200,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; ++ if (weight_flag) { ++- *u++ = 
(s->sh.chroma_offset_l0[current_mv.ref_idx[1]][0] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[1]][0] & 0xffff); ++- *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[1]][1] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[1]][1] & 0xffff); +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[reflist]][0] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[reflist]][0] & 0xffff); +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[reflist]][1] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[reflist]][1] & 0xffff); ++ } else { ++ *u++ = 1; // Weight of 1 and offset of 0 ++ *u++ = 1; ++@@ -2143,9 +2228,44 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- RPI_REDIRECT(luma_mc_bi)(s, dst0, s->frame->linesize[0], ref0->frame, +++#ifdef RPI_LUMA_QPU +++ if (s->enable_rpi) { +++ const Mv *mv = &current_mv.mv[0]; +++ int mx = mv->x & 3; +++ int my = mv->y & 3; +++ int my_mx = (my<<8) + mx; +++ const Mv *mv2 = &current_mv.mv[1]; +++ int mx2 = mv2->x & 3; +++ int my2 = mv2->y & 3; +++ int my2_mx2 = (my2<<8) + mx2; +++ int my2_mx2_my_mx = (my2_mx2 << 16) + my_mx; +++ int x1 = x0 + (mv->x >> 2); +++ int y1 = y0 + (mv->y >> 2); +++ int x2 = x0 + (mv2->x >> 2); +++ int y2 = y0 + (mv2->y >> 2); +++ int chan = x0>>6; // 64 wide blocks per QPU +++ uint32_t *y = s->y_mvs[chan % 12]; +++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go +++ for(int start_x=0;start_x < nPbW;start_x+=8) { // B blocks work 8 at a time +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y2 - 3 + start_y) << 16) + ( (x2 - 3 + start_x) & 0xffff); // Second fetch is for ref1 +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ *y++ = ( 
(nPbW<8 ? nPbW : 8) << 16 ) + (nPbH<16 ? nPbH : 16); +++ *y++ = my2_mx2_my_mx; +++ *y++ = 1; // B frame weighted prediction not supported +++ *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter_b; +++ } +++ } +++ s->y_mvs[chan % 12] = y; +++ } else +++#endif +++ { +++ RPI_REDIRECT(luma_mc_bi)(s, dst0, s->frame->linesize[0], ref0->frame, ++ &current_mv.mv[0], x0, y0, nPbW, nPbH, ++ ref1->frame, &current_mv.mv[1], &current_mv); +++ } ++ ++ if (s->ps.sps->chroma_format_idc) { ++ #ifdef RPI_INTER_QPU ++@@ -2834,7 +2954,6 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = pic_height; ++ *s->u_mvs[i]++ = s->frame->linesize[1]; ++ *s->u_mvs[i]++ = s->frame->linesize[2]; ++- *s->u_mvs[i]++ = i; ++ if (weight_flag) { ++ *s->u_mvs[i]++ = 1 << (s->sh.chroma_log2_weight_denom + 6 - 1); ++ *s->u_mvs[i]++ = s->sh.chroma_log2_weight_denom + 6; ++@@ -2842,7 +2961,31 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = 1 << 5; ++ *s->u_mvs[i]++ = 6; ++ } +++ *s->u_mvs[i]++ = i; // Select section of VPM (avoid collisions with 3d unit) +++ } +++ +++#ifdef RPI_LUMA_QPU +++ for(i=0;i<12;i++) { +++ s->y_mvs[i] = s->y_mvs_base[i]; +++ *s->y_mvs[i]++ = 0; // y_x +++ *s->y_mvs[i]++ = 0; // ref_y_base +++ *s->y_mvs[i]++ = 0; // y2_x2 +++ *s->y_mvs[i]++ = 0; // ref_y2_base +++ *s->y_mvs[i]++ = (s->ps.sps->width << 16) + s->ps.sps->height; +++ *s->y_mvs[i]++ = s->frame->linesize[0]; // pitch +++ *s->y_mvs[i]++ = s->frame->linesize[0]; // dst_pitch +++ if (weight_flag) { +++ int offset = 1 << (s->sh.luma_log2_weight_denom + 6 - 1); +++ int shift = s->sh.luma_log2_weight_denom + 6; +++ *s->y_mvs[i]++ = (offset << 16) + shift; +++ } else { +++ int offset = 1 << 5; +++ int shift = 6; +++ *s->y_mvs[i]++ = (offset << 16) + shift; +++ } +++ *s->y_mvs[i]++ = 0; // Next kernel ++ } +++#endif ++ } ++ ++ static void rpi_execute_inter_qpu(HEVCContext *s) ++@@ -2850,6 +2993,9 @@ static void 
rpi_execute_inter_qpu(HEVCContext *s) ++ int k; ++ int i; ++ uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr.vc; +++#ifdef RPI_LUMA_QPU +++ uint32_t *y_unif_vc = (uint32_t *)s->y_unif_mvs_ptr.vc; +++#endif ++ if (s->sh.slice_type == I_SLICE) { ++ #ifdef RPI_MULTI_MAILBOX ++ rpi_execute_transform(s); ++@@ -2865,8 +3011,23 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ ++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++ +++#ifdef RPI_LUMA_QPU +++ for(k=0;k<12;k++) { +++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+1] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for second request +++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_EXIT); // Add exit command +++ assert(s->y_mvs[k] - s->y_mvs_base[k] < Y_COMMANDS_PER_QPU); +++ } +++ s->y_mvs[12-1][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT12); // This QPU will signal interrupt when all others are done and have acquired a semaphore +++#endif +++ +++ ++ #ifdef RPI_MULTI_MAILBOX +++#ifdef RPI_CACHE_UNIF_MVS +++ gpu_cache_flush3(&s->coeffs_buf_accelerated,&s->y_unif_mvs_ptr, &s->unif_mvs_ptr); +++#else ++ gpu_cache_flush(&s->coeffs_buf_accelerated); +++#endif ++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++ (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++@@ -2876,7 +3037,27 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++ (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++ (uint32_t)(unif_vc+(s->mvs_base[6 ] - 
(uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) +++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++#ifdef RPI_LUMA_QPU +++ qpu_get_fn(QPU_MC_SETUP), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[0 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[1 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[2 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[3 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[4 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[5 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[6 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[7 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[8 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[9 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[10 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[11 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)) +++#else +++ 0, +++ 0,0,0,0, +++ 0,0,0,0, +++ 0,0,0,0 +++#endif ++ ); ++ for(i=0;i<4;i++) ++ s->num_coeffs[i] = 0; ++@@ -2892,6 +3073,8 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) ++ ); ++ #endif +++ +++ ++ } ++ #endif ++ ++@@ -3579,8 +3762,7 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length) ++ fail: ++ if (s->ref && s->threads_type == FF_THREAD_FRAME) { ++ #ifdef RPI_INTER_QPU ++- void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n); ++- ff_hevc_flush_chroma(s, &s->ref->tf, s->ps.sps->height); +++ ff_hevc_flush_buffer(s, &s->ref->tf, s->ps.sps->height); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, INT_MAX, 0); ++ } ++@@ -3767,7 +3949,6 
@@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ ++ #ifdef RPI ++ av_freep(&s->unif_mv_cmds); ++- av_freep(&s->unif_xfm_cmds); ++ av_freep(&s->univ_pred_cmds); ++ ++ #ifdef RPI_INTER_QPU ++@@ -3776,7 +3957,12 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ s->unif_mvs = 0; ++ } ++ #endif ++- //gpu_free(&s->dummy); +++#ifdef RPI_LUMA_QPU +++ if (s->y_unif_mvs) { +++ gpu_free( &s->y_unif_mvs_ptr ); +++ s->y_unif_mvs = 0; +++ } +++#endif ++ ++ #ifdef EARLY_MALLOC ++ printf("hevc_decode_free\n"); ++@@ -3861,9 +4047,6 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->unif_mv_cmds = av_mallocz(sizeof(HEVCMvCmd)*RPI_MAX_MV_CMDS); ++ if (!s->unif_mv_cmds) ++ goto fail; ++- s->unif_xfm_cmds = av_mallocz(sizeof(HEVCXfmCmd)*RPI_MAX_XFM_CMDS); ++- if (!s->unif_xfm_cmds) ++- goto fail; ++ s->univ_pred_cmds = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); ++ if (!s->univ_pred_cmds) ++ goto fail; ++@@ -3877,7 +4060,11 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ { ++ int uv_commands_per_qpu = UV_COMMANDS_PER_QPU; ++ uint32_t *p; +++#ifdef RPI_CACHE_UNIF_MVS +++ gpu_malloc_cached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++#else ++ gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++#endif ++ s->unif_mvs = (uint32_t *) s->unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC ++ ++ // Set up initial locations for uniform streams ++@@ -3892,6 +4079,28 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ ++ } ++ #endif +++#ifdef RPI_LUMA_QPU +++ { +++ int y_commands_per_qpu = Y_COMMANDS_PER_QPU; +++ uint32_t *p; +++#ifdef RPI_CACHE_UNIF_MVS +++ gpu_malloc_cached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr ); +++#else +++ gpu_malloc_uncached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr ); +++#endif +++ s->y_unif_mvs = (uint32_t *) s->y_unif_mvs_ptr.arm; // TODO support this 
allocation in non EARLY_MALLOC +++ +++ // Set up initial locations for uniform streams +++ p = s->y_unif_mvs; +++ for(i = 0; i < 12; i++) { +++ s->y_mvs_base[i] = p; +++ p += y_commands_per_qpu; +++ } +++ s->mc_filter = qpu_get_fn(QPU_MC_FILTER); +++ s->mc_filter_b = qpu_get_fn(QPU_MC_FILTER_B); +++ +++ } +++#endif ++ //gpu_malloc_uncached(2048*64,&s->dummy); ++ ++ #ifdef EARLY_MALLOC ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 4a39e39..5df9dcd 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -44,9 +44,13 @@ ++ #ifdef RPI ++ ++ #include "rpi_qpu.h" ++- // Use QPU for inter prediction +++ // Define RPI_INTER_QPU to use QPU for chroma inter prediction ++ #define RPI_INTER_QPU ++ +++ #ifdef RPI_INTER_QPU +++ // Define RPI_LUMA_QPU to also use QPU for luma inter prediction +++ #define RPI_LUMA_QPU +++ #endif ++ #endif ++ ++ #define MAX_DPB_SIZE 16 // A.4.1 ++@@ -809,7 +813,6 @@ typedef struct HEVCLocalContext { ++ ++ // Worst case is for 4:4:4 4x4 blocks with 64 high coding tree blocks, so 16 MV cmds per 4 pixels across for each colour plane, * 2 for bi ++ #define RPI_MAX_MV_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) ++-#define RPI_MAX_XFM_CMDS (16*3*(RPI_MAX_WIDTH/4)) ++ // Each block can have an intra prediction and a transform_add command ++ #define RPI_MAX_PRED_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) ++ // Worst case is 16x16 CTUs ++@@ -844,9 +847,6 @@ typedef struct HEVCMvCmd { ++ int8_t ref_idx[2]; ++ } HEVCMvCmd; ++ ++-// Command for transform to process a block of coefficients ++-typedef struct HEVCXfmCmd { ++-} HEVCXfmCmd; ++ ++ // Command for intra prediction and transform_add of predictions to coefficients ++ #define RPI_PRED_TRANSFORM_ADD 0 ++@@ -892,8 +892,7 @@ typedef struct HEVCContext { ++ ++ #ifdef RPI ++ int enable_rpi; ++- HEVCMvCmd *unif_mv_cmds; // TODO rename ++- HEVCXfmCmd *unif_xfm_cmds; +++ HEVCMvCmd *unif_mv_cmds; ++ HEVCPredCmd *univ_pred_cmds; ++ int buf_width; ++ GPU_MEM_PTR_T coeffs_buf_default; ++@@ -920,6 +919,15 @@ typedef 
struct HEVCContext { ++ uint32_t mc_filter_uv_b0; ++ uint32_t mc_filter_uv_b; ++ #endif +++#ifdef RPI_LUMA_QPU +++ GPU_MEM_PTR_T y_unif_mvs_ptr; +++ uint32_t *y_unif_mvs; // Base of memory for motion vector commands +++ uint32_t *y_mvs_base[12]; +++ uint32_t *y_mvs[12]; +++ // Function pointers +++ uint32_t mc_filter; +++ uint32_t mc_filter_b; +++#endif ++ ++ #endif ++ ++@@ -1166,6 +1174,10 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ int log2_trafo_size, enum ScanType scan_idx, ++ int c_idx); ++ +++#ifdef RPI_INTER_QPU +++extern void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n); +++#endif +++ ++ void ff_hevc_hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size); ++ ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index ec84e8a..11629e4 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -883,8 +883,7 @@ static int ff_hevc_buf_base(AVBufferRef *bref) { ++ return p->vc & 0x3fffffff; ++ } ++ ++-void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n); ++-void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n) +++void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ { ++ if (s->enable_rpi && !( s->nal_unit_type == NAL_TRAIL_N || ++ s->nal_unit_type == NAL_TSA_N || ++@@ -911,10 +910,24 @@ void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n) ++ iocache.s[1].cmd = 3; // clean+invalidate ++ iocache.s[1].addr = p->arm + base; ++ iocache.s[1].size = sz; +++ +++#ifdef RPI_LUMA_QPU +++ p = av_buffer_pool_opaque(s->frame->buf[0]); +++ sz = s->frame->linesize[0] * (n-curr_y); +++ base = s->frame->linesize[0] * curr_y; +++ iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].cmd = 3; // clean+invalidate +++ iocache.s[2].addr = p->arm + base; +++ iocache.s[2].size = sz; +++#endif ++ vcsm_clean_invalid( &iocache ); ++ #else ++ flush_buffer(s->frame->buf[1]); ++ flush_buffer(s->frame->buf[2]); +++#ifdef RPI_LUMA_QPU +++ 
flush_buffer(s->frame->buf[1]); +++#endif +++ ++ #endif ++ //memcpy(s->dummy.arm,s->frame->data[0],2048*64); ++ //memcpy(s->dummy.arm,s->frame->data[1],1024*32); ++@@ -938,7 +951,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ sao_filter_CTB(s, x, y - ctb_size); ++ if (s->threads_type & FF_THREAD_FRAME ) { ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s,&s->ref->tf, y); +++ ff_hevc_flush_buffer(s,&s->ref->tf, y); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y, 0); ++ } ++@@ -947,7 +960,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ sao_filter_CTB(s, x , y); ++ if (s->threads_type & FF_THREAD_FRAME ) { ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s, &s->ref->tf, y + ctb_size); +++ ff_hevc_flush_buffer(s, &s->ref->tf, y + ctb_size); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0); ++ } ++@@ -957,7 +970,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ //int currh = s->ref->tf.progress->data[0]; ++ //if (((y + ctb_size)&63)==0) ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s, &s->ref->tf, y + ctb_size - 4); +++ ff_hevc_flush_buffer(s, &s->ref->tf, y + ctb_size - 4); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index aa65a77..e12304b 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -1,9 +1,11 @@ ++ #ifdef RPI ++ // This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. 
++ // define RPI_TIME_TOTAL_QPU to print out how much time is spent in the QPU code ++-#define RPI_TIME_TOTAL_QPU +++//#define RPI_TIME_TOTAL_QPU ++ // define RPI_TIME_TOTAL_VPU to print out how much time is spent in the VPI code ++ //#define RPI_TIME_TOTAL_VPU +++// define RPI_TIME_TOTAL_POSTED to print out how much time is spent in the multi execute QPU/VPU combined +++//#define RPI_TIME_TOTAL_POSTED ++ // define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion ++ #define RPI_ASYNC ++ ++@@ -94,7 +96,8 @@ struct GPU ++ int open_count; // Number of allocated video buffers ++ int mb; // Mailbox handle ++ int vc; // Address in GPU memory ++- int mail[12]; // These are used to pass pairs of code/unifs to the QPUs +++ int mail[12*2]; // These are used to pass pairs of code/unifs to the QPUs for the first QPU task +++ int mail2[12*2]; // These are used to pass pairs of code/unifs to the QPUs for the second QPU task ++ }; ++ ++ // Stop more than one thread trying to allocate memory or use the processing resources at once ++@@ -102,7 +105,7 @@ static pthread_mutex_t gpu_mutex = PTHREAD_MUTEX_INITIALIZER; ++ static volatile struct GPU* gpu = NULL; ++ static GPU_MEM_PTR_T gpu_mem_ptr; ++ ++-#if defined(RPI_TIME_TOTAL_QPU) || defined(RPI_TIME_TOTAL_VPU) +++#if defined(RPI_TIME_TOTAL_QPU) || defined(RPI_TIME_TOTAL_VPU) || defined(RPI_TIME_TOTAL_POSTED) ++ static unsigned int Microseconds(void) { ++ struct timespec ts; ++ unsigned int x; ++@@ -123,7 +126,7 @@ static pthread_cond_t post_cond_head = PTHREAD_COND_INITIALIZER; ++ static pthread_cond_t post_cond_tail = PTHREAD_COND_INITIALIZER; ++ static pthread_mutex_t post_mutex = PTHREAD_MUTEX_INITIALIZER; ++ ++-static int vpu_cmds[MAXCMDS][16]; +++static int vpu_cmds[MAXCMDS][32]; ++ static volatile int vpu_async_tail=0; // Contains the number of posted jobs ++ static volatile int vpu_async_head=0; ++ #endif ++@@ -247,7 +250,6 @@ int gpu_get_mailbox(void) ++ // Call this to 
clean and invalidate a region of memory ++ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ { ++-#define RPI_FAST_CACHEFLUSH ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++ iocache.s[0].handle = p->vcsm_handle; ++@@ -261,6 +263,34 @@ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ #endif ++ } ++ +++void gpu_cache_flush3(GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2) +++{ +++#ifdef RPI_FAST_CACHEFLUSH +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ iocache.s[0].handle = p0->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = (int) p0->arm; +++ iocache.s[0].size = p0->numbytes; +++ iocache.s[1].handle = p1->vcsm_handle; +++ iocache.s[1].cmd = 3; // clean+invalidate +++ iocache.s[1].addr = (int) p1->arm; +++ iocache.s[1].size = p1->numbytes; +++ iocache.s[2].handle = p2->vcsm_handle; +++ iocache.s[2].cmd = 3; // clean+invalidate +++ iocache.s[2].addr = (int) p2->arm; +++ iocache.s[2].size = p2->numbytes; +++ vcsm_clean_invalid( &iocache ); +++#else +++ void *tmp; +++ tmp = vcsm_lock(p0->vcsm_handle); +++ vcsm_unlock_ptr(tmp); +++ tmp = vcsm_lock(p1->vcsm_handle); +++ vcsm_unlock_ptr(tmp); +++ tmp = vcsm_lock(p2->vcsm_handle); +++ vcsm_unlock_ptr(tmp); +++#endif +++} +++ ++ static int gpu_malloc_cached_internal(int numbytes, GPU_MEM_PTR_T *p) { ++ p->numbytes = numbytes; ++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST, (char *)"Video Frame" ); ++@@ -357,9 +387,19 @@ unsigned int vpu_get_constants(void) { ++ #ifdef RPI_ASYNC ++ ++ static void *vpu_start(void *arg) { +++#ifdef RPI_TIME_TOTAL_POSTED +++ int last_time=0; +++ long long on_time=0; +++ long long off_time=0; +++ int start_time; +++ int end_time; +++ int count=0; +++#endif ++ while(1) { +++ int i; ++ int *p; ++ int qpu_code; +++ int qpu_codeb; ++ pthread_mutex_lock(&post_mutex); ++ while( vpu_async_tail - vpu_async_head <= 0) ++ { ++@@ -373,24 +413,49 @@ static void *vpu_start(void *arg) { ++ break; // Last job ++ } ++ 
qpu_code = p[7]; +++ qpu_codeb = p[16]; ++ //if (p[7]) { ++ //GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; ++ //gpu_cache_flush(buf); ++ //} +++ +++#ifdef RPI_TIME_TOTAL_POSTED +++ start_time = Microseconds(); +++ if (last_time==0) +++ last_time = start_time; +++ off_time += start_time-last_time; +++#endif +++ ++ if (!qpu_code) { ++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); ++ } else { ++- int i; ++ for(i=0;i<8;i++) { ++ gpu->mail[i*2] = p[8+i]; ++ gpu->mail[i*2 + 1] = qpu_code; ++ } ++- ++- execute_multi(gpu->mb,8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, ++- 0, 0, 0, 0, +++ for(i=0;i<12;i++) { +++ gpu->mail2[i*2] = p[17+i]; +++ gpu->mail2[i*2 + 1] = qpu_codeb; +++ } +++#if (0) +++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); +++ execute_qpu(gpu->mb,8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */); +++#else +++ execute_multi(gpu->mb, +++ 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, ++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 ++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 +++#endif ++ } +++#ifdef RPI_TIME_TOTAL_POSTED +++ end_time = Microseconds(); +++ last_time = end_time; +++ on_time += end_time - start_time; +++ count++; +++ if ((count&0x7f)==0) +++ printf("Posted %d On=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(off_time/1000)); +++#endif ++ pthread_mutex_lock(&post_mutex); ++ vpu_async_head++; ++ pthread_cond_broadcast(&post_cond_head); ++@@ -436,7 +501,9 @@ int vpu_post_code(unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned ++ } ++ ++ int vpu_qpu_post_code(unsigned vpu_code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, ++- int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8) +++ int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, 
int unifs7, int unifs8, +++ int qpu_codeb, int unifs1b, int unifs2b, int unifs3b, int unifs4b, int unifs5b, int unifs6b, int unifs7b, int unifs8b, int unifs9b, int unifs10b, int unifs11b, int unifs12b +++ ) ++ { ++ ++ pthread_mutex_lock(&post_mutex); ++@@ -464,6 +531,21 @@ int vpu_qpu_post_code(unsigned vpu_code, unsigned r0, unsigned r1, unsigned r2, ++ p[13] = unifs6; ++ p[14] = unifs7; ++ p[15] = unifs8; +++ +++ p[16] = qpu_codeb; +++ p[17] = unifs1b; +++ p[18] = unifs2b; +++ p[19] = unifs3b; +++ p[20] = unifs4b; +++ p[21] = unifs5b; +++ p[22] = unifs6b; +++ p[23] = unifs7b; +++ p[24] = unifs8b; +++ p[25] = unifs9b; +++ p[26] = unifs10b; +++ p[27] = unifs11b; +++ p[28] = unifs12b; +++ ++ if (num<=1) ++ pthread_cond_broadcast(&post_cond_tail); // Otherwise the vpu thread must already be awake ++ pthread_mutex_unlock(&post_mutex); ++@@ -544,27 +626,27 @@ void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int un ++ off_time += start_time-last_time; ++ #endif ++ for(i=0;imail[i*2 + 1] = code; +++ gpu->mail2[i*2 + 1] = code; ++ } ++ for(;imail[i*2 + 1] = code2; +++ gpu->mail2[i*2 + 1] = code2; ++ } ++- gpu->mail[0 ] = unifs1; ++- gpu->mail[2 ] = unifs2; ++- gpu->mail[4 ] = unifs3; ++- gpu->mail[6 ] = unifs4; ++- gpu->mail[8 ] = unifs5; ++- gpu->mail[10] = unifs6; ++- gpu->mail[12] = unifs7; ++- gpu->mail[14] = unifs8; ++- gpu->mail[16] = unifs9; ++- gpu->mail[18] = unifs10; ++- gpu->mail[20] = unifs11; ++- gpu->mail[22] = unifs12; +++ gpu->mail2[0 ] = unifs1; +++ gpu->mail2[2 ] = unifs2; +++ gpu->mail2[4 ] = unifs3; +++ gpu->mail2[6 ] = unifs4; +++ gpu->mail2[8 ] = unifs5; +++ gpu->mail2[10] = unifs6; +++ gpu->mail2[12] = unifs7; +++ gpu->mail2[14] = unifs8; +++ gpu->mail2[16] = unifs9; +++ gpu->mail2[18] = unifs10; +++ gpu->mail2[20] = unifs11; +++ gpu->mail2[22] = unifs12; ++ execute_qpu( ++ gpu->mb, ++ 12 /* Number of QPUs */, ++- gpu->vc + offsetof(struct GPU, mail), +++ gpu->vc + offsetof(struct GPU, mail2), ++ 1 /* no flush */, // Don't 
flush VPU L1 cache ++ 5000 /* timeout ms */); ++ #ifdef RPI_TIME_TOTAL_QPU ++@@ -635,21 +717,21 @@ unsigned int qpu_get_fn(int num) { ++ gpu_unlock(); ++ } ++ switch(num) { ++- //case QPU_MC_SETUP: ++- // fn = mc_setup; ++- // break; ++- //case QPU_MC_FILTER: ++- // fn = mc_filter; ++- // break; +++ case QPU_MC_SETUP: +++ fn = mc_setup; +++ break; +++ case QPU_MC_FILTER: +++ fn = mc_filter; +++ break; ++ case QPU_MC_EXIT: ++ fn = mc_exit; ++ break; ++- //case QPU_MC_INTERRUPT_EXIT: ++- // fn = mc_interrupt_exit; ++- // break; ++- //case QPU_MC_FILTER_B: ++- // fn = mc_filter_b; ++- // break; +++ case QPU_MC_INTERRUPT_EXIT12: +++ fn = mc_interrupt_exit12; +++ break; +++ case QPU_MC_FILTER_B: +++ fn = mc_filter_b; +++ break; ++ //case QPU_MC_FILTER_HONLY: ++ // fn = mc_filter_honly; ++ // break; ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 0565a60..81c2bb1 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -1,6 +1,7 @@ ++ #ifndef RPI_QPU_H ++ #define RPI_QPU_H ++ +++// Define RPI_FAST_CACHEFLUSH to use the VCSM cache flush code ++ #define RPI_FAST_CACHEFLUSH ++ ++ typedef struct gpu_mem_ptr_s { ++@@ -16,6 +17,7 @@ extern int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p); ++ extern int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p); ++ extern void gpu_free(GPU_MEM_PTR_T *p); ++ extern void gpu_cache_flush(GPU_MEM_PTR_T *p); +++extern void gpu_cache_flush3(GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2); ++ ++ // QPU specific functions ++ extern void qpu_run_shader8(int code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); ++@@ -26,7 +28,7 @@ enum { ++ QPU_MC_SETUP, ++ QPU_MC_FILTER, ++ QPU_MC_EXIT, ++- QPU_MC_INTERRUPT_EXIT, +++ QPU_MC_INTERRUPT_EXIT12, ++ QPU_MC_FILTER_B, ++ QPU_MC_FILTER_HONLY, ++ QPU_MC_SETUP_UV, ++@@ -44,7 +46,9 @@ extern unsigned int vpu_get_constants(void); ++ extern unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, 
unsigned r2, unsigned r3, unsigned r4, unsigned r5); ++ extern int vpu_post_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, GPU_MEM_PTR_T *buf); ++ int vpu_qpu_post_code(unsigned vpu_code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, ++- int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); +++ int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, +++ int qpu_codeb, int unifs1b, int unifs2b, int unifs3b, int unifs4b, int unifs5b, int unifs6b, int unifs7b, int unifs8b, int unifs9b, int unifs10b, int unifs11b, int unifs12b +++ ); ++ extern void vpu_wait( int id); ++ ++ // Simple test of shader code ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index a0f0282..e86eb30 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -48,693 +48,674 @@ unsigned int rpi_shader[] = { ++ /* [0x000000b8] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++ /* [0x000000c0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++ /* [0x000000c8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000d0] */ 0x15827d80, 0x100208e7, // mov r3, unif ++-/* [0x000000d8] */ 0x119c17c0, 0xd00208a7, // shl r2, r3, 1 ++-/* [0x000000e0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x000000e8] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x000000f0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x000000f8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000100] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000108] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000110] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000118] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000120] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000128] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000130] 
*/ 0x119c17c0, 0xd00208a7, // shl r2, r3, 1 ++-/* [0x00000138] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000140] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000148] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000150] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000158] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000160] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000168] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000170] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000178] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000180] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 ++-/* [0x00000188] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) ++-/* [0x00000190] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 ++-/* [0x00000198] */ 0x15427d80, 0x10020827, // mov r0, ra_x ++-/* [0x000001a0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000001a8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base ++-/* [0x000001b0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000001b8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001c0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000001c8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001d0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x, r0 +++/* [0x000000d0] */ 0x15427d80, 0x10020827, // mov r0, ra_x +++/* [0x000000d8] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000000e0] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base +++/* [0x000000e8] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000000f0] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000000f8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000100] */ 0x149dc1c0, 0xd0020827, // and 
r0, r0, ~3 +++/* [0x00000108] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x, r0 +++/* [0x00000110] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000118] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x00000120] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 +++/* [0x00000128] */ 0x0c9e7440, 0x10020f27, // add t1s, r2, r1 +++/* [0x00000130] */ 0x00000008, 0xe00208a7, // mov r2,8 +++/* [0x00000138] */ 0x11827c80, 0x10021327, // shl rb12,unif, r2 +++/* [0x00000140] */ 0x0c827c80, 0x10021367, // add rb13,unif,r2 +++/* [0x00000148] */ 0x15827d80, 0x100208a7, // mov r2, unif +++/* [0x00000150] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x00000158] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000160] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000168] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000170] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000178] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000180] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000188] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000190] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000198] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x000001a0] */ 0x0f9c11c0, 0xd00208a7, // asr r2, r0, 1 +++/* [0x000001a8] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) +++/* [0x000001b0] */ 0x0c9e7440, 0x10021567, // add rb21, r2, r1 +++/* [0x000001b8] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x000001c0] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x000001c8] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x000001d0] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++ /* [0x000001d8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001e0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* 
[0x000001e8] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 ++-/* [0x000001f0] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x000001f8] */ 0x15827d80, 0x10021327, // mov rb12,unif ++-/* [0x00000200] */ 0x15827d80, 0x10021367, // mov rb13,unif ++-/* [0x00000208] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000210] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000218] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000220] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000228] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000230] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x ++-/* [0x00000238] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base +++/* [0x000001e0] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x000001e8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000001f0] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x000001f8] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x +++/* [0x00000200] */ 0x0c627380, 0x10020f27, // add t1s, r1, ra_frame_base ++ // ::mc_filter_uv ++-/* [0x00000240] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000248] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000250] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000258] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000260] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000268] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000270] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000278] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000280] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x00000288] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000290] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x00000298] */ 0x159dcfc0, 
0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002a0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002a8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002b0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002b8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002c0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002c8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000002d0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000002d8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000002e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000002f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000002f8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000300] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000308] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000310] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000318] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000320] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000340] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000348] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000350] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00000358] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000360] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00000368] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000370] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 ++-/* [0x00000378] */ 
0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000380] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 ++-/* [0x00000388] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000390] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000208] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000210] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000218] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000220] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000228] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000230] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000238] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000240] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000248] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000250] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000258] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000260] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000268] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000270] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000278] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000280] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000288] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000290] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000298] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000002a0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002a8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002b0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000002b8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000002c0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002c8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* 
[0x000002d0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000002d8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000002e0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000002e8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000002f0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000002f8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000300] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000308] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000310] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000318] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000320] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000328] */ 0x0f9e7080, 0x100208e7, // asr r3, r0, r2 +++/* [0x00000330] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000338] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 +++/* [0x00000340] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000348] */ 0x0f9e7080, 0x100608e7, // asr.ifnz r3, r0, r2 +++/* [0x00000350] */ 0x119c87c0, 0xd00213a7, // shl rb14,r3,8 +++/* [0x00000358] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x00000398] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003a0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x000003a8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x000003b0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003b8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003c0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003c8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 
++-/* [0x000003d0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003d8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003e0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x000003e8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003f0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000003f8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000400] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000408] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000410] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000418] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000420] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000428] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000430] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000438] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000440] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000448] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000450] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000458] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000460] */ 0x00000020, 0xe0021327, // mov rb12,32 ++-/* [0x00000468] */ 0x00000006, 0xe0021367, // mov rb13,6 ++-/* [0x00000470] */ 0x00000001, 0xe00213a7, // mov rb14,1 ++-/* [0x00000478] */ 0x00000000, 0xe00213e7, // mov rb15,0 ++-/* [0x00000480] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000488] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000490] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000498] */ 0x4c3cb237, 
0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000004a0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000004a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000004b0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000004b8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 ++-/* [0x000004c0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 ++-/* [0x000004c8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 ++-/* [0x000004d0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004d8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 ++-/* [0x000004e0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000004e8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000004f0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000004f8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000500] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000508] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000510] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000518] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000520] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000528] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000530] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000360] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000368] */ 0x8e4539bf, 0xb0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 +++/* [0x00000370] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000378] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000380] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000388] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000390] */ 0x129de5c0, 0x100208a7, // min r2, r2, 
rb_frame_height_minus_1 +++/* [0x00000398] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003a0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000003a8] */ 0x0c627c80, 0x10020f27, // add t1s, ra_frame_base, r2 +++/* [0x000003b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003b8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003c0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000003c8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000003d0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000003d8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000003e0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000003e8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000003f0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000003f8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000400] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000408] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000410] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000418] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000420] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000428] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000430] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000438] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000440] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000448] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000450] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, 
ra22 +++/* [0x00000458] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000460] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x00000468] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x00000470] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x00000478] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000480] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x00000488] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000490] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000498] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000004a0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004a8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000004b0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000004b8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000004c0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000004c8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000004d0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004d8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x00000538] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000540] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000548] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000550] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000558] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000560] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000568] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000570] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000578] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x00000580] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000588] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x00000590] 
*/ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000598] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000005a0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005a8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000005b0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000005b8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000005c0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000005c8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000005d0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000005d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000005e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000005e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000005f0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005f8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000600] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000608] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000610] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000618] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000620] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000628] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000630] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000638] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000640] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000648] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000650] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000658] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000004e0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000004e8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* 
[0x000004f0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000004f8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000500] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000508] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000510] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000518] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000520] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000528] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000530] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000538] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000540] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000548] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000550] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000558] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000560] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000568] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000570] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000578] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000580] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000588] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000590] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000598] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005a0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005a8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005b0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005b8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005c0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c8] */ 0x4f5971c6, 0x100252a0, // asr rb10, 
r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005d0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005d8] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x000005e0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005e8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005f0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000005f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000600] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000660] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000668] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x00000670] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00000678] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000680] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000688] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000690] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000698] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000006a0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x000006a8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x000006b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000006b8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000006c0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000006c8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000006d0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000006d8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* 
[0x000006e0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000006e8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000006f0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000006f8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000700] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000708] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000710] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000718] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000720] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000728] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000730] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000738] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000740] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000748] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000750] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 ++-/* [0x00000758] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000760] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++-/* [0x00000768] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000770] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000778] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000780] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000788] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000790] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000608] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000610] */ 0x8e4539bf, 0xb0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 +++/* [0x00000618] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000620] */ 0x95710dbf, 
0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000628] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000630] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000638] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000640] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000648] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x00000650] */ 0x0c627c80, 0x10020f27, // add t1s, ra_frame_base, r2 +++/* [0x00000658] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000660] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000668] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000670] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000678] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000680] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000688] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000690] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000698] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000006a0] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x000006a8] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x000006b0] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000006c0] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000006c8] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000006d0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000006d8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000006e0] */ 0x4c308237, 0x10024860, // add r1, r1, 
r0 ; mul24 r0, ra12, rb8 +++/* [0x000006e8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000006f0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000006f8] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x00000700] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000708] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* [0x00000710] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000718] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000720] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000728] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000730] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000738] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x00000798] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000007a0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000007a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000007b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000007b8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000007c0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007c8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000007d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007d8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x000007e0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000007e8] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x000007f0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000007f8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000800] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000808] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000810] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000818] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000820] */ 
0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000828] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000830] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000838] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000840] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000848] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000850] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000858] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000860] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000868] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000870] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000878] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000880] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000888] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000890] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000898] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x000008b8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008c0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008c8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008d8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000740] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000748] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000750] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000758] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; 
mov r1, unif +++/* [0x00000760] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000768] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000770] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000778] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000780] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000788] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000790] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000798] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000007a0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000007a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007b0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000007b8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000007c0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000007c8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000007d0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000007d8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000007e0] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x000007e8] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x000007f0] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x000007f8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000800] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000808] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000810] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000818] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000820] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000828] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000830] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000838] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* 
[0x00000840] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000848] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000850] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000858] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000860] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000868] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000870] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000878] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000880] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000008e0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000008e8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x000008f0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x000008f8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000900] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000908] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000910] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000918] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000920] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000928] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x00000930] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000938] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000940] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000948] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000950] */ 0x40077031, 0xd000c9e3, // 
nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000958] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000960] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000968] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000970] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000978] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000980] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000988] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000990] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000998] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x000009a0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000009a8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000009b0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000009b8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000009c0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000009c8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009d0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009d8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000009e0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x000009e8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x000009f0] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009f8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000a00] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a08] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a10] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a18] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a20] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* 
[0x00000a28] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a30] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a38] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a40] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a48] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a50] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000888] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000890] */ 0x8e4539bf, 0xb0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 +++/* [0x00000898] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x000008a0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000008a8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000008b0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000008b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000008c0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000008c8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000008d0] */ 0x0c627c80, 0x10020f27, // add t1s, ra_frame_base, r2 +++/* [0x000008d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008e0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000008e8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000008f0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000008f8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000900] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000908] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000910] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 
<< 3 +++/* [0x00000918] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000920] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000928] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000930] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000938] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000940] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000948] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000950] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000958] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000960] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000968] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000970] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000978] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000980] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000988] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000990] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000998] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009a0] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x000009a8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000009b0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000009b8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000009c0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009c8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000009d0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000009d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000009e0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000009e8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000009f0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, 
rb29 +++/* [0x000009f8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a58] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a60] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000a68] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a70] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a88] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a90] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000a98] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a00] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a08] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a10] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a18] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00000a20] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a28] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00000a30] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a38] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a40] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000aa0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000aa8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ab0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ab8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ac0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ac8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ad0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ad8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ae0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b00] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* 
[0x00000b08] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000b10] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a48] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a50] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a58] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00000a60] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a68] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00000a70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a80] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a88] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a90] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000aa0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ab0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_setup ++-/* [0x00000b18] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000b20] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000b28] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000b30] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000b38] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000b40] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000b48] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x00000b50] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000b58] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 ++-/* [0x00000b60] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000b68] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000b70] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* 
[0x00000b78] */ 0x8c9e7452, 0x10025e18, // add t0s, r2, r1 ; mov ra_frame_base, r2 ++-/* [0x00000b80] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000b88] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000b90] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000b98] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x00000ba0] */ 0x0c9c13c0, 0xd0020567, // add ra_y2, r1, 1 ++-/* [0x00000ba8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000bb0] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 ++-/* [0x00000bb8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000bc0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000bc8] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000bd0] */ 0x8c9e7452, 0x10025e19, // add t0s, r2, r1 ; mov ra_frame_base2, r2 ++-/* [0x00000bd8] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 ++-/* [0x00000be0] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++-/* [0x00000be8] */ 0x15827d80, 0x10021427, // mov rb16, unif ++-/* [0x00000bf0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000bf8] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++-/* [0x00000c00] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++-/* [0x00000c08] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x00000c10] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x00000c18] */ 0x00000040, 0xe00207a7, // mov ra30, 64 ++-/* [0x00000c20] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000c28] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000c30] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x00000c38] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x00000c40] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x00000c48] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x00000c50] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x00000c58] */ 0x00000000, 0xe0020327, // mov ra12, 0 
++-/* [0x00000c60] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x00000c68] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x00000c70] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x00000c78] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000c80] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000c88] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000c90] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000c98] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000ca0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000ca8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000cb0] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000cb8] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000cc0] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000cc8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000cd0] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000cd8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000ce0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000ce8] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000cf0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000cf8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000d00] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000d08] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000d10] */ 0x15827d80, 0x10021327, // mov rb12,unif ++-/* [0x00000d18] */ 0x15827d80, 0x10021367, // mov rb13,unif ++-/* [0x00000d20] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000d28] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000d30] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000d38] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000d40] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000d48] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base ++-/* 
[0x00000d50] */ 0x13540dc0, 0xd0020867, // max r1, ra_y2, 0 ++-/* [0x00000d58] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000d60] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000ac0] */ 0x00000010, 0xe00208e7, // mov r3, 16 +++/* [0x00000ac8] */ 0x15827d80, 0x10020227, // mov ra8, unif +++/* [0x00000ad0] */ 0x15827d80, 0x10020267, // mov ra9, unif +++/* [0x00000ad8] */ 0x15827d80, 0x100202a7, // mov ra10, unif +++/* [0x00000ae0] */ 0x15827d80, 0x100202e7, // mov ra11, unif +++/* [0x00000ae8] */ 0x15827d80, 0x10020867, // mov r1, unif +++/* [0x00000af0] */ 0x119e72c0, 0x10020827, // shl r0,r1,r3 +++/* [0x00000af8] */ 0x0f9e72c0, 0x10020867, // asr r1,r1,r3 +++/* [0x00000b00] */ 0x0f9e70c0, 0x10020827, // asr r0,r0,r3 +++/* [0x00000b08] */ 0x0d9c13c0, 0xd0021667, // sub rb_frame_width_minus_1,r1,1 +++/* [0x00000b10] */ 0x0d9c11c0, 0xd00217a7, // sub rb_frame_height_minus_1,r0,1 +++/* [0x00000b18] */ 0x15827d80, 0x10021427, // mov rb_pitch, unif +++/* [0x00000b20] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000b28] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x00000b30] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x00000b38] */ 0x15227d80, 0x10020867, // mov r1, ra8 +++/* [0x00000b40] */ 0x119e72c0, 0x10020827, // shl r0,r1,r3 +++/* [0x00000b48] */ 0x0f9e72c0, 0x10020867, // asr r1,r1,r3 +++/* [0x00000b50] */ 0x0f9e70c0, 0x10020827, // asr r0,r0,r3 +++/* [0x00000b58] */ 0x0c9a7180, 0x10020827, // add r0, r0, elem_num +++/* [0x00000b60] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0 +++/* [0x00000b68] */ 0x922591f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, ra9 +++/* [0x00000b70] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000b78] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x00000b80] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000b88] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000b90] */ 0x139c03c0, 0xd0020867, // max 
r1, r1, 0 +++/* [0x00000b98] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000ba0] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000ba8] */ 0x8c9e7452, 0x10025e18, // add t0s, r2, r1 ; mov ra_frame_base, r2 +++/* [0x00000bb0] */ 0x152a7d80, 0x10020867, // mov r1, ra10 +++/* [0x00000bb8] */ 0x119e72c0, 0x10020827, // shl r0,r1,r3 +++/* [0x00000bc0] */ 0x0f9e72c0, 0x10020867, // asr r1,r1,r3 +++/* [0x00000bc8] */ 0x0f9e70c0, 0x10020827, // asr r0,r0,r3 +++/* [0x00000bd0] */ 0x0c9a7180, 0x10020827, // add r0, r0, elem_num +++/* [0x00000bd8] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0 +++/* [0x00000be0] */ 0x922d91f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, ra11 +++/* [0x00000be8] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000bf0] */ 0x0c9c13c0, 0xd0020567, // add ra_y2, r1, 1 +++/* [0x00000bf8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000c00] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000c08] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000c10] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000c18] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000c20] */ 0x8c9e7452, 0x10025f19, // add t1s, r2, r1 ; mov ra_frame_base2, r2 +++/* [0x00000c28] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x00000c30] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000c38] */ 0x00000040, 0xe00207a7, // mov ra30, 64 +++/* [0x00000c40] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000c48] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000c50] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000c58] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00000c60] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x00000c68] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x00000c70] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x00000c78] */ 0x00000000, 0xe0020327, // 
mov ra12, 0 +++/* [0x00000c80] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x00000c88] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x00000c90] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x00000c98] */ 0x00004000, 0xe00204a7, // mov ra18, 0x4000 +++/* [0x00000ca0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000ca8] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000cb0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000cb8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000cc0] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000cc8] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000cd0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000cd8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000ce0] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000ce8] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000cf0] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000cf8] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000d00] */ 0x15827d80, 0x10020867, // mov r1, unif +++/* [0x00000d08] */ 0x919c82ff, 0xd0024822, // shl r0,r1,r3 ; mov r2,8 +++/* [0x00000d10] */ 0x0f9e70c0, 0x10021367, // asr rb13,r0,r3 +++/* [0x00000d18] */ 0x0f9e72c0, 0x10021327, // asr rb12,r1,r3 +++/* [0x00000d20] */ 0x0c9cde80, 0x10021367, // add rb13,rb13,r2 +++/* [0x00000d28] */ 0x119cce80, 0x10021327, // shl rb12, rb12, r2 +++/* [0x00000d30] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000d38] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000d40] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000d48] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000d50] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base +++/* [0x00000d58] */ 0x13540dc0, 0xd0020867, // max r1, ra_y2, 0 +++/* [0x00000d60] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++ /* 
[0x00000d68] */ 0x0c541dc0, 0xd0020567, // add ra_y2, ra_y2, 1 ++ /* [0x00000d70] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000d78] */ 0x0c667380, 0x10020e27, // add t0s, r1, ra_frame_base2 ++-// ::mc_filter +++/* [0x00000d78] */ 0x0c667380, 0x10020f27, // add t1s, r1, ra_frame_base2 +++// :per_block_setup ++ /* [0x00000d80] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ /* [0x00000d88] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++ /* [0x00000d90] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++ /* [0x00000d98] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next ++-/* [0x00000da0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000da8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000db0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000db8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000dc0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000dc8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000dd0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 ++-/* [0x00000dd8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000de0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif ++-/* [0x00000de8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000df0] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x00000df8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 ++-/* [0x00000e00] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000e08] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 ++-/* [0x00000e10] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000e18] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000e20] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000e28] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 
++-/* [0x00000e30] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000e38] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000e40] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000e48] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000e50] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000e58] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000e60] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000e68] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000e70] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000e78] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000e80] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000e88] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000e90] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000e98] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000ea0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000ea8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000eb0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000eb8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ec0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000ec8] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000ed0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ed8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ee0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ee8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000ef0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000ef8] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f00] */ 0x4f5971c6, 
0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f08] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000da0] */ 0x00000010, 0xe00208e7, // mov r3, 16 +++/* [0x00000da8] */ 0x15827d80, 0x10020867, // mov r1, unif +++/* [0x00000db0] */ 0x119e72c0, 0x10020827, // shl r0,r1,r3 +++/* [0x00000db8] */ 0x0f9e72c0, 0x10020867, // asr r1,r1,r3 +++/* [0x00000dc0] */ 0x0f9e70c0, 0x10020827, // asr r0,r0,r3 +++/* [0x00000dc8] */ 0x0c9a7180, 0x10020827, // add r0, r0, elem_num +++/* [0x00000dd0] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0 +++/* [0x00000dd8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000de0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000de8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000df0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000df8] */ 0x8c827436, 0x100246a1, // add ra_frame_base_next, r2, r0 ; mov r1, unif +++/* [0x00000e00] */ 0x119e72c0, 0x10020827, // shl r0,r1,r3 +++/* [0x00000e08] */ 0x0f9e72c0, 0x10020867, // asr r1,r1,r3 +++/* [0x00000e10] */ 0x0f9e70c0, 0x10020827, // asr r0,r0,r3 +++/* [0x00000e18] */ 0x0c9a7180, 0x10020827, // add r0, r0, elem_num +++/* [0x00000e20] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0 +++/* [0x00000e28] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000e30] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000e38] */ 0x159e7240, 0x10021067, // mov ra_y2_next, r1 +++/* [0x00000e40] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000e48] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 +++/* [0x00000e50] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000e58] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e60] */ 0x0e9e70c0, 0x10020867, // shr r1, r0, r3 +++/* [0x00000e68] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000e70] */ 0x149d61c0, 0x10020827, // 
and r0, r0, rb22 +++/* [0x00000e78] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000e80] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000e88] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000e90] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000e98] */ 0x119e70c0, 0x10020827, // shl r0, r0, r3 +++/* [0x00000ea0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000ea8] */ 0x95801dbf, 0xd0024821, // mov r0, unif ; mov r1,1 +++/* [0x00000eb0] */ 0x4f5971c6, 0x10024260, // asr ra9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000eb8] */ 0x4f5971c6, 0x10024220, // asr ra8, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ec0] */ 0x4f5971c6, 0x10044260, // asr.ifz ra9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ec8] */ 0x0f9d71c0, 0x10040227, // asr.ifz ra8, r0, rb23 +++/* [0x00000ed0] */ 0x0d243f80, 0xd0020267, // sub ra9,3,ra9 +++/* [0x00000ed8] */ 0x0d203f80, 0xd0020227, // sub ra8,3,ra8 +++/* [0x00000ee0] */ 0x11243dc0, 0xd0020267, // shl ra9,ra9,3 +++/* [0x00000ee8] */ 0x11203dc0, 0xd0020227, // shl ra8,ra8,3 +++/* [0x00000ef0] */ 0x00ffff00, 0xe0020867, // mov r1,0xffff00 +++/* [0x00000ef8] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000f00] */ 0x0f9d71c0, 0x10020027, // asr ra0, r0, rb23 +++/* [0x00000f08] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 ++ /* [0x00000f10] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 ++-/* [0x00000f18] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000f20] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000f28] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00000f30] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000f38] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00000f40] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000f18] */ 0x01040400, 0xe0020867, // mov r1,0x1040400 +++/* [0x00000f20] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000f28] */ 0x0f9d71c0, 0x10020067, // asr ra1, r0, rb23 +++/* [0x00000f30] */ 
0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00000f38] */ 0x0f9d71c0, 0x10021167, // asr rb5, r0, rb23 +++/* [0x00000f40] */ 0xfbf5f600, 0xe0020867, // mov r1,0xfbf5f600 +++/* [0x00000f48] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000f50] */ 0x0f9d71c0, 0x100200a7, // asr ra2, r0, rb23 +++/* [0x00000f58] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00000f60] */ 0x0f9d71c0, 0x100211a7, // asr rb6, r0, rb23 +++/* [0x00000f68] */ 0x11283a40, 0xe0020867, // mov r1,0x11283a40 +++/* [0x00000f70] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000f78] */ 0x0f9d71c0, 0x100200e7, // asr ra3, r0, rb23 +++/* [0x00000f80] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00000f88] */ 0x0f9d71c0, 0x100211e7, // asr rb7, r0, rb23 +++/* [0x00000f90] */ 0x3a281100, 0xe0020867, // mov r1,0x3a281100 +++/* [0x00000f98] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000fa0] */ 0x0f9d71c0, 0x10020127, // asr ra4, r0, rb23 +++/* [0x00000fa8] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00000fb0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000fb8] */ 0xf6f5fb00, 0xe0020867, // mov r1,0xf6f5fb00 +++/* [0x00000fc0] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000fc8] */ 0x0f9d71c0, 0x10020167, // asr ra5, r0, rb23 +++/* [0x00000fd0] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00000fd8] */ 0x0f9d71c0, 0x10021267, // asr rb9, r0, rb23 +++/* [0x00000fe0] */ 0x04040100, 0xe0020867, // mov r1,0x4040100 +++/* [0x00000fe8] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000ff0] */ 0x0f9d71c0, 0x100201a7, // asr ra6, r0, rb23 +++/* [0x00000ff8] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00001000] */ 0x0f9d71c0, 0x100212a7, // asr rb10, r0, rb23 +++/* [0x00001008] */ 0xffff0000, 0xe0020867, // mov r1,0xffff0000 +++/* [0x00001010] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00001018] */ 0x0f9d71c0, 0x100201e7, // asr ra7, r0, rb23 +++/* [0x00001020] */ 0x11267380, 
0x10020827, // shl r0, r1, ra9 +++/* [0x00001028] */ 0x0f9d71c0, 0x100212e7, // asr rb11, r0, rb23 +++/* [0x00001030] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001038] */ 0x0f9e70c0, 0x100213e7, // asr rb15, r0, r3 +++/* [0x00001040] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001048] */ 0x119e70c0, 0x10020827, // shl r0, r0, r3 +++/* [0x00001050] */ 0x8f9c00ff, 0xd0024823, // asr r0, r0, r3 ; mov r3, 0 +++/* [0x00001058] */ 0x119c81c0, 0xd00213a7, // shl rb14, r0, 8 +++// ::mc_filter ++ // :yloop ++-/* [0x00000f48] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000f50] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-/* [0x00000f58] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00000f60] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000f68] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 ++-/* [0x00000f70] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next ++-/* [0x00000f78] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000f80] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000f88] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000f90] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000f98] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 ++-/* [0x00000fa0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000fa8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-/* [0x00000fb0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000fb8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000fc0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000fc8] */ 0x40038031, 0xd000c9e2, // 
nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000fd0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000fd8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000fe0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000fe8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000ff0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000ff8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00001000] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00001008] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00001010] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00001018] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00001020] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00001028] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00001030] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001038] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001040] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001048] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 ++-/* [0x00001050] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001058] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001060] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001068] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001070] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloop ++-/* [0x00001078] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001080] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00001088] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* 
[0x00001090] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00001098] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000010a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000010a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000010b0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-/* [0x000010b8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-/* [0x000010c0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-/* [0x000010c8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 ++-/* [0x000010d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000010d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000010e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000010e8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 ++-/* [0x000010f0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 ++-/* [0x000010f8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 ++-/* [0x00001100] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloop ++-/* [0x00001108] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 ++-/* [0x00001110] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00001118] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00001120] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001128] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001130] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001138] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00001060] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001068] */ 0x8e4539bf, 0xb0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu1 +++/* [0x00001070] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00001078] */ 0x95710dbf, 
0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001080] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00001088] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00001090] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001098] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000010a0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000010a8] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x000010b0] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x000010b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000010c0] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x000010c8] */ 0xec654c8f, 0x10024f21, // add t1s, ra_frame_base2, r2 ; v8subs r1, r1, rb20 +++/* [0x000010d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000010d8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000010e0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000010e8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000010f0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000010f8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00001100] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00001108] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00001110] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001118] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001120] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00001128] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001130] */ 0x40173031, 
0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001138] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001140] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001148] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001150] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001158] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001160] */ 0x8d2487f6, 0xd00279c8, // sub.setf -, r3, 8 ; mov ra8, ra9 +++/* [0x00001168] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001170] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001178] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001180] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001188] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001190] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001198] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000011a0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000011a8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000011b0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000011b8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000011c0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000011c8] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000011d0] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000011d8] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000011e0] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000011e8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000011f0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000011f8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 
14 +++/* [0x00001200] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x00001208] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x00001210] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x00001218] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001220] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x00001228] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001230] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001238] */ 0xfffffb28, 0xf0f809e7, // brr -, r:per_block_setup +++/* [0x00001240] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001248] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001250] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_b ++-/* [0x00001140] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001148] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00001150] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00001158] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next ++-/* [0x00001160] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00001168] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00001170] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00001178] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00001180] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00001188] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00001190] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 ++-/* [0x00001198] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000011a0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif ++-/* [0x000011a8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x000011b0] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x000011b8] */ 0x0c9c13c0, 
0xd0021067, // add ra_y2_next, r1, 1 ++-/* [0x000011c0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000011c8] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 ++-/* [0x000011d0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000011d8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000011e0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000011e8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000011f0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000011f8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00001200] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00001208] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00001210] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00001218] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001220] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001228] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00001230] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001238] */ 0x00000001, 0xe0020867, // mov r1, 1 ++-/* [0x00001240] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001248] */ 0x409f3001, 0xd00049e0, // nop ; mul24 r0, r0 << 13, r1 << 13 ++-/* [0x00001250] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001258] */ 0x409f2001, 0xd00049e0, // nop ; mul24 r0, r0 << 14, r1 << 14 ++-/* [0x00001260] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001268] */ 0x409f1001, 0xd00049e0, // nop ; mul24 r0, r0 << 15, r1 << 15 ++-/* [0x00001270] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001278] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00001280] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001288] */ 0x409f7001, 0xd00049e0, // nop ; mul24 r0, r0 << 9, r1 << 9 ++-/* [0x00001290] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001298] */ 
0x409f6001, 0xd00049e0, // nop ; mul24 r0, r0 << 10, r1 << 10 ++-/* [0x000012a0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012a8] */ 0x409f5001, 0xd00049e0, // nop ; mul24 r0, r0 << 11, r1 << 11 ++-/* [0x000012b0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012b8] */ 0x409f4001, 0xd00049e0, // nop ; mul24 r0, r0 << 12, r1 << 12 ++-/* [0x000012c0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000012c8] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x000012d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000012f0] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x000012f8] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001300] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001308] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001310] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 ++-/* [0x00001318] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001320] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001328] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00001330] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001338] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00001340] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :yloopb ++-/* [0x00001348] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00001350] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-/* [0x00001358] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, 
ra_frame_base_next ; mov rb31, r3 ++-/* [0x00001360] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00001368] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 ++-/* [0x00001370] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next ++-/* [0x00001378] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00001380] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00001388] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00001390] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x00001398] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 ++-/* [0x000013a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000013a8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-/* [0x000013b0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 ++-/* [0x000013b8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000013c0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000013c8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000013d0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000013d8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000013e0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000013e8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000013f0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000013f8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00001400] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00001408] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00001410] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, 
r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00001418] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00001420] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00001428] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00001430] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001438] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001440] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001448] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 ++-/* [0x00001450] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001458] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001460] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001468] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001470] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloopb ++-/* [0x00001478] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001480] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00001488] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00001490] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00001498] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000014a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000014a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000014b0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-/* [0x000014b8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-/* [0x000014c0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-/* [0x000014c8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 ++-/* [0x000014d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000014d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, 
r1, ra22 ++-/* [0x000014e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000014e8] */ 0x4053800e, 0xd00049e1, // nop ; mul24 r1, r1 << 8, ra20 << 8 ++-/* [0x000014f0] */ 0x4c78e38f, 0x10024860, // add r1, r1, ra30 ; mul24 r0, r1, rb14 ++-/* [0x000014f8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x00001500] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloopb ++-/* [0x00001508] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00001510] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00001518] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00001520] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001528] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001530] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001538] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00001258] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001260] */ 0x8e4539bf, 0xb0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu1 +++/* [0x00001268] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00001270] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001278] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00001280] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00001288] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001290] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00001298] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000012a0] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x000012a8] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x000012b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000012b8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x000012c0] 
*/ 0xec654c8f, 0x10024f21, // add t1s, ra_frame_base2, r2 ; v8subs r1, r1, rb20 +++/* [0x000012c8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000012d0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000012d8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000012e0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000012e8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000012f0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000012f8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00001300] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00001308] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001310] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001318] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00001320] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001328] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001330] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001338] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001340] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001348] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001350] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001358] */ 0x8d2487f6, 0xd00279c8, // sub.setf -, r3, 8 ; mov ra8, ra9 +++/* [0x00001360] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001368] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001370] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001378] */ 
0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001380] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001388] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001390] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00001398] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000013a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000013a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000013b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000013b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000013c0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000013c8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000013d0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000013d8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000013e0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000013e8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000013f0] */ 0x0f9ce3c0, 0xd0020827, // asr r0, r1, 14 +++/* [0x000013f8] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00001400] */ 0x405b8006, 0xd00049e0, // nop ; mul24 r0, r0 << 8, ra22 << 8 +++/* [0x00001408] */ 0x0c4a7380, 0x10020867, // add r1, r1, ra18 +++/* [0x00001410] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00001418] */ 0xfffffe20, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001420] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x00001428] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001430] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001438] */ 0xfffff928, 0xf0f809e7, // brr -, r:per_block_setup +++/* [0x00001440] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001448] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001450] */ 0x15827d80, 
0x10021ca7, // mov vw_addr, unif ++ // ::mc_interrupt_exit12 ++-/* [0x00001540] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001548] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001550] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001558] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001560] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001568] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001570] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001578] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001580] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001588] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001590] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001598] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015a0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015c0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000015c8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000015d0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00001458] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001460] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001468] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001470] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00001478] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00001480] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001488] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001490] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001498] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014a0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014b8] */ 
0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014d0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014d8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000014e0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000014e8] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_exit1 +++/* [0x000014f0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000014f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001500] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00001508] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001510] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00001518] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001520] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00001528] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 6e552d9..760bd17 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,15 +4,16 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 144) ++-#define mc_filter_uv_b0 (rpi_shader + 334) ++-#define mc_filter_uv_b (rpi_shader + 486) ++-#define mc_exit (rpi_shader + 662) ++-#define mc_interrupt_exit8 (rpi_shader + 680) ++-#define mc_setup (rpi_shader + 710) ++-#define mc_filter (rpi_shader + 864) ++-#define mc_filter_b (rpi_shader + 1104) ++-#define mc_interrupt_exit12 (rpi_shader + 1360) ++-#define mc_end (rpi_shader + 1398) +++#define mc_filter_uv (rpi_shader + 130) +++#define mc_filter_uv_b0 (rpi_shader + 312) +++#define mc_filter_uv_b (rpi_shader + 464) +++#define mc_exit (rpi_shader + 640) +++#define mc_interrupt_exit8 (rpi_shader + 658) +++#define mc_setup (rpi_shader + 688) +++#define mc_filter (rpi_shader + 1048) +++#define mc_filter_b 
(rpi_shader + 1174) +++#define mc_interrupt_exit12 (rpi_shader + 1302) +++#define mc_exit1 (rpi_shader + 1340) +++#define mc_end (rpi_shader + 1356) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index a0b8e5a..60d1ec2 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -21,6 +21,7 @@ ++ # ++ # ra16 clipped(row start address+elem_num)&~3 ++ # ra17 per-channel shifts +++# ra18 0x4000 ++ # ra19 next ra17 ++ # ++ # rb16 pitch ++@@ -86,7 +87,7 @@ ++ ++ ++ ################################################################################ ++-# mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1, pad2) +++# mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, offset, denom, vpm_id) ++ ::mc_setup_uv ++ ++ # Read starting kernel ++@@ -132,36 +133,6 @@ mov ra13, 0 ++ mov ra14, 0 ++ mov ra15, 0 ++ ++-# Compute part of VPM to use for DMA output ++-mov r3, unif ++-shl r2, r3, 1 # Convert QPU numbers to be even (this means we can only use 8 QPUs, but is necessary as we need to save 16bit intermediate results) ++-and r2, r2, 15 ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later ++-shl r0, r0, 5 ++-add rb27, r0, r1 ++- ++-# Compute part of VPM to save data into ++-shl r2, r3, 1 ++-and r2, r2, 15 # r2 = bcd0 ++-mov r1, r2 # r1 = bcd0 ++-asr r1, r1, 2 # r1 = bc ++-shl r1, r1, 6 # r1 = bc000000 ++-mov r0, r2 # r0 = bcd0 ++-and r0, r0, 3 # r0 = d0 ++-add r0, r0, r1 # r0 = bc0000d0 ++-mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit ++-add rb28, r0, r1 ++-asr r0, r0, 1 # r0 = bc0000d ++-# Prepare VPM command for 16bit intermediates ++-mov r1, vpm_setup(0, 2, h16p(0, 0)) # 2 is stride - stride acts on ADDR which is Y[5:0],H[0] for 16 bit ++-add 
rb21, r0, r1 ++- ++ # Compute base address for first and second access ++ mov r0, ra_x # Load x ++ max r0, r0, 0; mov r1, ra_y # Load y ++@@ -175,10 +146,31 @@ min r1, r1, rb_frame_height_minus_1 ++ # submit texture requests for first line ++ add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++ add t0s, r0, r1 ; mov ra_frame_base, r2 ++-add t0s, r2, r1 +++add t1s, r2, r1 +++ +++mov r2,8 +++shl rb12,unif, r2 # offset before shift +++add rb13,unif,r2 # offset after shift +++ +++# Compute part of VPM to use for DMA output +++mov r2, unif +++shl r2, r2, 1 # Convert QPU numbers to be even (this means we can only use 8 QPUs, but is necessary as we need to save 16bit intermediate results) +++and r2, r2, 15 +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 ++ ++-mov rb12,unif # offset before shift ++-mov rb13,unif # offset after shift +++mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit +++add rb28, r0, r1 # VPM 8bit storage +++asr r2, r0, 1 # r0 = bc0000d +++mov r1, vpm_setup(0, 2, h16p(0, 0)) # 2 is stride - stride acts on ADDR which is Y[5:0],H[0] for 16 bit +++add rb21, r2, r1 # VPM for 16bit intermediates +++mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later +++shl r0, r0, 5 +++add rb27, r0, r1 # DMA out ++ ++ # submit texture requests for second line ++ max r1, ra_y, 0 ++@@ -187,7 +179,7 @@ add ra_y, ra_y, 1 ++ bra -, ra31 ++ nop ; mul24 r1, r1, rb_pitch ++ add t0s, r1, ra_x ++-add t0s, r1, ra_frame_base +++add t1s, r1, ra_frame_base ++ ++ ++ ++@@ -248,17 +240,15 @@ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ mov r0, unif # U offset/weight ++ asr rb15, r0, r2 # Compute offset from MSBs ++ shl r0, r0, r2 ++-asr rb14, r0, r2 # Compute weight from LSBs +++asr r3, r0, r2 # Compute weight from LSBs ++ mov r0, unif # V offset/weight ++ asr.ifnz rb15, r0, r2 ++ shl r0, r0, r2 ++-asr.ifnz rb14, r0, r2 +++asr.ifnz r3, r0, r2 +++shl rb14,r3,8 # Scale 
up weights so we can use mul24 in signed fashion ++ ++ # r2 is elem_num ++ # r3 is loop counter ++- ++-mov r5rep, -8 ++- ++ # retrieve texture results and pick out bytes ++ # then submit two more texture requests ++ ++@@ -269,7 +259,7 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 ++ mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++@@ -278,7 +268,7 @@ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++ add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_frame_base, r2 +++add t1s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -301,11 +291,6 @@ mov ra13, ra14 # Delay slot 1 ++ mov ra14, ra15 # Delay slot 2 ++ mov ra15, r0 # Delay slot 3 ++ ++-mov rb12,32 # TODO remove these to make P weighted prediction work properly ++-mov rb13,6 ++-mov rb14,1 ++-mov rb15,0 ++- ++ # apply vertical filter and write to VPM ++ ++ nop ; mul24 r1, ra14, rb10 ++@@ -412,7 +397,7 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 ++ mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++@@ -421,7 +406,7 @@ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++ add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_frame_base, r2 +++add t1s, 
ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -542,7 +527,7 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 ++ mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++@@ -551,7 +536,7 @@ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++ add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_frame_base, r2 +++add t1s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -617,9 +602,9 @@ mov -, vw_wait # wait on the VDW ++ mov -,srel(0) ++ ++ ldtmu0 +++ldtmu1 ++ ldtmu0 ++-ldtmu0 ++-ldtmu0 +++ldtmu1 ++ ++ nop ; nop ; thrend ++ nop ; nop # delay slot 1 ++@@ -630,9 +615,9 @@ nop ; nop # delay slot 2 ++ mov -, vw_wait # wait on the VDW ++ ++ ldtmu0 +++ldtmu1 ++ ldtmu0 ++-ldtmu0 ++-ldtmu0 +++ldtmu1 ++ ++ mov -,sacq(0) # 1 ++ mov -,sacq(0) # 2 ++@@ -656,200 +641,249 @@ nop ; nop # delay slot 2 ++ # For P frames we make the second x,y coordinates offset by +8 ++ ++ ################################################################################ ++-# mc_setup(next_kernel, x, y, ref_y_base, x2, y2, ref_y2_base, frame_width, frame_height, pitch, dst_pitch, offset, shift, pad2) +++# mc_setup(y_x, ref_y_base, y2_x2, ref_y2_base, frame_width_height, pitch, dst_pitch, offset_shift, next_kernel) ++ ::mc_setup +++ mov r3, 16 ++ ++-# Read starting kernel ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-mov ra31, unif ++- ++-# Compute base address for first and second access ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0; mov r1, unif # Load y ++-min 
r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl ra_xshift_next, r0, 3 # Compute shifts ++-add ra_y, r1, 1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add r2, r2, r0 # r2 is address for frame0 (not including y offset) ++-max r1, r1, 0 ++-min r1, r1, rb_frame_height_minus_1 ++-nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 ++-add t0s, r2, r1 ; mov ra_frame_base, r2 ++- ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl rx_xshift2_next, r0, 3 # Compute shifts ++-add ra_y2, r1, 1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add r2, r2, r0 # r2 is address for frame1 (not including y offset) ++-max r1, r1, 0 ++-min r1, r1, rb_frame_height_minus_1 ++-nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 ++-add t0s, r2, r1 ; mov ra_frame_base2, r2 ++- +++ # Need to save these because we need to know the frame dimensions before computing texture coordinates +++ mov ra8, unif +++ mov ra9, unif +++ mov ra10, unif +++ mov ra11, unif ++ ++ # Read image dimensions ++-sub rb25,unif,1 ++-sub rb30,unif,1 +++ mov r1, unif # width_height +++ shl r0,r1,r3 +++ asr r1,r1,r3 # width +++ asr r0,r0,r3 # height +++ sub rb_frame_width_minus_1,r1,1 +++ sub rb_frame_height_minus_1,r0,1 ++ ++ # get source pitch ++-mov rb16, unif +++ mov rb_pitch, unif ++ ++ # get destination pitch ++-mov r0, unif ++-mov r1, vdw_setup_1(0) ++-add rb24, r1, r0 +++ mov r0, unif +++ mov r1, vdw_setup_1(0) +++ add rb24, r1, r0 ++ ++-# load constants ++- ++-mov ra20, 1 ++-mov ra22, 256 ++-mov ra30, 64 ++- ++-mov rb20, 0xffffff00 ++-mov rb22, 255 ++-mov rb23, 24 +++# Compute base address for first and second access +++ mov r1, ra8 # y_x +++ shl r0,r1,r3 # r0 is x<<16 +++ asr r1,r1,r3 # r1 is y +++ asr r0,r0,r3 # r0 is x +++ add r0, r0, elem_num # Load x 
+++ max r0, r0, 0 +++ min r0, r0, rb_frame_width_minus_1 ; mov r2, ra9 # Load the frame base +++ shl ra_xshift_next, r0, 3 # Compute shifts +++ add ra_y, r1, 1 +++ and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++ add r2, r2, r0 # r2 is address for frame0 (not including y offset) +++ max r1, r1, 0 +++ min r1, r1, rb_frame_height_minus_1 +++ nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 +++ add t0s, r2, r1 ; mov ra_frame_base, r2 +++ +++ mov r1, ra10 # y_x +++ shl r0,r1,r3 # r0 is x<<16 +++ asr r1,r1,r3 # r1 is y +++ asr r0,r0,r3 # r0 is x +++ add r0, r0, elem_num # Load x +++ max r0, r0, 0 +++ min r0, r0, rb_frame_width_minus_1 ; mov r2, ra11 # Load the frame base +++ shl rx_xshift2_next, r0, 3 # Compute shifts +++ add ra_y2, r1, 1 +++ and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++ add r2, r2, r0 # r2 is address for frame1 (not including y offset) +++ max r1, r1, 0 +++ min r1, r1, rb_frame_height_minus_1 +++ nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 +++ add t1s, r2, r1 ; mov ra_frame_base2, r2 ++ ++-# touch vertical context to keep simulator happy ++ ++-mov ra8, 0 ++-mov ra9, 0 ++-mov ra10, 0 ++-mov ra11, 0 ++-mov ra12, 0 ++-mov ra13, 0 ++-mov ra14, 0 ++-mov ra15, 0 +++# load constants ++ ++-# Compute part of VPM to use for DMA output ++-mov r2, qpu_num ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later ++-shl r0, r0, 5 ++-add rb27, r0, r1 +++ mov ra20, 1 +++ mov ra22, 256 +++ mov ra30, 64 ++ ++-# Compute part of VPM to save data into ++-mov r2, qpu_num # qpu_num = abcd ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit ++-add rb28, r0, r1 +++ mov rb20, 0xffffff00 
+++ mov rb22, 255 +++ mov rb23, 24 ++ ++-mov rb12,unif # offset before shift ++-mov rb13,unif # shift +++# touch vertical context to keep simulator happy ++ ++-# Dump padding words ++-mov r0, unif +++ mov ra8, 0 +++ mov ra9, 0 +++ mov ra10, 0 +++ mov ra11, 0 +++ mov ra12, 0 +++ mov ra13, 0 +++ mov ra14, 0 +++ mov ra15, 0 +++ mov ra18, 0x4000 +++ +++# Compute part of VPM to use +++ mov r2, qpu_num +++ mov r1, r2 +++ asr r1, r1, 2 +++ shl r1, r1, 6 +++ mov r0, r2 +++ and r0, r0, 3 +++ add r0, r0, r1 +++ mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit +++ add rb28, r0, r1 # VPM for saving data +++ mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later +++ shl r0, r0, 5 +++ add rb27, r0, r1 # Command for dma output +++ +++# Weighted prediction denom +++ +++ mov r1, unif # offset_shift +++ shl r0,r1,r3 ; mov r2,8 +++ asr rb13,r0,r3 # shift +++ asr rb12,r1,r3 # offset +++ add rb13,rb13,r2 # mul24 is unsigned so scale up into high bits +++ shl rb12, rb12, r2 # Account for larger shift ++ ++ # submit texture requests for second line ++-max r1, ra_y, 0 ++-min r1, r1, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ++-nop ; mul24 r1, r1, rb_pitch ++-add t0s, r1, ra_frame_base ++- ++-max r1, ra_y2, 0 ++-min r1, r1, rb_frame_height_minus_1 ++-bra -, ra31 ++-add ra_y2, ra_y2, 1 # Delay 1 ++-nop ; mul24 r1, r1, rb_pitch # Delay 2 ++-add t0s, r1, ra_frame_base2 # Delay 3 ++- ++- ++-################################################################################ ++- ++-# mc_filter(next_kernel, x, y, frame_base, x2, y2, frame_base2, height, hcoeffs[0], hcoeffs2[0], hcoeffs[1], hcoeffs2[1], vcoeffs[0], vcoeffs2[0], vcoeffs[1], vcoeffs2[1], offsetweight0, offsetweight1, this_dst) ++-# In a P block, only the first half of coefficients contain used information. 
++-# At this point we have already issued two pairs of texture requests for the current block ++-# ra_x, ra_x16_base point to the current coordinates for this block ++-::mc_filter ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-mov ra31, unif +++ max r1, ra_y, 0 +++ min r1, r1, rb_frame_height_minus_1 +++ add ra_y, ra_y, 1 +++ nop ; mul24 r1, r1, rb_pitch +++ add t0s, r1, ra_frame_base +++ +++ max r1, ra_y2, 0 +++ min r1, r1, rb_frame_height_minus_1 +++ add ra_y2, ra_y2, 1 +++ nop ; mul24 r1, r1, rb_pitch +++ add t1s, r1, ra_frame_base2 +++ +++# FALL THROUGHT TO PER-BLOCK SETUP +++ +++# Start of per-block setup code +++# P and B blocks share the same setup code to save on Icache space +++:per_block_setup +++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ mov ra31, unif ++ ++ # per-channel shifts were calculated on the *previous* invocation ++- ++-mov ra_xshift, ra_xshift_next ++-mov rx_xshift2, rx_xshift2_next +++ mov ra_xshift, ra_xshift_next +++ mov rx_xshift2, rx_xshift2_next ++ ++ # get base addresses and per-channel shifts for *next* invocation ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl ra_xshift_next, r0, 3 # Compute shifts ++-mov ra_y_next, r1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add ra_frame_base_next, r2, r0 # r2 is address for frame0 (not including y offset) ++- ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0 ; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl rx_xshift2_next, r0, 3 # Compute shifts ++-add ra_y2_next, r1, 1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add rx_frame_base2_next, r2, r0 # r2 is address for frame1 (not including y offset) ++- +++ mov r3, 16 +++ mov r1, unif # y_x +++ shl r0,r1,r3 # r0 is x<<16 +++ asr r1,r1,r3 # r1 is y +++ asr r0,r0,r3 # r0 is x +++ add r0, r0, 
elem_num # Load x +++ max r0, r0, 0 +++ min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++ shl ra_xshift_next, r0, 3 # Compute shifts +++ mov ra_y_next, r1 +++ and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++ add ra_frame_base_next, r2, r0 ; mov r1, unif # y2_x2 +++ +++ shl r0,r1,r3 # r0 is x2<<16 +++ asr r1,r1,r3 # r1 is y2 +++ asr r0,r0,r3 # r0 is x2 +++ add r0, r0, elem_num # Load x +++ max r0, r0, 0 +++ min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++ shl rx_xshift2_next, r0, 3 # Compute shifts +++ mov ra_y2_next, r1 +++ and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++ add rx_frame_base2_next, r2, r0 # r2 is address for frame1 (not including y offset) ++ ++ # set up VPM write ++-mov vw_setup, rb28 +++ mov vw_setup, rb28 ++ ++ # get width,height of block ++-mov r2, 16 ++-mov r0, unif ++-shr r1, r0, r2 # Extract width ++-sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++-and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 ++-shl r0, r0, 7 ++-add r0, r0, r1 # Combine width and height of destination area ++-shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++-add rb26, r0, rb27 +++ mov r0, unif +++ shr r1, r0, r3 # Extract width +++ sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++ and r0, r0, rb22 # Extract height +++ add rb17, r0, 5 +++ add rb18, r0, 7 +++ shl r0, r0, 7 +++ add r0, r0, r1 # Combine width and height of destination area +++ shl r0, r0, r3 # Shift into bits 16 upwards of the vdw_setup0 register +++ add rb26, r0, rb27 ++ ++ # get filter coefficients and discard unused B frame values ++-mov r0, unif ++-mov.ifnz -, unif # Alternate coefficients are unused for P frames ++-asr ra3, r0, rb23; mul24 r0, r0, ra22 # These may need some pre-rotation to be used in B frames correctly ++-asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-asr ra0, r0, rb23; mov r0, unif ++-mov.ifnz -, 
unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif ++-mov.ifnz -, unif ++-asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif ++-mov.ifnz -, unif ++-asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-asr rb6, r0, rb23; mul24 r0, r0, ra22 ++-asr rb5, r0, rb23; mul24 r0, r0, ra22 ++-asr rb4, r0, rb23 ++- ++-mov r0, unif # Frame0 offset/weight ++-mov.ifnz -, unif # Frame1 offset/weight unused ++-asr rb15, r0, r2 # Compute offset from MSBs ++-shl r0, r0, r2 ++-asr rb14, r0, r2 # Compute weight from LSBs ++- ++-# r3 is loop counter +++ mov r0, unif ; mov r1,1 # Packed filter offsets, unpack into ra8... (to be used for vertical context later) +++ asr ra9, r0, rb23; mul24 r0, r0, ra22 # my2 +++ asr ra8, r0, rb23; mul24 r0, r0, ra22 # mx2 +++ asr.ifz ra9, r0, rb23; mul24 r0, r0, ra22 # my:my2 +++ asr.ifz ra8, r0, rb23 # mx:mx2 +++ sub ra9,3,ra9 +++ sub ra8,3,ra8 +++ shl ra9,ra9,3 # Scale up by 8 +++ shl ra8,ra8,3 # Scale up by 8 +++# Now if we want aligned we have a mul of 1, so put 0 coefficients at the top +++ mov r1,0xffff00 +++ shl r0, r1, ra8 +++ asr ra0, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb4, r0, rb23 +++ +++ mov r1,0x1040400 +++ shl r0, r1, ra8 +++ asr ra1, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb5, r0, rb23 +++ +++ mov r1,0xfbf5f600 +++ shl r0, r1, ra8 +++ asr ra2, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb6, r0, rb23 +++ +++ mov r1,0x11283a40 +++ shl r0, r1, ra8 +++ asr ra3, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb7, r0, rb23 +++ +++ mov r1,0x3a281100 +++ shl r0, r1, ra8 +++ asr ra4, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb8, r0, rb23 +++ +++ mov r1,0xf6f5fb00 +++ shl r0, r1, ra8 +++ asr ra5, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb9, r0, rb23 +++ +++ mov r1,0x4040100 +++ shl r0, r1, ra8 +++ asr ra6, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb10, r0, rb23 +++ +++ mov 
r1,0xffff0000 +++ shl r0, r1, ra8 +++ asr ra7, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb11, r0, rb23 +++ +++# Extract weighted prediction information +++ mov r0, unif # offset/weight TODO move up +++ asr rb15, r0, r3 # Compute offset from MSBs +++ bra -, ra31 +++ shl r0, r0, r3 # Delay 1 +++ asr r0, r0, r3 ; mov r3, 0 # Compute weight from LSBs and reset loop counter Delay 2 +++ shl rb14, r0, 8 # Use a larger shift to avoid unsigned multiply problem Delay 3 ++ ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests +++################################################################################ +++# mc_filter(y_x, frame_base, y2_x2, frame_base2, width_height, my2_mx2_my_mx, offsetweight0, this_dst, next_kernel) +++# In a P block, y2_x2 should be y_x+8 +++# At this point we have already issued two pairs of texture requests for the current block ++ ++-mov r3, 0 +++::mc_filter ++ ++ :yloop ++ # retrieve texture results and pick out bytes ++@@ -858,91 +892,90 @@ mov r3, 0 ++ # If we knew there was no clipping then this code would get simpler. ++ # Perhaps we could add on the pitch and clip using larger values? 
++ ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, rx_xshift2 ++-mov.ifz ra_y2, ra_y2_next +++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++ shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu1 +++ mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++ shr r1, r4, rx_xshift2 +++ mov.ifz ra_y2, ra_y2_next ++ ++-max r2, ra_y, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++- ++-max r2, ra_y2, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++ max r2, ra_y, 0 # y +++ min r2, r2, rb_frame_height_minus_1 +++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++ add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ +++ max r2, ra_y2, 0 # y +++ min r2, r2, rb_frame_height_minus_1 +++ add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++ add t1s, ra_frame_base2, r2 ; v8subs r1, r1, rb20 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++ ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++ # apply horizontal filter ++-nop ; mul24 r2, r0, ra0 ++-nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-nop ; mul24 r3, ra1 << 1, r0 << 1 ++-nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 
++-add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-add r0, r2, r3 ; mov r3, rb31 ++-sub.setf -, r3, 8 ; mov ra12, ra13 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++-mov ra12, ra13 ++-brr.anyn -, r:yloop ++-mov ra13, ra14 # Delay slot 1 ++-mov ra14, ra15 # Delay slot 2 ++-mov ra15, r0 # Delay slot 3 +++ nop ; mul24 r2, r0, ra0 +++ nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++ nop ; mul24 r3, ra1 << 1, r0 << 1 +++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++ add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++ nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++ add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++ nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++ add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++ nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++ add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++ nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++ add r0, r2, r3 ; mov r3, rb31 +++ sub.setf -, r3, 8 ; mov ra8, ra9 +++ mov ra9, ra10 +++ mov ra10, ra11 +++ mov ra11, ra12 +++ mov ra12, ra13 +++ brr.anyn -, r:yloop +++ mov ra13, ra14 # Delay slot 1 +++ mov ra14, ra15 # Delay slot 2 +++ mov ra15, r0 # Delay slot 3 ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r1, ra14, rb10 ++-nop ; mul24 r0, ra13, rb9 ++-add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-add r1, r1, r0 ; mul24 r0, ra11, rb7 ++- ++-add r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 14 ++-nop ; mul24 r1, r1, 
rb14 ++-add r1, r1, rb12 ++-asr r1, r1, rb13 ++-brr.anyn -, r:yloop ++-add r1, r1, rb15 # Delay 1 ++-min r1, r1, rb22 # Delay 2 ++-max vpm, r1, 0 # Delay 3 +++ nop ; mul24 r1, ra14, rb10 +++ nop ; mul24 r0, ra13, rb9 +++ add r1, r1, r0 ; mul24 r0, ra12, rb8 +++ add r1, r1, r0 ; mul24 r0, ra15, rb11 +++ add r1, r1, r0 ; mul24 r0, ra8, rb4 +++ add r1, r1, r0 ; mul24 r0, ra9, rb5 +++ add r1, r1, r0 ; mul24 r0, ra10, rb6 +++ add r1, r1, r0 ; mul24 r0, ra11, rb7 +++ +++ add r1, r1, r0 ; mov -, vw_wait +++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++ asr r1, r1, 14 +++ nop ; mul24 r1, r1, rb14 +++ add r1, r1, rb12 +++ asr r1, r1, rb13 +++ brr.anyn -, r:yloop +++ add r1, r1, rb15 # Delay 1 +++ min r1, r1, rb22 # Delay 2 +++ max vpm, r1, 0 # Delay 3 ++ ++ # DMA out ++ ++-bra -, ra31 ++-mov vw_setup, rb26 # VDW setup 0 Delay 1 ++-mov vw_setup, rb29 # Stride Delay 2 ++-mov vw_addr, unif # start the VDW Delay 3 +++ brr -, r:per_block_setup +++ mov vw_setup, rb26 # VDW setup 0 Delay 1 +++ mov vw_setup, rb29 # Stride Delay 2 +++ mov vw_addr, unif # start the VDW Delay 3 ++ ++ ++ ++ ################################################################################ ++ ++-# mc_filter_b(next_kernel, x, y, frame_base, x2, y2, frame_base2, width_height, hcoeffs[0], hcoeffs2[0], hcoeffs[1], hcoeffs2[1], vcoeffs[0], vcoeffs2[0], vcoeffs[1], vcoeffs2[1], offsetweight0, offsetweight1, this_dst) +++# mc_filter_b(y_x, frame_base, y2_x2, frame_base2, width_height, my2_mx2_my_mx, offsetweight0, this_dst, next_kernel) ++ # In a P block, only the first half of coefficients contain used information. ++ # At this point we have already issued two pairs of texture requests for the current block ++ # May be better to just send 16.16 motion vector and figure out the coefficients inside this block (only 4 cases so can compute hcoeffs in around 24 cycles?) ++@@ -952,92 +985,6 @@ mov vw_addr, unif # start the VDW Delay 3 ++ # Or possibly by taking advantage of symmetry? 
++ # From 19->7 32bits per command. ++ ::mc_filter_b ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-mov ra31, unif ++- ++-# per-channel shifts were calculated on the *previous* invocation ++- ++-mov ra_xshift, ra_xshift_next ++-mov rx_xshift2, rx_xshift2_next ++- ++-# get base addresses and per-channel shifts for *next* invocation ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl ra_xshift_next, r0, 3 # Compute shifts ++-mov ra_y_next, r1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add ra_frame_base_next, r2, r0 # r2 is address for frame0 (not including y offset) ++- ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0 ; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl rx_xshift2_next, r0, 3 # Compute shifts ++-add ra_y2_next, r1, 1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add rx_frame_base2_next, r2, r0 # r2 is address for frame1 (not including y offset) ++- ++- ++-# set up VPM write ++-mov vw_setup, rb28 ++- ++-# get width,height of block ++-mov r2, 16 ++-mov r0, unif ++-shr r1, r0, r2 # Extract width ++-sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++-and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 ++-shl r0, r0, 7 ++-add r0, r0, r1 # Combine width and height of destination area ++-shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++-add rb26, r0, rb27 ++- ++-# get filter coefficients and discard unused B frame values ++-mov r0, unif ++-mov r1, 1 ++-mov.ifnz r0, unif # Alternate coefficients are unused for P frames ++-nop ; mul24 r0, r0 << 13, r1 << 13 ++-asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-nop ; mul24 r0, r0 << 14, r1 << 14 ++-asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-nop ; mul24 r0, r0 << 15, r1 << 15 # Adjust such that a rotate of 1 will produce the values with first 8 
on left, second 8 on right ++-asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-asr ra0, r0, rb23; mov r0, unif ++-mov.ifnz r0, unif ++-nop ; mul24 r0, r0 << 9, r1 << 9 ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-nop ; mul24 r0, r0 << 10, r1 << 10 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-nop ; mul24 r0, r0 << 11, r1 << 11 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-nop ; mul24 r0, r0 << 12, r1 << 12 ++-asr ra4, r0, rb23; mov r0, unif ++-mov.ifnz r0, unif ++-asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif ++-mov.ifnz r0, unif ++-asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-asr rb6, r0, rb23; mul24 r0, r0, ra22 ++-asr rb5, r0, rb23; mul24 r0, r0, ra22 ++-asr rb4, r0, rb23 ++- ++-mov r0, unif # Frame0 offset/weight ++-mov.ifnz r0, unif # Frame1 offset/weight unused ++-asr rb15, r0, r2 # Compute offset from MSBs ++-shl r0, r0, r2 ++-asr rb14, r0, r2 # Compute weight from LSBs ++- ++-# r3 is loop counter ++- ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-mov r3, 0 ++- ++ :yloopb ++ # retrieve texture results and pick out bytes ++ # then submit two more texture requests ++@@ -1045,111 +992,123 @@ mov r3, 0 ++ # If we knew there was no clipping then this code would get simpler. ++ # Perhaps we could add on the pitch and clip using larger values? 
++ ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, rx_xshift2 ++-mov.ifz ra_y2, ra_y2_next +++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++ shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu1 +++ mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++ shr r1, r4, rx_xshift2 +++ mov.ifz ra_y2, ra_y2_next ++ ++-max r2, ra_y, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++- ++-max r2, ra_y2, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++ max r2, ra_y, 0 # y +++ min r2, r2, rb_frame_height_minus_1 +++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++ add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ +++ max r2, ra_y2, 0 # y +++ min r2, r2, rb_frame_height_minus_1 +++ add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++ add t1s, ra_frame_base2, r2 ; v8subs r1, r1, rb20 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++ ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++ # apply horizontal filter ++-nop ; mul24 r2, r0, ra0 ++-nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-nop ; mul24 r3, ra1 << 1, r0 << 1 ++-nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 
++-add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-add r0, r2, r3 ; mov r3, rb31 ++-sub.setf -, r3, 8 ; mov ra12, ra13 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++-mov ra12, ra13 ++-brr.anyn -, r:yloopb ++-mov ra13, ra14 # Delay slot 1 ++-mov ra14, ra15 # Delay slot 2 ++-mov ra15, r0 # Delay slot 3 ++- ++-# apply vertical filter and write to VPM ++- ++-nop ; mul24 r1, ra14, rb10 ++-nop ; mul24 r0, ra13, rb9 ++-add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-add r1, r1, r0 ; mul24 r0, ra11, rb7 ++- ++-add r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 14 ++-nop ; mul24 r1, r1 << 8, ra20 << 8 # Rotate to align left and right halves ++-add r1, r1, ra30 ; mul24 r0, r1, rb14 ++-add r1, r1, r0 ++-brr.anyn -, r:yloopb ++-asr r1, r1, 7 # Delay 1 ++-min r1, r1, rb22 # Delay 2 ++-max vpm, r1, 0 # Delay 3 +++ nop ; mul24 r2, r0, ra0 +++ nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++ nop ; mul24 r3, ra1 << 1, r0 << 1 +++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++ add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++ nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++ add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++ nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++ add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++ nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++ add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++ nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++ add r0, r2, r3 ; mov r3, rb31 +++ sub.setf 
-, r3, 8 ; mov ra8, ra9 +++ mov ra9, ra10 +++ mov ra10, ra11 +++ mov ra11, ra12 +++ mov ra12, ra13 +++ brr.anyn -, r:yloopb +++ mov ra13, ra14 # Delay slot 1 +++ mov ra14, ra15 # Delay slot 2 +++ mov ra15, r0 # Delay slot 3 +++ +++ # apply vertical filter and write to VPM +++ +++ nop ; mul24 r1, ra14, rb10 +++ nop ; mul24 r0, ra13, rb9 +++ add r1, r1, r0 ; mul24 r0, ra12, rb8 +++ add r1, r1, r0 ; mul24 r0, ra15, rb11 +++ add r1, r1, r0 ; mul24 r0, ra8, rb4 +++ add r1, r1, r0 ; mul24 r0, ra9, rb5 +++ add r1, r1, r0 ; mul24 r0, ra10, rb6 +++ add r1, r1, r0 ; mul24 r0, ra11, rb7 +++ +++ add r1, r1, r0 ; mov -, vw_wait +++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++ asr r0, r1, 14 +++ asr r1, r1, 6 # Wait state so we can use the rotate instruction +++ nop ; mul24 r0, r0 << 8, ra22 << 8 # Rotate to align left and right halves +++ add r1, r1, ra18 +++ add r1, r1, r0 +++ brr.anyn -, r:yloopb +++ asr r1, r1, 15 # Delay 1 +++ min r1, r1, rb22 # Delay 2 +++ max vpm, r1, 0 # Delay 3 ++ ++ # DMA out ++-bra -, ra31 ++-mov vw_setup, rb26 # VDW setup 0 Delay 1 ++-mov vw_setup, rb29 # Stride Delay 2 ++-mov vw_addr, unif # start the VDW Delay 3 +++ brr -, r:per_block_setup +++ mov vw_setup, rb26 # VDW setup 0 Delay 1 +++ mov vw_setup, rb29 # Stride Delay 2 +++ mov vw_addr, unif # start the VDW Delay 3 ++ ++ ################################################################################ ++ ++ # mc_interrupt_exit12() ++ ::mc_interrupt_exit12 ++-mov -, vw_wait # wait on the VDW ++- ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++- ++-mov -,sacq(0) # 1 ++-mov -,sacq(0) # 2 ++-mov -,sacq(0) # 3 ++-mov -,sacq(0) # 4 ++-mov -,sacq(0) # 5 ++-mov -,sacq(0) # 6 ++-mov -,sacq(0) # 7 ++-mov -,sacq(0) # 8 ++-mov -,sacq(0) # 9 ++-mov -,sacq(0) # 10 ++-mov -,sacq(0) # 11 ++- ++-nop ; nop ; thrend ++-mov interrupt, 1; nop # delay slot 1 ++-nop ; nop # delay slot 2 +++ mov -, vw_wait # wait on the VDW +++ +++ ldtmu0 +++ ldtmu0 +++ ldtmu1 +++ ldtmu1 +++ +++ mov -,sacq(0) # 1 +++ mov -,sacq(0) # 2 +++ 
mov -,sacq(0) # 3 +++ mov -,sacq(0) # 4 +++ mov -,sacq(0) # 5 +++ mov -,sacq(0) # 6 +++ mov -,sacq(0) # 7 +++ mov -,sacq(0) # 8 +++ mov -,sacq(0) # 9 +++ mov -,sacq(0) # 10 +++ mov -,sacq(0) # 11 +++ +++ nop ; nop ; thrend +++ mov interrupt, 1; nop # delay slot 1 +++ nop ; nop # delay slot 2 +++ +++ +++::mc_exit1 +++ mov -, vw_wait # wait on the VDW +++ +++ ldtmu0 +++ ldtmu1 +++ ldtmu0 +++ ldtmu1 +++ nop ; nop ; thrend +++ mov interrupt, 1; nop # delay slot 1 +++ nop ; nop # delay slot 2 ++ ++ ++ ::mc_end ++-- ++2.7.4 ++ ++ ++From f02ec34c772aad3caa17432c6a4860f9ed0d5dc6 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Tue, 2 Jun 2015 10:58:25 +0100 ++Subject: [PATCH 48/68] Added option to simulate QPUs ++ ++--- ++ libavcodec/hevc.c | 288 +++++++++++++++++++++++++++++++++++++++++++-- ++ libavcodec/rpi_qpu.c | 24 ++-- ++ libavcodec/rpi_shader.qasm | 6 +- ++ 3 files changed, 295 insertions(+), 23 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 2da88ec..34d92e2 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -56,6 +56,8 @@ ++ // Define RPI_CACHE_UNIF_MVS to write motion vector uniform stream to cached memory ++ // RPI_CACHE_UNIF_MVS doesn't seem to make much difference, so left undefined. 
++ +++ // Define RPI_SIMULATE_QPUS for debugging to run QPU code on the ARMs +++ //#define RPI_SIMULATE_QPUS ++ ++ #endif ++ ++@@ -124,7 +126,6 @@ static void pic_arrays_free(HEVCContext *s) ++ ++ #ifdef EARLY_MALLOC ++ #else ++- printf("pic_arrays_free\n"); ++ if (s->coeffs_buf_arm[0]) { ++ gpu_free(&s->coeffs_buf_default); ++ s->coeffs_buf_arm[0] = 0; ++@@ -174,11 +175,9 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ #ifdef RPI ++ #ifdef EARLY_MALLOC ++ #else ++- assert(sps); +++ av_assert0(sps); ++ int coeffs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); ++ int coefs_per_row = sps->ctb_width * coeffs_in_ctb * 3; // Allow space for chroma ++- printf("pic_arrays_init\n"); ++- printf("Allocated %d\n",coefs_per_row); ++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); ++ s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; ++ if (!s->coeffs_buf_arm[0]) ++@@ -2988,6 +2987,274 @@ static void rpi_inter_clear(HEVCContext *s) ++ #endif ++ } ++ +++ +++#ifdef RPI_SIMULATE_QPUS +++ +++static int32_t clipx(int x,int FRAME_WIDTH) +++{ +++ if (x<=0) return 0; +++ if (x>=FRAME_WIDTH) return FRAME_WIDTH-1; +++ return x; +++} +++ +++static int32_t clipy(int y,int FRAME_HEIGHT) +++{ +++ if (y<=0) return 0; +++ if (y>=FRAME_HEIGHT) return FRAME_HEIGHT-1; +++ return y; +++} +++ +++/*static int32_t filter8(uint8_t *data, int x0, int y0, int pitch, int mx, int my,int round,int denom,int weight,int offset) +++{ +++ int32_t vsum = 0; +++ int x, y; +++ +++ for (y = 0; y < 8; y++) { +++ int32_t hsum = 0; +++ +++ for (x = 0; x < 8; x++) +++ hsum += lumaFilter[mx][x]*data[clipx(x + x0) + clipy(y + y0) * pitch]; +++ +++ vsum += lumaFilter[my][y]*hsum; +++ } +++ vsum >>= 6; +++ vsum = (((vsum*weight)+round)>>denom)+offset; +++ +++ return av_clip_uint8( vsum ); +++}*/ +++ +++static int32_t filter8_chroma(uint8_t *data, int x0, int y0, int pitch, int hcoeffs, int vcoeffs,int offset_weight,int offset_before,int denom,int 
pic_width, int pic_height) +++{ +++ int32_t vsum = 0; +++ int x, y; +++ int chromaFilterH[4]; +++ int chromaFilterV[4]; +++ int i; +++ int offset_after = offset_weight>>16; +++ int weight = (offset_weight<<16)>>16; +++ for(i=0;i<4;i++) { +++ chromaFilterH[i] = ((hcoeffs>>(8*i))<<24)>>24; +++ chromaFilterV[i] = ((vcoeffs>>(8*i))<<24)>>24; +++ } +++ +++ for (y = 0; y < 4; y++) { +++ int32_t hsum = 0; +++ +++ for (x = 0; x < 4; x++) +++ hsum += chromaFilterH[x]*data[clipx(x + x0,pic_width) + clipy(y + y0,pic_height) * pitch]; +++ +++ vsum += chromaFilterV[y]*hsum; +++ } +++ vsum >>= 6; +++ vsum = (((vsum*weight)+offset_before)>>denom)+offset_after; +++ +++ return vsum; +++} +++ +++int lumaFilter[4][8]={ {0,0,0,64,0,0,0,0},{-1,4,-10,58,17,-5,1,0},{-1,4,-11,40,40,-11,4,-1},{0,1,-5,17,58,-10,4,-1} }; +++ +++static int32_t filter8_luma(uint8_t *data, int x0, int y0, int pitch, int my_mx,int offset_weight,int offset_before,int denom,int pic_width, int pic_height) +++{ +++ int32_t vsum = 0; +++ int x, y; +++ int i; +++ int offset_after = offset_weight>>16; +++ int weight = (offset_weight<<16)>>16; +++ +++ for (y = 0; y < 8; y++) { +++ int32_t hsum = 0; +++ +++ for (x = 0; x < 8; x++) +++ hsum += lumaFilter[my_mx&3][x]*data[clipx(x + x0,pic_width) + clipy(y + y0,pic_height) * pitch]; +++ +++ vsum += lumaFilter[(my_mx>>8)&3][y]*hsum; +++ } +++ vsum >>= 6; +++ vsum = (((vsum*weight)+offset_before)>>denom)+offset_after; +++ +++ return vsum; +++} +++ +++static uint8_t *test_frame(HEVCContext *s,uint32_t p, AVFrame *frame, int cIdx) +++{ +++ //int pic_width = s->ps.sps->width >> s->ps.sps->hshift[cIdx]; +++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[cIdx]; +++ int pitch = frame->linesize[cIdx]; +++ uint32_t base = get_vc_address(frame->buf[cIdx]); +++ if (p>=base && pdata[cIdx] + (p-base); +++ } +++ return NULL; +++} +++ +++static uint8_t *compute_arm_addr(HEVCContext *s,uint32_t p, int cIdx) +++{ +++ SliceHeader *sh = &s->sh; +++ uint8_t *arm = 
test_frame(s,p,s->frame,cIdx); +++ int i; +++ if (arm) return arm; +++ if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) +++ { +++ for(i=0;inb_refs[L0];i++) { +++ arm = test_frame(s,p,s->ref->refPicList[0].ref[i]->frame,cIdx); +++ if (arm) return arm; +++ } +++ } +++ if (sh->slice_type == B_SLICE) { +++ for(i=0;inb_refs[L1];i++) { +++ arm = test_frame(s,p,s->ref->refPicList[1].ref[i]->frame,cIdx); +++ if (arm) return arm; +++ } +++ } +++ printf("Frame 0x%x not found! Exit=%x\n",p,qpu_get_fn(QPU_MC_EXIT)); +++ exit(-1); +++ return NULL; +++} +++ +++static void rpi_simulate_inter_chroma(HEVCContext *s,uint32_t *p) +++{ +++ uint32_t next_kernel; +++ uint32_t x0; +++ uint32_t y0; +++ uint8_t *ref_u_base; +++ uint8_t *ref_v_base; +++ uint32_t frame_width = p[5]; +++ uint32_t frame_height = p[6]; +++ uint32_t pitch = p[7]; +++ uint32_t dst_pitch = p[8]; +++ int32_t offset_before = p[9]; +++ int32_t denom = p[10]; +++ uint32_t vpm_id = p[11]; +++ uint32_t tmp_u_dst[256]; +++ uint32_t tmp_v_dst[256]; +++ while(1) { +++ p += 12; +++ next_kernel = p[0-12]; +++ x0 = p[1-12]; +++ y0 = p[2-12]; +++ if (next_kernel==s->mc_filter_uv || next_kernel==s->mc_filter_uv_b0 || next_kernel==s->mc_filter_uv_b) { +++ int x,y; +++ uint32_t width_height = p[5]; +++ uint32_t hcoeffs = p[6]; +++ uint32_t vcoeffs = p[7]; +++ uint32_t offset_weight_u = p[8]; +++ uint32_t offset_weight_v = p[9]; +++ uint8_t *this_u_dst; +++ uint8_t *this_v_dst; +++ uint32_t width = width_height >> 16; +++ uint32_t height = (width_height << 16) >> 16; +++ ref_u_base = compute_arm_addr(s,p[3-12],1); +++ ref_v_base = compute_arm_addr(s,p[4-12],2); +++ if (next_kernel!=s->mc_filter_uv_b0) +++ { +++ this_u_dst = compute_arm_addr(s,p[10],1); +++ this_v_dst = compute_arm_addr(s,p[11],2); +++ } +++ for (y=0; ymc_filter_uv) { +++ int32_t refa = filter8_chroma(ref_u_base,x+x0, y+y0, pitch, hcoeffs, vcoeffs, offset_weight_u,offset_before,denom,frame_width,frame_height); +++ int32_t refb = 
filter8_chroma(ref_v_base,x+x0, y+y0, pitch, hcoeffs, vcoeffs, offset_weight_v,offset_before,denom,frame_width,frame_height); +++ this_u_dst[x+y*dst_pitch] = av_clip_uint8(refa); +++ this_v_dst[x+y*dst_pitch] = av_clip_uint8(refb); +++ } else if (next_kernel==s->mc_filter_uv_b0) { +++ int32_t refa = filter8_chroma(ref_u_base, x+x0, y+y0, pitch, hcoeffs, vcoeffs, 1,0,0,frame_width,frame_height); +++ int32_t refb = filter8_chroma(ref_v_base, x+x0, y+y0, pitch, hcoeffs, vcoeffs, 1,0,0,frame_width,frame_height); +++ tmp_u_dst[x+y*16] = refa; +++ tmp_v_dst[x+y*16] = refb; +++ } else { +++ int32_t refa = filter8_chroma(ref_u_base, x+x0, y+y0, pitch, hcoeffs, vcoeffs, 1, 64 + tmp_u_dst[x+y*16], 7, frame_width, frame_height); +++ int32_t refb = filter8_chroma(ref_v_base, x+x0, y+y0, pitch, hcoeffs, vcoeffs, 1, 64 + tmp_v_dst[x+y*16], 7, frame_width, frame_height); +++ this_u_dst[x+y*dst_pitch] = av_clip_uint8(refa); +++ this_v_dst[x+y*dst_pitch] = av_clip_uint8(refb); +++ } +++ } +++ } +++ } else { +++ av_assert0(next_kernel==qpu_get_fn(QPU_MC_INTERRUPT_EXIT8) || next_kernel==qpu_get_fn(QPU_MC_EXIT) ); +++ break; +++ } +++ } +++} +++ +++// mc_setup(y_x, ref_y_base, y2_x2, ref_y2_base, frame_width_height, pitch, dst_pitch, offset_shift, next_kernel) +++static void rpi_simulate_inter_luma(HEVCContext *s,uint32_t *p) +++{ +++ uint32_t next_kernel; +++ int y_x,y2_x2; +++ uint32_t x0; +++ uint32_t y0; +++ uint32_t x2; +++ uint32_t y2; +++ uint8_t *ref_y_base; +++ uint8_t *ref_y2_base; +++ uint32_t frame_width_height = p[4]; +++ uint32_t frame_width = frame_width_height>>16; +++ uint32_t frame_height = (frame_width_height<<16)>>16; +++ uint32_t pitch = p[5]; +++ uint32_t dst_pitch = p[6]; +++ int offset_shift = p[7]; +++ int32_t offset_before = offset_shift>>16; +++ int32_t denom = (offset_shift<<16)>>16; +++ while(1) { +++ p += 9; +++ next_kernel = p[8-9]; +++ y_x = p[0-9]; +++ x0 = (y_x<<16)>>16; +++ y0 = y_x>>16; +++ y2_x2 = p[2-9]; +++ x2 = (y2_x2<<16)>>16; +++ y2 = 
y2_x2>>16; +++ +++ if (next_kernel==s->mc_filter || next_kernel==s->mc_filter_b) { +++ // y_x, frame_base, y2_x2, frame_base2, width_height, my2_mx2_my_mx, offsetweight0, this_dst, next_kernel) +++ int x,y; +++ uint32_t width_height = p[4]; +++ uint32_t my2_mx2_my_mx = p[5]; +++ uint32_t offset_weight = p[6]; +++ uint8_t *this_dst = compute_arm_addr(s,p[7],0); +++ uint32_t width = width_height >> 16; +++ uint32_t height = (width_height << 16) >> 16; +++ ref_y_base = compute_arm_addr(s,p[1-9],0); +++ ref_y2_base = compute_arm_addr(s,p[3-9],0); +++ for (y=0; ymc_filter) { +++ int32_t refa = filter8_luma(ref_y_base,x+x0, y+y0, pitch, my2_mx2_my_mx, offset_weight,offset_before,denom,frame_width,frame_height); +++ this_dst[x+y*dst_pitch] = av_clip_uint8(refa); +++ } +++ else { +++ int32_t refa = filter8_luma(ref_y_base, x+x0, y+y0, pitch, my2_mx2_my_mx, 1, 0, 0, frame_width, frame_height); +++ int32_t refb = filter8_luma(ref_y2_base, x+x2, y+y2, pitch, my2_mx2_my_mx>>16, 1, 64 + refa, 7, frame_width, frame_height); +++ this_dst[x+y*dst_pitch] = av_clip_uint8(refb); +++ } +++ } +++ } +++ } else { +++ av_assert0(next_kernel==qpu_get_fn(QPU_MC_INTERRUPT_EXIT12) || next_kernel==qpu_get_fn(QPU_MC_EXIT) ); +++ break; +++ } +++ } +++} +++ +++static void rpi_simulate_inter_qpu(HEVCContext *s) +++{ +++ // First run the transform as normal +++ int i; +++ rpi_execute_transform(s); +++ for(i=0;i<8;i++) +++ { +++ rpi_simulate_inter_chroma(s,s->mvs_base[i]); +++ } +++ for(i=0;i<12;i++) +++ { +++ rpi_simulate_inter_luma(s,s->y_mvs_base[i]); +++ } +++} +++ +++#endif +++ +++ ++ static void rpi_execute_inter_qpu(HEVCContext *s) ++ { ++ int k; ++@@ -3006,7 +3273,7 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++ 
s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for V ++- assert(s->u_mvs[k] - s->mvs_base[k] < UV_COMMANDS_PER_QPU); +++ av_assert0(s->u_mvs[k] - s->mvs_base[k] < UV_COMMANDS_PER_QPU); ++ } ++ ++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++@@ -3016,11 +3283,16 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+1] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for second request ++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++- assert(s->y_mvs[k] - s->y_mvs_base[k] < Y_COMMANDS_PER_QPU); +++ av_assert0(s->y_mvs[k] - s->y_mvs_base[k] < Y_COMMANDS_PER_QPU); ++ } ++ s->y_mvs[12-1][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT12); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++ #endif ++ +++#ifdef RPI_SIMULATE_QPUS +++ rpi_simulate_inter_qpu(s); +++ s->vpu_id = -1; +++ return; +++#endif ++ ++ #ifdef RPI_MULTI_MAILBOX ++ #ifdef RPI_CACHE_UNIF_MVS ++@@ -3101,7 +3373,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1; ++ #endif ++ ++- /*if (!s->enable_rpi) { +++ if (!s->enable_rpi) { ++ if (s->ps.pps->cross_component_prediction_enabled_flag) ++ printf("Cross component\n"); ++ if (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1) ++@@ -3110,7 +3382,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ printf("Weighted P slice\n"); ++ if (s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE) ++ printf("Weighted B slice\n"); ++- }*/ +++ } 
++ ++ #endif ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index e12304b..4480f72 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -13,7 +13,7 @@ ++ #include ++ #include ++ #include ++-#include +++#include "libavutil/avassert.h" ++ ++ #include "config.h" ++ ++@@ -160,13 +160,13 @@ static int gpu_init(volatile struct GPU **gpu) { ++ // Now copy over the QPU code into GPU memory ++ { ++ int num_bytes = qpu_get_fn(QPU_MC_END) - qpu_get_fn(QPU_MC_SETUP_UV); ++- assert(num_bytes<=QPU_CODE_SIZE*sizeof(unsigned int)); +++ av_assert0(num_bytes<=QPU_CODE_SIZE*sizeof(unsigned int)); ++ memcpy((void*)ptr->qpu_code, rpi_shader, num_bytes); ++ } ++ // And the VPU code ++ { ++ int num_bytes = sizeof(rpi_hevc_transform); ++- assert(num_bytes<=VPU_CODE_SIZE*sizeof(unsigned int)); +++ av_assert0(num_bytes<=VPU_CODE_SIZE*sizeof(unsigned int)); ++ memcpy((void*)ptr->vpu_code, rpi_hevc_transform, num_bytes); ++ } ++ // And the transform coefficients ++@@ -216,13 +216,13 @@ static void gpu_unlock(void) { ++ static int gpu_malloc_uncached_internal(int numbytes, GPU_MEM_PTR_T *p, int mb) { ++ p->numbytes = numbytes; ++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); ++- assert(p->vcsm_handle); +++ av_assert0(p->vcsm_handle); ++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); ++- assert(p->vc_handle); +++ av_assert0(p->vc_handle); ++ p->arm = vcsm_lock(p->vcsm_handle); ++- assert(p->arm); +++ av_assert0(p->arm); ++ p->vc = mem_lock(mb, p->vc_handle); ++- assert(p->vc); +++ av_assert0(p->vc); ++ return 0; ++ } ++ ++@@ -243,7 +243,7 @@ int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) ++ ++ int gpu_get_mailbox(void) ++ { ++- assert(gpu); +++ av_assert0(gpu); ++ return gpu->mb; ++ } ++ ++@@ -297,13 +297,13 @@ static int gpu_malloc_cached_internal(int numbytes, GPU_MEM_PTR_T *p) { ++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); ++ //p->vcsm_handle = 
vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); ++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST_AND_VC, (char *)"Video Frame" ); ++- assert(p->vcsm_handle); +++ av_assert0(p->vcsm_handle); ++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); ++- assert(p->vc_handle); +++ av_assert0(p->vc_handle); ++ p->arm = vcsm_lock(p->vcsm_handle); ++- assert(p->arm); +++ av_assert0(p->arm); ++ p->vc = mem_lock(gpu->mb, p->vc_handle); ++- assert(p->vc); +++ av_assert0(p->vc); ++ return 0; ++ } ++ ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 60d1ec2..0686249 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -149,8 +149,8 @@ add t0s, r0, r1 ; mov ra_frame_base, r2 ++ add t1s, r2, r1 ++ ++ mov r2,8 ++-shl rb12,unif, r2 # offset before shift ++-add rb13,unif,r2 # offset after shift +++shl rb12,unif,r2 # offset before shift +++add rb13,unif,r2 # denominator ++ ++ # Compute part of VPM to use for DMA output ++ mov r2, unif ++@@ -185,7 +185,7 @@ add t1s, r1, ra_frame_base ++ ++ ################################################################################ ++ ++-# mc_filter_uv(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) +++# mc_filter_uv(next_kernel, x, y, frame_u_base, frame_v_base, width_height, hcoeffs, vcoeffs, offset_weight_u, offset_weight_v, this_u_dst, this_v_dst) ++ ++ # At this point we have already issued two pairs of texture requests for the current block ++ # ra_x, ra_x16_base point to the current coordinates for this block ++-- ++2.7.4 ++ ++ ++From 8bdf6b06c612ff4971c2ce99a62d093cf92468ca Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Tue, 2 Jun 2015 13:17:50 +0100 ++Subject: [PATCH 49/68] Increased motion vector memory and fixed block size ++ computation for non-multiple of 2 block sizes ++ ++--- ++ libavcodec/hevc.c | 50 
+++++++++++++++++++++++++++++++------------------- ++ 1 file changed, 31 insertions(+), 19 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 34d92e2..3fb1e2a 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -83,11 +83,9 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ ++ // Split image of 2048 into parts 64 wide ++ // So some QPUs will have 3 blocks of 64 to do, and others 2 blocks for an image 2048 wide with 32 blocks across ++-// Each block of 64*64 ++-// Smallest CTU size is 16x16, so smallest block is 8x8 ++-// Corresponds to a total of 83kbytes over all 12 QPUs +++// For each block of 64*64 the smallest block size is 8x4 ++ #define RPI_LUMA_COMMAND_WORDS 9 ++-#define Y_COMMANDS_PER_QPU ((1+3*(64*64)/(8*8)) * RPI_LUMA_COMMAND_WORDS) +++#define Y_COMMANDS_PER_QPU ((1+3*(64*64)/(8*4)) * RPI_LUMA_COMMAND_WORDS) ++ ++ #define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++@@ -2042,11 +2040,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *y = s->y_mvs[chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { +++ int bw = nPbW-start_x; +++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); ++- *y++ = ( (nPbW<16 ? nPbW : 16) << 16 ) + (nPbH<16 ? nPbH : 16); +++ *y++ = ( (bw<16 ? bw : 16) << 16 ) + (bh<16 ? 
bh : 16); ++ *y++ = my2_mx2_my_mx; ++ if (weight_flag) { ++ *y++ = (s->sh.luma_offset_l0[current_mv.ref_idx[reflist]] << 16) + (s->sh.luma_weight_l0[current_mv.ref_idx[reflist]] & 0xffff); ++@@ -2089,12 +2089,14 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ int bw = nPbW_c-start_x; +++ int bh = nPbH_c-start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++- *u++ = ( (nPbW_cy_mvs[chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { +++ int bw = nPbW-start_x; +++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); ++- *y++ = ( (nPbW<16 ? nPbW : 16) << 16 ) + (nPbH<16 ? nPbH : 16); +++ *y++ = ( (bw<16 ? bw : 16) << 16 ) + (bh<16 ? 
bh : 16); ++ *y++ = my2_mx2_my_mx; ++ if (weight_flag) { ++ *y++ = (s->sh.luma_offset_l0[current_mv.ref_idx[reflist]] << 16) + (s->sh.luma_weight_l0[current_mv.ref_idx[reflist]] & 0xffff); ++@@ -2189,12 +2193,14 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ int bw = nPbW_c-start_x; +++ int bh = nPbH_c-start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); ++- *u++ = ( (nPbW_csh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; ++@@ -2246,11 +2252,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *y = s->y_mvs[chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=8) { // B blocks work 8 at a time +++ int bw = nPbW-start_x; +++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y2 - 3 + start_y) << 16) + ( (x2 - 3 + start_x) & 0xffff); // Second fetch is for ref1 ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); ++- *y++ = ( (nPbW<8 ? nPbW : 8) << 16 ) + (nPbH<16 ? nPbH : 16); +++ *y++ = ( (bw<8 ? bw : 8) << 16 ) + (bh<16 ? 
bh : 16); ++ *y++ = my2_mx2_my_mx; ++ *y++ = 1; // B frame weighted prediction not supported ++ *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); ++@@ -2293,12 +2301,14 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ int bw = nPbW_c-start_x; +++ int bh = nPbH_c-start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b0; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++- *u++ = ( (nPbW_cframe->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); ++- *u++ = ( (nPbW_c> 16; ++ uint32_t height = (width_height << 16) >> 16; +++ uint8_t *dst_base = s->frame->data[0]; ++ ref_y_base = compute_arm_addr(s,p[1-9],0); ++ ref_y2_base = compute_arm_addr(s,p[3-9],0); ++ for (y=0; ymc_filter) { ++ int32_t refa = filter8_luma(ref_y_base,x+x0, y+y0, pitch, my2_mx2_my_mx, offset_weight,offset_before,denom,frame_width,frame_height); ++- this_dst[x+y*dst_pitch] = av_clip_uint8(refa); +++ refa = av_clip_uint8(refa); +++ this_dst[x+y*dst_pitch] = refa; ++ } ++ else { ++ int32_t refa = filter8_luma(ref_y_base, x+x0, y+y0, pitch, my2_mx2_my_mx, 1, 0, 0, frame_width, frame_height); ++@@ -3248,7 +3261,7 @@ static void rpi_simulate_inter_qpu(HEVCContext *s) ++ } ++ for(i=0;i<12;i++) ++ { ++- rpi_simulate_inter_luma(s,s->y_mvs_base[i]); +++ rpi_simulate_inter_luma(s,s->y_mvs_base[i],i); ++ } ++ } ++ ++@@ -3290,7 +3303,6 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ ++ #ifdef RPI_SIMULATE_QPUS ++ rpi_simulate_inter_qpu(s); ++- s->vpu_id = -1; ++ return; ++ #endif ++ ++-- ++2.7.4 ++ ++ ++From 
da5ae7e96dd961ccc7bc162c8acf336d54a50092 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Tue, 2 Jun 2015 14:36:54 +0100 ++Subject: [PATCH 50/68] Added support for skip deblock ++ ++--- ++ libavcodec/hevc.c | 5 +++++ ++ libavcodec/hevc.h | 2 ++ ++ libavcodec/hevc_filter.c | 14 ++++---------- ++ 3 files changed, 11 insertions(+), 10 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 3fb1e2a..0ac4f4c 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3397,6 +3397,11 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ } ++ ++ #endif +++ s->used_for_ref = !(s->nal_unit_type == NAL_TRAIL_N || +++ s->nal_unit_type == NAL_TSA_N || +++ s->nal_unit_type == NAL_STSA_N || +++ s->nal_unit_type == NAL_RADL_N || +++ s->nal_unit_type == NAL_RASL_N); ++ ++ if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { ++ av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n"); ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 5df9dcd..5cb90b5 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -890,6 +890,8 @@ typedef struct HEVCContext { ++ int width; ++ int height; ++ +++ int used_for_ref; +++ ++ #ifdef RPI ++ int enable_rpi; ++ HEVCMvCmd *unif_mv_cmds; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 11629e4..14a0952 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -512,16 +512,14 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->ps.pps->transquant_bypass_enable_flag; ++ ++ #ifdef DISABLE_DEBLOCK_NONREF ++- if ( s->nal_unit_type == NAL_TRAIL_N || ++- s->nal_unit_type == NAL_TSA_N || ++- s->nal_unit_type == NAL_STSA_N || ++- s->nal_unit_type == NAL_RADL_N || ++- s->nal_unit_type == NAL_RASL_N ) +++ if (!s->used_for_ref) ++ return; // Don't deblock non-reference frames ++ #endif ++ #ifdef DISABLE_DEBLOCK ++ return; ++ #endif +++ if (!s->used_for_ref && s->avctx->skip_loop_filter >= AVDISCARD_NONREF) +++ 
return; ++ ++ if (x0) { ++ left_tc_offset = s->deblock[ctb - 1].tc_offset; ++@@ -885,11 +883,7 @@ static int ff_hevc_buf_base(AVBufferRef *bref) { ++ ++ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ { ++- if (s->enable_rpi && !( s->nal_unit_type == NAL_TRAIL_N || ++- s->nal_unit_type == NAL_TSA_N || ++- s->nal_unit_type == NAL_STSA_N || ++- s->nal_unit_type == NAL_RADL_N || ++- s->nal_unit_type == NAL_RASL_N )) { +++ if (s->enable_rpi && s->used_for_ref) { ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++ int curr_y = ((int *)f->progress->data)[0]; ++-- ++2.7.4 ++ ++ ++From 6401d88c310cd3bfec7be94bf3ceb6d0c5736c7e Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Tue, 2 Jun 2015 15:22:52 +0100 ++Subject: [PATCH 51/68] Added support for skip_frame ++ ++--- ++ libavcodec/hevc.c | 15 ++++++++++----- ++ 1 file changed, 10 insertions(+), 5 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 0ac4f4c..639e4df 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3397,11 +3397,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ } ++ ++ #endif ++- s->used_for_ref = !(s->nal_unit_type == NAL_TRAIL_N || ++- s->nal_unit_type == NAL_TSA_N || ++- s->nal_unit_type == NAL_STSA_N || ++- s->nal_unit_type == NAL_RADL_N || ++- s->nal_unit_type == NAL_RASL_N); ++ ++ if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { ++ av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n"); ++@@ -3925,6 +3920,16 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) ++ if (ret < 0) ++ return ret; ++ +++ s->used_for_ref = !(s->nal_unit_type == NAL_TRAIL_N || +++ s->nal_unit_type == NAL_TSA_N || +++ s->nal_unit_type == NAL_STSA_N || +++ s->nal_unit_type == NAL_RADL_N || +++ s->nal_unit_type == NAL_RASL_N); +++ +++ if (!s->used_for_ref && s->avctx->skip_frame >= AVDISCARD_NONREF) { +++ s->is_decoded = 0; +++ break; +++ } ++ if (s->max_ra == INT_MAX) { ++ 
if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) { ++ s->max_ra = s->poc; ++-- ++2.7.4 ++ ++ ++From d2951e2ca73e234d1b775621e3993948a4a2c8ea Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 3 Jun 2015 09:15:38 +0100 ++Subject: [PATCH 52/68] Fixed cache flushing of luma when using old method ++ ++--- ++ libavcodec/hevc_filter.c | 2 +- ++ 1 file changed, 1 insertion(+), 1 deletion(-) ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 14a0952..b286bbf 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -919,7 +919,7 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ flush_buffer(s->frame->buf[1]); ++ flush_buffer(s->frame->buf[2]); ++ #ifdef RPI_LUMA_QPU ++- flush_buffer(s->frame->buf[1]); +++ flush_buffer(s->frame->buf[0]); ++ #endif ++ ++ #endif ++-- ++2.7.4 ++ ++ ++From 7ae612e69c1cabcc7d0b37b65efa8c5bdcfa7bf5 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 3 Jun 2015 11:37:27 +0100 ++Subject: [PATCH 53/68] Option to parallelise coefficient decode and inter ++ prediction and deblock for each frame ++ ++--- ++ libavcodec/hevc.c | 701 +++++++++++++++++++++++++++-------------- ++ libavcodec/hevc.h | 74 +++-- ++ libavcodec/hevc_cabac.c | 12 +- ++ libavcodec/hevcpred_template.c | 5 +- ++ 4 files changed, 522 insertions(+), 270 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 639e4df..12aacc5 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -43,8 +43,6 @@ ++ ++ #ifdef RPI ++ #include "rpi_qpu.h" ++- // For some unknown reason, the code seems to crash if I do a late malloc ++- //#define EARLY_MALLOC ++ // Move Inter prediction into separate pass ++ #define RPI_INTER ++ ++@@ -58,6 +56,21 @@ ++ ++ // Define RPI_SIMULATE_QPUS for debugging to run QPU code on the ARMs ++ //#define RPI_SIMULATE_QPUS +++ #ifdef RPI_WORKER +++ #include "pthread.h" +++ #endif +++ +++ static void rpi_execute_dblk_cmds(HEVCContext *s); +++ static void 
rpi_execute_transform(HEVCContext *s); +++ static void rpi_execute_inter_qpu(HEVCContext *s); +++ static void rpi_execute_pred_cmds(HEVCContext *s); +++ static void rpi_execute_inter_cmds(HEVCContext *s); +++ static void rpi_inter_clear(HEVCContext *s); +++ +++ // Define INTER_PASS0 to do inter prediction in first pass +++ //#define INTER_PASS0 +++ // Define LAUNCH_PASS0 to launch QPU/VPU from pass0 +++ //#define LAUNCH_PASS0 ++ ++ #endif ++ ++@@ -105,6 +118,143 @@ static uint32_t get_vc_address(AVBufferRef *bref) { ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++ return p->vc; ++ } +++#endif +++ +++ +++#ifdef RPI_WORKER +++ +++//#define LOG_ENTER printf("Enter %s: p0=%d p1=%d (%d jobs) %p\n", __func__,s->pass0_job,s->pass1_job,s->worker_tail-s->worker_head,s); +++//#define LOG_EXIT printf("Exit %s: p0=%d p1=%d (%d jobs) %p\n", __func__,s->pass0_job,s->pass1_job,s->worker_tail-s->worker_head,s); +++ +++#define LOG_ENTER +++#define LOG_EXIT +++ +++// Call this when we have completed pass0 and wish to trigger pass1 for the current job +++static void worker_submit_job(HEVCContext *s) +++{ +++ LOG_ENTER +++ //pthread_mutex_lock(&s->worker_mutex); +++ s->worker_tail++; // This is the only place that can change tail so we do not need the mutex +++ s->pass0_job = (s->pass0_job + 1) % RPI_MAX_JOBS; // Move onto the next slot +++ pthread_cond_broadcast(&s->worker_cond_tail); // Let people know that the tail has moved +++ //pthread_mutex_unlock(&s->worker_mutex); +++ LOG_EXIT +++} +++ +++// Call this to say we have completed pass1 +++static void worker_complete_middle_job(HEVCContext *s) +++{ +++ LOG_ENTER +++ //pthread_mutex_lock(&s->worker_mutex); +++ s->worker_middle++; // This is the only place that can change head so we do not need the mutex +++ s->pass1_job = (s->pass1_job + 1) % RPI_MAX_JOBS; // Move onto the next slot +++ pthread_cond_broadcast(&s->worker_cond_middle); // Let people know that the tail has moved +++ //pthread_mutex_unlock(&s->worker_mutex); +++ 
LOG_EXIT +++} +++ +++// Call this to say we have completed pass2 +++static void worker_complete_job(HEVCContext *s) +++{ +++ LOG_ENTER +++ //pthread_mutex_lock(&s->worker_mutex); +++ s->worker_head++; // This is the only place that can change head so we do not need the mutex +++ s->pass2_job = (s->pass2_job + 1) % RPI_MAX_JOBS; // Move onto the next slot +++ pthread_cond_broadcast(&s->worker_cond_head); // Let people know that the tail has moved +++ //pthread_mutex_unlock(&s->worker_mutex); +++ LOG_EXIT +++} +++ +++// Call this to wait for all jobs to have completed at the end of a frame +++static void worker_wait(HEVCContext *s) +++{ +++ LOG_ENTER +++ pthread_mutex_lock(&s->worker_mutex); +++ while( s->worker_head !=s->worker_tail) +++ { +++ pthread_cond_wait(&s->worker_cond_head, &s->worker_mutex); +++ } +++ pthread_mutex_unlock(&s->worker_mutex); +++ LOG_EXIT +++} +++ +++// Call worker_pass0_ready to wait until the s->pass0_job slot becomes +++// available to receive the next job. +++static void worker_pass0_ready(HEVCContext *s) +++{ +++ LOG_ENTER +++ pthread_mutex_lock(&s->worker_mutex); +++ // tail is number of submitted jobs +++ // head is number of completed jobs +++ // tail-head is number of outstanding jobs in the queue +++ // we need to ensure there is at least 1 space left for us to use +++ while( s->worker_tail - s->worker_head >= RPI_MAX_JOBS) +++ { +++ // Wait until another job is completed +++ pthread_cond_wait(&s->worker_cond_head, &s->worker_mutex); +++ } +++ pthread_mutex_unlock(&s->worker_mutex); +++ LOG_EXIT +++} +++ +++static void *worker_start(void *arg) +++{ +++ HEVCContext *s = (HEVCContext *)arg; +++ while(1) { +++ pthread_mutex_lock(&s->worker_mutex); +++ +++ while( !s->kill_worker && s->worker_tail - s->worker_middle <= 0) +++ { +++ pthread_cond_wait(&s->worker_cond_tail, &s->worker_mutex); +++ } +++ pthread_mutex_unlock(&s->worker_mutex); +++ +++ if (s->kill_worker) { +++ break; +++ } +++ LOG_ENTER +++ // printf("%d %d %d : %d %d %d 
%d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); +++#ifndef LAUNCH_PASS0 +++ rpi_execute_inter_qpu(s); +++#endif +++#ifndef INTER_PASS0 +++ // Perform inter prediction +++ rpi_execute_inter_cmds(s); +++#endif +++ // Wait for transform completion +++ vpu_wait(s->vpu_id); +++ +++ worker_complete_middle_job(s); +++ LOG_EXIT +++ } +++ return NULL; +++} +++ +++static void *worker_deblock_start(void *arg) +++{ +++ HEVCContext *s = (HEVCContext *)arg; +++ while(1) { +++ pthread_mutex_lock(&s->worker_mutex); +++ while( !s->kill_worker && s->worker_middle - s->worker_head <= 0) +++ { +++ pthread_cond_wait(&s->worker_cond_middle, &s->worker_mutex); +++ } +++ pthread_mutex_unlock(&s->worker_mutex); +++ +++ if (s->kill_worker) { +++ break; +++ } +++ LOG_ENTER +++ // Perform intra prediction and residual reconstruction +++ rpi_execute_pred_cmds(s); +++ // Perform deblocking for CTBs in this row +++ rpi_execute_dblk_cmds(s); +++ +++ worker_complete_job(s); +++ LOG_EXIT +++ } +++ return NULL; +++} ++ ++ #endif ++ ++@@ -121,19 +271,18 @@ static uint32_t get_vc_address(AVBufferRef *bref) { ++ static void pic_arrays_free(HEVCContext *s) ++ { ++ #ifdef RPI ++- ++-#ifdef EARLY_MALLOC ++-#else ++- if (s->coeffs_buf_arm[0]) { ++- gpu_free(&s->coeffs_buf_default); ++- s->coeffs_buf_arm[0] = 0; ++- } ++- if (s->coeffs_buf_arm[2]) { ++- gpu_free(&s->coeffs_buf_accelerated); ++- s->coeffs_buf_arm[2] = 0; +++ int job; +++ for(job=0;jobcoeffs_buf_arm[job][0]) { +++ gpu_free(&s->coeffs_buf_default[job]); +++ s->coeffs_buf_arm[job][0] = 0; +++ } +++ if (s->coeffs_buf_arm[job][2]) { +++ gpu_free(&s->coeffs_buf_accelerated[job]); +++ s->coeffs_buf_arm[job][2] = 0; +++ } ++ } ++ #endif ++-#endif ++ av_freep(&s->sao); ++ av_freep(&s->deblock); ++ ++@@ -171,24 +320,26 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ int min_pu_size = sps->min_pu_width * sps->min_pu_height; ++ ++ #ifdef RPI ++-#ifdef EARLY_MALLOC ++-#else 
++ av_assert0(sps); ++ int coeffs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); ++ int coefs_per_row = sps->ctb_width * coeffs_in_ctb * 3; // Allow space for chroma ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); ++- s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; ++- if (!s->coeffs_buf_arm[0]) ++- goto fail; ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); ++- s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; ++- s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; ++- if (!s->coeffs_buf_arm[2]) ++- goto fail; ++- s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; ++- s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; ++- printf("Done\n"); ++-#endif +++ int job; +++ for(job=0;jobcoeffs_buf_default[job]); +++ s->coeffs_buf_arm[job][0] = (int16_t*) s->coeffs_buf_default[job].arm; +++ if (!s->coeffs_buf_arm[job][0]) +++ goto fail; +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated[job]); +++ s->coeffs_buf_arm[job][2] = (int16_t*) s->coeffs_buf_accelerated[job].arm; +++ s->coeffs_buf_vc[job][2] = s->coeffs_buf_accelerated[job].vc; +++ if (!s->coeffs_buf_arm[job][2]) +++ goto fail; +++ s->coeffs_buf_arm[job][3] = coefs_per_row + s->coeffs_buf_arm[job][2]; +++ s->coeffs_buf_vc[job][3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[job][2]; +++ } +++ } ++ #endif ++ ++ s->bs_width = (width >> 2) + 1; ++@@ -1036,7 +1187,7 @@ static void rpi_intra_pred(HEVCContext *s, int log2_trafo_size, int x0, int y0, ++ { ++ if (s->enable_rpi) { ++ HEVCLocalContext *lc = s->HEVClc; ++- HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; +++ HEVCPredCmd *cmd = s->univ_pred_cmds[s->pass0_job] + s->num_pred_cmds[s->pass0_job]++; ++ cmd->type = RPI_PRED_INTRA; ++ cmd->size = log2_trafo_size; ++ cmd->c_idx = c_idx; ++@@ -1496,7 +1647,7 @@ static void rpi_luma_mc_uni(HEVCContext *s, uint8_t *dst, 
ptrdiff_t dststride, ++ AVFrame *ref, const Mv *mv, int x_off, int y_off, ++ int block_w, int block_h, int luma_weight, int luma_offset) ++ { ++- HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds[s->pass0_job] + s->num_mv_cmds[s->pass0_job]++; ++ cmd->cmd = RPI_CMD_LUMA_UNI; ++ cmd->dst = dst; ++ cmd->dststride = dststride; ++@@ -1515,7 +1666,7 @@ static void rpi_luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref0, const Mv *mv0, int x_off, int y_off, ++ int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) ++ { ++- HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds[s->pass0_job] + s->num_mv_cmds[s->pass0_job]++; ++ cmd->cmd = RPI_CMD_LUMA_BI; ++ cmd->dst = dst; ++ cmd->dststride = dststride; ++@@ -1537,7 +1688,7 @@ static void rpi_chroma_mc_uni(HEVCContext *s, uint8_t *dst0, ++ ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist, ++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset) ++ { ++- HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds[s->pass0_job] + s->num_mv_cmds[s->pass0_job]++; ++ cmd->cmd = RPI_CMD_CHROMA_UNI; ++ cmd->dst = dst0; ++ cmd->dststride = dststride; ++@@ -1555,7 +1706,7 @@ static void rpi_chroma_mc_uni(HEVCContext *s, uint8_t *dst0, ++ static void rpi_chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1, ++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx) ++ { ++- HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds[s->pass0_job] + s->num_mv_cmds[s->pass0_job]++; ++ cmd->cmd = RPI_CMD_CHROMA_BI+cidx; ++ cmd->dst = dst0; ++ cmd->dststride = dststride; ++@@ -2037,7 +2188,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int chan = x0>>6; // 64 wide blocks per 
QPU ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++- uint32_t *y = s->y_mvs[chan % 12]; +++ uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { ++ int bw = nPbW-start_x; ++@@ -2057,7 +2208,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++- s->y_mvs[chan % 12] = y; +++ s->y_mvs[s->pass0_job][chan % 12] = y; ++ } else ++ #endif ++ { ++@@ -2086,7 +2237,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++- uint32_t *u = s->u_mvs[chan & 7]; +++ uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2110,7 +2261,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[chan & 7] = u; +++ s->u_mvs[s->pass0_job][chan & 7] = u; ++ return; ++ } ++ #endif ++@@ -2140,7 +2291,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int chan = x0>>6; // 64 wide blocks per QPU ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++- uint32_t *y = s->y_mvs[chan % 12]; +++ uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller 
sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { ++ int bw = nPbW-start_x; ++@@ -2160,7 +2311,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++- s->y_mvs[chan % 12] = y; +++ s->y_mvs[s->pass0_job][chan % 12] = y; ++ } else ++ #endif ++ ++@@ -2190,7 +2341,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++- uint32_t *u = s->u_mvs[chan & 7]; +++ uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2215,7 +2366,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[chan & 7] = u; +++ s->u_mvs[s->pass0_job][chan & 7] = u; ++ return; ++ } ++ #endif ++@@ -2249,7 +2400,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int x2 = x0 + (mv2->x >> 2); ++ int y2 = y0 + (mv2->y >> 2); ++ int chan = x0>>6; // 64 wide blocks per QPU ++- uint32_t *y = s->y_mvs[chan % 12]; +++ uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=8) { // B blocks work 8 at a time ++ int bw = nPbW-start_x; ++@@ -2265,7 +2416,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter_b; ++ } ++ } ++- s->y_mvs[chan % 12] = y; +++ s->y_mvs[s->pass0_job][chan % 12] = y; ++ } else ++ #endif ++ { ++@@ -2298,7 +2449,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ ++ int 
chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width ++ ++- uint32_t *u = s->u_mvs[chan & 7]; +++ uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2327,7 +2478,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[chan & 7] = u; +++ s->u_mvs[s->pass0_job][chan & 7] = u; ++ return; ++ } ++ #endif ++@@ -2832,40 +2983,54 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, ++ static void rpi_execute_dblk_cmds(HEVCContext *s) ++ { ++ int n; +++ int job = s->pass2_job; ++ int ctb_size = 1 << s->ps.sps->log2_ctb_size; ++- int (*p)[2] = s->dblk_cmds; ++- for(n = s->num_dblk_cmds; n>0 ;n--,p++) { +++ int (*p)[2] = s->dblk_cmds[job]; +++ for(n = s->num_dblk_cmds[job]; n>0 ;n--,p++) { ++ ff_hevc_hls_filters(s, (*p)[0], (*p)[1], ctb_size); ++ } ++- s->num_dblk_cmds = 0; +++ s->num_dblk_cmds[job] = 0; ++ } ++ ++ static void rpi_execute_transform(HEVCContext *s) ++ { ++ int i=2; +++#ifdef LAUNCH_PASS0 +++ int job = s->pass0_job; +++#else +++ int job = s->pass1_job; +++#endif ++ //int j; ++ //int16_t *coeffs = s->coeffs_buf_arm[i]; ++ //for(j=s->num_coeffs[i]; j > 0; j-= 16*16, coeffs+=16*16) { ++ // s->hevcdsp.idct[4-2](coeffs, 16); ++ //} ++ ++- gpu_cache_flush(&s->coeffs_buf_accelerated); ++- s->vpu_id = vpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, &s->coeffs_buf_accelerated); +++ gpu_cache_flush(&s->coeffs_buf_accelerated[job]); +++ s->vpu_id = vpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], +++ s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], +++ 
s->num_coeffs[job][3] >> 10, 0, &s->coeffs_buf_accelerated[job]); ++ //vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); ++ //gpu_cache_flush(&s->coeffs_buf_accelerated); ++ //vpu_wait(s->vpu_id); ++ ++ for(i=0;i<4;i++) ++- s->num_coeffs[i] = 0; +++ s->num_coeffs[job][i] = 0; ++ } ++ ++ static void rpi_execute_pred_cmds(HEVCContext *s) ++ { ++ int i; ++- HEVCPredCmd *cmd = s->univ_pred_cmds; +++ int job = s->pass2_job; +++ HEVCPredCmd *cmd = s->univ_pred_cmds[job]; +++#ifdef RPI_WORKER +++ HEVCLocalContextIntra *lc = &s->HEVClcIntra; +++#else ++ HEVCLocalContext *lc = s->HEVClc; +++#endif ++ ++- for(i = s->num_pred_cmds; i > 0; i--, cmd++) { +++ for(i = s->num_pred_cmds[job]; i > 0; i--, cmd++) { +++ //printf("i=%d cmd=%p job1=%d job0=%d\n",i,cmd,s->pass1_job,s->pass0_job); ++ if (cmd->type == RPI_PRED_INTRA) { ++ lc->tu.intra_pred_mode_c = lc->tu.intra_pred_mode = cmd->mode; ++ lc->na.cand_bottom_left = (cmd->na >> 4) & 1; ++@@ -2884,21 +3049,26 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ #endif ++ } ++ } ++- s->num_pred_cmds = 0; +++ s->num_pred_cmds[job] = 0; ++ } ++ ++ static void rpi_execute_inter_cmds(HEVCContext *s) ++ { ++- HEVCMvCmd *cmd = s->unif_mv_cmds; +++#ifdef INTER_PASS0 +++ int job = s->pass0_job; +++#else +++ int job = s->pass1_job; +++#endif +++ HEVCMvCmd *cmd = s->unif_mv_cmds[job]; ++ int n,cidx; ++ AVFrame myref; ++ AVFrame myref1; ++ struct MvField mymv; ++- if (s->num_mv_cmds > RPI_MAX_MV_CMDS) { +++ if (s->num_mv_cmds[job] > RPI_MAX_MV_CMDS) { ++ printf("Overflow inter_cmds\n"); ++ exit(-1); ++ } ++- for(n = s->num_mv_cmds; n>0 ; n--, cmd++) { +++ for(n = s->num_mv_cmds[job]; n>0 ; n--, cmd++) { ++ switch(cmd->cmd) { ++ case RPI_CMD_LUMA_UNI: ++ myref.data[0] = cmd->src; ++@@ -2938,7 +3108,28 @@ static void rpi_execute_inter_cmds(HEVCContext *s) ++ break; ++ } ++ } ++- s->num_mv_cmds = 0; +++ s->num_mv_cmds[job] = 0; +++} +++ 
+++static void rpi_do_all_passes(HEVCContext *s) +++{ +++#ifdef RPI_INTER_QPU +++ // Kick off inter prediction on QPUs +++ rpi_execute_inter_qpu(s); +++#else +++ rpi_execute_transform(s); +++#endif +++ // Perform luma inter prediction +++ rpi_execute_inter_cmds(s); +++ // Wait for transform completion +++ vpu_wait(s->vpu_id); +++ // Perform intra prediction and residual reconstruction +++ rpi_execute_pred_cmds(s); +++ // Perform deblocking for CTBs in this row +++ rpi_execute_dblk_cmds(s); +++#ifdef RPI_INTER_QPU +++ rpi_inter_clear(s); +++#endif ++ } ++ ++ #endif ++@@ -2946,6 +3137,7 @@ static void rpi_execute_inter_cmds(HEVCContext *s) ++ #ifdef RPI_INTER_QPU ++ static void rpi_inter_clear(HEVCContext *s) ++ { +++ int job = s->pass0_job; ++ int i; ++ int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1]; ++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1]; ++@@ -2953,51 +3145,50 @@ static void rpi_inter_clear(HEVCContext *s) ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++ for(i=0;i<8;i++) { ++- s->u_mvs[i] = s->mvs_base[i]; ++- *s->u_mvs[i]++ = 0; ++- *s->u_mvs[i]++ = 0; ++- *s->u_mvs[i]++ = 0; ++- *s->u_mvs[i]++ = 0; ++- *s->u_mvs[i]++ = 0; ++- *s->u_mvs[i]++ = pic_width; ++- *s->u_mvs[i]++ = pic_height; ++- *s->u_mvs[i]++ = s->frame->linesize[1]; ++- *s->u_mvs[i]++ = s->frame->linesize[2]; +++ s->u_mvs[job][i] = s->mvs_base[job][i]; +++ *s->u_mvs[job][i]++ = 0; +++ *s->u_mvs[job][i]++ = 0; +++ *s->u_mvs[job][i]++ = 0; +++ *s->u_mvs[job][i]++ = 0; +++ *s->u_mvs[job][i]++ = 0; +++ *s->u_mvs[job][i]++ = pic_width; +++ *s->u_mvs[job][i]++ = pic_height; +++ *s->u_mvs[job][i]++ = s->frame->linesize[1]; +++ *s->u_mvs[job][i]++ = s->frame->linesize[2]; ++ if (weight_flag) { ++- *s->u_mvs[i]++ = 1 << (s->sh.chroma_log2_weight_denom + 6 - 1); ++- *s->u_mvs[i]++ = s->sh.chroma_log2_weight_denom + 6; +++ *s->u_mvs[job][i]++ = 1 << (s->sh.chroma_log2_weight_denom + 6 - 1); +++ *s->u_mvs[job][i]++ = s->sh.chroma_log2_weight_denom + 6; 
++ } else { ++- *s->u_mvs[i]++ = 1 << 5; ++- *s->u_mvs[i]++ = 6; +++ *s->u_mvs[job][i]++ = 1 << 5; +++ *s->u_mvs[job][i]++ = 6; ++ } ++- *s->u_mvs[i]++ = i; // Select section of VPM (avoid collisions with 3d unit) +++ *s->u_mvs[job][i]++ = i; // Select section of VPM (avoid collisions with 3d unit) ++ } ++ ++ #ifdef RPI_LUMA_QPU ++ for(i=0;i<12;i++) { ++- s->y_mvs[i] = s->y_mvs_base[i]; ++- *s->y_mvs[i]++ = 0; // y_x ++- *s->y_mvs[i]++ = 0; // ref_y_base ++- *s->y_mvs[i]++ = 0; // y2_x2 ++- *s->y_mvs[i]++ = 0; // ref_y2_base ++- *s->y_mvs[i]++ = (s->ps.sps->width << 16) + s->ps.sps->height; ++- *s->y_mvs[i]++ = s->frame->linesize[0]; // pitch ++- *s->y_mvs[i]++ = s->frame->linesize[0]; // dst_pitch +++ s->y_mvs[job][i] = s->y_mvs_base[job][i]; +++ *s->y_mvs[job][i]++ = 0; // y_x +++ *s->y_mvs[job][i]++ = 0; // ref_y_base +++ *s->y_mvs[job][i]++ = 0; // y2_x2 +++ *s->y_mvs[job][i]++ = 0; // ref_y2_base +++ *s->y_mvs[job][i]++ = (s->ps.sps->width << 16) + s->ps.sps->height; +++ *s->y_mvs[job][i]++ = s->frame->linesize[0]; // pitch +++ *s->y_mvs[job][i]++ = s->frame->linesize[0]; // dst_pitch ++ if (weight_flag) { ++ int offset = 1 << (s->sh.luma_log2_weight_denom + 6 - 1); ++ int shift = s->sh.luma_log2_weight_denom + 6; ++- *s->y_mvs[i]++ = (offset << 16) + shift; +++ *s->y_mvs[job][i]++ = (offset << 16) + shift; ++ } else { ++ int offset = 1 << 5; ++ int shift = 6; ++- *s->y_mvs[i]++ = (offset << 16) + shift; +++ *s->y_mvs[job][i]++ = (offset << 16) + shift; ++ } ++- *s->y_mvs[i]++ = 0; // Next kernel +++ *s->y_mvs[job][i]++ = 0; // Next kernel ++ } ++ #endif ++ } ++ ++- ++ #ifdef RPI_SIMULATE_QPUS ++ ++ static int32_t clipx(int x,int FRAME_WIDTH) ++@@ -3271,10 +3462,15 @@ static void rpi_simulate_inter_qpu(HEVCContext *s) ++ static void rpi_execute_inter_qpu(HEVCContext *s) ++ { ++ int k; +++#ifdef LAUNCH_PASS0 +++ int job = s->pass0_job; +++#else +++ int job = s->pass1_job; +++#endif ++ int i; ++- uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr.vc; +++ uint32_t 
*unif_vc = (uint32_t *)s->unif_mvs_ptr[job].vc; ++ #ifdef RPI_LUMA_QPU ++- uint32_t *y_unif_vc = (uint32_t *)s->y_unif_mvs_ptr.vc; +++ uint32_t *y_unif_vc = (uint32_t *)s->y_unif_mvs_ptr[job].vc; ++ #endif ++ if (s->sh.slice_type == I_SLICE) { ++ #ifdef RPI_MULTI_MAILBOX ++@@ -3283,22 +3479,22 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ #endif ++ } ++ for(k=0;k<8;k++) { ++- s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++- s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++- s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for V ++- av_assert0(s->u_mvs[k] - s->mvs_base[k] < UV_COMMANDS_PER_QPU); +++ s->u_mvs[job][k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command +++ s->u_mvs[job][k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->u_mvs[job][k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for V +++ av_assert0(s->u_mvs[job][k] - s->mvs_base[job][k] < UV_COMMANDS_PER_QPU); ++ } ++ ++- s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore +++ s->u_mvs[job][8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++ ++ #ifdef RPI_LUMA_QPU ++ for(k=0;k<12;k++) { ++- s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+1] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++- s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for second request ++- 
s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++- av_assert0(s->y_mvs[k] - s->y_mvs_base[k] < Y_COMMANDS_PER_QPU); +++ s->y_mvs[job][k][-RPI_LUMA_COMMAND_WORDS+1] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->y_mvs[job][k][-RPI_LUMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for second request +++ s->y_mvs[job][k][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_EXIT); // Add exit command +++ av_assert0(s->y_mvs[job][k] - s->y_mvs_base[job][k] < Y_COMMANDS_PER_QPU); ++ } ++- s->y_mvs[12-1][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT12); // This QPU will signal interrupt when all others are done and have acquired a semaphore +++ s->y_mvs[job][12-1][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT12); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++ #endif ++ ++ #ifdef RPI_SIMULATE_QPUS ++@@ -3308,34 +3504,34 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ ++ #ifdef RPI_MULTI_MAILBOX ++ #ifdef RPI_CACHE_UNIF_MVS ++- gpu_cache_flush3(&s->coeffs_buf_accelerated,&s->y_unif_mvs_ptr, &s->unif_mvs_ptr); +++ gpu_cache_flush3(&s->coeffs_buf_accelerated[job],&s->y_unif_mvs_ptr[job], &s->unif_mvs_ptr[job]); ++ #else ++- gpu_cache_flush(&s->coeffs_buf_accelerated); +++ gpu_cache_flush(&s->coeffs_buf_accelerated[job]); ++ #endif ++- s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, +++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], s->num_coeffs[job][3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++- (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[1 ] - 
(uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[2 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[3 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][0 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][1 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][2 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][3 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][4 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][5 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][6 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][7 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), ++ #ifdef RPI_LUMA_QPU ++ qpu_get_fn(QPU_MC_SETUP), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[0 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[1 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[2 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[3 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[4 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[5 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[6 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[7 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[8 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- 
(uint32_t)(y_unif_vc+(s->y_mvs_base[9 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[10 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[11 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)) +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][0 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][1 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][2 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][3 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][4 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][5 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][6 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][7 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][8 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][9 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][10 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][11 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)) ++ #else ++ 0, ++ 0,0,0,0, ++@@ -3344,17 +3540,17 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ #endif ++ ); ++ for(i=0;i<4;i++) ++- s->num_coeffs[i] = 0; +++ s->num_coeffs[job][i] = 0; ++ #else ++ qpu_run_shader8(qpu_get_fn(QPU_MC_SETUP_UV), ++- (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[2 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[3 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- 
(uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) +++ (uint32_t)(unif_vc+(s->mvs_base[job][0 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][1 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][2 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][3 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][4 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][5 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][6 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][7 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)) ++ ); ++ #endif ++ ++@@ -3411,6 +3607,11 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ } ++ } ++ +++#ifdef RPI_WORKER +++ s->pass0_job = 0; +++ s->pass1_job = 0; +++ s->pass2_job = 0; +++#endif ++ #ifdef RPI_INTER_QPU ++ rpi_inter_clear(s); ++ #endif ++@@ -3431,46 +3632,42 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag; ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); +++ ++ #ifdef RPI ++ if (s->enable_rpi) { ++- s->dblk_cmds[s->num_dblk_cmds][0] = x_ctb; ++- s->dblk_cmds[s->num_dblk_cmds++][1] = y_ctb; +++ s->dblk_cmds[s->pass0_job][s->num_dblk_cmds[s->pass0_job]][0] = x_ctb; +++ s->dblk_cmds[s->pass0_job][s->num_dblk_cmds[s->pass0_job]++][1] = y_ctb; ++ if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= s->ps.sps->width) { ++- // Transform all blocks ++- // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 
8,s->num_coeffs[3] >> 10); ++-#ifdef RPI_MULTI_MAILBOX ++- // Kick off inter prediction on QPUs ++- rpi_execute_inter_qpu(s); ++- // Perform luma inter prediction ++- rpi_execute_inter_cmds(s); ++-#else ++- rpi_execute_transform(s); ++- // Perform inter prediction ++- rpi_execute_inter_cmds(s); ++-#ifdef RPI_INTER_QPU ++- // Kick off inter prediction on QPUs ++- rpi_execute_inter_qpu(s); ++-#endif ++-#endif ++- ++- // Wait for transform completion ++- vpu_wait(s->vpu_id); ++- ++- // Copy back reconstructed data ++- //memcpy(s->frame->data[0],s->dummy.arm,2048*64); ++- //memcpy(s->frame->data[1],s->dummy.arm,1024*32); ++- //memcpy(s->frame->data[2],s->dummy.arm,1024*32); +++#ifdef RPI_WORKER +++ if (s->used_for_ref) { +++ // Split work load onto separate threads so we make as rapid progress as possible with this frame +++ #ifdef INTER_PASS0 +++ rpi_execute_inter_cmds(s); +++ #endif +++ #ifdef LAUNCH_PASS0 +++ rpi_execute_inter_qpu(s); +++ #endif +++ // Pass on this job to worker thread +++ worker_submit_job(s); +++ // Make sure we have space to prepare the next job +++ worker_pass0_ready(s); ++ ++- // Perform intra prediction and residual reconstruction ++- rpi_execute_pred_cmds(s); ++- // Perform deblocking for CTBs in this row ++- rpi_execute_dblk_cmds(s); +++ // Prepare the next batch of commands ++ #ifdef RPI_INTER_QPU ++- rpi_inter_clear(s); +++ rpi_inter_clear(s); +++#endif +++ } else { +++ // Non-ref frame so do it all on this thread +++ rpi_do_all_passes(s); +++ } +++#else +++ rpi_do_all_passes(s); ++ #endif ++ } ++ } ++ #endif +++ +++ ++ if (more_data < 0) { ++ s->tab_slice_address[ctb_addr_rs] = -1; ++ return more_data; ++@@ -3487,18 +3684,21 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ } ++ ++ #ifdef RPI ++- if (s->enable_rpi && s->num_dblk_cmds) { ++-#ifdef RPI_INTER_QPU ++- rpi_execute_inter_qpu(s); ++-#endif ++-#ifndef RPI_MULTI_MAILBOX ++- rpi_execute_transform(s); +++ +++#ifdef RPI_WORKER +++ // Wait for the worker 
to finish all its jobs +++ if (s->enable_rpi) { +++ worker_wait(s); +++ av_assert0(s->pass0_job==s->pass1_job); +++ av_assert0(s->pass1_job==s->pass2_job); +++ } ++ #endif ++- rpi_execute_inter_cmds(s); ++- vpu_wait(s->vpu_id); ++- rpi_execute_pred_cmds(s); ++- rpi_execute_dblk_cmds(s); +++ +++ // Finish off any half-completed rows +++ if (s->enable_rpi && s->num_dblk_cmds[s->pass0_job]) { +++ rpi_do_all_passes(s); ++ } +++ ++ #endif ++ ++ if (x_ctb + ctb_size >= s->ps.sps->width && ++@@ -4230,6 +4430,48 @@ fail: ++ return AVERROR(ENOMEM); ++ } ++ +++#ifdef RPI_WORKER +++static av_cold void hevc_init_worker(HEVCContext *s) +++{ +++ int err; +++ pthread_cond_init(&s->worker_cond_head, NULL); +++ pthread_cond_init(&s->worker_cond_middle, NULL); +++ pthread_cond_init(&s->worker_cond_tail, NULL); +++ pthread_mutex_init(&s->worker_mutex, NULL); +++ +++ s->worker_tail=0; +++ s->worker_middle=0; +++ s->worker_head=0; +++ s->kill_worker=0; +++ err = pthread_create(&s->worker_thread, NULL, worker_start, s); +++ err = pthread_create(&s->worker_deblock_thread, NULL, worker_deblock_start, s); +++ if (err) { +++ printf("Failed to create worker thread\n"); +++ exit(-1); +++ } +++} +++ +++static av_cold void hevc_exit_worker(HEVCContext *s) +++{ +++ void *res; +++ s->kill_worker=1; +++ pthread_cond_broadcast(&s->worker_cond_tail); +++ pthread_cond_broadcast(&s->worker_cond_middle); +++ pthread_join(s->worker_thread, &res); +++ pthread_join(s->worker_deblock_thread, &res); +++ +++ pthread_cond_destroy(&s->worker_cond_head); +++ pthread_cond_destroy(&s->worker_cond_middle); +++ pthread_cond_destroy(&s->worker_cond_tail); +++ pthread_mutex_destroy(&s->worker_mutex); +++ +++ s->worker_tail=0; +++ s->worker_middle=0; +++ s->worker_head=0; +++ s->kill_worker=0; +++} +++#endif +++ ++ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ { ++ HEVCContext *s = avctx->priv_data; ++@@ -4242,33 +4484,29 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ 
av_freep(&s->cabac_state); ++ ++ #ifdef RPI ++- av_freep(&s->unif_mv_cmds); ++- av_freep(&s->univ_pred_cmds); +++ +++#ifdef RPI_WORKER +++ hevc_exit_worker(s); +++#endif +++ +++ for(i=0;iunif_mv_cmds[i]); +++ av_freep(&s->univ_pred_cmds[i]); ++ ++ #ifdef RPI_INTER_QPU ++- if (s->unif_mvs) { ++- gpu_free( &s->unif_mvs_ptr ); ++- s->unif_mvs = 0; ++- } +++ if (s->unif_mvs[i]) { +++ gpu_free( &s->unif_mvs_ptr[i] ); +++ s->unif_mvs[i] = 0; +++ } ++ #endif ++ #ifdef RPI_LUMA_QPU ++- if (s->y_unif_mvs) { ++- gpu_free( &s->y_unif_mvs_ptr ); ++- s->y_unif_mvs = 0; ++- } +++ if (s->y_unif_mvs[i]) { +++ gpu_free( &s->y_unif_mvs_ptr[i] ); +++ s->y_unif_mvs[i] = 0; +++ } ++ #endif ++- ++-#ifdef EARLY_MALLOC ++- printf("hevc_decode_free\n"); ++- if (s->coeffs_buf_arm[0]) { ++- gpu_free(&s->coeffs_buf_default); ++- s->coeffs_buf_arm[0] = 0; ++- } ++- if (s->coeffs_buf_arm[2]) { ++- gpu_free(&s->coeffs_buf_accelerated); ++- s->coeffs_buf_arm[2] = 0; ++ } ++-#endif +++ ++ #endif ++ ++ for (i = 0; i < 3; i++) { ++@@ -4328,6 +4566,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ { ++ HEVCContext *s = avctx->priv_data; ++ int i; +++ int job; ++ ++ s->avctx = avctx; ++ ++@@ -4338,12 +4577,14 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->sList[0] = s; ++ ++ #ifdef RPI ++- s->unif_mv_cmds = av_mallocz(sizeof(HEVCMvCmd)*RPI_MAX_MV_CMDS); ++- if (!s->unif_mv_cmds) ++- goto fail; ++- s->univ_pred_cmds = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); ++- if (!s->univ_pred_cmds) ++- goto fail; +++ for(job=0;jobunif_mv_cmds[job] = av_mallocz(sizeof(HEVCMvCmd)*RPI_MAX_MV_CMDS); +++ if (!s->unif_mv_cmds[job]) +++ goto fail; +++ s->univ_pred_cmds[job] = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); +++ if (!s->univ_pred_cmds[job]) +++ goto fail; +++ } ++ ++ #ifdef RPI_INTER_QPU ++ // We divide the image into blocks 256 wide and 64 high ++@@ -4354,18 +4595,20 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ { ++ int 
uv_commands_per_qpu = UV_COMMANDS_PER_QPU; ++ uint32_t *p; +++ for(job=0;jobunif_mvs_ptr ); +++ gpu_malloc_cached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr[job] ); ++ #else ++- gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++ gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr[job] ); ++ #endif ++- s->unif_mvs = (uint32_t *) s->unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC +++ s->unif_mvs[job] = (uint32_t *) s->unif_mvs_ptr[job].arm; ++ ++- // Set up initial locations for uniform streams ++- p = s->unif_mvs; ++- for(i = 0; i < 8; i++) { ++- s->mvs_base[i] = p; +++ // Set up initial locations for uniform streams +++ p = s->unif_mvs[job]; +++ for(i = 0; i < 8; i++) { +++ s->mvs_base[job][i] = p; ++ p += uv_commands_per_qpu; +++ } ++ } ++ s->mc_filter_uv = qpu_get_fn(QPU_MC_FILTER_UV); ++ s->mc_filter_uv_b0 = qpu_get_fn(QPU_MC_FILTER_UV_B0); ++@@ -4374,61 +4617,35 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ } ++ #endif ++ #ifdef RPI_LUMA_QPU +++ for(job=0;joby_unif_mvs_ptr ); +++ gpu_malloc_cached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr[job] ); ++ #else ++- gpu_malloc_uncached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr ); +++ gpu_malloc_uncached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr[job] ); ++ #endif ++- s->y_unif_mvs = (uint32_t *) s->y_unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC +++ s->y_unif_mvs[job] = (uint32_t *) s->y_unif_mvs_ptr[job].arm; ++ ++ // Set up initial locations for uniform streams ++- p = s->y_unif_mvs; +++ p = s->y_unif_mvs[job]; ++ for(i = 0; i < 12; i++) { ++- s->y_mvs_base[i] = p; +++ s->y_mvs_base[job][i] = p; ++ p += y_commands_per_qpu; ++ } ++- s->mc_filter = qpu_get_fn(QPU_MC_FILTER); ++- s->mc_filter_b = qpu_get_fn(QPU_MC_FILTER_B); ++- ++ } +++ s->mc_filter = qpu_get_fn(QPU_MC_FILTER); +++ s->mc_filter_b = 
qpu_get_fn(QPU_MC_FILTER_B); ++ #endif ++ //gpu_malloc_uncached(2048*64,&s->dummy); ++ ++-#ifdef EARLY_MALLOC ++- { ++- int coeffs_in_ctb = 64*64; ++- int coefs_per_row = (2048/64) * coeffs_in_ctb * 3; // Allow space for chroma ++- s->coeffs_buf_arm[0] = 0; ++- s->coeffs_buf_arm[2] = 0; ++- printf("Allocated %d\n",coefs_per_row); ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); ++- s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; ++- if (!s->coeffs_buf_arm[0]) ++- goto fail; ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); ++- s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; ++- s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; ++- if (!s->coeffs_buf_arm[2]) ++- goto fail; ++- s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; ++- s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; ++- printf("Done\n"); ++-#ifdef RPI_PRECLEAR ++- //memset(s->coeffs_buf_arm[0],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[0], coefs_per_row); ++- //memset(s->coeffs_buf_arm[2],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[2], coefs_per_row); ++- //memset(s->coeffs_buf_arm[3],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[3], coefs_per_row); ++-#endif ++- } ++-#endif ++- ++ s->enable_rpi = 0; ++ +++#ifdef RPI_WORKER +++ hevc_init_worker(s); +++#endif +++ ++ #endif ++ ++ s->cabac_state = av_malloc(HEVC_CONTEXTS); ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 5cb90b5..7bd295a 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -51,6 +51,12 @@ ++ // Define RPI_LUMA_QPU to also use QPU for luma inter prediction ++ #define RPI_LUMA_QPU ++ #endif +++ +++ // By passing jobs to a worker thread we hope to be able to catch up during slow frames +++ #define RPI_MAX_JOBS 2 +++ // Define RPI_WORKER to launch a worker thread for pixel processing tasks +++ #define 
RPI_WORKER +++ ++ #endif ++ ++ #define MAX_DPB_SIZE 16 // A.4.1 ++@@ -806,6 +812,13 @@ typedef struct HEVCLocalContext { ++ int boundary_flags; ++ } HEVCLocalContext; ++ +++#ifdef RPI_WORKER +++typedef struct HEVCLocalContextIntra { +++ TransformUnit tu; +++ NeighbourAvailable na; +++} HEVCLocalContextIntra; +++#endif +++ ++ #ifdef RPI ++ ++ // RPI_MAX_WIDTH is maximum width in pixels supported by the accelerated code ++@@ -874,7 +887,7 @@ typedef struct HEVCPredCmd { ++ ++ typedef struct HEVCContext { ++ #ifdef RPI ++- int dblk_cmds[RPI_MAX_DEBLOCK_CMDS][2]; +++ int dblk_cmds[RPI_MAX_JOBS][RPI_MAX_DEBLOCK_CMDS][2]; ++ #endif ++ const AVClass *c; // needed by private avoptions ++ AVCodecContext *avctx; ++@@ -883,7 +896,9 @@ typedef struct HEVCContext { ++ ++ HEVCLocalContext *HEVClcList[MAX_NB_THREADS]; ++ HEVCLocalContext *HEVClc; ++- +++#ifdef RPI_WORKER +++ HEVCLocalContextIntra HEVClcIntra; +++#endif ++ uint8_t threads_type; ++ uint8_t threads_number; ++ ++@@ -894,43 +909,60 @@ typedef struct HEVCContext { ++ ++ #ifdef RPI ++ int enable_rpi; ++- HEVCMvCmd *unif_mv_cmds; ++- HEVCPredCmd *univ_pred_cmds; +++ HEVCMvCmd *unif_mv_cmds[RPI_MAX_JOBS]; +++ HEVCPredCmd *univ_pred_cmds[RPI_MAX_JOBS]; ++ int buf_width; ++- GPU_MEM_PTR_T coeffs_buf_default; ++- GPU_MEM_PTR_T coeffs_buf_accelerated; ++- int16_t *coeffs_buf_arm[4]; ++- unsigned int coeffs_buf_vc[4]; ++- int num_coeffs[4]; ++- int num_xfm_cmds; ++- int num_mv_cmds; ++- int num_pred_cmds; ++- int num_dblk_cmds; +++ GPU_MEM_PTR_T coeffs_buf_default[RPI_MAX_JOBS]; +++ GPU_MEM_PTR_T coeffs_buf_accelerated[RPI_MAX_JOBS]; +++ int16_t *coeffs_buf_arm[RPI_MAX_JOBS][4]; +++ unsigned int coeffs_buf_vc[RPI_MAX_JOBS][4]; +++ int num_coeffs[RPI_MAX_JOBS][4]; +++ int num_xfm_cmds[RPI_MAX_JOBS]; +++ int num_mv_cmds[RPI_MAX_JOBS]; +++ int num_pred_cmds[RPI_MAX_JOBS]; +++ int num_dblk_cmds[RPI_MAX_JOBS]; ++ int vpu_id; ++ //GPU_MEM_PTR_T dummy; +++ int pass0_job; // Pass0 does coefficient decode +++ int pass1_job; // Pass1 
does pixel processing +++ int pass2_job; // Pass2 does reconstruction and deblocking ++ #ifdef RPI_INTER_QPU ++- GPU_MEM_PTR_T unif_mvs_ptr; ++- uint32_t *unif_mvs; // Base of memory for motion vector commands +++ GPU_MEM_PTR_T unif_mvs_ptr[RPI_MAX_JOBS]; +++ uint32_t *unif_mvs[RPI_MAX_JOBS]; // Base of memory for motion vector commands ++ ++ // _base pointers are to the start of the row ++- uint32_t *mvs_base[8]; +++ uint32_t *mvs_base[RPI_MAX_JOBS][8]; ++ // these pointers are to the next free space ++- uint32_t *u_mvs[8]; +++ uint32_t *u_mvs[RPI_MAX_JOBS][8]; ++ // Function pointers ++ uint32_t mc_filter_uv; ++ uint32_t mc_filter_uv_b0; ++ uint32_t mc_filter_uv_b; ++ #endif ++ #ifdef RPI_LUMA_QPU ++- GPU_MEM_PTR_T y_unif_mvs_ptr; ++- uint32_t *y_unif_mvs; // Base of memory for motion vector commands ++- uint32_t *y_mvs_base[12]; ++- uint32_t *y_mvs[12]; +++ GPU_MEM_PTR_T y_unif_mvs_ptr[RPI_MAX_JOBS]; +++ uint32_t *y_unif_mvs[RPI_MAX_JOBS]; // Base of memory for motion vector commands +++ uint32_t *y_mvs_base[RPI_MAX_JOBS][12]; +++ uint32_t *y_mvs[RPI_MAX_JOBS][12]; ++ // Function pointers ++ uint32_t mc_filter; ++ uint32_t mc_filter_b; ++ #endif ++ +++#ifdef RPI_WORKER +++ pthread_t worker_thread; +++ pthread_t worker_deblock_thread; +++ pthread_cond_t worker_cond_head; +++ pthread_cond_t worker_cond_tail; +++ pthread_cond_t worker_cond_middle; +++ pthread_mutex_t worker_mutex; +++ +++ int worker_tail; // Contains the number of posted jobs +++ int worker_head; // Contains the number of completed jobs +++ int worker_middle; // Contains the number of completed jobs +++ int kill_worker; // set to 1 to terminate the worker +++#endif +++ ++ #endif ++ ++ uint8_t *cabac_state; ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 38f53de..f0982cd 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1051,11 +1051,11 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ if (s->enable_rpi) { ++ int n = trafo_size 
* trafo_size; ++ if (use_vpu) { ++- coeffs = s->coeffs_buf_arm[log2_trafo_size - 2] + s->num_coeffs[log2_trafo_size - 2]; ++- s->num_coeffs[log2_trafo_size - 2] += n; +++ coeffs = s->coeffs_buf_arm[s->pass0_job][log2_trafo_size - 2] + s->num_coeffs[s->pass0_job][log2_trafo_size - 2]; +++ s->num_coeffs[s->pass0_job][log2_trafo_size - 2] += n; ++ } else { ++- coeffs = s->coeffs_buf_arm[0] + s->num_coeffs[0]; ++- s->num_coeffs[0] += n; +++ coeffs = s->coeffs_buf_arm[s->pass0_job][0] + s->num_coeffs[s->pass0_job][0]; +++ s->num_coeffs[s->pass0_job][0] += n; ++ } ++ } ++ // We now do the memset after transform_add while we know the data is cached. ++@@ -1508,7 +1508,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode); ++ } ++ } else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && log2_trafo_size == 2) { ++- s->hevcdsp.idct_4x4_luma(coeffs); +++ s->hevcdsp.idct_4x4_luma(coeffs); ++ } else { ++ #ifdef RPI ++ if (!use_vpu) { ++@@ -1553,7 +1553,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } ++ #ifdef RPI ++ if (s->enable_rpi) { ++- HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; +++ HEVCPredCmd *cmd = s->univ_pred_cmds[s->pass0_job] + s->num_pred_cmds[s->pass0_job]++; ++ cmd->type = RPI_PRED_TRANSFORM_ADD; ++ cmd->size = log2_trafo_size; ++ cmd->buf = coeffs; ++diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c ++index 71c6d52..344e021 100644 ++--- a/libavcodec/hevcpred_template.c +++++ b/libavcodec/hevcpred_template.c ++@@ -71,8 +71,11 @@ do { \ ++ AV_WN4P(&ptr[i], a); \ ++ else \ ++ a = PIXEL_SPLAT_X4(ptr[i + 3]) ++- +++#ifdef RPI_WORKER +++ HEVCLocalContextIntra *lc = &s->HEVClcIntra; +++#else ++ HEVCLocalContext *lc = s->HEVClc; +++#endif ++ int i; ++ int hshift = s->ps.sps->hshift[c_idx]; ++ int vshift = s->ps.sps->vshift[c_idx]; ++-- ++2.7.4 ++ ++ ++From 1e0885f8d98175777fff65b4cedd708176c2abcf Mon Sep 17 00:00:00 2001 
++From: Peter de Rivaz ++Date: Wed, 3 Jun 2015 13:43:48 +0100 ++Subject: [PATCH 54/68] Avoid lockup bug with RPI_WORKER enabled ++ ++--- ++ libavcodec/hevc.c | 22 +++++++++++----------- ++ libavcodec/hevc_cabac.c | 1 - ++ 2 files changed, 11 insertions(+), 12 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 12aacc5..182a82f 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -133,11 +133,11 @@ static uint32_t get_vc_address(AVBufferRef *bref) { ++ static void worker_submit_job(HEVCContext *s) ++ { ++ LOG_ENTER ++- //pthread_mutex_lock(&s->worker_mutex); ++- s->worker_tail++; // This is the only place that can change tail so we do not need the mutex +++ pthread_mutex_lock(&s->worker_mutex); +++ s->worker_tail++; ++ s->pass0_job = (s->pass0_job + 1) % RPI_MAX_JOBS; // Move onto the next slot ++ pthread_cond_broadcast(&s->worker_cond_tail); // Let people know that the tail has moved ++- //pthread_mutex_unlock(&s->worker_mutex); +++ pthread_mutex_unlock(&s->worker_mutex); ++ LOG_EXIT ++ } ++ ++@@ -145,11 +145,11 @@ static void worker_submit_job(HEVCContext *s) ++ static void worker_complete_middle_job(HEVCContext *s) ++ { ++ LOG_ENTER ++- //pthread_mutex_lock(&s->worker_mutex); ++- s->worker_middle++; // This is the only place that can change head so we do not need the mutex +++ pthread_mutex_lock(&s->worker_mutex); +++ s->worker_middle++; ++ s->pass1_job = (s->pass1_job + 1) % RPI_MAX_JOBS; // Move onto the next slot ++- pthread_cond_broadcast(&s->worker_cond_middle); // Let people know that the tail has moved ++- //pthread_mutex_unlock(&s->worker_mutex); +++ pthread_cond_broadcast(&s->worker_cond_middle); // Let people know that the middle has moved +++ pthread_mutex_unlock(&s->worker_mutex); ++ LOG_EXIT ++ } ++ ++@@ -157,11 +157,11 @@ static void worker_complete_middle_job(HEVCContext *s) ++ static void worker_complete_job(HEVCContext *s) ++ { ++ LOG_ENTER ++- //pthread_mutex_lock(&s->worker_mutex); ++- s->worker_head++; // 
This is the only place that can change head so we do not need the mutex +++ pthread_mutex_lock(&s->worker_mutex); +++ s->worker_head++; ++ s->pass2_job = (s->pass2_job + 1) % RPI_MAX_JOBS; // Move onto the next slot ++- pthread_cond_broadcast(&s->worker_cond_head); // Let people know that the tail has moved ++- //pthread_mutex_unlock(&s->worker_mutex); +++ pthread_cond_broadcast(&s->worker_cond_head); // Let people know that the head has moved +++ pthread_mutex_unlock(&s->worker_mutex); ++ LOG_EXIT ++ } ++ ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index f0982cd..6523e66 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1497,7 +1497,6 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < 8; i++) ++ FFSWAP(int16_t, coeffs[i], coeffs[16 - i - 1]); ++ } ++- ++ s->hevcdsp.transform_skip(coeffs, log2_trafo_size); ++ ++ if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag && ++-- ++2.7.4 ++ ++ ++From 1d7ad81069dec6914ec7e9983855d7a1b5e4b123 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 3 Jun 2015 15:37:19 +0100 ++Subject: [PATCH 55/68] Added code to flush buffers at start of frame ++ ++--- ++ libavcodec/hevc.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ ++ 1 file changed, 72 insertions(+) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 182a82f..e5b9f1e 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -43,6 +43,7 @@ ++ ++ #ifdef RPI ++ #include "rpi_qpu.h" +++ #include "rpi_user_vcsm.h" ++ // Move Inter prediction into separate pass ++ #define RPI_INTER ++ ++@@ -3508,6 +3509,7 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ #else ++ gpu_cache_flush(&s->coeffs_buf_accelerated[job]); ++ #endif +++ ++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], s->num_coeffs[job][3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), 
++ (uint32_t)(unif_vc+(s->mvs_base[job][0 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), ++@@ -3558,6 +3560,71 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ } ++ #endif ++ +++#ifdef RPI +++ +++static void flush_buffer(AVBufferRef *bref) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); +++ gpu_cache_flush(p); +++} +++ +++static void flush_frame(HEVCContext *s,AVFrame *frame) +++{ +++#if 1 +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ int n = s->ps.sps->height; +++ int curr_y = 0; +++ int curr_uv = 0; +++ int n_uv = n >> s->ps.sps->vshift[1]; +++ int sz,base; +++ sz = s->frame->linesize[1] * (n_uv-curr_uv); +++ base = s->frame->linesize[1] * curr_uv; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = p->arm + base; +++ iocache.s[0].size = sz; +++ p = av_buffer_pool_opaque(frame->buf[2]); +++ iocache.s[1].handle = p->vcsm_handle; +++ iocache.s[1].cmd = 3; // clean+invalidate +++ iocache.s[1].addr = p->arm + base; +++ iocache.s[1].size = sz; +++ p = av_buffer_pool_opaque(frame->buf[0]); +++ sz = s->frame->linesize[0] * (n-curr_y); +++ base = s->frame->linesize[0] * curr_y; +++ iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].cmd = 3; // clean+invalidate +++ iocache.s[2].addr = p->arm + base; +++ iocache.s[2].size = sz; +++ vcsm_clean_invalid( &iocache ); +++#else +++ flush_buffer(frame->buf[0]); +++ flush_buffer(frame->buf[1]); +++ flush_buffer(frame->buf[2]); +++#endif +++} +++ +++static void flush_all(HEVCContext *s) +++{ +++#if 0 +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(s->frame->buf[0]); +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 4; // Flush all +++ iocache.s[0].addr = p->arm; +++ iocache.s[0].size = 4096; +++ vcsm_clean_invalid( &iocache ); +++#else +++ int i,k; +++ for(i=0;i<2;i++) { +++ for (k = 0; k < s->sh.nb_refs[i]; k++) { +++ 
flush_frame(s,s->ref->refPicList[i].ref[k]->frame); +++ } +++ } +++ flush_frame(s,s->frame); +++#endif +++} +++#endif +++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ { ++ HEVCContext *s = avctxt->priv_data; ++@@ -3592,8 +3659,12 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ printf("Weighted B slice\n"); ++ } ++ +++ // Now flush all reference frames and our destination frame to get everything ready for decode +++ flush_all(s); ++ #endif ++ +++ //printf("L0=%d L1=%d\n",s->sh.nb_refs[L1],s->sh.nb_refs[L1]); +++ ++ if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { ++ av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n"); ++ return AVERROR_INVALIDDATA; ++@@ -3664,6 +3735,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ rpi_do_all_passes(s); ++ #endif ++ } +++ ++ } ++ #endif ++ ++-- ++2.7.4 ++ ++ ++From 7a57f233dcd4048e20a0b5bc06bc20abb589d3fa Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 3 Jun 2015 16:42:24 +0100 ++Subject: [PATCH 56/68] Reduce the amount that needs to be flushed ++ ++--- ++ libavcodec/hevc.c | 35 +++++++++++------------------------ ++ 1 file changed, 11 insertions(+), 24 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index e5b9f1e..73d7f74 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3569,7 +3569,7 @@ static void flush_buffer(AVBufferRef *bref) { ++ ++ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ { ++-#if 1 +++#ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++ int n = s->ps.sps->height; ++ int curr_y = 0; ++@@ -3603,26 +3603,6 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ #endif ++ } ++ ++-static void flush_all(HEVCContext *s) ++-{ ++-#if 0 ++- struct vcsm_user_clean_invalid_s iocache = {}; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(s->frame->buf[0]); ++- iocache.s[0].handle = p->vcsm_handle; ++- iocache.s[0].cmd = 4; // 
Flush all ++- iocache.s[0].addr = p->arm; ++- iocache.s[0].size = 4096; ++- vcsm_clean_invalid( &iocache ); ++-#else ++- int i,k; ++- for(i=0;i<2;i++) { ++- for (k = 0; k < s->sh.nb_refs[i]; k++) { ++- flush_frame(s,s->ref->refPicList[i].ref[k]->frame); ++- } ++- } ++- flush_frame(s,s->frame); ++-#endif ++-} ++ #endif ++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++@@ -3658,9 +3638,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ if (s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE) ++ printf("Weighted B slice\n"); ++ } ++- ++- // Now flush all reference frames and our destination frame to get everything ready for decode ++- flush_all(s); ++ #endif ++ ++ //printf("L0=%d L1=%d\n",s->sh.nb_refs[L1],s->sh.nb_refs[L1]); ++@@ -4130,6 +4107,11 @@ static int hevc_frame_start(HEVCContext *s) ++ if (!s->avctx->hwaccel) ++ ff_thread_finish_setup(s->avctx); ++ +++#ifdef RPI_INTER_QPU +++ // Invalidate the output data buffer so it is ready for the QPUs to write into it. 
+++ flush_frame(s,s->frame); +++#endif +++ ++ return 0; ++ ++ fail: ++@@ -4331,6 +4313,11 @@ fail: ++ ff_hevc_flush_buffer(s, &s->ref->tf, s->ps.sps->height); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, INT_MAX, 0); +++ } else if (s->ref) { +++#ifdef RPI_INTER_QPU +++ // When running single threaded we need to flush the whole frame +++ flush_frame(s,s->frame); +++#endif ++ } ++ return ret; ++ } ++-- ++2.7.4 ++ ++ ++From 26eba8e3266cc5f2120e8284a1ce486d6a402010 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 4 Jun 2015 07:59:28 +0100 ++Subject: [PATCH 57/68] Corrected support for disabled rpi when using ++ RPI_WORKER ++ ++--- ++ libavcodec/hevc.h | 18 ++++++++++-------- ++ libavcodec/hevcpred_template.c | 2 +- ++ 2 files changed, 11 insertions(+), 9 deletions(-) ++ ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 7bd295a..3cb34bd 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -769,7 +769,17 @@ typedef struct HEVCFrame { ++ uint8_t flags; ++ } HEVCFrame; ++ +++#ifdef RPI_WORKER +++typedef struct HEVCLocalContextIntra { +++ TransformUnit tu; +++ NeighbourAvailable na; +++} HEVCLocalContextIntra; +++#endif +++ ++ typedef struct HEVCLocalContext { +++ TransformUnit tu; +++ NeighbourAvailable na; // WARNING tu and na must be the first two fields to match HEVCLocalContextIntra +++ ++ uint8_t cabac_state[HEVC_CONTEXTS]; ++ ++ uint8_t stat_coeff[4]; ++@@ -784,7 +794,6 @@ typedef struct HEVCLocalContext { ++ ++ int qPy_pred; ++ ++- TransformUnit tu; ++ ++ uint8_t ctb_left_flag; ++ uint8_t ctb_up_flag; ++@@ -801,7 +810,6 @@ typedef struct HEVCLocalContext { ++ int ct_depth; ++ CodingUnit cu; ++ PredictionUnit pu; ++- NeighbourAvailable na; ++ ++ #define BOUNDARY_LEFT_SLICE (1 << 0) ++ #define BOUNDARY_LEFT_TILE (1 << 1) ++@@ -812,12 +820,6 @@ typedef struct HEVCLocalContext { ++ int boundary_flags; ++ } HEVCLocalContext; ++ ++-#ifdef RPI_WORKER ++-typedef struct HEVCLocalContextIntra { ++- TransformUnit tu; ++- 
NeighbourAvailable na; ++-} HEVCLocalContextIntra; ++-#endif ++ ++ #ifdef RPI ++ ++diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c ++index 344e021..325b60e 100644 ++--- a/libavcodec/hevcpred_template.c +++++ b/libavcodec/hevcpred_template.c ++@@ -72,7 +72,7 @@ do { \ ++ else \ ++ a = PIXEL_SPLAT_X4(ptr[i + 3]) ++ #ifdef RPI_WORKER ++- HEVCLocalContextIntra *lc = &s->HEVClcIntra; +++ HEVCLocalContextIntra *lc = s->enable_rpi ? &s->HEVClcIntra : (HEVCLocalContextIntra *)s->HEVClc ; ++ #else ++ HEVCLocalContext *lc = s->HEVClc; ++ #endif ++-- ++2.7.4 ++ ++ ++From 5b3eee9be88a5326df7621de95095def969e05a8 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 4 Jun 2015 11:52:55 +0100 ++Subject: [PATCH 58/68] Draft support for tiles ++ ++--- ++ libavcodec/hevc.c | 140 +++++++++++++++++++++++------------------ ++ libavcodec/hevc.h | 21 +++++-- ++ libavcodec/hevc_filter.c | 2 +- ++ libavcodec/hevcpred_template.c | 2 +- ++ 4 files changed, 99 insertions(+), 66 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 73d7f74..ec67252 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -63,10 +63,10 @@ ++ ++ static void rpi_execute_dblk_cmds(HEVCContext *s); ++ static void rpi_execute_transform(HEVCContext *s); ++- static void rpi_execute_inter_qpu(HEVCContext *s); +++ static void rpi_launch_vpu_qpu(HEVCContext *s); ++ static void rpi_execute_pred_cmds(HEVCContext *s); ++ static void rpi_execute_inter_cmds(HEVCContext *s); ++- static void rpi_inter_clear(HEVCContext *s); +++ static void rpi_begin(HEVCContext *s); ++ ++ // Define INTER_PASS0 to do inter prediction in first pass ++ //#define INTER_PASS0 ++@@ -90,16 +90,18 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ ++ #ifdef RPI_INTER_QPU ++ +++// Each luma QPU processes 2*RPI_NUM_CHUNKS 64x64 blocks +++// Each chroma QPU processes 3*RPI_NUM_CHUNKS 64x64 blocks, but requires two commands for B blocks +++// For each 
block of 64*64 the smallest block size is 8x4 +++// We also need an extra command for the setup information +++ ++ #define RPI_CHROMA_COMMAND_WORDS 12 ++-#define UV_COMMANDS_PER_QPU ((1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS) +++#define UV_COMMANDS_PER_QPU ((1 + 3*RPI_NUM_CHUNKS*(64*64)*2/(8*4)) * RPI_CHROMA_COMMAND_WORDS) ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++ ++-// Split image of 2048 into parts 64 wide ++-// So some QPUs will have 3 blocks of 64 to do, and others 2 blocks for an image 2048 wide with 32 blocks across ++-// For each block of 64*64 the smallest block size is 8x4 ++ #define RPI_LUMA_COMMAND_WORDS 9 ++-#define Y_COMMANDS_PER_QPU ((1+3*(64*64)/(8*4)) * RPI_LUMA_COMMAND_WORDS) +++#define Y_COMMANDS_PER_QPU ((1+2*RPI_NUM_CHUNKS*(64*64)/(8*4)) * RPI_LUMA_COMMAND_WORDS) ++ ++ #define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++@@ -216,7 +218,7 @@ static void *worker_start(void *arg) ++ LOG_ENTER ++ // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++ #ifndef LAUNCH_PASS0 ++- rpi_execute_inter_qpu(s); +++ rpi_launch_vpu_qpu(s); ++ #endif ++ #ifndef INTER_PASS0 ++ // Perform inter prediction ++@@ -322,9 +324,14 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ ++ #ifdef RPI ++ av_assert0(sps); ++- int coeffs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); ++- int coefs_per_row = sps->ctb_width * coeffs_in_ctb * 3; // Allow space for chroma +++ int coefs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); +++ int coefs_per_luma = 64*64*24*RPI_NUM_CHUNKS; +++ int coefs_per_chroma = (coefs_per_luma * 2) >> sps->vshift[1] >> sps->hshift[1]; +++ int coefs_per_row = coefs_per_luma + coefs_per_chroma; ++ int job; +++ s->max_ctu_count = coefs_per_luma / coefs_in_ctb; +++ 
s->ctu_per_y_chan = s->max_ctu_count / 12; +++ s->ctu_per_uv_chan = s->max_ctu_count / 8; ++ for(job=0;jobx >> 2); ++ int y1 = y0 + (mv->y >> 2); ++- int chan = x0>>6; // 64 wide blocks per QPU ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++- uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; +++ uint32_t *y = s->curr_y_mvs; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { ++ int bw = nPbW-start_x; ++@@ -2209,7 +2215,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++- s->y_mvs[s->pass0_job][chan % 12] = y; +++ s->curr_y_mvs = y; ++ } else ++ #endif ++ { ++@@ -2233,12 +2239,10 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ ++ int x1_c = x0_c + (mv->x >> (2 + hshift)); ++ int y1_c = y0_c + (mv->y >> (2 + hshift)); ++- //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width ++- int chan = x0>>8; ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++- uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; +++ uint32_t *u = s->curr_u_mvs; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2262,7 +2266,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[s->pass0_job][chan & 7] = u; +++ s->curr_u_mvs = u; ++ return; ++ } ++ #endif ++@@ -2289,10 +2293,9 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int my2_mx2_my_mx = (my_mx << 16) + my_mx; ++ int x1 = x0 + (mv->x >> 2); ++ int y1 = y0 + (mv->y >> 2); ++- int chan = x0>>6; // 64 wide blocks per QPU ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++- uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; +++ uint32_t *y = s->curr_y_mvs; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { ++ int bw = nPbW-start_x; ++@@ -2312,7 +2315,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++- s->y_mvs[s->pass0_job][chan % 12] = y; +++ s->curr_y_mvs = y; ++ } else ++ #endif ++ ++@@ -2337,12 +2340,10 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ ++ int x1_c = x0_c + (mv->x >> (2 + hshift)); ++ int y1_c = y0_c + (mv->y >> (2 + hshift)); ++- //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width ++- int chan = x0>>8; ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++- uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; +++ uint32_t *u = s->curr_u_mvs; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2367,7 +2368,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[s->pass0_job][chan & 7] = u; +++ s->curr_u_mvs = u; ++ return; ++ } ++ #endif ++@@ -2400,8 +2401,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int y1 = y0 + (mv->y >> 2); ++ int x2 = x0 + (mv2->x >> 2); ++ int y2 = y0 + (mv2->y >> 2); ++- int chan = x0>>6; // 64 wide blocks per QPU ++- uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; +++ uint32_t *y = s->curr_y_mvs; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=8) { // B blocks work 8 at a time ++ int bw = nPbW-start_x; ++@@ -2417,7 +2417,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter_b; ++ } ++ } ++- s->y_mvs[s->pass0_job][chan % 12] = y; +++ s->curr_y_mvs = y; ++ } else ++ #endif ++ { ++@@ -2448,9 +2448,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int x2_c = x0_c + (mv2->x >> (2 + hshift)); ++ int y2_c = y0_c + (mv2->y >> (2 + hshift)); ++ ++- int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width ++ ++- uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; +++ uint32_t *u = s->curr_u_mvs; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2479,7 +2478,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[s->pass0_job][chan & 7] = u; +++ s->curr_u_mvs = u; ++ return; ++ } ++ #endif ++@@ -3114,12 +3113,8 @@ static void rpi_execute_inter_cmds(HEVCContext *s) ++ ++ static void rpi_do_all_passes(HEVCContext *s) ++ { ++-#ifdef RPI_INTER_QPU ++- // Kick off inter prediction on QPUs ++- rpi_execute_inter_qpu(s); ++-#else ++- rpi_execute_transform(s); ++-#endif +++ // Kick off QPUs and VPUs +++ rpi_launch_vpu_qpu(s); ++ // Perform luma inter prediction ++ rpi_execute_inter_cmds(s); ++ // Wait for transform completion ++@@ -3128,18 +3123,18 @@ static void rpi_do_all_passes(HEVCContext *s) ++ rpi_execute_pred_cmds(s); ++ // Perform deblocking for CTBs in this row ++ rpi_execute_dblk_cmds(s); ++-#ifdef RPI_INTER_QPU ++- rpi_inter_clear(s); ++-#endif +++ // Prepare next batch +++ rpi_begin(s); ++ } ++ ++ #endif ++ ++-#ifdef RPI_INTER_QPU ++-static void rpi_inter_clear(HEVCContext *s) +++#ifdef RPI +++static void rpi_begin(HEVCContext *s) ++ { ++ int job = s->pass0_job; ++ int i; +++#ifdef RPI_INTER_QPU ++ int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1]; ++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1]; ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++@@ -3165,6 +3160,8 @@ static void rpi_inter_clear(HEVCContext *s) ++ } ++ *s->u_mvs[job][i]++ = i; // Select section of VPM (avoid collisions with 3d unit) ++ } +++ s->curr_u_mvs = s->u_mvs[job][0]; +++#endif ++ ++ #ifdef RPI_LUMA_QPU ++ for(i=0;i<12;i++) { ++@@ -3187,8 
+3184,11 @@ static void rpi_inter_clear(HEVCContext *s) ++ } ++ *s->y_mvs[job][i]++ = 0; // Next kernel ++ } +++ s->curr_y_mvs = s->y_mvs[job][0]; ++ #endif +++ s->ctu_count = 0; ++ } +++#endif ++ ++ #ifdef RPI_SIMULATE_QPUS ++ ++@@ -3459,8 +3459,9 @@ static void rpi_simulate_inter_qpu(HEVCContext *s) ++ ++ #endif ++ +++#ifdef RPI_INTER_QPU ++ ++-static void rpi_execute_inter_qpu(HEVCContext *s) +++static void rpi_launch_vpu_qpu(HEVCContext *s) ++ { ++ int k; ++ #ifdef LAUNCH_PASS0 ++@@ -3558,6 +3559,15 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ ++ ++ } +++#else +++ +++#ifdef RPI +++static void rpi_launch_vpu_qpu(HEVCContext *s) +++{ +++ rpi_execute_transform(s); +++} +++#endif +++ ++ #endif ++ ++ #ifdef RPI ++@@ -3617,29 +3627,20 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI ++ #ifdef RPI_INTER_QPU ++ s->enable_rpi = s->ps.sps->bit_depth == 8 ++- && s->ps.sps->width <= RPI_MAX_WIDTH ++ && !s->ps.pps->cross_component_prediction_enabled_flag ++- && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1 ++ && !(s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE); ++ #else ++ s->enable_rpi = s->ps.sps->bit_depth == 8 ++- && s->ps.sps->width <= RPI_MAX_WIDTH ++- && !s->ps.pps->cross_component_prediction_enabled_flag ++- && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1; +++ && !s->ps.pps->cross_component_prediction_enabled_flag; ++ #endif ++ ++ if (!s->enable_rpi) { ++ if (s->ps.pps->cross_component_prediction_enabled_flag) ++ printf("Cross component\n"); ++- if (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1) ++- printf("Tiles\n"); ++- if (s->ps.pps->weighted_pred_flag && s->sh.slice_type == P_SLICE) ++- printf("Weighted P slice\n"); ++ if (s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE) ++ printf("Weighted B slice\n"); ++ } ++ #endif ++- ++ //printf("L0=%d L1=%d\n",s->sh.nb_refs[L1],s->sh.nb_refs[L1]); ++ ++ if (!ctb_addr_ts && 
s->sh.dependent_slice_segment_flag) { ++@@ -3660,8 +3661,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->pass1_job = 0; ++ s->pass2_job = 0; ++ #endif ++-#ifdef RPI_INTER_QPU ++- rpi_inter_clear(s); +++#ifdef RPI +++ rpi_begin(s); ++ #endif ++ ++ while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) { ++@@ -3679,13 +3680,34 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset; ++ s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag; ++ +++#ifdef RPI_INTER_QPU +++ s->curr_u_mvs = s->u_mvs[s->pass0_job][s->ctu_count / s->ctu_per_uv_chan]; +++#endif +++#ifdef RPI_LUMA_QPU +++ s->curr_y_mvs = s->y_mvs[s->pass0_job][s->ctu_count / s->ctu_per_y_chan]; +++#endif +++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ +++#ifdef RPI_INTER_QPU +++ s->u_mvs[s->pass0_job][s->ctu_count / s->ctu_per_uv_chan] = s->curr_u_mvs; +++#endif +++#ifdef RPI_LUMA_QPU +++ s->y_mvs[s->pass0_job][s->ctu_count / s->ctu_per_y_chan] = s->curr_y_mvs; +++#endif +++ ++ #ifdef RPI ++ if (s->enable_rpi) { +++ //av_assert0(s->num_dblk_cmds[s->pass0_job]>=0); +++ //av_assert0(s->num_dblk_cmds[s->pass0_job]pass0_jobpass0_job>=0); ++ s->dblk_cmds[s->pass0_job][s->num_dblk_cmds[s->pass0_job]][0] = x_ctb; ++ s->dblk_cmds[s->pass0_job][s->num_dblk_cmds[s->pass0_job]++][1] = y_ctb; ++- if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= s->ps.sps->width) { +++ s->ctu_count++; +++ //printf("%d %d/%d job=%d\n",s->ctu_count,s->num_dblk_cmds[s->pass0_job],RPI_MAX_DEBLOCK_CMDS,s->pass0_job); +++ +++ if ( s->ctu_count >= s->max_ctu_count ) { ++ #ifdef RPI_WORKER ++ if (s->used_for_ref) { ++ // Split work load onto separate threads so we make as rapid progress as possible with this frame ++@@ -3693,7 +3715,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ rpi_execute_inter_cmds(s); ++ #endif ++ 
#ifdef LAUNCH_PASS0 ++- rpi_execute_inter_qpu(s); +++ rpi_launch_vpu_qpu(s); ++ #endif ++ // Pass on this job to worker thread ++ worker_submit_job(s); ++@@ -3701,9 +3723,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ worker_pass0_ready(s); ++ ++ // Prepare the next batch of commands ++-#ifdef RPI_INTER_QPU ++- rpi_inter_clear(s); ++-#endif +++ rpi_begin(s); ++ } else { ++ // Non-ref frame so do it all on this thread ++ rpi_do_all_passes(s); ++@@ -3744,7 +3764,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #endif ++ ++ // Finish off any half-completed rows ++- if (s->enable_rpi && s->num_dblk_cmds[s->pass0_job]) { +++ if (s->enable_rpi && s->ctu_count) { ++ rpi_do_all_passes(s); ++ } ++ ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 3cb34bd..a141316 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -823,8 +823,15 @@ typedef struct HEVCLocalContext { ++ ++ #ifdef RPI ++ +++// The processing is done in chunks +++// Each chunk corresponds to 24 64x64 luma blocks (24 so it is divisible by 8 for chroma and 12 for luma) +++// This is a distance of 1536 pixels across the screen +++// Increasing RPI_NUM_CHUNKS will reduce time spent activating QPUs and cache flushing, +++// but allocate more memory and increase the latency before data in the next frame can be processed +++#define RPI_NUM_CHUNKS 1 +++ ++ // RPI_MAX_WIDTH is maximum width in pixels supported by the accelerated code ++-#define RPI_MAX_WIDTH 2048 +++#define RPI_MAX_WIDTH (RPI_NUM_CHUNKS*64*24) ++ ++ // Worst case is for 4:4:4 4x4 blocks with 64 high coding tree blocks, so 16 MV cmds per 4 pixels across for each colour plane, * 2 for bi ++ #define RPI_MAX_MV_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) ++@@ -888,9 +895,6 @@ typedef struct HEVCPredCmd { ++ #endif ++ ++ typedef struct HEVCContext { ++-#ifdef RPI ++- int dblk_cmds[RPI_MAX_JOBS][RPI_MAX_DEBLOCK_CMDS][2]; ++-#endif ++ const AVClass *c; // needed by private avoptions 
++ AVCodecContext *avctx; ++ ++@@ -928,6 +932,10 @@ typedef struct HEVCContext { ++ int pass0_job; // Pass0 does coefficient decode ++ int pass1_job; // Pass1 does pixel processing ++ int pass2_job; // Pass2 does reconstruction and deblocking +++ int ctu_count; // Number of CTUs done in pass0 so far +++ int max_ctu_count; // Number of CTUs when we trigger a round of processing +++ int ctu_per_y_chan; // Number of CTUs per luma QPU +++ int ctu_per_uv_chan; // Number of CTUs per chroma QPU ++ #ifdef RPI_INTER_QPU ++ GPU_MEM_PTR_T unif_mvs_ptr[RPI_MAX_JOBS]; ++ uint32_t *unif_mvs[RPI_MAX_JOBS]; // Base of memory for motion vector commands ++@@ -936,6 +944,7 @@ typedef struct HEVCContext { ++ uint32_t *mvs_base[RPI_MAX_JOBS][8]; ++ // these pointers are to the next free space ++ uint32_t *u_mvs[RPI_MAX_JOBS][8]; +++ uint32_t *curr_u_mvs; // Current uniform stream to use for chroma ++ // Function pointers ++ uint32_t mc_filter_uv; ++ uint32_t mc_filter_uv_b0; ++@@ -946,6 +955,7 @@ typedef struct HEVCContext { ++ uint32_t *y_unif_mvs[RPI_MAX_JOBS]; // Base of memory for motion vector commands ++ uint32_t *y_mvs_base[RPI_MAX_JOBS][12]; ++ uint32_t *y_mvs[RPI_MAX_JOBS][12]; +++ uint32_t *curr_y_mvs; // Current uniform stream for luma ++ // Function pointers ++ uint32_t mc_filter; ++ uint32_t mc_filter_b; ++@@ -1084,6 +1094,9 @@ typedef struct HEVCContext { ++ uint32_t max_mastering_luminance; ++ uint32_t min_mastering_luminance; ++ +++#ifdef RPI +++ int dblk_cmds[RPI_MAX_JOBS][RPI_MAX_DEBLOCK_CMDS][2]; +++#endif ++ } HEVCContext; ++ ++ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index b286bbf..1f04790 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -891,7 +891,7 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ int n_uv = n >> s->ps.sps->vshift[1]; ++ int sz,base; ++ if (curr_uv < 0) curr_uv = 0; ++- if (n_uv<=curr_uv) { 
assert(0); return; } // Should not happen +++ if (n_uv<=curr_uv) { return; } ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(s->frame->buf[1]); ++diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c ++index 325b60e..28d2653 100644 ++--- a/libavcodec/hevcpred_template.c +++++ b/libavcodec/hevcpred_template.c ++@@ -72,7 +72,7 @@ do { \ ++ else \ ++ a = PIXEL_SPLAT_X4(ptr[i + 3]) ++ #ifdef RPI_WORKER ++- HEVCLocalContextIntra *lc = s->enable_rpi ? &s->HEVClcIntra : (HEVCLocalContextIntra *)s->HEVClc ; +++ HEVCLocalContextIntra *lc = (s->enable_rpi) ? &s->HEVClcIntra : (HEVCLocalContextIntra *)s->HEVClc ; ++ #else ++ HEVCLocalContext *lc = s->HEVClc; ++ #endif ++-- ++2.7.4 ++ ++ ++From 1674a80d147e5342ef6ea9a4fb4ddfc640c15a05 Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Thu, 4 Jun 2015 15:48:10 +0100 ++Subject: [PATCH 59/68] Move deblocker into second pass ++ ++--- ++ libavcodec/hevc.c | 79 +++++++++++++++++++++++++++++++++++++++++++++---------- ++ 1 file changed, 65 insertions(+), 14 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index ec67252..6cecbdd 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -67,6 +67,8 @@ ++ static void rpi_execute_pred_cmds(HEVCContext *s); ++ static void rpi_execute_inter_cmds(HEVCContext *s); ++ static void rpi_begin(HEVCContext *s); +++ static void flush_frame(HEVCContext *s,AVFrame *frame); +++ static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2); ++ ++ // Define INTER_PASS0 to do inter prediction in first pass ++ //#define INTER_PASS0 ++@@ -227,6 +229,11 @@ static void *worker_start(void *arg) ++ // Wait for transform completion ++ vpu_wait(s->vpu_id); ++ +++ // Perform intra prediction and residual reconstruction +++ rpi_execute_pred_cmds(s); +++ // Perform deblocking for CTBs in this row +++ rpi_execute_dblk_cmds(s); +++ ++ 
worker_complete_middle_job(s); ++ LOG_EXIT ++ } ++@@ -248,10 +255,6 @@ static void *worker_deblock_start(void *arg) ++ break; ++ } ++ LOG_ENTER ++- // Perform intra prediction and residual reconstruction ++- rpi_execute_pred_cmds(s); ++- // Perform deblocking for CTBs in this row ++- rpi_execute_dblk_cmds(s); ++ ++ worker_complete_job(s); ++ LOG_EXIT ++@@ -2983,7 +2986,7 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, ++ static void rpi_execute_dblk_cmds(HEVCContext *s) ++ { ++ int n; ++- int job = s->pass2_job; +++ int job = s->pass1_job; ++ int ctb_size = 1 << s->ps.sps->log2_ctb_size; ++ int (*p)[2] = s->dblk_cmds[job]; ++ for(n = s->num_dblk_cmds[job]; n>0 ;n--,p++) { ++@@ -3021,7 +3024,7 @@ static void rpi_execute_transform(HEVCContext *s) ++ static void rpi_execute_pred_cmds(HEVCContext *s) ++ { ++ int i; ++- int job = s->pass2_job; +++ int job = s->pass1_job; ++ HEVCPredCmd *cmd = s->univ_pred_cmds[job]; ++ #ifdef RPI_WORKER ++ HEVCLocalContextIntra *lc = &s->HEVClcIntra; ++@@ -3506,11 +3509,10 @@ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ ++ #ifdef RPI_MULTI_MAILBOX ++ #ifdef RPI_CACHE_UNIF_MVS ++- gpu_cache_flush3(&s->coeffs_buf_accelerated[job],&s->y_unif_mvs_ptr[job], &s->unif_mvs_ptr[job]); +++ flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],&s->y_unif_mvs_ptr[job], &s->unif_mvs_ptr[job]); ++ #else ++- gpu_cache_flush(&s->coeffs_buf_accelerated[job]); +++ flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],NULL,NULL); ++ #endif ++- ++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], s->num_coeffs[job][3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++ (uint32_t)(unif_vc+(s->mvs_base[job][0 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), ++@@ -3613,6 +3615,60 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ #endif ++ } ++ +++static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T 
*p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2) +++{ +++#ifdef RPI_FAST_CACHEFLUSH +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ int n = s->ps.sps->height; +++ int curr_y = 0; +++ int curr_uv = 0; +++ int n_uv = n >> s->ps.sps->vshift[1]; +++ int sz,base; +++ sz = s->frame->linesize[1] * (n_uv-curr_uv); +++ base = s->frame->linesize[1] * curr_uv; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = p->arm + base; +++ iocache.s[0].size = sz; +++ p = av_buffer_pool_opaque(frame->buf[2]); +++ iocache.s[1].handle = p->vcsm_handle; +++ iocache.s[1].cmd = 3; // clean+invalidate +++ iocache.s[1].addr = p->arm + base; +++ iocache.s[1].size = sz; +++ p = av_buffer_pool_opaque(frame->buf[0]); +++ sz = s->frame->linesize[0] * (n-curr_y); +++ base = s->frame->linesize[0] * curr_y; +++ iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].cmd = 3; // clean+invalidate +++ iocache.s[2].addr = p->arm + base; +++ iocache.s[2].size = sz; +++ +++ iocache.s[3].handle = p0->vcsm_handle; +++ iocache.s[3].cmd = 3; // clean+invalidate +++ iocache.s[3].addr = (int) p0->arm; +++ iocache.s[3].size = p0->numbytes; +++ if (p1) { +++ iocache.s[4].handle = p1->vcsm_handle; +++ iocache.s[4].cmd = 3; // clean+invalidate +++ iocache.s[4].addr = (int) p1->arm; +++ iocache.s[4].size = p1->numbytes; +++ } +++ if (p2) { +++ iocache.s[5].handle = p2->vcsm_handle; +++ iocache.s[5].cmd = 3; // clean+invalidate +++ iocache.s[5].addr = (int) p2->arm; +++ iocache.s[5].size = p2->numbytes; +++ } +++ vcsm_clean_invalid( &iocache ); +++#else +++ flush_buffer(frame->buf[0]); +++ flush_buffer(frame->buf[1]); +++ flush_buffer(frame->buf[2]); +++ gpu_cache_flush3(p0, p1, p2); +++#endif +++} +++ ++ #endif ++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++@@ -4127,11 +4183,6 @@ static int hevc_frame_start(HEVCContext *s) ++ if (!s->avctx->hwaccel) ++ 
ff_thread_finish_setup(s->avctx); ++ ++-#ifdef RPI_INTER_QPU ++- // Invalidate the output data buffer so it is ready for the QPUs to write into it. ++- flush_frame(s,s->frame); ++-#endif ++- ++ return 0; ++ ++ fail: ++-- ++2.7.4 ++ ++ ++From a453fe438c4ab311d6476955d0a40a5d2ed8a1c6 Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Thu, 4 Jun 2015 16:10:23 +0100 ++Subject: [PATCH 60/68] Change order of ctu accesses to improve qpu performance ++ ++--- ++ libavcodec/hevc.c | 8 ++++---- ++ 1 file changed, 4 insertions(+), 4 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 6cecbdd..ec17e64 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3737,19 +3737,19 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag; ++ ++ #ifdef RPI_INTER_QPU ++- s->curr_u_mvs = s->u_mvs[s->pass0_job][s->ctu_count / s->ctu_per_uv_chan]; +++ s->curr_u_mvs = s->u_mvs[s->pass0_job][s->ctu_count % 8]; ++ #endif ++ #ifdef RPI_LUMA_QPU ++- s->curr_y_mvs = s->y_mvs[s->pass0_job][s->ctu_count / s->ctu_per_y_chan]; +++ s->curr_y_mvs = s->y_mvs[s->pass0_job][s->ctu_count % 12]; ++ #endif ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ ++ #ifdef RPI_INTER_QPU ++- s->u_mvs[s->pass0_job][s->ctu_count / s->ctu_per_uv_chan] = s->curr_u_mvs; +++ s->u_mvs[s->pass0_job][s->ctu_count % 8]= s->curr_u_mvs; ++ #endif ++ #ifdef RPI_LUMA_QPU ++- s->y_mvs[s->pass0_job][s->ctu_count / s->ctu_per_y_chan] = s->curr_y_mvs; +++ s->y_mvs[s->pass0_job][s->ctu_count % 12] = s->curr_y_mvs; ++ #endif ++ ++ #ifdef RPI ++-- ++2.7.4 ++ ++ ++From 504de0435e8f660c1b7b2d6ec053dc922a2d2896 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Mon, 8 Jun 2015 09:36:59 +0100 ++Subject: [PATCH 61/68] Removed deblocker thread ++ ++--- ++ libavcodec/hevc.c | 77 +++---------------------------------------------------- ++ libavcodec/hevc.h | 4 
--- ++ 2 files changed, 4 insertions(+), 77 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index ec17e64..1868532 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -70,11 +70,6 @@ ++ static void flush_frame(HEVCContext *s,AVFrame *frame); ++ static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2); ++ ++- // Define INTER_PASS0 to do inter prediction in first pass ++- //#define INTER_PASS0 ++- // Define LAUNCH_PASS0 to launch QPU/VPU from pass0 ++- //#define LAUNCH_PASS0 ++- ++ #endif ++ ++ // #define DISABLE_MC ++@@ -147,24 +142,12 @@ static void worker_submit_job(HEVCContext *s) ++ } ++ ++ // Call this to say we have completed pass1 ++-static void worker_complete_middle_job(HEVCContext *s) ++-{ ++- LOG_ENTER ++- pthread_mutex_lock(&s->worker_mutex); ++- s->worker_middle++; ++- s->pass1_job = (s->pass1_job + 1) % RPI_MAX_JOBS; // Move onto the next slot ++- pthread_cond_broadcast(&s->worker_cond_middle); // Let people know that the middle has moved ++- pthread_mutex_unlock(&s->worker_mutex); ++- LOG_EXIT ++-} ++- ++-// Call this to say we have completed pass2 ++ static void worker_complete_job(HEVCContext *s) ++ { ++ LOG_ENTER ++ pthread_mutex_lock(&s->worker_mutex); ++ s->worker_head++; ++- s->pass2_job = (s->pass2_job + 1) % RPI_MAX_JOBS; // Move onto the next slot +++ s->pass1_job = (s->pass1_job + 1) % RPI_MAX_JOBS; // Move onto the next slot ++ pthread_cond_broadcast(&s->worker_cond_head); // Let people know that the head has moved ++ pthread_mutex_unlock(&s->worker_mutex); ++ LOG_EXIT ++@@ -208,7 +191,7 @@ static void *worker_start(void *arg) ++ while(1) { ++ pthread_mutex_lock(&s->worker_mutex); ++ ++- while( !s->kill_worker && s->worker_tail - s->worker_middle <= 0) +++ while( !s->kill_worker && s->worker_tail - s->worker_head <= 0) ++ { ++ pthread_cond_wait(&s->worker_cond_tail, &s->worker_mutex); ++ } ++@@ -219,13 +202,9 @@ static void *worker_start(void *arg) ++ } 
++ LOG_ENTER ++ // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++-#ifndef LAUNCH_PASS0 ++ rpi_launch_vpu_qpu(s); ++-#endif ++-#ifndef INTER_PASS0 ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); ++-#endif ++ // Wait for transform completion ++ vpu_wait(s->vpu_id); ++ ++@@ -234,28 +213,6 @@ static void *worker_start(void *arg) ++ // Perform deblocking for CTBs in this row ++ rpi_execute_dblk_cmds(s); ++ ++- worker_complete_middle_job(s); ++- LOG_EXIT ++- } ++- return NULL; ++-} ++- ++-static void *worker_deblock_start(void *arg) ++-{ ++- HEVCContext *s = (HEVCContext *)arg; ++- while(1) { ++- pthread_mutex_lock(&s->worker_mutex); ++- while( !s->kill_worker && s->worker_middle - s->worker_head <= 0) ++- { ++- pthread_cond_wait(&s->worker_cond_middle, &s->worker_mutex); ++- } ++- pthread_mutex_unlock(&s->worker_mutex); ++- ++- if (s->kill_worker) { ++- break; ++- } ++- LOG_ENTER ++- ++ worker_complete_job(s); ++ LOG_EXIT ++ } ++@@ -2998,11 +2955,7 @@ static void rpi_execute_dblk_cmds(HEVCContext *s) ++ static void rpi_execute_transform(HEVCContext *s) ++ { ++ int i=2; ++-#ifdef LAUNCH_PASS0 ++- int job = s->pass0_job; ++-#else ++ int job = s->pass1_job; ++-#endif ++ //int j; ++ //int16_t *coeffs = s->coeffs_buf_arm[i]; ++ //for(j=s->num_coeffs[i]; j > 0; j-= 16*16, coeffs+=16*16) { ++@@ -3057,11 +3010,7 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ ++ static void rpi_execute_inter_cmds(HEVCContext *s) ++ { ++-#ifdef INTER_PASS0 ++- int job = s->pass0_job; ++-#else ++ int job = s->pass1_job; ++-#endif ++ HEVCMvCmd *cmd = s->unif_mv_cmds[job]; ++ int n,cidx; ++ AVFrame myref; ++@@ -3467,11 +3416,7 @@ static void rpi_simulate_inter_qpu(HEVCContext *s) ++ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ { ++ int k; ++-#ifdef LAUNCH_PASS0 ++- int job = s->pass0_job; ++-#else ++ int job = s->pass1_job; ++-#endif ++ int i; ++ uint32_t *unif_vc = (uint32_t 
*)s->unif_mvs_ptr[job].vc; ++ #ifdef RPI_LUMA_QPU ++@@ -3574,10 +3519,12 @@ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ ++ #ifdef RPI ++ +++#ifndef RPI_FAST_CACHEFLUSH ++ static void flush_buffer(AVBufferRef *bref) { ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++ gpu_cache_flush(p); ++ } +++#endif ++ ++ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ { ++@@ -3715,7 +3662,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI_WORKER ++ s->pass0_job = 0; ++ s->pass1_job = 0; ++- s->pass2_job = 0; ++ #endif ++ #ifdef RPI ++ rpi_begin(s); ++@@ -3767,12 +3713,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI_WORKER ++ if (s->used_for_ref) { ++ // Split work load onto separate threads so we make as rapid progress as possible with this frame ++- #ifdef INTER_PASS0 ++- rpi_execute_inter_cmds(s); ++- #endif ++- #ifdef LAUNCH_PASS0 ++- rpi_launch_vpu_qpu(s); ++- #endif ++ // Pass on this job to worker thread ++ worker_submit_job(s); ++ // Make sure we have space to prepare the next job ++@@ -3814,8 +3754,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ // Wait for the worker to finish all its jobs ++ if (s->enable_rpi) { ++ worker_wait(s); ++- av_assert0(s->pass0_job==s->pass1_job); ++- av_assert0(s->pass1_job==s->pass2_job); ++ } ++ #endif ++ ++@@ -4565,16 +4503,13 @@ static av_cold void hevc_init_worker(HEVCContext *s) ++ { ++ int err; ++ pthread_cond_init(&s->worker_cond_head, NULL); ++- pthread_cond_init(&s->worker_cond_middle, NULL); ++ pthread_cond_init(&s->worker_cond_tail, NULL); ++ pthread_mutex_init(&s->worker_mutex, NULL); ++ ++ s->worker_tail=0; ++- s->worker_middle=0; ++ s->worker_head=0; ++ s->kill_worker=0; ++ err = pthread_create(&s->worker_thread, NULL, worker_start, s); ++- err = pthread_create(&s->worker_deblock_thread, NULL, worker_deblock_start, s); ++ if (err) { ++ printf("Failed to create worker thread\n"); ++ 
exit(-1); ++@@ -4586,17 +4521,13 @@ static av_cold void hevc_exit_worker(HEVCContext *s) ++ void *res; ++ s->kill_worker=1; ++ pthread_cond_broadcast(&s->worker_cond_tail); ++- pthread_cond_broadcast(&s->worker_cond_middle); ++ pthread_join(s->worker_thread, &res); ++- pthread_join(s->worker_deblock_thread, &res); ++ ++ pthread_cond_destroy(&s->worker_cond_head); ++- pthread_cond_destroy(&s->worker_cond_middle); ++ pthread_cond_destroy(&s->worker_cond_tail); ++ pthread_mutex_destroy(&s->worker_mutex); ++ ++ s->worker_tail=0; ++- s->worker_middle=0; ++ s->worker_head=0; ++ s->kill_worker=0; ++ } ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index a141316..ef5bfb1 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -931,7 +931,6 @@ typedef struct HEVCContext { ++ //GPU_MEM_PTR_T dummy; ++ int pass0_job; // Pass0 does coefficient decode ++ int pass1_job; // Pass1 does pixel processing ++- int pass2_job; // Pass2 does reconstruction and deblocking ++ int ctu_count; // Number of CTUs done in pass0 so far ++ int max_ctu_count; // Number of CTUs when we trigger a round of processing ++ int ctu_per_y_chan; // Number of CTUs per luma QPU ++@@ -963,15 +962,12 @@ typedef struct HEVCContext { ++ ++ #ifdef RPI_WORKER ++ pthread_t worker_thread; ++- pthread_t worker_deblock_thread; ++ pthread_cond_t worker_cond_head; ++ pthread_cond_t worker_cond_tail; ++- pthread_cond_t worker_cond_middle; ++ pthread_mutex_t worker_mutex; ++ ++ int worker_tail; // Contains the number of posted jobs ++ int worker_head; // Contains the number of completed jobs ++- int worker_middle; // Contains the number of completed jobs ++ int kill_worker; // set to 1 to terminate the worker ++ #endif ++ ++-- ++2.7.4 ++ ++ ++From 74892301cdb0829de959b798debac6ffe1c71603 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Mon, 8 Jun 2015 11:04:43 +0100 ++Subject: [PATCH 62/68] Reduced amount of output frame that is invalidated ++ ++--- ++ libavcodec/hevc.c | 45 
+++++++++++++++++++++++++++++---------------- ++ 1 file changed, 29 insertions(+), 16 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 1868532..cbb4f46 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -68,7 +68,7 @@ ++ static void rpi_execute_inter_cmds(HEVCContext *s); ++ static void rpi_begin(HEVCContext *s); ++ static void flush_frame(HEVCContext *s,AVFrame *frame); ++- static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2); +++ static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2, int job); ++ ++ #endif ++ ++@@ -3454,9 +3454,9 @@ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ ++ #ifdef RPI_MULTI_MAILBOX ++ #ifdef RPI_CACHE_UNIF_MVS ++- flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],&s->y_unif_mvs_ptr[job], &s->unif_mvs_ptr[job]); +++ flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],&s->y_unif_mvs_ptr[job], &s->unif_mvs_ptr[job], job); ++ #else ++- flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],NULL,NULL); +++ flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],NULL,NULL, job); ++ #endif ++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], s->num_coeffs[job][3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++@@ -3530,6 +3530,7 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ { ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); ++ int n = s->ps.sps->height; ++ int curr_y = 0; ++ int curr_uv = 0; ++@@ -3537,22 +3538,21 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ int sz,base; ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); ++ iocache.s[0].handle = p->vcsm_handle; ++ 
iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = p->arm + base; +++ iocache.s[0].addr = (int)(p->arm) + base; ++ iocache.s[0].size = sz; ++ p = av_buffer_pool_opaque(frame->buf[2]); ++ iocache.s[1].handle = p->vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = p->arm + base; +++ iocache.s[1].addr = (int)(p->arm) + base; ++ iocache.s[1].size = sz; ++ p = av_buffer_pool_opaque(frame->buf[0]); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++ iocache.s[2].handle = p->vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = p->arm + base; +++ iocache.s[2].addr = (int)(p->arm) + base; ++ iocache.s[2].size = sz; ++ vcsm_clean_invalid( &iocache ); ++ #else ++@@ -3562,33 +3562,46 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ #endif ++ } ++ ++-static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2) +++static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2, int job) ++ { ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++- int n = s->ps.sps->height; ++- int curr_y = 0; ++- int curr_uv = 0; ++- int n_uv = n >> s->ps.sps->vshift[1]; +++ int n; +++ int curr_y; +++ int curr_uv; +++ int n_uv; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); ++ int sz,base; +++ int (*d)[2] = s->dblk_cmds[job]; +++ int low=(*d)[1]; +++ int high=(*d)[1]; +++ for(n = s->num_dblk_cmds[job]; n>0 ;n--,d++) { +++ int y = (*d)[1]; +++ low=FFMIN(low,y); +++ high=FFMAX(high,y); +++ } +++ curr_y = low; +++ n = high+(1 << s->ps.sps->log2_ctb_size); +++ curr_uv = curr_y >> s->ps.sps->vshift[1]; +++ n_uv = n >> s->ps.sps->vshift[1]; +++ ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); ++ iocache.s[0].handle = p->vcsm_handle; ++ 
iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = p->arm + base; +++ iocache.s[0].addr = (int)(p->arm) + base; ++ iocache.s[0].size = sz; ++ p = av_buffer_pool_opaque(frame->buf[2]); ++ iocache.s[1].handle = p->vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = p->arm + base; +++ iocache.s[1].addr = (int)(p->arm) + base; ++ iocache.s[1].size = sz; ++ p = av_buffer_pool_opaque(frame->buf[0]); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++ iocache.s[2].handle = p->vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = p->arm + base; +++ iocache.s[2].addr = (int)(p->arm) + base; ++ iocache.s[2].size = sz; ++ ++ iocache.s[3].handle = p0->vcsm_handle; ++-- ++2.7.4 ++ ++ ++From 090b6be5b501bd3c547700926e540397f0b39e69 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Mon, 8 Jun 2015 11:55:29 +0100 ++Subject: [PATCH 63/68] Packed 16x16 and 32x32 into the same buffer ++ ++--- ++ libavcodec/hevc.c | 24 +++++++++++++++--------- ++ libavcodec/hevc_cabac.c | 9 ++++++++- ++ libavcodec/rpi_qpu.c | 2 +- ++ 3 files changed, 24 insertions(+), 11 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index cbb4f46..a596534 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -299,12 +299,12 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ s->coeffs_buf_arm[job][0] = (int16_t*) s->coeffs_buf_default[job].arm; ++ if (!s->coeffs_buf_arm[job][0]) ++ goto fail; ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated[job]); +++ gpu_malloc_cached(sizeof(int16_t) * (coefs_per_row + 32*32), &s->coeffs_buf_accelerated[job]); // We prefetch past the end so provide an extra blocks worth of data ++ s->coeffs_buf_arm[job][2] = (int16_t*) s->coeffs_buf_accelerated[job].arm; ++ s->coeffs_buf_vc[job][2] = s->coeffs_buf_accelerated[job].vc; ++ if (!s->coeffs_buf_arm[job][2]) ++ goto fail; ++- 
s->coeffs_buf_arm[job][3] = coefs_per_row + s->coeffs_buf_arm[job][2]; +++ s->coeffs_buf_arm[job][3] = coefs_per_row + s->coeffs_buf_arm[job][2]; // This points to just beyond the end of the buffer. Coefficients fill in backwards. ++ s->coeffs_buf_vc[job][3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[job][2]; ++ } ++ } ++@@ -2956,15 +2956,20 @@ static void rpi_execute_transform(HEVCContext *s) ++ { ++ int i=2; ++ int job = s->pass1_job; ++- //int j; ++- //int16_t *coeffs = s->coeffs_buf_arm[i]; ++- //for(j=s->num_coeffs[i]; j > 0; j-= 16*16, coeffs+=16*16) { ++- // s->hevcdsp.idct[4-2](coeffs, 16); ++- //} +++ /*int j; +++ int16_t *coeffs = s->coeffs_buf_arm[job][i]; +++ for(j=s->num_coeffs[job][i]; j > 0; j-= 16*16, coeffs+=16*16) { +++ s->hevcdsp.idct[4-2](coeffs, 16); +++ } +++ i=3; +++ coeffs = s->coeffs_buf_arm[job][i] - s->num_coeffs[job][i]; +++ for(j=s->num_coeffs[job][i]; j > 0; j-= 32*32, coeffs+=32*32) { +++ s->hevcdsp.idct[5-2](coeffs, 32); +++ }*/ ++ ++ gpu_cache_flush(&s->coeffs_buf_accelerated[job]); ++ s->vpu_id = vpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], ++- s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], +++ s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3] - sizeof(int16_t) * s->num_coeffs[job][3], ++ s->num_coeffs[job][3] >> 10, 0, &s->coeffs_buf_accelerated[job]); ++ //vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); ++ //gpu_cache_flush(&s->coeffs_buf_accelerated); ++@@ -3458,7 +3463,8 @@ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ #else ++ flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],NULL,NULL, job); ++ #endif ++- s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], s->num_coeffs[job][3] >> 10, 0, +++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), 
s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, +++ s->coeffs_buf_vc[job][3] - sizeof(int16_t) * s->num_coeffs[job][3], s->num_coeffs[job][3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++ (uint32_t)(unif_vc+(s->mvs_base[job][0 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), ++ (uint32_t)(unif_vc+(s->mvs_base[job][1 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 6523e66..8656917 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1051,7 +1051,14 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ if (s->enable_rpi) { ++ int n = trafo_size * trafo_size; ++ if (use_vpu) { ++- coeffs = s->coeffs_buf_arm[s->pass0_job][log2_trafo_size - 2] + s->num_coeffs[s->pass0_job][log2_trafo_size - 2]; +++ // We support size 4 and size 5. +++ // Size 4 grows from the front (Coeffs_buf_arm[2] points to start of buf) +++ // Size 5 grows from the back (Coeffs_buf_arm[3] points to end of buf) +++ // num_coeffs is indexed by log2_trafo_size-2 +++ if (log2_trafo_size == 4) +++ coeffs = s->coeffs_buf_arm[s->pass0_job][log2_trafo_size - 2] + s->num_coeffs[s->pass0_job][log2_trafo_size - 2]; +++ else +++ coeffs = s->coeffs_buf_arm[s->pass0_job][log2_trafo_size - 2] - s->num_coeffs[s->pass0_job][log2_trafo_size - 2] - n; ++ s->num_coeffs[s->pass0_job][log2_trafo_size - 2] += n; ++ } else { ++ coeffs = s->coeffs_buf_arm[s->pass0_job][0] + s->num_coeffs[s->pass0_job][0]; ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 4480f72..0121fca 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -5,7 +5,7 @@ ++ // define RPI_TIME_TOTAL_VPU to print out how much time is spent in the VPI code ++ //#define RPI_TIME_TOTAL_VPU ++ // define RPI_TIME_TOTAL_POSTED to print out how much time is spent in the multi execute QPU/VPU combined ++-//#define RPI_TIME_TOTAL_POSTED +++#define RPI_TIME_TOTAL_POSTED ++ // define RPI_ASYNC to run the VPU in a separate thread, 
need to make a separate call to check for completion ++ #define RPI_ASYNC ++ ++-- ++2.7.4 ++ ++ ++From ed359bbce56817bf9db0e54701103bd0505c353b Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Thu, 25 Jun 2015 09:02:47 +0100 ++Subject: [PATCH 64/68] Moved luma deblock to VPU ++ ++--- ++ libavcodec/hevc.c | 18 +- ++ libavcodec/hevc.h | 11 + ++ libavcodec/hevc_filter.c | 120 ++- ++ libavcodec/rpi_hevc_transform.h | 1802 ++++++++++++++++++++++++++++++++++++++- ++ libavcodec/rpi_hevc_transform.s | 426 +++++++++ ++ libavcodec/rpi_qpu.c | 12 +- ++ libavcodec/rpi_shader.c | 2 +- ++ 7 files changed, 2378 insertions(+), 13 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index a596534..4ce94a7 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -246,6 +246,12 @@ static void pic_arrays_free(HEVCContext *s) ++ } ++ } ++ #endif +++#ifdef RPI_DEBLOCK_VPU +++ if (s->y_setup_arm) { +++ gpu_free(&s->y_setup_ptr); +++ s->y_setup_arm = 0; +++ } +++#endif ++ av_freep(&s->sao); ++ av_freep(&s->deblock); ++ ++@@ -283,12 +289,12 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ int min_pu_size = sps->min_pu_width * sps->min_pu_height; ++ ++ #ifdef RPI ++- av_assert0(sps); ++ int coefs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); ++ int coefs_per_luma = 64*64*24*RPI_NUM_CHUNKS; ++ int coefs_per_chroma = (coefs_per_luma * 2) >> sps->vshift[1] >> sps->hshift[1]; ++ int coefs_per_row = coefs_per_luma + coefs_per_chroma; ++ int job; +++ av_assert0(sps); ++ s->max_ctu_count = coefs_per_luma / coefs_in_ctb; ++ s->ctu_per_y_chan = s->max_ctu_count / 12; ++ s->ctu_per_uv_chan = s->max_ctu_count / 8; ++@@ -309,6 +315,16 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ } ++ } ++ #endif +++#ifdef RPI_DEBLOCK_VPU +++ s->enable_rpi_deblock = !sps->sao_enabled; +++ s->setup_width = (sps->width+15) / 16; +++ s->setup_height = (sps->height+15) / 16; +++ gpu_malloc_uncached(sizeof(*s->y_setup_arm) * 
s->setup_width * s->setup_height, &s->y_setup_ptr); // TODO make this cached +++ s->y_setup_arm = (void*)s->y_setup_ptr.arm; +++ s->y_setup_vc = (void*)s->y_setup_ptr.vc; +++ memset(s->y_setup_arm, 0, s->y_setup_ptr.numbytes); +++ printf("Setup %d by %d by %d\n",s->setup_width,s->setup_height,sizeof(*s->y_setup_arm)); +++#endif ++ ++ s->bs_width = (width >> 2) + 1; ++ s->bs_height = (height >> 2) + 1; ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index ef5bfb1..cf08489 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -57,6 +57,8 @@ ++ // Define RPI_WORKER to launch a worker thread for pixel processing tasks ++ #define RPI_WORKER ++ +++ #define RPI_DEBLOCK_VPU +++ ++ #endif ++ ++ #define MAX_DPB_SIZE 16 // A.4.1 ++@@ -971,6 +973,15 @@ typedef struct HEVCContext { ++ int kill_worker; // set to 1 to terminate the worker ++ #endif ++ +++#ifdef RPI_DEBLOCK_VPU +++ int enable_rpi_deblock; +++ GPU_MEM_PTR_T y_setup_ptr; +++ uint8_t (*y_setup_arm)[2][2][2][4]; +++ uint8_t (*y_setup_vc)[2][2][2][4]; +++ int setup_width; // Number of 16x16 blocks across the image +++ int setup_height; // Number of 16x16 blocks down the image +++#endif +++ ++ #endif ++ ++ uint8_t *cabac_state; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 1f04790..06371da 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -564,6 +564,19 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->frame->linesize[LUMA], ++ beta, tc, no_p, no_q); ++ } else +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock) { +++ uint8_t (*setup)[2][2][4]; +++ int num16 = (y>>4)*s->setup_width + (x>>4); +++ int a = ((y>>3) & 1) << 1; +++ int b = (x>>3) & 1; +++ setup = s->y_setup_arm[num16]; +++ setup[0][b][0][a] = beta; +++ setup[0][b][0][a + 1] = beta; +++ setup[0][b][1][a] = tc[0]; +++ setup[0][b][1][a + 1] = tc[1]; +++ } else +++#endif ++ s->hevcdsp.hevc_v_loop_filter_luma(src, ++ s->frame->linesize[LUMA], ++ beta, tc, no_p, 
no_q); ++@@ -596,6 +609,19 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->frame->linesize[LUMA], ++ beta, tc, no_p, no_q); ++ } else +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock) { +++ uint8_t (*setup)[2][2][4]; +++ int num16 = (y>>4)*s->setup_width + (x>>4); +++ int a = ((x>>3) & 1) << 1; +++ int b = (y>>3) & 1; +++ setup = s->y_setup_arm[num16]; +++ setup[1][b][0][a] = beta; +++ setup[1][b][0][a + 1] = beta; +++ setup[1][b][1][a] = tc[0]; +++ setup[1][b][1][a + 1] = tc[1]; +++ } else +++#endif ++ s->hevcdsp.hevc_h_loop_filter_luma(src, ++ s->frame->linesize[LUMA], ++ beta, tc, no_p, no_q); ++@@ -876,33 +902,85 @@ static void flush_buffer(AVBufferRef *bref) { ++ } ++ ++ // Return Physical address for this image ++-static int ff_hevc_buf_base(AVBufferRef *bref) { +++static uint32_t get_vc_address(AVBufferRef *bref) { ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++- return p->vc & 0x3fffffff; +++ return p->vc; ++ } ++ +++// ff_hevc_flush_buffer_lines +++// flushes and invalidates all pixel rows in [start,end-1] +++static void ff_hevc_flush_buffer_lines(HEVCContext *s, int start, int end, int flush_luma, int flush_chroma) +++{ +++#ifdef RPI_FAST_CACHEFLUSH +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ int curr_y = start; +++ int n = end; +++ int curr_uv = curr_y >> s->ps.sps->vshift[1]; +++ int n_uv = n >> s->ps.sps->vshift[1]; +++ int sz,base; +++ GPU_MEM_PTR_T *p; +++ if (curr_uv < 0) curr_uv = 0; +++ if (n_uv<=curr_uv) { return; } +++ sz = s->frame->linesize[1] * (n_uv-curr_uv); +++ base = s->frame->linesize[1] * curr_uv; +++ if (flush_chroma) { +++ p = av_buffer_pool_opaque(s->frame->buf[1]); +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = (int)p->arm + base; +++ iocache.s[0].size = sz; +++ p = av_buffer_pool_opaque(s->frame->buf[2]); +++ iocache.s[1].handle = p->vcsm_handle; +++ iocache.s[1].cmd = 3; // clean+invalidate +++ iocache.s[1].addr = 
(int)p->arm + base; +++ iocache.s[1].size = sz; +++ } +++ if (flush_luma) { +++ p = av_buffer_pool_opaque(s->frame->buf[0]); +++ sz = s->frame->linesize[0] * (n-curr_y); +++ base = s->frame->linesize[0] * curr_y; +++ iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].cmd = 3; // clean+invalidate +++ iocache.s[2].addr = (int)p->arm + base; +++ iocache.s[2].size = sz; +++ } +++ vcsm_clean_invalid( &iocache ); +++#else +++ if (flush_chroma) { +++ flush_buffer(s->frame->buf[1]); +++ flush_buffer(s->frame->buf[2]); +++ } +++ if (flush_luma) { +++ flush_buffer(s->frame->buf[0]); +++ } +++#endif +++} +++ +++ ++ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ { ++ if (s->enable_rpi && s->used_for_ref) { +++ // TODO make this use ff_hevc_flush_buffer_lines ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++ int curr_y = ((int *)f->progress->data)[0]; ++ int curr_uv = curr_y >> s->ps.sps->vshift[1]; ++ int n_uv = n >> s->ps.sps->vshift[1]; ++ int sz,base; +++ GPU_MEM_PTR_T *p; ++ if (curr_uv < 0) curr_uv = 0; ++ if (n_uv<=curr_uv) { return; } ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(s->frame->buf[1]); +++ p = av_buffer_pool_opaque(s->frame->buf[1]); ++ iocache.s[0].handle = p->vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = p->arm + base; +++ iocache.s[0].addr = (int)p->arm + base; ++ iocache.s[0].size = sz; ++ p = av_buffer_pool_opaque(s->frame->buf[2]); ++ iocache.s[1].handle = p->vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = p->arm + base; +++ iocache.s[1].addr = (int)p->arm + base; ++ iocache.s[1].size = sz; ++ ++ #ifdef RPI_LUMA_QPU ++@@ -911,7 +989,7 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ base = s->frame->linesize[0] * curr_y; ++ iocache.s[2].handle = p->vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- 
iocache.s[2].addr = p->arm + base; +++ iocache.s[2].addr = (int)p->arm + base; ++ iocache.s[2].size = sz; ++ #endif ++ vcsm_clean_invalid( &iocache ); ++@@ -930,11 +1008,40 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ } ++ #endif ++ +++#ifdef RPI_DEBLOCK_VPU +++/* rpi_deblock deblocks an entire row of ctbs using the VPU */ +++static void rpi_deblock(HEVCContext *s, int y, int ctb_size) +++{ +++ // Flush image, 4 lines above to bottom of ctb stripe +++ ff_hevc_flush_buffer_lines(s, FFMAX(y-4,0), y+ctb_size, 1, 0); +++ // TODO flush buffer of beta/tc setup when it becomes cached +++ // Call VPU +++ // TODO add this to a separate pipeline of VPU jobs that can be run in parallel and wait for completion +++ vpu_wait(vpu_post_code( vpu_get_fn(), get_vc_address(s->frame->buf[0]) + s->frame->linesize[0] * y, s->frame->linesize[0], +++ s->setup_width, (int) ( s->y_setup_vc + s->setup_width * (y>>4) ), +++ ctb_size>>4, 2, 0)); // 2 means to do the deblocking code +++} +++ +++static void rpi_deblock2(HEVCContext *s, int y, int ctb_size) +++{ +++ int y2; +++ for(y2=y;y2= s->ps.sps->width - ctb_size; ++ if (s->avctx->skip_loop_filter < AVDISCARD_ALL) ++ deblocking_filter_CTB(s, x, y); +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock && x_end) +++ { +++ rpi_deblock(s, y, ctb_size); +++ } +++#endif ++ if (s->ps.sps->sao_enabled) { ++ int y_end = y >= s->ps.sps->height - ctb_size; ++ if (y && x) ++@@ -965,6 +1072,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ //if (((y + ctb_size)&63)==0) ++ #ifdef RPI_INTER_QPU ++ ff_hevc_flush_buffer(s, &s->ref->tf, y + ctb_size - 4); +++ // TODO we no longer need to flush the luma buffer as it is in GPU memory when using deblocking on the rpi ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++index 4f13622..b3f155f 100644 ++--- a/libavcodec/rpi_hevc_transform.h +++++ 
b/libavcodec/rpi_hevc_transform.h ++@@ -3,7 +3,13 @@ unsigned char rpi_hevc_transform [] = { ++ 106, ++ 0, ++ 144, ++-35, +++38, +++1, +++37, +++106, +++0, +++144, +++57, ++ 1, ++ 169, ++ 3, ++@@ -627,4 +633,1798 @@ unsigned char rpi_hevc_transform [] = { ++ 30, ++ 90, ++ 0, +++169, +++3, +++73, +++64, +++52, +++64, +++45, +++64, +++2, +++64, +++10, +++64, +++64, +++198, +++1, +++7, +++8, +++232, +++63, +++0, +++0, +++0, +++6, +++232, +++253, +++255, +++255, +++255, +++0, +++246, +++0, +++0, +++0, +++4, +++215, +++64, +++3, +++96, +++2, +++248, +++0, +++35, +++0, +++0, +++64, +++56, +++0, +++0, +++4, +++248, +++0, +++36, +++0, +++0, +++64, +++56, +++8, +++0, +++0, +++240, +++64, +++0, +++132, +++3, +++128, +++240, +++0, +++0, +++132, +++3, +++128, +++144, +++137, +++0, +++131, +++98, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++129, +++0, +++131, +++102, +++0, +++158, +++67, +++0, +++2, +++248, +++0, +++35, +++0, +++0, +++64, +++56, +++0, +++0, +++4, +++248, +++0, +++36, +++0, +++0, +++64, +++56, +++8, +++0, +++0, +++240, +++64, +++0, +++132, +++3, +++128, +++240, +++0, +++0, +++132, +++3, +++128, +++144, +++108, +++0, +++131, +++98, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++100, +++0, +++131, +++102, +++0, +++248, +++64, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++248, +++0, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++144, +++161, +++0, +++188, +++64, +++67, +++232, +++0, +++2, +++0, +++0, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++150, +++0, +++195, +++232, +++0, +++2, +++0, +++0, +++12, +++128, +++7, +++192, +++130, +++248, +++0, +++0, +++112, +++192, +++224, +++16, +++195, +++31, +++132, +++248, +++1, +++0, +++112, +++0, +++224, +++16, +++203, +++31, +++3, +++99, +++131, +++71, +++68, +++232, +++32, +++0, +++0, +++0, +++0, +++99, +++2, +++99, +++23, +++102, +++7, +++106, +++127, +++156, +++182, 
+++255, +++0, +++248, +++64, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++248, +++0, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++144, +++112, +++0, +++188, +++64, +++67, +++232, +++0, +++2, +++0, +++0, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++101, +++0, +++195, +++232, +++0, +++2, +++0, +++0, +++12, +++128, +++7, +++192, +++130, +++248, +++0, +++0, +++112, +++192, +++224, +++16, +++195, +++31, +++132, +++248, +++1, +++0, +++112, +++0, +++224, +++16, +++203, +++31, +++25, +++102, +++9, +++106, +++2, +++30, +++41, +++3, +++26, +++87, +++162, +++64, +++64, +++198, +++1, +++23, +++127, +++158, +++103, +++255, +++239, +++3, +++0, +++254, +++0, +++143, +++92, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++64, +++143, +++93, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++143, +++94, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++143, +++95, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++142, +++208, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++142, +++209, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++64, +++142, +++210, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++0, +++142, +++211, +++0, +++0, +++240, +++12, +++0, +++128, +++144, +++107, +++0, +++8, +++255, +++99, +++23, +++0, +++212, +++192, +++51, +++0, +++0, +++8, +++255, +++163, +++23, +++0, +++228, +++192, +++51, +++0, +++0, +++8, +++255, +++227, +++23, +++0, +++244, +++192, +++51, +++0, +++0, +++8, +++255, +++35, +++52, +++0, +++180, +++192, +++51, +++0, +++0, +++8, +++255, +++99, +++52, +++0, +++164, +++192, +++51, +++0, +++0, +++8, +++255, +++163, +++52, +++0, +++148, +++192, +++51, +++0, +++0, +++111, +++3, +++239, +++3, +++0, +++254, +++0, +++143, +++12, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++64, +++143, +++13, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++143, +++14, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++143, +++15, +++0, +++0, 
+++240, +++12, +++0, +++0, +++254, +++192, +++142, +++16, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++142, +++17, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++64, +++142, +++18, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++0, +++142, +++19, +++0, +++0, +++240, +++12, +++0, +++128, +++144, +++33, +++0, +++8, +++255, +++99, +++3, +++0, +++212, +++192, +++51, +++0, +++0, +++8, +++255, +++163, +++3, +++0, +++228, +++192, +++51, +++0, +++0, +++8, +++255, +++227, +++3, +++0, +++244, +++192, +++51, +++0, +++0, +++8, +++255, +++35, +++4, +++0, +++180, +++192, +++51, +++0, +++0, +++8, +++255, +++99, +++4, +++0, +++164, +++192, +++51, +++0, +++0, +++8, +++255, +++163, +++4, +++0, +++148, +++192, +++51, +++0, +++0, +++111, +++3, +++32, +++246, +++192, +++11, +++1, +++16, +++32, +++246, +++2, +++137, +++47, +++240, +++40, +++246, +++2, +++140, +++47, +++240, +++128, +++245, +++99, +++140, +++5, +++4, +++0, +++247, +++99, +++140, +++1, +++20, +++88, +++246, +++99, +++140, +++1, +++20, +++0, +++247, +++35, +++136, +++62, +++226, +++32, +++247, +++35, +++136, +++32, +++210, +++0, +++247, +++34, +++136, +++63, +++2, +++208, +++246, +++34, +++136, +++0, +++4, +++0, +++247, +++99, +++136, +++58, +++162, +++32, +++247, +++99, +++136, +++33, +++146, +++0, +++247, +++98, +++136, +++59, +++18, +++208, +++246, +++98, +++136, +++0, +++20, +++0, +++247, +++162, +++136, +++33, +++2, +++88, +++246, +++98, +++137, +++2, +++68, +++88, +++246, +++162, +++137, +++3, +++68, +++208, +++254, +++227, +++136, +++60, +++242, +++192, +++243, +++188, +++11, +++208, +++254, +++227, +++136, +++56, +++178, +++192, +++243, +++188, +++10, +++32, +++255, +++226, +++136, +++38, +++58, +++192, +++243, +++60, +++0, +++208, +++254, +++227, +++136, +++59, +++242, +++192, +++243, +++60, +++128, +++32, +++255, +++226, +++136, +++49, +++58, +++192, +++243, +++60, +++128, +++0, +++255, +++226, +++136, +++34, +++34, +++192, +++243, +++60, +++128, +++32, +++255, +++226, +++136, +++37, +++58, 
+++192, +++243, +++60, +++128, +++0, +++254, +++192, +++136, +++1, +++4, +++0, +++240, +++0, +++160, +++0, +++255, +++194, +++8, +++0, +++52, +++195, +++243, +++0, +++128, +++0, +++255, +++202, +++40, +++0, +++52, +++195, +++243, +++0, +++128, +++0, +++254, +++0, +++240, +++35, +++10, +++0, +++240, +++60, +++0, +++0, +++254, +++192, +++136, +++1, +++4, +++0, +++240, +++0, +++160, +++0, +++255, +++226, +++140, +++34, +++34, +++195, +++243, +++60, +++0, +++32, +++255, +++227, +++140, +++36, +++58, +++192, +++243, +++60, +++0, +++0, +++254, +++192, +++136, +++0, +++4, +++0, +++240, +++0, +++160, +++16, +++246, +++226, +++136, +++35, +++50, +++16, +++246, +++226, +++136, +++35, +++50, +++32, +++246, +++226, +++136, +++35, +++50, +++32, +++254, +++226, +++136, +++35, +++58, +++192, +++243, +++60, +++0, +++11, +++96, +++0, +++254, +++0, +++240, +++1, +++4, +++0, +++240, +++64, +++115, +++5, +++106, +++0, +++144, +++173, +++1, +++27, +++96, +++0, +++254, +++0, +++240, +++1, +++4, +++0, +++240, +++64, +++147, +++5, +++106, +++0, +++144, +++227, +++0, +++64, +++246, +++163, +++140, +++1, +++4, +++0, +++246, +++192, +++175, +++63, +++2, +++0, +++246, +++192, +++174, +++59, +++2, +++0, +++246, +++128, +++175, +++62, +++2, +++0, +++246, +++128, +++174, +++58, +++2, +++0, +++246, +++64, +++175, +++61, +++2, +++0, +++246, +++64, +++174, +++57, +++2, +++0, +++255, +++43, +++240, +++4, +++212, +++192, +++243, +++128, +++11, +++64, +++254, +++43, +++240, +++1, +++228, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++240, +++1, +++244, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++240, +++1, +++180, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++141, +++0, +++164, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++3, +++68, +++32, +++247, +++35, +++141, +++191, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++235, +++143, +++52, +++242, +++192, +++243, +++60, +++128, +++0, +++255, +++43, +++240, +++2, +++212, +++192, +++243, 
+++128, +++11, +++0, +++255, +++43, +++240, +++191, +++226, +++192, +++243, +++188, +++10, +++64, +++254, +++43, +++141, +++0, +++180, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++2, +++68, +++32, +++247, +++35, +++141, +++190, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++171, +++143, +++52, +++226, +++192, +++243, +++60, +++128, +++0, +++255, +++43, +++240, +++4, +++180, +++192, +++243, +++128, +++11, +++0, +++255, +++43, +++240, +++191, +++226, +++192, +++243, +++188, +++10, +++128, +++253, +++43, +++240, +++3, +++212, +++192, +++243, +++128, +++10, +++64, +++254, +++35, +++141, +++1, +++196, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++3, +++68, +++32, +++247, +++35, +++141, +++189, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++107, +++143, +++52, +++210, +++192, +++243, +++60, +++128, +++0, +++255, +++43, +++240, +++4, +++148, +++192, +++243, +++128, +++11, +++64, +++254, +++43, +++240, +++1, +++164, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++240, +++1, +++180, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++240, +++1, +++244, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++141, +++0, +++228, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++3, +++68, +++32, +++247, +++35, +++141, +++187, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++235, +++142, +++52, +++178, +++192, +++243, +++60, +++128, +++0, +++255, +++43, +++240, +++2, +++148, +++192, +++243, +++128, +++11, +++0, +++255, +++43, +++240, +++187, +++162, +++192, +++243, +++188, +++10, +++64, +++254, +++43, +++141, +++0, +++244, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++2, +++68, +++32, +++247, +++35, +++141, +++186, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++171, +++142, +++52, +++162, +++192, +++243, +++60, +++128, +++0, +++255, +++43, +++240, +++4, +++244, +++192, +++243, +++128, +++11, +++0, +++255, +++43, 
+++240, +++187, +++162, +++192, +++243, +++188, +++10, +++128, +++253, +++43, +++240, +++3, +++148, +++192, +++243, +++128, +++10, +++64, +++254, +++35, +++141, +++1, +++132, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++3, +++68, +++32, +++247, +++35, +++141, +++185, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++107, +++142, +++52, +++146, +++192, +++243, +++60, +++128, +++64, +++255, +++98, +++141, +++0, +++52, +++192, +++243, +++0, +++0, +++0, +++254, +++0, +++240, +++53, +++10, +++0, +++240, +++60, +++0, +++0, +++254, +++0, +++240, +++1, +++4, +++0, +++240, +++64, +++147, +++5, +++106, +++0, +++144, +++177, +++0, +++88, +++246, +++163, +++140, +++1, +++4, +++128, +++245, +++99, +++141, +++10, +++4, +++88, +++246, +++162, +++138, +++1, +++68, +++0, +++247, +++162, +++138, +++36, +++162, +++88, +++254, +++162, +++138, +++3, +++164, +++192, +++243, +++128, +++11, +++0, +++255, +++226, +++137, +++32, +++2, +++195, +++243, +++60, +++0, +++32, +++247, +++226, +++137, +++42, +++114, +++0, +++255, +++34, +++138, +++33, +++18, +++195, +++243, +++60, +++0, +++32, +++247, +++34, +++138, +++42, +++130, +++16, +++246, +++98, +++138, +++40, +++114, +++16, +++246, +++98, +++138, +++41, +++146, +++32, +++246, +++98, +++138, +++41, +++146, +++32, +++246, +++226, +++137, +++41, +++146, +++40, +++246, +++34, +++138, +++41, +++146, +++32, +++247, +++163, +++141, +++63, +++178, +++32, +++247, +++227, +++141, +++62, +++162, +++0, +++254, +++0, +++240, +++8, +++4, +++0, +++240, +++128, +++11, +++128, +++253, +++35, +++240, +++9, +++100, +++192, +++243, +++128, +++10, +++128, +++253, +++163, +++141, +++128, +++115, +++192, +++243, +++152, +++10, +++88, +++246, +++163, +++141, +++4, +++100, +++208, +++246, +++35, +++139, +++0, +++100, +++32, +++255, +++34, +++139, +++53, +++202, +++192, +++243, +++60, +++128, +++0, +++254, +++0, +++139, +++0, +++4, +++0, +++240, +++0, +++160, +++240, +++246, +++163, +++141, +++48, +++98, +++0, +++247, +++99, 
+++139, +++63, +++210, +++0, +++247, +++98, +++139, +++1, +++212, +++88, +++254, +++98, +++139, +++1, +++212, +++192, +++243, +++128, +++11, +++32, +++255, +++99, +++139, +++62, +++98, +++192, +++243, +++188, +++10, +++88, +++246, +++98, +++139, +++1, +++212, +++240, +++246, +++98, +++139, +++50, +++210, +++0, +++247, +++163, +++128, +++59, +++146, +++0, +++247, +++160, +++128, +++1, +++36, +++88, +++254, +++160, +++128, +++1, +++36, +++192, +++243, +++128, +++11, +++0, +++247, +++163, +++128, +++58, +++98, +++64, +++255, +++35, +++240, +++0, +++100, +++192, +++243, +++128, +++10, +++64, +++255, +++163, +++128, +++0, +++164, +++192, +++243, +++128, +++10, +++88, +++246, +++160, +++128, +++1, +++36, +++240, +++246, +++160, +++128, +++50, +++34, +++8, +++255, +++227, +++143, +++54, +++242, +++192, +++243, +++60, +++128, +++40, +++255, +++227, +++142, +++54, +++178, +++192, +++243, +++60, +++128, +++0, +++254, +++0, +++240, +++39, +++10, +++0, +++240, +++60, +++128, +++8, +++255, +++163, +++143, +++45, +++226, +++192, +++243, +++60, +++128, +++0, +++254, +++0, +++240, +++44, +++10, +++0, +++240, +++60, +++0, +++0, +++254, +++0, +++240, +++40, +++10, +++0, +++240, +++60, +++128, +++8, +++255, +++163, +++142, +++2, +++162, +++192, +++243, +++60, +++128, +++90, +++0, ++ }; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++index fd159bc..b055208 100644 ++--- a/libavcodec/rpi_hevc_transform.s +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -83,6 +83,8 @@ ++ hevc_trans_16x16: ++ cmp r5,1 ++ beq memclear16 +++ cmp r5,2 +++ beq hevc_deblock_16x16 ++ push r6-r15, lr # TODO cut down number of used registers ++ mov r14,r3 # coeffs32 ++ mov r15,r4 # num32 ++@@ -282,3 +284,427 @@ loop: ++ cmp r1,0 ++ bgt loop ++ b lr +++ +++ +++################################################################################ +++# HEVC VPU Deblock +++# +++# Vertical edges before horizontal +++# Decision can change every 4 pixels, but only 8 pixel boundaries are 
deblocked +++# +++# ARM is responsible for storing beta and tc for each 4 pixels horiz and vert edge. +++# The VPU code works in units of 16x16 blocks. +++# We do vertical filtering for the current block followed by horizontal filtering for the previous (except for the first time). +++# One final horizontal filter is required at the end. +++# PCM is not allowed in this code. +++# +++# +++# H(16-4:16+15,0) contains previous block (note that we need 4 lines above of context that may get altered during filtering) +++# H(16:31,16) contains current block (note that we do not need the upper lines until the horizontal filtering. +++ +++.set P0,63 +++.set P1,62 +++.set P2,61 +++.set P3,60 +++.set Q0,59 +++.set Q1,58 +++.set Q2,57 +++.set Q3,56 +++ +++.set dp,32 +++.set dq,33 +++.set d,34 +++.set decision,35 +++.set beta,36 +++.set beta2,37 +++.set beta3,38 +++.set ptest,39 +++.set qtest,40 +++.set pqtest,41 +++.set thresh,42 +++.set deltatest, 44 +++.set deltap1, 45 +++.set tc25, 46 +++.set setup,47 +++.set tc,48 +++.set tc25,49 +++.set tc2, 50 +++.set do_filter, 51 +++.set delta, 52 +++.set tc10, 53 +++.set delta0, 54 +++.set delta1, 55 +++.set zeros, 0 +++.set setup_input, 1 +++.set deltaq1, 2 +++ +++ +++ +++# hevc_deblock_16x16 deblocks an entire row that is 16 pixels high by the full width of the image. 
+++# Row has num16 16x16 blocks across +++# Beta goes from 0 to 64 +++# tc goes from 0 to 24 +++# setup[block_idx][0=vert,1=horz][0=first edge, 1=second edge][0=beta,1=tc][0..3=edge number] +++# has 8 bytes per edge +++# has 16 bytes per direction +++# has 32 bytes per 16x16 block +++# hevc_deblock_16x16(uint8_t *img (r0), int stride (r1), int num16w (r2), uint8_t setup[num16][2][2][2][4](r3),int num16h(r4)) +++hevc_deblock_16x16: +++ push r6-r15, lr +++ mov r9,r4 +++ mov r4,r3 +++ mov r13,r2 +++ mov r2,r0 +++ mov r10,r0 +++ subscale4 r0,r1 +++ mov r8,63 +++ mov r6,-3 +++ vmov H(zeros,0),0 +++# r7 is number of blocks still to load +++# r0 is location of current block - 4 * stride +++# r1 is stride +++# r2 is location of current block +++# r3 is offset of start of block (actual edges start at H(16,16)+r3 for horizontal and H(16,0)+r3 for vertical +++# r4 is setup +++# r5 is for temporary calculations +++# r8 holds 63 +++# r6 holds -3 +++# r9 holds the number of 16 high rows to process +++# r10 holds the original img base +++# r11 returns 0 if no filtering was done on the edge +++# r12 saves a copy of this +++# r13 is copy of width +++ +++process_row: +++ # First iteration does not do horizontal filtering on previous +++ mov r7, r13 +++ mov r3,0 +++ vldb H(12++,16)+r3,(r0 += r1) REP 4 # Load the current block +++ vldb H(16++,16)+r3,(r2 += r1) REP 16 +++ vldb H(setup_input,0), (r4) # We may wish to prefetch these +++ vstb H(zeros,0),(r4) +++ bl vert_filter +++ add r3,8 +++ vadd H(setup_input,0),H(setup_input,8),0 # Rotate to second set of 8 +++ bl vert_filter +++ sub r3,8 +++ b start_deblock_loop +++deblock_loop: +++ # Middle iterations do vertical on current block and horizontal on preceding +++ vldb H(12++,16)+r3,(r0 += r1) REP 4 # load the current block +++ vldb H(16++,16)+r3,(r2 += r1) REP 16 +++ vldb H(setup_input,0), (r4) +++ vstb H(zeros,0),(r4) +++ bl vert_filter +++ add r3,8 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl vert_filter +++ sub r3,8 +++ vldb 
H(setup_input,0), -16(r4) +++ vstb H(zeros,0),-16(r4) +++ bl horz_filter +++ mov r12,r11 +++ add r3,8*64 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl horz_filter +++ sub r3,8*64 +++ addcmpbeq r12,0,0,skip_save_top +++ vstb H(12++,0)+r3,-16(r0 += r1) REP 4 # Save the deblocked pixels for the previous block +++skip_save_top: +++ vstb H(16++,0)+r3,-16(r2 += r1) REP 16 +++start_deblock_loop: +++ # move onto next 16x16 (could do this with circular buffer support instead) +++ add r3,16 +++ and r3,r8 +++ add r4,32 +++ # Perform loop counter operations (may work with an addcmpbgt as well?) +++ add r0,16 +++ add r2,16 +++ sub r7,1 +++ cmp r7,0 # Are there still more blocks to load +++ bgt deblock_loop +++ +++ # Final iteration needs to just do horizontal filtering +++ vldb H(setup_input,0), -16(r4) +++ vstb H(zeros,0),-16(r4) +++ bl horz_filter +++ mov r12,r11 +++ add r3,8*64 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl horz_filter +++ sub r3,64*8 +++ addcmpbeq r12,0,0,skip_save_top2 +++ vstb H(12++,0)+r3,-16(r0 += r1) REP 4 # Save the deblocked pixels for the previous block +++skip_save_top2: +++ vstb H(16++,0)+r3,-16(r2 += r1) REP 16 +++ +++# Now look to see if we should do another row +++ sub r9,1 +++ cmp r9,0 +++ bgt start_again +++ pop r6-r15, pc +++start_again: +++ # Need to sort out r0,r2 to point to next row down +++ addscale16 r10,r1 +++ mov r2,r10 +++ subscale4 r0,r2,r1 +++ b process_row +++ +++ +++# At this stage H(16,16)+r3 points to the first pixel of the 16 high edge to be filtered +++# So we can reuse the code we move the parts to be filtered into HX(P0/P1/P2/P3/Q0/Q1/Q2/Q3,0) - we will perform a final saturation step on placing them back into the correct locations +++ +++vert_filter: +++ push lr +++ +++ vmov HX(P3,0), V(16,12)+r3 +++ vmov HX(P2,0), V(16,13)+r3 +++ vmov HX(P1,0), V(16,14)+r3 +++ vmov HX(P0,0), V(16,15)+r3 +++ vmov HX(Q0,0), V(16,16)+r3 +++ vmov HX(Q1,0), V(16,17)+r3 +++ vmov HX(Q2,0), V(16,18)+r3 +++ vmov HX(Q3,0), V(16,19)+r3 
+++ +++ bl do_luma_filter +++ +++ vadds V(16,13)+r3, HX(P2,0), 0 +++ vadds V(16,14)+r3, HX(P1,0), 0 +++ vadds V(16,15)+r3, HX(P0,0), 0 +++ # P3 and Q3 never change so don't bother saving back +++ vadds V(16,16)+r3, HX(Q0,0), 0 +++ vadds V(16,17)+r3, HX(Q1,0), 0 +++ vadds V(16,18)+r3, HX(Q2,0), 0 +++ +++ pop pc +++ +++# Filter edge at H(16,0)+r3 +++horz_filter: +++ push lr +++ +++ vmov HX(P3,0), H(12,0)+r3 +++ vmov HX(P2,0), H(13,0)+r3 +++ vmov HX(P1,0), H(14,0)+r3 +++ vmov HX(P0,0), H(15,0)+r3 +++ vmov HX(Q0,0), H(16,0)+r3 +++ vmov HX(Q1,0), H(17,0)+r3 +++ vmov HX(Q2,0), H(18,0)+r3 +++ vmov HX(Q3,0), H(19,0)+r3 +++ +++ bl do_luma_filter +++ +++ vadds H(13,0)+r3, HX(P2,0), 0 +++ vadds H(14,0)+r3, HX(P1,0), 0 +++ vadds H(15,0)+r3, HX(P0,0), 0 +++ # P3 and Q3 never change so don't bother saving back +++ vadds H(16,0)+r3, HX(Q0,0), 0 +++ vadds H(17,0)+r3, HX(Q1,0), 0 +++ vadds H(18,0)+r3, HX(Q2,0), 0 +++ +++ pop pc +++ +++# r4 points to array of beta/tc for each 4 length edge +++do_luma_filter: +++ valtl H(setup,0),H(setup_input,0),H(setup_input,0) # b*8tc*8 +++ valtl HX(beta,0),H(setup,0),H(setup,0) +++ valtu HX(tc,0),H(setup,0),H(setup,0) +++ vmul HX(tc25,0), HX(tc,0), 5 +++ vadd HX(tc25,0),HX(tc25,0), 1 +++ vasr HX(tc25,0), HX(tc25,0), 1 +++ +++ # Compute decision +++ vadd HX(dp,0),HX(P1,0),HX(P1,0) # 2*P1 +++ vsub HX(dp,0),HX(P2,0),HX(dp,0) # P2-2*P1 +++ vadd HX(dp,0),HX(dp,0),HX(P0,0) # P2-2*P1+P0 +++ vdist HX(dp,0),HX(dp,0),0 # abs(P2-2*P1+P0) # dp0 +++ +++ vadd HX(dq,0),HX(Q1,0),HX(Q1,0) # 2*Q1 +++ vsub HX(dq,0),HX(Q2,0),HX(dq,0) # Q2-2*Q1 +++ vadd HX(dq,0),HX(dq,0),HX(Q0,0) # Q2-2*Q1+Q0 +++ vdist HX(dq,0),HX(dq,0),0 # abs(Q2-2*Q1+Q0) # dq0 +++ +++ vadd HX(d,0), HX(dp,0), HX(dq,0) +++ vasr HX(beta2,0),HX(beta,0),2 +++ vasr HX(beta3,0),HX(beta,0),3 +++ +++ # Compute flags that are negative if all conditions pass +++ vdist HX(decision,0), HX(P0,0), HX(P3,0) CLRA SACC +++ vdist HX(decision,0), HX(Q0,0), HX(Q3,0) SACC +++ vsub HX(decision,0), HX(decision,0), 
HX(beta3,0) SETF +++ +++ vdist HX(decision,0), HX(P0,0), HX(Q0,0) IFN +++ vsub HX(decision,0), HX(decision,0), HX(tc25,0) IFN SETF +++ vadd HX(decision,0), HX(d,0), HX(d,0) IFN +++ vsub HX(decision,0), HX(decision,0), HX(beta2,0) IFN SETF +++ vmov HX(decision,0), 1 IFNN +++ vadd H(decision,0),H(decision,3),0 IFN +++ vadd H(decision,16),H(decision,19),0 IFN +++ vmov -,HX(decision,0) SETF # N marks strong filter +++ vmov HX(decision,0), 1 IFNN # NN marks normal filter +++ +++ vadd HX(do_filter,0), HX(d,3), HX(d,0) +++ vsub HX(do_filter,0), HX(do_filter,0), HX(beta,0) SETF # IFNN means no filter +++ vmov HX(decision,0),0 IFNN # Z marks no filter +++ +++ # Expand out decision (currently valid one every 4 pixels) 0...1...2...3 +++ # First extract out even terms +++ vodd HX(decision,0),HX(decision,0),HX(decision,0) # 0.1.2.3 +++ vodd HX(decision,0),HX(decision,0),HX(decision,0) # 0123 +++ # Now expand back +++ valtl HX(decision,0),HX(decision,0),HX(decision,0) # 00112233 +++ valtl HX(decision,0),HX(decision,0),HX(decision,0) SETF # 0000111122223333 +++ +++ # HX(decision,0) is negative if want strong filtering, 1 if want normal filtering, 0 if want no filtering +++ +++ # Do a quick check to see if there is anything to do +++ mov r11, 0 # Signal no filtering +++ vmov -,1 IFNZ SUMS r5 +++ cmp r5,0 +++ beq filtering_done +++ mov r11, 1 # Signal some filtering +++ # And whether there is any strong filtering +++ vmov -,1 IFN SUMS r5 +++ cmp r5,0 +++ beq normal_filtering +++ +++ ############################################################################## +++ # Strong filtering - could maybe fast case if all have same sign? (especially if all disabled!) 
+++ vshl HX(tc2,0), HX(tc,0), 1 # Note that in normal filtering tx2 is tc/2, while here it is tc*2 +++ +++ # Take a copy of the original pixels for use in decision calculation +++ vmov HX(P0,32),HX(P0,0) +++ vmov HX(Q0,32),HX(Q0,0) +++ vmov HX(P1,32),HX(P1,0) +++ vmov HX(Q1,32),HX(Q1,0) +++ vmov HX(P2,32),HX(P2,0) +++ vmov HX(Q2,32),HX(Q2,0) +++ +++ vadd -,HX(P2,32),4 CLRA SACC +++ vshl -,HX(P1,32),1 SACC +++ vshl -,HX(P0,32),1 SACC +++ vshl -,HX(Q0,32),1 SACC +++ vshl HX(delta,0),HX(Q1,32),0 SACC +++ vasr HX(delta,0),HX(delta,0), 3 +++ vsub HX(delta,0),HX(delta,0),HX(P0,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(P0,0),HX(P0,32),HX(delta,0) IFN +++ +++ vadd -,HX(P2,32),2 CLRA SACC +++ vadd -,HX(P1,32),HX(P0,32) SACC +++ vshl HX(delta,0),HX(Q0,32),0 SACC +++ vasr HX(delta,0),HX(delta,0), 2 +++ vsub HX(delta,0),HX(delta,0),HX(P1,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(P1,0),HX(P1,32),HX(delta,0) IFN +++ +++ vadd -,HX(Q0,32),4 CLRA SACC +++ vadd -,HX(P1,32),HX(P0,32) SACC +++ vmul -,HX(P2,32),3 SACC +++ vshl HX(delta,0),HX(P3,0),1 SACC # Note that we have not made a copy of P3, so using P3,0 is correct +++ vasr HX(delta,0),HX(delta,0), 3 +++ vsub HX(delta,0),HX(delta,0),HX(P2,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(P2,0),HX(P2,32),HX(delta,0) IFN +++ #vmov HX(P2,0),3 IFN +++ +++ # Now reverse all P/Qs +++ +++ vadd -,HX(Q2,32),4 CLRA SACC +++ vshl -,HX(Q1,32),1 SACC +++ vshl -,HX(Q0,32),1 SACC +++ vshl -,HX(P0,32),1 SACC +++ vshl HX(delta,0),HX(P1,32),0 SACC +++ vasr HX(delta,0),HX(delta,0), 3 +++ vsub HX(delta,0),HX(delta,0),HX(Q0,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(Q0,0),HX(Q0,32),HX(delta,0) IFN +++ +++ vadd -,HX(Q2,32),2 CLRA SACC +++ vadd -,HX(Q1,32),HX(Q0,32) SACC +++ vshl HX(delta,0),HX(P0,32),0 SACC +++ vasr HX(delta,0),HX(delta,0), 2 +++ vsub HX(delta,0),HX(delta,0),HX(Q1,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(Q1,0),HX(Q1,32),HX(delta,0) 
IFN +++ +++ vadd -,HX(P0,32),4 CLRA SACC +++ vadd -,HX(Q1,32),HX(Q0,32) SACC +++ vmul -,HX(Q2,32),3 SACC +++ vshl HX(delta,0),HX(Q3,0),1 SACC # Note that we have not made a copy of Q3, so using Q3,0 is correct +++ vasr HX(delta,0),HX(delta,0), 3 +++ vsub HX(delta,0),HX(delta,0),HX(Q2,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(Q2,0),HX(Q2,32),HX(delta,0) IFN +++ +++ ############################################################################## +++ # Normal filtering +++normal_filtering: +++ # Invert the decision flags +++ # make instruction more complicated as assembler has error and loses SETF +++ vrsub HX(tc10,0), HX(decision,0), 0 SETF # IFN means normal filtering +++ vmov -, HX(tc10,0) SETF # IFN means normal filtering +++ +++ vmov -,1 IFN SUMS r5 +++ cmp r5,0 +++ beq filtering_done +++ +++ vasr HX(tc2,0), HX(tc,0), 1 +++ vmul HX(tc10,0), HX(tc,0), 10 +++ +++ vasr HX(thresh,0), HX(beta,0), 1 +++ vadd HX(thresh,0), HX(thresh,0), HX(beta,0) +++ vasr HX(thresh,0), HX(thresh,0), 3 CLRA SACC +++ +++ vadd HX(ptest,0),HX(dp,3),HX(dp,0) +++ vsub HX(ptest,0),HX(ptest,0),HX(thresh,0) # ptest is negative if we need to do the P2 pixel +++ vadd HX(qtest,0),HX(dq,3),HX(dq,0) +++ vsub HX(qtest,0),HX(qtest,0),HX(thresh,0) # qtest is negative if we need to do the Q2 pixel +++ # Expand ptest and qtest together +++ vodd HX(pqtest,0),HX(ptest,0),HX(qtest,0) # p.p.p.p.q.q.q.q +++ vodd HX(pqtest,0),HX(pqtest,0),HX(pqtest,0) # ppppqqqq........ 
+++ valtl HX(pqtest,0),HX(pqtest,0),HX(pqtest,0) # ppppppppqqqqqqqq +++ valtl HX(ptest,0),HX(pqtest,0),HX(pqtest,0) +++ valtu HX(qtest,0),HX(pqtest,0),HX(pqtest,0) +++ +++ vsub HX(delta0,0), HX(Q0,0), HX(P0,0) +++ vsub HX(delta1,0), HX(Q1,0), HX(P1,0) +++ vmov -,8 CLRA SACC +++ vmul -,HX(delta0,0), 9 SACC +++ vmul HX(delta0,0),HX(delta1,0), r6 SACC +++ vasr HX(delta0,0), HX(delta0,0), 4 +++ vdist HX(deltatest,0), HX(delta0,0), 0 +++ vsub HX(deltatest,0), HX(deltatest,0), HX(tc10,0) IFN SETF # negative if still need to do something +++ vmov HX(deltatest,0), 0 IFNN # clear if no need to do anything so we can reload flags later +++ +++ vclamps HX(delta0,0), HX(delta0,0), HX(tc,0) +++ +++ vadd HX(deltap1,0), HX(P2,0), HX(P0,0) +++ vadd HX(deltap1,0), HX(deltap1,0), 1 +++ vasr HX(deltap1,0), HX(deltap1,0), 1 CLRA SACC +++ vsub HX(deltap1,0), HX(delta0,0), HX(P1,0) SACC +++ vasr HX(deltap1,0), HX(deltap1,0), 1 +++ vclamps HX(deltap1,0), HX(deltap1,0), HX(tc2,0) +++ +++ vadd HX(deltaq1,0), HX(Q2,0), HX(Q0,0) +++ vadd HX(deltaq1,0), HX(deltaq1,0), 1 +++ vasr HX(deltaq1,0), HX(deltaq1,0), 1 CLRA SACC +++ vadd HX(deltaq1,0), HX(delta0,0), HX(Q1,0) +++ vrsub -, HX(delta0,0), 0 SACC +++ vrsub HX(deltaq1,0), HX(Q1,0), 0 SACC +++ vasr HX(deltaq1,0), HX(deltaq1,0), 1 +++ vclamps HX(deltaq1,0), HX(deltaq1,0), HX(tc2,0) +++ +++ vadds HX(P0,0), HX(P0,0), HX(delta0,0) IFN +++ vsubs HX(Q0,0), HX(Q0,0), HX(delta0,0) IFN +++ +++ vmov -,HX(ptest,0) IFN SETF # Negative if need to do p1 +++ vadds HX(P1,0), HX(P1,0), HX(deltap1,0) IFN +++ +++ vmov -,HX(deltatest,0) SETF +++ vmov -,HX(qtest,0) IFN SETF # Negative if need to do q1 +++ vadds HX(Q1,0), HX(Q1,0), HX(deltaq1,0) IFN +++ +++ #vmov HX(P2,0),1 IFN +++ +++filtering_done: +++ b lr ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 0121fca..05b2169 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -147,7 +147,7 @@ static int gpu_init(volatile struct GPU **gpu) { ++ vcsm_init(); ++ 
gpu_malloc_uncached_internal(sizeof(struct GPU), &gpu_mem_ptr, mb); ++ ptr = (volatile struct GPU*)gpu_mem_ptr.arm; ++- memset(ptr, 0, sizeof *ptr); +++ memset((void*)ptr, 0, sizeof *ptr); ++ vc = gpu_mem_ptr.vc; ++ ++ ptr->mb = mb; ++@@ -254,7 +254,7 @@ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ struct vcsm_user_clean_invalid_s iocache = {}; ++ iocache.s[0].handle = p->vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = p->arm; +++ iocache.s[0].addr = (int) p->arm; ++ iocache.s[0].size = p->numbytes; ++ vcsm_clean_invalid( &iocache ); ++ #else ++@@ -390,6 +390,7 @@ static void *vpu_start(void *arg) { ++ #ifdef RPI_TIME_TOTAL_POSTED ++ int last_time=0; ++ long long on_time=0; +++ long long on_time_deblock=0; ++ long long off_time=0; ++ int start_time; ++ int end_time; ++@@ -451,10 +452,13 @@ static void *vpu_start(void *arg) { ++ #ifdef RPI_TIME_TOTAL_POSTED ++ end_time = Microseconds(); ++ last_time = end_time; ++- on_time += end_time - start_time; +++ if (p[6]==2) +++ on_time_deblock += end_time - start_time; +++ else +++ on_time += end_time - start_time; ++ count++; ++ if ((count&0x7f)==0) ++- printf("Posted %d On=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(off_time/1000)); +++ printf("Posted %d On=%dms, On_deblock=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(on_time_deblock/1000),(int)(off_time/1000)); ++ #endif ++ pthread_mutex_lock(&post_mutex); ++ vpu_async_head++; ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index e86eb30..c5d8b29 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -61,7 +61,7 @@ unsigned int rpi_shader[] = { ++ /* [0x00000120] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 ++ /* [0x00000128] */ 0x0c9e7440, 0x10020f27, // add t1s, r2, r1 ++ /* [0x00000130] */ 0x00000008, 0xe00208a7, // mov r2,8 ++-/* [0x00000138] */ 0x11827c80, 0x10021327, // shl rb12,unif, r2 +++/* [0x00000138] */ 0x11827c80, 0x10021327, // shl rb12,unif,r2 ++ /* 
[0x00000140] */ 0x0c827c80, 0x10021367, // add rb13,unif,r2 ++ /* [0x00000148] */ 0x15827d80, 0x100208a7, // mov r2, unif ++ /* [0x00000150] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-- ++2.7.4 ++ ++ ++From e9c59f0d7b42dfb10d85ab2477f95b44484a8d70 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 1 Jul 2015 09:21:17 +0100 ++Subject: [PATCH 65/68] Added ability to combine jobs ++ ++--- ++ libavcodec/rpi_qpu.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++- ++ 1 file changed, 80 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 05b2169..91777be 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -8,6 +8,8 @@ ++ #define RPI_TIME_TOTAL_POSTED ++ // define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion ++ #define RPI_ASYNC +++// Define RPI_COMBINE_JOBS to find jobs that can be executed in parallel +++#define RPI_COMBINE_JOBS ++ ++ #include ++ #include ++@@ -398,9 +400,15 @@ static void *vpu_start(void *arg) { ++ #endif ++ while(1) { ++ int i; ++- int *p; +++ int *p; // Pointer for a QPU/VPU job +++#ifdef RPI_COMBINE_JOBS +++ int *q = NULL; // Pointer for a VPU only job +++ int have_qpu = 0; +++ int have_vpu = 0; +++#endif ++ int qpu_code; ++ int qpu_codeb; +++ int num_jobs; // Number of jobs available ++ pthread_mutex_lock(&post_mutex); ++ while( vpu_async_tail - vpu_async_head <= 0) ++ { ++@@ -408,13 +416,38 @@ static void *vpu_start(void *arg) { ++ pthread_cond_wait(&post_cond_tail, &post_mutex); ++ } ++ p = vpu_cmds[vpu_async_head%MAXCMDS]; +++ num_jobs = vpu_async_tail - vpu_async_head; ++ pthread_mutex_unlock(&post_mutex); ++ ++ if (p[6] == -1) { ++ break; // Last job ++ } +++ if (p[7] == 0 && p[0] == 0 && p[16]==0) +++ goto job_done_early; +++ +++#ifdef RPI_COMBINE_JOBS +++ // First scan for a qpu job +++ for (int x=0;xmail[i*2] = p[8+i]; +++ gpu->mail[i*2 + 1] = qpu_code; +++ } +++ for(i=0;i<12;i++) { +++ gpu->mail2[i*2] 
= p[17+i]; +++ gpu->mail2[i*2 + 1] = qpu_codeb; +++ } +++ if (have_vpu) { +++ execute_multi(gpu->mb, +++ 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, +++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 +++ q[0], q[1], q[2], q[3], q[4], q[5], q[6]); // VPU1 +++ q[0] = 0; +++ } else { +++ execute_multi(gpu->mb, +++ 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, +++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 +++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 +++ } +++ p[0] = 0; +++ p[7] = 0; +++ p[16] = 0; +++ } else { +++ av_assert0(have_vpu); +++ vpu_execute_code(q[0], q[1], q[2], q[3], q[4], q[5], q[6]); +++ q[0] = 0; +++ } +++#else +++ ++ if (!qpu_code) { ++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); ++ } else { ++@@ -449,17 +516,29 @@ static void *vpu_start(void *arg) { ++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 ++ #endif ++ } +++#endif +++ ++ #ifdef RPI_TIME_TOTAL_POSTED ++ end_time = Microseconds(); ++ last_time = end_time; +++#ifdef RPI_COMBINE_JOBS +++ // There are three cases we may wish to distinguish of VPU/QPU activity +++ on_time += end_time - start_time; +++#else ++ if (p[6]==2) ++ on_time_deblock += end_time - start_time; ++ else ++ on_time += end_time - start_time; +++#endif ++ count++; ++ if ((count&0x7f)==0) +++#ifdef RPI_COMBINE_JOBS ++ printf("Posted %d On=%dms, On_deblock=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(on_time_deblock/1000),(int)(off_time/1000)); +++#else +++ printf("Posted %d On=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(off_time/1000)); +++#endif ++ #endif +++job_done_early: ++ pthread_mutex_lock(&post_mutex); ++ vpu_async_head++; ++ pthread_cond_broadcast(&post_cond_head); ++-- ++2.7.4 ++ ++ ++From 0d54661f303b2a8903e806648ed54a34dcf315dc Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 1 Jul 2015 12:53:10 +0100 
++Subject: [PATCH 66/68] Added chroma deblocking ++ ++--- ++ libavcodec/hevc.c | 20 ++ ++ libavcodec/hevc.h | 12 +- ++ libavcodec/hevc_filter.c | 92 +++++- ++ libavcodec/rpi_hevc_transform.h | 644 +++++++++++++++++++++++++++++++++++++++- ++ libavcodec/rpi_hevc_transform.s | 207 +++++++++++++ ++ libavcodec/rpi_qpu.c | 27 +- ++ libavcodec/rpi_shader.qasm | 11 + ++ 7 files changed, 988 insertions(+), 25 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 4ce94a7..8437e10 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -251,6 +251,14 @@ static void pic_arrays_free(HEVCContext *s) ++ gpu_free(&s->y_setup_ptr); ++ s->y_setup_arm = 0; ++ } +++ if (s->uv_setup_arm) { +++ gpu_free(&s->uv_setup_ptr); +++ s->uv_setup_arm = 0; +++ } +++ if (s->vpu_cmds_arm) { +++ gpu_free(&s->vpu_cmds_ptr); +++ s->vpu_cmds_arm = 0; +++ } ++ #endif ++ av_freep(&s->sao); ++ av_freep(&s->deblock); ++@@ -324,6 +332,18 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ s->y_setup_vc = (void*)s->y_setup_ptr.vc; ++ memset(s->y_setup_arm, 0, s->y_setup_ptr.numbytes); ++ printf("Setup %d by %d by %d\n",s->setup_width,s->setup_height,sizeof(*s->y_setup_arm)); +++ +++ s->uv_setup_width = ( (sps->width >> sps->hshift[1]) + 15) / 16; +++ s->uv_setup_height = ( (sps->height >> sps->vshift[1]) + 15) / 16; +++ gpu_malloc_uncached(sizeof(*s->uv_setup_arm) * s->uv_setup_width * s->uv_setup_height, &s->uv_setup_ptr); // TODO make this cached +++ s->uv_setup_arm = (void*)s->uv_setup_ptr.arm; +++ s->uv_setup_vc = (void*)s->uv_setup_ptr.vc; +++ memset(s->uv_setup_arm, 0, s->uv_setup_ptr.numbytes); +++ printf("Setup uv %d by %d by %d\n",s->uv_setup_width,s->uv_setup_height,sizeof(*s->uv_setup_arm)); +++ +++ gpu_malloc_uncached(sizeof(*s->vpu_cmds_arm) * 3,&s->vpu_cmds_ptr); +++ s->vpu_cmds_arm = (void*) s->vpu_cmds_ptr.arm; +++ s->vpu_cmds_vc = s->vpu_cmds_ptr.vc; ++ #endif ++ ++ s->bs_width = (width >> 2) + 1; ++diff --git a/libavcodec/hevc.h 
b/libavcodec/hevc.h ++index cf08489..7eb37e6 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -56,7 +56,7 @@ ++ #define RPI_MAX_JOBS 2 ++ // Define RPI_WORKER to launch a worker thread for pixel processing tasks ++ #define RPI_WORKER ++- +++ // Define RPI_DEBLOCK_VPU to perform deblocking on the VPUs ++ #define RPI_DEBLOCK_VPU ++ ++ #endif ++@@ -980,6 +980,16 @@ typedef struct HEVCContext { ++ uint8_t (*y_setup_vc)[2][2][2][4]; ++ int setup_width; // Number of 16x16 blocks across the image ++ int setup_height; // Number of 16x16 blocks down the image +++ +++ GPU_MEM_PTR_T uv_setup_ptr; +++ uint8_t (*uv_setup_arm)[2][2][2][4]; // Half of this is unused [][][1][], but easier for the VPU as it allows us to store with zeros and addresses are aligned +++ uint8_t (*uv_setup_vc)[2][2][2][4]; +++ int uv_setup_width; +++ int uv_setup_height; +++ +++ GPU_MEM_PTR_T vpu_cmds_ptr; +++ int (*vpu_cmds_arm)[6]; // r0-r5 for each command +++ int vpu_cmds_vc; ++ #endif ++ ++ #endif ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 06371da..6367068 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -656,9 +656,23 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->frame->linesize[chroma], ++ c_tc, no_p, no_q); ++ } else +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock) { +++ uint8_t (*setup)[2][2][4]; +++ int xc = x>>s->ps.sps->hshift[chroma]; +++ int yc = y>>s->ps.sps->vshift[chroma]; +++ int num16 = (yc>>4)*s->uv_setup_width + (xc>>4); +++ int a = ((yc>>3) & 1) << 1; +++ int b = (xc>>3) & 1; +++ setup = s->uv_setup_arm[num16]; +++ setup[0][b][0][a] = c_tc[0]; +++ setup[0][b][0][a + 1] = c_tc[1]; +++ } else +++#endif ++ s->hevcdsp.hevc_v_loop_filter_chroma(src, ++ s->frame->linesize[chroma], ++ c_tc, no_p, no_q); +++ ++ } ++ } ++ ++@@ -689,6 +703,19 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->frame->linesize[chroma], ++ c_tc, no_p, no_q); ++ } else 
+++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock) { +++ uint8_t (*setup)[2][2][4]; +++ int xc = x>>s->ps.sps->hshift[chroma]; +++ int yc = y>>s->ps.sps->vshift[chroma]; +++ int num16 = (yc>>4)*s->uv_setup_width + (xc>>4); +++ int a = ((xc>>3) & 1) << 1; +++ int b = (yc>>3) & 1; +++ setup = s->uv_setup_arm[num16]; +++ setup[1][b][0][a] = c_tc[0]; +++ setup[1][b][0][a + 1] = c_tc[1]; +++ } else +++#endif ++ s->hevcdsp.hevc_h_loop_filter_chroma(src, ++ s->frame->linesize[chroma], ++ c_tc, no_p, no_q); ++@@ -1013,33 +1040,56 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ static void rpi_deblock(HEVCContext *s, int y, int ctb_size) ++ { ++ // Flush image, 4 lines above to bottom of ctb stripe ++- ff_hevc_flush_buffer_lines(s, FFMAX(y-4,0), y+ctb_size, 1, 0); +++ ff_hevc_flush_buffer_lines(s, FFMAX(y-4,0), y+ctb_size, 1, 1); ++ // TODO flush buffer of beta/tc setup when it becomes cached +++ +++ // Prepare three commands at once to avoid calling overhead +++ s->vpu_cmds_arm[0][0] = get_vc_address(s->frame->buf[0]) + s->frame->linesize[0] * y; +++ s->vpu_cmds_arm[0][1] = s->frame->linesize[0]; +++ s->vpu_cmds_arm[0][2] = s->setup_width; +++ s->vpu_cmds_arm[0][3] = (int) ( s->y_setup_vc + s->setup_width * (y>>4) ); +++ s->vpu_cmds_arm[0][4] = ctb_size>>4; +++ s->vpu_cmds_arm[0][5] = 2; +++ +++ s->vpu_cmds_arm[1][0] = get_vc_address(s->frame->buf[1]) + s->frame->linesize[1] * (y>> s->ps.sps->vshift[1]); +++ s->vpu_cmds_arm[1][1] = s->frame->linesize[1]; +++ s->vpu_cmds_arm[1][2] = s->uv_setup_width; +++ s->vpu_cmds_arm[1][3] = (int) ( s->uv_setup_vc + s->uv_setup_width * ((y>>4)>> s->ps.sps->vshift[1]) ); +++ s->vpu_cmds_arm[1][4] = (ctb_size>>4)>> s->ps.sps->vshift[1]; +++ s->vpu_cmds_arm[1][5] = 3; +++ +++ s->vpu_cmds_arm[2][0] = get_vc_address(s->frame->buf[2]) + s->frame->linesize[2] * (y>> s->ps.sps->vshift[2]); +++ s->vpu_cmds_arm[2][1] = s->frame->linesize[2]; +++ s->vpu_cmds_arm[2][2] = s->uv_setup_width; +++ s->vpu_cmds_arm[2][3] = 
(int) ( s->uv_setup_vc + s->uv_setup_width * ((y>>4)>> s->ps.sps->vshift[1]) ); +++ s->vpu_cmds_arm[2][4] = (ctb_size>>4)>> s->ps.sps->vshift[1]; +++ s->vpu_cmds_arm[2][5] = 4; +++ ++ // Call VPU ++- // TODO add this to a separate pipeline of VPU jobs that can be run in parallel and wait for completion ++- vpu_wait(vpu_post_code( vpu_get_fn(), get_vc_address(s->frame->buf[0]) + s->frame->linesize[0] * y, s->frame->linesize[0], ++- s->setup_width, (int) ( s->y_setup_vc + s->setup_width * (y>>4) ), ++- ctb_size>>4, 2, 0)); // 2 means to do the deblocking code +++ vpu_wait(vpu_post_code( vpu_get_fn(), s->vpu_cmds_vc, 3, 0, 0, 0, 5, 0)); // 5 means to do all the commands ++ } ++ ++-static void rpi_deblock2(HEVCContext *s, int y, int ctb_size) ++-{ ++- int y2; ++- for(y2=y;y2= s->ps.sps->width - ctb_size; +++#ifdef RPI_DEBLOCK_VPU +++ int done_deblock = 0; +++#endif ++ if (s->avctx->skip_loop_filter < AVDISCARD_ALL) ++ deblocking_filter_CTB(s, x, y); ++ #ifdef RPI_DEBLOCK_VPU ++ if (s->enable_rpi_deblock && x_end) ++ { ++- rpi_deblock(s, y, ctb_size); +++ int y_at_end = y >= s->ps.sps->height - ctb_size; +++ int height = 64; // Deblock in units 64 high to avoid too many VPU calls +++ int y_start = y&~63; +++ if (y_at_end) height = s->ps.sps->height - y_start; +++ if ((((y+ctb_size)&63)==0) || y_at_end) { +++ done_deblock = 1; +++ rpi_deblock(s, y_start, height); +++ } ++ } ++ #endif ++ if (s->ps.sps->sao_enabled) { ++@@ -1070,11 +1120,25 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ //int newh = y + ctb_size - 4; ++ //int currh = s->ref->tf.progress->data[0]; ++ //if (((y + ctb_size)&63)==0) +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock) { +++ // we no longer need to flush the luma buffer as it is in GPU memory when using deblocking on the rpi +++ if (done_deblock) { +++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); +++ } +++ } else { +++#ifdef RPI_INTER_QPU +++ ff_hevc_flush_buffer(s, &s->ref->tf, y + ctb_size - 4); 
+++#endif +++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); +++ } +++#else ++ #ifdef RPI_INTER_QPU ++ ff_hevc_flush_buffer(s, &s->ref->tf, y + ctb_size - 4); ++- // TODO we no longer need to flush the luma buffer as it is in GPU memory when using deblocking on the rpi +++ // we no longer need to flush the luma buffer as it is in GPU memory when using deblocking on the rpi ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); +++#endif ++ } ++ } ++ ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++index b3f155f..4309f1c 100644 ++--- a/libavcodec/rpi_hevc_transform.h +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -3,14 +3,32 @@ unsigned char rpi_hevc_transform [] = { ++ 106, ++ 0, ++ 144, ++-38, +++47, ++ 1, ++ 37, ++ 106, ++ 0, ++ 144, ++-57, +++66, ++ 1, +++53, +++106, +++0, +++144, +++192, +++4, +++69, +++106, +++0, +++144, +++192, +++4, +++85, +++106, +++0, +++144, +++220, +++5, ++ 169, ++ 3, ++ 62, ++@@ -2427,4 +2445,626 @@ unsigned char rpi_hevc_transform [] = { ++ 128, ++ 90, ++ 0, +++169, +++3, +++14, +++96, +++4, +++31, +++169, +++3, +++30, +++96, +++1, +++31, +++73, +++64, +++52, +++64, +++45, +++64, +++2, +++64, +++10, +++64, +++64, +++198, +++1, +++7, +++8, +++232, +++63, +++0, +++0, +++0, +++6, +++232, +++253, +++255, +++255, +++255, +++0, +++246, +++0, +++0, +++0, +++4, +++215, +++64, +++3, +++96, +++2, +++248, +++0, +++35, +++0, +++0, +++64, +++56, +++0, +++0, +++4, +++248, +++0, +++36, +++0, +++0, +++64, +++56, +++8, +++0, +++0, +++240, +++64, +++0, +++132, +++3, +++30, +++106, +++132, +++24, +++128, +++240, +++0, +++0, +++132, +++3, +++128, +++144, +++143, +++0, +++131, +++98, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++135, +++0, +++131, +++102, +++0, +++158, +++71, +++0, +++2, +++248, +++0, +++35, +++0, +++0, +++64, +++56, +++0, +++0, +++4, +++248, +++0, +++36, +++0, +++0, +++64, +++56, +++8, +++0, +++0, +++240, +++64, +++0, +++132, +++3, 
+++30, +++106, +++132, +++24, +++128, +++240, +++0, +++0, +++132, +++3, +++128, +++144, +++112, +++0, +++131, +++98, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++104, +++0, +++131, +++102, +++0, +++248, +++64, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++30, +++106, +++134, +++24, +++128, +++248, +++0, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++144, +++123, +++0, +++188, +++64, +++67, +++232, +++0, +++2, +++0, +++0, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++112, +++0, +++195, +++232, +++0, +++2, +++0, +++0, +++12, +++128, +++7, +++192, +++130, +++248, +++0, +++0, +++112, +++192, +++224, +++16, +++195, +++31, +++132, +++248, +++1, +++0, +++112, +++0, +++224, +++16, +++203, +++31, +++3, +++99, +++131, +++71, +++68, +++232, +++32, +++0, +++0, +++0, +++0, +++99, +++2, +++99, +++23, +++102, +++7, +++106, +++127, +++156, +++178, +++255, +++0, +++248, +++64, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++30, +++106, +++134, +++24, +++128, +++248, +++0, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++144, +++72, +++0, +++188, +++64, +++67, +++232, +++0, +++2, +++0, +++0, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++61, +++0, +++195, +++232, +++0, +++2, +++0, +++0, +++12, +++128, +++7, +++192, +++130, +++248, +++0, +++0, +++112, +++192, +++224, +++16, +++195, +++31, +++132, +++248, +++1, +++0, +++112, +++0, +++224, +++16, +++203, +++31, +++25, +++102, +++9, +++106, +++2, +++30, +++41, +++3, +++26, +++87, +++162, +++64, +++64, +++198, +++1, +++23, +++127, +++158, +++95, +++255, +++239, +++3, +++0, +++254, +++128, +++143, +++94, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++143, +++95, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++142, +++208, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++142, +++209, +++0, +++0, +++240, +++12, +++0, +++128, +++144, +++47, +++0, 
+++8, +++255, +++227, +++23, +++0, +++244, +++192, +++51, +++0, +++0, +++8, +++255, +++35, +++52, +++0, +++180, +++192, +++51, +++0, +++0, +++111, +++3, +++239, +++3, +++0, +++254, +++128, +++143, +++14, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++143, +++15, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++142, +++16, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++142, +++17, +++0, +++0, +++240, +++12, +++0, +++128, +++144, +++13, +++0, +++8, +++255, +++227, +++3, +++0, +++244, +++192, +++51, +++0, +++0, +++8, +++255, +++35, +++4, +++0, +++180, +++192, +++51, +++0, +++0, +++111, +++3, +++32, +++246, +++192, +++11, +++1, +++16, +++32, +++246, +++2, +++140, +++47, +++240, +++32, +++247, +++35, +++141, +++63, +++178, +++64, +++254, +++35, +++141, +++2, +++68, +++192, +++243, +++128, +++11, +++32, +++255, +++35, +++240, +++58, +++226, +++192, +++243, +++188, +++10, +++0, +++254, +++0, +++141, +++4, +++4, +++0, +++240, +++128, +++10, +++88, +++246, +++35, +++141, +++3, +++68, +++240, +++246, +++35, +++141, +++48, +++66, +++0, +++247, +++227, +++143, +++52, +++242, +++32, +++247, +++227, +++142, +++52, +++178, +++90, +++0, +++161, +++3, +++6, +++64, +++23, +++64, +++96, +++8, +++70, +++98, +++97, +++8, +++70, +++98, +++98, +++8, +++70, +++98, +++99, +++8, +++70, +++98, +++100, +++8, +++70, +++98, +++101, +++8, +++70, +++98, +++255, +++159, +++8, +++250, +++23, +++102, +++7, +++106, +++112, +++30, +++33, +++3, ++ }; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++index b055208..5543093 100644 ++--- a/libavcodec/rpi_hevc_transform.s +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -85,6 +85,13 @@ hevc_trans_16x16: ++ beq memclear16 ++ cmp r5,2 ++ beq hevc_deblock_16x16 +++ cmp r5,3 +++ beq hevc_uv_deblock_16x16 +++ cmp r5,4 +++ beq hevc_uv_deblock_16x16_with_clear +++ cmp r5,5 +++ beq hevc_run_command_list +++ ++ push r6-r15, lr # TODO cut down number of used registers ++ mov r14,r3 # coeffs32 ++ mov r15,r4 
# num32 ++@@ -708,3 +715,203 @@ normal_filtering: ++ ++ filtering_done: ++ b lr +++ +++ +++hevc_uv_deblock_16x16: +++ push r6-r15, lr +++ mov r14,0 +++ b hevc_uv_start +++hevc_uv_deblock_16x16_with_clear: +++ push r6-r15, lr +++ mov r14,1 +++ b hevc_uv_start +++ +++hevc_uv_start: +++ mov r9,r4 +++ mov r4,r3 +++ mov r13,r2 +++ mov r2,r0 +++ mov r10,r0 +++ subscale4 r0,r1 +++ mov r8,63 +++ mov r6,-3 +++ vmov H(zeros,0),0 +++# r7 is number of blocks still to load +++# r0 is location of current block - 4 * stride +++# r1 is stride +++# r2 is location of current block +++# r3 is offset of start of block (actual edges start at H(16,16)+r3 for horizontal and H(16,0)+r3 for vertical +++# r4 is setup +++# r5 is for temporary calculations +++# r8 holds 63 +++# r6 holds -3 +++# r9 holds the number of 16 high rows to process +++# r10 holds the original img base +++# r11 returns 0 if no filtering was done on the edge +++# r12 saves a copy of this +++# r13 is copy of width +++# r14 is 1 if we should clear the old contents, or 0 if not +++ +++uv_process_row: +++ # First iteration does not do horizontal filtering on previous +++ mov r7, r13 +++ mov r3,0 +++ vldb H(12++,16)+r3,(r0 += r1) REP 4 # Load the current block +++ vldb H(16++,16)+r3,(r2 += r1) REP 16 +++ vldb H(setup_input,0), (r4) # We may wish to prefetch these +++ cmp r14,1 +++ bne uv_skip0 +++ vstb H(zeros,0),(r4) +++uv_skip0: +++ bl uv_vert_filter +++ add r3,8 +++ vadd H(setup_input,0),H(setup_input,8),0 # Rotate to second set of 8 +++ bl uv_vert_filter +++ sub r3,8 +++ b uv_start_deblock_loop +++uv_deblock_loop: +++ # Middle iterations do vertical on current block and horizontal on preceding +++ vldb H(12++,16)+r3,(r0 += r1) REP 4 # load the current block +++ vldb H(16++,16)+r3,(r2 += r1) REP 16 +++ vldb H(setup_input,0), (r4) +++ cmp r14,1 +++ bne uv_skip1 +++ vstb H(zeros,0),(r4) +++uv_skip1: +++ bl uv_vert_filter +++ add r3,8 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl uv_vert_filter +++ sub r3,8 +++ vldb 
H(setup_input,0), -16(r4) +++ cmp r14,1 +++ bne uv_skip3 +++ vstb H(zeros,0),-16(r4) +++uv_skip3: +++ bl uv_horz_filter +++ mov r12,r11 +++ add r3,8*64 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl uv_horz_filter +++ sub r3,8*64 +++ addcmpbeq r12,0,0,uv_skip_save_top +++ vstb H(12++,0)+r3,-16(r0 += r1) REP 4 # Save the deblocked pixels for the previous block +++uv_skip_save_top: +++ vstb H(16++,0)+r3,-16(r2 += r1) REP 16 +++uv_start_deblock_loop: +++ # move onto next 16x16 (could do this with circular buffer support instead) +++ add r3,16 +++ and r3,r8 +++ add r4,32 +++ # Perform loop counter operations (may work with an addcmpbgt as well?) +++ add r0,16 +++ add r2,16 +++ sub r7,1 +++ cmp r7,0 # Are there still more blocks to load +++ bgt uv_deblock_loop +++ +++ # Final iteration needs to just do horizontal filtering +++ vldb H(setup_input,0), -16(r4) +++ cmp r14,1 +++ bne uv_skip2 +++ vstb H(zeros,0),-16(r4) +++uv_skip2: +++ bl uv_horz_filter +++ mov r12,r11 +++ add r3,8*64 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl uv_horz_filter +++ sub r3,64*8 +++ addcmpbeq r12,0,0,uv_skip_save_top2 +++ vstb H(12++,0)+r3,-16(r0 += r1) REP 4 # Save the deblocked pixels for the previous block +++uv_skip_save_top2: +++ vstb H(16++,0)+r3,-16(r2 += r1) REP 16 +++ +++# Now look to see if we should do another row +++ sub r9,1 +++ cmp r9,0 +++ bgt uv_start_again +++ pop r6-r15, pc +++uv_start_again: +++ # Need to sort out r0,r2 to point to next row down +++ addscale16 r10,r1 +++ mov r2,r10 +++ subscale4 r0,r2,r1 +++ b uv_process_row +++ +++ +++# At this stage H(16,16)+r3 points to the first pixel of the 16 high edge to be filtered +++# So we can reuse the code we move the parts to be filtered into HX(P0/P1/P2/P3/Q0/Q1/Q2/Q3,0) - we will perform a final saturation step on placing them back into the correct locations +++ +++uv_vert_filter: +++ push lr +++ +++ vmov HX(P1,0), V(16,14)+r3 +++ vmov HX(P0,0), V(16,15)+r3 +++ vmov HX(Q0,0), V(16,16)+r3 +++ vmov HX(Q1,0), 
V(16,17)+r3 +++ +++ bl do_chroma_filter +++ +++ vadds V(16,15)+r3, HX(P0,0), 0 +++ vadds V(16,16)+r3, HX(Q0,0), 0 +++ +++ pop pc +++ +++# Filter edge at H(16,0)+r3 +++uv_horz_filter: +++ push lr +++ +++ vmov HX(P1,0), H(14,0)+r3 +++ vmov HX(P0,0), H(15,0)+r3 +++ vmov HX(Q0,0), H(16,0)+r3 +++ vmov HX(Q1,0), H(17,0)+r3 +++ +++ bl do_chroma_filter +++ +++ vadds H(15,0)+r3, HX(P0,0), 0 +++ # P3 and Q3 never change so don't bother saving back +++ vadds H(16,0)+r3, HX(Q0,0), 0 +++ +++ pop pc +++ +++# r4 points to array of beta/tc for each 4 length edge +++do_chroma_filter: +++ valtl H(setup,0),H(setup_input,0),H(setup_input,0) # tc*8 +++ valtl HX(tc,0),H(setup,0),H(setup,0) +++ +++ vsub HX(delta,0),HX(Q0,0),HX(P0,0) +++ vshl HX(delta,0),HX(delta,0),2 CLRA SACC +++ vsub -,HX(P1,0),HX(Q1,0) SACC +++ vmov HX(delta,0),4 SACC +++ vasr HX(delta,0),HX(delta,0),3 +++ vclamps HX(delta,0), HX(delta,0), HX(tc,0) +++ vadd HX(P0,0),HX(P0,0),HX(delta,0) +++ vsub HX(Q0,0),HX(Q0,0),HX(delta,0) +++ b lr +++ +++# r0 = list +++# r1 = number +++hevc_run_command_list: +++ push r6-r7, lr +++ mov r6, r0 +++ mov r7, r1 +++loop_cmds: +++ ld r0,(r6) # How to encode r6++? 
+++ add r6,4 +++ ld r1,(r6) +++ add r6,4 +++ ld r2,(r6) +++ add r6,4 +++ ld r3,(r6) +++ add r6,4 +++ ld r4,(r6) +++ add r6,4 +++ ld r5,(r6) +++ add r6,4 +++ bl hevc_trans_16x16 +++ sub r7,1 +++ cmp r7,0 +++ bgt loop_cmds +++ +++ pop r6-r7, pc ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 91777be..5aa0432 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -397,6 +397,8 @@ static void *vpu_start(void *arg) { ++ int start_time; ++ int end_time; ++ int count=0; +++ int count_deblock=0; +++ int count_qpu=0; ++ #endif ++ while(1) { ++ int i; ++@@ -442,7 +444,7 @@ static void *vpu_start(void *arg) { ++ break; ++ } ++ } ++- printf("Have_qpu = %d, have_vpu=%d\n",have_qpu,have_vpu); +++ //printf("Have_qpu = %d, have_vpu=%d\n",have_qpu,have_vpu); ++ #endif ++ qpu_code = p[7]; ++ qpu_codeb = p[16]; ++@@ -460,6 +462,12 @@ static void *vpu_start(void *arg) { ++ off_time += start_time-last_time; ++ #endif ++ +++#define NO_FLUSH 1 +++#define CLEAR_PROFILE 2 +++#define OUTPUT_COUNTS 4 +++ +++#define FLAGS_FOR_PROFILING (NO_FLUSH) +++ ++ #ifdef RPI_COMBINE_JOBS ++ if (have_qpu) { ++ for(i=0;i<8;i++) { ++@@ -472,14 +480,14 @@ static void *vpu_start(void *arg) { ++ } ++ if (have_vpu) { ++ execute_multi(gpu->mb, ++- 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 12,gpu->vc + offsetof(struct GPU, mail2), FLAGS_FOR_PROFILING, 5000, ++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, ++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 ++ q[0], q[1], q[2], q[3], q[4], q[5], q[6]); // VPU1 ++ q[0] = 0; ++ } else { ++ execute_multi(gpu->mb, ++- 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 12,gpu->vc + offsetof(struct GPU, mail2), FLAGS_FOR_PROFILING, 5000, ++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, ++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 ++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 ++@@ -510,7 +518,7 @@ static void *vpu_start(void *arg) { ++ 
execute_qpu(gpu->mb,8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */); ++ #else ++ execute_multi(gpu->mb, ++- 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 12,gpu->vc + offsetof(struct GPU, mail2), FLAGS_FOR_PROFILING , 5000, ++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, ++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 ++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 ++@@ -525,17 +533,20 @@ static void *vpu_start(void *arg) { ++ // There are three cases we may wish to distinguish of VPU/QPU activity ++ on_time += end_time - start_time; ++ #else ++- if (p[6]==2) +++ if (p[6]>1) { +++ count_deblock++; ++ on_time_deblock += end_time - start_time; ++- else +++ } else { ++ on_time += end_time - start_time; +++ count_qpu++; +++ } ++ #endif ++ count++; ++ if ((count&0x7f)==0) ++ #ifdef RPI_COMBINE_JOBS ++- printf("Posted %d On=%dms, On_deblock=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(on_time_deblock/1000),(int)(off_time/1000)); ++-#else ++ printf("Posted %d On=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(off_time/1000)); +++#else +++ printf("Posted %d On=%dms (%d calls), On_deblock=%dms (%d calls), Off=%dms\n",count,(int)(on_time/1000),count_qpu,(int)(on_time_deblock/1000),count_deblock,(int)(off_time/1000)); ++ #endif ++ #endif ++ job_done_early: ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 0686249..64bf5b0 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -1077,6 +1077,17 @@ nop ; nop # delay slot 2 ++ ::mc_interrupt_exit12 ++ mov -, vw_wait # wait on the VDW ++ +++ # Dummy wait to test instructions +++# mov r3,1000000 +++#:dummy_loop +++# sub.setf r3, r3, 1 +++# nop +++# nop +++# brr.anynn -, r:dummy_loop +++# nop +++# nop +++# nop +++ ++ ldtmu0 ++ ldtmu0 ++ ldtmu1 ++-- ++2.7.4 ++ ++ ++From 12a194bddd049ab97154e9fbdd46b63b558a3bee Mon Sep 17 00:00:00 2001 ++From: Ben Avison ++Date: Tue, 23 Jun 2015 23:42:03 +0100 
++Subject: [PATCH 67/68] armv7/hevc: Optimise deblocking boundary strength ++ calculation ++ ++--- ++ libavcodec/arm/hevcdsp_deblock_neon.S | 115 +++++++++++++++++ ++ libavcodec/arm/hevcdsp_init_neon.c | 9 ++ ++ libavcodec/hevc.h | 11 -- ++ libavcodec/hevc_filter.c | 224 ++++++++++++++-------------------- ++ libavcodec/hevcdsp.c | 116 ++++++++++++++++++ ++ libavcodec/hevcdsp.h | 14 +++ ++ 6 files changed, 344 insertions(+), 145 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_deblock_neon.S b/libavcodec/arm/hevcdsp_deblock_neon.S ++index 166bddb..bad4589 100644 ++--- a/libavcodec/arm/hevcdsp_deblock_neon.S +++++ b/libavcodec/arm/hevcdsp_deblock_neon.S ++@@ -383,3 +383,118 @@ function ff_hevc_h_loop_filter_chroma_neon, export=1 ++ vst1.8 {d4}, [r0] ++ bx lr ++ endfunc +++ +++/* ff_hevc_deblocking_boundary_strengths_neon(int pus, int dup, int in_inc, int out_inc, +++ * int *curr_rpl0, int *curr_rpl1, int *neigh_rpl0, int *neigh_rpl1, +++ * MvField *curr, MvField *neigh, uint8_t *bs) +++ */ +++function ff_hevc_deblocking_boundary_strengths_neon, export=1 +++ add ip, sp, #4*4 +++ push {a2-a4,v1-v8,lr} +++ ldmia ip, {v5-v7} +++1: ldmdb ip, {v1-v4} +++ ldrsb a3, [v5, #8] @ curr->ref_idx +++ ldrsb v8, [v5, #9] +++ ldrsb ip, [v6, #8] @ neigh->ref_idx +++ ldrsb lr, [v6, #9] +++ ldr v1, [v1, a3, lsl #2] +++ ldrb a3, [v5, #10] @ curr->pred_flag +++ ldr v2, [v2, v8, lsl #2] +++ ldrb v8, [v6, #10] @ neigh->pred_flag +++ ldr v3, [v3, ip, lsl #2] +++ ldr v4, [v4, lr, lsl #2] +++ teq a3, #3 +++ beq 20f +++ teq v8, #3 +++ beq 90f +++ +++ tst a3, #1 +++ ldrne a3, [v5, #0] @ curr->mv[0] +++ ldreq a3, [v5, #4] @ curr->mv[1] +++ moveq v1, v2 +++ tst v8, #1 +++ ldrne v8, [v6, #0] @ neigh->mv[0] +++ ldreq v8, [v6, #4] @ neigh->mv[1] +++ moveq v3, v4 +++ teq v1, v3 +++ bne 10f +++ ldr lr, =0xFFFCFFFC +++ ssub16 ip, v8, a3 +++ ssub16 a3, a3, v8 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ @ drop through +++10: movne a3, #1 +++11: subs a2, a2, #1 +++12: strbhs a3, [v7], a4 +++ subs a2, 
a2, #1 +++ bhs 12b +++ +++ ldm sp, {a2, a3} +++ add ip, sp, #16*4 +++ subs a1, a1, #1 +++ add v5, v5, a3 +++ add v6, v6, a3 +++ bhi 1b +++ pop {a2-a4,v1-v8,pc} +++ +++20: teq v8, #3 +++ bne 10b +++ +++ teq v1, v3 +++ teqeq v2, v4 +++ bne 40f +++ teq v1, v2 +++ bne 30f +++ +++ ldrd v1, v2, [v5] @ curr->mv +++ ldrd v3, v4, [v6] @ neigh->mv +++ ldr lr, =0xFFFCFFFC +++ ssub16 ip, v3, v1 +++ ssub16 a3, v1, v3 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ bne 25f +++ ssub16 ip, v4, v2 +++ ssub16 a3, v2, v4 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ beq 11b +++ @ drop through +++25: ssub16 ip, v4, v1 +++ ssub16 a3, v1, v4 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ bne 10b +++ ssub16 ip, v3, v2 +++ ssub16 a3, v2, v3 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ b 10b +++ +++30: ldrd v1, v2, [v5] @ curr->mv +++ ldrd v3, v4, [v6] @ neigh->mv +++ ldr lr, =0xFFFCFFFC +++ ssub16 ip, v3, v1 +++ ssub16 a3, v1, v3 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ bne 10b +++ ssub16 ip, v4, v2 +++ ssub16 a3, v2, v4 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ b 10b +++ +++40: teq v1, v4 +++ teqeq v2, v3 +++ bne 10b +++ +++ ldrd v1, v2, [v5] @ curr->mv +++ ldrd v3, v4, [v6] @ neigh->mv +++ ldr lr, =0xFFFCFFFC +++ b 25b +++ +++90: mov a3, #1 +++ b 11b +++endfunc ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index e5da7e9..49c70dd 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -290,6 +290,10 @@ static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t ++ } ++ #undef CMP ++ +++void ff_hevc_deblocking_boundary_strengths_neon(int pus, int dup, int in_inc, int out_inc, +++ int *curr_rpl0, int *curr_rpl1, int *neigh_rpl0, int *neigh_rpl1, +++ MvField *curr, MvField *neigh, uint8_t *bs); +++ ++ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ { ++ if (bit_depth == 8) { ++@@ -387,4 +391,9 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) 
++ c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_qpel_uw_pixels_w48_neon_8; ++ c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_qpel_uw_pixels_w64_neon_8; ++ } +++ +++ assert(offsetof(MvField, mv) == 0); +++ assert(offsetof(MvField, ref_idx) == 8); +++ assert(offsetof(MvField, pred_flag) == 10); +++ c->hevc_deblocking_boundary_strengths = ff_hevc_deblocking_boundary_strengths_neon; ++ } ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 7eb37e6..496c0e1 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -684,17 +684,6 @@ typedef struct CodingUnit { ++ uint8_t cu_transquant_bypass_flag; ++ } CodingUnit; ++ ++-typedef struct Mv { ++- int16_t x; ///< horizontal component of motion vector ++- int16_t y; ///< vertical component of motion vector ++-} Mv; ++- ++-typedef struct MvField { ++- DECLARE_ALIGNED(4, Mv, mv)[2]; ++- int8_t ref_idx[2]; ++- int8_t pred_flag; ++-} MvField; ++- ++ typedef struct NeighbourAvailable { ++ int cand_bottom_left; ++ int cand_left; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 6367068..826a82f 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -726,69 +726,6 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ } ++ } ++ ++-static int boundary_strength(HEVCContext *s, MvField *curr, MvField *neigh, ++- RefPicList *neigh_refPicList) ++-{ ++- if (curr->pred_flag == PF_BI && neigh->pred_flag == PF_BI) { ++- // same L0 and L1 ++- if (s->ref->refPicList[0].list[curr->ref_idx[0]] == neigh_refPicList[0].list[neigh->ref_idx[0]] && ++- s->ref->refPicList[0].list[curr->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]] && ++- neigh_refPicList[0].list[neigh->ref_idx[0]] == neigh_refPicList[1].list[neigh->ref_idx[1]]) { ++- if ((FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 || ++- FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) && ++- (FFABS(neigh->mv[1].x - 
curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 || ++- FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4)) ++- return 1; ++- else ++- return 0; ++- } else if (neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[0].list[curr->ref_idx[0]] && ++- neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) { ++- if (FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 || ++- FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) ++- return 1; ++- else ++- return 0; ++- } else if (neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[0].list[curr->ref_idx[0]] && ++- neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) { ++- if (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 || ++- FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4) ++- return 1; ++- else ++- return 0; ++- } else { ++- return 1; ++- } ++- } else if ((curr->pred_flag != PF_BI) && (neigh->pred_flag != PF_BI)){ // 1 MV ++- Mv A, B; ++- int ref_A, ref_B; ++- ++- if (curr->pred_flag & 1) { ++- A = curr->mv[0]; ++- ref_A = s->ref->refPicList[0].list[curr->ref_idx[0]]; ++- } else { ++- A = curr->mv[1]; ++- ref_A = s->ref->refPicList[1].list[curr->ref_idx[1]]; ++- } ++- ++- if (neigh->pred_flag & 1) { ++- B = neigh->mv[0]; ++- ref_B = neigh_refPicList[0].list[neigh->ref_idx[0]]; ++- } else { ++- B = neigh->mv[1]; ++- ref_B = neigh_refPicList[1].list[neigh->ref_idx[1]]; ++- } ++- ++- if (ref_A == ref_B) { ++- if (FFABS(A.x - B.x) >= 4 || FFABS(A.y - B.y) >= 4) ++- return 1; ++- else ++- return 0; ++- } else ++- return 1; ++- } ++- ++- return 1; ++-} ++ ++ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ int log2_trafo_size) ++@@ -799,10 +736,17 @@ void 
ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ int log2_min_tu_size = s->ps.sps->log2_min_tb_size; ++ int min_pu_width = s->ps.sps->min_pu_width; ++ int min_tu_width = s->ps.sps->min_tb_width; ++- int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width + ++- (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA; ++ int boundary_upper, boundary_left; ++- int i, j, bs; +++ int i, j; +++ RefPicList *rpl = s->ref->refPicList; +++ int min_pu_in_4pix = (1 << log2_min_pu_size) >> 2; +++ int trafo_in_min_pus = (1 << log2_trafo_size) >> log2_min_pu_size; +++ int y_pu = y0 >> log2_min_pu_size; +++ int x_pu = x0 >> log2_min_pu_size; +++ MvField *curr = &tab_mvf[y_pu * min_pu_width + x_pu]; +++ int is_intra = curr->pred_flag == PF_INTRA; +++ int inc = log2_min_pu_size == 2 ? 2 : 1; +++ uint8_t *bs; ++ ++ #ifdef DISABLE_STRENGTHS ++ return; ++@@ -818,34 +762,56 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0))) ++ boundary_upper = 0; ++ +++ bs = &s->horizontal_bs[(x0 + y0 * s->bs_width) >> 2]; +++ ++ if (boundary_upper) { ++ RefPicList *rpl_top = (lc->boundary_flags & BOUNDARY_UPPER_SLICE) ? 
++ ff_hevc_get_ref_list(s, s->ref, x0, y0 - 1) : ++- s->ref->refPicList; ++- int yp_pu = (y0 - 1) >> log2_min_pu_size; ++- int yq_pu = y0 >> log2_min_pu_size; ++- int yp_tu = (y0 - 1) >> log2_min_tu_size; ++- int yq_tu = y0 >> log2_min_tu_size; +++ rpl; +++ MvField *top = curr - min_pu_width; +++ +++ if (is_intra) { +++ for (i = 0; i < (1 << log2_trafo_size); i += 4) +++ bs[i >> 2] = 2; +++ +++ } else { +++ int y_tu = y0 >> log2_min_tu_size; +++ int x_tu = x0 >> log2_min_tu_size; +++ uint8_t *curr_cbf_luma = &s->cbf_luma[y_tu * min_tu_width + x_tu]; +++ uint8_t *top_cbf_luma = curr_cbf_luma - min_tu_width; +++ +++ s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus, +++ min_pu_in_4pix, sizeof (MvField), 4 >> 2, +++ rpl[0].list, rpl[1].list, rpl_top[0].list, rpl_top[1].list, +++ curr, top, bs); ++ ++ for (i = 0; i < (1 << log2_trafo_size); i += 4) { ++- int x_pu = (x0 + i) >> log2_min_pu_size; ++- int x_tu = (x0 + i) >> log2_min_tu_size; ++- MvField *top = &tab_mvf[yp_pu * min_pu_width + x_pu]; ++- MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu]; ++- uint8_t top_cbf_luma = s->cbf_luma[yp_tu * min_tu_width + x_tu]; ++- uint8_t curr_cbf_luma = s->cbf_luma[yq_tu * min_tu_width + x_tu]; ++- ++- if (curr->pred_flag == PF_INTRA || top->pred_flag == PF_INTRA) ++- bs = 2; ++- else if (curr_cbf_luma || top_cbf_luma) ++- bs = 1; ++- else ++- bs = boundary_strength(s, curr, top, rpl_top); ++- s->horizontal_bs[((x0 + i) + y0 * s->bs_width) >> 2] = bs; +++ int i_pu = i >> log2_min_pu_size; +++ int i_tu = i >> log2_min_tu_size; +++ +++ if (top[i_pu].pred_flag == PF_INTRA) +++ bs[i >> 2] = 2; +++ else if (curr_cbf_luma[i_tu] || top_cbf_luma[i_tu]) +++ bs[i >> 2] = 1; ++ } +++ } +++ } +++ +++ if (!is_intra) { +++ for (j = inc; j < trafo_in_min_pus; j += inc) { +++ MvField *top; +++ +++ curr += min_pu_width * inc; +++ top = curr - min_pu_width; +++ bs += s->bs_width * inc << log2_min_pu_size >> 2; +++ +++ 
s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus, +++ min_pu_in_4pix, sizeof (MvField), 4 >> 2, +++ rpl[0].list, rpl[1].list, rpl[0].list, rpl[1].list, +++ curr, top, bs); +++ } ++ } ++ ++- // bs for vertical TU boundaries ++ boundary_left = x0 > 0 && !(x0 & 7); ++ if (boundary_left && ++ ((!s->sh.slice_loop_filter_across_slices_enabled_flag && ++@@ -856,64 +822,54 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0))) ++ boundary_left = 0; ++ +++ curr = &tab_mvf[y_pu * min_pu_width + x_pu]; +++ bs = &s->vertical_bs[(x0 + y0 * s->bs_width) >> 2]; +++ ++ if (boundary_left) { ++ RefPicList *rpl_left = (lc->boundary_flags & BOUNDARY_LEFT_SLICE) ? ++ ff_hevc_get_ref_list(s, s->ref, x0 - 1, y0) : ++- s->ref->refPicList; ++- int xp_pu = (x0 - 1) >> log2_min_pu_size; ++- int xq_pu = x0 >> log2_min_pu_size; ++- int xp_tu = (x0 - 1) >> log2_min_tu_size; ++- int xq_tu = x0 >> log2_min_tu_size; ++- ++- for (i = 0; i < (1 << log2_trafo_size); i += 4) { ++- int y_pu = (y0 + i) >> log2_min_pu_size; ++- int y_tu = (y0 + i) >> log2_min_tu_size; ++- MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu]; ++- MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu]; ++- uint8_t left_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xp_tu]; ++- uint8_t curr_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xq_tu]; ++- ++- if (curr->pred_flag == PF_INTRA || left->pred_flag == PF_INTRA) ++- bs = 2; ++- else if (curr_cbf_luma || left_cbf_luma) ++- bs = 1; ++- else ++- bs = boundary_strength(s, curr, left, rpl_left); ++- s->vertical_bs[(x0 + (y0 + i) * s->bs_width) >> 2] = bs; ++- } ++- } +++ rpl; +++ MvField *left = curr - 1; ++ ++- if (log2_trafo_size > log2_min_pu_size && !is_intra) { ++- RefPicList *rpl = s->ref->refPicList; +++ if (is_intra) { +++ for (j = 0; j < (1 << log2_trafo_size); j += 4) +++ bs[j * s->bs_width >> 2] = 2; ++ ++- // bs for TU internal horizontal PU boundaries ++- for (j = 8; j < (1 << 
log2_trafo_size); j += 8) { ++- int yp_pu = (y0 + j - 1) >> log2_min_pu_size; ++- int yq_pu = (y0 + j) >> log2_min_pu_size; ++- ++- for (i = 0; i < (1 << log2_trafo_size); i += 4) { ++- int x_pu = (x0 + i) >> log2_min_pu_size; ++- MvField *top = &tab_mvf[yp_pu * min_pu_width + x_pu]; ++- MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu]; ++- ++- bs = boundary_strength(s, curr, top, rpl); ++- s->horizontal_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs; +++ } else { +++ int y_tu = y0 >> log2_min_tu_size; +++ int x_tu = x0 >> log2_min_tu_size; +++ uint8_t *curr_cbf_luma = &s->cbf_luma[y_tu * min_tu_width + x_tu]; +++ uint8_t *left_cbf_luma = curr_cbf_luma - 1; +++ +++ s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus, +++ min_pu_in_4pix, min_pu_width * sizeof (MvField), 4 * s->bs_width >> 2, +++ rpl[0].list, rpl[1].list, rpl_left[0].list, rpl_left[1].list, +++ curr, left, bs); +++ +++ for (j = 0; j < (1 << log2_trafo_size); j += 4) { +++ int j_pu = j >> log2_min_pu_size; +++ int j_tu = j >> log2_min_tu_size; +++ +++ if (left[j_pu * min_pu_width].pred_flag == PF_INTRA) +++ bs[j * s->bs_width >> 2] = 2; +++ else if (curr_cbf_luma[j_tu * min_tu_width] || left_cbf_luma[j_tu * min_tu_width]) +++ bs[j * s->bs_width >> 2] = 1; ++ } ++ } +++ } ++ ++- // bs for TU internal vertical PU boundaries ++- for (j = 0; j < (1 << log2_trafo_size); j += 4) { ++- int y_pu = (y0 + j) >> log2_min_pu_size; +++ if (!is_intra) { +++ for (i = inc; i < trafo_in_min_pus; i += inc) { +++ MvField *left; ++ ++- for (i = 8; i < (1 << log2_trafo_size); i += 8) { ++- int xp_pu = (x0 + i - 1) >> log2_min_pu_size; ++- int xq_pu = (x0 + i) >> log2_min_pu_size; ++- MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu]; ++- MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu]; +++ curr += inc; +++ left = curr - 1; +++ bs += inc << log2_min_pu_size >> 2; ++ ++- bs = boundary_strength(s, curr, left, rpl); ++- s->vertical_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs; ++- } +++ 
s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus, +++ min_pu_in_4pix, min_pu_width * sizeof (MvField), 4 * s->bs_width >> 2, +++ rpl[0].list, rpl[1].list, rpl[0].list, rpl[1].list, +++ curr, left, bs); ++ } ++ } ++ } ++diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c ++index 9d773d9..a6534a9 100644 ++--- a/libavcodec/hevcdsp.c +++++ b/libavcodec/hevcdsp.c ++@@ -123,6 +123,120 @@ DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters[3][16]) = { ++ #include "hevcdsp_template.c" ++ #undef BIT_DEPTH ++ +++static void hevc_deblocking_boundary_strengths(int pus, int dup, int in_inc, int out_inc, +++ int *curr_rpl0, int *curr_rpl1, int *neigh_rpl0, int *neigh_rpl1, +++ MvField *curr, MvField *neigh, uint8_t *bs) +++{ +++ for (; pus > 0; pus--) { +++ int strength, out; +++ int curr_refL0 = curr_rpl0[curr->ref_idx[0]]; +++ int curr_refL1 = curr_rpl1[curr->ref_idx[1]]; +++ int neigh_refL0 = neigh_rpl0[neigh->ref_idx[0]]; +++ int neigh_refL1 = neigh_rpl1[neigh->ref_idx[1]]; +++ +++#if 1 // This more directly matches the original implementation +++ if (curr->pred_flag == PF_BI && neigh->pred_flag == PF_BI) { +++ // same L0 and L1 +++ if (curr_refL0 == neigh_refL0 && +++ curr_refL0 == curr_refL1 && +++ neigh_refL0 == neigh_refL1) { +++ if ((FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 || +++ FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) && +++ (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 || +++ FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4)) +++ strength = 1; +++ else +++ strength = 0; +++ } else if (neigh_refL0 == curr_refL0 && +++ neigh_refL1 == curr_refL1) { +++ if (FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 || +++ FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) +++ strength = 1; +++ else +++ 
strength = 0; +++ } else if (neigh_refL1 == curr_refL0 && +++ neigh_refL0 == curr_refL1) { +++ if (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 || +++ FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4) +++ strength = 1; +++ else +++ strength = 0; +++ } else { +++ strength = 1; +++ } +++ } else if ((curr->pred_flag != PF_BI) && (neigh->pred_flag != PF_BI)){ // 1 MV +++ Mv curr_mv0, neigh_mv0; +++ +++ if (curr->pred_flag & 1) { +++ curr_mv0 = curr->mv[0]; +++ } else { +++ curr_mv0 = curr->mv[1]; +++ curr_refL0 = curr_refL1; +++ } +++ +++ if (neigh->pred_flag & 1) { +++ neigh_mv0 = neigh->mv[0]; +++ } else { +++ neigh_mv0 = neigh->mv[1]; +++ neigh_refL0 = neigh_refL1; +++ } +++ +++ if (curr_refL0 == neigh_refL0) { +++ if (FFABS(curr_mv0.x - neigh_mv0.x) >= 4 || FFABS(curr_mv0.y - neigh_mv0.y) >= 4) +++ strength = 1; +++ else +++ strength = 0; +++ } else +++ strength = 1; +++ } else +++ strength = 1; +++#else // This has exactly the same effect, but is more suitable for vectorisation +++ Mv curr_mv[2]; +++ Mv neigh_mv[2]; +++ memcpy(curr_mv, curr->mv, sizeof curr_mv); +++ memcpy(neigh_mv, neigh->mv, sizeof neigh_mv); +++ +++ if (!(curr->pred_flag & 2)) { +++ curr_mv[1] = curr_mv[0]; +++ curr_refL1 = curr_refL0; +++ } +++ if (!(neigh->pred_flag & 2)) { +++ neigh_mv[1] = neigh_mv[0]; +++ neigh_refL1 = neigh_refL0; +++ } +++ if (!(curr->pred_flag & 1)) { +++ curr_mv[0] = curr_mv[1]; +++ curr_refL0 = curr_refL1; +++ } +++ if (!(neigh->pred_flag & 1)) { +++ neigh_mv[0] = neigh_mv[1]; +++ neigh_refL0 = neigh_refL1; +++ } +++ +++ strength = 1; +++ +++ strength &= (neigh_refL0 != curr_refL0) | (neigh_refL1 != curr_refL1) | +++ (FFABS(neigh_mv[0].x - curr_mv[0].x) >= 4) | (FFABS(neigh_mv[0].y - curr_mv[0].y) >= 4) | +++ (FFABS(neigh_mv[1].x - curr_mv[1].x) >= 4) | (FFABS(neigh_mv[1].y - curr_mv[1].y) >= 4); +++ +++ strength &= (neigh_refL1 != curr_refL0) | (neigh_refL0 != curr_refL1) | +++ 
(FFABS(neigh_mv[1].x - curr_mv[0].x) >= 4) | (FFABS(neigh_mv[1].y - curr_mv[0].y) >= 4) | +++ (FFABS(neigh_mv[0].x - curr_mv[1].x) >= 4) | (FFABS(neigh_mv[0].y - curr_mv[1].y) >= 4); +++ +++ strength |= (((curr->pred_flag + 1) ^ (neigh->pred_flag + 1)) >> 2); +++#endif +++ +++ curr += in_inc / sizeof (MvField); +++ neigh += in_inc / sizeof (MvField); +++ +++ for (out = dup; out > 0; out--) +++ { +++ *bs = strength; +++ bs += out_inc; +++ } +++ } +++} +++ ++ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) ++ { ++ #undef FUNC ++@@ -257,6 +371,8 @@ int i = 0; ++ break; ++ } ++ +++ hevcdsp->hevc_deblocking_boundary_strengths = hevc_deblocking_boundary_strengths; +++ ++ if (ARCH_X86) ++ ff_hevc_dsp_init_x86(hevcdsp, bit_depth); ++ if (ARCH_ARM) ++diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h ++index 9f1f6dd..e221e54 100644 ++--- a/libavcodec/hevcdsp.h +++++ b/libavcodec/hevcdsp.h ++@@ -42,6 +42,17 @@ typedef struct SAOParams { ++ uint8_t type_idx[3]; ///< sao_type_idx ++ } SAOParams; ++ +++typedef struct Mv { +++ int16_t x; ///< horizontal component of motion vector +++ int16_t y; ///< vertical component of motion vector +++} Mv; +++ +++typedef struct MvField { +++ DECLARE_ALIGNED(4, Mv, mv)[2]; +++ int8_t ref_idx[2]; +++ int8_t pred_flag; +++} MvField; +++ ++ typedef struct HEVCDSPContext { ++ void (*put_pcm)(uint8_t *_dst, ptrdiff_t _stride, int width, int height, ++ struct GetBitContext *gb, int pcm_bit_depth); ++@@ -120,6 +131,9 @@ typedef struct HEVCDSPContext { ++ void (*hevc_v_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride, ++ int32_t *tc, uint8_t *no_p, ++ uint8_t *no_q); +++ void (*hevc_deblocking_boundary_strengths)(int pus, int dup, int in_inc, int out_inc, +++ int *curr_rpl0, int *curr_rpl1, int *neigh_rpl0, int *neigh_rpl1, +++ MvField *curr, MvField *neigh, uint8_t *bs); ++ } HEVCDSPContext; ++ ++ void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); ++-- ++2.7.4 ++ ++ ++From 619366d6acfd5f040a3116fda97b1146c8e40250 Mon 
Sep 17 00:00:00 2001 ++From: Peter de Rivaz ++Date: Wed, 15 Jul 2015 09:09:11 +0100 ++Subject: [PATCH 68/68] Only enable qpu when needed ++ ++--- ++ libavcodec/hevc.h | 2 +- ++ libavcodec/rpi_qpu.c | 21 ++++++++++++++++----- ++ 2 files changed, 17 insertions(+), 6 deletions(-) ++ ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 496c0e1..ce14975 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -57,7 +57,7 @@ ++ // Define RPI_WORKER to launch a worker thread for pixel processing tasks ++ #define RPI_WORKER ++ // Define RPI_DEBLOCK_VPU to perform deblocking on the VPUs ++- #define RPI_DEBLOCK_VPU +++ //#define RPI_DEBLOCK_VPU ++ ++ #endif ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 5aa0432..ffd13ca 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -9,7 +9,7 @@ ++ // define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion ++ #define RPI_ASYNC ++ // Define RPI_COMBINE_JOBS to find jobs that can be executed in parallel ++-#define RPI_COMBINE_JOBS +++//#define RPI_COMBINE_JOBS ++ ++ #include ++ #include ++@@ -143,9 +143,9 @@ static int gpu_init(volatile struct GPU **gpu) { ++ volatile struct GPU* ptr; ++ if (mb < 0) ++ return -1; ++- +++#ifndef RPI_ASYNC ++ if (qpu_enable(mb, 1)) return -2; ++- +++#endif ++ vcsm_init(); ++ gpu_malloc_uncached_internal(sizeof(struct GPU), &gpu_mem_ptr, mb); ++ ptr = (volatile struct GPU*)gpu_mem_ptr.arm; ++@@ -336,9 +336,9 @@ static void gpu_term(void) ++ vpu_post_code(0, 0, 0, 0, 0, 0, -1, NULL); ++ pthread_join(vpu_thread, &res); ++ } ++-#endif ++- +++#else ++ qpu_enable(mb, 0); +++#endif ++ gpu_free_internal(&gpu_mem_ptr); ++ ++ vcsm_exit(); ++@@ -400,6 +400,7 @@ static void *vpu_start(void *arg) { ++ int count_deblock=0; ++ int count_qpu=0; ++ #endif +++ int qpu_started = 0; ++ while(1) { ++ int i; ++ int *p; // Pointer for a QPU/VPU job ++@@ -427,6 +428,12 @@ static void *vpu_start(void *arg) { ++ if (p[7] == 0 
&& p[0] == 0 && p[16]==0) ++ goto job_done_early; ++ +++ if (!qpu_started) { +++ int result = qpu_enable(gpu->mb, 1); +++ av_assert0(result==0); +++ qpu_started = 1; +++ } +++ ++ #ifdef RPI_COMBINE_JOBS ++ // First scan for a qpu job ++ for (int x=0;xmb, 0); +++ } +++ ++ return NULL; ++ } ++ ++-- ++2.7.4 ++ ++From a0d0946951b53e64ce103dd61b455f8d1f72caf9 Mon Sep 17 00:00:00 2001 ++From: John Cox ++Date: Tue, 9 Feb 2016 11:57:40 +0000 ++Subject: [PATCH 1/2] Zero copy code v6 ++ ++This version has GPU buffer pooling code ++--- ++ ffmpeg.c | 123 +++++++++----- ++ libavcodec/Makefile | 2 + ++ libavcodec/avcodec.h | 6 + ++ libavcodec/hevc.c | 92 ++++++----- ++ libavcodec/hevc_filter.c | 83 +++++----- ++ libavcodec/rpi_qpu.c | 2 +- ++ libavcodec/rpi_qpu.h | 109 ++++++++++++- ++ libavcodec/rpi_zc.c | 406 +++++++++++++++++++++++++++++++++++++++++++++++ ++ libavcodec/rpi_zc.h | 83 ++++++++++ ++ 9 files changed, 779 insertions(+), 127 deletions(-) ++ create mode 100644 libavcodec/rpi_zc.c ++ create mode 100644 libavcodec/rpi_zc.h ++ ++diff --git a/ffmpeg.c b/ffmpeg.c ++index 50c6e86..953e5b8 100644 ++--- a/ffmpeg.c +++++ b/ffmpeg.c ++@@ -25,7 +25,7 @@ ++ ++ #ifdef RPI ++ #define RPI_DISPLAY ++-//#define RPI_ZERO_COPY +++#define RPI_ZERO_COPY ++ #endif ++ ++ #include "config.h" ++@@ -80,9 +80,7 @@ ++ #include ++ #include ++ #include ++-#ifdef RPI_ZERO_COPY ++-#include "libavcodec/rpi_qpu.h" ++-#endif +++#include "libavcodec/rpi_zc.h" ++ #endif ++ ++ #if HAVE_SYS_RESOURCE_H ++@@ -183,13 +181,7 @@ static void free_input_threads(void); ++ ++ static MMAL_COMPONENT_T* rpi_display = NULL; ++ static MMAL_POOL_T *rpi_pool = NULL; ++- ++-#ifdef RPI_ZERO_COPY ++-static uint8_t *get_vc_handle(AVBufferRef *bref) { ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++- return (uint8_t *)p->vc_handle; ++-} ++-#endif +++static volatile int rpi_display_count = 0; ++ ++ static MMAL_POOL_T* display_alloc_pool(MMAL_PORT_T* port, size_t w, size_t h) ++ { ++@@ -206,7 +198,7 @@ static 
MMAL_POOL_T* display_alloc_pool(MMAL_PORT_T* port, size_t w, size_t h) ++ for (i = 0; i < NUM_BUFFERS; ++i) ++ { ++ MMAL_BUFFER_HEADER_T* buffer = pool->header[i]; ++- void* bufPtr = buffer->data; +++ char * bufPtr = buffer->data; ++ memset(bufPtr, i*30, w*h); ++ memset(bufPtr+w*h, 128, (w*h)/2); ++ } ++@@ -215,23 +207,31 @@ static MMAL_POOL_T* display_alloc_pool(MMAL_PORT_T* port, size_t w, size_t h) ++ return pool; ++ } ++ ++-static void display_cb_input(MMAL_PORT_T *port,MMAL_BUFFER_HEADER_T *buffer) { +++static void display_cb_input(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer) { +++#ifdef RPI_ZERO_COPY +++ av_rpi_zc_unref(buffer->user_data); +++ --rpi_display_count; +++#endif +++ mmal_buffer_header_release(buffer); +++} +++ +++static void display_cb_control(MMAL_PORT_T *port,MMAL_BUFFER_HEADER_T *buffer) { ++ mmal_buffer_header_release(buffer); ++ } ++ ++ static MMAL_COMPONENT_T* display_init(size_t x, size_t y, size_t w, size_t h) ++ { ++ MMAL_COMPONENT_T* display; ++- int w2 = (w+31)&~31; ++- int h2 = (h+15)&~15; ++ MMAL_DISPLAYREGION_T region = ++ { ++- {MMAL_PARAMETER_DISPLAYREGION, sizeof(region)}, +++ .hdr = {MMAL_PARAMETER_DISPLAYREGION, sizeof(region)}, ++ .set = MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_FULLSCREEN | MMAL_DISPLAY_SET_DEST_RECT, ++ .layer = 2, ++ .fullscreen = 0, ++ .dest_rect = {x, y, w, h} ++ }; +++ const AVRpiZcFrameGeometry geo = av_rpi_zc_frame_geometry(w, h); +++ ++ bcm_host_init(); // TODO is this needed? 
++ mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &display); ++ assert(display); ++@@ -240,8 +240,8 @@ static MMAL_COMPONENT_T* display_init(size_t x, size_t y, size_t w, size_t h) ++ ++ MMAL_ES_FORMAT_T* format = display->input[0]->format; ++ format->encoding = MMAL_ENCODING_I420; ++- format->es->video.width = w2; ++- format->es->video.height = h2; +++ format->es->video.width = geo.stride_y; +++ format->es->video.height = geo.height_y; ++ format->es->video.crop.x = 0; ++ format->es->video.crop.y = 0; ++ format->es->video.crop.width = w; ++@@ -250,46 +250,75 @@ static MMAL_COMPONENT_T* display_init(size_t x, size_t y, size_t w, size_t h) ++ ++ mmal_component_enable(display); ++ ++- rpi_pool = display_alloc_pool(display->input[0], w2, h2); +++ rpi_pool = display_alloc_pool(display->input[0], geo.stride_y, geo.height_y); ++ ++ mmal_port_enable(display->input[0],display_cb_input); ++- mmal_port_enable(display->control,display_cb_input); +++ mmal_port_enable(display->control,display_cb_control); ++ ++- printf("Allocated display %d %d\n",w,h); +++ printf("Allocated display %dx%d in %dx%d\n", w, h, geo.stride_y, geo.height_y); ++ ++ return display; ++ } ++ ++-static void display_frame(MMAL_COMPONENT_T* display,AVFrame* fr) +++static void display_frame(struct AVCodecContext * const s, MMAL_COMPONENT_T* const display, const AVFrame* const fr) ++ { ++- int w = fr->width; ++- int h = fr->height; ++- int w2 = (w+31)&~31; ++- int h2 = (h+15)&~15; ++ if (!display || !rpi_pool) ++ return; +++ +++ if (rpi_display_count >= 3) { +++ av_log(s, AV_LOG_VERBOSE, "Frame dropped\n"); +++ return; +++ } +++ ++ MMAL_BUFFER_HEADER_T* buf = mmal_queue_get(rpi_pool->queue); ++ if (!buf) { ++- // Running too fast so drop the frame ++- return; +++ // Running too fast so drop the frame +++ printf("Q alloc failure\n"); +++ return; ++ } ++ assert(buf); ++ buf->cmd = 0; ++- buf->length = (w2 * h2 * 3)/2; ++ buf->offset = 0; // Offset to valid data ++ buf->flags = 0; ++ #ifdef 
RPI_ZERO_COPY ++- buf->data = get_vc_handle(fr->buf[0]); ++- buf->alloc_size = (w2*h2*3)/2; +++{ +++ const AVRpiZcRefPtr fr_buf = av_rpi_zc_ref(s, fr, 1); +++ +++ buf->user_data = fr_buf; +++ buf->data = av_rpi_zc_vc_handle(fr_buf); +++ buf->alloc_size = +++ buf->length = av_rpi_zc_numbytes(fr_buf); +++ +++ ++rpi_display_count; +++} ++ #else +++{ +++#error YYY +++ int w = fr->width; +++ int h = fr->height; +++ int w2 = (w+31)&~31; +++ int h2 = (h+15)&~15; +++ +++ buf->length = (w2 * h2 * 3)/2; +++ buf->user_data = NULL; +++ ++ //mmal_buffer_header_mem_lock(buf); ++ memcpy(buf->data, fr->data[0], w2 * h); ++ memcpy(buf->data+w2*h2, fr->data[1], w2 * h / 4); ++ memcpy(buf->data+w2*h2*5/4, fr->data[2], w2 * h / 4); ++ //mmal_buffer_header_mem_unlock(buf); +++} ++ #endif ++ ++- mmal_port_send_buffer(display->input[0], buf); // I assume this will automatically get released +++ while (rpi_display_count >= 3) { +++ usleep(5000); +++ } +++ +++ if (mmal_port_send_buffer(display->input[0], buf) != MMAL_SUCCESS) +++ { +++ printf("** send failed: depth=%d\n", rpi_display_count); +++ display_cb_input(NULL, buf); +++ } ++ } ++ ++ static void display_exit(MMAL_COMPONENT_T* display) ++@@ -687,6 +716,11 @@ static void ffmpeg_cleanup(int ret) ++ avformat_close_input(&input_files[i]->ctx); ++ av_freep(&input_files[i]); ++ } +++ +++#ifdef RPI_DISPLAY +++ display_exit(rpi_display); +++#endif +++ ++ for (i = 0; i < nb_input_streams; i++) { ++ InputStream *ist = input_streams[i]; ++ ++@@ -698,6 +732,9 @@ static void ffmpeg_cleanup(int ret) ++ av_freep(&ist->filters); ++ av_freep(&ist->hwaccel_device); ++ +++#ifdef RPI_ZERO_COPY +++ av_rpi_zc_uninit(ist->dec_ctx); +++#endif ++ avcodec_free_context(&ist->dec_ctx); ++ ++ av_freep(&input_streams[i]); ++@@ -729,9 +766,6 @@ static void ffmpeg_cleanup(int ret) ++ term_exit(); ++ ffmpeg_exited = 1; ++ ++-#ifdef RPI_DISPLAY ++- display_exit(rpi_display); ++-#endif ++ } ++ ++ void remove_avoptions(AVDictionary **a, AVDictionary *b) ++@@ -1091,18 
+1125,19 @@ static void do_video_out(AVFormatContext *s, ++ int frame_size = 0; ++ InputStream *ist = NULL; ++ AVFilterContext *filter = ost->filter->filter; +++ +++ if (ost->source_index >= 0) +++ ist = input_streams[ost->source_index]; +++ ++ #ifdef RPI_DISPLAY ++- if (next_picture) +++ if (next_picture && ist != NULL) ++ { ++- if (!rpi_display) +++ if (!rpi_display) ++ rpi_display = display_init(0,0,next_picture->width,next_picture->height); ++- display_frame(rpi_display,next_picture); +++ display_frame(ist->dec_ctx, rpi_display, next_picture); ++ } ++ #endif ++ ++- if (ost->source_index >= 0) ++- ist = input_streams[ost->source_index]; ++- ++ if (filter->inputs[0]->frame_rate.num > 0 && ++ filter->inputs[0]->frame_rate.den > 0) ++ duration = 1/(av_q2d(filter->inputs[0]->frame_rate) * av_q2d(enc->time_base)); ++@@ -2708,6 +2743,12 @@ static int init_input_stream(int ist_index, char *error, int error_len) ++ ist->dec_ctx->opaque = ist; ++ ist->dec_ctx->get_format = get_format; ++ ist->dec_ctx->get_buffer2 = get_buffer; +++ +++#ifdef RPI_ZERO_COPY +++ // Overrides the above get_buffer2 +++ av_rpi_zc_init(ist->dec_ctx); +++#endif +++ ++ ist->dec_ctx->thread_safe_callbacks = 1; ++ ++ av_opt_set_int(ist->dec_ctx, "refcounted_frames", 1, 0); ++diff --git a/libavcodec/Makefile b/libavcodec/Makefile ++index 03065cd..21e4514 100644 ++--- a/libavcodec/Makefile +++++ b/libavcodec/Makefile ++@@ -9,6 +9,7 @@ HEADERS = avcodec.h \ ++ rpi_shader.h \ ++ rpi_mailbox.h \ ++ rpi_hevc_transform.h \ +++ rpi_zc.h \ ++ d3d11va.h \ ++ dirac.h \ ++ dv_profile.h \ ++@@ -50,6 +51,7 @@ OBJS = allcodecs.o \ ++ rpi_qpu.o \ ++ rpi_shader.o \ ++ rpi_mailbox.o \ +++ rpi_zc.o \ ++ vorbis_parser.o \ ++ xiph.o \ ++ ++diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h ++index 39713ed..a1ba217 100644 ++--- a/libavcodec/avcodec.h +++++ b/libavcodec/avcodec.h ++@@ -3505,6 +3505,12 @@ typedef struct AVCodecContext { ++ #define FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS 1 ++ #endif ++ +++ /** +++ * Opaque 
pointer for use by replacement get_buffer2 code +++ * +++ * @author jc (08/02/2016) +++ */ +++ void * get_buffer_context; ++ } AVCodecContext; ++ ++ AVRational av_codec_get_pkt_timebase (const AVCodecContext *avctx); ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 8437e10..51736c7 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -114,10 +114,6 @@ static uint32_t rpi_filter_coefs[8][1] = { ++ { ENCODE_COEFFS( -2, 10, 58, -2) } ++ }; ++ ++-static uint32_t get_vc_address(AVBufferRef *bref) { ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++- return p->vc; ++-} ++ #endif ++ ++ ++@@ -2197,9 +2193,9 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int bw = nPbW-start_x; ++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref0->frame); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref0->frame); ++ *y++ = ( (bw<16 ? bw : 16) << 16 ) + (bh<16 ? 
bh : 16); ++ *y++ = my2_mx2_my_mx; ++ if (weight_flag) { ++@@ -2207,7 +2203,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ } else { ++ *y++ = 1; // Weight of 1 and offset of 0 ++ } ++- *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ *y++ = (get_vc_address_y(s->frame) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++@@ -2246,8 +2242,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_u(ref0->frame); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_v(ref0->frame); ++ *u++ = ( (bwframe->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++- *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ *u++ = (get_vc_address_u(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address_v(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++ s->curr_u_mvs = u; ++@@ -2297,9 +2293,9 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int bw = nPbW-start_x; ++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref1->frame); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ 
y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref1->frame); ++ *y++ = ( (bw<16 ? bw : 16) << 16 ) + (bh<16 ? bh : 16); ++ *y++ = my2_mx2_my_mx; ++ if (weight_flag) { ++@@ -2307,7 +2303,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ } else { ++ *y++ = 1; // Weight of 1 and offset of 0 ++ } ++- *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ *y++ = (get_vc_address_y(s->frame) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++@@ -2347,8 +2343,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_u(ref1->frame); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_v(ref1->frame); ++ *u++ = ( (bwsh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++@@ -2360,8 +2356,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = 1; // Weight of 1 and offset of 0 ++ *u++ = 1; ++ } ++- *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++- *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ *u++ = (get_vc_address_u(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address_v(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++ s->curr_u_mvs = u; ++@@ -2403,13 +2399,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int bw = nPbW-start_x; ++ int bh = nPbH-start_y; 
++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref0->frame); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y2 - 3 + start_y) << 16) + ( (x2 - 3 + start_x) & 0xffff); // Second fetch is for ref1 ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref1->frame); ++ *y++ = ( (bw<8 ? bw : 8) << 16 ) + (bh<16 ? bh : 16); ++ *y++ = my2_mx2_my_mx; ++ *y++ = 1; // B frame weighted prediction not supported ++- *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ *y++ = (get_vc_address_y(s->frame) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter_b; ++ } ++ } ++@@ -2453,8 +2449,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b0; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_u(ref0->frame); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_v(ref0->frame); ++ *u++ = ( (bwmc_filter_uv_b; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x2_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y2_c - 1 + start_y; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_u(ref1->frame); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_v(ref1->frame); ++ *u++ = ( (bwframe->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++- *u++ = (get_vc_address(s->frame->buf[2]) + 
x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ *u++ = (get_vc_address_u(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address_v(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++ s->curr_u_mvs = u; ++@@ -3270,12 +3266,13 @@ static int32_t filter8_luma(uint8_t *data, int x0, int y0, int pitch, int my_mx, ++ return vsum; ++ } ++ ++-static uint8_t *test_frame(HEVCContext *s,uint32_t p, AVFrame *frame, int cIdx) +++static uint8_t *test_frame(HEVCContext *s,uint32_t p, AVFrame *frame, const int cIdx) ++ { ++ //int pic_width = s->ps.sps->width >> s->ps.sps->hshift[cIdx]; ++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[cIdx]; ++ int pitch = frame->linesize[cIdx]; ++- uint32_t base = get_vc_address(frame->buf[cIdx]); +++ uint32_t base = c_idx == 0 ? get_vc_address_y(frame); +++ c_idx == 1 ? get_vc_address_u(frame) : get_vc_address_v(frame); ++ if (p>=base && pdata[cIdx] + (p-base); ++ } ++@@ -3562,6 +3559,7 @@ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ #ifdef RPI ++ ++ #ifndef RPI_FAST_CACHEFLUSH +++#error RPI_FAST_CACHEFLUSH is broken ++ static void flush_buffer(AVBufferRef *bref) { ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++ gpu_cache_flush(p); ++@@ -3572,7 +3570,7 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ { ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ GPU_MEM_PTR_T p = get_gpu_mem_ptr_u(s->frame); ++ int n = s->ps.sps->height; ++ int curr_y = 0; ++ int curr_uv = 0; ++@@ -3580,21 +3578,21 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ int sz,base; ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].handle = p.vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = (int)(p->arm) + base; +++ 
iocache.s[0].addr = (int)(p.arm) + base; ++ iocache.s[0].size = sz; ++- p = av_buffer_pool_opaque(frame->buf[2]); ++- iocache.s[1].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_v(s->frame); +++ iocache.s[1].handle = p.vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = (int)(p->arm) + base; +++ iocache.s[1].addr = (int)(p.arm) + base; ++ iocache.s[1].size = sz; ++- p = av_buffer_pool_opaque(frame->buf[0]); +++ p = get_gpu_mem_ptr_y(s->frame); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++- iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].handle = p.vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = (int)(p->arm) + base; +++ iocache.s[2].addr = (int)(p.arm) + base; ++ iocache.s[2].size = sz; ++ vcsm_clean_invalid( &iocache ); ++ #else ++@@ -3612,7 +3610,7 @@ static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM ++ int curr_y; ++ int curr_uv; ++ int n_uv; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ GPU_MEM_PTR_T p = get_gpu_mem_ptr_u(s->frame); ++ int sz,base; ++ int (*d)[2] = s->dblk_cmds[job]; ++ int low=(*d)[1]; ++@@ -3629,21 +3627,21 @@ static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM ++ ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].handle = p.vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = (int)(p->arm) + base; +++ iocache.s[0].addr = (int)(p.arm) + base; ++ iocache.s[0].size = sz; ++- p = av_buffer_pool_opaque(frame->buf[2]); ++- iocache.s[1].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_v(s->frame); +++ iocache.s[1].handle = p.vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = (int)(p->arm) + base; +++ iocache.s[1].addr = (int)(p.arm) + base; ++ iocache.s[1].size = sz; ++- p = 
av_buffer_pool_opaque(frame->buf[0]); +++ p = get_gpu_mem_ptr_y(s->frame); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++- iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].handle = p.vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = (int)(p->arm) + base; +++ iocache.s[2].addr = (int)(p.arm) + base; ++ iocache.s[2].size = sz; ++ ++ iocache.s[3].handle = p0->vcsm_handle; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 826a82f..c4fa305 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -879,17 +879,25 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ #undef CR ++ ++ #ifdef RPI_INTER_QPU ++-static void flush_buffer(AVBufferRef *bref) { ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++- gpu_cache_flush(p); +++static void flush_buffer_y(const AVFrame * const frame) { +++ GPU_MEM_PTR_T p = get_gpu_mem_ptr_y(frame); +++ gpu_cache_flush(&p); ++ } ++ ++-// Return Physical address for this image ++-static uint32_t get_vc_address(AVBufferRef *bref) { ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++- return p->vc; +++static void flush_buffer_u(const AVFrame * const frame) { +++ GPU_MEM_PTR_T p = get_gpu_mem_ptr_u(frame); +++ gpu_cache_flush(&p); ++ } ++ +++static void flush_buffer_v(const AVFrame * const frame) { +++ GPU_MEM_PTR_T p = get_gpu_mem_ptr_v(frame); +++ gpu_cache_flush(&p); +++} +++ +++ +++#ifdef RPI_DEBLOCK_VPU +++#error Not fixed yet +++ ++ // ff_hevc_flush_buffer_lines ++ // flushes and invalidates all pixel rows in [start,end-1] ++ static void ff_hevc_flush_buffer_lines(HEVCContext *s, int start, int end, int flush_luma, int flush_chroma) ++@@ -901,44 +909,44 @@ static void ff_hevc_flush_buffer_lines(HEVCContext *s, int start, int end, int f ++ int curr_uv = curr_y >> s->ps.sps->vshift[1]; ++ int n_uv = n >> s->ps.sps->vshift[1]; ++ int sz,base; ++- GPU_MEM_PTR_T *p; +++ GPU_MEM_PTR_T p; ++ 
if (curr_uv < 0) curr_uv = 0; ++ if (n_uv<=curr_uv) { return; } ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++ if (flush_chroma) { ++- p = av_buffer_pool_opaque(s->frame->buf[1]); ++- iocache.s[0].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_u(s->frame); +++ iocache.s[0].handle = p.vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = (int)p->arm + base; +++ iocache.s[0].addr = (int)p.arm + base; ++ iocache.s[0].size = sz; ++- p = av_buffer_pool_opaque(s->frame->buf[2]); ++- iocache.s[1].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_v(s->frame); +++ iocache.s[1].handle = p.vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = (int)p->arm + base; +++ iocache.s[1].addr = (int)p.arm + base; ++ iocache.s[1].size = sz; ++ } ++ if (flush_luma) { ++- p = av_buffer_pool_opaque(s->frame->buf[0]); +++ p = get_gpu_mem_ptr_y(s->frame); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++- iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].handle = p.vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = (int)p->arm + base; +++ iocache.s[2].addr = (int)p.arm + base; ++ iocache.s[2].size = sz; ++ } ++ vcsm_clean_invalid( &iocache ); ++ #else ++ if (flush_chroma) { ++- flush_buffer(s->frame->buf[1]); ++- flush_buffer(s->frame->buf[2]); +++ flush_buffer_u(s->frame); +++ flush_buffer_v(s->frame); ++ } ++ if (flush_luma) { ++- flush_buffer(s->frame->buf[0]); +++ flush_buffer_y(s->frame); ++ } ++ #endif ++ } ++- +++#endif ++ ++ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ { ++@@ -950,37 +958,37 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ int curr_uv = curr_y >> s->ps.sps->vshift[1]; ++ int n_uv = n >> s->ps.sps->vshift[1]; ++ int sz,base; ++- GPU_MEM_PTR_T *p; +++ GPU_MEM_PTR_T p; ++ if (curr_uv < 0) curr_uv = 0; ++ if (n_uv<=curr_uv) { return; } ++ sz = 
s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- p = av_buffer_pool_opaque(s->frame->buf[1]); ++- iocache.s[0].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_u(s->frame); +++ iocache.s[0].handle = p.vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = (int)p->arm + base; +++ iocache.s[0].addr = (int)p.arm + base; ++ iocache.s[0].size = sz; ++- p = av_buffer_pool_opaque(s->frame->buf[2]); ++- iocache.s[1].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_v(s->frame); +++ iocache.s[1].handle = p.vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = (int)p->arm + base; +++ iocache.s[1].addr = (int)p.arm + base; ++ iocache.s[1].size = sz; ++ ++ #ifdef RPI_LUMA_QPU ++- p = av_buffer_pool_opaque(s->frame->buf[0]); +++ p = get_gpu_mem_ptr_y(s->frame); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++- iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].handle = p.vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = (int)p->arm + base; +++ iocache.s[2].addr = (int)p.arm + base; ++ iocache.s[2].size = sz; ++ #endif ++ vcsm_clean_invalid( &iocache ); ++ #else ++- flush_buffer(s->frame->buf[1]); ++- flush_buffer(s->frame->buf[2]); +++ flush_buffer_u(s->frame); +++ flush_buffer_v(s->frame); ++ #ifdef RPI_LUMA_QPU ++- flush_buffer(s->frame->buf[0]); +++ flush_buffer_y(s->frame); ++ #endif ++ ++ #endif ++@@ -992,6 +1000,7 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ #endif ++ ++ #ifdef RPI_DEBLOCK_VPU +++#error XXX ++ /* rpi_deblock deblocks an entire row of ctbs using the VPU */ ++ static void rpi_deblock(HEVCContext *s, int y, int ctb_size) ++ { ++@@ -1000,21 +1009,21 @@ static void rpi_deblock(HEVCContext *s, int y, int ctb_size) ++ // TODO flush buffer of beta/tc setup when it becomes cached ++ ++ // Prepare three commands at once to avoid calling overhead ++- s->vpu_cmds_arm[0][0] = 
get_vc_address(s->frame->buf[0]) + s->frame->linesize[0] * y; +++ s->vpu_cmds_arm[0][0] = get_vc_address_y(s->frame) + s->frame->linesize[0] * y; ++ s->vpu_cmds_arm[0][1] = s->frame->linesize[0]; ++ s->vpu_cmds_arm[0][2] = s->setup_width; ++ s->vpu_cmds_arm[0][3] = (int) ( s->y_setup_vc + s->setup_width * (y>>4) ); ++ s->vpu_cmds_arm[0][4] = ctb_size>>4; ++ s->vpu_cmds_arm[0][5] = 2; ++ ++- s->vpu_cmds_arm[1][0] = get_vc_address(s->frame->buf[1]) + s->frame->linesize[1] * (y>> s->ps.sps->vshift[1]); +++ s->vpu_cmds_arm[1][0] = get_vc_address_u(s->frame) + s->frame->linesize[1] * (y>> s->ps.sps->vshift[1]); ++ s->vpu_cmds_arm[1][1] = s->frame->linesize[1]; ++ s->vpu_cmds_arm[1][2] = s->uv_setup_width; ++ s->vpu_cmds_arm[1][3] = (int) ( s->uv_setup_vc + s->uv_setup_width * ((y>>4)>> s->ps.sps->vshift[1]) ); ++ s->vpu_cmds_arm[1][4] = (ctb_size>>4)>> s->ps.sps->vshift[1]; ++ s->vpu_cmds_arm[1][5] = 3; ++ ++- s->vpu_cmds_arm[2][0] = get_vc_address(s->frame->buf[2]) + s->frame->linesize[2] * (y>> s->ps.sps->vshift[2]); +++ s->vpu_cmds_arm[2][0] = get_vc_address_v(s->frame) + s->frame->linesize[2] * (y>> s->ps.sps->vshift[2]); ++ s->vpu_cmds_arm[2][1] = s->frame->linesize[2]; ++ s->vpu_cmds_arm[2][2] = s->uv_setup_width; ++ s->vpu_cmds_arm[2][3] = (int) ( s->uv_setup_vc + s->uv_setup_width * ((y>>4)>> s->ps.sps->vshift[1]) ); ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index ffd13ca..b0c9bc5 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -250,7 +250,7 @@ int gpu_get_mailbox(void) ++ } ++ ++ // Call this to clean and invalidate a region of memory ++-void gpu_cache_flush(GPU_MEM_PTR_T *p) +++void gpu_cache_flush(const GPU_MEM_PTR_T * const p) ++ { ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 81c2bb1..b913f79 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -2,8 +2,11 @@ ++ #define RPI_QPU_H ++ ++ // Define 
RPI_FAST_CACHEFLUSH to use the VCSM cache flush code +++// *** N.B. Code has rotted & crashes if this is unset (before this set of changes) ++ #define RPI_FAST_CACHEFLUSH ++ +++#define RPI_ONE_BUF 1 +++ ++ typedef struct gpu_mem_ptr_s { ++ unsigned char *arm; // Pointer to memory mapped on ARM side ++ int vc_handle; // Videocore handle of relocatable memory ++@@ -16,9 +19,113 @@ typedef struct gpu_mem_ptr_s { ++ extern int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p); ++ extern int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p); ++ extern void gpu_free(GPU_MEM_PTR_T *p); ++-extern void gpu_cache_flush(GPU_MEM_PTR_T *p); +++extern void gpu_cache_flush(const GPU_MEM_PTR_T * const p); ++ extern void gpu_cache_flush3(GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2); ++ +++#include "libavutil/frame.h" +++#if !RPI_ONE_BUF +++static inline uint32_t get_vc_address_y(const AVFrame * const frame) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[0]); +++ return p->vc; +++} +++ +++static inline uint32_t get_vc_address_u(const AVFrame * const frame) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ return p->vc; +++} +++ +++static inline uint32_t get_vc_address_v(const AVFrame * const frame) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[2]); +++ return p->vc; +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_y(const AVFrame * const frame) { +++ return *(GPU_MEM_PTR_T *)av_buffer_pool_opaque(frame->buf[0]); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_u(const AVFrame * const frame) { +++ return *(GPU_MEM_PTR_T *)av_buffer_pool_opaque(frame->buf[1]); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_v(const AVFrame * const frame) { +++ return *(GPU_MEM_PTR_T *)av_buffer_pool_opaque(frame->buf[2]); +++} +++ +++#else +++ +++static inline int gpu_is_buf1(const AVFrame * const frame) +++{ +++ return frame->buf[1] == NULL; +++} +++ +++static inline GPU_MEM_PTR_T * gpu_buf1_gmem(const AVFrame * const frame) 
+++{ +++ return av_buffer_get_opaque(frame->buf[0]); +++} +++ +++static inline GPU_MEM_PTR_T * gpu_buf3_gmem(const AVFrame * const frame, const int n) +++{ +++ return av_buffer_pool_opaque(frame->buf[n]); +++} +++ +++ +++static inline uint32_t get_vc_address_y(const AVFrame * const frame) { +++ return gpu_is_buf1(frame) ? gpu_buf1_gmem(frame)->vc : gpu_buf3_gmem(frame, 0)->vc; +++} +++ +++static inline uint32_t get_vc_address_u(const AVFrame * const frame) { +++ return gpu_is_buf1(frame) ? +++ gpu_buf1_gmem(frame)->vc + frame->data[1] - frame->data[0] : +++ gpu_buf3_gmem(frame, 1)->vc; +++} +++ +++static inline uint32_t get_vc_address_v(const AVFrame * const frame) { +++ return gpu_is_buf1(frame) ? +++ gpu_buf1_gmem(frame)->vc + frame->data[2] - frame->data[0] : +++ gpu_buf3_gmem(frame, 2)->vc; +++} +++ +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_y(const AVFrame * const frame) { +++ if (gpu_is_buf1(frame)) +++ { +++ GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame); +++ g.numbytes = frame->data[1] - frame->data[0]; +++ return g; +++ } +++ else +++ return *gpu_buf3_gmem(frame, 0); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_u(const AVFrame * const frame) { +++ if (gpu_is_buf1(frame)) +++ { +++ GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame); +++ g.arm += frame->data[1] - frame->data[0]; +++ g.vc += frame->data[1] - frame->data[0]; +++ g.numbytes = frame->data[2] - frame->data[1]; // chroma size +++ return g; +++ } +++ else +++ return *gpu_buf3_gmem(frame, 1); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_v(const AVFrame * const frame) { +++ if (gpu_is_buf1(frame)) +++ { +++ GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame); +++ g.arm += frame->data[2] - frame->data[0]; +++ g.vc += frame->data[2] - frame->data[0]; +++ g.numbytes = frame->data[2] - frame->data[1]; // chroma size +++ return g; +++ } +++ else +++ return *gpu_buf3_gmem(frame, 2); +++} +++ +++#endif +++ +++ ++ // QPU specific functions ++ extern void qpu_run_shader8(int code, int unifs1, int unifs2, int 
unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); ++ extern void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, int unifs9, int unifs10, int unifs11, int unifs12); ++diff --git a/libavcodec/rpi_zc.c b/libavcodec/rpi_zc.c ++new file mode 100644 ++index 0000000..9580165 ++--- /dev/null +++++ b/libavcodec/rpi_zc.c ++@@ -0,0 +1,406 @@ +++#include "config.h" +++#ifdef RPI +++#include "rpi_qpu.h" +++#include "rpi_zc.h" +++ +++#include "libavutil/buffer_internal.h" +++ +++struct ZcPoolEnt; +++ +++typedef struct ZcPool +++{ +++ int numbytes; +++ struct ZcPoolEnt * head; +++ pthread_mutex_t lock; +++} ZcPool; +++ +++typedef struct ZcPoolEnt +++{ +++ // It is important that we start with gmem as other bits of code will expect to see that +++ GPU_MEM_PTR_T gmem; +++ struct ZcPoolEnt * next; +++ struct ZcPool * pool; +++} ZcPoolEnt; +++ +++static ZcPoolEnt * zc_pool_ent_alloc(ZcPool * const pool, const int size) +++{ +++ ZcPoolEnt * const zp = av_malloc(sizeof(ZcPoolEnt)); +++ +++ if (zp == NULL) { +++ av_log(NULL, AV_LOG_ERROR, "av_malloc(ZcPoolEnt) failed\n"); +++ goto fail0; +++ } +++ +++ if (gpu_malloc_cached(size, &zp->gmem) != 0) +++ { +++ av_log(NULL, AV_LOG_ERROR, "av_gpu_malloc_cached(%d) failed\n", size); +++ goto fail1; +++ } +++ +++ zp->next = NULL; +++ zp->pool = pool; +++ return zp; +++ +++fail1: +++ av_free(zp); +++fail0: +++ return NULL; +++} +++ +++static void zc_pool_ent_free(ZcPoolEnt * const zp) +++{ +++ gpu_free(&zp->gmem); +++ av_free(zp); +++} +++ +++static void zc_pool_flush(ZcPool * const pool) +++{ +++ ZcPoolEnt * p = pool->head; +++ pool->head = NULL; +++ while (p != NULL) +++ { +++ ZcPoolEnt * const zp = p; +++ p = p->next; +++ zc_pool_ent_free(zp); +++ } +++} +++ +++static ZcPoolEnt * zc_pool_alloc(ZcPool * const pool, const int numbytes) +++{ +++ ZcPoolEnt * zp; +++ pthread_mutex_lock(&pool->lock); +++ +++ if (numbytes != 
pool->numbytes) +++ { +++ zc_pool_flush(pool); +++ pool->numbytes = numbytes; +++ } +++ +++ if (pool->head != NULL) +++ { +++ zp = pool->head; +++ pool->head = zp->next; +++ } +++ else +++ { +++ zp = zc_pool_ent_alloc(pool, numbytes); +++ } +++ +++ pthread_mutex_unlock(&pool->lock); +++ return zp; +++} +++ +++static void zc_pool_free(ZcPoolEnt * const zp) +++{ +++ ZcPool * const pool = zp == NULL ? NULL : zp->pool; +++ if (zp != NULL) +++ { +++ pthread_mutex_lock(&pool->lock); +++ if (pool->numbytes == zp->gmem.numbytes) +++ { +++ zp->next = pool->head; +++ pool->head = zp; +++ pthread_mutex_unlock(&pool->lock); +++ } +++ else +++ { +++ pthread_mutex_unlock(&pool->lock); +++ zc_pool_ent_free(zp); +++ } +++ } +++} +++ +++static void +++zc_pool_init(ZcPool * const pool) +++{ +++ pool->numbytes = -1; +++ pool->head = NULL; +++ pthread_mutex_init(&pool->lock, NULL); +++} +++ +++static void +++zc_pool_destroy(ZcPool * const pool) +++{ +++ pool->numbytes = -1; +++ zc_pool_flush(pool); +++ pthread_mutex_destroy(&pool->lock); +++} +++ +++ +++typedef struct AVZcEnv +++{ +++ ZcPool pool; +++} ZcEnv; +++ +++// Callback when buffer unrefed to zero +++static void rpi_free_display_buffer(void *opaque, uint8_t *data) +++{ +++ ZcPoolEnt *const zp = opaque; +++// printf("%s: data=%p\n", __func__, data); +++ zc_pool_free(zp); +++} +++ +++static inline GPU_MEM_PTR_T * pic_gm_ptr(AVBufferRef * const buf) +++{ +++ // Kludge where we check the free fn to check this is really +++ // one of our buffers - can't think of a better way +++ return buf == NULL || buf->buffer->free != rpi_free_display_buffer ? 
NULL : +++ av_buffer_get_opaque(buf); +++} +++ +++AVRpiZcFrameGeometry av_rpi_zc_frame_geometry( +++ const unsigned int video_width, const unsigned int video_height) +++{ +++ AVRpiZcFrameGeometry geo; +++ geo.stride_y = (video_width + 32 + 31) & ~31; +++ geo.stride_c = geo.stride_y / 2; +++// geo.height_y = (video_height + 15) & ~15; +++ geo.height_y = (video_height + 32 + 31) & ~31; +++ geo.height_c = geo.height_y / 2; +++ return geo; +++} +++ +++static AVBufferRef * rpi_buf_pool_alloc(ZcPool * const pool, int size) +++{ +++ ZcPoolEnt *const zp = zc_pool_alloc(pool, size); +++ AVBufferRef * buf; +++ +++ if (zp == NULL) { +++ av_log(NULL, AV_LOG_ERROR, "zc_pool_alloc(%d) failed\n", size); +++ goto fail0; +++ } +++ +++ if ((buf = av_buffer_create(zp->gmem.arm, size, rpi_free_display_buffer, zp, AV_BUFFER_FLAG_READONLY)) == NULL) +++ { +++ av_log(NULL, AV_LOG_ERROR, "av_buffer_create() failed\n"); +++ goto fail2; +++ } +++ +++ return buf; +++ +++fail2: +++ zc_pool_free(zp); +++fail0: +++ return NULL; +++} +++ +++static int rpi_get_display_buffer(struct AVCodecContext * const s, AVFrame * const frame) +++{ +++ ZcEnv *const zc = s->get_buffer_context; +++ const AVRpiZcFrameGeometry geo = av_rpi_zc_frame_geometry(frame->width, frame->height); +++ const unsigned int size_y = geo.stride_y * geo.height_y; +++ const unsigned int size_c = geo.stride_c * geo.height_c; +++ const unsigned int size_pic = size_y + size_c * 2; +++ AVBufferRef * buf; +++ unsigned int i; +++ +++// printf("Do local alloc: format=%#x, %dx%d: %u\n", frame->format, frame->width, frame->height, size_pic); +++ +++ if ((buf = rpi_buf_pool_alloc(&zc->pool, size_pic)) == NULL) +++ { +++ av_log(s, AV_LOG_ERROR, "rpi_get_display_buffer: Failed to get buffer from pool\n"); +++ return AVERROR(ENOMEM); +++ } +++ +++ for (i = 0; i < AV_NUM_DATA_POINTERS; i++) { +++ frame->buf[i] = NULL; +++ frame->data[i] = NULL; +++ frame->linesize[i] = 0; +++ } +++ +++ frame->buf[0] = buf; +++ frame->linesize[0] = geo.stride_y; 
+++ frame->linesize[1] = geo.stride_c; +++ frame->linesize[2] = geo.stride_c; +++ frame->data[0] = buf->data; +++ frame->data[1] = frame->data[0] + size_y; +++ frame->data[2] = frame->data[1] + size_c; +++ frame->extended_data = frame->data; +++ // Leave extended buf alone +++ +++ return 0; +++} +++ +++ +++#define RPI_GET_BUFFER2 1 +++ +++int av_rpi_zc_get_buffer2(struct AVCodecContext *s, AVFrame *frame, int flags) +++{ +++#if !RPI_GET_BUFFER2 +++ return avcodec_default_get_buffer2(s, frame, flags); +++#else +++ int rv; +++ +++ if ((s->codec->capabilities & AV_CODEC_CAP_DR1) == 0 || +++ frame->format != AV_PIX_FMT_YUV420P) +++ { +++// printf("Do default alloc: format=%#x\n", frame->format); +++ rv = avcodec_default_get_buffer2(s, frame, flags); +++ } +++ else +++ { +++ rv = rpi_get_display_buffer(s, frame); +++ } +++ +++#if 0 +++ printf("%s: %dx%d lsize=%d/%d/%d data=%p/%p/%p bref=%p/%p/%p opaque[0]=%p\n", __func__, +++ frame->width, frame->height, +++ frame->linesize[0], frame->linesize[1], frame->linesize[2], +++ frame->data[0], frame->data[1], frame->data[2], +++ frame->buf[0], frame->buf[1], frame->buf[2], +++ av_buffer_get_opaque(frame->buf[0])); +++#endif +++ return rv; +++#endif +++} +++ +++ +++static AVBufferRef * zc_copy(struct AVCodecContext * const s, +++ const AVFrame * const src) +++{ +++ AVFrame dest_frame; +++ AVFrame * const dest = &dest_frame; +++ unsigned int i; +++ uint8_t * psrc, * pdest; +++ +++ dest->width = src->width; +++ dest->height = src->height; +++ +++ if (rpi_get_display_buffer(s, dest) != 0) +++ { +++ return NULL; +++ } +++ +++ for (i = 0, psrc = src->data[0], pdest = dest->data[0]; +++ i != dest->height; +++ ++i, psrc += src->linesize[0], pdest += dest->linesize[0]) +++ { +++ memcpy(pdest, psrc, dest->width); +++ } +++ for (i = 0, psrc = src->data[1], pdest = dest->data[1]; +++ i != dest->height / 2; +++ ++i, psrc += src->linesize[1], pdest += dest->linesize[1]) +++ { +++ memcpy(pdest, psrc, dest->width / 2); +++ } +++ for (i = 0, 
psrc = src->data[2], pdest = dest->data[2]; +++ i != dest->height / 2; +++ ++i, psrc += src->linesize[2], pdest += dest->linesize[2]) +++ { +++ memcpy(pdest, psrc, dest->width / 2); +++ } +++ +++ return dest->buf[0]; +++} +++ +++ +++AVRpiZcRefPtr av_rpi_zc_ref(struct AVCodecContext * const s, +++ const AVFrame * const frame, const int maycopy) +++{ +++ assert(s != NULL); +++ +++ if (frame->format != AV_PIX_FMT_YUV420P) +++ { +++ av_log(s, AV_LOG_WARNING, "%s: *** Format not YUV420P: %d\n", __func__, frame->format); +++ return NULL; +++ } +++ +++ if (frame->buf[1] != NULL) +++ { +++ if (maycopy) +++ { +++ av_log(s, AV_LOG_INFO, "%s: *** Not a single buf frame: copying\n", __func__); +++ return zc_copy(s, frame); +++ } +++ else +++ { +++ av_log(s, AV_LOG_WARNING, "%s: *** Not a single buf frame: NULL\n", __func__); +++ return NULL; +++ } +++ } +++ +++ if (pic_gm_ptr(frame->buf[0]) == NULL) +++ { +++ if (maycopy) +++ { +++ av_log(s, AV_LOG_INFO, "%s: *** Not one of our buffers: copying\n", __func__); +++ return zc_copy(s, frame); +++ } +++ else +++ { +++ av_log(s, AV_LOG_WARNING, "%s: *** Not one of our buffers: NULL\n", __func__); +++ return NULL; +++ } +++ } +++ +++ return av_buffer_ref(frame->buf[0]); +++} +++ +++int av_rpi_zc_vc_handle(const AVRpiZcRefPtr fr_ref) +++{ +++ const GPU_MEM_PTR_T * const p = pic_gm_ptr(fr_ref); +++ return p == NULL ? -1 : p->vc_handle; +++} +++ +++int av_rpi_zc_numbytes(const AVRpiZcRefPtr fr_ref) +++{ +++ const GPU_MEM_PTR_T * const p = pic_gm_ptr(fr_ref); +++ return p == NULL ? 
0 : p->numbytes; +++} +++ +++void av_rpi_zc_unref(AVRpiZcRefPtr fr_ref) +++{ +++ if (fr_ref != NULL) +++ { +++ av_buffer_unref(&fr_ref); +++ } +++} +++ +++AVZcEnvPtr av_rpi_zc_env_alloc(void) +++{ +++ ZcEnv * const zc = av_mallocz(sizeof(ZcEnv)); +++ if (zc == NULL) +++ { +++ av_log(NULL, AV_LOG_ERROR, "av_rpi_zc_env_alloc: Context allocation failed\n"); +++ return NULL; +++ } +++ +++ zc_pool_init(&zc->pool); +++ return zc; +++} +++ +++void av_rpi_zc_env_free(AVZcEnvPtr zc) +++{ +++ if (zc != NULL) +++ { +++ zc_pool_destroy(&zc->pool); ; +++ av_free(zc); +++ } +++} +++ +++int av_rpi_zc_init(struct AVCodecContext * const s) +++{ +++ ZcEnv * const zc = av_rpi_zc_env_alloc(); +++ if (zc == NULL) +++ { +++ return AVERROR(ENOMEM); +++ } +++ +++ s->get_buffer_context = zc; +++ s->get_buffer2 = av_rpi_zc_get_buffer2; +++ return 0; +++} +++ +++void av_rpi_zc_uninit(struct AVCodecContext * const s) +++{ +++ if (s->get_buffer2 == av_rpi_zc_get_buffer2) +++ { +++ ZcEnv * const zc = s->get_buffer_context; +++ s->get_buffer2 = avcodec_default_get_buffer2; +++ s->get_buffer_context = NULL; +++ av_rpi_zc_env_free(zc); +++ } +++} +++ +++#endif // RPI +++ ++diff --git a/libavcodec/rpi_zc.h b/libavcodec/rpi_zc.h ++new file mode 100644 ++index 0000000..f0109f4 ++--- /dev/null +++++ b/libavcodec/rpi_zc.h ++@@ -0,0 +1,83 @@ +++#ifndef LIBAVCODEC_RPI_ZC_H +++#define LIBAVCODEC_RPI_ZC_H +++ +++// Zero-Copy frame code for RPi +++// RPi needs Y/U/V planes to be contiguous for display. By default +++// ffmpeg will allocate separated planes so a memcpy is needed before +++// display. This code provides a method of making ffmpeg allocate a single +++// bit of memory for the frame which can then be reference counted until +++// display has finished with it. 
+++ +++#include "libavutil/frame.h" +++#include "libavcodec/avcodec.h" +++ +++// "Opaque" pointer to whatever we are using as a buffer reference +++typedef AVBufferRef * AVRpiZcRefPtr; +++ +++struct AVZcEnv; +++typedef struct AVZcEnv * AVZcEnvPtr; +++ +++typedef struct AVRpiZcFrameGeometry +++{ +++ unsigned int stride_y; +++ unsigned int height_y; +++ unsigned int stride_c; +++ unsigned int height_c; +++} AVRpiZcFrameGeometry; +++ +++ +++AVRpiZcFrameGeometry av_rpi_zc_frame_geometry( +++ const unsigned int video_width, const unsigned int video_height); +++ +++// Replacement fn for avctx->get_buffer2 +++// Should be set before calling avcodec_open2 +++// +++// N.B. in addition to setting avctx->get_buffer2, avctx->refcounted_frames +++// must be set to 1 as otherwise the buffer info is killed before being returned +++// by avcodec_decode_video2. Note also that this means that the AVFrame that is +++// returned must be manually derefed with av_frame_unref. This should be done +++// after av_rpi_zc_ref has been called. 
+++int av_rpi_zc_get_buffer2(struct AVCodecContext *s, AVFrame *frame, int flags); +++ +++// Generate a ZC reference to the buffer(s) in this frame +++// If the buffer doesn't appear to be one allocated by _get_buffer2 +++// then the behaviour depends on maycopy: +++// If maycopy=0 then return NULL +++// If maycopy=1 && the src frame is in a form where we can easily copy +++// the data, then allocate a new buffer and copy the data into it +++// Otherwise return NULL +++AVRpiZcRefPtr av_rpi_zc_ref(struct AVCodecContext * const s, +++ const AVFrame * const frame, const int maycopy); +++ +++// Get the vc_handle from the frame ref +++// Returns -1 if ref doesn't look valid +++int av_rpi_zc_vc_handle(const AVRpiZcRefPtr fr_ref); +++// Get the number of bytes allocated from the frame ref +++// Returns 0 if ref doesn't look valid +++int av_rpi_zc_numbytes(const AVRpiZcRefPtr fr_ref); +++ +++// Unreference the buffer refed/allocated by _zc_ref +++// If fr_ref is NULL then this will NOP +++void av_rpi_zc_unref(AVRpiZcRefPtr fr_ref); +++ +++// Allocate an environment for the buffer pool used by the ZC code +++// This should be put in avctx->get_buffer_context so it can be found by +++// av_rpi_zc_get_buffer2 when it is called from ffmpeg +++AVZcEnvPtr av_rpi_zc_env_alloc(void); +++ +++// Free the environment used by the ZC code +++void av_rpi_zc_env_free(AVZcEnvPtr); +++ +++ +++// Init ZC into a context +++// There is nothing magic in this fn - it just packages setting +++// get_buffer2 & get_buffer_context +++int av_rpi_zc_init(struct AVCodecContext * const s); +++ +++// Free ZC from a context +++// There is nothing magic in this fn - it just packages unsetting +++// get_buffer2 & get_buffer_context +++void av_rpi_zc_uninit(struct AVCodecContext * const s); +++ +++#endif +++ ++-- ++2.7.4 ++ ++ ++From a6da64e1ca42f0394ccfa55dca782a456841da94 Mon Sep 17 00:00:00 2001 ++From: John Cox ++Date: Tue, 1 Mar 2016 14:21:25 +0000 ++Subject: [PATCH 2/2] Set VPU scheduling thread 
to high priority after creation ++ ++--- ++ libavcodec/rpi_qpu.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- ++ 1 file changed, 47 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index b0c9bc5..ee19231 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -182,9 +182,55 @@ static int gpu_init(volatile struct GPU **gpu) { ++ err = pthread_create(&vpu_thread, NULL, vpu_start, NULL); ++ //printf("Created thread\n"); ++ if (err) { ++- printf("Failed to create vpu thread\n"); +++ av_log(NULL, AV_LOG_FATAL, "Failed to create vpu thread\n"); ++ return -4; ++ } +++ +++ { +++ struct sched_param param = {0}; +++ int policy = 0; +++ +++ if (pthread_getschedparam(vpu_thread, &policy, &param) != 0) +++ { +++ av_log(NULL, AV_LOG_ERROR, "Unable to get VPU thread scheduling parameters\n"); +++ } +++ else +++ { +++ av_log(NULL, AV_LOG_INFO, "VPU thread: policy=%d (%s), pri=%d\n", +++ policy, +++ policy == SCHED_RR ? "RR" : policy == SCHED_FIFO ? "FIFO" : "???" , +++ param.sched_priority); +++ +++ policy = SCHED_FIFO; +++ param.sched_priority = sched_get_priority_max(SCHED_FIFO); +++ +++ av_log(NULL, AV_LOG_INFO, "Attempt to set: policy=%d (%s), pri=%d\n", +++ policy, +++ policy == SCHED_RR ? "RR" : policy == SCHED_FIFO ? "FIFO" : "???" , +++ param.sched_priority); +++ +++ if (pthread_setschedparam(vpu_thread, policy, &param) != 0) +++ { +++ av_log(NULL, AV_LOG_ERROR, "Unable to set VPU thread scheduling parameters\n"); +++ } +++ else +++ { +++ if (pthread_getschedparam(vpu_thread, &policy, &param) != 0) +++ { +++ av_log(NULL, AV_LOG_ERROR, "Unable to get VPU thread scheduling parameters\n"); +++ } +++ else +++ { +++ av_log(NULL, AV_LOG_INFO, "VPU thread (after): policy=%d (%s), pri=%d\n", +++ policy, +++ policy == SCHED_RR ? "RR" : policy == SCHED_FIFO ? "FIFO" : "???" 
, +++ param.sched_priority); +++ } +++ } +++ } +++ +++ } +++ ++ } ++ #endif ++ ++-- ++2.7.4 ++ + +From 4dcf6adc09c509c7e448a4fcfe48bc7da6f907a8 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Sat, 22 Aug 2015 23:06:56 +0100 +Subject: [PATCH 29/67] [dvdmessage] Increase timeout on + CDVDMsgGeneralSynchronize + +--- + xbmc/cores/VideoPlayer/DVDMessage.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDMessage.cpp b/xbmc/cores/VideoPlayer/DVDMessage.cpp +index 5aed6918d217df884107fe6366b3668efa96af20..2442fc808ae89c5550b8db34b2605f5037f2ef29 100644 +--- a/xbmc/cores/VideoPlayer/DVDMessage.cpp ++++ b/xbmc/cores/VideoPlayer/DVDMessage.cpp +@@ -90,7 +90,7 @@ bool CDVDMsgGeneralSynchronize::Wait(unsigned int milliseconds, unsigned int sou + + void CDVDMsgGeneralSynchronize::Wait(volatile bool *abort, unsigned int source) + { +- while(!Wait(100, source)) ++ while(!Wait(200, source)) + { + if(abort && *abort) + return; + +From 01bce0b478e428f0e8805868222928e8274bb809 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Wed, 16 Sep 2015 19:05:12 +0100 +Subject: [PATCH 30/67] [3d] Make MVC a valid 3D filename tag + +--- + xbmc/guilib/StereoscopicsManager.cpp | 9 +++++++++ + xbmc/settings/AdvancedSettings.cpp | 2 ++ + xbmc/settings/AdvancedSettings.h | 1 + + 3 files changed, 12 insertions(+) + +diff --git a/xbmc/guilib/StereoscopicsManager.cpp b/xbmc/guilib/StereoscopicsManager.cpp +index b34873cba6534086ae243326550385867a03256a..1443acaf0f25df458ae49766e13dd0323454f2eb 100644 +--- a/xbmc/guilib/StereoscopicsManager.cpp ++++ b/xbmc/guilib/StereoscopicsManager.cpp +@@ -197,6 +197,15 @@ std::string CStereoscopicsManager::DetectStereoModeByString(const std::string &n + if (re.RegFind(searchString) > -1) + stereoMode = "top_bottom"; + ++ if (!re.RegComp(g_advancedSettings.m_stereoscopicregex_mvc.c_str())) ++ { ++ CLog::Log(LOGERROR, "%s: Invalid RegExp for matching 3d MVC content:'%s'", __FUNCTION__, 
g_advancedSettings.m_stereoscopicregex_mvc.c_str()); ++ return stereoMode; ++ } ++ ++ if (re.RegFind(searchString) > -1) ++ stereoMode = "left_right"; ++ + return stereoMode; + } + +diff --git a/xbmc/settings/AdvancedSettings.cpp b/xbmc/settings/AdvancedSettings.cpp +index 446293308010f3b8cd8d325fa6d0285fcc9f892d..ae21da29314ae8faa35129a79e62e82b55fbc306 100644 +--- a/xbmc/settings/AdvancedSettings.cpp ++++ b/xbmc/settings/AdvancedSettings.cpp +@@ -403,6 +403,7 @@ void CAdvancedSettings::Initialize() + m_stereoscopicregex_3d = "[-. _]3d[-. _]"; + m_stereoscopicregex_sbs = "[-. _]h?sbs[-. _]"; + m_stereoscopicregex_tab = "[-. _]h?tab[-. _]"; ++ m_stereoscopicregex_mvc = "[-. _]h?mvc[-. _]"; + + m_useDisplayControlHWStereo = false; + +@@ -517,6 +518,7 @@ void CAdvancedSettings::ParseSettingsFile(const std::string &file) + XMLUtils::GetString(pElement, "stereoscopicregex3d", m_stereoscopicregex_3d); + XMLUtils::GetString(pElement, "stereoscopicregexsbs", m_stereoscopicregex_sbs); + XMLUtils::GetString(pElement, "stereoscopicregextab", m_stereoscopicregex_tab); ++ XMLUtils::GetString(pElement, "stereoscopicregexmvc", m_stereoscopicregex_mvc); + XMLUtils::GetFloat(pElement, "subsdelayrange", m_videoSubsDelayRange, 10, 600); + XMLUtils::GetFloat(pElement, "audiodelayrange", m_videoAudioDelayRange, 10, 600); + XMLUtils::GetString(pElement, "defaultplayer", m_videoDefaultPlayer); +diff --git a/xbmc/settings/AdvancedSettings.h b/xbmc/settings/AdvancedSettings.h +index bcbd5d1c68b576034a418dd2dce0b47071229e0b..d4a30863806eb1c86042e0991793aedf20bf8344 100644 +--- a/xbmc/settings/AdvancedSettings.h ++++ b/xbmc/settings/AdvancedSettings.h +@@ -372,6 +372,7 @@ class CAdvancedSettings : public ISettingCallback, public ISettingsHandler + std::string m_stereoscopicregex_3d; + std::string m_stereoscopicregex_sbs; + std::string m_stereoscopicregex_tab; ++ std::string m_stereoscopicregex_mvc; + + bool m_useDisplayControlHWStereo; + + +From 6268ac7405bc4f407e486644d11383f30e48c952 Mon 
Sep 17 00:00:00 2001 +From: popcornmix +Date: Mon, 5 Oct 2015 14:58:05 +0100 +Subject: [PATCH 31/67] [3d] Swap top/bottom sides of GUI + +--- + xbmc/guilib/GraphicContext.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xbmc/guilib/GraphicContext.cpp b/xbmc/guilib/GraphicContext.cpp +index 9caa43113f63139d277bd71242a858a581736845..3ace73527a7c359ac21c87bf38b5d648a0f4d9c2 100644 +--- a/xbmc/guilib/GraphicContext.cpp ++++ b/xbmc/guilib/GraphicContext.cpp +@@ -265,7 +265,7 @@ CPoint CGraphicContext::StereoCorrection(const CPoint &point) const + { + const RESOLUTION_INFO info = GetResInfo(); + +- if(m_stereoView == RENDER_STEREO_VIEW_RIGHT) ++ if(m_stereoView == RENDER_STEREO_VIEW_LEFT) + res.y += info.iHeight + info.iBlanking; + } + if(m_stereoMode == RENDER_STEREO_MODE_SPLIT_VERTICAL) + +From 97e700d5324b40fc895f1cbcf656ad4291ecfbee Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Sun, 11 Oct 2015 20:51:37 +0100 +Subject: [PATCH 32/67] Revert "Revert "Disable extra logging by default"" + +This reverts commit a880554325be187b877cd8f0e2b338e7267da636. 
+--- + system/settings/settings.xml | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/system/settings/settings.xml b/system/settings/settings.xml +index ca7e8892606782e54d4883c5b2f0e6686b1ae280..b67d1113477541f5ce3533495a9960b8646b83ed 100644 +--- a/system/settings/settings.xml ++++ b/system/settings/settings.xml +@@ -2649,12 +2649,12 @@ + + + 1 +- true ++ false + + + + 1 +- 32768 ++ + + loggingcomponents + , + +From 52605aac1a6ca5d9a77513d6467935e7795c540a Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 26 Nov 2015 17:14:49 +0000 +Subject: [PATCH 33/67] [ae] Add debug logging showing resamplerate + +--- + xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp +index 5bb87b2764fdf1606f438fb3a008b322f8adf271..f9e8a9beaa9b3b4590c698a4d64351cb14c2339d 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp +@@ -2471,6 +2471,7 @@ CSampleBuffer* CActiveAE::SyncStream(CActiveAEStream *stream) + if (stream->m_resampleBuffers) + { + stream->m_resampleBuffers->m_resampleRatio = stream->CalcResampleRatio(error); ++ CLog::Log(LOGDEBUG, "CDVDPlayerAudio::%s rr:%.5f error:%.6f", __FUNCTION__, stream->m_resampleBuffers->m_resampleRatio, error); + } + } + else if (stream->m_resampleBuffers) + +From 17b01a2c74a918d1d6fddc35d7b3a7da986d7225 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Mon, 21 Dec 2015 22:17:25 +0000 +Subject: [PATCH 34/67] [omximage] Fall back to arm jpeg encode/decode when gpu + is busy + +--- + xbmc/cores/omxplayer/OMXImage.cpp | 50 ++++++++++++++++++++++++++++++++------- + xbmc/cores/omxplayer/OMXImage.h | 7 ++++++ + 2 files changed, 48 insertions(+), 9 deletions(-) + +diff --git a/xbmc/cores/omxplayer/OMXImage.cpp b/xbmc/cores/omxplayer/OMXImage.cpp +index 
d2560aa78980e44d5f2d1483bce976cb83353502..e16dbf00b8d8192df4c6e946a48d8f20a72d762d 100644 +--- a/xbmc/cores/omxplayer/OMXImage.cpp ++++ b/xbmc/cores/omxplayer/OMXImage.cpp +@@ -57,12 +57,17 @@ static XbmcThreads::ConditionVariable g_count_cond; + static CCriticalSection g_count_lock; + static int g_count_val; + +-static void limit_calls_enter() ++static bool limit_calls_enter() + { + CSingleLock lock(g_count_lock); ++ // on Pi2 fall back to arm decode if the queue is getting big ++ if (g_RBP.RasberryPiVersion() > 1 && g_count_val >= 2) ++ return false; ++ + while (g_count_val >= 3) + g_count_cond.wait(lock); + g_count_val++; ++ return true; + } + + static void limit_calls_leave() +@@ -112,6 +117,9 @@ bool COMXImage::CreateThumbnailFromSurface(unsigned char* buffer, unsigned int w + unsigned int format, unsigned int pitch, const std::string& destFile) + { + COMXImageEnc omxImageEnc; ++ if (!omxImageEnc.Gpu()) ++ return false; ++ + bool ret = omxImageEnc.CreateThumbnailFromSurface(buffer, width, height, format, pitch, destFile); + if (!ret) + CLog::Log(LOGNOTICE, "%s: unable to create thumbnail %s %dx%d", __func__, destFile.c_str(), width, height); +@@ -205,6 +213,8 @@ bool COMXImage::CreateThumb(const std::string& srcFile, unsigned int maxHeight, + bool okay = false; + COMXImageFile file; + COMXImageReEnc reenc; ++ if (!reenc.Gpu()) ++ return false; + void *pDestBuffer; + unsigned int nDestSize; + int orientation = additional_info == "flipped" ? 
1:0; +@@ -310,6 +320,9 @@ bool COMXImage::DecodeJpegToTexture(COMXImageFile *file, unsigned int width, uns + bool ret = false; + COMXTexture omx_image; + ++ if (!omx_image.Gpu()) ++ return false; ++ + struct textureinfo *tex = new struct textureinfo; + if (!tex) + return NULL; +@@ -924,7 +937,7 @@ bool COMXImageFile::ReadFile(const std::string& inputFile, int orientation) + + COMXImageDec::COMXImageDec() + { +- limit_calls_enter(); ++ m_gpu = limit_calls_enter(); + m_decoded_buffer = NULL; + OMX_INIT_STRUCTURE(m_decoded_format); + m_success = false; +@@ -936,7 +949,8 @@ COMXImageDec::~COMXImageDec() + + OMX_INIT_STRUCTURE(m_decoded_format); + m_decoded_buffer = NULL; +- limit_calls_leave(); ++ if (m_gpu) ++ limit_calls_leave(); + } + + void COMXImageDec::Close() +@@ -1086,6 +1100,9 @@ bool COMXImageDec::HandlePortSettingChange(unsigned int resize_width, unsigned i + + bool COMXImageDec::Decode(const uint8_t *demuxer_content, unsigned demuxer_bytes, unsigned width, unsigned height, unsigned stride, void *pixels) + { ++ if (!m_gpu) ++ return false; ++ + CSingleLock lock(m_OMXSection); + OMX_ERRORTYPE omx_err = OMX_ErrorNone; + OMX_BUFFERHEADERTYPE *omx_buffer = NULL; +@@ -1223,7 +1240,7 @@ bool COMXImageDec::Decode(const uint8_t *demuxer_content, unsigned demuxer_bytes + + COMXImageEnc::COMXImageEnc() + { +- limit_calls_enter(); ++ m_gpu = limit_calls_enter(); + CSingleLock lock(m_OMXSection); + OMX_INIT_STRUCTURE(m_encoded_format); + m_encoded_buffer = NULL; +@@ -1247,11 +1264,15 @@ COMXImageEnc::~COMXImageEnc() + m_omx_encoder.Deinitialize(); + } + } +- limit_calls_leave(); ++ if (m_gpu) ++ limit_calls_leave(); + } + + bool COMXImageEnc::Encode(unsigned char *buffer, int size, unsigned width, unsigned height, unsigned int pitch) + { ++ if (!m_gpu) ++ return false; ++ + CSingleLock lock(m_OMXSection); + + unsigned int demuxer_bytes = 0; +@@ -1432,6 +1453,9 @@ bool COMXImageEnc::Encode(unsigned char *buffer, int size, unsigned width, unsig + bool 
COMXImageEnc::CreateThumbnailFromSurface(unsigned char* buffer, unsigned int width, unsigned int height, + unsigned int format, unsigned int pitch, const std::string& destFile) + { ++ if (!m_gpu) ++ return false; ++ + if(format != XB_FMT_A8R8G8B8 || !buffer) + { + CLog::Log(LOGDEBUG, "%s::%s : %s failed format=0x%x\n", CLASSNAME, __func__, destFile.c_str(), format); +@@ -1465,7 +1489,7 @@ bool COMXImageEnc::CreateThumbnailFromSurface(unsigned char* buffer, unsigned in + + COMXImageReEnc::COMXImageReEnc() + { +- limit_calls_enter(); ++ m_gpu = limit_calls_enter(); + m_encoded_buffer = NULL; + m_pDestBuffer = NULL; + m_nDestAllocSize = 0; +@@ -1479,7 +1503,8 @@ COMXImageReEnc::~COMXImageReEnc() + free (m_pDestBuffer); + m_pDestBuffer = NULL; + m_nDestAllocSize = 0; +- limit_calls_leave(); ++ if (m_gpu) ++ limit_calls_leave(); + } + + void COMXImageReEnc::Close() +@@ -1771,6 +1796,9 @@ bool COMXImageReEnc::HandlePortSettingChange(unsigned int resize_width, unsigned + + bool COMXImageReEnc::ReEncode(COMXImageFile &srcFile, unsigned int maxWidth, unsigned int maxHeight, void * &pDestBuffer, unsigned int &nDestSize) + { ++ if (!m_gpu) ++ return false; ++ + CSingleLock lock(m_OMXSection); + OMX_ERRORTYPE omx_err = OMX_ErrorNone; + +@@ -1943,14 +1971,15 @@ bool COMXImageReEnc::ReEncode(COMXImageFile &srcFile, unsigned int maxWidth, uns + + COMXTexture::COMXTexture() + { +- limit_calls_enter(); ++ m_gpu = limit_calls_enter(); + m_success = false; + } + + COMXTexture::~COMXTexture() + { + Close(); +- limit_calls_leave(); ++ if (m_gpu) ++ limit_calls_leave(); + } + + void COMXTexture::Close() +@@ -2134,6 +2163,9 @@ bool COMXTexture::HandlePortSettingChange(unsigned int resize_width, unsigned in + + bool COMXTexture::Decode(const uint8_t *demuxer_content, unsigned demuxer_bytes, unsigned int width, unsigned int height, void *egl_image) + { ++ if (!m_gpu) ++ return false; ++ + CSingleLock lock(m_OMXSection); + OMX_ERRORTYPE omx_err = OMX_ErrorNone; + +diff --git 
a/xbmc/cores/omxplayer/OMXImage.h b/xbmc/cores/omxplayer/OMXImage.h +index a93aa82663903fb1bf712058c2e259290ee742e6..6f38dbc7e5cc721c59a3633935f08218eb1dd169 100644 +--- a/xbmc/cores/omxplayer/OMXImage.h ++++ b/xbmc/cores/omxplayer/OMXImage.h +@@ -133,6 +133,7 @@ protected: + OMX_PARAM_PORTDEFINITIONTYPE m_decoded_format; + CCriticalSection m_OMXSection; + bool m_success; ++ bool m_gpu; + }; + + class COMXImageEnc +@@ -144,6 +145,7 @@ public: + // Required overrides + bool CreateThumbnailFromSurface(unsigned char* buffer, unsigned int width, unsigned int height, + unsigned int format, unsigned int pitch, const std::string& destFile); ++ bool Gpu() { return m_gpu; } + protected: + bool Encode(unsigned char *buffer, int size, unsigned int width, unsigned int height, unsigned int pitch); + // Components +@@ -152,6 +154,7 @@ protected: + OMX_PARAM_PORTDEFINITIONTYPE m_encoded_format; + CCriticalSection m_OMXSection; + bool m_success; ++ bool m_gpu; + }; + + class COMXImageReEnc +@@ -163,6 +166,7 @@ public: + // Required overrides + void Close(); + bool ReEncode(COMXImageFile &srcFile, unsigned int width, unsigned int height, void * &pDestBuffer, unsigned int &nDestSize); ++ bool Gpu() { return m_gpu; } + protected: + bool HandlePortSettingChange(unsigned int resize_width, unsigned int resize_height, int orientation, bool port_settings_changed); + // Components +@@ -176,6 +180,7 @@ protected: + void *m_pDestBuffer; + unsigned int m_nDestAllocSize; + bool m_success; ++ bool m_gpu; + }; + + class COMXTexture +@@ -187,6 +192,7 @@ public: + // Required overrides + void Close(void); + bool Decode(const uint8_t *data, unsigned size, unsigned int width, unsigned int height, void *egl_image); ++ bool Gpu() { return m_gpu; } + protected: + bool HandlePortSettingChange(unsigned int resize_width, unsigned int resize_height, void *egl_image, bool port_settings_changed); + +@@ -201,6 +207,7 @@ protected: + OMX_BUFFERHEADERTYPE *m_egl_buffer; + CCriticalSection m_OMXSection; + bool 
m_success; ++ bool m_gpu; + }; + + extern COMXImage g_OMXImage; + +From 95d0673204e2559173405d03df038ac152a5501b Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Wed, 9 Dec 2015 13:31:14 +0000 +Subject: [PATCH 35/67] [mmalcodec] Fail to open when width is invalid. Can + happen with mpegts files + +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index c0e553ca060749edff28bcbb880ed3e149b9f751..8691b086a46fcdd03eee809a53ea9b20f74dcc05 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -541,6 +541,9 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) + CLog::Log(LOGDEBUG, "%s::%s usemmal:%d software:%d %dx%d renderer:%p", CLASSNAME, __func__, CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEMMAL), hints.software, hints.width, hints.height, options.m_opaque_pointer); + ++ // This occurs at start of m2ts files before streams have been fully identified - just ignore ++ if (!hints.width) ++ return false; + // we always qualify even if DVDFactoryCodec does this too. 
+ if (!CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEMMAL) || hints.software) + return false; + +From 9f2c6309ca9bcc281124fa0a5bdb665d9fb50f35 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Tue, 12 Jan 2016 16:29:57 +0000 +Subject: [PATCH 36/67] ffmpeg: Add cabac opimisations for hevc + +--- + .../0001-Squashed-commit-of-the-following.patch | 2179 ++++++++++++++++++++ + tools/depends/target/ffmpeg/Makefile | 5 +- + tools/depends/target/ffmpeg/autobuild.sh | 1 + + 3 files changed, 2184 insertions(+), 1 deletion(-) + create mode 100644 tools/depends/target/ffmpeg/0001-Squashed-commit-of-the-following.patch + +diff --git a/tools/depends/target/ffmpeg/0001-Squashed-commit-of-the-following.patch b/tools/depends/target/ffmpeg/0001-Squashed-commit-of-the-following.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..38554af0af30a85b7d88d31b7d21775cf294b0e3 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/0001-Squashed-commit-of-the-following.patch +@@ -0,0 +1,2179 @@ ++From d08594462136274636c1f2f476a6410ff92a9e16 Mon Sep 17 00:00:00 2001 ++From: John Cox ++Date: Wed, 13 Jan 2016 16:13:33 +0000 ++Subject: [PATCH] H.265 residual decode rework (v2) ++ ++Rework the cabac decode functions ++Simplify the code flow and variable usage where possible ++ ++(Remove profiling and other spurious deltas that were in v1) ++--- ++ libavcodec/arm/cabac.h | 155 ++++- ++ libavcodec/arm/hevc_cabac.h | 491 +++++++++++++++ ++ libavcodec/arm/hevcdsp_deblock_neon.S | 13 +- ++ libavcodec/arm/hevcdsp_epel_neon.S | 9 +- ++ libavcodec/cabac.h | 9 +- ++ libavcodec/hevc_cabac.c | 1096 +++++++++++++++++++++++++-------- ++ 6 files changed, 1509 insertions(+), 264 deletions(-) ++ create mode 100644 libavcodec/arm/hevc_cabac.h ++ ++diff --git a/libavcodec/arm/cabac.h b/libavcodec/arm/cabac.h ++index fdbf86b..0a3980a 100644 ++--- a/libavcodec/arm/cabac.h +++++ b/libavcodec/arm/cabac.h ++@@ -26,13 +26,34 @@ ++ #include "libavutil/internal.h" ++ #include 
"libavcodec/cabac.h" ++ +++ +++#if UNCHECKED_BITSTREAM_READER +++#define LOAD_16BITS_BEHI\ +++ "ldrh %[tmp] , [%[ptr]] , #2 \n\t"\ +++ "rev %[tmp] , %[tmp] \n\t" +++#elif CONFIG_THUMB +++#define LOAD_16BITS_BEHI\ +++ "ldr %[tmp] , [%[c], %[end]] \n\t"\ +++ "cmp %[tmp] , %[ptr] \n\t"\ +++ "it cs \n\t"\ +++ "ldrhcs %[tmp] , [%[ptr]] , #2 \n\t"\ +++ "rev %[tmp] , %[tmp] \n\t" +++#else +++#define LOAD_16BITS_BEHI\ +++ "ldr %[tmp] , [%[c], %[end]] \n\t"\ +++ "cmp %[tmp] , %[ptr] \n\t"\ +++ "ldrcsh %[tmp] , [%[ptr]] , #2 \n\t"\ +++ "rev %[tmp] , %[tmp] \n\t" +++#endif +++ +++ ++ #define get_cabac_inline get_cabac_inline_arm ++ static av_always_inline int get_cabac_inline_arm(CABACContext *c, ++ uint8_t *const state) ++ { ++ int bit; +++#if 0 ++ void *reg_b, *reg_c, *tmp; ++- ++ __asm__ volatile( ++ "ldrb %[bit] , [%[state]] \n\t" ++ "add %[r_b] , %[tables] , %[lps_off] \n\t" ++@@ -100,9 +121,141 @@ static av_always_inline int get_cabac_inline_arm(CABACContext *c, ++ [mlps_off]"I"(H264_MLPS_STATE_OFFSET + 128) ++ : "memory", "cc" ++ ); +++#else +++ // *** Not thumb compatible yet +++ unsigned int reg_b, tmp; +++ __asm__ ( +++ "ldrb %[bit] , [%[state]] \n\t" +++ "sub %[r_b] , %[mlps_tables], %[lps_off] \n\t" +++ "and %[tmp] , %[range] , #0xC0 \n\t" +++ "add %[r_b] , %[r_b] , %[bit] \n\t" +++ "ldrb %[tmp] , [%[r_b] , %[tmp], lsl #1] \n\t" +++// %bit = *state +++// %range = range +++// %tmp = RangeLPS +++ "sub %[range] , %[range] , %[tmp] \n\t" +++ +++ "cmp %[low] , %[range] , lsl #17 \n\t" +++ "ittt ge \n\t" +++ "subge %[low] , %[low] , %[range], lsl #17 \n\t" +++ "mvnge %[bit] , %[bit] \n\t" +++ "movge %[range] , %[tmp] \n\t" +++ +++ "clz %[tmp] , %[range] \n\t" +++ "sub %[tmp] , #23 \n\t" +++ +++ "ldrb %[r_b] , [%[mlps_tables], %[bit]] \n\t" +++ "lsl %[low] , %[low] , %[tmp] \n\t" +++ "lsl %[range] , %[range] , %[tmp] \n\t" +++ +++ "strb %[r_b] , [%[state]] \n\t" +++ "lsls %[tmp] , %[low] , #16 \n\t" +++ +++ "bne 2f \n\t" +++ LOAD_16BITS_BEHI +++ "lsr %[tmp] , %[tmp] , 
#15 \n\t" +++ "movw %[r_b] , #0xFFFF \n\t" +++ "sub %[tmp] , %[tmp] , %[r_b] \n\t" +++ +++ "rbit %[r_b] , %[low] \n\t" +++ "clz %[r_b] , %[r_b] \n\t" +++ "sub %[r_b] , %[r_b] , #16 \n\t" +++#if CONFIG_THUMB +++ "lsl %[tmp] , %[tmp] , %[r_b] \n\t" +++ "add %[low] , %[low] , %[tmp] \n\t" +++#else +++ "add %[low] , %[low] , %[tmp], lsl %[r_b] \n\t" +++#endif +++ "2: \n\t" +++ : [bit]"=&r"(bit), +++ [low]"+&r"(c->low), +++ [range]"+&r"(c->range), +++ [r_b]"=&r"(reg_b), +++ [ptr]"+&r"(c->bytestream), +++ [tmp]"=&r"(tmp) +++ : [state]"r"(state), +++ [mlps_tables]"r"(ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET + 128), +++ [byte]"M"(offsetof(CABACContext, bytestream)), +++#if !UNCHECKED_BITSTREAM_READER +++ [c]"r"(c), +++ [end]"M"(offsetof(CABACContext, bytestream_end)), +++#endif +++ [lps_off]"I"((H264_MLPS_STATE_OFFSET + 128) - H264_LPS_RANGE_OFFSET) +++ : "memory", "cc" +++ ); +++#endif ++ ++ return bit & 1; ++ } +++ +++#define get_cabac_bypass get_cabac_bypass_arm +++static inline int get_cabac_bypass_arm(CABACContext * const c) +++{ +++ int rv = 0; +++ unsigned int tmp; +++ __asm ( +++ "lsl %[low] , #1 \n\t" +++ "cmp %[low] , %[range] , lsl #17 \n\t" +++ "adc %[rv] , %[rv] , #0 \n\t" +++ "it cs \n\t" +++ "subcs %[low] , %[low] , %[range], lsl #17 \n\t" +++ "lsls %[tmp] , %[low] , #16 \n\t" +++ "bne 1f \n\t" +++ LOAD_16BITS_BEHI +++ "add %[low] , %[low] , %[tmp], lsr #15 \n\t" +++ "movw %[tmp] , #0xFFFF \n\t" +++ "sub %[low] , %[low] , %[tmp] \n\t" +++ "1: \n\t" +++ : // Outputs +++ [rv]"+&r"(rv), +++ [low]"+&r"(c->low), +++ [tmp]"=&r"(tmp), +++ [ptr]"+&r"(c->bytestream) +++ : // Inputs +++#if !UNCHECKED_BITSTREAM_READER +++ [c]"r"(c), +++ [end]"M"(offsetof(CABACContext, bytestream_end)), +++#endif +++ [range]"r"(c->range) +++ : "cc" +++ ); +++ return rv; +++} +++ +++ +++#define get_cabac_bypass_sign get_cabac_bypass_sign_arm +++static inline int get_cabac_bypass_sign_arm(CABACContext * const c, int rv) +++{ +++ unsigned int tmp; +++ __asm ( +++ "lsl %[low] , #1 
\n\t" +++ "cmp %[low] , %[range] , lsl #17 \n\t" +++ "ite cc \n\t" +++ "rsbcc %[rv] , %[rv] , #0 \n\t" +++ "subcs %[low] , %[low] , %[range], lsl #17 \n\t" +++ "lsls %[tmp] , %[low] , #16 \n\t" +++ "bne 1f \n\t" +++ LOAD_16BITS_BEHI +++ "add %[low] , %[low] , %[tmp], lsr #15 \n\t" +++ "movw %[tmp] , #0xFFFF \n\t" +++ "sub %[low] , %[low] , %[tmp] \n\t" +++ "1: \n\t" +++ : // Outputs +++ [rv]"+&r"(rv), +++ [low]"+&r"(c->low), +++ [tmp]"=&r"(tmp), +++ [ptr]"+&r"(c->bytestream) +++ : // Inputs +++#if !UNCHECKED_BITSTREAM_READER +++ [c]"r"(c), +++ [end]"M"(offsetof(CABACContext, bytestream_end)), +++#endif +++ [range]"r"(c->range) +++ : "cc" +++ ); +++ return rv; +++} +++ ++ #endif /* HAVE_ARMV6T2_INLINE */ ++ ++ #endif /* AVCODEC_ARM_CABAC_H */ ++diff --git a/libavcodec/arm/hevc_cabac.h b/libavcodec/arm/hevc_cabac.h ++new file mode 100644 ++index 0000000..31d3c59 ++--- /dev/null +++++ b/libavcodec/arm/hevc_cabac.h ++@@ -0,0 +1,491 @@ +++/* +++ * This file is part of FFmpeg. +++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. 
+++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++#ifndef AVCODEC_ARM_HEVC_CABAC_H +++#define AVCODEC_ARM_HEVC_CABAC_H +++ +++#include "config.h" +++#if HAVE_ARMV6T2_INLINE +++ +++#define hevc_mem_bits32 hevc_mem_bits32_arm +++static inline uint32_t hevc_mem_bits32_arm(const void * p, const unsigned int bits) +++{ +++ unsigned int n; +++ __asm__ ( +++ "rev %[n], %[x] \n\t" +++ : [n]"=r"(n) +++ : [x]"r"(*(const uint32_t *)((const uint8_t *)p + (bits >> 3))) +++ : +++ ); +++ return n << (bits & 7); +++} +++ +++ +++// --------------------------------------------------------------------------- +++// +++// Helper fns - little bits of code where ARM has an instruction that the +++// compiler doesn't know about / use +++ +++#define trans_scale_sat trans_scale_sat_arm +++static inline int trans_scale_sat_arm(const int level, const unsigned int scale, const unsigned int scale_m, const unsigned int shift) +++{ +++ int rv; +++ int t = ((level * (int)(scale * scale_m)) >> shift) + 1; +++ +++ __asm__ ( +++ "ssat %[rv], #16, %[t], ASR #1 \n\t" +++ : [rv]"=r"(rv) +++ : [t]"r"(t) +++ : +++ ); +++ return rv; +++} +++ +++#define update_rice update_rice_arm +++static inline void update_rice_arm(uint8_t * const stat_coeff, +++ const unsigned int last_coeff_abs_level_remaining, +++ const unsigned int c_rice_param) +++{ +++ int t; +++ __asm__ ( +++ "lsl %[t], %[coeff], #1 \n\t" +++ "lsrs %[t], %[t], %[shift] \n\t" +++ "it eq \n\t" +++ "subeq %[stat], %[stat], #1 \n\t" +++ "cmp %[t], #6 \n\t" +++ "adc %[stat], %[stat], #0 \n\t" +++ "usat %[stat], #8, %[stat] \n\t" +++ : [stat]"+&r"(*stat_coeff), +++ [t]"=&r"(t) +++ : [coeff]"r"(last_coeff_abs_level_remaining), +++ [shift]"r"(c_rice_param) +++ : "cc" +++ ); +++} +++ +++// ---------------------------------------------------------------------------
+++// +++// CABAC get loops +++// +++// Where the loop is simple enough we can normally do 10-30% better than the +++// compiler +++ +++// Get the residual greater than 1 bits +++ +++#define get_cabac_greater1_bits get_cabac_greater1_bits_arm +++static inline unsigned int get_cabac_greater1_bits_arm(CABACContext * const c, const unsigned int n, +++ uint8_t * const state0) +++{ +++ unsigned int i, reg_b, st, tmp, bit, rv; +++ __asm__ ( +++ "mov %[i] , #0 \n\t" +++ "mov %[rv] , #0 \n\t" +++ "1: \n\t" +++ "add %[i] , %[i] , #1 \n\t" +++ "cmp %[rv] , #0 \n\t" +++ "ite eq \n\t" +++ "usateq %[st] , #2 , %[i] \n\t" +++ "movne %[st] , #0 \n\t" +++ +++ "ldrb %[bit] , [%[state0], %[st]] \n\t" +++ "sub %[r_b] , %[mlps_tables], %[lps_off] \n\t" +++ "and %[tmp] , %[range] , #0xC0 \n\t" +++ "add %[r_b] , %[r_b] , %[bit] \n\t" +++ "ldrb %[tmp] , [%[r_b], %[tmp], lsl #1] \n\t" +++ "sub %[range] , %[range] , %[tmp] \n\t" +++ +++ "cmp %[low] , %[range], lsl #17 \n\t" +++ "ittt ge \n\t" +++ "subge %[low] , %[low] , %[range], lsl #17 \n\t" +++ "mvnge %[bit] , %[bit] \n\t" +++ "movge %[range] , %[tmp] \n\t" +++ +++ "ldrb %[r_b] , [%[mlps_tables], %[bit]] \n\t" +++ "and %[bit] , %[bit] , #1 \n\t" +++ "orr %[rv] , %[bit] , %[rv], lsl #1 \n\t" +++ +++ "clz %[tmp] , %[range] \n\t" +++ "sub %[tmp] , #23 \n\t" +++ +++ "lsl %[low] , %[low] , %[tmp] \n\t" +++ "lsl %[range] , %[range] , %[tmp] \n\t" +++ +++ "strb %[r_b] , [%[state0], %[st]] \n\t" +++// There is a small speed gain from combining both conditions, using a single +++// branch and then working out what that meant later +++ "lsls %[tmp] , %[low] , #16 \n\t" +++ "it ne \n\t" +++ "cmpne %[n] , %[i] \n\t" +++ "bne 1b \n\t" +++ +++// If reload is not required then we must have run out of flags to decode +++ "tst %[tmp] , %[tmp] \n\t" +++ "bne 2f \n\t" +++ +++// Do reload +++ "ldrh %[tmp] , [%[bptr]] , #2 \n\t" +++ "movw %[r_b] , #0xFFFF \n\t" +++ "rev %[tmp] , %[tmp] \n\t" +++ "rsb %[tmp] , %[r_b] , %[tmp], lsr #15 \n\t" +++ +++ "rbit 
%[r_b] , %[low] \n\t" +++ "clz %[r_b] , %[r_b] \n\t" +++ "sub %[r_b] , %[r_b] , #16 \n\t" +++ +++#if CONFIG_THUMB +++ "lsl %[tmp] , %[tmp] , %[r_b] \n\t" +++ "add %[low] , %[low] , %[tmp] \n\t" +++#else +++ "add %[low] , %[low] , %[tmp], lsl %[r_b] \n\t" +++#endif +++ +++ "cmp %[n] , %[i] \n\t" +++ "bne 1b \n\t" +++ "2: \n\t" +++ : [bit]"=&r"(bit), +++ [low]"+&r"(c->low), +++ [range]"+&r"(c->range), +++ [r_b]"=&r"(reg_b), +++ [bptr]"+&r"(c->bytestream), +++ [i]"=&r"(i), +++ [tmp]"=&r"(tmp), +++ [st]"=&r"(st), +++ [rv]"=&r"(rv) +++ : [state0]"r"(state0), +++ [n]"r"(n), +++ [mlps_tables]"r"(ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET + 128), +++ [byte]"M"(offsetof(CABACContext, bytestream)), +++ [lps_off]"I"((H264_MLPS_STATE_OFFSET + 128) - H264_LPS_RANGE_OFFSET) +++ : "memory", "cc" +++ ); +++ return rv; +++} +++ +++ +++// n must be > 0 on entry +++#define get_cabac_sig_coeff_flag_idxs get_cabac_sig_coeff_flag_idxs_arm +++static inline uint8_t * get_cabac_sig_coeff_flag_idxs_arm(CABACContext * const c, uint8_t * const state0, +++ unsigned int n, +++ const uint8_t const * ctx_map, +++ uint8_t * p) +++{ +++ unsigned int reg_b, tmp, st, bit; +++ __asm__ ( +++ "1: \n\t" +++// Get bin from map +++ "ldrb %[st] , [%[ctx_map], %[n]] \n\t" +++ +++// Load state & ranges +++ "sub %[r_b] , %[mlps_tables], %[lps_off] \n\t" +++ "ldrb %[bit] , [%[state0], %[st]] \n\t" +++ "and %[tmp] , %[range] , #0xC0 \n\t" +++ "add %[r_b] , %[r_b] , %[tmp], lsl #1 \n\t" +++ "ldrb %[tmp] , [%[r_b], %[bit]] \n\t" +++ "sub %[range] , %[range] , %[tmp] \n\t" +++ +++ "cmp %[low] , %[range], lsl #17 \n\t" +++ "ittt ge \n\t" +++ "subge %[low] , %[low] , %[range], lsl #17 \n\t" +++ "mvnge %[bit] , %[bit] \n\t" +++ "movge %[range] , %[tmp] \n\t" +++ +++ "ldrb %[r_b] , [%[mlps_tables], %[bit]] \n\t" +++ "tst %[bit] , #1 \n\t" +++// GCC asm seems to need strbne written differently for thumb and arm +++#if CONFIG_THUMB +++ "it ne \n\t" +++ "strbne %[n] , [%[idx]] , #1 \n\t" +++#else +++ "strneb %[n] , 
[%[idx]] , #1 \n\t" +++#endif +++ +++// Renorm +++ "clz %[tmp] , %[range] \n\t" +++ "sub %[tmp] , #23 \n\t" +++ "lsl %[low] , %[low] , %[tmp] \n\t" +++ "lsl %[range] , %[range] , %[tmp] \n\t" +++ +++ "strb %[r_b] , [%[state0], %[st]] \n\t" +++// There is a small speed gain from combining both conditions, using a single +++// branch and then working out what that meant later +++ "subs %[n] , %[n] , #1 \n\t" +++#if CONFIG_THUMB +++ "itt ne \n\t" +++ "lslsne %[tmp] , %[low] , #16 \n\t" +++ "bne 1b \n\t" +++#else +++ "lslnes %[tmp] , %[low] , #16 \n\t" +++ "bne 1b \n\t" +++#endif +++ +++// If we have bits left then n must be 0 so give up now +++ "lsls %[tmp] , %[low] , #16 \n\t" +++ "bne 2f \n\t" +++ +++// Do reload +++ "ldrh %[tmp] , [%[bptr]] , #2 \n\t" +++ "movw %[r_b] , #0xFFFF \n\t" +++ "rev %[tmp] , %[tmp] \n\t" +++ "rsb %[tmp] , %[r_b] , %[tmp], lsr #15 \n\t" +++ +++ "rbit %[r_b] , %[low] \n\t" +++ "clz %[r_b] , %[r_b] \n\t" +++ "sub %[r_b] , %[r_b] , #16 \n\t" +++ +++#if CONFIG_THUMB +++ "lsl %[tmp] , %[tmp] , %[r_b] \n\t" +++ "add %[low] , %[low] , %[tmp] \n\t" +++#else +++ "add %[low] , %[low] , %[tmp], lsl %[r_b] \n\t" +++#endif +++ +++// Check to see if we still have more to do +++ "cmp %[n] , #0 \n\t" +++ "bne 1b \n\t" +++ "2: \n\t" +++ : [bit]"=&r"(bit), +++ [low]"+&r"(c->low), +++ [range]"+&r"(c->range), +++ [r_b]"=&r"(reg_b), +++ [bptr]"+&r"(c->bytestream), +++ [idx]"+&r"(p), +++ [n]"+&r"(n), +++ [tmp]"=&r"(tmp), +++ [st]"=&r"(st) +++ : [state0]"r"(state0), +++ [ctx_map]"r"(ctx_map), +++ [mlps_tables]"r"(ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET + 128), +++ [byte]"M"(offsetof(CABACContext, bytestream)), +++ [lps_off]"I"((H264_MLPS_STATE_OFFSET + 128) - H264_LPS_RANGE_OFFSET) +++ : "memory", "cc" +++ ); +++ +++ return p; +++} +++ +++// --------------------------------------------------------------------------- +++// +++// CABAC_BY22 functions +++// +++// By and large these are (at best) no faster than their C equivalents - the +++// only one worth 
having is _peek where we do a slightly better job than the +++// compiler +++// +++// The others have been stashed here for reference in case larger scale asm +++// is attempted in which case they might be a useful base +++ +++ +++#define get_cabac_by22_peek get_cabac_by22_peek_arm +++static inline uint32_t get_cabac_by22_peek_arm(const CABACContext *const c) +++{ +++ uint32_t rv, tmp; +++ __asm__ ( +++ "bic %[rv] , %[low], #1 \n\t" +++ "cmp %[inv] , #0 \n\t" +++ "it ne \n\t" +++ "umullne %[tmp] , %[rv] , %[inv], %[rv] \n\t" +++ : // Outputs +++ [rv]"=&r"(rv), +++ [tmp]"=r"(tmp) +++ : // Inputs +++ [low]"r"(c->low), +++ [inv]"r"(c->range) +++ : // Clobbers +++ "cc" +++ ); +++ return rv << 1; +++} +++ +++#if 0 +++ +++// ***** Slower than the C :-( +++#define get_cabac_by22_flush get_cabac_by22_flush_arm +++static inline void get_cabac_by22_flush_arm(CABACContext *const c, const unsigned int n, const uint32_t val) +++{ +++ uint32_t m, tmp; +++ __asm__ ( +++ "add %[bits], %[bits], %[n] \n\t" +++ "ldr %[m], [%[ptr], %[bits], lsr #3] \n\t" +++ +++ "rsb %[tmp], %[n], #32 \n\t" +++ "lsr %[tmp], %[val], %[tmp] \n\t" +++ "mul %[tmp], %[range], %[tmp] \n\t" +++ +++ "rev %[m], %[m] \n\t" +++ +++ "lsl %[tmp], %[tmp], #23 \n\t" +++ "rsb %[low], %[tmp], %[low], lsl %[n] \n\t" +++ +++ "and %[tmp], %[bits], #7 \n\t" +++ "lsl %[m], %[m], %[tmp] \n\t" +++ +++ "orr %[low], %[low], %[m], lsr #9 \n\t" +++ : // Outputs +++ [m]"=&r"(m), +++ [tmp]"=&r"(tmp), +++ [bits]"+&r"(c->by22.bits), +++ [low]"+&r"(c->low) +++ : // Inputs +++ [n]"r"(n), +++ [val]"r"(val), +++ [inv]"r"(c->range), +++ [range]"r"(c->by22.range), +++ [ptr]"r"(c->bytestream) +++ : // Clobbers +++ ); +++} +++ +++ +++// Works but slower than C +++#define coeff_abs_level_remaining_decode_by22(c,r) coeff_abs_level_remaining_decode_by22_arm(c, r) +++static int coeff_abs_level_remaining_decode_by22_arm(CABACContext * const c, const unsigned int c_rice_param) +++{ +++ uint32_t n, val, tmp, level; +++ +++// PROFILE_START(); +++ 
+++ __asm__ ( +++ // Peek +++ "bic %[val], %[low], #1 \n\t" +++ "cmp %[inv], #0 \n\t" +++ "umullne %[tmp], %[val], %[inv], %[val] \n\t" +++ "lsl %[val], %[val], #1 \n\t" +++ +++ // Count bits (n = prefix) +++ "mvn %[n], %[val] \n\t" +++ "clz %[n], %[n] \n\t" +++ +++ "lsl %[level], %[val], %[n] \n\t" +++ "subs %[tmp], %[n], #3 \n\t" +++ "blo 2f \n\t" +++ +++ // prefix >= 3 +++ // < tmp = prefix - 3 +++ // > tmp = prefix + rice - 3 +++ "add %[tmp], %[tmp], %[rice] \n\t" +++ // > n = prefix * 2 + rice - 3 +++ "add %[n], %[tmp], %[n] \n\t" +++ "cmp %[n], #21 \n\t" +++ "bhi 3f \n\t" +++ +++ "orr %[level], %[level], #0x80000000 \n\t" +++ "rsb %[tmp], %[tmp], #31 \n\t" +++ "lsr %[level], %[level], %[tmp] \n\t" +++ +++ "mov %[tmp], #2 \n\t" +++ "add %[level], %[level], %[tmp], lsl %[rice] \n\t" +++ "b 1f \n\t" +++ +++ // > 22 bits used in total - need reload +++ "3: \n\t" +++ +++ // Stash prefix + rice - 3 in level (only spare reg) +++ "mov %[level], %[tmp] \n\t" +++ // Restore n to flush value (prefix) +++ "sub %[n], %[n], %[tmp] \n\t" +++ +++ // Flush + reload +++ +++// "rsb %[tmp], %[n], #32 \n\t" +++// "lsr %[tmp], %[val], %[tmp] \n\t" +++// "mul %[tmp], %[range], %[tmp] \n\t" +++ +++ // As it happens we know that all the bits we are flushing are 1 +++ // so we can cheat slightly +++ "rsb %[tmp], %[range], %[range], lsl %[n] \n\t" +++ "lsl %[tmp], %[tmp], #23 \n\t" +++ "rsb %[low], %[tmp], %[low], lsl %[n] \n\t" +++ +++ "add %[bits], %[bits], %[n] \n\t" +++ "ldr %[n], [%[ptr], %[bits], lsr #3] \n\t" +++ "rev %[n], %[n] \n\t" +++ "and %[tmp], %[bits], #7 \n\t" +++ "lsl %[n], %[n], %[tmp] \n\t" +++ +++ "orr %[low], %[low], %[n], lsr #9 \n\t" +++ +++ // (reload) +++ +++ "bic %[val], %[low], #1 \n\t" +++ "cmp %[inv], #0 \n\t" +++ "umullne %[tmp], %[val], %[inv], %[val] \n\t" +++ "lsl %[val], %[val], #1 \n\t" +++ +++ // Build value +++ +++ "mov %[n], %[level] \n\t" +++ +++ "orr %[tmp], %[val], #0x80000000 \n\t" +++ "rsb %[level], %[level], #31 \n\t" +++ "lsr %[level], 
%[tmp], %[level] \n\t" +++ +++ "mov %[tmp], #2 \n\t" +++ "add %[level], %[level], %[tmp], lsl %[rice] \n\t" +++ "b 1f \n\t" +++ +++ // prefix < 3 +++ "2: \n\t" +++ "rsb %[tmp], %[rice], #31 \n\t" +++ "lsr %[level], %[level], %[tmp] \n\t" +++ "orr %[level], %[level], %[n], lsl %[rice] \n\t" +++ "add %[n], %[n], %[rice] \n\t" +++ +++ "1: \n\t" +++ // Flush +++ "add %[n], %[n], #1 \n\t" +++ +++ "rsb %[tmp], %[n], #32 \n\t" +++ "lsr %[tmp], %[val], %[tmp] \n\t" +++ +++ "add %[bits], %[bits], %[n] \n\t" +++ "ldr %[val], [%[ptr], %[bits], lsr #3] \n\t" +++ +++ "mul %[tmp], %[range], %[tmp] \n\t" +++ "lsl %[tmp], %[tmp], #23 \n\t" +++ "rsb %[low], %[tmp], %[low], lsl %[n] \n\t" +++ +++ "rev %[val], %[val] \n\t" +++ "and %[tmp], %[bits], #7 \n\t" +++ "lsl %[val], %[val], %[tmp] \n\t" +++ +++ "orr %[low], %[low], %[val], lsr #9 \n\t" +++ : // Outputs +++ [level]"=&r"(level), +++ [n]"=&r"(n), +++ [val]"=&r"(val), +++ [tmp]"=&r"(tmp), +++ [bits]"+&r"(c->by22.bits), +++ [low]"+&r"(c->low) +++ : // Inputs +++ [rice]"r"(c_rice_param), +++ [inv]"r"(c->range), +++ [range]"r"(c->by22.range), +++ [ptr]"r"(c->bytestream) +++ : // Clobbers +++ "cc" +++ ); +++ +++// PROFILE_ACC(residual_abs); +++ +++ return level; +++} +++#endif +++ +++#endif /* HAVE_ARMV6T2_INLINE */ +++ +++#endif /* AVCODEC_ARM_HEVC_CABAC_H */ ++diff --git a/libavcodec/arm/hevcdsp_deblock_neon.S b/libavcodec/arm/hevcdsp_deblock_neon.S ++index bad4589..a088cc3 100644 ++--- a/libavcodec/arm/hevcdsp_deblock_neon.S +++++ b/libavcodec/arm/hevcdsp_deblock_neon.S ++@@ -409,10 +409,12 @@ function ff_hevc_deblocking_boundary_strengths_neon, export=1 ++ beq 90f ++ ++ tst a3, #1 +++ itee ne ++ ldrne a3, [v5, #0] @ curr->mv[0] ++ ldreq a3, [v5, #4] @ curr->mv[1] ++ moveq v1, v2 ++ tst v8, #1 +++ itee ne ++ ldrne v8, [v6, #0] @ neigh->mv[0] ++ ldreq v8, [v6, #4] @ neigh->mv[1] ++ moveq v3, v4 ++@@ -424,9 +426,14 @@ function ff_hevc_deblocking_boundary_strengths_neon, export=1 ++ sel a3, a3, ip ++ ands a3, a3, lr ++ @ drop through 
++-10: movne a3, #1 +++10: it ne +++ movne a3, #1 ++ 11: subs a2, a2, #1 ++-12: strbhs a3, [v7], a4 +++12: +++A strbhs a3, [v7], a4 +++T itt hs +++T strbhs a3, [v7] +++T addhs v7, v7, a4 ++ subs a2, a2, #1 ++ bhs 12b ++ ++@@ -442,6 +449,7 @@ function ff_hevc_deblocking_boundary_strengths_neon, export=1 ++ bne 10b ++ ++ teq v1, v3 +++ it eq ++ teqeq v2, v4 ++ bne 40f ++ teq v1, v2 ++@@ -487,6 +495,7 @@ function ff_hevc_deblocking_boundary_strengths_neon, export=1 ++ b 10b ++ ++ 40: teq v1, v4 +++ ite eq ++ teqeq v2, v3 ++ bne 10b ++ ++diff --git a/libavcodec/arm/hevcdsp_epel_neon.S b/libavcodec/arm/hevcdsp_epel_neon.S ++index 516ae5b..00eab9e 100644 ++--- a/libavcodec/arm/hevcdsp_epel_neon.S +++++ b/libavcodec/arm/hevcdsp_epel_neon.S ++@@ -110,7 +110,9 @@ function ff_hevc_put_epel_h_neon_8, export=1 ++ sub r7, #1 ++ lsl r7, #2 ++ vpush {d8-d15} ++- adrl r12, epel_coeffs +++@ adr reaches if we are in thumb mode but not in arm +++T adr r12, epel_coeffs +++A adrl r12, epel_coeffs ++ add r7, r12 ++ sub r1, #1 ++ lsl r4, #1 ++@@ -170,7 +172,8 @@ function ff_hevc_put_epel_v_neon_8, export=1 ++ sub r7, #1 ++ lsl r7, #2 ++ vpush {d8-d15} ++- adrl r12, epel_coeffs +++T adr r12, epel_coeffs +++A adrl r12, epel_coeffs ++ add r7, r12 ++ load_coeffs_16b r7 ++ sub r1, r2 ++@@ -246,7 +249,7 @@ function ff_hevc_put_epel_hv_neon_8, export=1 ++ sub r7, #1 ++ lsl r7, #2 ++ vpush {d8-d15} ++- adrl r12, epel_coeffs +++ adr r12, epel_coeffs ++ sub r6, #1 ++ lsl r6, #2 ++ add r6, r12 // mx epel coeff offset ++diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h ++index 1bf1c62..ccfa991 100644 ++--- a/libavcodec/cabac.h +++++ b/libavcodec/cabac.h ++@@ -43,7 +43,14 @@ extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63]; ++ typedef struct CABACContext{ ++ int low; ++ int range; ++- int outstanding_count; +++ union +++ { +++ int outstanding_count; +++ struct { +++ uint16_t bits; +++ uint16_t range; +++ } by22; +++ }; ++ const uint8_t *bytestream_start; ++ const uint8_t 
*bytestream; ++ const uint8_t *bytestream_end; ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 8656917..4caf720 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -21,14 +21,72 @@ ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +++#define UNCHECKED_BITSTREAM_READER 1 +++ ++ #include "libavutil/attributes.h" ++ #include "libavutil/common.h" ++ ++-#include "cabac_functions.h" ++ #include "hevc.h" +++#include "cabac_functions.h" +++ +++// BY22 is probably faster than simple bypass if the processor has +++// either a fast 32-bit divide or a fast 32x32->64[63:32] instruction +++// x86 has fast int divide +++// Arm doesn't have divide or general fast 64 bit, but does have the multiply +++// * Beware: ARCH_xxx isn't set if configure --disable-asm is used +++#define USE_BY22 (HAVE_FAST_64BIT || ARCH_ARM || ARCH_X86) +++// Use native divide if we have a fast one - otherwise use mpy 1/x +++// x86 has a fast integer divide - arm doesn't - unsure about other +++// architectures +++#define USE_BY22_DIV ARCH_X86 +++ +++// Special case blocks with a single significant ceoff +++// Decreases the complexity of the code for a common case but increases the +++// code size. 
+++#define USE_N_END_1 1 +++ +++#if ARCH_ARM +++#include "arm/hevc_cabac.h" +++#endif ++ ++ #define CABAC_MAX_BIN 31 ++ +++ +++#if USE_BY22 && !USE_BY22_DIV +++#define I(x) (uint32_t)((0x10000000000ULL / (uint64_t)(x)) + 1ULL) +++ +++static const uint32_t cabac_by22_inv_range[256] = { +++ 0, I(257), I(258), I(259), +++ I(260), I(261), I(262), I(263), I(264), I(265), I(266), I(267), I(268), I(269), +++ I(270), I(271), I(272), I(273), I(274), I(275), I(276), I(277), I(278), I(279), +++ I(280), I(281), I(282), I(283), I(284), I(285), I(286), I(287), I(288), I(289), +++ I(290), I(291), I(292), I(293), I(294), I(295), I(296), I(297), I(298), I(299), +++ I(300), I(301), I(302), I(303), I(304), I(305), I(306), I(307), I(308), I(309), +++ I(310), I(311), I(312), I(313), I(314), I(315), I(316), I(317), I(318), I(319), +++ I(320), I(321), I(322), I(323), I(324), I(325), I(326), I(327), I(328), I(329), +++ I(330), I(331), I(332), I(333), I(334), I(335), I(336), I(337), I(338), I(339), +++ I(340), I(341), I(342), I(343), I(344), I(345), I(346), I(347), I(348), I(349), +++ I(350), I(351), I(352), I(353), I(354), I(355), I(356), I(357), I(358), I(359), +++ I(360), I(361), I(362), I(363), I(364), I(365), I(366), I(367), I(368), I(369), +++ I(370), I(371), I(372), I(373), I(374), I(375), I(376), I(377), I(378), I(379), +++ I(380), I(381), I(382), I(383), I(384), I(385), I(386), I(387), I(388), I(389), +++ I(390), I(391), I(392), I(393), I(394), I(395), I(396), I(397), I(398), I(399), +++ I(400), I(401), I(402), I(403), I(404), I(405), I(406), I(407), I(408), I(409), +++ I(410), I(411), I(412), I(413), I(414), I(415), I(416), I(417), I(418), I(419), +++ I(420), I(421), I(422), I(423), I(424), I(425), I(426), I(427), I(428), I(429), +++ I(430), I(431), I(432), I(433), I(434), I(435), I(436), I(437), I(438), I(439), +++ I(440), I(441), I(442), I(443), I(444), I(445), I(446), I(447), I(448), I(449), +++ I(450), I(451), I(452), I(453), I(454), I(455), I(456), I(457), I(458), I(459), 
+++ I(460), I(461), I(462), I(463), I(464), I(465), I(466), I(467), I(468), I(469), +++ I(470), I(471), I(472), I(473), I(474), I(475), I(476), I(477), I(478), I(479), +++ I(480), I(481), I(482), I(483), I(484), I(485), I(486), I(487), I(488), I(489), +++ I(490), I(491), I(492), I(493), I(494), I(495), I(496), I(497), I(498), I(499), +++ I(500), I(501), I(502), I(503), I(504), I(505), I(506), I(507), I(508), I(509), +++ I(510), I(511) +++}; +++#undef I +++#endif // USE_BY22 +++ ++ /** ++ * number of bin by SyntaxElement. ++ */ ++@@ -445,6 +503,211 @@ static const uint8_t diag_scan8x8_inv[8][8] = { ++ { 28, 36, 43, 49, 54, 58, 61, 63, }, ++ }; ++ +++ +++typedef struct +++{ +++ uint16_t coeff; +++ uint16_t scale; +++} xy_off_t; +++ +++#define XYT_C(x,y,t) ((x) + ((y) << (t))) +++#define SCALE_TRAFO(t) ((t) > 3 ? 3 : (t)) +++#define SCALE_SHR(t) ((t) - SCALE_TRAFO(t)) +++#define XYT_S(x,y,t) (((x) >> SCALE_SHR(t)) + (((y) >> SCALE_SHR(t)) << SCALE_TRAFO(t))) +++ +++#define XYT(x,y,t) {XYT_C(x,y,t), XYT_S(x,y,t)} +++ +++#define OFF_DIAG(t) {\ +++ XYT(0,0,t), XYT(0,1,t), XYT(1,0,t), XYT(0,2,t),\ +++ XYT(1,1,t), XYT(2,0,t), XYT(0,3,t), XYT(1,2,t),\ +++ XYT(2,1,t), XYT(3,0,t), XYT(1,3,t), XYT(2,2,t),\ +++ XYT(3,1,t), XYT(2,3,t), XYT(3,2,t), XYT(3,3,t)\ +++} +++ +++#define OFF_HORIZ(t) {\ +++ XYT(0,0,t), XYT(1,0,t), XYT(2,0,t), XYT(3,0,t),\ +++ XYT(0,1,t), XYT(1,1,t), XYT(2,1,t), XYT(3,1,t),\ +++ XYT(0,2,t), XYT(1,2,t), XYT(2,2,t), XYT(3,2,t),\ +++ XYT(0,3,t), XYT(1,3,t), XYT(2,3,t), XYT(3,3,t)\ +++} +++ +++#define OFF_VERT(t) {\ +++ XYT(0,0,t), XYT(0,1,t), XYT(0,2,t), XYT(0,3,t),\ +++ XYT(1,0,t), XYT(1,1,t), XYT(1,2,t), XYT(1,3,t),\ +++ XYT(2,0,t), XYT(2,1,t), XYT(2,2,t), XYT(2,3,t),\ +++ XYT(3,0,t), XYT(3,1,t), XYT(3,2,t), XYT(3,3,t)\ +++} +++ +++static const xy_off_t off_xys[3][4][16] = +++{ +++ {OFF_DIAG(2), OFF_DIAG(3), OFF_DIAG(4), OFF_DIAG(5)}, +++ {OFF_HORIZ(2), OFF_HORIZ(3), OFF_HORIZ(4), OFF_HORIZ(5)}, +++ {OFF_VERT(2), OFF_VERT(3), OFF_VERT(4), OFF_VERT(5)} 
+++}; +++ +++ +++// Helper fns +++#ifndef hevc_mem_bits32 +++static av_always_inline uint32_t hevc_mem_bits32(const void * buf, const unsigned int offset) +++{ +++ return AV_RB32((const uint8_t *)buf + (offset >> 3)) << (offset & 7); +++} +++#endif +++ +++#if AV_GCC_VERSION_AT_LEAST(3,4) && !defined(hevc_clz32) +++#define hevc_clz32 hevc_clz32_builtin +++static av_always_inline unsigned int hevc_clz32_builtin(const uint32_t x) +++{ +++ // __builtin_clz says it works on ints - so adjust if int is >32 bits long +++ return __builtin_clz(x) - (sizeof(int) * 8 - 32); +++} +++#endif +++ +++// It is unlikely that we will ever need this but include for completeness +++#ifndef hevc_clz32 +++static inline unsigned int hevc_clz32(unsigned int x) +++{ +++ unsigned int n = 1; +++ if ((x & 0xffff0000) == 0) { +++ n += 16; +++ x <<= 16; +++ } +++ if ((x & 0xff000000) == 0) { +++ n += 8; +++ x <<= 8; +++ } +++ if ((x & 0xf0000000) == 0) { +++ n += 4; +++ x <<= 4; +++ } +++ if ((x & 0xc0000000) == 0) { +++ n += 2; +++ x <<= 2; +++ } +++ return n - ((x >> 31) & 1); +++} +++#endif +++ +++ +++#if !USE_BY22 +++// If no by22 then _by22 functions will revert to normal and so _peek/_flush +++// will no longer be called but the setup calls will still exist and we want +++// to null them out +++#define bypass_start(s) +++#define bypass_finish(s) +++#else +++// Use BY22 for residual bypass block +++ +++#define bypass_start(s) get_cabac_by22_start(&s->HEVClc->cc) +++#define bypass_finish(s) get_cabac_by22_finish(&s->HEVClc->cc) +++ +++// BY22 notes that bypass is simply a divide into the bitstream and so we +++// can peek out large quantities of bits at once and treat the result as if +++// it was VLC.
In many cases this will lead to O(1) processing rather than +++// O(n) though the setup and teardown is sufficiently expensive that it is +++// only worth using if we expect to be dealing with more than a few bits +++// The definition of "a few bits" will vary from platform to platform but +++// tests on ARM show that it probably isn't worth it for a single coded +++// residual, but is for >1 - this is probably reinforced that if there are +++// more residuals then they are likely to be bigger and this will make the +++// O(1) nature of the code more worthwhile. +++ +++ +++#if !USE_BY22_DIV +++// * 1/x @ 32 bits gets us 22 bits of accuracy +++#define CABAC_BY22_PEEK_BITS 22 +++#else +++// A real 32-bit divide gets us another bit +++// If we have a 64 bit int & a unit time divider then we should get a lot +++// of bits (55) but that is untested and it is unclear if it would give +++// us a large advantage +++#define CABAC_BY22_PEEK_BITS 23 +++#endif +++ +++// Bypass block start +++// Must be called before _by22_peek is used as it sets the CABAC environment +++// into the correct state. _by22_finish must be called to return to 'normal' +++// (i.e.
non-bypass) cabac decoding +++static inline void get_cabac_by22_start(CABACContext * const c) +++{ +++ const unsigned int bits = __builtin_ctz(c->low); +++ const uint32_t m = hevc_mem_bits32(c->bytestream, 0); +++ uint32_t x = (c->low << (22 - CABAC_BITS)) ^ ((m ^ 0x80000000U) >> (9 + CABAC_BITS - bits)); +++#if !USE_BY22_DIV +++ const uint32_t inv = cabac_by22_inv_range[c->range & 0xff]; +++#endif +++ +++ c->bytestream -= (CABAC_BITS / 8); +++ c->by22.bits = bits; +++#if !USE_BY22_DIV +++ c->by22.range = c->range; +++ c->range = inv; +++#endif +++ c->low = x; +++} +++ +++// Bypass block finish +++// Must be called at the end of the bypass block to return to normal operation +++static inline void get_cabac_by22_finish(CABACContext * const c) +++{ +++ unsigned int used = c->by22.bits; +++ unsigned int bytes_used = (used / CABAC_BITS) * (CABAC_BITS / 8); +++ unsigned int bits_used = used & (CABAC_BITS == 16 ? 15 : 7); +++ +++ c->bytestream += bytes_used + (CABAC_BITS / 8); +++ c->low = (((uint32_t)c->low >> (22 - CABAC_BITS + bits_used)) | 1) << bits_used; +++#if !USE_BY22_DIV +++ c->range = c->by22.range; +++#endif +++} +++ +++// Peek bypass bits +++// _by22_start must be called before _by22_peek is called and _by22_flush +++// must be called afterwards to flush any used bits +++// The actual number of valid bits returned is +++// min(coded bypass block length, CABAC_BY22_PEEK_BITS).
CABAC_BY22_PEEK_BITS +++// will be at least 22 which should be long enough for any prefix or suffix +++// though probably not long enough for the worst case combination +++#ifndef get_cabac_by22_peek +++static inline uint32_t get_cabac_by22_peek(const CABACContext * const c) +++{ +++#if USE_BY22_DIV +++ return ((unsigned int)c->low / (unsigned int)c->range) << 9; +++#else +++ uint32_t x = c->low & ~1U; +++ const uint32_t inv = c->range; +++ +++ if (inv != 0) +++ x = (uint32_t)(((uint64_t)x * (uint64_t)inv) >> 32); +++ +++ return x << 1; +++#endif +++} +++#endif +++ +++// Flush bypass bits peeked by _by22_peek +++// Flush n bypass bits. n must be >= 1 to guarantee correct operation +++// val is an unmodified copy of whatever _by22_peek returned +++#ifndef get_cabac_by22_flush +++static inline void get_cabac_by22_flush(CABACContext * c, const unsigned int n, const uint32_t val) +++{ +++ // Subtract the bits used & reshift up to the top of the word +++#if USE_BY22_DIV +++ const uint32_t low = (((unsigned int)c->low << n) - (((val >> (32 - n)) * (unsigned int)c->range) << 23)); +++#else +++ const uint32_t low = (((uint32_t)c->low << n) - (((val >> (32 - n)) * c->by22.range) << 23)); +++#endif +++ +++ // and refill lower bits +++ // We will probably OR over some existing bits but that doesn't matter +++ c->by22.bits += n; +++ c->low = low | (hevc_mem_bits32(c->bytestream, c->by22.bits) >> 9); +++} +++#endif +++ +++#endif // USE_BY22 +++ +++ ++ void ff_hevc_save_states(HEVCContext *s, int ctb_addr_ts) ++ { ++ if (s->ps.pps->entropy_coding_sync_enabled_flag && ++@@ -863,19 +1126,19 @@ int ff_hevc_cbf_luma_decode(HEVCContext *s, int trafo_depth) ++ return GET_CABAC(elem_offset[CBF_LUMA] + !trafo_depth); ++ } ++ ++-static int hevc_transform_skip_flag_decode(HEVCContext *s, int c_idx) +++static int hevc_transform_skip_flag_decode(HEVCContext *s, int c_idx_nz) ++ { ++- return GET_CABAC(elem_offset[TRANSFORM_SKIP_FLAG] + !!c_idx); +++ return 
GET_CABAC(elem_offset[TRANSFORM_SKIP_FLAG] + c_idx_nz); ++ } ++ ++-static int explicit_rdpcm_flag_decode(HEVCContext *s, int c_idx) +++static int explicit_rdpcm_flag_decode(HEVCContext *s, int c_idx_nz) ++ { ++- return GET_CABAC(elem_offset[EXPLICIT_RDPCM_FLAG] + !!c_idx); +++ return GET_CABAC(elem_offset[EXPLICIT_RDPCM_FLAG] + c_idx_nz); ++ } ++ ++-static int explicit_rdpcm_dir_flag_decode(HEVCContext *s, int c_idx) +++static int explicit_rdpcm_dir_flag_decode(HEVCContext *s, int c_idx_nz) ++ { ++- return GET_CABAC(elem_offset[EXPLICIT_RDPCM_DIR_FLAG] + !!c_idx); +++ return GET_CABAC(elem_offset[EXPLICIT_RDPCM_DIR_FLAG] + c_idx_nz); ++ } ++ ++ int ff_hevc_log2_res_scale_abs(HEVCContext *s, int idx) { ++@@ -891,14 +1154,14 @@ int ff_hevc_res_scale_sign_flag(HEVCContext *s, int idx) { ++ return GET_CABAC(elem_offset[RES_SCALE_SIGN_FLAG] + idx); ++ } ++ ++-static av_always_inline void last_significant_coeff_xy_prefix_decode(HEVCContext *s, int c_idx, +++static av_always_inline void last_significant_coeff_xy_prefix_decode(HEVCContext *s, int c_idx_nz, ++ int log2_size, int *last_scx_prefix, int *last_scy_prefix) ++ { ++ int i = 0; ++ int max = (log2_size << 1) - 1; ++ int ctx_offset, ctx_shift; ++ ++- if (!c_idx) { +++ if (!c_idx_nz) { ++ ctx_offset = 3 * (log2_size - 2) + ((log2_size - 1) >> 2); ++ ctx_shift = (log2_size + 1) >> 2; ++ } else { ++@@ -929,22 +1192,16 @@ static av_always_inline int last_significant_coeff_suffix_decode(HEVCContext *s, ++ return value; ++ } ++ ++-static av_always_inline int significant_coeff_group_flag_decode(HEVCContext *s, int c_idx, int ctx_cg) +++static av_always_inline int significant_coeff_group_flag_decode(HEVCContext *s, int c_idx_nz, int ctx_cg) ++ { ++ int inc; ++ ++- inc = FFMIN(ctx_cg, 1) + (c_idx>0 ? 
2 : 0); +++ inc = (ctx_cg != 0) + (c_idx_nz << 1); ++ ++ return GET_CABAC(elem_offset[SIGNIFICANT_COEFF_GROUP_FLAG] + inc); ++ } ++-static av_always_inline int significant_coeff_flag_decode(HEVCContext *s, int x_c, int y_c, ++- int offset, const uint8_t *ctx_idx_map) ++-{ ++- int inc = ctx_idx_map[(y_c << 2) + x_c] + offset; ++- return GET_CABAC(elem_offset[SIGNIFICANT_COEFF_FLAG] + inc); ++-} ++ ++-static av_always_inline int significant_coeff_flag_decode_0(HEVCContext *s, int c_idx, int offset) +++static av_always_inline int significant_coeff_flag_decode_0(HEVCContext *s, int offset) ++ { ++ return GET_CABAC(elem_offset[SIGNIFICANT_COEFF_FLAG] + offset); ++ } ++@@ -966,65 +1223,305 @@ static av_always_inline int coeff_abs_level_greater2_flag_decode(HEVCContext *s, ++ return GET_CABAC(elem_offset[COEFF_ABS_LEVEL_GREATER2_FLAG] + inc); ++ } ++ ++-static av_always_inline int coeff_abs_level_remaining_decode(HEVCContext *s, int rc_rice_param) +++ +++#if !USE_BY22 +++#define coeff_abs_level_remaining_decode_bypass(s,r) coeff_abs_level_remaining_decode(s, r) +++#endif +++ +++ +++#ifndef coeff_abs_level_remaining_decode_bypass +++static int coeff_abs_level_remaining_decode_bypass(HEVCContext * const s, const unsigned int rice_param) +++{ +++ CABACContext * const c = &s->HEVClc->cc; +++ uint32_t y; +++ unsigned int prefix; +++ unsigned int last_coeff_abs_level_remaining; +++ unsigned int n; +++ +++ y = get_cabac_by22_peek(c); +++ prefix = hevc_clz32(~y); +++ // y << prefix will always have top bit 0 +++ +++ if (prefix < 3) { +++ const unsigned int suffix = (y << prefix) >> (31 - rice_param); +++ last_coeff_abs_level_remaining = (prefix << rice_param) + suffix; +++ n = prefix + 1 + rice_param; +++ } +++ else if (prefix * 2 + rice_param <= CABAC_BY22_PEEK_BITS + 2) +++ { +++ const uint32_t suffix = ((y << prefix) | 0x80000000) >> (34 - (prefix + rice_param)); +++ +++ last_coeff_abs_level_remaining = (2 << rice_param) + suffix; +++ n = prefix * 2 + rice_param - 2; +++ } +++ 
else { +++ unsigned int suffix; +++ +++ get_cabac_by22_flush(c, prefix, y); +++ y = get_cabac_by22_peek(c); +++ +++ suffix = (y | 0x80000000) >> (34 - (prefix + rice_param)); +++ last_coeff_abs_level_remaining = (2 << rice_param) + suffix; +++ n = prefix + rice_param - 2; +++ } +++ +++ get_cabac_by22_flush(c, n, y); +++ +++ return last_coeff_abs_level_remaining; +++} +++#endif +++ +++static int coeff_abs_level_remaining_decode(HEVCContext * const s, int rc_rice_param) ++ { +++ CABACContext * const c = &s->HEVClc->cc; ++ int prefix = 0; ++ int suffix = 0; ++ int last_coeff_abs_level_remaining; ++ int i; ++ ++- while (prefix < CABAC_MAX_BIN && get_cabac_bypass(&s->HEVClc->cc)) +++ while (prefix < CABAC_MAX_BIN && get_cabac_bypass(c)) ++ prefix++; ++ if (prefix == CABAC_MAX_BIN) { ++ av_log(s->avctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", prefix); ++ return 0; ++ } +++ ++ if (prefix < 3) { ++ for (i = 0; i < rc_rice_param; i++) ++- suffix = (suffix << 1) | get_cabac_bypass(&s->HEVClc->cc); +++ suffix = (suffix << 1) | get_cabac_bypass(c); ++ last_coeff_abs_level_remaining = (prefix << rc_rice_param) + suffix; ++ } else { ++ int prefix_minus3 = prefix - 3; ++ for (i = 0; i < prefix_minus3 + rc_rice_param; i++) ++- suffix = (suffix << 1) | get_cabac_bypass(&s->HEVClc->cc); +++ suffix = (suffix << 1) | get_cabac_bypass(c); ++ last_coeff_abs_level_remaining = (((1 << prefix_minus3) + 3 - 1) ++ << rc_rice_param) + suffix; ++ } +++ ++ return last_coeff_abs_level_remaining; ++ } ++ ++-static av_always_inline int coeff_sign_flag_decode(HEVCContext *s, uint8_t nb) +++#if !USE_BY22 +++#define coeff_sign_flag_decode_bypass coeff_sign_flag_decode +++static inline uint32_t coeff_sign_flag_decode(HEVCContext * const s, const unsigned int nb) ++ { ++- int i; ++- int ret = 0; +++ CABACContext * const c = &s->HEVClc->cc; +++ unsigned int i; +++ uint32_t ret = 0; ++ ++ for (i = 0; i < nb; i++) ++- ret = (ret << 1) | get_cabac_bypass(&s->HEVClc->cc); ++- return ret; +++ ret = (ret << 1) 
| get_cabac_bypass(c); +++ +++ return ret << (32 - nb); ++ } +++#endif +++ +++#ifndef coeff_sign_flag_decode_bypass +++static inline uint32_t coeff_sign_flag_decode_bypass(HEVCContext * const s, const unsigned int nb) +++{ +++ CABACContext * const c = &s->HEVClc->cc; +++ uint32_t y; +++ y = get_cabac_by22_peek(c); +++ get_cabac_by22_flush(c, nb, y); +++ return y & ~(0xffffffffU >> nb); +++} +++#endif +++ +++ +++#ifndef get_cabac_greater1_bits +++static inline unsigned int get_cabac_greater1_bits(CABACContext * const c, const unsigned int n, +++ uint8_t * const state0) +++{ +++ unsigned int i; +++ unsigned int rv = 0; +++ for (i = 0; i != n; ++i) { +++ const unsigned int idx = rv != 0 ? 0 : i < 3 ? i + 1 : 3; +++ const unsigned int b = get_cabac(c, state0 + idx); +++ rv = (rv << 1) | b; +++ } +++ return rv; +++} +++#endif +++ +++ +++// N.B. levels returned are the values assuming coeff_abs_level_remaining +++// is uncoded, so 1 must be added if it is coded. sum_abs also reflects +++// this version of events. 
+++static inline uint32_t get_greaterx_bits(HEVCContext * const s, const unsigned int n_end, int * const levels, +++ int * const pprev_subset_coded, int * const psum, +++ const unsigned int idx0_gt1, const unsigned int idx_gt2) +++{ +++ CABACContext * const c = &s->HEVClc->cc; +++ uint8_t * const state0 = s->HEVClc->cabac_state + idx0_gt1; +++ uint8_t * const state_gt2 = s->HEVClc->cabac_state + idx_gt2; +++ unsigned int rv; +++ unsigned int i; +++ const unsigned int n = FFMIN(n_end, 8); +++ +++ // Really this is i != n but the simple unconditional loop is cheaper +++ // and faster +++ for (i = 0; i != 8; ++i) +++ levels[i] = 1; +++ +++ rv = get_cabac_greater1_bits(c, n, state0); +++ +++ *pprev_subset_coded = 0; +++ *psum = n; +++ +++ rv <<= (32 - n); +++ if (rv != 0) +++ { +++ *pprev_subset_coded = 1; +++ *psum = n + 1; +++ i = hevc_clz32(rv); +++ levels[i] = 2; +++ if (get_cabac(c, state_gt2) == 0) +++ { +++ // Unset first coded bit +++ rv &= ~(0x80000000U >> i); +++ } +++ } +++ +++ if (n_end > 8) { +++ const unsigned int g8 = n_end - 8; +++ rv |= ((1 << g8) - 1) << (24 - g8); +++ for (i = 0; i != g8; ++i) { +++ levels[i + 8] = 0; +++ } +++ } +++ +++ return rv; +++} +++ +++// extended_precision_processing_flag must be false given we are +++// putting the result into a 16-bit array +++// So trans_coeff_level must fit in 16 bits too (7.4.9.1 definition of coeff_abs_level_remaining) +++// scale_m is uint8_t +++// +++// scale is [40 - 72] << [0..12] based on qp- worst case is (45 << 12) +++// or it can be 2 (if we have transquant_bypass) +++// shift is set to one less than we really want but would normally be +++// s->ps.sps->bit_depth (max 16, min 8) + log2_trafo_size (max 5, min 2?) - 5 = max 16 min 5? 
+++// however the scale shift is subtracted from shift to a min 0 so scale_m worst = 45 << 6 +++// This can still theoretically lead to overflow but the coding would have to be very odd (& inefficient) +++// to achieve it +++ +++#ifndef trans_scale_sat +++static inline int trans_scale_sat(const int level, const unsigned int scale, const unsigned int scale_m, const unsigned int shift) +++{ +++ return av_clip_int16((((level * (int)(scale * scale_m)) >> shift) + 1) >> 1); +++} +++#endif +++ +++ +++#ifndef update_rice +++static inline void update_rice(uint8_t * const stat_coeff, +++ const unsigned int last_coeff_abs_level_remaining, +++ const unsigned int c_rice_param) +++{ +++ const unsigned int x = (last_coeff_abs_level_remaining << 1) >> c_rice_param; +++ if (x >= 6) +++ (*stat_coeff)++; +++ else if (x == 0 && *stat_coeff > 0) +++ (*stat_coeff)--; +++} +++#endif +++ +++ +++// n must be > 0 on entry +++#ifndef get_cabac_sig_coeff_flag_idxs +++static inline uint8_t * get_cabac_sig_coeff_flag_idxs(CABACContext * const c, uint8_t * const state0, +++ unsigned int n, +++ const uint8_t const * ctx_map, +++ uint8_t * p) +++{ +++ do { +++ if (get_cabac(c, state0 + ctx_map[n])) +++ *p++ = n; +++ } while (--n != 0); +++ return p; +++} +++#endif +++ +++ +++static int get_sig_coeff_flag_idxs(CABACContext * const c, uint8_t * const state0, +++ unsigned int n, +++ const uint8_t const * ctx_map, +++ uint8_t * const flag_idx) +++{ +++ int rv; +++ +++ rv = get_cabac_sig_coeff_flag_idxs(c, state0, n, ctx_map, flag_idx) - flag_idx; +++ +++ return rv; +++} +++ +++#define H4x4(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) {\ +++ x0, x1, x2, x3,\ +++ x4, x5, x6, x7,\ +++ x8, x9, x10, x11,\ +++ x12, x13, x14, x15} +++ +++#define V4x4(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) {\ +++ x0, x4, x8, x12,\ +++ x1, x5, x9, x13,\ +++ x2, x6, x10, x14,\ +++ x3, x7, x11, x15} +++ +++#define D4x4(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11,
x12, x13, x14, x15) {\ +++ x0, x4, x1, x8,\ +++ x5, x2, x12, x9,\ +++ x6, x3, x13, x10,\ +++ x7, x14, x11, x15} +++ +++ +++static inline int next_subset(HEVCContext * const s, int i, const int c_idx_nz, +++ uint8_t * const significant_coeff_group_flag, +++ const uint8_t * const scan_x_cg, const uint8_t * const scan_y_cg, +++ int * const pPrev_sig) +++{ +++ while (--i >= 0) { +++ unsigned int x_cg = scan_x_cg[i]; +++ unsigned int y_cg = scan_y_cg[i]; +++ +++ // For the flag decode we only care about Z/NZ but +++ // we use the full Right + Down * 2 when calculating +++ // significant coeff flags so we obtain it here +++ //. +++ // The group flag array is one longer than it needs to +++ // be so we don't need to check for y_cg limits +++ unsigned int prev_sig = ((significant_coeff_group_flag[y_cg] >> (x_cg + 1)) & 1) | +++ (((significant_coeff_group_flag[y_cg + 1] >> x_cg) & 1) << 1); +++ +++ if (i == 0 || +++ significant_coeff_group_flag_decode(s, c_idx_nz, prev_sig)) +++ { +++ significant_coeff_group_flag[y_cg] |= (1 << x_cg); +++ *pPrev_sig = prev_sig; +++ break; +++ } +++ } +++ +++ return i; +++} +++ ++ ++ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ int log2_trafo_size, enum ScanType scan_idx, ++ int c_idx) ++ { ++-#define GET_COORD(offset, n) \ ++- do { \ ++- x_c = (x_cg << 2) + scan_x_off[n]; \ ++- y_c = (y_cg << 2) + scan_y_off[n]; \ ++- } while (0) ++- HEVCLocalContext *lc = s->HEVClc; ++- int transform_skip_flag = 0; +++ HEVCLocalContext * const lc = s->HEVClc; +++ int trans_skip_or_bypass = lc->cu.cu_transquant_bypass_flag; ++ ++ int last_significant_coeff_x, last_significant_coeff_y; ++- int last_scan_pos; ++- int n_end; ++ int num_coeff = 0; ++- int greater1_ctx = 1; +++ int prev_subset_coded = 0; ++ ++ int num_last_subset; ++ int x_cg_last_sig, y_cg_last_sig; ++ ++- const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off; +++ const uint8_t *scan_x_cg, *scan_y_cg; +++ const xy_off_t * scan_xy_off; ++ ++ ptrdiff_t stride = 
s->frame->linesize[c_idx]; ++ int hshift = s->ps.sps->hshift[c_idx]; ++@@ -1032,21 +1529,28 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride + ++ ((x0 >> hshift) << s->ps.sps->pixel_shift)]; ++ #ifdef RPI ++- int use_vpu = s->enable_rpi && !lc->cu.cu_transquant_bypass_flag && !transform_skip_flag && !lc->tu.cross_pf && log2_trafo_size>=4; +++ //***** transform_skip_flag decoded later! +++ int use_vpu = s->enable_rpi && !lc->cu.cu_transquant_bypass_flag /* && !transform_skip_flag*/ && !lc->tu.cross_pf && log2_trafo_size>=4; ++ #endif ++ int16_t *coeffs = (int16_t*)(c_idx ? lc->edge_emu_buffer2 : lc->edge_emu_buffer); ++- uint8_t significant_coeff_group_flag[8][8] = {{0}}; +++ uint8_t significant_coeff_group_flag[9] = {0}; // Allow 1 final byte that is always zero ++ int explicit_rdpcm_flag = 0; ++ int explicit_rdpcm_dir_flag; ++ ++ int trafo_size = 1 << log2_trafo_size; ++ int i; ++- int qp,shift,add,scale,scale_m; +++ int qp,shift,scale; ++ static const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 }; ++ const uint8_t *scale_matrix = NULL; ++ uint8_t dc_scale; ++ int pred_mode_intra = (c_idx == 0) ? 
lc->tu.intra_pred_mode : ++ lc->tu.intra_pred_mode_c; +++ +++ int prev_sig = 0; +++ const int c_idx_nz = (c_idx != 0); +++ +++ int may_hide_sign; +++ ++ #ifdef RPI ++ if (s->enable_rpi) { ++ int n = trafo_size * trafo_size; ++@@ -1078,7 +1582,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ ++ // Derive QP for dequant ++ if (!lc->cu.cu_transquant_bypass_flag) { ++- static const int qp_c[] = { 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37 }; +++ static const uint8_t qp_c[] = { 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37 }; ++ static const uint8_t rem6[51 + 4 * 6 + 1] = { ++ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, ++ 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, ++@@ -1094,9 +1598,19 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ }; ++ int qp_y = lc->qp_y; ++ +++ may_hide_sign = s->ps.pps->sign_data_hiding_flag; +++ ++ if (s->ps.pps->transform_skip_enabled_flag && ++ log2_trafo_size <= s->ps.pps->log2_max_transform_skip_block_size) { ++- transform_skip_flag = hevc_transform_skip_flag_decode(s, c_idx); +++ int transform_skip_flag = hevc_transform_skip_flag_decode(s, c_idx_nz); +++ if (transform_skip_flag) { +++ trans_skip_or_bypass = 1; +++ if (lc->cu.pred_mode == MODE_INTRA && +++ s->ps.sps->implicit_rdpcm_enabled_flag && +++ (pred_mode_intra == 10 || pred_mode_intra == 26)) { +++ may_hide_sign = 0; +++ } +++ } ++ } ++ ++ if (c_idx == 0) { ++@@ -1129,39 +1643,73 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ qp += s->ps.sps->qp_bd_offset; ++ } ++ ++- shift = s->ps.sps->bit_depth + log2_trafo_size - 5; ++- add = 1 << (shift-1); ++- scale = level_scale[rem6[qp]] << (div6[qp]); ++- scale_m = 16; // default when no custom scaling lists. 
++- dc_scale = 16; +++ // Shift is set to one less than will actually occur as the scale +++ // and saturate step adds 1 and then shifts right again +++ shift = s->ps.sps->bit_depth + log2_trafo_size - 6; +++ scale = level_scale[rem6[qp]]; +++ if (div6[qp] >= shift) { +++ scale <<= (div6[qp] - shift); +++ shift = 0; +++ } else { +++ shift -= div6[qp]; +++ } ++ ++- if (s->ps.sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) { +++ if (s->ps.sps->scaling_list_enable_flag && !(trans_skip_or_bypass && log2_trafo_size > 2)) { ++ const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ? ++- &s->ps.pps->scaling_list : &s->ps.sps->scaling_list; +++ &s->ps.pps->scaling_list : &s->ps.sps->scaling_list; ++ int matrix_id = lc->cu.pred_mode != MODE_INTRA; ++ ++ matrix_id = 3 * matrix_id + c_idx; ++ ++ scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id]; +++ dc_scale = scale_matrix[0]; ++ if (log2_trafo_size >= 4) ++ dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id]; ++ } +++ else +++ { +++ static const uint8_t sixteen_scale[64] = { +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16 +++ }; +++ scale_matrix = sixteen_scale; +++ dc_scale = 16; +++ } ++ } else { +++ static const uint8_t unit_scale[64] = { +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ }; +++ scale_matrix = unit_scale; ++ shift = 0; ++- add = 0; ++- scale = 0; ++- dc_scale = 0; +++ scale = 2; // We will shift right to kill this +++ dc_scale = 1; +++ +++ may_hide_sign = 0; ++ } ++ ++ if (lc->cu.pred_mode == MODE_INTER && s->ps.sps->explicit_rdpcm_enabled_flag && 
++- (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) { ++- explicit_rdpcm_flag = explicit_rdpcm_flag_decode(s, c_idx); +++ trans_skip_or_bypass) { +++ explicit_rdpcm_flag = explicit_rdpcm_flag_decode(s, c_idx_nz); ++ if (explicit_rdpcm_flag) { ++- explicit_rdpcm_dir_flag = explicit_rdpcm_dir_flag_decode(s, c_idx); +++ may_hide_sign = 0; +++ explicit_rdpcm_dir_flag = explicit_rdpcm_dir_flag_decode(s, c_idx_nz); ++ } ++ } ++ ++- last_significant_coeff_xy_prefix_decode(s, c_idx, log2_trafo_size, +++ last_significant_coeff_xy_prefix_decode(s, c_idx_nz, log2_trafo_size, ++ &last_significant_coeff_x, &last_significant_coeff_y); ++ ++ if (last_significant_coeff_x > 3) { ++@@ -1189,119 +1737,113 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ int last_x_c = last_significant_coeff_x & 3; ++ int last_y_c = last_significant_coeff_y & 3; ++ ++- scan_x_off = ff_hevc_diag_scan4x4_x; ++- scan_y_off = ff_hevc_diag_scan4x4_y; ++ num_coeff = diag_scan4x4_inv[last_y_c][last_x_c]; ++- if (trafo_size == 4) { +++ +++ switch (log2_trafo_size) { +++ case 2: ++ scan_x_cg = scan_1x1; ++ scan_y_cg = scan_1x1; ++- } else if (trafo_size == 8) { +++ break; +++ case 3: ++ num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4; ++ scan_x_cg = diag_scan2x2_x; ++ scan_y_cg = diag_scan2x2_y; ++- } else if (trafo_size == 16) { +++ break; +++ case 4: ++ num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4; ++ scan_x_cg = ff_hevc_diag_scan4x4_x; ++ scan_y_cg = ff_hevc_diag_scan4x4_y; ++- } else { // trafo_size == 32 +++ break; +++ case 5: +++ default: ++ num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4; ++ scan_x_cg = ff_hevc_diag_scan8x8_x; ++ scan_y_cg = ff_hevc_diag_scan8x8_y; +++ break; ++ } ++ break; ++ } ++ case SCAN_HORIZ: ++ scan_x_cg = horiz_scan2x2_x; ++ scan_y_cg = horiz_scan2x2_y; ++- scan_x_off = horiz_scan4x4_x; ++- scan_y_off = horiz_scan4x4_y; ++ num_coeff = 
horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x]; ++ break; ++ default: //SCAN_VERT ++ scan_x_cg = horiz_scan2x2_y; ++ scan_y_cg = horiz_scan2x2_x; ++- scan_x_off = horiz_scan4x4_y; ++- scan_y_off = horiz_scan4x4_x; ++ num_coeff = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y]; ++ break; ++ } ++ num_coeff++; ++ num_last_subset = (num_coeff - 1) >> 4; ++ ++- for (i = num_last_subset; i >= 0; i--) { ++- int n, m; ++- int x_cg, y_cg, x_c, y_c, pos; ++- int implicit_non_zero_coeff = 0; ++- int64_t trans_coeff_level; ++- int prev_sig = 0; ++- int offset = i << 4; ++- int rice_init = 0; ++- ++- uint8_t significant_coeff_flag_idx[16]; ++- uint8_t nb_significant_coeff_flag = 0; +++ significant_coeff_group_flag[y_cg_last_sig] = 1 << x_cg_last_sig; // 1st subset always significant ++ ++- x_cg = scan_x_cg[i]; ++- y_cg = scan_y_cg[i]; +++ scan_xy_off = off_xys[scan_idx][log2_trafo_size - 2]; ++ ++- if ((i < num_last_subset) && (i > 0)) { ++- int ctx_cg = 0; ++- if (x_cg < (1 << (log2_trafo_size - 2)) - 1) ++- ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg]; ++- if (y_cg < (1 << (log2_trafo_size - 2)) - 1) ++- ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1]; ++- ++- significant_coeff_group_flag[x_cg][y_cg] = ++- significant_coeff_group_flag_decode(s, c_idx, ctx_cg); ++- implicit_non_zero_coeff = 1; ++- } else { ++- significant_coeff_group_flag[x_cg][y_cg] = ++- ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) || ++- (x_cg == 0 && y_cg == 0)); ++- } +++ i = num_last_subset; +++ do { +++ int implicit_non_zero_coeff = 0; +++ int n_end; ++ ++- last_scan_pos = num_coeff - offset - 1; +++ uint8_t significant_coeff_flag_idx[16]; +++ unsigned int nb_significant_coeff_flag = 0; ++ ++ if (i == num_last_subset) { +++ // First time through +++ int last_scan_pos = num_coeff - (i << 4) - 1; ++ n_end = last_scan_pos - 1; ++ significant_coeff_flag_idx[0] = last_scan_pos; ++ nb_significant_coeff_flag = 1; ++ } else { ++ n_end = 15; +++ 
implicit_non_zero_coeff = (i != 0); ++ } ++ ++- if (x_cg < ((1 << log2_trafo_size) - 1) >> 2) ++- prev_sig = !!significant_coeff_group_flag[x_cg + 1][y_cg]; ++- if (y_cg < ((1 << log2_trafo_size) - 1) >> 2) ++- prev_sig += (!!significant_coeff_group_flag[x_cg][y_cg + 1] << 1); ++- ++- if (significant_coeff_group_flag[x_cg][y_cg] && n_end >= 0) { ++- static const uint8_t ctx_idx_map[] = { ++- 0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8, // log2_trafo_size == 2 ++- 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, // prev_sig == 0 ++- 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, // prev_sig == 1 ++- 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, // prev_sig == 2 ++- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 // default +++ if (n_end >= 0) { +++ static const uint8_t ctx_idx_maps_ts2[3][16] = { +++ D4x4(0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8), // log2_trafo_size == 2 +++ H4x4(0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8), // log2_trafo_size == 2 +++ V4x4(0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8) // log2_trafo_size == 2 +++ }; +++ static const uint8_t ctx_idx_maps[3][4][16] = { +++ { +++ D4x4(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 0 +++ D4x4(2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 1 +++ D4x4(2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0), // prev_sig == 2 +++ D4x4(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2) // prev_sig == 3, default +++ }, +++ { +++ H4x4(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 0 +++ H4x4(2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 1 +++ H4x4(2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0), // prev_sig == 2 +++ H4x4(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2) // prev_sig == 3, default +++ }, +++ { +++ V4x4(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 0 +++ V4x4(2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 1 +++ V4x4(2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0), // prev_sig == 2 +++ V4x4(2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2) // prev_sig == 3, default +++ } ++ }; ++ const uint8_t *ctx_idx_map_p; ++ int scf_offset = 0; ++- if (s->ps.sps->transform_skip_context_enabled_flag && ++- (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) { ++- ctx_idx_map_p = (uint8_t*) &ctx_idx_map[4 * 16]; ++- if (c_idx == 0) { ++- scf_offset = 40; ++- } else { ++- scf_offset = 14 + 27; ++- } +++ +++ if (s->ps.sps->transform_skip_context_enabled_flag && trans_skip_or_bypass) { +++ ctx_idx_map_p = ctx_idx_maps[0][3]; +++ scf_offset = 40 + c_idx_nz; ++ } else { ++- if (c_idx != 0) +++ if (c_idx_nz != 0) ++ scf_offset = 27; +++ ++ if (log2_trafo_size == 2) { ++- ctx_idx_map_p = (uint8_t*) &ctx_idx_map[0]; +++ ctx_idx_map_p = ctx_idx_maps_ts2[scan_idx]; ++ } else { ++- ctx_idx_map_p = (uint8_t*) &ctx_idx_map[(prev_sig + 1) << 4]; ++- if (c_idx == 0) { ++- if ((x_cg > 0 || y_cg > 0)) +++ ctx_idx_map_p = ctx_idx_maps[scan_idx][prev_sig]; +++ if (!c_idx_nz) { +++ if (i != 0) ++ scf_offset += 3; +++ ++ if (log2_trafo_size == 3) { ++ scf_offset += (scan_idx == SCAN_DIAG) ? 
9 : 15; ++ } else { ++@@ -1315,34 +1857,30 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } ++ } ++ } ++- for (n = n_end; n > 0; n--) { ++- x_c = scan_x_off[n]; ++- y_c = scan_y_off[n]; ++- if (significant_coeff_flag_decode(s, x_c, y_c, scf_offset, ctx_idx_map_p)) { ++- significant_coeff_flag_idx[nb_significant_coeff_flag] = n; ++- nb_significant_coeff_flag++; +++ +++ if (n_end > 0) { +++ int cnt = get_sig_coeff_flag_idxs(&s->HEVClc->cc, +++ s->HEVClc->cabac_state + elem_offset[SIGNIFICANT_COEFF_FLAG] + scf_offset, +++ n_end, ctx_idx_map_p, +++ significant_coeff_flag_idx + nb_significant_coeff_flag); +++ +++ nb_significant_coeff_flag += cnt; +++ if (cnt != 0) { ++ implicit_non_zero_coeff = 0; ++ } ++ } +++ ++ if (implicit_non_zero_coeff == 0) { ++- if (s->ps.sps->transform_skip_context_enabled_flag && ++- (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) { ++- if (c_idx == 0) { ++- scf_offset = 42; ++- } else { ++- scf_offset = 16 + 27; ++- } +++ if (s->ps.sps->transform_skip_context_enabled_flag && trans_skip_or_bypass) { +++ scf_offset = 42 + c_idx_nz; ++ } else { ++ if (i == 0) { ++- if (c_idx == 0) ++- scf_offset = 0; ++- else ++- scf_offset = 27; +++ scf_offset = c_idx_nz ? 27 : 0; ++ } else { ++ scf_offset = 2 + scf_offset; ++ } ++ } ++- if (significant_coeff_flag_decode_0(s, c_idx, scf_offset) == 1) { +++ if (significant_coeff_flag_decode_0(s, scf_offset) == 1) { ++ significant_coeff_flag_idx[nb_significant_coeff_flag] = 0; ++ nb_significant_coeff_flag++; ++ } ++@@ -1352,141 +1890,185 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } ++ } ++ ++- n_end = nb_significant_coeff_flag; ++- +++ if (nb_significant_coeff_flag != 0) { +++ const unsigned int gt1_idx_delta = (c_idx_nz << 2) | +++ ((i != 0 && !c_idx_nz) ? 
2 : 0) | +++ prev_subset_coded; +++ const unsigned int idx0_gt1 = elem_offset[COEFF_ABS_LEVEL_GREATER1_FLAG] + +++ (gt1_idx_delta << 2); +++ const unsigned int idx_gt2 = elem_offset[COEFF_ABS_LEVEL_GREATER2_FLAG] + +++ gt1_idx_delta; +++ +++ const unsigned int x_cg = scan_x_cg[i]; +++ const unsigned int y_cg = scan_y_cg[i]; +++ int16_t * const blk_coeffs = coeffs + +++ ((x_cg + (y_cg << log2_trafo_size)) << 2); +++ // This calculation is 'wrong' for log2_trafo_size == 2 +++ // but that doesn't matter as in this case x_cg & y_cg +++ // are always 0 so result is correct (0) anyway +++ const uint8_t * const blk_scale = scale_matrix + +++ (((x_cg + (y_cg << 3)) << (5 - log2_trafo_size))); +++ +++ // * The following code block doesn't deal with these flags: +++ // (nor did the one it replaces) +++ // +++ // cabac_bypass_alignment_enabled_flag +++ // This should be easy but I can't find a test case +++ // extended_precision_processing_flag +++ // This can extend the required precision past 16bits +++ // so is probably tricky - also no example found yet +++ +++#if USE_N_END_1 +++ if (nb_significant_coeff_flag == 1) { +++ // There is a small gain to be had from special casing the single +++ // transform coefficient case. The reduction in complexity +++ // makes up for the code duplication. 
+++ +++ int trans_coeff_level = 1; +++ int coeff_sign_flag; +++ int coded_val = 0; +++ +++ // initialize first elem of coeff_bas_level_greater1_flag +++ prev_subset_coded = 0; +++ +++ if (get_cabac(&s->HEVClc->cc, s->HEVClc->cabac_state + idx0_gt1 + 1)) { +++ trans_coeff_level = 2; +++ prev_subset_coded = 1; +++ coded_val = get_cabac(&s->HEVClc->cc, s->HEVClc->cabac_state + idx_gt2); +++ } ++ ++- if (n_end) { ++- int first_nz_pos_in_cg; ++- int last_nz_pos_in_cg; ++- int c_rice_param = 0; ++- int first_greater1_coeff_idx = -1; ++- uint8_t coeff_abs_level_greater1_flag[8]; ++- uint16_t coeff_sign_flag; ++- int sum_abs = 0; ++- int sign_hidden; ++- int sb_type; +++ // Probably not worth the overhead of starting by22 for just one value +++ coeff_sign_flag = get_cabac_bypass(&s->HEVClc->cc); ++ +++ if (coded_val) +++ { +++ if (!s->ps.sps->persistent_rice_adaptation_enabled_flag) { +++ trans_coeff_level = 3 + coeff_abs_level_remaining_decode(s, 0); +++ } else { +++ uint8_t * const stat_coeff = +++ lc->stat_coeff + trans_skip_or_bypass + 2 - ((c_idx_nz) << 1); +++ const unsigned int c_rice_param = *stat_coeff >> 2; +++ const int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(s, c_rice_param); ++ ++- // initialize first elem of coeff_bas_level_greater1_flag ++- int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0; +++ trans_coeff_level = 3 + last_coeff_abs_level_remaining; +++ update_rice(stat_coeff, last_coeff_abs_level_remaining, c_rice_param); +++ } +++ } ++ ++- if (s->ps.sps->persistent_rice_adaptation_enabled_flag) { ++- if (!transform_skip_flag && !lc->cu.cu_transquant_bypass_flag) ++- sb_type = 2 * (c_idx == 0 ? 1 : 0); ++- else ++- sb_type = 2 * (c_idx == 0 ? 
1 : 0) + 1; ++- c_rice_param = lc->stat_coeff[sb_type] / 4; ++- } +++ { +++ const xy_off_t * const xy_off = scan_xy_off + significant_coeff_flag_idx[0]; +++ const int k = (int32_t)(coeff_sign_flag << 31) >> 31; +++ const unsigned int scale_m = blk_scale[xy_off->scale]; ++ ++- if (!(i == num_last_subset) && greater1_ctx == 0) ++- ctx_set++; ++- greater1_ctx = 1; ++- last_nz_pos_in_cg = significant_coeff_flag_idx[0]; ++- ++- for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) { ++- int inc = (ctx_set << 2) + greater1_ctx; ++- coeff_abs_level_greater1_flag[m] = ++- coeff_abs_level_greater1_flag_decode(s, c_idx, inc); ++- if (coeff_abs_level_greater1_flag[m]) { ++- greater1_ctx = 0; ++- if (first_greater1_coeff_idx == -1) ++- first_greater1_coeff_idx = m; ++- } else if (greater1_ctx > 0 && greater1_ctx < 3) { ++- greater1_ctx++; +++ blk_coeffs[xy_off->coeff] = trans_scale_sat( +++ (trans_coeff_level ^ k) - k, // Apply sign +++ scale, +++ i == 0 && xy_off->coeff == 0 ? dc_scale : scale_m, +++ shift); ++ } ++ } ++- first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1]; ++- ++- if (lc->cu.cu_transquant_bypass_flag || ++- (lc->cu.pred_mode == MODE_INTRA && ++- s->ps.sps->implicit_rdpcm_enabled_flag && transform_skip_flag && ++- (pred_mode_intra == 10 || pred_mode_intra == 26 )) || ++- explicit_rdpcm_flag) ++- sign_hidden = 0; ++ else ++- sign_hidden = (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4); +++#endif +++ { +++ int sign_hidden = may_hide_sign; +++ int levels[16]; // Should be able to get away with int16_t but that fails some tests +++ uint32_t coeff_sign_flags; +++ uint32_t coded_vals = 0; +++ // Sum(abs(level[])) +++ // In fact we only need the bottom bit and in some future +++ // version that may be all we calculate +++ unsigned int sum_abs; +++ +++ coded_vals = get_greaterx_bits(s, nb_significant_coeff_flag, levels, +++ &prev_subset_coded, &sum_abs, idx0_gt1, idx_gt2); +++ +++ if (significant_coeff_flag_idx[0] - significant_coeff_flag_idx[nb_significant_coeff_flag 
- 1] <= 3) +++ sign_hidden = 0; +++ +++ // -- Start bypass block +++ +++ bypass_start(s); +++ +++ coeff_sign_flags = coeff_sign_flag_decode_bypass(s, nb_significant_coeff_flag - sign_hidden); +++ +++ if (coded_vals != 0) +++ { +++ const int rice_adaptation_enabled = s->ps.sps->persistent_rice_adaptation_enabled_flag; +++ uint8_t * stat_coeff = !rice_adaptation_enabled ? NULL : +++ lc->stat_coeff + trans_skip_or_bypass + 2 - ((c_idx_nz) << 1); +++ int c_rice_param = !rice_adaptation_enabled ? 0 : *stat_coeff >> 2; +++ int * level = levels - 1; +++ +++ do { +++ { +++ const unsigned int z = hevc_clz32(coded_vals) + 1; +++ level += z; +++ coded_vals <<= z; +++ } ++ ++- if (first_greater1_coeff_idx != -1) { ++- coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set); ++- } ++- if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden ) { ++- coeff_sign_flag = coeff_sign_flag_decode(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag); ++- } else { ++- coeff_sign_flag = coeff_sign_flag_decode(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1)); ++- } +++ { +++ const int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode_bypass(s, c_rice_param); +++ const int trans_coeff_level = *level + last_coeff_abs_level_remaining + 1; +++ +++ sum_abs += last_coeff_abs_level_remaining + 1; +++ *level = trans_coeff_level; ++ ++- for (m = 0; m < n_end; m++) { ++- n = significant_coeff_flag_idx[m]; ++- GET_COORD(offset, n); ++- if (m < 8) { ++- trans_coeff_level = 1 + coeff_abs_level_greater1_flag[m]; ++- if (trans_coeff_level == ((m == first_greater1_coeff_idx) ? 3 : 2)) { ++- int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(s, c_rice_param); ++- ++- trans_coeff_level += last_coeff_abs_level_remaining; ++- if (trans_coeff_level > (3 << c_rice_param)) ++- c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled_flag ? 
c_rice_param + 1 : FFMIN(c_rice_param + 1, 4); ++- if (s->ps.sps->persistent_rice_adaptation_enabled_flag && !rice_init) { ++- int c_rice_p_init = lc->stat_coeff[sb_type] / 4; ++- if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init)) ++- lc->stat_coeff[sb_type]++; ++- else if (2 * last_coeff_abs_level_remaining < (1 << c_rice_p_init)) ++- if (lc->stat_coeff[sb_type] > 0) ++- lc->stat_coeff[sb_type]--; ++- rice_init = 1; +++ if (stat_coeff != NULL) +++ update_rice(stat_coeff, last_coeff_abs_level_remaining, c_rice_param); +++ stat_coeff = NULL; +++ +++ if (trans_coeff_level > (3 << c_rice_param) && +++ (c_rice_param < 4 || rice_adaptation_enabled)) +++ ++c_rice_param; ++ } ++- } ++- } else { ++- int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(s, c_rice_param); ++- ++- trans_coeff_level = 1 + last_coeff_abs_level_remaining; ++- if (trans_coeff_level > (3 << c_rice_param)) ++- c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled_flag ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4); ++- if (s->ps.sps->persistent_rice_adaptation_enabled_flag && !rice_init) { ++- int c_rice_p_init = lc->stat_coeff[sb_type] / 4; ++- if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init)) ++- lc->stat_coeff[sb_type]++; ++- else if (2 * last_coeff_abs_level_remaining < (1 << c_rice_p_init)) ++- if (lc->stat_coeff[sb_type] > 0) ++- lc->stat_coeff[sb_type]--; ++- rice_init = 1; ++- } +++ } while (coded_vals != 0); ++ } ++- if (s->ps.pps->sign_data_hiding_flag && sign_hidden) { ++- sum_abs += trans_coeff_level; ++- if (n == first_nz_pos_in_cg && (sum_abs&1)) ++- trans_coeff_level = -trans_coeff_level; +++ +++ // sign_hidden = 0 or 1 so we can combine the tests +++ if ((sign_hidden & sum_abs) != 0) { +++ levels[nb_significant_coeff_flag - 1] = -levels[nb_significant_coeff_flag - 1]; ++ } ++- if (coeff_sign_flag >> 15) ++- trans_coeff_level = -trans_coeff_level; ++- coeff_sign_flag <<= 1; ++- if(!lc->cu.cu_transquant_bypass_flag) { ++- if 
(s->ps.sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) { ++- if(y_c || x_c || log2_trafo_size < 4) { ++- switch(log2_trafo_size) { ++- case 3: pos = (y_c << 3) + x_c; break; ++- case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break; ++- case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break; ++- default: pos = (y_c << 2) + x_c; break; ++- } ++- scale_m = scale_matrix[pos]; ++- } else { ++- scale_m = dc_scale; ++- } +++ +++ bypass_finish(s); +++ +++ // -- Finish bypass block +++ +++ // Scale loop +++ { +++ int m = nb_significant_coeff_flag - 1; +++ +++ // Deal with DC component (if any) first +++ if (i == 0 && significant_coeff_flag_idx[m] == 0) +++ { +++ const int k = (int32_t)(coeff_sign_flags << m) >> 31; +++ blk_coeffs[0] = trans_scale_sat( +++ (levels[m] ^ k) - k, scale, dc_scale, shift); +++ --m; ++ } ++- trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift; ++- if(trans_coeff_level < 0) { ++- if((~trans_coeff_level) & 0xFffffffffff8000) ++- trans_coeff_level = -32768; ++- } else { ++- if(trans_coeff_level & 0xffffffffffff8000) ++- trans_coeff_level = 32767; +++ +++#if !USE_N_END_1 +++ // If N_END_1 
set then m was at least 1 initially +++ if (m >= 0) +++#endif +++ { +++ do { +++ const xy_off_t * const xy_off = scan_xy_off + +++ significant_coeff_flag_idx[m]; +++ const int k = (int32_t)(coeff_sign_flags << m) >> 31; +++ +++ blk_coeffs[xy_off->coeff] = trans_scale_sat( +++ (levels[m] ^ k) - k, +++ scale, +++ blk_scale[xy_off->scale], +++ shift); +++ } while (--m >= 0); ++ } ++ } ++- coeffs[y_c * trafo_size + x_c] = trans_coeff_level; +++ ++ } ++ } ++- } +++ } while ((i = next_subset(s, i, c_idx_nz, +++ significant_coeff_group_flag, scan_x_cg, scan_y_cg, &prev_sig)) >= 0); ++ ++ if (lc->cu.cu_transquant_bypass_flag) { ++ if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag && ++@@ -1496,7 +2078,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode); ++ } ++ } else { ++- if (transform_skip_flag) { +++ if (trans_skip_or_bypass) { // Must be trans_skip as we've already dealt with bypass ++ int rot = s->ps.sps->transform_skip_rotation_enabled_flag && ++ log2_trafo_size == 2 && ++ lc->cu.pred_mode == MODE_INTRA; ++-- ++2.7.4 ++ +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index 2dc4addea504d142eb74385653584bf39b253156..d1d76cb2ce04d5fd056796cc133fceb3f3c246c9 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -3,7 +3,8 @@ include FFMPEG-VERSION + DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ + 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch \ + hevcdsp_ARM_NEON_optimized_epel_functions.patch added_ARM_NEON_optimized_SAO_patches.patch \ +- pfcd_hevc_optimisations.patch ++ pfcd_hevc_optimisations.patch \ ++ 0001-Squashed-commit-of-the-following.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -67,6 +68,7 @@ ifeq ($(Configuration), Release) + ffmpg_config += --disable-debug + endif + ++ffmpg_config 
+= --extra-cflags="-DRPI=1" + + CLEAN_FILES=$(ARCHIVE) $(PLATFORM) + +@@ -83,6 +85,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); patch -p1 < ../hevcdsp_ARM_NEON_optimized_epel_functions.patch + cd $(PLATFORM); patch -p1 < ../added_ARM_NEON_optimized_SAO_patches.patch + cd $(PLATFORM); patch -p1 < ../pfcd_hevc_optimisations.patch ++ cd $(PLATFORM); patch -p1 < ../0001-Squashed-commit-of-the-following.patch + + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ +diff --git a/tools/depends/target/ffmpeg/autobuild.sh b/tools/depends/target/ffmpeg/autobuild.sh +index b6bd57731bca6dfe5f814a4043b3e08d1bb08318..65800dfccc7cbf17124a96d81378b1c3ddf92342 100755 +--- a/tools/depends/target/ffmpeg/autobuild.sh ++++ b/tools/depends/target/ffmpeg/autobuild.sh +@@ -132,6 +132,7 @@ patch -p1 < ../../0001-Discard-data-before-VO-VOL-in-mpeg-4-over-mpegts.patch + patch -p1 < ../../hevcdsp_ARM_NEON_optimized_epel_functions.patch + patch -p1 < ../../added_ARM_NEON_optimized_SAO_patches.patch + patch -p1 < ../../pfcd_hevc_optimisations.patch ++patch -p1 < ../../0001-Squashed-commit-of-the-following.patch + + CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" \ + ./configure --prefix=$FFMPEG_PREFIX \ + +From 3dac5d0c77bfd3b88d90944154c058d1e6429bb8 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Fri, 19 Sep 2014 11:54:49 +0100 +Subject: [PATCH 37/67] [videoplayer/rbp] Add pi specific option to maintain + vsync with pll adjustment + +New A/V sync option in settings/video/playback to do "Adjust PLL". +This uses video clock (so perfect video syncing) but avoids having to resample +or drop/dupe audio packets which is normally required. 
+--- + .../resource.language.en_gb/resources/strings.po | 32 ++++++++++++++++++++++ + system/settings/rbp.xml | 14 ++++++++++ + .../AudioEngine/Engines/ActiveAE/ActiveAE.cpp | 31 +++++++++++++++------ + xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.h | 9 +++++- + .../Engines/ActiveAE/ActiveAEStream.cpp | 7 +++-- + .../AudioEngine/Engines/ActiveAE/ActiveAEStream.h | 4 ++- + xbmc/cores/AudioEngine/Interfaces/AEStream.h | 10 ++++++- + xbmc/cores/VideoPlayer/DVDAudio.cpp | 4 +-- + xbmc/cores/VideoPlayer/DVDAudio.h | 2 +- + xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp | 12 ++++++-- + xbmc/linux/RBP.cpp | 13 +++++++++ + xbmc/linux/RBP.h | 3 ++ + 12 files changed, 122 insertions(+), 19 deletions(-) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index 085e2a195d2e52ce6bea3ed791bf817f5be23b15..8cb9f8503c29c54cd0cb55018f867a45248c649f 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -19417,3 +19417,35 @@ msgstr "" + msgctxt "#38190" + msgid "Extract thumbnails from video files" + msgstr "" ++ ++#. Description of setting "System -> Audio Output -> A/V sync method" with label #38200 ++#: system/settings/settings.xml ++msgctxt "#38200" ++msgid "PLL adjustment to maintain audio/video sync" ++msgstr "" ++ ++#. Description of setting "Videos -> Playback -> A/V sync method" with label #38201 ++#: system/settings/settings.xml ++msgctxt "#38201" ++msgid "Allows sync adjustment without resampling. Lower the settings if you get audio/video dropouts." 
++msgstr "" ++ ++msgctxt "#38202" ++msgid "Off" ++msgstr "" ++ ++msgctxt "#38203" ++msgid "Low" ++msgstr "" ++ ++msgctxt "#38204" ++msgid "Medium" ++msgstr "" ++ ++msgctxt "#38205" ++msgid "High" ++msgstr "" ++ ++msgctxt "#38206" ++msgid "Max" ++msgstr "" +diff --git a/system/settings/rbp.xml b/system/settings/rbp.xml +index 737ec4e0c7f0feb98a6dd008b53e238c41dde8af..2e6c903df5e4d2cd064466db0ef55deada5cdc80 100644 +--- a/system/settings/rbp.xml ++++ b/system/settings/rbp.xml +@@ -98,6 +98,20 @@ + + 101 + ++ ++ 3 ++ 0 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + + + +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp +index f9e8a9beaa9b3b4590c698a4d64351cb14c2339d..6a22f8145ce9dfb46f0ddae27eb0753413b066d3 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp +@@ -34,6 +34,10 @@ using namespace ActiveAE; + #include "windowing/WindowingFactory.h" + #include "utils/log.h" + ++#if defined(TARGET_RASPBERRY_PI) ++#include "linux/RBP.h" ++#endif ++ + #define MAX_CACHE_LEVEL 0.4 // total cache time of stream in seconds + #define MAX_WATER_LEVEL 0.2 // buffered time after stream stages in seconds + #define MAX_BUFFER_TIME 0.1 // max time of a buffer in seconds +@@ -365,11 +369,12 @@ void CActiveAE::StateMachine(int signal, Protocol *port, Message *msg) + m_sink.m_controlPort.SendOutMessage(CSinkControlProtocol::APPFOCUSED, msg->data, sizeof(bool)); + return; + case CActiveAEControlProtocol::STREAMRESAMPLEMODE: +- MsgStreamParameter *par; +- par = (MsgStreamParameter*)msg->data; ++ MsgStreamResample *par; ++ par = (MsgStreamResample*)msg->data; + if (par->stream) + { +- par->stream->m_resampleMode = par->parameter.int_par; ++ par->stream->m_resampleMode = par->mode; ++ par->stream->m_pllAdjust = par->plladjust; + par->stream->m_resampleIntegral = 0.0; + } + return; +@@ -2466,7 +2471,16 @@ CSampleBuffer* CActiveAE::SyncStream(CActiveAEStream 
*stream) + if (!newerror || stream->m_syncState != CAESyncInfo::AESyncState::SYNC_INSYNC) + return ret; + +- if (stream->m_resampleMode) ++ if (stream->m_pllAdjust > 0) // pll adjust ++ { ++#if defined(TARGET_RASPBERRY_PI) ++ double e = std::max(std::min(error / 50.0, 1.0), -1.0); ++ double m_plladjust = 1.0 + e * stream->m_pllAdjust; ++ double m_last_plladjust = g_RBP.AdjustHDMIClock(m_plladjust); ++ CLog::Log(LOGDEBUG, "CDVDPlayerAudio::%s pll:%.5f (%.5f) error:%.6f e:%.6f a:%f", __FUNCTION__, m_plladjust, m_last_plladjust, error, e * stream->m_pllAdjust, stream->m_pllAdjust ); ++#endif ++ } ++ else if (stream->m_resampleMode) + { + if (stream->m_resampleBuffers) + { +@@ -3322,13 +3336,14 @@ void CActiveAE::SetStreamResampleRatio(CActiveAEStream *stream, double ratio) + &msg, sizeof(MsgStreamParameter)); + } + +-void CActiveAE::SetStreamResampleMode(CActiveAEStream *stream, int mode) ++void CActiveAE::SetStreamResampleMode(CActiveAEStream *stream, int mode, float plladjust) + { +- MsgStreamParameter msg; ++ MsgStreamResample msg; + msg.stream = stream; +- msg.parameter.int_par = mode; ++ msg.mode = mode; ++ msg.plladjust = plladjust; + m_controlPort.SendOutMessage(CActiveAEControlProtocol::STREAMRESAMPLEMODE, +- &msg, sizeof(MsgStreamParameter)); ++ &msg, sizeof(MsgStreamResample)); + } + + void CActiveAE::SetStreamFFmpegInfo(CActiveAEStream *stream, int profile, enum AVMatrixEncoding matrix_encoding, enum AVAudioServiceType audio_service_type) +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.h b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.h +index 2a31a6e3c09fa61907ef9e518158773ba7d3b03e..3efc7afc255c542ea2aedbf83d6962beeae286a2 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.h ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.h +@@ -174,6 +174,13 @@ struct MsgStreamFFmpegInfo + enum AVAudioServiceType audio_service_type; + }; + ++struct MsgStreamResample ++{ ++ CActiveAEStream *stream; ++ int mode; ++ float plladjust; 
++}; ++ + class CEngineStats + { + public: +@@ -290,7 +297,7 @@ protected: + void SetStreamReplaygain(CActiveAEStream *stream, float rgain); + void SetStreamVolume(CActiveAEStream *stream, float volume); + void SetStreamResampleRatio(CActiveAEStream *stream, double ratio); +- void SetStreamResampleMode(CActiveAEStream *stream, int mode); ++ void SetStreamResampleMode(CActiveAEStream *stream, int mode, float plladjust); + void SetStreamFFmpegInfo(CActiveAEStream *stream, int profile, enum AVMatrixEncoding matrix_encoding, enum AVAudioServiceType audio_service_type); + void SetStreamFade(CActiveAEStream *stream, float from, float target, unsigned int millis); + +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.cpp b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.cpp +index 1d58691db79e53a4a4cfb32c45f209a115853722..d1e8863cb9600bf1a026520f77501bb98e51918a 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.cpp ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.cpp +@@ -503,11 +503,12 @@ void CActiveAEStream::SetResampleRatio(double ratio) + m_streamResampleRatio = ratio; + } + +-void CActiveAEStream::SetResampleMode(int mode) ++void CActiveAEStream::SetResampleMode(int mode, float plladjust) + { +- if (mode != m_streamResampleMode) +- AE.SetStreamResampleMode(this, mode); ++ if (mode != m_streamResampleMode || plladjust != m_streamPllAdjust) ++ AE.SetStreamResampleMode(this, mode, plladjust); + m_streamResampleMode = mode; ++ m_streamPllAdjust = plladjust; + } + + void CActiveAEStream::SetFFmpegInfo(int profile, enum AVMatrixEncoding matrix_encoding, enum AVAudioServiceType audio_service_type) +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.h b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.h +index 0fd959b8071e5a03d7749689e2e0042907d4d4bf..8b25159f198279f2515fe4f84fc9403dcb46c401 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.h ++++ 
b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.h +@@ -137,7 +137,7 @@ public: + + virtual double GetResampleRatio(); + virtual void SetResampleRatio(double ratio); +- virtual void SetResampleMode(int mode); ++ virtual void SetResampleMode(int mode, float plladjust); + virtual void RegisterAudioCallback(IAudioCallback* pCallback); + virtual void UnRegisterAudioCallback(); + virtual void FadeVolume(float from, float to, unsigned int time); +@@ -154,6 +154,7 @@ protected: + float m_streamAmplify; + double m_streamResampleRatio; + int m_streamResampleMode; ++ float m_streamPllAdjust; + unsigned int m_streamSpace; + bool m_streamDraining; + bool m_streamDrained; +@@ -194,6 +195,7 @@ protected: + int m_fadingTime; + int m_profile; + int m_resampleMode; ++ float m_pllAdjust; + double m_resampleIntegral; + enum AVMatrixEncoding m_matrixEncoding; + enum AVAudioServiceType m_audioServiceType; +diff --git a/xbmc/cores/AudioEngine/Interfaces/AEStream.h b/xbmc/cores/AudioEngine/Interfaces/AEStream.h +index 7416685ef766492b13bbbde9001f868f28907d34..e3dbc5f2ddd6269f5e80086d2fd04e1ae68ac828 100644 +--- a/xbmc/cores/AudioEngine/Interfaces/AEStream.h ++++ b/xbmc/cores/AudioEngine/Interfaces/AEStream.h +@@ -41,6 +41,14 @@ public: + class CAESyncInfo + { + public: ++ CAESyncInfo() ++ { ++ delay = 0.0; ++ error = 0.0; ++ rr = 1.0; ++ errortime = 0; ++ state = SYNC_OFF; ++ } + double delay; + double error; + double rr; +@@ -231,7 +239,7 @@ public: + /** + * Sets the resamplling on/ff + */ +- virtual void SetResampleMode(int mode) = 0; ++ virtual void SetResampleMode(int mode, float plladjust) = 0; + + /** + * Registers the audio callback to call with each block of data, this is used by Audio Visualizations +diff --git a/xbmc/cores/VideoPlayer/DVDAudio.cpp b/xbmc/cores/VideoPlayer/DVDAudio.cpp +index 2674fb381aa42a3575ae85ad54be5f9891cafe2a..48894c6cea7ba54e37855963cf2caf76e1d1ab36 100644 +--- a/xbmc/cores/VideoPlayer/DVDAudio.cpp ++++ b/xbmc/cores/VideoPlayer/DVDAudio.cpp +@@ 
-321,12 +321,12 @@ double CDVDAudio::GetResampleRatio() + return m_resampleRatio; + } + +-void CDVDAudio::SetResampleMode(int mode) ++void CDVDAudio::SetResampleMode(int mode, float plladjust) + { + CSingleLock lock (m_critSection); + if(m_pAudioStream) + { +- m_pAudioStream->SetResampleMode(mode); ++ m_pAudioStream->SetResampleMode(mode, plladjust); + } + } + +diff --git a/xbmc/cores/VideoPlayer/DVDAudio.h b/xbmc/cores/VideoPlayer/DVDAudio.h +index 48b5c42d2998a25901c31a9ad762d81f89eb430b..70559f9570041a11693d21d2de890f81b07fe2ab 100644 +--- a/xbmc/cores/VideoPlayer/DVDAudio.h ++++ b/xbmc/cores/VideoPlayer/DVDAudio.h +@@ -61,7 +61,7 @@ public: + double GetSyncError(); + void SetSyncErrorCorrection(double correction); + double GetResampleRatio(); +- void SetResampleMode(int mode); ++ void SetResampleMode(int mode, float plladjust); + void Flush(); + void Drain(); + void AbortAddPackets(); +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +index 188b85b12b86f887324cdcfda3c3aa4cd90d3a11..b05c4e4c6a2361455ab553133965aa2018e9d684 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +@@ -96,6 +96,7 @@ bool CVideoPlayerAudio::OpenStream(CDVDStreamInfo &hints) + bool allowpassthrough = !CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEDISPLAYASCLOCK); + if (hints.realtime) + allowpassthrough = false; ++ allowpassthrough |= CSettings::GetInstance().GetInt("audiooutput.plladjust") > 0; + CDVDAudioCodec* codec = CDVDFactoryCodec::CreateAudioCodec(hints, m_processInfo, allowpassthrough, m_processInfo.AllowDTSHDDecode()); + if(!codec) + { +@@ -217,8 +218,12 @@ void CVideoPlayerAudio::UpdatePlayerInfo() + + //print the inverse of the resample ratio, since that makes more sense + //if the resample ratio is 0.5, then we're playing twice as fast ++#ifdef TARGET_RASPBERRY_PI ++ s << ", rr:" << std::fixed << std::setprecision(5) << 1.0 / 
m_dvdAudio.GetResampleRatio() << ", pll:" << std::fixed << std::setprecision(5) << g_RBP.GetAdjustHDMIClock() << ", err:" << std::fixed << std::setprecision(1) << m_dvdAudio.GetSyncError() * 1e-3 << "ms"; ++#else + if (m_synctype == SYNC_RESAMPLE) + s << ", rr:" << std::fixed << std::setprecision(5) << 1.0 / m_dvdAudio.GetResampleRatio(); ++#endif + + s << ", att:" << std::fixed << std::setprecision(1) << log(GetCurrentAttenuation()) * 20.0f << " dB"; + +@@ -545,10 +550,12 @@ void CVideoPlayerAudio::SetSyncType(bool passthrough) + int synctype = (m_synctype >= 0 && m_synctype <= 1) ? m_synctype : 2; + CLog::Log(LOGDEBUG, "CVideoPlayerAudio:: synctype set to %i: %s", m_synctype, synctypes[synctype]); + m_prevsynctype = m_synctype; ++ const float plladjusts[] = { 0.0f, 0.00001f, 0.0001f, 0.001f, 0.01f }; ++ float plladjust = plladjusts[CSettings::GetInstance().GetInt("audiooutput.plladjust")]; + if (m_synctype == SYNC_RESAMPLE) +- m_dvdAudio.SetResampleMode(1); ++ m_dvdAudio.SetResampleMode(1, plladjust); + else +- m_dvdAudio.SetResampleMode(0); ++ m_dvdAudio.SetResampleMode(0, plladjust); + } + } + +@@ -606,6 +613,7 @@ bool CVideoPlayerAudio::SwitchCodecIfNeeded() + bool allowpassthrough = !CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEDISPLAYASCLOCK); + if (m_streaminfo.realtime) + allowpassthrough = false; ++ allowpassthrough |= CSettings::GetInstance().GetInt("audiooutput.plladjust") > 0; + CDVDAudioCodec *codec = CDVDFactoryCodec::CreateAudioCodec(m_streaminfo, m_processInfo, allowpassthrough, m_processInfo.AllowDTSHDDecode()); + if (!codec || codec->NeedPassthrough() == m_pAudioCodec->NeedPassthrough()) { + // passthrough state has not changed +diff --git a/xbmc/linux/RBP.cpp b/xbmc/linux/RBP.cpp +index fbffa3a952d920cb41412f00f59d5c1c91f98740..d6591cc4e1938b231cd3ce9035ca9334dcffdde9 100644 +--- a/xbmc/linux/RBP.cpp ++++ b/xbmc/linux/RBP.cpp +@@ -49,6 +49,7 @@ CRBP::CRBP() + m_DllBcmHost = new DllBcmHost(); + m_OMX = new COMXCore(); + 
m_display = DISPMANX_NO_HANDLE; ++ m_last_pll_adjust = 1.0; + m_p = NULL; + m_x = 0; + m_y = 0; +@@ -162,6 +163,7 @@ void CRBP::CloseDisplay(DISPMANX_DISPLAY_HANDLE_T display) + assert(s == 0); + vc_dispmanx_display_close(m_display); + m_display = DISPMANX_NO_HANDLE; ++ m_last_pll_adjust = 1.0; + } + + void CRBP::GetDisplaySize(int &width, int &height) +@@ -504,4 +506,15 @@ void CRBP::uninit_cursor() + mailbox_set_cursor_position(m_mb, 0, 0, 0); + } + ++double CRBP::AdjustHDMIClock(double adjust) ++{ ++ char response[80]; ++ vc_gencmd(response, sizeof response, "hdmi_adjust_clock %f", adjust); ++ char *p = strchr(response, '='); ++ if (p) ++ m_last_pll_adjust = atof(p+1); ++ CLog::Log(LOGDEBUG, "CRBP::%s(%.4f) = %.4f", __func__, adjust, m_last_pll_adjust); ++ return m_last_pll_adjust; ++} ++ + #endif +diff --git a/xbmc/linux/RBP.h b/xbmc/linux/RBP.h +index 90b04db5405058be2ff20aeaa6af2d2ac651586f..084fba87f49f4c3b33a8dd4a20a626a370a1f371 100644 +--- a/xbmc/linux/RBP.h ++++ b/xbmc/linux/RBP.h +@@ -82,6 +82,8 @@ public: + uint32_t WaitVsync(uint32_t target = ~0U); + void VSyncCallback(); + int GetMBox() { return m_mb; } ++ double AdjustHDMIClock(double adjust); ++ double GetAdjustHDMIClock() { return m_last_pll_adjust; } + + private: + DllBcmHost *m_DllBcmHost; +@@ -107,6 +109,7 @@ private: + int m_x; + int m_y; + bool m_enabled; ++ double m_last_pll_adjust; + public: + void init_cursor(); + void set_cursor(const void *pixels, int width, int height, int hotspot_x, int hotspot_y); + +From d7f88d01cde2cd4b0894463321e1ff0c413d9446 Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Thu, 7 May 2015 15:35:43 +0100 +Subject: [PATCH 38/67] rbp: Support zero copy interface with hevc acceleration + +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp | 9 +++++++++ + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp | 5 +++-- + 2 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp 
b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp +index 967d5181a42f7cad0fe7b559a8eb958073a8144d..ec2d47d7443ab75af5ad119b8ae04fb072eca677 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp +@@ -306,6 +306,15 @@ bool CDVDVideoCodecFFmpeg::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options + if (tryhw && m_decoderState == STATE_NONE) + { + m_decoderState = STATE_HW_SINGLE; ++#ifdef TARGET_RASPBERRY_PI ++ int num_threads = g_cpuInfo.getCPUCount() * 3 / 2; ++ num_threads = std::max(1, std::min(num_threads, 16)); ++ if (pCodec->id == AV_CODEC_ID_HEVC) ++ num_threads = 8; ++ m_pCodecContext->thread_count = num_threads; ++ m_pCodecContext->thread_safe_callbacks = 0; ++ CLog::Log(LOGDEBUG, "CDVDVideoCodecFFmpeg - open frame threaded with %d threads", num_threads); ++#endif + } + else + { +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp +index 3825e4cca4df7e1a791410b741aecc64823a3c69..e1bb3ab37f68b69e39fb00ab6e4785a430250173 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp +@@ -355,8 +355,9 @@ bool CDecoder::GetPicture(AVCodecContext* avctx, AVFrame* frame, DVDVideoPicture + assert(!picture->MMALBuffer->mmal_buffer); + picture->MMALBuffer->mmal_buffer = mmal_buffer; + +- // need to flush ARM cache so GPU can see it +- gmem->Flush(); ++ // need to flush ARM cache so GPU can see it (HEVC will have already done this) ++ if (avctx->codec_id != AV_CODEC_ID_HEVC) ++ gmem->Flush(); + + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) + CLog::Log(LOGDEBUG, "%s::%s - mmal:%p dts:%.3f pts:%.3f buf:%p gpu:%p", CLASSNAME, __FUNCTION__, picture->MMALBuffer->mmal_buffer, 1e-6*picture->dts, 1e-6*picture->pts, picture->MMALBuffer, gmem); + +From 2b6121f39768cf5d22ffc73a475484519ac2881e Mon Sep 17 00:00:00 2001 +From: 
popcornmix +Date: Sat, 16 May 2015 18:26:04 +0100 +Subject: [PATCH 39/67] ffmpeg: use upstream mvc patches + +--- + ...vcodec-add-h264_mvc-codec-id-and-profiles.patch | 68 ++++++++++++ + ...er-add-support-for-parsing-h264-mvc-NALUs.patch | 116 +++++++++++++++++++++ + tools/depends/target/ffmpeg/Makefile | 7 +- + tools/depends/target/ffmpeg/autobuild.sh | 3 + + ...arsing_of_mvc_slices_in_some_corner_cases.patch | 55 ++++++++++ + 5 files changed, 248 insertions(+), 1 deletion(-) + create mode 100644 tools/depends/target/ffmpeg/0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch + create mode 100644 tools/depends/target/ffmpeg/0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch + create mode 100644 tools/depends/target/ffmpeg/h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch + +diff --git a/tools/depends/target/ffmpeg/0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch b/tools/depends/target/ffmpeg/0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..2e7381fe6538089759ebc7288c0a5d908cd0973c +--- /dev/null ++++ b/tools/depends/target/ffmpeg/0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch +@@ -0,0 +1,68 @@ ++From 4060f15e2d29e268110032d4366382e370e088d0 Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Sun, 26 Jun 2016 20:09:18 +0100 ++Subject: [PATCH] avcodec: add h264_mvc codec id and profiles ++ ++--- ++ libavcodec/avcodec.h | 5 +++++ ++ libavcodec/codec_desc.c | 7 +++++++ ++ libavformat/mpegts.c | 2 +- ++ 3 files changed, 13 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h ++index a1ba217..abd2e91 100644 ++--- a/libavcodec/avcodec.h +++++ b/libavcodec/avcodec.h ++@@ -410,6 +410,8 @@ enum AVCodecID { ++ AV_CODEC_ID_SHEERVIDEO, ++ AV_CODEC_ID_YLC, ++ +++ AV_CODEC_ID_H264_MVC, +++ ++ /* various PCM "codecs" */ ++ AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs ++ 
AV_CODEC_ID_PCM_S16LE = 0x10000, ++@@ -3195,6 +3197,9 @@ typedef struct AVCodecContext { ++ #define FF_PROFILE_H264_HIGH_444_PREDICTIVE 244 ++ #define FF_PROFILE_H264_HIGH_444_INTRA (244|FF_PROFILE_H264_INTRA) ++ #define FF_PROFILE_H264_CAVLC_444 44 +++#define FF_PROFILE_H264_MULTIVIEW_HIGH 118 +++#define FF_PROFILE_H264_STEREO_HIGH 128 +++#define FF_PROFILE_H264_MULTIVIEW_HIGH_DEPTH 138 ++ ++ #define FF_PROFILE_VC1_SIMPLE 0 ++ #define FF_PROFILE_VC1_MAIN 1 ++diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c ++index 9d94b72..535ebf0 100644 ++--- a/libavcodec/codec_desc.c +++++ b/libavcodec/codec_desc.c ++@@ -1563,6 +1563,13 @@ static const AVCodecDescriptor codec_descriptors[] = { ++ .long_name = NULL_IF_CONFIG_SMALL("YUY2 Lossless Codec"), ++ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, ++ }, +++ { +++ .id = AV_CODEC_ID_H264_MVC, +++ .type = AVMEDIA_TYPE_VIDEO, +++ .name = "h264_mvc", +++ .long_name = NULL_IF_CONFIG_SMALL("H264 MVC"), +++ .props = AV_CODEC_PROP_LOSSY, +++ }, ++ ++ /* various PCM "codecs" */ ++ { ++diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c ++index b31d233..2767306 100644 ++--- a/libavformat/mpegts.c +++++ b/libavformat/mpegts.c ++@@ -701,7 +701,7 @@ static const StreamType ISO_types[] = { ++ #endif ++ { 0x1b, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_H264 }, ++ { 0x1c, AVMEDIA_TYPE_AUDIO, AV_CODEC_ID_AAC }, ++- { 0x20, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_H264 }, +++ { 0x20, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_H264_MVC }, ++ { 0x21, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_JPEG2000 }, ++ { 0x24, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_HEVC }, ++ { 0x42, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_CAVS }, ++-- ++2.7.4 ++ +diff --git a/tools/depends/target/ffmpeg/0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch b/tools/depends/target/ffmpeg/0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..399e8a95984771e4388bfe4785423ff3f664f89b +--- /dev/null ++++ 
b/tools/depends/target/ffmpeg/0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch +@@ -0,0 +1,116 @@ ++From 23dd20678a05e1764e5d8d30481cb354a51b6c8b Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Sun, 26 Jun 2016 20:16:03 +0100 ++Subject: [PATCH] h264_parser: add support for parsing h264 mvc NALUs ++ ++--- ++ libavcodec/allcodecs.c | 1 + ++ libavcodec/h264.h | 2 ++ ++ libavcodec/h264_parser.c | 34 ++++++++++++++++++++++++++++++---- ++ 3 files changed, 33 insertions(+), 4 deletions(-) ++ ++diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c ++index 54efaad..02a89c3 100644 ++--- a/libavcodec/allcodecs.c +++++ b/libavcodec/allcodecs.c ++@@ -667,6 +667,7 @@ void avcodec_register_all(void) ++ REGISTER_PARSER(H261, h261); ++ REGISTER_PARSER(H263, h263); ++ REGISTER_PARSER(H264, h264); +++ REGISTER_PARSER(H264_MVC, h264_mvc); ++ REGISTER_PARSER(HEVC, hevc); ++ REGISTER_PARSER(MJPEG, mjpeg); ++ REGISTER_PARSER(MLP, mlp); ++diff --git a/libavcodec/h264.h b/libavcodec/h264.h ++index efe3555..16358aa 100644 ++--- a/libavcodec/h264.h +++++ b/libavcodec/h264.h ++@@ -126,7 +126,9 @@ enum { ++ NAL_END_STREAM = 11, ++ NAL_FILLER_DATA = 12, ++ NAL_SPS_EXT = 13, +++ NAL_SPS_SUBSET = 15, ++ NAL_AUXILIARY_SLICE = 19, +++ NAL_SLICE_EXT = 20, ++ NAL_FF_IGNORE = 0xff0f001, ++ }; ++ ++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c ++index ce4bab2..082ac17 100644 ++--- a/libavcodec/h264_parser.c +++++ b/libavcodec/h264_parser.c ++@@ -58,6 +58,7 @@ typedef struct H264ParseContext { ++ uint8_t parse_history[6]; ++ int parse_history_count; ++ int parse_last_mb; +++ int is_mvc; ++ } H264ParseContext; ++ ++ ++@@ -105,14 +106,18 @@ static int h264_find_frame_end(H264ParseContext *p, const uint8_t *buf, ++ } else if (state <= 5) { ++ int nalu_type = buf[i] & 0x1F; ++ if (nalu_type == NAL_SEI || nalu_type == NAL_SPS || ++- nalu_type == NAL_PPS || nalu_type == NAL_AUD) { +++ nalu_type == NAL_PPS || nalu_type == NAL_AUD || +++ nalu_type == NAL_SPS_SUBSET) { 
++ if (pc->frame_start_found) { ++ i++; ++ goto found; ++ } ++ } else if (nalu_type == NAL_SLICE || nalu_type == NAL_DPA || ++- nalu_type == NAL_IDR_SLICE) { +++ nalu_type == NAL_IDR_SLICE || (p->is_mvc && nalu_type == NAL_SLICE_EXT)) { ++ state += 8; +++ +++ if (nalu_type == NAL_SLICE_EXT) +++ i += 3; // skip mvc extension ++ continue; ++ } ++ state = 7; ++@@ -585,7 +590,8 @@ static int h264_parse(AVCodecParserContext *s, ++ } ++ } ++ ++- parse_nal_units(s, avctx, buf, buf_size); +++ if (!p->is_mvc) +++ parse_nal_units(s, avctx, buf, buf_size); ++ ++ if (avctx->framerate.num) ++ avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1})); ++@@ -622,7 +628,7 @@ static int h264_split(AVCodecContext *avctx, ++ if ((state & 0xFFFFFF00) != 0x100) ++ break; ++ nalu_type = state & 0x1F; ++- if (nalu_type == NAL_SPS) { +++ if (nalu_type == NAL_SPS || nalu_type == NAL_SPS_SUBSET) { ++ has_sps = 1; ++ } else if (nalu_type == NAL_PPS) ++ has_pps = 1; ++@@ -672,3 +678,23 @@ AVCodecParser ff_h264_parser = { ++ .parser_close = h264_close, ++ .split = h264_split, ++ }; +++ +++static av_cold int init_mvc(AVCodecParserContext *s) +++{ +++ H264ParseContext *p = s->priv_data; +++ int ret = init(s); +++ if (ret < 0) +++ return ret; +++ +++ p->is_mvc = 1; +++ return 0; +++} +++ +++AVCodecParser ff_h264_mvc_parser = { +++ .codec_ids = { AV_CODEC_ID_H264_MVC }, +++ .priv_data_size = sizeof(H264ParseContext), +++ .parser_init = init_mvc, +++ .parser_parse = h264_parse, +++ .parser_close = h264_close, +++ .split = h264_split, +++}; ++-- ++2.7.4 ++ +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index d1d76cb2ce04d5fd056796cc133fceb3f3c246c9..92d9437b36eaa4e655990f7e68634e0bbf4d9605 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -4,7 +4,9 @@ DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ + 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch 
\ + hevcdsp_ARM_NEON_optimized_epel_functions.patch added_ARM_NEON_optimized_SAO_patches.patch \ + pfcd_hevc_optimisations.patch \ +- 0001-Squashed-commit-of-the-following.patch ++ 0001-Squashed-commit-of-the-following.patch \ ++ 0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch 0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch \ ++ h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -86,6 +88,9 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); patch -p1 < ../added_ARM_NEON_optimized_SAO_patches.patch + cd $(PLATFORM); patch -p1 < ../pfcd_hevc_optimisations.patch + cd $(PLATFORM); patch -p1 < ../0001-Squashed-commit-of-the-following.patch ++ cd $(PLATFORM); patch -p1 < ../0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch ++ cd $(PLATFORM); patch -p1 < ../0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch ++ cd $(PLATFORM); patch -p1 < ../h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch + + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ +diff --git a/tools/depends/target/ffmpeg/autobuild.sh b/tools/depends/target/ffmpeg/autobuild.sh +index 65800dfccc7cbf17124a96d81378b1c3ddf92342..4217ea350aa93e4a7acbe9dd15c9f8699db383b8 100755 +--- a/tools/depends/target/ffmpeg/autobuild.sh ++++ b/tools/depends/target/ffmpeg/autobuild.sh +@@ -133,6 +133,9 @@ patch -p1 < ../../hevcdsp_ARM_NEON_optimized_epel_functions.patch + patch -p1 < ../../added_ARM_NEON_optimized_SAO_patches.patch + patch -p1 < ../../pfcd_hevc_optimisations.patch + patch -p1 < ../../0001-Squashed-commit-of-the-following.patch ++patch -p1 < ../../0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch ++patch -p1 < ../../0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch ++patch -p1 < 
../../h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch + + CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" \ + ./configure --prefix=$FFMPEG_PREFIX \ +diff --git a/tools/depends/target/ffmpeg/h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch b/tools/depends/target/ffmpeg/h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..b39480ad098b9cd0882fcf75b96afb1b98686bcc +--- /dev/null ++++ b/tools/depends/target/ffmpeg/h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch +@@ -0,0 +1,55 @@ ++From 12d99a92469e5916de3bc787dce4c13abfdd5e09 Mon Sep 17 00:00:00 2001 ++From: popcornmix ++Date: Sun, 26 Jun 2016 20:20:04 +0100 ++Subject: [PATCH] h264_parser: fix parsing of mvc slices in some corner cases ++ ++--- ++ libavcodec/h264_parser.c | 10 +++++----- ++ 1 file changed, 5 insertions(+), 5 deletions(-) ++ ++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c ++index 082ac17..b9b0c78 100644 ++--- a/libavcodec/h264_parser.c +++++ b/libavcodec/h264_parser.c ++@@ -59,6 +59,7 @@ typedef struct H264ParseContext { ++ int parse_history_count; ++ int parse_last_mb; ++ int is_mvc; +++ int slice_ext; ++ } H264ParseContext; ++ ++ ++@@ -116,18 +117,17 @@ static int h264_find_frame_end(H264ParseContext *p, const uint8_t *buf, ++ nalu_type == NAL_IDR_SLICE || (p->is_mvc && nalu_type == NAL_SLICE_EXT)) { ++ state += 8; ++ ++- if (nalu_type == NAL_SLICE_EXT) ++- i += 3; // skip mvc extension +++ p->slice_ext = (nalu_type == NAL_SLICE_EXT); ++ continue; ++ } ++ state = 7; ++ } else { ++ p->parse_history[p->parse_history_count++] = buf[i]; ++- if (p->parse_history_count > 5) { +++ if (p->parse_history_count > 8) { ++ unsigned int mb, last_mb = p->parse_last_mb; ++ GetBitContext gb; ++ ++- init_get_bits(&gb, p->parse_history, 8*p->parse_history_count); +++ init_get_bits8(&gb, p->parse_history + 3*p->slice_ext, p->parse_history_count - 3*p->slice_ext); 
++ p->parse_history_count = 0; ++ mb= get_ue_golomb_long(&gb); ++ p->parse_last_mb = mb; ++@@ -150,7 +150,7 @@ found: ++ pc->frame_start_found = 0; ++ if (p->is_avc) ++ return next_avc; ++- return i - (state & 5) - 5 * (state > 7); +++ return i - (state & 5) - 8 * (state > 7); ++ } ++ ++ static int scan_mmco_reset(AVCodecParserContext *s, GetBitContext *gb, ++-- ++2.7.4 ++ + +From c2b0929d428aa4eb33d771121448a59e883c9842 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin +Date: Wed, 20 Jan 2016 17:02:16 +0300 +Subject: [PATCH 40/67] [VideoPlayer] DemuxFFmpeg: Properly demuxing h264_mvc + streams. + +--- + .../VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 23 +++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 84310bbda6440dd10f9aa0711859f4dc0bb1fd1a..16e8e270b5a060bd174f794480a8b178a620d490 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -25,6 +25,7 @@ + + #include "commons/Exception.h" + #include "cores/FFmpeg.h" ++#include "DVDCodecs/DVDCodecUtils.h" + #include "DVDClock.h" // for DVD_TIME_BASE + #include "DVDDemuxUtils.h" + #include "DVDInputStreams/DVDInputStream.h" +@@ -1249,6 +1250,15 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + } + case AVMEDIA_TYPE_VIDEO: + { ++ if (pStream->codec->codec_id == AV_CODEC_ID_H264_MVC) ++ { ++ // ignore MVC extension streams, they are handled specially ++ stream = new CDemuxStream(); ++ stream->type = STREAM_DATA; ++ stream->disabled = true; ++ pStream->need_parsing = AVSTREAM_PARSE_NONE; ++ break; ++ } + CDemuxStreamVideoFFmpeg* st = new CDemuxStreamVideoFFmpeg(this, pStream); + stream = st; + if(strcmp(m_pFormatContext->iformat->name, "flv") == 0) +@@ -1257,7 +1267,7 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + st->bVFR = false; + + // never trust pts in avi files 
with h264. +- if (m_bAVI && pStream->codec->codec_id == AV_CODEC_ID_H264) ++ if (m_bAVI && (pStream->codec->codec_id == AV_CODEC_ID_H264 || pStream->codec->codec_id == AV_CODEC_ID_H264_MVC)) + st->bPTSInvalid = true; + + #if defined(AVFORMAT_HAS_STREAM_GET_R_FRAME_RATE) +@@ -1328,6 +1338,17 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + if (av_dict_get(pStream->metadata, "title", NULL, 0)) + st->m_description = av_dict_get(pStream->metadata, "title", NULL, 0)->value; + ++ if (pStream->codec->codec_id == AV_CODEC_ID_H264) ++ { ++ if (CDVDCodecUtils::IsH264AnnexB(m_pFormatContext->iformat->name, pStream)) ++ { ++ // TODO ++ } ++ else if (CDVDCodecUtils::ProcessH264MVCExtradata(pStream->codec->extradata, pStream->codec->extradata_size)) ++ { ++ pStream->codec->codec_tag = MKTAG('M', 'V', 'C', '1'); ++ } ++ } + break; + } + case AVMEDIA_TYPE_DATA: + +From 052ba44b0a0bd4736bc330c2f86e34cb8424ba60 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin +Date: Thu, 25 Feb 2016 11:21:25 +0300 +Subject: [PATCH 41/67] [Stereo3D] Added mvc modes. 
+ +--- + xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp | 4 ++++ + xbmc/guilib/StereoscopicsManager.cpp | 6 +++++- + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp +index 809766a64b0289ca0a7f69cf68dd7651c249d161..04ceed1504c2d81aaa165d232e128c410b9fdc2c 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp +@@ -104,6 +104,8 @@ namespace RenderManager { + convert["anaglyph_yellow_blue"] = 0u; + convert["block_lr"] = 0u; + convert["block_rl"] = 0u; ++ convert["mvc_lr"] = 0u; ++ convert["mvc_rl"] = 0u; + } + return convert[mode]; + } +@@ -125,6 +127,8 @@ namespace RenderManager { + convert["col_interleaved_lr"] = "col_interleaved_rl"; + convert["block_lr"] = "block_lr"; + convert["block_rl"] = "block_rl"; ++ convert["mvc_lr"] = "mvc_rl"; ++ convert["mvc_rl"] = "mvc_lr"; + } + std::string res = convert[mode]; + if(res.empty()) +diff --git a/xbmc/guilib/StereoscopicsManager.cpp b/xbmc/guilib/StereoscopicsManager.cpp +index 1443acaf0f25df458ae49766e13dd0323454f2eb..6eb0752994bc5f8c47efbbf211120af0a0720d0c 100644 +--- a/xbmc/guilib/StereoscopicsManager.cpp ++++ b/xbmc/guilib/StereoscopicsManager.cpp +@@ -72,6 +72,10 @@ static const struct StereoModeMap VideoModeToGuiModeMap[] = + { "anaglyph_yellow_blue", RENDER_STEREO_MODE_ANAGLYPH_YELLOW_BLUE }, + { "block_lr", RENDER_STEREO_MODE_OFF }, // unsupported + { "block_rl", RENDER_STEREO_MODE_OFF }, // unsupported ++ { "mvc_lr", RENDER_STEREO_MODE_HARDWAREBASED }, ++ { "mvc_rl", RENDER_STEREO_MODE_HARDWAREBASED }, ++ { "mvc_lr", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback ++ { "mvc_rl", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback + {} + }; + +@@ -310,7 +314,7 @@ int CStereoscopicsManager::ConvertVideoToGuiStereoMode(const std::string &mode) + size_t i = 0; + while (VideoModeToGuiModeMap[i].name) + { +- if 
(mode == VideoModeToGuiModeMap[i].name) ++ if (mode == VideoModeToGuiModeMap[i].name && g_Windowing.SupportsStereo(VideoModeToGuiModeMap[i].mode)) + return VideoModeToGuiModeMap[i].mode; + i++; + } + +From 0bcb7f56f0fa79c4d7af4c64e0b931a997045d72 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin +Date: Sat, 23 Jan 2016 10:21:32 +0300 +Subject: [PATCH 42/67] [VideoPlayer] Fix possible wrong aspect. + +--- + xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +index f6d1b8572c6a4a8b4a193ebfc9d36d85ccd2d819..6b97183835ce7d614e8814cb065ac168947f5ce1 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +@@ -182,7 +182,7 @@ void CVideoPlayerVideo::OpenStream(CDVDStreamInfo &hint, CDVDVideoCodec* codec) + } + + // use aspect in stream if available +- if(hint.forced_aspect) ++ if (hint.forced_aspect && !std::isnan(hint.aspect)) + m_fForcedAspectRatio = hint.aspect; + else + m_fForcedAspectRatio = 0.0; + +From b409948c86ffdb3b000a82333be9c4ddeb45ddd7 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin +Date: Fri, 22 Jan 2016 18:18:33 +0300 +Subject: [PATCH 43/67] [VideoPlayer] DemuxFFmpeg: ssif remux + +--- + project/VS2010Express/XBMC.vcxproj | 2 + + project/VS2010Express/XBMC.vcxproj.filters | 8 +- + xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt | 2 + + .../VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 47 ++++++- + .../cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.h | 2 + + .../VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp | 156 +++++++++++++++++++++ + .../VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h | 49 +++++++ + xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in | 1 + + xbmc/settings/AdvancedSettings.cpp | 2 +- + 9 files changed, 260 insertions(+), 9 deletions(-) + create mode 100644 xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp + create mode 100644 
xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h + +diff --git a/project/VS2010Express/XBMC.vcxproj b/project/VS2010Express/XBMC.vcxproj +index 164b608f77e4848bda558daf44dede1fc18a8fb4..601c5848ab9bda32e90fced986cf61dad38800bf 100644 +--- a/project/VS2010Express/XBMC.vcxproj ++++ b/project/VS2010Express/XBMC.vcxproj +@@ -295,6 +295,7 @@ copy "..\Win32BuildSetup\dependencies\python27.dll" "$(TargetDir)" + + + ++ + + + +@@ -1069,6 +1070,7 @@ copy "..\Win32BuildSetup\dependencies\python27.dll" "$(TargetDir)" + + + ++ + + + +diff --git a/project/VS2010Express/XBMC.vcxproj.filters b/project/VS2010Express/XBMC.vcxproj.filters +index b3c53788819764a400ea53e12440ba229735819c..b2d5230fdcd32f6db50e580f55cd7a63d4d19247 100644 +--- a/project/VS2010Express/XBMC.vcxproj.filters ++++ b/project/VS2010Express/XBMC.vcxproj.filters +@@ -3452,6 +3452,9 @@ + + dialogs + ++ ++ cores\VideoPlayer\DVDDemuxers ++ + + + +@@ -6704,6 +6707,9 @@ + + cores\AudioEngine\Engines\ActiveAE\AudioDSPAddons + ++ ++ cores\VideoPlayer\DVDDemuxers ++ + + + +@@ -6790,4 +6796,4 @@ + shaders + + +- +\ No newline at end of file ++ +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt b/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt +index 7d254f7650377485b909f26189d126455d49569a..65b369054c4ea329649a51f20f448394c70b110d 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt +@@ -5,6 +5,7 @@ set(SOURCES DemuxMultiSource.cpp + DVDDemuxCDDA.cpp + DVDDemuxClient.cpp + DVDDemuxFFmpeg.cpp ++ DVDDemuxStreamSSIF.cpp + DVDDemuxUtils.cpp + DVDDemuxVobsub.cpp + DVDFactoryDemuxer.cpp) +@@ -16,6 +17,7 @@ set(HEADERS DemuxMultiSource.h + DVDDemuxCDDA.h + DVDDemuxClient.h + DVDDemuxFFmpeg.h ++ DVDDemuxStreamSSIF.h + DVDDemuxPacket.h + DVDDemuxUtils.h + DVDDemuxVobsub.h +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 
16e8e270b5a060bd174f794480a8b178a620d490..4490b16318e1c54822cdbbf5fa6344d66c2fdbdd 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -164,6 +164,7 @@ CDVDDemuxFFmpeg::CDVDDemuxFFmpeg() : CDVDDemux() + m_currentPts = DVD_NOPTS_VALUE; + m_bMatroska = false; + m_bAVI = false; ++ m_pSSIF = nullptr; + m_speed = DVD_PLAYSPEED_NORMAL; + m_program = UINT_MAX; + m_pkt.result = -1; +@@ -537,6 +538,8 @@ void CDVDDemuxFFmpeg::Dispose() + m_pkt.result = -1; + av_packet_unref(&m_pkt.pkt); + ++ SAFE_DELETE(m_pSSIF); ++ + if (m_pFormatContext) + { + for (unsigned int i = 0; i < m_pFormatContext->nb_streams; i++) +@@ -587,6 +590,9 @@ void CDVDDemuxFFmpeg::Flush() + + m_displayTime = 0; + m_dtsAtDisplayTime = DVD_NOPTS_VALUE; ++ ++ if (m_pSSIF) ++ m_pSSIF->Flush(); + } + + void CDVDDemuxFFmpeg::Abort() +@@ -808,7 +814,9 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + { + Flush(); + } +- else if (IsProgramChange()) ++ // libavformat is confused by the interleaved SSIF. ++ // Disable program management for those ++ else if (!m_pSSIF && IsProgramChange()) + { + // update streams + CreateStreams(m_program); +@@ -836,6 +844,9 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + + m_pkt.result = -1; + av_packet_unref(&m_pkt.pkt); ++ ++ if (m_pSSIF) ++ m_pSSIF->Flush(); + } + else + { +@@ -845,7 +856,9 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + + if (IsVideoReady()) + { +- if (m_program != UINT_MAX) ++ // libavformat is confused by the interleaved SSIF. 
++ // Disable program management for those ++ if (!m_pSSIF && m_program != UINT_MAX ) + { + /* check so packet belongs to selected program */ + for (unsigned int i = 0; i < m_pFormatContext->programs[m_program]->nb_stream_indexes; i++) +@@ -994,6 +1007,15 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + stream = AddStream(pPacket->iStreamId); + } + } ++ if (stream && m_pSSIF) ++ { ++ if (stream->type == STREAM_VIDEO || ++ stream->type == STREAM_DATA) ++ pPacket = m_pSSIF->AddPacket(pPacket); ++ ++ if (stream->type == STREAM_DATA && stream->codec == AV_CODEC_ID_H264_MVC && pPacket->iSize) ++ stream = GetStream(pPacket->iStreamId); ++ } + if (!stream) + { + CLog::Log(LOGERROR, "CDVDDemuxFFmpeg::AddStream - internal error, stream is null"); +@@ -1018,6 +1040,9 @@ bool CDVDDemuxFFmpeg::SeekTime(int time, bool backwords, double *startpts) + m_pkt.result = -1; + av_packet_unref(&m_pkt.pkt); + ++ if (m_pSSIF) ++ m_pSSIF->Flush(); ++ + CDVDInputStream::IPosTime* ist = m_pInput->GetIPosTime(); + if (ist) + { +@@ -1085,6 +1110,9 @@ bool CDVDDemuxFFmpeg::SeekByte(int64_t pos) + m_pkt.result = -1; + av_packet_unref(&m_pkt.pkt); + ++ if (m_pSSIF) ++ m_pSSIF->Flush(); ++ + return (ret >= 0); + } + +@@ -1252,11 +1280,12 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + { + if (pStream->codec->codec_id == AV_CODEC_ID_H264_MVC) + { +- // ignore MVC extension streams, they are handled specially ++ if (!m_pSSIF) m_pSSIF = new CDVDDemuxStreamSSIF(); // AddStream may run again for an MVC stream; reuse the remuxer instead of leaking the previous one ++ m_pSSIF->SetMVCStreamId(streamIdx); ++ + stream = new CDemuxStream(); + stream->type = STREAM_DATA; +- stream->disabled = true; +- pStream->need_parsing = AVSTREAM_PARSE_NONE; ++ pStream->codec->codec_type = AVMEDIA_TYPE_DATA; + break; + } + CDemuxStreamVideoFFmpeg* st = new CDemuxStreamVideoFFmpeg(this, pStream); +@@ -1342,7 +1371,11 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + { + if (CDVDCodecUtils::IsH264AnnexB(m_pFormatContext->iformat->name, pStream)) + { +- // TODO ++ if (m_pSSIF) ++ { ++
m_pSSIF->SetH264StreamId(streamIdx); ++ pStream->codec->codec_tag = MKTAG('A', 'M', 'V', 'C'); ++ } + } + else if (CDVDCodecUtils::ProcessH264MVCExtradata(pStream->codec->extradata, pStream->codec->extradata_size)) + { +@@ -1435,7 +1468,7 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + if (langTag) + strncpy(stream->language, langTag->value, 3); + +- if( stream->type != STREAM_NONE && pStream->codec->extradata && pStream->codec->extradata_size > 0 ) ++ if (stream->type != STREAM_NONE && pStream->codec->extradata && pStream->codec->extradata_size > 0) + { + stream->ExtraSize = pStream->codec->extradata_size; + stream->ExtraData = new uint8_t[pStream->codec->extradata_size]; +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.h +index 60cfc6fe48df93210d6bb5a12f85af571dfa1f72..dae871cff339e085cf2aa6d8d921d20b0db03132 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.h ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.h +@@ -21,6 +21,7 @@ + */ + + #include "DVDDemux.h" ++#include "DVDDemuxStreamSSIF.h" + #include "threads/CriticalSection.h" + #include "threads/SystemClock.h" + #include +@@ -152,6 +153,7 @@ protected: + double m_currentPts; // used for stream length estimation + bool m_bMatroska; + bool m_bAVI; ++ CDVDDemuxStreamSSIF* m_pSSIF; + int m_speed; + unsigned m_program; + XbmcThreads::EndTime m_timeout; +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp +new file mode 100644 +index 0000000000000000000000000000000000000000..e99352a90f348a95673ef3442d3f6cb020cd57d4 +--- /dev/null ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp +@@ -0,0 +1,156 @@ ++/* ++* Copyright (C) 2005-2013 Team XBMC ++* http://xbmc.org ++* ++* This Program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License as published by ++* the Free 
Software Foundation; either version 2, or (at your option) ++* any later version. ++* ++* This Program is distributed in the hope that it will be useful, ++* but WITHOUT ANY WARRANTY; without even the implied warranty of ++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++* GNU General Public License for more details. ++* ++* You should have received a copy of the GNU General Public License ++* along with XBMC; see the file COPYING. If not, see ++* . ++* ++*/ ++ ++#include "DVDDemuxStreamSSIF.h" ++#include "DVDClock.h" ++#include "DVDDemuxUtils.h" ++#include "utils/log.h" ++ ++//#define DEBUG_VERBOSE ++ ++DemuxPacket* CDVDDemuxStreamSSIF::AddPacket(DemuxPacket* &srcPkt) ++{ ++ if (srcPkt->iStreamId != m_h264StreamId && ++ srcPkt->iStreamId != m_mvcStreamId) ++ return srcPkt; ++ ++ if (srcPkt->iStreamId == m_h264StreamId) ++ { ++ m_H264queue.push(srcPkt); ++ } ++ else if (srcPkt->iStreamId == m_mvcStreamId) ++ { ++ m_MVCqueue.push(srcPkt); ++ } ++ ++ return GetMVCPacket(); ++} ++ ++void CDVDDemuxStreamSSIF::Flush() ++{ ++ while (!m_H264queue.empty()) ++ { ++ CDVDDemuxUtils::FreeDemuxPacket(m_H264queue.front()); ++ m_H264queue.pop(); ++ } ++ while (!m_MVCqueue.empty()) ++ { ++ CDVDDemuxUtils::FreeDemuxPacket(m_MVCqueue.front()); ++ m_MVCqueue.pop(); ++ } ++} ++ ++DemuxPacket* CDVDDemuxStreamSSIF::MergePacket(DemuxPacket* &srcPkt, DemuxPacket* &appendPkt) ++{ ++ DemuxPacket* newpkt = NULL; ++ newpkt = CDVDDemuxUtils::AllocateDemuxPacket(srcPkt->iSize + appendPkt->iSize); ++ newpkt->iSize = srcPkt->iSize + appendPkt->iSize; ++ ++ newpkt->pts = srcPkt->pts; ++ newpkt->dts = srcPkt->dts; ++ newpkt->duration = srcPkt->duration; ++ newpkt->iGroupId = srcPkt->iGroupId; ++ newpkt->iStreamId = srcPkt->iStreamId; ++ memcpy(newpkt->pData, srcPkt->pData, srcPkt->iSize); ++ memcpy(newpkt->pData + srcPkt->iSize, appendPkt->pData, appendPkt->iSize); ++ ++ CDVDDemuxUtils::FreeDemuxPacket(srcPkt); ++ srcPkt = NULL; ++ CDVDDemuxUtils::FreeDemuxPacket(appendPkt); ++ 
appendPkt = NULL; ++ ++ return newpkt; ++} ++ ++DemuxPacket* CDVDDemuxStreamSSIF::GetMVCPacket() ++{ ++ // Here, we recreate a h264 MVC packet from the base one + buffered MVC NALU's ++ while (!m_H264queue.empty() && !m_MVCqueue.empty()) ++ { ++ DemuxPacket* h264pkt = m_H264queue.front(); ++ double tsH264 = (h264pkt->dts != DVD_NOPTS_VALUE ? h264pkt->dts : h264pkt->pts); ++ DemuxPacket* mvcpkt = m_MVCqueue.front(); ++ double tsMVC = (mvcpkt->dts != DVD_NOPTS_VALUE ? mvcpkt->dts : mvcpkt->pts); ++ ++ if (tsH264 == tsMVC) ++ { ++ m_H264queue.pop(); ++ m_MVCqueue.pop(); ++ ++ while (!m_H264queue.empty()) ++ { ++ DemuxPacket* pkt = m_H264queue.front(); ++ double ts = (pkt->dts != DVD_NOPTS_VALUE ? pkt->dts : pkt->pts); ++ if (ts == DVD_NOPTS_VALUE) ++ { ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC merge h264 fragment: %6d+%6d, pts(%.3f/%.3f) dts(%.3f/%.3f)", h264pkt->iSize, pkt->iSize, h264pkt->pts*1e-6, pkt->pts*1e-6, h264pkt->dts*1e-6, pkt->dts*1e-6); ++#endif ++ h264pkt = MergePacket(h264pkt, pkt); ++ m_H264queue.pop(); ++ } ++ else ++ break; ++ } ++ while (!m_MVCqueue.empty()) ++ { ++ DemuxPacket* pkt = m_MVCqueue.front(); ++ double ts = (pkt->dts != DVD_NOPTS_VALUE ? 
pkt->dts : pkt->pts); ++ if (ts == DVD_NOPTS_VALUE) ++ { ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC merge mvc fragment: %6d+%6d, pts(%.3f/%.3f) dts(%.3f/%.3f)", mvcpkt->iSize, pkt->iSize, mvcpkt->pts*1e-6, pkt->pts*1e-6, mvcpkt->dts*1e-6, pkt->dts*1e-6); ++#endif ++ mvcpkt = MergePacket(mvcpkt, pkt); ++ m_MVCqueue.pop(); ++ } ++ else ++ break; ++ } ++ ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC merge packet: %6d+%6d, pts(%.3f/%.3f) dts(%.3f/%.3f)", h264pkt->iSize, mvcpkt->iSize, h264pkt->pts*1e-6, mvcpkt->pts*1e-6, h264pkt->dts*1e-6, mvcpkt->dts*1e-6); ++#endif ++ return MergePacket(h264pkt, mvcpkt); ++ } ++ else if (tsH264 > tsMVC) ++ { ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC discard mvc: %6d, pts(%.3f) dts(%.3f)", mvcpkt->iSize, mvcpkt->pts*1e-6, mvcpkt->dts*1e-6); ++#endif ++ CDVDDemuxUtils::FreeDemuxPacket(mvcpkt); ++ m_MVCqueue.pop(); ++ } ++ else ++ { ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC discard h264: %6d, pts(%.3f) dts(%.3f)", h264pkt->iSize, h264pkt->pts*1e-6, h264pkt->dts*1e-6); ++#endif ++ CDVDDemuxUtils::FreeDemuxPacket(h264pkt); ++ m_H264queue.pop(); ++ } ++ } ++ ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC waiting. 
MVC(%d) H264(%d)", m_MVCqueue.size(), m_H264queue.size()); ++#endif ++ ++ return CDVDDemuxUtils::AllocateDemuxPacket(0); ++} +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h +new file mode 100644 +index 0000000000000000000000000000000000000000..8412627a3ea13f59bd2c96c23bd386e4b5b2658e +--- /dev/null ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h +@@ -0,0 +1,49 @@ ++#pragma once ++ ++/* ++* Copyright (C) 2005-2013 Team XBMC ++* http://xbmc.org ++* ++* This Program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License as published by ++* the Free Software Foundation; either version 2, or (at your option) ++* any later version. ++* ++* This Program is distributed in the hope that it will be useful, ++* but WITHOUT ANY WARRANTY; without even the implied warranty of ++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++* GNU General Public License for more details. ++* ++* You should have received a copy of the GNU General Public License ++* along with XBMC; see the file COPYING. If not, see ++* . 
++* ++*/ ++ ++#include "DVDDemuxPacket.h" ++#include ++ ++extern "C" { ++#include "libavformat/avformat.h" ++} ++ ++class CDVDDemuxStreamSSIF ++{ ++public: ++ CDVDDemuxStreamSSIF() {}; ++ ~CDVDDemuxStreamSSIF() { Flush(); } ++ ++ DemuxPacket* AddPacket(DemuxPacket* &scrPkt); ++ void Flush(); ++ void SetH264StreamId(int id) { m_h264StreamId = id; }; ++ void SetMVCStreamId(int id) { m_mvcStreamId = id; }; ++ ++private: ++ DemuxPacket* GetMVCPacket(); ++ DemuxPacket* MergePacket(DemuxPacket* &srcPkt, DemuxPacket* &appendPkt); ++ ++ std::queue m_H264queue; ++ std::queue m_MVCqueue; ++ int m_h264StreamId = 0; ++ int m_mvcStreamId = 0; ++}; +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in b/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in +index e4f8aed0af96fe0dceec4d8517087742f2c7df81..f3b717ddabb4729fe0db5ebab5a7913b8fe8297c 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in +@@ -10,6 +10,7 @@ SRCS += DVDDemuxUtils.cpp + SRCS += DVDDemuxVobsub.cpp + SRCS += DVDDemuxCC.cpp + SRCS += DVDFactoryDemuxer.cpp ++SRCS += DVDDemuxStreamSSIF.cpp + + LIB = DVDDemuxers.a + +diff --git a/xbmc/settings/AdvancedSettings.cpp b/xbmc/settings/AdvancedSettings.cpp +index ae21da29314ae8faa35129a79e62e82b55fbc306..8426b6c3f8f6af274e2990c8da323e4064db9b65 100644 +--- a/xbmc/settings/AdvancedSettings.cpp ++++ b/xbmc/settings/AdvancedSettings.cpp +@@ -392,7 +392,7 @@ void CAdvancedSettings::Initialize() + + m_pictureExtensions = ".png|.jpg|.jpeg|.bmp|.gif|.ico|.tif|.tiff|.tga|.pcx|.cbz|.zip|.cbr|.rar|.rss|.webp|.jp2|.apng"; + m_musicExtensions = ".nsv|.m4a|.flac|.aac|.strm|.pls|.rm|.rma|.mpa|.wav|.wma|.ogg|.mp3|.mp2|.m3u|.gdm|.imf|.m15|.sfx|.uni|.ac3|.dts|.cue|.aif|.aiff|.wpl|.ape|.mac|.mpc|.mp+|.mpp|.shn|.zip|.rar|.wv|.dsp|.xsp|.xwav|.waa|.wvs|.wam|.gcm|.idsp|.mpdsp|.mss|.spt|.rsd|.sap|.cmc|.cmr|.dmc|.mpt|.mpd|.rmt|.tmc|.tm8|.tm2|.oga|.url|.pxml|.tta|.rss|.wtv|.mka|.tak|.opus|.dff|.dsf"; +- m_videoExtensions = 
".m4v|.3g2|.3gp|.nsv|.tp|.ts|.ty|.strm|.pls|.rm|.rmvb|.mpd|.m3u|.m3u8|.ifo|.mov|.qt|.divx|.xvid|.bivx|.vob|.nrg|.img|.iso|.pva|.wmv|.asf|.asx|.ogm|.m2v|.avi|.bin|.dat|.mpg|.mpeg|.mp4|.mkv|.mk3d|.avc|.vp3|.svq3|.nuv|.viv|.dv|.fli|.flv|.rar|.001|.wpl|.zip|.vdr|.dvr-ms|.xsp|.mts|.m2t|.m2ts|.evo|.ogv|.sdp|.avs|.rec|.url|.pxml|.vc1|.h264|.rcv|.rss|.mpls|.webm|.bdmv|.wtv"; ++ m_videoExtensions = ".m4v|.3g2|.3gp|.nsv|.tp|.ts|.ty|.strm|.pls|.rm|.rmvb|.mpd|.m3u|.m3u8|.ifo|.mov|.qt|.divx|.xvid|.bivx|.vob|.nrg|.img|.iso|.pva|.wmv|.asf|.asx|.ogm|.m2v|.avi|.bin|.dat|.mpg|.mpeg|.mp4|.mkv|.mk3d|.avc|.vp3|.svq3|.nuv|.viv|.dv|.fli|.flv|.rar|.001|.wpl|.zip|.vdr|.dvr-ms|.xsp|.mts|.m2t|.m2ts|.evo|.ogv|.sdp|.avs|.rec|.url|.pxml|.vc1|.h264|.rcv|.rss|.mpls|.webm|.bdmv|.wtv|.ssif"; + m_subtitlesExtensions = ".utf|.utf8|.utf-8|.sub|.srt|.smi|.rt|.txt|.ssa|.text|.ssa|.aqt|.jss|.ass|.idx|.ifo|.rar|.zip"; + m_discStubExtensions = ".disc"; + // internal music extensions + +From ac2167deb4a7e8408903ca2aab446b3d0d954fa7 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin +Date: Tue, 23 Feb 2016 16:01:08 +0300 +Subject: [PATCH 44/67] [libbluray] bump libbluray to 0.9.2-mvc. 
+ +--- + project/BuildDependencies/scripts/0_package.list | 2 +- + xbmc/DllPaths_win32.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/project/BuildDependencies/scripts/0_package.list b/project/BuildDependencies/scripts/0_package.list +index 71024bfb7da48ddb033b159f83037319176229b4..2565d1d08f6591955266fcca3f1a8031db4379e4 100644 +--- a/project/BuildDependencies/scripts/0_package.list ++++ b/project/BuildDependencies/scripts/0_package.list +@@ -16,7 +16,7 @@ freetype-2.4.6-win32-3.7z + giflib-5.1.4-win32-vc140.7z + jsonschemabuilder-1.0.0-win32-3.7z + libass-0.12.1-win32.7z +-libbluray-0.8.1-win32-vc120.7z ++libbluray-0.9.2-mvc-win32-vc120.7z + libcdio-0.83-win32-2.7z + libcec-3.0.0-win32-2.7z + libexpat_2.0.1-win32.7z +diff --git a/xbmc/DllPaths_win32.h b/xbmc/DllPaths_win32.h +index 3748589f39b1f83f1e23e9eb4f64eddcf61cb030..ff34ff541049ad7d2fa5472c49e6412e0d68056b 100644 +--- a/xbmc/DllPaths_win32.h ++++ b/xbmc/DllPaths_win32.h +@@ -35,7 +35,7 @@ + #define DLL_PATH_LIBDVDNAV "special://xbmcbin/system/players/VideoPlayer/libdvdnav.dll" + + /* libbluray */ +-#define DLL_PATH_LIBBLURAY "special://xbmcbin/system/players/dvdplayer/libbluray.dll" ++#define DLL_PATH_LIBBLURAY "special://xbmcbin/system/players/VideoPlayer/libbluray.dll" + + #endif + + +From 6bb5fcf3d003296bbe290c171577bb65ba6ea04d Mon Sep 17 00:00:00 2001 +From: Anton Fedchin +Date: Tue, 23 Feb 2016 16:02:46 +0300 +Subject: [PATCH 45/67] [3DBD] Added support of 3D-BluRay playback. 
+ +--- + lib/DllLibbluray.h | 8 + + project/VS2010Express/XBMC.vcxproj | 2 + + project/VS2010Express/XBMC.vcxproj.filters | 6 + + xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt | 2 + + .../VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 57 ++++- + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.cpp | 262 +++++++++++++++++++++ + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.h | 57 +++++ + .../VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp | 40 +++- + .../VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h | 12 +- + xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in | 1 + + .../DVDInputStreams/DVDInputStreamBluray.cpp | 159 +++++++++++-- + .../DVDInputStreams/DVDInputStreamBluray.h | 20 ++ + 12 files changed, 591 insertions(+), 35 deletions(-) + create mode 100644 xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.cpp + create mode 100644 xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.h + +diff --git a/lib/DllLibbluray.h b/lib/DllLibbluray.h +index f5a337fe19beff472557c97ff7a203ad30a912b2..03f93391265e164837c2a17a8fe6d7da41c2f13e 100644 +--- a/lib/DllLibbluray.h ++++ b/lib/DllLibbluray.h +@@ -31,6 +31,8 @@ extern "C" + #include + #include + #include ++#include ++#include + } + + class DllLibblurayInterface +@@ -80,6 +82,8 @@ public: + #endif + virtual int bd_menu_call (BLURAY *bd, int64_t pts)=0; + virtual int bd_mouse_select (BLURAY *bd, int64_t pts, uint16_t x, uint16_t y)=0; ++ virtual MPLS_PL* bd_get_title_mpls (BLURAY *bd) = 0; ++ virtual int bd_get_clip_infos (BLURAY *bd, unsigned clip, uint64_t *clip_start_time, uint64_t *stream_start_time, uint64_t *pos, uint64_t *duration) = 0; + }; + + class DllLibbluray : public DllDynamic, DllLibblurayInterface +@@ -128,6 +132,8 @@ class DllLibbluray : public DllDynamic, DllLibblurayInterface + #endif + DEFINE_METHOD2(int, bd_menu_call, (BLURAY *p1, int64_t p2)) + DEFINE_METHOD4(int, bd_mouse_select, (BLURAY *p1, int64_t p2, uint16_t p3, uint16_t p4)) ++ DEFINE_METHOD1(MPLS_PL*, bd_get_title_mpls, (BLURAY *p1)) ++ DEFINE_METHOD6(int, 
bd_get_clip_infos, (BLURAY *p1, unsigned p2, uint64_t *p3, uint64_t *p4, uint64_t *p5, uint64_t *p6)) + + BEGIN_METHOD_RESOLVE() + RESOLVE_METHOD(bd_get_titles) +@@ -172,6 +178,8 @@ class DllLibbluray : public DllDynamic, DllLibblurayInterface + #endif + RESOLVE_METHOD(bd_menu_call) + RESOLVE_METHOD(bd_mouse_select) ++ RESOLVE_METHOD(bd_get_title_mpls) ++ RESOLVE_METHOD(bd_get_clip_infos) + END_METHOD_RESOLVE() + + public: +diff --git a/project/VS2010Express/XBMC.vcxproj b/project/VS2010Express/XBMC.vcxproj +index 601c5848ab9bda32e90fced986cf61dad38800bf..189b698f57d1e2bbb50dd7541136309c59a1fb84 100644 +--- a/project/VS2010Express/XBMC.vcxproj ++++ b/project/VS2010Express/XBMC.vcxproj +@@ -295,6 +295,7 @@ copy "..\Win32BuildSetup\dependencies\python27.dll" "$(TargetDir)" + + + ++ + + + +@@ -1070,6 +1071,7 @@ copy "..\Win32BuildSetup\dependencies\python27.dll" "$(TargetDir)" + + + ++ + + + +diff --git a/project/VS2010Express/XBMC.vcxproj.filters b/project/VS2010Express/XBMC.vcxproj.filters +index b2d5230fdcd32f6db50e580f55cd7a63d4d19247..65d8e075716c05669c1e5665de9e3ba0ac1188ea 100644 +--- a/project/VS2010Express/XBMC.vcxproj.filters ++++ b/project/VS2010Express/XBMC.vcxproj.filters +@@ -3455,6 +3455,9 @@ + + cores\VideoPlayer\DVDDemuxers + ++ ++ cores\VideoPlayer\DVDDemuxers ++ + + + +@@ -6710,6 +6713,9 @@ + + cores\VideoPlayer\DVDDemuxers + ++ ++ cores\VideoPlayer\DVDDemuxers ++ + + + +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt b/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt +index 65b369054c4ea329649a51f20f448394c70b110d..2706bcadc177a4f8f9c12c3be7976f7a0f81fc8f 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt +@@ -5,6 +5,7 @@ set(SOURCES DemuxMultiSource.cpp + DVDDemuxCDDA.cpp + DVDDemuxClient.cpp + DVDDemuxFFmpeg.cpp ++ DVDDemuxMVC.cpp + DVDDemuxStreamSSIF.cpp + DVDDemuxUtils.cpp + DVDDemuxVobsub.cpp +@@ -17,6 +18,7 @@ set(HEADERS DemuxMultiSource.h + DVDDemuxCDDA.h 
+ DVDDemuxClient.h + DVDDemuxFFmpeg.h ++ DVDDemuxMVC.h + DVDDemuxStreamSSIF.h + DVDDemuxPacket.h + DVDDemuxUtils.h +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 4490b16318e1c54822cdbbf5fa6344d66c2fdbdd..54e4d0b66680a08c1e4c1be343fabe4371aec6af 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -27,6 +27,7 @@ + #include "cores/FFmpeg.h" + #include "DVDCodecs/DVDCodecUtils.h" + #include "DVDClock.h" // for DVD_TIME_BASE ++#include "DVDDemuxMVC.h" + #include "DVDDemuxUtils.h" + #include "DVDInputStreams/DVDInputStream.h" + #include "DVDInputStreams/DVDInputStreamFFmpeg.h" +@@ -497,6 +498,16 @@ bool CDVDDemuxFFmpeg::Open(CDVDInputStream* pInput, bool streaminfo, bool filein + + UpdateCurrentPTS(); + ++ if (!fileinfo && m_pInput->IsStreamType(DVDSTREAM_TYPE_BLURAY)) ++ { ++ CDVDInputStreamBluray *bluRay = static_cast(m_pInput); ++ if (bluRay->HasMVC()) ++ { ++ SAFE_DELETE(m_pSSIF); ++ m_pSSIF = new CDVDDemuxStreamSSIF(); ++ m_pSSIF->SetBluRay(bluRay); ++ } ++ } + // in case of mpegts and we have not seen pat/pmt, defer creation of streams + if (!skipCreateStreams || m_pFormatContext->nb_programs > 0) + { +@@ -814,9 +825,7 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + { + Flush(); + } +- // libavformat is confused by the interleaved SSIF. +- // Disable program management for those +- else if (!m_pSSIF && IsProgramChange()) ++ else if (IsProgramChange()) + { + // update streams + CreateStreams(m_program); +@@ -857,8 +866,7 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + if (IsVideoReady()) + { + // libavformat is confused by the interleaved SSIF. 
+- // Disable program management for those +- if (!m_pSSIF && m_program != UINT_MAX ) ++ if ((!m_pSSIF || m_pSSIF->IsBluRay()) && m_program != UINT_MAX) + { + /* check so packet belongs to selected program */ + for (unsigned int i = 0; i < m_pFormatContext->programs[m_program]->nb_stream_indexes; i++) +@@ -1009,10 +1017,7 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + } + if (stream && m_pSSIF) + { +- if (stream->type == STREAM_VIDEO || +- stream->type == STREAM_DATA) +- pPacket = m_pSSIF->AddPacket(pPacket); +- ++ pPacket = m_pSSIF->AddPacket(pPacket); + if (stream->type == STREAM_DATA && stream->codec == AV_CODEC_ID_H264_MVC && pPacket->iSize) + stream = GetStream(pPacket->iStreamId); + } +@@ -1375,6 +1380,29 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + { + m_pSSIF->SetH264StreamId(streamIdx); + pStream->codec->codec_tag = MKTAG('A', 'M', 'V', 'C'); ++ ++ AVStream* mvcStream = nullptr; ++ if (m_pInput->IsStreamType(DVDSTREAM_TYPE_BLURAY)) ++ { ++ CDVDInputStreamBluray *bluRay = static_cast(m_pInput); ++ if (bluRay->HasMVC()) ++ { ++ st->stereo_mode = bluRay->AreEyesFlipped() ? 
"mvc_rl" : "mvc_lr"; ++ mvcStream = static_cast(bluRay->GetDemuxMVC())->GetAVStream(); ++ } ++ } ++ else ++ mvcStream = m_pFormatContext->streams[m_pSSIF->GetMVCStreamId()]; ++ ++ if (mvcStream && pStream->codec->extradata_size > 0 && mvcStream->codec->extradata_size > 0) ++ { ++ uint8_t* extr = pStream->codec->extradata; ++ pStream->codec->extradata = (uint8_t*)av_mallocz(pStream->codec->extradata_size + mvcStream->codec->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE); ++ memcpy(pStream->codec->extradata, extr, pStream->codec->extradata_size); ++ memcpy(pStream->codec->extradata + pStream->codec->extradata_size, mvcStream->codec->extradata, mvcStream->codec->extradata_size); ++ pStream->codec->extradata_size += mvcStream->codec->extradata_size; ++ av_free(extr); ++ } + } + } + else if (CDVDCodecUtils::ProcessH264MVCExtradata(pStream->codec->extradata, pStream->codec->extradata_size)) +@@ -1635,6 +1663,12 @@ bool CDVDDemuxFFmpeg::SeekChapter(int chapter, double* startpts) + } + + Flush(); ++ if (m_pInput->IsStreamType(DVDSTREAM_TYPE_BLURAY) ++ && static_cast(m_pInput)->HasMVC()) ++ { ++ // also empty the internal ffmpeg buffer otherwise it may cause MVC buffers hang ++ m_ioContext->buf_ptr = m_ioContext->buf_end; ++ } + return true; + } + +@@ -1704,6 +1738,11 @@ std::string CDVDDemuxFFmpeg::GetStreamCodecName(int iStreamId) + + bool CDVDDemuxFFmpeg::IsProgramChange() + { ++ // libavformat is confused by the interleaved SSIF. 
++ // disable program management for those ++ if (m_pSSIF && !m_pSSIF->IsBluRay()) ++ return false; ++ + if (m_program == UINT_MAX) + return false; + +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.cpp +new file mode 100644 +index 0000000000000000000000000000000000000000..4ed7c439e7c36de211f2136c9b6b9a95549fe634 +--- /dev/null ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.cpp +@@ -0,0 +1,262 @@ ++/* ++* Copyright (C) 2005-2013 Team XBMC ++* http://xbmc.org ++* ++* This Program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License as published by ++* the Free Software Foundation; either version 2, or (at your option) ++* any later version. ++* ++* This Program is distributed in the hope that it will be useful, ++* but WITHOUT ANY WARRANTY; without even the implied warranty of ++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++* GNU General Public License for more details. ++* ++* You should have received a copy of the GNU General Public License ++* along with XBMC; see the file COPYING. If not, see ++* . 
++* ++*/ ++ ++#include "DVDDemuxMVC.h" ++#include "DVDDemuxUtils.h" ++#include "DVDInputStreams/DVDInputStream.h" ++#include "DVDClock.h" ++#include "cores/FFmpeg.h" ++#include "utils/log.h" ++ ++extern "C" { ++#include "libavutil/opt.h" ++}; ++ ++#define MVC_SEEK_TIME_WINDOW 75000 // experimental value; depends on how accurate seeking is ++ ++static int mvc_file_read(void *h, uint8_t* buf, int size) ++{ ++ CDVDInputStream* pInputStream = static_cast<CDVDDemuxMVC*>(h)->m_pInput; ++ return pInputStream->Read(buf, size); ++} ++ ++static int64_t mvc_file_seek(void *h, int64_t pos, int whence) ++{ ++ CDVDInputStream* pInputStream = static_cast<CDVDDemuxMVC*>(h)->m_pInput; ++ if (whence == AVSEEK_SIZE) ++ return pInputStream->GetLength(); ++ else ++ return pInputStream->Seek(pos, whence & ~AVSEEK_FORCE); ++} ++ ++CDVDDemuxMVC::CDVDDemuxMVC() ++{ ++ m_ioContext = nullptr; ++ m_pFormatContext = nullptr; ++ m_pInput = nullptr; ++ m_nStreamIndex = -1; ++} ++ ++CDVDDemuxMVC::~CDVDDemuxMVC() ++{ ++ Dispose(); ++} ++ ++bool CDVDDemuxMVC::Open(CDVDInputStream* pInput) ++{ ++ int ret; ++ ++ if (!pInput) ++ return false; ++ m_pInput = pInput; ++ ++ unsigned char* buffer = (unsigned char*)av_malloc(FFMPEG_FILE_BUFFER_SIZE); ++ m_ioContext = avio_alloc_context(buffer, FFMPEG_FILE_BUFFER_SIZE, 0, this, mvc_file_read, NULL, mvc_file_seek); ++ m_ioContext->max_packet_size = m_pInput->GetBlockSize(); ++ if (m_ioContext->max_packet_size) ++ m_ioContext->max_packet_size *= FFMPEG_FILE_BUFFER_SIZE / m_ioContext->max_packet_size; ++ ++ m_pFormatContext = avformat_alloc_context(); ++ m_pFormatContext->pb = m_ioContext; ++ ++ AVInputFormat *format = av_find_input_format("mpegts"); ++ ret = avformat_open_input(&m_pFormatContext, m_pInput->GetFileName().c_str(), format, nullptr); ++ if (ret < 0) ++ { ++ CLog::Log(LOGDEBUG, "%s: Opening MVC demuxing context failed (%d)", __FUNCTION__, ret); ++ Dispose(); ++ return false; ++ } ++ ++ av_opt_set_int(m_pFormatContext, "analyzeduration", 500000, 0); ++ av_opt_set_int(m_pFormatContext, 
"correct_ts_overflow", 0, 0); ++ m_pFormatContext->flags |= AVFMT_FLAG_KEEP_SIDE_DATA; ++ ++ // Find the streams ++ ret = avformat_find_stream_info(m_pFormatContext, nullptr); ++ //it always returns -1 so just ignore it ++ //if (ret < 0) ++ //{ ++ // CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::OpenMVCDemuxer(): avformat_find_stream_info failed (%d)", ret); ++ // Dispose(); ++ // return false; ++ //} ++ ++ // print some extra information ++ av_dump_format(m_pFormatContext, 0, m_pInput->GetFileName().c_str(), 0); ++ ++ // Find and select our MVC stream ++ CLog::Log(LOGDEBUG, "%s: MVC m2ts has %d streams", __FUNCTION__, m_pFormatContext->nb_streams); ++ for (unsigned i = 0; i < m_pFormatContext->nb_streams; i++) ++ { ++ if (m_pFormatContext->streams[i]->codec->codec_id == AV_CODEC_ID_H264_MVC ++ && m_pFormatContext->streams[i]->codec->extradata_size > 0) ++ { ++ m_nStreamIndex = i; ++ break; ++ } ++ else ++ m_pFormatContext->streams[i]->discard = AVDISCARD_ALL; ++ } ++ ++ if (m_nStreamIndex < 0) ++ { ++ CLog::Log(LOGDEBUG, "%s: MVC Stream not found", __FUNCTION__); ++ Dispose(); ++ return false; ++ } ++ ++ return true; ++} ++ ++void CDVDDemuxMVC::Reset() ++{ ++ CDVDInputStream* pInput = m_pInput; ++ Dispose(); ++ Open(pInput); ++} ++ ++void CDVDDemuxMVC::Abort() ++{ ++} ++ ++void CDVDDemuxMVC::Flush() ++{ ++ if (m_pFormatContext) ++ avformat_flush(m_pFormatContext); ++} ++ ++DemuxPacket* CDVDDemuxMVC::Read() // next packet of the selected MVC stream, or nullptr when nothing can be read ++{ ++ int ret; ++ AVPacket mvcPacket = { 0 }; ++ av_init_packet(&mvcPacket); ++ ++ while (m_pFormatContext) // no context after a failed Open()/Reset() or Dispose(): fall through to nullptr ++ { ++ ret = av_read_frame(m_pFormatContext, &mvcPacket); ++ ++ if (ret == AVERROR(EINTR) || ret == AVERROR(EAGAIN)) ++ continue; ++ else if (ret < 0) // EOF or any other read error: give up instead of spinning forever on a blank packet ++ break; ++ else if (mvcPacket.size <= 0 || mvcPacket.stream_index != m_nStreamIndex) ++ { ++ av_packet_unref(&mvcPacket); ++ continue; ++ } ++ else ++ { ++ AVStream *stream = m_pFormatContext->streams[mvcPacket.stream_index]; ++ double dts = ConvertTimestamp(mvcPacket.dts, stream->time_base.den, 
stream->time_base.num); ++ double pts = ConvertTimestamp(mvcPacket.pts, stream->time_base.den, stream->time_base.num); ++ ++ DemuxPacket* newPkt = CDVDDemuxUtils::AllocateDemuxPacket(mvcPacket.size); ++ if (mvcPacket.data) ++ memcpy(newPkt->pData, mvcPacket.data, mvcPacket.size); ++ newPkt->iSize = mvcPacket.size; ++ newPkt->dts = dts; ++ newPkt->pts = pts; ++ newPkt->iStreamId = stream->id; ++ ++ av_packet_unref(&mvcPacket); ++ return newPkt; ++ } ++ } ++ ++ return nullptr; ++} ++ ++bool CDVDDemuxMVC::SeekTime(int time, bool backwords, double* startpts) ++{ ++ if (!m_pInput || !m_pFormatContext) // the format context is dereferenced below, so it must exist too ++ return false; ++ ++ AVRational time_base = m_pFormatContext->streams[m_nStreamIndex]->time_base; ++ int64_t seek_pts = av_rescale(DVD_MSEC_TO_TIME(time), time_base.den, (int64_t)time_base.num * AV_TIME_BASE); ++ int64_t starttime = 0; ++ ++ if (m_pFormatContext->start_time != (int64_t)AV_NOPTS_VALUE) ++ starttime = av_rescale(m_pFormatContext->start_time, time_base.den, (int64_t)time_base.num * AV_TIME_BASE); ++ if (starttime != 0) ++ seek_pts += starttime; ++ if (seek_pts < MVC_SEEK_TIME_WINDOW) ++ seek_pts = 0; ++ else ++ seek_pts -= MVC_SEEK_TIME_WINDOW; ++ ++ av_seek_frame(m_pFormatContext, m_nStreamIndex, seek_pts, AVSEEK_FLAG_BACKWARD); ++ return true; ++} ++ ++std::string CDVDDemuxMVC::GetFileName() ++{ ++ return m_pInput->GetFileName(); ++} ++ ++AVStream* CDVDDemuxMVC::GetAVStream() ++{ ++ return m_pFormatContext ? 
m_pFormatContext->streams[m_nStreamIndex] : nullptr; ++} ++ ++void CDVDDemuxMVC::Dispose() ++{ ++ if (m_pFormatContext) ++ avformat_close_input(&m_pFormatContext); ++ ++ if (m_ioContext) ++ { ++ av_free(m_ioContext->buffer); ++ av_free(m_ioContext); ++ } ++ ++ m_ioContext = nullptr; ++ m_pFormatContext = nullptr; ++ m_pInput = nullptr; ++ m_nStreamIndex = -1; ++} ++ ++double CDVDDemuxMVC::ConvertTimestamp(int64_t pts, int den, int num) ++{ ++ if (pts == (int64_t)AV_NOPTS_VALUE) ++ return DVD_NOPTS_VALUE; ++ ++ // do calculations in floats as they can easily overflow otherwise ++ // we don't care for having a completely exact timestamp anyway ++ double timestamp = (double)pts * num / den; ++ double starttime = 0.0f; ++ ++ /*if (m_MVCFormatContext->start_time != (int64_t)AV_NOPTS_VALUE) ++ starttime = (double)m_MVCFormatContext->start_time / AV_TIME_BASE;*/ ++ ++ if (timestamp > starttime) ++ timestamp -= starttime; ++ // allow for largest possible difference in pts and dts for a single packet ++ else if (timestamp + 0.5f > starttime) ++ timestamp = 0; ++ ++ return timestamp * DVD_TIME_BASE; ++} ++ ++std::vector<CDemuxStream*> CDVDDemuxMVC::GetStreams() const ++{ ++ std::vector<CDemuxStream*> streams; ++ return streams; ++} +\ No newline at end of file +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.h +new file mode 100644 +index 0000000000000000000000000000000000000000..284358f282ed3d708be5929e6b04d6f49782079d +--- /dev/null ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.h +@@ -0,0 +1,57 @@ ++#pragma once ++ ++/* ++* Copyright (C) 2005-2013 Team XBMC ++* http://xbmc.org ++* ++* This Program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License as published by ++* the Free Software Foundation; either version 2, or (at your option) ++* any later version. 
++* ++* This Program is distributed in the hope that it will be useful, ++* but WITHOUT ANY WARRANTY; without even the implied warranty of ++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++* GNU General Public License for more details. ++* ++* You should have received a copy of the GNU General Public License ++* along with XBMC; see the file COPYING. If not, see ++* . ++* ++*/ ++ ++#include "DVDDemux.h" ++ ++extern "C" { ++#include "libavformat/avformat.h" ++} ++ ++class CDVDDemuxMVC : public CDVDDemux ++{ ++public: ++ CDVDDemuxMVC(); ++ virtual ~CDVDDemuxMVC(); ++ bool Open(CDVDInputStream* pInput); ++ virtual void Reset(); ++ virtual void Abort(); ++ virtual void Flush(); ++ virtual DemuxPacket* Read(); ++ virtual bool SeekTime(int time, bool backwords = false, double* startpts = nullptr); ++ virtual void SetSpeed(int iSpeed) { }; ++ virtual int GetStreamLength() { return 0; }; ++ virtual CDemuxStream* GetStream(int iStreamId) const override { return nullptr; }; ++ virtual std::vector GetStreams() const override; ++ virtual int GetNrOfStreams() const override { return 1; }; ++ virtual std::string GetFileName(); ++ ++ AVStream* GetAVStream(); ++ CDVDInputStream* m_pInput; ++ ++private: ++ void Dispose(); ++ double ConvertTimestamp(int64_t pts, int den, int num); ++ ++ AVIOContext *m_ioContext = nullptr; ++ AVFormatContext *m_pFormatContext = nullptr; ++ int m_nStreamIndex = -1; ++}; +\ No newline at end of file +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp +index e99352a90f348a95673ef3442d3f6cb020cd57d4..e390529bb8602af04c3853337821123546fb098d 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp +@@ -19,11 +19,13 @@ + */ + + #include "DVDDemuxStreamSSIF.h" ++#include "DVDDemux.h" + #include "DVDClock.h" + #include "DVDDemuxUtils.h" + #include "utils/log.h" + + //#define 
DEBUG_VERBOSE ++#define MVC_QUEUE_SIZE 100 + + DemuxPacket* CDVDDemuxStreamSSIF::AddPacket(DemuxPacket* &srcPkt) + { +@@ -37,7 +39,7 @@ DemuxPacket* CDVDDemuxStreamSSIF::AddPacket(DemuxPacket* &srcPkt) + } + else if (srcPkt->iStreamId == m_mvcStreamId) + { +- m_MVCqueue.push(srcPkt); ++ AddMVCExtPacket(srcPkt); + } + + return GetMVCPacket(); +@@ -81,6 +83,10 @@ DemuxPacket* CDVDDemuxStreamSSIF::MergePacket(DemuxPacket* &srcPkt, DemuxPacket* + + DemuxPacket* CDVDDemuxStreamSSIF::GetMVCPacket() + { ++ // if input is a bluray fill mvc queue before processing ++ if (m_bluRay && m_MVCqueue.empty() && !m_H264queue.empty()) ++ FillMVCQueue(m_H264queue.front()->dts); ++ + // Here, we recreate a h264 MVC packet from the base one + buffered MVC NALU's + while (!m_H264queue.empty() && !m_MVCqueue.empty()) + { +@@ -151,6 +157,36 @@ DemuxPacket* CDVDDemuxStreamSSIF::GetMVCPacket() + #if defined(DEBUG_VERBOSE) + CLog::Log(LOGDEBUG, ">>> MVC waiting. MVC(%d) H264(%d)", m_MVCqueue.size(), m_H264queue.size()); + #endif +- + return CDVDDemuxUtils::AllocateDemuxPacket(0); + } ++ ++void CDVDDemuxStreamSSIF::AddMVCExtPacket(DemuxPacket* &mvcExtPkt) ++{ ++ m_MVCqueue.push(mvcExtPkt); ++} ++ ++bool CDVDDemuxStreamSSIF::FillMVCQueue(double dtsBase) ++{ ++ if (!m_bluRay) ++ return false; ++ ++ CDVDDemux* demux = m_bluRay->GetDemuxMVC(); ++ DemuxPacket* mvc; ++ while ((m_MVCqueue.size() < MVC_QUEUE_SIZE) && (mvc = demux->Read())) ++ { ++ if (dtsBase == DVD_NOPTS_VALUE || mvc->dts == DVD_NOPTS_VALUE) ++ { ++ // do nothing, can't compare timestamps when they are not set ++ } ++ else if (mvc->dts < dtsBase) ++ { ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC discard mvc: %6d, pts(%.3f) dts(%.3f)", mvc->iSize, mvc->pts*1e-6, mvc->dts*1e-6); ++#endif ++ CDVDDemuxUtils::FreeDemuxPacket(mvc); ++ continue; ++ } ++ AddMVCExtPacket(mvc); ++ }; ++ return m_MVCqueue.size() == MVC_QUEUE_SIZE; ++} +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h 
b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h +index 8412627a3ea13f59bd2c96c23bd386e4b5b2658e..579c382dca988b0c37e5da7396803f70d4cf15ff 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h +@@ -21,6 +21,7 @@ + */ + + #include "DVDDemuxPacket.h" ++#include "DVDInputStreams/DVDInputStreamBluray.h" + #include + + extern "C" { +@@ -37,13 +38,20 @@ public: + void Flush(); + void SetH264StreamId(int id) { m_h264StreamId = id; }; + void SetMVCStreamId(int id) { m_mvcStreamId = id; }; ++ int GetH264StreamId() { return m_h264StreamId; }; ++ int GetMVCStreamId() { return m_mvcStreamId; }; ++ void AddMVCExtPacket(DemuxPacket* &scrPkt); ++ void SetBluRay(CDVDInputStreamBluray* &bluRay) { m_bluRay = bluRay; }; ++ bool IsBluRay() { return m_bluRay != nullptr; }; + + private: + DemuxPacket* GetMVCPacket(); + DemuxPacket* MergePacket(DemuxPacket* &srcPkt, DemuxPacket* &appendPkt); ++ bool FillMVCQueue(double dtsBase); + ++ CDVDInputStreamBluray* m_bluRay = nullptr; + std::queue m_H264queue; + std::queue m_MVCqueue; +- int m_h264StreamId = 0; +- int m_mvcStreamId = 0; ++ int m_h264StreamId = -1; ++ int m_mvcStreamId = -1; + }; +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in b/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in +index f3b717ddabb4729fe0db5ebab5a7913b8fe8297c..80ceeeaea6f061ec0b82f95d1edf7c572960397c 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in +@@ -11,6 +11,7 @@ SRCS += DVDDemuxVobsub.cpp + SRCS += DVDDemuxCC.cpp + SRCS += DVDFactoryDemuxer.cpp + SRCS += DVDDemuxStreamSSIF.cpp ++SRCS += DVDDemuxMVC.cpp + + LIB = DVDDemuxers.a + +diff --git a/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.cpp b/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.cpp +index 6ccd9a8c00fdc5175df3ecbb3a5d30dc93e319ab..5e85db23b09c920c4b19f3e7cae6e3f8ccae2db9 100644 +--- 
a/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.cpp ++++ b/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.cpp +@@ -26,6 +26,8 @@ + #include "IVideoPlayer.h" + #include "DVDCodecs/Overlay/DVDOverlay.h" + #include "DVDCodecs/Overlay/DVDOverlayImage.h" ++#include "DVDInputStreamFile.h" ++#include "DVDDemuxers/DVDDemuxMVC.h" + #include "settings/Settings.h" + #include "LangInfo.h" + #include "utils/log.h" +@@ -231,10 +233,8 @@ bool CDVDInputStreamBluray::IsEOF() + + BLURAY_TITLE_INFO* CDVDInputStreamBluray::GetTitleLongest() + { +- int titles = m_dll->bd_get_titles(m_bd, TITLES_RELEVANT, 0); +- + BLURAY_TITLE_INFO *s = NULL; +- for(int i=0; i < titles; i++) ++ for(int i=0; i < m_nTitles; i++) + { + BLURAY_TITLE_INFO *t = m_dll->bd_get_title_info(m_bd, i, 0); + if(!t) +@@ -326,6 +326,7 @@ bool CDVDInputStreamBluray::Open() + return false; + } + ++ m_root = root; + const BLURAY_DISC_INFO *disc_info; + + disc_info = m_dll->bd_get_disc_info(m_bd); +@@ -349,6 +350,7 @@ bool CDVDInputStreamBluray::Open() + CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::Open - BD+ detected : %d", disc_info->bdplus_detected); + CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::Open - libbdplus detected : %d", disc_info->libbdplus_detected); + CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::Open - BD+ handled : %d", disc_info->bdplus_handled); ++ CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::Open - 3D content exist : %d", disc_info->content_exist_3D); + } + else + CLog::Log(LOGERROR, "CDVDInputStreamBluray::Open - BluRay not detected"); +@@ -365,6 +367,7 @@ bool CDVDInputStreamBluray::Open() + return false; + } + ++ m_nTitles = m_dll->bd_get_titles(m_bd, TITLES_RELEVANT, 0); + int mode = CSettings::GetInstance().GetInt(CSettings::SETTING_DISC_PLAYBACK); + + if (URIUtils::HasExtension(filename, ".mpls")) +@@ -393,18 +396,17 @@ bool CDVDInputStreamBluray::Open() + m_title = GetTitleLongest(); + } + +- if(m_navmode) ++ SetupPlayerSettings(); ++ m_dll->bd_get_event(m_bd, NULL); ++ ++ if 
(m_navmode) + { +- SetupPlayerSettings(); + + m_dll->bd_register_overlay_proc (m_bd, this, bluray_overlay_cb); + #ifdef HAVE_LIBBLURAY_BDJ + m_dll->bd_register_argb_overlay_proc (m_bd, this, bluray_overlay_argb_cb, NULL); + #endif + +- m_dll->bd_get_event(m_bd, NULL); +- +- + if(m_dll->bd_play(m_bd) <= 0) + { + CLog::Log(LOGERROR, "CDVDInputStreamBluray::Open - failed play disk %s", strPath.c_str()); +@@ -419,21 +421,25 @@ bool CDVDInputStreamBluray::Open() + CLog::Log(LOGERROR, "CDVDInputStreamBluray::Open - failed to get title info"); + return false; + } +- +- if(m_dll->bd_select_playlist(m_bd, m_title->playlist) == 0 ) ++ if (m_dll->bd_select_playlist(m_bd, m_title->playlist) == 0) + { + CLog::Log(LOGERROR, "CDVDInputStreamBluray::Open - failed to select title %d", m_title->idx); + return false; + } +- m_clip = 0; + } + ++ // Process any events that occured during opening ++ while (m_dll->bd_get_event(m_bd, &m_event)) ++ ProcessEvent(); ++ + return true; + } + + // close file and reset everyting + void CDVDInputStreamBluray::Close() + { ++ CloseMVCDemux(); ++ + if (!m_dll) + return; + if(m_title) +@@ -449,7 +455,7 @@ void CDVDInputStreamBluray::Close() + + void CDVDInputStreamBluray::ProcessEvent() { + +- int pid = -1; ++ int pid = -1, ret; + switch (m_event.event) { + + case BD_EVENT_ERROR: +@@ -514,15 +520,17 @@ void CDVDInputStreamBluray::ProcessEvent() { + CLog::Log(LOGDEBUG, "CDVDInputStreamBluray - BD_EVENT_PLAYLIST %d", + m_event.param); + m_playlist = m_event.param; +- if(m_title) +- m_dll->bd_free_title_info(m_title); +- m_title = m_dll->bd_get_playlist_info(m_bd, m_playlist, m_angle); ++ ProcessItem(m_playlist); + break; + + case BD_EVENT_PLAYITEM: + CLog::Log(LOGDEBUG, "CDVDInputStreamBluray - BD_EVENT_PLAYITEM %d", + m_event.param); + m_clip = m_event.param; ++ uint64_t clip_start, clip_in, bytepos; ++ ret = m_dll->bd_get_clip_infos(m_bd, m_clip, &clip_start, &clip_in, &bytepos, nullptr); ++ if (ret) ++ m_clipStartTime = clip_start / 90; + break; + + 
case BD_EVENT_CHAPTER: +@@ -601,14 +609,20 @@ void CDVDInputStreamBluray::ProcessEvent() { + + /* event has been consumed */ + m_event.event = BD_EVENT_NONE; ++ ++ if (m_bMVCPlayback && m_clip >= 0 && m_title && m_clip < m_title->clip_count && m_nMVCClip != m_clip) ++ { ++ CloseMVCDemux(); ++ OpenMVCDemux(m_clip); ++ } + } + + int CDVDInputStreamBluray::Read(uint8_t* buf, int buf_size) + { ++ int result = 0; + m_dispTimeBeforeRead = (int)(m_dll->bd_tell_time(m_bd) / 90); + if(m_navmode) + { +- int result = 0; + do { + + if(m_hold == HOLD_HELD) +@@ -658,10 +672,14 @@ int CDVDInputStreamBluray::Read(uint8_t* buf, int buf_size) + + } while(result == 0); + +- return result; + } + else +- return m_dll->bd_read(m_bd, buf, buf_size); ++ { ++ result = m_dll->bd_read(m_bd, buf, buf_size); ++ while (m_dll->bd_get_event(m_bd, &m_event)) ++ ProcessEvent(); ++ } ++ return result; + } + + static uint8_t clamp(double v) +@@ -909,8 +927,12 @@ bool CDVDInputStreamBluray::PosTime(int ms) + { + if(m_dll->bd_seek_time(m_bd, ms * 90) < 0) + return false; +- else +- return true; ++ ++ while (m_dll->bd_get_event(m_bd, &m_event)) ++ ProcessEvent(); ++ ++ SeekMVCDemux(ms - m_clipStartTime); ++ return true; + } + + int CDVDInputStreamBluray::GetChapterCount() +@@ -933,8 +955,12 @@ bool CDVDInputStreamBluray::SeekChapter(int ch) + { + if(m_title && m_dll->bd_seek_chapter(m_bd, ch-1) < 0) + return false; +- else +- return true; ++ ++ while (m_dll->bd_get_event(m_bd, &m_event)) ++ ProcessEvent(); ++ ++ SeekMVCDemux(GetChapterPos(ch) * 1000 - m_clipStartTime); ++ return true; + } + + int64_t CDVDInputStreamBluray::GetChapterPos(int ch) +@@ -1132,6 +1158,95 @@ bool CDVDInputStreamBluray::HasMenu() + return m_navmode; + } + ++bool CDVDInputStreamBluray::ProcessItem(int playitem) ++{ ++ if (m_title) ++ m_dll->bd_free_title_info(m_title); ++ ++ m_title = m_dll->bd_get_playlist_info(m_bd, playitem, m_angle); ++ ++ if (CSettings::GetInstance().GetBool("videoplayer.supportmvc")) ++ { ++ MPLS_PL * mpls 
= m_dll->bd_get_title_mpls(m_bd); ++ if (mpls) ++ { ++ for (int i = 0; i < mpls->ext_sub_count; i++) ++ { ++ if (mpls->ext_sub_path[i].type == 8 ++ && mpls->ext_sub_path[i].sub_playitem_count == mpls->list_count) ++ { ++ CLog::Log(LOGDEBUG, "CDVDInputStreamBluray - Enabling BD3D MVC demuxing"); ++ CLog::Log(LOGDEBUG, "CDVDInputStreamBluray - MVC_Base_view_R_flag: %d", m_title->mvc_base_view_r_flag); ++ m_bMVCPlayback = true; ++ m_nMVCSubPathIndex = i; ++ m_bFlipEyes = m_title->mvc_base_view_r_flag != 0; ++ break; ++ } ++ } ++ } ++ } ++ CloseMVCDemux(); ++ return true; ++} ++ ++bool CDVDInputStreamBluray::OpenMVCDemux(int playItem) ++{ ++ MPLS_PL *pl = m_dll->bd_get_title_mpls(m_bd); ++ if (!pl) ++ return false; ++ ++ std::string strFileName; ++ strFileName.append(m_root); ++ strFileName.append("/BDMV/STREAM/"); ++ strFileName.append(pl->ext_sub_path[m_nMVCSubPathIndex].sub_play_item[playItem].clip->clip_id); ++ strFileName.append(".m2ts"); ++ ++ CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::OpenMVCDemuxer(): Opening MVC extension stream at %s", strFileName.c_str()); ++ ++ CFileItem fileitem(CURL(strFileName), false); ++ m_pMVCInput = new CDVDInputStreamFile(fileitem); ++ ++ // Try to open the MVC stream ++ if (!m_pMVCInput->Open()) ++ { ++ CloseMVCDemux(); ++ m_bMVCPlayback = false; ++ return false; ++ } ++ ++ if (m_pMVCDemux) ++ SAFE_DELETE(m_pMVCDemux); ++ ++ CDVDDemuxMVC* pMVCDemux = new CDVDDemuxMVC; ++ m_pMVCDemux = pMVCDemux; ++ ++ if (!pMVCDemux->Open(m_pMVCInput)) ++ { ++ CloseMVCDemux(); ++ m_bMVCPlayback = false; ++ return false; ++ } ++ ++ m_nMVCClip = playItem; ++ return true; ++} ++ ++bool CDVDInputStreamBluray::CloseMVCDemux() ++{ ++ if (m_pMVCDemux) ++ SAFE_DELETE(m_pMVCDemux); ++ ++ SAFE_DELETE(m_pMVCInput); ++ m_nMVCClip = -1; ++ return true; ++} ++ ++void CDVDInputStreamBluray::SeekMVCDemux(int64_t time) ++{ ++ if (m_bMVCPlayback && m_pMVCDemux) ++ m_pMVCDemux->SeekTime(time); ++} ++ + void CDVDInputStreamBluray::SetupPlayerSettings() + { + int 
region = CSettings::GetInstance().GetInt(CSettings::SETTING_BLURAY_PLAYERREGION); +diff --git a/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.h b/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.h +index b967a85e6557e42a7f1235cdd804d5a0263b866f..561fb5cd4f971bc9ee4f41218a60bb3d5bc5625f 100644 +--- a/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.h ++++ b/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.h +@@ -38,6 +38,7 @@ extern "C" + class CDVDOverlayImage; + class DllLibbluray; + class IVideoPlayer; ++class CDVDDemux; + + class CDVDInputStreamBluray + : public CDVDInputStream +@@ -119,6 +120,9 @@ public: + BLURAY_TITLE_INFO* GetTitleFile(const std::string& name); + + void ProcessEvent(); ++ CDVDDemux* GetDemuxMVC() { return m_pMVCDemux; }; ++ bool HasMVC() { return m_bMVCPlayback; } ++ bool AreEyesFlipped() { return m_bFlipEyes; } + + protected: + struct SPlane; +@@ -127,6 +131,11 @@ protected: + void OverlayClose(); + static void OverlayClear(SPlane& plane, int x, int y, int w, int h); + static void OverlayInit (SPlane& plane, int w, int h); ++ bool ProcessItem(int playitem); ++ ++ bool OpenMVCDemux(int playItem); ++ bool CloseMVCDemux(); ++ void SeekMVCDemux(int64_t time); + + IVideoPlayer* m_player; + DllLibbluray* m_dll; +@@ -138,6 +147,17 @@ protected: + bool m_menu; + bool m_navmode; + int m_dispTimeBeforeRead; ++ int m_nTitles = -1; ++ std::string m_root; ++ ++ // MVC related members ++ CDVDDemux* m_pMVCDemux = nullptr; ++ CDVDInputStream *m_pMVCInput = nullptr; ++ bool m_bMVCPlayback = false; ++ int m_nMVCSubPathIndex = 0; ++ int m_nMVCClip = -1; ++ bool m_bFlipEyes = false; ++ uint64_t m_clipStartTime = 0; + + typedef std::shared_ptr SOverlay; + typedef std::list SOverlays; + +From 9209fd1862041094e9f01e17c377c6d50c37ebb0 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin +Date: Wed, 2 Mar 2016 23:31:50 +0300 +Subject: [PATCH 46/67] [BaseRenderer] Fix aspect for TAB/SBS (need more + testing) + +--- + 
xbmc/cores/VideoPlayer/VideoRenderers/BaseRenderer.cpp | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/BaseRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/BaseRenderer.cpp +index f18c671d90c85eed1ca4bd52028d7e5074a1312a..5c6f7453c2b3fd1155c18af8d37cb3d4fa9de1c6 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/BaseRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/BaseRenderer.cpp +@@ -35,6 +35,9 @@ + #include "settings/AdvancedSettings.h" + #include "cores/VideoPlayer/VideoRenderers/RenderFlags.h" + ++extern "C" { ++#include "libavformat/version.h" ++} + + CBaseRenderer::CBaseRenderer() + { +@@ -369,6 +372,21 @@ void CBaseRenderer::CalculateFrameAspectRatio(unsigned int desired_width, unsign + if (m_sourceHeight == 576) // PAL + m_sourceFrameRatio = imageFrameRatio * PALPixelRatio * Non4by3Correction; + } ++#if (LIBAVFORMAT_VERSION_MAJOR >= 57) ++ bool isAnamorph = m_sourceWidth <= 1920 && m_sourceHeight <= 1080; ++ float factor = isAnamorph ? 
2.0f : 4.0f; ++ switch (CONF_FLAGS_STEREO_MODE_MASK(m_iFlags)) ++ { ++ case CONF_FLAGS_STEREO_MODE_TAB: ++ m_sourceFrameRatio *= factor; ++ break; ++ case CONF_FLAGS_STEREO_MODE_SBS: ++ m_sourceFrameRatio /= factor; ++ break; ++ default: ++ break; ++ } ++#endif + } + + void CBaseRenderer::ManageRenderArea() + +From 7aa4746fe6adef77e5ff99b60d242a575fff583c Mon Sep 17 00:00:00 2001 +From: popcornmix +Date: Mon, 29 Feb 2016 17:00:50 +0000 +Subject: [PATCH 47/67] libbluray: Bump to Nevcairie's v0.9.2 + +This includes 3D support +--- + tools/depends/target/libbluray/Makefile | 1 + + .../libbluray/bump_to_Nevcairie_v0.9.2.patch | 24397 +++++++++++++++++++ + 2 files changed, 24398 insertions(+) + create mode 100644 tools/depends/target/libbluray/bump_to_Nevcairie_v0.9.2.patch + +diff --git a/tools/depends/target/libbluray/Makefile b/tools/depends/target/libbluray/Makefile +index 3c85b96ca38409fec6de87cb30162b725ce170db..d8fa16ed83ea997c8b3cf34ee83383e830986197 100644 +--- a/tools/depends/target/libbluray/Makefile ++++ b/tools/depends/target/libbluray/Makefile +@@ -27,6 +27,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + ifeq ($(OS),android) + cd $(PLATFORM); patch -p1 < ../android.patch + endif ++ cd $(PLATFORM); patch -p1 < ../bump_to_Nevcairie_v0.9.2.patch + cd $(PLATFORM); ./bootstrap + cd $(PLATFORM); $(CONFIGURE) + +diff --git a/tools/depends/target/libbluray/bump_to_Nevcairie_v0.9.2.patch b/tools/depends/target/libbluray/bump_to_Nevcairie_v0.9.2.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..5884d91590f80927cc5138fdd0ed41072c65354a +--- /dev/null ++++ b/tools/depends/target/libbluray/bump_to_Nevcairie_v0.9.2.patch +@@ -0,0 +1,24397 @@ ++diff --git a/ChangeLog b/ChangeLog ++index ffc7788..545fb3f 100644 ++--- a/ChangeLog +++++ b/ChangeLog ++@@ -1,3 +1,29 @@ +++2015-12-01: Version 0.9.2 +++- Add primary audio stream to bd_select_stream(). +++- Improve error resilience. +++- Fix Java 8 compability issues. 
+++- Fix Android build. +++- Fix SecurityException in AWTAutoShutdown. +++- Fix BD-J check when install path in Windows contains non-ASCII chars. +++- Fix jvm.dll loading in Windows ($JAVA_HOME/bin should be in dll load path). +++- Fix class translating in recent Java 8 versions. +++ +++2015-11-03: Version 0.9.1 +++- Improved BD-J security. +++- Improved error resilience. +++- Improved seeking (avoid skipping PAT/PMT/PCR). +++- Fix UO mask check when bd_play_title() is used for Top Menu. +++- Fix re-starting of title bound Xlets when title changes. +++- Fix loading classes with invalid debug info. +++ +++2015-10-02: Version 0.9.0 +++- Add functions to read files from VFS. +++- Improved error resilience. +++- Improved BD-J compability. +++- Fix Xlet-initiated font caching. +++- Fix return value when setting BLURAY_PLAYER_SETTING_DECODE_PG. +++- Fix build with C++ compiler +++ ++ 2015-05-15: Version 0.8.1 ++ - Notify application when UO mask changes. ++ - Improved error resilience. ++diff --git a/Makefile.am b/Makefile.am ++index e03e926..87093c4 100644 ++--- a/Makefile.am +++++ b/Makefile.am ++@@ -26,7 +26,8 @@ EXTRA_DIST = \ ++ src/libbluray/bdj/build.xml \ ++ src/libbluray/bdj/java \ ++ src/libbluray/bdj/java-j2me \ ++- src/libbluray/bdj/java-j2se +++ src/libbluray/bdj/java-j2se \ +++ contrib/asm ++ ++ lib_LTLIBRARIES=libbluray.la ++ libbluray_la_SOURCES = \ ++@@ -149,7 +150,7 @@ libbluray_la_SOURCES+= \ ++ endif ++ endif ++ ++-libbluray_la_LDFLAGS= -version-info $(LT_VERSION_INFO) -export-symbols-regex "^bd_" +++libbluray_la_LDFLAGS= -no-undefined -version-info $(LT_VERSION_INFO) -export-symbols-regex "^bd_" ++ libbluray_la_LIBADD= $(LIBXML2_LIBS) $(FT2_LIBS) $(FONTCONFIG_LIBS) ++ ++ noinst_HEADERS = \ ++@@ -158,6 +159,15 @@ noinst_HEADERS = \ ++ jni/win32/jni_md.h \ ++ jni/darwin/jni_md.h ++ +++ +++bdnavdir=$(pkgincludedir)/bdnav +++bdnav_HEADERS = \ +++ src/libbluray/bdnav/clpi_data.h +++ +++utildir=$(pkgincludedir)/../util +++util_HEADERS = \ +++ 
src/util/attributes.h +++ ++ pkginclude_HEADERS = \ ++ src/file/filesystem.h \ ++ src/libbluray/bluray.h \ ++@@ -165,6 +175,9 @@ pkginclude_HEADERS = \ ++ src/libbluray/keys.h \ ++ src/libbluray/player_settings.h \ ++ src/libbluray/bdnav/clpi_data.h \ +++ src/libbluray/bdnav/clpi_parse.h \ +++ src/libbluray/bdnav/mpls_parse.h \ +++ src/libbluray/bdnav/uo_mask_table.h \ ++ src/libbluray/bdnav/meta_data.h \ ++ src/libbluray/decoders/overlay.h \ ++ src/util/log_control.h ++@@ -185,10 +198,12 @@ endif ++ ++ ++ if USING_BDJAVA +++if USING_BDJAVA_BUILD_JAR ++ jardir=$(datadir)/java/ ++ jar_DATA=$(top_builddir)/.libs/libbluray-$(BDJ_TYPE)-$(VERSION).jar ++ ++ $(top_builddir)/.libs/libbluray-$(BDJ_TYPE)-$(VERSION).jar: all-local +++endif ++ ++ libbluray_la_SOURCES += \ ++ src/libbluray/bdj/bdj.h \ ++@@ -213,6 +228,7 @@ libbluray_la_SOURCES += \ ++ ++ AM_CFLAGS += $(BDJAVA_CFLAGS) ++ +++if USING_BDJAVA_BUILD_JAR ++ all-local: ++ ant -f $(top_srcdir)/src/libbluray/bdj/build.xml \ ++ -Dbuild='$(abs_builddir)/src/libbluray/bdj/build' \ ++@@ -228,6 +244,7 @@ clean-local: ++ -Dversion='$(BDJ_TYPE)-$(VERSION)' \ ++ clean ++ endif +++endif ++ ++ pkgconfigdir = $(libdir)/pkgconfig ++ pkgconfig_DATA = src/libbluray.pc ++@@ -265,20 +282,20 @@ bd_info_LDADD = libbluray.la ++ bdsplice_SOURCES = src/examples/bdsplice.c ++ bdsplice_LDADD = libbluray.la ++ ++-bdj_test_SOURCES = src/examples/bdj_test.c +++bdj_test_SOURCES = src/devtools/bdj_test.c ++ bdj_test_LDADD = libbluray.la ++ ++-bdjo_dump_SOURCES = src/examples/bdjo_dump.c +++bdjo_dump_SOURCES = src/devtools/bdjo_dump.c ++ bdjo_dump_LDADD = libbluray.la ++ ++ clpi_dump_CFLAGS = $(AM_CFLAGS) ++ clpi_dump_SOURCES = \ ++- src/examples/clpi_dump.c \ ++- src/examples/util.c \ ++- src/examples/util.h +++ src/devtools/clpi_dump.c \ +++ src/devtools/util.c \ +++ src/devtools/util.h ++ clpi_dump_LDADD = libbluray.la ++ ++-hdmv_test_SOURCES = src/examples/hdmv_test.c +++hdmv_test_SOURCES = src/devtools/hdmv_test.c ++ hdmv_test_LDADD = 
libbluray.la ++ ++ index_dump_SOURCES = src/examples/index_dump.c ++@@ -291,15 +308,15 @@ list_titles_SOURCES = src/examples/list_titles.c ++ list_titles_LDADD = libbluray.la ++ ++ mobj_dump_CFLAGS = $(AM_CFLAGS) ++-mobj_dump_SOURCES = src/examples/mobj_dump.c \ +++mobj_dump_SOURCES = src/devtools/mobj_dump.c \ ++ src/libbluray/hdmv/mobj_print.c ++ mobj_dump_LDADD = libbluray.la ++ ++ mpls_dump_CFLAGS = $(AM_CFLAGS) ++ mpls_dump_SOURCES = \ ++- src/examples/mpls_dump.c \ ++- src/examples/util.c \ ++- src/examples/util.h +++ src/devtools/mpls_dump.c \ +++ src/devtools/util.c \ +++ src/devtools/util.h ++ mpls_dump_LDADD = libbluray.la ++ ++ sound_dump_SOURCES = src/examples/sound_dump.c ++diff --git a/bootstrap b/bootstrap ++index 872167c..bde67cb 100755 ++--- a/bootstrap +++++ b/bootstrap ++@@ -1,3 +1,7 @@ ++ #!/bin/sh ++ +++set -e +++ +++cd "$(dirname "$0")" +++ ++ autoreconf -vif ++diff --git a/config.h b/config.h ++new file mode 100644 ++index 0000000..6764704 ++--- /dev/null +++++ b/config.h ++@@ -0,0 +1,157 @@ +++/* config.h. Generated from config.h.in by configure. */ +++/* config.h.in. Generated from configure.ac by autoheader. */ +++ +++/* Define to 1 if libudfread is to be used for disc image access */ +++/* #undef ENABLE_UDF */ +++ +++/* Define to 1 if using libbluray J2ME stack */ +++/* #undef HAVE_BDJ_J2ME */ +++ +++/* Define to 1 if you have the header file, and it defines `DIR'. +++ */ +++/* #undef HAVE_DIRENT_H */ +++ +++/* Define to 1 if you have the header file. */ +++/* #undef HAVE_DLFCN_H */ +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_ERRNO_H 1 +++ +++/* Define to 1 if you have the header file. */ +++/* #undef HAVE_FCNTL_H */ +++ +++/* Define this if you have fontconfig library */ +++/* #undef HAVE_FONTCONFIG */ +++ +++/* Define this if you have FreeType2 library */ +++/* #undef HAVE_FT2 */ +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_INTTYPES_H 1 +++ +++/* Define to 1 if you have the header file. 
*/ +++/* #undef HAVE_JNI_H */ +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_LIBGEN_H 1 +++ +++/* Define to 1 if libxml2 is to be used for metadata parsing */ +++/* #undef HAVE_LIBXML2 */ +++ +++/* Define to 1 if you have the header file. */ +++/* #undef HAVE_LINUX_CDROM_H */ +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_MALLOC_H 1 +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_MEMORY_H 1 +++ +++/* Define to 1 if you have the header file. */ +++/* #undef HAVE_MNTENT_H */ +++ +++/* Define to 1 if you have the header file, and it defines `DIR'. */ +++/* #undef HAVE_NDIR_H */ +++ +++/* Define to 1 if you have the header file. */ +++/* #undef HAVE_PTHREAD_H */ +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_STDARG_H 1 +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_STDINT_H 1 +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_STDLIB_H 1 +++ +++/* Define to 1 if you have the header file. */ +++/* #undef HAVE_STRINGS_H */ +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_STRING_H 1 +++ +++/* Define to 1 if `d_type' is a member of `struct dirent'. */ +++/* #undef HAVE_STRUCT_DIRENT_D_TYPE */ +++ +++/* Define to 1 if you have the header file, and it defines `DIR'. +++ */ +++/* #undef HAVE_SYS_DIR_H */ +++ +++/* Define to 1 if you have the header file, and it defines `DIR'. +++ */ +++/* #undef HAVE_SYS_NDIR_H */ +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_SYS_STAT_H 1 +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_SYS_TIME_H 1 +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_SYS_TYPES_H 1 +++ +++/* Define to 1 if you have the header file. */ +++#define HAVE_TIME_H 1 +++ +++/* Define to 1 if you have the header file. */ +++/* #undef HAVE_UNISTD_H */ +++ +++/* "Defines the architecture of the java vm." 
*/ +++/* #undef JAVA_ARCH */ +++ +++/* "" */ +++/* #undef JDK_HOME */ +++ +++/* Define to the sub-directory where libtool stores uninstalled libraries. */ +++#define LT_OBJDIR ".libs/" +++ +++/* Name of package */ +++#define PACKAGE "libbluray" +++ +++/* Define to the address where bug reports for this package should be sent. */ +++#define PACKAGE_BUGREPORT "http://www.videolan.org/developers/libbluray.html" +++ +++/* Define to the full name of this package. */ +++#define PACKAGE_NAME "libbluray" +++ +++/* Define to the full name and version of this package. */ +++#define PACKAGE_STRING "libbluray 0.9.2" +++ +++/* Define to the one symbol short name of this package. */ +++#define PACKAGE_TARNAME "libbluray" +++ +++/* Define to the home page for this package. */ +++#define PACKAGE_URL "" +++ +++/* Define to the version of this package. */ +++#define PACKAGE_VERSION "0.9.2" +++ +++/* Define as the return type of signal handlers (`int' or `void'). */ +++#define RETSIGTYPE void +++ +++/* Define to 1 if you have the ANSI C header files. */ +++#define STDC_HEADERS 1 +++ +++/* "Define to 1 if using BD-Java" */ +++/* #undef USING_BDJAVA */ +++ +++/* Version number of package */ +++#define VERSION "0.9.2" +++ +++/* Enable large inode numbers on Mac OS X 10.5. */ +++#ifndef _DARWIN_USE_64_BIT_INODE +++# define _DARWIN_USE_64_BIT_INODE 1 +++#endif +++ +++/* Number of bits in a file offset, on hosts where this is settable. */ +++#define _FILE_OFFSET_BITS 64 +++ +++/* Define for large files, on AIX-style hosts. */ +++/* #undef _LARGE_FILES */ +++ +++/* Define to '0x0501' for IE 5.01. */ +++#define _WIN32_IE 0x0501 +++ +++/* Define to '0x0502' for Windows XP SP2 APIs. 
*/ +++#define _WIN32_WINNT 0x0502 ++diff --git a/configure.ac b/configure.ac ++index 5d5fe2c..ed08c96 100644 ++--- a/configure.ac +++++ b/configure.ac ++@@ -1,7 +1,7 @@ ++ dnl library version number ++ m4_define([bluray_major], 0) ++-m4_define([bluray_minor], 8) ++-m4_define([bluray_micro], 1) +++m4_define([bluray_minor], 9) +++m4_define([bluray_micro], 2) ++ m4_define([bluray_version],[bluray_major.bluray_minor.bluray_micro]) ++ ++ dnl shared library version (.so version) ++@@ -12,9 +12,9 @@ dnl - If interfaces have been changed or removed, increase current and set age a ++ dnl ++ dnl Library file name will be libbluray.so.(current-age).age.revision ++ dnl ++-m4_define([lt_current], 9) ++-m4_define([lt_revision], 1) ++-m4_define([lt_age], 8) +++m4_define([lt_current], 10) +++m4_define([lt_revision], 2) +++m4_define([lt_age], 9) ++ ++ dnl initilization ++ AC_INIT([libbluray], bluray_version, [http://www.videolan.org/developers/libbluray.html]) ++@@ -87,7 +87,15 @@ AC_ARG_ENABLE([bdjava], ++ [use_bdjava=yes]) ++ ++ AC_ARG_ENABLE([udf], ++- [AS_HELP_STRING([--enable-udf], [enable UDF support @<:@default=disabled@:>@])]) +++ [AS_HELP_STRING([--disable-udf], [disable UDF support @<:@default=enabled@:>@])], +++ [enable_udf=$enableval], +++ [enable_udf=yes]) +++ +++AC_ARG_ENABLE([bdjava-jar], +++ [AS_HELP_STRING([--disable-bdjava-jar], +++ [disable building of BD-Java JAR file @<:@default=enabled@:>@])], +++ [use_bdjava_jar=$enableval], +++ [use_bdjava_jar=yes]) ++ ++ AC_ARG_WITH([libxml2], ++ [AS_HELP_STRING([--without-libxml2], [build without libxml2 support @<:@default=with@:>@])]) ++@@ -224,7 +232,7 @@ if [[ $use_bdjava = "yes" ]]; then ++ ]) ++ ++ AC_CHECK_PROG(HAVE_ANT, [ant], yes, no) ++- if test "x$HAVE_ANT" = "xno"; then +++ if test "x$use_bdjava_jar" = "xyes" && test "x$HAVE_ANT" = "xno"; then ++ AC_MSG_ERROR([BD-J requires ANT, but ant was not found. 
Please install it.]) ++ fi ++ ++@@ -233,6 +241,7 @@ if [[ $use_bdjava = "yes" ]]; then ++ AC_DEFINE_UNQUOTED([JDK_HOME], ["$JDK_HOME"], [""]) ++ fi ++ AM_CONDITIONAL([USING_BDJAVA], [ test $use_bdjava = "yes" ]) +++AM_CONDITIONAL([USING_BDJAVA_BUILD_JAR], [ test $use_bdjava_jar = "yes" ]) ++ ++ dnl BD-J type ++ if test "$BDJ_TYPE" = "j2me"; then ++@@ -292,6 +301,7 @@ echo " --------" ++ echo " BD-J support: $use_bdjava" ++ if [[ $use_bdjava = "yes" ]]; then ++ echo " BD-J type: $BDJ_TYPE" +++echo " build JAR: $use_bdjava_jar" ++ if test x"$BDJ_BOOTCLASSPATH" != x""; then ++ echo " BD-J bootclasspath: $BDJ_BOOTCLASSPATH" ++ fi ++diff --git a/contrib/asm/LICENSE.txt b/contrib/asm/LICENSE.txt ++new file mode 100644 ++index 0000000..4d19185 ++--- /dev/null +++++ b/contrib/asm/LICENSE.txt ++@@ -0,0 +1,28 @@ +++ +++ ASM: a very small and fast Java bytecode manipulation framework +++ Copyright (c) 2000-2011 INRIA, France Telecom +++ All rights reserved. +++ +++ Redistribution and use in source and binary forms, with or without +++ modification, are permitted provided that the following conditions +++ are met: +++ 1. Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. +++ 2. Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ 3. Neither the name of the copyright holders nor the names of its +++ contributors may be used to endorse or promote products derived from +++ this software without specific prior written permission. +++ +++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ THE POSSIBILITY OF SUCH DAMAGE. ++diff --git a/contrib/asm/SOURCE b/contrib/asm/SOURCE ++new file mode 100644 ++index 0000000..804aede ++--- /dev/null +++++ b/contrib/asm/SOURCE ++@@ -0,0 +1,9 @@ +++Core functionality from asm 5.0.4 +++ +++http://asm.ow2.org/ +++ +++ASM is an all purpose Java bytecode manipulation and analysis framework. It can be used to modify +++existing classes or dynamically generate classes, directly in binary form. Provided common +++transformations and analysis algorithms allow to easily assemble custom complex transformations +++and code analysis tools. +++ ++diff --git a/contrib/asm/src/org/objectweb/asm/AnnotationVisitor.java b/contrib/asm/src/org/objectweb/asm/AnnotationVisitor.java ++new file mode 100644 ++index 0000000..b644083 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/AnnotationVisitor.java ++@@ -0,0 +1,169 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. 
Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A visitor to visit a Java annotation. The methods of this class must be +++ * called in the following order: ( visit | visitEnum | +++ * visitAnnotation | visitArray )* visitEnd. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++public abstract class AnnotationVisitor { +++ +++ /** +++ * The ASM API version implemented by this visitor. The value of this field +++ * must be one of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ protected final int api; +++ +++ /** +++ * The annotation visitor to which this visitor must delegate method calls. +++ * May be null. 
+++ */ +++ protected AnnotationVisitor av; +++ +++ /** +++ * Constructs a new {@link AnnotationVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ public AnnotationVisitor(final int api) { +++ this(api, null); +++ } +++ +++ /** +++ * Constructs a new {@link AnnotationVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ * @param av +++ * the annotation visitor to which this visitor must delegate +++ * method calls. May be null. +++ */ +++ public AnnotationVisitor(final int api, final AnnotationVisitor av) { +++ if (api != Opcodes.ASM4 && api != Opcodes.ASM5) { +++ throw new IllegalArgumentException(); +++ } +++ this.api = api; +++ this.av = av; +++ } +++ +++ /** +++ * Visits a primitive value of the annotation. +++ * +++ * @param name +++ * the value name. +++ * @param value +++ * the actual value, whose type must be {@link Byte}, +++ * {@link Boolean}, {@link Character}, {@link Short}, +++ * {@link Integer} , {@link Long}, {@link Float}, {@link Double}, +++ * {@link String} or {@link Type} or OBJECT or ARRAY sort. This +++ * value can also be an array of byte, boolean, short, char, int, +++ * long, float or double values (this is equivalent to using +++ * {@link #visitArray visitArray} and visiting each array element +++ * in turn, but is more convenient). +++ */ +++ public void visit(String name, Object value) { +++ if (av != null) { +++ av.visit(name, value); +++ } +++ } +++ +++ /** +++ * Visits an enumeration value of the annotation. +++ * +++ * @param name +++ * the value name. +++ * @param desc +++ * the class descriptor of the enumeration class. +++ * @param value +++ * the actual enumeration value. 
+++ */ +++ public void visitEnum(String name, String desc, String value) { +++ if (av != null) { +++ av.visitEnum(name, desc, value); +++ } +++ } +++ +++ /** +++ * Visits a nested annotation value of the annotation. +++ * +++ * @param name +++ * the value name. +++ * @param desc +++ * the class descriptor of the nested annotation class. +++ * @return a visitor to visit the actual nested annotation value, or +++ * null if this visitor is not interested in visiting this +++ * nested annotation. The nested annotation value must be fully +++ * visited before calling other methods on this annotation +++ * visitor. +++ */ +++ public AnnotationVisitor visitAnnotation(String name, String desc) { +++ if (av != null) { +++ return av.visitAnnotation(name, desc); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an array value of the annotation. Note that arrays of primitive +++ * types (such as byte, boolean, short, char, int, long, float or double) +++ * can be passed as value to {@link #visit visit}. This is what +++ * {@link ClassReader} does. +++ * +++ * @param name +++ * the value name. +++ * @return a visitor to visit the actual array value elements, or +++ * null if this visitor is not interested in visiting these +++ * values. The 'name' parameters passed to the methods of this +++ * visitor are ignored. All the array values must be visited +++ * before calling other methods on this annotation visitor. +++ */ +++ public AnnotationVisitor visitArray(String name) { +++ if (av != null) { +++ return av.visitArray(name); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits the end of the annotation. 
+++ */ +++ public void visitEnd() { +++ if (av != null) { +++ av.visitEnd(); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/AnnotationWriter.java b/contrib/asm/src/org/objectweb/asm/AnnotationWriter.java ++new file mode 100644 ++index 0000000..6b95608 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/AnnotationWriter.java ++@@ -0,0 +1,371 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * An {@link AnnotationVisitor} that generates annotations in bytecode form. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++final class AnnotationWriter extends AnnotationVisitor { +++ +++ /** +++ * The class writer to which this annotation must be added. +++ */ +++ private final ClassWriter cw; +++ +++ /** +++ * The number of values in this annotation. +++ */ +++ private int size; +++ +++ /** +++ * true if values are named, false otherwise. Annotation +++ * writers used for annotation default and annotation arrays use unnamed +++ * values. +++ */ +++ private final boolean named; +++ +++ /** +++ * The annotation values in bytecode form. This byte vector only contains +++ * the values themselves, i.e. the number of values must be stored as a +++ * unsigned short just before these bytes. +++ */ +++ private final ByteVector bv; +++ +++ /** +++ * The byte vector to be used to store the number of values of this +++ * annotation. See {@link #bv}. +++ */ +++ private final ByteVector parent; +++ +++ /** +++ * Where the number of values of this annotation must be stored in +++ * {@link #parent}. +++ */ +++ private final int offset; +++ +++ /** +++ * Next annotation writer. This field is used to store annotation lists. +++ */ +++ AnnotationWriter next; +++ +++ /** +++ * Previous annotation writer. This field is used to store annotation lists. 
+++ */ +++ AnnotationWriter prev; +++ +++ // ------------------------------------------------------------------------ +++ // Constructor +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new {@link AnnotationWriter}. +++ * +++ * @param cw +++ * the class writer to which this annotation must be added. +++ * @param named +++ * true if values are named, false otherwise. +++ * @param bv +++ * where the annotation values must be stored. +++ * @param parent +++ * where the number of annotation values must be stored. +++ * @param offset +++ * where in parent the number of annotation values must +++ * be stored. +++ */ +++ AnnotationWriter(final ClassWriter cw, final boolean named, +++ final ByteVector bv, final ByteVector parent, final int offset) { +++ super(Opcodes.ASM5); +++ this.cw = cw; +++ this.named = named; +++ this.bv = bv; +++ this.parent = parent; +++ this.offset = offset; +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Implementation of the AnnotationVisitor abstract class +++ // ------------------------------------------------------------------------ +++ +++ @Override +++ public void visit(final String name, final Object value) { +++ ++size; +++ if (named) { +++ bv.putShort(cw.newUTF8(name)); +++ } +++ if (value instanceof String) { +++ bv.put12('s', cw.newUTF8((String) value)); +++ } else if (value instanceof Byte) { +++ bv.put12('B', cw.newInteger(((Byte) value).byteValue()).index); +++ } else if (value instanceof Boolean) { +++ int v = ((Boolean) value).booleanValue() ? 
1 : 0; +++ bv.put12('Z', cw.newInteger(v).index); +++ } else if (value instanceof Character) { +++ bv.put12('C', cw.newInteger(((Character) value).charValue()).index); +++ } else if (value instanceof Short) { +++ bv.put12('S', cw.newInteger(((Short) value).shortValue()).index); +++ } else if (value instanceof Type) { +++ bv.put12('c', cw.newUTF8(((Type) value).getDescriptor())); +++ } else if (value instanceof byte[]) { +++ byte[] v = (byte[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('B', cw.newInteger(v[i]).index); +++ } +++ } else if (value instanceof boolean[]) { +++ boolean[] v = (boolean[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('Z', cw.newInteger(v[i] ? 1 : 0).index); +++ } +++ } else if (value instanceof short[]) { +++ short[] v = (short[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('S', cw.newInteger(v[i]).index); +++ } +++ } else if (value instanceof char[]) { +++ char[] v = (char[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('C', cw.newInteger(v[i]).index); +++ } +++ } else if (value instanceof int[]) { +++ int[] v = (int[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('I', cw.newInteger(v[i]).index); +++ } +++ } else if (value instanceof long[]) { +++ long[] v = (long[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('J', cw.newLong(v[i]).index); +++ } +++ } else if (value instanceof float[]) { +++ float[] v = (float[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('F', cw.newFloat(v[i]).index); +++ } +++ } else if (value instanceof double[]) { +++ double[] v = (double[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('D', cw.newDouble(v[i]).index); +++ } +++ } else { +++ Item i = 
cw.newConstItem(value); +++ bv.put12(".s.IFJDCS".charAt(i.type), i.index); +++ } +++ } +++ +++ @Override +++ public void visitEnum(final String name, final String desc, +++ final String value) { +++ ++size; +++ if (named) { +++ bv.putShort(cw.newUTF8(name)); +++ } +++ bv.put12('e', cw.newUTF8(desc)).putShort(cw.newUTF8(value)); +++ } +++ +++ @Override +++ public AnnotationVisitor visitAnnotation(final String name, +++ final String desc) { +++ ++size; +++ if (named) { +++ bv.putShort(cw.newUTF8(name)); +++ } +++ // write tag and type, and reserve space for values count +++ bv.put12('@', cw.newUTF8(desc)).putShort(0); +++ return new AnnotationWriter(cw, true, bv, bv, bv.length - 2); +++ } +++ +++ @Override +++ public AnnotationVisitor visitArray(final String name) { +++ ++size; +++ if (named) { +++ bv.putShort(cw.newUTF8(name)); +++ } +++ // write tag, and reserve space for array size +++ bv.put12('[', 0); +++ return new AnnotationWriter(cw, false, bv, bv, bv.length - 2); +++ } +++ +++ @Override +++ public void visitEnd() { +++ if (parent != null) { +++ byte[] data = parent.data; +++ data[offset] = (byte) (size >>> 8); +++ data[offset + 1] = (byte) size; +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the size of this annotation writer list. +++ * +++ * @return the size of this annotation writer list. +++ */ +++ int getSize() { +++ int size = 0; +++ AnnotationWriter aw = this; +++ while (aw != null) { +++ size += aw.bv.length; +++ aw = aw.next; +++ } +++ return size; +++ } +++ +++ /** +++ * Puts the annotations of this annotation writer list into the given byte +++ * vector. +++ * +++ * @param out +++ * where the annotations must be put. 
+++ */ +++ void put(final ByteVector out) { +++ int n = 0; +++ int size = 2; +++ AnnotationWriter aw = this; +++ AnnotationWriter last = null; +++ while (aw != null) { +++ ++n; +++ size += aw.bv.length; +++ aw.visitEnd(); // in case user forgot to call visitEnd +++ aw.prev = last; +++ last = aw; +++ aw = aw.next; +++ } +++ out.putInt(size); +++ out.putShort(n); +++ aw = last; +++ while (aw != null) { +++ out.putByteArray(aw.bv.data, 0, aw.bv.length); +++ aw = aw.prev; +++ } +++ } +++ +++ /** +++ * Puts the given annotation lists into the given byte vector. +++ * +++ * @param panns +++ * an array of annotation writer lists. +++ * @param off +++ * index of the first annotation to be written. +++ * @param out +++ * where the annotations must be put. +++ */ +++ static void put(final AnnotationWriter[] panns, final int off, +++ final ByteVector out) { +++ int size = 1 + 2 * (panns.length - off); +++ for (int i = off; i < panns.length; ++i) { +++ size += panns[i] == null ? 0 : panns[i].getSize(); +++ } +++ out.putInt(size).putByte(panns.length - off); +++ for (int i = off; i < panns.length; ++i) { +++ AnnotationWriter aw = panns[i]; +++ AnnotationWriter last = null; +++ int n = 0; +++ while (aw != null) { +++ ++n; +++ aw.visitEnd(); // in case user forgot to call visitEnd +++ aw.prev = last; +++ last = aw; +++ aw = aw.next; +++ } +++ out.putShort(n); +++ aw = last; +++ while (aw != null) { +++ out.putByteArray(aw.bv.data, 0, aw.bv.length); +++ aw = aw.prev; +++ } +++ } +++ } +++ +++ /** +++ * Puts the given type reference and type path into the given bytevector. +++ * LOCAL_VARIABLE and RESOURCE_VARIABLE target types are not supported. +++ * +++ * @param typeRef +++ * a reference to the annotated type. See {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * null if the annotation targets 'typeRef' as a whole. 
+++ * @param out +++ * where the type reference and type path must be put. +++ */ +++ static void putTarget(int typeRef, TypePath typePath, ByteVector out) { +++ switch (typeRef >>> 24) { +++ case 0x00: // CLASS_TYPE_PARAMETER +++ case 0x01: // METHOD_TYPE_PARAMETER +++ case 0x16: // METHOD_FORMAL_PARAMETER +++ out.putShort(typeRef >>> 16); +++ break; +++ case 0x13: // FIELD +++ case 0x14: // METHOD_RETURN +++ case 0x15: // METHOD_RECEIVER +++ out.putByte(typeRef >>> 24); +++ break; +++ case 0x47: // CAST +++ case 0x48: // CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ case 0x49: // METHOD_INVOCATION_TYPE_ARGUMENT +++ case 0x4A: // CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ case 0x4B: // METHOD_REFERENCE_TYPE_ARGUMENT +++ out.putInt(typeRef); +++ break; +++ // case 0x10: // CLASS_EXTENDS +++ // case 0x11: // CLASS_TYPE_PARAMETER_BOUND +++ // case 0x12: // METHOD_TYPE_PARAMETER_BOUND +++ // case 0x17: // THROWS +++ // case 0x42: // EXCEPTION_PARAMETER +++ // case 0x43: // INSTANCEOF +++ // case 0x44: // NEW +++ // case 0x45: // CONSTRUCTOR_REFERENCE +++ // case 0x46: // METHOD_REFERENCE +++ default: +++ out.put12(typeRef >>> 24, (typeRef & 0xFFFF00) >> 8); +++ break; +++ } +++ if (typePath == null) { +++ out.putByte(0); +++ } else { +++ int length = typePath.b[typePath.offset] * 2 + 1; +++ out.putByteArray(typePath.b, typePath.offset, length); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Attribute.java b/contrib/asm/src/org/objectweb/asm/Attribute.java ++new file mode 100644 ++index 0000000..8a2a882 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Attribute.java ++@@ -0,0 +1,255 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. 
Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A non standard class, field, method or code attribute. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++public class Attribute { +++ +++ /** +++ * The type of this attribute. +++ */ +++ public final String type; +++ +++ /** +++ * The raw value of this attribute, used only for unknown attributes. +++ */ +++ byte[] value; +++ +++ /** +++ * The next attribute in this attribute list. May be null. +++ */ +++ Attribute next; +++ +++ /** +++ * Constructs a new empty attribute. 
+++ * +++ * @param type +++ * the type of the attribute. +++ */ +++ protected Attribute(final String type) { +++ this.type = type; +++ } +++ +++ /** +++ * Returns true if this type of attribute is unknown. The default +++ * implementation of this method always returns true. +++ * +++ * @return true if this type of attribute is unknown. +++ */ +++ public boolean isUnknown() { +++ return true; +++ } +++ +++ /** +++ * Returns true if this type of attribute is a code attribute. +++ * +++ * @return true if this type of attribute is a code attribute. +++ */ +++ public boolean isCodeAttribute() { +++ return false; +++ } +++ +++ /** +++ * Returns the labels corresponding to this attribute. +++ * +++ * @return the labels corresponding to this attribute, or null if +++ * this attribute is not a code attribute that contains labels. +++ */ +++ protected Label[] getLabels() { +++ return null; +++ } +++ +++ /** +++ * Reads a {@link #type type} attribute. This method must return a +++ * new {@link Attribute} object, of type {@link #type type}, +++ * corresponding to the len bytes starting at the given offset, in +++ * the given class reader. +++ * +++ * @param cr +++ * the class that contains the attribute to be read. +++ * @param off +++ * index of the first byte of the attribute's content in +++ * {@link ClassReader#b cr.b}. The 6 attribute header bytes, +++ * containing the type and the length of the attribute, are not +++ * taken into account here. +++ * @param len +++ * the length of the attribute's content. +++ * @param buf +++ * buffer to be used to call {@link ClassReader#readUTF8 +++ * readUTF8}, {@link ClassReader#readClass(int,char[]) readClass} +++ * or {@link ClassReader#readConst readConst}. +++ * @param codeOff +++ * index of the first byte of code's attribute content in +++ * {@link ClassReader#b cr.b}, or -1 if the attribute to be read +++ * is not a code attribute. 
The 6 attribute header bytes, +++ * containing the type and the length of the attribute, are not +++ * taken into account here. +++ * @param labels +++ * the labels of the method's code, or null if the +++ * attribute to be read is not a code attribute. +++ * @return a new {@link Attribute} object corresponding to the given +++ * bytes. +++ */ +++ protected Attribute read(final ClassReader cr, final int off, +++ final int len, final char[] buf, final int codeOff, +++ final Label[] labels) { +++ Attribute attr = new Attribute(type); +++ attr.value = new byte[len]; +++ System.arraycopy(cr.b, off, attr.value, 0, len); +++ return attr; +++ } +++ +++ /** +++ * Returns the byte array form of this attribute. +++ * +++ * @param cw +++ * the class to which this attribute must be added. This +++ * parameter can be used to add to the constant pool of this +++ * class the items that corresponds to this attribute. +++ * @param code +++ * the bytecode of the method corresponding to this code +++ * attribute, or null if this attribute is not a code +++ * attributes. +++ * @param len +++ * the length of the bytecode of the method corresponding to this +++ * code attribute, or null if this attribute is not a +++ * code attribute. +++ * @param maxStack +++ * the maximum stack size of the method corresponding to this +++ * code attribute, or -1 if this attribute is not a code +++ * attribute. +++ * @param maxLocals +++ * the maximum number of local variables of the method +++ * corresponding to this code attribute, or -1 if this attribute +++ * is not a code attribute. +++ * @return the byte array form of this attribute. +++ */ +++ protected ByteVector write(final ClassWriter cw, final byte[] code, +++ final int len, final int maxStack, final int maxLocals) { +++ ByteVector v = new ByteVector(); +++ v.data = value; +++ v.length = value.length; +++ return v; +++ } +++ +++ /** +++ * Returns the length of the attribute list that begins with this attribute. 
+++ * +++ * @return the length of the attribute list that begins with this attribute. +++ */ +++ final int getCount() { +++ int count = 0; +++ Attribute attr = this; +++ while (attr != null) { +++ count += 1; +++ attr = attr.next; +++ } +++ return count; +++ } +++ +++ /** +++ * Returns the size of all the attributes in this attribute list. +++ * +++ * @param cw +++ * the class writer to be used to convert the attributes into +++ * byte arrays, with the {@link #write write} method. +++ * @param code +++ * the bytecode of the method corresponding to these code +++ * attributes, or null if these attributes are not code +++ * attributes. +++ * @param len +++ * the length of the bytecode of the method corresponding to +++ * these code attributes, or null if these attributes +++ * are not code attributes. +++ * @param maxStack +++ * the maximum stack size of the method corresponding to these +++ * code attributes, or -1 if these attributes are not code +++ * attributes. +++ * @param maxLocals +++ * the maximum number of local variables of the method +++ * corresponding to these code attributes, or -1 if these +++ * attributes are not code attributes. +++ * @return the size of all the attributes in this attribute list. This size +++ * includes the size of the attribute headers. +++ */ +++ final int getSize(final ClassWriter cw, final byte[] code, final int len, +++ final int maxStack, final int maxLocals) { +++ Attribute attr = this; +++ int size = 0; +++ while (attr != null) { +++ cw.newUTF8(attr.type); +++ size += attr.write(cw, code, len, maxStack, maxLocals).length + 6; +++ attr = attr.next; +++ } +++ return size; +++ } +++ +++ /** +++ * Writes all the attributes of this attribute list in the given byte +++ * vector. +++ * +++ * @param cw +++ * the class writer to be used to convert the attributes into +++ * byte arrays, with the {@link #write write} method. 
+++ * @param code +++ * the bytecode of the method corresponding to these code +++ * attributes, or null if these attributes are not code +++ * attributes. +++ * @param len +++ * the length of the bytecode of the method corresponding to +++ * these code attributes, or null if these attributes +++ * are not code attributes. +++ * @param maxStack +++ * the maximum stack size of the method corresponding to these +++ * code attributes, or -1 if these attributes are not code +++ * attributes. +++ * @param maxLocals +++ * the maximum number of local variables of the method +++ * corresponding to these code attributes, or -1 if these +++ * attributes are not code attributes. +++ * @param out +++ * where the attributes must be written. +++ */ +++ final void put(final ClassWriter cw, final byte[] code, final int len, +++ final int maxStack, final int maxLocals, final ByteVector out) { +++ Attribute attr = this; +++ while (attr != null) { +++ ByteVector b = attr.write(cw, code, len, maxStack, maxLocals); +++ out.putShort(cw.newUTF8(attr.type)).putInt(b.length); +++ out.putByteArray(b.data, 0, b.length); +++ attr = attr.next; +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/ByteVector.java b/contrib/asm/src/org/objectweb/asm/ByteVector.java ++new file mode 100644 ++index 0000000..9c532be ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/ByteVector.java ++@@ -0,0 +1,339 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. 
Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A dynamically extensible vector of bytes. This class is roughly equivalent to +++ * a DataOutputStream on top of a ByteArrayOutputStream, but is more efficient. +++ * +++ * @author Eric Bruneton +++ */ +++public class ByteVector { +++ +++ /** +++ * The content of this vector. +++ */ +++ byte[] data; +++ +++ /** +++ * Actual number of bytes in this vector. +++ */ +++ int length; +++ +++ /** +++ * Constructs a new {@link ByteVector ByteVector} with a default initial +++ * size. +++ */ +++ public ByteVector() { +++ data = new byte[64]; +++ } +++ +++ /** +++ * Constructs a new {@link ByteVector ByteVector} with the given initial +++ * size. 
+++ * +++ * @param initialSize +++ * the initial size of the byte vector to be constructed. +++ */ +++ public ByteVector(final int initialSize) { +++ data = new byte[initialSize]; +++ } +++ +++ /** +++ * Puts a byte into this byte vector. The byte vector is automatically +++ * enlarged if necessary. +++ * +++ * @param b +++ * a byte. +++ * @return this byte vector. +++ */ +++ public ByteVector putByte(final int b) { +++ int length = this.length; +++ if (length + 1 > data.length) { +++ enlarge(1); +++ } +++ data[length++] = (byte) b; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts two bytes into this byte vector. The byte vector is automatically +++ * enlarged if necessary. +++ * +++ * @param b1 +++ * a byte. +++ * @param b2 +++ * another byte. +++ * @return this byte vector. +++ */ +++ ByteVector put11(final int b1, final int b2) { +++ int length = this.length; +++ if (length + 2 > data.length) { +++ enlarge(2); +++ } +++ byte[] data = this.data; +++ data[length++] = (byte) b1; +++ data[length++] = (byte) b2; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts a short into this byte vector. The byte vector is automatically +++ * enlarged if necessary. +++ * +++ * @param s +++ * a short. +++ * @return this byte vector. +++ */ +++ public ByteVector putShort(final int s) { +++ int length = this.length; +++ if (length + 2 > data.length) { +++ enlarge(2); +++ } +++ byte[] data = this.data; +++ data[length++] = (byte) (s >>> 8); +++ data[length++] = (byte) s; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts a byte and a short into this byte vector. The byte vector is +++ * automatically enlarged if necessary. +++ * +++ * @param b +++ * a byte. +++ * @param s +++ * a short. +++ * @return this byte vector. 
+++ */ +++ ByteVector put12(final int b, final int s) { +++ int length = this.length; +++ if (length + 3 > data.length) { +++ enlarge(3); +++ } +++ byte[] data = this.data; +++ data[length++] = (byte) b; +++ data[length++] = (byte) (s >>> 8); +++ data[length++] = (byte) s; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts an int into this byte vector. The byte vector is automatically +++ * enlarged if necessary. +++ * +++ * @param i +++ * an int. +++ * @return this byte vector. +++ */ +++ public ByteVector putInt(final int i) { +++ int length = this.length; +++ if (length + 4 > data.length) { +++ enlarge(4); +++ } +++ byte[] data = this.data; +++ data[length++] = (byte) (i >>> 24); +++ data[length++] = (byte) (i >>> 16); +++ data[length++] = (byte) (i >>> 8); +++ data[length++] = (byte) i; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts a long into this byte vector. The byte vector is automatically +++ * enlarged if necessary. +++ * +++ * @param l +++ * a long. +++ * @return this byte vector. +++ */ +++ public ByteVector putLong(final long l) { +++ int length = this.length; +++ if (length + 8 > data.length) { +++ enlarge(8); +++ } +++ byte[] data = this.data; +++ int i = (int) (l >>> 32); +++ data[length++] = (byte) (i >>> 24); +++ data[length++] = (byte) (i >>> 16); +++ data[length++] = (byte) (i >>> 8); +++ data[length++] = (byte) i; +++ i = (int) l; +++ data[length++] = (byte) (i >>> 24); +++ data[length++] = (byte) (i >>> 16); +++ data[length++] = (byte) (i >>> 8); +++ data[length++] = (byte) i; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts an UTF8 string into this byte vector. The byte vector is +++ * automatically enlarged if necessary. +++ * +++ * @param s +++ * a String whose UTF8 encoded length must be less than 65536. +++ * @return this byte vector. 
+++ */ +++ public ByteVector putUTF8(final String s) { +++ int charLength = s.length(); +++ if (charLength > 65535) { +++ throw new IllegalArgumentException(); +++ } +++ int len = length; +++ if (len + 2 + charLength > data.length) { +++ enlarge(2 + charLength); +++ } +++ byte[] data = this.data; +++ // optimistic algorithm: instead of computing the byte length and then +++ // serializing the string (which requires two loops), we assume the byte +++ // length is equal to char length (which is the most frequent case), and +++ // we start serializing the string right away. During the serialization, +++ // if we find that this assumption is wrong, we continue with the +++ // general method. +++ data[len++] = (byte) (charLength >>> 8); +++ data[len++] = (byte) charLength; +++ for (int i = 0; i < charLength; ++i) { +++ char c = s.charAt(i); +++ if (c >= '\001' && c <= '\177') { +++ data[len++] = (byte) c; +++ } else { +++ length = len; +++ return encodeUTF8(s, i, 65535); +++ } +++ } +++ length = len; +++ return this; +++ } +++ +++ /** +++ * Puts an UTF8 string into this byte vector. The byte vector is +++ * automatically enlarged if necessary. The string length is encoded in two +++ * bytes before the encoded characters, if there is space for that (i.e. if +++ * this.length - i - 2 >= 0). +++ * +++ * @param s +++ * the String to encode. +++ * @param i +++ * the index of the first character to encode. The previous +++ * characters are supposed to have already been encoded, using +++ * only one byte per character. +++ * @param maxByteLength +++ * the maximum byte length of the encoded string, including the +++ * already encoded characters. +++ * @return this byte vector. 
+++ */ +++ ByteVector encodeUTF8(final String s, int i, int maxByteLength) { +++ int charLength = s.length(); +++ int byteLength = i; +++ char c; +++ for (int j = i; j < charLength; ++j) { +++ c = s.charAt(j); +++ if (c >= '\001' && c <= '\177') { +++ byteLength++; +++ } else if (c > '\u07FF') { +++ byteLength += 3; +++ } else { +++ byteLength += 2; +++ } +++ } +++ if (byteLength > maxByteLength) { +++ throw new IllegalArgumentException(); +++ } +++ int start = length - i - 2; +++ if (start >= 0) { +++ data[start] = (byte) (byteLength >>> 8); +++ data[start + 1] = (byte) byteLength; +++ } +++ if (length + byteLength - i > data.length) { +++ enlarge(byteLength - i); +++ } +++ int len = length; +++ for (int j = i; j < charLength; ++j) { +++ c = s.charAt(j); +++ if (c >= '\001' && c <= '\177') { +++ data[len++] = (byte) c; +++ } else if (c > '\u07FF') { +++ data[len++] = (byte) (0xE0 | c >> 12 & 0xF); +++ data[len++] = (byte) (0x80 | c >> 6 & 0x3F); +++ data[len++] = (byte) (0x80 | c & 0x3F); +++ } else { +++ data[len++] = (byte) (0xC0 | c >> 6 & 0x1F); +++ data[len++] = (byte) (0x80 | c & 0x3F); +++ } +++ } +++ length = len; +++ return this; +++ } +++ +++ /** +++ * Puts an array of bytes into this byte vector. The byte vector is +++ * automatically enlarged if necessary. +++ * +++ * @param b +++ * an array of bytes. May be null to put len +++ * null bytes into this byte vector. +++ * @param off +++ * index of the fist byte of b that must be copied. +++ * @param len +++ * number of bytes of b that must be copied. +++ * @return this byte vector. +++ */ +++ public ByteVector putByteArray(final byte[] b, final int off, final int len) { +++ if (length + len > data.length) { +++ enlarge(len); +++ } +++ if (b != null) { +++ System.arraycopy(b, off, data, length, len); +++ } +++ length += len; +++ return this; +++ } +++ +++ /** +++ * Enlarge this byte vector so that it can receive n more bytes. 
+++ * +++ * @param size +++ * number of additional bytes that this byte vector should be +++ * able to receive. +++ */ +++ private void enlarge(final int size) { +++ int length1 = 2 * data.length; +++ int length2 = length + size; +++ byte[] newData = new byte[length1 > length2 ? length1 : length2]; +++ System.arraycopy(data, 0, newData, 0, length); +++ data = newData; +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/ClassReader.java b/contrib/asm/src/org/objectweb/asm/ClassReader.java ++new file mode 100644 ++index 0000000..e23fd60 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/ClassReader.java ++@@ -0,0 +1,2506 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++import java.io.IOException; +++import java.io.InputStream; +++ +++/** +++ * A Java class parser to make a {@link ClassVisitor} visit an existing class. +++ * This class parses a byte array conforming to the Java class file format and +++ * calls the appropriate visit methods of a given class visitor for each field, +++ * method and bytecode instruction encountered. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++public class ClassReader { +++ +++ /** +++ * True to enable signatures support. +++ */ +++ static final boolean SIGNATURES = true; +++ +++ /** +++ * True to enable annotations support. +++ */ +++ static final boolean ANNOTATIONS = true; +++ +++ /** +++ * True to enable stack map frames support. +++ */ +++ static final boolean FRAMES = true; +++ +++ /** +++ * True to enable bytecode writing support. +++ */ +++ static final boolean WRITER = true; +++ +++ /** +++ * True to enable JSR_W and GOTO_W support. +++ */ +++ static final boolean RESIZE = true; +++ +++ /** +++ * Flag to skip method code. If this class is set CODE +++ * attribute won't be visited. This can be used, for example, to retrieve +++ * annotations for methods and method parameters. +++ */ +++ public static final int SKIP_CODE = 1; +++ +++ /** +++ * Flag to skip the debug information in the class. If this flag is set the +++ * debug information of the class is not visited, i.e. 
the +++ * {@link MethodVisitor#visitLocalVariable visitLocalVariable} and +++ * {@link MethodVisitor#visitLineNumber visitLineNumber} methods will not be +++ * called. +++ */ +++ public static final int SKIP_DEBUG = 2; +++ +++ /** +++ * Flag to skip the stack map frames in the class. If this flag is set the +++ * stack map frames of the class are not visited, i.e. the +++ * {@link MethodVisitor#visitFrame visitFrame} method will not be called. +++ * This flag is useful when the {@link ClassWriter#COMPUTE_FRAMES} option is +++ * used: it avoids visiting frames that will be ignored and recomputed from +++ * scratch in the class writer. +++ */ +++ public static final int SKIP_FRAMES = 4; +++ +++ /** +++ * Flag to expand the stack map frames. By default stack map frames are +++ * visited in their original format (i.e. "expanded" for classes whose +++ * version is less than V1_6, and "compressed" for the other classes). If +++ * this flag is set, stack map frames are always visited in expanded format +++ * (this option adds a decompression/recompression step in ClassReader and +++ * ClassWriter which degrades performance quite a lot). +++ */ +++ public static final int EXPAND_FRAMES = 8; +++ +++ /** +++ * The class to be parsed. The content of this array must not be +++ * modified. This field is intended for {@link Attribute} sub classes, and +++ * is normally not needed by class generators or adapters. +++ */ +++ public final byte[] b; +++ +++ /** +++ * The start index of each constant pool item in {@link #b b}, plus one. The +++ * one byte offset skips the constant pool item tag that indicates its type. +++ */ +++ private final int[] items; +++ +++ /** +++ * The String objects corresponding to the CONSTANT_Utf8 items. This cache +++ * avoids multiple parsing of a given CONSTANT_Utf8 constant pool item, +++ * which GREATLY improves performances (by a factor 2 to 3).
This caching +++ * strategy could be extended to all constant pool items, but its benefit +++ * would not be so great for these items (because they are much less +++ * expensive to parse than CONSTANT_Utf8 items). +++ */ +++ private final String[] strings; +++ +++ /** +++ * Upper bound on the length of the strings contained in the constant pool +++ * of the class (the largest CONSTANT_Utf8 entry size, overhead included). +++ */ +++ private final int maxStringLength; +++ +++ /** +++ * Start index of the class header information (access, name...) in +++ * {@link #b b}. +++ */ +++ public final int header; +++ +++ // ------------------------------------------------------------------------ +++ // Constructors +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new {@link ClassReader} object. +++ * +++ * @param b +++ * the bytecode of the class to be read. +++ */ +++ public ClassReader(final byte[] b) { +++ this(b, 0, b.length); /* the class data spans the whole array */ +++ } +++ +++ /** +++ * Constructs a new {@link ClassReader} object. +++ * +++ * @param b +++ * the bytecode of the class to be read. +++ * @param off +++ * the start offset of the class data. +++ * @param len +++ * the length of the class data.
+++ */ +++ public ClassReader(final byte[] b, final int off, final int len) { /* NOTE(review): len is never read in this constructor body */ +++ this.b = b; +++ // checks the class version +++ if (readShort(off + 6) > Opcodes.V1_8) { +++ throw new IllegalArgumentException(); +++ } +++ // parses the constant pool +++ items = new int[readUnsignedShort(off + 8)]; +++ int n = items.length; +++ strings = new String[n]; +++ int max = 0; +++ int index = off + 10; +++ for (int i = 1; i < n; ++i) { +++ items[i] = index + 1; /* position just past the entry's tag byte */ +++ int size; +++ switch (b[index]) { +++ case ClassWriter.FIELD: +++ case ClassWriter.METH: +++ case ClassWriter.IMETH: +++ case ClassWriter.INT: +++ case ClassWriter.FLOAT: +++ case ClassWriter.NAME_TYPE: +++ case ClassWriter.INDY: +++ size = 5; +++ break; +++ case ClassWriter.LONG: +++ case ClassWriter.DOUBLE: +++ size = 9; +++ ++i; /* the pool index after a long/double is skipped; its items[] slot stays 0 */ +++ break; +++ case ClassWriter.UTF8: +++ size = 3 + readUnsignedShort(index + 1); /* 3 bytes of overhead plus the payload length stored at index + 1 */ +++ if (size > max) { +++ max = size; +++ } +++ break; +++ case ClassWriter.HANDLE: +++ size = 4; +++ break; +++ // case ClassWriter.CLASS: +++ // case ClassWriter.STR: +++ // case ClassWriter.MTYPE +++ default: +++ size = 3; +++ break; +++ } +++ index += size; +++ } +++ maxStringLength = max; /* largest Utf8 entry size seen, overhead included */ +++ // the class header information starts just after the constant pool +++ header = index; +++ } +++ +++ /** +++ * Returns the class's access flags (see {@link Opcodes}). This value may +++ * not reflect Deprecated and Synthetic flags when bytecode is before 1.5 +++ * and those flags are represented by attributes. +++ * +++ * @return the class access flags +++ * +++ * @see ClassVisitor#visit(int, int, String, String, String, String[]) +++ */ +++ public int getAccess() { +++ return readUnsignedShort(header); +++ } +++ +++ /** +++ * Returns the internal name of the class (see +++ * {@link Type#getInternalName() getInternalName}).
+++ * +++ * @return the internal class name +++ * +++ * @see ClassVisitor#visit(int, int, String, String, String, String[]) +++ */ +++ public String getClassName() { +++ return readClass(header + 2, new char[maxStringLength]); +++ } +++ +++ /** +++ * Returns the internal name of the super class (see +++ * {@link Type#getInternalName() getInternalName}). For interfaces, the +++ * super class is {@link Object}. +++ * +++ * @return the internal name of the super class, or null for +++ * {@link Object} class. +++ * +++ * @see ClassVisitor#visit(int, int, String, String, String, String[]) +++ */ +++ public String getSuperName() { +++ return readClass(header + 4, new char[maxStringLength]); +++ } +++ +++ /** +++ * Returns the internal names of the class's interfaces (see +++ * {@link Type#getInternalName() getInternalName}). +++ * +++ * @return the array of internal names for all implemented interfaces +++ * (an empty array if there are none; never null). +++ * +++ * @see ClassVisitor#visit(int, int, String, String, String, String[]) +++ */ +++ public String[] getInterfaces() { +++ int index = header + 6; +++ int n = readUnsignedShort(index); +++ String[] interfaces = new String[n]; +++ if (n > 0) { /* allocate the scratch buffer only when there is something to read */ +++ char[] buf = new char[maxStringLength]; +++ for (int i = 0; i < n; ++i) { +++ index += 2; +++ interfaces[i] = readClass(index, buf); +++ } +++ } +++ return interfaces; +++ } +++ +++ /** +++ * Copies the constant pool data into the given {@link ClassWriter}. Should +++ * be called before the {@link #accept(ClassVisitor,int)} method. +++ * +++ * @param classWriter +++ * the {@link ClassWriter} to copy constant pool into.
+++ */ +++ void copyPool(final ClassWriter classWriter) { +++ char[] buf = new char[maxStringLength]; +++ int ll = items.length; +++ Item[] items2 = new Item[ll]; +++ for (int i = 1; i < ll; i++) { +++ int index = items[i]; +++ int tag = b[index - 1]; /* items[i] is one past the tag byte, so step back to read it */ +++ Item item = new Item(i); +++ int nameType; +++ switch (tag) { +++ case ClassWriter.FIELD: +++ case ClassWriter.METH: +++ case ClassWriter.IMETH: +++ nameType = items[readUnsignedShort(index + 2)]; +++ item.set(tag, readClass(index, buf), readUTF8(nameType, buf), +++ readUTF8(nameType + 2, buf)); +++ break; +++ case ClassWriter.INT: +++ item.set(readInt(index)); +++ break; +++ case ClassWriter.FLOAT: +++ item.set(Float.intBitsToFloat(readInt(index))); +++ break; +++ case ClassWriter.NAME_TYPE: +++ item.set(tag, readUTF8(index, buf), readUTF8(index + 2, buf), +++ null); +++ break; +++ case ClassWriter.LONG: +++ item.set(readLong(index)); +++ ++i; /* also skip the pool index that follows a long */ +++ break; +++ case ClassWriter.DOUBLE: +++ item.set(Double.longBitsToDouble(readLong(index))); +++ ++i; /* also skip the pool index that follows a double */ +++ break; +++ case ClassWriter.UTF8: { +++ String s = strings[i]; +++ if (s == null) { +++ index = items[i]; +++ s = strings[i] = readUTF(index + 2, +++ readUnsignedShort(index), buf); +++ } +++ item.set(tag, s, null, null); +++ break; +++ } +++ case ClassWriter.HANDLE: { +++ int fieldOrMethodRef = items[readUnsignedShort(index + 1)]; +++ nameType = items[readUnsignedShort(fieldOrMethodRef + 2)]; +++ item.set(ClassWriter.HANDLE_BASE + readByte(index), +++ readClass(fieldOrMethodRef, buf), +++ readUTF8(nameType, buf), readUTF8(nameType + 2, buf)); +++ break; +++ } +++ case ClassWriter.INDY: +++ if (classWriter.bootstrapMethods == null) { +++ copyBootstrapMethods(classWriter, items2, buf); +++ } +++ nameType = items[readUnsignedShort(index + 2)]; +++ item.set(readUTF8(nameType, buf), readUTF8(nameType + 2, buf), +++ readUnsignedShort(index)); +++ break; +++ // case ClassWriter.STR: +++ // case ClassWriter.CLASS: +++ // case ClassWriter.MTYPE +++ default: +++ item.set(tag,
readUTF8(index, buf), null, null); +++ break; +++ } +++ +++ int index2 = item.hashCode % items2.length; +++ item.next = items2[index2]; +++ items2[index2] = item; /* the new item becomes the head of its bucket chain */ +++ } +++ +++ int off = items[1] - 1; /* offset of the first constant pool entry's tag byte */ +++ classWriter.pool.putByteArray(b, off, header - off); /* raw copy of every byte from the first entry up to the class header */ +++ classWriter.items = items2; +++ classWriter.threshold = (int) (0.75d * ll); +++ classWriter.index = ll; +++ } +++ +++ /** +++ * Copies the bootstrap method data into the given {@link ClassWriter}. +++ * Should be called before the {@link #accept(ClassVisitor,int)} method. +++ * +++ * @param classWriter +++ * the {@link ClassWriter} to copy bootstrap methods into. +++ */ +++ private void copyBootstrapMethods(final ClassWriter classWriter, +++ final Item[] items, final char[] c) { /* items: hash table that receives one Item per bootstrap method; c: scratch buffer used when reading constants */ +++ // finds the "BootstrapMethods" attribute +++ int u = getAttributes(); +++ boolean found = false; +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ String attrName = readUTF8(u + 2, c); +++ if ("BootstrapMethods".equals(attrName)) { +++ found = true; +++ break; +++ } +++ u += 6 + readInt(u + 4); +++ } +++ if (!found) { +++ return; +++ } +++ // copies the bootstrap methods in the class writer +++ int boostrapMethodCount = readUnsignedShort(u + 8); +++ for (int j = 0, v = u + 10; j < boostrapMethodCount; j++) { +++ int position = v - u - 10; /* offset of this entry relative to the first bootstrap method */ +++ int hashCode = readConst(readUnsignedShort(v), c).hashCode(); +++ for (int k = readUnsignedShort(v + 2); k > 0; --k) { +++ hashCode ^= readConst(readUnsignedShort(v + 4), c).hashCode(); +++ v += 2; +++ } +++ v += 4; +++ Item item = new Item(j); +++ item.set(position, hashCode & 0x7FFFFFFF); +++ int index = item.hashCode % items.length; +++ item.next = items[index]; +++ items[index] = item; +++ } +++ int attrSize = readInt(u + 4); +++ ByteVector bootstrapMethods = new ByteVector(attrSize + 62); +++ bootstrapMethods.putByteArray(b, u + 10, attrSize - 2); /* attribute body without its 2-byte method count */ +++ classWriter.bootstrapMethodsCount = boostrapMethodCount; +++ classWriter.bootstrapMethods = bootstrapMethods; +++ } +++ +++ /**
+++ * Constructs a new {@link ClassReader} object. +++ * +++ * @param is +++ * an input stream from which to read the class. +++ * @throws IOException +++ * if a problem occurs during reading. +++ */ +++ public ClassReader(final InputStream is) throws IOException { +++ this(readClass(is, false)); /* close=false: the stream is left open for the caller */ +++ } +++ +++ /** +++ * Constructs a new {@link ClassReader} object. +++ * +++ * @param name +++ * the binary qualified name of the class to be read. +++ * @throws IOException +++ * if the class resource is not found or an exception occurs during reading. +++ */ +++ public ClassReader(final String name) throws IOException { +++ this(readClass( +++ ClassLoader.getSystemResourceAsStream(name.replace('.', '/') +++ + ".class"), true)); /* close=true: the stream was opened here, so readClass closes it */ +++ } +++ +++ /** +++ * Reads the bytecode of a class. +++ * +++ * @param is +++ * an input stream from which to read the class. +++ * @param close +++ * true to close the input stream after reading. +++ * @return the bytecode read from the given input stream. +++ * @throws IOException +++ * if a problem occurs during reading.
+++ */ +++ private static byte[] readClass(final InputStream is, boolean close) +++ throws IOException { +++ if (is == null) { +++ throw new IOException("Class not found"); +++ } +++ try { +++ byte[] b = new byte[is.available()]; /* initial size guess only; the buffer is trimmed or grown below */ +++ int len = 0; +++ while (true) { +++ int n = is.read(b, len, b.length - len); +++ if (n == -1) { /* end of stream: return exactly len bytes */ +++ if (len < b.length) { +++ byte[] c = new byte[len]; +++ System.arraycopy(b, 0, c, 0, len); +++ b = c; +++ } +++ return b; +++ } +++ len += n; +++ if (len == b.length) { +++ int last = is.read(); /* buffer full: probe one byte to tell end of stream from more data */ +++ if (last < 0) { +++ return b; +++ } +++ byte[] c = new byte[b.length + 1000]; /* grow by a fixed 1000-byte step */ +++ System.arraycopy(b, 0, c, 0, len); +++ c[len++] = (byte) last; +++ b = c; +++ } +++ } +++ } finally { +++ if (close) { +++ is.close(); +++ } +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Public methods +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Makes the given visitor visit the Java class of this {@link ClassReader}. +++ * This class is the one specified in the constructor (see +++ * {@link #ClassReader(byte[]) ClassReader}). +++ * +++ * @param classVisitor +++ * the visitor that must visit this class. +++ * @param flags +++ * option flags that can be used to modify the default behavior +++ * of this class. See {@link #SKIP_DEBUG}, {@link #EXPAND_FRAMES}, +++ * {@link #SKIP_FRAMES}, {@link #SKIP_CODE}. +++ */ +++ public void accept(final ClassVisitor classVisitor, final int flags) { +++ accept(classVisitor, new Attribute[0], flags); /* delegates with no attribute prototypes */ +++ } +++ +++ /** +++ * Makes the given visitor visit the Java class of this {@link ClassReader}. +++ * This class is the one specified in the constructor (see +++ * {@link #ClassReader(byte[]) ClassReader}). +++ * +++ * @param classVisitor +++ * the visitor that must visit this class. +++ * @param attrs +++ * prototypes of the attributes that must be parsed during the +++ * visit of the class.
Any attribute whose type is not equal to +++ * the type of one the prototypes will not be parsed: its byte +++ * array value will be passed unchanged to the ClassWriter. +++ * This may corrupt it if this value contains references to +++ * the constant pool, or has syntactic or semantic links with a +++ * class element that has been transformed by a class adapter +++ * between the reader and the writer. +++ * @param flags +++ * option flags that can be used to modify the default behavior +++ * of this class. See {@link #SKIP_DEBUG}, {@link #EXPAND_FRAMES} +++ * , {@link #SKIP_FRAMES}, {@link #SKIP_CODE}. +++ */ +++ public void accept(final ClassVisitor classVisitor, +++ final Attribute[] attrs, final int flags) { +++ int u = header; // current offset in the class file +++ char[] c = new char[maxStringLength]; // buffer used to read strings +++ +++ Context context = new Context(); +++ context.attrs = attrs; +++ context.flags = flags; +++ context.buffer = c; +++ +++ // reads the class declaration +++ int access = readUnsignedShort(u); +++ String name = readClass(u + 2, c); +++ String superClass = readClass(u + 4, c); +++ String[] interfaces = new String[readUnsignedShort(u + 6)]; +++ u += 8; +++ for (int i = 0; i < interfaces.length; ++i) { +++ interfaces[i] = readClass(u, c); +++ u += 2; +++ } +++ +++ // reads the class attributes +++ String signature = null; +++ String sourceFile = null; +++ String sourceDebug = null; +++ String enclosingOwner = null; +++ String enclosingName = null; +++ String enclosingDesc = null; +++ int anns = 0; +++ int ianns = 0; +++ int tanns = 0; +++ int itanns = 0; +++ int innerClasses = 0; +++ Attribute attributes = null; +++ +++ u = getAttributes(); +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ String attrName = readUTF8(u + 2, c); +++ // tests are sorted in decreasing frequency order +++ // (based on frequencies observed on typical classes) +++ if ("SourceFile".equals(attrName)) { +++ sourceFile = readUTF8(u + 8, c); +++ } 
else if ("InnerClasses".equals(attrName)) { +++ innerClasses = u + 8; +++ } else if ("EnclosingMethod".equals(attrName)) { +++ enclosingOwner = readClass(u + 8, c); +++ int item = readUnsignedShort(u + 10); +++ if (item != 0) { +++ enclosingName = readUTF8(items[item], c); +++ enclosingDesc = readUTF8(items[item] + 2, c); +++ } +++ } else if (SIGNATURES && "Signature".equals(attrName)) { +++ signature = readUTF8(u + 8, c); +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleAnnotations".equals(attrName)) { +++ anns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleTypeAnnotations".equals(attrName)) { +++ tanns = u + 8; +++ } else if ("Deprecated".equals(attrName)) { +++ access |= Opcodes.ACC_DEPRECATED; +++ } else if ("Synthetic".equals(attrName)) { +++ access |= Opcodes.ACC_SYNTHETIC +++ | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE; +++ } else if ("SourceDebugExtension".equals(attrName)) { +++ int len = readInt(u + 4); +++ sourceDebug = readUTF(u + 8, len, new char[len]); +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleAnnotations".equals(attrName)) { +++ ianns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleTypeAnnotations".equals(attrName)) { +++ itanns = u + 8; +++ } else if ("BootstrapMethods".equals(attrName)) { +++ int[] bootstrapMethods = new int[readUnsignedShort(u + 8)]; +++ for (int j = 0, v = u + 10; j < bootstrapMethods.length; j++) { +++ bootstrapMethods[j] = v; +++ v += 2 + readUnsignedShort(v + 2) << 1; +++ } +++ context.bootstrapMethods = bootstrapMethods; +++ } else { +++ Attribute attr = readAttribute(attrs, attrName, u + 8, +++ readInt(u + 4), c, -1, null); +++ if (attr != null) { +++ attr.next = attributes; +++ attributes = attr; +++ } +++ } +++ u += 6 + readInt(u + 4); +++ } +++ +++ // visits the class declaration +++ classVisitor.visit(readInt(items[1] - 7), access, name, signature, +++ superClass, interfaces); +++ +++ // visits the source and debug info +++ if ((flags & SKIP_DEBUG) == 0 +++ && (sourceFile != null || sourceDebug 
!= null)) { +++ classVisitor.visitSource(sourceFile, sourceDebug); +++ } +++ +++ // visits the outer class +++ if (enclosingOwner != null) { +++ classVisitor.visitOuterClass(enclosingOwner, enclosingName, +++ enclosingDesc); +++ } +++ +++ // visits the class annotations and type annotations +++ if (ANNOTATIONS && anns != 0) { +++ for (int i = readUnsignedShort(anns), v = anns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ classVisitor.visitAnnotation(readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && ianns != 0) { +++ for (int i = readUnsignedShort(ianns), v = ianns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ classVisitor.visitAnnotation(readUTF8(v, c), false)); +++ } +++ } +++ if (ANNOTATIONS && tanns != 0) { +++ for (int i = readUnsignedShort(tanns), v = tanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ classVisitor.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && itanns != 0) { +++ for (int i = readUnsignedShort(itanns), v = itanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ classVisitor.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), false)); +++ } +++ } +++ +++ // visits the attributes +++ while (attributes != null) { +++ Attribute attr = attributes.next; +++ attributes.next = null; +++ classVisitor.visitAttribute(attributes); +++ attributes = attr; +++ } +++ +++ // visits the inner classes +++ if (innerClasses != 0) { +++ int v = innerClasses + 2; +++ for (int i = readUnsignedShort(innerClasses); i > 0; --i) { +++ classVisitor.visitInnerClass(readClass(v, c), +++ readClass(v + 2, c), readUTF8(v + 4, c), +++ readUnsignedShort(v + 6)); +++ v += 8; +++ } +++ } +++ +++ // visits the fields and methods +++ u = header + 10 + 2 * interfaces.length; +++ for (int i = readUnsignedShort(u - 2); 
i > 0; --i) { +++ u = readField(classVisitor, context, u); +++ } +++ u += 2; +++ for (int i = readUnsignedShort(u - 2); i > 0; --i) { +++ u = readMethod(classVisitor, context, u); +++ } +++ +++ // visits the end of the class +++ classVisitor.visitEnd(); +++ } +++ +++ /** +++ * Reads a field and makes the given visitor visit it. +++ * +++ * @param classVisitor +++ * the visitor that must visit the field. +++ * @param context +++ * information about the class being parsed. +++ * @param u +++ * the start offset of the field in the class file. +++ * @return the offset of the first byte following the field in the class. +++ */ +++ private int readField(final ClassVisitor classVisitor, +++ final Context context, int u) { +++ // reads the field declaration +++ char[] c = context.buffer; +++ int access = readUnsignedShort(u); +++ String name = readUTF8(u + 2, c); +++ String desc = readUTF8(u + 4, c); +++ u += 6; +++ +++ // reads the field attributes +++ String signature = null; +++ int anns = 0; +++ int ianns = 0; +++ int tanns = 0; +++ int itanns = 0; +++ Object value = null; +++ Attribute attributes = null; +++ +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ String attrName = readUTF8(u + 2, c); +++ // tests are sorted in decreasing frequency order +++ // (based on frequencies observed on typical classes) +++ if ("ConstantValue".equals(attrName)) { +++ int item = readUnsignedShort(u + 8); +++ value = item == 0 ? 
null : readConst(item, c); +++ } else if (SIGNATURES && "Signature".equals(attrName)) { +++ signature = readUTF8(u + 8, c); +++ } else if ("Deprecated".equals(attrName)) { +++ access |= Opcodes.ACC_DEPRECATED; +++ } else if ("Synthetic".equals(attrName)) { +++ access |= Opcodes.ACC_SYNTHETIC +++ | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleAnnotations".equals(attrName)) { +++ anns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleTypeAnnotations".equals(attrName)) { +++ tanns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleAnnotations".equals(attrName)) { +++ ianns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleTypeAnnotations".equals(attrName)) { +++ itanns = u + 8; +++ } else { +++ Attribute attr = readAttribute(context.attrs, attrName, u + 8, +++ readInt(u + 4), c, -1, null); +++ if (attr != null) { +++ attr.next = attributes; +++ attributes = attr; +++ } +++ } +++ u += 6 + readInt(u + 4); +++ } +++ u += 2; +++ +++ // visits the field declaration +++ FieldVisitor fv = classVisitor.visitField(access, name, desc, +++ signature, value); +++ if (fv == null) { +++ return u; +++ } +++ +++ // visits the field annotations and type annotations +++ if (ANNOTATIONS && anns != 0) { +++ for (int i = readUnsignedShort(anns), v = anns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ fv.visitAnnotation(readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && ianns != 0) { +++ for (int i = readUnsignedShort(ianns), v = ianns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ fv.visitAnnotation(readUTF8(v, c), false)); +++ } +++ } +++ if (ANNOTATIONS && tanns != 0) { +++ for (int i = readUnsignedShort(tanns), v = tanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ fv.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && itanns != 0) { +++ for 
(int i = readUnsignedShort(itanns), v = itanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ fv.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), false)); +++ } +++ } +++ +++ // visits the field attributes +++ while (attributes != null) { +++ Attribute attr = attributes.next; +++ attributes.next = null; +++ fv.visitAttribute(attributes); +++ attributes = attr; +++ } +++ +++ // visits the end of the field +++ fv.visitEnd(); +++ +++ return u; +++ } +++ +++ /** +++ * Reads a method and makes the given visitor visit it. +++ * +++ * @param classVisitor +++ * the visitor that must visit the method. +++ * @param context +++ * information about the class being parsed. +++ * @param u +++ * the start offset of the method in the class file. +++ * @return the offset of the first byte following the method in the class. +++ */ +++ private int readMethod(final ClassVisitor classVisitor, +++ final Context context, int u) { +++ // reads the method declaration +++ char[] c = context.buffer; +++ context.access = readUnsignedShort(u); +++ context.name = readUTF8(u + 2, c); +++ context.desc = readUTF8(u + 4, c); +++ u += 6; +++ +++ // reads the method attributes +++ int code = 0; +++ int exception = 0; +++ String[] exceptions = null; +++ String signature = null; +++ int methodParameters = 0; +++ int anns = 0; +++ int ianns = 0; +++ int tanns = 0; +++ int itanns = 0; +++ int dann = 0; +++ int mpanns = 0; +++ int impanns = 0; +++ int firstAttribute = u; +++ Attribute attributes = null; +++ +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ String attrName = readUTF8(u + 2, c); +++ // tests are sorted in decreasing frequency order +++ // (based on frequencies observed on typical classes) +++ if ("Code".equals(attrName)) { +++ if ((context.flags & SKIP_CODE) == 0) { +++ code = u + 8; +++ } +++ } else if ("Exceptions".equals(attrName)) { +++ exceptions = new String[readUnsignedShort(u + 8)]; +++ 
exception = u + 10; +++ for (int j = 0; j < exceptions.length; ++j) { +++ exceptions[j] = readClass(exception, c); +++ exception += 2; +++ } +++ } else if (SIGNATURES && "Signature".equals(attrName)) { +++ signature = readUTF8(u + 8, c); +++ } else if ("Deprecated".equals(attrName)) { +++ context.access |= Opcodes.ACC_DEPRECATED; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleAnnotations".equals(attrName)) { +++ anns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleTypeAnnotations".equals(attrName)) { +++ tanns = u + 8; +++ } else if (ANNOTATIONS && "AnnotationDefault".equals(attrName)) { +++ dann = u + 8; +++ } else if ("Synthetic".equals(attrName)) { +++ context.access |= Opcodes.ACC_SYNTHETIC +++ | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleAnnotations".equals(attrName)) { +++ ianns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleTypeAnnotations".equals(attrName)) { +++ itanns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleParameterAnnotations".equals(attrName)) { +++ mpanns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleParameterAnnotations".equals(attrName)) { +++ impanns = u + 8; +++ } else if ("MethodParameters".equals(attrName)) { +++ methodParameters = u + 8; +++ } else { +++ Attribute attr = readAttribute(context.attrs, attrName, u + 8, +++ readInt(u + 4), c, -1, null); +++ if (attr != null) { +++ attr.next = attributes; +++ attributes = attr; +++ } +++ } +++ u += 6 + readInt(u + 4); +++ } +++ u += 2; +++ +++ // visits the method declaration +++ MethodVisitor mv = classVisitor.visitMethod(context.access, +++ context.name, context.desc, signature, exceptions); +++ if (mv == null) { +++ return u; +++ } +++ +++ /* +++ * if the returned MethodVisitor is in fact a MethodWriter, it means +++ * there is no method adapter between the reader and the writer. 
If, in +++ * addition, the writer's constant pool was copied from this reader +++ * (mw.cw.cr == this), and the signature and exceptions of the method +++ * have not been changed, then it is possible to skip all visit events +++ * and just copy the original code of the method to the writer (the +++ * access, name and descriptor can have been changed, this is not +++ * important since they are not copied as is from the reader). +++ */ +++ if (WRITER && mv instanceof MethodWriter) { +++ MethodWriter mw = (MethodWriter) mv; +++ if (mw.cw.cr == this && signature == mw.signature) { +++ boolean sameExceptions = false; +++ if (exceptions == null) { +++ sameExceptions = mw.exceptionCount == 0; +++ } else if (exceptions.length == mw.exceptionCount) { +++ sameExceptions = true; +++ for (int j = exceptions.length - 1; j >= 0; --j) { +++ exception -= 2; +++ if (mw.exceptions[j] != readUnsignedShort(exception)) { +++ sameExceptions = false; +++ break; +++ } +++ } +++ } +++ if (sameExceptions) { +++ /* +++ * we do not copy directly the code into MethodWriter to +++ * save a byte array copy operation. The real copy will be +++ * done in ClassWriter.toByteArray(). 
+++ */ +++ mw.classReaderOffset = firstAttribute; +++ mw.classReaderLength = u - firstAttribute; +++ return u; +++ } +++ } +++ } +++ +++ // visit the method parameters +++ if (methodParameters != 0) { +++ for (int i = b[methodParameters] & 0xFF, v = methodParameters + 1; i > 0; --i, v = v + 4) { +++ mv.visitParameter(readUTF8(v, c), readUnsignedShort(v + 2)); +++ } +++ } +++ +++ // visits the method annotations +++ if (ANNOTATIONS && dann != 0) { +++ AnnotationVisitor dv = mv.visitAnnotationDefault(); +++ readAnnotationValue(dann, c, null, dv); +++ if (dv != null) { +++ dv.visitEnd(); +++ } +++ } +++ if (ANNOTATIONS && anns != 0) { +++ for (int i = readUnsignedShort(anns), v = anns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitAnnotation(readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && ianns != 0) { +++ for (int i = readUnsignedShort(ianns), v = ianns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitAnnotation(readUTF8(v, c), false)); +++ } +++ } +++ if (ANNOTATIONS && tanns != 0) { +++ for (int i = readUnsignedShort(tanns), v = tanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && itanns != 0) { +++ for (int i = readUnsignedShort(itanns), v = itanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), false)); +++ } +++ } +++ if (ANNOTATIONS && mpanns != 0) { +++ readParameterAnnotations(mv, context, mpanns, true); +++ } +++ if (ANNOTATIONS && impanns != 0) { +++ readParameterAnnotations(mv, context, impanns, false); +++ } +++ +++ // visits the method attributes +++ while (attributes != null) { +++ Attribute attr = attributes.next; +++ attributes.next = null; +++ 
mv.visitAttribute(attributes); +++ attributes = attr; +++ } +++ +++ // visits the method code +++ if (code != 0) { +++ mv.visitCode(); +++ readCode(mv, context, code); +++ } +++ +++ // visits the end of the method +++ mv.visitEnd(); +++ +++ return u; +++ } +++ +++ /** +++ * Reads the bytecode of a method and makes the given visitor visit it. +++ * +++ * @param mv +++ * the visitor that must visit the method's code. +++ * @param context +++ * information about the class being parsed. +++ * @param u +++ * the start offset of the code attribute in the class file. +++ */ +++ private void readCode(final MethodVisitor mv, final Context context, int u) { +++ // reads the header +++ byte[] b = this.b; +++ char[] c = context.buffer; +++ int maxStack = readUnsignedShort(u); +++ int maxLocals = readUnsignedShort(u + 2); +++ int codeLength = readInt(u + 4); +++ u += 8; +++ +++ // reads the bytecode to find the labels +++ int codeStart = u; +++ int codeEnd = u + codeLength; +++ Label[] labels = context.labels = new Label[codeLength + 2]; +++ readLabel(codeLength + 1, labels); +++ while (u < codeEnd) { +++ int offset = u - codeStart; +++ int opcode = b[u] & 0xFF; +++ switch (ClassWriter.TYPE[opcode]) { +++ case ClassWriter.NOARG_INSN: +++ case ClassWriter.IMPLVAR_INSN: +++ u += 1; +++ break; +++ case ClassWriter.LABEL_INSN: +++ readLabel(offset + readShort(u + 1), labels); +++ u += 3; +++ break; +++ case ClassWriter.LABELW_INSN: +++ readLabel(offset + readInt(u + 1), labels); +++ u += 5; +++ break; +++ case ClassWriter.WIDE_INSN: +++ opcode = b[u + 1] & 0xFF; +++ if (opcode == Opcodes.IINC) { +++ u += 6; +++ } else { +++ u += 4; +++ } +++ break; +++ case ClassWriter.TABL_INSN: +++ // skips 0 to 3 padding bytes +++ u = u + 4 - (offset & 3); +++ // reads instruction +++ readLabel(offset + readInt(u), labels); +++ for (int i = readInt(u + 8) - readInt(u + 4) + 1; i > 0; --i) { +++ readLabel(offset + readInt(u + 12), labels); +++ u += 4; +++ } +++ u += 12; +++ break; +++ case 
ClassWriter.LOOK_INSN: +++ // skips 0 to 3 padding bytes +++ u = u + 4 - (offset & 3); +++ // reads instruction +++ readLabel(offset + readInt(u), labels); +++ for (int i = readInt(u + 4); i > 0; --i) { +++ readLabel(offset + readInt(u + 12), labels); +++ u += 8; +++ } +++ u += 8; +++ break; +++ case ClassWriter.VAR_INSN: +++ case ClassWriter.SBYTE_INSN: +++ case ClassWriter.LDC_INSN: +++ u += 2; +++ break; +++ case ClassWriter.SHORT_INSN: +++ case ClassWriter.LDCW_INSN: +++ case ClassWriter.FIELDORMETH_INSN: +++ case ClassWriter.TYPE_INSN: +++ case ClassWriter.IINC_INSN: +++ u += 3; +++ break; +++ case ClassWriter.ITFMETH_INSN: +++ case ClassWriter.INDYMETH_INSN: +++ u += 5; +++ break; +++ // case MANA_INSN: +++ default: +++ u += 4; +++ break; +++ } +++ } +++ +++ // reads the try catch entries to find the labels, and also visits them +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ Label start = readLabel(readUnsignedShort(u + 2), labels); +++ Label end = readLabel(readUnsignedShort(u + 4), labels); +++ Label handler = readLabel(readUnsignedShort(u + 6), labels); +++ String type = readUTF8(items[readUnsignedShort(u + 8)], c); +++ mv.visitTryCatchBlock(start, end, handler, type); +++ u += 8; +++ } +++ u += 2; +++ +++ // reads the code attributes +++ int[] tanns = null; // start index of each visible type annotation +++ int[] itanns = null; // start index of each invisible type annotation +++ int tann = 0; // current index in tanns array +++ int itann = 0; // current index in itanns array +++ int ntoff = -1; // next visible type annotation code offset +++ int nitoff = -1; // next invisible type annotation code offset +++ int varTable = 0; +++ int varTypeTable = 0; +++ boolean zip = true; +++ boolean unzip = (context.flags & EXPAND_FRAMES) != 0; +++ int stackMap = 0; +++ int stackMapSize = 0; +++ int frameCount = 0; +++ Context frame = null; +++ Attribute attributes = null; +++ +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ String attrName = 
readUTF8(u + 2, c); +++ if ("LocalVariableTable".equals(attrName)) { +++ if ((context.flags & SKIP_DEBUG) == 0) { +++ varTable = u + 8; +++ for (int j = readUnsignedShort(u + 8), v = u; j > 0; --j) { +++ int label = readUnsignedShort(v + 10); +++ if (labels[label] == null) { +++ readLabel(label, labels).status |= Label.DEBUG; +++ } +++ label += readUnsignedShort(v + 12); +++ if (labels[label] == null) { +++ readLabel(label, labels).status |= Label.DEBUG; +++ } +++ v += 10; +++ } +++ } +++ } else if ("LocalVariableTypeTable".equals(attrName)) { +++ varTypeTable = u + 8; +++ } else if ("LineNumberTable".equals(attrName)) { +++ if ((context.flags & SKIP_DEBUG) == 0) { +++ for (int j = readUnsignedShort(u + 8), v = u; j > 0; --j) { +++ int label = readUnsignedShort(v + 10); +++ if (labels[label] == null) { +++ readLabel(label, labels).status |= Label.DEBUG; +++ } +++ Label l = labels[label]; +++ while (l.line > 0) { +++ if (l.next == null) { +++ l.next = new Label(); +++ } +++ l = l.next; +++ } +++ l.line = readUnsignedShort(v + 12); +++ v += 4; +++ } +++ } +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleTypeAnnotations".equals(attrName)) { +++ tanns = readTypeAnnotations(mv, context, u + 8, true); +++ ntoff = tanns.length == 0 || readByte(tanns[0]) < 0x43 ? -1 +++ : readUnsignedShort(tanns[0] + 1); +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleTypeAnnotations".equals(attrName)) { +++ itanns = readTypeAnnotations(mv, context, u + 8, false); +++ nitoff = itanns.length == 0 || readByte(itanns[0]) < 0x43 ? -1 +++ : readUnsignedShort(itanns[0] + 1); +++ } else if (FRAMES && "StackMapTable".equals(attrName)) { +++ if ((context.flags & SKIP_FRAMES) == 0) { +++ stackMap = u + 10; +++ stackMapSize = readInt(u + 4); +++ frameCount = readUnsignedShort(u + 8); +++ } +++ /* +++ * here we do not extract the labels corresponding to the +++ * attribute content. 
This would require a full parsing of the +++ * attribute, which would need to be repeated in the second +++ * phase (see below). Instead the content of the attribute is +++ * read one frame at a time (i.e. after a frame has been +++ * visited, the next frame is read), and the labels it contains +++ * are also extracted one frame at a time. Thanks to the +++ * ordering of frames, having only a "one frame lookahead" is +++ * not a problem, i.e. it is not possible to see an offset +++ * smaller than the offset of the current insn and for which no +++ * Label exist. +++ */ +++ /* +++ * This is not true for UNINITIALIZED type offsets. We solve +++ * this by parsing the stack map table without a full decoding +++ * (see below). +++ */ +++ } else if (FRAMES && "StackMap".equals(attrName)) { +++ if ((context.flags & SKIP_FRAMES) == 0) { +++ zip = false; +++ stackMap = u + 10; +++ stackMapSize = readInt(u + 4); +++ frameCount = readUnsignedShort(u + 8); +++ } +++ /* +++ * IMPORTANT! here we assume that the frames are ordered, as in +++ * the StackMapTable attribute, although this is not guaranteed +++ * by the attribute format. 
+++ */ +++ } else { +++ for (int j = 0; j < context.attrs.length; ++j) { +++ if (context.attrs[j].type.equals(attrName)) { +++ Attribute attr = context.attrs[j].read(this, u + 8, +++ readInt(u + 4), c, codeStart - 8, labels); +++ if (attr != null) { +++ attr.next = attributes; +++ attributes = attr; +++ } +++ } +++ } +++ } +++ u += 6 + readInt(u + 4); +++ } +++ u += 2; +++ +++ // generates the first (implicit) stack map frame +++ if (FRAMES && stackMap != 0) { +++ /* +++ * for the first explicit frame the offset is not offset_delta + 1 +++ * but only offset_delta; setting the implicit frame offset to -1 +++ * allow the use of the "offset_delta + 1" rule in all cases +++ */ +++ frame = context; +++ frame.offset = -1; +++ frame.mode = 0; +++ frame.localCount = 0; +++ frame.localDiff = 0; +++ frame.stackCount = 0; +++ frame.local = new Object[maxLocals]; +++ frame.stack = new Object[maxStack]; +++ if (unzip) { +++ getImplicitFrame(context); +++ } +++ /* +++ * Finds labels for UNINITIALIZED frame types. Instead of decoding +++ * each element of the stack map table, we look for 3 consecutive +++ * bytes that "look like" an UNINITIALIZED type (tag 8, offset +++ * within code bounds, NEW instruction at this offset). We may find +++ * false positives (i.e. not real UNINITIALIZED types), but this +++ * should be rare, and the only consequence will be the creation of +++ * an unneeded label. This is better than creating a label for each +++ * NEW instruction, and faster than fully decoding the whole stack +++ * map table. 
+++ */ +++ for (int i = stackMap; i < stackMap + stackMapSize - 2; ++i) { +++ if (b[i] == 8) { // UNINITIALIZED FRAME TYPE +++ int v = readUnsignedShort(i + 1); +++ if (v >= 0 && v < codeLength) { +++ if ((b[codeStart + v] & 0xFF) == Opcodes.NEW) { +++ readLabel(v, labels); +++ } +++ } +++ } +++ } +++ } +++ +++ // visits the instructions +++ u = codeStart; +++ while (u < codeEnd) { +++ int offset = u - codeStart; +++ +++ // visits the label and line number for this offset, if any +++ Label l = labels[offset]; +++ if (l != null) { +++ Label next = l.next; +++ l.next = null; +++ mv.visitLabel(l); +++ if ((context.flags & SKIP_DEBUG) == 0 && l.line > 0) { +++ mv.visitLineNumber(l.line, l); +++ while (next != null) { +++ mv.visitLineNumber(next.line, l); +++ next = next.next; +++ } +++ } +++ } +++ +++ // visits the frame for this offset, if any +++ while (FRAMES && frame != null +++ && (frame.offset == offset || frame.offset == -1)) { +++ // if there is a frame for this offset, makes the visitor visit +++ // it, and reads the next frame if there is one. 
+++ if (frame.offset != -1) { +++ if (!zip || unzip) { +++ mv.visitFrame(Opcodes.F_NEW, frame.localCount, +++ frame.local, frame.stackCount, frame.stack); +++ } else { +++ mv.visitFrame(frame.mode, frame.localDiff, frame.local, +++ frame.stackCount, frame.stack); +++ } +++ } +++ if (frameCount > 0) { +++ stackMap = readFrame(stackMap, zip, unzip, frame); +++ --frameCount; +++ } else { +++ frame = null; +++ } +++ } +++ +++ // visits the instruction at this offset +++ int opcode = b[u] & 0xFF; +++ switch (ClassWriter.TYPE[opcode]) { +++ case ClassWriter.NOARG_INSN: +++ mv.visitInsn(opcode); +++ u += 1; +++ break; +++ case ClassWriter.IMPLVAR_INSN: +++ if (opcode > Opcodes.ISTORE) { +++ opcode -= 59; // ISTORE_0 +++ mv.visitVarInsn(Opcodes.ISTORE + (opcode >> 2), +++ opcode & 0x3); +++ } else { +++ opcode -= 26; // ILOAD_0 +++ mv.visitVarInsn(Opcodes.ILOAD + (opcode >> 2), opcode & 0x3); +++ } +++ u += 1; +++ break; +++ case ClassWriter.LABEL_INSN: +++ mv.visitJumpInsn(opcode, labels[offset + readShort(u + 1)]); +++ u += 3; +++ break; +++ case ClassWriter.LABELW_INSN: +++ mv.visitJumpInsn(opcode - 33, labels[offset + readInt(u + 1)]); +++ u += 5; +++ break; +++ case ClassWriter.WIDE_INSN: +++ opcode = b[u + 1] & 0xFF; +++ if (opcode == Opcodes.IINC) { +++ mv.visitIincInsn(readUnsignedShort(u + 2), readShort(u + 4)); +++ u += 6; +++ } else { +++ mv.visitVarInsn(opcode, readUnsignedShort(u + 2)); +++ u += 4; +++ } +++ break; +++ case ClassWriter.TABL_INSN: { +++ // skips 0 to 3 padding bytes +++ u = u + 4 - (offset & 3); +++ // reads instruction +++ int label = offset + readInt(u); +++ int min = readInt(u + 4); +++ int max = readInt(u + 8); +++ Label[] table = new Label[max - min + 1]; +++ u += 12; +++ for (int i = 0; i < table.length; ++i) { +++ table[i] = labels[offset + readInt(u)]; +++ u += 4; +++ } +++ mv.visitTableSwitchInsn(min, max, labels[label], table); +++ break; +++ } +++ case ClassWriter.LOOK_INSN: { +++ // skips 0 to 3 padding bytes +++ u = u + 4 - (offset 
& 3); +++ // reads instruction +++ int label = offset + readInt(u); +++ int len = readInt(u + 4); +++ int[] keys = new int[len]; +++ Label[] values = new Label[len]; +++ u += 8; +++ for (int i = 0; i < len; ++i) { +++ keys[i] = readInt(u); +++ values[i] = labels[offset + readInt(u + 4)]; +++ u += 8; +++ } +++ mv.visitLookupSwitchInsn(labels[label], keys, values); +++ break; +++ } +++ case ClassWriter.VAR_INSN: +++ mv.visitVarInsn(opcode, b[u + 1] & 0xFF); +++ u += 2; +++ break; +++ case ClassWriter.SBYTE_INSN: +++ mv.visitIntInsn(opcode, b[u + 1]); +++ u += 2; +++ break; +++ case ClassWriter.SHORT_INSN: +++ mv.visitIntInsn(opcode, readShort(u + 1)); +++ u += 3; +++ break; +++ case ClassWriter.LDC_INSN: +++ mv.visitLdcInsn(readConst(b[u + 1] & 0xFF, c)); +++ u += 2; +++ break; +++ case ClassWriter.LDCW_INSN: +++ mv.visitLdcInsn(readConst(readUnsignedShort(u + 1), c)); +++ u += 3; +++ break; +++ case ClassWriter.FIELDORMETH_INSN: +++ case ClassWriter.ITFMETH_INSN: { +++ int cpIndex = items[readUnsignedShort(u + 1)]; +++ boolean itf = b[cpIndex - 1] == ClassWriter.IMETH; +++ String iowner = readClass(cpIndex, c); +++ cpIndex = items[readUnsignedShort(cpIndex + 2)]; +++ String iname = readUTF8(cpIndex, c); +++ String idesc = readUTF8(cpIndex + 2, c); +++ if (opcode < Opcodes.INVOKEVIRTUAL) { +++ mv.visitFieldInsn(opcode, iowner, iname, idesc); +++ } else { +++ mv.visitMethodInsn(opcode, iowner, iname, idesc, itf); +++ } +++ if (opcode == Opcodes.INVOKEINTERFACE) { +++ u += 5; +++ } else { +++ u += 3; +++ } +++ break; +++ } +++ case ClassWriter.INDYMETH_INSN: { +++ int cpIndex = items[readUnsignedShort(u + 1)]; +++ int bsmIndex = context.bootstrapMethods[readUnsignedShort(cpIndex)]; +++ Handle bsm = (Handle) readConst(readUnsignedShort(bsmIndex), c); +++ int bsmArgCount = readUnsignedShort(bsmIndex + 2); +++ Object[] bsmArgs = new Object[bsmArgCount]; +++ bsmIndex += 4; +++ for (int i = 0; i < bsmArgCount; i++) { +++ bsmArgs[i] = readConst(readUnsignedShort(bsmIndex), 
c); +++ bsmIndex += 2; +++ } +++ cpIndex = items[readUnsignedShort(cpIndex + 2)]; +++ String iname = readUTF8(cpIndex, c); +++ String idesc = readUTF8(cpIndex + 2, c); +++ mv.visitInvokeDynamicInsn(iname, idesc, bsm, bsmArgs); +++ u += 5; +++ break; +++ } +++ case ClassWriter.TYPE_INSN: +++ mv.visitTypeInsn(opcode, readClass(u + 1, c)); +++ u += 3; +++ break; +++ case ClassWriter.IINC_INSN: +++ mv.visitIincInsn(b[u + 1] & 0xFF, b[u + 2]); +++ u += 3; +++ break; +++ // case MANA_INSN: +++ default: +++ mv.visitMultiANewArrayInsn(readClass(u + 1, c), b[u + 3] & 0xFF); +++ u += 4; +++ break; +++ } +++ +++ // visit the instruction annotations, if any +++ while (tanns != null && tann < tanns.length && ntoff <= offset) { +++ if (ntoff == offset) { +++ int v = readAnnotationTarget(context, tanns[tann]); +++ readAnnotationValues(v + 2, c, true, +++ mv.visitInsnAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), true)); +++ } +++ ntoff = ++tann >= tanns.length || readByte(tanns[tann]) < 0x43 ? -1 +++ : readUnsignedShort(tanns[tann] + 1); +++ } +++ while (itanns != null && itann < itanns.length && nitoff <= offset) { +++ if (nitoff == offset) { +++ int v = readAnnotationTarget(context, itanns[itann]); +++ readAnnotationValues(v + 2, c, true, +++ mv.visitInsnAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), false)); +++ } +++ nitoff = ++itann >= itanns.length +++ || readByte(itanns[itann]) < 0x43 ? 
-1 +++ : readUnsignedShort(itanns[itann] + 1); +++ } +++ } +++ if (labels[codeLength] != null) { +++ mv.visitLabel(labels[codeLength]); +++ } +++ +++ // visits the local variable tables +++ if ((context.flags & SKIP_DEBUG) == 0 && varTable != 0) { +++ int[] typeTable = null; +++ if (varTypeTable != 0) { +++ u = varTypeTable + 2; +++ typeTable = new int[readUnsignedShort(varTypeTable) * 3]; +++ for (int i = typeTable.length; i > 0;) { +++ typeTable[--i] = u + 6; // signature +++ typeTable[--i] = readUnsignedShort(u + 8); // index +++ typeTable[--i] = readUnsignedShort(u); // start +++ u += 10; +++ } +++ } +++ u = varTable + 2; +++ for (int i = readUnsignedShort(varTable); i > 0; --i) { +++ int start = readUnsignedShort(u); +++ int length = readUnsignedShort(u + 2); +++ int index = readUnsignedShort(u + 8); +++ String vsignature = null; +++ if (typeTable != null) { +++ for (int j = 0; j < typeTable.length; j += 3) { +++ if (typeTable[j] == start && typeTable[j + 1] == index) { +++ vsignature = readUTF8(typeTable[j + 2], c); +++ break; +++ } +++ } +++ } +++ mv.visitLocalVariable(readUTF8(u + 4, c), readUTF8(u + 6, c), +++ vsignature, labels[start], labels[start + length], +++ index); +++ u += 10; +++ } +++ } +++ +++ // visits the local variables type annotations +++ if (tanns != null) { +++ for (int i = 0; i < tanns.length; ++i) { +++ if ((readByte(tanns[i]) >> 1) == (0x40 >> 1)) { +++ int v = readAnnotationTarget(context, tanns[i]); +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitLocalVariableAnnotation(context.typeRef, +++ context.typePath, context.start, +++ context.end, context.index, readUTF8(v, c), +++ true)); +++ } +++ } +++ } +++ if (itanns != null) { +++ for (int i = 0; i < itanns.length; ++i) { +++ if ((readByte(itanns[i]) >> 1) == (0x40 >> 1)) { +++ int v = readAnnotationTarget(context, itanns[i]); +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitLocalVariableAnnotation(context.typeRef, +++ context.typePath, context.start, +++ context.end, 
context.index, readUTF8(v, c), +++ false)); +++ } +++ } +++ } +++ +++ // visits the code attributes +++ while (attributes != null) { +++ Attribute attr = attributes.next; +++ attributes.next = null; +++ mv.visitAttribute(attributes); +++ attributes = attr; +++ } +++ +++ // visits the max stack and max locals values +++ mv.visitMaxs(maxStack, maxLocals); +++ } +++ +++ /** +++ * Parses a type annotation table to find the labels, and to visit the try +++ * catch block annotations. +++ * +++ * @param u +++ * the start offset of a type annotation table. +++ * @param mv +++ * the method visitor to be used to visit the try catch block +++ * annotations. +++ * @param context +++ * information about the class being parsed. +++ * @param visible +++ * if the type annotation table to parse contains runtime visible +++ * annotations. +++ * @return the start offset of each type annotation in the parsed table. +++ */ +++ private int[] readTypeAnnotations(final MethodVisitor mv, +++ final Context context, int u, boolean visible) { +++ char[] c = context.buffer; +++ int[] offsets = new int[readUnsignedShort(u)]; +++ u += 2; +++ for (int i = 0; i < offsets.length; ++i) { +++ offsets[i] = u; +++ int target = readInt(u); +++ switch (target >>> 24) { +++ case 0x00: // CLASS_TYPE_PARAMETER +++ case 0x01: // METHOD_TYPE_PARAMETER +++ case 0x16: // METHOD_FORMAL_PARAMETER +++ u += 2; +++ break; +++ case 0x13: // FIELD +++ case 0x14: // METHOD_RETURN +++ case 0x15: // METHOD_RECEIVER +++ u += 1; +++ break; +++ case 0x40: // LOCAL_VARIABLE +++ case 0x41: // RESOURCE_VARIABLE +++ for (int j = readUnsignedShort(u + 1); j > 0; --j) { +++ int start = readUnsignedShort(u + 3); +++ int length = readUnsignedShort(u + 5); +++ readLabel(start, context.labels); +++ readLabel(start + length, context.labels); +++ u += 6; +++ } +++ u += 3; +++ break; +++ case 0x47: // CAST +++ case 0x48: // CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ case 0x49: // METHOD_INVOCATION_TYPE_ARGUMENT +++ case 0x4A: // 
CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ case 0x4B: // METHOD_REFERENCE_TYPE_ARGUMENT +++ u += 4; +++ break; +++ // case 0x10: // CLASS_EXTENDS +++ // case 0x11: // CLASS_TYPE_PARAMETER_BOUND +++ // case 0x12: // METHOD_TYPE_PARAMETER_BOUND +++ // case 0x17: // THROWS +++ // case 0x42: // EXCEPTION_PARAMETER +++ // case 0x43: // INSTANCEOF +++ // case 0x44: // NEW +++ // case 0x45: // CONSTRUCTOR_REFERENCE +++ // case 0x46: // METHOD_REFERENCE +++ default: +++ u += 3; +++ break; +++ } +++ int pathLength = readByte(u); +++ if ((target >>> 24) == 0x42) { +++ TypePath path = pathLength == 0 ? null : new TypePath(b, u); +++ u += 1 + 2 * pathLength; +++ u = readAnnotationValues(u + 2, c, true, +++ mv.visitTryCatchAnnotation(target, path, +++ readUTF8(u, c), visible)); +++ } else { +++ u = readAnnotationValues(u + 3 + 2 * pathLength, c, true, null); +++ } +++ } +++ return offsets; +++ } +++ +++ /** +++ * Parses the header of a type annotation to extract its target_type and +++ * target_path (the result is stored in the given context), and returns the +++ * start offset of the rest of the type_annotation structure (i.e. the +++ * offset to the type_index field, which is followed by +++ * num_element_value_pairs and then the name,value pairs). +++ * +++ * @param context +++ * information about the class being parsed. This is where the +++ * extracted target_type and target_path must be stored. +++ * @param u +++ * the start offset of a type_annotation structure. +++ * @return the start offset of the rest of the type_annotation structure. 
+++ */ +++ private int readAnnotationTarget(final Context context, int u) { +++ int target = readInt(u); +++ switch (target >>> 24) { +++ case 0x00: // CLASS_TYPE_PARAMETER +++ case 0x01: // METHOD_TYPE_PARAMETER +++ case 0x16: // METHOD_FORMAL_PARAMETER +++ target &= 0xFFFF0000; +++ u += 2; +++ break; +++ case 0x13: // FIELD +++ case 0x14: // METHOD_RETURN +++ case 0x15: // METHOD_RECEIVER +++ target &= 0xFF000000; +++ u += 1; +++ break; +++ case 0x40: // LOCAL_VARIABLE +++ case 0x41: { // RESOURCE_VARIABLE +++ target &= 0xFF000000; +++ int n = readUnsignedShort(u + 1); +++ context.start = new Label[n]; +++ context.end = new Label[n]; +++ context.index = new int[n]; +++ u += 3; +++ for (int i = 0; i < n; ++i) { +++ int start = readUnsignedShort(u); +++ int length = readUnsignedShort(u + 2); +++ context.start[i] = readLabel(start, context.labels); +++ context.end[i] = readLabel(start + length, context.labels); +++ context.index[i] = readUnsignedShort(u + 4); +++ u += 6; +++ } +++ break; +++ } +++ case 0x47: // CAST +++ case 0x48: // CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ case 0x49: // METHOD_INVOCATION_TYPE_ARGUMENT +++ case 0x4A: // CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ case 0x4B: // METHOD_REFERENCE_TYPE_ARGUMENT +++ target &= 0xFF0000FF; +++ u += 4; +++ break; +++ // case 0x10: // CLASS_EXTENDS +++ // case 0x11: // CLASS_TYPE_PARAMETER_BOUND +++ // case 0x12: // METHOD_TYPE_PARAMETER_BOUND +++ // case 0x17: // THROWS +++ // case 0x42: // EXCEPTION_PARAMETER +++ // case 0x43: // INSTANCEOF +++ // case 0x44: // NEW +++ // case 0x45: // CONSTRUCTOR_REFERENCE +++ // case 0x46: // METHOD_REFERENCE +++ default: +++ target &= (target >>> 24) < 0x43 ? 0xFFFFFF00 : 0xFF000000; +++ u += 3; +++ break; +++ } +++ int pathLength = readByte(u); +++ context.typeRef = target; +++ context.typePath = pathLength == 0 ? null : new TypePath(b, u); +++ return u + 1 + 2 * pathLength; +++ } +++ +++ /** +++ * Reads parameter annotations and makes the given visitor visit them. 
+++ * +++ * @param mv +++ * the visitor that must visit the annotations. +++ * @param context +++ * information about the class being parsed. +++ * @param v +++ * start offset in {@link #b b} of the annotations to be read. +++ * @param visible +++ * true if the annotations to be read are visible at +++ * runtime. +++ */ +++ private void readParameterAnnotations(final MethodVisitor mv, +++ final Context context, int v, final boolean visible) { +++ int i; +++ int n = b[v++] & 0xFF; +++ // workaround for a bug in javac (javac compiler generates a parameter +++ // annotation array whose size is equal to the number of parameters in +++ // the Java source file, while it should generate an array whose size is +++ // equal to the number of parameters in the method descriptor - which +++ // includes the synthetic parameters added by the compiler). This work- +++ // around supposes that the synthetic parameters are the first ones. +++ int synthetics = Type.getArgumentTypes(context.desc).length - n; +++ AnnotationVisitor av; +++ for (i = 0; i < synthetics; ++i) { +++ // virtual annotation to detect synthetic parameters in MethodWriter +++ av = mv.visitParameterAnnotation(i, "Ljava/lang/Synthetic;", false); +++ if (av != null) { +++ av.visitEnd(); +++ } +++ } +++ char[] c = context.buffer; +++ for (; i < n + synthetics; ++i) { +++ int j = readUnsignedShort(v); +++ v += 2; +++ for (; j > 0; --j) { +++ av = mv.visitParameterAnnotation(i, readUTF8(v, c), visible); +++ v = readAnnotationValues(v + 2, c, true, av); +++ } +++ } +++ } +++ +++ /** +++ * Reads the values of an annotation and makes the given visitor visit them. +++ * +++ * @param v +++ * the start offset in {@link #b b} of the values to be read +++ * (including the unsigned short that gives the number of +++ * values). +++ * @param buf +++ * buffer to be used to call {@link #readUTF8 readUTF8}, +++ * {@link #readClass(int,char[]) readClass} or {@link #readConst +++ * readConst}. 
+++ * @param named +++ * if the annotation values are named or not. +++ * @param av +++ * the visitor that must visit the values. +++ * @return the end offset of the annotation values. +++ */ +++ private int readAnnotationValues(int v, final char[] buf, +++ final boolean named, final AnnotationVisitor av) { +++ int i = readUnsignedShort(v); +++ v += 2; +++ if (named) { +++ for (; i > 0; --i) { +++ v = readAnnotationValue(v + 2, buf, readUTF8(v, buf), av); +++ } +++ } else { +++ for (; i > 0; --i) { +++ v = readAnnotationValue(v, buf, null, av); +++ } +++ } +++ if (av != null) { +++ av.visitEnd(); +++ } +++ return v; +++ } +++ +++ /** +++ * Reads a value of an annotation and makes the given visitor visit it. +++ * +++ * @param v +++ * the start offset in {@link #b b} of the value to be read +++ * (not including the value name constant pool index). +++ * @param buf +++ * buffer to be used to call {@link #readUTF8 readUTF8}, +++ * {@link #readClass(int,char[]) readClass} or {@link #readConst +++ * readConst}. +++ * @param name +++ * the name of the value to be read. +++ * @param av +++ * the visitor that must visit the value. +++ * @return the end offset of the annotation value. 
+++     */
+++    private int readAnnotationValue(int v, final char[] buf, final String name,
+++            final AnnotationVisitor av) {
+++        int i;
+++        // Without a visitor the value only has to be skipped: the tag byte at v
+++        // is inspected (v is not advanced) and the end offset is returned
+++        // without decoding anything.
+++        if (av == null) {
+++            switch (b[v] & 0xFF) {
+++            case 'e': // enum_const_value
+++                // tag byte + two 2-byte indexes
+++                return v + 5;
+++            case '@': // annotation_value
+++                // tag byte + 2-byte type index, then the named values
+++                return readAnnotationValues(v + 3, buf, true, null);
+++            case '[': // array_value
+++                // tag byte, then the unnamed values (count included)
+++                return readAnnotationValues(v + 1, buf, false, null);
+++            default:
+++                // tag byte + one 2-byte index
+++                return v + 3;
+++            }
+++        }
+++        // From here on v points just after the tag byte.
+++        switch (b[v++] & 0xFF) {
+++        case 'I': // pointer to CONSTANT_Integer
+++        case 'J': // pointer to CONSTANT_Long
+++        case 'F': // pointer to CONSTANT_Float
+++        case 'D': // pointer to CONSTANT_Double
+++            av.visit(name, readConst(readUnsignedShort(v), buf));
+++            v += 2;
+++            break;
+++        case 'B': // pointer to CONSTANT_Byte
+++            // the constant is read as an int and narrowed to the declared type
+++            av.visit(name, (byte) readInt(items[readUnsignedShort(v)]));
+++            v += 2;
+++            break;
+++        case 'Z': // pointer to CONSTANT_Boolean
+++            // zero is false, any other value is true
+++            av.visit(name,
+++                    readInt(items[readUnsignedShort(v)]) == 0 ? Boolean.FALSE
+++                            : Boolean.TRUE);
+++            v += 2;
+++            break;
+++        case 'S': // pointer to CONSTANT_Short
+++            av.visit(name, (short) readInt(items[readUnsignedShort(v)]));
+++            v += 2;
+++            break;
+++        case 'C': // pointer to CONSTANT_Char
+++            av.visit(name, (char) readInt(items[readUnsignedShort(v)]));
+++            v += 2;
+++            break;
+++        case 's': // pointer to CONSTANT_Utf8
+++            av.visit(name, readUTF8(v, buf));
+++            v += 2;
+++            break;
+++        case 'e': // enum_const_value
+++            // two consecutive 2-byte indexes, passed in that order to visitEnum
+++            av.visitEnum(name, readUTF8(v, buf), readUTF8(v + 2, buf));
+++            v += 4;
+++            break;
+++        case 'c': // class_info
+++            av.visit(name, Type.getType(readUTF8(v, buf)));
+++            v += 2;
+++            break;
+++        case '@': // annotation_value
+++            // the string at v is given to visitAnnotation; the nested named
+++            // values start two bytes later
+++            v = readAnnotationValues(v + 2, buf, true,
+++                    av.visitAnnotation(name, readUTF8(v, buf)));
+++            break;
+++        case '[': // array_value
+++            int size = readUnsignedShort(v);
+++            v += 2;
+++            if (size == 0) {
+++                // v - 2 is the offset of the element count: the empty array is
+++                // visited (and ended) through the generic path
+++                return readAnnotationValues(v - 2, buf, false,
+++                        av.visitArray(name));
+++            }
+++            // The tag of the first element decides how the whole array is
+++            // read. For the tags handled below, every element takes 3 bytes
+++            // (tag + 2-byte constant pool index) and the array is visited as
+++            // a single Java array value. Since v already points after the
+++            // first tag, "v += 3" leaves it after the tag of the next
+++            // element, and the final "--v" undoes the overshoot past the last
+++            // element.
+++            switch (this.b[v++] & 0xFF) {
+++            case 'B':
+++                byte[] bv = new byte[size];
+++                for (i = 0; i < size; i++) {
+++                    bv[i] = (byte) readInt(items[readUnsignedShort(v)]);
+++                    v += 3;
+++                }
+++                av.visit(name, bv);
+++                --v;
+++                break;
+++            case 'Z':
+++                boolean[] zv = new boolean[size];
+++                for (i = 0; i < size; i++) {
+++                    zv[i] = readInt(items[readUnsignedShort(v)]) != 0;
+++                    v += 3;
+++                }
+++                av.visit(name, zv);
+++                --v;
+++                break;
+++            case 'S':
+++                short[] sv = new short[size];
+++                for (i = 0; i < size; i++) {
+++                    sv[i] = (short) readInt(items[readUnsignedShort(v)]);
+++                    v += 3;
+++                }
+++                av.visit(name, sv);
+++                --v;
+++                break;
+++            case 'C':
+++                char[] cv = new char[size];
+++                for (i = 0; i < size; i++) {
+++                    cv[i] = (char) readInt(items[readUnsignedShort(v)]);
+++                    v += 3;
+++                }
+++                av.visit(name, cv);
+++                --v;
+++                break;
+++            case 'I':
+++                int[] iv = new int[size];
+++                for (i = 0; i < size; i++) {
+++                    iv[i] = readInt(items[readUnsignedShort(v)]);
+++                    v += 3;
+++                }
+++                av.visit(name, iv);
+++                --v;
+++                break;
+++            case 'J':
+++                long[] lv = new long[size];
+++                for (i = 0; i < size; i++) {
+++                    lv[i] = readLong(items[readUnsignedShort(v)]);
+++                    v += 3;
+++                }
+++                av.visit(name, lv);
+++                --v;
+++                break;
+++            case 'F':
+++                float[] fv = new float[size];
+++                for (i = 0; i < size; i++) {
+++                    fv[i] = Float
+++                            .intBitsToFloat(readInt(items[readUnsignedShort(v)]));
+++                    v += 3;
+++                }
+++                av.visit(name, fv);
+++                --v;
+++                break;
+++            case 'D':
+++                double[] dv = new double[size];
+++                for (i = 0; i < size; i++) {
+++                    dv[i] = Double
+++                            .longBitsToDouble(readLong(items[readUnsignedShort(v)]));
+++                    v += 3;
+++                }
+++                av.visit(name, dv);
+++                --v;
+++                break;
+++            default:
+++                // any other element tag: v - 3 is the offset of the element
+++                // count (2 bytes) that precedes the tag just consumed (1
+++                // byte), so the elements are visited one by one
+++                v = readAnnotationValues(v - 3, buf, false, av.visitArray(name));
+++            }
+++        }
+++        return v;
+++    }
+++
+++    /**
+++     * Computes the implicit frame of the method currently being parsed (as
+++     * defined in the given {@link Context}) and stores it in the given context.
+++     *
+++     * @param frame
+++     *            information about the class being parsed.
+++ */ +++ private void getImplicitFrame(final Context frame) { +++ String desc = frame.desc; +++ Object[] locals = frame.local; +++ int local = 0; +++ if ((frame.access & Opcodes.ACC_STATIC) == 0) { +++ if ("".equals(frame.name)) { +++ locals[local++] = Opcodes.UNINITIALIZED_THIS; +++ } else { +++ locals[local++] = readClass(header + 2, frame.buffer); +++ } +++ } +++ int i = 1; +++ loop: while (true) { +++ int j = i; +++ switch (desc.charAt(i++)) { +++ case 'Z': +++ case 'C': +++ case 'B': +++ case 'S': +++ case 'I': +++ locals[local++] = Opcodes.INTEGER; +++ break; +++ case 'F': +++ locals[local++] = Opcodes.FLOAT; +++ break; +++ case 'J': +++ locals[local++] = Opcodes.LONG; +++ break; +++ case 'D': +++ locals[local++] = Opcodes.DOUBLE; +++ break; +++ case '[': +++ while (desc.charAt(i) == '[') { +++ ++i; +++ } +++ if (desc.charAt(i) == 'L') { +++ ++i; +++ while (desc.charAt(i) != ';') { +++ ++i; +++ } +++ } +++ locals[local++] = desc.substring(j, ++i); +++ break; +++ case 'L': +++ while (desc.charAt(i) != ';') { +++ ++i; +++ } +++ locals[local++] = desc.substring(j + 1, i++); +++ break; +++ default: +++ break loop; +++ } +++ } +++ frame.localCount = local; +++ } +++ +++ /** +++ * Reads a stack map frame and stores the result in the given +++ * {@link Context} object. +++ * +++ * @param stackMap +++ * the start offset of a stack map frame in the class file. +++ * @param zip +++ * if the stack map frame at stackMap is compressed or not. +++ * @param unzip +++ * if the stack map frame must be uncompressed. +++ * @param frame +++ * where the parsed stack map frame must be stored. +++ * @return the offset of the first byte following the parsed frame. 
+++     */
+++    private int readFrame(int stackMap, boolean zip, boolean unzip,
+++            Context frame) {
+++        char[] c = frame.buffer;
+++        Label[] labels = frame.labels;
+++        int tag;
+++        int delta;
+++        if (zip) {
+++            // compressed frame: the first byte is the frame tag
+++            tag = b[stackMap++] & 0xFF;
+++        } else {
+++            // uncompressed frame: there is no tag, the frame is decoded as a
+++            // full frame. Resetting the offset to -1 makes the final
+++            // "offset += delta + 1" yield delta itself.
+++            tag = MethodWriter.FULL_FRAME;
+++            frame.offset = -1;
+++        }
+++        frame.localDiff = 0;
+++        if (tag < MethodWriter.SAME_LOCALS_1_STACK_ITEM_FRAME) {
+++            // same frame: the tag itself is the offset delta
+++            delta = tag;
+++            frame.mode = Opcodes.F_SAME;
+++            frame.stackCount = 0;
+++        } else if (tag < MethodWriter.RESERVED) {
+++            // same locals, one stack item: the delta is encoded in the tag and
+++            // the single stack type follows
+++            delta = tag - MethodWriter.SAME_LOCALS_1_STACK_ITEM_FRAME;
+++            stackMap = readFrameType(frame.stack, 0, stackMap, c, labels);
+++            frame.mode = Opcodes.F_SAME1;
+++            frame.stackCount = 1;
+++        } else {
+++            // all the remaining frame kinds store the delta explicitly, as an
+++            // unsigned short following the tag
+++            delta = readUnsignedShort(stackMap);
+++            stackMap += 2;
+++            if (tag == MethodWriter.SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED) {
+++                stackMap = readFrameType(frame.stack, 0, stackMap, c, labels);
+++                frame.mode = Opcodes.F_SAME1;
+++                frame.stackCount = 1;
+++            } else if (tag >= MethodWriter.CHOP_FRAME
+++                    && tag < MethodWriter.SAME_FRAME_EXTENDED) {
+++                // chop frame: the number of removed locals is the distance
+++                // between the tag and SAME_FRAME_EXTENDED
+++                frame.mode = Opcodes.F_CHOP;
+++                frame.localDiff = MethodWriter.SAME_FRAME_EXTENDED - tag;
+++                frame.localCount -= frame.localDiff;
+++                frame.stackCount = 0;
+++            } else if (tag == MethodWriter.SAME_FRAME_EXTENDED) {
+++                frame.mode = Opcodes.F_SAME;
+++                frame.stackCount = 0;
+++            } else if (tag < MethodWriter.FULL_FRAME) {
+++                // append frame: when frames are expanded the new locals are
+++                // stored after the current ones, otherwise from index 0
+++                int local = unzip ? frame.localCount : 0;
+++                for (int i = tag - MethodWriter.SAME_FRAME_EXTENDED; i > 0; i--) {
+++                    stackMap = readFrameType(frame.local, local++, stackMap, c,
+++                            labels);
+++                }
+++                frame.mode = Opcodes.F_APPEND;
+++                frame.localDiff = tag - MethodWriter.SAME_FRAME_EXTENDED;
+++                frame.localCount += frame.localDiff;
+++                frame.stackCount = 0;
+++            } else { // if (tag == FULL_FRAME) {
+++                // full frame: a count of locals followed by their types, then
+++                // a count of stack items followed by their types
+++                frame.mode = Opcodes.F_FULL;
+++                int n = readUnsignedShort(stackMap);
+++                stackMap += 2;
+++                frame.localDiff = n;
+++                frame.localCount = n;
+++                for (int local = 0; n > 0; n--) {
+++                    stackMap = readFrameType(frame.local, local++, stackMap, c,
+++                            labels);
+++                }
+++                n = readUnsignedShort(stackMap);
+++                stackMap += 2;
+++                frame.stackCount = n;
+++                for (int stack = 0; n > 0; n--) {
+++                    stackMap = readFrameType(frame.stack, stack++, stackMap, c,
+++                            labels);
+++                }
+++            }
+++        }
+++        // the offset of a frame is the offset of the previous one plus
+++        // delta + 1; a label is created at this offset if there is none yet
+++        frame.offset += delta + 1;
+++        readLabel(frame.offset, labels);
+++        return stackMap;
+++    }
+++
+++    /**
+++     * Reads a stack map frame type and stores it at the given index in the
+++     * given array.
+++     *
+++     * @param frame
+++     *            the array where the parsed type must be stored.
+++     * @param index
+++     *            the index in 'frame' where the parsed type must be stored.
+++     * @param v
+++     *            the start offset of the stack map frame type to read.
+++     * @param buf
+++     *            a buffer to read strings.
+++     * @param labels
+++     *            the labels of the method currently being parsed, indexed by
+++     *            their offset. If the parsed type is an Uninitialized type, a
+++     *            new label for the corresponding NEW instruction is stored in
+++     *            this array if it does not already exist.
+++     * @return the offset of the first byte after the parsed type.
+++ */ +++ private int readFrameType(final Object[] frame, final int index, int v, +++ final char[] buf, final Label[] labels) { +++ int type = b[v++] & 0xFF; +++ switch (type) { +++ case 0: +++ frame[index] = Opcodes.TOP; +++ break; +++ case 1: +++ frame[index] = Opcodes.INTEGER; +++ break; +++ case 2: +++ frame[index] = Opcodes.FLOAT; +++ break; +++ case 3: +++ frame[index] = Opcodes.DOUBLE; +++ break; +++ case 4: +++ frame[index] = Opcodes.LONG; +++ break; +++ case 5: +++ frame[index] = Opcodes.NULL; +++ break; +++ case 6: +++ frame[index] = Opcodes.UNINITIALIZED_THIS; +++ break; +++ case 7: // Object +++ frame[index] = readClass(v, buf); +++ v += 2; +++ break; +++ default: // Uninitialized +++ frame[index] = readLabel(readUnsignedShort(v), labels); +++ v += 2; +++ } +++ return v; +++ } +++ +++ /** +++ * Returns the label corresponding to the given offset. The default +++ * implementation of this method creates a label for the given offset if it +++ * has not been already created. +++ * +++ * @param offset +++ * a bytecode offset in a method. +++ * @param labels +++ * the already created labels, indexed by their offset. If a +++ * label already exists for offset this method must not create a +++ * new one. Otherwise it must store the new label in this array. +++ * @return a non null Label, which must be equal to labels[offset]. +++ */ +++ protected Label readLabel(int offset, Label[] labels) { +++ if (labels[offset] == null) { +++ labels[offset] = new Label(); +++ } +++ return labels[offset]; +++ } +++ +++ /** +++ * Returns the start index of the attribute_info structure of this class. +++ * +++ * @return the start index of the attribute_info structure of this class. 
+++ */ +++ private int getAttributes() { +++ // skips the header +++ int u = header + 8 + readUnsignedShort(header + 6) * 2; +++ // skips fields and methods +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ for (int j = readUnsignedShort(u + 8); j > 0; --j) { +++ u += 6 + readInt(u + 12); +++ } +++ u += 8; +++ } +++ u += 2; +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ for (int j = readUnsignedShort(u + 8); j > 0; --j) { +++ u += 6 + readInt(u + 12); +++ } +++ u += 8; +++ } +++ // the attribute_info structure starts just after the methods +++ return u + 2; +++ } +++ +++ /** +++ * Reads an attribute in {@link #b b}. +++ * +++ * @param attrs +++ * prototypes of the attributes that must be parsed during the +++ * visit of the class. Any attribute whose type is not equal to +++ * the type of one of the prototypes is ignored (i.e. an empty +++ * {@link Attribute} instance is returned). +++ * @param type +++ * the type of the attribute. +++ * @param off +++ * index of the first byte of the attribute's content in +++ * {@link #b b}. The 6 attribute header bytes, containing the +++ * type and the length of the attribute, are not taken into +++ * account here (they have already been read). +++ * @param len +++ * the length of the attribute's content. +++ * @param buf +++ * buffer to be used to call {@link #readUTF8 readUTF8}, +++ * {@link #readClass(int,char[]) readClass} or {@link #readConst +++ * readConst}. +++ * @param codeOff +++ * index of the first byte of code's attribute content in +++ * {@link #b b}, or -1 if the attribute to be read is not a code +++ * attribute. The 6 attribute header bytes, containing the type +++ * and the length of the attribute, are not taken into account +++ * here. +++ * @param labels +++ * the labels of the method's code, or null if the +++ * attribute to be read is not a code attribute. +++ * @return the attribute that has been read, or null to skip this +++ * attribute.
+++ */ +++ private Attribute readAttribute(final Attribute[] attrs, final String type, +++ final int off, final int len, final char[] buf, final int codeOff, +++ final Label[] labels) { +++ for (int i = 0; i < attrs.length; ++i) { +++ if (attrs[i].type.equals(type)) { +++ return attrs[i].read(this, off, len, buf, codeOff, labels); +++ } +++ } +++ return new Attribute(type).read(this, off, len, null, -1, null); +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: low level parsing +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the number of constant pool items in {@link #b b}. +++ * +++ * @return the number of constant pool items in {@link #b b}. +++ */ +++ public int getItemCount() { +++ return items.length; +++ } +++ +++ /** +++ * Returns the start index of the constant pool item in {@link #b b}, plus +++ * one. This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters. +++ * +++ * @param item +++ * the index of a constant pool item. +++ * @return the start index of the constant pool item in {@link #b b}, plus +++ * one. +++ */ +++ public int getItem(final int item) { +++ return items[item]; +++ } +++ +++ /** +++ * Returns the maximum length of the strings contained in the constant pool +++ * of the class. +++ * +++ * @return the maximum length of the strings contained in the constant pool +++ * of the class. +++ */ +++ public int getMaxStringLength() { +++ return maxStringLength; +++ } +++ +++ /** +++ * Reads a byte value in {@link #b b}. This method is intended for +++ * {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters. +++ * +++ * @param index +++ * the start index of the value to be read in {@link #b b}. +++ * @return the read value.
+++ */ +++ public int readByte(final int index) { +++ return b[index] & 0xFF; +++ } +++ +++ /** +++ * Reads an unsigned short value in {@link #b b}. This method is intended +++ * for {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters. +++ * +++ * @param index +++ * the start index of the value to be read in {@link #b b}. +++ * @return the read value. +++ */ +++ public int readUnsignedShort(final int index) { +++ byte[] b = this.b; +++ return ((b[index] & 0xFF) << 8) | (b[index + 1] & 0xFF); +++ } +++ +++ /** +++ * Reads a signed short value in {@link #b b}. This method is intended +++ * for {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters. +++ * +++ * @param index +++ * the start index of the value to be read in {@link #b b}. +++ * @return the read value. +++ */ +++ public short readShort(final int index) { +++ byte[] b = this.b; +++ return (short) (((b[index] & 0xFF) << 8) | (b[index + 1] & 0xFF)); +++ } +++ +++ /** +++ * Reads a signed int value in {@link #b b}. This method is intended for +++ * {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters. +++ * +++ * @param index +++ * the start index of the value to be read in {@link #b b}. +++ * @return the read value. +++ */ +++ public int readInt(final int index) { +++ byte[] b = this.b; +++ return ((b[index] & 0xFF) << 24) | ((b[index + 1] & 0xFF) << 16) +++ | ((b[index + 2] & 0xFF) << 8) | (b[index + 3] & 0xFF); +++ } +++ +++ /** +++ * Reads a signed long value in {@link #b b}. This method is intended for +++ * {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters. +++ * +++ * @param index +++ * the start index of the value to be read in {@link #b b}. +++ * @return the read value. 
+++ */ +++ public long readLong(final int index) { +++ long l1 = readInt(index); +++ long l0 = readInt(index + 4) & 0xFFFFFFFFL; +++ return (l1 << 32) | l0; +++ } +++ +++ /** +++ * Reads an UTF8 string constant pool item in {@link #b b}. This method +++ * is intended for {@link Attribute} sub classes, and is normally not needed +++ * by class generators or adapters. +++ * +++ * @param index +++ * the start index of an unsigned short value in {@link #b b}, +++ * whose value is the index of an UTF8 constant pool item. +++ * @param buf +++ * buffer to be used to read the item. This buffer must be +++ * sufficiently large. It is not automatically resized. +++ * @return the String corresponding to the specified UTF8 item. +++ */ +++ public String readUTF8(int index, final char[] buf) { +++ int item = readUnsignedShort(index); +++ if (index == 0 || item == 0) { +++ return null; +++ } +++ String s = strings[item]; +++ if (s != null) { +++ return s; +++ } +++ index = items[item]; +++ return strings[item] = readUTF(index + 2, readUnsignedShort(index), buf); +++ } +++ +++ /** +++ * Reads UTF8 string in {@link #b b}. +++ * +++ * @param index +++ * start offset of the UTF8 string to be read. +++ * @param utfLen +++ * length of the UTF8 string to be read. +++ * @param buf +++ * buffer to be used to read the string. This buffer must be +++ * sufficiently large. It is not automatically resized. +++ * @return the String corresponding to the specified UTF8 string. 
+++ */ +++ private String readUTF(int index, final int utfLen, final char[] buf) { +++ int endIndex = index + utfLen; +++ byte[] b = this.b; +++ int strLen = 0; +++ int c; +++ int st = 0; +++ char cc = 0; +++ while (index < endIndex) { +++ c = b[index++]; +++ switch (st) { +++ case 0: +++ c = c & 0xFF; +++ if (c < 0x80) { // 0xxxxxxx +++ buf[strLen++] = (char) c; +++ } else if (c < 0xE0 && c > 0xBF) { // 110x xxxx 10xx xxxx +++ cc = (char) (c & 0x1F); +++ st = 1; +++ } else { // 1110 xxxx 10xx xxxx 10xx xxxx +++ cc = (char) (c & 0x0F); +++ st = 2; +++ } +++ break; +++ +++ case 1: // byte 2 of 2-byte char or byte 3 of 3-byte char +++ buf[strLen++] = (char) ((cc << 6) | (c & 0x3F)); +++ st = 0; +++ break; +++ +++ case 2: // byte 2 of 3-byte char +++ cc = (char) ((cc << 6) | (c & 0x3F)); +++ st = 1; +++ break; +++ } +++ } +++ return new String(buf, 0, strLen); +++ } +++ +++ /** +++ * Reads a class constant pool item in {@link #b b}. This method is +++ * intended for {@link Attribute} sub classes, and is normally not needed by +++ * class generators or adapters. +++ * +++ * @param index +++ * the start index of an unsigned short value in {@link #b b}, +++ * whose value is the index of a class constant pool item. +++ * @param buf +++ * buffer to be used to read the item. This buffer must be +++ * sufficiently large. It is not automatically resized. +++ * @return the String corresponding to the specified class item. +++ */ +++ public String readClass(final int index, final char[] buf) { +++ // computes the start index of the CONSTANT_Class item in b +++ // and reads the CONSTANT_Utf8 item designated by +++ // the first two bytes of this CONSTANT_Class item +++ return readUTF8(items[readUnsignedShort(index)], buf); +++ } +++ +++ /** +++ * Reads a numeric or string constant pool item in {@link #b b}. This +++ * method is intended for {@link Attribute} sub classes, and is normally not +++ * needed by class generators or adapters. 
+++ * +++ * @param item +++ * the index of a constant pool item. +++ * @param buf +++ * buffer to be used to read the item. This buffer must be +++ * sufficiently large. It is not automatically resized. +++ * @return the {@link Integer}, {@link Float}, {@link Long}, {@link Double}, +++ * {@link String}, {@link Type} or {@link Handle} corresponding to +++ * the given constant pool item. +++ */ +++ public Object readConst(final int item, final char[] buf) { +++ int index = items[item]; +++ switch (b[index - 1]) { +++ case ClassWriter.INT: +++ return readInt(index); +++ case ClassWriter.FLOAT: +++ return Float.intBitsToFloat(readInt(index)); +++ case ClassWriter.LONG: +++ return readLong(index); +++ case ClassWriter.DOUBLE: +++ return Double.longBitsToDouble(readLong(index)); +++ case ClassWriter.CLASS: +++ return Type.getObjectType(readUTF8(index, buf)); +++ case ClassWriter.STR: +++ return readUTF8(index, buf); +++ case ClassWriter.MTYPE: +++ return Type.getMethodType(readUTF8(index, buf)); +++ default: // case ClassWriter.HANDLE_BASE + [1..9]: +++ int tag = readByte(index); +++ int[] items = this.items; +++ int cpIndex = items[readUnsignedShort(index + 1)]; +++ String owner = readClass(cpIndex, buf); +++ cpIndex = items[readUnsignedShort(cpIndex + 2)]; +++ String name = readUTF8(cpIndex, buf); +++ String desc = readUTF8(cpIndex + 2, buf); +++ return new Handle(tag, owner, name, desc); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/ClassVisitor.java b/contrib/asm/src/org/objectweb/asm/ClassVisitor.java ++new file mode 100644 ++index 0000000..107ada0 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/ClassVisitor.java ++@@ -0,0 +1,320 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. 
+++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A visitor to visit a Java class. The methods of this class must be called in +++ * the following order: visit [ visitSource ] [ +++ * visitOuterClass ] ( visitAnnotation | +++ * visitTypeAnnotation | visitAttribute )* ( +++ * visitInnerClass | visitField | visitMethod )* +++ * visitEnd. 
+++ * +++ * @author Eric Bruneton +++ */ +++public abstract class ClassVisitor { +++ +++ /** +++ * The ASM API version implemented by this visitor. The value of this field +++ * must be one of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ protected final int api; +++ +++ /** +++ * The class visitor to which this visitor must delegate method calls. May +++ * be null. +++ */ +++ protected ClassVisitor cv; +++ +++ /** +++ * Constructs a new {@link ClassVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ public ClassVisitor(final int api) { +++ this(api, null); +++ } +++ +++ /** +++ * Constructs a new {@link ClassVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}; any other value raises an {@link IllegalArgumentException}. +++ * @param cv +++ * the class visitor to which this visitor must delegate method +++ * calls. May be null. +++ */ +++ public ClassVisitor(final int api, final ClassVisitor cv) { +++ if (api != Opcodes.ASM4 && api != Opcodes.ASM5) { +++ throw new IllegalArgumentException(); +++ } +++ this.api = api; +++ this.cv = cv; +++ } +++ +++ /** +++ * Visits the header of the class. +++ * +++ * @param version +++ * the class version. +++ * @param access +++ * the class's access flags (see {@link Opcodes}). This parameter +++ * also indicates if the class is deprecated. +++ * @param name +++ * the internal name of the class (see +++ * {@link Type#getInternalName() getInternalName}). +++ * @param signature +++ * the signature of this class. May be null if the class +++ * is not a generic one, and does not extend or implement generic +++ * classes or interfaces. +++ * @param superName +++ * the internal name of the super class (see +++ * {@link Type#getInternalName() getInternalName}). For +++ * interfaces, the super class is {@link Object}.
May be +++ * null, but only for the {@link Object} class. +++ * @param interfaces +++ * the internal names of the class's interfaces (see +++ * {@link Type#getInternalName() getInternalName}). May be +++ * null. +++ */ +++ public void visit(int version, int access, String name, String signature, +++ String superName, String[] interfaces) { +++ if (cv != null) { +++ cv.visit(version, access, name, signature, superName, interfaces); +++ } +++ } +++ +++ /** +++ * Visits the source of the class. +++ * +++ * @param source +++ * the name of the source file from which the class was compiled. +++ * May be null. +++ * @param debug +++ * additional debug information to compute the correspondence +++ * between source and compiled elements of the class. May be +++ * null. +++ */ +++ public void visitSource(String source, String debug) { +++ if (cv != null) { +++ cv.visitSource(source, debug); +++ } +++ } +++ +++ /** +++ * Visits the enclosing class of the class. This method must be called only +++ * if the class has an enclosing class. +++ * +++ * @param owner +++ * internal name of the enclosing class of the class. +++ * @param name +++ * the name of the method that contains the class, or +++ * null if the class is not enclosed in a method of its +++ * enclosing class. +++ * @param desc +++ * the descriptor of the method that contains the class, or +++ * null if the class is not enclosed in a method of its +++ * enclosing class. +++ */ +++ public void visitOuterClass(String owner, String name, String desc) { +++ if (cv != null) { +++ cv.visitOuterClass(owner, name, desc); +++ } +++ } +++ +++ /** +++ * Visits an annotation of the class. +++ * +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * true if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or null if +++ * this visitor is not interested in visiting this annotation.
+++ */ +++ public AnnotationVisitor visitAnnotation(String desc, boolean visible) { +++ if (cv != null) { +++ return cv.visitAnnotation(desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an annotation on a type in the class signature. Throws a {@link RuntimeException} if {@link #api} is lower than {@link Opcodes#ASM5}. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#CLASS_TYPE_PARAMETER +++ * CLASS_TYPE_PARAMETER}, +++ * {@link TypeReference#CLASS_TYPE_PARAMETER_BOUND +++ * CLASS_TYPE_PARAMETER_BOUND} or +++ * {@link TypeReference#CLASS_EXTENDS CLASS_EXTENDS}. See +++ * {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * null if the annotation targets 'typeRef' as a whole. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * true if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or null if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitTypeAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (cv != null) { +++ return cv.visitTypeAnnotation(typeRef, typePath, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a non standard attribute of the class. +++ * +++ * @param attr +++ * an attribute. +++ */ +++ public void visitAttribute(Attribute attr) { +++ if (cv != null) { +++ cv.visitAttribute(attr); +++ } +++ } +++ +++ /** +++ * Visits information about an inner class. This inner class is not +++ * necessarily a member of the class being visited. +++ * +++ * @param name +++ * the internal name of an inner class (see +++ * {@link Type#getInternalName() getInternalName}).
+++ * @param outerName +++ * the internal name of the class to which the inner class +++ * belongs (see {@link Type#getInternalName() getInternalName}). +++ * May be null for non-member classes. +++ * @param innerName +++ * the (simple) name of the inner class inside its enclosing +++ * class. May be null for anonymous inner classes. +++ * @param access +++ * the access flags of the inner class as originally declared in +++ * the enclosing class. +++ */ +++ public void visitInnerClass(String name, String outerName, +++ String innerName, int access) { +++ if (cv != null) { +++ cv.visitInnerClass(name, outerName, innerName, access); +++ } +++ } +++ +++ /** +++ * Visits a field of the class. +++ * +++ * @param access +++ * the field's access flags (see {@link Opcodes}). This parameter +++ * also indicates if the field is synthetic and/or deprecated. +++ * @param name +++ * the field's name. +++ * @param desc +++ * the field's descriptor (see {@link Type Type}). +++ * @param signature +++ * the field's signature. May be null if the field's +++ * type does not use generic types. +++ * @param value +++ * the field's initial value. This parameter, which may be +++ * null if the field does not have an initial value, +++ * must be an {@link Integer}, a {@link Float}, a {@link Long}, a +++ * {@link Double} or a {@link String} (for int, +++ * float, long or String fields +++ * respectively). This parameter is only used for static +++ * fields. Its value is ignored for non static fields, which +++ * must be initialized through bytecode instructions in +++ * constructors or methods. +++ * @return a visitor to visit field annotations and attributes, or +++ * null if this class visitor is not interested in visiting +++ * these annotations and attributes.
+++ */ +++ public FieldVisitor visitField(int access, String name, String desc, +++ String signature, Object value) { +++ if (cv != null) { +++ return cv.visitField(access, name, desc, signature, value); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a method of the class. This method must return a new +++ * {@link MethodVisitor} instance (or null) each time it is called, +++ * i.e., it should not return a previously returned visitor. +++ * +++ * @param access +++ * the method's access flags (see {@link Opcodes}). This +++ * parameter also indicates if the method is synthetic and/or +++ * deprecated. +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor (see {@link Type Type}). +++ * @param signature +++ * the method's signature. May be null if the method +++ * parameters, return type and exceptions do not use generic +++ * types. +++ * @param exceptions +++ * the internal names of the method's exception classes (see +++ * {@link Type#getInternalName() getInternalName}). May be +++ * null. +++ * @return an object to visit the byte code of the method, or null +++ * if this class visitor is not interested in visiting the code of +++ * this method. +++ */ +++ public MethodVisitor visitMethod(int access, String name, String desc, +++ String signature, String[] exceptions) { +++ if (cv != null) { +++ return cv.visitMethod(access, name, desc, signature, exceptions); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits the end of the class. This method, which is the last one to be +++ * called, is used to inform the visitor that all the fields and methods of +++ * the class have been visited. 
+++ */ +++ public void visitEnd() { +++ if (cv != null) { +++ cv.visitEnd(); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/ClassWriter.java b/contrib/asm/src/org/objectweb/asm/ClassWriter.java ++new file mode 100644 ++index 0000000..63e1d7e ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/ClassWriter.java ++@@ -0,0 +1,1776 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A {@link ClassVisitor} that generates classes in bytecode form. More +++ * precisely this visitor generates a byte array conforming to the Java class +++ * file format. It can be used alone, to generate a Java class "from scratch", +++ * or with one or more {@link ClassReader ClassReader} and adapter class visitor +++ * to generate a modified class from one or more existing Java classes. +++ * +++ * @author Eric Bruneton +++ */ +++public class ClassWriter extends ClassVisitor { +++ +++ /** +++ * Flag to automatically compute the maximum stack size and the maximum +++ * number of local variables of methods. If this flag is set, then the +++ * arguments of the {@link MethodVisitor#visitMaxs visitMaxs} method of the +++ * {@link MethodVisitor} returned by the {@link #visitMethod visitMethod} +++ * method will be ignored, and computed automatically from the signature and +++ * the bytecode of each method. +++ * +++ * @see #ClassWriter(int) +++ */ +++ public static final int COMPUTE_MAXS = 1; +++ +++ /** +++ * Flag to automatically compute the stack map frames of methods from +++ * scratch. If this flag is set, then the calls to the +++ * {@link MethodVisitor#visitFrame} method are ignored, and the stack map +++ * frames are recomputed from the methods bytecode. 
The arguments of the +++ * {@link MethodVisitor#visitMaxs visitMaxs} method are also ignored and +++ * recomputed from the bytecode. In other words, computeFrames implies +++ * computeMaxs. +++ * +++ * @see #ClassWriter(int) +++ */ +++ public static final int COMPUTE_FRAMES = 2; +++ +++ /** +++ * Pseudo access flag to distinguish between the synthetic attribute and the +++ * synthetic access flag. +++ */ +++ static final int ACC_SYNTHETIC_ATTRIBUTE = 0x40000; +++ +++ /** +++ * Factor to convert from ACC_SYNTHETIC_ATTRIBUTE to Opcodes.ACC_SYNTHETIC. +++ */ +++ static final int TO_ACC_SYNTHETIC = ACC_SYNTHETIC_ATTRIBUTE +++ / Opcodes.ACC_SYNTHETIC; +++ +++ /** +++ * The type of instructions without any argument. +++ */ +++ static final int NOARG_INSN = 0; +++ +++ /** +++ * The type of instructions with a signed byte argument. +++ */ +++ static final int SBYTE_INSN = 1; +++ +++ /** +++ * The type of instructions with a signed short argument. +++ */ +++ static final int SHORT_INSN = 2; +++ +++ /** +++ * The type of instructions with a local variable index argument. +++ */ +++ static final int VAR_INSN = 3; +++ +++ /** +++ * The type of instructions with an implicit local variable index argument. +++ */ +++ static final int IMPLVAR_INSN = 4; +++ +++ /** +++ * The type of instructions with a type descriptor argument. +++ */ +++ static final int TYPE_INSN = 5; +++ +++ /** +++ * The type of field and method invocation instructions. +++ */ +++ static final int FIELDORMETH_INSN = 6; +++ +++ /** +++ * The type of the INVOKEINTERFACE instruction. +++ */ +++ static final int ITFMETH_INSN = 7; +++ +++ /** +++ * The type of the INVOKEDYNAMIC instruction. +++ */ +++ static final int INDYMETH_INSN = 8; +++ +++ /** +++ * The type of instructions with a 2 bytes bytecode offset label. +++ */ +++ static final int LABEL_INSN = 9; +++ +++ /** +++ * The type of instructions with a 4 bytes bytecode offset label.
+++ */ +++ static final int LABELW_INSN = 10; +++ +++ /** +++ * The type of the LDC instruction. +++ */ +++ static final int LDC_INSN = 11; +++ +++ /** +++ * The type of the LDC_W and LDC2_W instructions. +++ */ +++ static final int LDCW_INSN = 12; +++ +++ /** +++ * The type of the IINC instruction. +++ */ +++ static final int IINC_INSN = 13; +++ +++ /** +++ * The type of the TABLESWITCH instruction. +++ */ +++ static final int TABL_INSN = 14; +++ +++ /** +++ * The type of the LOOKUPSWITCH instruction. +++ */ +++ static final int LOOK_INSN = 15; +++ +++ /** +++ * The type of the MULTIANEWARRAY instruction. +++ */ +++ static final int MANA_INSN = 16; +++ +++ /** +++ * The type of the WIDE instruction. +++ */ +++ static final int WIDE_INSN = 17; +++ +++ /** +++ * The instruction types of all JVM opcodes. +++ */ +++ static final byte[] TYPE; +++ +++ /** +++ * The type of CONSTANT_Class constant pool items. +++ */ +++ static final int CLASS = 7; +++ +++ /** +++ * The type of CONSTANT_Fieldref constant pool items. +++ */ +++ static final int FIELD = 9; +++ +++ /** +++ * The type of CONSTANT_Methodref constant pool items. +++ */ +++ static final int METH = 10; +++ +++ /** +++ * The type of CONSTANT_InterfaceMethodref constant pool items. +++ */ +++ static final int IMETH = 11; +++ +++ /** +++ * The type of CONSTANT_String constant pool items. +++ */ +++ static final int STR = 8; +++ +++ /** +++ * The type of CONSTANT_Integer constant pool items. +++ */ +++ static final int INT = 3; +++ +++ /** +++ * The type of CONSTANT_Float constant pool items. +++ */ +++ static final int FLOAT = 4; +++ +++ /** +++ * The type of CONSTANT_Long constant pool items. +++ */ +++ static final int LONG = 5; +++ +++ /** +++ * The type of CONSTANT_Double constant pool items. +++ */ +++ static final int DOUBLE = 6; +++ +++ /** +++ * The type of CONSTANT_NameAndType constant pool items. +++ */ +++ static final int NAME_TYPE = 12; +++ +++ /** +++ * The type of CONSTANT_Utf8 constant pool items. 
+++ */ +++ static final int UTF8 = 1; +++ +++ /** +++ * The type of CONSTANT_MethodType constant pool items. +++ */ +++ static final int MTYPE = 16; +++ +++ /** +++ * The type of CONSTANT_MethodHandle constant pool items. +++ */ +++ static final int HANDLE = 15; +++ +++ /** +++ * The type of CONSTANT_InvokeDynamic constant pool items. +++ */ +++ static final int INDY = 18; +++ +++ /** +++ * The base value for all CONSTANT_MethodHandle constant pool items. +++ * Internally, ASM stores the 9 variations of CONSTANT_MethodHandle into 9 +++ * different items. +++ */ +++ static final int HANDLE_BASE = 20; +++ +++ /** +++ * Normal type Item stored in the ClassWriter {@link ClassWriter#typeTable}, +++ * instead of the constant pool, in order to avoid clashes with normal +++ * constant pool items in the ClassWriter constant pool's hash table. +++ */ +++ static final int TYPE_NORMAL = 30; +++ +++ /** +++ * Uninitialized type Item stored in the ClassWriter +++ * {@link ClassWriter#typeTable}, instead of the constant pool, in order to +++ * avoid clashes with normal constant pool items in the ClassWriter constant +++ * pool's hash table. +++ */ +++ static final int TYPE_UNINIT = 31; +++ +++ /** +++ * Merged type Item stored in the ClassWriter {@link ClassWriter#typeTable}, +++ * instead of the constant pool, in order to avoid clashes with normal +++ * constant pool items in the ClassWriter constant pool's hash table. +++ */ +++ static final int TYPE_MERGED = 32; +++ +++ /** +++ * The type of BootstrapMethods items. These items are stored in a special +++ * class attribute named BootstrapMethods and not in the constant pool. +++ */ +++ static final int BSM = 33; +++ +++ /** +++ * The class reader from which this class writer was constructed, if any. +++ */ +++ ClassReader cr; +++ +++ /** +++ * Minor and major version numbers of the class to be generated. +++ */ +++ int version; +++ +++ /** +++ * Index of the next item to be added in the constant pool.
+++ */ +++ int index; +++ +++ /** +++ * The constant pool of this class. +++ */ +++ final ByteVector pool; +++ +++ /** +++ * The constant pool's hash table data. +++ */ +++ Item[] items; +++ +++ /** +++ * The threshold of the constant pool's hash table. +++ */ +++ int threshold; +++ +++ /** +++ * A reusable key used to look for items in the {@link #items} hash table. +++ */ +++ final Item key; +++ +++ /** +++ * A reusable key used to look for items in the {@link #items} hash table. +++ */ +++ final Item key2; +++ +++ /** +++ * A reusable key used to look for items in the {@link #items} hash table. +++ */ +++ final Item key3; +++ +++ /** +++ * A reusable key used to look for items in the {@link #items} hash table. +++ */ +++ final Item key4; +++ +++ /** +++ * A type table used to temporarily store internal names that will not +++ * necessarily be stored in the constant pool. This type table is used by +++ * the control flow and data flow analysis algorithm used to compute stack +++ * map frames from scratch. This array associates to each index i +++ * the Item whose index is i. All Item objects stored in this array +++ * are also stored in the {@link #items} hash table. These two arrays allow +++ * to retrieve an Item from its index or, conversely, to get the index of an +++ * Item from its value. Each Item stores an internal name in its +++ * {@link Item#strVal1} field. +++ */ +++ Item[] typeTable; +++ +++ /** +++ * Number of elements in the {@link #typeTable} array. +++ */ +++ private short typeCount; +++ +++ /** +++ * The access flags of this class. +++ */ +++ private int access; +++ +++ /** +++ * The constant pool item that contains the internal name of this class. +++ */ +++ private int name; +++ +++ /** +++ * The internal name of this class. +++ */ +++ String thisName; +++ +++ /** +++ * The constant pool item that contains the signature of this class. 
+++ */ +++ private int signature; +++ +++ /** +++ * The constant pool item that contains the internal name of the super class +++ * of this class. +++ */ +++ private int superName; +++ +++ /** +++ * Number of interfaces implemented or extended by this class or interface. +++ */ +++ private int interfaceCount; +++ +++ /** +++ * The interfaces implemented or extended by this class or interface. More +++ * precisely, this array contains the indexes of the constant pool items +++ * that contain the internal names of these interfaces. +++ */ +++ private int[] interfaces; +++ +++ /** +++ * The index of the constant pool item that contains the name of the source +++ * file from which this class was compiled. +++ */ +++ private int sourceFile; +++ +++ /** +++ * The SourceDebug attribute of this class. +++ */ +++ private ByteVector sourceDebug; +++ +++ /** +++ * The constant pool item that contains the name of the enclosing class of +++ * this class. +++ */ +++ private int enclosingMethodOwner; +++ +++ /** +++ * The constant pool item that contains the name and descriptor of the +++ * enclosing method of this class. +++ */ +++ private int enclosingMethod; +++ +++ /** +++ * The runtime visible annotations of this class. +++ */ +++ private AnnotationWriter anns; +++ +++ /** +++ * The runtime invisible annotations of this class. +++ */ +++ private AnnotationWriter ianns; +++ +++ /** +++ * The runtime visible type annotations of this class. +++ */ +++ private AnnotationWriter tanns; +++ +++ /** +++ * The runtime invisible type annotations of this class. +++ */ +++ private AnnotationWriter itanns; +++ +++ /** +++ * The non standard attributes of this class. +++ */ +++ private Attribute attrs; +++ +++ /** +++ * The number of entries in the InnerClasses attribute. +++ */ +++ private int innerClassesCount; +++ +++ /** +++ * The InnerClasses attribute. +++ */ +++ private ByteVector innerClasses; +++ +++ /** +++ * The number of entries in the BootstrapMethods attribute. 
+++ */ +++ int bootstrapMethodsCount; +++ +++ /** +++ * The BootstrapMethods attribute. +++ */ +++ ByteVector bootstrapMethods; +++ +++ /** +++ * The fields of this class. These fields are stored in a linked list of +++ * {@link FieldWriter} objects, linked to each other by their +++ * {@link FieldWriter#fv} field. This field stores the first element of this +++ * list. +++ */ +++ FieldWriter firstField; +++ +++ /** +++ * The fields of this class. These fields are stored in a linked list of +++ * {@link FieldWriter} objects, linked to each other by their +++ * {@link FieldWriter#fv} field. This field stores the last element of this +++ * list. +++ */ +++ FieldWriter lastField; +++ +++ /** +++ * The methods of this class. These methods are stored in a linked list of +++ * {@link MethodWriter} objects, linked to each other by their +++ * {@link MethodWriter#mv} field. This field stores the first element of +++ * this list. +++ */ +++ MethodWriter firstMethod; +++ +++ /** +++ * The methods of this class. These methods are stored in a linked list of +++ * {@link MethodWriter} objects, linked to each other by their +++ * {@link MethodWriter#mv} field. This field stores the last element of this +++ * list. +++ */ +++ MethodWriter lastMethod; +++ +++ /** +++ * true if the maximum stack size and number of local variables +++ * must be automatically computed. +++ */ +++ private boolean computeMaxs; +++ +++ /** +++ * true if the stack map frames must be recomputed from scratch. +++ */ +++ private boolean computeFrames; +++ +++ /** +++ * true if the stack map tables of this class are invalid. The +++ * {@link MethodWriter#resizeInstructions} method cannot transform existing +++ * stack map tables, and so produces potentially invalid classes when it is +++ * executed. In this case the class is reread and rewritten with the +++ * {@link #COMPUTE_FRAMES} option (the resizeInstructions method can resize +++ * stack map tables when this option is used). 
+++ */ +++ boolean invalidFrames; +++ +++ // ------------------------------------------------------------------------ +++ // Static initializer +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Computes the instruction types of JVM opcodes. +++ */ +++ static { +++ int i; +++ byte[] b = new byte[220]; +++ String s = "AAAAAAAAAAAAAAAABCLMMDDDDDEEEEEEEEEEEEEEEEEEEEAAAAAAAADD" +++ + "DDDEEEEEEEEEEEEEEEEEEEEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +++ + "AAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAAAAAAAJJJJJJJJJJJJJJJJDOPAA" +++ + "AAAAGGGGGGGHIFBFAAFFAARQJJKKJJJJJJJJJJJJJJJJJJ"; +++ for (i = 0; i < b.length; ++i) { +++ b[i] = (byte) (s.charAt(i) - 'A'); +++ } +++ TYPE = b; +++ +++ // code to generate the above string +++ // +++ // // SBYTE_INSN instructions +++ // b[Constants.NEWARRAY] = SBYTE_INSN; +++ // b[Constants.BIPUSH] = SBYTE_INSN; +++ // +++ // // SHORT_INSN instructions +++ // b[Constants.SIPUSH] = SHORT_INSN; +++ // +++ // // (IMPL)VAR_INSN instructions +++ // b[Constants.RET] = VAR_INSN; +++ // for (i = Constants.ILOAD; i <= Constants.ALOAD; ++i) { +++ // b[i] = VAR_INSN; +++ // } +++ // for (i = Constants.ISTORE; i <= Constants.ASTORE; ++i) { +++ // b[i] = VAR_INSN; +++ // } +++ // for (i = 26; i <= 45; ++i) { // ILOAD_0 to ALOAD_3 +++ // b[i] = IMPLVAR_INSN; +++ // } +++ // for (i = 59; i <= 78; ++i) { // ISTORE_0 to ASTORE_3 +++ // b[i] = IMPLVAR_INSN; +++ // } +++ // +++ // // TYPE_INSN instructions +++ // b[Constants.NEW] = TYPE_INSN; +++ // b[Constants.ANEWARRAY] = TYPE_INSN; +++ // b[Constants.CHECKCAST] = TYPE_INSN; +++ // b[Constants.INSTANCEOF] = TYPE_INSN; +++ // +++ // // (Set)FIELDORMETH_INSN instructions +++ // for (i = Constants.GETSTATIC; i <= Constants.INVOKESTATIC; ++i) { +++ // b[i] = FIELDORMETH_INSN; +++ // } +++ // b[Constants.INVOKEINTERFACE] = ITFMETH_INSN; +++ // b[Constants.INVOKEDYNAMIC] = INDYMETH_INSN; +++ // +++ // // LABEL(W)_INSN instructions +++ // for (i = Constants.IFEQ; i <= 
Constants.JSR; ++i) { +++ // b[i] = LABEL_INSN; +++ // } +++ // b[Constants.IFNULL] = LABEL_INSN; +++ // b[Constants.IFNONNULL] = LABEL_INSN; +++ // b[200] = LABELW_INSN; // GOTO_W +++ // b[201] = LABELW_INSN; // JSR_W +++ // // temporary opcodes used internally by ASM - see Label and +++ // MethodWriter +++ // for (i = 202; i < 220; ++i) { +++ // b[i] = LABEL_INSN; +++ // } +++ // +++ // // LDC(_W) instructions +++ // b[Constants.LDC] = LDC_INSN; +++ // b[19] = LDCW_INSN; // LDC_W +++ // b[20] = LDCW_INSN; // LDC2_W +++ // +++ // // special instructions +++ // b[Constants.IINC] = IINC_INSN; +++ // b[Constants.TABLESWITCH] = TABL_INSN; +++ // b[Constants.LOOKUPSWITCH] = LOOK_INSN; +++ // b[Constants.MULTIANEWARRAY] = MANA_INSN; +++ // b[196] = WIDE_INSN; // WIDE +++ // +++ // for (i = 0; i < b.length; ++i) { +++ // System.err.print((char)('A' + b[i])); +++ // } +++ // System.err.println(); +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Constructor +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new {@link ClassWriter} object. +++ * +++ * @param flags +++ * option flags that can be used to modify the default behavior +++ * of this class. See {@link #COMPUTE_MAXS}, +++ * {@link #COMPUTE_FRAMES}. +++ */ +++ public ClassWriter(final int flags) { +++ super(Opcodes.ASM5); +++ index = 1; +++ pool = new ByteVector(); +++ items = new Item[256]; +++ threshold = (int) (0.75d * items.length); +++ key = new Item(); +++ key2 = new Item(); +++ key3 = new Item(); +++ key4 = new Item(); +++ this.computeMaxs = (flags & COMPUTE_MAXS) != 0; +++ this.computeFrames = (flags & COMPUTE_FRAMES) != 0; +++ } +++ +++ /** +++ * Constructs a new {@link ClassWriter} object and enables optimizations for +++ * "mostly add" bytecode transformations. These optimizations are the +++ * following: +++ * +++ *

    <ul> +++ * <li>The constant pool from the original class is copied as is in the new +++ * class, which saves time. New constant pool entries will be added at the +++ * end if necessary, but unused constant pool entries won't be +++ * removed.</li> +++ * <li>Methods that are not transformed are copied as is in the new class, +++ * directly from the original class bytecode (i.e. without emitting visit +++ * events for all the method instructions), which saves a lot of +++ * time. Untransformed methods are detected by the fact that the +++ * {@link ClassReader} receives {@link MethodVisitor} objects that come from +++ * a {@link ClassWriter} (and not from any other {@link ClassVisitor} +++ * instance).</li> +++ * </ul>
+++ * +++ * @param classReader +++ * the {@link ClassReader} used to read the original class. It +++ * will be used to copy the entire constant pool from the +++ * original class and also to copy other fragments of original +++ * bytecode where applicable. +++ * @param flags +++ * option flags that can be used to modify the default behavior +++ * of this class. These option flags do not affect methods +++ * that are copied as is in the new class. This means that +++ * neither the maximum stack size nor the stack frames will be computed for +++ * these methods. See {@link #COMPUTE_MAXS}, +++ * {@link #COMPUTE_FRAMES}. +++ */ +++ public ClassWriter(final ClassReader classReader, final int flags) { +++ this(flags); +++ classReader.copyPool(this); +++ this.cr = classReader; +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Implementation of the ClassVisitor abstract class +++ // ------------------------------------------------------------------------ +++ +++ @Override +++ public final void visit(final int version, final int access, +++ final String name, final String signature, final String superName, +++ final String[] interfaces) { +++ this.version = version; +++ this.access = access; +++ this.name = newClass(name); +++ thisName = name; +++ if (ClassReader.SIGNATURES && signature != null) { +++ this.signature = newUTF8(signature); +++ } +++ this.superName = superName == null ?
0 : newClass(superName); +++ if (interfaces != null && interfaces.length > 0) { +++ interfaceCount = interfaces.length; +++ this.interfaces = new int[interfaceCount]; +++ for (int i = 0; i < interfaceCount; ++i) { +++ this.interfaces[i] = newClass(interfaces[i]); +++ } +++ } +++ } +++ +++ @Override +++ public final void visitSource(final String file, final String debug) { +++ if (file != null) { +++ sourceFile = newUTF8(file); +++ } +++ if (debug != null) { +++ sourceDebug = new ByteVector().encodeUTF8(debug, 0, +++ Integer.MAX_VALUE); +++ } +++ } +++ +++ @Override +++ public final void visitOuterClass(final String owner, final String name, +++ final String desc) { +++ enclosingMethodOwner = newClass(owner); +++ if (name != null && desc != null) { +++ enclosingMethod = newNameType(name, desc); +++ } +++ } +++ +++ @Override +++ public final AnnotationVisitor visitAnnotation(final String desc, +++ final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write type, and reserve space for values count +++ bv.putShort(newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(this, true, bv, bv, 2); +++ if (visible) { +++ aw.next = anns; +++ anns = aw; +++ } else { +++ aw.next = ianns; +++ ianns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public final AnnotationVisitor visitTypeAnnotation(int typeRef, +++ TypePath typePath, final String desc, final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ AnnotationWriter.putTarget(typeRef, typePath, bv); +++ // write type, and reserve space for values count +++ bv.putShort(newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(this, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = tanns; +++ tanns = aw; +++ } else { +++ aw.next = itanns; +++ itanns = aw; +++ } +++ return aw; +++ }
+++ +++ @Override +++ public final void visitAttribute(final Attribute attr) { +++ attr.next = attrs; +++ attrs = attr; +++ } +++ +++ @Override +++ public final void visitInnerClass(final String name, +++ final String outerName, final String innerName, final int access) { +++ if (innerClasses == null) { +++ innerClasses = new ByteVector(); +++ } +++ // Sec. 4.7.6 of the JVMS states "Every CONSTANT_Class_info entry in the +++ // constant_pool table which represents a class or interface C that is +++ // not a package member must have exactly one corresponding entry in the +++ // classes array". To avoid duplicates we keep track in the intVal field +++ // of the Item of each CONSTANT_Class_info entry C whether an inner +++ // class entry has already been added for C (this field is unused for +++ // class entries, and changing its value does not change the hashcode +++ // and equality tests). If so we store the index of this inner class +++ // entry (plus one) in intVal. This hack allows duplicate detection in +++ // O(1) time. +++ Item nameItem = newClassItem(name); +++ if (nameItem.intVal == 0) { +++ ++innerClassesCount; +++ innerClasses.putShort(nameItem.index); +++ innerClasses.putShort(outerName == null ? 0 : newClass(outerName)); +++ innerClasses.putShort(innerName == null ? 0 : newUTF8(innerName)); +++ innerClasses.putShort(access); +++ nameItem.intVal = innerClassesCount; +++ } else { +++ // Compare the inner classes entry nameItem.intVal - 1 with the +++ // arguments of this method and throw an exception if there is a +++ // difference?
+++ } +++ } +++ +++ @Override +++ public final FieldVisitor visitField(final int access, final String name, +++ final String desc, final String signature, final Object value) { +++ return new FieldWriter(this, access, name, desc, signature, value); +++ } +++ +++ @Override +++ public final MethodVisitor visitMethod(final int access, final String name, +++ final String desc, final String signature, final String[] exceptions) { +++ return new MethodWriter(this, access, name, desc, signature, +++ exceptions, computeMaxs, computeFrames); +++ } +++ +++ @Override +++ public final void visitEnd() { +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Other public methods +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the bytecode of the class that was built with this class writer. +++ * +++ * @return the bytecode of the class that was built with this class writer. +++ */ +++ public byte[] toByteArray() { +++ if (index > 0xFFFF) { +++ throw new RuntimeException("Class file too large!"); +++ } +++ // computes the real size of the bytecode of this class; 24 = 4 (magic) + 4 (version) + eight 2-byte fields written below (constant pool count, access flags, this class, super class, and the interface, field, method and attribute counts) +++ int size = 24 + 2 * interfaceCount; +++ int nbFields = 0; +++ FieldWriter fb = firstField; +++ while (fb != null) { +++ ++nbFields; +++ size += fb.getSize(); +++ fb = (FieldWriter) fb.fv; +++ } +++ int nbMethods = 0; +++ MethodWriter mb = firstMethod; +++ while (mb != null) { +++ ++nbMethods; +++ size += mb.getSize(); +++ mb = (MethodWriter) mb.mv; +++ } +++ int attributeCount = 0; +++ if (bootstrapMethods != null) { +++ // we put it as first attribute in order to improve a bit +++ // ClassReader.copyBootstrapMethods +++ ++attributeCount; +++ size += 8 + bootstrapMethods.length; +++ newUTF8("BootstrapMethods"); +++ } +++ if (ClassReader.SIGNATURES && signature != 0) { +++ ++attributeCount; +++ size += 8; +++ newUTF8("Signature"); +++ } +++ if (sourceFile != 0) { +++ ++attributeCount; +++ size += 8; +++
newUTF8("SourceFile"); +++ } +++ if (sourceDebug != null) { +++ ++attributeCount; +++ size += sourceDebug.length + 6; +++ newUTF8("SourceDebugExtension"); +++ } +++ if (enclosingMethodOwner != 0) { +++ ++attributeCount; +++ size += 10; +++ newUTF8("EnclosingMethod"); +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ ++attributeCount; +++ size += 6; +++ newUTF8("Deprecated"); +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ ++attributeCount; +++ size += 6; +++ newUTF8("Synthetic"); +++ } +++ } +++ if (innerClasses != null) { +++ ++attributeCount; +++ size += 8 + innerClasses.length; +++ newUTF8("InnerClasses"); +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ ++attributeCount; +++ size += 8 + anns.getSize(); +++ newUTF8("RuntimeVisibleAnnotations"); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ ++attributeCount; +++ size += 8 + ianns.getSize(); +++ newUTF8("RuntimeInvisibleAnnotations"); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ ++attributeCount; +++ size += 8 + tanns.getSize(); +++ newUTF8("RuntimeVisibleTypeAnnotations"); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ ++attributeCount; +++ size += 8 + itanns.getSize(); +++ newUTF8("RuntimeInvisibleTypeAnnotations"); +++ } +++ if (attrs != null) { +++ attributeCount += attrs.getCount(); +++ size += attrs.getSize(this, null, 0, -1, -1); +++ } +++ size += pool.length; +++ // allocates a byte vector of this size, in order to avoid unnecessary +++ // arraycopy operations in the ByteVector.enlarge() method +++ ByteVector out = new ByteVector(size); +++ out.putInt(0xCAFEBABE).putInt(version); +++ out.putShort(index).putByteArray(pool.data, 0, pool.length); +++ int mask = Opcodes.ACC_DEPRECATED | ACC_SYNTHETIC_ATTRIBUTE +++ | ((access & ACC_SYNTHETIC_ATTRIBUTE) / TO_ACC_SYNTHETIC); +++ out.putShort(access & ~mask).putShort(name).putShort(superName);
+++ out.putShort(interfaceCount); +++ for (int i = 0; i < interfaceCount; ++i) { +++ out.putShort(interfaces[i]); +++ } +++ out.putShort(nbFields); +++ fb = firstField; +++ while (fb != null) { +++ fb.put(out); +++ fb = (FieldWriter) fb.fv; +++ } +++ out.putShort(nbMethods); +++ mb = firstMethod; +++ while (mb != null) { +++ mb.put(out); +++ mb = (MethodWriter) mb.mv; +++ } +++ out.putShort(attributeCount); +++ if (bootstrapMethods != null) { +++ out.putShort(newUTF8("BootstrapMethods")); +++ out.putInt(bootstrapMethods.length + 2).putShort( +++ bootstrapMethodsCount); +++ out.putByteArray(bootstrapMethods.data, 0, bootstrapMethods.length); +++ } +++ if (ClassReader.SIGNATURES && signature != 0) { +++ out.putShort(newUTF8("Signature")).putInt(2).putShort(signature); +++ } +++ if (sourceFile != 0) { +++ out.putShort(newUTF8("SourceFile")).putInt(2).putShort(sourceFile); +++ } +++ if (sourceDebug != null) { +++ int len = sourceDebug.length; +++ out.putShort(newUTF8("SourceDebugExtension")).putInt(len); +++ out.putByteArray(sourceDebug.data, 0, len); +++ } +++ if (enclosingMethodOwner != 0) { +++ out.putShort(newUTF8("EnclosingMethod")).putInt(4); +++ out.putShort(enclosingMethodOwner).putShort(enclosingMethod); +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ out.putShort(newUTF8("Deprecated")).putInt(0); +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ out.putShort(newUTF8("Synthetic")).putInt(0); +++ } +++ } +++ if (innerClasses != null) { +++ out.putShort(newUTF8("InnerClasses")); +++ out.putInt(innerClasses.length + 2).putShort(innerClassesCount); +++ out.putByteArray(innerClasses.data, 0, innerClasses.length); +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ out.putShort(newUTF8("RuntimeVisibleAnnotations")); +++ anns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++
out.putShort(newUTF8("RuntimeInvisibleAnnotations")); +++ ianns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ out.putShort(newUTF8("RuntimeVisibleTypeAnnotations")); +++ tanns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ out.putShort(newUTF8("RuntimeInvisibleTypeAnnotations")); +++ itanns.put(out); +++ } +++ if (attrs != null) { +++ attrs.put(this, null, 0, -1, -1, out); +++ } +++ if (invalidFrames) { /* NOTE(review): anns and ianns are reset below before the class is re-read into this writer, but tanns and itanns are not - confirm that type annotations cannot end up duplicated on this path. */ +++ anns = null; +++ ianns = null; +++ attrs = null; +++ innerClassesCount = 0; +++ innerClasses = null; +++ bootstrapMethodsCount = 0; +++ bootstrapMethods = null; +++ firstField = null; +++ lastField = null; +++ firstMethod = null; +++ lastMethod = null; +++ computeMaxs = false; +++ computeFrames = true; +++ invalidFrames = false; +++ new ClassReader(out.data).accept(this, ClassReader.SKIP_FRAMES); +++ return toByteArray(); +++ } +++ return out.data; +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: constant pool management +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Adds a number or string constant to the constant pool of the class being +++ * built. Does nothing if the constant pool already contains a similar item. +++ * +++ * @param cst +++ * the value of the constant to be added to the constant pool. +++ * This parameter must be an {@link Integer}, a {@link Float}, a +++ * {@link Long}, a {@link Double}, a {@link String}, a +++ * {@link Type} or a {@link Handle}; Byte, Character, Short and Boolean values are also accepted and stored as integer constants, and any other type causes an IllegalArgumentException. +++ * @return a new or already existing constant item with the given value.
+++ */ +++ Item newConstItem(final Object cst) { +++ if (cst instanceof Integer) { +++ int val = ((Integer) cst).intValue(); +++ return newInteger(val); +++ } else if (cst instanceof Byte) { +++ int val = ((Byte) cst).intValue(); +++ return newInteger(val); +++ } else if (cst instanceof Character) { +++ int val = ((Character) cst).charValue(); +++ return newInteger(val); +++ } else if (cst instanceof Short) { +++ int val = ((Short) cst).intValue(); +++ return newInteger(val); +++ } else if (cst instanceof Boolean) { +++ int val = ((Boolean) cst).booleanValue() ? 1 : 0; +++ return newInteger(val); +++ } else if (cst instanceof Float) { +++ float val = ((Float) cst).floatValue(); +++ return newFloat(val); +++ } else if (cst instanceof Long) { +++ long val = ((Long) cst).longValue(); +++ return newLong(val); +++ } else if (cst instanceof Double) { +++ double val = ((Double) cst).doubleValue(); +++ return newDouble(val); +++ } else if (cst instanceof String) { +++ return newString((String) cst); +++ } else if (cst instanceof Type) { +++ Type t = (Type) cst; +++ int s = t.getSort(); +++ if (s == Type.OBJECT) { +++ return newClassItem(t.getInternalName()); +++ } else if (s == Type.METHOD) { +++ return newMethodTypeItem(t.getDescriptor()); +++ } else { // s == primitive type or array +++ return newClassItem(t.getDescriptor()); +++ } +++ } else if (cst instanceof Handle) { +++ Handle h = (Handle) cst; +++ return newHandleItem(h.tag, h.owner, h.name, h.desc); +++ } else { +++ throw new IllegalArgumentException("value " + cst); +++ } +++ } +++ +++ /** +++ * Adds a number or string constant to the constant pool of the class being +++ * built. Does nothing if the constant pool already contains a similar item. +++ * This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters. +++ * +++ * @param cst +++ * the value of the constant to be added to the constant pool.
+++ * This parameter must be an {@link Integer}, a {@link Float}, a +++ * {@link Long}, a {@link Double} or a {@link String}; it is passed unchanged to newConstItem, which additionally handles Type and Handle values and boxed byte, char, short and boolean values. +++ * @return the index of a new or already existing constant item with the +++ * given value. +++ */ +++ public int newConst(final Object cst) { +++ return newConstItem(cst).index; +++ } +++ +++ /** +++ * Adds a UTF8 string to the constant pool of the class being built. Does +++ * nothing if the constant pool already contains a similar item. This +++ * method is intended for {@link Attribute} sub classes, and is normally not +++ * needed by class generators or adapters. +++ * +++ * @param value +++ * the String value. +++ * @return the index of a new or already existing UTF8 item. +++ */ +++ public int newUTF8(final String value) { +++ key.set(UTF8, value, null, null); +++ Item result = get(key); +++ if (result == null) { +++ pool.putByte(UTF8).putUTF8(value); +++ result = new Item(index++, key); +++ put(result); +++ } +++ return result.index; +++ } +++ +++ /** +++ * Adds a class reference to the constant pool of the class being built. +++ * Does nothing if the constant pool already contains a similar item. +++ * This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters. +++ * +++ * @param value +++ * the internal name of the class. +++ * @return a new or already existing class reference item. +++ */ +++ Item newClassItem(final String value) { +++ key2.set(CLASS, value, null, null); +++ Item result = get(key2); +++ if (result == null) { +++ pool.put12(CLASS, newUTF8(value)); +++ result = new Item(index++, key2); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a class reference to the constant pool of the class being built. +++ * Does nothing if the constant pool already contains a similar item. +++ * This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.
+++ * +++ * @param value +++ * the internal name of the class. +++ * @return the index of a new or already existing class reference item. +++ */ +++ public int newClass(final String value) { +++ return newClassItem(value).index; +++ } +++ +++ /** +++ * Adds a method type reference to the constant pool of the class being +++ * built. Does nothing if the constant pool already contains a similar item. +++ * This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters. +++ * +++ * @param methodDesc +++ * method descriptor of the method type. +++ * @return a new or already existing method type reference item. +++ */ +++ Item newMethodTypeItem(final String methodDesc) { +++ key2.set(MTYPE, methodDesc, null, null); +++ Item result = get(key2); +++ if (result == null) { +++ pool.put12(MTYPE, newUTF8(methodDesc)); +++ result = new Item(index++, key2); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a method type reference to the constant pool of the class being +++ * built. Does nothing if the constant pool already contains a similar item. +++ * This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters. +++ * +++ * @param methodDesc +++ * method descriptor of the method type. +++ * @return the index of a new or already existing method type reference +++ * item. +++ */ +++ public int newMethodType(final String methodDesc) { +++ return newMethodTypeItem(methodDesc).index; +++ } +++ +++ /** +++ * Adds a handle to the constant pool of the class being built. Does nothing +++ * if the constant pool already contains a similar item. This method is +++ * intended for {@link Attribute} sub classes, and is normally not needed by +++ * class generators or adapters. +++ * +++ * @param tag +++ * the kind of this handle.
Must be {@link Opcodes#H_GETFIELD}, +++ * {@link Opcodes#H_GETSTATIC}, {@link Opcodes#H_PUTFIELD}, +++ * {@link Opcodes#H_PUTSTATIC}, {@link Opcodes#H_INVOKEVIRTUAL}, +++ * {@link Opcodes#H_INVOKESTATIC}, +++ * {@link Opcodes#H_INVOKESPECIAL}, +++ * {@link Opcodes#H_NEWINVOKESPECIAL} or +++ * {@link Opcodes#H_INVOKEINTERFACE}. +++ * @param owner +++ * the internal name of the field or method owner class. +++ * @param name +++ * the name of the field or method. +++ * @param desc +++ * the descriptor of the field or method. +++ * @return a new or an already existing handle item. +++ */ +++ Item newHandleItem(final int tag, final String owner, final String name, +++ final String desc) { +++ key4.set(HANDLE_BASE + tag, owner, name, desc); +++ Item result = get(key4); +++ if (result == null) { +++ if (tag <= Opcodes.H_PUTSTATIC) { +++ put112(HANDLE, tag, newField(owner, name, desc)); +++ } else { +++ put112(HANDLE, +++ tag, +++ newMethod(owner, name, desc, +++ tag == Opcodes.H_INVOKEINTERFACE)); +++ } +++ result = new Item(index++, key4); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a handle to the constant pool of the class being built. Does nothing +++ * if the constant pool already contains a similar item. This method is +++ * intended for {@link Attribute} sub classes, and is normally not needed by +++ * class generators or adapters. +++ * +++ * @param tag +++ * the kind of this handle. Must be {@link Opcodes#H_GETFIELD}, +++ * {@link Opcodes#H_GETSTATIC}, {@link Opcodes#H_PUTFIELD}, +++ * {@link Opcodes#H_PUTSTATIC}, {@link Opcodes#H_INVOKEVIRTUAL}, +++ * {@link Opcodes#H_INVOKESTATIC}, +++ * {@link Opcodes#H_INVOKESPECIAL}, +++ * {@link Opcodes#H_NEWINVOKESPECIAL} or +++ * {@link Opcodes#H_INVOKEINTERFACE}. +++ * @param owner +++ * the internal name of the field or method owner class. +++ * @param name +++ * the name of the field or method. +++ * @param desc +++ * the descriptor of the field or method.
+++ * @return the index of a new or already existing handle +++ * item. +++ */ +++ public int newHandle(final int tag, final String owner, final String name, +++ final String desc) { +++ return newHandleItem(tag, owner, name, desc).index; +++ } +++ +++ /** +++ * Adds an invokedynamic reference to the constant pool of the class being +++ * built. Does nothing if the constant pool already contains a similar item. +++ * This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters. +++ * +++ * @param name +++ * name of the invoked method. +++ * @param desc +++ * descriptor of the invoked method. +++ * @param bsm +++ * the bootstrap method. +++ * @param bsmArgs +++ * the bootstrap method constant arguments. +++ * +++ * @return a new or an already existing invokedynamic type reference item. +++ */ +++ Item newInvokeDynamicItem(final String name, final String desc, +++ final Handle bsm, final Object... bsmArgs) { +++ // cache for performance +++ ByteVector bootstrapMethods = this.bootstrapMethods; +++ if (bootstrapMethods == null) { +++ bootstrapMethods = this.bootstrapMethods = new ByteVector(); +++ } +++ +++ int position = bootstrapMethods.length; // record current position +++ +++ int hashCode = bsm.hashCode(); +++ bootstrapMethods.putShort(newHandle(bsm.tag, bsm.owner, bsm.name, +++ bsm.desc)); +++ +++ int argsLength = bsmArgs.length; +++ bootstrapMethods.putShort(argsLength); +++ +++ for (int i = 0; i < argsLength; i++) { +++ Object bsmArg = bsmArgs[i]; +++ hashCode ^= bsmArg.hashCode(); +++ bootstrapMethods.putShort(newConst(bsmArg)); +++ } +++ +++ byte[] data = bootstrapMethods.data; +++ int length = (1 + 1 + argsLength) << 1; // (bsm + argCount + arguments) +++ hashCode &= 0x7FFFFFFF; +++ Item result = items[hashCode % items.length]; +++ loop: while (result != null) { +++ if (result.type != BSM || result.hashCode != hashCode) { +++ result = result.next; +++ continue; +++ } +++ +++ //
because the data encodes the number of arguments, +++ // we don't need to test whether the sizes are equal +++ int resultPosition = result.intVal; +++ for (int p = 0; p < length; p++) { +++ if (data[position + p] != data[resultPosition + p]) { +++ result = result.next; +++ continue loop; +++ } +++ } +++ break; +++ } +++ +++ int bootstrapMethodIndex; +++ if (result != null) { +++ bootstrapMethodIndex = result.index; +++ bootstrapMethods.length = position; // revert to old position +++ } else { +++ bootstrapMethodIndex = bootstrapMethodsCount++; +++ result = new Item(bootstrapMethodIndex); +++ result.set(position, hashCode); +++ put(result); +++ } +++ +++ // now, create the InvokeDynamic constant +++ key3.set(name, desc, bootstrapMethodIndex); +++ result = get(key3); +++ if (result == null) { +++ put122(INDY, bootstrapMethodIndex, newNameType(name, desc)); +++ result = new Item(index++, key3); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds an invokedynamic reference to the constant pool of the class being +++ * built. Does nothing if the constant pool already contains a similar item. +++ * This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters. +++ * +++ * @param name +++ * name of the invoked method. +++ * @param desc +++ * descriptor of the invoked method. +++ * @param bsm +++ * the bootstrap method. +++ * @param bsmArgs +++ * the bootstrap method constant arguments. +++ * +++ * @return the index of a new or already existing invokedynamic reference +++ * item. +++ */ +++ public int newInvokeDynamic(final String name, final String desc, +++ final Handle bsm, final Object... bsmArgs) { +++ return newInvokeDynamicItem(name, desc, bsm, bsmArgs).index; +++ } +++ +++ /** +++ * Adds a field reference to the constant pool of the class being built. +++ * Does nothing if the constant pool already contains a similar item.
+++ * +++ * @param owner +++ * the internal name of the field's owner class. +++ * @param name +++ * the field's name. +++ * @param desc +++ * the field's descriptor. +++ * @return a new or already existing field reference item. +++ */ +++ Item newFieldItem(final String owner, final String name, final String desc) { +++ key3.set(FIELD, owner, name, desc); +++ Item result = get(key3); +++ if (result == null) { +++ put122(FIELD, newClass(owner), newNameType(name, desc)); +++ result = new Item(index++, key3); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a field reference to the constant pool of the class being built. +++ * Does nothing if the constant pool already contains a similar item. +++ * This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters. +++ * +++ * @param owner +++ * the internal name of the field's owner class. +++ * @param name +++ * the field's name. +++ * @param desc +++ * the field's descriptor. +++ * @return the index of a new or already existing field reference item. +++ */ +++ public int newField(final String owner, final String name, final String desc) { +++ return newFieldItem(owner, name, desc).index; +++ } +++ +++ /** +++ * Adds a method reference to the constant pool of the class being built. +++ * Does nothing if the constant pool already contains a similar item. +++ * +++ * @param owner +++ * the internal name of the method's owner class. +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor. +++ * @param itf +++ * true if owner is an interface. +++ * @return a new or already existing method reference item. +++ */ +++ Item newMethodItem(final String owner, final String name, +++ final String desc, final boolean itf) { +++ int type = itf ?
IMETH : METH; +++ key3.set(type, owner, name, desc); +++ Item result = get(key3); +++ if (result == null) { +++ put122(type, newClass(owner), newNameType(name, desc)); +++ result = new Item(index++, key3); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a method reference to the constant pool of the class being built. +++ * Does nothing if the constant pool already contains a similar item. +++ * This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters. +++ * +++ * @param owner +++ * the internal name of the method's owner class. +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor. +++ * @param itf +++ * true if owner is an interface. +++ * @return the index of a new or already existing method reference item. +++ */ +++ public int newMethod(final String owner, final String name, +++ final String desc, final boolean itf) { +++ return newMethodItem(owner, name, desc, itf).index; +++ } +++ +++ /** +++ * Adds an integer to the constant pool of the class being built. Does +++ * nothing if the constant pool already contains a similar item. +++ * +++ * @param value +++ * the int value. +++ * @return a new or already existing int item. +++ */ +++ Item newInteger(final int value) { +++ key.set(value); +++ Item result = get(key); +++ if (result == null) { +++ pool.putByte(INT).putInt(value); +++ result = new Item(index++, key); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a float to the constant pool of the class being built. Does nothing +++ * if the constant pool already contains a similar item. +++ * +++ * @param value +++ * the float value. +++ * @return a new or already existing float item.
+++ */ +++ Item newFloat(final float value) { +++ key.set(value); +++ Item result = get(key); +++ if (result == null) { +++ pool.putByte(FLOAT).putInt(key.intVal); +++ result = new Item(index++, key); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a long to the constant pool of the class being built. Does nothing +++ * if the constant pool already contains a similar item. +++ * +++ * @param value +++ * the long value. +++ * @return a new or already existing long item. +++ */ +++ Item newLong(final long value) { +++ key.set(value); +++ Item result = get(key); +++ if (result == null) { +++ pool.putByte(LONG).putLong(value); +++ result = new Item(index, key); +++ index += 2; /* advances by 2, not 1: in the class file format a long constant takes two constant pool entries */ +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a double to the constant pool of the class being built. Does nothing +++ * if the constant pool already contains a similar item. +++ * +++ * @param value +++ * the double value. +++ * @return a new or already existing double item. +++ */ +++ Item newDouble(final double value) { +++ key.set(value); +++ Item result = get(key); +++ if (result == null) { +++ pool.putByte(DOUBLE).putLong(key.longVal); +++ result = new Item(index, key); +++ index += 2; /* advances by 2, not 1: in the class file format a double constant takes two constant pool entries */ +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a string to the constant pool of the class being built. Does nothing +++ * if the constant pool already contains a similar item. +++ * +++ * @param value +++ * the String value. +++ * @return a new or already existing string item. +++ */ +++ private Item newString(final String value) { +++ key2.set(STR, value, null, null); +++ Item result = get(key2); +++ if (result == null) { +++ pool.put12(STR, newUTF8(value)); +++ result = new Item(index++, key2); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a name and type to the constant pool of the class being built. Does +++ * nothing if the constant pool already contains a similar item.
This +++ * method is intended for {@link Attribute} sub classes, and is normally not +++ * needed by class generators or adapters. +++ * +++ * @param name +++ * a name. +++ * @param desc +++ * a type descriptor. +++ * @return the index of a new or already existing name and type item. +++ */ +++ public int newNameType(final String name, final String desc) { +++ return newNameTypeItem(name, desc).index; +++ } +++ +++ /** +++ * Adds a name and type to the constant pool of the class being built. Does +++ * nothing if the constant pool already contains a similar item. +++ * +++ * @param name +++ * a name. +++ * @param desc +++ * a type descriptor. +++ * @return a new or already existing name and type item. +++ */ +++ Item newNameTypeItem(final String name, final String desc) { +++ key2.set(NAME_TYPE, name, desc, null); +++ Item result = get(key2); +++ if (result == null) { +++ put122(NAME_TYPE, newUTF8(name), newUTF8(desc)); +++ result = new Item(index++, key2); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds the given internal name to {@link #typeTable} and returns its index. +++ * Does nothing if the type table already contains this internal name. +++ * +++ * @param type +++ * the internal name to be added to the type table. +++ * @return the index of this internal name in the type table. +++ */ +++ int addType(final String type) { +++ key.set(TYPE_NORMAL, type, null, null); +++ Item result = get(key); +++ if (result == null) { +++ result = addType(key); /* delegates to the addType(Item) overload with the lookup key just filled in */ +++ } +++ return result.index; +++ } +++ +++ /** +++ * Adds the given "uninitialized" type to {@link #typeTable} and returns its +++ * index. This method is used for UNINITIALIZED types, made of an internal +++ * name and a bytecode offset. +++ * +++ * @param type +++ * the internal name to be added to the type table. +++ * @param offset +++ * the bytecode offset of the NEW instruction that created this +++ * UNINITIALIZED type value.
+++ * @return the index of this internal name in the type table. +++ */ +++ int addUninitializedType(final String type, final int offset) { +++ key.type = TYPE_UNINIT; +++ key.intVal = offset; +++ key.strVal1 = type; +++ key.hashCode = 0x7FFFFFFF & (TYPE_UNINIT + type.hashCode() + offset); +++ Item result = get(key); +++ if (result == null) { +++ result = addType(key); +++ } +++ return result.index; +++ } +++ +++ /** +++ * Adds the given Item to {@link #typeTable}. +++ * +++ * @param item +++ * the value to be added to the type table. +++ * @return the added Item, which is a new Item instance with the same value as +++ * the given Item. +++ */ +++ private Item addType(final Item item) { +++ ++typeCount; +++ Item result = new Item(typeCount, key); /* NOTE(review): copies the field 'key', not the parameter 'item'; both callers visible in this file pass 'key', so the two coincide there */ +++ put(result); +++ if (typeTable == null) { +++ typeTable = new Item[16]; +++ } +++ if (typeCount == typeTable.length) { +++ Item[] newTable = new Item[2 * typeTable.length]; +++ System.arraycopy(typeTable, 0, newTable, 0, typeTable.length); +++ typeTable = newTable; +++ } +++ typeTable[typeCount] = result; /* typeCount was incremented first, so the first type lands in slot 1 */ +++ return result; +++ } +++ +++ /** +++ * Returns the index of the common super type of the two given types. This +++ * method calls {@link #getCommonSuperClass} and caches the result in the +++ * {@link #items} hash table to speed up future calls with the same +++ * parameters. +++ * +++ * @param type1 +++ * index of an internal name in {@link #typeTable}. +++ * @param type2 +++ * index of an internal name in {@link #typeTable}. +++ * @return the index of the common super type of the two given types.
+++ */ +++ int getMergedType(final int type1, final int type2) { +++ key2.type = TYPE_MERGED; +++ key2.longVal = type1 | (((long) type2) << 32); /* packs the pair into one long: type1 OR-ed into the low bits, type2 shifted into the upper 32 bits */ +++ key2.hashCode = 0x7FFFFFFF & (TYPE_MERGED + type1 + type2); +++ Item result = get(key2); +++ if (result == null) { +++ String t = typeTable[type1].strVal1; +++ String u = typeTable[type2].strVal1; +++ key2.intVal = addType(getCommonSuperClass(t, u)); +++ result = new Item((short) 0, key2); /* index 0: this item only caches the merge result, which is carried in intVal and returned below */ +++ put(result); +++ } +++ return result.intVal; +++ } +++ +++ /** +++ * Returns the common super type of the two given types. The default +++ * implementation of this method loads the two given classes and uses +++ * the java.lang.Class methods to find the common super class. It can be +++ * overridden to compute this common super type in other ways, in particular +++ * without actually loading any class, or to take into account the class +++ * that is currently being generated by this ClassWriter, which can of +++ * course not be loaded since it is under construction. +++ * +++ * @param type1 +++ * the internal name of a class. +++ * @param type2 +++ * the internal name of another class. +++ * @return the internal name of the common super class of the two given +++ * classes.
+++ */ +++ protected String getCommonSuperClass(final String type1, final String type2) { +++ Class c, d; +++ ClassLoader classLoader = getClass().getClassLoader(); /* the loader of this writer's own runtime class is used for both lookups */ +++ try { +++ c = Class.forName(type1.replace('/', '.'), false, classLoader); /* internal name to binary name; 'false' loads the class without initializing it */ +++ d = Class.forName(type2.replace('/', '.'), false, classLoader); +++ } catch (Exception e) { +++ throw new RuntimeException(e.toString()); /* NOTE(review): only the string form is kept; the original exception is not chained as the cause */ +++ } +++ if (c.isAssignableFrom(d)) { +++ return type1; +++ } +++ if (d.isAssignableFrom(c)) { +++ return type2; +++ } +++ if (c.isInterface() || d.isInterface()) { +++ return "java/lang/Object"; +++ } else { +++ do { +++ c = c.getSuperclass(); /* walk up c's superclass chain until a class that d can be assigned to is found */ +++ } while (!c.isAssignableFrom(d)); +++ return c.getName().replace('.', '/'); +++ } +++ } +++ +++ /** +++ * Returns the constant pool's hash table item which is equal to the given +++ * item. +++ * +++ * @param key +++ * a constant pool item. +++ * @return the constant pool's hash table item which is equal to the given +++ * item, or null if there is no such item. +++ */ +++ private Item get(final Item key) { +++ Item i = items[key.hashCode % items.length]; +++ while (i != null && (i.type != key.type || !key.isEqualTo(i))) { +++ i = i.next; +++ } +++ return i; +++ } +++ +++ /** +++ * Puts the given item in the constant pool's hash table. The hash table +++ * must not already contain this item. +++ * +++ * @param i +++ * the item to be added to the constant pool's hash table.
+++ */ +++ private void put(final Item i) { +++ if (index + typeCount > threshold) { /* grow the table and rehash every item once the item count passes the threshold */ +++ int ll = items.length; +++ int nl = ll * 2 + 1; +++ Item[] newItems = new Item[nl]; +++ for (int l = ll - 1; l >= 0; --l) { +++ Item j = items[l]; +++ while (j != null) { +++ int index = j.hashCode % newItems.length; /* local bucket number; shadows the 'index' field read in the condition above */ +++ Item k = j.next; +++ j.next = newItems[index]; +++ newItems[index] = j; +++ j = k; +++ } +++ } +++ items = newItems; +++ threshold = (int) (nl * 0.75); +++ } +++ int index = i.hashCode % items.length; +++ i.next = items[index]; /* insert at the head of the bucket's chain */ +++ items[index] = i; +++ } +++ +++ /** +++ * Puts one byte and two shorts into the constant pool. +++ * +++ * @param b +++ * a byte. +++ * @param s1 +++ * a short. +++ * @param s2 +++ * another short. +++ */ +++ private void put122(final int b, final int s1, final int s2) { +++ pool.put12(b, s1).putShort(s2); +++ } +++ +++ /** +++ * Puts two bytes and one short into the constant pool. +++ * +++ * @param b1 +++ * a byte. +++ * @param b2 +++ * another byte. +++ * @param s +++ * a short. +++ */ +++ private void put112(final int b1, final int b2, final int s) { +++ pool.put11(b1, b2).putShort(s); +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Context.java b/contrib/asm/src/org/objectweb/asm/Context.java ++new file mode 100644 ++index 0000000..363b34c ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Context.java ++@@ -0,0 +1,145 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2.
Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++ +++package org.objectweb.asm; +++ +++/** +++ * Information about a class being parsed in a {@link ClassReader}. +++ * +++ * @author Eric Bruneton +++ */ +++class Context { +++ +++ /** +++ * Prototypes of the attributes that must be parsed for this class. +++ */ +++ Attribute[] attrs; +++ +++ /** +++ * The {@link ClassReader} option flags for the parsing of this class. +++ */ +++ int flags; +++ +++ /** +++ * The buffer used to read strings. +++ */ +++ char[] buffer; +++ +++ /** +++ * The start index of each bootstrap method. +++ */ +++ int[] bootstrapMethods; +++ +++ /** +++ * The access flags of the method currently being parsed. +++ */ +++ int access; +++ +++ /** +++ * The name of the method currently being parsed.
+++ */ +++ String name; +++ +++ /** +++ * The descriptor of the method currently being parsed. +++ */ +++ String desc; +++ +++ /** +++ * The label objects, indexed by bytecode offset, of the method currently +++ * being parsed (only bytecode offsets for which a label is needed have a +++ * non-null associated Label object). +++ */ +++ Label[] labels; +++ +++ /** +++ * The target of the type annotation currently being parsed. +++ */ +++ int typeRef; +++ +++ /** +++ * The path of the type annotation currently being parsed. +++ */ +++ TypePath typePath; +++ +++ /** +++ * The offset of the latest stack map frame that has been parsed. +++ */ +++ int offset; +++ +++ /** +++ * The labels corresponding to the start of the local variable ranges in the +++ * local variable type annotation currently being parsed. +++ */ +++ Label[] start; +++ +++ /** +++ * The labels corresponding to the end of the local variable ranges in the +++ * local variable type annotation currently being parsed. +++ */ +++ Label[] end; +++ +++ /** +++ * The local variable indices for each local variable range in the local +++ * variable type annotation currently being parsed. +++ */ +++ int[] index; +++ +++ /** +++ * The encoding of the latest stack map frame that has been parsed. +++ */ +++ int mode; +++ +++ /** +++ * The number of locals in the latest stack map frame that has been parsed. +++ */ +++ int localCount; +++ +++ /** +++ * The number of locals in the latest stack map frame that has been parsed, +++ * minus the number of locals in the previous frame. +++ */ +++ int localDiff; +++ +++ /** +++ * The local values of the latest stack map frame that has been parsed. +++ */ +++ Object[] local; +++ +++ /** +++ * The stack size of the latest stack map frame that has been parsed. +++ */ +++ int stackCount; +++ +++ /** +++ * The stack values of the latest stack map frame that has been parsed.
+++ */ +++ Object[] stack; +++} ++\ No newline at end of file ++diff --git a/contrib/asm/src/org/objectweb/asm/Edge.java b/contrib/asm/src/org/objectweb/asm/Edge.java ++new file mode 100644 ++index 0000000..4e87cba ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Edge.java ++@@ -0,0 +1,75 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * An edge in the control flow graph of a method body. See {@link Label Label}. +++ * +++ * @author Eric Bruneton +++ */ +++class Edge { +++ +++ /** +++ * Denotes a normal control flow graph edge. +++ */ +++ static final int NORMAL = 0; +++ +++ /** +++ * Denotes a control flow graph edge corresponding to an exception handler. +++ * More precisely any {@link Edge} whose {@link #info} is strictly positive +++ * corresponds to an exception handler. The actual value of {@link #info} is +++ * the index, in the {@link ClassWriter} type table, of the exception that +++ * is caught. +++ */ +++ static final int EXCEPTION = 0x7FFFFFFF; +++ +++ /** +++ * Information about this control flow graph edge. If +++ * {@link ClassWriter#COMPUTE_MAXS} is used this field is the (relative) +++ * stack size in the basic block from which this edge originates. This size +++ * is equal to the stack size at the "jump" instruction to which this edge +++ * corresponds, relative to the stack size at the beginning of the +++ * originating basic block. If {@link ClassWriter#COMPUTE_FRAMES} is used, +++ * this field is the kind of this control flow graph edge (i.e. NORMAL or +++ * EXCEPTION). +++ */ +++ int info; +++ +++ /** +++ * The successor block of the basic block from which this edge originates.
+++ */ +++ Label successor; +++ +++ /** +++ * The next edge in the list of successors of the originating basic block. +++ * See {@link Label#successors successors}. +++ */ +++ Edge next; +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/FieldVisitor.java b/contrib/asm/src/org/objectweb/asm/FieldVisitor.java ++new file mode 100644 ++index 0000000..2372e4c ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/FieldVisitor.java ++@@ -0,0 +1,150 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A visitor to visit a Java field. The methods of this class must be called in +++ * the following order: ( visitAnnotation | +++ * visitTypeAnnotation | visitAttribute )* visitEnd. +++ * +++ * @author Eric Bruneton +++ */ +++public abstract class FieldVisitor { +++ +++ /** +++ * The ASM API version implemented by this visitor. The value of this field +++ * must be one of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ protected final int api; +++ +++ /** +++ * The field visitor to which this visitor must delegate method calls. May +++ * be null. +++ */ +++ protected FieldVisitor fv; +++ +++ /** +++ * Constructs a new {@link FieldVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ public FieldVisitor(final int api) { +++ this(api, null); +++ } +++ +++ /** +++ * Constructs a new {@link FieldVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ * @param fv +++ * the field visitor to which this visitor must delegate method +++ * calls. May be null.
+++ */ +++ public FieldVisitor(final int api, final FieldVisitor fv) { +++ if (api != Opcodes.ASM4 && api != Opcodes.ASM5) { /* any API version other than ASM4 or ASM5 is rejected */ +++ throw new IllegalArgumentException(); +++ } +++ this.api = api; +++ this.fv = fv; +++ } +++ +++ /** +++ * Visits an annotation of the field. +++ * +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * true if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or null if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitAnnotation(String desc, boolean visible) { +++ if (fv != null) { +++ return fv.visitAnnotation(desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an annotation on the type of the field. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#FIELD FIELD}. See +++ * {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * null if the annotation targets 'typeRef' as a whole. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * true if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or null if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitTypeAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { /* calling this method on a visitor created with an API version below ASM5 fails with a RuntimeException */ +++ throw new RuntimeException(); +++ } +++ if (fv != null) { +++ return fv.visitTypeAnnotation(typeRef, typePath, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a non-standard attribute of the field. +++ * +++ * @param attr +++ * an attribute.
+++ */ +++ public void visitAttribute(Attribute attr) { +++ if (fv != null) { +++ fv.visitAttribute(attr); +++ } +++ } +++ +++ /** +++ * Visits the end of the field. This method, which is the last one to be +++ * called, is used to inform the visitor that all the annotations and +++ * attributes of the field have been visited. +++ */ +++ public void visitEnd() { +++ if (fv != null) { +++ fv.visitEnd(); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/FieldWriter.java b/contrib/asm/src/org/objectweb/asm/FieldWriter.java ++new file mode 100644 ++index 0000000..84d92aa ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/FieldWriter.java ++@@ -0,0 +1,329 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A {@link FieldVisitor} that generates Java fields in bytecode form. +++ * +++ * @author Eric Bruneton +++ */ +++final class FieldWriter extends FieldVisitor { +++ +++ /** +++ * The class writer to which this field must be added. +++ */ +++ private final ClassWriter cw; +++ +++ /** +++ * Access flags of this field. +++ */ +++ private final int access; +++ +++ /** +++ * The index of the constant pool item that contains the name of this +++ * field. +++ */ +++ private final int name; +++ +++ /** +++ * The index of the constant pool item that contains the descriptor of this +++ * field. +++ */ +++ private final int desc; +++ +++ /** +++ * The index of the constant pool item that contains the signature of this +++ * field. +++ */ +++ private int signature; +++ +++ /** +++ * The index of the constant pool item that contains the constant value of +++ * this field. +++ */ +++ private int value; +++ +++ /** +++ * The runtime visible annotations of this field. May be null. +++ */ +++ private AnnotationWriter anns; +++ +++ /** +++ * The runtime invisible annotations of this field. May be null. +++ */ +++ private AnnotationWriter ianns; +++ +++ /** +++ * The runtime visible type annotations of this field. May be null. +++ */ +++ private AnnotationWriter tanns; +++ +++ /** +++ * The runtime invisible type annotations of this field. May be +++ * null.
+++ */ +++ private AnnotationWriter itanns; +++ +++ /** +++ * The non standard attributes of this field. May be null. +++ */ +++ private Attribute attrs; +++ +++ // ------------------------------------------------------------------------ +++ // Constructor +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new {@link FieldWriter}. +++ * +++ * @param cw +++ * the class writer to which this field must be added. +++ * @param access +++ * the field's access flags (see {@link Opcodes}). +++ * @param name +++ * the field's name. +++ * @param desc +++ * the field's descriptor (see {@link Type}). +++ * @param signature +++ * the field's signature. May be null. +++ * @param value +++ * the field's constant value. May be null. +++ */ +++ FieldWriter(final ClassWriter cw, final int access, final String name, +++ final String desc, final String signature, final Object value) { +++ super(Opcodes.ASM5); +++ if (cw.firstField == null) { +++ cw.firstField = this; +++ } else { +++ cw.lastField.fv = this; /* append to the writer's field list: the inherited 'fv' reference is used as the link to the next FieldWriter */ +++ } +++ cw.lastField = this; +++ this.cw = cw; +++ this.access = access; +++ this.name = cw.newUTF8(name); +++ this.desc = cw.newUTF8(desc); +++ if (ClassReader.SIGNATURES && signature != null) { +++ this.signature = cw.newUTF8(signature); +++ } +++ if (value != null) { +++ this.value = cw.newConstItem(value).index; +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Implementation of the FieldVisitor abstract class +++ // ------------------------------------------------------------------------ +++ +++ @Override +++ public AnnotationVisitor visitAnnotation(final String desc, +++ final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, 2); /* NOTE(review): 2 is presumably the offset of the values-count short reserved just above - confirm against AnnotationWriter */ +++
if (visible) { +++ aw.next = anns; /* prepend to the runtime-visible annotation list */ +++ anns = aw; +++ } else { +++ aw.next = ianns; /* prepend to the runtime-invisible annotation list */ +++ ianns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public AnnotationVisitor visitTypeAnnotation(final int typeRef, +++ final TypePath typePath, final String desc, final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ AnnotationWriter.putTarget(typeRef, typePath, bv); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, +++ bv.length - 2); /* position of the two-byte values count that was just reserved at the end of bv */ +++ if (visible) { +++ aw.next = tanns; +++ tanns = aw; +++ } else { +++ aw.next = itanns; +++ itanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public void visitAttribute(final Attribute attr) { +++ attr.next = attrs; /* prepend: the attribute list is kept in reverse visit order */ +++ attrs = attr; +++ } +++ +++ @Override +++ public void visitEnd() { /* no-op */ +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the size of this field. +++ * +++ * @return the size of this field.
+++ */ +++ int getSize() { +++ int size = 8; +++ if (value != 0) { +++ cw.newUTF8("ConstantValue"); +++ size += 8; +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ cw.newUTF8("Synthetic"); +++ size += 6; +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ cw.newUTF8("Deprecated"); +++ size += 6; +++ } +++ if (ClassReader.SIGNATURES && signature != 0) { +++ cw.newUTF8("Signature"); +++ size += 8; +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ cw.newUTF8("RuntimeVisibleAnnotations"); +++ size += 8 + anns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ cw.newUTF8("RuntimeInvisibleAnnotations"); +++ size += 8 + ianns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ cw.newUTF8("RuntimeVisibleTypeAnnotations"); +++ size += 8 + tanns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ cw.newUTF8("RuntimeInvisibleTypeAnnotations"); +++ size += 8 + itanns.getSize(); +++ } +++ if (attrs != null) { +++ size += attrs.getSize(cw, null, 0, -1, -1); +++ } +++ return size; +++ } +++ +++ /** +++ * Puts the content of this field into the given byte vector. +++ * +++ * @param out +++ * where the content of this field must be put. 
+++ */ +++ void put(final ByteVector out) { +++ final int FACTOR = ClassWriter.TO_ACC_SYNTHETIC; +++ int mask = Opcodes.ACC_DEPRECATED | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE +++ | ((access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) / FACTOR); +++ out.putShort(access & ~mask).putShort(name).putShort(desc); +++ int attributeCount = 0; +++ if (value != 0) { +++ ++attributeCount; +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ ++attributeCount; +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ ++attributeCount; +++ } +++ if (ClassReader.SIGNATURES && signature != 0) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ ++attributeCount; +++ } +++ if (attrs != null) { +++ attributeCount += attrs.getCount(); +++ } +++ out.putShort(attributeCount); +++ if (value != 0) { +++ out.putShort(cw.newUTF8("ConstantValue")); +++ out.putInt(2).putShort(value); +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ out.putShort(cw.newUTF8("Synthetic")).putInt(0); +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ out.putShort(cw.newUTF8("Deprecated")).putInt(0); +++ } +++ if (ClassReader.SIGNATURES && signature != 0) { +++ out.putShort(cw.newUTF8("Signature")); +++ out.putInt(2).putShort(signature); +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleAnnotations")); +++ anns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleAnnotations")); +++ 
ianns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleTypeAnnotations")); +++ tanns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleTypeAnnotations")); +++ itanns.put(out); +++ } +++ if (attrs != null) { +++ attrs.put(cw, null, 0, -1, -1, out); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Frame.java b/contrib/asm/src/org/objectweb/asm/Frame.java ++new file mode 100644 ++index 0000000..1f6106f ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Frame.java ++@@ -0,0 +1,1462 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * Information about the input and output stack map frames of a basic block. +++ * +++ * @author Eric Bruneton +++ */ +++final class Frame { +++ +++ /* +++ * Frames are computed in a two steps process: during the visit of each +++ * instruction, the state of the frame at the end of current basic block is +++ * updated by simulating the action of the instruction on the previous state +++ * of this so called "output frame". In visitMaxs, a fix point algorithm is +++ * used to compute the "input frame" of each basic block, i.e. the stack map +++ * frame at the beginning of the basic block, starting from the input frame +++ * of the first basic block (which is computed from the method descriptor), +++ * and by using the previously computed output frames to compute the input +++ * state of the other blocks. +++ * +++ * All output and input frames are stored as arrays of integers. Reference +++ * and array types are represented by an index into a type table (which is +++ * not the same as the constant pool of the class, in order to avoid adding +++ * unnecessary constants in the pool - not all computed frames will end up +++ * being stored in the stack map table). This allows very fast type +++ * comparisons. +++ * +++ * Output stack map frames are computed relatively to the input frame of the +++ * basic block, which is not yet known when output frames are computed. 
It +++ * is therefore necessary to be able to represent abstract types such as +++ * "the type at position x in the input frame locals" or "the type at +++ * position x from the top of the input frame stack" or even "the type at +++ * position x in the input frame, with y more (or less) array dimensions". +++ * This explains the rather complicated type format used in output frames. +++ * +++ * This format is the following: DIM KIND VALUE (4, 4 and 24 bits). DIM is a +++ * signed number of array dimensions (from -8 to 7). KIND is either BASE, +++ * LOCAL or STACK. BASE is used for types that are not relative to the input +++ * frame. LOCAL is used for types that are relative to the input local +++ * variable types. STACK is used for types that are relative to the input +++ * stack types. VALUE depends on KIND. For LOCAL types, it is an index in +++ * the input local variable types. For STACK types, it is a position +++ * relatively to the top of input frame stack. For BASE types, it is either +++ * one of the constants defined below, or for OBJECT and UNINITIALIZED +++ * types, a tag and an index in the type table. +++ * +++ * Output frames can contain types of any kind and with a positive or +++ * negative dimension (and even unassigned types, represented by 0 - which +++ * does not correspond to any valid type value). Input frames can only +++ * contain BASE types of positive or null dimension. In all cases the type +++ * table contains only internal type names (array type descriptors are +++ * forbidden - dimensions must be represented through the DIM field). +++ * +++ * The LONG and DOUBLE types are always represented by using two slots (LONG +++ * + TOP or DOUBLE + TOP), for local variable types as well as in the +++ * operand stack. 
This is necessary to be able to simulate DUPx_y +++ * instructions, whose effect would be dependent on the actual type values +++ * if types were always represented by a single slot in the stack (and this +++ * is not possible, since actual type values are not always known - cf LOCAL +++ * and STACK type kinds). +++ */ +++ +++ /** +++ * Mask to get the dimension of a frame type. This dimension is a signed +++ * integer between -8 and 7. +++ */ +++ static final int DIM = 0xF0000000; +++ +++ /** +++ * Constant to be added to a type to get a type with one more dimension. +++ */ +++ static final int ARRAY_OF = 0x10000000; +++ +++ /** +++ * Constant to be added to a type to get a type with one less dimension. +++ */ +++ static final int ELEMENT_OF = 0xF0000000; +++ +++ /** +++ * Mask to get the kind of a frame type. +++ * +++ * @see #BASE +++ * @see #LOCAL +++ * @see #STACK +++ */ +++ static final int KIND = 0xF000000; +++ +++ /** +++ * Flag used for LOCAL and STACK types. Indicates that if this type happens +++ * to be a long or double type (during the computations of input frames), +++ * then it must be set to TOP because the second word of this value has been +++ * reused to store other data in the basic block. Hence the first word no +++ * longer stores a valid long or double value. +++ */ +++ static final int TOP_IF_LONG_OR_DOUBLE = 0x800000; +++ +++ /** +++ * Mask to get the value of a frame type. +++ */ +++ static final int VALUE = 0x7FFFFF; +++ +++ /** +++ * Mask to get the kind of base types. +++ */ +++ static final int BASE_KIND = 0xFF00000; +++ +++ /** +++ * Mask to get the value of base types. +++ */ +++ static final int BASE_VALUE = 0xFFFFF; +++ +++ /** +++ * Kind of the types that are not relative to an input stack map frame. +++ */ +++ static final int BASE = 0x1000000; +++ +++ /** +++ * Base kind of the base reference types. The BASE_VALUE of such types is an +++ * index into the type table. 
+++ */ +++ static final int OBJECT = BASE | 0x700000; +++ +++ /** +++ * Base kind of the uninitialized base types. The BASE_VALUE of such types +++ * is an index into the type table (the Item at that index contains both an +++ * instruction offset and an internal class name). +++ */ +++ static final int UNINITIALIZED = BASE | 0x800000; +++ +++ /** +++ * Kind of the types that are relative to the local variable types of an +++ * input stack map frame. The value of such types is a local variable index. +++ */ +++ private static final int LOCAL = 0x2000000; +++ +++ /** +++ * Kind of the types that are relative to the stack of an input stack +++ * map frame. The value of such types is a position relatively to the top of +++ * this stack. +++ */ +++ private static final int STACK = 0x3000000; +++ +++ /** +++ * The TOP type. This is a BASE type. +++ */ +++ static final int TOP = BASE | 0; +++ +++ /** +++ * The BOOLEAN type. This is a BASE type mainly used for array types. +++ */ +++ static final int BOOLEAN = BASE | 9; +++ +++ /** +++ * The BYTE type. This is a BASE type mainly used for array types. +++ */ +++ static final int BYTE = BASE | 10; +++ +++ /** +++ * The CHAR type. This is a BASE type mainly used for array types. +++ */ +++ static final int CHAR = BASE | 11; +++ +++ /** +++ * The SHORT type. This is a BASE type mainly used for array types. +++ */ +++ static final int SHORT = BASE | 12; +++ +++ /** +++ * The INTEGER type. This is a BASE type. +++ */ +++ static final int INTEGER = BASE | 1; +++ +++ /** +++ * The FLOAT type. This is a BASE type. +++ */ +++ static final int FLOAT = BASE | 2; +++ +++ /** +++ * The DOUBLE type. This is a BASE type. +++ */ +++ static final int DOUBLE = BASE | 3; +++ +++ /** +++ * The LONG type. This is a BASE type. +++ */ +++ static final int LONG = BASE | 4; +++ +++ /** +++ * The NULL type. This is a BASE type. +++ */ +++ static final int NULL = BASE | 5; +++ +++ /** +++ * The UNINITIALIZED_THIS type. This is a BASE type. 
+++ */ +++ static final int UNINITIALIZED_THIS = BASE | 6; +++ +++ /** +++ * The stack size variation corresponding to each JVM instruction. This +++ * stack variation is equal to the size of the values produced by an +++ * instruction, minus the size of the values consumed by this instruction. +++ */ +++ static final int[] SIZE; +++ +++ /** +++ * Computes the stack size variation corresponding to each JVM instruction. +++ */ +++ static { +++ int i; +++ int[] b = new int[202]; +++ String s = "EFFFFFFFFGGFFFGGFFFEEFGFGFEEEEEEEEEEEEEEEEEEEEDEDEDDDDD" +++ + "CDCDEEEEEEEEEEEEEEEEEEEEBABABBBBDCFFFGGGEDCDCDCDCDCDCDCDCD" +++ + "CDCEEEEDDDDDDDCDCDCEFEFDDEEFFDEDEEEBDDBBDDDDDDCCCCCCCCEFED" +++ + "DDCDCDEEEEEEEEEEFEEEEEEDDEEDDEE"; +++ for (i = 0; i < b.length; ++i) { +++ b[i] = s.charAt(i) - 'E'; +++ } +++ SIZE = b; +++ +++ // code to generate the above string +++ // +++ // int NA = 0; // not applicable (unused opcode or variable size opcode) +++ // +++ // b = new int[] { +++ // 0, //NOP, // visitInsn +++ // 1, //ACONST_NULL, // - +++ // 1, //ICONST_M1, // - +++ // 1, //ICONST_0, // - +++ // 1, //ICONST_1, // - +++ // 1, //ICONST_2, // - +++ // 1, //ICONST_3, // - +++ // 1, //ICONST_4, // - +++ // 1, //ICONST_5, // - +++ // 2, //LCONST_0, // - +++ // 2, //LCONST_1, // - +++ // 1, //FCONST_0, // - +++ // 1, //FCONST_1, // - +++ // 1, //FCONST_2, // - +++ // 2, //DCONST_0, // - +++ // 2, //DCONST_1, // - +++ // 1, //BIPUSH, // visitIntInsn +++ // 1, //SIPUSH, // - +++ // 1, //LDC, // visitLdcInsn +++ // NA, //LDC_W, // - +++ // NA, //LDC2_W, // - +++ // 1, //ILOAD, // visitVarInsn +++ // 2, //LLOAD, // - +++ // 1, //FLOAD, // - +++ // 2, //DLOAD, // - +++ // 1, //ALOAD, // - +++ // NA, //ILOAD_0, // - +++ // NA, //ILOAD_1, // - +++ // NA, //ILOAD_2, // - +++ // NA, //ILOAD_3, // - +++ // NA, //LLOAD_0, // - +++ // NA, //LLOAD_1, // - +++ // NA, //LLOAD_2, // - +++ // NA, //LLOAD_3, // - +++ // NA, //FLOAD_0, // - +++ // NA, //FLOAD_1, // - +++ // NA, //FLOAD_2, // - +++ // NA, 
//FLOAD_3, // - +++ // NA, //DLOAD_0, // - +++ // NA, //DLOAD_1, // - +++ // NA, //DLOAD_2, // - +++ // NA, //DLOAD_3, // - +++ // NA, //ALOAD_0, // - +++ // NA, //ALOAD_1, // - +++ // NA, //ALOAD_2, // - +++ // NA, //ALOAD_3, // - +++ // -1, //IALOAD, // visitInsn +++ // 0, //LALOAD, // - +++ // -1, //FALOAD, // - +++ // 0, //DALOAD, // - +++ // -1, //AALOAD, // - +++ // -1, //BALOAD, // - +++ // -1, //CALOAD, // - +++ // -1, //SALOAD, // - +++ // -1, //ISTORE, // visitVarInsn +++ // -2, //LSTORE, // - +++ // -1, //FSTORE, // - +++ // -2, //DSTORE, // - +++ // -1, //ASTORE, // - +++ // NA, //ISTORE_0, // - +++ // NA, //ISTORE_1, // - +++ // NA, //ISTORE_2, // - +++ // NA, //ISTORE_3, // - +++ // NA, //LSTORE_0, // - +++ // NA, //LSTORE_1, // - +++ // NA, //LSTORE_2, // - +++ // NA, //LSTORE_3, // - +++ // NA, //FSTORE_0, // - +++ // NA, //FSTORE_1, // - +++ // NA, //FSTORE_2, // - +++ // NA, //FSTORE_3, // - +++ // NA, //DSTORE_0, // - +++ // NA, //DSTORE_1, // - +++ // NA, //DSTORE_2, // - +++ // NA, //DSTORE_3, // - +++ // NA, //ASTORE_0, // - +++ // NA, //ASTORE_1, // - +++ // NA, //ASTORE_2, // - +++ // NA, //ASTORE_3, // - +++ // -3, //IASTORE, // visitInsn +++ // -4, //LASTORE, // - +++ // -3, //FASTORE, // - +++ // -4, //DASTORE, // - +++ // -3, //AASTORE, // - +++ // -3, //BASTORE, // - +++ // -3, //CASTORE, // - +++ // -3, //SASTORE, // - +++ // -1, //POP, // - +++ // -2, //POP2, // - +++ // 1, //DUP, // - +++ // 1, //DUP_X1, // - +++ // 1, //DUP_X2, // - +++ // 2, //DUP2, // - +++ // 2, //DUP2_X1, // - +++ // 2, //DUP2_X2, // - +++ // 0, //SWAP, // - +++ // -1, //IADD, // - +++ // -2, //LADD, // - +++ // -1, //FADD, // - +++ // -2, //DADD, // - +++ // -1, //ISUB, // - +++ // -2, //LSUB, // - +++ // -1, //FSUB, // - +++ // -2, //DSUB, // - +++ // -1, //IMUL, // - +++ // -2, //LMUL, // - +++ // -1, //FMUL, // - +++ // -2, //DMUL, // - +++ // -1, //IDIV, // - +++ // -2, //LDIV, // - +++ // -1, //FDIV, // - +++ // -2, //DDIV, // - +++ // -1, //IREM, // - +++ 
// -2, //LREM, // - +++ // -1, //FREM, // - +++ // -2, //DREM, // - +++ // 0, //INEG, // - +++ // 0, //LNEG, // - +++ // 0, //FNEG, // - +++ // 0, //DNEG, // - +++ // -1, //ISHL, // - +++ // -1, //LSHL, // - +++ // -1, //ISHR, // - +++ // -1, //LSHR, // - +++ // -1, //IUSHR, // - +++ // -1, //LUSHR, // - +++ // -1, //IAND, // - +++ // -2, //LAND, // - +++ // -1, //IOR, // - +++ // -2, //LOR, // - +++ // -1, //IXOR, // - +++ // -2, //LXOR, // - +++ // 0, //IINC, // visitIincInsn +++ // 1, //I2L, // visitInsn +++ // 0, //I2F, // - +++ // 1, //I2D, // - +++ // -1, //L2I, // - +++ // -1, //L2F, // - +++ // 0, //L2D, // - +++ // 0, //F2I, // - +++ // 1, //F2L, // - +++ // 1, //F2D, // - +++ // -1, //D2I, // - +++ // 0, //D2L, // - +++ // -1, //D2F, // - +++ // 0, //I2B, // - +++ // 0, //I2C, // - +++ // 0, //I2S, // - +++ // -3, //LCMP, // - +++ // -1, //FCMPL, // - +++ // -1, //FCMPG, // - +++ // -3, //DCMPL, // - +++ // -3, //DCMPG, // - +++ // -1, //IFEQ, // visitJumpInsn +++ // -1, //IFNE, // - +++ // -1, //IFLT, // - +++ // -1, //IFGE, // - +++ // -1, //IFGT, // - +++ // -1, //IFLE, // - +++ // -2, //IF_ICMPEQ, // - +++ // -2, //IF_ICMPNE, // - +++ // -2, //IF_ICMPLT, // - +++ // -2, //IF_ICMPGE, // - +++ // -2, //IF_ICMPGT, // - +++ // -2, //IF_ICMPLE, // - +++ // -2, //IF_ACMPEQ, // - +++ // -2, //IF_ACMPNE, // - +++ // 0, //GOTO, // - +++ // 1, //JSR, // - +++ // 0, //RET, // visitVarInsn +++ // -1, //TABLESWITCH, // visiTableSwitchInsn +++ // -1, //LOOKUPSWITCH, // visitLookupSwitch +++ // -1, //IRETURN, // visitInsn +++ // -2, //LRETURN, // - +++ // -1, //FRETURN, // - +++ // -2, //DRETURN, // - +++ // -1, //ARETURN, // - +++ // 0, //RETURN, // - +++ // NA, //GETSTATIC, // visitFieldInsn +++ // NA, //PUTSTATIC, // - +++ // NA, //GETFIELD, // - +++ // NA, //PUTFIELD, // - +++ // NA, //INVOKEVIRTUAL, // visitMethodInsn +++ // NA, //INVOKESPECIAL, // - +++ // NA, //INVOKESTATIC, // - +++ // NA, //INVOKEINTERFACE, // - +++ // NA, //INVOKEDYNAMIC, // 
visitInvokeDynamicInsn +++ // 1, //NEW, // visitTypeInsn +++ // 0, //NEWARRAY, // visitIntInsn +++ // 0, //ANEWARRAY, // visitTypeInsn +++ // 0, //ARRAYLENGTH, // visitInsn +++ // NA, //ATHROW, // - +++ // 0, //CHECKCAST, // visitTypeInsn +++ // 0, //INSTANCEOF, // - +++ // -1, //MONITORENTER, // visitInsn +++ // -1, //MONITOREXIT, // - +++ // NA, //WIDE, // NOT VISITED +++ // NA, //MULTIANEWARRAY, // visitMultiANewArrayInsn +++ // -1, //IFNULL, // visitJumpInsn +++ // -1, //IFNONNULL, // - +++ // NA, //GOTO_W, // - +++ // NA, //JSR_W, // - +++ // }; +++ // for (i = 0; i < b.length; ++i) { +++ // System.err.print((char)('E' + b[i])); +++ // } +++ // System.err.println(); +++ } +++ +++ /** +++ * The label (i.e. basic block) to which these input and output stack map +++ * frames correspond. +++ */ +++ Label owner; +++ +++ /** +++ * The input stack map frame locals. +++ */ +++ int[] inputLocals; +++ +++ /** +++ * The input stack map frame stack. +++ */ +++ int[] inputStack; +++ +++ /** +++ * The output stack map frame locals. +++ */ +++ private int[] outputLocals; +++ +++ /** +++ * The output stack map frame stack. +++ */ +++ private int[] outputStack; +++ +++ /** +++ * Relative size of the output stack. The exact semantics of this field +++ * depends on the algorithm that is used. +++ * +++ * When only the maximum stack size is computed, this field is the size of +++ * the output stack relatively to the top of the input stack. +++ * +++ * When the stack map frames are completely computed, this field is the +++ * actual number of types in {@link #outputStack}. +++ */ +++ private int outputStackTop; +++ +++ /** +++ * Number of types that are initialized in the basic block. +++ * +++ * @see #initializations +++ */ +++ private int initializationCount; +++ +++ /** +++ * The types that are initialized in the basic block. 
A constructor +++ * invocation on an UNINITIALIZED or UNINITIALIZED_THIS type must replace +++ * every occurrence of this type in the local variables and in the +++ * operand stack. This cannot be done during the first phase of the +++ * algorithm since, during this phase, the local variables and the operand +++ * stack are not completely computed. It is therefore necessary to store the +++ * types on which constructors are invoked in the basic block, in order to +++ * do this replacement during the second phase of the algorithm, where the +++ * frames are fully computed. Note that this array can contain types that +++ * are relative to input locals or to the input stack (see below for the +++ * description of the algorithm). +++ */ +++ private int[] initializations; +++ +++ /** +++ * Returns the output frame local variable type at the given index. +++ * +++ * @param local +++ * the index of the local that must be returned. +++ * @return the output frame local variable type at the given index. +++ */ +++ private int get(final int local) { +++ if (outputLocals == null || local >= outputLocals.length) { +++ // this local has never been assigned in this basic block, +++ // so it is still equal to its value in the input frame +++ return LOCAL | local; +++ } else { +++ int type = outputLocals[local]; +++ if (type == 0) { +++ // this local has never been assigned in this basic block, +++ // so it is still equal to its value in the input frame +++ type = outputLocals[local] = LOCAL | local; +++ } +++ return type; +++ } +++ } +++ +++ /** +++ * Sets the output frame local variable type at the given index. +++ * +++ * @param local +++ * the index of the local that must be set. +++ * @param type +++ * the value of the local that must be set. 
+++ */ +++ private void set(final int local, final int type) { +++ // creates and/or resizes the output local variables array if necessary +++ if (outputLocals == null) { +++ outputLocals = new int[10]; +++ } +++ int n = outputLocals.length; +++ if (local >= n) { +++ int[] t = new int[Math.max(local + 1, 2 * n)]; +++ System.arraycopy(outputLocals, 0, t, 0, n); +++ outputLocals = t; +++ } +++ // sets the local variable +++ outputLocals[local] = type; +++ } +++ +++ /** +++ * Pushes a new type onto the output frame stack. +++ * +++ * @param type +++ * the type that must be pushed. +++ */ +++ private void push(final int type) { +++ // creates and/or resizes the output stack array if necessary +++ if (outputStack == null) { +++ outputStack = new int[10]; +++ } +++ int n = outputStack.length; +++ if (outputStackTop >= n) { +++ int[] t = new int[Math.max(outputStackTop + 1, 2 * n)]; +++ System.arraycopy(outputStack, 0, t, 0, n); +++ outputStack = t; +++ } +++ // pushes the type on the output stack +++ outputStack[outputStackTop++] = type; +++ // updates the maximum height reached by the output stack, if needed +++ int top = owner.inputStackTop + outputStackTop; +++ if (top > owner.outputStackMax) { +++ owner.outputStackMax = top; +++ } +++ } +++ +++ /** +++ * Pushes a new type onto the output frame stack. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs. +++ * @param desc +++ * the descriptor of the type to be pushed. Can also be a method +++ * descriptor (in this case this method pushes its return type +++ * onto the output frame stack). +++ */ +++ private void push(final ClassWriter cw, final String desc) { +++ int type = type(cw, desc); +++ if (type != 0) { +++ push(type); +++ if (type == LONG || type == DOUBLE) { +++ push(TOP); +++ } +++ } +++ } +++ +++ /** +++ * Returns the int encoding of the given type. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs. +++ * @param desc +++ * a type descriptor. 
+++ * @return the int encoding of the given type. +++ */ +++ private static int type(final ClassWriter cw, final String desc) { +++ String t; +++ int index = desc.charAt(0) == '(' ? desc.indexOf(')') + 1 : 0; +++ switch (desc.charAt(index)) { +++ case 'V': +++ return 0; +++ case 'Z': +++ case 'C': +++ case 'B': +++ case 'S': +++ case 'I': +++ return INTEGER; +++ case 'F': +++ return FLOAT; +++ case 'J': +++ return LONG; +++ case 'D': +++ return DOUBLE; +++ case 'L': +++ // stores the internal name, not the descriptor! +++ t = desc.substring(index + 1, desc.length() - 1); +++ return OBJECT | cw.addType(t); +++ // case '[': +++ default: +++ // extracts the dimensions and the element type +++ int data; +++ int dims = index + 1; +++ while (desc.charAt(dims) == '[') { +++ ++dims; +++ } +++ switch (desc.charAt(dims)) { +++ case 'Z': +++ data = BOOLEAN; +++ break; +++ case 'C': +++ data = CHAR; +++ break; +++ case 'B': +++ data = BYTE; +++ break; +++ case 'S': +++ data = SHORT; +++ break; +++ case 'I': +++ data = INTEGER; +++ break; +++ case 'F': +++ data = FLOAT; +++ break; +++ case 'J': +++ data = LONG; +++ break; +++ case 'D': +++ data = DOUBLE; +++ break; +++ // case 'L': +++ default: +++ // stores the internal name, not the descriptor +++ t = desc.substring(dims + 1, desc.length() - 1); +++ data = OBJECT | cw.addType(t); +++ } +++ return (dims - index) << 28 | data; +++ } +++ } +++ +++ /** +++ * Pops a type from the output frame stack and returns its value. +++ * +++ * @return the type that has been popped from the output frame stack. +++ */ +++ private int pop() { +++ if (outputStackTop > 0) { +++ return outputStack[--outputStackTop]; +++ } else { +++ // if the output frame stack is empty, pops from the input stack +++ return STACK | -(--owner.inputStackTop); +++ } +++ } +++ +++ /** +++ * Pops the given number of types from the output frame stack. +++ * +++ * @param elements +++ * the number of types that must be popped. 
+++ */ +++ private void pop(final int elements) { +++ if (outputStackTop >= elements) { +++ outputStackTop -= elements; +++ } else { +++ // if the number of elements to be popped is greater than the number +++ // of elements in the output stack, clear it, and pops the remaining +++ // elements from the input stack. +++ owner.inputStackTop -= elements - outputStackTop; +++ outputStackTop = 0; +++ } +++ } +++ +++ /** +++ * Pops a type from the output frame stack. +++ * +++ * @param desc +++ * the descriptor of the type to be popped. Can also be a method +++ * descriptor (in this case this method pops the types +++ * corresponding to the method arguments). +++ */ +++ private void pop(final String desc) { +++ char c = desc.charAt(0); +++ if (c == '(') { +++ pop((Type.getArgumentsAndReturnSizes(desc) >> 2) - 1); +++ } else if (c == 'J' || c == 'D') { +++ pop(2); +++ } else { +++ pop(1); +++ } +++ } +++ +++ /** +++ * Adds a new type to the list of types on which a constructor is invoked in +++ * the basic block. +++ * +++ * @param var +++ * a type on which a constructor is invoked. +++ */ +++ private void init(final int var) { +++ // creates and/or resizes the initializations array if necessary +++ if (initializations == null) { +++ initializations = new int[2]; +++ } +++ int n = initializations.length; +++ if (initializationCount >= n) { +++ int[] t = new int[Math.max(initializationCount + 1, 2 * n)]; +++ System.arraycopy(initializations, 0, t, 0, n); +++ initializations = t; +++ } +++ // stores the type to be initialized +++ initializations[initializationCount++] = var; +++ } +++ +++ /** +++ * Replaces the given type with the appropriate type if it is one of the +++ * types on which a constructor is invoked in the basic block. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs. 
+++ * @param t +++ * a type +++ * @return t or, if t is one of the types on which a constructor is invoked +++ * in the basic block, the type corresponding to this constructor. +++ */ +++ private int init(final ClassWriter cw, final int t) { +++ int s; +++ if (t == UNINITIALIZED_THIS) { +++ s = OBJECT | cw.addType(cw.thisName); +++ } else if ((t & (DIM | BASE_KIND)) == UNINITIALIZED) { +++ String type = cw.typeTable[t & BASE_VALUE].strVal1; +++ s = OBJECT | cw.addType(type); +++ } else { +++ return t; +++ } +++ for (int j = 0; j < initializationCount; ++j) { +++ int u = initializations[j]; +++ int dim = u & DIM; +++ int kind = u & KIND; +++ if (kind == LOCAL) { +++ u = dim + inputLocals[u & VALUE]; +++ } else if (kind == STACK) { +++ u = dim + inputStack[inputStack.length - (u & VALUE)]; +++ } +++ if (t == u) { +++ return s; +++ } +++ } +++ return t; +++ } +++ +++ /** +++ * Initializes the input frame of the first basic block from the method +++ * descriptor. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs. +++ * @param access +++ * the access flags of the method to which this label belongs. +++ * @param args +++ * the formal parameter types of this method. +++ * @param maxLocals +++ * the maximum number of local variables of this method. 
+++ */ +++ void initInputFrame(final ClassWriter cw, final int access, +++ final Type[] args, final int maxLocals) { +++ inputLocals = new int[maxLocals]; +++ inputStack = new int[0]; +++ int i = 0; +++ if ((access & Opcodes.ACC_STATIC) == 0) { +++ if ((access & MethodWriter.ACC_CONSTRUCTOR) == 0) { +++ inputLocals[i++] = OBJECT | cw.addType(cw.thisName); +++ } else { +++ inputLocals[i++] = UNINITIALIZED_THIS; +++ } +++ } +++ for (int j = 0; j < args.length; ++j) { +++ int t = type(cw, args[j].getDescriptor()); +++ inputLocals[i++] = t; +++ if (t == LONG || t == DOUBLE) { +++ inputLocals[i++] = TOP; +++ } +++ } +++ while (i < maxLocals) { +++ inputLocals[i++] = TOP; +++ } +++ } +++ +++ /** +++ * Simulates the action of the given instruction on the output stack frame. +++ * +++ * @param opcode +++ * the opcode of the instruction. +++ * @param arg +++ * the operand of the instruction, if any. +++ * @param cw +++ * the class writer to which this label belongs. +++ * @param item +++ * the operand of the instructions, if any. 
+++ */ +++ void execute(final int opcode, final int arg, final ClassWriter cw, +++ final Item item) { +++ int t1, t2, t3, t4; +++ switch (opcode) { +++ case Opcodes.NOP: +++ case Opcodes.INEG: +++ case Opcodes.LNEG: +++ case Opcodes.FNEG: +++ case Opcodes.DNEG: +++ case Opcodes.I2B: +++ case Opcodes.I2C: +++ case Opcodes.I2S: +++ case Opcodes.GOTO: +++ case Opcodes.RETURN: +++ break; +++ case Opcodes.ACONST_NULL: +++ push(NULL); +++ break; +++ case Opcodes.ICONST_M1: +++ case Opcodes.ICONST_0: +++ case Opcodes.ICONST_1: +++ case Opcodes.ICONST_2: +++ case Opcodes.ICONST_3: +++ case Opcodes.ICONST_4: +++ case Opcodes.ICONST_5: +++ case Opcodes.BIPUSH: +++ case Opcodes.SIPUSH: +++ case Opcodes.ILOAD: +++ push(INTEGER); +++ break; +++ case Opcodes.LCONST_0: +++ case Opcodes.LCONST_1: +++ case Opcodes.LLOAD: +++ push(LONG); +++ push(TOP); +++ break; +++ case Opcodes.FCONST_0: +++ case Opcodes.FCONST_1: +++ case Opcodes.FCONST_2: +++ case Opcodes.FLOAD: +++ push(FLOAT); +++ break; +++ case Opcodes.DCONST_0: +++ case Opcodes.DCONST_1: +++ case Opcodes.DLOAD: +++ push(DOUBLE); +++ push(TOP); +++ break; +++ case Opcodes.LDC: +++ switch (item.type) { +++ case ClassWriter.INT: +++ push(INTEGER); +++ break; +++ case ClassWriter.LONG: +++ push(LONG); +++ push(TOP); +++ break; +++ case ClassWriter.FLOAT: +++ push(FLOAT); +++ break; +++ case ClassWriter.DOUBLE: +++ push(DOUBLE); +++ push(TOP); +++ break; +++ case ClassWriter.CLASS: +++ push(OBJECT | cw.addType("java/lang/Class")); +++ break; +++ case ClassWriter.STR: +++ push(OBJECT | cw.addType("java/lang/String")); +++ break; +++ case ClassWriter.MTYPE: +++ push(OBJECT | cw.addType("java/lang/invoke/MethodType")); +++ break; +++ // case ClassWriter.HANDLE_BASE + [1..9]: +++ default: +++ push(OBJECT | cw.addType("java/lang/invoke/MethodHandle")); +++ } +++ break; +++ case Opcodes.ALOAD: +++ push(get(arg)); +++ break; +++ case Opcodes.IALOAD: +++ case Opcodes.BALOAD: +++ case Opcodes.CALOAD: +++ case Opcodes.SALOAD: +++ pop(2); 
+++ push(INTEGER); +++ break; +++ case Opcodes.LALOAD: +++ case Opcodes.D2L: +++ pop(2); +++ push(LONG); +++ push(TOP); +++ break; +++ case Opcodes.FALOAD: +++ pop(2); +++ push(FLOAT); +++ break; +++ case Opcodes.DALOAD: +++ case Opcodes.L2D: +++ pop(2); +++ push(DOUBLE); +++ push(TOP); +++ break; +++ case Opcodes.AALOAD: +++ pop(1); +++ t1 = pop(); +++ push(ELEMENT_OF + t1); +++ break; +++ case Opcodes.ISTORE: +++ case Opcodes.FSTORE: +++ case Opcodes.ASTORE: +++ t1 = pop(); +++ set(arg, t1); +++ if (arg > 0) { +++ t2 = get(arg - 1); +++ // if t2 is of kind STACK or LOCAL we cannot know its size! +++ if (t2 == LONG || t2 == DOUBLE) { +++ set(arg - 1, TOP); +++ } else if ((t2 & KIND) != BASE) { +++ set(arg - 1, t2 | TOP_IF_LONG_OR_DOUBLE); +++ } +++ } +++ break; +++ case Opcodes.LSTORE: +++ case Opcodes.DSTORE: +++ pop(1); +++ t1 = pop(); +++ set(arg, t1); +++ set(arg + 1, TOP); +++ if (arg > 0) { +++ t2 = get(arg - 1); +++ // if t2 is of kind STACK or LOCAL we cannot know its size! +++ if (t2 == LONG || t2 == DOUBLE) { +++ set(arg - 1, TOP); +++ } else if ((t2 & KIND) != BASE) { +++ set(arg - 1, t2 | TOP_IF_LONG_OR_DOUBLE); +++ } +++ } +++ break; +++ case Opcodes.IASTORE: +++ case Opcodes.BASTORE: +++ case Opcodes.CASTORE: +++ case Opcodes.SASTORE: +++ case Opcodes.FASTORE: +++ case Opcodes.AASTORE: +++ pop(3); +++ break; +++ case Opcodes.LASTORE: +++ case Opcodes.DASTORE: +++ pop(4); +++ break; +++ case Opcodes.POP: +++ case Opcodes.IFEQ: +++ case Opcodes.IFNE: +++ case Opcodes.IFLT: +++ case Opcodes.IFGE: +++ case Opcodes.IFGT: +++ case Opcodes.IFLE: +++ case Opcodes.IRETURN: +++ case Opcodes.FRETURN: +++ case Opcodes.ARETURN: +++ case Opcodes.TABLESWITCH: +++ case Opcodes.LOOKUPSWITCH: +++ case Opcodes.ATHROW: +++ case Opcodes.MONITORENTER: +++ case Opcodes.MONITOREXIT: +++ case Opcodes.IFNULL: +++ case Opcodes.IFNONNULL: +++ pop(1); +++ break; +++ case Opcodes.POP2: +++ case Opcodes.IF_ICMPEQ: +++ case Opcodes.IF_ICMPNE: +++ case Opcodes.IF_ICMPLT: +++ case 
Opcodes.IF_ICMPGE: +++ case Opcodes.IF_ICMPGT: +++ case Opcodes.IF_ICMPLE: +++ case Opcodes.IF_ACMPEQ: +++ case Opcodes.IF_ACMPNE: +++ case Opcodes.LRETURN: +++ case Opcodes.DRETURN: +++ pop(2); +++ break; +++ case Opcodes.DUP: +++ t1 = pop(); +++ push(t1); +++ push(t1); +++ break; +++ case Opcodes.DUP_X1: +++ t1 = pop(); +++ t2 = pop(); +++ push(t1); +++ push(t2); +++ push(t1); +++ break; +++ case Opcodes.DUP_X2: +++ t1 = pop(); +++ t2 = pop(); +++ t3 = pop(); +++ push(t1); +++ push(t3); +++ push(t2); +++ push(t1); +++ break; +++ case Opcodes.DUP2: +++ t1 = pop(); +++ t2 = pop(); +++ push(t2); +++ push(t1); +++ push(t2); +++ push(t1); +++ break; +++ case Opcodes.DUP2_X1: +++ t1 = pop(); +++ t2 = pop(); +++ t3 = pop(); +++ push(t2); +++ push(t1); +++ push(t3); +++ push(t2); +++ push(t1); +++ break; +++ case Opcodes.DUP2_X2: +++ t1 = pop(); +++ t2 = pop(); +++ t3 = pop(); +++ t4 = pop(); +++ push(t2); +++ push(t1); +++ push(t4); +++ push(t3); +++ push(t2); +++ push(t1); +++ break; +++ case Opcodes.SWAP: +++ t1 = pop(); +++ t2 = pop(); +++ push(t1); +++ push(t2); +++ break; +++ case Opcodes.IADD: +++ case Opcodes.ISUB: +++ case Opcodes.IMUL: +++ case Opcodes.IDIV: +++ case Opcodes.IREM: +++ case Opcodes.IAND: +++ case Opcodes.IOR: +++ case Opcodes.IXOR: +++ case Opcodes.ISHL: +++ case Opcodes.ISHR: +++ case Opcodes.IUSHR: +++ case Opcodes.L2I: +++ case Opcodes.D2I: +++ case Opcodes.FCMPL: +++ case Opcodes.FCMPG: +++ pop(2); +++ push(INTEGER); +++ break; +++ case Opcodes.LADD: +++ case Opcodes.LSUB: +++ case Opcodes.LMUL: +++ case Opcodes.LDIV: +++ case Opcodes.LREM: +++ case Opcodes.LAND: +++ case Opcodes.LOR: +++ case Opcodes.LXOR: +++ pop(4); +++ push(LONG); +++ push(TOP); +++ break; +++ case Opcodes.FADD: +++ case Opcodes.FSUB: +++ case Opcodes.FMUL: +++ case Opcodes.FDIV: +++ case Opcodes.FREM: +++ case Opcodes.L2F: +++ case Opcodes.D2F: +++ pop(2); +++ push(FLOAT); +++ break; +++ case Opcodes.DADD: +++ case Opcodes.DSUB: +++ case Opcodes.DMUL: +++ case 
Opcodes.DDIV: +++ case Opcodes.DREM: +++ pop(4); +++ push(DOUBLE); +++ push(TOP); +++ break; +++ case Opcodes.LSHL: +++ case Opcodes.LSHR: +++ case Opcodes.LUSHR: +++ pop(3); +++ push(LONG); +++ push(TOP); +++ break; +++ case Opcodes.IINC: +++ set(arg, INTEGER); +++ break; +++ case Opcodes.I2L: +++ case Opcodes.F2L: +++ pop(1); +++ push(LONG); +++ push(TOP); +++ break; +++ case Opcodes.I2F: +++ pop(1); +++ push(FLOAT); +++ break; +++ case Opcodes.I2D: +++ case Opcodes.F2D: +++ pop(1); +++ push(DOUBLE); +++ push(TOP); +++ break; +++ case Opcodes.F2I: +++ case Opcodes.ARRAYLENGTH: +++ case Opcodes.INSTANCEOF: +++ pop(1); +++ push(INTEGER); +++ break; +++ case Opcodes.LCMP: +++ case Opcodes.DCMPL: +++ case Opcodes.DCMPG: +++ pop(4); +++ push(INTEGER); +++ break; +++ case Opcodes.JSR: +++ case Opcodes.RET: +++ throw new RuntimeException( +++ "JSR/RET are not supported with computeFrames option"); +++ case Opcodes.GETSTATIC: +++ push(cw, item.strVal3); +++ break; +++ case Opcodes.PUTSTATIC: +++ pop(item.strVal3); +++ break; +++ case Opcodes.GETFIELD: +++ pop(1); +++ push(cw, item.strVal3); +++ break; +++ case Opcodes.PUTFIELD: +++ pop(item.strVal3); +++ pop(); +++ break; +++ case Opcodes.INVOKEVIRTUAL: +++ case Opcodes.INVOKESPECIAL: +++ case Opcodes.INVOKESTATIC: +++ case Opcodes.INVOKEINTERFACE: +++ pop(item.strVal3); +++ if (opcode != Opcodes.INVOKESTATIC) { +++ t1 = pop(); +++ if (opcode == Opcodes.INVOKESPECIAL +++ && item.strVal2.charAt(0) == '<') { +++ init(t1); +++ } +++ } +++ push(cw, item.strVal3); +++ break; +++ case Opcodes.INVOKEDYNAMIC: +++ pop(item.strVal2); +++ push(cw, item.strVal2); +++ break; +++ case Opcodes.NEW: +++ push(UNINITIALIZED | cw.addUninitializedType(item.strVal1, arg)); +++ break; +++ case Opcodes.NEWARRAY: +++ pop(); +++ switch (arg) { +++ case Opcodes.T_BOOLEAN: +++ push(ARRAY_OF | BOOLEAN); +++ break; +++ case Opcodes.T_CHAR: +++ push(ARRAY_OF | CHAR); +++ break; +++ case Opcodes.T_BYTE: +++ push(ARRAY_OF | BYTE); +++ break; +++ case 
Opcodes.T_SHORT: +++ push(ARRAY_OF | SHORT); +++ break; +++ case Opcodes.T_INT: +++ push(ARRAY_OF | INTEGER); +++ break; +++ case Opcodes.T_FLOAT: +++ push(ARRAY_OF | FLOAT); +++ break; +++ case Opcodes.T_DOUBLE: +++ push(ARRAY_OF | DOUBLE); +++ break; +++ // case Opcodes.T_LONG: +++ default: +++ push(ARRAY_OF | LONG); +++ break; +++ } +++ break; +++ case Opcodes.ANEWARRAY: +++ String s = item.strVal1; +++ pop(); +++ if (s.charAt(0) == '[') { +++ push(cw, '[' + s); +++ } else { +++ push(ARRAY_OF | OBJECT | cw.addType(s)); +++ } +++ break; +++ case Opcodes.CHECKCAST: +++ s = item.strVal1; +++ pop(); +++ if (s.charAt(0) == '[') { +++ push(cw, s); +++ } else { +++ push(OBJECT | cw.addType(s)); +++ } +++ break; +++ // case Opcodes.MULTIANEWARRAY: +++ default: +++ pop(arg); +++ push(cw, item.strVal1); +++ break; +++ } +++ } +++ +++ /** +++ * Merges the input frame of the given basic block with the input and output +++ * frames of this basic block. Returns true if the input frame of +++ * the given label has been changed by this operation. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs. +++ * @param frame +++ * the basic block whose input frame must be updated. +++ * @param edge +++ * the kind of the {@link Edge} between this label and 'label'. +++ * See {@link Edge#info}. +++ * @return true if the input frame of the given label has been +++ * changed by this operation. 
+++     */
+++    boolean merge(final ClassWriter cw, final Frame frame, final int edge) {
+++        boolean changed = false;
+++        int i, s, dim, kind, t;
+++
+++        int nLocal = inputLocals.length;
+++        int nStack = inputStack.length;
+++        if (frame.inputLocals == null) { // target has no input locals yet
+++            frame.inputLocals = new int[nLocal];
+++            changed = true;
+++        }
+++
+++        for (i = 0; i < nLocal; ++i) {
+++            if (outputLocals != null && i < outputLocals.length) {
+++                s = outputLocals[i];
+++                if (s == 0) { // no output type recorded: keep the input type
+++                    t = inputLocals[i];
+++                } else {
+++                    dim = s & DIM;
+++                    kind = s & KIND;
+++                    if (kind == BASE) { // used as is, nothing to resolve
+++                        t = s;
+++                    } else {
+++                        if (kind == LOCAL) { // resolved via the input locals
+++                            t = dim + inputLocals[s & VALUE];
+++                        } else { // resolved via the input stack, from its top
+++                            t = dim + inputStack[nStack - (s & VALUE)];
+++                        }
+++                        if ((s & TOP_IF_LONG_OR_DOUBLE) != 0
+++                                && (t == LONG || t == DOUBLE)) {
+++                            t = TOP;
+++                        }
+++                    }
+++                }
+++            } else {
+++                t = inputLocals[i];
+++            }
+++            if (initializations != null) {
+++                t = init(cw, t);
+++            }
+++            changed |= merge(cw, t, frame.inputLocals, i);
+++        }
+++
+++        if (edge > 0) { // the edge value itself becomes the only stack entry
+++            for (i = 0; i < nLocal; ++i) {
+++                t = inputLocals[i];
+++                changed |= merge(cw, t, frame.inputLocals, i);
+++            }
+++            if (frame.inputStack == null) {
+++                frame.inputStack = new int[1];
+++                changed = true;
+++            }
+++            changed |= merge(cw, edge, frame.inputStack, 0);
+++            return changed;
+++        }
+++
+++        int nInputStack = inputStack.length + owner.inputStackTop;
+++        if (frame.inputStack == null) { // target has no input stack yet
+++            frame.inputStack = new int[nInputStack + outputStackTop];
+++            changed = true;
+++        }
+++
+++        for (i = 0; i < nInputStack; ++i) { // input stack entries come first
+++            t = inputStack[i];
+++            if (initializations != null) {
+++                t = init(cw, t);
+++            }
+++            changed |= merge(cw, t, frame.inputStack, i);
+++        }
+++        for (i = 0; i < outputStackTop; ++i) { // then the output stack entries
+++            s = outputStack[i];
+++            dim = s & DIM;
+++            kind = s & KIND;
+++            if (kind == BASE) { // used as is, nothing to resolve
+++                t = s;
+++            } else {
+++                if (kind == LOCAL) { // resolved via the input locals
+++                    t = dim + inputLocals[s & VALUE];
+++                } else { // resolved via the input stack, from its top
+++                    t = dim + inputStack[nStack - (s & VALUE)];
+++                }
+++                if ((s & TOP_IF_LONG_OR_DOUBLE) != 0
+++                        && (t == LONG || t == DOUBLE)) {
+++                    t = TOP;
+++                }
+++            }
+++            if (initializations != null) {
+++                t = init(cw, t);
+++            }
+++            changed |= merge(cw, t, frame.inputStack, nInputStack + i);
+++        }
+++        return changed;
+++    }
+++
+++    /**
+++     * Merges the type at the given index in the given type array with the given
+++     * type. Returns true if the type array has been modified by this
+++     * operation.
+++     *
+++     * @param cw
+++     *            the ClassWriter to which this label belongs.
+++     * @param t
+++     *            the type with which the type array element must be merged.
+++     * @param types
+++     *            an array of types.
+++     * @param index
+++     *            the index of the type that must be merged in 'types'.
+++     * @return true if the type array has been modified by this
+++     *         operation.
+++     */
+++    private static boolean merge(final ClassWriter cw, int t,
+++            final int[] types, final int index) {
+++        int u = types[index];
+++        if (u == t) {
+++            // if the types are equal, merge(u,t)=u, so there is no change
+++            return false;
+++        }
+++        if ((t & ~DIM) == NULL) { // t equals NULL once its DIM bits are cleared
+++            if (u == NULL) {
+++                return false;
+++            }
+++            t = NULL; // continue with the plain NULL type
+++        }
+++        if (u == 0) {
+++            // if types[index] has never been assigned, merge(u,t)=t
+++            types[index] = t;
+++            return true;
+++        }
+++        int v; // the merged type, computed below
+++        if ((u & BASE_KIND) == OBJECT || (u & DIM) != 0) {
+++            // if u is a reference type of any dimension
+++            if (t == NULL) {
+++                // if t is the NULL type, merge(u,t)=u, so there is no change
+++                return false;
+++            } else if ((t & (DIM | BASE_KIND)) == (u & (DIM | BASE_KIND))) {
+++                // if t and u have the same dimension and same base kind
+++                if ((u & BASE_KIND) == OBJECT) {
+++                    // if t is also a reference type, and if u and t have the
+++                    // same dimension merge(u,t) = dim(t) | common parent of the
+++                    // element types of u and t
+++                    v = (t & DIM) | OBJECT
+++                            | cw.getMergedType(t & BASE_VALUE, u & BASE_VALUE);
+++                } else {
+++                    // if u and t are array types, but not with the same element
+++                    // type, merge(u,t) = dim(u) - 1 | java/lang/Object
+++                    int vdim = ELEMENT_OF + (u & DIM);
+++                    v = vdim | OBJECT | cw.addType("java/lang/Object");
+++                }
+++            } else if ((t & BASE_KIND) == OBJECT || (t & DIM) != 0) {
+++                // if t is any other reference or array type, the merged type
+++                // is min(udim, tdim) | java/lang/Object, where udim is the
+++                // array dimension of u, minus 1 if u is an array type with a
+++                // primitive element type (and similarly for tdim).
+++                int tdim = (((t & DIM) == 0 || (t & BASE_KIND) == OBJECT) ? 0
+++                        : ELEMENT_OF) + (t & DIM);
+++                int udim = (((u & DIM) == 0 || (u & BASE_KIND) == OBJECT) ? 0
+++                        : ELEMENT_OF) + (u & DIM);
+++                v = Math.min(tdim, udim) | OBJECT
+++                        | cw.addType("java/lang/Object");
+++            } else {
+++                // if t is any other type, merge(u,t)=TOP
+++                v = TOP;
+++            }
+++        } else if (u == NULL) {
+++            // if u is the NULL type, merge(u,t)=t,
+++            // or TOP if t is not a reference type
+++            v = (t & BASE_KIND) == OBJECT || (t & DIM) != 0 ? t : TOP;
+++        } else {
+++            // if u is any other type, merge(u,t)=TOP whatever t
+++            v = TOP;
+++        }
+++        if (u != v) { // write back only when the merge changed the entry
+++            types[index] = v;
+++            return true;
+++        }
+++        return false;
+++    }
+++}
++diff --git a/contrib/asm/src/org/objectweb/asm/Handle.java b/contrib/asm/src/org/objectweb/asm/Handle.java
++new file mode 100644
++index 0000000..a627911
++--- /dev/null
+++++ b/contrib/asm/src/org/objectweb/asm/Handle.java
++@@ -0,0 +1,170 @@
+++/***
+++ * ASM: a very small and fast Java bytecode manipulation framework
+++ * Copyright (c) 2000-2011 INRIA, France Telecom
+++ * All rights reserved.
+++ *
+++ * Redistribution and use in source and binary forms, with or without
+++ * modification, are permitted provided that the following conditions
+++ * are met:
+++ * 1. Redistributions of source code must retain the above copyright
+++ *    notice, this list of conditions and the following disclaimer.
+++ * 2.
Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++ +++package org.objectweb.asm; +++ +++/** +++ * A reference to a field or a method. +++ * +++ * @author Remi Forax +++ * @author Eric Bruneton +++ */ +++public final class Handle { +++ +++ /** +++ * The kind of field or method designated by this Handle. Should be +++ * {@link Opcodes#H_GETFIELD}, {@link Opcodes#H_GETSTATIC}, +++ * {@link Opcodes#H_PUTFIELD}, {@link Opcodes#H_PUTSTATIC}, +++ * {@link Opcodes#H_INVOKEVIRTUAL}, {@link Opcodes#H_INVOKESTATIC}, +++ * {@link Opcodes#H_INVOKESPECIAL}, {@link Opcodes#H_NEWINVOKESPECIAL} or +++ * {@link Opcodes#H_INVOKEINTERFACE}. +++ */ +++ final int tag; +++ +++ /** +++ * The internal name of the class that owns the field or method designated +++ * by this handle. 
+++ */ +++ final String owner; +++ +++ /** +++ * The name of the field or method designated by this handle. +++ */ +++ final String name; +++ +++ /** +++ * The descriptor of the field or method designated by this handle. +++ */ +++ final String desc; +++ +++ /** +++ * Constructs a new field or method handle. +++ * +++ * @param tag +++ * the kind of field or method designated by this Handle. Must be +++ * {@link Opcodes#H_GETFIELD}, {@link Opcodes#H_GETSTATIC}, +++ * {@link Opcodes#H_PUTFIELD}, {@link Opcodes#H_PUTSTATIC}, +++ * {@link Opcodes#H_INVOKEVIRTUAL}, +++ * {@link Opcodes#H_INVOKESTATIC}, +++ * {@link Opcodes#H_INVOKESPECIAL}, +++ * {@link Opcodes#H_NEWINVOKESPECIAL} or +++ * {@link Opcodes#H_INVOKEINTERFACE}. +++ * @param owner +++ * the internal name of the class that owns the field or method +++ * designated by this handle. +++ * @param name +++ * the name of the field or method designated by this handle. +++ * @param desc +++ * the descriptor of the field or method designated by this +++ * handle. +++ */ +++ public Handle(int tag, String owner, String name, String desc) { +++ this.tag = tag; +++ this.owner = owner; +++ this.name = name; +++ this.desc = desc; +++ } +++ +++ /** +++ * Returns the kind of field or method designated by this handle. +++ * +++ * @return {@link Opcodes#H_GETFIELD}, {@link Opcodes#H_GETSTATIC}, +++ * {@link Opcodes#H_PUTFIELD}, {@link Opcodes#H_PUTSTATIC}, +++ * {@link Opcodes#H_INVOKEVIRTUAL}, {@link Opcodes#H_INVOKESTATIC}, +++ * {@link Opcodes#H_INVOKESPECIAL}, +++ * {@link Opcodes#H_NEWINVOKESPECIAL} or +++ * {@link Opcodes#H_INVOKEINTERFACE}. +++ */ +++ public int getTag() { +++ return tag; +++ } +++ +++ /** +++ * Returns the internal name of the class that owns the field or method +++ * designated by this handle. +++ * +++ * @return the internal name of the class that owns the field or method +++ * designated by this handle. 
+++ */ +++ public String getOwner() { +++ return owner; +++ } +++ +++ /** +++ * Returns the name of the field or method designated by this handle. +++ * +++ * @return the name of the field or method designated by this handle. +++ */ +++ public String getName() { +++ return name; +++ } +++ +++ /** +++ * Returns the descriptor of the field or method designated by this handle. +++ * +++ * @return the descriptor of the field or method designated by this handle. +++ */ +++ public String getDesc() { +++ return desc; +++ } +++ +++ @Override +++ public boolean equals(Object obj) { +++ if (obj == this) { +++ return true; +++ } +++ if (!(obj instanceof Handle)) { +++ return false; +++ } +++ Handle h = (Handle) obj; +++ return tag == h.tag && owner.equals(h.owner) && name.equals(h.name) +++ && desc.equals(h.desc); +++ } +++ +++ @Override +++ public int hashCode() { +++ return tag + owner.hashCode() * name.hashCode() * desc.hashCode(); +++ } +++ +++ /** +++ * Returns the textual representation of this handle. The textual +++ * representation is: +++ * +++ *
+++     * owner '.' name desc ' ' '(' tag ')'
+++     * 
+++ * +++ * . As this format is unambiguous, it can be parsed if necessary. +++ */ +++ @Override +++ public String toString() { +++ return owner + '.' + name + desc + " (" + tag + ')'; +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Handler.java b/contrib/asm/src/org/objectweb/asm/Handler.java ++new file mode 100644 ++index 0000000..b24591d ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Handler.java ++@@ -0,0 +1,121 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * Information about an exception handler block. +++ * +++ * @author Eric Bruneton +++ */ +++class Handler { +++ +++ /** +++ * Beginning of the exception handler's scope (inclusive). +++ */ +++ Label start; +++ +++ /** +++ * End of the exception handler's scope (exclusive). +++ */ +++ Label end; +++ +++ /** +++ * Beginning of the exception handler's code. +++ */ +++ Label handler; +++ +++ /** +++ * Internal name of the type of exceptions handled by this handler, or +++ * null to catch any exceptions. +++ */ +++ String desc; +++ +++ /** +++ * Constant pool index of the internal name of the type of exceptions +++ * handled by this handler, or 0 to catch any exceptions. +++ */ +++ int type; +++ +++ /** +++ * Next exception handler block info. +++ */ +++ Handler next; +++ +++ /** +++ * Removes the range between start and end from the given exception +++ * handlers. +++ * +++ * @param h +++ * an exception handler list. +++ * @param start +++ * the start of the range to be removed. +++ * @param end +++ * the end of the range to be removed. Maybe null. +++ * @return the exception handler list with the start-end range removed. 
+++ */ +++ static Handler remove(Handler h, Label start, Label end) { +++ if (h == null) { +++ return null; +++ } else { +++ h.next = remove(h.next, start, end); +++ } +++ int hstart = h.start.position; +++ int hend = h.end.position; +++ int s = start.position; +++ int e = end == null ? Integer.MAX_VALUE : end.position; +++ // if [hstart,hend[ and [s,e[ intervals intersect... +++ if (s < hend && e > hstart) { +++ if (s <= hstart) { +++ if (e >= hend) { +++ // [hstart,hend[ fully included in [s,e[, h removed +++ h = h.next; +++ } else { +++ // [hstart,hend[ minus [s,e[ = [e,hend[ +++ h.start = end; +++ } +++ } else if (e >= hend) { +++ // [hstart,hend[ minus [s,e[ = [hstart,s[ +++ h.end = start; +++ } else { +++ // [hstart,hend[ minus [s,e[ = [hstart,s[ + [e,hend[ +++ Handler g = new Handler(); +++ g.start = end; +++ g.end = h.end; +++ g.handler = h.handler; +++ g.desc = h.desc; +++ g.type = h.type; +++ g.next = h.next; +++ h.end = start; +++ h.next = g; +++ } +++ } +++ return h; +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Item.java b/contrib/asm/src/org/objectweb/asm/Item.java ++new file mode 100644 ++index 0000000..917524d ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Item.java ++@@ -0,0 +1,313 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. 
Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A constant pool item. Constant pool items can be created with the 'newXXX' +++ * methods in the {@link ClassWriter} class. +++ * +++ * @author Eric Bruneton +++ */ +++final class Item { +++ +++ /** +++ * Index of this item in the constant pool. +++ */ +++ int index; +++ +++ /** +++ * Type of this constant pool item. A single class is used to represent all +++ * constant pool item types, in order to minimize the bytecode size of this +++ * package. The value of this field is one of {@link ClassWriter#INT}, +++ * {@link ClassWriter#LONG}, {@link ClassWriter#FLOAT}, +++ * {@link ClassWriter#DOUBLE}, {@link ClassWriter#UTF8}, +++ * {@link ClassWriter#STR}, {@link ClassWriter#CLASS}, +++ * {@link ClassWriter#NAME_TYPE}, {@link ClassWriter#FIELD}, +++ * {@link ClassWriter#METH}, {@link ClassWriter#IMETH}, +++ * {@link ClassWriter#MTYPE}, {@link ClassWriter#INDY}. 
+++ * +++ * MethodHandle constant 9 variations are stored using a range of 9 values +++ * from {@link ClassWriter#HANDLE_BASE} + 1 to +++ * {@link ClassWriter#HANDLE_BASE} + 9. +++ * +++ * Special Item types are used for Items that are stored in the ClassWriter +++ * {@link ClassWriter#typeTable}, instead of the constant pool, in order to +++ * avoid clashes with normal constant pool items in the ClassWriter constant +++ * pool's hash table. These special item types are +++ * {@link ClassWriter#TYPE_NORMAL}, {@link ClassWriter#TYPE_UNINIT} and +++ * {@link ClassWriter#TYPE_MERGED}. +++ */ +++ int type; +++ +++ /** +++ * Value of this item, for an integer item. +++ */ +++ int intVal; +++ +++ /** +++ * Value of this item, for a long item. +++ */ +++ long longVal; +++ +++ /** +++ * First part of the value of this item, for items that do not hold a +++ * primitive value. +++ */ +++ String strVal1; +++ +++ /** +++ * Second part of the value of this item, for items that do not hold a +++ * primitive value. +++ */ +++ String strVal2; +++ +++ /** +++ * Third part of the value of this item, for items that do not hold a +++ * primitive value. +++ */ +++ String strVal3; +++ +++ /** +++ * The hash code value of this constant pool item. +++ */ +++ int hashCode; +++ +++ /** +++ * Link to another constant pool item, used for collision lists in the +++ * constant pool's hash table. +++ */ +++ Item next; +++ +++ /** +++ * Constructs an uninitialized {@link Item}. +++ */ +++ Item() { +++ } +++ +++ /** +++ * Constructs an uninitialized {@link Item} for constant pool element at +++ * given position. +++ * +++ * @param index +++ * index of the item to be constructed. +++ */ +++ Item(final int index) { +++ this.index = index; +++ } +++ +++ /** +++ * Constructs a copy of the given item. +++ * +++ * @param index +++ * index of the item to be constructed. +++ * @param i +++ * the item that must be copied into the item to be constructed. 
+++ */ +++ Item(final int index, final Item i) { +++ this.index = index; +++ type = i.type; +++ intVal = i.intVal; +++ longVal = i.longVal; +++ strVal1 = i.strVal1; +++ strVal2 = i.strVal2; +++ strVal3 = i.strVal3; +++ hashCode = i.hashCode; +++ } +++ +++ /** +++ * Sets this item to an integer item. +++ * +++ * @param intVal +++ * the value of this item. +++ */ +++ void set(final int intVal) { +++ this.type = ClassWriter.INT; +++ this.intVal = intVal; +++ this.hashCode = 0x7FFFFFFF & (type + intVal); +++ } +++ +++ /** +++ * Sets this item to a long item. +++ * +++ * @param longVal +++ * the value of this item. +++ */ +++ void set(final long longVal) { +++ this.type = ClassWriter.LONG; +++ this.longVal = longVal; +++ this.hashCode = 0x7FFFFFFF & (type + (int) longVal); +++ } +++ +++ /** +++ * Sets this item to a float item. +++ * +++ * @param floatVal +++ * the value of this item. +++ */ +++ void set(final float floatVal) { +++ this.type = ClassWriter.FLOAT; +++ this.intVal = Float.floatToRawIntBits(floatVal); +++ this.hashCode = 0x7FFFFFFF & (type + (int) floatVal); +++ } +++ +++ /** +++ * Sets this item to a double item. +++ * +++ * @param doubleVal +++ * the value of this item. +++ */ +++ void set(final double doubleVal) { +++ this.type = ClassWriter.DOUBLE; +++ this.longVal = Double.doubleToRawLongBits(doubleVal); +++ this.hashCode = 0x7FFFFFFF & (type + (int) doubleVal); +++ } +++ +++ /** +++ * Sets this item to an item that do not hold a primitive value. +++ * +++ * @param type +++ * the type of this item. +++ * @param strVal1 +++ * first part of the value of this item. +++ * @param strVal2 +++ * second part of the value of this item. +++ * @param strVal3 +++ * third part of the value of this item. 
+++ */ +++ @SuppressWarnings("fallthrough") +++ void set(final int type, final String strVal1, final String strVal2, +++ final String strVal3) { +++ this.type = type; +++ this.strVal1 = strVal1; +++ this.strVal2 = strVal2; +++ this.strVal3 = strVal3; +++ switch (type) { +++ case ClassWriter.CLASS: +++ this.intVal = 0; // intVal of a class must be zero, see visitInnerClass +++ case ClassWriter.UTF8: +++ case ClassWriter.STR: +++ case ClassWriter.MTYPE: +++ case ClassWriter.TYPE_NORMAL: +++ hashCode = 0x7FFFFFFF & (type + strVal1.hashCode()); +++ return; +++ case ClassWriter.NAME_TYPE: { +++ hashCode = 0x7FFFFFFF & (type + strVal1.hashCode() +++ * strVal2.hashCode()); +++ return; +++ } +++ // ClassWriter.FIELD: +++ // ClassWriter.METH: +++ // ClassWriter.IMETH: +++ // ClassWriter.HANDLE_BASE + 1..9 +++ default: +++ hashCode = 0x7FFFFFFF & (type + strVal1.hashCode() +++ * strVal2.hashCode() * strVal3.hashCode()); +++ } +++ } +++ +++ /** +++ * Sets the item to an InvokeDynamic item. +++ * +++ * @param name +++ * invokedynamic's name. +++ * @param desc +++ * invokedynamic's desc. +++ * @param bsmIndex +++ * zero based index into the class attribute BootrapMethods. +++ */ +++ void set(String name, String desc, int bsmIndex) { +++ this.type = ClassWriter.INDY; +++ this.longVal = bsmIndex; +++ this.strVal1 = name; +++ this.strVal2 = desc; +++ this.hashCode = 0x7FFFFFFF & (ClassWriter.INDY + bsmIndex +++ * strVal1.hashCode() * strVal2.hashCode()); +++ } +++ +++ /** +++ * Sets the item to a BootstrapMethod item. +++ * +++ * @param position +++ * position in byte in the class attribute BootrapMethods. +++ * @param hashCode +++ * hashcode of the item. This hashcode is processed from the +++ * hashcode of the bootstrap method and the hashcode of all +++ * bootstrap arguments. 
+++ */ +++ void set(int position, int hashCode) { +++ this.type = ClassWriter.BSM; +++ this.intVal = position; +++ this.hashCode = hashCode; +++ } +++ +++ /** +++ * Indicates if the given item is equal to this one. This method assumes +++ * that the two items have the same {@link #type}. +++ * +++ * @param i +++ * the item to be compared to this one. Both items must have the +++ * same {@link #type}. +++ * @return true if the given item if equal to this one, +++ * false otherwise. +++ */ +++ boolean isEqualTo(final Item i) { +++ switch (type) { +++ case ClassWriter.UTF8: +++ case ClassWriter.STR: +++ case ClassWriter.CLASS: +++ case ClassWriter.MTYPE: +++ case ClassWriter.TYPE_NORMAL: +++ return i.strVal1.equals(strVal1); +++ case ClassWriter.TYPE_MERGED: +++ case ClassWriter.LONG: +++ case ClassWriter.DOUBLE: +++ return i.longVal == longVal; +++ case ClassWriter.INT: +++ case ClassWriter.FLOAT: +++ return i.intVal == intVal; +++ case ClassWriter.TYPE_UNINIT: +++ return i.intVal == intVal && i.strVal1.equals(strVal1); +++ case ClassWriter.NAME_TYPE: +++ return i.strVal1.equals(strVal1) && i.strVal2.equals(strVal2); +++ case ClassWriter.INDY: { +++ return i.longVal == longVal && i.strVal1.equals(strVal1) +++ && i.strVal2.equals(strVal2); +++ } +++ // case ClassWriter.FIELD: +++ // case ClassWriter.METH: +++ // case ClassWriter.IMETH: +++ // case ClassWriter.HANDLE_BASE + 1..9 +++ default: +++ return i.strVal1.equals(strVal1) && i.strVal2.equals(strVal2) +++ && i.strVal3.equals(strVal3); +++ } +++ } +++ +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Label.java b/contrib/asm/src/org/objectweb/asm/Label.java ++new file mode 100644 ++index 0000000..6bca6fb ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Label.java ++@@ -0,0 +1,565 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. 
+++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A label represents a position in the bytecode of a method. Labels are used +++ * for jump, goto, and switch instructions, and for try catch blocks. A label +++ * designates the instruction that is just after. 
Note however that there +++ * can be other elements between a label and the instruction it designates (such +++ * as other labels, stack map frames, line numbers, etc.). +++ * +++ * @author Eric Bruneton +++ */ +++public class Label { +++ +++ /** +++ * Indicates if this label is only used for debug attributes. Such a label +++ * is not the start of a basic block, the target of a jump instruction, or +++ * an exception handler. It can be safely ignored in control flow graph +++ * analysis algorithms (for optimization purposes). +++ */ +++ static final int DEBUG = 1; +++ +++ /** +++ * Indicates if the position of this label is known. +++ */ +++ static final int RESOLVED = 2; +++ +++ /** +++ * Indicates if this label has been updated, after instruction resizing. +++ */ +++ static final int RESIZED = 4; +++ +++ /** +++ * Indicates if this basic block has been pushed in the basic block stack. +++ * See {@link MethodWriter#visitMaxs visitMaxs}. +++ */ +++ static final int PUSHED = 8; +++ +++ /** +++ * Indicates if this label is the target of a jump instruction, or the start +++ * of an exception handler. +++ */ +++ static final int TARGET = 16; +++ +++ /** +++ * Indicates if a stack map frame must be stored for this label. +++ */ +++ static final int STORE = 32; +++ +++ /** +++ * Indicates if this label corresponds to a reachable basic block. +++ */ +++ static final int REACHABLE = 64; +++ +++ /** +++ * Indicates if this basic block ends with a JSR instruction. +++ */ +++ static final int JSR = 128; +++ +++ /** +++ * Indicates if this basic block ends with a RET instruction. +++ */ +++ static final int RET = 256; +++ +++ /** +++ * Indicates if this basic block is the start of a subroutine. +++ */ +++ static final int SUBROUTINE = 512; +++ +++ /** +++ * Indicates if this subroutine basic block has been visited by a +++ * visitSubroutine(null, ...) call. 
+++ */ +++ static final int VISITED = 1024; +++ +++ /** +++ * Indicates if this subroutine basic block has been visited by a +++ * visitSubroutine(!null, ...) call. +++ */ +++ static final int VISITED2 = 2048; +++ +++ /** +++ * Field used to associate user information to a label. Warning: this field +++ * is used by the ASM tree package. In order to use it with the ASM tree +++ * package you must override the +++ * {@link org.objectweb.asm.tree.MethodNode#getLabelNode} method. +++ */ +++ public Object info; +++ +++ /** +++ * Flags that indicate the status of this label. +++ * +++ * @see #DEBUG +++ * @see #RESOLVED +++ * @see #RESIZED +++ * @see #PUSHED +++ * @see #TARGET +++ * @see #STORE +++ * @see #REACHABLE +++ * @see #JSR +++ * @see #RET +++ */ +++ int status; +++ +++ /** +++ * The line number corresponding to this label, if known. If there are +++ * several lines, each line is stored in a separate label, all linked via +++ * their next field (these links are created in ClassReader and removed just +++ * before visitLabel is called, so that this does not impact the rest of the +++ * code). +++ */ +++ int line; +++ +++ /** +++ * The position of this label in the code, if known. +++ */ +++ int position; +++ +++ /** +++ * Number of forward references to this label, times two. +++ */ +++ private int referenceCount; +++ +++ /** +++ * Informations about forward references. Each forward reference is +++ * described by two consecutive integers in this array: the first one is the +++ * position of the first byte of the bytecode instruction that contains the +++ * forward reference, while the second is the position of the first byte of +++ * the forward reference itself. In fact the sign of the first integer +++ * indicates if this reference uses 2 or 4 bytes, and its absolute value +++ * gives the position of the bytecode instruction. This array is also used +++ * as a bitset to store the subroutines to which a basic block belongs. 
This +++ * information is needed in {@linked MethodWriter#visitMaxs}, after all +++ * forward references have been resolved. Hence the same array can be used +++ * for both purposes without problems. +++ */ +++ private int[] srcAndRefPositions; +++ +++ // ------------------------------------------------------------------------ +++ +++ /* +++ * Fields for the control flow and data flow graph analysis algorithms (used +++ * to compute the maximum stack size or the stack map frames). A control +++ * flow graph contains one node per "basic block", and one edge per "jump" +++ * from one basic block to another. Each node (i.e., each basic block) is +++ * represented by the Label object that corresponds to the first instruction +++ * of this basic block. Each node also stores the list of its successors in +++ * the graph, as a linked list of Edge objects. +++ * +++ * The control flow analysis algorithms used to compute the maximum stack +++ * size or the stack map frames are similar and use two steps. The first +++ * step, during the visit of each instruction, builds information about the +++ * state of the local variables and the operand stack at the end of each +++ * basic block, called the "output frame", relatively to the frame +++ * state at the beginning of the basic block, which is called the "input +++ * frame", and which is unknown during this step. The second step, in +++ * {@link MethodWriter#visitMaxs}, is a fix point algorithm that computes +++ * information about the input frame of each basic block, from the input +++ * state of the first basic block (known from the method signature), and by +++ * the using the previously computed relative output frames. +++ * +++ * The algorithm used to compute the maximum stack size only computes the +++ * relative output and absolute input stack heights, while the algorithm +++ * used to compute stack map frames computes relative output frames and +++ * absolute input frames. 
+++ */ +++ +++ /** +++ * Start of the output stack relatively to the input stack. The exact +++ * semantics of this field depends on the algorithm that is used. +++ * +++ * When only the maximum stack size is computed, this field is the number of +++ * elements in the input stack. +++ * +++ * When the stack map frames are completely computed, this field is the +++ * offset of the first output stack element relatively to the top of the +++ * input stack. This offset is always negative or null. A null offset means +++ * that the output stack must be appended to the input stack. A -n offset +++ * means that the first n output stack elements must replace the top n input +++ * stack elements, and that the other elements must be appended to the input +++ * stack. +++ */ +++ int inputStackTop; +++ +++ /** +++ * Maximum height reached by the output stack, relatively to the top of the +++ * input stack. This maximum is always positive or null. +++ */ +++ int outputStackMax; +++ +++ /** +++ * Information about the input and output stack map frames of this basic +++ * block. This field is only used when {@link ClassWriter#COMPUTE_FRAMES} +++ * option is used. +++ */ +++ Frame frame; +++ +++ /** +++ * The successor of this label, in the order they are visited. This linked +++ * list does not include labels used for debug info only. If +++ * {@link ClassWriter#COMPUTE_FRAMES} option is used then, in addition, it +++ * does not contain successive labels that denote the same bytecode position +++ * (in this case only the first label appears in this list). +++ */ +++ Label successor; +++ +++ /** +++ * The successors of this node in the control flow graph. These successors +++ * are stored in a linked list of {@link Edge Edge} objects, linked to each +++ * other by their {@link Edge#next} field. +++ */ +++ Edge successors; +++ +++ /** +++ * The next basic block in the basic block stack. 
This stack is used in the +++ * main loop of the fix point algorithm used in the second step of the +++ * control flow analysis algorithms. It is also used in +++ * {@link #visitSubroutine} to avoid using a recursive method, and in +++ * ClassReader to temporarily store multiple source lines for a label. +++ * +++ * @see MethodWriter#visitMaxs +++ */ +++ Label next; +++ +++ // ------------------------------------------------------------------------ +++ // Constructor +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new label. +++ */ +++ public Label() { +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Methods to compute offsets and to manage forward references +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the offset corresponding to this label. This offset is computed +++ * from the start of the method's bytecode. This method is intended for +++ * {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters. +++ * +++ * @return the offset corresponding to this label. +++ * @throws IllegalStateException +++ * if this label is not resolved yet. +++ */ +++ public int getOffset() { +++ if ((status & RESOLVED) == 0) { +++ throw new IllegalStateException( +++ "Label offset position has not been resolved yet"); +++ } +++ return position; +++ } +++ +++ /** +++ * Puts a reference to this label in the bytecode of a method. If the +++ * position of the label is known, the offset is computed and written +++ * directly. Otherwise, a null offset is written and a new forward reference +++ * is declared for this label. +++ * +++ * @param owner +++ * the code writer that calls this method. +++ * @param out +++ * the bytecode of the method. +++ * @param source +++ * the position of first byte of the bytecode instruction that +++ * contains this label. 
+++ * @param wideOffset +++ * true if the reference must be stored in 4 bytes, or +++ * false if it must be stored with 2 bytes. +++ * @throws IllegalArgumentException +++ * if this label has not been created by the given code writer. +++ */ +++ void put(final MethodWriter owner, final ByteVector out, final int source, +++ final boolean wideOffset) { +++ if ((status & RESOLVED) == 0) { +++ if (wideOffset) { +++ addReference(-1 - source, out.length); +++ out.putInt(-1); +++ } else { +++ addReference(source, out.length); +++ out.putShort(-1); +++ } +++ } else { +++ if (wideOffset) { +++ out.putInt(position - source); +++ } else { +++ out.putShort(position - source); +++ } +++ } +++ } +++ +++ /** +++ * Adds a forward reference to this label. This method must be called only +++ * for a true forward reference, i.e. only if this label is not resolved +++ * yet. For backward references, the offset of the reference can be, and +++ * must be, computed and stored directly. +++ * +++ * @param sourcePosition +++ * the position of the referencing instruction. This position +++ * will be used to compute the offset of this forward reference. +++ * @param referencePosition +++ * the position where the offset for this forward reference must +++ * be stored. +++ */ +++ private void addReference(final int sourcePosition, +++ final int referencePosition) { +++ if (srcAndRefPositions == null) { +++ srcAndRefPositions = new int[6]; +++ } +++ if (referenceCount >= srcAndRefPositions.length) { +++ int[] a = new int[srcAndRefPositions.length + 6]; +++ System.arraycopy(srcAndRefPositions, 0, a, 0, +++ srcAndRefPositions.length); +++ srcAndRefPositions = a; +++ } +++ srcAndRefPositions[referenceCount++] = sourcePosition; +++ srcAndRefPositions[referenceCount++] = referencePosition; +++ } +++ +++ /** +++ * Resolves all forward references to this label. This method must be called +++ * when this label is added to the bytecode of the method, i.e. when its +++ * position becomes known. 
This method fills in the blanks that where left +++ * in the bytecode by each forward reference previously added to this label. +++ * +++ * @param owner +++ * the code writer that calls this method. +++ * @param position +++ * the position of this label in the bytecode. +++ * @param data +++ * the bytecode of the method. +++ * @return true if a blank that was left for this label was to +++ * small to store the offset. In such a case the corresponding jump +++ * instruction is replaced with a pseudo instruction (using unused +++ * opcodes) using an unsigned two bytes offset. These pseudo +++ * instructions will need to be replaced with true instructions with +++ * wider offsets (4 bytes instead of 2). This is done in +++ * {@link MethodWriter#resizeInstructions}. +++ * @throws IllegalArgumentException +++ * if this label has already been resolved, or if it has not +++ * been created by the given code writer. +++ */ +++ boolean resolve(final MethodWriter owner, final int position, +++ final byte[] data) { +++ boolean needUpdate = false; +++ this.status |= RESOLVED; +++ this.position = position; +++ int i = 0; +++ while (i < referenceCount) { +++ int source = srcAndRefPositions[i++]; +++ int reference = srcAndRefPositions[i++]; +++ int offset; +++ if (source >= 0) { +++ offset = position - source; +++ if (offset < Short.MIN_VALUE || offset > Short.MAX_VALUE) { +++ /* +++ * changes the opcode of the jump instruction, in order to +++ * be able to find it later (see resizeInstructions in +++ * MethodWriter). These temporary opcodes are similar to +++ * jump instruction opcodes, except that the 2 bytes offset +++ * is unsigned (and can therefore represent values from 0 to +++ * 65535, which is sufficient since the size of a method is +++ * limited to 65535 bytes). +++ */ +++ int opcode = data[reference - 1] & 0xFF; +++ if (opcode <= Opcodes.JSR) { +++ // changes IFEQ ... 
JSR to opcodes 202 to 217 +++ data[reference - 1] = (byte) (opcode + 49); +++ } else { +++ // changes IFNULL and IFNONNULL to opcodes 218 and 219 +++ data[reference - 1] = (byte) (opcode + 20); +++ } +++ needUpdate = true; +++ } +++ data[reference++] = (byte) (offset >>> 8); +++ data[reference] = (byte) offset; +++ } else { +++ offset = position + source + 1; +++ data[reference++] = (byte) (offset >>> 24); +++ data[reference++] = (byte) (offset >>> 16); +++ data[reference++] = (byte) (offset >>> 8); +++ data[reference] = (byte) offset; +++ } +++ } +++ return needUpdate; +++ } +++ +++ /** +++ * Returns the first label of the series to which this label belongs. For an +++ * isolated label or for the first label in a series of successive labels, +++ * this method returns the label itself. For other labels it returns the +++ * first label of the series. +++ * +++ * @return the first label of the series to which this label belongs. +++ */ +++ Label getFirst() { +++ return !ClassReader.FRAMES || frame == null ? this : frame.owner; +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Methods related to subroutines +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns true is this basic block belongs to the given subroutine. +++ * +++ * @param id +++ * a subroutine id. +++ * @return true is this basic block belongs to the given subroutine. +++ */ +++ boolean inSubroutine(final long id) { +++ if ((status & Label.VISITED) != 0) { +++ return (srcAndRefPositions[(int) (id >>> 32)] & (int) id) != 0; +++ } +++ return false; +++ } +++ +++ /** +++ * Returns true if this basic block and the given one belong to a common +++ * subroutine. +++ * +++ * @param block +++ * another basic block. +++ * @return true if this basic block and the given one belong to a common +++ * subroutine. 
+++ */ +++ boolean inSameSubroutine(final Label block) { +++ if ((status & VISITED) == 0 || (block.status & VISITED) == 0) { +++ return false; +++ } +++ for (int i = 0; i < srcAndRefPositions.length; ++i) { +++ if ((srcAndRefPositions[i] & block.srcAndRefPositions[i]) != 0) { +++ return true; +++ } +++ } +++ return false; +++ } +++ +++ /** +++ * Marks this basic block as belonging to the given subroutine. +++ * +++ * @param id +++ * a subroutine id. +++ * @param nbSubroutines +++ * the total number of subroutines in the method. +++ */ +++ void addToSubroutine(final long id, final int nbSubroutines) { +++ if ((status & VISITED) == 0) { +++ status |= VISITED; +++ srcAndRefPositions = new int[nbSubroutines / 32 + 1]; +++ } +++ srcAndRefPositions[(int) (id >>> 32)] |= (int) id; +++ } +++ +++ /** +++ * Finds the basic blocks that belong to a given subroutine, and marks these +++ * blocks as belonging to this subroutine. This method follows the control +++ * flow graph to find all the blocks that are reachable from the current +++ * block WITHOUT following any JSR target. +++ * +++ * @param JSR +++ * a JSR block that jumps to this subroutine. If this JSR is not +++ * null it is added to the successor of the RET blocks found in +++ * the subroutine. +++ * @param id +++ * the id of this subroutine. +++ * @param nbSubroutines +++ * the total number of subroutines in the method. 
+++ */ +++ void visitSubroutine(final Label JSR, final long id, final int nbSubroutines) { +++ // user managed stack of labels, to avoid using a recursive method +++ // (recursivity can lead to stack overflow with very large methods) +++ Label stack = this; +++ while (stack != null) { +++ // removes a label l from the stack +++ Label l = stack; +++ stack = l.next; +++ l.next = null; +++ +++ if (JSR != null) { +++ if ((l.status & VISITED2) != 0) { +++ continue; +++ } +++ l.status |= VISITED2; +++ // adds JSR to the successors of l, if it is a RET block +++ if ((l.status & RET) != 0) { +++ if (!l.inSameSubroutine(JSR)) { +++ Edge e = new Edge(); +++ e.info = l.inputStackTop; +++ e.successor = JSR.successors.successor; +++ e.next = l.successors; +++ l.successors = e; +++ } +++ } +++ } else { +++ // if the l block already belongs to subroutine 'id', continue +++ if (l.inSubroutine(id)) { +++ continue; +++ } +++ // marks the l block as belonging to subroutine 'id' +++ l.addToSubroutine(id, nbSubroutines); +++ } +++ // pushes each successor of l on the stack, except JSR targets +++ Edge e = l.successors; +++ while (e != null) { +++ // if the l block is a JSR block, then 'l.successors.next' leads +++ // to the JSR target (see {@link #visitJumpInsn}) and must +++ // therefore not be followed +++ if ((l.status & Label.JSR) == 0 || e != l.successors.next) { +++ // pushes e.successor on the stack if it not already added +++ if (e.successor.next == null) { +++ e.successor.next = stack; +++ stack = e.successor; +++ } +++ } +++ e = e.next; +++ } +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Overriden Object methods +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns a string representation of this label. +++ * +++ * @return a string representation of this label. 
+++ */ +++ @Override +++ public String toString() { +++ return "L" + System.identityHashCode(this); +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/MethodVisitor.java b/contrib/asm/src/org/objectweb/asm/MethodVisitor.java ++new file mode 100644 ++index 0000000..f0927e8 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/MethodVisitor.java ++@@ -0,0 +1,881 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A visitor to visit a Java method. The methods of this class must be called in +++ * the following order: ( visitParameter )* [ +++ * visitAnnotationDefault ] ( visitAnnotation | +++ * visitParameterAnnotation visitTypeAnnotation | +++ * visitAttribute )* [ visitCode ( visitFrame | +++ * visitXInsn | visitLabel | +++ * visitInsnAnnotation | visitTryCatchBlock | +++ * visitTryCatchAnnotation | visitLocalVariable | +++ * visitLocalVariableAnnotation | visitLineNumber )* +++ * visitMaxs ] visitEnd. In addition, the +++ * visitXInsn and visitLabel methods must be called in +++ * the sequential order of the bytecode instructions of the visited code, +++ * visitInsnAnnotation must be called after the annotated +++ * instruction, visitTryCatchBlock must be called before the +++ * labels passed as arguments have been visited, +++ * visitTryCatchBlockAnnotation must be called after the +++ * corresponding try catch block has been visited, and the +++ * visitLocalVariable, visitLocalVariableAnnotation and +++ * visitLineNumber methods must be called after the labels +++ * passed as arguments have been visited. +++ * +++ * @author Eric Bruneton +++ */ +++public abstract class MethodVisitor { +++ +++ /** +++ * The ASM API version implemented by this visitor. The value of this field +++ * must be one of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. 
+++ */ +++ protected final int api; +++ +++ /** +++ * The method visitor to which this visitor must delegate method calls. May +++ * be null. +++ */ +++ protected MethodVisitor mv; +++ +++ /** +++ * Constructs a new {@link MethodVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ public MethodVisitor(final int api) { +++ this(api, null); +++ } +++ +++ /** +++ * Constructs a new {@link MethodVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ * @param mv +++ * the method visitor to which this visitor must delegate method +++ * calls. May be null. +++ */ +++ public MethodVisitor(final int api, final MethodVisitor mv) { +++ if (api != Opcodes.ASM4 && api != Opcodes.ASM5) { +++ throw new IllegalArgumentException(); +++ } +++ this.api = api; +++ this.mv = mv; +++ } +++ +++ // ------------------------------------------------------------------------- +++ // Parameters, annotations and non standard attributes +++ // ------------------------------------------------------------------------- +++ +++ /** +++ * Visits a parameter of this method. +++ * +++ * @param name +++ * parameter name or null if none is provided. +++ * @param access +++ * the parameter's access flags, only ACC_FINAL, +++ * ACC_SYNTHETIC or/and ACC_MANDATED are +++ * allowed (see {@link Opcodes}). +++ */ +++ public void visitParameter(String name, int access) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (mv != null) { +++ mv.visitParameter(name, access); +++ } +++ } +++ +++ /** +++ * Visits the default value of this annotation interface method. +++ * +++ * @return a visitor to the visit the actual default value of this +++ * annotation interface method, or null if this visitor is +++ * not interested in visiting this default value. 
The 'name' +++ * parameters passed to the methods of this annotation visitor are +++ * ignored. Moreover, exacly one visit method must be called on this +++ * annotation visitor, followed by visitEnd. +++ */ +++ public AnnotationVisitor visitAnnotationDefault() { +++ if (mv != null) { +++ return mv.visitAnnotationDefault(); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an annotation of this method. +++ * +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * true if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or null if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitAnnotation(String desc, boolean visible) { +++ if (mv != null) { +++ return mv.visitAnnotation(desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an annotation on a type in the method signature. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#METHOD_TYPE_PARAMETER +++ * METHOD_TYPE_PARAMETER}, +++ * {@link TypeReference#METHOD_TYPE_PARAMETER_BOUND +++ * METHOD_TYPE_PARAMETER_BOUND}, +++ * {@link TypeReference#METHOD_RETURN METHOD_RETURN}, +++ * {@link TypeReference#METHOD_RECEIVER METHOD_RECEIVER}, +++ * {@link TypeReference#METHOD_FORMAL_PARAMETER +++ * METHOD_FORMAL_PARAMETER} or {@link TypeReference#THROWS +++ * THROWS}. See {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * null if the annotation targets 'typeRef' as a whole. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * true if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or null if +++ * this visitor is not interested in visiting this annotation. 
+++ */ +++ public AnnotationVisitor visitTypeAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (mv != null) { +++ return mv.visitTypeAnnotation(typeRef, typePath, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an annotation of a parameter this method. +++ * +++ * @param parameter +++ * the parameter index. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * true if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or null if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitParameterAnnotation(int parameter, +++ String desc, boolean visible) { +++ if (mv != null) { +++ return mv.visitParameterAnnotation(parameter, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a non standard attribute of this method. +++ * +++ * @param attr +++ * an attribute. +++ */ +++ public void visitAttribute(Attribute attr) { +++ if (mv != null) { +++ mv.visitAttribute(attr); +++ } +++ } +++ +++ /** +++ * Starts the visit of the method's code, if any (i.e. non abstract method). +++ */ +++ public void visitCode() { +++ if (mv != null) { +++ mv.visitCode(); +++ } +++ } +++ +++ /** +++ * Visits the current state of the local variables and operand stack +++ * elements. This method must(*) be called just before any +++ * instruction i that follows an unconditional branch instruction +++ * such as GOTO or THROW, that is the target of a jump instruction, or that +++ * starts an exception handler block. The visited types must describe the +++ * values of the local variables and of the operand stack elements just +++ * before i is executed.
+++ *
+++ * (*) this is mandatory only for classes whose version is greater than or +++ * equal to {@link Opcodes#V1_6 V1_6}.
+++ *
+++ * The frames of a method must be given either in expanded form, or in +++ * compressed form (all frames must use the same format, i.e. you must not +++ * mix expanded and compressed frames within a single method): +++ *
    +++ *
  • In expanded form, all frames must have the F_NEW type.
  • +++ *
  • In compressed form, frames are basically "deltas" from the state of +++ * the previous frame: +++ *
      +++ *
    • {@link Opcodes#F_SAME} representing frame with exactly the same +++ * locals as the previous frame and with the empty stack.
    • +++ *
    • {@link Opcodes#F_SAME1} representing frame with exactly the same +++ * locals as the previous frame and with single value on the stack ( +++ * nStack is 1 and stack[0] contains value for the +++ * type of the stack item).
    • +++ *
    • {@link Opcodes#F_APPEND} representing frame with current locals are +++ * the same as the locals in the previous frame, except that additional +++ * locals are defined (nLocal is 1, 2 or 3 and +++ * local elements contains values representing added types).
    • +++ *
    • {@link Opcodes#F_CHOP} representing frame with current locals are the +++ * same as the locals in the previous frame, except that the last 1-3 locals +++ * are absent and with the empty stack (nLocals is 1, 2 or 3).
    • +++ *
    • {@link Opcodes#F_FULL} representing complete frame data.
    • +++ *
    +++ *
  • +++ *
+++ *
+++ * In both cases the first frame, corresponding to the method's parameters +++ * and access flags, is implicit and must not be visited. Also, it is +++ * illegal to visit two or more frames for the same code location (i.e., at +++ * least one instruction must be visited between two calls to visitFrame). +++ * +++ * @param type +++ * the type of this stack map frame. Must be +++ * {@link Opcodes#F_NEW} for expanded frames, or +++ * {@link Opcodes#F_FULL}, {@link Opcodes#F_APPEND}, +++ * {@link Opcodes#F_CHOP}, {@link Opcodes#F_SAME} or +++ * {@link Opcodes#F_APPEND}, {@link Opcodes#F_SAME1} for +++ * compressed frames. +++ * @param nLocal +++ * the number of local variables in the visited frame. +++ * @param local +++ * the local variable types in this frame. This array must not be +++ * modified. Primitive types are represented by +++ * {@link Opcodes#TOP}, {@link Opcodes#INTEGER}, +++ * {@link Opcodes#FLOAT}, {@link Opcodes#LONG}, +++ * {@link Opcodes#DOUBLE},{@link Opcodes#NULL} or +++ * {@link Opcodes#UNINITIALIZED_THIS} (long and double are +++ * represented by a single element). Reference types are +++ * represented by String objects (representing internal names), +++ * and uninitialized types by Label objects (this label +++ * designates the NEW instruction that created this uninitialized +++ * value). +++ * @param nStack +++ * the number of operand stack elements in the visited frame. +++ * @param stack +++ * the operand stack types in this frame. This array must not be +++ * modified. Its content has the same format as the "local" +++ * array. +++ * @throws IllegalStateException +++ * if a frame is visited just after another one, without any +++ * instruction between the two (unless this frame is a +++ * Opcodes#F_SAME frame, in which case it is silently ignored). 
+++ */ +++ public void visitFrame(int type, int nLocal, Object[] local, int nStack, +++ Object[] stack) { +++ if (mv != null) { +++ mv.visitFrame(type, nLocal, local, nStack, stack); +++ } +++ } +++ +++ // ------------------------------------------------------------------------- +++ // Normal instructions +++ // ------------------------------------------------------------------------- +++ +++ /** +++ * Visits a zero operand instruction. +++ * +++ * @param opcode +++ * the opcode of the instruction to be visited. This opcode is +++ * either NOP, ACONST_NULL, ICONST_M1, ICONST_0, ICONST_1, +++ * ICONST_2, ICONST_3, ICONST_4, ICONST_5, LCONST_0, LCONST_1, +++ * FCONST_0, FCONST_1, FCONST_2, DCONST_0, DCONST_1, IALOAD, +++ * LALOAD, FALOAD, DALOAD, AALOAD, BALOAD, CALOAD, SALOAD, +++ * IASTORE, LASTORE, FASTORE, DASTORE, AASTORE, BASTORE, CASTORE, +++ * SASTORE, POP, POP2, DUP, DUP_X1, DUP_X2, DUP2, DUP2_X1, +++ * DUP2_X2, SWAP, IADD, LADD, FADD, DADD, ISUB, LSUB, FSUB, DSUB, +++ * IMUL, LMUL, FMUL, DMUL, IDIV, LDIV, FDIV, DDIV, IREM, LREM, +++ * FREM, DREM, INEG, LNEG, FNEG, DNEG, ISHL, LSHL, ISHR, LSHR, +++ * IUSHR, LUSHR, IAND, LAND, IOR, LOR, IXOR, LXOR, I2L, I2F, I2D, +++ * L2I, L2F, L2D, F2I, F2L, F2D, D2I, D2L, D2F, I2B, I2C, I2S, +++ * LCMP, FCMPL, FCMPG, DCMPL, DCMPG, IRETURN, LRETURN, FRETURN, +++ * DRETURN, ARETURN, RETURN, ARRAYLENGTH, ATHROW, MONITORENTER, +++ * or MONITOREXIT. +++ */ +++ public void visitInsn(int opcode) { +++ if (mv != null) { +++ mv.visitInsn(opcode); +++ } +++ } +++ +++ /** +++ * Visits an instruction with a single int operand. +++ * +++ * @param opcode +++ * the opcode of the instruction to be visited. This opcode is +++ * either BIPUSH, SIPUSH or NEWARRAY. +++ * @param operand +++ * the operand of the instruction to be visited.
+++ * When opcode is BIPUSH, operand value should be between +++ * Byte.MIN_VALUE and Byte.MAX_VALUE.
+++ * When opcode is SIPUSH, operand value should be between +++ * Short.MIN_VALUE and Short.MAX_VALUE.
+++ * When opcode is NEWARRAY, operand value should be one of +++ * {@link Opcodes#T_BOOLEAN}, {@link Opcodes#T_CHAR}, +++ * {@link Opcodes#T_FLOAT}, {@link Opcodes#T_DOUBLE}, +++ * {@link Opcodes#T_BYTE}, {@link Opcodes#T_SHORT}, +++ * {@link Opcodes#T_INT} or {@link Opcodes#T_LONG}. +++ */ +++ public void visitIntInsn(int opcode, int operand) { +++ if (mv != null) { +++ mv.visitIntInsn(opcode, operand); +++ } +++ } +++ +++ /** +++ * Visits a local variable instruction. A local variable instruction is an +++ * instruction that loads or stores the value of a local variable. +++ * +++ * @param opcode +++ * the opcode of the local variable instruction to be visited. +++ * This opcode is either ILOAD, LLOAD, FLOAD, DLOAD, ALOAD, +++ * ISTORE, LSTORE, FSTORE, DSTORE, ASTORE or RET. +++ * @param var +++ * the operand of the instruction to be visited. This operand is +++ * the index of a local variable. +++ */ +++ public void visitVarInsn(int opcode, int var) { +++ if (mv != null) { +++ mv.visitVarInsn(opcode, var); +++ } +++ } +++ +++ /** +++ * Visits a type instruction. A type instruction is an instruction that +++ * takes the internal name of a class as parameter. +++ * +++ * @param opcode +++ * the opcode of the type instruction to be visited. This opcode +++ * is either NEW, ANEWARRAY, CHECKCAST or INSTANCEOF. +++ * @param type +++ * the operand of the instruction to be visited. This operand +++ * must be the internal name of an object or array class (see +++ * {@link Type#getInternalName() getInternalName}). +++ */ +++ public void visitTypeInsn(int opcode, String type) { +++ if (mv != null) { +++ mv.visitTypeInsn(opcode, type); +++ } +++ } +++ +++ /** +++ * Visits a field instruction. A field instruction is an instruction that +++ * loads or stores the value of a field of an object. +++ * +++ * @param opcode +++ * the opcode of the type instruction to be visited. This opcode +++ * is either GETSTATIC, PUTSTATIC, GETFIELD or PUTFIELD. 
+++ * @param owner +++ * the internal name of the field's owner class (see +++ * {@link Type#getInternalName() getInternalName}). +++ * @param name +++ * the field's name. +++ * @param desc +++ * the field's descriptor (see {@link Type Type}). +++ */ +++ public void visitFieldInsn(int opcode, String owner, String name, +++ String desc) { +++ if (mv != null) { +++ mv.visitFieldInsn(opcode, owner, name, desc); +++ } +++ } +++ +++ /** +++ * Visits a method instruction. A method instruction is an instruction that +++ * invokes a method. +++ * +++ * @param opcode +++ * the opcode of the type instruction to be visited. This opcode +++ * is either INVOKEVIRTUAL, INVOKESPECIAL, INVOKESTATIC or +++ * INVOKEINTERFACE. +++ * @param owner +++ * the internal name of the method's owner class (see +++ * {@link Type#getInternalName() getInternalName}). +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor (see {@link Type Type}). +++ */ +++ @Deprecated +++ public void visitMethodInsn(int opcode, String owner, String name, +++ String desc) { +++ if (api >= Opcodes.ASM5) { +++ boolean itf = opcode == Opcodes.INVOKEINTERFACE; +++ visitMethodInsn(opcode, owner, name, desc, itf); +++ return; +++ } +++ if (mv != null) { +++ mv.visitMethodInsn(opcode, owner, name, desc); +++ } +++ } +++ +++ /** +++ * Visits a method instruction. A method instruction is an instruction that +++ * invokes a method. +++ * +++ * @param opcode +++ * the opcode of the type instruction to be visited. This opcode +++ * is either INVOKEVIRTUAL, INVOKESPECIAL, INVOKESTATIC or +++ * INVOKEINTERFACE. +++ * @param owner +++ * the internal name of the method's owner class (see +++ * {@link Type#getInternalName() getInternalName}). +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor (see {@link Type Type}). +++ * @param itf +++ * if the method's owner class is an interface. 
+++ */ +++ public void visitMethodInsn(int opcode, String owner, String name, +++ String desc, boolean itf) { +++ if (api < Opcodes.ASM5) { +++ if (itf != (opcode == Opcodes.INVOKEINTERFACE)) { +++ throw new IllegalArgumentException( +++ "INVOKESPECIAL/STATIC on interfaces require ASM 5"); +++ } +++ visitMethodInsn(opcode, owner, name, desc); +++ return; +++ } +++ if (mv != null) { +++ mv.visitMethodInsn(opcode, owner, name, desc, itf); +++ } +++ } +++ +++ /** +++ * Visits an invokedynamic instruction. +++ * +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor (see {@link Type Type}). +++ * @param bsm +++ * the bootstrap method. +++ * @param bsmArgs +++ * the bootstrap method constant arguments. Each argument must be +++ * an {@link Integer}, {@link Float}, {@link Long}, +++ * {@link Double}, {@link String}, {@link Type} or {@link Handle} +++ * value. This method is allowed to modify the content of the +++ * array so a caller should expect that this array may change. +++ */ +++ public void visitInvokeDynamicInsn(String name, String desc, Handle bsm, +++ Object... bsmArgs) { +++ if (mv != null) { +++ mv.visitInvokeDynamicInsn(name, desc, bsm, bsmArgs); +++ } +++ } +++ +++ /** +++ * Visits a jump instruction. A jump instruction is an instruction that may +++ * jump to another instruction. +++ * +++ * @param opcode +++ * the opcode of the type instruction to be visited. This opcode +++ * is either IFEQ, IFNE, IFLT, IFGE, IFGT, IFLE, IF_ICMPEQ, +++ * IF_ICMPNE, IF_ICMPLT, IF_ICMPGE, IF_ICMPGT, IF_ICMPLE, +++ * IF_ACMPEQ, IF_ACMPNE, GOTO, JSR, IFNULL or IFNONNULL. +++ * @param label +++ * the operand of the instruction to be visited. This operand is +++ * a label that designates the instruction to which the jump +++ * instruction may jump. +++ */ +++ public void visitJumpInsn(int opcode, Label label) { +++ if (mv != null) { +++ mv.visitJumpInsn(opcode, label); +++ } +++ } +++ +++ /** +++ * Visits a label. 
A label designates the instruction that will be visited +++ * just after it. +++ * +++ * @param label +++ * a {@link Label Label} object. +++ */ +++ public void visitLabel(Label label) { +++ if (mv != null) { +++ mv.visitLabel(label); +++ } +++ } +++ +++ // ------------------------------------------------------------------------- +++ // Special instructions +++ // ------------------------------------------------------------------------- +++ +++ /** +++ * Visits a LDC instruction. Note that new constant types may be added in +++ * future versions of the Java Virtual Machine. To easily detect new +++ * constant types, implementations of this method should check for +++ * unexpected constant types, like this: +++ * +++ *
+++     * if (cst instanceof Integer) {
+++     *     // ...
+++     * } else if (cst instanceof Float) {
+++     *     // ...
+++     * } else if (cst instanceof Long) {
+++     *     // ...
+++     * } else if (cst instanceof Double) {
+++     *     // ...
+++     * } else if (cst instanceof String) {
+++     *     // ...
+++     * } else if (cst instanceof Type) {
+++     *     int sort = ((Type) cst).getSort();
+++     *     if (sort == Type.OBJECT) {
+++     *         // ...
+++     *     } else if (sort == Type.ARRAY) {
+++     *         // ...
+++     *     } else if (sort == Type.METHOD) {
+++     *         // ...
+++     *     } else {
+++     *         // throw an exception
+++     *     }
+++     * } else if (cst instanceof Handle) {
+++     *     // ...
+++     * } else {
+++     *     // throw an exception
+++     * }
+++     * 
+++ * +++ * @param cst +++ * the constant to be loaded on the stack. This parameter must be +++ * a non null {@link Integer}, a {@link Float}, a {@link Long}, a +++ * {@link Double}, a {@link String}, a {@link Type} of OBJECT or +++ * ARRAY sort for .class constants, for classes whose +++ * version is 49.0, a {@link Type} of METHOD sort or a +++ * {@link Handle} for MethodType and MethodHandle constants, for +++ * classes whose version is 51.0. +++ */ +++ public void visitLdcInsn(Object cst) { +++ if (mv != null) { +++ mv.visitLdcInsn(cst); +++ } +++ } +++ +++ /** +++ * Visits an IINC instruction. +++ * +++ * @param var +++ * index of the local variable to be incremented. +++ * @param increment +++ * amount to increment the local variable by. +++ */ +++ public void visitIincInsn(int var, int increment) { +++ if (mv != null) { +++ mv.visitIincInsn(var, increment); +++ } +++ } +++ +++ /** +++ * Visits a TABLESWITCH instruction. +++ * +++ * @param min +++ * the minimum key value. +++ * @param max +++ * the maximum key value. +++ * @param dflt +++ * beginning of the default handler block. +++ * @param labels +++ * beginnings of the handler blocks. labels[i] is the +++ * beginning of the handler block for the min + i key. +++ */ +++ public void visitTableSwitchInsn(int min, int max, Label dflt, +++ Label... labels) { +++ if (mv != null) { +++ mv.visitTableSwitchInsn(min, max, dflt, labels); +++ } +++ } +++ +++ /** +++ * Visits a LOOKUPSWITCH instruction. +++ * +++ * @param dflt +++ * beginning of the default handler block. +++ * @param keys +++ * the values of the keys. +++ * @param labels +++ * beginnings of the handler blocks. labels[i] is the +++ * beginning of the handler block for the keys[i] key. +++ */ +++ public void visitLookupSwitchInsn(Label dflt, int[] keys, Label[] labels) { +++ if (mv != null) { +++ mv.visitLookupSwitchInsn(dflt, keys, labels); +++ } +++ } +++ +++ /** +++ * Visits a MULTIANEWARRAY instruction. 
+++ * +++ * @param desc +++ * an array type descriptor (see {@link Type Type}). +++ * @param dims +++ * number of dimensions of the array to allocate. +++ */ +++ public void visitMultiANewArrayInsn(String desc, int dims) { +++ if (mv != null) { +++ mv.visitMultiANewArrayInsn(desc, dims); +++ } +++ } +++ +++ /** +++ * Visits an annotation on an instruction. This method must be called just +++ * after the annotated instruction. It can be called several times +++ * for the same instruction. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#INSTANCEOF INSTANCEOF}, +++ * {@link TypeReference#NEW NEW}, +++ * {@link TypeReference#CONSTRUCTOR_REFERENCE +++ * CONSTRUCTOR_REFERENCE}, {@link TypeReference#METHOD_REFERENCE +++ * METHOD_REFERENCE}, {@link TypeReference#CAST CAST}, +++ * {@link TypeReference#CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ * CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT}, +++ * {@link TypeReference#METHOD_INVOCATION_TYPE_ARGUMENT +++ * METHOD_INVOCATION_TYPE_ARGUMENT}, +++ * {@link TypeReference#CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ * CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT}, or +++ * {@link TypeReference#METHOD_REFERENCE_TYPE_ARGUMENT +++ * METHOD_REFERENCE_TYPE_ARGUMENT}. See {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * null if the annotation targets 'typeRef' as a whole. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * true if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or null if +++ * this visitor is not interested in visiting this annotation. 
+++ */ +++ public AnnotationVisitor visitInsnAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (mv != null) { +++ return mv.visitInsnAnnotation(typeRef, typePath, desc, visible); +++ } +++ return null; +++ } +++ +++ // ------------------------------------------------------------------------- +++ // Exceptions table entries, debug information, max stack and max locals +++ // ------------------------------------------------------------------------- +++ +++ /** +++ * Visits a try catch block. +++ * +++ * @param start +++ * beginning of the exception handler's scope (inclusive). +++ * @param end +++ * end of the exception handler's scope (exclusive). +++ * @param handler +++ * beginning of the exception handler's code. +++ * @param type +++ * internal name of the type of exceptions handled by the +++ * handler, or null to catch any exceptions (for +++ * "finally" blocks). +++ * @throws IllegalArgumentException +++ * if one of the labels has already been visited by this visitor +++ * (by the {@link #visitLabel visitLabel} method). +++ */ +++ public void visitTryCatchBlock(Label start, Label end, Label handler, +++ String type) { +++ if (mv != null) { +++ mv.visitTryCatchBlock(start, end, handler, type); +++ } +++ } +++ +++ /** +++ * Visits an annotation on an exception handler type. This method must be +++ * called after the {@link #visitTryCatchBlock} for the annotated +++ * exception handler. It can be called several times for the same exception +++ * handler. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#EXCEPTION_PARAMETER +++ * EXCEPTION_PARAMETER}. See {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. 
May be +++ * null if the annotation targets 'typeRef' as a whole. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * true if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or null if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitTryCatchAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (mv != null) { +++ return mv.visitTryCatchAnnotation(typeRef, typePath, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a local variable declaration. +++ * +++ * @param name +++ * the name of a local variable. +++ * @param desc +++ * the type descriptor of this local variable. +++ * @param signature +++ * the type signature of this local variable. May be +++ * null if the local variable type does not use generic +++ * types. +++ * @param start +++ * the first instruction corresponding to the scope of this local +++ * variable (inclusive). +++ * @param end +++ * the last instruction corresponding to the scope of this local +++ * variable (exclusive). +++ * @param index +++ * the local variable's index. +++ * @throws IllegalArgumentException +++ * if one of the labels has not already been visited by this +++ * visitor (by the {@link #visitLabel visitLabel} method). +++ */ +++ public void visitLocalVariable(String name, String desc, String signature, +++ Label start, Label end, int index) { +++ if (mv != null) { +++ mv.visitLocalVariable(name, desc, signature, start, end, index); +++ } +++ } +++ +++ /** +++ * Visits an annotation on a local variable type. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#LOCAL_VARIABLE +++ * LOCAL_VARIABLE} or {@link TypeReference#RESOURCE_VARIABLE +++ * RESOURCE_VARIABLE}. 
See {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * null if the annotation targets 'typeRef' as a whole. +++ * @param start +++ * the first instructions corresponding to the continuous ranges +++ * that make the scope of this local variable (inclusive). +++ * @param end +++ * the last instructions corresponding to the continuous ranges +++ * that make the scope of this local variable (exclusive). This +++ * array must have the same size as the 'start' array. +++ * @param index +++ * the local variable's index in each range. This array must have +++ * the same size as the 'start' array. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * true if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or null if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitLocalVariableAnnotation(int typeRef, +++ TypePath typePath, Label[] start, Label[] end, int[] index, +++ String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (mv != null) { +++ return mv.visitLocalVariableAnnotation(typeRef, typePath, start, +++ end, index, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a line number declaration. +++ * +++ * @param line +++ * a line number. This number refers to the source file from +++ * which the class was compiled. +++ * @param start +++ * the first instruction corresponding to this line number. +++ * @throws IllegalArgumentException +++ * if start has not already been visited by this +++ * visitor (by the {@link #visitLabel visitLabel} method).
+++ */ +++ public void visitLineNumber(int line, Label start) { +++ if (mv != null) { +++ mv.visitLineNumber(line, start); +++ } +++ } +++ +++ /** +++ * Visits the maximum stack size and the maximum number of local variables +++ * of the method. +++ * +++ * @param maxStack +++ * maximum stack size of the method. +++ * @param maxLocals +++ * maximum number of local variables for the method. +++ */ +++ public void visitMaxs(int maxStack, int maxLocals) { +++ if (mv != null) { +++ mv.visitMaxs(maxStack, maxLocals); +++ } +++ } +++ +++ /** +++ * Visits the end of the method. This method, which is the last one to be +++ * called, is used to inform the visitor that all the annotations and +++ * attributes of the method have been visited. +++ */ +++ public void visitEnd() { +++ if (mv != null) { +++ mv.visitEnd(); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/MethodWriter.java b/contrib/asm/src/org/objectweb/asm/MethodWriter.java ++new file mode 100644 ++index 0000000..ceca3f8 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/MethodWriter.java ++@@ -0,0 +1,2915 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. 
+++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A {@link MethodVisitor} that generates methods in bytecode form. Each visit +++ * method of this class appends the bytecode corresponding to the visited +++ * instruction to a byte vector, in the order these methods are called. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++class MethodWriter extends MethodVisitor { +++ +++ /** +++ * Pseudo access flag used to denote constructors. +++ */ +++ static final int ACC_CONSTRUCTOR = 0x80000; +++ +++ /** +++ * Frame has exactly the same locals as the previous stack map frame and +++ * number of stack items is zero. +++ */ +++ static final int SAME_FRAME = 0; // to 63 (0-3f) +++ +++ /** +++ * Frame has exactly the same locals as the previous stack map frame and +++ * number of stack items is 1 +++ */ +++ static final int SAME_LOCALS_1_STACK_ITEM_FRAME = 64; // to 127 (40-7f) +++ +++ /** +++ * Reserved for future use +++ */ +++ static final int RESERVED = 128; +++ +++ /** +++ * Frame has exactly the same locals as the previous stack map frame and +++ * number of stack items is 1. 
Offset is bigger than 63. +++ */ +++ static final int SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED = 247; // f7 +++ +++ /** +++ * Frame where current locals are the same as the locals in the previous +++ * frame, except that the k last locals are absent. The value of k is given +++ * by the formula 251-frame_type. +++ */ +++ static final int CHOP_FRAME = 248; // to 250 (f8-fA) +++ +++ /** +++ * Frame has exactly the same locals as the previous stack map frame and +++ * number of stack items is zero. Offset is bigger than 63. +++ */ +++ static final int SAME_FRAME_EXTENDED = 251; // fb +++ +++ /** +++ * Frame where current locals are the same as the locals in the previous +++ * frame, except that k additional locals are defined. The value of k is +++ * given by the formula frame_type-251. +++ */ +++ static final int APPEND_FRAME = 252; // to 254 // fc-fe +++ +++ /** +++ * Full frame +++ */ +++ static final int FULL_FRAME = 255; // ff +++ +++ /** +++ * Indicates that the stack map frames must be recomputed from scratch. In +++ * this case the maximum stack size and number of local variables is also +++ * recomputed from scratch. +++ * +++ * @see #compute +++ */ +++ private static final int FRAMES = 0; +++ +++ /** +++ * Indicates that the maximum stack size and number of local variables must +++ * be automatically computed. +++ * +++ * @see #compute +++ */ +++ private static final int MAXS = 1; +++ +++ /** +++ * Indicates that nothing must be automatically computed. +++ * +++ * @see #compute +++ */ +++ private static final int NOTHING = 2; +++ +++ /** +++ * The class writer to which this method must be added. +++ */ +++ final ClassWriter cw; +++ +++ /** +++ * Access flags of this method. +++ */ +++ private int access; +++ +++ /** +++ * The index of the constant pool item that contains the name of this +++ * method. +++ */ +++ private final int name; +++ +++ /** +++ * The index of the constant pool item that contains the descriptor of this +++ * method.
+++ */ +++ private final int desc; +++ +++ /** +++ * The descriptor of this method. +++ */ +++ private final String descriptor; +++ +++ /** +++ * The signature of this method. +++ */ +++ String signature; +++ +++ /** +++ * If not zero, indicates that the code of this method must be copied from +++ * the ClassReader associated to this writer in cw.cr. More +++ * precisely, this field gives the index of the first byte to be copied from +++ * cw.cr.b. +++ */ +++ int classReaderOffset; +++ +++ /** +++ * If not zero, indicates that the code of this method must be copied from +++ * the ClassReader associated to this writer in cw.cr. More +++ * precisely, this field gives the number of bytes to be copied from +++ * cw.cr.b. +++ */ +++ int classReaderLength; +++ +++ /** +++ * Number of exceptions that can be thrown by this method. +++ */ +++ int exceptionCount; +++ +++ /** +++ * The exceptions that can be thrown by this method. More precisely, this +++ * array contains the indexes of the constant pool items that contain the +++ * internal names of these exception classes. +++ */ +++ int[] exceptions; +++ +++ /** +++ * The annotation default attribute of this method. May be null. +++ */ +++ private ByteVector annd; +++ +++ /** +++ * The runtime visible annotations of this method. May be null. +++ */ +++ private AnnotationWriter anns; +++ +++ /** +++ * The runtime invisible annotations of this method. May be null. +++ */ +++ private AnnotationWriter ianns; +++ +++ /** +++ * The runtime visible type annotations of this method. May be null +++ * . +++ */ +++ private AnnotationWriter tanns; +++ +++ /** +++ * The runtime invisible type annotations of this method. May be +++ * null. +++ */ +++ private AnnotationWriter itanns; +++ +++ /** +++ * The runtime visible parameter annotations of this method. May be +++ * null. +++ */ +++ private AnnotationWriter[] panns; +++ +++ /** +++ * The runtime invisible parameter annotations of this method. May be +++ * null.
+++ */ +++ private AnnotationWriter[] ipanns; +++ +++ /** +++ * The number of synthetic parameters of this method. +++ */ +++ private int synthetics; +++ +++ /** +++ * The non standard attributes of the method. +++ */ +++ private Attribute attrs; +++ +++ /** +++ * The bytecode of this method. +++ */ +++ private ByteVector code = new ByteVector(); +++ +++ /** +++ * Maximum stack size of this method. +++ */ +++ private int maxStack; +++ +++ /** +++ * Maximum number of local variables for this method. +++ */ +++ private int maxLocals; +++ +++ /** +++ * Number of local variables in the current stack map frame. +++ */ +++ private int currentLocals; +++ +++ /** +++ * Number of stack map frames in the StackMapTable attribute. +++ */ +++ private int frameCount; +++ +++ /** +++ * The StackMapTable attribute. +++ */ +++ private ByteVector stackMap; +++ +++ /** +++ * The offset of the last frame that was written in the StackMapTable +++ * attribute. +++ */ +++ private int previousFrameOffset; +++ +++ /** +++ * The last frame that was written in the StackMapTable attribute. +++ * +++ * @see #frame +++ */ +++ private int[] previousFrame; +++ +++ /** +++ * The current stack map frame. The first element contains the offset of the +++ * instruction to which the frame corresponds, the second element is the +++ * number of locals and the third one is the number of stack elements. The +++ * local variables start at index 3 and are followed by the operand stack +++ * values. In summary frame[0] = offset, frame[1] = nLocal, frame[2] = +++ * nStack, frame[3] = nLocal. All types are encoded as integers, with the +++ * same format as the one used in {@link Label}, but limited to BASE types. +++ */ +++ private int[] frame; +++ +++ /** +++ * Number of elements in the exception handler list. +++ */ +++ private int handlerCount; +++ +++ /** +++ * The first element in the exception handler list. 
+++ */ +++ private Handler firstHandler; +++ +++ /** +++ * The last element in the exception handler list. +++ */ +++ private Handler lastHandler; +++ +++ /** +++ * Number of entries in the MethodParameters attribute. +++ */ +++ private int methodParametersCount; +++ +++ /** +++ * The MethodParameters attribute. +++ */ +++ private ByteVector methodParameters; +++ +++ /** +++ * Number of entries in the LocalVariableTable attribute. +++ */ +++ private int localVarCount; +++ +++ /** +++ * The LocalVariableTable attribute. +++ */ +++ private ByteVector localVar; +++ +++ /** +++ * Number of entries in the LocalVariableTypeTable attribute. +++ */ +++ private int localVarTypeCount; +++ +++ /** +++ * The LocalVariableTypeTable attribute. +++ */ +++ private ByteVector localVarType; +++ +++ /** +++ * Number of entries in the LineNumberTable attribute. +++ */ +++ private int lineNumberCount; +++ +++ /** +++ * The LineNumberTable attribute. +++ */ +++ private ByteVector lineNumber; +++ +++ /** +++ * The start offset of the last visited instruction. +++ */ +++ private int lastCodeOffset; +++ +++ /** +++ * The runtime visible type annotations of the code. May be null. +++ */ +++ private AnnotationWriter ctanns; +++ +++ /** +++ * The runtime invisible type annotations of the code. May be null. +++ */ +++ private AnnotationWriter ictanns; +++ +++ /** +++ * The non standard attributes of the method's code. +++ */ +++ private Attribute cattrs; +++ +++ /** +++ * Indicates if some jump instructions are too small and need to be resized. +++ */ +++ private boolean resize; +++ +++ /** +++ * The number of subroutines in this method. +++ */ +++ private int subroutines; +++ +++ // ------------------------------------------------------------------------ +++ +++ /* +++ * Fields for the control flow graph analysis algorithm (used to compute the +++ * maximum stack size). A control flow graph contains one node per "basic +++ * block", and one edge per "jump" from one basic block to another. 
Each +++ * node (i.e., each basic block) is represented by the Label object that +++ * corresponds to the first instruction of this basic block. Each node also +++ * stores the list of its successors in the graph, as a linked list of Edge +++ * objects. +++ */ +++ +++ /** +++ * Indicates what must be automatically computed. +++ * +++ * @see #FRAMES +++ * @see #MAXS +++ * @see #NOTHING +++ */ +++ private final int compute; +++ +++ /** +++ * A list of labels. This list is the list of basic blocks in the method, +++ * i.e. a list of Label objects linked to each other by their +++ * {@link Label#successor} field, in the order they are visited by +++ * {@link MethodVisitor#visitLabel}, and starting with the first basic +++ * block. +++ */ +++ private Label labels; +++ +++ /** +++ * The previous basic block. +++ */ +++ private Label previousBlock; +++ +++ /** +++ * The current basic block. +++ */ +++ private Label currentBlock; +++ +++ /** +++ * The (relative) stack size after the last visited instruction. This size +++ * is relative to the beginning of the current basic block, i.e., the true +++ * stack size after the last visited instruction is equal to the +++ * {@link Label#inputStackTop beginStackSize} of the current basic block +++ * plus stackSize. +++ */ +++ private int stackSize; +++ +++ /** +++ * The (relative) maximum stack size after the last visited instruction. +++ * This size is relative to the beginning of the current basic block, i.e., +++ * the true maximum stack size after the last visited instruction is equal +++ * to the {@link Label#inputStackTop beginStackSize} of the current basic +++ * block plus stackSize. +++ */ +++ private int maxStackSize; +++ +++ // ------------------------------------------------------------------------ +++ // Constructor +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new {@link MethodWriter}. 
+++ * +++ * @param cw +++ * the class writer in which the method must be added. +++ * @param access +++ * the method's access flags (see {@link Opcodes}). +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor (see {@link Type}). +++ * @param signature +++ * the method's signature. May be null. +++ * @param exceptions +++ * the internal names of the method's exceptions. May be +++ * null. +++ * @param computeMaxs +++ * true if the maximum stack size and number of local +++ * variables must be automatically computed. +++ * @param computeFrames +++ * true if the stack map tables must be recomputed from +++ * scratch. +++ */ +++ MethodWriter(final ClassWriter cw, final int access, final String name, +++ final String desc, final String signature, +++ final String[] exceptions, final boolean computeMaxs, +++ final boolean computeFrames) { +++ super(Opcodes.ASM5); +++ if (cw.firstMethod == null) { +++ cw.firstMethod = this; +++ } else { +++ cw.lastMethod.mv = this; +++ } +++ cw.lastMethod = this; +++ this.cw = cw; +++ this.access = access; +++ // "<init>" is the JVM-reserved name of instance constructors +++ if ("<init>".equals(name)) { +++ this.access |= ACC_CONSTRUCTOR; +++ } +++ this.name = cw.newUTF8(name); +++ this.desc = cw.newUTF8(desc); +++ this.descriptor = desc; +++ if (ClassReader.SIGNATURES) { +++ this.signature = signature; +++ } +++ if (exceptions != null && exceptions.length > 0) { +++ exceptionCount = exceptions.length; +++ this.exceptions = new int[exceptionCount]; +++ for (int i = 0; i < exceptionCount; ++i) { +++ this.exceptions[i] = cw.newClass(exceptions[i]); +++ } +++ } +++ this.compute = computeFrames ? FRAMES : (computeMaxs ?
MAXS : NOTHING); +++ if (computeMaxs || computeFrames) { +++ // updates maxLocals +++ int size = Type.getArgumentsAndReturnSizes(descriptor) >> 2; +++ if ((access & Opcodes.ACC_STATIC) != 0) { +++ --size; +++ } +++ maxLocals = size; +++ currentLocals = size; +++ // creates and visits the label for the first basic block +++ labels = new Label(); +++ labels.status |= Label.PUSHED; +++ visitLabel(labels); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Implementation of the MethodVisitor abstract class +++ // ------------------------------------------------------------------------ +++ +++ @Override +++ public void visitParameter(String name, int access) { +++ if (methodParameters == null) { +++ methodParameters = new ByteVector(); +++ } +++ ++methodParametersCount; +++ methodParameters.putShort((name == null) ? 0 : cw.newUTF8(name)) +++ .putShort(access); +++ } +++ +++ @Override +++ public AnnotationVisitor visitAnnotationDefault() { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ annd = new ByteVector(); +++ return new AnnotationWriter(cw, false, annd, null, 0); +++ } +++ +++ @Override +++ public AnnotationVisitor visitAnnotation(final String desc, +++ final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, 2); +++ if (visible) { +++ aw.next = anns; +++ anns = aw; +++ } else { +++ aw.next = ianns; +++ ianns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public AnnotationVisitor visitTypeAnnotation(final int typeRef, +++ final TypePath typePath, final String desc, final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ AnnotationWriter.putTarget(typeRef, 
typePath, bv); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = tanns; +++ tanns = aw; +++ } else { +++ aw.next = itanns; +++ itanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public AnnotationVisitor visitParameterAnnotation(final int parameter, +++ final String desc, final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ if ("Ljava/lang/Synthetic;".equals(desc)) { +++ // workaround for a bug in javac with synthetic parameters +++ // see ClassReader.readParameterAnnotations +++ synthetics = Math.max(synthetics, parameter + 1); +++ return new AnnotationWriter(cw, false, bv, null, 0); +++ } +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, 2); +++ if (visible) { +++ if (panns == null) { +++ panns = new AnnotationWriter[Type.getArgumentTypes(descriptor).length]; +++ } +++ aw.next = panns[parameter]; +++ panns[parameter] = aw; +++ } else { +++ if (ipanns == null) { +++ ipanns = new AnnotationWriter[Type.getArgumentTypes(descriptor).length]; +++ } +++ aw.next = ipanns[parameter]; +++ ipanns[parameter] = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public void visitAttribute(final Attribute attr) { +++ if (attr.isCodeAttribute()) { +++ attr.next = cattrs; +++ cattrs = attr; +++ } else { +++ attr.next = attrs; +++ attrs = attr; +++ } +++ } +++ +++ @Override +++ public void visitCode() { +++ } +++ +++ @Override +++ public void visitFrame(final int type, final int nLocal, +++ final Object[] local, final int nStack, final Object[] stack) { +++ if (!ClassReader.FRAMES || compute == FRAMES) { +++ return; +++ } +++ +++ if (type == Opcodes.F_NEW) { +++ if (previousFrame == null) { +++ 
visitImplicitFirstFrame(); +++ } +++ currentLocals = nLocal; +++ int frameIndex = startFrame(code.length, nLocal, nStack); +++ for (int i = 0; i < nLocal; ++i) { +++ if (local[i] instanceof String) { +++ frame[frameIndex++] = Frame.OBJECT +++ | cw.addType((String) local[i]); +++ } else if (local[i] instanceof Integer) { +++ frame[frameIndex++] = ((Integer) local[i]).intValue(); +++ } else { +++ frame[frameIndex++] = Frame.UNINITIALIZED +++ | cw.addUninitializedType("", +++ ((Label) local[i]).position); +++ } +++ } +++ for (int i = 0; i < nStack; ++i) { +++ if (stack[i] instanceof String) { +++ frame[frameIndex++] = Frame.OBJECT +++ | cw.addType((String) stack[i]); +++ } else if (stack[i] instanceof Integer) { +++ frame[frameIndex++] = ((Integer) stack[i]).intValue(); +++ } else { +++ frame[frameIndex++] = Frame.UNINITIALIZED +++ | cw.addUninitializedType("", +++ ((Label) stack[i]).position); +++ } +++ } +++ endFrame(); +++ } else { +++ int delta; +++ if (stackMap == null) { +++ stackMap = new ByteVector(); +++ delta = code.length; +++ } else { +++ delta = code.length - previousFrameOffset - 1; +++ if (delta < 0) { +++ if (type == Opcodes.F_SAME) { +++ return; +++ } else { +++ throw new IllegalStateException(); +++ } +++ } +++ } +++ +++ switch (type) { +++ case Opcodes.F_FULL: +++ currentLocals = nLocal; +++ stackMap.putByte(FULL_FRAME).putShort(delta).putShort(nLocal); +++ for (int i = 0; i < nLocal; ++i) { +++ writeFrameType(local[i]); +++ } +++ stackMap.putShort(nStack); +++ for (int i = 0; i < nStack; ++i) { +++ writeFrameType(stack[i]); +++ } +++ break; +++ case Opcodes.F_APPEND: +++ currentLocals += nLocal; +++ stackMap.putByte(SAME_FRAME_EXTENDED + nLocal).putShort(delta); +++ for (int i = 0; i < nLocal; ++i) { +++ writeFrameType(local[i]); +++ } +++ break; +++ case Opcodes.F_CHOP: +++ currentLocals -= nLocal; +++ stackMap.putByte(SAME_FRAME_EXTENDED - nLocal).putShort(delta); +++ break; +++ case Opcodes.F_SAME: +++ if (delta < 64) { +++ 
stackMap.putByte(delta); +++ } else { +++ stackMap.putByte(SAME_FRAME_EXTENDED).putShort(delta); +++ } +++ break; +++ case Opcodes.F_SAME1: +++ if (delta < 64) { +++ stackMap.putByte(SAME_LOCALS_1_STACK_ITEM_FRAME + delta); +++ } else { +++ stackMap.putByte(SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED) +++ .putShort(delta); +++ } +++ writeFrameType(stack[0]); +++ break; +++ } +++ +++ previousFrameOffset = code.length; +++ ++frameCount; +++ } +++ +++ maxStack = Math.max(maxStack, nStack); +++ maxLocals = Math.max(maxLocals, currentLocals); +++ } +++ +++ @Override +++ public void visitInsn(final int opcode) { +++ lastCodeOffset = code.length; +++ // adds the instruction to the bytecode of the method +++ code.putByte(opcode); +++ // update currentBlock +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, 0, null, null); +++ } else { +++ // updates current and max stack sizes +++ int size = stackSize + Frame.SIZE[opcode]; +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ // if opcode == ATHROW or xRETURN, ends current block (no successor) +++ if ((opcode >= Opcodes.IRETURN && opcode <= Opcodes.RETURN) +++ || opcode == Opcodes.ATHROW) { +++ noSuccessor(); +++ } +++ } +++ } +++ +++ @Override +++ public void visitIntInsn(final int opcode, final int operand) { +++ lastCodeOffset = code.length; +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, operand, null, null); +++ } else if (opcode != Opcodes.NEWARRAY) { +++ // updates current and max stack sizes only for BIPUSH or SIPUSH +++ // (stack size variation = 0 for NEWARRAY) +++ int size = stackSize + 1; +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ if (opcode == Opcodes.SIPUSH) { +++
code.put12(opcode, operand); +++ } else { // BIPUSH or NEWARRAY +++ code.put11(opcode, operand); +++ } +++ } +++ +++ @Override +++ public void visitVarInsn(final int opcode, final int var) { +++ lastCodeOffset = code.length; +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, var, null, null); +++ } else { +++ // updates current and max stack sizes +++ if (opcode == Opcodes.RET) { +++ // no stack change, but end of current block (no successor) +++ currentBlock.status |= Label.RET; +++ // save 'stackSize' here for future use +++ // (see {@link #findSubroutineSuccessors}) +++ currentBlock.inputStackTop = stackSize; +++ noSuccessor(); +++ } else { // xLOAD or xSTORE +++ int size = stackSize + Frame.SIZE[opcode]; +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ } +++ if (compute != NOTHING) { +++ // updates max locals +++ int n; +++ if (opcode == Opcodes.LLOAD || opcode == Opcodes.DLOAD +++ || opcode == Opcodes.LSTORE || opcode == Opcodes.DSTORE) { +++ n = var + 2; +++ } else { +++ n = var + 1; +++ } +++ if (n > maxLocals) { +++ maxLocals = n; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ if (var < 4 && opcode != Opcodes.RET) { +++ int opt; +++ if (opcode < Opcodes.ISTORE) { +++ /* ILOAD_0 */ +++ opt = 26 + ((opcode - Opcodes.ILOAD) << 2) + var; +++ } else { +++ /* ISTORE_0 */ +++ opt = 59 + ((opcode - Opcodes.ISTORE) << 2) + var; +++ } +++ code.putByte(opt); +++ } else if (var >= 256) { +++ code.putByte(196 /* WIDE */).put12(opcode, var); +++ } else { +++ code.put11(opcode, var); +++ } +++ if (opcode >= Opcodes.ISTORE && compute == FRAMES && handlerCount > 0) { +++ visitLabel(new Label()); +++ } +++ } +++ +++ @Override +++ public void visitTypeInsn(final int opcode, final String type) { +++ lastCodeOffset = code.length; +++ Item i = cw.newClassItem(type); +++ // Label currentBlock = 
this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, code.length, cw, i); +++ } else if (opcode == Opcodes.NEW) { +++ // updates current and max stack sizes only if opcode == NEW +++ // (no stack change for ANEWARRAY, CHECKCAST, INSTANCEOF) +++ int size = stackSize + 1; +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ code.put12(opcode, i.index); +++ } +++ +++ @Override +++ public void visitFieldInsn(final int opcode, final String owner, +++ final String name, final String desc) { +++ lastCodeOffset = code.length; +++ Item i = cw.newFieldItem(owner, name, desc); +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, 0, cw, i); +++ } else { +++ int size; +++ // computes the stack size variation +++ char c = desc.charAt(0); +++ switch (opcode) { +++ case Opcodes.GETSTATIC: +++ size = stackSize + (c == 'D' || c == 'J' ? 2 : 1); +++ break; +++ case Opcodes.PUTSTATIC: +++ size = stackSize + (c == 'D' || c == 'J' ? -2 : -1); +++ break; +++ case Opcodes.GETFIELD: +++ size = stackSize + (c == 'D' || c == 'J' ? 1 : 0); +++ break; +++ // case Constants.PUTFIELD: +++ default: +++ size = stackSize + (c == 'D' || c == 'J' ? 
-3 : -2); +++ break; +++ } +++ // updates current and max stack sizes +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ code.put12(opcode, i.index); +++ } +++ +++ @Override +++ public void visitMethodInsn(final int opcode, final String owner, +++ final String name, final String desc, final boolean itf) { +++ lastCodeOffset = code.length; +++ Item i = cw.newMethodItem(owner, name, desc, itf); +++ int argSize = i.intVal; +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, 0, cw, i); +++ } else { +++ /* +++ * computes the stack size variation. In order not to recompute +++ * several times this variation for the same Item, we use the +++ * intVal field of this item to store this variation, once it +++ * has been computed. More precisely this intVal field stores +++ * the sizes of the arguments and of the return value +++ * corresponding to desc. +++ */ +++ if (argSize == 0) { +++ // the above sizes have not been computed yet, +++ // so we compute them... +++ argSize = Type.getArgumentsAndReturnSizes(desc); +++ // ... 
and we save them in order +++ // not to recompute them in the future +++ i.intVal = argSize; +++ } +++ int size; +++ if (opcode == Opcodes.INVOKESTATIC) { +++ size = stackSize - (argSize >> 2) + (argSize & 0x03) + 1; +++ } else { +++ size = stackSize - (argSize >> 2) + (argSize & 0x03); +++ } +++ // updates current and max stack sizes +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ if (opcode == Opcodes.INVOKEINTERFACE) { +++ if (argSize == 0) { +++ argSize = Type.getArgumentsAndReturnSizes(desc); +++ i.intVal = argSize; +++ } +++ code.put12(Opcodes.INVOKEINTERFACE, i.index).put11(argSize >> 2, 0); +++ } else { +++ code.put12(opcode, i.index); +++ } +++ } +++ +++ @Override +++ public void visitInvokeDynamicInsn(final String name, final String desc, +++ final Handle bsm, final Object... bsmArgs) { +++ lastCodeOffset = code.length; +++ Item i = cw.newInvokeDynamicItem(name, desc, bsm, bsmArgs); +++ int argSize = i.intVal; +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(Opcodes.INVOKEDYNAMIC, 0, cw, i); +++ } else { +++ /* +++ * computes the stack size variation. In order not to recompute +++ * several times this variation for the same Item, we use the +++ * intVal field of this item to store this variation, once it +++ * has been computed. More precisely this intVal field stores +++ * the sizes of the arguments and of the return value +++ * corresponding to desc. +++ */ +++ if (argSize == 0) { +++ // the above sizes have not been computed yet, +++ // so we compute them... +++ argSize = Type.getArgumentsAndReturnSizes(desc); +++ // ... 
and we save them in order +++ // not to recompute them in the future +++ i.intVal = argSize; +++ } +++ int size = stackSize - (argSize >> 2) + (argSize & 0x03) + 1; +++ +++ // updates current and max stack sizes +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ code.put12(Opcodes.INVOKEDYNAMIC, i.index); +++ code.putShort(0); +++ } +++ +++ @Override +++ public void visitJumpInsn(final int opcode, final Label label) { +++ lastCodeOffset = code.length; +++ Label nextInsn = null; +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, 0, null, null); +++ // 'label' is the target of a jump instruction +++ label.getFirst().status |= Label.TARGET; +++ // adds 'label' as a successor of this basic block +++ addSuccessor(Edge.NORMAL, label); +++ if (opcode != Opcodes.GOTO) { +++ // creates a Label for the next basic block +++ nextInsn = new Label(); +++ } +++ } else { +++ if (opcode == Opcodes.JSR) { +++ if ((label.status & Label.SUBROUTINE) == 0) { +++ label.status |= Label.SUBROUTINE; +++ ++subroutines; +++ } +++ currentBlock.status |= Label.JSR; +++ addSuccessor(stackSize + 1, label); +++ // creates a Label for the next basic block +++ nextInsn = new Label(); +++ /* +++ * note that, by construction in this method, a JSR block +++ * has at least two successors in the control flow graph: +++ * the first one leads the next instruction after the JSR, +++ * while the second one leads to the JSR target. 
+++ */ +++ } else { +++ // updates current stack size (max stack size unchanged +++ // because stack size variation always negative in this +++ // case) +++ stackSize += Frame.SIZE[opcode]; +++ addSuccessor(stackSize, label); +++ } +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ if ((label.status & Label.RESOLVED) != 0 +++ && label.position - code.length < Short.MIN_VALUE) { +++ /* +++ * case of a backward jump with an offset < -32768. In this case we +++ * automatically replace GOTO with GOTO_W, JSR with JSR_W and IFxxx <l> +++ * with IFNOTxxx <l'> GOTO_W <l>, where IFNOTxxx is the +++ * "opposite" opcode of IFxxx (i.e., IFNE for IFEQ) and where <l'> +++ * designates the instruction just after the GOTO_W. +++ */ +++ if (opcode == Opcodes.GOTO) { +++ code.putByte(200); // GOTO_W +++ } else if (opcode == Opcodes.JSR) { +++ code.putByte(201); // JSR_W +++ } else { +++ // if the IF instruction is transformed into IFNOT GOTO_W the +++ // next instruction becomes the target of the IFNOT instruction +++ if (nextInsn != null) { +++ nextInsn.status |= Label.TARGET; +++ } +++ code.putByte(opcode <= 166 ? ((opcode + 1) ^ 1) - 1 +++ : opcode ^ 1); +++ code.putShort(8); // jump offset +++ code.putByte(200); // GOTO_W +++ } +++ label.put(this, code, code.length - 1, true); +++ } else { +++ /* +++ * case of a backward jump with an offset >= -32768, or of a forward +++ * jump with, of course, an unknown offset. In these cases we store +++ * the offset in 2 bytes (which will be increased in +++ * resizeInstructions, if needed). +++ */ +++ code.putByte(opcode); +++ label.put(this, code, code.length - 1, false); +++ } +++ if (currentBlock != null) { +++ if (nextInsn != null) { +++ // if the jump instruction is not a GOTO, the next instruction +++ // is also a successor of this instruction.
Calling visitLabel +++ // adds the label of this next instruction as a successor of the +++ // current block, and starts a new basic block +++ visitLabel(nextInsn); +++ } +++ if (opcode == Opcodes.GOTO) { +++ noSuccessor(); +++ } +++ } +++ } +++ +++ @Override +++ public void visitLabel(final Label label) { +++ // resolves previous forward references to label, if any +++ resize |= label.resolve(this, code.length, code.data); +++ // updates currentBlock +++ if ((label.status & Label.DEBUG) != 0) { +++ return; +++ } +++ if (compute == FRAMES) { +++ if (currentBlock != null) { +++ if (label.position == currentBlock.position) { +++ // successive labels, do not start a new basic block +++ currentBlock.status |= (label.status & Label.TARGET); +++ label.frame = currentBlock.frame; +++ return; +++ } +++ // ends current block (with one new successor) +++ addSuccessor(Edge.NORMAL, label); +++ } +++ // begins a new current block +++ currentBlock = label; +++ if (label.frame == null) { +++ label.frame = new Frame(); +++ label.frame.owner = label; +++ } +++ // updates the basic block list +++ if (previousBlock != null) { +++ if (label.position == previousBlock.position) { +++ previousBlock.status |= (label.status & Label.TARGET); +++ label.frame = previousBlock.frame; +++ currentBlock = previousBlock; +++ return; +++ } +++ previousBlock.successor = label; +++ } +++ previousBlock = label; +++ } else if (compute == MAXS) { +++ if (currentBlock != null) { +++ // ends current block (with one new successor) +++ currentBlock.outputStackMax = maxStackSize; +++ addSuccessor(stackSize, label); +++ } +++ // begins a new current block +++ currentBlock = label; +++ // resets the relative current and max stack sizes +++ stackSize = 0; +++ maxStackSize = 0; +++ // updates the basic block list +++ if (previousBlock != null) { +++ previousBlock.successor = label; +++ } +++ previousBlock = label; +++ } +++ } +++ +++ @Override +++ public void visitLdcInsn(final Object cst) { +++ lastCodeOffset = 
code.length; +++ Item i = cw.newConstItem(cst); +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(Opcodes.LDC, 0, cw, i); +++ } else { +++ int size; +++ // computes the stack size variation +++ if (i.type == ClassWriter.LONG || i.type == ClassWriter.DOUBLE) { +++ size = stackSize + 2; +++ } else { +++ size = stackSize + 1; +++ } +++ // updates current and max stack sizes +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ int index = i.index; +++ if (i.type == ClassWriter.LONG || i.type == ClassWriter.DOUBLE) { +++ code.put12(20 /* LDC2_W */, index); +++ } else if (index >= 256) { +++ code.put12(19 /* LDC_W */, index); +++ } else { +++ code.put11(Opcodes.LDC, index); +++ } +++ } +++ +++ @Override +++ public void visitIincInsn(final int var, final int increment) { +++ lastCodeOffset = code.length; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(Opcodes.IINC, var, null, null); +++ } +++ } +++ if (compute != NOTHING) { +++ // updates max locals +++ int n = var + 1; +++ if (n > maxLocals) { +++ maxLocals = n; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ if ((var > 255) || (increment > 127) || (increment < -128)) { +++ code.putByte(196 /* WIDE */).put12(Opcodes.IINC, var) +++ .putShort(increment); +++ } else { +++ code.putByte(Opcodes.IINC).put11(var, increment); +++ } +++ } +++ +++ @Override +++ public void visitTableSwitchInsn(final int min, final int max, +++ final Label dflt, final Label... 
labels) { +++ lastCodeOffset = code.length; +++ // adds the instruction to the bytecode of the method +++ int source = code.length; +++ code.putByte(Opcodes.TABLESWITCH); +++ code.putByteArray(null, 0, (4 - code.length % 4) % 4); +++ dflt.put(this, code, source, true); +++ code.putInt(min).putInt(max); +++ for (int i = 0; i < labels.length; ++i) { +++ labels[i].put(this, code, source, true); +++ } +++ // updates currentBlock +++ visitSwitchInsn(dflt, labels); +++ } +++ +++ @Override +++ public void visitLookupSwitchInsn(final Label dflt, final int[] keys, +++ final Label[] labels) { +++ lastCodeOffset = code.length; +++ // adds the instruction to the bytecode of the method +++ int source = code.length; +++ code.putByte(Opcodes.LOOKUPSWITCH); +++ code.putByteArray(null, 0, (4 - code.length % 4) % 4); +++ dflt.put(this, code, source, true); +++ code.putInt(labels.length); +++ for (int i = 0; i < labels.length; ++i) { +++ code.putInt(keys[i]); +++ labels[i].put(this, code, source, true); +++ } +++ // updates currentBlock +++ visitSwitchInsn(dflt, labels); +++ } +++ +++ private void visitSwitchInsn(final Label dflt, final Label[] labels) { +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(Opcodes.LOOKUPSWITCH, 0, null, null); +++ // adds current block successors +++ addSuccessor(Edge.NORMAL, dflt); +++ dflt.getFirst().status |= Label.TARGET; +++ for (int i = 0; i < labels.length; ++i) { +++ addSuccessor(Edge.NORMAL, labels[i]); +++ labels[i].getFirst().status |= Label.TARGET; +++ } +++ } else { +++ // updates current stack size (max stack size unchanged) +++ --stackSize; +++ // adds current block successors +++ addSuccessor(stackSize, dflt); +++ for (int i = 0; i < labels.length; ++i) { +++ addSuccessor(stackSize, labels[i]); +++ } +++ } +++ // ends current block +++ noSuccessor(); +++ } +++ } +++ +++ @Override +++ public void visitMultiANewArrayInsn(final String desc, final int 
dims) { +++ lastCodeOffset = code.length; +++ Item i = cw.newClassItem(desc); +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(Opcodes.MULTIANEWARRAY, dims, cw, i); +++ } else { +++ // updates current stack size (max stack size unchanged because +++ // stack size variation always negative or null) +++ stackSize += 1 - dims; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ code.put12(Opcodes.MULTIANEWARRAY, i.index).putByte(dims); +++ } +++ +++ @Override +++ public AnnotationVisitor visitInsnAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ typeRef = (typeRef & 0xFF0000FF) | (lastCodeOffset << 8); +++ AnnotationWriter.putTarget(typeRef, typePath, bv); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = ctanns; +++ ctanns = aw; +++ } else { +++ aw.next = ictanns; +++ ictanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public void visitTryCatchBlock(final Label start, final Label end, +++ final Label handler, final String type) { +++ ++handlerCount; +++ Handler h = new Handler(); +++ h.start = start; +++ h.end = end; +++ h.handler = handler; +++ h.desc = type; +++ h.type = type != null ? 
cw.newClass(type) : 0; +++ if (lastHandler == null) { +++ firstHandler = h; +++ } else { +++ lastHandler.next = h; +++ } +++ lastHandler = h; +++ } +++ +++ @Override +++ public AnnotationVisitor visitTryCatchAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ AnnotationWriter.putTarget(typeRef, typePath, bv); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = ctanns; +++ ctanns = aw; +++ } else { +++ aw.next = ictanns; +++ ictanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public void visitLocalVariable(final String name, final String desc, +++ final String signature, final Label start, final Label end, +++ final int index) { +++ if (signature != null) { +++ if (localVarType == null) { +++ localVarType = new ByteVector(); +++ } +++ ++localVarTypeCount; +++ localVarType.putShort(start.position) +++ .putShort(end.position - start.position) +++ .putShort(cw.newUTF8(name)).putShort(cw.newUTF8(signature)) +++ .putShort(index); +++ } +++ if (localVar == null) { +++ localVar = new ByteVector(); +++ } +++ ++localVarCount; +++ localVar.putShort(start.position) +++ .putShort(end.position - start.position) +++ .putShort(cw.newUTF8(name)).putShort(cw.newUTF8(desc)) +++ .putShort(index); +++ if (compute != NOTHING) { +++ // updates max locals +++ char c = desc.charAt(0); +++ int n = index + (c == 'J' || c == 'D' ? 
2 : 1); +++ if (n > maxLocals) { +++ maxLocals = n; +++ } +++ } +++ } +++ +++ @Override +++ public AnnotationVisitor visitLocalVariableAnnotation(int typeRef, +++ TypePath typePath, Label[] start, Label[] end, int[] index, +++ String desc, boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ bv.putByte(typeRef >>> 24).putShort(start.length); +++ for (int i = 0; i < start.length; ++i) { +++ bv.putShort(start[i].position) +++ .putShort(end[i].position - start[i].position) +++ .putShort(index[i]); +++ } +++ if (typePath == null) { +++ bv.putByte(0); +++ } else { +++ int length = typePath.b[typePath.offset] * 2 + 1; +++ bv.putByteArray(typePath.b, typePath.offset, length); +++ } +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = ctanns; +++ ctanns = aw; +++ } else { +++ aw.next = ictanns; +++ ictanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public void visitLineNumber(final int line, final Label start) { +++ if (lineNumber == null) { +++ lineNumber = new ByteVector(); +++ } +++ ++lineNumberCount; +++ lineNumber.putShort(start.position); +++ lineNumber.putShort(line); +++ } +++ +++ @Override +++ public void visitMaxs(final int maxStack, final int maxLocals) { +++ if (resize) { +++ // replaces the temporary jump opcodes introduced by Label.resolve. 
+++ if (ClassReader.RESIZE) { +++ resizeInstructions(); +++ } else { +++ throw new RuntimeException("Method code too large!"); +++ } +++ } +++ if (ClassReader.FRAMES && compute == FRAMES) { +++ // completes the control flow graph with exception handler blocks +++ Handler handler = firstHandler; +++ while (handler != null) { +++ Label l = handler.start.getFirst(); +++ Label h = handler.handler.getFirst(); +++ Label e = handler.end.getFirst(); +++ // computes the kind of the edges to 'h' +++ String t = handler.desc == null ? "java/lang/Throwable" +++ : handler.desc; +++ int kind = Frame.OBJECT | cw.addType(t); +++ // h is an exception handler +++ h.status |= Label.TARGET; +++ // adds 'h' as a successor of labels between 'start' and 'end' +++ while (l != e) { +++ // creates an edge to 'h' +++ Edge b = new Edge(); +++ b.info = kind; +++ b.successor = h; +++ // adds it to the successors of 'l' +++ b.next = l.successors; +++ l.successors = b; +++ // goes to the next label +++ l = l.successor; +++ } +++ handler = handler.next; +++ } +++ +++ // creates and visits the first (implicit) frame +++ Frame f = labels.frame; +++ Type[] args = Type.getArgumentTypes(descriptor); +++ f.initInputFrame(cw, access, args, this.maxLocals); +++ visitFrame(f); +++ +++ /* +++ * fix point algorithm: mark the first basic block as 'changed' +++ * (i.e. put it in the 'changed' list) and, while there are changed +++ * basic blocks, choose one, mark it as unchanged, and update its +++ * successors (which can be changed in the process). 
+++ */ +++ int max = 0; +++ Label changed = labels; +++ while (changed != null) { +++ // removes a basic block from the list of changed basic blocks +++ Label l = changed; +++ changed = changed.next; +++ l.next = null; +++ f = l.frame; +++ // a reachable jump target must be stored in the stack map +++ if ((l.status & Label.TARGET) != 0) { +++ l.status |= Label.STORE; +++ } +++ // all visited labels are reachable, by definition +++ l.status |= Label.REACHABLE; +++ // updates the (absolute) maximum stack size +++ int blockMax = f.inputStack.length + l.outputStackMax; +++ if (blockMax > max) { +++ max = blockMax; +++ } +++ // updates the successors of the current basic block +++ Edge e = l.successors; +++ while (e != null) { +++ Label n = e.successor.getFirst(); +++ boolean change = f.merge(cw, n.frame, e.info); +++ if (change && n.next == null) { +++ // if n has changed and is not already in the 'changed' +++ // list, adds it to this list +++ n.next = changed; +++ changed = n; +++ } +++ e = e.next; +++ } +++ } +++ +++ // visits all the frames that must be stored in the stack map +++ Label l = labels; +++ while (l != null) { +++ f = l.frame; +++ if ((l.status & Label.STORE) != 0) { +++ visitFrame(f); +++ } +++ if ((l.status & Label.REACHABLE) == 0) { +++ // finds start and end of dead basic block +++ Label k = l.successor; +++ int start = l.position; +++ int end = (k == null ? code.length : k.position) - 1; +++ // if non empty basic block +++ if (end >= start) { +++ max = Math.max(max, 1); +++ // replaces instructions with NOP ... 
NOP ATHROW +++ for (int i = start; i < end; ++i) { +++ code.data[i] = Opcodes.NOP; +++ } +++ code.data[end] = (byte) Opcodes.ATHROW; +++ // emits a frame for this unreachable block +++ int frameIndex = startFrame(start, 0, 1); +++ frame[frameIndex] = Frame.OBJECT +++ | cw.addType("java/lang/Throwable"); +++ endFrame(); +++ // removes the start-end range from the exception +++ // handlers +++ firstHandler = Handler.remove(firstHandler, l, k); +++ } +++ } +++ l = l.successor; +++ } +++ +++ handler = firstHandler; +++ handlerCount = 0; +++ while (handler != null) { +++ handlerCount += 1; +++ handler = handler.next; +++ } +++ +++ this.maxStack = max; +++ } else if (compute == MAXS) { +++ // completes the control flow graph with exception handler blocks +++ Handler handler = firstHandler; +++ while (handler != null) { +++ Label l = handler.start; +++ Label h = handler.handler; +++ Label e = handler.end; +++ // adds 'h' as a successor of labels between 'start' and 'end' +++ while (l != e) { +++ // creates an edge to 'h' +++ Edge b = new Edge(); +++ b.info = Edge.EXCEPTION; +++ b.successor = h; +++ // adds it to the successors of 'l' +++ if ((l.status & Label.JSR) == 0) { +++ b.next = l.successors; +++ l.successors = b; +++ } else { +++ // if l is a JSR block, adds b after the first two edges +++ // to preserve the hypothesis about JSR block successors +++ // order (see {@link #visitJumpInsn}) +++ b.next = l.successors.next.next; +++ l.successors.next.next = b; +++ } +++ // goes to the next label +++ l = l.successor; +++ } +++ handler = handler.next; +++ } +++ +++ if (subroutines > 0) { +++ // completes the control flow graph with the RET successors +++ /* +++ * first step: finds the subroutines. This step determines, for +++ * each basic block, to which subroutine(s) it belongs. 
+++ */ +++ // finds the basic blocks that belong to the "main" subroutine +++ int id = 0; +++ labels.visitSubroutine(null, 1, subroutines); +++ // finds the basic blocks that belong to the real subroutines +++ Label l = labels; +++ while (l != null) { +++ if ((l.status & Label.JSR) != 0) { +++ // the subroutine is defined by l's TARGET, not by l +++ Label subroutine = l.successors.next.successor; +++ // if this subroutine has not been visited yet... +++ if ((subroutine.status & Label.VISITED) == 0) { +++ // ...assigns it a new id and finds its basic blocks +++ id += 1; +++ subroutine.visitSubroutine(null, (id / 32L) << 32 +++ | (1L << (id % 32)), subroutines); +++ } +++ } +++ l = l.successor; +++ } +++ // second step: finds the successors of RET blocks +++ l = labels; +++ while (l != null) { +++ if ((l.status & Label.JSR) != 0) { +++ Label L = labels; +++ while (L != null) { +++ L.status &= ~Label.VISITED2; +++ L = L.successor; +++ } +++ // the subroutine is defined by l's TARGET, not by l +++ Label subroutine = l.successors.next.successor; +++ subroutine.visitSubroutine(l, 0, subroutines); +++ } +++ l = l.successor; +++ } +++ } +++ +++ /* +++ * control flow analysis algorithm: while the block stack is not +++ * empty, pop a block from this stack, update the max stack size, +++ * compute the true (non relative) begin stack size of the +++ * successors of this block, and push these successors onto the +++ * stack (unless they have already been pushed onto the stack). +++ * Note: by hypothesis, the {@link Label#inputStackTop} of the +++ * blocks in the block stack are the true (non relative) beginning +++ * stack sizes of these blocks. 
+++ */ +++ int max = 0; +++ Label stack = labels; +++ while (stack != null) { +++ // pops a block from the stack +++ Label l = stack; +++ stack = stack.next; +++ // computes the true (non relative) max stack size of this block +++ int start = l.inputStackTop; +++ int blockMax = start + l.outputStackMax; +++ // updates the global max stack size +++ if (blockMax > max) { +++ max = blockMax; +++ } +++ // analyzes the successors of the block +++ Edge b = l.successors; +++ if ((l.status & Label.JSR) != 0) { +++ // ignores the first edge of JSR blocks (virtual successor) +++ b = b.next; +++ } +++ while (b != null) { +++ l = b.successor; +++ // if this successor has not already been pushed... +++ if ((l.status & Label.PUSHED) == 0) { +++ // computes its true beginning stack size... +++ l.inputStackTop = b.info == Edge.EXCEPTION ? 1 : start +++ + b.info; +++ // ...and pushes it onto the stack +++ l.status |= Label.PUSHED; +++ l.next = stack; +++ stack = l; +++ } +++ b = b.next; +++ } +++ } +++ this.maxStack = Math.max(maxStack, max); +++ } else { +++ this.maxStack = maxStack; +++ this.maxLocals = maxLocals; +++ } +++ } +++ +++ @Override +++ public void visitEnd() { +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: control flow analysis algorithm +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Adds a successor to the {@link #currentBlock currentBlock} block. +++ * +++ * @param info +++ * information about the control flow edge to be added. +++ * @param successor +++ * the successor block to be added to the current block. +++ */ +++ private void addSuccessor(final int info, final Label successor) { +++ // creates and initializes an Edge object... 
+++ Edge b = new Edge(); +++ b.info = info; +++ b.successor = successor; +++ // ...and adds it to the successor list of the currentBlock block +++ b.next = currentBlock.successors; +++ currentBlock.successors = b; +++ } +++ +++ /** +++ * Ends the current basic block. This method must be used in the case where +++ * the current basic block does not have any successor. +++ */ +++ private void noSuccessor() { +++ if (compute == FRAMES) { +++ Label l = new Label(); +++ l.frame = new Frame(); +++ l.frame.owner = l; +++ l.resolve(this, code.length, code.data); +++ previousBlock.successor = l; +++ previousBlock = l; +++ } else { +++ currentBlock.outputStackMax = maxStackSize; +++ } +++ currentBlock = null; +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: stack map frames +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Visits a frame that has been computed from scratch. +++ * +++ * @param f +++ * the frame that must be visited. 
+++ */ +++ private void visitFrame(final Frame f) { +++ int i, t; +++ int nTop = 0; +++ int nLocal = 0; +++ int nStack = 0; +++ int[] locals = f.inputLocals; +++ int[] stacks = f.inputStack; +++ // computes the number of locals (ignores TOP types that are just after +++ // a LONG or a DOUBLE, and all trailing TOP types) +++ for (i = 0; i < locals.length; ++i) { +++ t = locals[i]; +++ if (t == Frame.TOP) { +++ ++nTop; +++ } else { +++ nLocal += nTop + 1; +++ nTop = 0; +++ } +++ if (t == Frame.LONG || t == Frame.DOUBLE) { +++ ++i; +++ } +++ } +++ // computes the stack size (ignores TOP types that are just after +++ // a LONG or a DOUBLE) +++ for (i = 0; i < stacks.length; ++i) { +++ t = stacks[i]; +++ ++nStack; +++ if (t == Frame.LONG || t == Frame.DOUBLE) { +++ ++i; +++ } +++ } +++ // visits the frame and its content +++ int frameIndex = startFrame(f.owner.position, nLocal, nStack); +++ for (i = 0; nLocal > 0; ++i, --nLocal) { +++ t = locals[i]; +++ frame[frameIndex++] = t; +++ if (t == Frame.LONG || t == Frame.DOUBLE) { +++ ++i; +++ } +++ } +++ for (i = 0; i < stacks.length; ++i) { +++ t = stacks[i]; +++ frame[frameIndex++] = t; +++ if (t == Frame.LONG || t == Frame.DOUBLE) { +++ ++i; +++ } +++ } +++ endFrame(); +++ } +++ +++ /** +++ * Visit the implicit first frame of this method. 
+++ */ +++ private void visitImplicitFirstFrame() { +++ // There can be at most descriptor.length() + 1 locals +++ int frameIndex = startFrame(0, descriptor.length() + 1, 0); +++ if ((access & Opcodes.ACC_STATIC) == 0) { +++ if ((access & ACC_CONSTRUCTOR) == 0) { +++ frame[frameIndex++] = Frame.OBJECT | cw.addType(cw.thisName); +++ } else { +++ frame[frameIndex++] = 6; // Opcodes.UNINITIALIZED_THIS; +++ } +++ } +++ int i = 1; +++ loop: while (true) { +++ int j = i; +++ switch (descriptor.charAt(i++)) { +++ case 'Z': +++ case 'C': +++ case 'B': +++ case 'S': +++ case 'I': +++ frame[frameIndex++] = 1; // Opcodes.INTEGER; +++ break; +++ case 'F': +++ frame[frameIndex++] = 2; // Opcodes.FLOAT; +++ break; +++ case 'J': +++ frame[frameIndex++] = 4; // Opcodes.LONG; +++ break; +++ case 'D': +++ frame[frameIndex++] = 3; // Opcodes.DOUBLE; +++ break; +++ case '[': +++ while (descriptor.charAt(i) == '[') { +++ ++i; +++ } +++ if (descriptor.charAt(i) == 'L') { +++ ++i; +++ while (descriptor.charAt(i) != ';') { +++ ++i; +++ } +++ } +++ frame[frameIndex++] = Frame.OBJECT +++ | cw.addType(descriptor.substring(j, ++i)); +++ break; +++ case 'L': +++ while (descriptor.charAt(i) != ';') { +++ ++i; +++ } +++ frame[frameIndex++] = Frame.OBJECT +++ | cw.addType(descriptor.substring(j + 1, i++)); +++ break; +++ default: +++ break loop; +++ } +++ } +++ frame[1] = frameIndex - 3; +++ endFrame(); +++ } +++ +++ /** +++ * Starts the visit of a stack map frame. +++ * +++ * @param offset +++ * the offset of the instruction to which the frame corresponds. +++ * @param nLocal +++ * the number of local variables in the frame. +++ * @param nStack +++ * the number of stack elements in the frame. +++ * @return the index of the next element to be written in this frame. 
+++ */ +++ private int startFrame(final int offset, final int nLocal, final int nStack) { +++ int n = 3 + nLocal + nStack; +++ if (frame == null || frame.length < n) { +++ frame = new int[n]; +++ } +++ frame[0] = offset; +++ frame[1] = nLocal; +++ frame[2] = nStack; +++ return 3; +++ } +++ +++ /** +++ * Checks if the visit of the current frame {@link #frame} is finished, and +++ * if yes, write it in the StackMapTable attribute. +++ */ +++ private void endFrame() { +++ if (previousFrame != null) { // do not write the first frame +++ if (stackMap == null) { +++ stackMap = new ByteVector(); +++ } +++ writeFrame(); +++ ++frameCount; +++ } +++ previousFrame = frame; +++ frame = null; +++ } +++ +++ /** +++ * Compress and writes the current frame {@link #frame} in the StackMapTable +++ * attribute. +++ */ +++ private void writeFrame() { +++ int clocalsSize = frame[1]; +++ int cstackSize = frame[2]; +++ if ((cw.version & 0xFFFF) < Opcodes.V1_6) { +++ stackMap.putShort(frame[0]).putShort(clocalsSize); +++ writeFrameTypes(3, 3 + clocalsSize); +++ stackMap.putShort(cstackSize); +++ writeFrameTypes(3 + clocalsSize, 3 + clocalsSize + cstackSize); +++ return; +++ } +++ int localsSize = previousFrame[1]; +++ int type = FULL_FRAME; +++ int k = 0; +++ int delta; +++ if (frameCount == 0) { +++ delta = frame[0]; +++ } else { +++ delta = frame[0] - previousFrame[0] - 1; +++ } +++ if (cstackSize == 0) { +++ k = clocalsSize - localsSize; +++ switch (k) { +++ case -3: +++ case -2: +++ case -1: +++ type = CHOP_FRAME; +++ localsSize = clocalsSize; +++ break; +++ case 0: +++ type = delta < 64 ? SAME_FRAME : SAME_FRAME_EXTENDED; +++ break; +++ case 1: +++ case 2: +++ case 3: +++ type = APPEND_FRAME; +++ break; +++ } +++ } else if (clocalsSize == localsSize && cstackSize == 1) { +++ type = delta < 63 ? 
SAME_LOCALS_1_STACK_ITEM_FRAME +++ : SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED; +++ } +++ if (type != FULL_FRAME) { +++ // verify if locals are the same +++ int l = 3; +++ for (int j = 0; j < localsSize; j++) { +++ if (frame[l] != previousFrame[l]) { +++ type = FULL_FRAME; +++ break; +++ } +++ l++; +++ } +++ } +++ switch (type) { +++ case SAME_FRAME: +++ stackMap.putByte(delta); +++ break; +++ case SAME_LOCALS_1_STACK_ITEM_FRAME: +++ stackMap.putByte(SAME_LOCALS_1_STACK_ITEM_FRAME + delta); +++ writeFrameTypes(3 + clocalsSize, 4 + clocalsSize); +++ break; +++ case SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED: +++ stackMap.putByte(SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED).putShort( +++ delta); +++ writeFrameTypes(3 + clocalsSize, 4 + clocalsSize); +++ break; +++ case SAME_FRAME_EXTENDED: +++ stackMap.putByte(SAME_FRAME_EXTENDED).putShort(delta); +++ break; +++ case CHOP_FRAME: +++ stackMap.putByte(SAME_FRAME_EXTENDED + k).putShort(delta); +++ break; +++ case APPEND_FRAME: +++ stackMap.putByte(SAME_FRAME_EXTENDED + k).putShort(delta); +++ writeFrameTypes(3 + localsSize, 3 + clocalsSize); +++ break; +++ // case FULL_FRAME: +++ default: +++ stackMap.putByte(FULL_FRAME).putShort(delta).putShort(clocalsSize); +++ writeFrameTypes(3, 3 + clocalsSize); +++ stackMap.putShort(cstackSize); +++ writeFrameTypes(3 + clocalsSize, 3 + clocalsSize + cstackSize); +++ } +++ } +++ +++ /** +++ * Writes some types of the current frame {@link #frame} into the +++ * StackMapTableAttribute. This method converts types from the format used +++ * in {@link Label} to the format used in StackMapTable attributes. In +++ * particular, it converts type table indexes to constant pool indexes. +++ * +++ * @param start +++ * index of the first type in {@link #frame} to write. +++ * @param end +++ * index of last type in {@link #frame} to write (exclusive). 
+++ */ +++ private void writeFrameTypes(final int start, final int end) { +++ for (int i = start; i < end; ++i) { +++ int t = frame[i]; +++ int d = t & Frame.DIM; +++ if (d == 0) { +++ int v = t & Frame.BASE_VALUE; +++ switch (t & Frame.BASE_KIND) { +++ case Frame.OBJECT: +++ stackMap.putByte(7).putShort( +++ cw.newClass(cw.typeTable[v].strVal1)); +++ break; +++ case Frame.UNINITIALIZED: +++ stackMap.putByte(8).putShort(cw.typeTable[v].intVal); +++ break; +++ default: +++ stackMap.putByte(v); +++ } +++ } else { +++ StringBuilder sb = new StringBuilder(); +++ d >>= 28; +++ while (d-- > 0) { +++ sb.append('['); +++ } +++ if ((t & Frame.BASE_KIND) == Frame.OBJECT) { +++ sb.append('L'); +++ sb.append(cw.typeTable[t & Frame.BASE_VALUE].strVal1); +++ sb.append(';'); +++ } else { +++ switch (t & 0xF) { +++ case 1: +++ sb.append('I'); +++ break; +++ case 2: +++ sb.append('F'); +++ break; +++ case 3: +++ sb.append('D'); +++ break; +++ case 9: +++ sb.append('Z'); +++ break; +++ case 10: +++ sb.append('B'); +++ break; +++ case 11: +++ sb.append('C'); +++ break; +++ case 12: +++ sb.append('S'); +++ break; +++ default: +++ sb.append('J'); +++ } +++ } +++ stackMap.putByte(7).putShort(cw.newClass(sb.toString())); +++ } +++ } +++ } +++ +++ private void writeFrameType(final Object type) { +++ if (type instanceof String) { +++ stackMap.putByte(7).putShort(cw.newClass((String) type)); +++ } else if (type instanceof Integer) { +++ stackMap.putByte(((Integer) type).intValue()); +++ } else { +++ stackMap.putByte(8).putShort(((Label) type).position); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: dump bytecode array +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the size of the bytecode of this method. +++ * +++ * @return the size of the bytecode of this method. 
+++ */ +++ final int getSize() { +++ if (classReaderOffset != 0) { +++ return 6 + classReaderLength; +++ } +++ int size = 8; +++ if (code.length > 0) { +++ if (code.length > 65536) { +++ throw new RuntimeException("Method code too large!"); +++ } +++ cw.newUTF8("Code"); +++ size += 18 + code.length + 8 * handlerCount; +++ if (localVar != null) { +++ cw.newUTF8("LocalVariableTable"); +++ size += 8 + localVar.length; +++ } +++ if (localVarType != null) { +++ cw.newUTF8("LocalVariableTypeTable"); +++ size += 8 + localVarType.length; +++ } +++ if (lineNumber != null) { +++ cw.newUTF8("LineNumberTable"); +++ size += 8 + lineNumber.length; +++ } +++ if (stackMap != null) { +++ boolean zip = (cw.version & 0xFFFF) >= Opcodes.V1_6; +++ cw.newUTF8(zip ? "StackMapTable" : "StackMap"); +++ size += 8 + stackMap.length; +++ } +++ if (ClassReader.ANNOTATIONS && ctanns != null) { +++ cw.newUTF8("RuntimeVisibleTypeAnnotations"); +++ size += 8 + ctanns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && ictanns != null) { +++ cw.newUTF8("RuntimeInvisibleTypeAnnotations"); +++ size += 8 + ictanns.getSize(); +++ } +++ if (cattrs != null) { +++ size += cattrs.getSize(cw, code.data, code.length, maxStack, +++ maxLocals); +++ } +++ } +++ if (exceptionCount > 0) { +++ cw.newUTF8("Exceptions"); +++ size += 8 + 2 * exceptionCount; +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ cw.newUTF8("Synthetic"); +++ size += 6; +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ cw.newUTF8("Deprecated"); +++ size += 6; +++ } +++ if (ClassReader.SIGNATURES && signature != null) { +++ cw.newUTF8("Signature"); +++ cw.newUTF8(signature); +++ size += 8; +++ } +++ if (methodParameters != null) { +++ cw.newUTF8("MethodParameters"); +++ size += 7 + methodParameters.length; +++ } +++ if (ClassReader.ANNOTATIONS && annd != null) { +++ cw.newUTF8("AnnotationDefault"); +++ size += 6 
+ annd.length; +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ cw.newUTF8("RuntimeVisibleAnnotations"); +++ size += 8 + anns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ cw.newUTF8("RuntimeInvisibleAnnotations"); +++ size += 8 + ianns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ cw.newUTF8("RuntimeVisibleTypeAnnotations"); +++ size += 8 + tanns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ cw.newUTF8("RuntimeInvisibleTypeAnnotations"); +++ size += 8 + itanns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && panns != null) { +++ cw.newUTF8("RuntimeVisibleParameterAnnotations"); +++ size += 7 + 2 * (panns.length - synthetics); +++ for (int i = panns.length - 1; i >= synthetics; --i) { +++ size += panns[i] == null ? 0 : panns[i].getSize(); +++ } +++ } +++ if (ClassReader.ANNOTATIONS && ipanns != null) { +++ cw.newUTF8("RuntimeInvisibleParameterAnnotations"); +++ size += 7 + 2 * (ipanns.length - synthetics); +++ for (int i = ipanns.length - 1; i >= synthetics; --i) { +++ size += ipanns[i] == null ? 0 : ipanns[i].getSize(); +++ } +++ } +++ if (attrs != null) { +++ size += attrs.getSize(cw, null, 0, -1, -1); +++ } +++ return size; +++ } +++ +++ /** +++ * Puts the bytecode of this method in the given byte vector. +++ * +++ * @param out +++ * the byte vector into which the bytecode of this method must be +++ * copied. 
+++ */ +++ final void put(final ByteVector out) { +++ final int FACTOR = ClassWriter.TO_ACC_SYNTHETIC; +++ int mask = ACC_CONSTRUCTOR | Opcodes.ACC_DEPRECATED +++ | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE +++ | ((access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) / FACTOR); +++ out.putShort(access & ~mask).putShort(name).putShort(desc); +++ if (classReaderOffset != 0) { +++ out.putByteArray(cw.cr.b, classReaderOffset, classReaderLength); +++ return; +++ } +++ int attributeCount = 0; +++ if (code.length > 0) { +++ ++attributeCount; +++ } +++ if (exceptionCount > 0) { +++ ++attributeCount; +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ ++attributeCount; +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ ++attributeCount; +++ } +++ if (ClassReader.SIGNATURES && signature != null) { +++ ++attributeCount; +++ } +++ if (methodParameters != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && annd != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && panns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && ipanns != null) { +++ ++attributeCount; +++ } +++ if (attrs != null) { +++ attributeCount += attrs.getCount(); +++ } +++ out.putShort(attributeCount); +++ if (code.length > 0) { +++ int size = 12 + code.length + 8 * handlerCount; +++ if (localVar != null) { +++ size += 8 + localVar.length; +++ } +++ if (localVarType != null) { +++ size += 8 + localVarType.length; +++ } +++ if (lineNumber != null) { +++ size += 8 + lineNumber.length; +++ } +++ if (stackMap != null) { +++ 
size += 8 + stackMap.length; +++ } +++ if (ClassReader.ANNOTATIONS && ctanns != null) { +++ size += 8 + ctanns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && ictanns != null) { +++ size += 8 + ictanns.getSize(); +++ } +++ if (cattrs != null) { +++ size += cattrs.getSize(cw, code.data, code.length, maxStack, +++ maxLocals); +++ } +++ out.putShort(cw.newUTF8("Code")).putInt(size); +++ out.putShort(maxStack).putShort(maxLocals); +++ out.putInt(code.length).putByteArray(code.data, 0, code.length); +++ out.putShort(handlerCount); +++ if (handlerCount > 0) { +++ Handler h = firstHandler; +++ while (h != null) { +++ out.putShort(h.start.position).putShort(h.end.position) +++ .putShort(h.handler.position).putShort(h.type); +++ h = h.next; +++ } +++ } +++ attributeCount = 0; +++ if (localVar != null) { +++ ++attributeCount; +++ } +++ if (localVarType != null) { +++ ++attributeCount; +++ } +++ if (lineNumber != null) { +++ ++attributeCount; +++ } +++ if (stackMap != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && ctanns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && ictanns != null) { +++ ++attributeCount; +++ } +++ if (cattrs != null) { +++ attributeCount += cattrs.getCount(); +++ } +++ out.putShort(attributeCount); +++ if (localVar != null) { +++ out.putShort(cw.newUTF8("LocalVariableTable")); +++ out.putInt(localVar.length + 2).putShort(localVarCount); +++ out.putByteArray(localVar.data, 0, localVar.length); +++ } +++ if (localVarType != null) { +++ out.putShort(cw.newUTF8("LocalVariableTypeTable")); +++ out.putInt(localVarType.length + 2).putShort(localVarTypeCount); +++ out.putByteArray(localVarType.data, 0, localVarType.length); +++ } +++ if (lineNumber != null) { +++ out.putShort(cw.newUTF8("LineNumberTable")); +++ out.putInt(lineNumber.length + 2).putShort(lineNumberCount); +++ out.putByteArray(lineNumber.data, 0, lineNumber.length); +++ } +++ if (stackMap != null) { +++ boolean zip = (cw.version & 0xFFFF) >= 
Opcodes.V1_6; +++ out.putShort(cw.newUTF8(zip ? "StackMapTable" : "StackMap")); +++ out.putInt(stackMap.length + 2).putShort(frameCount); +++ out.putByteArray(stackMap.data, 0, stackMap.length); +++ } +++ if (ClassReader.ANNOTATIONS && ctanns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleTypeAnnotations")); +++ ctanns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && ictanns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleTypeAnnotations")); +++ ictanns.put(out); +++ } +++ if (cattrs != null) { +++ cattrs.put(cw, code.data, code.length, maxLocals, maxStack, out); +++ } +++ } +++ if (exceptionCount > 0) { +++ out.putShort(cw.newUTF8("Exceptions")).putInt( +++ 2 * exceptionCount + 2); +++ out.putShort(exceptionCount); +++ for (int i = 0; i < exceptionCount; ++i) { +++ out.putShort(exceptions[i]); +++ } +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ out.putShort(cw.newUTF8("Synthetic")).putInt(0); +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ out.putShort(cw.newUTF8("Deprecated")).putInt(0); +++ } +++ if (ClassReader.SIGNATURES && signature != null) { +++ out.putShort(cw.newUTF8("Signature")).putInt(2) +++ .putShort(cw.newUTF8(signature)); +++ } +++ if (methodParameters != null) { +++ out.putShort(cw.newUTF8("MethodParameters")); +++ out.putInt(methodParameters.length + 1).putByte( +++ methodParametersCount); +++ out.putByteArray(methodParameters.data, 0, methodParameters.length); +++ } +++ if (ClassReader.ANNOTATIONS && annd != null) { +++ out.putShort(cw.newUTF8("AnnotationDefault")); +++ out.putInt(annd.length); +++ out.putByteArray(annd.data, 0, annd.length); +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleAnnotations")); +++ anns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleAnnotations")); 
+++ ianns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleTypeAnnotations")); +++ tanns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleTypeAnnotations")); +++ itanns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && panns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleParameterAnnotations")); +++ AnnotationWriter.put(panns, synthetics, out); +++ } +++ if (ClassReader.ANNOTATIONS && ipanns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleParameterAnnotations")); +++ AnnotationWriter.put(ipanns, synthetics, out); +++ } +++ if (attrs != null) { +++ attrs.put(cw, null, 0, -1, -1, out); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: instruction resizing (used to handle GOTO_W and JSR_W) +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Resizes and replaces the temporary instructions inserted by +++ * {@link Label#resolve} for wide forward jumps, while keeping jump offsets +++ * and instruction addresses consistent. This may require to resize other +++ * existing instructions, or even to introduce new instructions: for +++ * example, increasing the size of an instruction by 2 at the middle of a +++ * method can increases the offset of an IFEQ instruction from 32766 to +++ * 32768, in which case IFEQ 32766 must be replaced with IFNEQ 8 GOTO_W +++ * 32765. This, in turn, may require to increase the size of another jump +++ * instruction, and so on... All these operations are handled automatically +++ * by this method. +++ *

+++ * This method must be called after all the method that is being built +++ * has been visited. In particular, the {@link Label Label} objects used +++ * to construct the method are no longer valid after this method has been +++ * called. +++ */ +++ private void resizeInstructions() { +++ byte[] b = code.data; // bytecode of the method +++ int u, v, label; // indexes in b +++ int i, j; // loop indexes +++ /* +++ * 1st step: As explained above, resizing an instruction may require to +++ * resize another one, which may require to resize yet another one, and +++ * so on. The first step of the algorithm consists in finding all the +++ * instructions that need to be resized, without modifying the code. +++ * This is done by the following "fix point" algorithm: +++ * +++ * Parse the code to find the jump instructions whose offset will need +++ * more than 2 bytes to be stored (the future offset is computed from +++ * the current offset and from the number of bytes that will be inserted +++ * or removed between the source and target instructions). For each such +++ * instruction, adds an entry in (a copy of) the indexes and sizes +++ * arrays (if this has not already been done in a previous iteration!). +++ * +++ * If at least one entry has been added during the previous step, go +++ * back to the beginning, otherwise stop. +++ * +++ * In fact the real algorithm is complicated by the fact that the size +++ * of TABLESWITCH and LOOKUPSWITCH instructions depends on their +++ * position in the bytecode (because of padding). In order to ensure the +++ * convergence of the algorithm, the number of bytes to be added or +++ * removed from these instructions is over estimated during the previous +++ * loop, and computed exactly only after the loop is finished (this +++ * requires another pass to parse the bytecode of the method). 
+++ */ +++ int[] allIndexes = new int[0]; // copy of indexes +++ int[] allSizes = new int[0]; // copy of sizes +++ boolean[] resize; // instructions to be resized +++ int newOffset; // future offset of a jump instruction +++ +++ resize = new boolean[code.length]; +++ +++ // 3 = loop again, 2 = loop ended, 1 = last pass, 0 = done +++ int state = 3; +++ do { +++ if (state == 3) { +++ state = 2; +++ } +++ u = 0; +++ while (u < b.length) { +++ int opcode = b[u] & 0xFF; // opcode of current instruction +++ int insert = 0; // bytes to be added after this instruction +++ +++ switch (ClassWriter.TYPE[opcode]) { +++ case ClassWriter.NOARG_INSN: +++ case ClassWriter.IMPLVAR_INSN: +++ u += 1; +++ break; +++ case ClassWriter.LABEL_INSN: +++ if (opcode > 201) { +++ // converts temporary opcodes 202 to 217, 218 and +++ // 219 to IFEQ ... JSR (inclusive), IFNULL and +++ // IFNONNULL +++ opcode = opcode < 218 ? opcode - 49 : opcode - 20; +++ label = u + readUnsignedShort(b, u + 1); +++ } else { +++ label = u + readShort(b, u + 1); +++ } +++ newOffset = getNewOffset(allIndexes, allSizes, u, label); +++ if (newOffset < Short.MIN_VALUE +++ || newOffset > Short.MAX_VALUE) { +++ if (!resize[u]) { +++ if (opcode == Opcodes.GOTO || opcode == Opcodes.JSR) { +++ // two additional bytes will be required to +++ // replace this GOTO or JSR instruction with +++ // a GOTO_W or a JSR_W +++ insert = 2; +++ } else { +++ // five additional bytes will be required to +++ // replace this IFxxx instruction with +++ // IFNOTxxx GOTO_W , where IFNOTxxx +++ // is the "opposite" opcode of IFxxx (i.e., +++ // IFNE for IFEQ) and where designates +++ // the instruction just after the GOTO_W. 
+++ insert = 5; +++ } +++ resize[u] = true; +++ } +++ } +++ u += 3; +++ break; +++ case ClassWriter.LABELW_INSN: +++ u += 5; +++ break; +++ case ClassWriter.TABL_INSN: +++ if (state == 1) { +++ // true number of bytes to be added (or removed) +++ // from this instruction = (future number of padding +++ // bytes - current number of padding byte) - +++ // previously over estimated variation = +++ // = ((3 - newOffset%4) - (3 - u%4)) - u%4 +++ // = (-newOffset%4 + u%4) - u%4 +++ // = -(newOffset & 3) +++ newOffset = getNewOffset(allIndexes, allSizes, 0, u); +++ insert = -(newOffset & 3); +++ } else if (!resize[u]) { +++ // over estimation of the number of bytes to be +++ // added to this instruction = 3 - current number +++ // of padding bytes = 3 - (3 - u%4) = u%4 = u & 3 +++ insert = u & 3; +++ resize[u] = true; +++ } +++ // skips instruction +++ u = u + 4 - (u & 3); +++ u += 4 * (readInt(b, u + 8) - readInt(b, u + 4) + 1) + 12; +++ break; +++ case ClassWriter.LOOK_INSN: +++ if (state == 1) { +++ // like TABL_INSN +++ newOffset = getNewOffset(allIndexes, allSizes, 0, u); +++ insert = -(newOffset & 3); +++ } else if (!resize[u]) { +++ // like TABL_INSN +++ insert = u & 3; +++ resize[u] = true; +++ } +++ // skips instruction +++ u = u + 4 - (u & 3); +++ u += 8 * readInt(b, u + 4) + 8; +++ break; +++ case ClassWriter.WIDE_INSN: +++ opcode = b[u + 1] & 0xFF; +++ if (opcode == Opcodes.IINC) { +++ u += 6; +++ } else { +++ u += 4; +++ } +++ break; +++ case ClassWriter.VAR_INSN: +++ case ClassWriter.SBYTE_INSN: +++ case ClassWriter.LDC_INSN: +++ u += 2; +++ break; +++ case ClassWriter.SHORT_INSN: +++ case ClassWriter.LDCW_INSN: +++ case ClassWriter.FIELDORMETH_INSN: +++ case ClassWriter.TYPE_INSN: +++ case ClassWriter.IINC_INSN: +++ u += 3; +++ break; +++ case ClassWriter.ITFMETH_INSN: +++ case ClassWriter.INDYMETH_INSN: +++ u += 5; +++ break; +++ // case ClassWriter.MANA_INSN: +++ default: +++ u += 4; +++ break; +++ } +++ if (insert != 0) { +++ // adds a new (u, insert) 
entry in the allIndexes and +++ // allSizes arrays +++ int[] newIndexes = new int[allIndexes.length + 1]; +++ int[] newSizes = new int[allSizes.length + 1]; +++ System.arraycopy(allIndexes, 0, newIndexes, 0, +++ allIndexes.length); +++ System.arraycopy(allSizes, 0, newSizes, 0, allSizes.length); +++ newIndexes[allIndexes.length] = u; +++ newSizes[allSizes.length] = insert; +++ allIndexes = newIndexes; +++ allSizes = newSizes; +++ if (insert > 0) { +++ state = 3; +++ } +++ } +++ } +++ if (state < 3) { +++ --state; +++ } +++ } while (state != 0); +++ +++ // 2nd step: +++ // copies the bytecode of the method into a new bytevector, updates the +++ // offsets, and inserts (or removes) bytes as requested. +++ +++ ByteVector newCode = new ByteVector(code.length); +++ +++ u = 0; +++ while (u < code.length) { +++ int opcode = b[u] & 0xFF; +++ switch (ClassWriter.TYPE[opcode]) { +++ case ClassWriter.NOARG_INSN: +++ case ClassWriter.IMPLVAR_INSN: +++ newCode.putByte(opcode); +++ u += 1; +++ break; +++ case ClassWriter.LABEL_INSN: +++ if (opcode > 201) { +++ // changes temporary opcodes 202 to 217 (inclusive), 218 +++ // and 219 to IFEQ ... JSR (inclusive), IFNULL and +++ // IFNONNULL +++ opcode = opcode < 218 ? opcode - 49 : opcode - 20; +++ label = u + readUnsignedShort(b, u + 1); +++ } else { +++ label = u + readShort(b, u + 1); +++ } +++ newOffset = getNewOffset(allIndexes, allSizes, u, label); +++ if (resize[u]) { +++ // replaces GOTO with GOTO_W, JSR with JSR_W and IFxxx +++ // with IFNOTxxx GOTO_W , where IFNOTxxx is +++ // the "opposite" opcode of IFxxx (i.e., IFNE for IFEQ) +++ // and where designates the instruction just after +++ // the GOTO_W. +++ if (opcode == Opcodes.GOTO) { +++ newCode.putByte(200); // GOTO_W +++ } else if (opcode == Opcodes.JSR) { +++ newCode.putByte(201); // JSR_W +++ } else { +++ newCode.putByte(opcode <= 166 ? 
((opcode + 1) ^ 1) - 1 +++ : opcode ^ 1); +++ newCode.putShort(8); // jump offset +++ newCode.putByte(200); // GOTO_W +++ // newOffset now computed from start of GOTO_W +++ newOffset -= 3; +++ } +++ newCode.putInt(newOffset); +++ } else { +++ newCode.putByte(opcode); +++ newCode.putShort(newOffset); +++ } +++ u += 3; +++ break; +++ case ClassWriter.LABELW_INSN: +++ label = u + readInt(b, u + 1); +++ newOffset = getNewOffset(allIndexes, allSizes, u, label); +++ newCode.putByte(opcode); +++ newCode.putInt(newOffset); +++ u += 5; +++ break; +++ case ClassWriter.TABL_INSN: +++ // skips 0 to 3 padding bytes +++ v = u; +++ u = u + 4 - (v & 3); +++ // reads and copies instruction +++ newCode.putByte(Opcodes.TABLESWITCH); +++ newCode.putByteArray(null, 0, (4 - newCode.length % 4) % 4); +++ label = v + readInt(b, u); +++ u += 4; +++ newOffset = getNewOffset(allIndexes, allSizes, v, label); +++ newCode.putInt(newOffset); +++ j = readInt(b, u); +++ u += 4; +++ newCode.putInt(j); +++ j = readInt(b, u) - j + 1; +++ u += 4; +++ newCode.putInt(readInt(b, u - 4)); +++ for (; j > 0; --j) { +++ label = v + readInt(b, u); +++ u += 4; +++ newOffset = getNewOffset(allIndexes, allSizes, v, label); +++ newCode.putInt(newOffset); +++ } +++ break; +++ case ClassWriter.LOOK_INSN: +++ // skips 0 to 3 padding bytes +++ v = u; +++ u = u + 4 - (v & 3); +++ // reads and copies instruction +++ newCode.putByte(Opcodes.LOOKUPSWITCH); +++ newCode.putByteArray(null, 0, (4 - newCode.length % 4) % 4); +++ label = v + readInt(b, u); +++ u += 4; +++ newOffset = getNewOffset(allIndexes, allSizes, v, label); +++ newCode.putInt(newOffset); +++ j = readInt(b, u); +++ u += 4; +++ newCode.putInt(j); +++ for (; j > 0; --j) { +++ newCode.putInt(readInt(b, u)); +++ u += 4; +++ label = v + readInt(b, u); +++ u += 4; +++ newOffset = getNewOffset(allIndexes, allSizes, v, label); +++ newCode.putInt(newOffset); +++ } +++ break; +++ case ClassWriter.WIDE_INSN: +++ opcode = b[u + 1] & 0xFF; +++ if (opcode == 
Opcodes.IINC) { +++ newCode.putByteArray(b, u, 6); +++ u += 6; +++ } else { +++ newCode.putByteArray(b, u, 4); +++ u += 4; +++ } +++ break; +++ case ClassWriter.VAR_INSN: +++ case ClassWriter.SBYTE_INSN: +++ case ClassWriter.LDC_INSN: +++ newCode.putByteArray(b, u, 2); +++ u += 2; +++ break; +++ case ClassWriter.SHORT_INSN: +++ case ClassWriter.LDCW_INSN: +++ case ClassWriter.FIELDORMETH_INSN: +++ case ClassWriter.TYPE_INSN: +++ case ClassWriter.IINC_INSN: +++ newCode.putByteArray(b, u, 3); +++ u += 3; +++ break; +++ case ClassWriter.ITFMETH_INSN: +++ case ClassWriter.INDYMETH_INSN: +++ newCode.putByteArray(b, u, 5); +++ u += 5; +++ break; +++ // case MANA_INSN: +++ default: +++ newCode.putByteArray(b, u, 4); +++ u += 4; +++ break; +++ } +++ } +++ +++ // updates the stack map frame labels +++ if (compute == FRAMES) { +++ Label l = labels; +++ while (l != null) { +++ /* +++ * Detects the labels that are just after an IF instruction that +++ * has been resized with the IFNOT GOTO_W pattern. These labels +++ * are now the target of a jump instruction (the IFNOT +++ * instruction). Note that we need the original label position +++ * here. getNewOffset must therefore never have been called for +++ * this label. +++ */ +++ u = l.position - 3; +++ if (u >= 0 && resize[u]) { +++ l.status |= Label.TARGET; +++ } +++ getNewOffset(allIndexes, allSizes, l); +++ l = l.successor; +++ } +++ // Update the offsets in the uninitialized types +++ if (cw.typeTable != null) { +++ for (i = 0; i < cw.typeTable.length; ++i) { +++ Item item = cw.typeTable[i]; +++ if (item != null && item.type == ClassWriter.TYPE_UNINIT) { +++ item.intVal = getNewOffset(allIndexes, allSizes, 0, +++ item.intVal); +++ } +++ } +++ } +++ // The stack map frames are not serialized yet, so we don't need +++ // to update them. They will be serialized in visitMaxs. +++ } else if (frameCount > 0) { +++ /* +++ * Resizing an existing stack map frame table is really hard. 
Not +++ * only the table must be parsed to update the offets, but new +++ * frames may be needed for jump instructions that were inserted by +++ * this method. And updating the offsets or inserting frames can +++ * change the format of the following frames, in case of packed +++ * frames. In practice the whole table must be recomputed. For this +++ * the frames are marked as potentially invalid. This will cause the +++ * whole class to be reread and rewritten with the COMPUTE_FRAMES +++ * option (see the ClassWriter.toByteArray method). This is not very +++ * efficient but is much easier and requires much less code than any +++ * other method I can think of. +++ */ +++ cw.invalidFrames = true; +++ } +++ // updates the exception handler block labels +++ Handler h = firstHandler; +++ while (h != null) { +++ getNewOffset(allIndexes, allSizes, h.start); +++ getNewOffset(allIndexes, allSizes, h.end); +++ getNewOffset(allIndexes, allSizes, h.handler); +++ h = h.next; +++ } +++ // updates the instructions addresses in the +++ // local var and line number tables +++ for (i = 0; i < 2; ++i) { +++ ByteVector bv = i == 0 ? 
localVar : localVarType; +++ if (bv != null) { +++ b = bv.data; +++ u = 0; +++ while (u < bv.length) { +++ label = readUnsignedShort(b, u); +++ newOffset = getNewOffset(allIndexes, allSizes, 0, label); +++ writeShort(b, u, newOffset); +++ label += readUnsignedShort(b, u + 2); +++ newOffset = getNewOffset(allIndexes, allSizes, 0, label) +++ - newOffset; +++ writeShort(b, u + 2, newOffset); +++ u += 10; +++ } +++ } +++ } +++ if (lineNumber != null) { +++ b = lineNumber.data; +++ u = 0; +++ while (u < lineNumber.length) { +++ writeShort( +++ b, +++ u, +++ getNewOffset(allIndexes, allSizes, 0, +++ readUnsignedShort(b, u))); +++ u += 4; +++ } +++ } +++ // updates the labels of the other attributes +++ Attribute attr = cattrs; +++ while (attr != null) { +++ Label[] labels = attr.getLabels(); +++ if (labels != null) { +++ for (i = labels.length - 1; i >= 0; --i) { +++ getNewOffset(allIndexes, allSizes, labels[i]); +++ } +++ } +++ attr = attr.next; +++ } +++ +++ // replaces old bytecodes with new ones +++ code = newCode; +++ } +++ +++ /** +++ * Reads an unsigned short value in the given byte array. +++ * +++ * @param b +++ * a byte array. +++ * @param index +++ * the start index of the value to be read. +++ * @return the read value. +++ */ +++ static int readUnsignedShort(final byte[] b, final int index) { +++ return ((b[index] & 0xFF) << 8) | (b[index + 1] & 0xFF); +++ } +++ +++ /** +++ * Reads a signed short value in the given byte array. +++ * +++ * @param b +++ * a byte array. +++ * @param index +++ * the start index of the value to be read. +++ * @return the read value. +++ */ +++ static short readShort(final byte[] b, final int index) { +++ return (short) (((b[index] & 0xFF) << 8) | (b[index + 1] & 0xFF)); +++ } +++ +++ /** +++ * Reads a signed int value in the given byte array. +++ * +++ * @param b +++ * a byte array. +++ * @param index +++ * the start index of the value to be read. +++ * @return the read value. 
+++ */ +++ static int readInt(final byte[] b, final int index) { +++ return ((b[index] & 0xFF) << 24) | ((b[index + 1] & 0xFF) << 16) +++ | ((b[index + 2] & 0xFF) << 8) | (b[index + 3] & 0xFF); +++ } +++ +++ /** +++ * Writes a short value in the given byte array. +++ * +++ * @param b +++ * a byte array. +++ * @param index +++ * where the first byte of the short value must be written. +++ * @param s +++ * the value to be written in the given byte array. +++ */ +++ static void writeShort(final byte[] b, final int index, final int s) { +++ b[index] = (byte) (s >>> 8); +++ b[index + 1] = (byte) s; +++ } +++ +++ /** +++ * Computes the future value of a bytecode offset. +++ *

+++ * Note: it is possible to have several entries for the same instruction in +++ * the indexes and sizes: two entries (index=a,size=b) and +++ * (index=a,size=b') are equivalent to a single entry (index=a,size=b+b'). +++ * +++ * @param indexes +++ * current positions of the instructions to be resized. Each +++ * instruction must be designated by the index of its last +++ * byte, plus one (or, in other words, by the index of the +++ * first byte of the next instruction). +++ * @param sizes +++ * the number of bytes to be added to the above +++ * instructions. More precisely, for each i < len, +++ * sizes[i] bytes will be added at the end of the +++ * instruction designated by indexes[i] or, if +++ * sizes[i] is negative, the last | +++ * sizes[i]| bytes of the instruction will be removed +++ * (the instruction size must not become negative or +++ * null). +++ * @param begin +++ * index of the first byte of the source instruction. +++ * @param end +++ * index of the first byte of the target instruction. +++ * @return the future value of the given bytecode offset. +++ */ +++ static int getNewOffset(final int[] indexes, final int[] sizes, +++ final int begin, final int end) { +++ int offset = end - begin; +++ for (int i = 0; i < indexes.length; ++i) { +++ if (begin < indexes[i] && indexes[i] <= end) { +++ // forward jump +++ offset += sizes[i]; +++ } else if (end < indexes[i] && indexes[i] <= begin) { +++ // backward jump +++ offset -= sizes[i]; +++ } +++ } +++ return offset; +++ } +++ +++ /** +++ * Updates the offset of the given label. +++ * +++ * @param indexes +++ * current positions of the instructions to be resized. Each +++ * instruction must be designated by the index of its last +++ * byte, plus one (or, in other words, by the index of the +++ * first byte of the next instruction). +++ * @param sizes +++ * the number of bytes to be added to the above +++ * instructions. 
More precisely, for each i < len, +++ * sizes[i] bytes will be added at the end of the +++ * instruction designated by indexes[i] or, if +++ * sizes[i] is negative, the last | +++ * sizes[i]| bytes of the instruction will be removed +++ * (the instruction size must not become negative or +++ * null). +++ * @param label +++ * the label whose offset must be updated. +++ */ +++ static void getNewOffset(final int[] indexes, final int[] sizes, +++ final Label label) { +++ if ((label.status & Label.RESIZED) == 0) { +++ label.position = getNewOffset(indexes, sizes, 0, label.position); +++ label.status |= Label.RESIZED; +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Opcodes.java b/contrib/asm/src/org/objectweb/asm/Opcodes.java ++new file mode 100644 ++index 0000000..e5c2b33 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Opcodes.java ++@@ -0,0 +1,361 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. 
+++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * Defines the JVM opcodes, access flags and array type codes. This interface +++ * does not define all the JVM opcodes because some opcodes are automatically +++ * handled. For example, the xLOAD and xSTORE opcodes are automatically replaced +++ * by xLOAD_n and xSTORE_n opcodes when possible. The xLOAD_n and xSTORE_n +++ * opcodes are therefore not defined in this interface. Likewise for LDC, +++ * automatically replaced by LDC_W or LDC2_W when necessary, WIDE, GOTO_W and +++ * JSR_W. 
+++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++public interface Opcodes { +++ +++ // ASM API versions +++ +++ int ASM4 = 4 << 16 | 0 << 8 | 0; +++ int ASM5 = 5 << 16 | 0 << 8 | 0; +++ +++ // versions +++ +++ int V1_1 = 3 << 16 | 45; +++ int V1_2 = 0 << 16 | 46; +++ int V1_3 = 0 << 16 | 47; +++ int V1_4 = 0 << 16 | 48; +++ int V1_5 = 0 << 16 | 49; +++ int V1_6 = 0 << 16 | 50; +++ int V1_7 = 0 << 16 | 51; +++ int V1_8 = 0 << 16 | 52; +++ +++ // access flags +++ +++ int ACC_PUBLIC = 0x0001; // class, field, method +++ int ACC_PRIVATE = 0x0002; // class, field, method +++ int ACC_PROTECTED = 0x0004; // class, field, method +++ int ACC_STATIC = 0x0008; // field, method +++ int ACC_FINAL = 0x0010; // class, field, method, parameter +++ int ACC_SUPER = 0x0020; // class +++ int ACC_SYNCHRONIZED = 0x0020; // method +++ int ACC_VOLATILE = 0x0040; // field +++ int ACC_BRIDGE = 0x0040; // method +++ int ACC_VARARGS = 0x0080; // method +++ int ACC_TRANSIENT = 0x0080; // field +++ int ACC_NATIVE = 0x0100; // method +++ int ACC_INTERFACE = 0x0200; // class +++ int ACC_ABSTRACT = 0x0400; // class, method +++ int ACC_STRICT = 0x0800; // method +++ int ACC_SYNTHETIC = 0x1000; // class, field, method, parameter +++ int ACC_ANNOTATION = 0x2000; // class +++ int ACC_ENUM = 0x4000; // class(?) 
field inner +++ int ACC_MANDATED = 0x8000; // parameter +++ +++ // ASM specific pseudo access flags +++ +++ int ACC_DEPRECATED = 0x20000; // class, field, method +++ +++ // types for NEWARRAY +++ +++ int T_BOOLEAN = 4; +++ int T_CHAR = 5; +++ int T_FLOAT = 6; +++ int T_DOUBLE = 7; +++ int T_BYTE = 8; +++ int T_SHORT = 9; +++ int T_INT = 10; +++ int T_LONG = 11; +++ +++ // tags for Handle +++ +++ int H_GETFIELD = 1; +++ int H_GETSTATIC = 2; +++ int H_PUTFIELD = 3; +++ int H_PUTSTATIC = 4; +++ int H_INVOKEVIRTUAL = 5; +++ int H_INVOKESTATIC = 6; +++ int H_INVOKESPECIAL = 7; +++ int H_NEWINVOKESPECIAL = 8; +++ int H_INVOKEINTERFACE = 9; +++ +++ // stack map frame types +++ +++ /** +++ * Represents an expanded frame. See {@link ClassReader#EXPAND_FRAMES}. +++ */ +++ int F_NEW = -1; +++ +++ /** +++ * Represents a compressed frame with complete frame data. +++ */ +++ int F_FULL = 0; +++ +++ /** +++ * Represents a compressed frame where locals are the same as the locals in +++ * the previous frame, except that additional 1-3 locals are defined, and +++ * with an empty stack. +++ */ +++ int F_APPEND = 1; +++ +++ /** +++ * Represents a compressed frame where locals are the same as the locals in +++ * the previous frame, except that the last 1-3 locals are absent and with +++ * an empty stack. +++ */ +++ int F_CHOP = 2; +++ +++ /** +++ * Represents a compressed frame with exactly the same locals as the +++ * previous frame and with an empty stack. +++ */ +++ int F_SAME = 3; +++ +++ /** +++ * Represents a compressed frame with exactly the same locals as the +++ * previous frame and with a single value on the stack. 
+++ */ +++ int F_SAME1 = 4; +++ +++ Integer TOP = new Integer(0); +++ Integer INTEGER = new Integer(1); +++ Integer FLOAT = new Integer(2); +++ Integer DOUBLE = new Integer(3); +++ Integer LONG = new Integer(4); +++ Integer NULL = new Integer(5); +++ Integer UNINITIALIZED_THIS = new Integer(6); +++ +++ // opcodes // visit method (- = idem) +++ +++ int NOP = 0; // visitInsn +++ int ACONST_NULL = 1; // - +++ int ICONST_M1 = 2; // - +++ int ICONST_0 = 3; // - +++ int ICONST_1 = 4; // - +++ int ICONST_2 = 5; // - +++ int ICONST_3 = 6; // - +++ int ICONST_4 = 7; // - +++ int ICONST_5 = 8; // - +++ int LCONST_0 = 9; // - +++ int LCONST_1 = 10; // - +++ int FCONST_0 = 11; // - +++ int FCONST_1 = 12; // - +++ int FCONST_2 = 13; // - +++ int DCONST_0 = 14; // - +++ int DCONST_1 = 15; // - +++ int BIPUSH = 16; // visitIntInsn +++ int SIPUSH = 17; // - +++ int LDC = 18; // visitLdcInsn +++ // int LDC_W = 19; // - +++ // int LDC2_W = 20; // - +++ int ILOAD = 21; // visitVarInsn +++ int LLOAD = 22; // - +++ int FLOAD = 23; // - +++ int DLOAD = 24; // - +++ int ALOAD = 25; // - +++ // int ILOAD_0 = 26; // - +++ // int ILOAD_1 = 27; // - +++ // int ILOAD_2 = 28; // - +++ // int ILOAD_3 = 29; // - +++ // int LLOAD_0 = 30; // - +++ // int LLOAD_1 = 31; // - +++ // int LLOAD_2 = 32; // - +++ // int LLOAD_3 = 33; // - +++ // int FLOAD_0 = 34; // - +++ // int FLOAD_1 = 35; // - +++ // int FLOAD_2 = 36; // - +++ // int FLOAD_3 = 37; // - +++ // int DLOAD_0 = 38; // - +++ // int DLOAD_1 = 39; // - +++ // int DLOAD_2 = 40; // - +++ // int DLOAD_3 = 41; // - +++ // int ALOAD_0 = 42; // - +++ // int ALOAD_1 = 43; // - +++ // int ALOAD_2 = 44; // - +++ // int ALOAD_3 = 45; // - +++ int IALOAD = 46; // visitInsn +++ int LALOAD = 47; // - +++ int FALOAD = 48; // - +++ int DALOAD = 49; // - +++ int AALOAD = 50; // - +++ int BALOAD = 51; // - +++ int CALOAD = 52; // - +++ int SALOAD = 53; // - +++ int ISTORE = 54; // visitVarInsn +++ int LSTORE = 55; // - +++ int FSTORE = 56; // - +++ int 
DSTORE = 57; // - +++ int ASTORE = 58; // - +++ // int ISTORE_0 = 59; // - +++ // int ISTORE_1 = 60; // - +++ // int ISTORE_2 = 61; // - +++ // int ISTORE_3 = 62; // - +++ // int LSTORE_0 = 63; // - +++ // int LSTORE_1 = 64; // - +++ // int LSTORE_2 = 65; // - +++ // int LSTORE_3 = 66; // - +++ // int FSTORE_0 = 67; // - +++ // int FSTORE_1 = 68; // - +++ // int FSTORE_2 = 69; // - +++ // int FSTORE_3 = 70; // - +++ // int DSTORE_0 = 71; // - +++ // int DSTORE_1 = 72; // - +++ // int DSTORE_2 = 73; // - +++ // int DSTORE_3 = 74; // - +++ // int ASTORE_0 = 75; // - +++ // int ASTORE_1 = 76; // - +++ // int ASTORE_2 = 77; // - +++ // int ASTORE_3 = 78; // - +++ int IASTORE = 79; // visitInsn +++ int LASTORE = 80; // - +++ int FASTORE = 81; // - +++ int DASTORE = 82; // - +++ int AASTORE = 83; // - +++ int BASTORE = 84; // - +++ int CASTORE = 85; // - +++ int SASTORE = 86; // - +++ int POP = 87; // - +++ int POP2 = 88; // - +++ int DUP = 89; // - +++ int DUP_X1 = 90; // - +++ int DUP_X2 = 91; // - +++ int DUP2 = 92; // - +++ int DUP2_X1 = 93; // - +++ int DUP2_X2 = 94; // - +++ int SWAP = 95; // - +++ int IADD = 96; // - +++ int LADD = 97; // - +++ int FADD = 98; // - +++ int DADD = 99; // - +++ int ISUB = 100; // - +++ int LSUB = 101; // - +++ int FSUB = 102; // - +++ int DSUB = 103; // - +++ int IMUL = 104; // - +++ int LMUL = 105; // - +++ int FMUL = 106; // - +++ int DMUL = 107; // - +++ int IDIV = 108; // - +++ int LDIV = 109; // - +++ int FDIV = 110; // - +++ int DDIV = 111; // - +++ int IREM = 112; // - +++ int LREM = 113; // - +++ int FREM = 114; // - +++ int DREM = 115; // - +++ int INEG = 116; // - +++ int LNEG = 117; // - +++ int FNEG = 118; // - +++ int DNEG = 119; // - +++ int ISHL = 120; // - +++ int LSHL = 121; // - +++ int ISHR = 122; // - +++ int LSHR = 123; // - +++ int IUSHR = 124; // - +++ int LUSHR = 125; // - +++ int IAND = 126; // - +++ int LAND = 127; // - +++ int IOR = 128; // - +++ int LOR = 129; // - +++ int IXOR = 130; // - +++ int LXOR = 
131; // - +++ int IINC = 132; // visitIincInsn +++ int I2L = 133; // visitInsn +++ int I2F = 134; // - +++ int I2D = 135; // - +++ int L2I = 136; // - +++ int L2F = 137; // - +++ int L2D = 138; // - +++ int F2I = 139; // - +++ int F2L = 140; // - +++ int F2D = 141; // - +++ int D2I = 142; // - +++ int D2L = 143; // - +++ int D2F = 144; // - +++ int I2B = 145; // - +++ int I2C = 146; // - +++ int I2S = 147; // - +++ int LCMP = 148; // - +++ int FCMPL = 149; // - +++ int FCMPG = 150; // - +++ int DCMPL = 151; // - +++ int DCMPG = 152; // - +++ int IFEQ = 153; // visitJumpInsn +++ int IFNE = 154; // - +++ int IFLT = 155; // - +++ int IFGE = 156; // - +++ int IFGT = 157; // - +++ int IFLE = 158; // - +++ int IF_ICMPEQ = 159; // - +++ int IF_ICMPNE = 160; // - +++ int IF_ICMPLT = 161; // - +++ int IF_ICMPGE = 162; // - +++ int IF_ICMPGT = 163; // - +++ int IF_ICMPLE = 164; // - +++ int IF_ACMPEQ = 165; // - +++ int IF_ACMPNE = 166; // - +++ int GOTO = 167; // - +++ int JSR = 168; // - +++ int RET = 169; // visitVarInsn +++ int TABLESWITCH = 170; // visiTableSwitchInsn +++ int LOOKUPSWITCH = 171; // visitLookupSwitch +++ int IRETURN = 172; // visitInsn +++ int LRETURN = 173; // - +++ int FRETURN = 174; // - +++ int DRETURN = 175; // - +++ int ARETURN = 176; // - +++ int RETURN = 177; // - +++ int GETSTATIC = 178; // visitFieldInsn +++ int PUTSTATIC = 179; // - +++ int GETFIELD = 180; // - +++ int PUTFIELD = 181; // - +++ int INVOKEVIRTUAL = 182; // visitMethodInsn +++ int INVOKESPECIAL = 183; // - +++ int INVOKESTATIC = 184; // - +++ int INVOKEINTERFACE = 185; // - +++ int INVOKEDYNAMIC = 186; // visitInvokeDynamicInsn +++ int NEW = 187; // visitTypeInsn +++ int NEWARRAY = 188; // visitIntInsn +++ int ANEWARRAY = 189; // visitTypeInsn +++ int ARRAYLENGTH = 190; // visitInsn +++ int ATHROW = 191; // - +++ int CHECKCAST = 192; // visitTypeInsn +++ int INSTANCEOF = 193; // - +++ int MONITORENTER = 194; // visitInsn +++ int MONITOREXIT = 195; // - +++ // int WIDE = 196; // 
NOT VISITED +++ int MULTIANEWARRAY = 197; // visitMultiANewArrayInsn +++ int IFNULL = 198; // visitJumpInsn +++ int IFNONNULL = 199; // - +++ // int GOTO_W = 200; // - +++ // int JSR_W = 201; // - +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Type.java b/contrib/asm/src/org/objectweb/asm/Type.java ++new file mode 100644 ++index 0000000..33a8bf0 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Type.java ++@@ -0,0 +1,896 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++import java.lang.reflect.Constructor; +++import java.lang.reflect.Method; +++ +++/** +++ * A Java field or method type. This class can be used to make it easier to +++ * manipulate type and method descriptors. +++ * +++ * @author Eric Bruneton +++ * @author Chris Nokleberg +++ */ +++public class Type { +++ +++ /** +++ * The sort of the void type. See {@link #getSort getSort}. +++ */ +++ public static final int VOID = 0; +++ +++ /** +++ * The sort of the boolean type. See {@link #getSort getSort}. +++ */ +++ public static final int BOOLEAN = 1; +++ +++ /** +++ * The sort of the char type. See {@link #getSort getSort}. +++ */ +++ public static final int CHAR = 2; +++ +++ /** +++ * The sort of the byte type. See {@link #getSort getSort}. +++ */ +++ public static final int BYTE = 3; +++ +++ /** +++ * The sort of the short type. See {@link #getSort getSort}. +++ */ +++ public static final int SHORT = 4; +++ +++ /** +++ * The sort of the int type. See {@link #getSort getSort}. +++ */ +++ public static final int INT = 5; +++ +++ /** +++ * The sort of the float type. See {@link #getSort getSort}. +++ */ +++ public static final int FLOAT = 6; +++ +++ /** +++ * The sort of the long type. See {@link #getSort getSort}. +++ */ +++ public static final int LONG = 7; +++ +++ /** +++ * The sort of the double type. See {@link #getSort getSort}. 
+++ */ +++ public static final int DOUBLE = 8; +++ +++ /** +++ * The sort of array reference types. See {@link #getSort getSort}. +++ */ +++ public static final int ARRAY = 9; +++ +++ /** +++ * The sort of object reference types. See {@link #getSort getSort}. +++ */ +++ public static final int OBJECT = 10; +++ +++ /** +++ * The sort of method types. See {@link #getSort getSort}. +++ */ +++ public static final int METHOD = 11; +++ +++ /** +++ * The void type. +++ */ +++ public static final Type VOID_TYPE = new Type(VOID, null, ('V' << 24) +++ | (5 << 16) | (0 << 8) | 0, 1); +++ +++ /** +++ * The boolean type. +++ */ +++ public static final Type BOOLEAN_TYPE = new Type(BOOLEAN, null, ('Z' << 24) +++ | (0 << 16) | (5 << 8) | 1, 1); +++ +++ /** +++ * The char type. +++ */ +++ public static final Type CHAR_TYPE = new Type(CHAR, null, ('C' << 24) +++ | (0 << 16) | (6 << 8) | 1, 1); +++ +++ /** +++ * The byte type. +++ */ +++ public static final Type BYTE_TYPE = new Type(BYTE, null, ('B' << 24) +++ | (0 << 16) | (5 << 8) | 1, 1); +++ +++ /** +++ * The short type. +++ */ +++ public static final Type SHORT_TYPE = new Type(SHORT, null, ('S' << 24) +++ | (0 << 16) | (7 << 8) | 1, 1); +++ +++ /** +++ * The int type. +++ */ +++ public static final Type INT_TYPE = new Type(INT, null, ('I' << 24) +++ | (0 << 16) | (0 << 8) | 1, 1); +++ +++ /** +++ * The float type. +++ */ +++ public static final Type FLOAT_TYPE = new Type(FLOAT, null, ('F' << 24) +++ | (2 << 16) | (2 << 8) | 1, 1); +++ +++ /** +++ * The long type. +++ */ +++ public static final Type LONG_TYPE = new Type(LONG, null, ('J' << 24) +++ | (1 << 16) | (1 << 8) | 2, 1); +++ +++ /** +++ * The double type. 
+++ */ +++ public static final Type DOUBLE_TYPE = new Type(DOUBLE, null, ('D' << 24) +++ | (3 << 16) | (3 << 8) | 2, 1); +++ +++ // ------------------------------------------------------------------------ +++ // Fields +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * The sort of this Java type. +++ */ +++ private final int sort; +++ +++ /** +++ * A buffer containing the internal name of this Java type. This field is +++ * only used for reference types. +++ */ +++ private final char[] buf; +++ +++ /** +++ * The offset of the internal name of this Java type in {@link #buf buf} or, +++ * for primitive types, the size, descriptor and getOpcode offsets for this +++ * type (byte 0 contains the size, byte 1 the descriptor, byte 2 the offset +++ * for IALOAD or IASTORE, byte 3 the offset for all other instructions). +++ */ +++ private final int off; +++ +++ /** +++ * The length of the internal name of this Java type. +++ */ +++ private final int len; +++ +++ // ------------------------------------------------------------------------ +++ // Constructors +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a reference type. +++ * +++ * @param sort +++ * the sort of the reference type to be constructed. +++ * @param buf +++ * a buffer containing the descriptor of the previous type. +++ * @param off +++ * the offset of this descriptor in the previous buffer. +++ * @param len +++ * the length of this descriptor. +++ */ +++ private Type(final int sort, final char[] buf, final int off, final int len) { +++ this.sort = sort; +++ this.buf = buf; +++ this.off = off; +++ this.len = len; +++ } +++ +++ /** +++ * Returns the Java type corresponding to the given type descriptor. +++ * +++ * @param typeDescriptor +++ * a field or method type descriptor. +++ * @return the Java type corresponding to the given type descriptor. 
+++ */ +++ public static Type getType(final String typeDescriptor) { +++ return getType(typeDescriptor.toCharArray(), 0); +++ } +++ +++ /** +++ * Returns the Java type corresponding to the given internal name. +++ * +++ * @param internalName +++ * an internal name. +++ * @return the Java type corresponding to the given internal name. +++ */ +++ public static Type getObjectType(final String internalName) { +++ char[] buf = internalName.toCharArray(); +++ return new Type(buf[0] == '[' ? ARRAY : OBJECT, buf, 0, buf.length); +++ } +++ +++ /** +++ * Returns the Java type corresponding to the given method descriptor. +++ * Equivalent to Type.getType(methodDescriptor). +++ * +++ * @param methodDescriptor +++ * a method descriptor. +++ * @return the Java type corresponding to the given method descriptor. +++ */ +++ public static Type getMethodType(final String methodDescriptor) { +++ return getType(methodDescriptor.toCharArray(), 0); +++ } +++ +++ /** +++ * Returns the Java method type corresponding to the given argument and +++ * return types. +++ * +++ * @param returnType +++ * the return type of the method. +++ * @param argumentTypes +++ * the argument types of the method. +++ * @return the Java type corresponding to the given argument and return +++ * types. +++ */ +++ public static Type getMethodType(final Type returnType, +++ final Type... argumentTypes) { +++ return getType(getMethodDescriptor(returnType, argumentTypes)); +++ } +++ +++ /** +++ * Returns the Java type corresponding to the given class. +++ * +++ * @param c +++ * a class. +++ * @return the Java type corresponding to the given class. 
+++ */ +++ public static Type getType(final Class c) { +++ if (c.isPrimitive()) { +++ if (c == Integer.TYPE) { +++ return INT_TYPE; +++ } else if (c == Void.TYPE) { +++ return VOID_TYPE; +++ } else if (c == Boolean.TYPE) { +++ return BOOLEAN_TYPE; +++ } else if (c == Byte.TYPE) { +++ return BYTE_TYPE; +++ } else if (c == Character.TYPE) { +++ return CHAR_TYPE; +++ } else if (c == Short.TYPE) { +++ return SHORT_TYPE; +++ } else if (c == Double.TYPE) { +++ return DOUBLE_TYPE; +++ } else if (c == Float.TYPE) { +++ return FLOAT_TYPE; +++ } else /* if (c == Long.TYPE) */{ +++ return LONG_TYPE; +++ } +++ } else { +++ return getType(getDescriptor(c)); +++ } +++ } +++ +++ /** +++ * Returns the Java method type corresponding to the given constructor. +++ * +++ * @param c +++ * a {@link Constructor Constructor} object. +++ * @return the Java method type corresponding to the given constructor. +++ */ +++ public static Type getType(final Constructor c) { +++ return getType(getConstructorDescriptor(c)); +++ } +++ +++ /** +++ * Returns the Java method type corresponding to the given method. +++ * +++ * @param m +++ * a {@link Method Method} object. +++ * @return the Java method type corresponding to the given method. +++ */ +++ public static Type getType(final Method m) { +++ return getType(getMethodDescriptor(m)); +++ } +++ +++ /** +++ * Returns the Java types corresponding to the argument types of the given +++ * method descriptor. +++ * +++ * @param methodDescriptor +++ * a method descriptor. +++ * @return the Java types corresponding to the argument types of the given +++ * method descriptor. 
+++ */ +++ public static Type[] getArgumentTypes(final String methodDescriptor) { +++ char[] buf = methodDescriptor.toCharArray(); +++ int off = 1; +++ int size = 0; +++ while (true) { +++ char car = buf[off++]; +++ if (car == ')') { +++ break; +++ } else if (car == 'L') { +++ while (buf[off++] != ';') { +++ } +++ ++size; +++ } else if (car != '[') { +++ ++size; +++ } +++ } +++ Type[] args = new Type[size]; +++ off = 1; +++ size = 0; +++ while (buf[off] != ')') { +++ args[size] = getType(buf, off); +++ off += args[size].len + (args[size].sort == OBJECT ? 2 : 0); +++ size += 1; +++ } +++ return args; +++ } +++ +++ /** +++ * Returns the Java types corresponding to the argument types of the given +++ * method. +++ * +++ * @param method +++ * a method. +++ * @return the Java types corresponding to the argument types of the given +++ * method. +++ */ +++ public static Type[] getArgumentTypes(final Method method) { +++ Class[] classes = method.getParameterTypes(); +++ Type[] types = new Type[classes.length]; +++ for (int i = classes.length - 1; i >= 0; --i) { +++ types[i] = getType(classes[i]); +++ } +++ return types; +++ } +++ +++ /** +++ * Returns the Java type corresponding to the return type of the given +++ * method descriptor. +++ * +++ * @param methodDescriptor +++ * a method descriptor. +++ * @return the Java type corresponding to the return type of the given +++ * method descriptor. +++ */ +++ public static Type getReturnType(final String methodDescriptor) { +++ char[] buf = methodDescriptor.toCharArray(); +++ return getType(buf, methodDescriptor.indexOf(')') + 1); +++ } +++ +++ /** +++ * Returns the Java type corresponding to the return type of the given +++ * method. +++ * +++ * @param method +++ * a method. +++ * @return the Java type corresponding to the return type of the given +++ * method. 
+++ */ +++ public static Type getReturnType(final Method method) { +++ return getType(method.getReturnType()); +++ } +++ +++ /** +++ * Computes the size of the arguments and of the return value of a method. +++ * +++ * @param desc +++ * the descriptor of a method. +++ * @return the size of the arguments of the method (plus one for the +++ * implicit this argument), argSize, and the size of its return +++ * value, retSize, packed into a single int i = +++ * (argSize << 2) | retSize (argSize is therefore equal to +++ * i >> 2, and retSize to i & 0x03). +++ */ +++ public static int getArgumentsAndReturnSizes(final String desc) { +++ int n = 1; +++ int c = 1; +++ while (true) { +++ char car = desc.charAt(c++); +++ if (car == ')') { +++ car = desc.charAt(c); +++ return n << 2 +++ | (car == 'V' ? 0 : (car == 'D' || car == 'J' ? 2 : 1)); +++ } else if (car == 'L') { +++ while (desc.charAt(c++) != ';') { +++ } +++ n += 1; +++ } else if (car == '[') { +++ while ((car = desc.charAt(c)) == '[') { +++ ++c; +++ } +++ if (car == 'D' || car == 'J') { +++ n -= 1; +++ } +++ } else if (car == 'D' || car == 'J') { +++ n += 2; +++ } else { +++ n += 1; +++ } +++ } +++ } +++ +++ /** +++ * Returns the Java type corresponding to the given type descriptor. For +++ * method descriptors, buf is supposed to contain nothing more than the +++ * descriptor itself. +++ * +++ * @param buf +++ * a buffer containing a type descriptor. +++ * @param off +++ * the offset of this descriptor in the previous buffer. +++ * @return the Java type corresponding to the given type descriptor. 
+++ */ +++ private static Type getType(final char[] buf, final int off) { +++ int len; +++ switch (buf[off]) { +++ case 'V': +++ return VOID_TYPE; +++ case 'Z': +++ return BOOLEAN_TYPE; +++ case 'C': +++ return CHAR_TYPE; +++ case 'B': +++ return BYTE_TYPE; +++ case 'S': +++ return SHORT_TYPE; +++ case 'I': +++ return INT_TYPE; +++ case 'F': +++ return FLOAT_TYPE; +++ case 'J': +++ return LONG_TYPE; +++ case 'D': +++ return DOUBLE_TYPE; +++ case '[': +++ len = 1; +++ while (buf[off + len] == '[') { +++ ++len; +++ } +++ if (buf[off + len] == 'L') { +++ ++len; +++ while (buf[off + len] != ';') { +++ ++len; +++ } +++ } +++ return new Type(ARRAY, buf, off, len + 1); +++ case 'L': +++ len = 1; +++ while (buf[off + len] != ';') { +++ ++len; +++ } +++ return new Type(OBJECT, buf, off + 1, len - 1); +++ // case '(': +++ default: +++ return new Type(METHOD, buf, off, buf.length - off); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Accessors +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the sort of this Java type. +++ * +++ * @return {@link #VOID VOID}, {@link #BOOLEAN BOOLEAN}, {@link #CHAR CHAR}, +++ * {@link #BYTE BYTE}, {@link #SHORT SHORT}, {@link #INT INT}, +++ * {@link #FLOAT FLOAT}, {@link #LONG LONG}, {@link #DOUBLE DOUBLE}, +++ * {@link #ARRAY ARRAY}, {@link #OBJECT OBJECT} or {@link #METHOD +++ * METHOD}. +++ */ +++ public int getSort() { +++ return sort; +++ } +++ +++ /** +++ * Returns the number of dimensions of this array type. This method should +++ * only be used for an array type. +++ * +++ * @return the number of dimensions of this array type. +++ */ +++ public int getDimensions() { +++ int i = 1; +++ while (buf[off + i] == '[') { +++ ++i; +++ } +++ return i; +++ } +++ +++ /** +++ * Returns the type of the elements of this array type. This method should +++ * only be used for an array type. 
+++ * +++ * @return Returns the type of the elements of this array type. +++ */ +++ public Type getElementType() { +++ return getType(buf, off + getDimensions()); +++ } +++ +++ /** +++ * Returns the binary name of the class corresponding to this type. This +++ * method must not be used on method types. +++ * +++ * @return the binary name of the class corresponding to this type. +++ */ +++ public String getClassName() { +++ switch (sort) { +++ case VOID: +++ return "void"; +++ case BOOLEAN: +++ return "boolean"; +++ case CHAR: +++ return "char"; +++ case BYTE: +++ return "byte"; +++ case SHORT: +++ return "short"; +++ case INT: +++ return "int"; +++ case FLOAT: +++ return "float"; +++ case LONG: +++ return "long"; +++ case DOUBLE: +++ return "double"; +++ case ARRAY: +++ StringBuilder sb = new StringBuilder(getElementType().getClassName()); +++ for (int i = getDimensions(); i > 0; --i) { +++ sb.append("[]"); +++ } +++ return sb.toString(); +++ case OBJECT: +++ return new String(buf, off, len).replace('/', '.'); +++ default: +++ return null; +++ } +++ } +++ +++ /** +++ * Returns the internal name of the class corresponding to this object or +++ * array type. The internal name of a class is its fully qualified name (as +++ * returned by Class.getName(), where '.' are replaced by '/'. This method +++ * should only be used for an object or array type. +++ * +++ * @return the internal name of the class corresponding to this object type. +++ */ +++ public String getInternalName() { +++ return new String(buf, off, len); +++ } +++ +++ /** +++ * Returns the argument types of methods of this type. This method should +++ * only be used for method types. +++ * +++ * @return the argument types of methods of this type. +++ */ +++ public Type[] getArgumentTypes() { +++ return getArgumentTypes(getDescriptor()); +++ } +++ +++ /** +++ * Returns the return type of methods of this type. This method should only +++ * be used for method types. 
+++ * +++ * @return the return type of methods of this type. +++ */ +++ public Type getReturnType() { +++ return getReturnType(getDescriptor()); +++ } +++ +++ /** +++ * Returns the size of the arguments and of the return value of methods of +++ * this type. This method should only be used for method types. +++ * +++ * @return the size of the arguments (plus one for the implicit this +++ * argument), argSize, and the size of the return value, retSize, +++ * packed into a single +++ * int i = (argSize << 2) | retSize +++ * (argSize is therefore equal to i >> 2, +++ * and retSize to i & 0x03). +++ */ +++ public int getArgumentsAndReturnSizes() { +++ return getArgumentsAndReturnSizes(getDescriptor()); +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Conversion to type descriptors +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the descriptor corresponding to this Java type. +++ * +++ * @return the descriptor corresponding to this Java type. +++ */ +++ public String getDescriptor() { +++ StringBuffer buf = new StringBuffer(); +++ getDescriptor(buf); +++ return buf.toString(); +++ } +++ +++ /** +++ * Returns the descriptor corresponding to the given argument and return +++ * types. +++ * +++ * @param returnType +++ * the return type of the method. +++ * @param argumentTypes +++ * the argument types of the method. +++ * @return the descriptor corresponding to the given argument and return +++ * types. +++ */ +++ public static String getMethodDescriptor(final Type returnType, +++ final Type... argumentTypes) { +++ StringBuffer buf = new StringBuffer(); +++ buf.append('('); +++ for (int i = 0; i < argumentTypes.length; ++i) { +++ argumentTypes[i].getDescriptor(buf); +++ } +++ buf.append(')'); +++ returnType.getDescriptor(buf); +++ return buf.toString(); +++ } +++ +++ /** +++ * Appends the descriptor corresponding to this Java type to the given +++ * string buffer. 
+++ * +++ * @param buf +++ * the string buffer to which the descriptor must be appended. +++ */ +++ private void getDescriptor(final StringBuffer buf) { +++ if (this.buf == null) { +++ // descriptor is in byte 3 of 'off' for primitive types (buf == +++ // null) +++ buf.append((char) ((off & 0xFF000000) >>> 24)); +++ } else if (sort == OBJECT) { +++ buf.append('L'); +++ buf.append(this.buf, off, len); +++ buf.append(';'); +++ } else { // sort == ARRAY || sort == METHOD +++ buf.append(this.buf, off, len); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Direct conversion from classes to type descriptors, +++ // without intermediate Type objects +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the internal name of the given class. The internal name of a +++ * class is its fully qualified name, as returned by Class.getName(), where +++ * '.' are replaced by '/'. +++ * +++ * @param c +++ * an object or array class. +++ * @return the internal name of the given class. +++ */ +++ public static String getInternalName(final Class c) { +++ return c.getName().replace('.', '/'); +++ } +++ +++ /** +++ * Returns the descriptor corresponding to the given Java type. +++ * +++ * @param c +++ * an object class, a primitive class or an array class. +++ * @return the descriptor corresponding to the given class. +++ */ +++ public static String getDescriptor(final Class c) { +++ StringBuffer buf = new StringBuffer(); +++ getDescriptor(buf, c); +++ return buf.toString(); +++ } +++ +++ /** +++ * Returns the descriptor corresponding to the given constructor. +++ * +++ * @param c +++ * a {@link Constructor Constructor} object. +++ * @return the descriptor of the given constructor. 
+++ */ +++ public static String getConstructorDescriptor(final Constructor c) { +++ Class[] parameters = c.getParameterTypes(); +++ StringBuffer buf = new StringBuffer(); +++ buf.append('('); +++ for (int i = 0; i < parameters.length; ++i) { +++ getDescriptor(buf, parameters[i]); +++ } +++ return buf.append(")V").toString(); +++ } +++ +++ /** +++ * Returns the descriptor corresponding to the given method. +++ * +++ * @param m +++ * a {@link Method Method} object. +++ * @return the descriptor of the given method. +++ */ +++ public static String getMethodDescriptor(final Method m) { +++ Class[] parameters = m.getParameterTypes(); +++ StringBuffer buf = new StringBuffer(); +++ buf.append('('); +++ for (int i = 0; i < parameters.length; ++i) { +++ getDescriptor(buf, parameters[i]); +++ } +++ buf.append(')'); +++ getDescriptor(buf, m.getReturnType()); +++ return buf.toString(); +++ } +++ +++ /** +++ * Appends the descriptor of the given class to the given string buffer. +++ * +++ * @param buf +++ * the string buffer to which the descriptor must be appended. +++ * @param c +++ * the class whose descriptor must be computed. 
+++ */ +++ private static void getDescriptor(final StringBuffer buf, final Class c) { +++ Class d = c; +++ while (true) { +++ if (d.isPrimitive()) { +++ char car; +++ if (d == Integer.TYPE) { +++ car = 'I'; +++ } else if (d == Void.TYPE) { +++ car = 'V'; +++ } else if (d == Boolean.TYPE) { +++ car = 'Z'; +++ } else if (d == Byte.TYPE) { +++ car = 'B'; +++ } else if (d == Character.TYPE) { +++ car = 'C'; +++ } else if (d == Short.TYPE) { +++ car = 'S'; +++ } else if (d == Double.TYPE) { +++ car = 'D'; +++ } else if (d == Float.TYPE) { +++ car = 'F'; +++ } else /* if (d == Long.TYPE) */{ +++ car = 'J'; +++ } +++ buf.append(car); +++ return; +++ } else if (d.isArray()) { +++ buf.append('['); +++ d = d.getComponentType(); +++ } else { +++ buf.append('L'); +++ String name = d.getName(); +++ int len = name.length(); +++ for (int i = 0; i < len; ++i) { +++ char car = name.charAt(i); +++ buf.append(car == '.' ? '/' : car); +++ } +++ buf.append(';'); +++ return; +++ } +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Corresponding size and opcodes +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the size of values of this type. This method must not be used for +++ * method types. +++ * +++ * @return the size of values of this type, i.e., 2 for long and +++ * double, 0 for void and 1 otherwise. +++ */ +++ public int getSize() { +++ // the size is in byte 0 of 'off' for primitive types (buf == null) +++ return buf == null ? (off & 0xFF) : 1; +++ } +++ +++ /** +++ * Returns a JVM instruction opcode adapted to this Java type. This method +++ * must not be used for method types. +++ * +++ * @param opcode +++ * a JVM instruction opcode. This opcode must be one of ILOAD, +++ * ISTORE, IALOAD, IASTORE, IADD, ISUB, IMUL, IDIV, IREM, INEG, +++ * ISHL, ISHR, IUSHR, IAND, IOR, IXOR and IRETURN. 
+++ * @return an opcode that is similar to the given opcode, but adapted to +++ * this Java type. For example, if this type is float and +++ * opcode is IRETURN, this method returns FRETURN. +++ */ +++ public int getOpcode(final int opcode) { +++ if (opcode == Opcodes.IALOAD || opcode == Opcodes.IASTORE) { +++ // the offset for IALOAD or IASTORE is in byte 1 of 'off' for +++ // primitive types (buf == null) +++ return opcode + (buf == null ? (off & 0xFF00) >> 8 : 4); +++ } else { +++ // the offset for other instructions is in byte 2 of 'off' for +++ // primitive types (buf == null) +++ return opcode + (buf == null ? (off & 0xFF0000) >> 16 : 4); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Equals, hashCode and toString +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Tests if the given object is equal to this type. +++ * +++ * @param o +++ * the object to be compared to this type. +++ * @return true if the given object is equal to this type. +++ */ +++ @Override +++ public boolean equals(final Object o) { +++ if (this == o) { +++ return true; +++ } +++ if (!(o instanceof Type)) { +++ return false; +++ } +++ Type t = (Type) o; +++ if (sort != t.sort) { +++ return false; +++ } +++ if (sort >= ARRAY) { +++ if (len != t.len) { +++ return false; +++ } +++ for (int i = off, j = t.off, end = i + len; i < end; i++, j++) { +++ if (buf[i] != t.buf[j]) { +++ return false; +++ } +++ } +++ } +++ return true; +++ } +++ +++ /** +++ * Returns a hash code value for this type. +++ * +++ * @return a hash code value for this type. +++ */ +++ @Override +++ public int hashCode() { +++ int hc = 13 * sort; +++ if (sort >= ARRAY) { +++ for (int i = off, end = i + len; i < end; i++) { +++ hc = 17 * (hc + buf[i]); +++ } +++ } +++ return hc; +++ } +++ +++ /** +++ * Returns a string representation of this type. +++ * +++ * @return the descriptor of this type. 
+++ */ +++ @Override +++ public String toString() { +++ return getDescriptor(); +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/TypePath.java b/contrib/asm/src/org/objectweb/asm/TypePath.java ++new file mode 100644 ++index 0000000..d9c99b1 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/TypePath.java ++@@ -0,0 +1,196 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2013 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++ +++package org.objectweb.asm; +++ +++/** +++ * The path to a type argument, wildcard bound, array element type, or static +++ * inner type within an enclosing type. +++ * +++ * @author Eric Bruneton +++ */ +++public class TypePath { +++ +++ /** +++ * A type path step that steps into the element type of an array type. See +++ * {@link #getStep getStep}. +++ */ +++ public final static int ARRAY_ELEMENT = 0; +++ +++ /** +++ * A type path step that steps into the nested type of a class type. See +++ * {@link #getStep getStep}. +++ */ +++ public final static int INNER_TYPE = 1; +++ +++ /** +++ * A type path step that steps into the bound of a wildcard type. See +++ * {@link #getStep getStep}. +++ */ +++ public final static int WILDCARD_BOUND = 2; +++ +++ /** +++ * A type path step that steps into a type argument of a generic type. See +++ * {@link #getStep getStep}. +++ */ +++ public final static int TYPE_ARGUMENT = 3; +++ +++ /** +++ * The byte array where the path is stored, in Java class file format. +++ */ +++ byte[] b; +++ +++ /** +++ * The offset of the first byte of the type path in 'b'. +++ */ +++ int offset; +++ +++ /** +++ * Creates a new type path. +++ * +++ * @param b +++ * the byte array containing the type path in Java class file +++ * format. +++ * @param offset +++ * the offset of the first byte of the type path in 'b'. 
+++ */ +++ TypePath(byte[] b, int offset) { +++ this.b = b; +++ this.offset = offset; +++ } +++ +++ /** +++ * Returns the length of this path. +++ * +++ * @return the length of this path. +++ */ +++ public int getLength() { +++ return b[offset]; +++ } +++ +++ /** +++ * Returns the value of the given step of this path. +++ * +++ * @param index +++ * an index between 0 and {@link #getLength()}, exclusive. +++ * @return {@link #ARRAY_ELEMENT ARRAY_ELEMENT}, {@link #INNER_TYPE +++ * INNER_TYPE}, {@link #WILDCARD_BOUND WILDCARD_BOUND}, or +++ * {@link #TYPE_ARGUMENT TYPE_ARGUMENT}. +++ */ +++ public int getStep(int index) { +++ return b[offset + 2 * index + 1]; +++ } +++ +++ /** +++ * Returns the index of the type argument that the given step is stepping +++ * into. This method should only be used for steps whose value is +++ * {@link #TYPE_ARGUMENT TYPE_ARGUMENT}. +++ * +++ * @param index +++ * an index between 0 and {@link #getLength()}, exclusive. +++ * @return the index of the type argument that the given step is stepping +++ * into. +++ */ +++ public int getStepArgument(int index) { +++ return b[offset + 2 * index + 2]; +++ } +++ +++ /** +++ * Converts a type path in string form, in the format used by +++ * {@link #toString()}, into a TypePath object. +++ * +++ * @param typePath +++ * a type path in string form, in the format used by +++ * {@link #toString()}. May be null or empty. +++ * @return the corresponding TypePath object, or null if the path is empty. 
+++ */ +++ public static TypePath fromString(final String typePath) { +++ if (typePath == null || typePath.length() == 0) { +++ return null; +++ } +++ int n = typePath.length(); +++ ByteVector out = new ByteVector(n); +++ out.putByte(0); +++ for (int i = 0; i < n;) { +++ char c = typePath.charAt(i++); +++ if (c == '[') { +++ out.put11(ARRAY_ELEMENT, 0); +++ } else if (c == '.') { +++ out.put11(INNER_TYPE, 0); +++ } else if (c == '*') { +++ out.put11(WILDCARD_BOUND, 0); +++ } else if (c >= '0' && c <= '9') { +++ int typeArg = c - '0'; +++ while (i < n && (c = typePath.charAt(i)) >= '0' && c <= '9') { +++ typeArg = typeArg * 10 + c - '0'; +++ i += 1; +++ } +++ if (i < n && typePath.charAt(i) == ';') { +++ i += 1; +++ } +++ out.put11(TYPE_ARGUMENT, typeArg); +++ } +++ } +++ out.data[0] = (byte) (out.length / 2); +++ return new TypePath(out.data, 0); +++ } +++ +++ /** +++ * Returns a string representation of this type path. {@link #ARRAY_ELEMENT +++ * ARRAY_ELEMENT} steps are represented with '[', {@link #INNER_TYPE +++ * INNER_TYPE} steps with '.', {@link #WILDCARD_BOUND WILDCARD_BOUND} steps +++ * with '*' and {@link #TYPE_ARGUMENT TYPE_ARGUMENT} steps with their type +++ * argument index in decimal form followed by ';'. 
+++ */ +++ @Override +++ public String toString() { +++ int length = getLength(); +++ StringBuilder result = new StringBuilder(length * 2); +++ for (int i = 0; i < length; ++i) { +++ switch (getStep(i)) { +++ case ARRAY_ELEMENT: +++ result.append('['); +++ break; +++ case INNER_TYPE: +++ result.append('.'); +++ break; +++ case WILDCARD_BOUND: +++ result.append('*'); +++ break; +++ case TYPE_ARGUMENT: +++ result.append(getStepArgument(i)).append(';'); +++ break; +++ default: +++ result.append('_'); +++ } +++ } +++ return result.toString(); +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/TypeReference.java b/contrib/asm/src/org/objectweb/asm/TypeReference.java ++new file mode 100644 ++index 0000000..dff76c0 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/TypeReference.java ++@@ -0,0 +1,452 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2013 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++ +++package org.objectweb.asm; +++ +++/** +++ * A reference to a type appearing in a class, field or method declaration, or +++ * on an instruction. Such a reference designates the part of the class where +++ * the referenced type is appearing (e.g. an 'extends', 'implements' or 'throws' +++ * clause, a 'new' instruction, a 'catch' clause, a type cast, a local variable +++ * declaration, etc). +++ * +++ * @author Eric Bruneton +++ */ +++public class TypeReference { +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * class. See {@link #getSort getSort}. +++ */ +++ public final static int CLASS_TYPE_PARAMETER = 0x00; +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * method. See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_TYPE_PARAMETER = 0x01; +++ +++ /** +++ * The sort of type references that target the super class of a class or one +++ * of the interfaces it implements. See {@link #getSort getSort}. +++ */ +++ public final static int CLASS_EXTENDS = 0x10; +++ +++ /** +++ * The sort of type references that target a bound of a type parameter of a +++ * generic class. See {@link #getSort getSort}. +++ */ +++ public final static int CLASS_TYPE_PARAMETER_BOUND = 0x11; +++ +++ /** +++ * The sort of type references that target a bound of a type parameter of a +++ * generic method. 
See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_TYPE_PARAMETER_BOUND = 0x12; +++ +++ /** +++ * The sort of type references that target the type of a field. See +++ * {@link #getSort getSort}. +++ */ +++ public final static int FIELD = 0x13; +++ +++ /** +++ * The sort of type references that target the return type of a method. See +++ * {@link #getSort getSort}. +++ */ +++ public final static int METHOD_RETURN = 0x14; +++ +++ /** +++ * The sort of type references that target the receiver type of a method. +++ * See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_RECEIVER = 0x15; +++ +++ /** +++ * The sort of type references that target the type of a formal parameter of +++ * a method. See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_FORMAL_PARAMETER = 0x16; +++ +++ /** +++ * The sort of type references that target the type of an exception declared +++ * in the throws clause of a method. See {@link #getSort getSort}. +++ */ +++ public final static int THROWS = 0x17; +++ +++ /** +++ * The sort of type references that target the type of a local variable in a +++ * method. See {@link #getSort getSort}. +++ */ +++ public final static int LOCAL_VARIABLE = 0x40; +++ +++ /** +++ * The sort of type references that target the type of a resource variable +++ * in a method. See {@link #getSort getSort}. +++ */ +++ public final static int RESOURCE_VARIABLE = 0x41; +++ +++ /** +++ * The sort of type references that target the type of the exception of a +++ * 'catch' clause in a method. See {@link #getSort getSort}. +++ */ +++ public final static int EXCEPTION_PARAMETER = 0x42; +++ +++ /** +++ * The sort of type references that target the type declared in an +++ * 'instanceof' instruction. See {@link #getSort getSort}. +++ */ +++ public final static int INSTANCEOF = 0x43; +++ +++ /** +++ * The sort of type references that target the type of the object created by +++ * a 'new' instruction. 
See {@link #getSort getSort}. +++ */ +++ public final static int NEW = 0x44; +++ +++ /** +++ * The sort of type references that target the receiver type of a +++ * constructor reference. See {@link #getSort getSort}. +++ */ +++ public final static int CONSTRUCTOR_REFERENCE = 0x45; +++ +++ /** +++ * The sort of type references that target the receiver type of a method +++ * reference. See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_REFERENCE = 0x46; +++ +++ /** +++ * The sort of type references that target the type declared in an explicit +++ * or implicit cast instruction. See {@link #getSort getSort}. +++ */ +++ public final static int CAST = 0x47; +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * constructor in a constructor call. See {@link #getSort getSort}. +++ */ +++ public final static int CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT = 0x48; +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * method in a method call. See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_INVOCATION_TYPE_ARGUMENT = 0x49; +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * constructor in a constructor reference. See {@link #getSort getSort}. +++ */ +++ public final static int CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT = 0x4A; +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * method in a method reference. See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_REFERENCE_TYPE_ARGUMENT = 0x4B; +++ +++ /** +++ * The type reference value in Java class file format. +++ */ +++ private int value; +++ +++ /** +++ * Creates a new TypeReference. +++ * +++ * @param typeRef +++ * the int encoded value of the type reference, as received in a +++ * visit method related to type annotations, like +++ * visitTypeAnnotation. 
+++ */ +++ public TypeReference(int typeRef) { +++ this.value = typeRef; +++ } +++ +++ /** +++ * Returns a type reference of the given sort. +++ * +++ * @param sort +++ * {@link #FIELD FIELD}, {@link #METHOD_RETURN METHOD_RETURN}, +++ * {@link #METHOD_RECEIVER METHOD_RECEIVER}, +++ * {@link #LOCAL_VARIABLE LOCAL_VARIABLE}, +++ * {@link #RESOURCE_VARIABLE RESOURCE_VARIABLE}, +++ * {@link #INSTANCEOF INSTANCEOF}, {@link #NEW NEW}, +++ * {@link #CONSTRUCTOR_REFERENCE CONSTRUCTOR_REFERENCE}, or +++ * {@link #METHOD_REFERENCE METHOD_REFERENCE}. +++ * @return a type reference of the given sort. +++ */ +++ public static TypeReference newTypeReference(int sort) { +++ return new TypeReference(sort << 24); +++ } +++ +++ /** +++ * Returns a reference to a type parameter of a generic class or method. +++ * +++ * @param sort +++ * {@link #CLASS_TYPE_PARAMETER CLASS_TYPE_PARAMETER} or +++ * {@link #METHOD_TYPE_PARAMETER METHOD_TYPE_PARAMETER}. +++ * @param paramIndex +++ * the type parameter index. +++ * @return a reference to the given generic class or method type parameter. +++ */ +++ public static TypeReference newTypeParameterReference(int sort, +++ int paramIndex) { +++ return new TypeReference((sort << 24) | (paramIndex << 16)); +++ } +++ +++ /** +++ * Returns a reference to a type parameter bound of a generic class or +++ * method. +++ * +++ * @param sort +++ * {@link #CLASS_TYPE_PARAMETER CLASS_TYPE_PARAMETER} or +++ * {@link #METHOD_TYPE_PARAMETER METHOD_TYPE_PARAMETER}. +++ * @param paramIndex +++ * the type parameter index. +++ * @param boundIndex +++ * the type bound index within the above type parameters. +++ * @return a reference to the given generic class or method type parameter +++ * bound. 
+++ */ +++ public static TypeReference newTypeParameterBoundReference(int sort, +++ int paramIndex, int boundIndex) { +++ return new TypeReference((sort << 24) | (paramIndex << 16) +++ | (boundIndex << 8)); +++ } +++ +++ /** +++ * Returns a reference to the super class or to an interface of the +++ * 'implements' clause of a class. +++ * +++ * @param itfIndex +++ * the index of an interface in the 'implements' clause of a +++ * class, or -1 to reference the super class of the class. +++ * @return a reference to the given super type of a class. +++ */ +++ public static TypeReference newSuperTypeReference(int itfIndex) { +++ itfIndex &= 0xFFFF; +++ return new TypeReference((CLASS_EXTENDS << 24) | (itfIndex << 8)); +++ } +++ +++ /** +++ * Returns a reference to the type of a formal parameter of a method. +++ * +++ * @param paramIndex +++ * the formal parameter index. +++ * +++ * @return a reference to the type of the given method formal parameter. +++ */ +++ public static TypeReference newFormalParameterReference(int paramIndex) { +++ return new TypeReference((METHOD_FORMAL_PARAMETER << 24) +++ | (paramIndex << 16)); +++ } +++ +++ /** +++ * Returns a reference to the type of an exception, in a 'throws' clause of +++ * a method. +++ * +++ * @param exceptionIndex +++ * the index of an exception in a 'throws' clause of a method. +++ * +++ * @return a reference to the type of the given exception. +++ */ +++ public static TypeReference newExceptionReference(int exceptionIndex) { +++ return new TypeReference((THROWS << 24) | (exceptionIndex << 8)); +++ } +++ +++ /** +++ * Returns a reference to the type of the exception declared in a 'catch' +++ * clause of a method. +++ * +++ * @param tryCatchBlockIndex +++ * the index of a try catch block (using the order in which they +++ * are visited with visitTryCatchBlock). +++ * +++ * @return a reference to the type of the given exception. 
+++ */ +++ public static TypeReference newTryCatchReference(int tryCatchBlockIndex) { +++ return new TypeReference((EXCEPTION_PARAMETER << 24) +++ | (tryCatchBlockIndex << 8)); +++ } +++ +++ /** +++ * Returns a reference to the type of a type argument in a constructor or +++ * method call or reference. +++ * +++ * @param sort +++ * {@link #CAST CAST}, +++ * {@link #CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ * CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #METHOD_INVOCATION_TYPE_ARGUMENT +++ * METHOD_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ * CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT}, or +++ * {@link #METHOD_REFERENCE_TYPE_ARGUMENT +++ * METHOD_REFERENCE_TYPE_ARGUMENT}. +++ * @param argIndex +++ * the type argument index. +++ * +++ * @return a reference to the type of the given type argument. +++ */ +++ public static TypeReference newTypeArgumentReference(int sort, int argIndex) { +++ return new TypeReference((sort << 24) | argIndex); +++ } +++ +++ /** +++ * Returns the sort of this type reference. 
+++ * +++ * @return {@link #CLASS_TYPE_PARAMETER CLASS_TYPE_PARAMETER}, +++ * {@link #METHOD_TYPE_PARAMETER METHOD_TYPE_PARAMETER}, +++ * {@link #CLASS_EXTENDS CLASS_EXTENDS}, +++ * {@link #CLASS_TYPE_PARAMETER_BOUND CLASS_TYPE_PARAMETER_BOUND}, +++ * {@link #METHOD_TYPE_PARAMETER_BOUND METHOD_TYPE_PARAMETER_BOUND}, +++ * {@link #FIELD FIELD}, {@link #METHOD_RETURN METHOD_RETURN}, +++ * {@link #METHOD_RECEIVER METHOD_RECEIVER}, +++ * {@link #METHOD_FORMAL_PARAMETER METHOD_FORMAL_PARAMETER}, +++ * {@link #THROWS THROWS}, {@link #LOCAL_VARIABLE LOCAL_VARIABLE}, +++ * {@link #RESOURCE_VARIABLE RESOURCE_VARIABLE}, +++ * {@link #EXCEPTION_PARAMETER EXCEPTION_PARAMETER}, +++ * {@link #INSTANCEOF INSTANCEOF}, {@link #NEW NEW}, +++ * {@link #CONSTRUCTOR_REFERENCE CONSTRUCTOR_REFERENCE}, +++ * {@link #METHOD_REFERENCE METHOD_REFERENCE}, {@link #CAST CAST}, +++ * {@link #CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ * CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #METHOD_INVOCATION_TYPE_ARGUMENT +++ * METHOD_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ * CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT}, or +++ * {@link #METHOD_REFERENCE_TYPE_ARGUMENT +++ * METHOD_REFERENCE_TYPE_ARGUMENT}. +++ */ +++ public int getSort() { +++ return value >>> 24; +++ } +++ +++ /** +++ * Returns the index of the type parameter referenced by this type +++ * reference. This method must only be used for type references whose sort +++ * is {@link #CLASS_TYPE_PARAMETER CLASS_TYPE_PARAMETER}, +++ * {@link #METHOD_TYPE_PARAMETER METHOD_TYPE_PARAMETER}, +++ * {@link #CLASS_TYPE_PARAMETER_BOUND CLASS_TYPE_PARAMETER_BOUND} or +++ * {@link #METHOD_TYPE_PARAMETER_BOUND METHOD_TYPE_PARAMETER_BOUND}. +++ * +++ * @return a type parameter index. 
+++ */ +++ public int getTypeParameterIndex() { +++ return (value & 0x00FF0000) >> 16; +++ } +++ +++ /** +++ * Returns the index of the type parameter bound, within the type parameter +++ * {@link #getTypeParameterIndex}, referenced by this type reference. This +++ * method must only be used for type references whose sort is +++ * {@link #CLASS_TYPE_PARAMETER_BOUND CLASS_TYPE_PARAMETER_BOUND} or +++ * {@link #METHOD_TYPE_PARAMETER_BOUND METHOD_TYPE_PARAMETER_BOUND}. +++ * +++ * @return a type parameter bound index. +++ */ +++ public int getTypeParameterBoundIndex() { +++ return (value & 0x0000FF00) >> 8; +++ } +++ +++ /** +++ * Returns the index of the "super type" of a class that is referenced by +++ * this type reference. This method must only be used for type references +++ * whose sort is {@link #CLASS_EXTENDS CLASS_EXTENDS}. +++ * +++ * @return the index of an interface in the 'implements' clause of a class, +++ * or -1 if this type reference references the type of the super +++ * class. +++ */ +++ public int getSuperTypeIndex() { +++ return (short) ((value & 0x00FFFF00) >> 8); +++ } +++ +++ /** +++ * Returns the index of the formal parameter whose type is referenced by +++ * this type reference. This method must only be used for type references +++ * whose sort is {@link #METHOD_FORMAL_PARAMETER METHOD_FORMAL_PARAMETER}. +++ * +++ * @return a formal parameter index. +++ */ +++ public int getFormalParameterIndex() { +++ return (value & 0x00FF0000) >> 16; +++ } +++ +++ /** +++ * Returns the index of the exception, in a 'throws' clause of a method, +++ * whose type is referenced by this type reference. This method must only be +++ * used for type references whose sort is {@link #THROWS THROWS}. +++ * +++ * @return the index of an exception in the 'throws' clause of a method. 
+++ */ +++ public int getExceptionIndex() { +++ return (value & 0x00FFFF00) >> 8; +++ } +++ +++ /** +++ * Returns the index of the try catch block (using the order in which they +++ * are visited with visitTryCatchBlock), whose 'catch' type is referenced by +++ * this type reference. This method must only be used for type references +++ * whose sort is {@link #EXCEPTION_PARAMETER EXCEPTION_PARAMETER} . +++ * +++ * @return the index of an exception in the 'throws' clause of a method. +++ */ +++ public int getTryCatchBlockIndex() { +++ return (value & 0x00FFFF00) >> 8; +++ } +++ +++ /** +++ * Returns the index of the type argument referenced by this type reference. +++ * This method must only be used for type references whose sort is +++ * {@link #CAST CAST}, {@link #CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ * CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #METHOD_INVOCATION_TYPE_ARGUMENT METHOD_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ * CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT}, or +++ * {@link #METHOD_REFERENCE_TYPE_ARGUMENT METHOD_REFERENCE_TYPE_ARGUMENT}. +++ * +++ * @return a type parameter index. +++ */ +++ public int getTypeArgumentIndex() { +++ return value & 0xFF; +++ } +++ +++ /** +++ * Returns the int encoded value of this type reference, suitable for use in +++ * visit methods related to type annotations, like visitTypeAnnotation. +++ * +++ * @return the int encoded value of this type reference. 
+++ */ +++ public int getValue() { +++ return value; +++ } +++} ++diff --git a/includes/inttypes.h b/includes/inttypes.h ++new file mode 100644 ++index 0000000..ead903f ++--- /dev/null +++++ b/includes/inttypes.h ++@@ -0,0 +1,305 @@ +++// ISO C9x compliant inttypes.h for Microsoft Visual Studio +++// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +++// +++// Copyright (c) 2006 Alexander Chemeris +++// +++// Redistribution and use in source and binary forms, with or without +++// modification, are permitted provided that the following conditions are met: +++// +++// 1. Redistributions of source code must retain the above copyright notice, +++// this list of conditions and the following disclaimer. +++// +++// 2. Redistributions in binary form must reproduce the above copyright +++// notice, this list of conditions and the following disclaimer in the +++// documentation and/or other materials provided with the distribution. +++// +++// 3. The name of the author may be used to endorse or promote products +++// derived from this software without specific prior written permission. +++// +++// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +++// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +++// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +++// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +++// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +++// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +++// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +++// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +++// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+++// +++/////////////////////////////////////////////////////////////////////////////// +++ +++#ifndef _MSC_VER // [ +++#error "Use this header only with Microsoft Visual C++ compilers!" +++#endif // _MSC_VER ] +++ +++#ifndef _MSC_INTTYPES_H_ // [ +++#define _MSC_INTTYPES_H_ +++ +++#if _MSC_VER > 1000 +++#pragma once +++#endif +++ +++#include "stdint.h" +++ +++// 7.8 Format conversion of integer types +++ +++typedef struct { +++ intmax_t quot; +++ intmax_t rem; +++} imaxdiv_t; +++ +++// 7.8.1 Macros for format specifiers +++ +++#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 +++ +++// The fprintf macros for signed integers are: +++#define PRId8 "d" +++#define PRIi8 "i" +++#define PRIdLEAST8 "d" +++#define PRIiLEAST8 "i" +++#define PRIdFAST8 "d" +++#define PRIiFAST8 "i" +++ +++#define PRId16 "hd" +++#define PRIi16 "hi" +++#define PRIdLEAST16 "hd" +++#define PRIiLEAST16 "hi" +++#define PRIdFAST16 "hd" +++#define PRIiFAST16 "hi" +++ +++#define PRId32 "I32d" +++#define PRIi32 "I32i" +++#define PRIdLEAST32 "I32d" +++#define PRIiLEAST32 "I32i" +++#define PRIdFAST32 "I32d" +++#define PRIiFAST32 "I32i" +++ +++#define PRId64 "I64d" +++#define PRIi64 "I64i" +++#define PRIdLEAST64 "I64d" +++#define PRIiLEAST64 "I64i" +++#define PRIdFAST64 "I64d" +++#define PRIiFAST64 "I64i" +++ +++#define PRIdMAX "I64d" +++#define PRIiMAX "I64i" +++ +++#define PRIdPTR "Id" +++#define PRIiPTR "Ii" +++ +++// The fprintf macros for unsigned integers are: +++#define PRIo8 "o" +++#define PRIu8 "u" +++#define PRIx8 "x" +++#define PRIX8 "X" +++#define PRIoLEAST8 "o" +++#define PRIuLEAST8 "u" +++#define PRIxLEAST8 "x" +++#define PRIXLEAST8 "X" +++#define PRIoFAST8 "o" +++#define PRIuFAST8 "u" +++#define PRIxFAST8 "x" +++#define PRIXFAST8 "X" +++ +++#define PRIo16 "ho" +++#define PRIu16 "hu" +++#define PRIx16 "hx" +++#define PRIX16 "hX" +++#define PRIoLEAST16 "ho" +++#define PRIuLEAST16 "hu" +++#define PRIxLEAST16 "hx" +++#define PRIXLEAST16 "hX" 
+++#define PRIoFAST16 "ho" +++#define PRIuFAST16 "hu" +++#define PRIxFAST16 "hx" +++#define PRIXFAST16 "hX" +++ +++#define PRIo32 "I32o" +++#define PRIu32 "I32u" +++#define PRIx32 "I32x" +++#define PRIX32 "I32X" +++#define PRIoLEAST32 "I32o" +++#define PRIuLEAST32 "I32u" +++#define PRIxLEAST32 "I32x" +++#define PRIXLEAST32 "I32X" +++#define PRIoFAST32 "I32o" +++#define PRIuFAST32 "I32u" +++#define PRIxFAST32 "I32x" +++#define PRIXFAST32 "I32X" +++ +++#define PRIo64 "I64o" +++#define PRIu64 "I64u" +++#define PRIx64 "I64x" +++#define PRIX64 "I64X" +++#define PRIoLEAST64 "I64o" +++#define PRIuLEAST64 "I64u" +++#define PRIxLEAST64 "I64x" +++#define PRIXLEAST64 "I64X" +++#define PRIoFAST64 "I64o" +++#define PRIuFAST64 "I64u" +++#define PRIxFAST64 "I64x" +++#define PRIXFAST64 "I64X" +++ +++#define PRIoMAX "I64o" +++#define PRIuMAX "I64u" +++#define PRIxMAX "I64x" +++#define PRIXMAX "I64X" +++ +++#define PRIoPTR "Io" +++#define PRIuPTR "Iu" +++#define PRIxPTR "Ix" +++#define PRIXPTR "IX" +++ +++// The fscanf macros for signed integers are: +++#define SCNd8 "d" +++#define SCNi8 "i" +++#define SCNdLEAST8 "d" +++#define SCNiLEAST8 "i" +++#define SCNdFAST8 "d" +++#define SCNiFAST8 "i" +++ +++#define SCNd16 "hd" +++#define SCNi16 "hi" +++#define SCNdLEAST16 "hd" +++#define SCNiLEAST16 "hi" +++#define SCNdFAST16 "hd" +++#define SCNiFAST16 "hi" +++ +++#define SCNd32 "ld" +++#define SCNi32 "li" +++#define SCNdLEAST32 "ld" +++#define SCNiLEAST32 "li" +++#define SCNdFAST32 "ld" +++#define SCNiFAST32 "li" +++ +++#define SCNd64 "I64d" +++#define SCNi64 "I64i" +++#define SCNdLEAST64 "I64d" +++#define SCNiLEAST64 "I64i" +++#define SCNdFAST64 "I64d" +++#define SCNiFAST64 "I64i" +++ +++#define SCNdMAX "I64d" +++#define SCNiMAX "I64i" +++ +++#ifdef _WIN64 // [ +++# define SCNdPTR "I64d" +++# define SCNiPTR "I64i" +++#else // _WIN64 ][ +++# define SCNdPTR "ld" +++# define SCNiPTR "li" +++#endif // _WIN64 ] +++ +++// The fscanf macros for unsigned integers are: +++#define SCNo8 "o" 
+++#define SCNu8 "u" +++#define SCNx8 "x" +++#define SCNX8 "X" +++#define SCNoLEAST8 "o" +++#define SCNuLEAST8 "u" +++#define SCNxLEAST8 "x" +++#define SCNXLEAST8 "X" +++#define SCNoFAST8 "o" +++#define SCNuFAST8 "u" +++#define SCNxFAST8 "x" +++#define SCNXFAST8 "X" +++ +++#define SCNo16 "ho" +++#define SCNu16 "hu" +++#define SCNx16 "hx" +++#define SCNX16 "hX" +++#define SCNoLEAST16 "ho" +++#define SCNuLEAST16 "hu" +++#define SCNxLEAST16 "hx" +++#define SCNXLEAST16 "hX" +++#define SCNoFAST16 "ho" +++#define SCNuFAST16 "hu" +++#define SCNxFAST16 "hx" +++#define SCNXFAST16 "hX" +++ +++#define SCNo32 "lo" +++#define SCNu32 "lu" +++#define SCNx32 "lx" +++#define SCNX32 "lX" +++#define SCNoLEAST32 "lo" +++#define SCNuLEAST32 "lu" +++#define SCNxLEAST32 "lx" +++#define SCNXLEAST32 "lX" +++#define SCNoFAST32 "lo" +++#define SCNuFAST32 "lu" +++#define SCNxFAST32 "lx" +++#define SCNXFAST32 "lX" +++ +++#define SCNo64 "I64o" +++#define SCNu64 "I64u" +++#define SCNx64 "I64x" +++#define SCNX64 "I64X" +++#define SCNoLEAST64 "I64o" +++#define SCNuLEAST64 "I64u" +++#define SCNxLEAST64 "I64x" +++#define SCNXLEAST64 "I64X" +++#define SCNoFAST64 "I64o" +++#define SCNuFAST64 "I64u" +++#define SCNxFAST64 "I64x" +++#define SCNXFAST64 "I64X" +++ +++#define SCNoMAX "I64o" +++#define SCNuMAX "I64u" +++#define SCNxMAX "I64x" +++#define SCNXMAX "I64X" +++ +++#ifdef _WIN64 // [ +++# define SCNoPTR "I64o" +++# define SCNuPTR "I64u" +++# define SCNxPTR "I64x" +++# define SCNXPTR "I64X" +++#else // _WIN64 ][ +++# define SCNoPTR "lo" +++# define SCNuPTR "lu" +++# define SCNxPTR "lx" +++# define SCNXPTR "lX" +++#endif // _WIN64 ] +++ +++#endif // __STDC_FORMAT_MACROS ] +++ +++// 7.8.2 Functions for greatest-width integer types +++ +++// 7.8.2.1 The imaxabs function +++#define imaxabs _abs64 +++ +++// 7.8.2.2 The imaxdiv function +++ +++// This is modified version of div() function from Microsoft's div.c found +++// in %MSVC.NET%\crt\src\div.c +++#ifdef STATIC_IMAXDIV // [ +++static +++#else // 
STATIC_IMAXDIV ][ +++_inline +++#endif // STATIC_IMAXDIV ] +++imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +++{ +++ imaxdiv_t result; +++ +++ result.quot = numer / denom; +++ result.rem = numer % denom; +++ +++ if (numer < 0 && result.rem > 0) { +++ // did division wrong; must fix up +++ ++result.quot; +++ result.rem -= denom; +++ } +++ +++ return result; +++} +++ +++// 7.8.2.3 The strtoimax and strtoumax functions +++#define strtoimax _strtoi64 +++#define strtoumax _strtoui64 +++ +++// 7.8.2.4 The wcstoimax and wcstoumax functions +++#define wcstoimax _wcstoi64 +++#define wcstoumax _wcstoui64 +++ +++ +++#endif // _MSC_INTTYPES_H_ ] ++diff --git a/libbluray.def b/libbluray.def ++new file mode 100644 ++index 0000000..d4c93cb ++--- /dev/null +++++ b/libbluray.def ++@@ -0,0 +1,63 @@ +++; libbluray.def ; declares the exports +++ +++LIBRARY "libbluray.dll" +++ +++EXPORTS +++ ; bluray.h +++ bd_get_version +++ bd_get_titles +++ bd_get_title_info +++ bd_get_playlist_info +++ bd_free_title_info +++ bd_open +++ bd_close +++ bd_seek +++ bd_seek_time +++ bd_find_seek_point +++ bd_read +++ bd_read_skip_still +++ bd_seek_chapter +++ bd_chapter_pos +++ bd_get_current_chapter +++ bd_seek_mark +++ bd_seek_playitem +++ bd_select_playlist +++ bd_select_title +++ bd_select_angle +++ bd_seamless_angle_change +++ bd_get_title_size +++ bd_get_current_title +++ bd_get_current_angle +++ bd_tell +++ bd_tell_time +++ bd_get_disc_info +++ bd_set_player_setting +++ bd_set_player_setting_str +++ bd_start_bdj +++ bd_stop_bdj +++ bd_get_event +++ bd_play +++ bd_read_ext +++ bd_play_title +++ bd_menu_call +++ bd_register_overlay_proc +++ bd_register_argb_overlay_proc +++ bd_set_scr +++ bd_user_input +++ bd_mouse_select +++ bd_get_sound_effect +++ bd_get_meta +++ bd_get_clpi +++ bd_read_clpi +++ bd_free_clpi +++ bd_read_mpls +++ bd_free_mpls +++ bd_read_mobj +++ bd_free_mobj +++ bd_get_clip_infos +++ bd_get_title_mpls +++ +++ ; additional functions +++ bd_set_debug_handler +++ 
bd_set_debug_mask +++ bd_get_debug_mask ++diff --git a/libbluray.vcxproj b/libbluray.vcxproj ++new file mode 100644 ++index 0000000..c778955 ++--- /dev/null +++++ b/libbluray.vcxproj ++@@ -0,0 +1,231 @@ +++ +++ +++ +++ +++ DebugRelease +++ Win32 +++ +++ +++ DebugRelease +++ x64 +++ +++ +++ Debug +++ Win32 +++ +++ +++ Debug +++ x64 +++ +++ +++ Release +++ Win32 +++ +++ +++ Release +++ x64 +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ {E1DA1B95-71F1-4C21-A271-121176925062} +++ Win32Proj +++ libbluray +++ +++ +++ +++ v110_xp +++ +++ +++ v120_xp +++ +++ +++ v140_xp +++ +++ +++ DynamicLibrary +++ true +++ Unicode +++ +++ +++ DynamicLibrary +++ false +++ true +++ Unicode +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ true +++ $(SolutionDir)bin_$(PlatformName)d\ +++ $(SolutionDir)bin_$(PlatformName)d\$(ProjectName)\ +++ +++ +++ false +++ $(SolutionDir)bin_$(PlatformName)\$(ProjectName)\ +++ $(SolutionDir)bin_$(PlatformName)\$(ProjectName)\ +++ +++ +++ +++ +++ +++ Level3 +++ Disabled +++ HAVE_CONFIG_H;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBBLURAY_EXPORTS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) +++ __STDC_FORMAT_MACROS;%(PreprocessorDefinitions) +++ $(ProjectDir);$(ProjectDir)includes;$(ProjectDir)src;$(ProjectDir)src\libbluray;%(AdditionalIncludeDirectories) +++ MultiThreadedDebugDLL +++ CompileAsCpp +++ +++ +++ Windows +++ true +++ libbluray.def +++ +++ +++ xcopy /I /Y "$(OutDir)$(TargetName).lib" "$(OutDir)lib\" +++ Copy .lib into library path +++ +++ +++ +++ +++ MultiThreadedDebug +++ +++ +++ +++ +++ Level3 +++ MaxSpeed +++ true +++ true +++ 
HAVE_CONFIG_H;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBBLURAY_EXPORTS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) +++ __STDC_FORMAT_MACROS;%(PreprocessorDefinitions) +++ $(ProjectDir);$(ProjectDir)includes;$(ProjectDir)src;$(ProjectDir)src\libbluray;%(AdditionalIncludeDirectories) +++ MultiThreaded +++ StreamingSIMDExtensions +++ CompileAsCpp +++ +++ +++ Windows +++ true +++ true +++ true +++ libbluray.def +++ true +++ +++ +++ xcopy /I /Y "$(TargetDir)$(TargetName)$(TargetExt)" "$(OutDir)..\" +++xcopy /I /Y "$(TargetDir)$(TargetName).lib" "$(OutDir)..\lib\" +++ Copy .dll/.lib into library path +++ +++ +++ +++ +++ +++ ++\ No newline at end of file ++diff --git a/libbluray.vcxproj.filters b/libbluray.vcxproj.filters ++new file mode 100644 ++index 0000000..57ff16c ++--- /dev/null +++++ b/libbluray.vcxproj.filters ++@@ -0,0 +1,353 @@ +++ +++ +++ +++ +++ {4FC737F1-C7A5-4376-A066-2A32D752A2FF} +++ cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx +++ +++ +++ {93995380-89BD-4b04-88EB-625FBE52EBFB} +++ h;hpp;hxx;hm;inl;inc;xsd +++ +++ +++ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} +++ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms +++ +++ +++ {1ab0e905-7c04-4090-b385-6363dd1c961c} +++ +++ +++ {b8fa3348-a089-461f-9ef5-3d9df997b8e5} +++ +++ +++ {125333e9-0b5e-45f9-a444-f0aaaf547d9b} +++ +++ +++ {a743058f-f07a-4d0f-bab6-02dc57defda9} +++ +++ +++ {7dacc7c4-ef59-452b-9e5b-392c9df07c98} +++ +++ +++ {c7895c81-c186-4d5e-a8ff-645c6d55a731} +++ +++ +++ {c8619466-211b-4c85-9d30-d1b1a822d32e} +++ +++ +++ {8afb6919-994f-4d1f-9638-ce4a06d0b473} +++ +++ +++ {0e9086a7-eebf-4b8e-a4fe-b1724d148877} +++ +++ +++ {fc5e776b-0f32-493a-b823-240288288502} +++ +++ +++ {96d2d786-cd45-4856-937d-9e6f85ced241} +++ +++ +++ {9f4ea4ae-217a-4d97-a5f3-e561ce1e49cd} +++ +++ +++ {09e1b1b8-3aa3-4918-b157-3dfc0554ccbb} +++ +++ +++ {1e02e503-752e-4765-9dfb-8cc67a7b79f8} +++ +++ +++ +++ +++ Header Files\util +++ +++ +++ Header Files\util +++ +++ +++ Header Files\util +++ +++ +++ Header 
Files\util +++ +++ +++ Header Files\util +++ +++ +++ Header Files\util +++ +++ +++ Header Files\util +++ +++ +++ Header Files\file +++ +++ +++ Header Files\file +++ +++ +++ Header Files\file +++ +++ +++ Header Files\libbluray +++ +++ +++ Header Files\libbluray +++ +++ +++ Header Files\libbluray +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\hdmv +++ +++ +++ Header Files\libbluray\hdmv +++ +++ +++ Header Files\libbluray\hdmv +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\bdnav +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\util +++ +++ +++ Header Files\util +++ +++ +++ Header Files\libbluray +++ +++ +++ Header Files\libbluray\decoders +++ +++ +++ Header Files\file +++ +++ +++ Header Files\libbluray +++ +++ +++ Header Files\util +++ +++ +++ Header Files\libbluray\disc +++ +++ +++ Header Files\libbluray\disc +++ +++ +++ Header Files\libbluray\disc +++ +++ +++ Header Files\libbluray\disc +++ +++ +++ Header Files\libbluray\disc +++ +++ +++ Header Files\file +++ +++ +++ +++ +++ Source Files\util +++ +++ +++ 
Source Files\util +++ +++ +++ Source Files\file +++ +++ +++ Source Files\libbluray +++ +++ +++ Source Files\libbluray +++ +++ +++ Source Files\libbluray\bdnav +++ +++ +++ Source Files\libbluray\bdnav +++ +++ +++ Source Files\libbluray\bdnav +++ +++ +++ Source Files\libbluray\bdnav +++ +++ +++ Source Files\libbluray\bdnav +++ +++ +++ Source Files\libbluray\bdnav +++ +++ +++ Source Files\libbluray\decoders +++ +++ +++ Source Files\libbluray\decoders +++ +++ +++ Source Files\libbluray\decoders +++ +++ +++ Source Files\libbluray\decoders +++ +++ +++ Source Files\libbluray\decoders +++ +++ +++ Source Files\libbluray\decoders +++ +++ +++ Source Files\libbluray\hdmv +++ +++ +++ Source Files\libbluray\hdmv +++ +++ +++ Source Files\libbluray\hdmv +++ +++ +++ Source Files\libbluray\bdnav +++ +++ +++ Source Files\libbluray\bdnav +++ +++ +++ Source Files\util +++ +++ +++ Source Files\file +++ +++ +++ Source Files\file +++ +++ +++ Source Files\file +++ +++ +++ Source Files\libbluray\decoders +++ +++ +++ Source Files\libbluray\decoders +++ +++ +++ Source Files\util +++ +++ +++ Source Files\libbluray\decoders +++ +++ +++ Source Files\libbluray\decoders +++ +++ +++ Source Files\file +++ +++ +++ Source Files\file +++ +++ +++ Source Files\util +++ +++ +++ Source Files\util +++ +++ +++ Source Files\libbluray\disc +++ +++ +++ Source Files\libbluray\disc +++ +++ +++ Source Files\libbluray\disc +++ +++ +++ Source Files\libbluray\disc +++ +++ +++ Source Files\util +++ +++ +++ Source Files\file +++ +++ +++ +++ +++ Source Files +++ +++ +++ ++\ No newline at end of file ++diff --git a/src/devtools/bdj_test.c b/src/devtools/bdj_test.c ++new file mode 100644 ++index 0000000..d9ebd16 ++--- /dev/null +++++ b/src/devtools/bdj_test.c ++@@ -0,0 +1,67 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2010 William Hahne +++ * +++ * This program is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU General Public License +++ * as published by the 
Free Software Foundation; either version 2 +++ * of the License, or (at your option) any later version. +++ * +++ * This program is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +++ * GNU General Public License for more details. +++ * +++ * You should have received a copy of the GNU General Public License +++ * along with this program. If not, see . +++ * +++ * In addition, as a special exception, the copyright holders of libbluray +++ * gives permission to link the code of its release of libbluray with the +++ * OpenSSL project's "OpenSSL" library (or with modified versions of it +++ * that use the same license as the "OpenSSL" library), and distribute +++ * the linked executables. You must obey the GNU General Public License +++ * in all respects for all of the code used other than "OpenSSL". If you +++ * modify this file, you may extend this exception to your version of the +++ * file, but you are not obligated to do so. If you do not wish to do +++ * so, delete this exception statement from your version. 
+++ */ +++ +++#include +++#include +++#include +++ +++#include "libbluray/bluray.h" +++ +++#if defined(_WIN32) +++#include +++#define sleep(x) Sleep(x) +++#endif +++ +++static void _usage(void) { +++ printf("Usage: [path to disc] [starting object]\n"); +++} +++ +++int main(int argc, char** argv) +++{ +++ if (argc < 3) { +++ _usage(); +++ return 0; +++ } +++ +++ printf("%s %s\n", argv[1], argv[2]); +++ +++ BLURAY* bd = bd_open(argv[1], NULL); +++ +++ bd_get_titles(bd, TITLES_ALL, 0); +++ +++ if (!bd_start_bdj(bd, argv[2])) { +++ printf("Failed to start BD-J application.\n"); +++ } else { +++ while (1) { sleep(20); } +++ bd_stop_bdj(bd); +++ } +++ +++ bd_close(bd); +++ +++ return 0; +++} ++diff --git a/src/devtools/bdjo_dump.c b/src/devtools/bdjo_dump.c ++new file mode 100644 ++index 0000000..c9c8141 ++--- /dev/null +++++ b/src/devtools/bdjo_dump.c ++@@ -0,0 +1,206 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2014 Petri Hintukainen +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * . +++ */ +++ +++#include "libbluray/bluray.h" +++#include "libbluray/bdj/bdjo_data.h" +++ +++#include +++#include +++ +++static const char *_yes_no(int i) +++{ +++ return i > 0 ? "yes" : i < 0 ? 
"unknown" : "no"; +++} +++ +++static const char *_binding_str(int i) +++{ +++ switch (i) { +++ case 0: return "unbound"; +++ case 1: return "disc bound"; +++ case 3: return "title bound"; +++ default: return "???"; +++ } +++} +++ +++static const char *_visibility_str(int i) +++{ +++ switch (i) { +++ case 0: return "none"; +++ case 1: return "applications"; +++ case 2: return "user"; +++ default: return "???"; +++ } +++} +++ +++static void _terminal_info_print(const BDJO_TERMINAL_INFO *p) +++{ +++ printf("Terminal Info:\n"); +++ printf(" Default AWT font : %s\n", p->default_font); +++ printf(" initial HaVi config : %d\n", p->initial_havi_config_id); +++ printf(" Menu call mask : %d\n", p->menu_call_mask); +++ printf(" Title search mask : %d\n", p->menu_call_mask); +++} +++ +++static void _app_cache_item_print(const BDJO_APP_CACHE_ITEM *p) +++{ +++ printf(" %3.3s: %s%s\n", +++ p->lang_code, p->ref_to_name, +++ p->type == 1 ? ".jar" : p->type == 2 ? "/" : " (unknown type)"); +++} +++ +++static void _app_cache_info_print(const BDJO_APP_CACHE_INFO *p) +++{ +++ unsigned ii; +++ +++ printf("Application cache info:\n"); +++ for (ii = 0; ii < p->num_item; ii++) { +++ _app_cache_item_print(&p->item[ii]); +++ } +++} +++ +++static void _accessible_playlists_print(const BDJO_ACCESSIBLE_PLAYLISTS *p) +++{ +++ unsigned ii; +++ +++ printf("Accessible playlists:\n"); +++ printf(" Access to all : %s\n", _yes_no(p->access_to_all_flag)); +++ printf(" Autostart first : %s\n", _yes_no(p->autostart_first_playlist_flag)); +++ +++ if (p->num_pl) { +++ printf(" Playlists : %d\n", p->num_pl); +++ for (ii = 0; ii < p->num_pl; ii++) { +++ printf(" %s.mpls\n", p->pl[ii].name); +++ } +++ } +++} +++ +++static void _app_profile_print(BDJO_APP_PROFILE *p) +++{ +++ printf(" Profile %d Version %d.%d.%d\n", +++ p->profile_number, p->major_version, p->minor_version, p->micro_version); +++} +++ +++static void _app_print(const BDJO_APP *p) +++{ +++ unsigned ii; +++ +++ printf(" Control code: : %d 
(%s)\n", p->control_code, +++ p->control_code == 1 ? "autostart" : p->control_code == 2 ? "present" : "???"); +++ printf(" Type : %d (%s)\n", p->type, +++ p->type == 1 ? "BD-J App" : "???"); +++ printf(" Organization ID : %08X\n", p->org_id); +++ printf(" Application ID : %04X\n", p->app_id); +++ printf(" Priority : %d\n", p->priority); +++ printf(" Binding : %d (%s)\n", p->binding, _binding_str(p->binding)); +++ printf(" Visibility : %d (%s)\n", p->visibility, _visibility_str(p->visibility)); +++ +++ if (p->num_profile) { +++ printf(" Profiles:\n"); +++ for (ii = 0; ii < p->num_profile; ii++) { +++ _app_profile_print(&p->profile[ii]); +++ } +++ } +++ +++ if (p->num_name) { +++ printf(" Names:\n"); +++ for (ii = 0; ii < p->num_name; ii++) { +++ printf(" %s: %s\n", p->name[ii].lang, p->name[ii].name); +++ } +++ } +++ +++ printf(" Base directory : %s\n", p->base_dir); +++ printf(" Icon locator : %s\n", p->icon_locator); +++ printf(" Icon flags : 0x%04x\n", p->icon_flags); +++ printf(" Classpath extension : %s\n", p->classpath_extension); +++ printf(" Initial class : %s\n", p->initial_class); +++ printf(" Parameters : "); +++ for (ii = 0; ii < p->num_param; ii++) { +++ printf("%s ", p->param[ii].param); +++ } +++ printf("\n"); +++} +++ +++static void _app_management_table_print(const BDJO_APP_MANAGEMENT_TABLE *p) +++{ +++ unsigned ii; +++ +++ for (ii = 0; ii < p->num_app; ii++) { +++ printf("Application %u:\n", ii); +++ _app_print(&p->app[ii]); +++ } +++} +++ +++static void _key_interest_table_print(const BDJO_KEY_INTEREST_TABLE *p) +++{ +++ unsigned int v; +++ memcpy(&v, p, sizeof(unsigned int)); +++ if (v) { +++ printf("Key interest table:\n"); +++ printf(" %s%s%s%s%s%s%s%s%s%s%s\n", +++ p->vk_play ? "VK_PLAY " : "", +++ p->vk_stop ? "VK_STOP " : "", +++ p->vk_ffw ? "VK_FFW " : "", +++ p->vk_rew ? "VK_REW " : "", +++ p->vk_track_next ? "VK_TRACK_NEXT " : "", +++ p->vk_track_prev ? "VK_TRACK_PREV " : "", +++ p->vk_pause ? "VK_PAUSE " : "", +++ p->vk_still_off ? 
"VK_STILL_OFF " : "", +++ p->vk_sec_audio_ena_dis ? "VK_SEC_AUDIO " : "", +++ p->vk_sec_video_ena_dis ? "VK_SEC_VIDEO " : "", +++ p->pg_textst_ena_dis ? "VK_PG_TEXTST " : ""); +++ } +++} +++ +++static void _file_access_info_print(const BDJO_FILE_ACCESS_INFO *p) +++{ +++ printf("File access info:\n %s\n", p->path); +++} +++ +++static void _bdjo_print(const BDJO *p) +++{ +++ _terminal_info_print(&p->terminal_info); +++ _app_cache_info_print(&p->app_cache_info); +++ _accessible_playlists_print(&p->accessible_playlists); +++ _app_management_table_print(&p->app_table); +++ _key_interest_table_print(&p->key_interest_table); +++ _file_access_info_print(&p->file_access_info); +++} +++ +++int main(int argc, const char *argv[]) +++{ +++ if (argc < 2) { +++ fprintf(stderr, "usage: %s \n", argv[0]); +++ return 1; +++ } +++ +++ int cnt; +++ for (cnt = 1; cnt < argc; cnt++) { +++ +++ printf("%s\n", argv[cnt]); +++ +++ BDJO *bdjo = bd_read_bdjo(argv[cnt]); +++ if (bdjo) { +++ _bdjo_print(bdjo); +++ bd_free_bdjo(bdjo); +++ } +++ printf("\n"); +++ } +++ +++ return 0; +++} ++diff --git a/src/devtools/clpi_dump.c b/src/devtools/clpi_dump.c ++new file mode 100644 ++index 0000000..bd64783 ++--- /dev/null +++++ b/src/devtools/clpi_dump.c ++@@ -0,0 +1,487 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2009-2010 John Stebbins +++ * Copyright (C) 2012-2013 Petri Hintukainen +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. 
+++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * . +++ */ +++ +++#include +++#include +++#include +++#include +++ +++#include "libbluray/bdnav/clpi_data.h" +++#include "libbluray/bluray.h" +++ +++#include "util.h" +++ +++static int verbose; +++ +++typedef struct { +++ int value; +++ const char *str; +++} VALUE_MAP; +++ +++static inline const char* +++_lookup_str(const VALUE_MAP *map, int val) +++{ +++ int ii; +++ +++ for (ii = 0; map[ii].str; ii++) { +++ if (val == map[ii].value) { +++ return map[ii].str; +++ } +++ } +++ return "?"; +++} +++ +++const VALUE_MAP codec_map[] = { +++ {0x01, "MPEG-1 Video"}, +++ {0x02, "MPEG-2 Video"}, +++ {0x03, "MPEG-1 Audio"}, +++ {0x04, "MPEG-2 Audio"}, +++ {0x80, "LPCM"}, +++ {0x81, "AC-3"}, +++ {0x82, "DTS"}, +++ {0x83, "TrueHD"}, +++ {0x84, "AC-3 Plus"}, +++ {0x85, "DTS-HD"}, +++ {0x86, "DTS-HD Master"}, +++ {0xa1, "AC-3 Plus for secondary audio"}, +++ {0xa2, "DTS-HD for secondary audio"}, +++ {0xea, "VC-1"}, +++ {0x1b, "H.264"}, +++ {0x20, "H.264 MVC dep."}, +++ {0x90, "Presentation Graphics"}, +++ {0x91, "Presentation Graphics"}, +++ {0x92, "Interactive Graphics"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP video_format_map[] = { +++ {0, "Reserved"}, +++ {1, "480i"}, +++ {2, "576i"}, +++ {3, "480p"}, +++ {4, "1080i"}, +++ {5, "720p"}, +++ {6, "1080p"}, +++ {7, "576p"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP video_rate_map[] = { +++ {0, "Reserved1"}, +++ {1, "23.976"}, +++ {2, "24"}, +++ {3, "25"}, +++ {4, "29.97"}, +++ {5, "Reserved2"}, +++ {6, "50"}, +++ {7, "59.94"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP video_aspect_map[] = { +++ {0, "Reserved1"}, +++ {1, "Reserved2"}, +++ {2, "4:3"}, +++ {3, "16:9"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP audio_format_map[] = { +++ {0, "Reserved1"}, +++ {1, "Mono"}, +++ {2, "Reserved2"}, +++ {3, "Stereo"}, +++ {4, "Reserved3"}, +++ {5, "Reserved4"}, +++ {6, "Multi Channel"}, +++ {12, "Combo"}, 
+++ {0, NULL} +++}; +++ +++const VALUE_MAP audio_rate_map[] = { +++ {0, "Reserved1"}, +++ {1, "48 Khz"}, +++ {2, "Reserved2"}, +++ {3, "Reserved3"}, +++ {4, "96 Khz"}, +++ {5, "192 Khz"}, +++ {12, "48/192 Khz"}, +++ {14, "48/96 Khz"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP application_type_map[] = { +++ {1, "Main TS for a main-path of Movie"}, +++ {2, "Main TS for a main-path of Time based slide show"}, +++ {3, "Main TS for a main-path of Browsable slide show"}, +++ {4, "Sub TS for a sub-path of Browsable slide show"}, +++ {5, "Sub TS for a sub-path of Interactive Graphics menu"}, +++ {6, "Sub TS for a sub-path of Text subtitle"}, +++ {7, "Sub TS for a sub-path of one or more elementary streams path"}, +++ {0, NULL}, +++}; +++ +++static void +++_show_stream(CLPI_PROG_STREAM *ss, int level) +++{ +++ indent_printf(level, "Codec (%04x): %s", ss->coding_type, +++ _lookup_str(codec_map, ss->coding_type)); +++ indent_printf(level, "PID: %04x", ss->pid); +++ switch (ss->coding_type) { +++ case 0x01: +++ case 0x02: +++ case 0xea: +++ case 0x1b: +++ case 0x20: +++ indent_printf(level, "Format %02x: %s", ss->format, +++ _lookup_str(video_format_map, ss->format)); +++ indent_printf(level, "Rate %02x: %s", ss->rate, +++ _lookup_str(video_rate_map, ss->rate)); +++ indent_printf(level, "Aspect %02x: %s", ss->aspect, +++ _lookup_str(video_aspect_map, ss->aspect)); +++ indent_printf(level, "oc_flag %02x", ss->oc_flag); +++ break; +++ +++ case 0x03: +++ case 0x04: +++ case 0x80: +++ case 0x81: +++ case 0x82: +++ case 0x83: +++ case 0x84: +++ case 0x85: +++ case 0x86: +++ case 0xa1: +++ case 0xa2: +++ indent_printf(level, "Format %02x: %s", ss->format, +++ _lookup_str(audio_format_map, ss->format)); +++ indent_printf(level, "Rate %02x: %s", ss->rate, +++ _lookup_str(audio_rate_map, ss->rate)); +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ case 0x90: +++ case 0x91: +++ case 0xa0: +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ case 
0x92: +++ indent_printf(level, "Char Code: %02x", ss->char_code); +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ default: +++ fprintf(stderr, "unrecognized coding type %02x\n", ss->coding_type); +++ break; +++ }; +++} +++ +++static void +++_show_clip_info(CLPI_CL *cl, int level) +++{ +++ CLPI_CLIP_INFO *ci = &cl->clip; +++ int ii; +++ +++ indent_printf(level, "Clip Info"); +++ indent_printf(level+1, "Clip Stream Type: %02x", ci->clip_stream_type); +++ indent_printf(level+1, "Clip Application Type (%02x): %s", +++ ci->application_type, _lookup_str(application_type_map, ci->application_type)); +++ indent_printf(level+1, "is_ATC_delta: %s", ci->is_atc_delta ? "True" : "False"); +++ indent_printf(level+1, "ATC delta count: %d", ci->atc_delta_count); +++ indent_printf(level+1, "TS Recording Rate: %u", ci->ts_recording_rate); +++ indent_printf(level+1, "Number Source Packets: %u", ci->num_source_packets); +++ // Show ts type info +++ indent_printf(level+1, "TS Type Info"); +++ indent_printf(level+2, "Validity Flags %02x", ci->ts_type_info.validity); +++ indent_printf(level+2, "Format Id %s", ci->ts_type_info.format_id); +++ // Show cc5 thing +++ for (ii = 0; ii < ci->atc_delta_count; ii++) { +++ indent_printf(level+1, "ATC delta[ %d ]", ii); +++ indent_printf(level+2, "Delta %08x", ci->atc_delta[ii].delta); +++ indent_printf(level+2, "File Id %s", ci->atc_delta[ii].file_id); +++ indent_printf(level+2, "File Code %s", ci->atc_delta[ii].file_code); +++ } +++ // show fonts +++ if (cl->font_info.font_count) { +++ indent_printf(level+1, "Font files"); +++ for (ii = 0; ii < cl->font_info.font_count; ii++) { +++ indent_printf(level+2, "Font file %d: %s.otf", ii+1, cl->font_info.font[ii].file_id); +++ } +++ } +++ +++ printf("\n"); +++} +++ +++static void +++_show_seq_info(CLPI_SEQ_INFO *si, int level) +++{ +++ CLPI_ATC_SEQ *atc; +++ CLPI_STC_SEQ *stc; +++ int ii, jj; +++ +++ indent_printf(level, "Sequence Info"); +++ indent_printf(level+1, "Number ATC 
Sequences: %d", si->num_atc_seq); +++ for (ii = 0; ii < si->num_atc_seq; ii++) { +++ atc = &si->atc_seq[ii]; +++ indent_printf(level+1, "ATC Sequence %d", ii); +++ indent_printf(level+2, "SPN ATC Start: %u", atc->spn_atc_start); +++ indent_printf(level+2, "Offset STC Id: %d", atc->offset_stc_id); +++ indent_printf(level+2, "Number STC Sequences: %d", atc->num_stc_seq); +++ for (jj = 0; jj < atc->num_stc_seq; jj++) { +++ stc = &atc->stc_seq[jj]; +++ indent_printf(level+2, "ATC Sequence %d", jj); +++ indent_printf(level+3, "SPN STC Start: %u", stc->spn_stc_start); +++ indent_printf(level+3, "PCR PID: %04x", stc->pcr_pid); +++ indent_printf(level+3, "Presentation Start: %u", +++ stc->presentation_start_time); +++ indent_printf(level+3, "Presentation End: %u", +++ stc->presentation_end_time); +++ } +++ } +++} +++ +++static void +++_show_prog_info(CLPI_PROG_INFO *pi, int level) +++{ +++ CLPI_PROG *prog; +++ int ii, jj; +++ +++ indent_printf(level, "Program Info"); +++ indent_printf(level+1, "Number Programs: %d", pi->num_prog); +++ for (ii = 0; ii < pi->num_prog; ii++) { +++ prog = &pi->progs[ii]; +++ indent_printf(level+1, "Program %d", ii); +++ indent_printf(level+2, "SPN Program Sequence Start: %d", +++ prog->spn_program_sequence_start); +++ indent_printf(level+2, "Program Map PID: %d", prog->program_map_pid); +++ indent_printf(level+2, "Number Streams: %d", prog->num_streams); +++ indent_printf(level+2, "Number Groups: %d", prog->num_groups); +++ for (jj = 0; jj < prog->num_streams; jj++) { +++ indent_printf(level+2, "Stream %d", jj); +++ _show_stream(&prog->streams[jj], level+3); +++ } +++ } +++} +++ +++static void +++_show_extent_start(CLPI_EXTENT_START *es, int level) +++{ +++ unsigned int ii; +++ +++ indent_printf(level, "Extension data: Extent Start Point"); +++ +++ if (!es->num_point) { +++ indent_printf(level+1, "(no data)"); +++ +++ } else { +++ indent_printf(level+1, "Number of Start Points: %d", es->num_point); +++ +++ if (verbose) { +++ for (ii = 0; ii < 
es->num_point; ii++) { +++ indent_printf(level+1, "Extent %5d: SPN 0x%08X", ii, es->point[ii]); +++ } +++ } +++ } +++} +++ +++static void +++_show_cpi_info(CLPI_CPI *cpi, int level) +++{ +++ CLPI_EP_MAP_ENTRY *entry; +++ CLPI_EP_COARSE *coarse; +++ CLPI_EP_FINE *fine; +++ int ii, jj, kk; +++ +++ indent_printf(level, "CPI"); +++ indent_printf(level+1, "Number Stream PID: %d", cpi->num_stream_pid); +++ for (ii = 0; ii < cpi->num_stream_pid; ii++) { +++ entry = &cpi->entry[ii]; +++ indent_printf(level+1, "Stream: %d", ii); +++ indent_printf(level+2, "PID: %04x", entry->pid); +++ indent_printf(level+2, "EP Stream Type: %d", entry->ep_stream_type); +++ indent_printf(level+2, "Number EP Coarse: %d", entry->num_ep_coarse); +++ indent_printf(level+2, "Number EP Fine: %d", entry->num_ep_fine); +++ indent_printf(level+2, "EP Map Start: %d", +++ entry->ep_map_stream_start_addr); +++ for (jj = 0; jj < entry->num_ep_coarse; jj++) { +++ coarse = &entry->coarse[jj]; +++ indent_printf(level+2, "Coarse: %d", jj); +++ indent_printf(level+3, "Ref EP Fine: %d", coarse->ref_ep_fine_id); +++ indent_printf(level+3, "PTS EP: %d", coarse->pts_ep); +++ indent_printf(level+3, "SPN EP: %d", coarse->spn_ep); +++ } +++ for (jj = 0; jj < entry->num_ep_fine; jj++) { +++ fine = &entry->fine[jj]; +++ indent_printf(level+2, "Fine: %d", jj); +++ indent_printf(level+3, "Angle Change Point: %s", +++ fine->is_angle_change_point ? 
"True":"False"); +++ indent_printf(level+3, "I End Offset: %d", +++ fine->i_end_position_offset); +++ indent_printf(level+3, "PTS EP: %d", fine->pts_ep); +++ indent_printf(level+3, "SPN EP: %d", fine->spn_ep); +++ } +++ if (verbose) { +++ uint64_t pts; +++ uint32_t spn; +++ +++ indent_printf(level+2, "PTS - SPN Map"); +++ for (jj = 0; jj < entry->num_ep_coarse; jj++) { +++ int start, end; +++ +++ indent_printf(level+3, "Coarse: %d", jj); +++ coarse = &entry->coarse[jj]; +++ start = coarse->ref_ep_fine_id; +++ if (jj < entry->num_ep_coarse - 1) { +++ end = entry->coarse[jj+1].ref_ep_fine_id; +++ } else { +++ end = entry->num_ep_fine; +++ } +++ for (kk = start; kk < end; kk++) { +++ fine = &entry->fine[kk]; +++ pts = ((uint64_t) (coarse->pts_ep & ~0x01) << 19) + +++ ((uint64_t)fine->pts_ep << 9); +++ spn = (coarse->spn_ep & ~0x1FFFF) + fine->spn_ep; +++ indent_printf(level+4, "PTS %8"PRIu64"/%8"PRIu64" -- SPN %u", +++ pts, pts >> 1, spn); +++ } +++ } +++ } +++ } +++} +++ +++ +++static void +++_usage(char *cmd) +++{ +++ fprintf(stderr, +++"Usage: %s -vcspi [ ...]\n" +++"With no options, produces no output (not very useful)\n" +++"Options:\n" +++" v - Verbose output.\n" +++" c - Shows the Clip Info structure\n" +++" s - Shows the Sequence Info structure\n" +++" p - Shows the Program Info structure\n" +++" i - Shows the CPI. 
PTS to SPN map\n" +++" e - Shows Extent Start Table\n" +++, cmd); +++ +++ exit(EXIT_FAILURE); +++} +++ +++#define OPTS "vcspie" +++ +++int +++main(int argc, char *argv[]) +++{ +++ CLPI_CL *cl; +++ int opt; +++ int opt_clip_info = 0, opt_seq_info = 0, opt_prog_info = 0; +++ int opt_cpi_info = 0, opt_extent_start = 0; +++ int ii; +++ +++ do { +++ opt = getopt(argc, argv, OPTS); +++ switch (opt) { +++ case -1: break; +++ +++ case 'v': +++ verbose = 1; +++ break; +++ +++ case 's': +++ opt_seq_info = 1; +++ break; +++ +++ case 'i': +++ opt_cpi_info = 1; +++ break; +++ +++ case 'c': +++ opt_clip_info = 1; +++ break; +++ +++ case 'p': +++ opt_prog_info = 1; +++ break; +++ +++ case 'e': +++ opt_extent_start = 1; +++ break; +++ +++ default: +++ _usage(argv[0]); +++ break; +++ } +++ } while (opt != -1); +++ +++ if (optind >= argc) { +++ _usage(argv[0]); +++ } +++ +++ for (ii = optind; ii < argc; ii++) { +++ cl = bd_read_clpi(argv[ii]); +++ if (cl == NULL) { +++ fprintf(stderr, "Parsing %s failed\n", argv[ii]); +++ continue; +++ } +++ if (opt_clip_info) { +++ // Show clip info +++ _show_clip_info(cl, 1); +++ } +++ if (opt_seq_info) { +++ // Show sequence info +++ _show_seq_info(&cl->sequence, 1); +++ } +++ if (opt_prog_info) { +++ // Show program info +++ _show_prog_info(&cl->program, 1); +++ } +++ if (opt_cpi_info) { +++ // Show cpi +++ _show_cpi_info(&cl->cpi, 1); +++ } +++ +++ if (opt_prog_info) { +++ if (cl->program_ss.num_prog) { +++ printf("\n"); +++ indent_printf(1, "Extension: Program Info SS"); +++ _show_prog_info(&cl->program_ss, 1); +++ } +++ } +++ if (opt_cpi_info) { +++ if (cl->program_ss.num_prog) { +++ printf("\n"); +++ indent_printf(1, "Extension: CPI SS"); +++ _show_cpi_info(&cl->cpi_ss, 1); +++ } +++ } +++ if (opt_extent_start) { +++ // Show extent start point +++ if (cl->extent_start.num_point > 0) { +++ _show_extent_start(&cl->extent_start, 1); +++ } +++ } +++ +++ bd_free_clpi(cl); +++ } +++ return 0; +++} +++ ++diff --git a/src/devtools/hdmv_test.c 
b/src/devtools/hdmv_test.c ++new file mode 100644 ++index 0000000..585ed70 ++--- /dev/null +++++ b/src/devtools/hdmv_test.c ++@@ -0,0 +1,257 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2010 hpi1 +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * . +++ */ +++ +++#include +++#include +++#include +++#include +++ +++#include "util/log_control.h" +++#include "libbluray/bluray.h" +++ +++static void _print_event(BD_EVENT *ev) +++{ +++ switch (ev->event) { +++ case BD_EVENT_NONE: +++ break; +++ case BD_EVENT_ERROR: +++ printf("EVENT_ERROR:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_READ_ERROR: +++ printf("EVENT_READ_ERROR:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_ENCRYPTED: +++ printf("EVENT_ENCRYPTED:\t%d\n", ev->param); +++ break; +++ +++ /* current playback position */ +++ +++ case BD_EVENT_ANGLE: +++ printf("EVENT_ANGLE:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_TITLE: +++ printf("EVENT_TITLE:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_PLAYLIST: +++ printf("EVENT_PLAYLIST:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_PLAYITEM: +++ printf("EVENT_PLAYITEM:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_CHAPTER: +++ printf("EVENT_CHAPTER:\t%d\n", ev->param); +++ break; +++ +++ /* */ +++ +++ case BD_EVENT_STILL: +++ printf("EVENT_STILL:\t%d\n", ev->param); +++ break; +++ +++ case BD_EVENT_SEEK: +++ 
printf("EVENT_SEEK:\t%d\n", ev->param); +++ break; +++ +++ case BD_EVENT_STILL_TIME: +++ if (ev->param) { +++ printf("EVENT_STILL_TIME:\t%d\n", ev->param); +++ } else { +++ printf("EVENT_STILL_TIME:\tinfinite\n"); +++ } +++ break; +++ +++ /* stream selection */ +++ +++ case BD_EVENT_AUDIO_STREAM: +++ printf("EVENT_AUDIO_STREAM:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_IG_STREAM: +++ printf("EVENT_IG_STREAM:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_PG_TEXTST_STREAM: +++ printf("EVENT_PG_TEXTST_STREAM:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_SECONDARY_AUDIO_STREAM: +++ printf("EVENT_SECONDARY_AUDIO_STREAM:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_SECONDARY_VIDEO_STREAM: +++ printf("EVENT_SECONDARY_VIDEO_STREAM:\t%d\n", ev->param); +++ break; +++ +++ case BD_EVENT_PG_TEXTST: +++ printf("EVENT_PG_TEXTST:\t%s\n", ev->param ? "enable" : "disable"); +++ break; +++ case BD_EVENT_SECONDARY_AUDIO: +++ printf("EVENT_SECONDARY_AUDIO:\t%s\n", ev->param ? "enable" : "disable"); +++ break; +++ case BD_EVENT_SECONDARY_VIDEO: +++ printf("EVENT_SECONDARY_VIDEO:\t%s\n", ev->param ? "enable" : "disable"); +++ break; +++ case BD_EVENT_SECONDARY_VIDEO_SIZE: +++ printf("EVENT_SECONDARY_VIDEO_SIZE:\t%s\n", ev->param==0 ? "PIP" : "fullscreen"); +++ break; +++ +++ default: +++ printf("UNKNOWN EVENT %d:\t%d\n", ev->event, ev->param); +++ break; +++ } +++ +++ fflush(stdout); +++} +++ +++static void _read_to_eof(BLURAY *bd) +++{ +++ BD_EVENT ev; +++ int bytes; +++ uint64_t total = 0; +++ uint8_t buf[6144]; +++ +++ bd_seek(bd, bd_get_title_size(bd) - 6144); +++ +++ do { +++ bytes = bd_read_ext(bd, buf, 6144, &ev); +++ total += bytes < 0 ? 
0 : bytes; +++ _print_event(&ev); +++ } while (bytes > 0); +++ +++ printf("_read_to_eof(): read %"PRIu64" bytes\n", total); +++} +++ +++static void _print_events(BLURAY *bd) +++{ +++ BD_EVENT ev; +++ +++ do { +++ bd_read_ext(bd, NULL, 0, &ev); +++ _print_event(&ev); +++ } while (ev.event != BD_EVENT_NONE && ev.event != BD_EVENT_ERROR); +++} +++ +++static void _play_pl(BLURAY *bd) +++{ +++ printf("Playing playlist\n"); +++ +++ fflush(stdout); +++ _read_to_eof(bd); +++ +++ printf("Playing playlist done\n\n"); +++ +++ _print_events(bd); +++ +++ printf("\n"); +++} +++ +++int main(int argc, char *argv[]) +++{ +++ int title = -1; +++ int verbose = 0; +++ int args = 0; +++ +++ /* +++ * parse arguments +++ */ +++ +++ if (argc < 2) { +++ printf("\nUsage:\n %s [-v] [-t ] <media_path> [<keyfile_path>]\n\n", argv[0]); +++ return -1; +++ } +++ +++ if (!strcmp(argv[1+args], "-v")) { +++ verbose = 1; +++ args++; +++ } +++ +++ if (!strcmp(argv[1+args], "-t")) { +++ args++; +++ title = atoi(argv[1+args]); +++ args++; +++ printf("Requested title %d\n", title); +++ } +++ +++ if (verbose) { +++ printf("Enabling verbose debug\n"); +++ bd_set_debug_mask(bd_get_debug_mask() | DBG_HDMV | DBG_BLURAY); +++ } +++ +++ printf("\n"); +++ +++ /* +++ * open and setup +++ */ +++ +++ BLURAY *bd = bd_open(argv[1+args], argv[2+args]); +++ +++ if (!bd) { +++ printf("bd_open(\'%s\') failed\n", argv[1]); +++ return -1; +++ } +++ +++ bd_set_player_setting (bd, BLURAY_PLAYER_SETTING_PARENTAL, 99); +++ bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_AUDIO_LANG, "eng"); +++ bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_PG_LANG, "eng"); +++ bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_MENU_LANG, "eng"); +++ bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_COUNTRY_CODE, NULL); +++ +++ /* +++ * play +++ */ +++ +++ printf("Running first play movie object\n"); +++ +++ fflush(stdout); +++ bd_play(bd); +++ +++ _print_events(bd); +++ +++ printf("\n"); +++ +++ /* +++ * play title +++ */ +++ +++ 
if (title >= 0) { +++ printf("Playing title %d\n", title); +++ +++ fflush(stdout); +++ bd_play_title(bd, title); +++ +++ _print_events(bd); +++ +++ printf("\n"); +++ } +++ +++ /* +++ * play playlist +++ */ +++ +++ _play_pl(bd); +++ +++ _play_pl(bd); +++ +++ _play_pl(bd); +++ +++ /* +++ * clean up +++ */ +++ +++ bd_close(bd); +++ +++ return 0; +++} +++ ++diff --git a/src/devtools/mobj_dump.c b/src/devtools/mobj_dump.c ++new file mode 100644 ++index 0000000..3eaf9f4 ++--- /dev/null +++++ b/src/devtools/mobj_dump.c ++@@ -0,0 +1,83 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2010 hpi1 +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. 
+++ */ +++ +++#include "libbluray/bluray.h" +++ +++#include "libbluray/hdmv/mobj_data.h" +++#include "libbluray/hdmv/mobj_print.h" +++ +++#include <stdio.h> +++#include <string.h> +++ +++static void _mobj_print(MOBJ_OBJECTS *objects, int disasm) +++{ +++ int o, c; +++ +++ printf("Number of objects: %d\n", objects->num_objects); +++ +++ for (o = 0; o < objects->num_objects; o++) { +++ +++ printf("Object %d:\n" +++ " number of commands: %d\n" +++ " resume intention flag: %d\n" +++ " menu call mask: %d\n" +++ " title search mask: %d\n", +++ o, objects->objects[o].num_cmds, +++ objects->objects[o].resume_intention_flag, +++ objects->objects[o].menu_call_mask, +++ objects->objects[o].title_search_mask); +++ +++ if (disasm) { +++ printf(" program:\n"); +++ for (c = 0; c < objects->objects[o].num_cmds; c++) { +++ char buf[256]; +++ mobj_sprint_cmd(buf, &objects->objects[o].cmds[c]); +++ printf(" %04d: %s\n", c, buf); +++ } +++ } +++ } +++} +++ +++int main(int argc, const char *argv[]) +++{ +++ int disasm = 0; +++ MOBJ_OBJECTS *mobj = NULL; +++ +++ if (argc < 2) { +++ fprintf(stderr, +++ "usage: %s [-d] <file>\n" +++ "Options:\n" +++ " d disassemble object code\n", +++ argv[0]); +++ return 1; +++ } +++ if (argc > 2) { +++ disasm = !strcmp(argv[1], "-d"); +++ } +++ +++ mobj = bd_read_mobj(argv[argc-1]); +++ +++ if (mobj) { +++ _mobj_print(mobj, disasm); +++ +++ bd_free_mobj(mobj); +++ } +++ +++ return 0; +++} ++diff --git a/src/devtools/mpls_dump.c b/src/devtools/mpls_dump.c ++new file mode 100644 ++index 0000000..405b6a1 ++--- /dev/null +++++ b/src/devtools/mpls_dump.c ++@@ -0,0 +1,799 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2009-2010 John Stebbins +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. 
+++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. +++ */ +++ +++#include <sys/stat.h> +++#include <dirent.h> +++#include <stdio.h> +++#include <stdlib.h> +++#include <unistd.h> +++#include <string.h> +++#include <libgen.h> +++ +++#include "libbluray/bdnav/mpls_parse.h" +++#include "libbluray/bluray.h" +++ +++#include "util.h" +++ +++#ifdef _WIN32 +++# define DIR_SEP "\\" +++# define PLAYLIST_DIR "\\BDMV\\PLAYLIST" +++#else +++# define DIR_SEP "/" +++# define PLAYLIST_DIR "/BDMV/PLAYLIST" +++#endif +++ +++ +++static int verbose; +++ +++typedef struct { +++ int value; +++ const char *str; +++} VALUE_MAP; +++ +++const VALUE_MAP codec_map[] = { +++ {0x01, "MPEG-1 Video"}, +++ {0x02, "MPEG-2 Video"}, +++ {0x03, "MPEG-1 Audio"}, +++ {0x04, "MPEG-2 Audio"}, +++ {0x80, "LPCM"}, +++ {0x81, "AC-3"}, +++ {0x82, "DTS"}, +++ {0x83, "TrueHD"}, +++ {0x84, "AC-3 Plus"}, +++ {0x85, "DTS-HD"}, +++ {0x86, "DTS-HD Master"}, +++ {0xa1, "AC-3 Plus for secondary audio"}, +++ {0xa2, "DTS-HD for secondary audio"}, +++ {0xea, "VC-1"}, +++ {0x1b, "H.264"}, +++ {0x90, "Presentation Graphics"}, +++ {0x91, "Interactive Graphics"}, +++ {0x92, "Text Subtitle"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP video_format_map[] = { +++ {0, "Reserved"}, +++ {1, "480i"}, +++ {2, "576i"}, +++ {3, "480p"}, +++ {4, "1080i"}, +++ {5, "720p"}, +++ {6, "1080p"}, +++ {7, "576p"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP video_rate_map[] = { +++ {0, "Reserved1"}, +++ {1, "23.976"}, +++ {2, "24"}, +++ {3, "25"}, +++ {4, "29.97"}, +++ {5, "Reserved2"}, +++ {6, "50"}, +++ {7, "59.94"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP 
audio_format_map[] = { +++ {0, "Reserved1"}, +++ {1, "Mono"}, +++ {2, "Reserved2"}, +++ {3, "Stereo"}, +++ {4, "Reserved3"}, +++ {5, "Reserved4"}, +++ {6, "Multi Channel"}, +++ {12, "Combo"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP audio_rate_map[] = { +++ {0, "Reserved1"}, +++ {1, "48 Khz"}, +++ {2, "Reserved2"}, +++ {3, "Reserved3"}, +++ {4, "96 Khz"}, +++ {5, "192 Khz"}, +++ {12, "48/192 Khz"}, +++ {14, "48/96 Khz"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP subpath_type_map[] = { +++ {2, "Primary audio of the Browsable slideshow"}, +++ {3, "Interactive Graphics presentation menu"}, +++ {4, "Text Subtitle"}, +++ {5, "Out-of-mux Synchronous elementary streams"}, +++ {6, "Out-of-mux Asynchronous Picture-in-Picture presentation"}, +++ {7, "In-mux Synchronous Picture-in-Picture presentation"}, +++ {8, "SS Video"}, +++ {0,NULL} +++}; +++ +++const VALUE_MAP playback_type_map[] = { +++ {1, "Sequential"}, +++ {2, "Random"}, +++ {3, "Shuffle"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP connection_type_map[] = { +++ {1, "Non-seamless"}, +++ {5, "Seamless"}, +++ {6, "Seamless"}, +++ {0, NULL} +++}; +++ +++static const char* +++_lookup_str(const VALUE_MAP *map, int val) +++{ +++ int ii; +++ +++ for (ii = 0; map[ii].str; ii++) { +++ if (val == map[ii].value) { +++ return map[ii].str; +++ } +++ } +++ return "?"; +++} +++ +++static char * +++_mk_path(const char *base, const char *sub) +++{ +++ size_t n1 = strlen(base); +++ size_t n2 = strlen(sub); +++ char *result = (char*)malloc(n1 + n2 + strlen(DIR_SEP) + 1); +++ strcpy(result, base); +++ strcat(result, DIR_SEP); +++ strcat(result, sub); +++ +++ return result; +++} +++ +++static void +++_show_stream(MPLS_STREAM *ss, int level) +++{ +++ indent_printf(level, "Codec (%04x): %s", ss->coding_type, +++ _lookup_str(codec_map, ss->coding_type)); +++ switch (ss->stream_type) { +++ case 1: +++ indent_printf(level, "PID: %04x", ss->pid); +++ break; +++ +++ case 2: +++ case 4: +++ indent_printf(level, "SubPath Id: %02x", 
ss->subpath_id); +++ indent_printf(level, "SubClip Id: %02x", ss->subclip_id); +++ indent_printf(level, "PID: %04x", ss->pid); +++ break; +++ +++ case 3: +++ indent_printf(level, "SubPath Id: %02x", ss->subpath_id); +++ indent_printf(level, "PID: %04x", ss->pid); +++ break; +++ +++ default: +++ fprintf(stderr, "unrecognized stream type %02x\n", ss->stream_type); +++ break; +++ }; +++ +++ switch (ss->coding_type) { +++ case 0x01: +++ case 0x02: +++ case 0xea: +++ case 0x1b: +++ indent_printf(level, "Format %02x: %s", ss->format, +++ _lookup_str(video_format_map, ss->format)); +++ indent_printf(level, "Rate %02x: %s", ss->rate, +++ _lookup_str(video_rate_map, ss->rate)); +++ break; +++ +++ case 0x03: +++ case 0x04: +++ case 0x80: +++ case 0x81: +++ case 0x82: +++ case 0x83: +++ case 0x84: +++ case 0x85: +++ case 0x86: +++ case 0xa1: +++ case 0xa2: +++ indent_printf(level, "Format %02x: %s", ss->format, +++ _lookup_str(audio_format_map, ss->format)); +++ indent_printf(level, "Rate %02x: %s", ss->rate, +++ _lookup_str(audio_rate_map, ss->rate)); +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ case 0x90: +++ case 0x91: +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ case 0x92: +++ indent_printf(level, "Char Code: %02x", ss->char_code); +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ default: +++ fprintf(stderr, "unrecognized coding type %02x\n", ss->coding_type); +++ break; +++ }; +++} +++ +++static void +++_show_details(MPLS_PL *pl, int level) +++{ +++ int ii, jj, kk; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ MPLS_PI *pi; +++ +++ pi = &pl->play_item[ii]; +++ indent_printf(level, "Clip Id %s", pi->clip[0].clip_id); +++ indent_printf(level+1, "Stc Id: %02x", pi->clip[0].stc_id); +++ indent_printf(level+1, "Connection Condition: %s (%02x)", +++ _lookup_str(connection_type_map, pi->connection_condition), +++ pi->connection_condition); +++ indent_printf(level+1, "In-Time: %d", pi->in_time); +++ 
indent_printf(level+1, "Out-Time: %d", pi->out_time); +++ if (pi->still_mode == 1) { +++ indent_printf(level+1, "Still time: %ds\n", pi->still_time); +++ } +++ if (pi->still_mode == 2) { +++ indent_printf(level+1, "Still time: infinite\n"); +++ } +++ if (pi->angle_count > 1) { +++ for (jj = 1; jj < pi->angle_count; jj++) { +++ indent_printf(level+1, "Angle %d:", jj); +++ indent_printf(level+2, "Clip Id %s", pi->clip[jj].clip_id); +++ indent_printf(level+2, "Stc Id: %02x", pi->clip[jj].stc_id); +++ } +++ } +++ for (jj = 0; jj < pi->stn.num_video; jj++) { +++ indent_printf(level+1, "Video Stream %d:", jj); +++ _show_stream(&pi->stn.video[jj], level + 2); +++ } +++ for (jj = 0; jj < pi->stn.num_audio; jj++) { +++ indent_printf(level+1, "Audio Stream %d:", jj); +++ _show_stream(&pi->stn.audio[jj], level + 2); +++ } +++ for (jj = 0; jj < pi->stn.num_ig; jj++) { +++ indent_printf(level+1, "Interactive Graphics Stream %d:", jj); +++ _show_stream(&pi->stn.ig[jj], level + 2); +++ } +++ for (jj = 0; jj < (pi->stn.num_pg + pi->stn.num_pip_pg); jj++) { +++ if (jj < pi->stn.num_pg) { +++ indent_printf(level+1, "Presentation Graphics Stream %d:", jj); +++ } else { +++ indent_printf(level+1, "PIP Presentation Graphics Stream %d:", jj); +++ } +++ _show_stream(&pi->stn.pg[jj], level + 2); +++ } +++ for (jj = 0; jj < pi->stn.num_secondary_video; jj++) { +++ indent_printf(level+1, "Secondary Video Stream %d:", jj); +++ _show_stream(&pi->stn.secondary_video[jj], level + 2); +++ for (kk = 0; kk < pi->stn.secondary_video[jj].sv_num_secondary_audio_ref; kk++) { +++ indent_printf(level+2, "Secondary Audio Ref %d: %d", kk,pi->stn.secondary_video[jj].sv_secondary_audio_ref[kk]); +++ } +++ for (kk = 0; kk < pi->stn.secondary_video[jj].sv_num_pip_pg_ref; kk++) { +++ indent_printf(level+2, "PIP Presentation Graphic Ref %d: %d", kk,pi->stn.secondary_video[jj].sv_pip_pg_ref[kk]); +++ } +++ } +++ for (jj = 0; jj < pi->stn.num_secondary_audio; jj++) { +++ indent_printf(level+1, "Secondary Audio 
Stream %d:", jj); +++ _show_stream(&pi->stn.secondary_audio[jj], level + 2); +++ for (kk = 0; kk < pi->stn.secondary_audio[jj].sa_num_primary_audio_ref; kk++) { +++ indent_printf(level+2, "Primary Audio Ref %d: %d", kk,pi->stn.secondary_audio[jj].sa_primary_audio_ref[kk]); +++ } +++ } +++ printf("\n"); +++ } +++} +++ +++static void +++_show_ai(MPLS_PL *pl, int level) +++{ +++ indent_printf(level, "Playback type: %s (%d)", +++ _lookup_str(playback_type_map, pl->app_info.playback_type), +++ pl->app_info.playback_type); +++ if (pl->app_info.playback_type == 2 || pl->app_info.playback_type == 3) { +++ indent_printf(level+1, "Playback count: %d", pl->app_info.playback_count); +++ } +++} +++ +++static void +++_show_marks(MPLS_PL *pl, int level) +++{ +++ int ii; +++ +++ indent_printf(level, "PlayMark Count %d", pl->mark_count); +++ for (ii = 0; ii < pl->mark_count; ii++) { +++ MPLS_PI *pi; +++ MPLS_PLM *plm; +++ int min; +++ double sec; +++ +++ plm = &pl->play_mark[ii]; +++ indent_printf(level, "PlayMark %d", ii); +++ indent_printf(level+1, "Type: %02x", plm->mark_type); +++ if (plm->play_item_ref < pl->list_count) { +++ pi = &pl->play_item[plm->play_item_ref]; +++ indent_printf(level+1, "PlayItem: %s", pi->clip[0].clip_id); +++ } else { +++ indent_printf(level+1, "PlayItem: Invalid reference"); +++ } +++ indent_printf(level+1, "Time (ticks): %u", plm->time); +++ min = plm->duration / (45000*60); +++ sec = (double)(plm->duration - min * 45000 * 60) / 45000; +++ indent_printf(level+1, "Duration (mm:ss.ms, ticks): %d:%.2f, %u", +++ min, sec, plm->duration); +++ printf("\n"); +++ } +++} +++ +++static void +++_show_clip_list(MPLS_PL *pl, int level) +++{ +++ int ii, jj; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ MPLS_PI *pi; +++ +++ pi = &pl->play_item[ii]; +++ if (verbose) { +++ uint32_t duration; +++ +++ duration = pi->out_time - pi->in_time; +++ indent_printf(level, "%s.m2ts -- Duration: %3d:%02d", +++ pi->clip[0].clip_id, +++ duration / (45000 * 60), (duration / 
45000) % 60); +++ } else { +++ indent_printf(level, "%s.m2ts", pi->clip[0].clip_id); +++ } +++ if (pi->angle_count > 1) { +++ for (jj = 1; jj < pi->angle_count; jj++) { +++ indent_printf(level+1, "Angle %d: %s.m2ts", jj+1, pi->clip[jj].clip_id); +++ } +++ } +++ } +++ printf("\n"); +++} +++ +++static void +++_show_sub_path(MPLS_SUB *sub, int level) +++{ +++ int ii; +++ +++ indent_printf(level+1, "Type: %d (%s)", sub->type, _lookup_str(subpath_type_map, sub->type)); +++ indent_printf(level+1, "Repeat: %d", sub->is_repeat); +++ indent_printf(level+1, "Sub playitem count: %d", sub->sub_playitem_count); +++ +++ for (ii = 0; ii < sub->sub_playitem_count; ii++) { +++ MPLS_SUB_PI *pi; +++ +++ pi = &sub->sub_play_item[ii]; +++ +++ if (verbose) { +++ indent_printf(level+1, "Sub playitem %d", ii); +++ indent_printf(level+2, "Clip Id %s", pi->clip[0].clip_id); +++ indent_printf(level+2, "Multi clip: %d", pi->is_multi_clip); +++ indent_printf(level+2, "Clip count: %d", pi->clip_count); +++ indent_printf(level+2, "Connection Condition: %s (%02x)", +++ _lookup_str(connection_type_map, pi->connection_condition), +++ pi->connection_condition); +++ indent_printf(level+2, "In-Time: %d", pi->in_time); +++ indent_printf(level+2, "Out-Time: %d", pi->out_time); +++ indent_printf(level+2, "Sync playitem Id: %d", pi->sync_play_item_id); +++ indent_printf(level+2, "Sync PTS: %d", pi->sync_pts); +++ } else { +++ indent_printf(level+1, "%s.m2ts", pi->clip[0].clip_id); +++ } +++ } +++} +++ +++static void +++_show_pip_metadata_block(MPLS_PIP_METADATA *block, int level) +++{ +++ int ii; +++ +++ indent_printf(level, "Clip ref: %d", block->clip_ref); +++ indent_printf(level, "Secondary video ref: %d", block->secondary_video_ref); +++ indent_printf(level, "Timeline type: %d", block->timeline_type); +++ indent_printf(level, "Luma key flag: %d", block->luma_key_flag); +++ if (block->luma_key_flag) { +++ indent_printf(level, "Upper limit luma key: %d", block->upper_limit_luma_key); +++ } +++ 
indent_printf(level, "Trick play flag: %d", block->trick_play_flag); +++ +++ for (ii = 0; ii < block->data_count; ii++) { +++ indent_printf(level, "data block %d:", ii); +++ indent_printf(level+1, "Timestamp: %d", block->data[ii].time); +++ indent_printf(level+1, "Horizontal position %d", block->data[ii].xpos); +++ indent_printf(level+1, "Vertical position: %d", block->data[ii].ypos); +++ indent_printf(level+1, "Scaling factor: %d", block->data[ii].scale_factor); +++ } +++} +++ +++static void +++_show_pip_metadata(MPLS_PL *pl, int level) +++{ +++ int ii; +++ +++ for (ii = 0; ii < pl->ext_pip_data_count; ii++) { +++ MPLS_PIP_METADATA *data; +++ +++ data = &pl->ext_pip_data[ii]; +++ +++ indent_printf(level, "PiP metadata block %d:", ii); +++ _show_pip_metadata_block(data, level+1); +++ } +++} +++ +++static void +++_show_sub_paths(MPLS_PL *pl, int level) +++{ +++ int ss; +++ +++ for (ss = 0; ss < pl->sub_count; ss++) { +++ MPLS_SUB *sub; +++ +++ sub = &pl->sub_path[ss]; +++ +++ indent_printf(level, "Sub Path %d:", ss); +++ _show_sub_path(sub, level+1); +++ } +++} +++ +++static void +++_show_sub_paths_ss(MPLS_PL *pl, int level) +++{ +++ int ss; +++ +++ for (ss = 0; ss < pl->ext_sub_count; ss++) { +++ MPLS_SUB *sub; +++ +++ sub = &pl->ext_sub_path[ss]; +++ +++ indent_printf(level, "Extension Sub Path %d:", ss); +++ _show_sub_path(sub, level+1); +++ } +++} +++ +++static uint32_t +++_pl_duration(MPLS_PL *pl) +++{ +++ int ii; +++ uint32_t duration = 0; +++ MPLS_PI *pi; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ pi = &pl->play_item[ii]; +++ duration += pi->out_time - pi->in_time; +++ } +++ return duration; +++} +++ +++static int +++_filter_dup(MPLS_PL *pl_list[], int count, MPLS_PL *pl) +++{ +++ int ii, jj; +++ +++ for (ii = 0; ii < count; ii++) { +++ if (pl->list_count != pl_list[ii]->list_count || +++ _pl_duration(pl) != _pl_duration(pl_list[ii])) { +++ continue; +++ } +++ for (jj = 0; jj < pl->list_count; jj++) { +++ MPLS_PI *pi1, *pi2; +++ +++ pi1 = 
&pl->play_item[jj]; +++ pi2 = &pl_list[ii]->play_item[jj]; +++ +++ if (memcmp(pi1->clip[0].clip_id, pi2->clip[0].clip_id, 5) != 0 || +++ pi1->in_time != pi2->in_time || +++ pi1->out_time != pi2->out_time) { +++ break; +++ } +++ } +++ if (jj != pl->list_count) { +++ continue; +++ } +++ return 0; +++ } +++ return 1; +++} +++ +++static int +++_find_repeats(MPLS_PL *pl, const char *m2ts) +++{ +++ int ii, count = 0; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ MPLS_PI *pi; +++ +++ pi = &pl->play_item[ii]; +++ // Ignore titles with repeated segments +++ if (strcmp(pi->clip[0].clip_id, m2ts) == 0) { +++ count++; +++ } +++ } +++ return count; +++} +++ +++static int +++_filter_short(MPLS_PL *pl, unsigned int seconds) +++{ +++ // Ignore short playlists +++ if (_pl_duration(pl) / 45000 <= seconds) { +++ return 0; +++ } +++ return 1; +++} +++ +++static int +++_filter_repeats(MPLS_PL *pl, int repeats) +++{ +++ int ii; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ MPLS_PI *pi; +++ +++ pi = &pl->play_item[ii]; +++ // Ignore titles with repeated segments +++ if (_find_repeats(pl, pi->clip[0].clip_id) > repeats) { +++ return 0; +++ } +++ } +++ return 1; +++} +++ +++static int clip_list = 0, playlist_info = 0, chapter_marks = 0, sub_paths = 0, pip_metadata = 0; +++static int repeats = 0, seconds = 0, dups = 0; +++ +++static MPLS_PL* +++_process_file(char *name, MPLS_PL *pl_list[], int pl_count) +++{ +++ MPLS_PL *pl; +++ +++ pl = bd_read_mpls(name); +++ if (pl == NULL) { +++ fprintf(stderr, "Parse failed: %s\n", name); +++ return NULL; +++ } +++ if (seconds) { +++ if (!_filter_short(pl, seconds)) { +++ bd_free_mpls(pl); +++ return NULL; +++ } +++ } +++ if (repeats) { +++ if (!_filter_repeats(pl, repeats)) { +++ bd_free_mpls(pl); +++ return NULL; +++ } +++ } +++ if (dups) { +++ if (!_filter_dup(pl_list, pl_count, pl)) { +++ bd_free_mpls(pl); +++ return NULL; +++ } +++ } +++ if (verbose) { +++ indent_printf(0, +++ "%s -- Num Clips: %3d , Duration: minutes %4u:%02u", 
+++ basename(name), +++ pl->list_count, +++ _pl_duration(pl) / (45000 * 60), +++ (_pl_duration(pl) / 45000) % 60); +++ _show_ai(pl, 1); +++ } else { +++ indent_printf(0, "%s -- Duration: minutes %4u:%02u", +++ basename(name), +++ _pl_duration(pl) / (45000 * 60), +++ (_pl_duration(pl) / 45000) % 60); +++ } +++ if (playlist_info) { +++ _show_details(pl, 1); +++ } +++ if (chapter_marks) { +++ _show_marks(pl, 1); +++ } +++ if (pip_metadata) { +++ _show_pip_metadata(pl, 1); +++ } +++ if (clip_list) { +++ _show_clip_list(pl, 1); +++ } +++ if (sub_paths) { +++ _show_sub_paths(pl, 1); +++ _show_sub_paths_ss(pl, 1); +++ } +++ return pl; +++} +++ +++static void +++_usage(char *cmd) +++{ +++ fprintf(stderr, +++"Usage: %s -vli <mpls file> [<mpls file> ...]\n" +++"With no options, produces a list of the playlist(s) with durations\n" +++"Options:\n" +++" v - Verbose output.\n" +++" l - Produces a list of the m2ts clips\n" +++" i - Dumps detailed information about each clip\n" +++" c - Show chapter marks\n" +++" p - Show sub paths\n" +++" P - Show picture-in-picture metadata\n" +++" r <N> - Filter out titles that have >N repeating clips\n" +++" d - Filter out duplicate titles\n" +++" s <seconds> - Filter out short titles\n" +++" f - Filter combination -r2 -d -s900\n" +++, cmd); +++ +++ exit(EXIT_FAILURE); +++} +++ +++#define OPTS "vlicpPfr:ds:" +++ +++static int +++_qsort_str_cmp(const void *a, const void *b) +++{ +++ const char *stra = *(char * const *)a; +++ const char *strb = *(char * const *)b; +++ +++ return strcmp(stra, strb); +++} +++ +++int +++main(int argc, char *argv[]) +++{ +++ MPLS_PL *pl; +++ int opt; +++ int ii, pl_ii; +++ MPLS_PL *pl_list[1000]; +++ struct stat st; +++ char *path = NULL; +++ DIR *dir = NULL; +++ +++ do { +++ opt = getopt(argc, argv, OPTS); +++ switch (opt) { +++ case -1: +++ break; +++ +++ case 'v': +++ verbose = 1; +++ break; +++ +++ case 'l': +++ clip_list = 1; +++ break; +++ +++ case 'i': +++ playlist_info = 1; +++ break; +++ +++ case 'c': +++ 
chapter_marks = 1; +++ break; +++ +++ case 'p': +++ sub_paths = 1; +++ break; +++ +++ case 'P': +++ pip_metadata = 1; +++ break; +++ +++ case 'd': +++ dups = 1; +++ break; +++ +++ case 'r': +++ repeats = atoi(optarg); +++ break; +++ +++ case 'f': +++ repeats = 2; +++ dups = 1; +++ seconds = 900; +++ break; +++ +++ case 's': +++ seconds = atoi(optarg); +++ break; +++ +++ default: +++ _usage(argv[0]); +++ break; +++ } +++ } while (opt != -1); +++ +++ if (optind >= argc) { +++ _usage(argv[0]); +++ } +++ +++ for (pl_ii = 0, ii = optind; pl_ii < 1000 && ii < argc; ii++) { +++ +++ if (stat(argv[ii], &st)) { +++ continue; +++ } +++ dir = NULL; +++ if (S_ISDIR(st.st_mode)) { +++ +++ printf("Directory: %s:\n", argv[ii]); +++ path = _mk_path(argv[ii], PLAYLIST_DIR); +++ if (path == NULL) { +++ fprintf(stderr, "Failed to find playlist path: %s\n", argv[ii]); +++ continue; +++ } +++ dir = opendir(path); +++ if (dir == NULL) { +++ fprintf(stderr, "Failed to open dir: %s\n", path); +++ free(path); +++ continue; +++ } +++ } +++ if (dir != NULL) { +++ char **dirlist = (char**)calloc(10001, sizeof(char*)); +++ struct dirent *ent; +++ int jj = 0; +++ for (ent = readdir(dir); ent != NULL; ent = readdir(dir)) { +++ dirlist[jj++] = strcpy((char*)malloc(strlen(ent->d_name)), ent->d_name); +++ } +++ qsort(dirlist, jj, sizeof(char*), _qsort_str_cmp); +++ for (jj = 0; dirlist[jj] != NULL; jj++) { +++ char *name = NULL; +++ name = _mk_path(path, dirlist[jj]); +++ free(dirlist[jj]); +++ if (stat(name, &st)) { +++ free(name); +++ continue; +++ } +++ if (!S_ISREG(st.st_mode)) { +++ free(name); +++ continue; +++ } +++ pl = _process_file(name, pl_list, pl_ii); +++ free(name); +++ if (pl != NULL) { +++ pl_list[pl_ii++] = pl; +++ } +++ } +++ free(dirlist); +++ free(path); +++ closedir(dir); +++ dir = NULL; +++ } else { +++ pl = _process_file(argv[ii], pl_list, pl_ii); +++ if (pl != NULL) { +++ pl_list[pl_ii++] = pl; +++ } +++ } +++ } +++ // Cleanup +++ for (ii = 0; ii < pl_ii; ii++) { +++ 
bd_free_mpls(pl_list[ii]); +++ } +++ return 0; +++} +++ ++diff --git a/src/devtools/util.c b/src/devtools/util.c ++new file mode 100644 ++index 0000000..aaa4c46 ++--- /dev/null +++++ b/src/devtools/util.c ++@@ -0,0 +1,40 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2009-2010 John Stebbins +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. +++ */ +++ +++#include <stdio.h> +++#include <stdarg.h> +++ +++#include "util.h" +++ +++void +++indent_printf(int level, const char *fmt, ...) +++{ +++ va_list ap; +++ int ii; +++ +++ for (ii = 0; ii < level; ii++) +++ { +++ printf(" "); +++ } +++ va_start(ap, fmt); +++ vprintf(fmt, ap); +++ va_end(ap); +++ printf("\n"); +++} +++ ++diff --git a/src/devtools/util.h b/src/devtools/util.h ++new file mode 100644 ++index 0000000..144f8ec ++--- /dev/null +++++ b/src/devtools/util.h ++@@ -0,0 +1,43 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2009-2010 John Stebbins +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. 
+++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. +++ */ +++ +++#include <stdint.h> +++ +++#include "util/attributes.h" +++ +++#if defined( __MINGW32__ ) +++# undef lseek +++# define lseek _lseeki64 +++# undef fseeko +++# define fseeko fseeko64 +++# undef ftello +++# define ftello ftello64 +++# define flockfile(...) +++# define funlockfile(...) +++# define getc_unlocked getc +++# undef off_t +++# define off_t off64_t +++# undef stat +++# define stat _stati64 +++# define fstat _fstati64 +++# define wstat _wstati64 +++#endif +++ +++void indent_printf(int level, const char *fmt, ...) BD_ATTR_FORMAT_PRINTF(2,3); +++ ++diff --git a/src/examples/bdj_test.c b/src/examples/bdj_test.c ++deleted file mode 100644 ++index d9ebd16..0000000 +++++ /dev/null ++@@ -1,67 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2010 William Hahne ++- * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU General Public License ++- * as published by the Free Software Foundation; either version 2 ++- * of the License, or (at your option) any later version. ++- * ++- * This program is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++- * GNU General Public License for more details. ++- * ++- * You should have received a copy of the GNU General Public License ++- * along with this program. If not, see <http://www.gnu.org/licenses/>. 
++- * ++- * In addition, as a special exception, the copyright holders of libbluray ++- * gives permission to link the code of its release of libbluray with the ++- * OpenSSL project's "OpenSSL" library (or with modified versions of it ++- * that use the same license as the "OpenSSL" library), and distribute ++- * the linked executables. You must obey the GNU General Public License ++- * in all respects for all of the code used other than "OpenSSL". If you ++- * modify this file, you may extend this exception to your version of the ++- * file, but you are not obligated to do so. If you do not wish to do ++- * so, delete this exception statement from your version. ++- */ ++- ++-#include <stdio.h> ++-#include <stdlib.h> ++-#include <unistd.h> ++- ++-#include "libbluray/bluray.h" ++- ++-#if defined(_WIN32) ++-#include <windows.h> ++-#define sleep(x) Sleep(x) ++-#endif ++- ++-static void _usage(void) { ++- printf("Usage: [path to disc] [starting object]\n"); ++-} ++- ++-int main(int argc, char** argv) ++-{ ++- if (argc < 3) { ++- _usage(); ++- return 0; ++- } ++- ++- printf("%s %s\n", argv[1], argv[2]); ++- ++- BLURAY* bd = bd_open(argv[1], NULL); ++- ++- bd_get_titles(bd, TITLES_ALL, 0); ++- ++- if (!bd_start_bdj(bd, argv[2])) { ++- printf("Failed to start BD-J application.\n"); ++- } else { ++- while (1) { sleep(20); } ++- bd_stop_bdj(bd); ++- } ++- ++- bd_close(bd); ++- ++- return 0; ++-} ++diff --git a/src/examples/bdjo_dump.c b/src/examples/bdjo_dump.c ++deleted file mode 100644 ++index bcbd2af..0000000 +++++ /dev/null ++@@ -1,206 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2014 Petri Hintukainen <phintuka@users.sourceforge.net> ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. 
++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include "libbluray/bluray.h" ++-#include "libbluray/bdj/bdjo_data.h" ++- ++-#include <stdio.h> ++-#include <string.h> ++- ++-static const char *_yes_no(int i) ++-{ ++- return i > 0 ? "yes" : i < 0 ? "unknown" : "no"; ++-} ++- ++-static const char *_binding_str(int i) ++-{ ++- switch (i) { ++- case 0: return "unbound"; ++- case 1: return "disc bound"; ++- case 3: return "title bound"; ++- default: return "???"; ++- } ++-} ++- ++-static const char *_visibility_str(int i) ++-{ ++- switch (i) { ++- case 0: return "none"; ++- case 1: return "applications"; ++- case 2: return "user"; ++- default: return "???"; ++- } ++-} ++- ++-static void _terminal_info_print(const BDJO_TERMINAL_INFO *p) ++-{ ++- printf("Terminal Info:\n"); ++- printf(" Default AWT font : %s\n", p->default_font); ++- printf(" initial HaVi config : %d\n", p->initial_havi_config_id); ++- printf(" Menu call mask : %d\n", p->menu_call_mask); ++- printf(" Title search mask : %d\n", p->menu_call_mask); ++-} ++- ++-static void _app_cache_item_print(const BDJO_APP_CACHE_ITEM *p) ++-{ ++- printf(" %3.3s: %s%s\n", ++- p->lang_code, p->ref_to_name, ++- p->type == 1 ? ".jar" : p->type == 2 ? 
"/" : " (unknown type)"); ++-} ++- ++-static void _app_cache_info_print(const BDJO_APP_CACHE_INFO *p) ++-{ ++- unsigned ii; ++- ++- printf("Application cache info:\n"); ++- for (ii = 0; ii < p->num_item; ii++) { ++- _app_cache_item_print(&p->item[ii]); ++- } ++-} ++- ++-static void _accessible_playlists_print(const BDJO_ACCESSIBLE_PLAYLISTS *p) ++-{ ++- unsigned ii; ++- ++- printf("Accessible playlists:\n"); ++- printf(" Access to all : %s\n", _yes_no(p->access_to_all_flag)); ++- printf(" Autostart first : %s\n", _yes_no(p->autostart_first_playlist_flag)); ++- ++- if (p->num_pl) { ++- printf(" Playlists : %d\n", p->num_pl); ++- for (ii = 0; ii < p->num_pl; ii++) { ++- printf(" %s.mpls\n", p->pl[ii].name); ++- } ++- } ++-} ++- ++-static void _app_profile_print(BDJO_APP_PROFILE *p) ++-{ ++- printf(" Profile %d Version %d.%d.%d\n", ++- p->profile_number, p->major_version, p->minor_version, p->micro_version); ++-} ++- ++-static void _app_print(const BDJO_APP *p) ++-{ ++- unsigned ii; ++- ++- printf(" Control code: : %d (%s)\n", p->control_code, ++- p->control_code == 1 ? "autostart" : p->control_code == 2 ? "present" : "???"); ++- printf(" Type : %d (%s)\n", p->type, ++- p->type == 1 ? 
"BD-J App" : "???"); ++- printf(" Organization ID : %08X\n", p->org_id); ++- printf(" Application ID : %04X\n", p->app_id); ++- printf(" Priority : %d\n", p->priority); ++- printf(" Binding : %d (%s)\n", p->binding, _binding_str(p->binding)); ++- printf(" Visibility : %d (%s)\n", p->visibility, _visibility_str(p->visibility)); ++- ++- if (p->num_profile) { ++- printf(" Profiles:\n"); ++- for (ii = 0; ii < p->num_profile; ii++) { ++- _app_profile_print(&p->profile[ii]); ++- } ++- } ++- ++- if (p->num_name) { ++- printf(" Names:\n"); ++- for (ii = 0; ii < p->num_name; ii++) { ++- printf(" %s: %s\n", p->name[ii].lang, p->name[ii].name); ++- } ++- } ++- ++- printf(" Base directory : %s\n", p->base_dir); ++- printf(" Icon locator : %s\n", p->icon_locator); ++- printf(" Icon flags : 0x%04x\n", p->icon_flags); ++- printf(" Classpath extension : %s\n", p->classpath_extension); ++- printf(" Initial class : %s\n", p->initial_class); ++- printf(" Parameters : "); ++- for (ii = 0; ii < p->num_param; ii++) { ++- printf("%s ", p->param[ii].param); ++- } ++- printf("\n"); ++-} ++- ++-static void _app_management_table_print(const BDJO_APP_MANAGEMENT_TABLE *p) ++-{ ++- unsigned ii; ++- ++- for (ii = 0; ii < p->num_app; ii++) { ++- printf("Application %d:\n", ii); ++- _app_print(&p->app[ii]); ++- } ++-} ++- ++-static void _key_interest_table_print(const BDJO_KEY_INTEREST_TABLE *p) ++-{ ++- unsigned int v; ++- memcpy(&v, p, sizeof(unsigned int)); ++- if (v) { ++- printf("Key interest table:\n"); ++- printf(" %s%s%s%s%s%s%s%s%s%s%s\n", ++- p->vk_play ? "VK_PLAY " : "", ++- p->vk_stop ? "VK_STOP " : "", ++- p->vk_ffw ? "VK_FFW " : "", ++- p->vk_rew ? "VK_REW " : "", ++- p->vk_track_next ? "VK_TRACK_NEXT " : "", ++- p->vk_track_prev ? "VK_TRACK_PREV " : "", ++- p->vk_pause ? "VK_PAUSE " : "", ++- p->vk_still_off ? "VK_STILL_OFF " : "", ++- p->vk_sec_audio_ena_dis ? "VK_SEC_AUDIO " : "", ++- p->vk_sec_video_ena_dis ? "VK_SEC_VIDEO " : "", ++- p->pg_textst_ena_dis ? 
"VK_PG_TEXTST " : ""); ++- } ++-} ++- ++-static void _file_access_info_print(const BDJO_FILE_ACCESS_INFO *p) ++-{ ++- printf("File access info:\n %s\n", p->path); ++-} ++- ++-static void _bdjo_print(const BDJO *p) ++-{ ++- _terminal_info_print(&p->terminal_info); ++- _app_cache_info_print(&p->app_cache_info); ++- _accessible_playlists_print(&p->accessible_playlists); ++- _app_management_table_print(&p->app_table); ++- _key_interest_table_print(&p->key_interest_table); ++- _file_access_info_print(&p->file_access_info); ++-} ++- ++-int main(int argc, const char *argv[]) ++-{ ++- if (argc < 2) { ++- fprintf(stderr, "usage: %s <bdjo_file>\n", argv[0]); ++- return 1; ++- } ++- ++- int cnt; ++- for (cnt = 1; cnt < argc; cnt++) { ++- ++- printf("%s\n", argv[cnt]); ++- ++- BDJO *bdjo = bd_read_bdjo(argv[cnt]); ++- if (bdjo) { ++- _bdjo_print(bdjo); ++- bd_free_bdjo(bdjo); ++- } ++- printf("\n"); ++- } ++- ++- return 0; ++-} ++diff --git a/src/examples/clpi_dump.c b/src/examples/clpi_dump.c ++deleted file mode 100644 ++index bd64783..0000000 +++++ /dev/null ++@@ -1,487 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2009-2010 John Stebbins ++- * Copyright (C) 2012-2013 Petri Hintukainen <phintuka@users.sourceforge.net> ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. ++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. 
++- */ ++- ++-#include <stdio.h> ++-#include <stdlib.h> ++-#include <unistd.h> ++-#include <inttypes.h> ++- ++-#include "libbluray/bdnav/clpi_data.h" ++-#include "libbluray/bluray.h" ++- ++-#include "util.h" ++- ++-static int verbose; ++- ++-typedef struct { ++- int value; ++- const char *str; ++-} VALUE_MAP; ++- ++-static inline const char* ++-_lookup_str(const VALUE_MAP *map, int val) ++-{ ++- int ii; ++- ++- for (ii = 0; map[ii].str; ii++) { ++- if (val == map[ii].value) { ++- return map[ii].str; ++- } ++- } ++- return "?"; ++-} ++- ++-const VALUE_MAP codec_map[] = { ++- {0x01, "MPEG-1 Video"}, ++- {0x02, "MPEG-2 Video"}, ++- {0x03, "MPEG-1 Audio"}, ++- {0x04, "MPEG-2 Audio"}, ++- {0x80, "LPCM"}, ++- {0x81, "AC-3"}, ++- {0x82, "DTS"}, ++- {0x83, "TrueHD"}, ++- {0x84, "AC-3 Plus"}, ++- {0x85, "DTS-HD"}, ++- {0x86, "DTS-HD Master"}, ++- {0xa1, "AC-3 Plus for secondary audio"}, ++- {0xa2, "DTS-HD for secondary audio"}, ++- {0xea, "VC-1"}, ++- {0x1b, "H.264"}, ++- {0x20, "H.264 MVC dep."}, ++- {0x90, "Presentation Graphics"}, ++- {0x91, "Presentation Graphics"}, ++- {0x92, "Interactive Graphics"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP video_format_map[] = { ++- {0, "Reserved"}, ++- {1, "480i"}, ++- {2, "576i"}, ++- {3, "480p"}, ++- {4, "1080i"}, ++- {5, "720p"}, ++- {6, "1080p"}, ++- {7, "576p"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP video_rate_map[] = { ++- {0, "Reserved1"}, ++- {1, "23.976"}, ++- {2, "24"}, ++- {3, "25"}, ++- {4, "29.97"}, ++- {5, "Reserved2"}, ++- {6, "50"}, ++- {7, "59.94"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP video_aspect_map[] = { ++- {0, "Reserved1"}, ++- {1, "Reserved2"}, ++- {2, "4:3"}, ++- {3, "16:9"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP audio_format_map[] = { ++- {0, "Reserved1"}, ++- {1, "Mono"}, ++- {2, "Reserved2"}, ++- {3, "Stereo"}, ++- {4, "Reserved3"}, ++- {5, "Reserved4"}, ++- {6, "Multi Channel"}, ++- {12, "Combo"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP audio_rate_map[] = { ++- {0, "Reserved1"}, ++- 
{1, "48 Khz"}, ++- {2, "Reserved2"}, ++- {3, "Reserved3"}, ++- {4, "96 Khz"}, ++- {5, "192 Khz"}, ++- {12, "48/192 Khz"}, ++- {14, "48/96 Khz"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP application_type_map[] = { ++- {1, "Main TS for a main-path of Movie"}, ++- {2, "Main TS for a main-path of Time based slide show"}, ++- {3, "Main TS for a main-path of Browsable slide show"}, ++- {4, "Sub TS for a sub-path of Browsable slide show"}, ++- {5, "Sub TS for a sub-path of Interactive Graphics menu"}, ++- {6, "Sub TS for a sub-path of Text subtitle"}, ++- {7, "Sub TS for a sub-path of one or more elementary streams path"}, ++- {0, NULL}, ++-}; ++- ++-static void ++-_show_stream(CLPI_PROG_STREAM *ss, int level) ++-{ ++- indent_printf(level, "Codec (%04x): %s", ss->coding_type, ++- _lookup_str(codec_map, ss->coding_type)); ++- indent_printf(level, "PID: %04x", ss->pid); ++- switch (ss->coding_type) { ++- case 0x01: ++- case 0x02: ++- case 0xea: ++- case 0x1b: ++- case 0x20: ++- indent_printf(level, "Format %02x: %s", ss->format, ++- _lookup_str(video_format_map, ss->format)); ++- indent_printf(level, "Rate %02x: %s", ss->rate, ++- _lookup_str(video_rate_map, ss->rate)); ++- indent_printf(level, "Aspect %02x: %s", ss->aspect, ++- _lookup_str(video_aspect_map, ss->aspect)); ++- indent_printf(level, "oc_flag %02x", ss->oc_flag); ++- break; ++- ++- case 0x03: ++- case 0x04: ++- case 0x80: ++- case 0x81: ++- case 0x82: ++- case 0x83: ++- case 0x84: ++- case 0x85: ++- case 0x86: ++- case 0xa1: ++- case 0xa2: ++- indent_printf(level, "Format %02x: %s", ss->format, ++- _lookup_str(audio_format_map, ss->format)); ++- indent_printf(level, "Rate %02x: %s", ss->rate, ++- _lookup_str(audio_rate_map, ss->rate)); ++- indent_printf(level, "Language: %s", ss->lang); ++- break; ++- ++- case 0x90: ++- case 0x91: ++- case 0xa0: ++- indent_printf(level, "Language: %s", ss->lang); ++- break; ++- ++- case 0x92: ++- indent_printf(level, "Char Code: %02x", ss->char_code); ++- indent_printf(level, 
"Language: %s", ss->lang); ++- break; ++- ++- default: ++- fprintf(stderr, "unrecognized coding type %02x\n", ss->coding_type); ++- break; ++- }; ++-} ++- ++-static void ++-_show_clip_info(CLPI_CL *cl, int level) ++-{ ++- CLPI_CLIP_INFO *ci = &cl->clip; ++- int ii; ++- ++- indent_printf(level, "Clip Info"); ++- indent_printf(level+1, "Clip Stream Type: %02x", ci->clip_stream_type); ++- indent_printf(level+1, "Clip Application Type (%02x): %s", ++- ci->application_type, _lookup_str(application_type_map, ci->application_type)); ++- indent_printf(level+1, "is_ATC_delta: %s", ci->is_atc_delta ? "True" : "False"); ++- indent_printf(level+1, "ATC delta count: %d", ci->atc_delta_count); ++- indent_printf(level+1, "TS Recording Rate: %u", ci->ts_recording_rate); ++- indent_printf(level+1, "Number Source Packets: %u", ci->num_source_packets); ++- // Show ts type info ++- indent_printf(level+1, "TS Type Info"); ++- indent_printf(level+2, "Validity Flags %02x", ci->ts_type_info.validity); ++- indent_printf(level+2, "Format Id %s", ci->ts_type_info.format_id); ++- // Show cc5 thing ++- for (ii = 0; ii < ci->atc_delta_count; ii++) { ++- indent_printf(level+1, "ATC delta[ %d ]", ii); ++- indent_printf(level+2, "Delta %08x", ci->atc_delta[ii].delta); ++- indent_printf(level+2, "File Id %s", ci->atc_delta[ii].file_id); ++- indent_printf(level+2, "File Code %s", ci->atc_delta[ii].file_code); ++- } ++- // show fonts ++- if (cl->font_info.font_count) { ++- indent_printf(level+1, "Font files"); ++- for (ii = 0; ii < cl->font_info.font_count; ii++) { ++- indent_printf(level+2, "Font file %d: %s.otf", ii+1, cl->font_info.font[ii].file_id); ++- } ++- } ++- ++- printf("\n"); ++-} ++- ++-static void ++-_show_seq_info(CLPI_SEQ_INFO *si, int level) ++-{ ++- CLPI_ATC_SEQ *atc; ++- CLPI_STC_SEQ *stc; ++- int ii, jj; ++- ++- indent_printf(level, "Sequence Info"); ++- indent_printf(level+1, "Number ATC Sequences: %d", si->num_atc_seq); ++- for (ii = 0; ii < si->num_atc_seq; ii++) { ++- atc = 
&si->atc_seq[ii]; ++- indent_printf(level+1, "ATC Sequence %d", ii); ++- indent_printf(level+2, "SPN ATC Start: %u", atc->spn_atc_start); ++- indent_printf(level+2, "Offset STC Id: %d", atc->offset_stc_id); ++- indent_printf(level+2, "Number STC Sequences: %d", atc->num_stc_seq); ++- for (jj = 0; jj < atc->num_stc_seq; jj++) { ++- stc = &atc->stc_seq[jj]; ++- indent_printf(level+2, "ATC Sequence %d", jj); ++- indent_printf(level+3, "SPN STC Start: %u", stc->spn_stc_start); ++- indent_printf(level+3, "PCR PID: %04x", stc->pcr_pid); ++- indent_printf(level+3, "Presentation Start: %u", ++- stc->presentation_start_time); ++- indent_printf(level+3, "Presentation End: %u", ++- stc->presentation_end_time); ++- } ++- } ++-} ++- ++-static void ++-_show_prog_info(CLPI_PROG_INFO *pi, int level) ++-{ ++- CLPI_PROG *prog; ++- int ii, jj; ++- ++- indent_printf(level, "Program Info"); ++- indent_printf(level+1, "Number Programs: %d", pi->num_prog); ++- for (ii = 0; ii < pi->num_prog; ii++) { ++- prog = &pi->progs[ii]; ++- indent_printf(level+1, "Program %d", ii); ++- indent_printf(level+2, "SPN Program Sequence Start: %d", ++- prog->spn_program_sequence_start); ++- indent_printf(level+2, "Program Map PID: %d", prog->program_map_pid); ++- indent_printf(level+2, "Number Streams: %d", prog->num_streams); ++- indent_printf(level+2, "Number Groups: %d", prog->num_groups); ++- for (jj = 0; jj < prog->num_streams; jj++) { ++- indent_printf(level+2, "Stream %d", jj); ++- _show_stream(&prog->streams[jj], level+3); ++- } ++- } ++-} ++- ++-static void ++-_show_extent_start(CLPI_EXTENT_START *es, int level) ++-{ ++- unsigned int ii; ++- ++- indent_printf(level, "Extension data: Extent Start Point"); ++- ++- if (!es->num_point) { ++- indent_printf(level+1, "(no data)"); ++- ++- } else { ++- indent_printf(level+1, "Number of Start Points: %d", es->num_point); ++- ++- if (verbose) { ++- for (ii = 0; ii < es->num_point; ii++) { ++- indent_printf(level+1, "Extent %5d: SPN 0x%08X", ii, 
es->point[ii]); ++- } ++- } ++- } ++-} ++- ++-static void ++-_show_cpi_info(CLPI_CPI *cpi, int level) ++-{ ++- CLPI_EP_MAP_ENTRY *entry; ++- CLPI_EP_COARSE *coarse; ++- CLPI_EP_FINE *fine; ++- int ii, jj, kk; ++- ++- indent_printf(level, "CPI"); ++- indent_printf(level+1, "Number Stream PID: %d", cpi->num_stream_pid); ++- for (ii = 0; ii < cpi->num_stream_pid; ii++) { ++- entry = &cpi->entry[ii]; ++- indent_printf(level+1, "Stream: %d", ii); ++- indent_printf(level+2, "PID: %04x", entry->pid); ++- indent_printf(level+2, "EP Stream Type: %d", entry->ep_stream_type); ++- indent_printf(level+2, "Number EP Coarse: %d", entry->num_ep_coarse); ++- indent_printf(level+2, "Number EP Fine: %d", entry->num_ep_fine); ++- indent_printf(level+2, "EP Map Start: %d", ++- entry->ep_map_stream_start_addr); ++- for (jj = 0; jj < entry->num_ep_coarse; jj++) { ++- coarse = &entry->coarse[jj]; ++- indent_printf(level+2, "Coarse: %d", jj); ++- indent_printf(level+3, "Ref EP Fine: %d", coarse->ref_ep_fine_id); ++- indent_printf(level+3, "PTS EP: %d", coarse->pts_ep); ++- indent_printf(level+3, "SPN EP: %d", coarse->spn_ep); ++- } ++- for (jj = 0; jj < entry->num_ep_fine; jj++) { ++- fine = &entry->fine[jj]; ++- indent_printf(level+2, "Fine: %d", jj); ++- indent_printf(level+3, "Angle Change Point: %s", ++- fine->is_angle_change_point ? 
"True":"False"); ++- indent_printf(level+3, "I End Offset: %d", ++- fine->i_end_position_offset); ++- indent_printf(level+3, "PTS EP: %d", fine->pts_ep); ++- indent_printf(level+3, "SPN EP: %d", fine->spn_ep); ++- } ++- if (verbose) { ++- uint64_t pts; ++- uint32_t spn; ++- ++- indent_printf(level+2, "PTS - SPN Map"); ++- for (jj = 0; jj < entry->num_ep_coarse; jj++) { ++- int start, end; ++- ++- indent_printf(level+3, "Coarse: %d", jj); ++- coarse = &entry->coarse[jj]; ++- start = coarse->ref_ep_fine_id; ++- if (jj < entry->num_ep_coarse - 1) { ++- end = entry->coarse[jj+1].ref_ep_fine_id; ++- } else { ++- end = entry->num_ep_fine; ++- } ++- for (kk = start; kk < end; kk++) { ++- fine = &entry->fine[kk]; ++- pts = ((uint64_t) (coarse->pts_ep & ~0x01) << 19) + ++- ((uint64_t)fine->pts_ep << 9); ++- spn = (coarse->spn_ep & ~0x1FFFF) + fine->spn_ep; ++- indent_printf(level+4, "PTS %8"PRIu64"/%8"PRIu64" -- SPN %u", ++- pts, pts >> 1, spn); ++- } ++- } ++- } ++- } ++-} ++- ++- ++-static void ++-_usage(char *cmd) ++-{ ++- fprintf(stderr, ++-"Usage: %s -vcspi <clpi file> [<clpi file> ...]\n" ++-"With no options, produces no output (not very useful)\n" ++-"Options:\n" ++-" v - Verbose output.\n" ++-" c - Shows the Clip Info structure\n" ++-" s - Shows the Sequence Info structure\n" ++-" p - Shows the Program Info structure\n" ++-" i - Shows the CPI. 
PTS to SPN map\n" ++-" e - Shows Extent Start Table\n" ++-, cmd); ++- ++- exit(EXIT_FAILURE); ++-} ++- ++-#define OPTS "vcspie" ++- ++-int ++-main(int argc, char *argv[]) ++-{ ++- CLPI_CL *cl; ++- int opt; ++- int opt_clip_info = 0, opt_seq_info = 0, opt_prog_info = 0; ++- int opt_cpi_info = 0, opt_extent_start = 0; ++- int ii; ++- ++- do { ++- opt = getopt(argc, argv, OPTS); ++- switch (opt) { ++- case -1: break; ++- ++- case 'v': ++- verbose = 1; ++- break; ++- ++- case 's': ++- opt_seq_info = 1; ++- break; ++- ++- case 'i': ++- opt_cpi_info = 1; ++- break; ++- ++- case 'c': ++- opt_clip_info = 1; ++- break; ++- ++- case 'p': ++- opt_prog_info = 1; ++- break; ++- ++- case 'e': ++- opt_extent_start = 1; ++- break; ++- ++- default: ++- _usage(argv[0]); ++- break; ++- } ++- } while (opt != -1); ++- ++- if (optind >= argc) { ++- _usage(argv[0]); ++- } ++- ++- for (ii = optind; ii < argc; ii++) { ++- cl = bd_read_clpi(argv[ii]); ++- if (cl == NULL) { ++- fprintf(stderr, "Parsing %s failed\n", argv[ii]); ++- continue; ++- } ++- if (opt_clip_info) { ++- // Show clip info ++- _show_clip_info(cl, 1); ++- } ++- if (opt_seq_info) { ++- // Show sequence info ++- _show_seq_info(&cl->sequence, 1); ++- } ++- if (opt_prog_info) { ++- // Show program info ++- _show_prog_info(&cl->program, 1); ++- } ++- if (opt_cpi_info) { ++- // Show cpi ++- _show_cpi_info(&cl->cpi, 1); ++- } ++- ++- if (opt_prog_info) { ++- if (cl->program_ss.num_prog) { ++- printf("\n"); ++- indent_printf(1, "Extension: Program Info SS"); ++- _show_prog_info(&cl->program_ss, 1); ++- } ++- } ++- if (opt_cpi_info) { ++- if (cl->program_ss.num_prog) { ++- printf("\n"); ++- indent_printf(1, "Extension: CPI SS"); ++- _show_cpi_info(&cl->cpi_ss, 1); ++- } ++- } ++- if (opt_extent_start) { ++- // Show extent start point ++- if (cl->extent_start.num_point > 0) { ++- _show_extent_start(&cl->extent_start, 1); ++- } ++- } ++- ++- bd_free_clpi(cl); ++- } ++- return 0; ++-} ++- ++diff --git a/src/examples/hdmv_test.c 
b/src/examples/hdmv_test.c ++deleted file mode 100644 ++index 585ed70..0000000 +++++ /dev/null ++@@ -1,257 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2010 hpi1 ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. ++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include <stdio.h> ++-#include <stdlib.h> ++-#include <string.h> ++-#include <inttypes.h> ++- ++-#include "util/log_control.h" ++-#include "libbluray/bluray.h" ++- ++-static void _print_event(BD_EVENT *ev) ++-{ ++- switch (ev->event) { ++- case BD_EVENT_NONE: ++- break; ++- case BD_EVENT_ERROR: ++- printf("EVENT_ERROR:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_READ_ERROR: ++- printf("EVENT_READ_ERROR:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_ENCRYPTED: ++- printf("EVENT_ENCRYPTED:\t%d\n", ev->param); ++- break; ++- ++- /* current playback position */ ++- ++- case BD_EVENT_ANGLE: ++- printf("EVENT_ANGLE:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_TITLE: ++- printf("EVENT_TITLE:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_PLAYLIST: ++- printf("EVENT_PLAYLIST:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_PLAYITEM: ++- printf("EVENT_PLAYITEM:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_CHAPTER: ++- printf("EVENT_CHAPTER:\t%d\n", ev->param); ++- break; ++- ++- /* */ ++- ++- case BD_EVENT_STILL: ++- printf("EVENT_STILL:\t%d\n", 
ev->param); ++- break; ++- ++- case BD_EVENT_SEEK: ++- printf("EVENT_SEEK:\t%d\n", ev->param); ++- break; ++- ++- case BD_EVENT_STILL_TIME: ++- if (ev->param) { ++- printf("EVENT_STILL_TIME:\t%d\n", ev->param); ++- } else { ++- printf("EVENT_STILL_TIME:\tinfinite\n"); ++- } ++- break; ++- ++- /* stream selection */ ++- ++- case BD_EVENT_AUDIO_STREAM: ++- printf("EVENT_AUDIO_STREAM:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_IG_STREAM: ++- printf("EVENT_IG_STREAM:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_PG_TEXTST_STREAM: ++- printf("EVENT_PG_TEXTST_STREAM:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_SECONDARY_AUDIO_STREAM: ++- printf("EVENT_SECONDARY_AUDIO_STREAM:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_SECONDARY_VIDEO_STREAM: ++- printf("EVENT_SECONDARY_VIDEO_STREAM:\t%d\n", ev->param); ++- break; ++- ++- case BD_EVENT_PG_TEXTST: ++- printf("EVENT_PG_TEXTST:\t%s\n", ev->param ? "enable" : "disable"); ++- break; ++- case BD_EVENT_SECONDARY_AUDIO: ++- printf("EVENT_SECONDARY_AUDIO:\t%s\n", ev->param ? "enable" : "disable"); ++- break; ++- case BD_EVENT_SECONDARY_VIDEO: ++- printf("EVENT_SECONDARY_VIDEO:\t%s\n", ev->param ? "enable" : "disable"); ++- break; ++- case BD_EVENT_SECONDARY_VIDEO_SIZE: ++- printf("EVENT_SECONDARY_VIDEO_SIZE:\t%s\n", ev->param==0 ? "PIP" : "fullscreen"); ++- break; ++- ++- default: ++- printf("UNKNOWN EVENT %d:\t%d\n", ev->event, ev->param); ++- break; ++- } ++- ++- fflush(stdout); ++-} ++- ++-static void _read_to_eof(BLURAY *bd) ++-{ ++- BD_EVENT ev; ++- int bytes; ++- uint64_t total = 0; ++- uint8_t buf[6144]; ++- ++- bd_seek(bd, bd_get_title_size(bd) - 6144); ++- ++- do { ++- bytes = bd_read_ext(bd, buf, 6144, &ev); ++- total += bytes < 0 ? 
0 : bytes; ++- _print_event(&ev); ++- } while (bytes > 0); ++- ++- printf("_read_to_eof(): read %"PRIu64" bytes\n", total); ++-} ++- ++-static void _print_events(BLURAY *bd) ++-{ ++- BD_EVENT ev; ++- ++- do { ++- bd_read_ext(bd, NULL, 0, &ev); ++- _print_event(&ev); ++- } while (ev.event != BD_EVENT_NONE && ev.event != BD_EVENT_ERROR); ++-} ++- ++-static void _play_pl(BLURAY *bd) ++-{ ++- printf("Playing playlist\n"); ++- ++- fflush(stdout); ++- _read_to_eof(bd); ++- ++- printf("Playing playlist done\n\n"); ++- ++- _print_events(bd); ++- ++- printf("\n"); ++-} ++- ++-int main(int argc, char *argv[]) ++-{ ++- int title = -1; ++- int verbose = 0; ++- int args = 0; ++- ++- /* ++- * parse arguments ++- */ ++- ++- if (argc < 2) { ++- printf("\nUsage:\n %s [-v] [-t <title>] <media_path> [<keyfile_path>]\n\n", argv[0]); ++- return -1; ++- } ++- ++- if (!strcmp(argv[1+args], "-v")) { ++- verbose = 1; ++- args++; ++- } ++- ++- if (!strcmp(argv[1+args], "-t")) { ++- args++; ++- title = atoi(argv[1+args]); ++- args++; ++- printf("Requested title %d\n", title); ++- } ++- ++- if (verbose) { ++- printf("Enabling verbose debug\n"); ++- bd_set_debug_mask(bd_get_debug_mask() | DBG_HDMV | DBG_BLURAY); ++- } ++- ++- printf("\n"); ++- ++- /* ++- * open and setup ++- */ ++- ++- BLURAY *bd = bd_open(argv[1+args], argv[2+args]); ++- ++- if (!bd) { ++- printf("bd_open(\'%s\') failed\n", argv[1]); ++- return -1; ++- } ++- ++- bd_set_player_setting (bd, BLURAY_PLAYER_SETTING_PARENTAL, 99); ++- bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_AUDIO_LANG, "eng"); ++- bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_PG_LANG, "eng"); ++- bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_MENU_LANG, "eng"); ++- bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_COUNTRY_CODE, NULL); ++- ++- /* ++- * play ++- */ ++- ++- printf("Running first play movie object\n"); ++- ++- fflush(stdout); ++- bd_play(bd); ++- ++- _print_events(bd); ++- ++- printf("\n"); ++- ++- /* ++- * play title ++- */ 
++- ++- if (title >= 0) { ++- printf("Playing title %d\n", title); ++- ++- fflush(stdout); ++- bd_play_title(bd, title); ++- ++- _print_events(bd); ++- ++- printf("\n"); ++- } ++- ++- /* ++- * play playlist ++- */ ++- ++- _play_pl(bd); ++- ++- _play_pl(bd); ++- ++- _play_pl(bd); ++- ++- /* ++- * clean up ++- */ ++- ++- bd_close(bd); ++- ++- return 0; ++-} ++- ++diff --git a/src/examples/list_titles.c b/src/examples/list_titles.c ++index 2e0cae8..768f078 100644 ++--- a/src/examples/list_titles.c +++++ b/src/examples/list_titles.c ++@@ -78,7 +78,10 @@ int main(int argc, char *argv[]) ++ _usage(argv[0]); ++ } ++ bd = bd_open(bd_dir, NULL); ++- +++ if (!bd) { +++ fprintf(stderr, "bd_open(%s) failed\n", bd_dir); +++ exit(EXIT_FAILURE); +++ } ++ count = bd_get_titles(bd, flags, seconds); ++ main_title = bd_get_main_title(bd); ++ if (main_title >= 0) { ++@@ -89,7 +92,7 @@ int main(int argc, char *argv[]) ++ BLURAY_TITLE_INFO* ti; ++ ti = bd_get_title_info(bd, ii, 0); ++ printf( ++- "index: %d duration: %02"PRIu64":%02"PRIu64":%02"PRIu64" chapters: %3d angles: %2u clips: %3u (playlist: %05d.mpls) " +++ "index: %3d duration: %02"PRIu64":%02"PRIu64":%02"PRIu64" chapters: %3d angles: %2u clips: %3u (playlist: %05d.mpls) " ++ "V:%d A:%-2d PG:%-2d IG:%-2d SV:%d SA:%d\n", ++ ii + 1, ++ (ti->duration / 90000) / (3600), ++diff --git a/src/examples/mobj_dump.c b/src/examples/mobj_dump.c ++deleted file mode 100644 ++index 3eaf9f4..0000000 +++++ /dev/null ++@@ -1,83 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2010 hpi1 ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. 
++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include "libbluray/bluray.h" ++- ++-#include "libbluray/hdmv/mobj_data.h" ++-#include "libbluray/hdmv/mobj_print.h" ++- ++-#include <stdio.h> ++-#include <string.h> ++- ++-static void _mobj_print(MOBJ_OBJECTS *objects, int disasm) ++-{ ++- int o, c; ++- ++- printf("Number of objects: %d\n", objects->num_objects); ++- ++- for (o = 0; o < objects->num_objects; o++) { ++- ++- printf("Object %d:\n" ++- " number of commands: %d\n" ++- " resume intention flag: %d\n" ++- " menu call mask: %d\n" ++- " title search mask: %d\n", ++- o, objects->objects[o].num_cmds, ++- objects->objects[o].resume_intention_flag, ++- objects->objects[o].menu_call_mask, ++- objects->objects[o].title_search_mask); ++- ++- if (disasm) { ++- printf(" program:\n"); ++- for (c = 0; c < objects->objects[o].num_cmds; c++) { ++- char buf[256]; ++- mobj_sprint_cmd(buf, &objects->objects[o].cmds[c]); ++- printf(" %04d: %s\n", c, buf); ++- } ++- } ++- } ++-} ++- ++-int main(int argc, const char *argv[]) ++-{ ++- int disasm = 0; ++- MOBJ_OBJECTS *mobj = NULL; ++- ++- if (argc < 2) { ++- fprintf(stderr, ++- "usage: %s [-d] <file>\n" ++- "Options:\n" ++- " d disassemble object code\n", ++- argv[0]); ++- return 1; ++- } ++- if (argc > 2) { ++- disasm = !strcmp(argv[1], "-d"); ++- } ++- ++- mobj = bd_read_mobj(argv[argc-1]); ++- ++- if (mobj) { ++- _mobj_print(mobj, disasm); ++- ++- bd_free_mobj(mobj); ++- } ++- ++- return 0; ++-} ++diff --git a/src/examples/mpls_dump.c b/src/examples/mpls_dump.c ++deleted file mode 100644 ++index 619b6b2..0000000 +++++ /dev/null ++@@ 
-1,797 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2009-2010 John Stebbins ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. ++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include <sys/stat.h> ++-#include <dirent.h> ++-#include <stdio.h> ++-#include <stdlib.h> ++-#include <unistd.h> ++-#include <string.h> ++-#include <libgen.h> ++- ++-#include "libbluray/bdnav/mpls_parse.h" ++-#include "libbluray/bluray.h" ++- ++-#include "util.h" ++- ++-#ifdef _WIN32 ++-# define DIR_SEP "\\" ++-# define PLAYLIST_DIR "\\BDMV\\PLAYLIST" ++-#else ++-# define DIR_SEP "/" ++-# define PLAYLIST_DIR "/BDMV/PLAYLIST" ++-#endif ++- ++- ++-static int verbose; ++- ++-typedef struct { ++- int value; ++- const char *str; ++-} VALUE_MAP; ++- ++-const VALUE_MAP codec_map[] = { ++- {0x01, "MPEG-1 Video"}, ++- {0x02, "MPEG-2 Video"}, ++- {0x03, "MPEG-1 Audio"}, ++- {0x04, "MPEG-2 Audio"}, ++- {0x80, "LPCM"}, ++- {0x81, "AC-3"}, ++- {0x82, "DTS"}, ++- {0x83, "TrueHD"}, ++- {0x84, "AC-3 Plus"}, ++- {0x85, "DTS-HD"}, ++- {0x86, "DTS-HD Master"}, ++- {0xa1, "AC-3 Plus for secondary audio"}, ++- {0xa2, "DTS-HD for secondary audio"}, ++- {0xea, "VC-1"}, ++- {0x1b, "H.264"}, ++- {0x90, "Presentation Graphics"}, ++- {0x91, "Interactive Graphics"}, ++- {0x92, "Text Subtitle"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP video_format_map[] = { ++- {0, 
"Reserved"}, ++- {1, "480i"}, ++- {2, "576i"}, ++- {3, "480p"}, ++- {4, "1080i"}, ++- {5, "720p"}, ++- {6, "1080p"}, ++- {7, "576p"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP video_rate_map[] = { ++- {0, "Reserved1"}, ++- {1, "23.976"}, ++- {2, "24"}, ++- {3, "25"}, ++- {4, "29.97"}, ++- {5, "Reserved2"}, ++- {6, "50"}, ++- {7, "59.94"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP audio_format_map[] = { ++- {0, "Reserved1"}, ++- {1, "Mono"}, ++- {2, "Reserved2"}, ++- {3, "Stereo"}, ++- {4, "Reserved3"}, ++- {5, "Reserved4"}, ++- {6, "Multi Channel"}, ++- {12, "Combo"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP audio_rate_map[] = { ++- {0, "Reserved1"}, ++- {1, "48 Khz"}, ++- {2, "Reserved2"}, ++- {3, "Reserved3"}, ++- {4, "96 Khz"}, ++- {5, "192 Khz"}, ++- {12, "48/192 Khz"}, ++- {14, "48/96 Khz"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP subpath_type_map[] = { ++- {2, "Primary audio of the Browsable slideshow"}, ++- {3, "Interactive Graphics presentation menu"}, ++- {4, "Text Subtitle"}, ++- {5, "Out-of-mux Synchronous elementary streams"}, ++- {6, "Out-of-mux Asynchronous Picture-in-Picture presentation"}, ++- {7, "In-mux Synchronous Picture-in-Picture presentation"}, ++- {8, "SS Video"}, ++- {0,NULL} ++-}; ++- ++-const VALUE_MAP playback_type_map[] = { ++- {1, "Sequential"}, ++- {2, "Random"}, ++- {3, "Shuffle"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP connection_type_map[] = { ++- {1, "Non-seamless"}, ++- {5, "Seamless"}, ++- {6, "Seamless"}, ++- {0, NULL} ++-}; ++- ++-static const char* ++-_lookup_str(const VALUE_MAP *map, int val) ++-{ ++- int ii; ++- ++- for (ii = 0; map[ii].str; ii++) { ++- if (val == map[ii].value) { ++- return map[ii].str; ++- } ++- } ++- return "?"; ++-} ++- ++-static char * ++-_mk_path(const char *base, const char *sub) ++-{ ++- size_t n1 = strlen(base); ++- size_t n2 = strlen(sub); ++- char *result = (char*)malloc(n1 + n2 + strlen(DIR_SEP) + 1); ++- strcpy(result, base); ++- strcat(result, DIR_SEP); ++- strcat(result, sub); ++- 
++- return result; ++-} ++- ++-static void ++-_show_stream(MPLS_STREAM *ss, int level) ++-{ ++- indent_printf(level, "Codec (%04x): %s", ss->coding_type, ++- _lookup_str(codec_map, ss->coding_type)); ++- switch (ss->stream_type) { ++- case 1: ++- indent_printf(level, "PID: %04x", ss->pid); ++- break; ++- ++- case 2: ++- case 4: ++- indent_printf(level, "SubPath Id: %02x", ss->subpath_id); ++- indent_printf(level, "SubClip Id: %02x", ss->subclip_id); ++- indent_printf(level, "PID: %04x", ss->pid); ++- break; ++- ++- case 3: ++- indent_printf(level, "SubPath Id: %02x", ss->subpath_id); ++- indent_printf(level, "PID: %04x", ss->pid); ++- break; ++- ++- default: ++- fprintf(stderr, "unrecognized stream type %02x\n", ss->stream_type); ++- break; ++- }; ++- ++- switch (ss->coding_type) { ++- case 0x01: ++- case 0x02: ++- case 0xea: ++- case 0x1b: ++- indent_printf(level, "Format %02x: %s", ss->format, ++- _lookup_str(video_format_map, ss->format)); ++- indent_printf(level, "Rate %02x: %s", ss->rate, ++- _lookup_str(video_rate_map, ss->rate)); ++- break; ++- ++- case 0x03: ++- case 0x04: ++- case 0x80: ++- case 0x81: ++- case 0x82: ++- case 0x83: ++- case 0x84: ++- case 0x85: ++- case 0x86: ++- case 0xa1: ++- case 0xa2: ++- indent_printf(level, "Format %02x: %s", ss->format, ++- _lookup_str(audio_format_map, ss->format)); ++- indent_printf(level, "Rate %02x: %s", ss->rate, ++- _lookup_str(audio_rate_map, ss->rate)); ++- indent_printf(level, "Language: %s", ss->lang); ++- break; ++- ++- case 0x90: ++- case 0x91: ++- indent_printf(level, "Language: %s", ss->lang); ++- break; ++- ++- case 0x92: ++- indent_printf(level, "Char Code: %02x", ss->char_code); ++- indent_printf(level, "Language: %s", ss->lang); ++- break; ++- ++- default: ++- fprintf(stderr, "unrecognized coding type %02x\n", ss->coding_type); ++- break; ++- }; ++-} ++- ++-static void ++-_show_details(MPLS_PL *pl, int level) ++-{ ++- int ii, jj, kk; ++- ++- for (ii = 0; ii < pl->list_count; ii++) { ++- MPLS_PI *pi; 
++- ++- pi = &pl->play_item[ii]; ++- indent_printf(level, "Clip Id %s", pi->clip[0].clip_id); ++- indent_printf(level+1, "Stc Id: %02x", pi->clip[0].stc_id); ++- indent_printf(level+1, "Connection Condition: %s (%02x)", ++- _lookup_str(connection_type_map, pi->connection_condition), ++- pi->connection_condition); ++- indent_printf(level+1, "In-Time: %d", pi->in_time); ++- indent_printf(level+1, "Out-Time: %d", pi->out_time); ++- if (pi->still_mode == 1) { ++- indent_printf(level+1, "Still time: %ds\n", pi->still_time); ++- } ++- if (pi->still_mode == 2) { ++- indent_printf(level+1, "Still time: infinite\n"); ++- } ++- if (pi->angle_count > 1) { ++- for (jj = 1; jj < pi->angle_count; jj++) { ++- indent_printf(level+1, "Angle %d:", jj); ++- indent_printf(level+2, "Clip Id %s", pi->clip[jj].clip_id); ++- indent_printf(level+2, "Stc Id: %02x", pi->clip[jj].stc_id); ++- } ++- } ++- for (jj = 0; jj < pi->stn.num_video; jj++) { ++- indent_printf(level+1, "Video Stream %d:", jj); ++- _show_stream(&pi->stn.video[jj], level + 2); ++- } ++- for (jj = 0; jj < pi->stn.num_audio; jj++) { ++- indent_printf(level+1, "Audio Stream %d:", jj); ++- _show_stream(&pi->stn.audio[jj], level + 2); ++- } ++- for (jj = 0; jj < pi->stn.num_ig; jj++) { ++- indent_printf(level+1, "Interactive Graphics Stream %d:", jj); ++- _show_stream(&pi->stn.ig[jj], level + 2); ++- } ++- for (jj = 0; jj < (pi->stn.num_pg + pi->stn.num_pip_pg); jj++) { ++- if (jj < pi->stn.num_pg) { ++- indent_printf(level+1, "Presentation Graphics Stream %d:", jj); ++- } else { ++- indent_printf(level+1, "PIP Presentation Graphics Stream %d:", jj); ++- } ++- _show_stream(&pi->stn.pg[jj], level + 2); ++- } ++- for (jj = 0; jj < pi->stn.num_secondary_video; jj++) { ++- indent_printf(level+1, "Secondary Video Stream %d:", jj); ++- _show_stream(&pi->stn.secondary_video[jj], level + 2); ++- for (kk = 0; kk < pi->stn.secondary_video[jj].sv_num_secondary_audio_ref; kk++) { ++- indent_printf(level+2, "Secondary Audio Ref %d: %d", 
kk,pi->stn.secondary_video[jj].sv_secondary_audio_ref[kk]); ++- } ++- for (kk = 0; kk < pi->stn.secondary_video[jj].sv_num_pip_pg_ref; kk++) { ++- indent_printf(level+2, "PIP Presentation Graphic Ref %d: %d", kk,pi->stn.secondary_video[jj].sv_pip_pg_ref[kk]); ++- } ++- } ++- for (jj = 0; jj < pi->stn.num_secondary_audio; jj++) { ++- indent_printf(level+1, "Secondary Audio Stream %d:", jj); ++- _show_stream(&pi->stn.secondary_audio[jj], level + 2); ++- for (kk = 0; kk < pi->stn.secondary_audio[jj].sa_num_primary_audio_ref; kk++) { ++- indent_printf(level+2, "Primary Audio Ref %d: %d", kk,pi->stn.secondary_audio[jj].sa_primary_audio_ref[kk]); ++- } ++- } ++- printf("\n"); ++- } ++-} ++- ++-static void ++-_show_ai(MPLS_PL *pl, int level) ++-{ ++- indent_printf(level, "Playback type: %s (%d)", ++- _lookup_str(playback_type_map, pl->app_info.playback_type), ++- pl->app_info.playback_type); ++- if (pl->app_info.playback_type == 2 || pl->app_info.playback_type == 3) { ++- indent_printf(level+1, "Playback count: %d", pl->app_info.playback_count); ++- } ++-} ++- ++-static void ++-_show_marks(MPLS_PL *pl, int level) ++-{ ++- int ii; ++- ++- indent_printf(level, "PlayMark Count %d", pl->mark_count); ++- for (ii = 0; ii < pl->mark_count; ii++) { ++- MPLS_PI *pi; ++- MPLS_PLM *plm; ++- int min; ++- double sec; ++- ++- plm = &pl->play_mark[ii]; ++- indent_printf(level, "PlayMark %d", ii); ++- indent_printf(level+1, "Type: %02x", plm->mark_type); ++- if (plm->play_item_ref < pl->list_count) { ++- pi = &pl->play_item[plm->play_item_ref]; ++- indent_printf(level+1, "PlayItem: %s", pi->clip[0].clip_id); ++- } else { ++- indent_printf(level+1, "PlayItem: Invalid reference"); ++- } ++- indent_printf(level+1, "Time (ticks): %u", plm->time); ++- min = plm->duration / (45000*60); ++- sec = (double)(plm->duration - min * 45000 * 60) / 45000; ++- indent_printf(level+1, "Duration (mm:ss.ms, ticks): %d:%.2f, %u", ++- min, sec, plm->duration); ++- printf("\n"); ++- } ++-} ++- ++-static void 
++-_show_clip_list(MPLS_PL *pl, int level) ++-{ ++- int ii, jj; ++- ++- for (ii = 0; ii < pl->list_count; ii++) { ++- MPLS_PI *pi; ++- ++- pi = &pl->play_item[ii]; ++- if (verbose) { ++- uint32_t duration; ++- ++- duration = pi->out_time - pi->in_time; ++- indent_printf(level, "%s.m2ts -- Duration: %3d:%02d", ++- pi->clip[0].clip_id, ++- duration / (45000 * 60), (duration / 45000) % 60); ++- } else { ++- indent_printf(level, "%s.m2ts", pi->clip[0].clip_id); ++- } ++- if (pi->angle_count > 1) { ++- for (jj = 1; jj < pi->angle_count; jj++) { ++- indent_printf(level+1, "Angle %d: %s.m2ts", jj+1, pi->clip[jj].clip_id); ++- } ++- } ++- } ++- printf("\n"); ++-} ++- ++-static void ++-_show_sub_path(MPLS_SUB *sub, int level) ++-{ ++- int ii; ++- ++- indent_printf(level+1, "Type: %d (%s)", sub->type, _lookup_str(subpath_type_map, sub->type)); ++- indent_printf(level+1, "Repeat: %d", sub->is_repeat); ++- indent_printf(level+1, "Sub playitem count: %d", sub->sub_playitem_count); ++- ++- for (ii = 0; ii < sub->sub_playitem_count; ii++) { ++- MPLS_SUB_PI *pi; ++- ++- pi = &sub->sub_play_item[ii]; ++- ++- if (verbose) { ++- indent_printf(level+1, "Sub playitem %d", ii); ++- indent_printf(level+2, "Clip Id %s", pi->clip[0].clip_id); ++- indent_printf(level+2, "Multi clip: %d", pi->is_multi_clip); ++- indent_printf(level+2, "Clip count: %d", pi->clip_count); ++- indent_printf(level+2, "Connection Condition: %s (%02x)", ++- _lookup_str(connection_type_map, pi->connection_condition), ++- pi->connection_condition); ++- indent_printf(level+2, "In-Time: %d", pi->in_time); ++- indent_printf(level+2, "Out-Time: %d", pi->out_time); ++- indent_printf(level+2, "Sync playitem Id: %d", pi->sync_play_item_id); ++- indent_printf(level+2, "Sync PTS: %d", pi->sync_pts); ++- } else { ++- indent_printf(level+1, "%s.m2ts", pi->clip[0].clip_id); ++- } ++- } ++-} ++- ++-static void ++-_show_pip_metadata_block(MPLS_PIP_METADATA *block, int level) ++-{ ++- int ii; ++- ++- indent_printf(level, "Clip ref: 
%d", block->clip_ref); ++- indent_printf(level, "Secondary video ref: %d", block->secondary_video_ref); ++- indent_printf(level, "Timeline type: %d", block->timeline_type); ++- indent_printf(level, "Luma key flag: %d", block->luma_key_flag); ++- if (block->luma_key_flag) { ++- indent_printf(level, "Upper limit luma key: %d", block->upper_limit_luma_key); ++- } ++- indent_printf(level, "Trick play flag: %d", block->trick_play_flag); ++- ++- for (ii = 0; ii < block->data_count; ii++) { ++- indent_printf(level, "data block %d:", ii); ++- indent_printf(level+1, "Timestamp: %d", block->data[ii].time); ++- indent_printf(level+1, "Horizontal position %d", block->data[ii].xpos); ++- indent_printf(level+1, "Vertical position: %d", block->data[ii].ypos); ++- indent_printf(level+1, "Scaling factor: %d", block->data[ii].scale_factor); ++- } ++-} ++- ++-static void ++-_show_pip_metadata(MPLS_PL *pl, int level) ++-{ ++- int ii; ++- ++- for (ii = 0; ii < pl->ext_pip_data_count; ii++) { ++- MPLS_PIP_METADATA *data; ++- ++- data = &pl->ext_pip_data[ii]; ++- ++- indent_printf(level, "PiP metadata block %d:", ii); ++- _show_pip_metadata_block(data, level+1); ++- } ++-} ++- ++-static void ++-_show_sub_paths(MPLS_PL *pl, int level) ++-{ ++- int ss; ++- ++- for (ss = 0; ss < pl->sub_count; ss++) { ++- MPLS_SUB *sub; ++- ++- sub = &pl->sub_path[ss]; ++- ++- indent_printf(level, "Sub Path %d:", ss); ++- _show_sub_path(sub, level+1); ++- } ++-} ++- ++-static void ++-_show_sub_paths_ss(MPLS_PL *pl, int level) ++-{ ++- int ss; ++- ++- for (ss = 0; ss < pl->ext_sub_count; ss++) { ++- MPLS_SUB *sub; ++- ++- sub = &pl->ext_sub_path[ss]; ++- ++- indent_printf(level, "Extension Sub Path %d:", ss); ++- _show_sub_path(sub, level+1); ++- } ++-} ++- ++-static uint32_t ++-_pl_duration(MPLS_PL *pl) ++-{ ++- int ii; ++- uint32_t duration = 0; ++- MPLS_PI *pi; ++- ++- for (ii = 0; ii < pl->list_count; ii++) { ++- pi = &pl->play_item[ii]; ++- duration += pi->out_time - pi->in_time; ++- } ++- return 
duration; ++-} ++- ++-static int ++-_filter_dup(MPLS_PL *pl_list[], int count, MPLS_PL *pl) ++-{ ++- int ii, jj; ++- ++- for (ii = 0; ii < count; ii++) { ++- if (pl->list_count != pl_list[ii]->list_count || ++- _pl_duration(pl) != _pl_duration(pl_list[ii])) { ++- continue; ++- } ++- for (jj = 0; jj < pl->list_count; jj++) { ++- MPLS_PI *pi1, *pi2; ++- ++- pi1 = &pl->play_item[jj]; ++- pi2 = &pl_list[ii]->play_item[jj]; ++- ++- if (memcmp(pi1->clip[0].clip_id, pi2->clip[0].clip_id, 5) != 0 || ++- pi1->in_time != pi2->in_time || ++- pi1->out_time != pi2->out_time) { ++- break; ++- } ++- } ++- if (jj != pl->list_count) { ++- continue; ++- } ++- return 0; ++- } ++- return 1; ++-} ++- ++-static int ++-_find_repeats(MPLS_PL *pl, const char *m2ts) ++-{ ++- int ii, count = 0; ++- ++- for (ii = 0; ii < pl->list_count; ii++) { ++- MPLS_PI *pi; ++- ++- pi = &pl->play_item[ii]; ++- // Ignore titles with repeated segments ++- if (strcmp(pi->clip[0].clip_id, m2ts) == 0) { ++- count++; ++- } ++- } ++- return count; ++-} ++- ++-static int ++-_filter_short(MPLS_PL *pl, unsigned int seconds) ++-{ ++- // Ignore short playlists ++- if (_pl_duration(pl) / 45000 <= seconds) { ++- return 0; ++- } ++- return 1; ++-} ++- ++-static int ++-_filter_repeats(MPLS_PL *pl, int repeats) ++-{ ++- int ii; ++- ++- for (ii = 0; ii < pl->list_count; ii++) { ++- MPLS_PI *pi; ++- ++- pi = &pl->play_item[ii]; ++- // Ignore titles with repeated segments ++- if (_find_repeats(pl, pi->clip[0].clip_id) > repeats) { ++- return 0; ++- } ++- } ++- return 1; ++-} ++- ++-static int clip_list = 0, playlist_info = 0, chapter_marks = 0, sub_paths = 0, pip_metadata = 0; ++-static int repeats = 0, seconds = 0, dups = 0; ++- ++-static MPLS_PL* ++-_process_file(char *name, MPLS_PL *pl_list[], int pl_count) ++-{ ++- MPLS_PL *pl; ++- ++- pl = bd_read_mpls(name); ++- if (pl == NULL) { ++- fprintf(stderr, "Parse failed: %s\n", name); ++- return NULL; ++- } ++- if (seconds) { ++- if (!_filter_short(pl, seconds)) { ++- 
bd_free_mpls(pl); ++- return NULL; ++- } ++- } ++- if (repeats) { ++- if (!_filter_repeats(pl, repeats)) { ++- bd_free_mpls(pl); ++- return NULL; ++- } ++- } ++- if (dups) { ++- if (!_filter_dup(pl_list, pl_count, pl)) { ++- bd_free_mpls(pl); ++- return NULL; ++- } ++- } ++- if (verbose) { ++- indent_printf(0, ++- "%s -- Num Clips: %3d , Duration: minutes %4u:%02u", ++- basename(name), ++- pl->list_count, ++- _pl_duration(pl) / (45000 * 60), ++- (_pl_duration(pl) / 45000) % 60); ++- _show_ai(pl, 1); ++- } else { ++- indent_printf(0, "%s -- Duration: minutes %4u:%02u", ++- basename(name), ++- _pl_duration(pl) / (45000 * 60), ++- (_pl_duration(pl) / 45000) % 60); ++- } ++- if (playlist_info) { ++- _show_details(pl, 1); ++- } ++- if (chapter_marks) { ++- _show_marks(pl, 1); ++- } ++- if (pip_metadata) { ++- _show_pip_metadata(pl, 1); ++- } ++- if (clip_list) { ++- _show_clip_list(pl, 1); ++- } ++- if (sub_paths) { ++- _show_sub_paths(pl, 1); ++- _show_sub_paths_ss(pl, 1); ++- } ++- return pl; ++-} ++- ++-static void ++-_usage(char *cmd) ++-{ ++- fprintf(stderr, ++-"Usage: %s -vli <mpls file> [<mpls file> ...]\n" ++-"With no options, produces a list of the playlist(s) with durations\n" ++-"Options:\n" ++-" v - Verbose output.\n" ++-" l - Produces a list of the m2ts clips\n" ++-" i - Dumps detailed information about each clip\n" ++-" c - Show chapter marks\n" ++-" p - Show sub paths\n" ++-" P - Show picture-in-picture metadata\n" ++-" r <N> - Filter out titles that have >N repeating clips\n" ++-" d - Filter out duplicate titles\n" ++-" s <seconds> - Filter out short titles\n" ++-" f - Filter combination -r2 -d -s900\n" ++-, cmd); ++- ++- exit(EXIT_FAILURE); ++-} ++- ++-#define OPTS "vlicpPfr:ds:" ++- ++-static int ++-_qsort_str_cmp(const void *a, const void *b) ++-{ ++- const char *stra = *(char * const *)a; ++- const char *strb = *(char * const *)b; ++- ++- return strcmp(stra, strb); ++-} ++- ++-int ++-main(int argc, char *argv[]) ++-{ ++- MPLS_PL *pl; ++- int opt; ++- 
int ii, pl_ii; ++- MPLS_PL *pl_list[1000]; ++- struct stat st; ++- char *path = NULL; ++- DIR *dir = NULL; ++- ++- do { ++- opt = getopt(argc, argv, OPTS); ++- switch (opt) { ++- case -1: ++- break; ++- ++- case 'v': ++- verbose = 1; ++- break; ++- ++- case 'l': ++- clip_list = 1; ++- break; ++- ++- case 'i': ++- playlist_info = 1; ++- break; ++- ++- case 'c': ++- chapter_marks = 1; ++- break; ++- ++- case 'p': ++- sub_paths = 1; ++- break; ++- ++- case 'P': ++- pip_metadata = 1; ++- break; ++- ++- case 'd': ++- dups = 1; ++- break; ++- ++- case 'r': ++- repeats = atoi(optarg); ++- break; ++- ++- case 'f': ++- repeats = 2; ++- dups = 1; ++- seconds = 900; ++- break; ++- ++- case 's': ++- seconds = atoi(optarg); ++- break; ++- ++- default: ++- _usage(argv[0]); ++- break; ++- } ++- } while (opt != -1); ++- ++- if (optind >= argc) { ++- _usage(argv[0]); ++- } ++- ++- for (pl_ii = 0, ii = optind; pl_ii < 1000 && ii < argc; ii++) { ++- ++- if (stat(argv[ii], &st)) { ++- continue; ++- } ++- dir = NULL; ++- if (S_ISDIR(st.st_mode)) { ++- ++- printf("Directory: %s:\n", argv[ii]); ++- path = _mk_path(argv[ii], PLAYLIST_DIR); ++- if (path == NULL) { ++- fprintf(stderr, "Failed to find playlist path: %s\n", argv[ii]); ++- continue; ++- } ++- dir = opendir(path); ++- if (dir == NULL) { ++- fprintf(stderr, "Failed to open dir: %s\n", path); ++- free(path); ++- continue; ++- } ++- } ++- if (dir != NULL) { ++- char **dirlist = (char**)calloc(10001, sizeof(char*)); ++- struct dirent *ent; ++- int jj = 0; ++- for (ent = readdir(dir); ent != NULL; ent = readdir(dir)) { ++- dirlist[jj++] = strcpy((char*)malloc(strlen(ent->d_name)), ent->d_name); ++- } ++- qsort(dirlist, jj, sizeof(char*), _qsort_str_cmp); ++- for (jj = 0; dirlist[jj] != NULL; jj++) { ++- char *name = NULL; ++- name = _mk_path(path, dirlist[jj]); ++- free(dirlist[jj]); ++- if (stat(name, &st)) { ++- free(name); ++- continue; ++- } ++- if (!S_ISREG(st.st_mode)) { ++- free(name); ++- continue; ++- } ++- pl = 
_process_file(name, pl_list, pl_ii); ++- free(name); ++- if (pl != NULL) { ++- pl_list[pl_ii++] = pl; ++- } ++- } ++- free(dirlist); ++- free(path); ++- } else { ++- pl = _process_file(argv[ii], pl_list, pl_ii); ++- if (pl != NULL) { ++- pl_list[pl_ii++] = pl; ++- } ++- } ++- } ++- // Cleanup ++- for (ii = 0; ii < pl_ii; ii++) { ++- bd_free_mpls(pl_list[ii]); ++- } ++- return 0; ++-} ++- ++diff --git a/src/examples/util.c b/src/examples/util.c ++deleted file mode 100644 ++index aaa4c46..0000000 +++++ /dev/null ++@@ -1,40 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2009-2010 John Stebbins ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. ++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include <stdio.h> ++-#include <stdarg.h> ++- ++-#include "util.h" ++- ++-void ++-indent_printf(int level, const char *fmt, ...) 
++-{ ++- va_list ap; ++- int ii; ++- ++- for (ii = 0; ii < level; ii++) ++- { ++- printf(" "); ++- } ++- va_start(ap, fmt); ++- vprintf(fmt, ap); ++- va_end(ap); ++- printf("\n"); ++-} ++- ++diff --git a/src/examples/util.h b/src/examples/util.h ++deleted file mode 100644 ++index 144f8ec..0000000 +++++ /dev/null ++@@ -1,43 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2009-2010 John Stebbins ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. ++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include <stdint.h> ++- ++-#include "util/attributes.h" ++- ++-#if defined( __MINGW32__ ) ++-# undef lseek ++-# define lseek _lseeki64 ++-# undef fseeko ++-# define fseeko fseeko64 ++-# undef ftello ++-# define ftello ftello64 ++-# define flockfile(...) ++-# define funlockfile(...) ++-# define getc_unlocked getc ++-# undef off_t ++-# define off_t off64_t ++-# undef stat ++-# define stat _stati64 ++-# define fstat _fstati64 ++-# define wstat _wstati64 ++-#endif ++- ++-void indent_printf(int level, const char *fmt, ...) 
BD_ATTR_FORMAT_PRINTF(2,3); ++- ++diff --git a/src/file/dir_win32.c b/src/file/dir_win32.c ++index 2690658..f42114d 100644 ++--- a/src/file/dir_win32.c +++++ b/src/file/dir_win32.c ++@@ -86,8 +86,8 @@ static BD_DIR_H *_dir_open_win32(const char* dirname) ++ ++ dir->internal = priv; ++ ++- wchar_t wfilespec[MAX_PATH]; ++- if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, filespec, -1, wfilespec, MAX_PATH)) +++ wchar_t wfilespec[4096 + 1] = {0}; +++ if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, filespec, -1, wfilespec, 4096)) ++ priv->handle = _wfindfirst(wfilespec, &priv->info); ++ else ++ priv->handle = -1; ++diff --git a/src/file/dirs_win32.c b/src/file/dirs_win32.c ++index 5279ea5..3d07251 100644 ++--- a/src/file/dirs_win32.c +++++ b/src/file/dirs_win32.c ++@@ -36,10 +36,18 @@ ++ ++ char *win32_get_font_dir(const char *font_file) ++ { ++- wchar_t wdir[MAX_PATH]; +++ wchar_t wdir[MAX_PATH+1] = {0}; ++ if (S_OK != SHGetFolderPathW(NULL, CSIDL_FONTS, NULL, SHGFP_TYPE_CURRENT, wdir)) { ++- GetWindowsDirectoryW(wdir, MAX_PATH); ++- wcscat(wdir, L"\\fonts"); +++ int lenght = GetWindowsDirectoryW(wdir, MAX_PATH); +++ if (lenght == 0 || lenght > (MAX_PATH - 8)) { +++ BD_DEBUG(DBG_FILE, "Font directory path too long!\n"); +++ return NULL; +++ } +++ if (!wcscat(wdir, L"\\fonts")) { +++ BD_DEBUG(DBG_FILE, "Could not construct font directory path!\n"); +++ return NULL; +++ } +++ ++ } ++ ++ int len = WideCharToMultiByte (CP_UTF8, 0, wdir, -1, NULL, 0, NULL, NULL); ++@@ -59,7 +67,7 @@ char *file_get_config_home(void) ++ ++ char *file_get_data_home(void) ++ { ++- wchar_t wdir[MAX_PATH]; +++ wchar_t wdir[MAX_PATH+1] = {0}; ++ ++ /* Get the "Application Data" folder for the user */ ++ if (S_OK == SHGetFolderPathW(NULL, CSIDL_APPDATA | CSIDL_FLAG_CREATE, ++@@ -84,7 +92,7 @@ char *file_get_cache_home(void) ++ const char *file_get_config_system(const char *dir) ++ { ++ static char *appdir = NULL; ++- wchar_t wdir[MAX_PATH]; +++ wchar_t wdir[MAX_PATH+1] = {0}; ++ ++ if 
(!dir) { ++ // first call ++diff --git a/src/file/dl_win32.c b/src/file/dl_win32.c ++index e4492e1..7f6ae73 100644 ++--- a/src/file/dl_win32.c +++++ b/src/file/dl_win32.c ++@@ -57,7 +57,7 @@ void *dl_dlopen(const char *path, const char *version) ++ { ++ (void)version; ++ ++- wchar_t wname[MAX_PATH]; +++ wchar_t wname[MAX_PATH+1] = {0}; ++ char *name; ++ void *result; ++ ++@@ -109,7 +109,7 @@ const char *dl_get_path(void) ++ if (!initialized) { ++ initialized = 1; ++ ++- static char path[MAX_PATH]; +++ static char path[MAX_PATH + 1]; ++ HMODULE hModule; ++ wchar_t wpath[MAX_PATH]; ++ ++diff --git a/src/file/file.c b/src/file/file.c ++index 15edfe0..2f85248 100644 ++--- a/src/file/file.c +++++ b/src/file/file.c ++@@ -52,6 +52,10 @@ int file_mkdirs(const char *path) ++ char *end = dir; ++ char *p; ++ +++ if (!dir) { +++ return -1; +++ } +++ ++ /* strip file name */ ++ if (!(end = strrchr(end, DIR_SEP_CHAR))) { ++ X_FREE(dir); ++diff --git a/src/file/file_posix.c b/src/file/file_posix.c ++index 753a8ce..2a79f6f 100644 ++--- a/src/file/file_posix.c +++++ b/src/file/file_posix.c ++@@ -38,6 +38,13 @@ ++ #include <sys/stat.h> ++ #include <fcntl.h> ++ +++#ifdef __ANDROID__ +++# undef lseek +++# define lseek lseek64 +++# undef off_t +++# define off_t off64_t +++#endif +++ ++ static void _file_close(BD_FILE_H *file) ++ { ++ if (file) { ++diff --git a/src/file/file_win32.c b/src/file/file_win32.c ++index 5eb52d7..c0f48e4 100644 ++--- a/src/file/file_win32.c +++++ b/src/file/file_win32.c ++@@ -97,9 +97,9 @@ static BD_FILE_H *_file_open(const char* filename, const char *mode) ++ { ++ BD_FILE_H *file; ++ FILE *fp; ++- wchar_t wfilename[MAX_PATH], wmode[8]; +++ wchar_t wfilename[4096 + 1] = {0}, wmode[8] = {0}; ++ ++- if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, filename, -1, wfilename, MAX_PATH) || +++ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, filename, -1, wfilename, 4096) || ++ !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, wmode, 8)) { 
++ ++ BD_DEBUG(DBG_FILE, "Error opening file %s\n", filename); ++@@ -112,6 +112,9 @@ static BD_FILE_H *_file_open(const char* filename, const char *mode) ++ return NULL; ++ } ++ +++ // Set file buffer +++ setvbuf(fp, NULL, _IOFBF, 6144 * 10); +++ ++ file = calloc(1, sizeof(BD_FILE_H)); ++ if (!file) { ++ BD_DEBUG(DBG_FILE | DBG_CRIT, "Error opening file %s (out of memory)\n", filename); ++diff --git a/src/file/mount.c b/src/file/mount.c ++index 6382d8b..7575f89 100644 ++--- a/src/file/mount.c +++++ b/src/file/mount.c ++@@ -33,6 +33,7 @@ ++ ++ char *mount_get_mountpoint(const char *device_path) ++ { +++#ifndef __ANDROID__ ++ #ifdef HAVE_MNTENT_H ++ struct stat st; ++ if (stat (device_path, &st) ) { ++@@ -62,6 +63,7 @@ char *mount_get_mountpoint(const char *device_path) ++ endmntent (f); ++ } ++ #endif /* HAVE_MNTENT_H */ +++#endif /* __ANDROID__ */ ++ ++ return str_dup(device_path); ++ } ++diff --git a/src/libbluray/bdj/bdj.c b/src/libbluray/bdj/bdj.c ++index 3465c69..23d944a 100644 ++--- a/src/libbluray/bdj/bdj.c +++++ b/src/libbluray/bdj/bdj.c ++@@ -26,6 +26,7 @@ ++ ++ #include "native/register_native.h" ++ +++#include "file/file.h" ++ #include "file/dirs.h" ++ #include "file/dl.h" ++ #include "util/strutl.h" ++@@ -41,9 +42,6 @@ ++ #ifdef _WIN32 ++ #include <windows.h> ++ #include <winreg.h> ++-#define DIR_SEP "\\" ++-#else ++-#define DIR_SEP "/" ++ #endif ++ ++ #ifdef HAVE_BDJ_J2ME ++@@ -67,7 +65,7 @@ static void *_load_jvm_win32(const char **p_java_home) ++ ++ wchar_t buf_loc[4096] = L"SOFTWARE\\JavaSoft\\Java Runtime Environment\\"; ++ wchar_t buf_vers[128]; ++- +++ wchar_t java_path[4096] = L""; ++ char strbuf[256]; ++ ++ LONG r; ++@@ -77,14 +75,14 @@ static void *_load_jvm_win32(const char **p_java_home) ++ ++ r = RegOpenKeyExW(HKEY_LOCAL_MACHINE, buf_loc, 0, KEY_READ, &hkey); ++ if (r != ERROR_SUCCESS) { ++- BD_DEBUG(DBG_BDJ | DBG_CRIT, "Error opening registry key SOFTWARE\\JavaSoft\\Java Runtime Environment\\"); +++ BD_DEBUG(DBG_BDJ | DBG_CRIT, "Error 
opening registry key SOFTWARE\\JavaSoft\\Java Runtime Environment\\\n"); ++ return NULL; ++ } ++ ++ r = RegQueryValueExW(hkey, L"CurrentVersion", NULL, &lType, (LPBYTE)buf_vers, &dSize); ++ RegCloseKey(hkey); ++ if (r != ERROR_SUCCESS) { ++- BD_DEBUG(DBG_BDJ | DBG_CRIT, "CurrentVersion registry value not found"); +++ BD_DEBUG(DBG_BDJ | DBG_CRIT, "CurrentVersion registry value not found\n"); ++ return NULL; ++ } ++ ++@@ -97,7 +95,7 @@ static void *_load_jvm_win32(const char **p_java_home) ++ dSize = sizeof(buf_loc); ++ r = RegOpenKeyExW(HKEY_LOCAL_MACHINE, buf_loc, 0, KEY_READ, &hkey); ++ if (r != ERROR_SUCCESS) { ++- BD_DEBUG(DBG_BDJ | DBG_CRIT, "Error opening JRE version-specific registry key"); +++ BD_DEBUG(DBG_BDJ | DBG_CRIT, "Error opening JRE version-specific registry key\n"); ++ return NULL; ++ } ++ ++@@ -108,6 +106,9 @@ static void *_load_jvm_win32(const char **p_java_home) ++ WideCharToMultiByte(CP_UTF8, 0, buf_loc, -1, java_home, sizeof(java_home), NULL, NULL); ++ *p_java_home = java_home; ++ BD_DEBUG(DBG_BDJ, "JavaHome: %s\n", java_home); +++ +++ wcscat(java_path, buf_loc); +++ wcscat(java_path, L"\\bin"); ++ } ++ ++ dSize = sizeof(buf_loc); ++@@ -115,11 +116,13 @@ static void *_load_jvm_win32(const char **p_java_home) ++ RegCloseKey(hkey); ++ ++ if (r != ERROR_SUCCESS) { ++- BD_DEBUG(DBG_BDJ | DBG_CRIT, "RuntimeLib registry value not found"); +++ BD_DEBUG(DBG_BDJ | DBG_CRIT, "RuntimeLib registry value not found\n"); ++ return NULL; ++ } ++ +++ SetDllDirectoryW(java_path); ++ void *result = LoadLibraryW(buf_loc); +++ SetDllDirectoryW(NULL); ++ ++ WideCharToMultiByte(CP_UTF8, 0, buf_loc, -1, strbuf, sizeof(strbuf), NULL, NULL); ++ if (!result) { ++@@ -132,10 +135,43 @@ static void *_load_jvm_win32(const char **p_java_home) ++ } ++ #endif ++ +++#ifdef _WIN32 +++static inline char *_utf8_to_cp(const char *utf8) +++{ +++ int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0); +++ if (wlen == 0) { +++ return NULL; +++ } +++ +++ wchar_t *wide = (wchar_t 
*)malloc(wlen * sizeof(wchar_t)); +++ if (!wide) { +++ return NULL; +++ } +++ MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wide, wlen); +++ +++ size_t len = WideCharToMultiByte(CP_ACP, 0, wide, -1, NULL, 0, NULL, NULL); +++ if (len == 0) { +++ X_FREE(wide); +++ return NULL; +++ } +++ +++ char *out = (char *)malloc(len); +++ if (out != NULL) { +++ WideCharToMultiByte(CP_ACP, 0, wide, -1, out, len, NULL, NULL); +++ } +++ X_FREE(wide); +++ return out; +++} +++#endif +++ ++ static void *_jvm_dlopen(const char *java_home, const char *jvm_dir, const char *jvm_lib) ++ { ++ if (java_home) { ++ char *path = str_printf("%s/%s/%s", java_home, jvm_dir, jvm_lib); +++ if (!path) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ return NULL; +++ } ++ BD_DEBUG(DBG_BDJ, "Opening %s ...\n", path); ++ void *h = dl_dlopen(path, NULL); ++ X_FREE(path); ++@@ -208,11 +244,17 @@ static void *_load_jvm(const char **p_java_home) ++ ++ static int _can_read_file(const char *fn) ++ { ++- FILE *fp = fopen(fn, "rb"); +++ BD_FILE_H *fp; +++ +++ if (!fn) { +++ return 0; +++ } +++ +++ fp = file_open(fn, "rb"); ++ if (fp) { ++- char b; ++- int result = (int)fread(&b, 1, 1, fp); ++- fclose(fp); +++ uint8_t b; +++ int result = (int)file_read(fp, &b, 1); +++ file_close(fp); ++ if (result == 1) { ++ return 1; ++ } ++@@ -460,6 +502,11 @@ static int _create_jvm(void *jvm_lib, const char *java_home, const char *jar_fil ++ } ++ ++ JavaVMOption* option = calloc(1, sizeof(JavaVMOption) * 20); +++ if (!option) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ return 0; +++ } +++ ++ int n = 0; ++ JavaVMInitArgs args; ++ option[n++].optionString = str_dup ("-Dawt.toolkit=java.awt.BDToolkit"); ++@@ -499,6 +546,17 @@ static int _create_jvm(void *jvm_lib, const char *java_home, const char *jar_fil ++ args.options = option; ++ args.ignoreUnrecognized = JNI_FALSE; // don't ignore unrecognized options ++ +++#ifdef _WIN32 +++ /* ... in windows, JVM options are not UTF8 but current system code page ... 
*/ +++ /* luckily, most BD-J options can be passed in as java strings later. But, not class path. */ +++ int ii; +++ for (ii = 0; ii < n; ii++) { +++ char *tmp = _utf8_to_cp(option[ii].optionString); +++ X_FREE(option[ii].optionString); +++ option[ii].optionString = tmp; +++ } +++#endif +++ ++ int result = JNI_CreateJavaVM_fp(jvm, (void**) env, &args); ++ ++ while (--n >= 0) { ++@@ -534,16 +592,22 @@ BDJAVA* bdj_open(const char *path, struct bluray *bd, ++ return 0; ++ } ++ +++ BDJAVA* bdjava = calloc(1, sizeof(BDJAVA)); +++ if (!bdjava) { +++ dl_dlclose(jvm_lib); +++ return NULL; +++ } +++ ++ JNIEnv* env = NULL; ++ JavaVM *jvm = NULL; ++ if (!_find_jvm(jvm_lib, &env, &jvm) && ++ !_create_jvm(jvm_lib, java_home, jar_file, &env, &jvm)) { ++ +++ X_FREE(bdjava); ++ dl_dlclose(jvm_lib); ++ return NULL; ++ } ++ ++- BDJAVA* bdjava = calloc(1, sizeof(BDJAVA)); ++ bdjava->h_libjvm = jvm_lib; ++ bdjava->jvm = jvm; ++ ++diff --git a/src/libbluray/bdj/bdj.h b/src/libbluray/bdj/bdj.h ++index 45fbfc5..f6cd97b 100644 ++--- a/src/libbluray/bdj/bdj.h +++++ b/src/libbluray/bdj/bdj.h ++@@ -45,10 +45,10 @@ typedef enum { ++ } BDJ_EVENT; ++ ++ typedef struct { ++- char *persistent_root; ++- char *cache_root; +++ char *persistent_root; /* BD-J Xlet persistent storage */ +++ char *cache_root; /* BD-J binding unit data area */ ++ ++- char *classpath; +++ char *classpath; /* BD-J implementation class path (location of libbluray.jar) */ ++ } BDJ_STORAGE; ++ ++ typedef struct bdjava_s BDJAVA; ++diff --git a/src/libbluray/bdj/build.xml b/src/libbluray/bdj/build.xml ++index c2764f7..938cd44 100644 ++--- a/src/libbluray/bdj/build.xml +++++ b/src/libbluray/bdj/build.xml ++@@ -7,6 +7,7 @@ ++ <property name="build" location="build"/> ++ <property name="dist" location="../../.libs"/> ++ <property name="src_awt" value=""/> +++ <property name="src_asm" value="../../../contrib/asm/src/"/> ++ <property name="bootclasspath" value=""/> ++ <property name="version" value=""/> ++ ++@@ -18,6 +19,12 @@ ++ ++ 
<target name="compile" depends="init" ++ description="compile the source " > +++ <javac srcdir="${src_asm}" destdir="${build}" debug="yes" +++ bootclasspath="${bootclasspath}" +++ source="1.5" target="1.5"> +++ <compilerarg value="-XDignore.symbol.file"/> +++ <compilerarg value="-Xlint:-deprecation"/> +++ </javac> ++ <javac srcdir="${src}${src_awt}" destdir="${build}" debug="yes" ++ bootclasspath="${bootclasspath}" ++ source="1.4" target="1.4"> ++diff --git a/src/libbluray/bdj/java-j2se/java/awt/BDGraphics.java b/src/libbluray/bdj/java-j2se/java/awt/BDGraphics.java ++index 54c24e5..77ce66e 100644 ++--- a/src/libbluray/bdj/java-j2se/java/awt/BDGraphics.java +++++ b/src/libbluray/bdj/java-j2se/java/awt/BDGraphics.java ++@@ -53,6 +53,7 @@ class BDGraphics extends BDGraphicsBase { ++ ++ public java.awt.font.FontRenderContext getFontRenderContext() ++ { +++ logger.unimplemented("getFontRenderContext"); ++ return null; ++ } ++ public void setPaint(Paint p) { ++diff --git a/src/libbluray/bdj/java/com/aacsla/bluray/online/ContentAttribute.java b/src/libbluray/bdj/java/com/aacsla/bluray/online/ContentAttribute.java ++index fbfa45d..34f59e4 100644 ++--- a/src/libbluray/bdj/java/com/aacsla/bluray/online/ContentAttribute.java +++++ b/src/libbluray/bdj/java/com/aacsla/bluray/online/ContentAttribute.java ++@@ -27,10 +27,33 @@ public class ContentAttribute { ++ } ++ ++ public byte[] getContentCertID() { +++ byte[] id = getContentCertID("AACS" + File.separator + "Content000.cer"); +++ if (id != null) { +++ return id; +++ } +++ +++ id = getContentCertID("MAKEMKV" + File.separator + "AACS" + File.separator + "Content000.cer"); +++ if (id != null) { +++ return id; +++ } +++ +++ id = getContentCertID("ANY!" 
+ File.separator + "Content000.cer"); +++ if (id != null) { +++ return id; +++ } +++ +++ return new byte[6]; +++ } +++ +++ private byte[] getContentCertID(String file) { ++ FileInputStream is = null; ++ try { ++ is = new FileInputStream( ++- System.getProperty("bluray.vfs.root") + File.separator + "AACS/Content000.cer"); +++ System.getProperty("bluray.vfs.root") + File.separator + file); +++ } catch (Exception e) { +++ return null; +++ } +++ try { ++ if (is.skip(14) != 14) ++ return null; ++ byte[] bytes = new byte[6]; ++diff --git a/src/libbluray/bdj/java/java/awt/BDFontMetrics.java b/src/libbluray/bdj/java/java/awt/BDFontMetrics.java ++index d2a91dc..fdcda44 100644 ++--- a/src/libbluray/bdj/java/java/awt/BDFontMetrics.java +++++ b/src/libbluray/bdj/java/java/awt/BDFontMetrics.java ++@@ -192,7 +192,12 @@ public class BDFontMetrics extends sun.font.FontDesignMetrics { ++ } ++ ++ static synchronized String[] getFontList() { ++- init(); +++ try { +++ init(); +++ } catch (Throwable t) { +++ System.err.println("getFontList() failed: " + t); +++ return new String[0]; +++ } ++ ++ ArrayList fontNames = new ArrayList(); ++ ++diff --git a/src/libbluray/bdj/java/java/awt/BDGraphicsBase.java b/src/libbluray/bdj/java/java/awt/BDGraphicsBase.java ++index 0c7d403..f7e60f7 100644 ++--- a/src/libbluray/bdj/java/java/awt/BDGraphicsBase.java +++++ b/src/libbluray/bdj/java/java/awt/BDGraphicsBase.java ++@@ -306,7 +306,7 @@ abstract class BDGraphicsBase extends Graphics2D implements ConstrainableGraphic ++ Rectangle rect = new Rectangle(x, y, length, 1); ++ rect = actualClip.intersection(rect); ++ ++- if (rect.width <= 0 || rect.height <= 0 || rect.x < 0 || rect.y < 0) { +++ if (rect.width <= 0 || rect.height <= 0 || rect.x < 0 || rect.y < 0 || backBuffer == null) { ++ return; ++ } ++ ++@@ -364,7 +364,7 @@ abstract class BDGraphicsBase extends Graphics2D implements ConstrainableGraphic ++ Rectangle rect = new Rectangle(x, y, length, 1); ++ rect = actualClip.intersection(rect); ++ ++- 
if (rect.width <= 0 || rect.height <= 0 || rect.x < 0 || rect.y < 0) { +++ if (rect.width <= 0 || rect.height <= 0 || rect.x < 0 || rect.y < 0 || backBuffer == null) { ++ return; ++ } ++ ++@@ -458,7 +458,7 @@ abstract class BDGraphicsBase extends Graphics2D implements ConstrainableGraphic ++ y += originY; ++ Rectangle rect = new Rectangle(x, y, w, h); ++ rect = actualClip.intersection(rect); ++- if (rect.isEmpty()) { +++ if (rect.isEmpty() || backBuffer == null) { ++ return; ++ } ++ x = rect.x; ++@@ -572,7 +572,7 @@ abstract class BDGraphicsBase extends Graphics2D implements ConstrainableGraphic ++ Rectangle rect = new Rectangle(x, y, w, h); ++ rect = actualClip.intersection(rect); ++ ++- if (rect.width <= 0 || rect.height <= 0) { +++ if (rect.width <= 0 || rect.height <= 0 || backBuffer == null) { ++ return; ++ } ++ ++diff --git a/src/libbluray/bdj/java/java/awt/BDImageConsumer.java b/src/libbluray/bdj/java/java/awt/BDImageConsumer.java ++index 59e2af3..a076873 100644 ++--- a/src/libbluray/bdj/java/java/awt/BDImageConsumer.java +++++ b/src/libbluray/bdj/java/java/awt/BDImageConsumer.java ++@@ -25,7 +25,7 @@ import java.awt.image.ImageObserver; ++ import java.awt.image.ImageConsumer; ++ import java.awt.image.ColorModel; ++ ++-public class BDImageConsumer extends BDImage implements ImageConsumer { +++class BDImageConsumer extends BDImage implements ImageConsumer { ++ private Hashtable properties; ++ private ImageProducer producer; ++ private int status; ++diff --git a/src/libbluray/bdj/java/java/awt/BDToolkitBase.java b/src/libbluray/bdj/java/java/awt/BDToolkitBase.java ++index e210dea..0f5e3e0 100644 ++--- a/src/libbluray/bdj/java/java/awt/BDToolkitBase.java +++++ b/src/libbluray/bdj/java/java/awt/BDToolkitBase.java ++@@ -124,6 +124,10 @@ abstract class BDToolkitBase extends Toolkit { ++ } ++ ++ public Image getImage(String filename) { +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("getImage(): no context " + Logger.dumpStack()); +++ } +++ 
++ if (cachedImages.containsKey(filename)) ++ return (Image)cachedImages.get(filename); ++ Image newImage = createImage(filename); ++@@ -133,6 +137,10 @@ abstract class BDToolkitBase extends Toolkit { ++ } ++ ++ public Image getImage(URL url) { +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("getImage(): no context " + Logger.dumpStack()); +++ } +++ ++ if (cachedImages.containsKey(url)) ++ return (Image)cachedImages.get(url); ++ Image newImage = createImage(url); ++@@ -142,6 +150,10 @@ abstract class BDToolkitBase extends Toolkit { ++ } ++ ++ public Image createImage(String filename) { +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("createImage(): no context " + Logger.dumpStack()); +++ } +++ ++ if (!new File(filename).isAbsolute()) { ++ String home = BDJXletContext.getCurrentXletHome(); ++ if (home != null) { ++@@ -161,6 +173,9 @@ abstract class BDToolkitBase extends Toolkit { ++ } ++ ++ public Image createImage(URL url) { +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("createImage(): no context " + Logger.dumpStack()); +++ } ++ ImageProducer ip = new URLImageSource(url); ++ Image newImage = createImage(ip); ++ return newImage; ++@@ -169,12 +184,20 @@ abstract class BDToolkitBase extends Toolkit { ++ public Image createImage(byte[] imagedata, ++ int imageoffset, ++ int imagelength) { +++ +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("createImage(): no context " + Logger.dumpStack()); +++ } +++ ++ ImageProducer ip = new ByteArrayImageSource(imagedata, imageoffset, imagelength); ++ Image newImage = createImage(ip); ++ return newImage; ++ } ++ ++ public Image createImage(ImageProducer producer) { +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("createImage(): no context " + Logger.dumpStack()); +++ } ++ return new BDImageConsumer(producer); ++ } ++ ++@@ -243,7 +266,7 @@ abstract class BDToolkitBase extends Toolkit { ++ } ++ } ++ ++- 
logger.warning("getSystemEventQueue(): no context"); +++ logger.warning("getSystemEventQueue(): no context from:" + logger.dumpStack()); ++ return eventQueue; ++ } ++ } ++diff --git a/src/libbluray/bdj/java/java/awt/BDWindowGraphics.java b/src/libbluray/bdj/java/java/awt/BDWindowGraphics.java ++index 743f441..26e7248 100644 ++--- a/src/libbluray/bdj/java/java/awt/BDWindowGraphics.java +++++ b/src/libbluray/bdj/java/java/awt/BDWindowGraphics.java ++@@ -39,6 +39,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void clearRect(int x, int y, int w, int h) { +++ if (window == null) return; ++ synchronized (window) { ++ super.clearRect(x, y, w, h); ++ window.notifyChanged(); ++@@ -46,6 +47,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void fillRect(int x, int y, int w, int h) { +++ if (window == null) return; ++ synchronized (window) { ++ super.fillRect(x, y, w, h); ++ window.notifyChanged(); ++@@ -53,6 +55,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawRect(int x, int y, int w, int h) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawRect(x, y, w, h); ++ window.notifyChanged(); ++@@ -60,6 +63,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawLine(int x1, int y1, int x2, int y2) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawLine(x1, y1, x2, y2); ++ window.notifyChanged(); ++@@ -67,6 +71,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void copyArea(int x, int y, int w, int h, int dx, int dy) { +++ if (window == null) return; ++ synchronized (window) { ++ super.copyArea(x, y, w, h, dx, dy); ++ window.notifyChanged(); ++@@ -74,6 +79,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawPolyline(int xPoints[], int yPoints[], int nPoints) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawPolyline(xPoints, yPoints, nPoints); ++ 
window.notifyChanged(); ++@@ -81,6 +87,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawPolygon(int xPoints[], int yPoints[], int nPoints) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawPolygon(xPoints, yPoints, nPoints); ++ window.notifyChanged(); ++@@ -88,6 +95,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void fillPolygon(int xPoints[], int yPoints[], int nPoints) { +++ if (window == null) return; ++ synchronized (window) { ++ super.fillPolygon(xPoints, yPoints, nPoints); ++ window.notifyChanged(); ++@@ -95,6 +103,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawOval(int x, int y, int w, int h) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawOval(x, y, w, h); ++ window.notifyChanged(); ++@@ -102,6 +111,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void fillOval(int x, int y, int w, int h) { +++ if (window == null) return; ++ synchronized (window) { ++ super.fillOval(x, y, w, h); ++ window.notifyChanged(); ++@@ -109,6 +119,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawArc(int x, int y, int w, int h, int startAngle, int endAngle) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawArc(x, y, w, h, startAngle, endAngle); ++ window.notifyChanged(); ++@@ -116,6 +127,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void fillArc(int x, int y, int w, int h, int startAngle, int endAngle) { +++ if (window == null) return; ++ synchronized (window) { ++ super.fillArc(x, y, w, h, startAngle, endAngle); ++ window.notifyChanged(); ++@@ -123,6 +135,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawRoundRect(int x, int y, int w, int h, int arcWidth, int arcHeight) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawRoundRect(x, y, w, h, arcWidth, arcHeight); ++ 
window.notifyChanged(); ++@@ -130,6 +143,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void fillRoundRect(int x, int y, int w, int h, int arcWidth, int arcHeight) { +++ if (window == null) return; ++ synchronized (window) { ++ super.fillRoundRect(x, y, w, h, arcWidth, arcHeight); ++ window.notifyChanged(); ++@@ -137,6 +151,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ protected void drawStringN(long ftFace, String string, int x, int y, int rgb) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawStringN(ftFace, string, x, y, rgb); ++ window.notifyChanged(); ++@@ -154,6 +169,8 @@ public class BDWindowGraphics extends BDGraphics { ++ boolean flipX, boolean flipY, ++ Color bg, ImageObserver observer) { ++ +++ if (window == null) return true; +++ ++ synchronized (window) { ++ boolean complete = super.drawImageN( ++ img, dx, dy, dw, dh, sx, sy, sw, sh, ++diff --git a/src/libbluray/bdj/java/java/awt/Font.java b/src/libbluray/bdj/java/java/awt/Font.java ++index a126bc5..a952599 100644 ++--- a/src/libbluray/bdj/java/java/awt/Font.java +++++ b/src/libbluray/bdj/java/java/awt/Font.java ++@@ -198,6 +198,9 @@ public class Font implements java.io.Serializable { ++ public Font deriveFont(int style, int size) { ++ return new Font(name, style, size, fontFile, family); ++ } +++ public Font deriveFont(int style, float size) { +++ return new Font(name, style, (int)size, fontFile, family); +++ } ++ ++ /* constructor */ ++ private Font(String name, int style, int size, File fontFile, String family) { ++diff --git a/src/libbluray/bdj/java/javax/media/MediaLocator.java b/src/libbluray/bdj/java/javax/media/MediaLocator.java ++index a182e8d..245ac54 100644 ++--- a/src/libbluray/bdj/java/javax/media/MediaLocator.java +++++ b/src/libbluray/bdj/java/javax/media/MediaLocator.java ++@@ -25,11 +25,11 @@ import java.net.URL; ++ ++ public class MediaLocator implements Serializable ++ { ++- public MediaLocator(URL url) { +++ 
public MediaLocator(URL url) { ++ this(url.toExternalForm()); ++ } ++ ++- public MediaLocator(String locatorString) { +++ public MediaLocator(String locatorString) { ++ int index = locatorString.indexOf(":"); ++ if (index <= 0) ++ throw new IllegalArgumentException("Bad locator string."); ++@@ -56,7 +56,7 @@ public class MediaLocator implements Serializable ++ public String toExternalForm() { ++ return protocol + ":" + remainder; ++ } ++- +++ ++ private String protocol = ""; ++ private String remainder = ""; ++ ++diff --git a/src/libbluray/bdj/java/javax/tv/graphics/TVContainer.java b/src/libbluray/bdj/java/javax/tv/graphics/TVContainer.java ++index e036884..09971b6 100644 ++--- a/src/libbluray/bdj/java/javax/tv/graphics/TVContainer.java +++++ b/src/libbluray/bdj/java/javax/tv/graphics/TVContainer.java ++@@ -20,9 +20,10 @@ package javax.tv.graphics; ++ ++ import java.awt.Container; ++ import javax.tv.xlet.XletContext; ++-import org.havi.ui.HScene; ++ import org.havi.ui.HSceneFactory; ++ +++import org.videolan.BDJXletContext; +++ ++ public class TVContainer { ++ public static Container getRootContainer(XletContext context) ++ { ++@@ -30,7 +31,15 @@ public class TVContainer { ++ throw new NullPointerException(); ++ } ++ ++- HScene scene = HSceneFactory.getInstance().getDefaultHScene(); ++- return scene; +++ if (!(context instanceof BDJXletContext) || (BDJXletContext)context != BDJXletContext.getCurrentContext()) { +++ org.videolan.Logger.getLogger(TVContainer.class.getName()).error("wrong context"); +++ } +++ +++ /* GEM: return instance of org.havi.ui.HScene or NULL */ +++ HSceneFactory sf = HSceneFactory.getInstance(); +++ if (sf != null) { +++ return sf.getDefaultHScene(); +++ } +++ return null; ++ } ++ } ++diff --git a/src/libbluray/bdj/java/javax/tv/locator/LocatorImpl.java b/src/libbluray/bdj/java/javax/tv/locator/LocatorImpl.java ++index e14825c..a87269a 100644 ++--- a/src/libbluray/bdj/java/javax/tv/locator/LocatorImpl.java +++++ 
b/src/libbluray/bdj/java/javax/tv/locator/LocatorImpl.java ++@@ -24,7 +24,7 @@ public class LocatorImpl implements Locator { ++ this.url = url; ++ } ++ ++- public boolean hasMultipleTransformations() { +++ public boolean hasMultipleTransformations() { ++ return false; ++ } ++ ++diff --git a/src/libbluray/bdj/java/javax/tv/service/SIElement.java b/src/libbluray/bdj/java/javax/tv/service/SIElement.java ++index c2a0262..16140de 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/SIElement.java +++++ b/src/libbluray/bdj/java/javax/tv/service/SIElement.java ++@@ -26,7 +26,7 @@ public interface SIElement extends SIRetrievable ++ public Locator getLocator(); ++ ++ public boolean equals(Object obj); ++- +++ ++ public int hashCode(); ++ ++ public ServiceInformationType getServiceInformationType(); ++diff --git a/src/libbluray/bdj/java/javax/tv/service/SIManagerImpl.java b/src/libbluray/bdj/java/javax/tv/service/SIManagerImpl.java ++index 4016876..f9d4a32 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/SIManagerImpl.java +++++ b/src/libbluray/bdj/java/javax/tv/service/SIManagerImpl.java ++@@ -29,6 +29,7 @@ import javax.tv.service.navigation.ServiceListImpl; ++ import javax.tv.service.transport.Transport; ++ import javax.tv.service.transport.TransportImpl; ++ +++import org.bluray.net.BDLocator; ++ import org.bluray.ti.TitleImpl; ++ import org.videolan.Libbluray; ++ ++@@ -82,6 +83,15 @@ public class SIManagerImpl extends SIManager { ++ } ++ ++ public Service getService(Locator locator) throws InvalidLocatorException, SecurityException { +++ +++ BDLocator bdLocator = null; +++ try { +++ bdLocator = new BDLocator(locator.toExternalForm()); +++ } catch (org.davic.net.InvalidLocatorException e) { +++ System.err.println("invalid locator: " + locator.toExternalForm() + "\n" + org.videolan.Logger.dumpStack(e)); +++ throw new javax.tv.locator.InvalidLocatorException(locator); +++ } +++ ++ return titles.findService(locator); ++ } ++ ++diff --git 
a/src/libbluray/bdj/java/javax/tv/service/navigation/SIElementFilter.java b/src/libbluray/bdj/java/javax/tv/service/navigation/SIElementFilter.java ++index fdfa128..f0dc97c 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/navigation/SIElementFilter.java +++++ b/src/libbluray/bdj/java/javax/tv/service/navigation/SIElementFilter.java ++@@ -1,6 +1,7 @@ ++ /* ++ * This file is part of libbluray ++ * Copyright (C) 2010 William Hahne +++ * Copyright (C) 2015 Petri Hintukainen ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++@@ -25,9 +26,21 @@ import javax.tv.service.SIElement; ++ import javax.tv.service.SIRequest; ++ import javax.tv.service.SIRequestorImpl; ++ +++import org.bluray.net.BDLocator; +++ ++ public final class SIElementFilter extends ServiceFilter ++ { ++ public SIElementFilter(SIElement element) throws FilterNotSupportedException { +++ if (element == null) +++ throw new NullPointerException(); +++ +++ try { +++ new BDLocator(element.getLocator().toExternalForm()); +++ } catch (Exception e) { +++ System.err.println("Invalid SI element: " + e + " at " + org.videolan.Logger.dumpStack(e)); +++ throw new FilterNotSupportedException(); +++ } +++ ++ this.element = element; ++ } ++ ++@@ -37,9 +50,9 @@ public final class SIElementFilter extends ServiceFilter ++ ++ public boolean accept(Service service) { ++ SIRequestorImpl requestor = new SIRequestorImpl(); ++- +++ ++ SIRequest req = service.retrieveDetails(requestor); ++- +++ ++ // TODO: This may be a bit excessive ++ int timeout = 0; ++ while (!requestor.getResponse() && timeout < 1000) { ++@@ -48,27 +61,27 @@ public final class SIElementFilter extends ServiceFilter ++ } catch (InterruptedException e) { ++ // ignore ++ } ++- +++ ++ timeout++; ++ } ++- +++ ++ // if we still don't have a response just cancel the request ++ if (!requestor.getResponse()) { ++ if (req != null) ++ req.cancel(); ++ } ++- +++ ++ if 
(requestor.getResult() == null) ++ return false; ++- +++ ++ SIRetrievable[] rets = requestor.getResult(); ++ for (int i = 0; i < rets.length; i++) { ++ if (rets[i].equals(element)) ++ return true; ++ } ++- +++ ++ return false; ++ } ++- +++ ++ SIElement element; ++ } ++diff --git a/src/libbluray/bdj/java/javax/tv/service/navigation/ServiceTypeFilter.java b/src/libbluray/bdj/java/javax/tv/service/navigation/ServiceTypeFilter.java ++index 0333302..250821e 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/navigation/ServiceTypeFilter.java +++++ b/src/libbluray/bdj/java/javax/tv/service/navigation/ServiceTypeFilter.java ++@@ -24,6 +24,8 @@ import javax.tv.service.ServiceType; ++ public final class ServiceTypeFilter extends ServiceFilter { ++ public ServiceTypeFilter(ServiceType type) ++ { +++ if (type == null) +++ throw new NullPointerException(); ++ this.type = type; ++ } ++ ++diff --git a/src/libbluray/bdj/java/javax/tv/service/navigation/StreamType.java b/src/libbluray/bdj/java/javax/tv/service/navigation/StreamType.java ++index 5824b44..26dc166 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/navigation/StreamType.java +++++ b/src/libbluray/bdj/java/javax/tv/service/navigation/StreamType.java ++@@ -22,6 +22,8 @@ package javax.tv.service.navigation; ++ public class StreamType { ++ protected StreamType(String name) ++ { +++ if (name == null) +++ throw new NullPointerException(); ++ this.name = name; ++ } ++ ++diff --git a/src/libbluray/bdj/java/javax/tv/service/selection/ServiceContextFactoryImpl.java b/src/libbluray/bdj/java/javax/tv/service/selection/ServiceContextFactoryImpl.java ++index e1e8dea..2940db4 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/selection/ServiceContextFactoryImpl.java +++++ b/src/libbluray/bdj/java/javax/tv/service/selection/ServiceContextFactoryImpl.java ++@@ -33,12 +33,14 @@ public class ServiceContextFactoryImpl extends ServiceContextFactory { ++ synchronized (ServiceContextFactoryImpl.class) { ++ if (instance == null) 
++ instance = new ServiceContextFactoryImpl(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ public static void shutdown() { ++- instance = null; +++ synchronized (ServiceContextFactoryImpl.class) { +++ instance = null; +++ } ++ } ++ ++ public ServiceContext createServiceContext() ++@@ -60,10 +62,19 @@ public class ServiceContextFactoryImpl extends ServiceContextFactory { ++ } ++ ++ public ServiceContext[] getServiceContexts() { ++- SecurityManager sec = System.getSecurityManager(); ++- if (sec != null) ++- sec.checkPermission(new ServiceContextPermission("access", "own")); ++- return serviceContexts; +++ try { +++ SecurityManager sec = System.getSecurityManager(); +++ if (sec != null) +++ sec.checkPermission(new ServiceContextPermission("access", "own")); +++ +++ ServiceContext[] r = new ServiceContext[1]; +++ r[0] = serviceContexts[0]; +++ return r; +++ +++ } catch (Exception e) { +++ } +++ +++ return new ServiceContext[0]; ++ } ++ ++ private ServiceContext[] serviceContexts; ++diff --git a/src/libbluray/bdj/java/org/bluray/bdplus/Status.java b/src/libbluray/bdj/java/org/bluray/bdplus/Status.java ++index 3f5fcf6..b897b3e 100644 ++--- a/src/libbluray/bdj/java/org/bluray/bdplus/Status.java +++++ b/src/libbluray/bdj/java/org/bluray/bdplus/Status.java ++@@ -28,8 +28,8 @@ public class Status { ++ synchronized (Status.class) { ++ if (instance == null) ++ instance = new Status(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ public static void shutdown() { ++diff --git a/src/libbluray/bdj/java/org/bluray/net/BDLocator.java b/src/libbluray/bdj/java/org/bluray/net/BDLocator.java ++index 6b747cc..a0b593b 100644 ++--- a/src/libbluray/bdj/java/org/bluray/net/BDLocator.java +++++ b/src/libbluray/bdj/java/org/bluray/net/BDLocator.java ++@@ -29,11 +29,11 @@ public class BDLocator extends Locator { ++ super(url); ++ try { ++ ++- if (!url.startsWith("bd://")) ++- throw new InvalidLocatorException(); ++- String str = url.substring(5); ++- if (!parseJar(str) 
&& !parseSound(str) && !parsePlaylist(str)) ++- throw new InvalidLocatorException(); +++ if (!url.startsWith("bd://")) +++ throw new InvalidLocatorException(); +++ String str = url.substring(5); +++ if (!parseJar(str) && !parseSound(str) && !parsePlaylist(str)) +++ throw new InvalidLocatorException(); ++ ++ } catch (InvalidLocatorException e) { ++ System.err.println("Invalid locator: " + url); ++diff --git a/src/libbluray/bdj/java/org/bluray/storage/StorageManager.java b/src/libbluray/bdj/java/org/bluray/storage/StorageManager.java ++index 961c8ec..5ea1c57 100644 ++--- a/src/libbluray/bdj/java/org/bluray/storage/StorageManager.java +++++ b/src/libbluray/bdj/java/org/bluray/storage/StorageManager.java ++@@ -24,8 +24,8 @@ public class StorageManager { ++ synchronized (StorageManager.class) { ++ if (instance == null) ++ instance = new StorageManager(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ protected StorageManager() { ++diff --git a/src/libbluray/bdj/java/org/bluray/ti/selection/TitleContextImpl.java b/src/libbluray/bdj/java/org/bluray/ti/selection/TitleContextImpl.java ++index 0109d2b..48d70be 100644 ++--- a/src/libbluray/bdj/java/org/bluray/ti/selection/TitleContextImpl.java +++++ b/src/libbluray/bdj/java/org/bluray/ti/selection/TitleContextImpl.java ++@@ -38,9 +38,11 @@ import javax.tv.service.selection.ServiceContextPermission; ++ ++ import org.bluray.ti.Title; ++ import org.bluray.ti.TitleImpl; +++ ++ import org.videolan.BDJLoader; ++ import org.videolan.BDJLoaderCallback; ++ import org.videolan.BDJListeners; +++import org.videolan.Logger; ++ import org.videolan.media.content.PlayerManager; ++ ++ public class TitleContextImpl implements TitleContext { ++@@ -71,19 +73,30 @@ public class TitleContextImpl implements TitleContext { ++ } ++ ++ public void start(Title title, boolean restart) throws SecurityException { +++ logger.info("start(" + title.getName() + ", restart=" + restart + ")"); +++ ++ SecurityManager sm = System.getSecurityManager(); 
++ if (sm != null) { ++ sm.checkPermission(new SelectPermission(title.getLocator(), "own")); ++ } ++- ++- if (state == STATE_DESTROYED) +++ if (state == STATE_DESTROYED) { +++ logger.error("start() failed: Title Context already destroyed"); ++ throw new IllegalStateException(); +++ } +++ +++ if (!restart && (this.title == null || !title.equals(this.title))) { +++ /* force restarting of service bound Xlets when title changes */ +++ logger.info("start(): title changed, force restart"); +++ restart = true; +++ } +++ ++ TitleStartAction action = new TitleStartAction(this, (TitleImpl)title); ++ if (!BDJLoader.load((TitleImpl)title, restart, action)) ++ action.loaderDone(false); ++ } ++ ++ public void select(Service service) throws SecurityException { +++ logger.info("select(" + service.getName() + ")"); ++ start((Title)service, true); ++ } ++ ++@@ -96,6 +109,8 @@ public class TitleContextImpl implements TitleContext { ++ } ++ ++ public void stop() throws SecurityException { +++ logger.info("stop()"); +++ ++ SecurityManager sm = System.getSecurityManager(); ++ if (sm != null) { ++ sm.checkPermission(new ServiceContextPermission("stop", "own")); ++@@ -187,4 +202,6 @@ public class TitleContextImpl implements TitleContext { ++ private BDJListeners listeners = new BDJListeners(); ++ private TitleImpl title = null; ++ private int state = STATE_STOPPED; +++ +++ private static final Logger logger = Logger.getLogger(TitleContextImpl.class.getName()); ++ } ++diff --git a/src/libbluray/bdj/java/org/dvb/event/EventManager.java b/src/libbluray/bdj/java/org/dvb/event/EventManager.java ++index 2bf2ea4..844b72d 100644 ++--- a/src/libbluray/bdj/java/org/dvb/event/EventManager.java +++++ b/src/libbluray/bdj/java/org/dvb/event/EventManager.java ++@@ -40,8 +40,8 @@ public class EventManager implements ResourceServer { ++ synchronized (EventManager.class) { ++ if (instance == null) ++ instance = new EventManager(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ public static void 
shutdown() { ++@@ -254,7 +254,7 @@ public class EventManager implements ResourceServer { ++ return false; ++ } ++ ++- private class UserEventItem { +++ private static class UserEventItem { ++ public UserEventItem(BDJXletContext context, UserEventListener listener, ++ ResourceClient client, UserEventRepository userEvents) { ++ this.context = context; ++@@ -272,7 +272,7 @@ public class EventManager implements ResourceServer { ++ public UserEventRepository userEvents; ++ } ++ ++- private class UserEventAction extends BDJAction { +++ private static class UserEventAction extends BDJAction { ++ public UserEventAction(UserEventItem item, UserEvent event) { ++ this.listener = item.listener; ++ this.event = event; ++diff --git a/src/libbluray/bdj/java/org/dvb/io/persistent/FileAttributes.java b/src/libbluray/bdj/java/org/dvb/io/persistent/FileAttributes.java ++index 4c941ff..65c3d29 100644 ++--- a/src/libbluray/bdj/java/org/dvb/io/persistent/FileAttributes.java +++++ b/src/libbluray/bdj/java/org/dvb/io/persistent/FileAttributes.java ++@@ -78,9 +78,9 @@ public class FileAttributes { ++ { ++ boolean r = f.canRead(); ++ boolean w = f.canWrite(); ++- +++ ++ FileAccessPermissions permissions = new FileAccessPermissions(r, w, r, w, r, w); ++- +++ ++ return new FileAttributes(null, permissions, PRIORITY_LOW); ++ } ++ ++diff --git a/src/libbluray/bdj/java/org/dvb/user/UserPreferenceManager.java b/src/libbluray/bdj/java/org/dvb/user/UserPreferenceManager.java ++index 239c966..af86b4e 100644 ++--- a/src/libbluray/bdj/java/org/dvb/user/UserPreferenceManager.java +++++ b/src/libbluray/bdj/java/org/dvb/user/UserPreferenceManager.java ++@@ -31,8 +31,8 @@ public class UserPreferenceManager { ++ synchronized (UserPreferenceManager.class) { ++ if (instance == null) ++ instance = new UserPreferenceManager(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ public void read(Preference p) { ++diff --git a/src/libbluray/bdj/java/org/havi/ui/HScene.java 
b/src/libbluray/bdj/java/org/havi/ui/HScene.java ++index 7937d32..926781a 100644 ++--- a/src/libbluray/bdj/java/org/havi/ui/HScene.java +++++ b/src/libbluray/bdj/java/org/havi/ui/HScene.java ++@@ -239,8 +239,10 @@ public class HScene extends Container implements HComponentOrdering { ++ } ++ ++ public synchronized void dispose() { ++- if (null != BDJXletContext.getCurrentContext()) ++- HSceneFactory.getInstance().dispose(this); +++ HSceneFactory sf = HSceneFactory.getInstance(); +++ if (sf != null) { +++ sf.dispose(this); +++ } ++ } ++ ++ protected void disposeImpl() ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJAppProxy.java b/src/libbluray/bdj/java/org/videolan/BDJAppProxy.java ++index 135c000..72ba458 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJAppProxy.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJAppProxy.java ++@@ -384,8 +384,8 @@ class BDJAppProxy implements DVBJProxy, Runnable { ++ } catch (InterruptedException e) { ++ } ++ } +++ return done; ++ } ++- return done; ++ } ++ ++ public void release() { ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJAppsDatabase.java b/src/libbluray/bdj/java/org/videolan/BDJAppsDatabase.java ++index 13c088f..ca39f12 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJAppsDatabase.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJAppsDatabase.java ++@@ -35,8 +35,8 @@ public class BDJAppsDatabase extends AppsDatabase { ++ synchronized (BDJAppsDatabase.class) { ++ if (instance == null) ++ instance = new BDJAppsDatabase(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ public int size() { ++@@ -106,5 +106,5 @@ public class BDJAppsDatabase extends AppsDatabase { ++ private BDJAppProxy[] appProxys = null; ++ private AppEntry[] appTable = null; ++ ++- protected static BDJAppsDatabase instance = null; +++ private static BDJAppsDatabase instance = null; ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJClassFileTransformer.java 
b/src/libbluray/bdj/java/org/videolan/BDJClassFileTransformer.java ++new file mode 100644 ++index 0000000..988e76e ++--- /dev/null +++++ b/src/libbluray/bdj/java/org/videolan/BDJClassFileTransformer.java ++@@ -0,0 +1,91 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2015 Petri Hintukainen <phintuka@users.sourceforge.net> +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. +++ */ +++ +++package org.videolan; +++ +++/** +++ * This is a class which is called by BDJClassLoader +++ * when ClassFormatError is thrown inside defineClass(). +++ * +++ * Some discs have invalid debug info in class files (broken by +++ * malfunctioning obfuscater ?). +++ * We strip debug info from the class and try to load it again. 
+++ * +++ * Penguins of MAdagascar: +++ * java.lang.ClassFormatError: Invalid index 0 in LocalVariableTable' +++ * in class file com/tcs/blr/bluray/pal/fox/controller/d +++ */ +++ +++import org.objectweb.asm.ClassReader; +++import org.objectweb.asm.ClassWriter; +++import org.objectweb.asm.ClassVisitor; +++import org.objectweb.asm.MethodVisitor; +++import org.objectweb.asm.Opcodes; +++import org.objectweb.asm.Attribute; +++ +++class BDJClassFileTransformer +++{ +++ public byte[] transform(byte[] b, int off, int len) +++ throws ClassFormatError +++ { +++ logger.info("Trying to transform broken class file (" + len + " bytes)"); +++ +++ byte[] r = new byte[len]; +++ for (int i = 0; i < len; i++) +++ r[i] = b[i+off]; +++ +++ try { +++ ClassReader cr = new ClassReader(r); +++ ClassWriter cw = new ClassWriter(cr, 0/*ClassWriter.COMPUTE_FRAMES | ClassWriter.COMPUTE_MAXS*/); +++ ClassVisitor cv = new MyClassVisitor(cw); +++ cr.accept(cv, ClassReader.SKIP_DEBUG); +++ return cw.toByteArray(); +++ } catch (Exception e) { +++ logger.error("Failed transforming class: " + e); +++ } +++ +++ return r; +++ } +++ +++ public class MyClassVisitor extends ClassVisitor { +++ public MyClassVisitor(ClassVisitor cv) { +++ super(Opcodes.ASM4, cv); +++ } +++ +++ public MethodVisitor visitMethod(int access, String name, String desc, +++ String signature, String[] exceptions) { +++ MethodVisitor mv = super.visitMethod(access, name, desc, signature, exceptions); +++ //System.err.println("visit method: " + name); +++ return new MyMethodVisitor(mv); +++ } +++ } +++ +++ public class MyMethodVisitor extends MethodVisitor { +++ public MyMethodVisitor(MethodVisitor mv) { +++ super(Opcodes.ASM4, mv); +++ } +++ +++ public void visitAttribute(Attribute attr) { +++ //System.err.println(" attribute: " + attr.type); +++ super.visitAttribute(attr); +++ } +++ } +++ +++ private static final Logger logger = Logger.getLogger(BDJClassFileTransformer.class.getName()); +++} ++diff --git 
a/src/libbluray/bdj/java/org/videolan/BDJClassLoader.java b/src/libbluray/bdj/java/org/videolan/BDJClassLoader.java ++index 733c3e5..2eb3844 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJClassLoader.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJClassLoader.java ++@@ -21,6 +21,7 @@ package org.videolan; ++ ++ import java.net.MalformedURLException; ++ +++import java.io.ByteArrayOutputStream; ++ import java.io.File; ++ import java.io.InputStream; ++ import java.io.IOException; ++@@ -126,7 +127,87 @@ public class BDJClassLoader extends URLClassLoader { ++ } ++ return c; ++ } ++- return super.loadClass(name); +++ +++ try { +++ return super.loadClass(name); +++ } catch (ClassNotFoundException e0) { +++ logger.error("ClassNotFoundException: " + name); +++ throw e0; +++ } catch (Error err) { +++ logger.error("FATAL: " + err); +++ throw err; +++ } +++ } +++ +++ private byte[] loadClassCode(String name) throws ClassNotFoundException { +++ String path = name.replace('.', '/').concat(".class"); +++ +++ URL res = super.findResource(path); +++ if (res == null) { +++ logger.error("loadClassCode(): resource for class " + name + "not found"); +++ throw new ClassNotFoundException(name); +++ } +++ +++ InputStream is = null; +++ ByteArrayOutputStream os = null; +++ try { +++ is = res.openStream(); +++ os = new ByteArrayOutputStream(); +++ byte[] buffer = new byte[0xffff]; +++ while (true) { +++ int r = is.read(buffer); +++ if (r == -1) break; +++ os.write(buffer, 0, r); +++ } +++ +++ return os.toByteArray(); +++ +++ } catch (Exception e) { +++ logger.error("loadClassCode(" + name + ") failed: " + e); +++ throw new ClassNotFoundException(name); +++ +++ } finally { +++ try { +++ if (is != null) +++ is.close(); +++ } catch (IOException ioe) { +++ } +++ try { +++ if (os != null) +++ os.close(); +++ } catch (IOException ioe) { +++ } +++ } +++ } +++ +++ protected Class findClass(String name) throws ClassNotFoundException { +++ try { +++ return super.findClass(name); +++ +++ } 
catch (ClassFormatError ce) { +++ +++ /* try to "fix" broken class file */ +++ /* if we got ClassFormatError, package was already created. */ +++ byte[] b = loadClassCode(name); +++ if (b == null) { +++ logger.error("loadClassCode(" + name + ") failed"); +++ /* this usually kills Xlet ... */ +++ throw ce; +++ } +++ try { +++ b = new BDJClassFileTransformer().transform(b, 0, b.length); +++ return defineClass(b, 0, b.length); +++ } catch (ThreadDeath td) { +++ throw td; +++ } catch (Throwable t) { +++ logger.error("Class rewriting failed: " + t); +++ throw new ClassNotFoundException(name); +++ } +++ +++ } catch (Error er) { +++ logger.error("Unexpected error: " + er + " " + Logger.dumpStack(er)); +++ throw er; +++ } ++ } ++ ++ public URL getResource(String name) { ++@@ -157,4 +238,6 @@ public class BDJClassLoader extends URLClassLoader { ++ } ++ ++ private String xletClass; +++ +++ private static final Logger logger = Logger.getLogger(BDJClassLoader.class.getName()); ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJListeners.java b/src/libbluray/bdj/java/org/videolan/BDJListeners.java ++index 77acf4d..ba3a9c5 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJListeners.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJListeners.java ++@@ -56,6 +56,8 @@ import org.dvb.media.SubtitleListener; ++ import org.dvb.media.SubtitleNotAvailableEvent; ++ import org.dvb.media.SubtitleNotSelectedEvent; ++ import org.dvb.media.SubtitleSelectedEvent; +++import org.dvb.media.VideoFormatListener; +++import org.dvb.media.VideoFormatEvent; ++ ++ public class BDJListeners { ++ private LinkedList listeners = new LinkedList(); ++@@ -220,6 +222,9 @@ public class BDJListeners { ++ event instanceof SubtitleNotSelectedEvent || event instanceof SubtitleSelectedEvent) { ++ ((SubtitleListener)listener).subtitleStatusChanged((EventObject)event); ++ +++ } else if (event instanceof VideoFormatEvent) { +++ 
((VideoFormatListener)listener).receiveVideoFormatEvent((VideoFormatEvent)event); +++ ++ } else if (event instanceof PSR102Status) { ++ ((StatusListener)listener).receive(((PSR102Status)event).value); ++ ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJLoader.java b/src/libbluray/bdj/java/org/videolan/BDJLoader.java ++index 22bd37a..b2bcff3 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJLoader.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJLoader.java ++@@ -44,8 +44,60 @@ import org.videolan.media.content.PlayerManager; ++ ++ public class BDJLoader { ++ +++ private static class FontCacheAction extends BDJAction { +++ public FontCacheAction(InputStream is) { +++ this.fontPath = null; +++ this.is = is; +++ } +++ public FontCacheAction(String fontPath) { +++ this.fontPath = fontPath; +++ this.is = null; +++ } +++ +++ protected void doAction() { +++ try { +++ if (this.is != null) { +++ this.cacheFile = addFontImpl(is); +++ } else { +++ this.cacheFile = addFontImpl(fontPath); +++ } +++ } catch (RuntimeException e) { +++ this.exception = e; +++ } +++ } +++ +++ public File execute() { +++ BDJActionManager.getInstance().putCommand(this); +++ waitEnd(); +++ if (exception != null) { +++ throw exception; +++ } +++ return cacheFile; +++ } +++ +++ private final String fontPath; +++ private final InputStream is; +++ private File cacheFile = null; +++ private RuntimeException exception = null; +++ } +++ ++ /* called by org.dvb.ui.FontFactory */ ++ public static File addFont(InputStream is) { +++ if (BDJXletContext.getCurrentContext() == null) +++ return addFontImpl(is); +++ /* dispatch cache request to privileged thread */ +++ return new FontCacheAction(is).execute(); +++ } +++ +++ /* called by org.dvb.ui.FontFactory */ +++ public static File addFont(String fontFile) { +++ if (BDJXletContext.getCurrentContext() == null) +++ return addFontImpl(fontFile); +++ /* dispatch cache request to privileged thread */ +++ return new FontCacheAction(fontFile).execute(); +++ } 
+++ +++ private static File addFontImpl(InputStream is) { ++ VFSCache localCache = vfsCache; ++ if (localCache != null) { ++ return localCache.addFont(is); ++@@ -53,8 +105,7 @@ public class BDJLoader { ++ return null; ++ } ++ ++- /* called by org.dvb.ui.FontFactory */ ++- public static File addFont(String fontFile) { +++ private static File addFontImpl(String fontFile) { ++ VFSCache localCache = vfsCache; ++ if (localCache != null) { ++ return localCache.addFont(fontFile); ++@@ -134,11 +185,6 @@ public class BDJLoader { ++ throw new InvalidObjectException("bdjo not loaded"); ++ AppEntry[] appTable = bdjo.getAppTable(); ++ ++- // initialize AppCaches ++- if (vfsCache != null) { ++- vfsCache.add(bdjo.getAppCaches()); ++- } ++- ++ // reuse appProxys ++ BDJAppProxy[] proxys = new BDJAppProxy[appTable.length]; ++ AppsDatabase db = AppsDatabase.getAppsDatabase(); ++@@ -147,6 +193,15 @@ public class BDJLoader { ++ AppID id = (AppID)ids.nextElement(); ++ BDJAppProxy proxy = (BDJAppProxy)db.getAppProxy(id); ++ AppEntry entry = (AppEntry)db.getAppAttributes(id); +++ if (proxy == null) { +++ logger.error("AppsDatabase corrupted!"); +++ continue; +++ } +++ if (entry == null) { +++ logger.error("AppsDatabase corrupted!"); +++ proxy.release(); +++ continue; +++ } ++ for (int i = 0; i < appTable.length; i++) { ++ if (id.equals(appTable[i].getIdentifier()) && ++ entry.getInitialClass().equals(appTable[i].getInitialClass())) { ++@@ -155,7 +210,6 @@ public class BDJLoader { ++ proxy.stop(true); ++ } else { ++ logger.info("Keeping xlet " + appTable[i].getInitialClass()); ++- proxy.getXletContext().update(appTable[i], bdjo.getAppCaches()); ++ proxys[i] = proxy; ++ proxy = null; ++ } ++@@ -180,6 +234,11 @@ public class BDJLoader { ++ Libbluray.setUOMask(terminfo.getMenuCallMask(), terminfo.getTitleSearchMask()); ++ Libbluray.setKeyInterest(bdjo.getKeyInterestTable()); ++ +++ // initialize AppCaches +++ if (vfsCache != null) { +++ vfsCache.add(bdjo.getAppCaches()); +++ } +++ ++ // 
initialize appProxys ++ for (int i = 0; i < appTable.length; i++) { ++ if (proxys[i] == null) { ++@@ -196,6 +255,7 @@ public class BDJLoader { ++ } ++ logger.info("Loaded class: " + appTable[i].getInitialClass() + p + " from " + appTable[i].getBasePath() + ".jar"); ++ } else { +++ proxys[i].getXletContext().update(appTable[i], bdjo.getAppCaches()); ++ logger.info("Reused class: " + appTable[i].getInitialClass() + " from " + appTable[i].getBasePath() + ".jar"); ++ } ++ } ++@@ -206,6 +266,19 @@ public class BDJLoader { ++ // notify AppsDatabase ++ ((BDJAppsDatabase)BDJAppsDatabase.getAppsDatabase()).newDatabase(bdjo, proxys); ++ +++ // auto start playlist +++ try { +++ PlayListTable plt = bdjo.getAccessiblePlaylists(); +++ if ((plt != null) && (plt.isAutostartFirst())) { +++ logger.info("Auto-starting playlist"); +++ String[] pl = plt.getPlayLists(); +++ if (pl.length > 0) +++ Manager.createPlayer(new MediaLocator(new BDLocator("bd://PLAYLIST:" + pl[0]))).start(); +++ } +++ } catch (Exception e) { +++ logger.error("loadN(): autoplaylist failed: " + e + "\n" + Logger.dumpStack(e)); +++ } +++ ++ // now run all the xlets ++ for (int i = 0; i < appTable.length; i++) { ++ int code = appTable[i].getControlCode(); ++@@ -222,15 +295,6 @@ public class BDJLoader { ++ ++ logger.info("Finished initializing and starting xlets."); ++ ++- // auto start playlist ++- PlayListTable plt = bdjo.getAccessiblePlaylists(); ++- if ((plt != null) && (plt.isAutostartFirst())) { ++- logger.info("Auto-starting playlist"); ++- String[] pl = plt.getPlayLists(); ++- if (pl.length > 0) ++- Manager.createPlayer(new MediaLocator(new BDLocator("bd://PLAYLIST:" + pl[0]))).start(); ++- } ++- ++ return true; ++ ++ } catch (Throwable e) { ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJSecurityManager.java b/src/libbluray/bdj/java/org/videolan/BDJSecurityManager.java ++index 38f8ac5..8a337ee 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJSecurityManager.java +++++ 
b/src/libbluray/bdj/java/org/videolan/BDJSecurityManager.java ++@@ -76,6 +76,14 @@ final class BDJSecurityManager extends SecurityManager { ++ } ++ deny(perm); ++ } +++ +++ // work around bug in openjdk 7 / 8 +++ // sun.awt.AWTAutoShutdown.notifyThreadBusy is missing doPrivileged() +++ // (fixed in jdk9 / http://hg.openjdk.java.net/jdk9/client/jdk/rev/5b613a3c04be ) +++ if (classDepth("sun.awt.AWTAutoShutdown") > 0) { +++ return; +++ } +++ ++ if (perm.implies(new RuntimePermission("modifyThreadGroup"))) { ++ /* do check here (no need to log failures) */ ++ super.checkPermission(perm); ++@@ -119,6 +127,10 @@ final class BDJSecurityManager extends SecurityManager { ++ return; ++ } ++ } +++ if (perm.getActions().contains("write")) { +++ /* write permissions are handled in checkWrite() */ +++ deny(perm); +++ } ++ } ++ ++ /* Networking */ ++@@ -180,6 +192,10 @@ final class BDJSecurityManager extends SecurityManager { ++ throw new SecurityException("exit denied"); ++ } ++ +++ public void checkSystemClipboardAccess() { +++ throw new SecurityException("clipboard access denied"); +++ } +++ ++ /* ++ * file read access ++ */ ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJThreadGroup.java b/src/libbluray/bdj/java/org/videolan/BDJThreadGroup.java ++index f4bc1dc..4943a7e 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJThreadGroup.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJThreadGroup.java ++@@ -20,9 +20,9 @@ ++ ++ package org.videolan; ++ ++-public class BDJThreadGroup extends ThreadGroup { +++class BDJThreadGroup extends ThreadGroup { ++ ++- public BDJThreadGroup(String name, BDJXletContext context) { +++ protected BDJThreadGroup(String name, BDJXletContext context) { ++ super(name); ++ this.context = context; ++ } ++@@ -45,15 +45,11 @@ public class BDJThreadGroup extends ThreadGroup { ++ } ++ } ++ ++- public BDJXletContext getContext() { +++ protected BDJXletContext getContext() { ++ return context; ++ } ++ ++- public void setContext(BDJXletContext 
context) { ++- this.context = context; ++- } ++- ++- public boolean waitForShutdown(int timeout, int maxThreads) { +++ protected boolean waitForShutdown(int timeout, int maxThreads) { ++ ++ if (parentOf(Thread.currentThread().getThreadGroup()) && maxThreads < 1) { ++ logger.error("Current Thread is contained within ThreadGroup to be disposed."); ++@@ -94,8 +90,6 @@ public class BDJThreadGroup extends ThreadGroup { ++ } catch (IllegalThreadStateException e) { ++ logger.error("ThreadGroup destroy failed: " + e); ++ } ++- ++- context = null; ++ } ++ ++ public void dumpThreads() { ++@@ -115,6 +109,6 @@ public class BDJThreadGroup extends ThreadGroup { ++ } ++ } ++ ++- private BDJXletContext context; +++ private final BDJXletContext context; ++ private static final Logger logger = Logger.getLogger(BDJThreadGroup.class.getName()); ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJUtil.java b/src/libbluray/bdj/java/org/videolan/BDJUtil.java ++index 507c2e7..cc17992 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJUtil.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJUtil.java ++@@ -25,20 +25,31 @@ public class BDJUtil { ++ /** ++ * Make a five digit zero padded string based on an integer ++ * Ex. integer 1 -> string "00001" ++- * +++ * ++ * @param id ++ * @return ++ */ ++- public static String makeFiveDigitStr(int id) +++ public static String makeFiveDigitStr(int id) ++ { +++ if (id < 0 || id > 99999) { +++ System.err.println("Invalid ID: " + id); +++ throw new IllegalArgumentException("Invalid ID " + id); +++ } +++ String s = "" + id; +++ while (s.length() < 5) { +++ s = "0" + s; +++ } +++ return s; +++ /* ++ DecimalFormat fmt = new DecimalFormat(); ++ fmt.setMaximumIntegerDigits(5); ++ fmt.setMinimumIntegerDigits(5); ++ fmt.setGroupingUsed(false); ++- +++ ++ return fmt.format(id); +++ */ ++ } ++- +++ ++ /** ++ * Make a path based on the disc root to an absolute path based on the filesystem of the computer ++ * Ex. 
/BDMV/JAR/00000.jar -> /bluray/disc/mount/point/BDMV/JAR/00000.jar ++@@ -47,6 +58,11 @@ public class BDJUtil { ++ */ ++ public static String discRootToFilesystem(String path) ++ { ++- return System.getProperty("bluray.vfs.root") + path; +++ String vfsRoot = System.getProperty("bluray.vfs.root"); +++ if (vfsRoot == null) { +++ System.err.println("discRootToFilesystem(): disc root not set !"); +++ return path; +++ } +++ return vfsRoot + path; ++ } ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJXletContext.java b/src/libbluray/bdj/java/org/videolan/BDJXletContext.java ++index ae5b3a0..8ee818a 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJXletContext.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJXletContext.java ++@@ -70,7 +70,12 @@ public class BDJXletContext implements javax.tv.xlet.XletContext, javax.microedi ++ try { ++ int homeJarID = Integer.parseInt(home); ++ long time = System.currentTimeMillis(); ++- homeMountPoint = MountManager.mount(homeJarID, false) + java.io.File.separator; +++ homeMountPoint = MountManager.mount(homeJarID, false); +++ if (homeMountPoint == null) { +++ logger.error("Failed mounting " + home + ".jar"); +++ } else { +++ homeMountPoint = homeMountPoint + java.io.File.separator; +++ } ++ time = System.currentTimeMillis() - time; ++ logger.info("Mounted Xlet home directory from " + home + ".jar " + ++ "to " + homeMountPoint + "(" + time + "ms)"); ++@@ -80,6 +85,8 @@ public class BDJXletContext implements javax.tv.xlet.XletContext, javax.microedi ++ } ++ ++ public String getXletHome() { +++ if (homeMountPoint == null) +++ logger.error("Home directory not mounted!"); ++ return homeMountPoint; ++ } ++ ++@@ -102,6 +109,8 @@ public class BDJXletContext implements javax.tv.xlet.XletContext, javax.microedi ++ return Integer.toHexString(appid.getAID()); ++ else if (key.equals("org.dvb.application.appid")) ++ return appid; +++ +++ logger.error("unhandled getXletProperty(" + key + ")"); ++ return null; ++ } ++ ++diff --git 
a/src/libbluray/bdj/java/org/videolan/IxcRegistryImpl.java b/src/libbluray/bdj/java/org/videolan/IxcRegistryImpl.java ++index a9fe28d..bae986f 100644 ++--- a/src/libbluray/bdj/java/org/videolan/IxcRegistryImpl.java +++++ b/src/libbluray/bdj/java/org/videolan/IxcRegistryImpl.java ++@@ -260,7 +260,7 @@ public class IxcRegistryImpl { ++ return result; ++ } ++ ++- public class RemoteMethod implements Runnable +++ private class RemoteMethod implements Runnable ++ { ++ final BDJXletContext calleeContext; ++ final BDJXletContext callerContext; ++@@ -276,9 +276,11 @@ public class IxcRegistryImpl { ++ callerContext = BDJXletContext.getCurrentContext(); ++ if (callerContext == null) { ++ logger.error("caller context is null"); +++ throw new RemoteException("no caller context"); ++ } ++ if (context == null) { ++ logger.error("callee context is null"); +++ throw new RemoteException("no callee context"); ++ } ++ calleeContext = context; ++ ++@@ -426,6 +428,18 @@ public class IxcRegistryImpl { ++ throw new IllegalArgumentException("xc not current BDJXletContext"); ++ } ++ +++ if ("/7fff7669/4050/Messenger".equals(path)) { +++ /* known discs: +++ - Terminator Salvation +++ */ +++ try { +++ logger.error("Enabling Ixc delay hack for " + path); +++ Thread.sleep(200L); +++ } catch (InterruptedException ie) { +++ ie.printStackTrace(); +++ } +++ } +++ ++ WrappedRemoteObj wrappedObj = null; ++ synchronized (remoteObjects) { ++ if (!remoteObjects.containsKey(path)) { ++@@ -438,7 +452,7 @@ public class IxcRegistryImpl { ++ } ++ Object remoteObj = wrapOrCopy(wrappedObj, wrappedObj.context, (BDJXletContext)xc); ++ ++- Debug("IxcRegistry.lookup(" + path + ") => " + remoteObj); +++ Debug("IxcRegistry.lookup(" + path + ") => OK"); ++ ++ return (Remote)remoteObj; ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/Libbluray.java b/src/libbluray/bdj/java/org/videolan/Libbluray.java ++index 41af18f..6a97ad7 100644 ++--- a/src/libbluray/bdj/java/org/videolan/Libbluray.java +++++ 
b/src/libbluray/bdj/java/org/videolan/Libbluray.java ++@@ -24,6 +24,8 @@ import java.awt.BDFontMetrics; ++ import java.awt.BDToolkit; ++ import java.awt.event.KeyEvent; ++ import java.io.File; +++import java.util.HashMap; +++import java.util.Map; ++ import java.util.Vector; ++ ++ import javax.media.PackageManager; ++@@ -50,14 +52,7 @@ public class Libbluray { ++ ++ /* hook system properties: make "user.dir" point to current Xlet home directory */ ++ ++- private static boolean propertiesHooked = false; ++- ++ private static void hookProperties() { ++- if (propertiesHooked) { ++- return; ++- } ++- propertiesHooked = true; ++- ++ java.util.Properties p = new java.util.Properties(System.getProperties()) { ++ public String getProperty(String key) { ++ if (key.equals("user.dir")) { ++@@ -65,6 +60,7 @@ public class Libbluray { ++ if (ctx != null) { ++ return ctx.getXletHome(); ++ } +++ System.err.println("getProperty(user.dir): no context ! " + Logger.dumpStack()); ++ } ++ return super.getProperty(key); ++ } ++@@ -72,6 +68,28 @@ public class Libbluray { ++ System.setProperties(p); ++ } ++ +++ private static boolean initOnce = false; +++ private static void initOnce() { +++ if (initOnce) { +++ return; +++ } +++ initOnce = true; +++ +++ /* hook system properties (provide Xlet-specific user.dir) */ +++ try { +++ hookProperties(); +++ } catch (Throwable t) { +++ System.err.println("hookProperties() failed: " + t); +++ } +++ +++ /* hook sockets (limit network connections) */ +++ try { +++ BDJSocketFactory.init(); +++ } catch (Throwable t) { +++ System.err.println("Hooking socket factory failed: " + t + "\n" + Logger.dumpStack(t)); +++ } +++ } +++ ++ private static String canonicalize(String path, boolean create) { ++ try { ++ File dir = new File(path); ++@@ -89,7 +107,7 @@ public class Libbluray { ++ private static void init(long nativePointer, String discID, String discRoot, ++ String persistentRoot, String budaRoot) { ++ ++- hookProperties(); +++ initOnce(); ++ ++ /* set up 
directories */ ++ persistentRoot = canonicalize(persistentRoot, true); ++@@ -185,8 +203,6 @@ public class Libbluray { ++ ++ System.setProperty("bluray.network.connected", "YES"); ++ ++- BDJSocketFactory.init(); ++- ++ try { ++ System.setSecurityManager(new BDJSecurityManager(discRoot, persistentRoot, budaRoot)); ++ } catch (Exception ex) { ++@@ -228,6 +244,7 @@ public class Libbluray { ++ } ++ nativePointer = 0; ++ titleInfos = null; +++ bdjoFiles = null; ++ } ++ ++ /* ++@@ -296,6 +313,10 @@ public class Libbluray { ++ * Disc data ++ */ ++ +++ /* cache parsed .bdjo files */ +++ private static Map bdjoFiles = null; +++ private static Object bdjoFilesLock = new Object(); +++ ++ public static byte[] getAacsData(int type) { ++ return getAacsDataN(nativePointer, type); ++ } ++@@ -305,7 +326,23 @@ public class Libbluray { ++ } ++ ++ public static Bdjo getBdjo(String name) { ++- return getBdjoN(nativePointer, name + ".bdjo"); +++ Bdjo bdjo; +++ synchronized (bdjoFilesLock) { +++ if (bdjoFiles == null) { +++ bdjoFiles = new HashMap(); +++ } else { +++ bdjo = (Bdjo)bdjoFiles.get(name); +++ if (bdjo != null) { +++ return bdjo; +++ } +++ } +++ +++ bdjo = getBdjoN(nativePointer, name + ".bdjo"); +++ if (bdjo != null) { +++ bdjoFiles.put(name, bdjo); +++ } +++ return bdjo; +++ } ++ } ++ ++ public static String[] listBdFiles(String path, boolean onlyBdRom) { ++diff --git a/src/libbluray/bdj/java/org/videolan/MountManager.java b/src/libbluray/bdj/java/org/videolan/MountManager.java ++index 83d6870..6f6fd52 100644 ++--- a/src/libbluray/bdj/java/org/videolan/MountManager.java +++++ b/src/libbluray/bdj/java/org/videolan/MountManager.java ++@@ -185,6 +185,7 @@ public class MountManager { ++ new PrivilegedAction() { ++ public Object run() { ++ if (mountPoint.decRefCount() < 1) { +++ logger.error("Removing JAR " + id + " from mount cache"); ++ mountPoints.remove(id); ++ } ++ return null; ++@@ -221,7 +222,7 @@ public class MountManager { ++ if (mountPoint != null) { ++ return 
mountPoint.getMountPoint(); ++ } else { ++- logger.info("JAR " + jarId + " not mounted"); +++ logger.error("JAR " + jarId + " not mounted"); ++ } ++ return null; ++ } ++@@ -247,6 +248,7 @@ public class MountManager { ++ if (dir != null) { ++ return dir.getAbsolutePath(); ++ } +++ logger.error("getMountPoint(): already unmounted !"); ++ return null; ++ } ++ ++@@ -274,8 +276,8 @@ public class MountManager { ++ return classFiles; ++ } ++ ++- public boolean setClassFiles() { ++- return classFiles == true; +++ public void setClassFiles() { +++ classFiles = true; ++ } ++ ++ private File dir; ++diff --git a/src/libbluray/bdj/java/org/videolan/TitleInfo.java b/src/libbluray/bdj/java/org/videolan/TitleInfo.java ++index 1c1075b..10dc62a 100644 ++--- a/src/libbluray/bdj/java/org/videolan/TitleInfo.java +++++ b/src/libbluray/bdj/java/org/videolan/TitleInfo.java ++@@ -24,7 +24,7 @@ public class TitleInfo { ++ this.objType = objType; ++ this.playbackType = playbackType; ++ if (objType == OBJ_TYPE_BDJ) ++- this.bdjoName = (new java.text.DecimalFormat("00000")).format(idRef); +++ this.bdjoName = (BDJUtil.makeFiveDigitStr(idRef)); ++ else ++ this.hdmvOID = idRef; ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/VFSCache.java b/src/libbluray/bdj/java/org/videolan/VFSCache.java ++index 2bcfbe9..22fe1f0 100644 ++--- a/src/libbluray/bdj/java/org/videolan/VFSCache.java +++++ b/src/libbluray/bdj/java/org/videolan/VFSCache.java ++@@ -270,16 +270,19 @@ class VFSCache { ++ accessFileSynced(absPath); ++ } ++ ++- protected synchronized void accessFileSynced(String absPath) { +++ private synchronized void accessFileSynced(String absPath) { ++ ++ if (inAccessFile) { ++ /* avoid recursion from SecurityManager checks */ ++ return; ++ } ++ ++- inAccessFile = true; ++- accessFileImp(absPath); ++- inAccessFile = false; +++ try { +++ inAccessFile = true; +++ accessFileImp(absPath); +++ } finally { +++ inAccessFile = false; +++ } ++ } ++ ++ private void accessFileImp(String absPath) { ++@@ 
-297,7 +300,7 @@ class VFSCache { ++ } ++ ++ /* do not cache .m2ts streams */ ++- if (relPath.startsWith("BDMV" + File.separator + "STREAM" + File.separator)) { +++ if (relPath.startsWith(streamDir)) { ++ return; ++ } ++ ++@@ -352,6 +355,7 @@ class VFSCache { ++ ++ private static final String jarDir = "BDMV" + File.separator + "JAR" + File.separator; ++ private static final String fontDir = "BDMV" + File.separator + "AUXDATA" + File.separator; +++ private static final String streamDir = "BDMV" + File.separator + "STREAM" + File.separator; ++ ++ private static final Logger logger = Logger.getLogger(VFSCache.class.getName()); ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/media/content/BDHandler.java b/src/libbluray/bdj/java/org/videolan/media/content/BDHandler.java ++index 3d43579..92269f1 100644 ++--- a/src/libbluray/bdj/java/org/videolan/media/content/BDHandler.java +++++ b/src/libbluray/bdj/java/org/videolan/media/content/BDHandler.java ++@@ -67,10 +67,13 @@ public abstract class BDHandler implements Player, ServiceContentHandler { ++ ++ public BDHandler() { ++ ownerContext = BDJXletContext.getCurrentContext(); ++- ++- PlayerAction action = new PlayerAction(this, PlayerAction.ACTION_INIT, null); ++- BDJActionManager.getInstance().putCommand(action); ++- action.waitEnd(); +++ if (ownerContext == null) { +++ doInitAction(); +++ } else { +++ PlayerAction action = new PlayerAction(this, PlayerAction.ACTION_INIT, null); +++ BDJActionManager.getInstance().putCommand(action); +++ action.waitEnd(); +++ } ++ } ++ ++ private void doInitAction() { ++diff --git a/src/libbluray/bdj/java/org/videolan/media/content/PlayerManager.java b/src/libbluray/bdj/java/org/videolan/media/content/PlayerManager.java ++index cc06e84..d45358b 100644 ++--- a/src/libbluray/bdj/java/org/videolan/media/content/PlayerManager.java +++++ b/src/libbluray/bdj/java/org/videolan/media/content/PlayerManager.java ++@@ -19,6 +19,7 @@ ++ package org.videolan.media.content; ++ ++ import 
java.util.ArrayList; +++import org.videolan.Logger; ++ ++ public class PlayerManager { ++ ++@@ -81,7 +82,7 @@ public class PlayerManager { ++ return; ++ } ++ ++- System.err.println("unknown player type: " + player.getClass().getName()); +++ logger.error("unknown player type: " + player.getClass().getName()); ++ } ++ ++ protected boolean allocateResource(BDHandler player) { ++@@ -91,6 +92,9 @@ public class PlayerManager { ++ } ++ synchronized (playlistPlayerLock) { ++ if (playlistPlayer != null && player != playlistPlayer) { +++ +++ logger.info("allocateResource(): Stopping old playlist player"); +++ ++ playlistPlayer.stop(); ++ playlistPlayer.deallocate(); ++ } ++@@ -108,7 +112,7 @@ public class PlayerManager { ++ return true; ++ } ++ ++- System.err.println("unknown player type: " + player.getClass().getName()); +++ logger.error("allocateResource(): unknown player type: " + player.getClass().getName()); ++ return false; ++ } ++ ++@@ -153,4 +157,6 @@ public class PlayerManager { ++ } ++ } ++ } +++ +++ private static final Logger logger = Logger.getLogger(PlayerManager.class.getName()); ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/media/content/playlist/BackgroundVideoPresentationControlImpl.java b/src/libbluray/bdj/java/org/videolan/media/content/playlist/BackgroundVideoPresentationControlImpl.java ++index 05ae554..21f6de5 100644 ++--- a/src/libbluray/bdj/java/org/videolan/media/content/playlist/BackgroundVideoPresentationControlImpl.java +++++ b/src/libbluray/bdj/java/org/videolan/media/content/playlist/BackgroundVideoPresentationControlImpl.java ++@@ -53,6 +53,8 @@ public class BackgroundVideoPresentationControlImpl extends VideoControl ++ } ++ ++ public boolean setVideoTransformation(VideoTransformation transform) { +++ if (transform == null) +++ return false; ++ setClipRegion(transform.getClipRegion()); ++ HScreenPoint pos = transform.getVideoPosition(); ++ float[] scales = transform.getScalingFactors(); ++@@ -99,40 +101,40 @@ public class 
BackgroundVideoPresentationControlImpl extends VideoControl ++ return new AWTVideoSize( ++ new Rectangle(vd.width, vd.height), ++ new Rectangle(sd.width, sd.height)); ++- } +++ } ++ ++- public Dimension getSourceVideoSize() { ++- return getVideoSize(); ++- } +++ public Dimension getSourceVideoSize() { +++ return getVideoSize(); +++ } ++ ++- public boolean setSize(AWTVideoSize size) { ++- setClipRegion(size.getSource()); ++- setVideoArea(getNormalizedRectangle(getScreenSize(), size.getDestination())); ++- return true; ++- } +++ public boolean setSize(AWTVideoSize size) { +++ setClipRegion(size.getSource()); +++ setVideoArea(getNormalizedRectangle(getScreenSize(), size.getDestination())); +++ return true; +++ } ++ ++- public AWTVideoSize checkSize(AWTVideoSize size) { ++- Dimension vd = getInputVideoSize(); ++- Rectangle sr = size.getSource(); ++- if (sr.x < 0) +++ public AWTVideoSize checkSize(AWTVideoSize size) { +++ Dimension vd = getInputVideoSize(); +++ Rectangle sr = size.getSource(); +++ if (sr.x < 0) +++ sr.x = 0; +++ if ((sr.x + sr.width) > vd.width) { +++ sr.width = vd.width - sr.x; +++ if (sr.width <= 0) { ++ sr.x = 0; ++- if ((sr.x + sr.width) > vd.width) { ++- sr.width = vd.width - sr.x; ++- if (sr.width <= 0) { ++- sr.x = 0; ++- sr.width = 0; ++- } +++ sr.width = 0; ++ } ++- if (sr.y < 0) +++ } +++ if (sr.y < 0) +++ sr.y = 0; +++ if ((sr.y + sr.height) > vd.height) { +++ sr.height = vd.height - sr.y; +++ if (sr.height <= 0) { ++ sr.y = 0; ++- if ((sr.y + sr.height) > vd.height) { ++- sr.height = vd.height - sr.y; ++- if (sr.height <= 0) { ++- sr.y = 0; ++- sr.height = 0; ++- } +++ sr.height = 0; ++ } ++- Rectangle dr = size.getDestination(); ++- return new AWTVideoSize(sr, dr); ++ } +++ Rectangle dr = size.getDestination(); +++ return new AWTVideoSize(sr, dr); +++ } ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/media/content/playlist/Handler.java b/src/libbluray/bdj/java/org/videolan/media/content/playlist/Handler.java ++index 7e52949..8728628 
100644 ++--- a/src/libbluray/bdj/java/org/videolan/media/content/playlist/Handler.java +++++ b/src/libbluray/bdj/java/org/videolan/media/content/playlist/Handler.java ++@@ -72,6 +72,7 @@ public class Handler extends BDHandler { ++ synchronized (this) { ++ try { ++ locator = new BDLocator(source.getLocator().toExternalForm()); +++ currentLocator = null; ++ } catch (org.davic.net.InvalidLocatorException e) { ++ throw new IncompatibleSourceException(); ++ } ++@@ -294,7 +295,11 @@ public class Handler extends BDHandler { ++ ++ protected void doEndOfMediaReached(int playlist) { ++ synchronized (this) { ++- if (locator == null || locator.getPlayListId() != playlist) { +++ if (locator == null) { +++ System.err.println("endOfMedia(" + playlist + ") ignored: no current locator"); +++ return; +++ } +++ if (locator.getPlayListId() != playlist) { ++ System.err.println("endOfMedia ignored: playlist does not match (" + playlist + " != " + locator.getPlayListId()); ++ return; ++ } ++@@ -336,6 +341,7 @@ public class Handler extends BDHandler { ++ if (pi == null) ++ throw new InvalidPlayListException(); ++ this.locator = locator; +++ this.currentLocator = null; ++ baseMediaTime = 0; ++ if (state == Prefetched) ++ doPrefetch(); ++diff --git a/src/libbluray/bdj/java/org/videolan/media/content/video/dvb/mpeg/drip/BackgroundVideoPresentationControlImpl.java b/src/libbluray/bdj/java/org/videolan/media/content/video/dvb/mpeg/drip/BackgroundVideoPresentationControlImpl.java ++index 3596def..377aacc 100644 ++--- a/src/libbluray/bdj/java/org/videolan/media/content/video/dvb/mpeg/drip/BackgroundVideoPresentationControlImpl.java +++++ b/src/libbluray/bdj/java/org/videolan/media/content/video/dvb/mpeg/drip/BackgroundVideoPresentationControlImpl.java ++@@ -100,23 +100,23 @@ public class BackgroundVideoPresentationControlImpl extends VideoControl ++ Rectangle sr = size.getSource(); ++ if (sr.x < 0) ++ sr.x = 0; ++- if ((sr.x + sr.width) > vd.width) { ++- sr.width = vd.width - sr.x; ++- if 
(sr.width <= 0) { ++- sr.x = 0; ++- sr.width = 0; ++- } +++ if ((sr.x + sr.width) > vd.width) { +++ sr.width = vd.width - sr.x; +++ if (sr.width <= 0) { +++ sr.x = 0; +++ sr.width = 0; ++ } ++- if (sr.y < 0) +++ } +++ if (sr.y < 0) +++ sr.y = 0; +++ if ((sr.y + sr.height) > vd.height) { +++ sr.height = vd.height - sr.y; +++ if (sr.height <= 0) { ++ sr.y = 0; ++- if ((sr.y + sr.height) > vd.height) { ++- sr.height = vd.height - sr.y; ++- if (sr.height <= 0) { ++- sr.y = 0; ++- sr.height = 0; ++- } +++ sr.height = 0; ++ } ++- Rectangle dr = size.getDestination(); ++- return new AWTVideoSize(sr, dr); +++ } +++ Rectangle dr = size.getDestination(); +++ return new AWTVideoSize(sr, dr); ++ } ++ } ++diff --git a/src/libbluray/bdj/native/java_awt_BDFontMetrics.c b/src/libbluray/bdj/native/java_awt_BDFontMetrics.c ++index 3bbd3c3..f84a382 100644 ++--- a/src/libbluray/bdj/native/java_awt_BDFontMetrics.c +++++ b/src/libbluray/bdj/native/java_awt_BDFontMetrics.c ++@@ -135,7 +135,10 @@ static char *_win32_resolve_font(const char *family, int style) ++ ++ memset(&lf, 0, sizeof(lf)); ++ lf.lfCharSet = DEFAULT_CHARSET; ++- MultiByteToWideChar(CP_UTF8, 0, family, -1, lf.lfFaceName, sizeof(lf.lfFaceName)); +++ int length = MultiByteToWideChar(CP_UTF8, 0, family, -1, lf.lfFaceName, LF_FACESIZE); +++ if (!length) { +++ return NULL; +++ } ++ ++ hDC = GetDC(NULL); ++ EnumFontFamiliesExW(hDC, &lf, (FONTENUMPROCW)&EnumFontCallbackW, (LPARAM)&data, 0); ++diff --git a/src/libbluray/bdnav/clpi_parse.c b/src/libbluray/bdnav/clpi_parse.c ++index 365ec0f..f0826de 100644 ++--- a/src/libbluray/bdnav/clpi_parse.c +++++ b/src/libbluray/bdnav/clpi_parse.c ++@@ -39,6 +39,7 @@ ++ #define CLPI_SIG1 ('H' << 24 | 'D' << 16 | 'M' << 8 | 'V') ++ #define CLPI_SIG2A ('0' << 24 | '2' << 16 | '0' << 8 | '0') ++ #define CLPI_SIG2B ('0' << 24 | '1' << 16 | '0' << 8 | '0') +++#define CLPI_SIG2C ('0' << 24 | '2' << 16 | '4' << 8 | '0') ++ ++ static void ++ _human_readable_sig(char *sig, uint32_t s1, uint32_t s2) 
++@@ -129,7 +130,8 @@ _parse_header(BITSTREAM *bits, CLPI_CL *cl) ++ cl->type_indicator2 = bs_read(bits, 32); ++ if (cl->type_indicator != CLPI_SIG1 || ++ (cl->type_indicator2 != CLPI_SIG2A && ++- cl->type_indicator2 != CLPI_SIG2B)) { +++ cl->type_indicator2 != CLPI_SIG2B && +++ cl->type_indicator2 != CLPI_SIG2C)) { ++ ++ char sig[9]; ++ char expect[9]; ++@@ -223,7 +225,7 @@ _parse_sequence(BITSTREAM *bits, CLPI_CL *cl) ++ cl->sequence.num_atc_seq = bs_read(bits, 8); ++ ++ CLPI_ATC_SEQ *atc_seq; ++- atc_seq = malloc(cl->sequence.num_atc_seq * sizeof(CLPI_ATC_SEQ)); +++ atc_seq = calloc(cl->sequence.num_atc_seq, sizeof(CLPI_ATC_SEQ)); ++ cl->sequence.atc_seq = atc_seq; ++ for (ii = 0; ii < cl->sequence.num_atc_seq; ii++) { ++ atc_seq[ii].spn_atc_start = bs_read(bits, 32); ++@@ -254,7 +256,7 @@ _parse_program(BITSTREAM *bits, CLPI_PROG_INFO *program) ++ program->num_prog = bs_read(bits, 8); ++ ++ CLPI_PROG *progs; ++- progs = malloc(program->num_prog * sizeof(CLPI_PROG)); +++ progs = calloc(program->num_prog, sizeof(CLPI_PROG)); ++ program->progs = progs; ++ for (ii = 0; ii < program->num_prog; ii++) { ++ progs[ii].spn_program_sequence_start = bs_read(bits, 32); ++@@ -263,7 +265,7 @@ _parse_program(BITSTREAM *bits, CLPI_PROG_INFO *program) ++ progs[ii].num_groups = bs_read(bits, 8); ++ ++ CLPI_PROG_STREAM *ps; ++- ps = malloc(progs[ii].num_streams * sizeof(CLPI_PROG_STREAM)); +++ ps = calloc(progs[ii].num_streams, sizeof(CLPI_PROG_STREAM)); ++ progs[ii].streams = ps; ++ for (jj = 0; jj < progs[ii].num_streams; jj++) { ++ ps[jj].pid = bs_read(bits, 16); ++@@ -335,7 +337,7 @@ _parse_cpi(BITSTREAM *bits, CLPI_CPI *cpi) ++ cpi->num_stream_pid = bs_read(bits, 8); ++ ++ CLPI_EP_MAP_ENTRY *entry; ++- entry = malloc(cpi->num_stream_pid * sizeof(CLPI_EP_MAP_ENTRY)); +++ entry = calloc(cpi->num_stream_pid, sizeof(CLPI_EP_MAP_ENTRY)); ++ cpi->entry = entry; ++ for (ii = 0; ii < cpi->num_stream_pid; ii++) { ++ entry[ii].pid = bs_read(bits, 16); ++@@ -622,12 +624,12 @@ 
_clean_program(CLPI_PROG_INFO *p) ++ { ++ int ii; ++ ++- for (ii = 0; ii < p->num_prog; ii++) { ++- if (p->progs[ii].streams != NULL) { +++ if (p && p->progs) { +++ for (ii = 0; ii < p->num_prog; ii++) { ++ X_FREE(p->progs[ii].streams); ++ } +++ X_FREE(p->progs); ++ } ++- X_FREE(p->progs); ++ } ++ ++ static void ++@@ -635,15 +637,13 @@ _clean_cpi(CLPI_CPI *cpi) ++ { ++ int ii; ++ ++- for (ii = 0; ii < cpi->num_stream_pid; ii++) { ++- if (cpi->entry[ii].coarse != NULL) { +++ if (cpi && cpi->entry) { +++ for (ii = 0; ii < cpi->num_stream_pid; ii++) { ++ X_FREE(cpi->entry[ii].coarse); ++- } ++- if (cpi->entry[ii].fine != NULL) { ++ X_FREE(cpi->entry[ii].fine); ++ } +++ X_FREE(cpi->entry); ++ } ++- X_FREE(cpi->entry); ++ } ++ ++ void ++@@ -654,15 +654,12 @@ clpi_free(CLPI_CL *cl) ++ if (cl == NULL) { ++ return; ++ } ++- if (cl->clip.atc_delta != NULL) { ++- X_FREE(cl->clip.atc_delta); ++- } ++- for (ii = 0; ii < cl->sequence.num_atc_seq; ii++) { ++- if (cl->sequence.atc_seq[ii].stc_seq != NULL) { +++ X_FREE(cl->clip.atc_delta); +++ if (cl->sequence.atc_seq) { +++ for (ii = 0; ii < cl->sequence.num_atc_seq; ii++) { ++ X_FREE(cl->sequence.atc_seq[ii].stc_seq); ++ } ++- } ++- if (cl->sequence.atc_seq != NULL) { +++ ++ X_FREE(cl->sequence.atc_seq); ++ } ++ ++@@ -796,7 +793,7 @@ clpi_copy(const CLPI_CL* src_cl) ++ } ++ ++ dest_cl->sequence.num_atc_seq = src_cl->sequence.num_atc_seq; ++- dest_cl->sequence.atc_seq = malloc(src_cl->sequence.num_atc_seq * sizeof(CLPI_ATC_SEQ)); +++ dest_cl->sequence.atc_seq = calloc(src_cl->sequence.num_atc_seq, sizeof(CLPI_ATC_SEQ)); ++ for (ii = 0; ii < src_cl->sequence.num_atc_seq; ii++) { ++ dest_cl->sequence.atc_seq[ii].spn_atc_start = src_cl->sequence.atc_seq[ii].spn_atc_start; ++ dest_cl->sequence.atc_seq[ii].offset_stc_id = src_cl->sequence.atc_seq[ii].offset_stc_id; ++@@ -811,7 +808,7 @@ clpi_copy(const CLPI_CL* src_cl) ++ } ++ ++ dest_cl->program.num_prog = src_cl->program.num_prog; ++- dest_cl->program.progs = 
malloc(src_cl->program.num_prog * sizeof(CLPI_PROG)); +++ dest_cl->program.progs = calloc(src_cl->program.num_prog, sizeof(CLPI_PROG)); ++ for (ii = 0; ii < src_cl->program.num_prog; ii++) { ++ dest_cl->program.progs[ii].spn_program_sequence_start = src_cl->program.progs[ii].spn_program_sequence_start; ++ dest_cl->program.progs[ii].program_map_pid = src_cl->program.progs[ii].program_map_pid; ++@@ -831,7 +828,7 @@ clpi_copy(const CLPI_CL* src_cl) ++ } ++ ++ dest_cl->cpi.num_stream_pid = src_cl->cpi.num_stream_pid; ++- dest_cl->cpi.entry = malloc(src_cl->cpi.num_stream_pid * sizeof(CLPI_EP_MAP_ENTRY)); +++ dest_cl->cpi.entry = calloc(src_cl->cpi.num_stream_pid, sizeof(CLPI_EP_MAP_ENTRY)); ++ for (ii = 0; ii < dest_cl->cpi.num_stream_pid; ii++) { ++ dest_cl->cpi.entry[ii].pid = src_cl->cpi.entry[ii].pid; ++ dest_cl->cpi.entry[ii].ep_stream_type = src_cl->cpi.entry[ii].ep_stream_type; ++diff --git a/src/libbluray/bdnav/index_parse.c b/src/libbluray/bdnav/index_parse.c ++index 6c07ba1..64dc5e3 100644 ++--- a/src/libbluray/bdnav/index_parse.c +++++ b/src/libbluray/bdnav/index_parse.c ++@@ -103,8 +103,16 @@ static int _parse_index(BITSTREAM *bs, INDX_ROOT *index) ++ } ++ ++ index->num_titles = bs_read(bs, 16); +++ if (!index->num_titles) { +++ BD_DEBUG(DBG_CRIT, "empty index\n"); +++ return 0; +++ } ++ ++ index->titles = calloc(index->num_titles, sizeof(INDX_TITLE)); +++ if (!index->titles) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ return 0; +++ } ++ ++ for (i = 0; i < index->num_titles; i++) { ++ ++diff --git a/src/libbluray/bdnav/meta_parse.c b/src/libbluray/bdnav/meta_parse.c ++index 50b8c75..a9a7edc 100644 ++--- a/src/libbluray/bdnav/meta_parse.c +++++ b/src/libbluray/bdnav/meta_parse.c ++@@ -81,29 +81,35 @@ static void _parseManifestNode(xmlNode * a_node, META_DL *disclib) ++ } ++ else if (xmlStrEqual(cur_node->parent->name, BAD_CAST_CONST "tableOfContents")) { ++ if (xmlStrEqual(cur_node->name, BAD_CAST_CONST "titleName") && (tmp = xmlGetProp(cur_node, 
BAD_CAST_CONST "titleNumber"))) { ++- int i = disclib->toc_count; ++- disclib->toc_count++; ++- disclib->toc_entries = realloc(disclib->toc_entries, (disclib->toc_count*sizeof(META_TITLE))); ++- disclib->toc_entries[i].title_number = atoi((const char*)tmp); ++- disclib->toc_entries[i].title_name = (char*)xmlNodeGetContent(cur_node); +++ META_TITLE *new_entries = realloc(disclib->toc_entries, ((disclib->toc_count + 1)*sizeof(META_TITLE))); +++ if (new_entries) { +++ int i = disclib->toc_count; +++ disclib->toc_count++; +++ disclib->toc_entries = new_entries; +++ disclib->toc_entries[i].title_number = atoi((const char*)tmp); +++ disclib->toc_entries[i].title_name = (char*)xmlNodeGetContent(cur_node); +++ } ++ XML_FREE(tmp); ++ } ++ } ++ else if (xmlStrEqual(cur_node->parent->name, BAD_CAST_CONST "description")) { ++ if (xmlStrEqual(cur_node->name, BAD_CAST_CONST "thumbnail") && (tmp = xmlGetProp(cur_node, BAD_CAST_CONST "href"))) { ++- uint8_t i = disclib->thumb_count; ++- disclib->thumb_count++; ++- disclib->thumbnails = realloc(disclib->thumbnails, (disclib->thumb_count*sizeof(META_THUMBNAIL))); ++- disclib->thumbnails[i].path = (char *)tmp; ++- if ((tmp = xmlGetProp(cur_node, BAD_CAST_CONST "size"))) { ++- int x = 0, y = 0; ++- sscanf((const char*)tmp, "%ix%i", &x, &y); ++- disclib->thumbnails[i].xres = x; ++- disclib->thumbnails[i].yres = y; ++- XML_FREE(tmp); ++- } ++- else { ++- disclib->thumbnails[i].xres = disclib->thumbnails[i].yres = -1; +++ META_THUMBNAIL *new_thumbnails = realloc(disclib->thumbnails, ((disclib->thumb_count + 1)*sizeof(META_THUMBNAIL))); +++ if (new_thumbnails) { +++ uint8_t i = disclib->thumb_count; +++ disclib->thumb_count++; +++ disclib->thumbnails = new_thumbnails; +++ disclib->thumbnails[i].path = (char *)tmp; +++ if ((tmp = xmlGetProp(cur_node, BAD_CAST_CONST "size"))) { +++ int x = 0, y = 0; +++ sscanf((const char*)tmp, "%ix%i", &x, &y); +++ disclib->thumbnails[i].xres = x; +++ disclib->thumbnails[i].yres = y; +++ XML_FREE(tmp); +++ 
} +++ else { +++ disclib->thumbnails[i].xres = disclib->thumbnails[i].yres = -1; +++ } ++ } ++ } ++ } ++@@ -126,15 +132,18 @@ static void _findMetaXMLfiles(META_ROOT *meta, BD_DISC *disc) ++ if (ent.d_name[0] == '.') ++ continue; ++ else if (strncasecmp(ent.d_name, "bdmt_", 5) == 0) { ++- uint8_t i = meta->dl_count; ++- meta->dl_count++; ++- meta->dl_entries = realloc(meta->dl_entries, (meta->dl_count*sizeof(META_DL))); ++- memset(&meta->dl_entries[i], 0, sizeof(meta->dl_entries[i])); ++- ++- meta->dl_entries[i].filename = str_dup(ent.d_name); ++- strncpy(meta->dl_entries[i].language_code, ent.d_name+5,3); ++- meta->dl_entries[i].language_code[3] = '\0'; ++- str_tolower(meta->dl_entries[i].language_code); +++ META_DL *new_dl_entries = realloc(meta->dl_entries, ((meta->dl_count + 1)*sizeof(META_DL))); +++ if (new_dl_entries) { +++ uint8_t i = meta->dl_count; +++ meta->dl_count++; +++ meta->dl_entries = new_dl_entries; +++ memset(&meta->dl_entries[i], 0, sizeof(meta->dl_entries[i])); +++ +++ meta->dl_entries[i].filename = str_dup(ent.d_name); +++ strncpy(meta->dl_entries[i].language_code, ent.d_name+5,3); +++ meta->dl_entries[i].language_code[3] = '\0'; +++ str_tolower(meta->dl_entries[i].language_code); +++ } ++ } ++ } ++ dir_close(dir); ++@@ -145,6 +154,10 @@ META_ROOT *meta_parse(BD_DISC *disc) ++ { ++ #ifdef HAVE_LIBXML2 ++ META_ROOT *root = calloc(1, sizeof(META_ROOT)); +++ if (!root) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ return NULL; +++ } ++ root->dl_count = 0; ++ ++ xmlDocPtr doc; ++diff --git a/src/libbluray/bdnav/mpls_parse.c b/src/libbluray/bdnav/mpls_parse.c ++index da01f7b..8bfbb8c 100644 ++--- a/src/libbluray/bdnav/mpls_parse.c +++++ b/src/libbluray/bdnav/mpls_parse.c ++@@ -39,6 +39,7 @@ ++ #define MPLS_SIG1 ('M' << 24 | 'P' << 16 | 'L' << 8 | 'S') ++ #define MPLS_SIG2A ('0' << 24 | '2' << 16 | '0' << 8 | '0') ++ #define MPLS_SIG2B ('0' << 24 | '1' << 16 | '0' << 8 | '0') +++#define MPLS_SIG2C ('0' << 24 | '2' << 16 | '4' << 8 | '0') ++ ++ 
static void ++ _human_readable_sig(char *sig, uint32_t s1, uint32_t s2) ++@@ -137,8 +138,9 @@ _parse_appinfo(BITSTREAM *bits, MPLS_AI *ai) ++ ai->random_access_flag = bs_read(bits, 1); ++ ai->audio_mix_flag = bs_read(bits, 1); ++ ai->lossless_bypass_flag = bs_read(bits, 1); +++ ai->mvc_base_view_r_flag = bs_read(bits, 1); ++ // Reserved ++- bs_skip(bits, 13); +++ bs_skip(bits, 12); ++ bs_seek_byte(bits, pos + len); ++ return 1; ++ } ++@@ -155,7 +157,8 @@ _parse_header(BITSTREAM *bits, MPLS_PL *pl) ++ pl->type_indicator2 = bs_read(bits, 32); ++ if (pl->type_indicator != MPLS_SIG1 || ++ (pl->type_indicator2 != MPLS_SIG2A && ++- pl->type_indicator2 != MPLS_SIG2B)) { +++ pl->type_indicator2 != MPLS_SIG2B && +++ pl->type_indicator2 != MPLS_SIG2C)) { ++ ++ char sig[9]; ++ char expect[9]; ++@@ -259,6 +262,7 @@ _parse_stream(BITSTREAM *bits, MPLS_STREAM *s) ++ break; ++ }; ++ s->lang[3] = '\0'; +++ s->ss_offset_sequence_id = 0xFF; ++ ++ bs_seek_byte(bits, pos + len); ++ return 1; ++@@ -882,6 +886,99 @@ _parse_subpath_extension(BITSTREAM *bits, MPLS_PL *pl) ++ } ++ ++ static int +++_parse_stn_ss_extension(BITSTREAM *bits, MPLS_PL *pl) +++{ +++ int ii, s; +++ int64_t pos; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ uint32_t len = bs_read(bits, 16); +++ pos = bs_pos(bits) >> 3; +++ int Fixed_offset_during_PopUp_flag = bs_read(bits, 1); +++ bs_skip(bits, 15); // reserved +++ +++ for (s = 0; s < pl->play_item[ii].stn.num_video; s++) { +++ // stream_entry +++ uint32_t slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ // stream_attributes_ss +++ slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ bs_skip(bits, 10); // reserved +++ bs_skip(bits, 6); // number_of_offset_sequences +++ } +++ +++ for (s = 0; s < pl->play_item[ii].stn.num_pg; s++) { +++ pl->play_item[ii].stn.pg[s].ss_offset_sequence_id = bs_read(bits, 8); +++ +++ bs_skip(bits, 4); // reserved +++ bs_skip(bits, 1); // dialog_region_offset_valid_flag +++ int is_SS_PG = bs_read(bits, 1); 
+++ int is_top_AS_PG_textST = bs_read(bits, 1); +++ int is_bottom_AS_PG_textST = bs_read(bits, 1); +++ if (is_SS_PG) { +++ // stream_entry left eye +++ uint32_t slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ // stream_entry right eye +++ slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ bs_skip(bits, 8); // reserved +++ bs_skip(bits, 8); // PG offset +++ } +++ if (is_top_AS_PG_textST) { +++ // stream_entry +++ uint32_t slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ bs_skip(bits, 8); // reserved +++ bs_skip(bits, 8); // PG offset +++ } +++ if (is_bottom_AS_PG_textST) { +++ // stream_entry +++ uint32_t slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ bs_skip(bits, 8); // reserved +++ bs_skip(bits, 8); // PG offset +++ } +++ } +++ +++ for (s = 0; s < pl->play_item[ii].stn.num_ig; s++) { +++ if (Fixed_offset_during_PopUp_flag) +++ bs_skip(bits, 8); +++ else +++ pl->play_item[ii].stn.ig[s].ss_offset_sequence_id = bs_read(bits, 8); +++ +++ bs_skip(bits, 16); // IG_Plane_offset_during_BB_video +++ bs_skip(bits, 7); // reserved +++ int is_SS_IG = bs_read(bits, 1); +++ if (is_SS_IG) { +++ // stream_entry left eye +++ uint32_t slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ // stream_entry right eye +++ slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ bs_skip(bits, 8); // reserved +++ bs_skip(bits, 8); // PG offset +++ } +++ } +++ +++ // Skip to next play item +++ bs_seek_byte(bits, pos + len); +++ } +++ +++ return 0; +++} +++ +++static int ++ _parse_mpls_extension(BITSTREAM *bits, int id1, int id2, void *handle) ++ { ++ MPLS_PL *pl = (MPLS_PL*)handle; ++@@ -895,7 +992,7 @@ _parse_mpls_extension(BITSTREAM *bits, int id1, int id2, void *handle) ++ ++ if (id1 == 2) { ++ if (id2 == 1) { ++- return 0; +++ return _parse_stn_ss_extension(bits, pl); ++ } ++ if (id2 == 2) { ++ // SubPath entries extension ++diff --git a/src/libbluray/bdnav/mpls_parse.h b/src/libbluray/bdnav/mpls_parse.h ++index 
f9f7a18..94add53 100644 ++--- a/src/libbluray/bdnav/mpls_parse.h +++++ b/src/libbluray/bdnav/mpls_parse.h ++@@ -49,6 +49,7 @@ typedef struct ++ uint8_t sv_num_pip_pg_ref; ++ uint8_t *sv_secondary_audio_ref; ++ uint8_t *sv_pip_pg_ref; +++ uint8_t ss_offset_sequence_id; ++ } MPLS_STREAM; ++ ++ typedef struct ++@@ -110,6 +111,7 @@ typedef struct ++ uint8_t random_access_flag; ++ uint8_t audio_mix_flag; ++ uint8_t lossless_bypass_flag; +++ uint8_t mvc_base_view_r_flag; ++ } MPLS_AI; ++ ++ typedef struct ++diff --git a/src/libbluray/bdnav/navigation.c b/src/libbluray/bdnav/navigation.c ++index db7fa9f..cfd7739 100644 ++--- a/src/libbluray/bdnav/navigation.c +++++ b/src/libbluray/bdnav/navigation.c ++@@ -174,6 +174,21 @@ _pl_duration(MPLS_PL *pl) ++ return duration; ++ } ++ +++static uint32_t +++_pl_chapter_count(MPLS_PL *pl) +++{ +++ unsigned ii, chapters = 0; +++ +++ // Count the number of "entry" marks (skipping "link" marks) +++ // This is the the number of chapters +++ for (ii = 0; ii < pl->mark_count; ii++) { +++ if (pl->play_mark[ii].mark_type == BD_MARK_ENTRY) { +++ chapters++; +++ } +++ } +++ return chapters; +++} +++ ++ NAV_TITLE_LIST* nav_get_title_list(BD_DISC *disc, uint32_t flags, uint32_t min_title_length) ++ { ++ BD_DIR_H *dir; ++@@ -403,15 +418,20 @@ static void _fill_clip(NAV_TITLE *title, ++ strncpy(&clip->name[5], ".m2ts", 6); ++ clip->clip_id = atoi(mpls_clip[clip->angle].clip_id); ++ ++- file = str_printf("%s.clpi", mpls_clip[clip->angle].clip_id); ++ clpi_free(clip->cl); ++- clip->cl = clpi_get(title->disc, file); ++- X_FREE(file); +++ clip->cl = NULL; +++ +++ file = str_printf("%s.clpi", mpls_clip[clip->angle].clip_id); +++ if (file) { +++ clip->cl = clpi_get(title->disc, file); +++ X_FREE(file); +++ } ++ if (clip->cl == NULL) { ++ clip->start_pkt = 0; ++ clip->end_pkt = 0; ++ return; ++ } +++ ++ switch (connection_condition) { ++ case 5: ++ case 6: ++@@ -441,7 +461,7 @@ static void _fill_clip(NAV_TITLE *title, ++ NAV_TITLE* nav_title_open(BD_DISC 
*disc, const char *playlist, unsigned angle) ++ { ++ NAV_TITLE *title = NULL; ++- unsigned ii, ss, chapters = 0; +++ unsigned ii, ss; ++ uint32_t pos = 0; ++ uint32_t time = 0; ++ ++@@ -501,15 +521,8 @@ NAV_TITLE* nav_title_open(BD_DISC *disc, const char *playlist, unsigned angle) ++ } ++ } ++ ++- // Count the number of "entry" marks (skipping "link" marks) ++- // This is the the number of chapters ++- for (ii = 0; ii < title->pl->mark_count; ii++) { ++- if (title->pl->play_mark[ii].mark_type == BD_MARK_ENTRY) { ++- chapters++; ++- } ++- } ++- title->chap_list.count = chapters; ++- title->chap_list.mark = calloc(chapters, sizeof(NAV_MARK)); +++ title->chap_list.count = _pl_chapter_count(title->pl); +++ title->chap_list.mark = calloc(title->chap_list.count, sizeof(NAV_MARK)); ++ title->mark_list.count = title->pl->mark_count; ++ title->mark_list.mark = calloc(title->pl->mark_count, sizeof(NAV_MARK)); ++ ++@@ -526,19 +539,29 @@ void nav_title_close(NAV_TITLE *title) ++ { ++ unsigned ii, ss; ++ ++- for (ss = 0; ss < title->sub_path_count; ss++) { ++- for (ii = 0; ii < title->sub_path[ss].clip_list.count; ii++) { ++- clpi_free(title->sub_path[ss].clip_list.clip[ii].cl); +++ if (!title) +++ return; +++ +++ if (title->sub_path) { +++ for (ss = 0; ss < title->sub_path_count; ss++) { +++ if (title->sub_path[ss].clip_list.clip) { +++ for (ii = 0; ii < title->sub_path[ss].clip_list.count; ii++) { +++ clpi_free(title->sub_path[ss].clip_list.clip[ii].cl); +++ } +++ X_FREE(title->sub_path[ss].clip_list.clip); +++ } ++ } ++- X_FREE(title->sub_path[ss].clip_list.clip); +++ X_FREE(title->sub_path); ++ } ++- X_FREE(title->sub_path); ++ ++- for (ii = 0; ii < title->pl->list_count; ii++) { ++- clpi_free(title->clip_list.clip[ii].cl); +++ if (title->clip_list.clip) { +++ for (ii = 0; ii < title->clip_list.count; ii++) { +++ clpi_free(title->clip_list.clip[ii].cl); +++ } +++ X_FREE(title->clip_list.clip); ++ } +++ ++ mpls_free(title->pl); ++- X_FREE(title->clip_list.clip); ++ 
X_FREE(title->chap_list.mark); ++ X_FREE(title->mark_list.mark); ++ X_FREE(title); ++diff --git a/src/libbluray/bdnav/sound_parse.c b/src/libbluray/bdnav/sound_parse.c ++index c1cbcfb..7c267da 100644 ++--- a/src/libbluray/bdnav/sound_parse.c +++++ b/src/libbluray/bdnav/sound_parse.c ++@@ -65,6 +65,7 @@ static int _sound_parse_attributes(BITSTREAM *bs, SOUND_OBJECT *obj) ++ ++ switch (i = bs_read(bs, 4)) { ++ default: BD_DEBUG(DBG_NAV, "unknown channel configuration code %d\n", i); +++ /* fall thru */ ++ case 1: obj->num_channels = 1; ++ break; ++ case 3: obj->num_channels = 2; ++@@ -72,11 +73,13 @@ static int _sound_parse_attributes(BITSTREAM *bs, SOUND_OBJECT *obj) ++ }; ++ switch (i = bs_read(bs, 4)) { ++ default: BD_DEBUG(DBG_NAV, "unknown sample rate code %d\n", i); +++ /* fall thru */ ++ case 1: obj->sample_rate = 48000; ++ break; ++ }; ++ switch (i = bs_read(bs, 2)) { ++ default: BD_DEBUG(DBG_NAV, "unknown bits per sample code %d\n", i); +++ /* fall thru */ ++ case 1: obj->bits_per_sample = 16; ++ break; ++ }; ++@@ -103,7 +106,15 @@ static int _sound_read_samples(BITSTREAM *bs, SOUND_OBJECT *obj) ++ uint32_t n; ++ uint32_t num_samples = obj->num_frames * obj->num_channels; ++ +++ if (!num_samples) { +++ return 1; +++ } +++ ++ obj->samples = calloc(num_samples, sizeof(uint16_t)); +++ if (!obj->samples) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ return 0; +++ } ++ ++ for (n = 0; n < num_samples; n++) { ++ obj->samples[n] = bs_read(bs, 16); ++@@ -116,13 +127,14 @@ void sound_free(SOUND_DATA **p) ++ { ++ if (p && *p) { ++ ++- unsigned i; ++- for (i = 0 ; i < (*p)->num_sounds; i++) { ++- X_FREE((*p)->sounds[i].samples); ++- } ++- ++- X_FREE((*p)->sounds); +++ if ((*p)->sounds) { +++ unsigned i; +++ for (i = 0 ; i < (*p)->num_sounds; i++) { +++ X_FREE((*p)->sounds[i].samples); +++ } ++ +++ X_FREE((*p)->sounds); +++ } ++ X_FREE(*p); ++ } ++ } ++@@ -150,21 +162,29 @@ static SOUND_DATA *_sound_parse(BD_FILE_H *fp) ++ bs_skip(&bs, 8); /* reserved */ ++ num_sounds 
= bs_read(&bs, 8); ++ ++- if (data_len < 1) { +++ if (data_len < 1 || num_sounds < 1) { ++ BD_DEBUG(DBG_NAV | DBG_CRIT, "empty database\n"); ++ goto error; ++ } ++ ++ data_offsets = calloc(num_sounds, sizeof(uint32_t)); ++ data = calloc(1, sizeof(SOUND_DATA)); +++ if (!data_offsets || !data) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ goto error; +++ } ++ data->num_sounds = num_sounds; ++ data->sounds = calloc(num_sounds, sizeof(SOUND_OBJECT)); +++ if (!data->sounds) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ goto error; +++ } ++ ++ /* parse headers */ ++ ++ for (i = 0; i < data->num_sounds; i++) { ++ if (!_sound_parse_index(&bs, data_offsets + i, &data->sounds[i])) { ++- BD_DEBUG(DBG_NAV | DBG_CRIT, "error parsing sound %d attribues\n", i); +++ BD_DEBUG(DBG_NAV | DBG_CRIT, "error parsing sound %d attributes\n", i); ++ goto error; ++ } ++ } ++diff --git a/src/libbluray/bluray.c b/src/libbluray/bluray.c ++index eba9c5e..27beed7 100644 ++--- a/src/libbluray/bluray.c +++++ b/src/libbluray/bluray.c ++@@ -42,6 +42,7 @@ ++ #include "hdmv/hdmv_vm.h" ++ #include "hdmv/mobj_parse.h" ++ #include "decoders/graphics_controller.h" +++#include "decoders/hdmv_pids.h" ++ #include "decoders/m2ts_filter.h" ++ #include "decoders/overlay.h" ++ #include "disc/disc.h" ++@@ -93,6 +94,7 @@ typedef struct { ++ /* */ ++ uint8_t eof_hit; ++ uint8_t encrypted_block_cnt; +++ uint8_t seek_flag; /* used to fine-tune first read after seek */ ++ ++ M2TS_FILTER *m2ts_filter; ++ } BD_STREAM; ++@@ -202,7 +204,9 @@ static void _init_event_queue(BLURAY *bd) ++ { ++ if (!bd->event_queue) { ++ bd->event_queue = calloc(1, sizeof(struct bd_event_queue_s)); ++- bd_mutex_init(&bd->event_queue->mutex); +++ if (bd->event_queue) { +++ bd_mutex_init(&bd->event_queue->mutex); +++ } ++ } else { ++ bd_mutex_lock(&bd->event_queue->mutex); ++ bd->event_queue->in = 0; ++@@ -794,7 +798,15 @@ static int _preload_m2ts(BLURAY *bd, BD_PRELOAD *p) ++ ++ /* allocate buffer */ ++ p->clip_size = 
(size_t)st.clip_size; ++- p->buf = realloc(p->buf, p->clip_size); +++ uint8_t* tmp = (uint8_t*)realloc(p->buf, p->clip_size); +++ if (!tmp) { +++ BD_DEBUG(DBG_BLURAY | DBG_CRIT, "_preload_m2ts(): out of memory\n"); +++ _close_m2ts(&st); +++ _close_preload(p); +++ return 0; +++ } +++ +++ p->buf = tmp; ++ ++ /* read clip to buffer */ ++ ++@@ -847,6 +859,7 @@ static int64_t _seek_stream(BLURAY *bd, BD_STREAM *st, ++ } ++ ++ st->int_buf_off = 6144; +++ st->seek_flag = 1; ++ ++ return st->clip_pos; ++ } ++@@ -939,6 +952,7 @@ static void _fill_disc_info(BLURAY *bd, BD_ENC_INFO *enc_info) ++ bd->disc_info.bdplus_handled = enc_info->bdplus_handled; ++ bd->disc_info.bdplus_gen = enc_info->bdplus_gen; ++ bd->disc_info.bdplus_date = enc_info->bdplus_date; +++ bd->disc_info.no_menu_support = enc_info->no_menu_support; ++ ++ bd->disc_info.udf_volume_id = disc_volume_id(bd->disc); ++ ++@@ -1085,6 +1099,10 @@ static void _fill_disc_info(BLURAY *bd, BD_ENC_INFO *enc_info) ++ indx_free(&index); ++ } ++ +++ if (!bd->disc_info.first_play_supported || !bd->disc_info.top_menu_supported) { +++ bd->disc_info.no_menu_support = 1; +++ } +++ ++ if (bd->disc_info.bdj_detected) { ++ BDID_DATA *bdid = bdid_get(bd->disc); /* parse id.bdmv */ ++ if (bdid) { ++@@ -1624,6 +1642,25 @@ int64_t bd_seek_time(BLURAY *bd, uint64_t tick) ++ return bd->s_pos; ++ } ++ +++int64_t bd_find_seek_point(BLURAY *bd, uint64_t tick) +++{ +++ uint32_t clip_pkt, out_pkt; +++ NAV_CLIP *clip; +++ +++ tick /= 2; +++ +++ if (bd->title && +++ tick < bd->title->duration) { +++ +++ // Find the closest access unit to the requested position +++ clip = nav_time_search(bd->title, (uint32_t)tick, &clip_pkt, &out_pkt); +++ +++ return (int64_t)out_pkt * 192; +++ } +++ +++ return bd->s_pos; +++} +++ ++ uint64_t bd_tell_time(BLURAY *bd) ++ { ++ uint32_t clip_pkt = 0, out_pkt = 0, out_time = 0; ++@@ -1956,6 +1993,19 @@ static int _bd_read(BLURAY *bd, unsigned char *buf, int len) ++ /* fatal error */ ++ return -1; ++ } +++ +++ /* 
finetune seek point (avoid skipping PAT/PMT/PCR) */ +++ if (BD_UNLIKELY(st->seek_flag)) { +++ st->seek_flag = 0; +++ +++ /* rewind if previous packets contain PAT/PMT/PCR */ +++ while (st->int_buf_off >= 192 && TS_PID(bd->int_buf + st->int_buf_off - 192) <= HDMV_PID_PCR) { +++ st->clip_pos -= 192; +++ st->int_buf_off -= 192; +++ bd->s_pos -= 192; +++ } +++ } +++ ++ } ++ if (size > (unsigned int)6144 - st->int_buf_off) { ++ size = 6144 - st->int_buf_off; ++@@ -2081,12 +2131,14 @@ static int _preload_textst_subpath(BLURAY *bd) ++ gc_add_font(bd->graphics_controller, NULL, -1); ++ for (ii = 0; ii < bd->st_textst.clip->cl->font_info.font_count; ii++) { ++ char *file = str_printf("%s.otf", bd->st_textst.clip->cl->font_info.font[ii].file_id); ++- uint8_t *data = NULL; ++- size_t size = disc_read_file(bd->disc, "BDMV" DIR_SEP "AUXDATA", file, &data); ++- if (data && size > 0 && gc_add_font(bd->graphics_controller, data, size) < 0) { ++- X_FREE(data); +++ if (file) { +++ uint8_t *data = NULL; +++ size_t size = disc_read_file(bd->disc, "BDMV" DIR_SEP "AUXDATA", file, &data); +++ if (data && size > 0 && gc_add_font(bd->graphics_controller, data, size) < 0) { +++ X_FREE(data); +++ } +++ X_FREE(file); ++ } ++- X_FREE(file); ++ } ++ gc_run(bd->graphics_controller, GC_CTRL_PG_CHARCODE, char_code, NULL); ++ ++@@ -2278,6 +2330,8 @@ static int _open_playlist(BLURAY *bd, const char *f_name, unsigned angle) ++ ++ _preload_subpaths(bd); ++ +++ bd->st0.seek_flag = 1; +++ ++ return 1; ++ } ++ return 0; ++@@ -2285,9 +2339,14 @@ static int _open_playlist(BLURAY *bd, const char *f_name, unsigned angle) ++ ++ int bd_select_playlist(BLURAY *bd, uint32_t playlist) ++ { ++- char *f_name = str_printf("%05d.mpls", playlist); +++ char *f_name; ++ int result; ++ +++ f_name = str_printf("%05d.mpls", playlist); +++ if (!f_name) { +++ return 0; +++ } +++ ++ bd_mutex_lock(&bd->mutex); ++ ++ if (bd->title_list) { ++@@ -2504,6 +2563,9 @@ uint32_t bd_get_titles(BLURAY *bd, uint8_t flags, uint32_t 
min_title_length) ++ ++ int bd_get_main_title(BLURAY *bd) ++ { +++ if (!bd) { +++ return -1; +++ } ++ if (bd->title_type != title_undef) { ++ BD_DEBUG(DBG_CRIT | DBG_BLURAY, "bd_get_main_title() can't be used with BluRay menus\n"); ++ } ++@@ -2571,6 +2633,7 @@ static BLURAY_TITLE_INFO* _fill_title_info(NAV_TITLE* title, uint32_t title_idx, ++ BLURAY_CLIP_INFO *ci = &title_info->clips[ii]; ++ NAV_CLIP *nc = &title->clip_list.clip[ii]; ++ +++ ci->idx = nc->clip_id; ++ ci->pkt_count = nc->end_pkt - nc->start_pkt; ++ ci->start_time = (uint64_t)nc->title_time * 2; ++ ci->in_time = (uint64_t)pi->in_time * 2; ++@@ -2597,6 +2660,8 @@ static BLURAY_TITLE_INFO* _fill_title_info(NAV_TITLE* title, uint32_t title_idx, ++ _copy_streams(nc, ci->sec_audio_streams, pi->stn.secondary_audio, ci->sec_audio_stream_count); ++ } ++ +++ title_info->mvc_base_view_r_flag = title->pl->app_info.mvc_base_view_r_flag; +++ ++ return title_info; ++ } ++ ++@@ -2637,9 +2702,14 @@ BLURAY_TITLE_INFO* bd_get_title_info(BLURAY *bd, uint32_t title_idx, unsigned an ++ ++ BLURAY_TITLE_INFO* bd_get_playlist_info(BLURAY *bd, uint32_t playlist, unsigned angle) ++ { ++- char *f_name = str_printf("%05d.mpls", playlist); +++ char *f_name; ++ BLURAY_TITLE_INFO *title_info; ++ +++ f_name = str_printf("%05d.mpls", playlist); +++ if (!f_name) { +++ return NULL; +++ } +++ ++ title_info = _get_title_info(bd, 0, playlist, f_name, angle); ++ ++ X_FREE(f_name); ++@@ -2694,9 +2764,9 @@ int bd_set_player_setting(BLURAY *bd, uint32_t idx, uint32_t value) ++ bd_mutex_lock(&bd->mutex); ++ ++ bd->decode_pg = !!value; ++- result = bd_psr_write_bits(bd->regs, PSR_PG_STREAM, ++- (!!value) << 31, ++- 0x80000000); +++ result = !bd_psr_write_bits(bd->regs, PSR_PG_STREAM, +++ (!!value) << 31, +++ 0x80000000); ++ ++ bd_mutex_unlock(&bd->mutex); ++ return result; ++@@ -2705,7 +2775,7 @@ int bd_set_player_setting(BLURAY *bd, uint32_t idx, uint32_t value) ++ for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) { ++ if (idx == map[i].idx) 
{ ++ bd_mutex_lock(&bd->mutex); ++- result = !bd_psr_setting_write(bd->regs, idx, value); +++ result = !bd_psr_setting_write(bd->regs, map[i].psr, value); ++ bd_mutex_unlock(&bd->mutex); ++ return result; ++ } ++@@ -2756,6 +2826,9 @@ void bd_select_stream(BLURAY *bd, uint32_t stream_type, uint32_t stream_id, uint ++ bd_mutex_lock(&bd->mutex); ++ ++ switch (stream_type) { +++ case BLURAY_AUDIO_STREAM: +++ bd_psr_write(bd->regs, PSR_PRIMARY_AUDIO_ID, stream_id & 0xff); +++ break; ++ case BLURAY_PG_TEXTST_STREAM: ++ bd_psr_write_bits(bd->regs, PSR_PG_STREAM, ++ ((!!enable_flag)<<31) | (stream_id & 0xfff), ++@@ -3076,6 +3149,11 @@ static int _play_title(BLURAY *bd, unsigned title) ++ return 0; ++ } ++ +++ if (bd->disc_info.no_menu_support) { +++ BD_DEBUG(DBG_BLURAY | DBG_CRIT, "bd_play(): no menu support\n"); +++ return 0; +++ } +++ ++ /* first play object ? */ ++ if (title == BLURAY_TITLE_FIRST_PLAY) { ++ ++@@ -3203,6 +3281,12 @@ static int _try_play_title(BLURAY *bd, unsigned title) ++ int bd_play_title(BLURAY *bd, unsigned title) ++ { ++ int ret; +++ +++ if (title == BLURAY_TITLE_TOP_MENU) { +++ /* menu call uses different UO mask */ +++ return bd_menu_call(bd, -1); +++ } +++ ++ bd_mutex_lock(&bd->mutex); ++ ret = _try_play_title(bd, title); ++ bd_mutex_unlock(&bd->mutex); ++@@ -3561,7 +3645,37 @@ int bd_get_sound_effect(BLURAY *bd, unsigned sound_id, BLURAY_SOUND_EFFECT *effe ++ } ++ ++ /* ++- * +++ * Direct file access +++ */ +++ +++static int _bd_read_file(BLURAY *bd, const char *dir, const char *file, void **data, int64_t *size) +++{ +++ if (!bd || !bd->disc || !file || !data || !size) { +++ BD_DEBUG(DBG_CRIT, "Invalid arguments for bd_read_file()\n"); +++ return 0; +++ } +++ +++ *data = NULL; +++ *size = (int64_t)disc_read_file(bd->disc, dir, file, (uint8_t**)data); +++ if (!*data || *size < 0) { +++ BD_DEBUG(DBG_BLURAY, "bd_read_file() failed\n"); +++ X_FREE(*data); +++ return 0; +++ } +++ +++ BD_DEBUG(DBG_BLURAY, "bd_read_file(): read %"PRId64" bytes from 
%s"DIR_SEP"%s\n", +++ *size, dir, file); +++ return 1; +++} +++ +++int bd_read_file(BLURAY *bd, const char *path, void **data, int64_t *size) +++{ +++ return _bd_read_file(bd, NULL, path, data, size); +++} +++ +++ +++/* +++ * Metadata ++ */ ++ ++ const struct meta_dl *bd_get_meta(BLURAY *bd) ++@@ -3598,6 +3712,15 @@ const struct meta_dl *bd_get_meta(BLURAY *bd) ++ return meta; ++ } ++ +++int bd_get_meta_file(BLURAY *bd, const char *name, void **data, int64_t *size) +++{ +++ return _bd_read_file(bd, DIR_SEP "BDMV" DIR_SEP "META" DIR_SEP "DL", name, data, size); +++} +++ +++/* +++ * Database access +++ */ +++ ++ struct clpi_cl *bd_get_clpi(BLURAY *bd, unsigned clip_ref) ++ { ++ if (bd->title && clip_ref < bd->title->clip_list.count) { ++@@ -3655,3 +3778,28 @@ void bd_free_bdjo(struct bdjo_data *obj) ++ (void)obj; ++ #endif ++ } +++ +++int bd_get_clip_infos(BLURAY *bd, unsigned clip, uint64_t *clip_start_time, uint64_t *stream_start_time, uint64_t *pos, uint64_t *duration) +++{ +++ if (bd && bd->title && bd->title->clip_list.count > clip) { +++ if (clip_start_time) +++ *clip_start_time = (uint64_t)bd->title->clip_list.clip[clip].title_time << 1; +++ if (stream_start_time) +++ *stream_start_time = (uint64_t)bd->title->clip_list.clip[clip].in_time << 1; +++ if (pos) +++ *pos = (uint64_t)bd->title->clip_list.clip[clip].title_pkt * 192; +++ if (duration) +++ *duration = (uint64_t)bd->title->clip_list.clip[clip].duration << 1; +++ +++ return 1; +++ } +++ return 0; +++} +++ +++struct mpls_pl* bd_get_title_mpls(BLURAY * bd) +++{ +++ if (bd && bd->title) { +++ return bd->title->pl; +++ } +++ return NULL; +++} ++diff --git a/src/libbluray/bluray.h b/src/libbluray/bluray.h ++index 6ade74b..6e74df4 100644 ++--- a/src/libbluray/bluray.h +++++ b/src/libbluray/bluray.h ++@@ -32,6 +32,7 @@ extern "C" { ++ */ ++ ++ #include <stdint.h> +++#include "bdnav/clpi_data.h" ++ ++ #define TITLES_ALL 0 /**< all titles. */ ++ #define TITLES_FILTER_DUP_TITLE 0x01 /**< remove duplicate titles. 
*/ ++@@ -119,6 +120,9 @@ typedef struct { ++ char bdj_disc_id[33]; /* (BD-J) disc ID */ ++ ++ const char *udf_volume_id; /* optional UDF volume identifier */ +++ +++ uint8_t no_menu_support; /* 1 if this disc can't be played using on-disc menus */ +++ ++ } BLURAY_DISC_INFO; ++ ++ /* ++@@ -216,6 +220,7 @@ typedef struct bd_stream_info { ++ } BLURAY_STREAM_INFO; ++ ++ typedef struct bd_clip { +++ uint32_t idx; ++ uint32_t pkt_count; ++ uint8_t still_mode; ++ uint16_t still_time; /* seconds */ ++@@ -266,6 +271,8 @@ typedef struct bd_title_info { ++ ++ uint32_t mark_count; ++ BLURAY_TITLE_MARK *marks; +++ +++ uint8_t mvc_base_view_r_flag; ++ } BLURAY_TITLE_INFO; ++ ++ /* ++@@ -355,12 +362,29 @@ const BLURAY_DISC_INFO *bd_get_disc_info(BLURAY *bd); ++ * If information is provided in multiple languages, currently ++ * selected language (BLURAY_PLAYER_SETTING_MENU_LANG) is used. ++ * +++ * Referenced thumbnail images should be read with bd_get_meta_file(). +++ * ++ * @param bd BLURAY object ++ * @return META_DL (disclib) object, NULL on error ++ */ ++ struct meta_dl; ++ const struct meta_dl *bd_get_meta(BLURAY *bd); ++ +++/** +++ * +++ * Read metadata file from BluRay disc. +++ * +++ * Allocate large enough memory block and read file contents. +++ * Caller must free the memory block with free(). 
+++ * +++ * @param bd BLURAY object +++ * @param file_name name of metadata file +++ * @param data where to store pointer to file data +++ * @param size where to store file size +++ * @return 1 on success, 0 on error +++ */ +++int bd_get_meta_file(BLURAY *bd, const char *file_name, void **data, int64_t *size); +++ ++ ++ /* ++ * Title selection without on-disc menus ++@@ -441,6 +465,16 @@ uint32_t bd_get_current_title(BLURAY *bd); ++ ++ /** ++ * +++ * Find the byte position to specific time in 90Khz ticks +++ * +++ * @param bd BLURAY ojbect +++ * @param tick tick count +++ * @return byte position +++ */ +++int64_t bd_find_seek_point(BLURAY *bd, uint64_t tick); +++ +++/** +++ * ++ * Read from currently selected title file, decrypt if possible ++ * ++ * @param bd BLURAY object ++@@ -536,6 +570,7 @@ void bd_seamless_angle_change(BLURAY *bd, unsigned angle); ++ * @param stream_id stream number (1..N) ++ * @param enable_flag set to 0 to disable streams of this type ++ */ +++#define BLURAY_AUDIO_STREAM 0 ++ #define BLURAY_PG_TEXTST_STREAM 1 ++ ++ void bd_select_stream(BLURAY *bd, uint32_t stream_type, uint32_t stream_id, uint32_t enable_flag); ++@@ -963,7 +998,6 @@ int bd_mouse_select(BLURAY *bd, int64_t pts, uint16_t x, uint16_t y); ++ ++ /* access to internal information */ ++ ++-struct clpi_cl; ++ /** ++ * ++ * Get copy of clip information for requested playitem. ++@@ -1001,6 +1035,43 @@ void bd_free_bdjo(struct bdjo_data *); ++ int bd_start_bdj(BLURAY *bd, const char* start_object); // start BD-J from the specified BD-J object (should be a 5 character string) ++ void bd_stop_bdj(BLURAY *bd); // shutdown BD-J and clean up resources ++ +++/** +++ * +++ * Read a file from BluRay Virtual File System. +++ * +++ * Allocate large enough memory block and read file contents. +++ * Caller must free the memory block with free(). 
+++ * +++ * @param bd BLURAY object +++ * @param file_name path to the file (relative to disc root) +++ * @param data where to store pointer to allocated data +++ * @param size where to store file size +++ * @return 1 on success, 0 on error +++ */ +++int bd_read_file(BLURAY *, const char *path, void **data, int64_t *size); +++ +++/** +++ * +++ * Get information about the clip +++ * +++ * @param bd BLURAY object +++ * @param clip clip index +++ * @param clip_start_time start of the clip (in the total title) (in 90khz) +++ * @param stream_start_time first pts in the clip (in 90khz) +++ * @param byte position of the clip (absolute) +++ * @param duration duration of the clip (in 90khz) +++ */ +++int bd_get_clip_infos(BLURAY *bd, unsigned clip, uint64_t *clip_start_time, uint64_t *stream_start_time, uint64_t *pos, uint64_t *duration); +++ +++/** +++ * Get the MPLS struct of the current title +++ * +++ * @param bd BLURAY object +++ * @return the MPLS struct +++ * +++ * Lifetime of the MPLS pointer is limited to the lifetime of the BD title +++ */ +++struct mpls_pl* bd_get_title_mpls(BLURAY * bd); ++ ++ #ifdef __cplusplus ++ } ++diff --git a/src/libbluray/decoders/graphics_controller.c b/src/libbluray/decoders/graphics_controller.c ++index dabde1c..d3c775a 100644 ++--- a/src/libbluray/decoders/graphics_controller.c +++++ b/src/libbluray/decoders/graphics_controller.c ++@@ -825,6 +825,8 @@ void gc_free(GRAPHICS_CONTROLLER **p) ++ ++ bd_mutex_destroy(&gc->mutex); ++ +++ X_FREE(gc->saved_bog_data); +++ ++ X_FREE(*p); ++ } ++ } ++diff --git a/src/libbluray/decoders/hdmv_pids.h b/src/libbluray/decoders/hdmv_pids.h ++index ac5bc6a..45a55f3 100644 ++--- a/src/libbluray/decoders/hdmv_pids.h +++++ b/src/libbluray/decoders/hdmv_pids.h ++@@ -61,5 +61,12 @@ ++ #define IS_HDMV_PID_IG(pid) ((pid) >= HDMV_PID_IG_FIRST && (pid) <= HDMV_PID_IG_LAST) ++ #define IS_HDMV_PID_TEXTST(pid) ((pid) == HDMV_PID_TEXTST) ++ +++/* +++ * Extract PID from HDMV MPEG-TS packet +++ */ +++ +++#define 
TS_PID(buf) \ +++ ((((buf)[4+1] & 0x1f) << 8) | (buf)[4+2]) +++ ++ ++ #endif // _HDMV_PIDS_H_ ++diff --git a/src/libbluray/decoders/overlay.h b/src/libbluray/decoders/overlay.h ++index 6a31218..7daa478 100644 ++--- a/src/libbluray/decoders/overlay.h +++++ b/src/libbluray/decoders/overlay.h ++@@ -20,6 +20,10 @@ ++ #ifndef BD_OVERLAY_H_ ++ #define BD_OVERLAY_H_ ++ +++#ifdef __cplusplus +++extern "C" { +++#endif +++ ++ #include <stdint.h> ++ ++ #define BD_OVERLAY_INTERFACE_VERSION 2 ++@@ -199,4 +203,8 @@ typedef struct bd_argb_buffer_s { ++ ++ } BD_ARGB_BUFFER; ++ +++#ifdef __cplusplus +++} +++#endif +++ ++ #endif // BD_OVERLAY_H_ ++diff --git a/src/libbluray/decoders/textst_render.c b/src/libbluray/decoders/textst_render.c ++index 8d1527e..0e87d4b 100644 ++--- a/src/libbluray/decoders/textst_render.c +++++ b/src/libbluray/decoders/textst_render.c ++@@ -74,6 +74,10 @@ TEXTST_RENDER *textst_render_init(void) ++ #ifdef HAVE_FT2 ++ TEXTST_RENDER *p = calloc(1, sizeof(TEXTST_RENDER)); ++ +++ if (!p) { +++ return NULL; +++ } +++ ++ if (!FT_Init_FreeType(&p->ft_lib)) { ++ return p; ++ } ++diff --git a/src/libbluray/disc/aacs.c b/src/libbluray/disc/aacs.c ++index 217ef6f..9ae8efb 100644 ++--- a/src/libbluray/disc/aacs.c +++++ b/src/libbluray/disc/aacs.c ++@@ -47,6 +47,8 @@ struct bd_aacs { ++ fptr_p_void get_device_binding_id; ++ fptr_p_void get_device_nonce; ++ fptr_p_void get_media_key; +++ +++ int impl_id; ++ }; ++ ++ ++@@ -58,15 +60,19 @@ static void _libaacs_close(BD_AACS *p) ++ } ++ } ++ ++-void libaacs_unload(BD_AACS **p) +++static void _unload(BD_AACS *p) ++ { ++- if (p && *p) { ++- _libaacs_close(*p); +++ _libaacs_close(p); ++ ++- if ((*p)->h_libaacs) { ++- dl_dlclose((*p)->h_libaacs); ++- } +++ if (p->h_libaacs) { +++ dl_dlclose(p->h_libaacs); +++ } +++} ++ +++void libaacs_unload(BD_AACS **p) +++{ +++ if (p && *p) { +++ _unload(*p); ++ X_FREE(*p); ++ } ++ } ++@@ -82,7 +88,7 @@ int libaacs_required(void *have_file_handle, int (*have_file)(void *, const char ++ 
return 0; ++ } ++ ++-static void *_open_libaacs(void) +++static void *_open_libaacs(int *impl_id) ++ { ++ const char * const libaacs[] = { ++ getenv("LIBAACS_PATH"), ++@@ -91,10 +97,11 @@ static void *_open_libaacs(void) ++ }; ++ unsigned ii; ++ ++- for (ii = 0; ii < sizeof(libaacs) / sizeof(libaacs[0]); ii++) { +++ for (ii = *impl_id; ii < sizeof(libaacs) / sizeof(libaacs[0]); ii++) { ++ if (libaacs[ii]) { ++ void *handle = dl_dlopen(libaacs[ii], "0"); ++ if (handle) { +++ *impl_id = ii; ++ BD_DEBUG(DBG_BLURAY, "Using %s for AACS\n", libaacs[ii]); ++ return handle; ++ } ++@@ -105,11 +112,15 @@ static void *_open_libaacs(void) ++ return NULL; ++ } ++ ++-BD_AACS *libaacs_load(void) +++static BD_AACS *_load(int impl_id) ++ { ++ BD_AACS *p = calloc(1, sizeof(BD_AACS)); +++ if (!p) { +++ return NULL; +++ } +++ p->impl_id = impl_id; ++ ++- p->h_libaacs = _open_libaacs(); +++ p->h_libaacs = _open_libaacs(&p->impl_id); ++ if (!p->h_libaacs) { ++ X_FREE(p); ++ return NULL; ++@@ -140,6 +151,11 @@ BD_AACS *libaacs_load(void) ++ return p; ++ } ++ +++BD_AACS *libaacs_load(void) +++{ +++ return _load(0); +++} +++ ++ int libaacs_open(BD_AACS *p, const char *device, ++ void *file_open_handle, void *file_open_fp, ++ const char *keyfile_path) ++@@ -177,6 +193,22 @@ int libaacs_open(BD_AACS *p, const char *device, ++ BD_DEBUG(DBG_BLURAY, "aacs_open() not found\n"); ++ } ++ +++ if (error_code) { +++ /* failed. try next aacs implementation if available. 
*/ +++ BD_AACS *p2 = _load(p->impl_id + 1); +++ if (p2) { +++ if (!libaacs_open(p2, device, file_open_handle, file_open_fp, keyfile_path)) { +++ /* succeed - swap implementations */ +++ _unload(p); +++ *p = *p2; +++ X_FREE(p2); +++ return 0; +++ } +++ /* failed - report original errors */ +++ libaacs_unload(&p2); +++ } +++ } +++ ++ if (p->aacs) { ++ if (aacs_get_mkb_version) { ++ p->mkbv = aacs_get_mkb_version(p->aacs); ++diff --git a/src/libbluray/disc/bdplus.c b/src/libbluray/disc/bdplus.c ++index b8c4d57..363719f 100644 ++--- a/src/libbluray/disc/bdplus.c +++++ b/src/libbluray/disc/bdplus.c ++@@ -107,6 +107,9 @@ static void *_libbdplus_open(void) ++ BD_BDPLUS *libbdplus_load(void) ++ { ++ BD_BDPLUS *p = calloc(1, sizeof(BD_BDPLUS)); +++ if (!p) { +++ return NULL; +++ } ++ ++ BD_DEBUG(DBG_BDPLUS, "attempting to load libbdplus\n"); ++ ++@@ -241,10 +244,12 @@ BD_BDPLUS_ST *libbdplus_m2ts(BD_BDPLUS *p, uint32_t clip_id, uint64_t pos) ++ if (!p->m2ts) { ++ /* use old API */ ++ BD_BDPLUS_ST *ret = calloc(1, sizeof(BD_BDPLUS_ST)); ++- ret->lib = p; ++- ret->st = NULL; ++- p->title(p->bdplus, clip_id); ++- p->seek(p->bdplus, pos); +++ if (ret) { +++ ret->lib = p; +++ ret->st = NULL; +++ p->title(p->bdplus, clip_id); +++ p->seek(p->bdplus, pos); +++ } ++ return ret; ++ } ++ ++@@ -258,9 +263,11 @@ BD_BDPLUS_ST *libbdplus_m2ts(BD_BDPLUS *p, uint32_t clip_id, uint64_t pos) ++ p->m2ts_close(st); ++ } else { ++ BD_BDPLUS_ST *ret = calloc(1, sizeof(BD_BDPLUS_ST)); ++- ret->lib = p; ++- ret->st = st; ++- BD_DEBUG(DBG_BLURAY | DBG_CRIT, "BD+ active for clip %05d.m2ts\n", clip_id); +++ if (ret) { +++ ret->lib = p; +++ ret->st = st; +++ BD_DEBUG(DBG_BLURAY | DBG_CRIT, "BD+ active for clip %05d.m2ts\n", clip_id); +++ } ++ return ret; ++ } ++ } ++diff --git a/src/libbluray/disc/dec.c b/src/libbluray/disc/dec.c ++index 694646e..1c8a601 100644 ++--- a/src/libbluray/disc/dec.c +++++ b/src/libbluray/disc/dec.c ++@@ -158,6 +158,10 @@ static int _bdrom_have_file(void *p, const char *dir, 
const char *file) ++ char *path; ++ ++ path = str_printf("%s" DIR_SEP "%s", dir, file); +++ if (!path) { +++ return 0; +++ } +++ ++ fp = dev->pf_file_open_bdrom(dev->file_open_bdrom_handle, path); ++ X_FREE(path); ++ ++@@ -175,6 +179,8 @@ static int _libaacs_init(BD_DEC *dec, struct dec_dev *dev, ++ int result; ++ const uint8_t *disc_id; ++ +++ memset(i, 0, sizeof(*i)); +++ ++ libaacs_unload(&dec->aacs); ++ ++ i->aacs_detected = libaacs_required((void*)dev, _bdrom_have_file); ++@@ -201,7 +207,7 @@ static int _libaacs_init(BD_DEC *dec, struct dec_dev *dev, ++ } ++ ++ if (result) { ++- BD_DEBUG(DBG_BLURAY | DBG_CRIT, "aacs_open() failed!\n"); +++ BD_DEBUG(DBG_BLURAY | DBG_CRIT, "aacs_open() failed: %d!\n", result); ++ libaacs_unload(&dec->aacs); ++ return 0; ++ } ++@@ -255,6 +261,13 @@ static int _libbdplus_init(BD_DEC *dec, struct dec_dev *dev, ++ i->bdplus_gen = libbdplus_get_gen(dec->bdplus); ++ i->bdplus_date = libbdplus_get_date(dec->bdplus); ++ i->bdplus_handled = 1; +++ +++ if (i->bdplus_date == 0) { +++ // libmmbd -> no menu support +++ BD_DEBUG(DBG_BLURAY | DBG_CRIT, "WARNING: using libmmbd for BD+. 
On-disc menus will not work.\n"); +++ i->no_menu_support = 1; +++ } +++ ++ return 1; ++ } ++ ++diff --git a/src/libbluray/disc/disc.c b/src/libbluray/disc/disc.c ++index ecd53e3..757b6ed 100644 ++--- a/src/libbluray/disc/disc.c +++++ b/src/libbluray/disc/disc.c ++@@ -65,7 +65,11 @@ static BD_FILE_H *_bdrom_open_path(void *p, const char *rel_path) ++ char *abs_path; ++ ++ abs_path = str_printf("%s%s", disc->disc_root, rel_path); ++- fp = file_open(abs_path, "rb"); +++ if (!abs_path) { +++ return NULL; +++ } +++ +++ fp = file_open(abs_path, "rbS"); ++ X_FREE(abs_path); ++ ++ return fp; ++@@ -78,6 +82,10 @@ static BD_DIR_H *_bdrom_open_dir(void *p, const char *dir) ++ char *path; ++ ++ path = str_printf("%s%s", disc->disc_root, dir); +++ if (!path) { +++ return NULL; +++ } +++ ++ dp = dir_open(path); ++ X_FREE(path); ++ ++@@ -96,8 +104,10 @@ static BD_FILE_H *_overlay_open_path(BD_DISC *p, const char *rel_path) ++ ++ if (p->overlay_root) { ++ char *abs_path = str_printf("%s%s", p->overlay_root, rel_path); ++- fp = file_open(abs_path, "rb"); ++- X_FREE(abs_path); +++ if (abs_path) { +++ fp = file_open(abs_path, "rb"); +++ X_FREE(abs_path); +++ } ++ } ++ ++ bd_mutex_unlock(&p->ovl_mutex); ++@@ -113,8 +123,10 @@ static BD_DIR_H *_overlay_open_dir(BD_DISC *p, const char *dir) ++ ++ if (p->overlay_root) { ++ char *abs_path = str_printf("%s%s", p->disc_root, dir); ++- dp = dir_open_default()(abs_path); ++- X_FREE(abs_path); +++ if (abs_path) { +++ dp = dir_open_default()(abs_path); +++ X_FREE(abs_path); +++ } ++ } ++ ++ bd_mutex_unlock(&p->ovl_mutex); ++@@ -165,7 +177,7 @@ static void _comb_dir_append(BD_DIR_H *dp, BD_DIRENT *entry) ++ } ++ ++ /* append */ ++- priv = realloc(priv, sizeof(*priv) + priv->count * sizeof(BD_DIRENT)); +++ priv = realloc(dp->internal, sizeof(*priv) + priv->count * sizeof(BD_DIRENT)); ++ if (!priv) { ++ return; ++ } ++@@ -183,6 +195,10 @@ static BD_DIR_H *_combine_dirs(BD_DIR_H *ovl, BD_DIR_H *rom) ++ dp->read = _comb_dir_read; ++ dp->close = 
_comb_dir_close; ++ dp->internal = calloc(1, sizeof(COMB_DIR)); +++ if (!dp->internal) { +++ X_FREE(dp); +++ goto out; +++ } ++ ++ while (!dir_read(ovl, &entry)) { ++ _comb_dir_append(dp, &entry); ++@@ -191,6 +207,8 @@ static BD_DIR_H *_combine_dirs(BD_DIR_H *ovl, BD_DIR_H *rom) ++ _comb_dir_append(dp, &entry); ++ } ++ } +++ +++ out: ++ dir_close(ovl); ++ dir_close(rom); ++ ++@@ -342,6 +360,10 @@ BD_FILE_H *disc_open_file(BD_DISC *p, const char *dir, const char *file) ++ char *path; ++ ++ path = str_printf("%s" DIR_SEP "%s", dir, file); +++ if (!path) { +++ return NULL; +++ } +++ ++ fp = disc_open_path(p, path); ++ X_FREE(path); ++ ++@@ -377,7 +399,11 @@ size_t disc_read_file(BD_DISC *disc, const char *dir, const char *file, ++ ++ *data = NULL; ++ ++- fp = disc_open_file(disc, dir, file); +++ if (dir) { +++ fp = disc_open_file(disc, dir, file); +++ } else { +++ fp = disc_open_path(disc, file); +++ } ++ if (!fp) { ++ return 0; ++ } ++@@ -454,7 +480,7 @@ int disc_cache_bdrom_file(BD_DISC *p, const char *rel_path, const char *cache_pa ++ BD_DEBUG(DBG_FILE | DBG_CRIT, "error caching file %s\n", rel_path); ++ file_close(fp_out); ++ file_close(fp_in); ++- file_unlink(cache_path); +++ (void)file_unlink(cache_path); ++ return -1; ++ } ++ } ++diff --git a/src/libbluray/disc/enc_info.h b/src/libbluray/disc/enc_info.h ++index d45d891..47ca94f 100644 ++--- a/src/libbluray/disc/enc_info.h +++++ b/src/libbluray/disc/enc_info.h ++@@ -34,6 +34,8 @@ typedef struct bd_enc_info { ++ uint8_t disc_id[20]; ++ uint8_t bdplus_gen; ++ uint32_t bdplus_date; +++ +++ uint8_t no_menu_support; ++ } BD_ENC_INFO; ++ ++ #endif /* _BD_DISC_ENC_INFO_H_ */ ++diff --git a/src/libbluray/disc/udf_fs.c b/src/libbluray/disc/udf_fs.c ++index 1eec761..3e438ca 100644 ++--- a/src/libbluray/disc/udf_fs.c +++++ b/src/libbluray/disc/udf_fs.c ++@@ -67,6 +67,9 @@ static int64_t _file_read(BD_FILE_H *file, uint8_t *buf, int64_t size) ++ BD_FILE_H *udf_file_open(void *udf, const char *filename) ++ { ++ BD_FILE_H 
*file = calloc(1, sizeof(BD_FILE_H)); +++ if (!file) { +++ return NULL; +++ } ++ ++ BD_DEBUG(DBG_FILE, "Opening UDF file %s... (%p)\n", filename, (void*)file); ++ ++@@ -116,6 +119,9 @@ static int _dir_read(BD_DIR_H *dir, BD_DIRENT *entry) ++ BD_DIR_H *udf_dir_open(void *udf, const char* dirname) ++ { ++ BD_DIR_H *dir = calloc(1, sizeof(BD_DIR_H)); +++ if (!dir) { +++ return NULL; +++ } ++ ++ BD_DEBUG(DBG_DIR, "Opening UDF dir %s... (%p)\n", dirname, (void*)dir); ++ ++diff --git a/src/libbluray/hdmv/mobj_print.c b/src/libbluray/hdmv/mobj_print.c ++index 5c5313e..4361a76 100644 ++--- a/src/libbluray/hdmv/mobj_print.c +++++ b/src/libbluray/hdmv/mobj_print.c ++@@ -159,6 +159,7 @@ static const char * const psr_info[128] = { ++ /* PSR127 */ NULL, ++ }; ++ +++#if 0 ++ static const char * const insn_groups[4] = { ++ "BRANCH", ++ "COMPARE", ++@@ -175,6 +176,7 @@ static const char * const insn_group_set[8] = { ++ "SET", ++ "SETSYSTEM", ++ }; +++#endif ++ ++ static const char * const insn_opt_set[32] = { ++ NULL, ++diff --git a/src/util/logging.c b/src/util/logging.c ++index b8ef1f5..62e6b59 100644 ++--- a/src/util/logging.c +++++ b/src/util/logging.c ++@@ -81,19 +81,34 @@ void bd_debug(const char *file, int line, uint32_t mask, const char *format, ... ++ ++ if (mask & debug_mask) { ++ const char *f = strrchr(file, DIR_SEP_CHAR); ++- char buffer[4096], *pt = buffer; +++ char buffer[4096]; ++ va_list args; +++ int len, len2; ++ ++- pt += sprintf(buffer, "%s:%d: ", f ? f + 1 : file, line); +++ len = sprintf(buffer, "%s:%d: ", f ? 
f + 1 : file, line); +++ if (len < 0) { +++ return; +++ } ++ ++ va_start(args, format); ++- vsnprintf(pt, sizeof(buffer) - (size_t)(intptr_t)(pt - buffer) - 1, format, args); +++ len2 = vsnprintf(buffer + len, sizeof(buffer) - len - 1, format, args); ++ va_end(args); ++ +++ if (len2 < 0) { +++ return; +++ } +++ ++ if (log_func) { +++ buffer[sizeof(buffer)-1] = 0; ++ log_func(buffer); +++ ++ } else { ++- fprintf(logfile, "%s", buffer); +++ len += len2; +++ if ((size_t)len >= sizeof(buffer)) { +++ len = sizeof(buffer); +++ } +++ +++ fwrite(buffer, len, 1, logfile); ++ } ++ } ++ } ++diff --git a/src/util/refcnt.h b/src/util/refcnt.h ++index b839eba..9164921 100644 ++--- a/src/util/refcnt.h +++++ b/src/util/refcnt.h ++@@ -20,6 +20,10 @@ ++ #ifndef BD_REFCNT_H_ ++ #define BD_REFCNT_H_ ++ +++#ifdef __cplusplus +++extern "C" { +++#endif +++ ++ #include "attributes.h" ++ ++ #include <stddef.h> ++@@ -53,4 +57,8 @@ void bd_refcnt_inc(const void *obj); ++ void bd_refcnt_dec(const void *obj); ++ #endif ++ +++#ifdef __cplusplus +++} +++#endif +++ ++ #endif // BD_REFCNT_H_ + +From 5e19f7192303245587548c2564d1e1711019a565 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 2 Mar 2016 19:40:47 +0000 +Subject: [PATCH 48/67] [VideoPlayer] Added new msdk-mvc decoder. 
+ +--- + xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.cpp | 61 ++++++++++++++++++++++ + xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h | 4 ++ + 2 files changed, 65 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.cpp +index 83b1c5639c80020cd53a30844b4f1bb0b45507cb..b075c263d54c7078b69b86127ee023d42f8d5d20 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.cpp +@@ -24,6 +24,7 @@ + #include "utils/log.h" + #include "cores/FFmpeg.h" + #include "Util.h" ++#include <assert.h> + + #ifdef TARGET_WINDOWS + #pragma comment(lib, "avcodec.lib") +@@ -37,6 +38,7 @@ + + extern "C" { + #include "libswscale/swscale.h" ++#include "libavutil/intreadwrite.h" + } + + // allocate a new picture (AV_PIX_FMT_YUV420P) +@@ -402,6 +404,65 @@ double CDVDCodecUtils::NormalizeFrameduration(double frameduration, bool *match) + } + } + ++bool CDVDCodecUtils::IsH264AnnexB(std::string format, AVStream *avstream) ++{ ++ assert(avstream->codec->codec_id == AV_CODEC_ID_H264 || avstream->codec->codec_id == AV_CODEC_ID_H264_MVC); ++ if (avstream->codec->extradata_size < 4) ++ return true; ++ if (avstream->codec->extradata[0] == 1) ++ return false; ++ if (format == "avi") ++ { ++ BYTE *src = avstream->codec->extradata; ++ unsigned startcode = AV_RB32(src); ++ if (startcode == 0x00000001 || (startcode & 0xffffff00) == 0x00000100) ++ return true; ++ if (avstream->codec->codec_tag == MKTAG('A', 'V', 'C', '1') || avstream->codec->codec_tag == MKTAG('a', 'v', 'c', '1')) ++ return false; ++ } ++ return true; ++} ++ ++bool CDVDCodecUtils::ProcessH264MVCExtradata(uint8_t *data, int data_size, uint8_t **mvc_data, int *mvc_data_size) ++{ ++ uint8_t* extradata = data; ++ int extradata_size = data_size; ++ ++ if (extradata_size > 4 && *(char *)extradata == 1) ++ { ++ // Find "mvcC" atom ++ uint32_t state = -1; ++ int i = 0; ++ for (; i < extradata_size; i++) ++ { ++ 
state = (state << 8) | extradata[i]; ++ if (state == MKBETAG('m', 'v', 'c', 'C')) ++ break; ++ } ++ if (i >= 8 && i < extradata_size) ++ { ++ // Update pointers to the start of the mvcC atom ++ extradata = extradata + i - 7; ++ extradata_size = extradata_size - i + 7; ++ // verify size atom and actual size ++ if (extradata_size >= 14 && (AV_RB32(extradata) + 4) <= extradata_size) ++ { ++ extradata += 8; ++ extradata_size -= 8; ++ if (*(char *)extradata == 1) ++ { ++ if (mvc_data) ++ *mvc_data = extradata; ++ if (mvc_data_size) ++ *mvc_data_size = extradata_size; ++ return true; ++ } ++ } ++ } ++ } ++ return false; ++} ++ + struct EFormatMap { + AVPixelFormat pix_fmt; + ERenderFormat format; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h b/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h +index eb76a6fe73f6c884540807cfb93c7a3ecc4eea90..7e24c2364e8d2efa9b8351afc041aa14404d5e51 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h +@@ -24,6 +24,7 @@ + #include "cores/VideoPlayer/VideoRenderers/RenderFormats.h" + + struct YV12Image; ++class AVStream; + + class CDVDCodecUtils + { +@@ -42,6 +43,9 @@ public: + + static double NormalizeFrameduration(double frameduration, bool *match = NULL); + ++ static bool IsH264AnnexB(std::string format, AVStream *avstream); ++ static bool ProcessH264MVCExtradata(uint8_t *extradata, int extradata_size, uint8_t **mvc_extradata = nullptr, int *mvc_extradata_size = nullptr); ++ + static ERenderFormat EFormatFromPixfmt(int fmt); + static int PixfmtFromEFormat(ERenderFormat format); + }; + +From 38596e0f01d1235fe66c4891818f203e676bf6af Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sun, 6 Mar 2016 12:54:59 +0000 +Subject: [PATCH 49/67] mvc: Automatically enable stereo mode + +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 6 +++++- + xbmc/cores/omxplayer/OMXVideo.cpp | 6 +++++- + 2 files changed, 10 insertions(+), 2 
deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index 8691b086a46fcdd03eee809a53ea9b20f74dcc05..b4e2c57d406297f75c5dfc0217f4d33507cb6755 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -571,13 +571,17 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + switch (hints.codec) + { + case AV_CODEC_ID_H264: ++ case AV_CODEC_ID_H264_MVC: + // H.264 + m_codingType = MMAL_ENCODING_H264; + m_pFormatName = "mmal-h264"; +- if (CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_SUPPORTMVC)) ++ if ((hints.codec_tag == MKTAG('M', 'V', 'C', '1') || hints.codec_tag == MKTAG('A', 'M', 'V', 'C')) && ++ CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_SUPPORTMVC)) + { + m_codingType = MMAL_ENCODING_MVC; + m_pFormatName= "mmal-mvc"; ++ if (hints.stereo_mode == "mono") ++ hints.stereo_mode = "mvc_lr"; + } + break; + case AV_CODEC_ID_H263: +diff --git a/xbmc/cores/omxplayer/OMXVideo.cpp b/xbmc/cores/omxplayer/OMXVideo.cpp +index b2bb0a832f5a722bb9de2a48e21e96d5d74e71b8..f8f26a891f6610de83ec143ec4b51f0aea5424de 100644 +--- a/xbmc/cores/omxplayer/OMXVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXVideo.cpp +@@ -401,6 +401,7 @@ bool COMXVideo::Open(CDVDStreamInfo &hints, OMXClock *clock, EDEINTERLACEMODE de + switch (hints.codec) + { + case AV_CODEC_ID_H264: ++ case AV_CODEC_ID_H264_MVC: + { + switch(hints.profile) + { +@@ -437,10 +438,13 @@ bool COMXVideo::Open(CDVDStreamInfo &hints, OMXClock *clock, EDEINTERLACEMODE de + break; + } + } +- if (CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_SUPPORTMVC)) ++ if ((hints.codec_tag == MKTAG('M', 'V', 'C', '1') || hints.codec_tag == MKTAG('A', 'M', 'V', 'C')) && ++ CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_SUPPORTMVC)) + { + m_codingType = OMX_VIDEO_CodingMVC; + m_video_codec_name = "omx-mvc"; 
++ if (hints.stereo_mode == "mono") ++ hints.stereo_mode = "mvc_lr"; + } + break; + case AV_CODEC_ID_MPEG4: + +From 25afb65c978ae78c35cfcfd10cb355ba88d42f7a Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Thu, 24 Mar 2016 13:02:58 +0000 +Subject: [PATCH 50/67] ffmpeg: mvc: fix for pixelation from packets with no + pts/dts + +--- + .../73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch | 24 ++++++++++++++++++++++ + tools/depends/target/ffmpeg/Makefile | 4 +++- + 2 files changed, 27 insertions(+), 1 deletion(-) + create mode 100644 tools/depends/target/ffmpeg/73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch + +diff --git a/tools/depends/target/ffmpeg/73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch b/tools/depends/target/ffmpeg/73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..5240cf58ce40c28d12354db63b7e29143ba46978 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch +@@ -0,0 +1,24 @@ ++From 73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105 Mon Sep 17 00:00:00 2001 ++From: Hendrik Leppkes <h.leppkes@gmail.com> ++Date: Mon, 1 Sep 2014 11:39:09 +0200 ++Subject: [PATCH] h264_parser: force grabing a new timestamp until a frame ++ start was found ++ ++--- ++ libavcodec/h264_parser.c | 3 +++ ++ 1 file changed, 3 insertions(+) ++ ++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c ++index 2fd3f2b..7165652 100644 ++--- a/libavcodec/h264_parser.c +++++ b/libavcodec/h264_parser.c ++@@ -525,6 +525,9 @@ static int h264_parse(AVCodecParserContext *s, ++ } else { ++ next = h264_find_frame_end(p, buf, buf_size); ++ +++ if (next == END_NOT_FOUND && pc->frame_start_found == 0) +++ s->fetch_timestamp = 1; +++ ++ if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) { ++ *poutbuf = NULL; ++ *poutbuf_size = 0; +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index 
92d9437b36eaa4e655990f7e68634e0bbf4d9605..99f375ba5d5b40eecdd423ac5787276e534ad4d7 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -6,7 +6,8 @@ DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ + pfcd_hevc_optimisations.patch \ + 0001-Squashed-commit-of-the-following.patch \ + 0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch 0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch \ +- h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch ++ h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch \ ++ 73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -91,6 +92,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); patch -p1 < ../0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch + cd $(PLATFORM); patch -p1 < ../0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch + cd $(PLATFORM); patch -p1 < ../h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch ++ cd $(PLATFORM); patch -p1 < ../73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch + + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ + +From bcc3e1b01501c7ca65525ae31ecdb7a6028f8e84 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 9 Mar 2016 13:08:44 +0000 +Subject: [PATCH 51/67] stereoscopicmanager: remove hardwarebased for rbp + +--- + xbmc/guilib/StereoscopicsManager.cpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xbmc/guilib/StereoscopicsManager.cpp b/xbmc/guilib/StereoscopicsManager.cpp +index 6eb0752994bc5f8c47efbbf211120af0a0720d0c..9426604f6460651f54cc035476e69530b2ea8493 100644 +--- a/xbmc/guilib/StereoscopicsManager.cpp ++++ b/xbmc/guilib/StereoscopicsManager.cpp +@@ -72,8 +72,10 @@ static const struct StereoModeMap VideoModeToGuiModeMap[] = + { "anaglyph_yellow_blue", 
RENDER_STEREO_MODE_ANAGLYPH_YELLOW_BLUE }, + { "block_lr", RENDER_STEREO_MODE_OFF }, // unsupported + { "block_rl", RENDER_STEREO_MODE_OFF }, // unsupported ++#ifndef TARGET_RASPBERRY_PI + { "mvc_lr", RENDER_STEREO_MODE_HARDWAREBASED }, + { "mvc_rl", RENDER_STEREO_MODE_HARDWAREBASED }, ++#endif + { "mvc_lr", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback + { "mvc_rl", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback + {} + +From 5ca0d3a5d247af35d48a4375131117466ad56f09 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 17 May 2016 19:24:08 +0100 +Subject: [PATCH 52/67] stereoscopics: Switch to using block_lr for mvc to + match makemkv + +See: http://forum.kodi.tv/showthread.php?tid=221407&pid=2339656#pid2339656 +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 2 +- + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 2 +- + xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp | 12 ++++-------- + xbmc/cores/omxplayer/OMXVideo.cpp | 2 +- + xbmc/guilib/StereoscopicsManager.cpp | 11 +++++------ + 5 files changed, 12 insertions(+), 17 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index b4e2c57d406297f75c5dfc0217f4d33507cb6755..470083b2256d23488ca476cebfe8d3ef9f62377e 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -581,7 +581,7 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + m_codingType = MMAL_ENCODING_MVC; + m_pFormatName= "mmal-mvc"; + if (hints.stereo_mode == "mono") +- hints.stereo_mode = "mvc_lr"; ++ hints.stereo_mode = "block_lr"; + } + break; + case AV_CODEC_ID_H263: +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 54e4d0b66680a08c1e4c1be343fabe4371aec6af..5798ba2ede172c89d18b6997874117301a8b6a37 100644 +--- 
a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -1387,7 +1387,7 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + CDVDInputStreamBluray *bluRay = static_cast<CDVDInputStreamBluray*>(m_pInput); + if (bluRay->HasMVC()) + { +- st->stereo_mode = bluRay->AreEyesFlipped() ? "mvc_rl" : "mvc_lr"; ++ st->stereo_mode = bluRay->AreEyesFlipped() ? "block_rl" : "block_lr"; + mvcStream = static_cast<CDVDDemuxMVC*>(bluRay->GetDemuxMVC())->GetAVStream(); + } + } +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp +index 04ceed1504c2d81aaa165d232e128c410b9fdc2c..49f7f7ca7e144a259f6d06bd11cd97aa0b3242aa 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp +@@ -102,10 +102,8 @@ namespace RenderManager { + convert["right_left"] = CONF_FLAGS_STEREO_MODE_SBS | CONF_FLAGS_STEREO_CADANCE_RIGHT_LEFT; + convert["anaglyph_green_magenta"] = 0u; + convert["anaglyph_yellow_blue"] = 0u; +- convert["block_lr"] = 0u; +- convert["block_rl"] = 0u; +- convert["mvc_lr"] = 0u; +- convert["mvc_rl"] = 0u; ++ convert["block_lr"] = CONF_FLAGS_STEREO_CADANCE_LEFT_RIGHT; ++ convert["block_rl"] = CONF_FLAGS_STEREO_CADANCE_RIGHT_LEFT; + } + return convert[mode]; + } +@@ -125,10 +123,8 @@ namespace RenderManager { + convert["row_interleaved_lr"] = "row_interleaved_rl"; + convert["col_interleaved_rl"] = "col_interleaved_lr"; + convert["col_interleaved_lr"] = "col_interleaved_rl"; +- convert["block_lr"] = "block_lr"; +- convert["block_rl"] = "block_rl"; +- convert["mvc_lr"] = "mvc_rl"; +- convert["mvc_rl"] = "mvc_lr"; ++ convert["block_lr"] = "block_rl"; ++ convert["block_rl"] = "block_lr"; + } + std::string res = convert[mode]; + if(res.empty()) +diff --git a/xbmc/cores/omxplayer/OMXVideo.cpp b/xbmc/cores/omxplayer/OMXVideo.cpp +index 
f8f26a891f6610de83ec143ec4b51f0aea5424de..de15bfff05d23949d6e6f4304b15aa7d79120dc2 100644 +--- a/xbmc/cores/omxplayer/OMXVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXVideo.cpp +@@ -444,7 +444,7 @@ bool COMXVideo::Open(CDVDStreamInfo &hints, OMXClock *clock, EDEINTERLACEMODE de + m_codingType = OMX_VIDEO_CodingMVC; + m_video_codec_name = "omx-mvc"; + if (hints.stereo_mode == "mono") +- hints.stereo_mode = "mvc_lr"; ++ hints.stereo_mode = "block_lr"; + } + break; + case AV_CODEC_ID_MPEG4: +diff --git a/xbmc/guilib/StereoscopicsManager.cpp b/xbmc/guilib/StereoscopicsManager.cpp +index 9426604f6460651f54cc035476e69530b2ea8493..cc929b599125a44ac128713fd4331782d9931791 100644 +--- a/xbmc/guilib/StereoscopicsManager.cpp ++++ b/xbmc/guilib/StereoscopicsManager.cpp +@@ -70,14 +70,13 @@ static const struct StereoModeMap VideoModeToGuiModeMap[] = + { "anaglyph_cyan_red", RENDER_STEREO_MODE_ANAGLYPH_RED_CYAN }, + { "anaglyph_green_magenta", RENDER_STEREO_MODE_ANAGLYPH_GREEN_MAGENTA }, + { "anaglyph_yellow_blue", RENDER_STEREO_MODE_ANAGLYPH_YELLOW_BLUE }, +- { "block_lr", RENDER_STEREO_MODE_OFF }, // unsupported +- { "block_rl", RENDER_STEREO_MODE_OFF }, // unsupported + #ifndef TARGET_RASPBERRY_PI +- { "mvc_lr", RENDER_STEREO_MODE_HARDWAREBASED }, +- { "mvc_rl", RENDER_STEREO_MODE_HARDWAREBASED }, ++ { "block_lr", RENDER_STEREO_MODE_HARDWAREBASED }, ++ { "block_rl", RENDER_STEREO_MODE_HARDWAREBASED }, ++#else ++ { "block_lr", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback ++ { "block_rl", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback + #endif +- { "mvc_lr", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback +- { "mvc_rl", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback + {} + }; + + +From 0a3a48ddcd0cbcd263105f844d27afa720da9bf2 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin <anightik@gmail.com> +Date: Thu, 10 Mar 2016 18:11:33 +0300 +Subject: [PATCH 53/67] fixup! Revert supporting crappy tab/sbs subtitles. this + fixes regular subtitles. 
+ +--- + .../VideoPlayer/DVDCodecs/Overlay/DVDOverlayCodecFFmpeg.cpp | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Overlay/DVDOverlayCodecFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Overlay/DVDOverlayCodecFFmpeg.cpp +index 3a080d06c90b0762482816928642e6de7810b539..7c0b70777556ac7694e7fc511cd4bb189fc42e08 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Overlay/DVDOverlayCodecFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Overlay/DVDOverlayCodecFFmpeg.cpp +@@ -243,20 +243,20 @@ CDVDOverlay* CDVDOverlayCodecFFmpeg::GetOverlay() + } + } + +- RENDER_STEREO_MODE render_stereo_mode = g_graphicsContext.GetStereoMode(); ++ /*RENDER_STEREO_MODE render_stereo_mode = g_graphicsContext.GetStereoMode(); + if (render_stereo_mode != RENDER_STEREO_MODE_OFF) + { +- if (rect.h > m_height / 2) ++ if ((rect.h - rect.y) > m_height / 2) + { + m_height /= 2; + rect.h /= 2; + } +- else if (rect.w > m_width / 2) ++ else if ((rect.w - rect.x) > m_width / 2) + { + m_width /= 2; + rect.w /= 2; + } +- } ++ }*/ + + CDVDOverlayImage* overlay = new CDVDOverlayImage(); + +@@ -290,6 +290,7 @@ CDVDOverlay* CDVDOverlayCodecFFmpeg::GetOverlay() + + m_SubtitleIndex++; + ++ CLog::Log(LOGDEBUG, "Overlay: x:%d y:%d w:%d h:%d", overlay->x, overlay->y, overlay->width, overlay->height); + return overlay; + } + + +From 49a3522fd02f0d6e4adb10ec413df1bf5e181421 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 10 Feb 2015 15:29:16 +0000 +Subject: [PATCH 54/67] [libcec] Add repeating keypress patch from popcornmix' + repo + +--- + tools/depends/target/libcec/Makefile | 1 + + tools/depends/target/libcec/popcornmix.patch | 859 +++++++++++++++++++++++++++ + 2 files changed, 860 insertions(+) + create mode 100644 tools/depends/target/libcec/popcornmix.patch + +diff --git a/tools/depends/target/libcec/Makefile b/tools/depends/target/libcec/Makefile +index 
f54af9e7ed3d0a9bef922517728c8b8db51d9d75..ddf996361ad5b46dd2b33fb035b2ed133914a612 100644 +--- a/tools/depends/target/libcec/Makefile ++++ b/tools/depends/target/libcec/Makefile +@@ -21,6 +21,7 @@ $(TARBALLS_LOCATION)/$(ARCHIVE): + $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + rm -rf $(PLATFORM); mkdir -p $(PLATFORM)/build + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) ++ cd $(PLATFORM); patch -p1 < ../popcornmix.patch + cd $(PLATFORM)/build; $(CMAKE) -DBUILD_SHARED_LIBS=1 -DSKIP_PYTHON_WRAPPER:STRING=1 -DCMAKE_INSTALL_LIBDIR=$(PREFIX)/lib .. + + $(LIBDYLIB): $(PLATFORM) +diff --git a/tools/depends/target/libcec/popcornmix.patch b/tools/depends/target/libcec/popcornmix.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..8366a696562a934144cc9a21ea6f2cab3c69e655 +--- /dev/null ++++ b/tools/depends/target/libcec/popcornmix.patch +@@ -0,0 +1,859 @@ ++From ec982e9800ae312972d306b67779215a2add6cde Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Fri, 24 Oct 2014 13:45:21 +0100 ++Subject: [PATCH 1/6] Make released key polling wait for exact time until key ++ gets released ++ ++--- ++ src/libcec/CECClient.cpp | 16 ++++++++++++++-- ++ src/libcec/CECClient.h | 2 +- ++ src/libcec/CECProcessor.cpp | 8 +++++--- ++ src/libcec/LibCEC.cpp | 10 ++++++++-- ++ src/libcec/LibCEC.h | 4 +++- ++ 5 files changed, 31 insertions(+), 9 deletions(-) ++ ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index 35c2d3e..e307c0e 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -1067,7 +1067,7 @@ void CCECClient::SetCurrentButton(const cec_user_control_code iButtonCode) ++ AddKey(key); ++ } ++ ++-void CCECClient::CheckKeypressTimeout(void) +++uint16_t CCECClient::CheckKeypressTimeout(void) ++ { ++ cec_keypress key; ++ ++@@ -1091,12 +1091,24 @@ void CCECClient::CheckKeypressTimeout(void) ++ } ++ else ++ { ++- return; +++ // time when this keypress will 
be released and we'd like to be called again +++ unsigned int timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; +++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton == comboKey && iTimeoutMs > 0) +++ timeout = iTimeoutMs - (iNow - m_buttontime) + 1; +++ else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey) +++ timeout = CEC_BUTTON_TIMEOUT - (iNow - m_buttontime) + 1; +++ if (timeout > CEC_PROCESSOR_SIGNAL_WAIT_TIME) +++ { +++ LIB_CEC->AddLog(CEC_LOG_ERROR, "Unexpected timeout: %d (%.3f %.3f %.3f) k:%02x", timeout, iNow*1e-3, m_buttontime*1e-3, CEC_BUTTON_TIMEOUT*1e-3, m_iCurrentButton); +++ timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; +++ } +++ return timeout; ++ } ++ } ++ ++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key auto-released: %s (%1x)", ToString(key.keycode), key.keycode); ++ QueueAddKey(key); +++ return CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ } ++ ++ bool CCECClient::EnableCallbacks(void *cbParam, ICECCallbacks *callbacks) ++diff --git a/src/libcec/CECClient.h b/src/libcec/CECClient.h ++index 12f8a3b..c9ce5e3 100644 ++--- a/src/libcec/CECClient.h +++++ b/src/libcec/CECClient.h ++@@ -272,7 +272,7 @@ namespace CEC ++ virtual void AddKey(bool bSendComboKey = false); ++ virtual void AddKey(const cec_keypress &key); ++ virtual void SetCurrentButton(const cec_user_control_code iButtonCode); ++- virtual void CheckKeypressTimeout(void); +++ virtual uint16_t CheckKeypressTimeout(void); ++ virtual void SourceActivated(const cec_logical_address logicalAddress); ++ virtual void SourceDeactivated(const cec_logical_address logicalAddress); ++ ++diff --git a/src/libcec/CECProcessor.cpp b/src/libcec/CECProcessor.cpp ++index 99f71aa..604b950 100644 ++--- a/src/libcec/CECProcessor.cpp +++++ b/src/libcec/CECProcessor.cpp ++@@ -52,7 +52,6 @@ ++ using namespace CEC; ++ using namespace PLATFORM; ++ ++-#define CEC_PROCESSOR_SIGNAL_WAIT_TIME 1000 ++ #define ACTIVE_SOURCE_CHECK_INTERVAL 500 ++ #define TV_PRESENT_CHECK_INTERVAL 30000 ++ ++@@ 
-260,6 +259,7 @@ bool CCECProcessor::OnCommandReceived(const cec_command &command) ++ ++ void *CCECProcessor::Process(void) ++ { +++ uint16_t timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ m_libcec->AddLog(CEC_LOG_DEBUG, "processor thread started"); ++ ++ if (!m_connCheck) ++@@ -274,13 +274,13 @@ void *CCECProcessor::Process(void) ++ while (!IsStopped() && m_communication->IsOpen()) ++ { ++ // wait for a new incoming command, and process it ++- if (m_inBuffer.Pop(command, CEC_PROCESSOR_SIGNAL_WAIT_TIME)) +++ if (m_inBuffer.Pop(command, timeout)) ++ ProcessCommand(command); ++ ++ if (CECInitialised() && !IsStopped()) ++ { ++ // check clients for keypress timeouts ++- m_libcec->CheckKeypressTimeout(); +++ timeout = m_libcec->CheckKeypressTimeout(); ++ ++ // check if we need to replace handlers ++ ReplaceHandlers(); ++@@ -311,6 +311,8 @@ void *CCECProcessor::Process(void) ++ tvPresentCheck.Init(TV_PRESENT_CHECK_INTERVAL); ++ } ++ } +++ else +++ timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ } ++ ++ return NULL; ++diff --git a/src/libcec/LibCEC.cpp b/src/libcec/LibCEC.cpp ++index af36b79..5ccb8dd 100644 ++--- a/src/libcec/LibCEC.cpp +++++ b/src/libcec/LibCEC.cpp ++@@ -361,11 +361,17 @@ bool CLibCEC::IsValidPhysicalAddress(uint16_t iPhysicalAddress) ++ iPhysicalAddress <= CEC_MAX_PHYSICAL_ADDRESS; ++ } ++ ++-void CLibCEC::CheckKeypressTimeout(void) +++uint16_t CLibCEC::CheckKeypressTimeout(void) ++ { +++ uint16_t timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ // check all clients ++ for (std::vector<CECClientPtr>::iterator it = m_clients.begin(); it != m_clients.end(); it++) ++- (*it)->CheckKeypressTimeout(); +++ { +++ uint16_t t = (*it)->CheckKeypressTimeout(); +++ if (t < timeout) +++ timeout = t; +++ } +++ return timeout; ++ } ++ ++ void CLibCEC::AddLog(const cec_log_level level, const char *strFormat, ...) 
++diff --git a/src/libcec/LibCEC.h b/src/libcec/LibCEC.h ++index 6d9a229..d9d1e7b 100644 ++--- a/src/libcec/LibCEC.h +++++ b/src/libcec/LibCEC.h ++@@ -39,6 +39,8 @@ ++ #include "CECTypeUtils.h" ++ #include <memory> ++ +++#define CEC_PROCESSOR_SIGNAL_WAIT_TIME 1000 +++ ++ namespace CEC ++ { ++ class CAdapterCommunication; ++@@ -125,7 +127,7 @@ namespace CEC ++ ++ void AddLog(const cec_log_level level, const char *strFormat, ...); ++ void AddCommand(const cec_command &command); ++- void CheckKeypressTimeout(void); +++ uint16_t CheckKeypressTimeout(void); ++ void Alert(const libcec_alert type, const libcec_parameter ¶m); ++ ++ static bool IsValidPhysicalAddress(uint16_t iPhysicalAddress); ++-- ++1.9.1 ++ ++ ++From 41f0f3ec9ac136da3565c96fd5a7075499f3938d Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Fri, 24 Oct 2014 13:51:34 +0100 ++Subject: [PATCH 2/6] Keep track of time since initial button press and last ++ button update ++ ++--- ++ src/libcec/CECClient.cpp | 44 +++++++++++++++++++++++++++----------------- ++ src/libcec/CECClient.h | 3 ++- ++ 2 files changed, 29 insertions(+), 18 deletions(-) ++ ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index e307c0e..e7935b9 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -54,7 +54,8 @@ CCECClient::CCECClient(CCECProcessor *processor, const libcec_configuration &con ++ m_bInitialised(false), ++ m_bRegistered(false), ++ m_iCurrentButton(CEC_USER_CONTROL_CODE_UNKNOWN), ++- m_buttontime(0), +++ m_initialButtontime(0), +++ m_updateButtontime(0), ++ m_iPreventForwardingPowerOffCommand(0), ++ m_iLastKeypressTime(0) ++ { ++@@ -981,9 +982,10 @@ void CCECClient::AddKey(bool bSendComboKey /* = false */) ++ CLockObject lock(m_mutex); ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN) ++ { ++- key.duration = (unsigned int) (GetTimeMs() - m_buttontime); +++ unsigned int duration = (unsigned int) (GetTimeMs() - m_updateButtontime); +++ key.duration = 
(unsigned int) (GetTimeMs() - m_initialButtontime); ++ ++- if (key.duration > m_configuration.iComboKeyTimeoutMs || +++ if (duration > m_configuration.iComboKeyTimeoutMs || ++ m_configuration.iComboKeyTimeoutMs == 0 || ++ m_iCurrentButton != m_configuration.comboKey || ++ bSendComboKey) ++@@ -991,14 +993,15 @@ void CCECClient::AddKey(bool bSendComboKey /* = false */) ++ key.keycode = m_iCurrentButton; ++ ++ m_iCurrentButton = CEC_USER_CONTROL_CODE_UNKNOWN; ++- m_buttontime = 0; +++ m_initialButtontime = 0; +++ m_updateButtontime = 0; ++ } ++ } ++ } ++ ++ if (key.keycode != CEC_USER_CONTROL_CODE_UNKNOWN) ++ { ++- LIB_CEC->AddLog(CEC_LOG_DEBUG, "key released: %s (%1x)", ToString(key.keycode), key.keycode); +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key released: %s (%1x) D:%dms", ToString(key.keycode), key.keycode, key.duration); ++ QueueAddKey(key); ++ } ++ } ++@@ -1012,7 +1015,7 @@ void CCECClient::AddKey(const cec_keypress &key) ++ AddKey(); ++ return; ++ } ++- +++ bool isrepeat = false; ++ cec_keypress transmitKey(key); ++ cec_user_control_code comboKey(m_configuration.clientVersion >= LIBCEC_VERSION_TO_UINT(2, 0, 5) ? ++ m_configuration.comboKey : CEC_USER_CONTROL_CODE_STOP); ++@@ -1035,22 +1038,27 @@ void CCECClient::AddKey(const cec_keypress &key) ++ AddKey(true); ++ } ++ +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key pressed: %s (%1x) current(%lx) duration(%d)", ToString(transmitKey.keycode), transmitKey.keycode, m_iCurrentButton, key.duration); +++ ++ if (m_iCurrentButton == key.keycode) ++ { ++- m_buttontime = GetTimeMs(); +++ m_updateButtontime = GetTimeMs(); +++ isrepeat = true; ++ } ++ else ++ { ++- AddKey(); +++ if (m_iCurrentButton != transmitKey.keycode) +++ AddKey(); ++ if (key.duration == 0) ++ { ++ m_iCurrentButton = transmitKey.keycode; ++- m_buttontime = m_iCurrentButton == CEC_USER_CONTROL_CODE_UNKNOWN || key.duration > 0 ? 0 : GetTimeMs(); +++ m_initialButtontime = m_iCurrentButton == CEC_USER_CONTROL_CODE_UNKNOWN || key.duration > 0 ? 
0 : GetTimeMs(); +++ m_updateButtontime = m_initialButtontime; ++ } ++ } ++ } ++ ++- if (key.keycode != comboKey || key.duration > 0) +++ if (!isrepeat && (key.keycode != comboKey || key.duration > 0)) ++ { ++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key pressed: %s (%1x)", ToString(transmitKey.keycode), transmitKey.keycode); ++ QueueAddKey(transmitKey); ++@@ -1074,32 +1082,34 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ { ++ CLockObject lock(m_mutex); ++ uint64_t iNow = GetTimeMs(); +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "%s T:%.3f", __FUNCTION__, iNow*1e-3); ++ cec_user_control_code comboKey(m_configuration.clientVersion >= LIBCEC_VERSION_TO_UINT(2, 0, 5) ? ++ m_configuration.comboKey : CEC_USER_CONTROL_CODE_STOP); ++ uint32_t iTimeoutMs(m_configuration.clientVersion >= LIBCEC_VERSION_TO_UINT(2, 0, 5) ? ++ m_configuration.iComboKeyTimeoutMs : CEC_DEFAULT_COMBO_TIMEOUT_MS); ++ ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- ((m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_buttontime > iTimeoutMs) || ++- (m_iCurrentButton != comboKey && iNow - m_buttontime > CEC_BUTTON_TIMEOUT))) +++ ((m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime > iTimeoutMs) || +++ (m_iCurrentButton != comboKey && iNow - m_updateButtontime > CEC_BUTTON_TIMEOUT))) ++ { ++- key.duration = (unsigned int) (iNow - m_buttontime); +++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = m_iCurrentButton; ++ ++ m_iCurrentButton = CEC_USER_CONTROL_CODE_UNKNOWN; ++- m_buttontime = 0; +++ m_initialButtontime = 0; +++ m_updateButtontime = 0; ++ } ++ else ++ { ++ // time when this keypress will be released and we'd like to be called again ++ unsigned int timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton == comboKey && iTimeoutMs > 0) ++- timeout = iTimeoutMs - (iNow - m_buttontime) + 1; +++ timeout = iTimeoutMs - (iNow - m_updateButtontime) + 1; ++ else if 
(m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey) ++- timeout = CEC_BUTTON_TIMEOUT - (iNow - m_buttontime) + 1; +++ timeout = CEC_BUTTON_TIMEOUT - (iNow - m_updateButtontime) + 1; ++ if (timeout > CEC_PROCESSOR_SIGNAL_WAIT_TIME) ++ { ++- LIB_CEC->AddLog(CEC_LOG_ERROR, "Unexpected timeout: %d (%.3f %.3f %.3f) k:%02x", timeout, iNow*1e-3, m_buttontime*1e-3, CEC_BUTTON_TIMEOUT*1e-3, m_iCurrentButton); +++ LIB_CEC->AddLog(CEC_LOG_ERROR, "Unexpected timeout: %d (%.3f %.3f %.3f) k:%02x", timeout, iNow*1e-3, m_updateButtontime*1e-3, CEC_BUTTON_TIMEOUT*1e-3, m_iCurrentButton); ++ timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ } ++ return timeout; ++diff --git a/src/libcec/CECClient.h b/src/libcec/CECClient.h ++index c9ce5e3..611c68b 100644 ++--- a/src/libcec/CECClient.h +++++ b/src/libcec/CECClient.h ++@@ -404,7 +404,8 @@ namespace CEC ++ PLATFORM::CMutex m_mutex; /**< mutex for changes to this instance */ ++ PLATFORM::CMutex m_cbMutex; /**< mutex that is held when doing anything with callbacks */ ++ cec_user_control_code m_iCurrentButton; /**< the control code of the button that's currently held down (if any) */ ++- int64_t m_buttontime; /**< the timestamp when the button was pressed (in seconds since epoch), or 0 if none was pressed. */ +++ int64_t m_initialButtontime; /**< the timestamp when the button was initially pressed (in seconds since epoch), or 0 if none was pressed. */ +++ int64_t m_updateButtontime; /**< the timestamp when the button was updated (in seconds since epoch), or 0 if none was pressed. 
*/ ++ int64_t m_iPreventForwardingPowerOffCommand; /**< prevent forwarding standby commands until this time */ ++ int64_t m_iLastKeypressTime; /**< last time a key press was sent to the client */ ++ cec_keypress m_lastKeypress; /**< the last key press that was sent to the client */ ++-- ++1.9.1 ++ ++ ++From 273ead6980b69eddf98810eb1eb33d94a7d74fce Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Tue, 28 Oct 2014 00:09:18 +0000 ++Subject: [PATCH 3/6] Support repeating button presses with configurable repeat ++ rate ++ ++--- ++ include/cectypes.h | 6 ++ ++ src/libcec/CECClient.cpp | 100 +++++++++++++++++++---- ++ src/libcec/CECClient.h | 6 +- ++ src/libcec/implementations/CECCommandHandler.cpp | 2 +- ++ 4 files changed, 96 insertions(+), 18 deletions(-) ++ ++diff --git a/include/cectypes.h b/include/cectypes.h ++index acff259..8f098ef 100644 ++--- a/include/cectypes.h +++++ b/include/cectypes.h ++@@ -1493,6 +1493,8 @@ struct libcec_configuration ++ XXX changed meaning in 2.2.0 to not break binary compatibility. next major (3.0) release will fix it in a nicer way */ ++ cec_user_control_code comboKey; /*!< key code that initiates combo keys. defaults to CEC_USER_CONTROL_CODE_F1_BLUE. CEC_USER_CONTROL_CODE_UNKNOWN to disable. added in 2.0.5 */ ++ uint32_t iComboKeyTimeoutMs; /*!< timeout until the combo key is sent as normal keypress */ +++ uint32_t iButtonRepeatRateMs; /*!< rate at which buttons autorepeat. 
0 means rely on CEC device */ +++ uint32_t iButtonReleaseDelayMs;/*!< duration after last update until a button is considered released */ ++ ++ #ifdef __cplusplus ++ libcec_configuration(void) { Clear(); } ++@@ -1527,6 +1529,8 @@ struct libcec_configuration ++ cecVersion == other.cecVersion && ++ adapterType == other.adapterType && ++ iDoubleTapTimeout50Ms == other.iDoubleTapTimeout50Ms && +++ iButtonRepeatRateMs == other.iButtonRepeatRateMs && +++ iButtonReleaseDelayMs == other.iButtonReleaseDelayMs && ++ (other.clientVersion <= LIBCEC_VERSION_TO_UINT(2, 0, 4) || comboKey == other.comboKey) && ++ (other.clientVersion <= LIBCEC_VERSION_TO_UINT(2, 0, 4) || iComboKeyTimeoutMs == other.iComboKeyTimeoutMs) && ++ (other.clientVersion < LIBCEC_VERSION_TO_UINT(2, 1, 0) || bPowerOnScreensaver == other.bPowerOnScreensaver)); ++@@ -1567,6 +1571,8 @@ struct libcec_configuration ++ iDoubleTapTimeout50Ms = CEC_DOUBLE_TAP_TIMEOUT_50_MS; ++ comboKey = CEC_USER_CONTROL_CODE_STOP; ++ iComboKeyTimeoutMs = CEC_DEFAULT_COMBO_TIMEOUT_MS; +++ iButtonRepeatRateMs = 0; +++ iButtonReleaseDelayMs = CEC_BUTTON_TIMEOUT; ++ ++ memset(strDeviceName, 0, 13); ++ deviceTypes.Clear(); ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index e7935b9..598628d 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -56,6 +56,10 @@ CCECClient::CCECClient(CCECProcessor *processor, const libcec_configuration &con ++ m_iCurrentButton(CEC_USER_CONTROL_CODE_UNKNOWN), ++ m_initialButtontime(0), ++ m_updateButtontime(0), +++ m_repeatButtontime(0), +++ m_releaseButtontime(0), +++ m_pressedButtoncount(0), +++ m_releasedButtoncount(0), ++ m_iPreventForwardingPowerOffCommand(0), ++ m_iLastKeypressTime(0) ++ { ++@@ -851,6 +855,9 @@ bool CCECClient::GetCurrentConfiguration(libcec_configuration &configuration) ++ configuration.bMonitorOnly = m_configuration.bMonitorOnly; ++ configuration.cecVersion = m_configuration.cecVersion; ++ configuration.adapterType = 
m_configuration.adapterType; +++ configuration.iDoubleTapTimeout50Ms = m_configuration.iDoubleTapTimeout50Ms; +++ configuration.iButtonRepeatRateMs = m_configuration.iButtonRepeatRateMs; +++ configuration.iButtonReleaseDelayMs = m_configuration.iButtonReleaseDelayMs; ++ ++ return true; ++ } ++@@ -894,6 +901,9 @@ bool CCECClient::SetConfiguration(const libcec_configuration &configuration) ++ m_configuration.cecVersion = configuration.cecVersion; ++ m_configuration.adapterType = configuration.adapterType; ++ m_configuration.iDoubleTapTimeout50Ms = configuration.iDoubleTapTimeout50Ms; +++ m_configuration.iButtonRepeatRateMs = configuration.iButtonRepeatRateMs; +++ m_configuration.iButtonReleaseDelayMs = configuration.iButtonReleaseDelayMs; +++ ++ m_configuration.deviceTypes.Add(configuration.deviceTypes[0]); ++ ++ if (m_configuration.clientVersion >= LIBCEC_VERSION_TO_UINT(2, 0, 5)) ++@@ -950,6 +960,7 @@ bool CCECClient::SetConfiguration(const libcec_configuration &configuration) ++ primary->ActivateSource(); ++ } ++ +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "%s: %d:%d:%d", __FUNCTION__, DoubleTapTimeoutMS(), m_configuration.iButtonRepeatRateMs, m_configuration.iButtonReleaseDelayMs); ++ return true; ++ } ++ ++@@ -973,11 +984,15 @@ void CCECClient::AddCommand(const cec_command &command) ++ } ++ } ++ ++-void CCECClient::AddKey(bool bSendComboKey /* = false */) +++void CCECClient::AddKey(bool bSendComboKey /* = false */, bool bButtonRelease /* = false */) ++ { ++ cec_keypress key; ++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++ +++ // we ignore button releases when supporting repeating keys +++ if (bButtonRelease && m_configuration.iButtonRepeatRateMs && m_configuration.iButtonReleaseDelayMs) +++ return; +++ ++ { ++ CLockObject lock(m_mutex); ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN) ++@@ -995,6 +1010,10 @@ void CCECClient::AddKey(bool bSendComboKey /* = false */) ++ m_iCurrentButton = CEC_USER_CONTROL_CODE_UNKNOWN; ++ m_initialButtontime = 0; ++ 
m_updateButtontime = 0; +++ m_repeatButtontime = 0; +++ m_releaseButtontime = 0; +++ m_pressedButtoncount = 0; +++ m_releasedButtoncount = 0; ++ } ++ } ++ } ++@@ -1012,6 +1031,7 @@ void CCECClient::AddKey(const cec_keypress &key) ++ key.keycode < CEC_USER_CONTROL_CODE_SELECT) ++ { ++ // send back the previous key if there is one +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "Unexpected key %s (%1x) D:%dms", ToString(key.keycode), key.keycode, key.duration); ++ AddKey(); ++ return; ++ } ++@@ -1035,7 +1055,10 @@ void CCECClient::AddKey(const cec_keypress &key) ++ transmitKey.keycode = CEC_USER_CONTROL_CODE_DOT; ++ // default, send back the previous key ++ else +++ { +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "Combo key %s (%1x) D%dms:", ToString(key.keycode), key.keycode, key.duration); ++ AddKey(true); +++ } ++ } ++ ++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key pressed: %s (%1x) current(%lx) duration(%d)", ToString(transmitKey.keycode), transmitKey.keycode, m_iCurrentButton, key.duration); ++@@ -1043,17 +1066,44 @@ void CCECClient::AddKey(const cec_keypress &key) ++ if (m_iCurrentButton == key.keycode) ++ { ++ m_updateButtontime = GetTimeMs(); ++- isrepeat = true; +++ m_releaseButtontime = m_updateButtontime + (m_configuration.iButtonReleaseDelayMs ? 
m_configuration.iButtonReleaseDelayMs : CEC_BUTTON_TIMEOUT); +++ // want to have seen some updated before considering a repeat +++ if (m_configuration.iButtonRepeatRateMs) +++ { +++ if (!m_repeatButtontime && m_pressedButtoncount > 1) +++ m_repeatButtontime = m_initialButtontime + DoubleTapTimeoutMS(); +++ isrepeat = true; +++ } +++ m_pressedButtoncount++; ++ } ++ else ++ { ++ if (m_iCurrentButton != transmitKey.keycode) +++ { +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "Changed key %s (%1x) D:%dms cur:%lx", ToString(transmitKey.keycode), transmitKey.keycode, transmitKey.duration, m_iCurrentButton); ++ AddKey(); +++ } ++ if (key.duration == 0) ++ { ++ m_iCurrentButton = transmitKey.keycode; ++- m_initialButtontime = m_iCurrentButton == CEC_USER_CONTROL_CODE_UNKNOWN || key.duration > 0 ? 0 : GetTimeMs(); ++- m_updateButtontime = m_initialButtontime; +++ if (m_iCurrentButton == CEC_USER_CONTROL_CODE_UNKNOWN) +++ { +++ m_initialButtontime = 0; +++ m_updateButtontime = 0; +++ m_repeatButtontime = 0; +++ m_releaseButtontime = 0; +++ m_pressedButtoncount = 0; +++ m_releasedButtoncount = 0; +++ } +++ else +++ { +++ m_initialButtontime = GetTimeMs(); +++ m_updateButtontime = m_initialButtontime; +++ m_repeatButtontime = 0; // set this on next update +++ m_releaseButtontime = m_initialButtontime + (m_configuration.iButtonReleaseDelayMs ? 
m_configuration.iButtonReleaseDelayMs : CEC_BUTTON_TIMEOUT); +++ m_pressedButtoncount = 1; +++ m_releasedButtoncount = 0; +++ } ++ } ++ } ++ } ++@@ -1072,12 +1122,16 @@ void CCECClient::SetCurrentButton(const cec_user_control_code iButtonCode) ++ key.duration = 0; ++ key.keycode = iButtonCode; ++ +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "SetCurrentButton %s (%1x) D:%dms cur:%lx", ToString(key.keycode), key.keycode, key.duration); ++ AddKey(key); ++ } ++ ++ uint16_t CCECClient::CheckKeypressTimeout(void) ++ { +++ // time when we'd like to be called again +++ unsigned int timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ cec_keypress key; +++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++ ++ { ++ CLockObject lock(m_mutex); ++@@ -1089,8 +1143,8 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_configuration.iComboKeyTimeoutMs : CEC_DEFAULT_COMBO_TIMEOUT_MS); ++ ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- ((m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime > iTimeoutMs) || ++- (m_iCurrentButton != comboKey && iNow - m_updateButtontime > CEC_BUTTON_TIMEOUT))) +++ ((m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime >= iTimeoutMs) || +++ (m_iCurrentButton != comboKey && m_releaseButtontime && iNow >= (uint64_t)m_releaseButtontime))) ++ { ++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = m_iCurrentButton; ++@@ -1098,27 +1152,41 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_iCurrentButton = CEC_USER_CONTROL_CODE_UNKNOWN; ++ m_initialButtontime = 0; ++ m_updateButtontime = 0; +++ m_repeatButtontime = 0; +++ m_releaseButtontime = 0; +++ m_pressedButtoncount = 0; +++ m_releasedButtoncount = 0; +++ } +++ else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && +++ (m_iCurrentButton != comboKey && m_repeatButtontime && iNow >= (uint64_t)m_repeatButtontime)) +++ { +++ key.duration = 0; +++ key.keycode = m_iCurrentButton; +++ m_repeatButtontime = iNow + 
m_configuration.iButtonRepeatRateMs; +++ timeout = std::min((uint64_t)timeout, m_repeatButtontime - iNow); ++ } ++ else ++ { ++- // time when this keypress will be released and we'd like to be called again ++- unsigned int timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton == comboKey && iTimeoutMs > 0) ++- timeout = iTimeoutMs - (iNow - m_updateButtontime) + 1; ++- else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey) ++- timeout = CEC_BUTTON_TIMEOUT - (iNow - m_updateButtontime) + 1; +++ timeout = std::min((uint64_t)timeout, m_updateButtontime - iNow + iTimeoutMs); +++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey && m_releaseButtontime) +++ timeout = std::min((uint64_t)timeout, m_releaseButtontime - iNow); +++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey && m_repeatButtontime) +++ timeout = std::min((uint64_t)timeout, m_repeatButtontime - iNow); ++ if (timeout > CEC_PROCESSOR_SIGNAL_WAIT_TIME) ++ { ++- LIB_CEC->AddLog(CEC_LOG_ERROR, "Unexpected timeout: %d (%.3f %.3f %.3f) k:%02x", timeout, iNow*1e-3, m_updateButtontime*1e-3, CEC_BUTTON_TIMEOUT*1e-3, m_iCurrentButton); +++ LIB_CEC->AddLog(CEC_LOG_ERROR, "Unexpected timeout: %d (%.3f %.3f %.3f) k:%02x", timeout, iNow*1e-3, m_updateButtontime*1e-3, m_releaseButtontime*1e-3, m_iCurrentButton); ++ timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ } ++- return timeout; ++ } +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key %s: %s (%1x) timeout:%dms (rel:%d,rep:%d,prs:%d,rel:%d)", key.keycode == CEC_USER_CONTROL_CODE_UNKNOWN ? "idle" : key.duration ? "released" : "repeated", +++ ToString(m_iCurrentButton), m_iCurrentButton, timeout, (int)(m_releaseButtontime ? m_releaseButtontime - iNow : 0), (int)(m_repeatButtontime ? 
m_repeatButtontime - iNow : 0), m_pressedButtoncount, m_releasedButtoncount); ++ } ++ ++- LIB_CEC->AddLog(CEC_LOG_DEBUG, "key auto-released: %s (%1x)", ToString(key.keycode), key.keycode); ++- QueueAddKey(key); ++- return CEC_PROCESSOR_SIGNAL_WAIT_TIME; +++ if (key.keycode != CEC_USER_CONTROL_CODE_UNKNOWN) +++ QueueAddKey(key); +++ +++ return timeout; ++ } ++ ++ bool CCECClient::EnableCallbacks(void *cbParam, ICECCallbacks *callbacks) ++diff --git a/src/libcec/CECClient.h b/src/libcec/CECClient.h ++index 611c68b..adeb5af 100644 ++--- a/src/libcec/CECClient.h +++++ b/src/libcec/CECClient.h ++@@ -269,7 +269,7 @@ namespace CEC ++ // callbacks ++ virtual void Alert(const libcec_alert type, const libcec_parameter &param) { QueueAlert(type, param); } ++ virtual void AddLog(const cec_log_message &message) { QueueAddLog(message); } ++- virtual void AddKey(bool bSendComboKey = false); +++ virtual void AddKey(bool bSendComboKey = false, bool bButtonRelease = false); ++ virtual void AddKey(const cec_keypress &key); ++ virtual void SetCurrentButton(const cec_user_control_code iButtonCode); ++ virtual uint16_t CheckKeypressTimeout(void); ++@@ -406,6 +406,10 @@ namespace CEC ++ cec_user_control_code m_iCurrentButton; /**< the control code of the button that's currently held down (if any) */ ++ int64_t m_initialButtontime; /**< the timestamp when the button was initially pressed (in seconds since epoch), or 0 if none was pressed. */ ++ int64_t m_updateButtontime; /**< the timestamp when the button was updated (in seconds since epoch), or 0 if none was pressed. */ +++ int64_t m_repeatButtontime; /**< the timestamp when the button will next repeat (in seconds since epoch), or 0 if repeat is disabled. */ +++ int64_t m_releaseButtontime; /**< the timestamp when the button will be released (in seconds since epoch), or 0 if none was pressed. */ +++ int32_t m_pressedButtoncount; /**< the number of times a button released message has been seen for this press. 
*/ +++ int32_t m_releasedButtoncount; /**< the number of times a button pressed message has been seen for this press. */ ++ int64_t m_iPreventForwardingPowerOffCommand; /**< prevent forwarding standby commands until this time */ ++ int64_t m_iLastKeypressTime; /**< last time a key press was sent to the client */ ++ cec_keypress m_lastKeypress; /**< the last key press that was sent to the client */ ++diff --git a/src/libcec/implementations/CECCommandHandler.cpp b/src/libcec/implementations/CECCommandHandler.cpp ++index 6d6244e..d64186f 100644 ++--- a/src/libcec/implementations/CECCommandHandler.cpp +++++ b/src/libcec/implementations/CECCommandHandler.cpp ++@@ -770,7 +770,7 @@ int CCECCommandHandler::HandleUserControlRelease(const cec_command &command) ++ ++ CECClientPtr client = m_processor->GetClient(command.destination); ++ if (client) ++- client->AddKey(); +++ client->AddKey(false, true); ++ ++ return COMMAND_HANDLED; ++ } ++-- ++1.9.1 ++ ++ ++From 3336d0827f7fd159430f3431642b07090c06c869 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Tue, 28 Oct 2014 01:21:35 +0000 ++Subject: [PATCH 4/6] Skip double press removal. It is handled through other ++ means. 
++ ++--- ++ src/libcec/CECClient.cpp | 18 +----------------- ++ src/libcec/CECClient.h | 2 -- ++ 2 files changed, 1 insertion(+), 19 deletions(-) ++ ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index 598628d..dccd874 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -60,11 +60,8 @@ CCECClient::CCECClient(CCECProcessor *processor, const libcec_configuration &con ++ m_releaseButtontime(0), ++ m_pressedButtoncount(0), ++ m_releasedButtoncount(0), ++- m_iPreventForwardingPowerOffCommand(0), ++- m_iLastKeypressTime(0) +++ m_iPreventForwardingPowerOffCommand(0) ++ { ++- m_lastKeypress.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++- m_lastKeypress.duration = 0; ++ m_configuration.Clear(); ++ // set the initial configuration ++ SetConfiguration(configuration); ++@@ -1647,20 +1644,7 @@ void CCECClient::CallbackAddKey(const cec_keypress &key) ++ { ++ CLockObject lock(m_cbMutex); ++ if (m_configuration.callbacks && m_configuration.callbacks->CBCecKeyPress) ++- { ++- // prevent double taps ++- int64_t now = GetTimeMs(); ++- if (m_lastKeypress.keycode != key.keycode || ++- key.duration > 0 || ++- now - m_iLastKeypressTime >= DoubleTapTimeoutMS()) ++- { ++- // no double tap ++- if (key.duration == 0) ++- m_iLastKeypressTime = now; ++- m_lastKeypress = key; ++ m_configuration.callbacks->CBCecKeyPress(m_configuration.callbackParam, key); ++- } ++- } ++ } ++ ++ void CCECClient::CallbackAddLog(const cec_log_message &message) ++diff --git a/src/libcec/CECClient.h b/src/libcec/CECClient.h ++index adeb5af..43a713b 100644 ++--- a/src/libcec/CECClient.h +++++ b/src/libcec/CECClient.h ++@@ -411,8 +411,6 @@ namespace CEC ++ int32_t m_pressedButtoncount; /**< the number of times a button released message has been seen for this press. */ ++ int32_t m_releasedButtoncount; /**< the number of times a button pressed message has been seen for this press. 
*/ ++ int64_t m_iPreventForwardingPowerOffCommand; /**< prevent forwarding standby commands until this time */ ++- int64_t m_iLastKeypressTime; /**< last time a key press was sent to the client */ ++- cec_keypress m_lastKeypress; /**< the last key press that was sent to the client */ ++ PLATFORM::SyncedBuffer<CCallbackWrap*> m_callbackCalls; ++ }; ++ } ++-- ++1.9.1 ++ ++ ++From 0dd0234f620a546bfa843172648383f83d88088c Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Mon, 3 Nov 2014 23:28:04 +0000 ++Subject: [PATCH 5/6] Pass through duration on all button repeats ++ ++--- ++ src/libcec/CECClient.cpp | 34 ++++++++++++++++++++++++---------- ++ 1 file changed, 24 insertions(+), 10 deletions(-) ++ ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index dccd874..1946148 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -986,10 +986,6 @@ void CCECClient::AddKey(bool bSendComboKey /* = false */, bool bButtonRelease /* ++ cec_keypress key; ++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++ ++- // we ignore button releases when supporting repeating keys ++- if (bButtonRelease && m_configuration.iButtonRepeatRateMs && m_configuration.iButtonReleaseDelayMs) ++- return; ++- ++ { ++ CLockObject lock(m_mutex); ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN) ++@@ -1015,6 +1011,10 @@ void CCECClient::AddKey(bool bSendComboKey /* = false */, bool bButtonRelease /* ++ } ++ } ++ +++ // we don't forward releases when supporting repeating keys +++ if (bButtonRelease && m_configuration.iButtonRepeatRateMs) +++ return; +++ ++ if (key.keycode != CEC_USER_CONTROL_CODE_UNKNOWN) ++ { ++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key released: %s (%1x) D:%dms", ToString(key.keycode), key.keycode, key.duration); ++@@ -1107,7 +1107,7 @@ void CCECClient::AddKey(const cec_keypress &key) ++ ++ if (!isrepeat && (key.keycode != comboKey || key.duration > 0)) ++ { ++- LIB_CEC->AddLog(CEC_LOG_DEBUG, "key pressed: %s (%1x)", 
ToString(transmitKey.keycode), transmitKey.keycode); +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key pressed: %s (%1x, %d)", ToString(transmitKey.keycode), transmitKey.keycode, transmitKey.duration); ++ QueueAddKey(transmitKey); ++ } ++ } ++@@ -1129,6 +1129,7 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ unsigned int timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ cec_keypress key; ++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; +++ key.duration = 0; ++ ++ { ++ CLockObject lock(m_mutex); ++@@ -1140,8 +1141,7 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_configuration.iComboKeyTimeoutMs : CEC_DEFAULT_COMBO_TIMEOUT_MS); ++ ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- ((m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime >= iTimeoutMs) || ++- (m_iCurrentButton != comboKey && m_releaseButtontime && iNow >= (uint64_t)m_releaseButtontime))) +++ m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime >= iTimeoutMs) ++ { ++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = m_iCurrentButton; ++@@ -1155,9 +1155,23 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_releasedButtoncount = 0; ++ } ++ else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && +++ m_iCurrentButton != comboKey && m_releaseButtontime && iNow >= (uint64_t)m_releaseButtontime) +++ { +++ key.duration = (unsigned int) (iNow - m_initialButtontime); +++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; +++ +++ m_iCurrentButton = CEC_USER_CONTROL_CODE_UNKNOWN; +++ m_initialButtontime = 0; +++ m_updateButtontime = 0; +++ m_repeatButtontime = 0; +++ m_releaseButtontime = 0; +++ m_pressedButtoncount = 0; +++ m_releasedButtoncount = 0; +++ } +++ else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++ (m_iCurrentButton != comboKey && m_repeatButtontime && iNow >= (uint64_t)m_repeatButtontime)) ++ { ++- key.duration = 0; +++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = 
m_iCurrentButton; ++ m_repeatButtontime = iNow + m_configuration.iButtonRepeatRateMs; ++ timeout = std::min((uint64_t)timeout, m_repeatButtontime - iNow); ++@@ -1176,8 +1190,8 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ } ++ } ++- LIB_CEC->AddLog(CEC_LOG_DEBUG, "key %s: %s (%1x) timeout:%dms (rel:%d,rep:%d,prs:%d,rel:%d)", key.keycode == CEC_USER_CONTROL_CODE_UNKNOWN ? "idle" : key.duration ? "released" : "repeated", ++- ToString(m_iCurrentButton), m_iCurrentButton, timeout, (int)(m_releaseButtontime ? m_releaseButtontime - iNow : 0), (int)(m_repeatButtontime ? m_repeatButtontime - iNow : 0), m_pressedButtoncount, m_releasedButtoncount); +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "Key %s: %s (duration:%d) (%1x) timeout:%dms (rel:%d,rep:%d,prs:%d,rel:%d)", ToString(m_iCurrentButton), key.keycode == CEC_USER_CONTROL_CODE_UNKNOWN ? "idle" : m_repeatButtontime ? "repeated" : "released", key.duration, +++ m_iCurrentButton, timeout, (int)(m_releaseButtontime ? m_releaseButtontime - iNow : 0), (int)(m_repeatButtontime ? 
m_repeatButtontime - iNow : 0), m_pressedButtoncount, m_releasedButtoncount); ++ } ++ ++ if (key.keycode != CEC_USER_CONTROL_CODE_UNKNOWN) ++-- ++1.9.1 ++ ++ ++From 1ea01f59d8186d4d53af41961aaccbbc11651115 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Wed, 5 Nov 2014 21:04:25 +0000 ++Subject: [PATCH 6/6] squash: Fix for stop needing to be pressed twice ++ ++--- ++ src/libcec/CECClient.cpp | 17 ++++++++--------- ++ 1 file changed, 8 insertions(+), 9 deletions(-) ++ ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index 1946148..f4f114b 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -1131,6 +1131,8 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++ key.duration = 0; ++ +++ if (m_iCurrentButton == CEC_USER_CONTROL_CODE_UNKNOWN) +++ return timeout; ++ { ++ CLockObject lock(m_mutex); ++ uint64_t iNow = GetTimeMs(); ++@@ -1140,8 +1142,7 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ uint32_t iTimeoutMs(m_configuration.clientVersion >= LIBCEC_VERSION_TO_UINT(2, 0, 5) ? 
++ m_configuration.iComboKeyTimeoutMs : CEC_DEFAULT_COMBO_TIMEOUT_MS); ++ ++- if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime >= iTimeoutMs) +++ if (m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime >= iTimeoutMs) ++ { ++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = m_iCurrentButton; ++@@ -1154,8 +1155,7 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_pressedButtoncount = 0; ++ m_releasedButtoncount = 0; ++ } ++- else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- m_iCurrentButton != comboKey && m_releaseButtontime && iNow >= (uint64_t)m_releaseButtontime) +++ else if (m_iCurrentButton != comboKey && m_releaseButtontime && iNow >= (uint64_t)m_releaseButtontime) ++ { ++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++@@ -1168,8 +1168,7 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_pressedButtoncount = 0; ++ m_releasedButtoncount = 0; ++ } ++- else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- (m_iCurrentButton != comboKey && m_repeatButtontime && iNow >= (uint64_t)m_repeatButtontime)) +++ else if (m_iCurrentButton != comboKey && m_repeatButtontime && iNow >= (uint64_t)m_repeatButtontime) ++ { ++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = m_iCurrentButton; ++@@ -1178,11 +1177,11 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ } ++ else ++ { ++- if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton == comboKey && iTimeoutMs > 0) +++ if (m_iCurrentButton == comboKey && iTimeoutMs > 0) ++ timeout = std::min((uint64_t)timeout, m_updateButtontime - iNow + iTimeoutMs); ++- if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey && m_releaseButtontime) +++ if (m_iCurrentButton != comboKey && m_releaseButtontime) ++ timeout = 
std::min((uint64_t)timeout, m_releaseButtontime - iNow); ++- if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey && m_repeatButtontime) +++ if (m_iCurrentButton != comboKey && m_repeatButtontime) ++ timeout = std::min((uint64_t)timeout, m_repeatButtontime - iNow); ++ if (timeout > CEC_PROCESSOR_SIGNAL_WAIT_TIME) ++ { ++-- ++1.9.1 ++ + +From fcfb4a5068565c3ca935cf16932f6f45f34a33d0 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 19 Mar 2016 17:15:29 +0000 +Subject: [PATCH 55/67] cec: hack: pretend bump to 3.1.0 + +--- + tools/depends/target/libcec/Makefile | 1 + + tools/depends/target/libcec/bump.patch | 21 +++++++++++++++++++++ + 2 files changed, 22 insertions(+) + create mode 100644 tools/depends/target/libcec/bump.patch + +diff --git a/tools/depends/target/libcec/Makefile b/tools/depends/target/libcec/Makefile +index ddf996361ad5b46dd2b33fb035b2ed133914a612..39ba882d0c7e270b4d1d1d566027cbaffb76b587 100644 +--- a/tools/depends/target/libcec/Makefile ++++ b/tools/depends/target/libcec/Makefile +@@ -22,6 +22,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + rm -rf $(PLATFORM); mkdir -p $(PLATFORM)/build + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); patch -p1 < ../popcornmix.patch ++ cd $(PLATFORM); patch -p1 < ../bump.patch + cd $(PLATFORM)/build; $(CMAKE) -DBUILD_SHARED_LIBS=1 -DSKIP_PYTHON_WRAPPER:STRING=1 -DCMAKE_INSTALL_LIBDIR=$(PREFIX)/lib .. 
+ + $(LIBDYLIB): $(PLATFORM) +diff --git a/tools/depends/target/libcec/bump.patch b/tools/depends/target/libcec/bump.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..9e55e51068e7befd9d4ff003156ce1ff4cc56c0e +--- /dev/null ++++ b/tools/depends/target/libcec/bump.patch +@@ -0,0 +1,21 @@ ++commit 49a1728feabca68b8424a8b22abec9ee87b9aa99 ++Author: Lars Op den Kamp <lars@opdenkamp.eu> ++Date: Wed Jan 20 01:06:50 2016 +0100 ++ ++ bump to 3.1.0 ++ ++diff --git a/CMakeLists.txt b/CMakeLists.txt ++index 23d71fc..173f625 100644 ++--- a/CMakeLists.txt +++++ b/CMakeLists.txt ++@@ -2,8 +2,8 @@ project(libcec) ++ cmake_minimum_required(VERSION 2.8.9) ++ ++ set(LIBCEC_VERSION_MAJOR 3) ++-set(LIBCEC_VERSION_MINOR 0) ++-set(LIBCEC_VERSION_PATCH 0) +++set(LIBCEC_VERSION_MINOR 1) +++set(LIBCEC_VERSION_PATCH 0) ++ ++ # cec-client ++ add_subdirectory(src/cec-client) + +From 5dc7976451fc1ab8c7aeac2d9b4090a71e5a857d Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 28 Oct 2014 00:19:40 +0000 +Subject: [PATCH 56/67] [cec] Add settings for configuring button repeats + +--- + addons/resource.language.en_gb/resources/strings.po | 17 +++++++++++++++-- + system/peripherals.xml | 4 +++- + xbmc/peripherals/devices/PeripheralCecAdapter.cpp | 16 ++++++++++++++++ + 3 files changed, 34 insertions(+), 3 deletions(-) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index 8cb9f8503c29c54cd0cb55018f867a45248c649f..a4c4387b0a78e4dc9ed875e72c4ce72dd2741fe2 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -19392,8 +19392,6 @@ msgctxt "#38111" + msgid "This category contains other settings for the GUI interface" + msgstr "" + +-#empty strings from id 38112 to 38999 +- + #: system/settings/settings.xml + msgctxt "#39000" + msgid "HD and up" +@@ -19414,6 +19412,21 @@ msgctxt "#39003" + msgid "Accelerate 
h264" + msgstr "" + ++#: system/peripherals.xml ++msgctxt "#38050" ++msgid "Remote button press delay before repeating (ms)" ++msgstr "" ++ ++#: system/peripherals.xml ++msgctxt "#38051" ++msgid "Remote button press repeat rate (ms)" ++msgstr "" ++ ++#: system/peripherals.xml ++msgctxt "#38052" ++msgid "Remote button press release time (ms)" ++msgstr "" ++ + msgctxt "#38190" + msgid "Extract thumbnails from video files" + msgstr "" +diff --git a/system/peripherals.xml b/system/peripherals.xml +index ec3c3fe39db5f2272b3a9e49b34de3a4a063aab0..c3dbae029d397ab2e6948296df64b7a6f174b2af 100644 +--- a/system/peripherals.xml ++++ b/system/peripherals.xml +@@ -31,7 +31,9 @@ + <setting key="device_type" type="int" value="1" configurable="0" /> + <setting key="wake_devices_advanced" type="string" value="" configurable="0" /> + <setting key="standby_devices_advanced" type="string" value="" configurable="0" /> +- <setting key="double_tap_timeout_ms" type="int" min="0" value="300" configurable="0" /> ++ <setting key="double_tap_timeout_ms" type="int" min="50" max="1000" step="50" value="300" label="38050" order="16" /> ++ <setting key="button_repeat_rate_ms" type="int" min="0" max="250" step="10" value="0" label="38051" order="17" /> ++ <setting key="button_release_delay_ms" type="int" min="0" max="500" step="50" value="0" label="38052" order="18" /> + </peripheral> + + <peripheral vendor_product="2548:1001,2548:1002" bus="usb" name="Pulse-Eight CEC Adapter" mapTo="cec"> +diff --git a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +index f784bded97de9491d3eaaee2fb6efc86e74dd07b..8ab327c34e08a14c598b758a67384f1c6a838e6c 100644 +--- a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp ++++ b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +@@ -1284,6 +1284,20 @@ void CPeripheralCecAdapter::SetConfigurationFromLibCEC(const CEC::libcec_configu + m_configuration.bSendInactiveSource = config.bSendInactiveSource; + bChanged |= 
SetSetting("send_inactive_source", m_configuration.bSendInactiveSource == 1); + ++#if defined(CEC_DOUBLE_TAP_TIMEOUT_MS_OLD) ++ m_configuration.iDoubleTapTimeout50Ms = config.iDoubleTapTimeout50Ms; ++ bChanged |= SetSetting("double_tap_timeout_ms", (int)m_configuration.iDoubleTapTimeout50Ms * 50); ++#else ++ m_configuration.iDoubleTapTimeoutMs = config.iDoubleTapTimeoutMs; ++ bChanged |= SetSetting("double_tap_timeout_ms", (int)m_configuration.iDoubleTapTimeoutMs); ++#endif ++ ++ m_configuration.iButtonRepeatRateMs = config.iButtonRepeatRateMs; ++ bChanged |= SetSetting("button_repeat_rate_ms", (int)m_configuration.iButtonRepeatRateMs); ++ ++ m_configuration.iButtonReleaseDelayMs = config.iButtonReleaseDelayMs; ++ bChanged |= SetSetting("button_release_delay_ms", (int)m_configuration.iButtonReleaseDelayMs); ++ + m_configuration.iFirmwareVersion = config.iFirmwareVersion; + m_configuration.bShutdownOnStandby = config.bShutdownOnStandby; + +@@ -1388,6 +1402,8 @@ void CPeripheralCecAdapter::SetConfigurationFromSettings(void) + // backwards compatibility. 
will be removed once the next major release of libCEC is out + m_configuration.iDoubleTapTimeoutMs = GetSettingInt("double_tap_timeout_ms"); + #endif ++ m_configuration.iButtonRepeatRateMs = GetSettingInt("button_repeat_rate_ms"); ++ m_configuration.iButtonReleaseDelayMs = GetSettingInt("button_release_delay_ms"); + + if (GetSettingBool("pause_playback_on_deactivate")) + { + +From c43daf2021b96de898d3522b5248108f9d9af488 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Mon, 3 Nov 2014 23:17:46 +0000 +Subject: [PATCH 57/67] [cec] Don't discard buttons when repeat mode is enabled + +--- + xbmc/peripherals/devices/PeripheralCecAdapter.cpp | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +index 8ab327c34e08a14c598b758a67384f1c6a838e6c..8b04a37a803c2f0ff15de35a10186e3dc9c0d130 100644 +--- a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp ++++ b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +@@ -776,7 +776,10 @@ void CPeripheralCecAdapter::PushCecKeypress(const CecButtonPress &key) + CLog::Log(LOGDEBUG, "%s - received key %2x duration %d", __FUNCTION__, key.iButton, key.iDuration); + + CSingleLock lock(m_critSection); +- if (key.iDuration > 0) ++ // avoid the queue getting too long ++ if (m_configuration.iButtonRepeatRateMs && m_buttonQueue.size() > 5) ++ return; ++ if (m_configuration.iButtonRepeatRateMs == 0 && key.iDuration > 0) + { + if (m_currentButton.iButton == key.iButton && m_currentButton.iDuration == 0) + { + +From 8e198d48296f237f20faa59de880eaef75752459 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 4 Nov 2014 18:50:00 +0000 +Subject: [PATCH 58/67] [cec] Temp - more logging + +--- + xbmc/peripherals/devices/PeripheralCecAdapter.cpp | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp 
b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +index 8b04a37a803c2f0ff15de35a10186e3dc9c0d130..259649721512e744fd89bfe66af6bc6324c82653 100644 +--- a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp ++++ b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +@@ -773,12 +773,15 @@ void CPeripheralCecAdapter::GetNextKey(void) + + void CPeripheralCecAdapter::PushCecKeypress(const CecButtonPress &key) + { +- CLog::Log(LOGDEBUG, "%s - received key %2x duration %d", __FUNCTION__, key.iButton, key.iDuration); ++ CLog::Log(LOGDEBUG, "%s - received key %2x duration %d (rep:%d size:%d)", __FUNCTION__, key.iButton, key.iDuration, m_configuration.iButtonRepeatRateMs, m_buttonQueue.size()); + + CSingleLock lock(m_critSection); + // avoid the queue getting too long + if (m_configuration.iButtonRepeatRateMs && m_buttonQueue.size() > 5) ++ { ++ CLog::Log(LOGDEBUG, "%s - discarded key %2x", __FUNCTION__, key.iButton); + return; ++ } + if (m_configuration.iButtonRepeatRateMs == 0 && key.iDuration > 0) + { + if (m_currentButton.iButton == key.iButton && m_currentButton.iDuration == 0) +@@ -787,6 +790,7 @@ void CPeripheralCecAdapter::PushCecKeypress(const CecButtonPress &key) + if (m_bHasButton) + m_currentButton.iDuration = key.iDuration; + // ignore this one, since it's already been handled by xbmc ++ CLog::Log(LOGDEBUG, "%s - ignored key %2x", __FUNCTION__, key.iButton); + return; + } + // if we received a keypress with a duration set, try to find the same one without a duration set, and replace it +@@ -797,6 +801,7 @@ void CPeripheralCecAdapter::PushCecKeypress(const CecButtonPress &key) + if ((*it).iDuration == 0) + { + // replace this entry ++ CLog::Log(LOGDEBUG, "%s - replaced key %2x", __FUNCTION__, key.iButton); + (*it).iDuration = key.iDuration; + return; + } +@@ -806,6 +811,7 @@ void CPeripheralCecAdapter::PushCecKeypress(const CecButtonPress &key) + } + } + ++ CLog::Log(LOGDEBUG, "%s - added key %2x", __FUNCTION__, key.iButton); + m_buttonQueue.push_back(key); + } + + 
+From d61469373a92dbcb58bc8e38fc8d921106f61943 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 22 Jan 2016 12:29:41 +0000 +Subject: [PATCH 59/67] [cec] Update for libcec 3.1.0 + +--- + configure.ac | 4 ++-- + xbmc/peripherals/devices/PeripheralCecAdapter.cpp | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/configure.ac b/configure.ac +index d498f958b83813cbf5fce0a86bf07743665b5ed4..277c97f72b20650ba6a594e6363b9a863e0310a8 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -1433,9 +1433,9 @@ if test "x$use_libcec" != "xno"; then + # libcec is dyloaded, so we need to check for its headers and link any depends. + if test "x$use_libcec" != "xno"; then + if test "x$use_libcec" != "xauto"; then +- PKG_CHECK_MODULES([CEC],[libcec >= 3.0.0],,[use_libcec="no";AC_MSG_ERROR($libcec_disabled)]) ++ PKG_CHECK_MODULES([CEC],[libcec >= 3.1.0],,[use_libcec="no";AC_MSG_ERROR($libcec_disabled)]) + else +- PKG_CHECK_MODULES([CEC],[libcec >= 3.0.0],,[use_libcec="no";AC_MSG_RESULT($libcec_disabled)]) ++ PKG_CHECK_MODULES([CEC],[libcec >= 3.1.0],,[use_libcec="no";AC_MSG_RESULT($libcec_disabled)]) + fi + + if test "x$use_libcec" != "xno"; then +diff --git a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +index 259649721512e744fd89bfe66af6bc6324c82653..ae7fd02ea17cb11318083f853d6b1641af4ecadb 100644 +--- a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp ++++ b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +@@ -43,7 +43,7 @@ using namespace PERIPHERALS; + using namespace ANNOUNCEMENT; + using namespace CEC; + +-#define CEC_LIB_SUPPORTED_VERSION LIBCEC_VERSION_TO_UINT(3, 0, 0) ++#define CEC_LIB_SUPPORTED_VERSION LIBCEC_VERSION_TO_UINT(3, 1, 0) + + /* time in seconds to ignore standby commands from devices after the screensaver has been activated */ + #define SCREENSAVER_TIMEOUT 20 +@@ -1326,7 +1326,7 @@ void CPeripheralCecAdapter::SetConfigurationFromLibCEC(const 
CEC::libcec_configu + void CPeripheralCecAdapter::SetConfigurationFromSettings(void) + { + // client version matches the version of libCEC that we originally used the API from +- m_configuration.clientVersion = LIBCEC_VERSION_TO_UINT(3, 0, 0); ++ m_configuration.clientVersion = CEC_LIB_SUPPORTED_VERSION; + + // device name 'XBMC' + snprintf(m_configuration.strDeviceName, 13, "%s", GetSettingString("device_name").c_str()); + +From f50610a41e776cb15acbb2740587cf65b47811d0 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 19 Mar 2016 14:46:41 +0000 +Subject: [PATCH 60/67] libcec: use system audio mode request instead of power + on to start AVR reliable + +--- + tools/depends/target/libcec/208.patch | 38 +++++++++++++++++++++++++++++++++++ + tools/depends/target/libcec/Makefile | 1 + + 2 files changed, 39 insertions(+) + create mode 100644 tools/depends/target/libcec/208.patch + +diff --git a/tools/depends/target/libcec/208.patch b/tools/depends/target/libcec/208.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..3dc5adf022e80c3337ad69b7c7d7346daafbfdd3 +--- /dev/null ++++ b/tools/depends/target/libcec/208.patch +@@ -0,0 +1,38 @@ ++From f70c4d76e1d9c0219a3927b6b66090b7575e7933 Mon Sep 17 00:00:00 2001 ++From: Gerald Dachs <gda@dachsweb.de> ++Date: Thu, 17 Mar 2016 12:12:51 +0100 ++Subject: [PATCH] use system audio mode request instead of power on to start ++ AVR reliable ++ ++--- ++ src/libcec/devices/CECBusDevice.cpp | 13 +++++++++---- ++ 1 file changed, 9 insertions(+), 4 deletions(-) ++ ++diff --git a/src/libcec/devices/CECBusDevice.cpp b/src/libcec/devices/CECBusDevice.cpp ++index 55939d1..e2d5ea3 100644 ++--- a/src/libcec/devices/CECBusDevice.cpp +++++ b/src/libcec/devices/CECBusDevice.cpp ++@@ -1025,14 +1025,19 @@ bool CCECBusDevice::ActivateSource(uint64_t iDelay /* = 0 */) ++ bool bReturn(true); ++ if (iDelay == 0) ++ { ++- /** some AVRs fail to be powered up by the TV when it powers up. 
power up the AVR explicitly */ +++ /** send system audio mode request if AVR exists */ ++ if (m_iLogicalAddress != CECDEVICE_AUDIOSYSTEM) ++ { ++ CCECBusDevice* audioSystem(m_processor->GetDevice(CECDEVICE_AUDIOSYSTEM)); ++- if (audioSystem && audioSystem->IsPresent() && audioSystem->GetPowerStatus(m_iLogicalAddress) != CEC_POWER_STATUS_ON) +++ if (audioSystem && audioSystem->IsPresent()) ++ { ++- LIB_CEC->AddLog(CEC_LOG_DEBUG, "powering up the AVR"); ++- audioSystem->PowerOn(m_iLogicalAddress); +++ cec_command command; +++ +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "sending system audio mode request for '%s'", ToString(m_iLogicalAddress)); +++ cec_command::Format(command, m_iLogicalAddress, CECDEVICE_AUDIOSYSTEM, CEC_OPCODE_SYSTEM_AUDIO_MODE_REQUEST); +++ command.parameters.PushBack((uint8_t) ((m_iPhysicalAddress >> 8) & 0xFF)); +++ command.parameters.PushBack((uint8_t) (m_iPhysicalAddress & 0xFF)); +++ bReturn = m_handler->Transmit(command, false, false); ++ } ++ } ++ +diff --git a/tools/depends/target/libcec/Makefile b/tools/depends/target/libcec/Makefile +index 39ba882d0c7e270b4d1d1d566027cbaffb76b587..4565dc9f6fc0b3e6b49133443c19e10767d475eb 100644 +--- a/tools/depends/target/libcec/Makefile ++++ b/tools/depends/target/libcec/Makefile +@@ -23,6 +23,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); patch -p1 < ../popcornmix.patch + cd $(PLATFORM); patch -p1 < ../bump.patch ++ cd $(PLATFORM); patch -p1 < ../208.patch + cd $(PLATFORM)/build; $(CMAKE) -DBUILD_SHARED_LIBS=1 -DSKIP_PYTHON_WRAPPER:STRING=1 -DCMAKE_INSTALL_LIBDIR=$(PREFIX)/lib .. 
+ + $(LIBDYLIB): $(PLATFORM) + +From b3074634af438e1dd9de238718364d82d4ee46e2 Mon Sep 17 00:00:00 2001 +From: Rainer Hochecker <fernetmenta@online.de> +Date: Tue, 22 Mar 2016 09:51:52 +0100 +Subject: [PATCH 61/67] python: use kodi provided cert if available + +--- + xbmc/interfaces/python/XBPython.cpp | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/xbmc/interfaces/python/XBPython.cpp b/xbmc/interfaces/python/XBPython.cpp +index d762bf4f8fdca2a1081026089977ae8987c88b66..ff4ed7db26845905108ea0ae504e4f589f9c7d0f 100644 +--- a/xbmc/interfaces/python/XBPython.cpp ++++ b/xbmc/interfaces/python/XBPython.cpp +@@ -593,9 +593,12 @@ bool XBPython::OnScriptInitialized(ILanguageInvoker *invoker) + CEnvironment::putenv(buf); + buf = "OS=win32"; + CEnvironment::putenv(buf); ++#endif + +-#elif defined(TARGET_ANDROID) +- setenv("SSL_CERT_FILE", CSpecialProtocol::TranslatePath("special://xbmc/system/certs/cacert.pem").c_str(), 1); ++#if !defined(TARGET_WINDOWS) ++ // use Kodi provided cert if available ++ if (XFILE::CFile::Exists("special://xbmc/system/certs/cacert.pem")) ++ setenv("SSL_CERT_FILE", CSpecialProtocol::TranslatePath("special://xbmc/system/certs/cacert.pem").c_str(), 1); + #endif + + if (PyEval_ThreadsInitialized()) + +From bd545157e573f1904ac552693524a4bce8789c5d Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 25 May 2016 18:31:17 +0100 +Subject: [PATCH 62/67] rbp: Hard code the number of buffers to improve audio + sync + +--- + system/settings/rbp.xml | 6 ++++++ + xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp | 4 ++++ + 2 files changed, 10 insertions(+) + +diff --git a/system/settings/rbp.xml b/system/settings/rbp.xml +index 2e6c903df5e4d2cd064466db0ef55deada5cdc80..29d8f92c123875a83eae4832c1f6246a6deefc3c 100644 +--- a/system/settings/rbp.xml ++++ b/system/settings/rbp.xml +@@ -92,6 +92,12 @@ + <control type="toggle" /> + </setting> + </group> ++ <group id="3"> ++ <setting 
id="videoscreen.noofbuffers"> ++ <visible>false</visible> ++ <default>2</default> <!-- double buffered --> ++ </setting> ++ </group> + </category> + <category id="audio"> + <group id="1"> +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp +index db537d33a5d55fc856bbd3ec0a7846df3bb060be..ee34c0b31da3b05fabae5e47ad51db2f09e682c3 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp +@@ -1066,7 +1066,11 @@ void CRenderManager::UpdateDisplayLatency() + refresh = 0; // No idea about refresh rate when windowed, just get the default latency + m_displayLatency = (double) g_advancedSettings.GetDisplayLatency(refresh); + ++#ifdef TARGET_RASPBERRY_PI ++ int buffers = CSettings::GetInstance().GetBool("videoplayer.usedisplayasclock") ? 1:2; ++#else + int buffers = g_Windowing.NoOfBuffers(); ++#endif + m_displayLatency += (buffers - 1) / fps; + + } + +From cb6cdf5bf5a392fffc7e58631a4767ab8836ea02 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 8 Jun 2016 01:11:26 +0100 +Subject: [PATCH 63/67] omxvideo: Remove call to AutoInterlaceMethod. 
Treat + auto as advanced + +--- + xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp | 2 +- + xbmc/cores/omxplayer/OMXVideo.cpp | 5 ++++- + 2 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +index d65857779628debfc85b47b8dd283513edb5a319..523e52c27de2711ca03c6b06767c940be6e3d177 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +@@ -632,7 +632,7 @@ bool CMMALRenderer::Supports(ESCALINGMETHOD method) + + EINTERLACEMETHOD CMMALRenderer::AutoInterlaceMethod() + { +- return m_sourceWidth * m_sourceHeight <= 576 * 720 ? VS_INTERLACEMETHOD_MMAL_ADVANCED : VS_INTERLACEMETHOD_MMAL_BOB; ++ return VS_INTERLACEMETHOD_MMAL_ADVANCED; + } + + void CMMALRenderer::SetVideoRect(const CRect& InSrcRect, const CRect& InDestRect) +diff --git a/xbmc/cores/omxplayer/OMXVideo.cpp b/xbmc/cores/omxplayer/OMXVideo.cpp +index de15bfff05d23949d6e6f4304b15aa7d79120dc2..79685835382422d0a22d7b75d7c1408e2c053403 100644 +--- a/xbmc/cores/omxplayer/OMXVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXVideo.cpp +@@ -242,7 +242,10 @@ bool COMXVideo::PortSettingsChanged(ResolutionUpdateInfo &resinfo) + + if(m_deinterlace) + { +- EINTERLACEMETHOD interlace_method = m_renderManager.AutoInterlaceMethod(CMediaSettings::GetInstance().GetCurrentVideoSettings().m_InterlaceMethod); ++ EINTERLACEMETHOD interlace_method = CMediaSettings::GetInstance().GetCurrentVideoSettings().m_InterlaceMethod; ++ if (interlace_method == VS_INTERLACEMETHOD_AUTO) ++ interlace_method = VS_INTERLACEMETHOD_MMAL_ADVANCED; ++ + bool advanced_deinterlace = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED || interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF; + bool half_framerate = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF || interlace_method == 
VS_INTERLACEMETHOD_MMAL_BOB_HALF; + + +From 120051ba33cc7dc2885b3bf3abf49c4903bee0f8 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 15 Jun 2016 23:41:43 +0100 +Subject: [PATCH 64/67] mmal_codec: Use EOS through codec to determine drain is + complete + +Rather than relying on a timeout from codec, feed an EOS through to ensure all frames have been returned +--- + .../VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 72 ++++++++++++++-------- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h | 3 + + 2 files changed, 49 insertions(+), 26 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index 470083b2256d23488ca476cebfe8d3ef9f62377e..cd0d30d77cc1cd8803ccde317bcc2f3cd61000e4 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -117,6 +117,9 @@ CMMALVideo::CMMALVideo(CProcessInfo &processInfo) : CDVDVideoCodec(processInfo) + m_fps = 0.0f; + m_num_decoded = 0; + m_codecControlFlags = 0; ++ m_got_eos = false; ++ m_packet_num = 0; ++ m_packet_num_eos = ~0; + } + + CMMALVideo::~CMMALVideo() +@@ -243,7 +246,7 @@ void CMMALVideo::dec_output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf + { + if (!(buffer->cmd == 0 && buffer->length > 0)) + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) +- CLog::Log(LOGDEBUG, "%s::%s port:%p buffer %p, len %d cmd:%x", CLASSNAME, __func__, port, buffer, buffer->length, buffer->cmd); ++ CLog::Log(LOGDEBUG, "%s::%s port:%p buffer %p, len %d cmd:%x flags:%x", CLASSNAME, __func__, port, buffer, buffer->length, buffer->cmd, buffer->flags); + + bool kept = false; + +@@ -288,6 +291,12 @@ void CMMALVideo::dec_output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf + kept = true; + } + } ++ if (buffer->flags & MMAL_BUFFER_HEADER_FLAG_EOS) ++ { ++ CSingleLock lock(m_output_mutex); ++ m_got_eos = true; ++ m_output_cond.notifyAll(); ++ } + } + else if 
(buffer->cmd == MMAL_EVENT_FORMAT_CHANGED) + { +@@ -790,11 +799,18 @@ int CMMALVideo::Decode(uint8_t* pData, int iSize, double dts, double pts) + + MMAL_BUFFER_HEADER_T *buffer; + MMAL_STATUS_T status; +- ++ bool drain = (m_codecControlFlags & DVD_CODEC_CTRL_DRAIN) ? true : false; ++ bool send_eos = drain && !m_got_eos && m_packet_num_eos != m_packet_num; ++ // we don't get an EOS response if no packets have been sent ++ if (m_packet_num == 0) ++ { ++ send_eos = false; ++ m_got_eos = true; ++ } + Prime(); + while (1) + { +- if (pData) ++ if (pData || send_eos) + { + // 500ms timeout + { +@@ -817,17 +833,25 @@ int CMMALVideo::Decode(uint8_t* pData, int iSize, double dts, double pts) + if (m_dropState) + buffer->flags |= MMAL_BUFFER_HEADER_FLAG_USER3; + +- memcpy(buffer->data, pData, buffer->length); ++ if (pData) ++ memcpy(buffer->data, pData, buffer->length); + iSize -= buffer->length; + pData += buffer->length; + + if (iSize == 0) ++ { ++ m_packet_num++; + buffer->flags |= MMAL_BUFFER_HEADER_FLAG_FRAME_END; +- ++ if (send_eos) ++ { ++ buffer->flags |= MMAL_BUFFER_HEADER_FLAG_EOS; ++ m_packet_num_eos = m_packet_num; ++ m_got_eos = false; ++ } ++ } + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) + CLog::Log(LOGDEBUG, "%s::%s - %-8p %-6d/%-6d dts:%.3f pts:%.3f flags:%x ready_queue(%d)", + CLASSNAME, __func__, buffer, buffer->length, iSize, dts == DVD_NOPTS_VALUE ? 0.0 : dts*1e-6, pts == DVD_NOPTS_VALUE ? 
0.0 : pts*1e-6, buffer->flags, m_output_ready.size()); +- assert((int)buffer->length > 0); + status = mmal_port_send_buffer(m_dec_input, buffer); + if (status != MMAL_SUCCESS) + { +@@ -879,36 +903,28 @@ int CMMALVideo::Decode(uint8_t* pData, int iSize, double dts, double pts) + bool full = queued > DVD_MSEC_TO_TIME(1000); + int ret = 0; + +- unsigned int pics = m_output_ready.size(); +- if (m_preroll && (pics >= GetAllowedReferences() || m_codecControlFlags & DVD_CODEC_CTRL_DRAIN)) +- m_preroll = false; +- if (pics > 0 && !m_preroll) +- ret |= VC_PICTURE; +- if ((m_preroll || pics <= 1) && mmal_queue_length(m_dec_input_pool->queue) > 0 && !(m_codecControlFlags & DVD_CODEC_CTRL_DRAIN)) +- ret |= VC_BUFFER; +- +- bool slept = false; +- if (!ret) ++ XbmcThreads::EndTime delay(500); ++ while (!ret && !delay.IsTimePast()) + { +- slept = true; ++ unsigned int pics = m_output_ready.size(); ++ if (m_preroll && (pics >= GetAllowedReferences() || drain)) ++ m_preroll = false; ++ if (pics > 0 && !m_preroll) ++ ret |= VC_PICTURE; ++ if ((m_preroll || pics <= 1) && mmal_queue_length(m_dec_input_pool->queue) > 0 && (!drain || m_got_eos || m_packet_num_eos != m_packet_num)) ++ ret |= VC_BUFFER; ++ if (!ret) + { + // otherwise we busy spin + lock.Leave(); + CSingleLock output_lock(m_output_mutex); +- m_output_cond.wait(output_lock, 30); ++ m_output_cond.wait(output_lock, delay.MillisLeft()); + lock.Enter(); + } +- unsigned int pics = m_output_ready.size(); +- if (m_preroll && (pics >= GetAllowedReferences() || m_codecControlFlags & DVD_CODEC_CTRL_DRAIN)) +- m_preroll = false; +- if (pics > 0 && !m_preroll) +- ret |= VC_PICTURE; +- if ((m_preroll || pics <= 1) && (mmal_queue_length(m_dec_input_pool->queue) > 0 || m_codecControlFlags & DVD_CODEC_CTRL_DRAIN)) +- ret |= VC_BUFFER; + } + + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) +- CLog::Log(LOGDEBUG, "%s::%s - ret(%x) pics(%d) inputs(%d) slept(%d) queued(%.2f) (%.2f:%.2f) full(%d) flags(%x) preroll(%d)", CLASSNAME, __func__, 
ret, m_output_ready.size(), mmal_queue_length(m_dec_input_pool->queue), slept, queued*1e-6, m_demuxerPts*1e-6, m_decoderPts*1e-6, full, m_codecControlFlags, m_preroll); ++ CLog::Log(LOGDEBUG, "%s::%s - ret(%x) pics(%d) inputs(%d) slept(%2d) queued(%.2f) (%.2f:%.2f) full(%d) flags(%x) preroll(%d) eos(%d %d/%d)", CLASSNAME, __func__, ret, m_output_ready.size(), mmal_queue_length(m_dec_input_pool->queue), 500-delay.MillisLeft(), queued*1e-6, m_demuxerPts*1e-6, m_decoderPts*1e-6, full, m_codecControlFlags, m_preroll, m_got_eos, m_packet_num, m_packet_num_eos); + + return ret; + } +@@ -981,6 +997,10 @@ void CMMALVideo::Reset(void) + m_demuxerPts = DVD_NOPTS_VALUE; + m_codecControlFlags = 0; + m_dropState = false; ++ m_num_decoded = 0; ++ m_got_eos = false; ++ m_packet_num = 0; ++ m_packet_num_eos = ~0; + m_preroll = !m_hints.stills && (m_speed == DVD_PLAYSPEED_NORMAL || m_speed == DVD_PLAYSPEED_PAUSE); + } + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h +index d008c6c538819a05be9925ab8cd342b131e511d8..122a5e24f5ffb1bf2415867ec98d8e5104339ab1 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h +@@ -134,6 +134,9 @@ protected: + int m_codecControlFlags; + bool m_dropState; + bool m_preroll; ++ bool m_got_eos; ++ uint32_t m_packet_num; ++ uint32_t m_packet_num_eos; + + CCriticalSection m_sharedSection; + MMAL_COMPONENT_T *m_dec; + +From d91a21ae571cc2bf2c5103c8c72fc1262379f4b5 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 17 Jun 2016 16:23:25 +0100 +Subject: [PATCH 65/67] rbp: Update transposed video scaling to match other + platforms + +--- + .../VideoRenderers/HwDecRender/MMALRenderer.cpp | 29 ++++++++++++++++++---- + xbmc/cores/omxplayer/OMXPlayerVideo.cpp | 29 ++++++++++++++++++---- + 2 files changed, 48 insertions(+), 10 deletions(-) + +diff --git 
a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +index 523e52c27de2711ca03c6b06767c940be6e3d177..8a4bf24625a57b11908f4f38588fb348581556a6 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +@@ -656,11 +656,30 @@ void CMMALRenderer::SetVideoRect(const CRect& InSrcRect, const CRect& InDestRect + // fix up transposed video + if (m_renderOrientation == 90 || m_renderOrientation == 270) + { +- float diff = (DestRect.Height() - DestRect.Width()) * 0.5f; +- DestRect.x1 -= diff; +- DestRect.x2 += diff; +- DestRect.y1 += diff; +- DestRect.y2 -= diff; ++ float newWidth, newHeight; ++ float aspectRatio = GetAspectRatio(); ++ // clamp width if too wide ++ if (DestRect.Height() > DestRect.Width()) ++ { ++ newWidth = DestRect.Width(); // clamp to the width of the old dest rect ++ newHeight = newWidth * aspectRatio; ++ } ++ else // else clamp to height ++ { ++ newHeight = DestRect.Height(); // clamp to the height of the old dest rect ++ newWidth = newHeight / aspectRatio; ++ } ++ ++ // calculate the center point of the view and offsets ++ float centerX = DestRect.x1 + DestRect.Width() * 0.5f; ++ float centerY = DestRect.y1 + DestRect.Height() * 0.5f; ++ float diffX = newWidth * 0.5f; ++ float diffY = newHeight * 0.5f; ++ ++ DestRect.x1 = centerX - diffX; ++ DestRect.x2 = centerX + diffX; ++ DestRect.y1 = centerY - diffY; ++ DestRect.y2 = centerY + diffY; + } + + // check if destination rect or video view mode has changed +diff --git a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +index d61dc4f2668f8aca91bce79cfb631034061c491c..ed138297b49c8d3e6b42a1f1fa5fa08bd01be11b 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +@@ -640,11 +640,30 @@ void OMXPlayerVideo::SetVideoRect(const CRect &InSrcRect, const 
CRect &InDestRec + // fix up transposed video + if (m_hints.orientation == 90 || m_hints.orientation == 270) + { +- float diff = (DestRect.Height() - DestRect.Width()) * 0.5f; +- DestRect.x1 -= diff; +- DestRect.x2 += diff; +- DestRect.y1 += diff; +- DestRect.y2 -= diff; ++ float newWidth, newHeight; ++ float aspectRatio = GetAspectRatio(); ++ // clamp width if too wide ++ if (DestRect.Height() > DestRect.Width()) ++ { ++ newWidth = DestRect.Width(); // clamp to the width of the old dest rect ++ newHeight = newWidth * aspectRatio; ++ } ++ else // else clamp to height ++ { ++ newHeight = DestRect.Height(); // clamp to the height of the old dest rect ++ newWidth = newHeight / aspectRatio; ++ } ++ ++ // calculate the center point of the view and offsets ++ float centerX = DestRect.x1 + DestRect.Width() * 0.5f; ++ float centerY = DestRect.y1 + DestRect.Height() * 0.5f; ++ float diffX = newWidth * 0.5f; ++ float diffY = newHeight * 0.5f; ++ ++ DestRect.x1 = centerX - diffX; ++ DestRect.x2 = centerX + diffX; ++ DestRect.y1 = centerY - diffY; ++ DestRect.y2 = centerY + diffY; + } + + // check if destination rect or video view mode has changed + +From bd128ae3789c33616ae6ebd55fbae13984f98477 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sun, 19 Jun 2016 16:53:49 +0100 +Subject: [PATCH 66/67] mmalcodec: Add another buffer when deinterlacing + +See: http://forum.kodi.tv/showthread.php?tid=276372 +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index cd0d30d77cc1cd8803ccde317bcc2f3cd61000e4..6fd59a64dd48c05d1ccc3183adc5deda16e930a2 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -400,7 +400,7 @@ bool CMMALVideo::CreateDeinterlace(EINTERLACEMETHOD interlace_method) + 
m_deint_input->userdata = (struct MMAL_PORT_USERDATA_T *)this; + + // Image_fx assumed 3 frames of context. simple deinterlace doesn't require this +- status = mmal_port_parameter_set_uint32(m_deint_input, MMAL_PARAMETER_EXTRA_BUFFERS, GetAllowedReferences() - 5 + advanced_deinterlace ? 2:0); ++ status = mmal_port_parameter_set_uint32(m_deint_input, MMAL_PARAMETER_EXTRA_BUFFERS, 1 + GetAllowedReferences() - 5 + (advanced_deinterlace ? 2:0)); + if (status != MMAL_SUCCESS) + CLog::Log(LOGERROR, "%s::%s Failed to enable extra buffers on %s (status=%x %s)", CLASSNAME, __func__, m_deint_input->name, status, mmal_status_to_string(status)); + + +From a89d7094bd34a58451effaa3fbbc72651888ea23 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 1 Jul 2016 13:15:36 +0100 +Subject: [PATCH 67/67] UNSTABLE: This is a placeholder. Commits after this + point are considered experimental. + +--- + .placeholder | 0 + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 .placeholder + +diff --git a/.placeholder b/.placeholder +new file mode 100644 +index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/projects/RPi2/patches/kodi/kodi-000-backports.patch b/projects/RPi2/patches/kodi/kodi-000-backports.patch new file mode 100644 index 0000000000..e09ca0ff94 --- /dev/null +++ b/projects/RPi2/patches/kodi/kodi-000-backports.patch @@ -0,0 +1,2187 @@ +From 618094ed6ad5b01165de2111410dafbe4160598c Mon Sep 17 00:00:00 2001 +From: Rainer Hochecker <fernetmenta@online.de> +Date: Tue, 31 May 2016 13:28:48 +0200 +Subject: [PATCH 1/3] VideoPlayer: expose stream player info to GUI + +--- + xbmc/GUIInfoManager.cpp | 60 +++++++ + xbmc/cores/DataCacheCore.cpp | 166 ++++++++++++++++- + xbmc/cores/DataCacheCore.h | 55 +++++- + .../VideoPlayer/DVDCodecs/Audio/DVDAudioCodec.h | 6 +- + .../DVDCodecs/Audio/DVDAudioCodecFFmpeg.cpp | 3 +- + .../DVDCodecs/Audio/DVDAudioCodecFFmpeg.h | 4 +- + 
.../DVDCodecs/Audio/DVDAudioCodecPassthrough.cpp | 9 +- + .../DVDCodecs/Audio/DVDAudioCodecPassthrough.h | 4 +- + .../VideoPlayer/DVDCodecs/DVDFactoryCodec.cpp | 6 +- + xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h | 3 +- + .../DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp | 30 +++- + .../DVDCodecs/Video/DVDVideoCodecFFmpeg.h | 1 + + xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp | 6 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h | 5 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp | 19 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.h | 5 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.cpp | 4 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.h | 6 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.cpp | 4 +- + xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.h | 6 +- + xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp | 199 +++++++++++++++++++++ + xbmc/cores/VideoPlayer/Process/ProcessInfo.h | 47 +++++ + xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp | 11 +- + xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp | 5 + + xbmc/cores/omxplayer/OMXPlayerAudio.cpp | 1 + + xbmc/cores/omxplayer/OMXPlayerVideo.cpp | 7 + + xbmc/cores/paplayer/VideoPlayerCodec.cpp | 4 +- + xbmc/cores/paplayer/VideoPlayerCodec.h | 2 + + xbmc/guiinfo/GUIInfoLabels.h | 14 ++ + 29 files changed, 662 insertions(+), 30 deletions(-) + +diff --git a/xbmc/GUIInfoManager.cpp b/xbmc/GUIInfoManager.cpp +index 0d37f1f..ab5cb12 100644 +--- a/xbmc/GUIInfoManager.cpp ++++ b/xbmc/GUIInfoManager.cpp +@@ -2108,6 +2108,22 @@ const infomap videoplayer[] = {{ "title", VIDEOPLAYER_TITLE }, + { "episodename", VIDEOPLAYER_EPISODENAME } + }; + ++const infomap player_process[] = ++{ ++ { "videodecoder", PLAYER_PROCESS_VIDEODECODER }, ++ { "deintmethod", PLAYER_PROCESS_DEINTMETHOD }, ++ { "pixformat", PLAYER_PROCESS_PIXELFORMAT }, ++ { "videowidth", PLAYER_PROCESS_VIDEOWIDTH }, ++ { "videoheight", PLAYER_PROCESS_VIDEOHEIGHT }, ++ { "videofps", PLAYER_PROCESS_VIDEOFPS }, ++ { "videodar", PLAYER_PROCESS_VIDEODAR }, ++ { 
"videohwdecoder", PLAYER_PROCESS_VIDEOHWDECODER }, ++ { "audiodecoder", PLAYER_PROCESS_AUDIODECODER }, ++ { "audiochannels", PLAYER_PROCESS_AUDIOCHANNELS }, ++ { "audiosamplerate", PLAYER_PROCESS_AUDIOSAMPLERATE }, ++ { "audiobitspersample", PLAYER_PROCESS_AUDIOBITSPERSAMPLE } ++}; ++ + /// \page modules__General__List_of_gui_access + /// \section modules__General__List_of_gui_access_Container Container + /// @{ +@@ -5320,6 +5336,14 @@ int CGUIInfoManager::TranslateSingleString(const std::string &strCondition, bool + return videoplayer[i].val; + } + } ++ else if (cat.name == "player_process") ++ { ++ for (size_t i = 0; i < sizeof(player_process) / sizeof(infomap); i++) ++ { ++ if (prop.name == player_process[i].str) ++ return player_process[i].val; ++ } ++ } + else if (cat.name == "slideshow") + { + for (size_t i = 0; i < sizeof(slideshow) / sizeof(infomap); i++) +@@ -5993,6 +6017,27 @@ std::string CGUIInfoManager::GetLabel(int info, int contextWindow, std::string * + strLabel = info.language; + } + break; ++ case PLAYER_PROCESS_VIDEODECODER: ++ strLabel = g_dataCacheCore.GetVideoDecoderName(); ++ break; ++ case PLAYER_PROCESS_DEINTMETHOD: ++ strLabel = g_dataCacheCore.GetVideoDeintMethod(); ++ break; ++ case PLAYER_PROCESS_PIXELFORMAT: ++ strLabel = g_dataCacheCore.GetVideoPixelFormat(); ++ break; ++ case PLAYER_PROCESS_VIDEOFPS: ++ strLabel = StringUtils::FormatNumber(g_dataCacheCore.GetVideoFps()); ++ break; ++ case PLAYER_PROCESS_VIDEODAR: ++ strLabel = StringUtils::FormatNumber(g_dataCacheCore.GetVideoDAR()); ++ break; ++ case PLAYER_PROCESS_AUDIODECODER: ++ strLabel = g_dataCacheCore.GetAudioDecoderName(); ++ break; ++ case PLAYER_PROCESS_AUDIOCHANNELS: ++ strLabel = g_dataCacheCore.GetAudioChannels(); ++ break; + case RDS_AUDIO_LANG: + case RDS_CHANNEL_COUNTRY: + case RDS_TITLE: +@@ -6555,6 +6600,18 @@ bool CGUIInfoManager::GetInt(int &value, int info, int contextWindow, const CGUI + case SYSTEM_BATTERY_LEVEL: + value = 
g_powerManager.BatteryLevel(); + return true; ++ case PLAYER_PROCESS_VIDEOWIDTH: ++ value = g_dataCacheCore.GetVideoWidth(); ++ return true; ++ case PLAYER_PROCESS_VIDEOHEIGHT: ++ value = g_dataCacheCore.GetVideoHeight(); ++ return true; ++ case PLAYER_PROCESS_AUDIOSAMPLERATE: ++ value = g_dataCacheCore.GetAudioSampleRate(); ++ return true; ++ case PLAYER_PROCESS_AUDIOBITSPERSAMPLE: ++ value = g_dataCacheCore.GetAudioBitsPerSampe(); ++ return true; + } + return false; + } +@@ -7090,6 +7147,9 @@ bool CGUIInfoManager::GetBool(int condition1, int contextWindow, const CGUIListI + !m_currentFile->GetPVRRadioRDSInfoTag()->GetSMSStudio().empty() || + !m_currentFile->GetPVRRadioRDSInfoTag()->GetPhoneStudio().empty()); + break; ++ case PLAYER_PROCESS_VIDEOHWDECODER: ++ bReturn = g_dataCacheCore.IsVideoHwDecoder(); ++ break; + default: // default, use integer value different from 0 as true + { + int val; +diff --git a/xbmc/cores/DataCacheCore.cpp b/xbmc/cores/DataCacheCore.cpp +index 68cf2fb..cbb0a4f 100644 +--- a/xbmc/cores/DataCacheCore.cpp ++++ b/xbmc/cores/DataCacheCore.cpp +@@ -19,6 +19,12 @@ + */ + + #include "cores/DataCacheCore.h" ++#include "threads/SingleLock.h" ++ ++CDataCacheCore::CDataCacheCore() ++{ ++ m_hasAVInfoChanges = false; ++} + + bool CDataCacheCore::HasAVInfoChanges() + { +@@ -35,4 +41,162 @@ void CDataCacheCore::SignalVideoInfoChange() + void CDataCacheCore::SignalAudioInfoChange() + { + m_hasAVInfoChanges = true; +-} +\ No newline at end of file ++} ++ ++void CDataCacheCore::SetVideoDecoderName(std::string name, bool isHw) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.decoderName = name; ++ m_playerVideoInfo.isHwDecoder = isHw; ++} ++ ++std::string CDataCacheCore::GetVideoDecoderName() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.decoderName; ++} ++ ++bool CDataCacheCore::IsVideoHwDecoder() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.isHwDecoder; ++} ++ ++ 
++void CDataCacheCore::SetVideoDeintMethod(std::string method) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.deintMethod = method; ++} ++ ++std::string CDataCacheCore::GetVideoDeintMethod() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.deintMethod; ++} ++ ++void CDataCacheCore::SetVideoPixelFormat(std::string pixFormat) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.pixFormat = pixFormat; ++} ++ ++std::string CDataCacheCore::GetVideoPixelFormat() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.pixFormat; ++} ++ ++void CDataCacheCore::SetVideoDimensions(int width, int height) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.width = width; ++ m_playerVideoInfo.height = height; ++} ++ ++int CDataCacheCore::GetVideoWidth() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.width; ++} ++ ++int CDataCacheCore::GetVideoHeight() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.height; ++} ++ ++void CDataCacheCore::SetVideoFps(float fps) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.fps = fps; ++} ++ ++float CDataCacheCore::GetVideoFps() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.fps; ++} ++ ++void CDataCacheCore::SetVideoDAR(float dar) ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ m_playerVideoInfo.dar = dar; ++} ++ ++float CDataCacheCore::GetVideoDAR() ++{ ++ CSingleLock lock(m_videoPlayerSection); ++ ++ return m_playerVideoInfo.dar; ++} ++ ++// player audio info ++void CDataCacheCore::SetAudioDecoderName(std::string name) ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ m_playerAudioInfo.decoderName = name; ++} ++ ++std::string CDataCacheCore::GetAudioDecoderName() ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ return m_playerAudioInfo.decoderName; ++} ++ ++void CDataCacheCore::SetAudioChannels(std::string 
channels) ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ m_playerAudioInfo.channels = channels; ++} ++ ++std::string CDataCacheCore::GetAudioChannels() ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ return m_playerAudioInfo.channels; ++} ++ ++void CDataCacheCore::SetAudioSampleRate(int sampleRate) ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ m_playerAudioInfo.sampleRate = sampleRate; ++} ++ ++int CDataCacheCore::GetAudioSampleRate() ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ return m_playerAudioInfo.sampleRate; ++} ++ ++void CDataCacheCore::SetAudioBitsPerSample(int bitsPerSample) ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ m_playerAudioInfo.bitsPerSample = bitsPerSample; ++} ++ ++int CDataCacheCore::GetAudioBitsPerSampe() ++{ ++ CSingleLock lock(m_audioPlayerSection); ++ ++ return m_playerAudioInfo.bitsPerSample; ++} +diff --git a/xbmc/cores/DataCacheCore.h b/xbmc/cores/DataCacheCore.h +index 0df013d..e16c81f 100644 +--- a/xbmc/cores/DataCacheCore.h ++++ b/xbmc/cores/DataCacheCore.h +@@ -20,15 +20,68 @@ + * + */ + ++#include <atomic> ++#include <string> ++#include "threads/CriticalSection.h" ++ + class CDataCacheCore + { + public: ++ CDataCacheCore(); + bool HasAVInfoChanges(); + void SignalVideoInfoChange(); + void SignalAudioInfoChange(); + ++ // player video info ++ void SetVideoDecoderName(std::string name, bool isHw); ++ std::string GetVideoDecoderName(); ++ bool IsVideoHwDecoder(); ++ void SetVideoDeintMethod(std::string method); ++ std::string GetVideoDeintMethod(); ++ void SetVideoPixelFormat(std::string pixFormat); ++ std::string GetVideoPixelFormat(); ++ void SetVideoDimensions(int width, int height); ++ int GetVideoWidth(); ++ int GetVideoHeight(); ++ void SetVideoFps(float fps); ++ float GetVideoFps(); ++ void SetVideoDAR(float dar); ++ float GetVideoDAR(); ++ ++ // player audio info ++ void SetAudioDecoderName(std::string name); ++ std::string GetAudioDecoderName(); ++ void SetAudioChannels(std::string channels); 
++ std::string GetAudioChannels(); ++ void SetAudioSampleRate(int sampleRate); ++ int GetAudioSampleRate(); ++ void SetAudioBitsPerSample(int bitsPerSample); ++ int GetAudioBitsPerSampe(); ++ + protected: +- volatile bool m_hasAVInfoChanges; ++ std::atomic_bool m_hasAVInfoChanges; ++ ++ CCriticalSection m_videoPlayerSection; ++ struct SPlayerVideoInfo ++ { ++ std::string decoderName; ++ bool isHwDecoder; ++ std::string deintMethod; ++ std::string pixFormat; ++ int width; ++ int height; ++ float fps; ++ float dar; ++ } m_playerVideoInfo; ++ ++ CCriticalSection m_audioPlayerSection; ++ struct SPlayerAudioInfo ++ { ++ std::string decoderName; ++ std::string channels; ++ int sampleRate; ++ int bitsPerSample; ++ } m_playerAudioInfo; + }; + + extern CDataCacheCore g_dataCacheCore; +\ No newline at end of file +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodec.h b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodec.h +index 7e0da61..bb698da 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodec.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodec.h +@@ -23,6 +23,7 @@ + #include "system.h" + #include "cores/AudioEngine/Utils/AEAudioFormat.h" + #include "cores/AudioEngine/Utils/AEUtil.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include "DVDClock.h" + + +@@ -64,7 +65,7 @@ class CDVDAudioCodec + { + public: + +- CDVDAudioCodec() {} ++ CDVDAudioCodec(CProcessInfo &processInfo) : m_processInfo(processInfo) {} + virtual ~CDVDAudioCodec() {} + + /* +@@ -138,4 +139,7 @@ class CDVDAudioCodec + * should return the ffmpeg profile value + */ + virtual int GetProfile() { return 0; } ++ ++protected: ++ CProcessInfo &m_processInfo; + }; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.cpp +index a21894e..f5880cc 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.cpp ++++ 
b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.cpp +@@ -35,7 +35,7 @@ extern "C" { + #include "cores/AudioEngine/Utils/AEUtil.h" + #endif + +-CDVDAudioCodecFFmpeg::CDVDAudioCodecFFmpeg() : CDVDAudioCodec() ++CDVDAudioCodecFFmpeg::CDVDAudioCodecFFmpeg(CProcessInfo &processInfo) : CDVDAudioCodec(processInfo) + { + m_pCodecContext = NULL; + +@@ -126,6 +126,7 @@ bool CDVDAudioCodecFFmpeg::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options + m_iSampleFormat = AV_SAMPLE_FMT_NONE; + m_matrixEncoding = AV_MATRIX_ENCODING_NONE; + ++ m_processInfo.SetAudioDecoderName(m_pCodecContext->codec->name); + return true; + } + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.h b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.h +index a15317a..d5760bb 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecFFmpeg.h +@@ -29,10 +29,12 @@ extern "C" { + #include "libswresample/swresample.h" + } + ++class CProcessInfo; ++ + class CDVDAudioCodecFFmpeg : public CDVDAudioCodec + { + public: +- CDVDAudioCodecFFmpeg(); ++ CDVDAudioCodecFFmpeg(CProcessInfo &processInfo); + virtual ~CDVDAudioCodecFFmpeg(); + virtual bool Open(CDVDStreamInfo &hints, CDVDCodecOptions &options); + virtual void Dispose(); +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.cpp +index 1fb00e1..8009297 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.cpp +@@ -29,7 +29,8 @@ + + #define TRUEHD_BUF_SIZE 61440 + +-CDVDAudioCodecPassthrough::CDVDAudioCodecPassthrough(void) : ++CDVDAudioCodecPassthrough::CDVDAudioCodecPassthrough(CProcessInfo &processInfo) : ++ CDVDAudioCodec(processInfo), + m_buffer(NULL), + m_bufferSize(0), + m_trueHDoffset(0) +@@ -51,22 +52,26 @@ bool 
CDVDAudioCodecPassthrough::Open(CDVDStreamInfo &hints, CDVDCodecOptions &op + case AV_CODEC_ID_AC3: + format.m_streamInfo.m_type = CAEStreamInfo::STREAM_TYPE_AC3; + format.m_streamInfo.m_sampleRate = hints.samplerate; ++ m_processInfo.SetAudioDecoderName("PT_AC3"); + break; + + case AV_CODEC_ID_EAC3: + format.m_streamInfo.m_type = CAEStreamInfo::STREAM_TYPE_EAC3; + format.m_streamInfo.m_sampleRate = hints.samplerate; ++ m_processInfo.SetAudioDecoderName("PT_EAC3"); + break; + + case AV_CODEC_ID_DTS: + format.m_streamInfo.m_type = CAEStreamInfo::STREAM_TYPE_DTSHD; + format.m_streamInfo.m_sampleRate = hints.samplerate; ++ m_processInfo.SetAudioDecoderName("PT_DTSHD"); + break; + + case AV_CODEC_ID_TRUEHD: + format.m_streamInfo.m_type = CAEStreamInfo::STREAM_TYPE_TRUEHD; + format.m_streamInfo.m_sampleRate = hints.samplerate; + m_trueHDBuffer.reset(new uint8_t[TRUEHD_BUF_SIZE]); ++ m_processInfo.SetAudioDecoderName("PT_TRUEHD"); + break; + + default: +@@ -83,6 +88,8 @@ bool CDVDAudioCodecPassthrough::Open(CDVDStreamInfo &hints, CDVDCodecOptions &op + + // only get the dts core from the parser if we don't support dtsHD + m_parser.SetCoreOnly(true); ++ ++ m_processInfo.SetAudioDecoderName("PT_DTS"); + } + + m_dataSize = 0; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.h b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.h +index a04e736..4005429 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Audio/DVDAudioCodecPassthrough.h +@@ -29,10 +29,12 @@ + #include "cores/AudioEngine/Utils/AEStreamInfo.h" + #include "cores/AudioEngine/Utils/AEBitstreamPacker.h" + ++class CProcessInfo; ++ + class CDVDAudioCodecPassthrough : public CDVDAudioCodec + { + public: +- CDVDAudioCodecPassthrough(); ++ CDVDAudioCodecPassthrough(CProcessInfo &processInfo); + virtual ~CDVDAudioCodecPassthrough(); + + virtual bool Open(CDVDStreamInfo &hints, CDVDCodecOptions &options); +diff 
--git a/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.cpp +index bb5bfe0..9717412 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.cpp +@@ -173,7 +173,7 @@ CDVDVideoCodec* CDVDFactoryCodec::CreateVideoCodec(CDVDStreamInfo &hint, CProces + return nullptr;; + } + +-CDVDAudioCodec* CDVDFactoryCodec::CreateAudioCodec(CDVDStreamInfo &hint, bool allowpassthrough, bool allowdtshddecode) ++CDVDAudioCodec* CDVDFactoryCodec::CreateAudioCodec(CDVDStreamInfo &hint, CProcessInfo &processInfo, bool allowpassthrough, bool allowdtshddecode) + { + CDVDAudioCodec* pCodec = NULL; + CDVDCodecOptions options; +@@ -184,12 +184,12 @@ CDVDAudioCodec* CDVDFactoryCodec::CreateAudioCodec(CDVDStreamInfo &hint, bool al + // we don't use passthrough if "sync playback to display" is enabled + if (allowpassthrough) + { +- pCodec = OpenCodec(new CDVDAudioCodecPassthrough(), hint, options); ++ pCodec = OpenCodec(new CDVDAudioCodecPassthrough(processInfo), hint, options); + if (pCodec) + return pCodec; + } + +- pCodec = OpenCodec(new CDVDAudioCodecFFmpeg(), hint, options); ++ pCodec = OpenCodec(new CDVDAudioCodecFFmpeg(processInfo), hint, options); + if (pCodec) + return pCodec; + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h b/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h +index 45e794b98..d11c700 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/DVDFactoryCodec.h +@@ -41,7 +41,8 @@ class CDVDFactoryCodec + static CDVDVideoCodec* CreateVideoCodec(CDVDStreamInfo &hint, + CProcessInfo &processInfo, + const CRenderInfo &info = CRenderInfo()); +- static CDVDAudioCodec* CreateAudioCodec(CDVDStreamInfo &hint, bool allowpassthrough = true, bool allowdtshddecode = true); ++ static CDVDAudioCodec* CreateAudioCodec(CDVDStreamInfo &hint, CProcessInfo &processInfo, ++ bool allowpassthrough = true, bool 
allowdtshddecode = true); + static CDVDOverlayCodec* CreateOverlayCodec(CDVDStreamInfo &hint ); + + static CDVDAudioCodec* OpenCodec(CDVDAudioCodec* pCodec, CDVDStreamInfo &hint, CDVDCodecOptions &options ); +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp +index 0414d85..967d518 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp +@@ -68,6 +68,7 @@ extern "C" { + #include "libavfilter/avfilter.h" + #include "libavfilter/buffersink.h" + #include "libavfilter/buffersrc.h" ++#include "libavutil/pixdesc.h" + } + + enum DecoderState +@@ -88,11 +89,12 @@ enum EFilterFlags { + FILTER_ROTATE = 0x40, //< rotate image according to the codec hints + }; + +-enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avctx +- , const AVPixelFormat * fmt ) ++enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avctx, const AVPixelFormat * fmt) + { + CDVDVideoCodecFFmpeg* ctx = (CDVDVideoCodecFFmpeg*)avctx->opaque; + ++ const char* pixFmtName = av_get_pix_fmt_name(*fmt); ++ + // if frame threading is enabled hw accel is not allowed + if(ctx->m_decoderState != STATE_HW_SINGLE) + { +@@ -122,9 +124,10 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + if(VDPAU::CDecoder::IsVDPAUFormat(*cur) && CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEVDPAU)) + { + CLog::Log(LOGNOTICE,"CDVDVideoCodecFFmpeg::GetFormat - Creating VDPAU(%ix%i)", avctx->width, avctx->height); +- VDPAU::CDecoder* vdp = new VDPAU::CDecoder(); ++ VDPAU::CDecoder* vdp = new VDPAU::CDecoder(ctx->m_processInfo); + if(vdp->Open(avctx, ctx->m_pCodecContext, *cur, ctx->m_uSurfacesCount)) + { ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? 
pixFmtName : ""); + ctx->SetHardware(vdp); + return *cur; + } +@@ -137,9 +140,10 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + !ctx->m_hints.dvd && !ctx->m_hints.stills) + { + CLog::Log(LOGNOTICE, "CDVDVideoCodecFFmpeg::GetFormat - Creating DXVA(%ix%i)", avctx->width, avctx->height); +- DXVA::CDecoder* dec = new DXVA::CDecoder(); ++ DXVA::CDecoder* dec = new DXVA::CDecoder(ctx->m_processInfo); + if(dec->Open(avctx, ctx->m_pCodecContext, *cur, ctx->m_uSurfacesCount)) + { ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? pixFmtName : ""); + ctx->SetHardware(dec); + return *cur; + } +@@ -151,9 +155,10 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + // mpeg4 vaapi decoding is disabled + if(*cur == AV_PIX_FMT_VAAPI_VLD && CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEVAAPI)) + { +- VAAPI::CDecoder* dec = new VAAPI::CDecoder(); ++ VAAPI::CDecoder* dec = new VAAPI::CDecoder(ctx->m_processInfo); + if(dec->Open(avctx, ctx->m_pCodecContext, *cur, ctx->m_uSurfacesCount) == true) + { ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? pixFmtName : ""); + ctx->SetHardware(dec); + return *cur; + } +@@ -165,9 +170,10 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + #ifdef TARGET_DARWIN + if (*cur == AV_PIX_FMT_VIDEOTOOLBOX && CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEVTB)) + { +- VTB::CDecoder* dec = new VTB::CDecoder(); ++ VTB::CDecoder* dec = new VTB::CDecoder(ctx->m_processInfo); + if(dec->Open(avctx, ctx->m_pCodecContext, *cur, ctx->m_uSurfacesCount)) + { ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? 
pixFmtName : ""); + ctx->SetHardware(dec); + return *cur; + } +@@ -183,6 +189,7 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + ctx->m_pCodecContext->hwaccel_context = (void *)ctx->m_options.m_opaque_pointer; + if(dec->Open(avctx, ctx->m_pCodecContext, *cur, ctx->m_uSurfacesCount)) + { ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? pixFmtName : ""); + ctx->SetHardware(dec); + return *cur; + } +@@ -193,6 +200,7 @@ enum AVPixelFormat CDVDVideoCodecFFmpeg::GetFormat( struct AVCodecContext * avct + cur++; + } + ++ ctx->m_processInfo.SetVideoPixelFormat(pixFmtName ? pixFmtName : ""); + ctx->m_decoderState = STATE_HW_FAILED; + return avcodec_default_get_format(avctx, fmt); + } +@@ -226,6 +234,7 @@ CDVDVideoCodecFFmpeg::CDVDVideoCodecFFmpeg(CProcessInfo &processInfo) : CDVDVide + m_skippedDeint = 0; + m_droppedFrames = 0; + m_interlaced = false; ++ m_DAR = 1.0; + } + + CDVDVideoCodecFFmpeg::~CDVDVideoCodecFFmpeg() +@@ -385,6 +394,9 @@ bool CDVDVideoCodecFFmpeg::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options + } + + UpdateName(); ++ ++ m_processInfo.SetVideoDecoderName(m_name, m_pHardware ? true : false); ++ m_processInfo.SetVideoDimensions(m_pCodecContext->coded_width, m_pCodecContext->coded_height); + return true; + } + +@@ -746,6 +758,12 @@ bool CDVDVideoCodecFFmpeg::GetPictureCommon(DVDVideoPicture* pDvdVideoPicture) + if (aspect_ratio <= 0.0) + aspect_ratio = (float)pDvdVideoPicture->iWidth / (float)pDvdVideoPicture->iHeight; + ++ if (m_DAR != aspect_ratio) ++ { ++ m_DAR = aspect_ratio; ++ m_processInfo.SetVideoDAR(m_DAR); ++ } ++ + /* XXX: we suppose the screen has a 1.0 pixel ratio */ // CDVDVideo will compensate it. 
+ pDvdVideoPicture->iDisplayHeight = pDvdVideoPicture->iHeight; + pDvdVideoPicture->iDisplayWidth = ((int)RINT(pDvdVideoPicture->iHeight * aspect_ratio)) & -3; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h +index 4ef2982..20bc1ff 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.h +@@ -119,6 +119,7 @@ class CDVDVideoCodecFFmpeg : public CDVDVideoCodec + bool m_requestSkipDeint; + int m_codecControlFlags; + bool m_interlaced; ++ double m_DAR; + CDVDStreamInfo m_hints; + CDVDCodecOptions m_options; + }; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp +index f8730c5..fb83b42 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.cpp +@@ -29,6 +29,7 @@ + #include <d3d11.h> + #include <Initguid.h> + #include <windows.h> ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include "cores/VideoPlayer/VideoRenderers/RenderManager.h" + #include "../DVDCodecUtils.h" + #include "DXVA.h" +@@ -689,8 +690,9 @@ CRenderPicture::~CRenderPicture() + // DXVA Decoder + //----------------------------------------------------------------------------- + +-CDecoder::CDecoder() +- : m_event(true) ++CDecoder::CDecoder(CProcessInfo& processInfo) ++ : m_event(true), ++ m_processInfo(processInfo) + { + m_event.Set(); + m_state = DXVA_OPEN; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h +index ab756f7..2170515 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DXVA.h +@@ -28,6 +28,8 @@ + #include "libavcodec/d3d11va.h" + #include "threads/Event.h" + ++class CProcessInfo; ++ + namespace DXVA { + + #define CHECK(a) \ +@@ -114,7 +116,7 @@ class CDecoder + , public ID3DResource + { + 
public: +- CDecoder(); ++ CDecoder(CProcessInfo& processInfo); + ~CDecoder(); + + // IHardwareDecoder overrides +@@ -163,6 +165,7 @@ class CDecoder + unsigned int m_surface_alignment; + CCriticalSection m_section; + CEvent m_event; ++ CProcessInfo& m_processInfo; + }; + + }; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp +index c014ce2..1b4c8e8 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.cpp +@@ -24,6 +24,7 @@ + #include "DVDVideoCodec.h" + #include "cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h" + #include "cores/VideoPlayer/DVDClock.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include "utils/log.h" + #include "utils/StringUtils.h" + #include "threads/SingleLock.h" +@@ -444,7 +445,9 @@ bool CVideoSurfaces::HasRefs() + // VAAPI + //----------------------------------------------------------------------------- + +-CDecoder::CDecoder() : m_vaapiOutput(&m_inMsgEvent) ++CDecoder::CDecoder(CProcessInfo& processInfo) : ++ m_vaapiOutput(&m_inMsgEvent), ++ m_processInfo(processInfo) + { + m_vaapiConfig.videoSurfaces = &m_videoSurfaces; + +@@ -453,6 +456,7 @@ CDecoder::CDecoder() : m_vaapiOutput(&m_inMsgEvent) + m_vaapiConfig.context = 0; + m_vaapiConfig.contextId = VA_INVALID_ID; + m_vaapiConfig.configId = VA_INVALID_ID; ++ m_vaapiConfig.processInfo = &m_processInfo; + m_avctx = NULL; + m_getBufferError = 0; + } +@@ -2016,6 +2020,7 @@ void COutput::InitCycle() + delete m_pp; + m_pp = NULL; + DropVppProcessedPictures(); ++ m_config.processInfo->SetVideoDeintMethod("unknown"); + } + if (!m_pp) + { +@@ -2034,6 +2039,17 @@ void COutput::InitCycle() + { + m_pp->Init(method); + m_currentDiMethod = method; ++ ++ if (method == VS_INTERLACEMETHOD_DEINTERLACE) ++ m_config.processInfo->SetVideoDeintMethod("yadif"); ++ else if (method == VS_INTERLACEMETHOD_RENDER_BOB) ++ m_config.processInfo->SetVideoDeintMethod("render-bob"); ++ else if 
(method == VS_INTERLACEMETHOD_VAAPI_BOB) ++ m_config.processInfo->SetVideoDeintMethod("vaapi-bob"); ++ else if (method == VS_INTERLACEMETHOD_VAAPI_MADI) ++ m_config.processInfo->SetVideoDeintMethod("vaapi-madi"); ++ else if (method == VS_INTERLACEMETHOD_VAAPI_MACI) ++ m_config.processInfo->SetVideoDeintMethod("vaapi-maci"); + } + else + { +@@ -2066,6 +2082,7 @@ void COutput::InitCycle() + { + m_pp->Init(method); + m_currentDiMethod = method; ++ m_config.processInfo->SetVideoDeintMethod("none"); + } + else + { +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.h +index cc744c7..08c5dfc 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VAAPI.h +@@ -48,6 +48,7 @@ extern "C" { + + using namespace Actor; + ++class CProcessInfo; + + #define FULLHD_WIDTH 1920 + +@@ -125,6 +126,7 @@ struct CVaapiConfig + VAProfile profile; + VAConfigAttrib attrib; + Display *x11dsp; ++ CProcessInfo *processInfo; + }; + + /** +@@ -411,7 +413,7 @@ class CDecoder + + public: + +- CDecoder(); ++ CDecoder(CProcessInfo& processInfo); + virtual ~CDecoder(); + + virtual bool Open (AVCodecContext* avctx, AVCodecContext* mainctx, const enum AVPixelFormat, unsigned int surfaces = 0); +@@ -468,6 +470,7 @@ class CDecoder + + int m_codecControl; + std::vector<EINTERLACEMETHOD> m_diMethods; ++ CProcessInfo& m_processInfo; + }; + + //----------------------------------------------------------------------------- +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.cpp +index 331b719..377c72b 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.cpp +@@ -25,6 +25,7 @@ + #include "windowing/WindowingFactory.h" + #include "guilib/GraphicContext.h" + #include "guilib/TextureManager.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include 
"cores/VideoPlayer/VideoRenderers/RenderManager.h" + #include "DVDVideoCodecFFmpeg.h" + #include "DVDClock.h" +@@ -467,13 +468,14 @@ int CVideoSurfaces::Size() + // CVDPAU + //----------------------------------------------------------------------------- + +-CDecoder::CDecoder() : m_vdpauOutput(&m_inMsgEvent) ++CDecoder::CDecoder(CProcessInfo& processInfo) : m_vdpauOutput(&m_inMsgEvent), m_processInfo(processInfo) + { + m_vdpauConfig.videoSurfaces = &m_videoSurfaces; + + m_vdpauConfigured = false; + m_DisplayState = VDPAU_OPEN; + m_vdpauConfig.context = 0; ++ m_vdpauConfig.processInfo = &m_processInfo; + } + + bool CDecoder::Open(AVCodecContext* avctx, AVCodecContext* mainctx, const enum AVPixelFormat fmt, unsigned int surfaces) +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.h +index 56601a1..59432ad 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VDPAU.h +@@ -70,6 +70,8 @@ extern "C" { + #define FULLHD_WIDTH 1920 + #define MAX_PIC_Q_LENGTH 20 //for non-interop_yuv this controls the max length of the decoded pic to render completion Q + ++class CProcessInfo; ++ + namespace VDPAU + { + +@@ -182,6 +184,7 @@ struct CVdpauConfig + uint32_t maxReferences; + bool useInteropYuv; + CVDPAUContext *context; ++ CProcessInfo *processInfo; + }; + + /** +@@ -556,7 +559,7 @@ class CDecoder + uint32_t aux; /* optional extra parameter... 
*/ + }; + +- CDecoder(); ++ CDecoder(CProcessInfo& processInfo); + virtual ~CDecoder(); + + virtual bool Open (AVCodecContext* avctx, AVCodecContext* mainctx, const enum AVPixelFormat, unsigned int surfaces = 0); +@@ -623,6 +626,7 @@ class CDecoder + CVdpauRenderPicture *m_presentPicture; + + int m_codecControl; ++ CProcessInfo& m_processInfo; + }; + + } +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.cpp +index 253aefd..287b7c1 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.cpp +@@ -21,6 +21,7 @@ + #ifdef TARGET_DARWIN + #include "platform/darwin/osx/CocoaInterface.h" + #include "platform/darwin/DarwinUtils.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include "DVDVideoCodec.h" + #include "DVDCodecs/DVDCodecUtils.h" + #include "utils/log.h" +@@ -34,7 +35,7 @@ extern "C" { + using namespace VTB; + + +-CDecoder::CDecoder() ++CDecoder::CDecoder(CProcessInfo& processInfo) : m_processInfo(processInfo) + { + m_avctx = nullptr; + } +@@ -86,6 +87,7 @@ bool CDecoder::Open(AVCodecContext *avctx, AVCodecContext* mainctx, enum AVPixel + mainctx->pix_fmt = fmt; + mainctx->hwaccel_context = avctx->hwaccel_context; + ++ m_processInfo.SetVideoDeintMethod("none"); + return true; + } + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.h +index 1e097d4..bad295b 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/VTB.h +@@ -23,6 +23,8 @@ + + #include "DVDVideoCodecFFmpeg.h" + ++class CProcessInfo; ++ + namespace VTB + { + +@@ -30,7 +32,7 @@ class CDecoder + : public CDVDVideoCodecFFmpeg::IHardwareDecoder + { + public: +- CDecoder(); ++ CDecoder(CProcessInfo& processInfo); + ~CDecoder(); + virtual bool Open(AVCodecContext* avctx, AVCodecContext* mainctx, const enum AVPixelFormat, unsigned int surfaces = 0); + virtual int Decode(AVCodecContext* 
avctx, AVFrame* frame); +@@ -43,7 +45,7 @@ class CDecoder + protected: + unsigned m_renderbuffers_count; + AVCodecContext *m_avctx; +- ++ CProcessInfo& m_processInfo; + }; + + } +diff --git a/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp b/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp +index ceaa256..fc1f5dd 100644 +--- a/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp ++++ b/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp +@@ -19,6 +19,8 @@ + */ + + #include "ProcessInfo.h" ++#include "cores/DataCacheCore.h" ++#include "threads/SingleLock.h" + + // Override for platform ports + #if !defined(PLATFORM_OVERRIDE) +@@ -51,3 +53,200 @@ bool CProcessInfo::AllowDTSHDDecode() + { + return true; + } ++ ++void CProcessInfo::ResetVideoCodecInfo() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoIsHWDecoder = false; ++ m_videoDecoderName = "unknown"; ++ m_videoDeintMethod = "unknown"; ++ m_videoPixelFormat = "unknown"; ++ m_videoWidth = 0; ++ m_videoHeight = 0; ++ m_videoFPS = 0.0; ++ ++ g_dataCacheCore.SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); ++ g_dataCacheCore.SetVideoDeintMethod(m_videoDeintMethod); ++ g_dataCacheCore.SetVideoPixelFormat(m_videoPixelFormat); ++ g_dataCacheCore.SetVideoDimensions(m_videoWidth, m_videoHeight); ++ g_dataCacheCore.SetVideoFps(m_videoFPS); ++} ++ ++void CProcessInfo::SetVideoDecoderName(std::string name, bool isHw) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoIsHWDecoder = isHw; ++ m_videoDecoderName = name; ++ ++ g_dataCacheCore.SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); ++} ++ ++std::string CProcessInfo::GetVideoDecoderName() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoDecoderName; ++} ++ ++bool CProcessInfo::IsVideoHwDecoder() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoIsHWDecoder; ++} ++ ++void CProcessInfo::SetVideoDeintMethod(std::string method) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoDeintMethod = method; ++ ++ 
g_dataCacheCore.SetVideoDeintMethod(m_videoDeintMethod); ++} ++ ++std::string CProcessInfo::GetVideoDeintMethod() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoDeintMethod; ++} ++ ++void CProcessInfo::SetVideoPixelFormat(std::string pixFormat) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoPixelFormat = pixFormat; ++ ++ g_dataCacheCore.SetVideoPixelFormat(m_videoPixelFormat); ++} ++ ++std::string CProcessInfo::GetVideoPixelFormat() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoPixelFormat; ++} ++ ++void CProcessInfo::SetVideoDimensions(int width, int height) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoWidth = width; ++ m_videoHeight = height; ++ ++ g_dataCacheCore.SetVideoDimensions(m_videoWidth, m_videoHeight); ++} ++ ++void CProcessInfo::GetVideoDimensions(int &width, int &height) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ width = m_videoWidth; ++ height = m_videoHeight; ++} ++ ++void CProcessInfo::SetVideoFps(float fps) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoFPS = fps; ++ ++ g_dataCacheCore.SetVideoFps(m_videoFPS); ++} ++ ++float CProcessInfo::GetVideoFps() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoFPS; ++} ++ ++void CProcessInfo::SetVideoDAR(float dar) ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ m_videoDAR = dar; ++ ++ CServiceBroker::GetDataCacheCore().SetVideoDAR(m_videoDAR); ++} ++ ++float CProcessInfo::GetVideoDAR() ++{ ++ CSingleLock lock(m_videoCodecSection); ++ ++ return m_videoDAR; ++} ++ ++// player audio info ++void CProcessInfo::ResetAudioCodecInfo() ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ m_audioDecoderName = "unknown"; ++ m_audioChannels = "unknown"; ++ m_audioSampleRate = 0;; ++ m_audioBitsPerSample = 0; ++ ++ g_dataCacheCore.SetAudioDecoderName(m_audioDecoderName); ++ g_dataCacheCore.SetAudioChannels(m_audioChannels); ++ g_dataCacheCore.SetAudioSampleRate(m_audioSampleRate); ++ 
g_dataCacheCore.SetAudioBitsPerSample(m_audioBitsPerSample); ++} ++ ++void CProcessInfo::SetAudioDecoderName(std::string name) ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ m_audioDecoderName = name; ++} ++ ++std::string CProcessInfo::GetAudioDecoderName() ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ return m_audioDecoderName; ++} ++ ++void CProcessInfo::SetAudioChannels(std::string channels) ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ m_audioChannels = channels; ++} ++ ++std::string CProcessInfo::GetAudioChannels() ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ return m_audioChannels; ++} ++ ++void CProcessInfo::SetAudioSampleRate(int sampleRate) ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ m_audioSampleRate = sampleRate; ++} ++ ++int CProcessInfo::GetAudioSampleRate() ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ return m_audioSampleRate; ++} ++ ++void CProcessInfo::SetAudioBitsPerSample(int bitsPerSample) ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ m_audioBitsPerSample = bitsPerSample; ++} ++ ++int CProcessInfo::GetAudioBitsPerSampe() ++{ ++ CSingleLock lock(m_audioCodecSection); ++ ++ return m_audioBitsPerSample; ++} +diff --git a/xbmc/cores/VideoPlayer/Process/ProcessInfo.h b/xbmc/cores/VideoPlayer/Process/ProcessInfo.h +index b8a4e46..0ec9a2c 100644 +--- a/xbmc/cores/VideoPlayer/Process/ProcessInfo.h ++++ b/xbmc/cores/VideoPlayer/Process/ProcessInfo.h +@@ -20,6 +20,8 @@ + #pragma once + + #include "cores/IPlayer.h" ++#include "threads/CriticalSection.h" ++#include <string> + + class CProcessInfo + { +@@ -29,6 +31,51 @@ class CProcessInfo + virtual EINTERLACEMETHOD GetFallbackDeintMethod(); + virtual bool AllowDTSHDDecode(); + ++ // player video info ++ void ResetVideoCodecInfo(); ++ void SetVideoDecoderName(std::string name, bool isHw); ++ std::string GetVideoDecoderName(); ++ bool IsVideoHwDecoder(); ++ void SetVideoDeintMethod(std::string method); ++ std::string GetVideoDeintMethod(); ++ void 
SetVideoPixelFormat(std::string pixFormat); ++ std::string GetVideoPixelFormat(); ++ void SetVideoDimensions(int width, int height); ++ void GetVideoDimensions(int &width, int &height); ++ void SetVideoFps(float fps); ++ float GetVideoFps(); ++ void SetVideoDAR(float dar); ++ float GetVideoDAR(); ++ ++ // player audio info ++ void ResetAudioCodecInfo(); ++ void SetAudioDecoderName(std::string name); ++ std::string GetAudioDecoderName(); ++ void SetAudioChannels(std::string channels); ++ std::string GetAudioChannels(); ++ void SetAudioSampleRate(int sampleRate); ++ int GetAudioSampleRate(); ++ void SetAudioBitsPerSample(int bitsPerSample); ++ int GetAudioBitsPerSampe(); ++ + protected: + CProcessInfo(); ++ ++ // player video info ++ bool m_videoIsHWDecoder; ++ std::string m_videoDecoderName; ++ std::string m_videoDeintMethod; ++ std::string m_videoPixelFormat; ++ int m_videoWidth; ++ int m_videoHeight; ++ float m_videoFPS; ++ float m_videoDAR; ++ CCriticalSection m_videoCodecSection; ++ ++ // player audio info ++ std::string m_audioDecoderName; ++ std::string m_audioChannels; ++ int m_audioSampleRate; ++ int m_audioBitsPerSample; ++ CCriticalSection m_audioCodecSection; + }; +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +index fb1d993..2422815 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +@@ -90,11 +90,13 @@ CVideoPlayerAudio::~CVideoPlayerAudio() + + bool CVideoPlayerAudio::OpenStream(CDVDStreamInfo &hints) + { ++ m_processInfo.ResetAudioCodecInfo(); ++ + CLog::Log(LOGNOTICE, "Finding audio codec for: %i", hints.codec); + bool allowpassthrough = !CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEDISPLAYASCLOCK); + if (hints.realtime) + allowpassthrough = false; +- CDVDAudioCodec* codec = CDVDFactoryCodec::CreateAudioCodec(hints, allowpassthrough, m_processInfo.AllowDTSHDDecode()); ++ CDVDAudioCodec* codec = 
CDVDFactoryCodec::CreateAudioCodec(hints, m_processInfo, allowpassthrough, m_processInfo.AllowDTSHDDecode()); + if(!codec) + { + CLog::Log(LOGERROR, "Unsupported audio codec"); +@@ -451,6 +453,11 @@ void CVideoPlayerAudio::Process() + + m_streaminfo.channels = audioframe.format.m_channelLayout.Count(); + ++ ++ m_processInfo.SetAudioChannels(audioframe.format.m_channelLayout); ++ m_processInfo.SetAudioSampleRate(audioframe.format.m_sampleRate); ++ m_processInfo.SetAudioBitsPerSample(audioframe.bits_per_sample); ++ + m_messageParent.Put(new CDVDMsg(CDVDMsg::PLAYER_AVCHANGE)); + } + +@@ -595,7 +602,7 @@ bool CVideoPlayerAudio::SwitchCodecIfNeeded() + bool allowpassthrough = !CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEDISPLAYASCLOCK); + if (m_streaminfo.realtime) + allowpassthrough = false; +- CDVDAudioCodec *codec = CDVDFactoryCodec::CreateAudioCodec(m_streaminfo, allowpassthrough, m_processInfo.AllowDTSHDDecode()); ++ CDVDAudioCodec *codec = CDVDFactoryCodec::CreateAudioCodec(m_streaminfo, m_processInfo, allowpassthrough, m_processInfo.AllowDTSHDDecode()); + if (!codec || codec->NeedPassthrough() == m_pAudioCodec->NeedPassthrough()) { + // passthrough state has not changed + delete codec; +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +index 8e5d33dc..fd260d43 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +@@ -120,6 +120,8 @@ double CVideoPlayerVideo::GetOutputDelay() + + bool CVideoPlayerVideo::OpenStream( CDVDStreamInfo &hint ) + { ++ m_processInfo.ResetVideoCodecInfo(); ++ + CRenderInfo info; + info = m_renderManager.GetRenderInfo(); + +@@ -156,11 +158,13 @@ void CVideoPlayerVideo::OpenStream(CDVDStreamInfo &hint, CDVDVideoCodec* codec) + { + m_fFrameRate = DVD_TIME_BASE / CDVDCodecUtils::NormalizeFrameduration((double)DVD_TIME_BASE * hint.fpsscale / hint.fpsrate); + m_bFpsInvalid = false; ++ 
m_processInfo.SetVideoFps(m_fFrameRate); + } + else + { + m_fFrameRate = 25; + m_bFpsInvalid = true; ++ m_processInfo.SetVideoFps(0); + } + + m_pullupCorrection.ResetVFRDetection(); +@@ -1023,6 +1027,7 @@ void CVideoPlayerVideo::CalcFrameRate() + CLog::Log(LOGDEBUG,"%s framerate was:%f calculated:%f", __FUNCTION__, m_fFrameRate, m_fStableFrameRate / m_iFrameRateCount); + m_fFrameRate = m_fStableFrameRate / m_iFrameRateCount; + m_bFpsInvalid = false; ++ m_processInfo.SetVideoFps(m_fFrameRate); + } + + //reset the stored framerates +diff --git a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +index 50a5b17..6161962 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +@@ -100,6 +100,7 @@ bool OMXPlayerAudio::OpenStream(CDVDStreamInfo &hints) + { + m_bad_state = false; + ++ m_processInfo.ResetAudioCodecInfo(); + COMXAudioCodecOMX *codec = new COMXAudioCodecOMX(); + + if(!codec || !codec->Open(hints)) +diff --git a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +index 236f1b3..0ec7f15 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +@@ -515,6 +515,8 @@ bool OMXPlayerVideo::OpenDecoder() + if(!m_av_clock) + return false; + ++ m_processInfo.ResetVideoCodecInfo(); ++ + if (m_hints.fpsrate && m_hints.fpsscale) + m_fFrameRate = DVD_TIME_BASE / CDVDCodecUtils::NormalizeFrameduration((double)DVD_TIME_BASE * m_hints.fpsscale / m_hints.fpsrate); + else +@@ -525,6 +527,8 @@ bool OMXPlayerVideo::OpenDecoder() + CLog::Log(LOGINFO, "OMXPlayerVideo::OpenDecoder : Invalid framerate %d, using forced 25fps and just trust timestamps\n", (int)m_fFrameRate); + m_fFrameRate = 25; + } ++ m_processInfo.SetVideoFps(m_fFrameRate); ++ + // use aspect in stream if available + if (m_hints.forced_aspect) + m_fForcedAspectRatio = m_hints.aspect; +@@ -705,6 +709,9 @@ void OMXPlayerVideo::ResolutionUpdateCallBack(uint32_t width, 
uint32_t height, f + m_bAllowFullscreen = false; // only allow on first configure + } + ++ m_processInfo.SetVideoDimensions(width, height); ++ m_processInfo.SetVideoAspectRatio(display_aspect); ++ + unsigned int iDisplayWidth = width; + unsigned int iDisplayHeight = height; + +diff --git a/xbmc/cores/paplayer/VideoPlayerCodec.cpp b/xbmc/cores/paplayer/VideoPlayerCodec.cpp +index 9056cf8..32add6c 100644 +--- a/xbmc/cores/paplayer/VideoPlayerCodec.cpp ++++ b/xbmc/cores/paplayer/VideoPlayerCodec.cpp +@@ -46,6 +46,8 @@ VideoPlayerCodec::VideoPlayerCodec() + m_pResampler = NULL; + m_needConvert = false; + m_channels = 0; ++ ++ m_processInfo.reset(CProcessInfo::CreateInstance()); + } + + VideoPlayerCodec::~VideoPlayerCodec() +@@ -165,7 +167,7 @@ bool VideoPlayerCodec::Init(const CFileItem &file, unsigned int filecache) + + CDVDStreamInfo hint(*pStream, true); + +- m_pAudioCodec = CDVDFactoryCodec::CreateAudioCodec(hint); ++ m_pAudioCodec = CDVDFactoryCodec::CreateAudioCodec(hint, *m_processInfo.get()); + if (!m_pAudioCodec) + { + CLog::Log(LOGERROR, "%s: Could not create audio codec", __FUNCTION__); +diff --git a/xbmc/cores/paplayer/VideoPlayerCodec.h b/xbmc/cores/paplayer/VideoPlayerCodec.h +index 81379bd..042f4f7 100644 +--- a/xbmc/cores/paplayer/VideoPlayerCodec.h ++++ b/xbmc/cores/paplayer/VideoPlayerCodec.h +@@ -73,6 +73,8 @@ class VideoPlayerCodec : public ICodec + bool m_needConvert; + AEAudioFormat m_srcFormat; + int m_channels; ++ ++ std::unique_ptr<CProcessInfo> m_processInfo; + }; + + #endif +diff --git a/xbmc/guiinfo/GUIInfoLabels.h b/xbmc/guiinfo/GUIInfoLabels.h +index 27d6bc2..96edafa 100644 +--- a/xbmc/guiinfo/GUIInfoLabels.h ++++ b/xbmc/guiinfo/GUIInfoLabels.h +@@ -562,6 +562,20 @@ + #define RDS_CHANNEL_COUNTRY (RDS_DATA_START + 44) + #define RDS_DATA_END RDS_CHANNEL_COUNTRY + ++#define PLAYER_PROCESS 1500 ++#define PLAYER_PROCESS_VIDEODECODER (PLAYER_PROCESS) ++#define PLAYER_PROCESS_DEINTMETHOD (PLAYER_PROCESS + 1) ++#define PLAYER_PROCESS_PIXELFORMAT 
(PLAYER_PROCESS + 2) ++#define PLAYER_PROCESS_VIDEOWIDTH (PLAYER_PROCESS + 3) ++#define PLAYER_PROCESS_VIDEOHEIGHT (PLAYER_PROCESS + 4) ++#define PLAYER_PROCESS_VIDEOFPS (PLAYER_PROCESS + 5) ++#define PLAYER_PROCESS_VIDEODAR (PLAYER_PROCESS + 6) ++#define PLAYER_PROCESS_VIDEOHWDECODER (PLAYER_PROCESS + 7) ++#define PLAYER_PROCESS_AUDIODECODER (PLAYER_PROCESS + 8) ++#define PLAYER_PROCESS_AUDIOCHANNELS (PLAYER_PROCESS + 9) ++#define PLAYER_PROCESS_AUDIOSAMPLERATE (PLAYER_PROCESS + 10) ++#define PLAYER_PROCESS_AUDIOBITSPERSAMPLE (PLAYER_PROCESS + 11) ++ + #define WINDOW_PROPERTY 9993 + #define WINDOW_IS_TOPMOST 9994 + #define WINDOW_IS_VISIBLE 9995 + +From 6855680f52c826aa2a2e2684c5607e00a6f8fff1 Mon Sep 17 00:00:00 2001 +From: Rainer Hochecker <fernetmenta@online.de> +Date: Wed, 22 Jun 2016 18:48:10 +0200 +Subject: [PATCH 2/3] remove DataCacheCore from systemGlobals + +--- + xbmc/Application.cpp | 1 + + xbmc/GUIInfoManager.cpp | 24 ++++++++++----------- + xbmc/ServiceBroker.cpp | 5 +++++ + xbmc/ServiceBroker.h | 2 ++ + xbmc/ServiceManager.cpp | 7 +++++++ + xbmc/ServiceManager.h | 3 +++ + xbmc/SystemGlobals.cpp | 4 ---- + xbmc/cores/DataCacheCore.cpp | 6 ++++++ + xbmc/cores/DataCacheCore.h | 5 ++--- + xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp | 29 +++++++++++++------------- + xbmc/cores/VideoPlayer/VideoPlayer.cpp | 14 +++++++------ + xbmc/cores/paplayer/PAPlayer.cpp | 3 ++- + 12 files changed, 63 insertions(+), 40 deletions(-) + +diff --git a/xbmc/Application.cpp b/xbmc/Application.cpp +index cdcf53e..baada48 100644 +--- a/xbmc/Application.cpp ++++ b/xbmc/Application.cpp +@@ -37,6 +37,7 @@ + #include "cores/AudioEngine/AEFactory.h" + #include "cores/AudioEngine/Engines/ActiveAE/AudioDSPAddons/ActiveAEDSP.h" + #include "cores/AudioEngine/Utils/AEUtil.h" ++#include "cores/DataCacheCore.h" + #include "cores/playercorefactory/PlayerCoreFactory.h" + #include "PlayListPlayer.h" + #include "Autorun.h" +diff --git a/xbmc/GUIInfoManager.cpp b/xbmc/GUIInfoManager.cpp 
+index ab5cb12..7d42106 100644 +--- a/xbmc/GUIInfoManager.cpp ++++ b/xbmc/GUIInfoManager.cpp +@@ -6018,25 +6018,25 @@ std::string CGUIInfoManager::GetLabel(int info, int contextWindow, std::string * + } + break; + case PLAYER_PROCESS_VIDEODECODER: +- strLabel = g_dataCacheCore.GetVideoDecoderName(); ++ strLabel = CServiceBroker::GetDataCacheCore().GetVideoDecoderName(); + break; + case PLAYER_PROCESS_DEINTMETHOD: +- strLabel = g_dataCacheCore.GetVideoDeintMethod(); ++ strLabel = CServiceBroker::GetDataCacheCore().GetVideoDeintMethod(); + break; + case PLAYER_PROCESS_PIXELFORMAT: +- strLabel = g_dataCacheCore.GetVideoPixelFormat(); ++ strLabel = CServiceBroker::GetDataCacheCore().GetVideoPixelFormat(); + break; + case PLAYER_PROCESS_VIDEOFPS: +- strLabel = StringUtils::FormatNumber(g_dataCacheCore.GetVideoFps()); ++ strLabel = StringUtils::FormatNumber(CServiceBroker::GetDataCacheCore().GetVideoFps()); + break; + case PLAYER_PROCESS_VIDEODAR: + strLabel = StringUtils::FormatNumber(CServiceBroker::GetDataCacheCore().GetVideoDAR()); + break; + case PLAYER_PROCESS_AUDIODECODER: +- strLabel = g_dataCacheCore.GetAudioDecoderName(); ++ strLabel = CServiceBroker::GetDataCacheCore().GetAudioDecoderName(); + break; + case PLAYER_PROCESS_AUDIOCHANNELS: +- strLabel = g_dataCacheCore.GetAudioChannels(); ++ strLabel = CServiceBroker::GetDataCacheCore().GetAudioChannels(); + break; + case RDS_AUDIO_LANG: + case RDS_CHANNEL_COUNTRY: +@@ -6601,16 +6601,16 @@ bool CGUIInfoManager::GetInt(int &value, int info, int contextWindow, const CGUI + value = g_powerManager.BatteryLevel(); + return true; + case PLAYER_PROCESS_VIDEOWIDTH: +- value = g_dataCacheCore.GetVideoWidth(); ++ value = CServiceBroker::GetDataCacheCore().GetVideoWidth(); + return true; + case PLAYER_PROCESS_VIDEOHEIGHT: +- value = g_dataCacheCore.GetVideoHeight(); ++ value = CServiceBroker::GetDataCacheCore().GetVideoHeight(); + return true; + case PLAYER_PROCESS_AUDIOSAMPLERATE: +- value = 
g_dataCacheCore.GetAudioSampleRate(); ++ value = CServiceBroker::GetDataCacheCore().GetAudioSampleRate(); + return true; + case PLAYER_PROCESS_AUDIOBITSPERSAMPLE: +- value = g_dataCacheCore.GetAudioBitsPerSampe(); ++ value = CServiceBroker::GetDataCacheCore().GetAudioBitsPerSampe(); + return true; + } + return false; +@@ -7148,7 +7148,7 @@ bool CGUIInfoManager::GetBool(int condition1, int contextWindow, const CGUIListI + !m_currentFile->GetPVRRadioRDSInfoTag()->GetPhoneStudio().empty()); + break; + case PLAYER_PROCESS_VIDEOHWDECODER: +- bReturn = g_dataCacheCore.IsVideoHwDecoder(); ++ bReturn = CServiceBroker::GetDataCacheCore().IsVideoHwDecoder(); + break; + default: // default, use integer value different from 0 as true + { +@@ -9150,7 +9150,7 @@ void CGUIInfoManager::UpdateAVInfo() + { + if(g_application.m_pPlayer->IsPlaying()) + { +- if (g_dataCacheCore.HasAVInfoChanges()) ++ if (CServiceBroker::GetDataCacheCore().HasAVInfoChanges()) + { + SPlayerVideoStreamInfo video; + SPlayerAudioStreamInfo audio; +diff --git a/xbmc/ServiceBroker.cpp b/xbmc/ServiceBroker.cpp +index fff03c3..dd5c640 100644 +--- a/xbmc/ServiceBroker.cpp ++++ b/xbmc/ServiceBroker.cpp +@@ -50,3 +50,8 @@ ActiveAE::CActiveAEDSP &CServiceBroker::GetADSP() + { + return g_application.m_ServiceManager->GetADSPManager(); + } ++ ++CDataCacheCore &CServiceBroker::GetDataCacheCore() ++{ ++ return g_application.m_ServiceManager->GetDataCacheCore(); ++} +diff --git a/xbmc/ServiceBroker.h b/xbmc/ServiceBroker.h +index 32add30..9f9de19 100644 +--- a/xbmc/ServiceBroker.h ++++ b/xbmc/ServiceBroker.h +@@ -40,6 +40,7 @@ namespace PVR + } + + class XBPython; ++class CDataCacheCore; + + class CServiceBroker + { +@@ -50,4 +51,5 @@ class CServiceBroker + static XBPython &GetXBPython(); + static PVR::CPVRManager &GetPVRManager(); + static ActiveAE::CActiveAEDSP& GetADSP(); ++ static CDataCacheCore& GetDataCacheCore(); + }; +diff --git a/xbmc/ServiceManager.cpp b/xbmc/ServiceManager.cpp +index 3cc188c..4cf4440 100644 
+--- a/xbmc/ServiceManager.cpp ++++ b/xbmc/ServiceManager.cpp +@@ -21,6 +21,7 @@ + #include "ServiceManager.h" + #include "addons/BinaryAddonCache.h" + #include "cores/AudioEngine/Engines/ActiveAE/AudioDSPAddons/ActiveAEDSP.h" ++#include "cores/DataCacheCore.h" + #include "utils/log.h" + #include "interfaces/AnnouncementManager.h" + #include "interfaces/generic/ScriptInvocationManager.h" +@@ -49,6 +50,7 @@ bool CServiceManager::Init2() + + m_ADSPManager.reset(new ActiveAE::CActiveAEDSP()); + m_PVRManager.reset(new PVR::CPVRManager()); ++ m_dataCacheCore.reset(new CDataCacheCore()); + + m_binaryAddonCache.reset( new ADDON::CBinaryAddonCache()); + m_binaryAddonCache->Init(); +@@ -104,3 +106,8 @@ ActiveAE::CActiveAEDSP& CServiceManager::GetADSPManager() + { + return *m_ADSPManager; + } ++ ++CDataCacheCore& CServiceManager::GetDataCacheCore() ++{ ++ return *m_dataCacheCore; ++} +\ No newline at end of file +diff --git a/xbmc/ServiceManager.h b/xbmc/ServiceManager.h +index 9b7806f6..5c7a9a8 100644 +--- a/xbmc/ServiceManager.h ++++ b/xbmc/ServiceManager.h +@@ -42,6 +42,7 @@ class CPVRManager; + } + + class XBPython; ++class CDataCacheCore; + + class CServiceManager + { +@@ -56,6 +57,7 @@ class CServiceManager + XBPython& GetXBPython(); + PVR::CPVRManager& GetPVRManager(); + ActiveAE::CActiveAEDSP& GetADSPManager(); ++ CDataCacheCore& GetDataCacheCore(); + + protected: + std::unique_ptr<ADDON::CAddonMgr> m_addonMgr; +@@ -64,4 +66,5 @@ class CServiceManager + std::unique_ptr<XBPython> m_XBPython; + std::unique_ptr<PVR::CPVRManager> m_PVRManager; + std::unique_ptr<ActiveAE::CActiveAEDSP> m_ADSPManager; ++ std::unique_ptr<CDataCacheCore> m_dataCacheCore; + }; +diff --git a/xbmc/SystemGlobals.cpp b/xbmc/SystemGlobals.cpp +index 9354471..3d1cb55 100644 +--- a/xbmc/SystemGlobals.cpp ++++ b/xbmc/SystemGlobals.cpp +@@ -19,7 +19,6 @@ + */ + #include "system.h" + #include "SectionLoader.h" +-#include "cores/DataCacheCore.h" + #include "GUILargeTextureManager.h" + #include 
"guilib/TextureManager.h" + #include "utils/AlarmClock.h" +@@ -71,6 +70,3 @@ std::map<std::string, std::string> CSpecialProtocol::m_pathMap; + #endif + + CZipManager g_ZipManager; +- +- CDataCacheCore g_dataCacheCore; +- +diff --git a/xbmc/cores/DataCacheCore.cpp b/xbmc/cores/DataCacheCore.cpp +index cbb0a4f..43a24f1 100644 +--- a/xbmc/cores/DataCacheCore.cpp ++++ b/xbmc/cores/DataCacheCore.cpp +@@ -20,12 +20,18 @@ + + #include "cores/DataCacheCore.h" + #include "threads/SingleLock.h" ++#include "ServiceBroker.h" + + CDataCacheCore::CDataCacheCore() + { + m_hasAVInfoChanges = false; + } + ++CDataCacheCore& GetInstance() ++{ ++ return CServiceBroker::GetDataCacheCore(); ++} ++ + bool CDataCacheCore::HasAVInfoChanges() + { + bool ret = m_hasAVInfoChanges; +diff --git a/xbmc/cores/DataCacheCore.h b/xbmc/cores/DataCacheCore.h +index e16c81f..646f512 100644 +--- a/xbmc/cores/DataCacheCore.h ++++ b/xbmc/cores/DataCacheCore.h +@@ -28,6 +28,7 @@ class CDataCacheCore + { + public: + CDataCacheCore(); ++ static CDataCacheCore& GetInstance(); + bool HasAVInfoChanges(); + void SignalVideoInfoChange(); + void SignalAudioInfoChange(); +@@ -82,6 +83,4 @@ class CDataCacheCore + int sampleRate; + int bitsPerSample; + } m_playerAudioInfo; +-}; +- +-extern CDataCacheCore g_dataCacheCore; +\ No newline at end of file ++}; +\ No newline at end of file +diff --git a/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp b/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp +index fc1f5dd..bfd7d58 100644 +--- a/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp ++++ b/xbmc/cores/VideoPlayer/Process/ProcessInfo.cpp +@@ -19,6 +19,7 @@ + */ + + #include "ProcessInfo.h" ++#include "ServiceBroker.h" + #include "cores/DataCacheCore.h" + #include "threads/SingleLock.h" + +@@ -66,11 +67,11 @@ void CProcessInfo::ResetVideoCodecInfo() + m_videoHeight = 0; + m_videoFPS = 0.0; + +- g_dataCacheCore.SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); +- g_dataCacheCore.SetVideoDeintMethod(m_videoDeintMethod); 
+- g_dataCacheCore.SetVideoPixelFormat(m_videoPixelFormat); +- g_dataCacheCore.SetVideoDimensions(m_videoWidth, m_videoHeight); +- g_dataCacheCore.SetVideoFps(m_videoFPS); ++ CServiceBroker::GetDataCacheCore().SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); ++ CServiceBroker::GetDataCacheCore().SetVideoDeintMethod(m_videoDeintMethod); ++ CServiceBroker::GetDataCacheCore().SetVideoPixelFormat(m_videoPixelFormat); ++ CServiceBroker::GetDataCacheCore().SetVideoDimensions(m_videoWidth, m_videoHeight); ++ CServiceBroker::GetDataCacheCore().SetVideoFps(m_videoFPS); + } + + void CProcessInfo::SetVideoDecoderName(std::string name, bool isHw) +@@ -80,7 +81,7 @@ void CProcessInfo::SetVideoDecoderName(std::string name, bool isHw) + m_videoIsHWDecoder = isHw; + m_videoDecoderName = name; + +- g_dataCacheCore.SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); ++ CServiceBroker::GetDataCacheCore().SetVideoDecoderName(m_videoDecoderName, m_videoIsHWDecoder); + } + + std::string CProcessInfo::GetVideoDecoderName() +@@ -103,7 +104,7 @@ void CProcessInfo::SetVideoDeintMethod(std::string method) + + m_videoDeintMethod = method; + +- g_dataCacheCore.SetVideoDeintMethod(m_videoDeintMethod); ++ CServiceBroker::GetDataCacheCore().SetVideoDeintMethod(m_videoDeintMethod); + } + + std::string CProcessInfo::GetVideoDeintMethod() +@@ -119,7 +120,7 @@ void CProcessInfo::SetVideoPixelFormat(std::string pixFormat) + + m_videoPixelFormat = pixFormat; + +- g_dataCacheCore.SetVideoPixelFormat(m_videoPixelFormat); ++ CServiceBroker::GetDataCacheCore().SetVideoPixelFormat(m_videoPixelFormat); + } + + std::string CProcessInfo::GetVideoPixelFormat() +@@ -136,7 +137,7 @@ void CProcessInfo::SetVideoDimensions(int width, int height) + m_videoWidth = width; + m_videoHeight = height; + +- g_dataCacheCore.SetVideoDimensions(m_videoWidth, m_videoHeight); ++ CServiceBroker::GetDataCacheCore().SetVideoDimensions(m_videoWidth, m_videoHeight); + } + + void 
CProcessInfo::GetVideoDimensions(int &width, int &height) +@@ -153,7 +154,7 @@ void CProcessInfo::SetVideoFps(float fps) + + m_videoFPS = fps; + +- g_dataCacheCore.SetVideoFps(m_videoFPS); ++ CServiceBroker::GetDataCacheCore().SetVideoFps(m_videoFPS); + } + + float CProcessInfo::GetVideoFps() +@@ -189,10 +190,10 @@ void CProcessInfo::ResetAudioCodecInfo() + m_audioSampleRate = 0;; + m_audioBitsPerSample = 0; + +- g_dataCacheCore.SetAudioDecoderName(m_audioDecoderName); +- g_dataCacheCore.SetAudioChannels(m_audioChannels); +- g_dataCacheCore.SetAudioSampleRate(m_audioSampleRate); +- g_dataCacheCore.SetAudioBitsPerSample(m_audioBitsPerSample); ++ CServiceBroker::GetDataCacheCore().SetAudioDecoderName(m_audioDecoderName); ++ CServiceBroker::GetDataCacheCore().SetAudioChannels(m_audioChannels); ++ CServiceBroker::GetDataCacheCore().SetAudioSampleRate(m_audioSampleRate); ++ CServiceBroker::GetDataCacheCore().SetAudioBitsPerSample(m_audioBitsPerSample); + } + + void CProcessInfo::SetAudioDecoderName(std::string name) +diff --git a/xbmc/cores/VideoPlayer/VideoPlayer.cpp b/xbmc/cores/VideoPlayer/VideoPlayer.cpp +index 9ed9176..5205414 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayer.cpp +@@ -45,6 +45,7 @@ + #include "guilib/GUIWindowManager.h" + #include "guilib/StereoscopicsManager.h" + #include "Application.h" ++#include "ServiceBroker.h" + #include "messaging/ApplicationMessenger.h" + + #include "DVDDemuxers/DVDDemuxCC.h" +@@ -81,6 +82,7 @@ + #include "cores/omxplayer/OMXHelper.h" + #endif + #include "VideoPlayerAudio.h" ++#include "cores/DataCacheCore.h" + #include "windowing/WindowingFactory.h" + #include "DVDCodecs/DVDCodecUtils.h" + +@@ -545,8 +547,8 @@ void CSelectionStreams::Update(CDVDInputStream* input, CDVDDemux* demuxer, std:: + Update(s); + } + } +- g_dataCacheCore.SignalAudioInfoChange(); +- g_dataCacheCore.SignalVideoInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); ++ 
CServiceBroker::GetDataCacheCore().SignalVideoInfoChange(); + } + + int CSelectionStreams::CountSource(StreamType type, StreamSource source) const +@@ -2872,8 +2874,8 @@ void CVideoPlayer::HandleMessages() + else if (pMsg->IsType(CDVDMsg::PLAYER_AVCHANGE)) + { + UpdateStreamInfos(); +- g_dataCacheCore.SignalAudioInfoChange(); +- g_dataCacheCore.SignalVideoInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalVideoInfoChange(); + } + + pMsg->Release(); +@@ -3596,8 +3598,8 @@ bool CVideoPlayer::OpenStream(CCurrentStream& current, int64_t demuxerId, int iS + } + } + +- g_dataCacheCore.SignalAudioInfoChange(); +- g_dataCacheCore.SignalVideoInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalVideoInfoChange(); + + return res; + } +diff --git a/xbmc/cores/paplayer/PAPlayer.cpp b/xbmc/cores/paplayer/PAPlayer.cpp +index 17e1cfc..6bd5180 100644 +--- a/xbmc/cores/paplayer/PAPlayer.cpp ++++ b/xbmc/cores/paplayer/PAPlayer.cpp +@@ -21,6 +21,7 @@ + #include "PAPlayer.h" + #include "CodecFactory.h" + #include "FileItem.h" ++#include "ServiceBroker.h" + #include "settings/AdvancedSettings.h" + #include "settings/Settings.h" + #include "music/tags/MusicInfoTag.h" +@@ -1130,7 +1131,7 @@ void PAPlayer::UpdateGUIData(StreamInfo *si) + total -= m_currentStream->m_startOffset; + m_playerGUIData.m_totalTime = total; + +- g_dataCacheCore.SignalAudioInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); + } + + void PAPlayer::OnJobComplete(unsigned int jobID, bool success, CJob *job) + +From 0337c933aaf3a438edba894780838f5c1fbb00f5 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 24 Jun 2016 19:37:32 +0100 +Subject: [PATCH 3/3] rbp: Update to use new processInfo data cache + +--- + .../VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 19 +++++++++++++++++ + xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp | 3 ++- + 
xbmc/cores/omxplayer/OMXAudioCodecOMX.h | 4 +++- + xbmc/cores/omxplayer/OMXPlayerAudio.cpp | 24 +++++++++++++++++++--- + xbmc/cores/omxplayer/OMXPlayerVideo.cpp | 10 ++++----- + xbmc/cores/omxplayer/OMXVideo.cpp | 20 +++++++++++++++++- + xbmc/cores/omxplayer/OMXVideo.h | 4 +++- + 7 files changed, 72 insertions(+), 12 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index 3d026cd..51ded6b2 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -177,6 +177,10 @@ void CMMALVideo::PortSettingsChanged(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *bu + m_decoded_height = m_es_format->es->video.crop.height; + m_decoded_aligned_width = m_es_format->es->video.width; + m_decoded_aligned_height = m_es_format->es->video.height; ++ ++ m_processInfo.SetVideoDimensions(m_decoded_width, m_decoded_height); ++ m_processInfo.SetVideoDAR(m_aspect_ratio); ++ + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) + CLog::Log(LOGDEBUG, "%s::%s format changed: %dx%d (%dx%d) %.2f", CLASSNAME, __func__, m_decoded_width, m_decoded_height, m_decoded_aligned_width, m_decoded_aligned_height, m_aspect_ratio); + } +@@ -360,6 +364,15 @@ bool CMMALVideo::CreateDeinterlace(EINTERLACEMETHOD interlace_method) + bool advanced_deinterlace = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED || interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF; + bool half_framerate = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF || interlace_method == VS_INTERLACEMETHOD_MMAL_BOB_HALF; + ++ if (advanced_deinterlace && !half_framerate) ++ m_processInfo.SetVideoDeintMethod("adv(x2)"); ++ else if (advanced_deinterlace && half_framerate) ++ m_processInfo.SetVideoDeintMethod("adv(x1)"); ++ else if (!advanced_deinterlace && !half_framerate) ++ m_processInfo.SetVideoDeintMethod("bob(x2)"); ++ else if (!advanced_deinterlace && half_framerate) ++ 
m_processInfo.SetVideoDeintMethod("bob(x1)"); ++ + MMAL_PARAMETER_IMAGEFX_PARAMETERS_T imfx_param = {{MMAL_PARAMETER_IMAGE_EFFECT_PARAMETERS, sizeof(imfx_param)}, + advanced_deinterlace ? MMAL_PARAM_IMAGEFX_DEINTERLACE_ADV : MMAL_PARAM_IMAGEFX_DEINTERLACE_FAST, 4, {3, 0, half_framerate, 1 }}; + +@@ -437,6 +450,8 @@ bool CMMALVideo::DestroyDeinterlace() + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) + CLog::Log(LOGDEBUG, "%s::%s", CLASSNAME, __func__); + ++ m_processInfo.SetVideoDeintMethod("none"); ++ + assert(m_deint); + assert(m_dec_output == m_deint->output[0]); + +@@ -526,6 +541,8 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + if (!CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEMMAL) || hints.software) + return false; + ++ m_processInfo.SetVideoDeintMethod("none"); ++ + m_hints = hints; + m_renderer = (CMMALRenderer *)options.m_opaque_pointer; + MMAL_STATUS_T status; +@@ -732,6 +749,8 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + m_preroll = !m_hints.stills; + m_speed = DVD_PLAYSPEED_NORMAL; + ++ m_processInfo.SetVideoDecoderName(m_pFormatName, true); ++ + return true; + } + +diff --git a/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp b/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp +index 20f706c..d8cef9c 100644 +--- a/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp ++++ b/xbmc/cores/omxplayer/OMXAudioCodecOMX.cpp +@@ -33,7 +33,7 @@ + #define AUDIO_DECODE_OUTPUT_BUFFER (32*1024) + static const char rounded_up_channels_shift[] = {0,0,1,2,2,3,3,3,3}; + +-COMXAudioCodecOMX::COMXAudioCodecOMX() ++COMXAudioCodecOMX::COMXAudioCodecOMX(CProcessInfo &processInfo) : m_processInfo(processInfo) + { + m_pBufferOutput = NULL; + m_iBufferOutputAlloced = 0; +@@ -134,6 +134,7 @@ bool COMXAudioCodecOMX::Open(CDVDStreamInfo &hints) + + m_iSampleFormat = AV_SAMPLE_FMT_NONE; + m_desiredSampleFormat = m_pCodecContext->sample_fmt == AV_SAMPLE_FMT_S16 ? 
AV_SAMPLE_FMT_S16 : AV_SAMPLE_FMT_FLTP; ++ m_processInfo.SetAudioDecoderName(m_pCodecContext->codec->name); + return true; + } + +diff --git a/xbmc/cores/omxplayer/OMXAudioCodecOMX.h b/xbmc/cores/omxplayer/OMXAudioCodecOMX.h +index c06a323..3b2a0f3 100644 +--- a/xbmc/cores/omxplayer/OMXAudioCodecOMX.h ++++ b/xbmc/cores/omxplayer/OMXAudioCodecOMX.h +@@ -31,11 +31,12 @@ extern "C" { + + #include "DVDStreamInfo.h" + #include "linux/PlatformDefs.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + + class COMXAudioCodecOMX + { + public: +- COMXAudioCodecOMX(); ++ COMXAudioCodecOMX(CProcessInfo &processInfo); + virtual ~COMXAudioCodecOMX(); + bool Open(CDVDStreamInfo &hints); + void Dispose(); +@@ -52,6 +53,7 @@ class COMXAudioCodecOMX + unsigned int GetFrameSize() { return m_frameSize; } + + protected: ++ CProcessInfo &m_processInfo; + AVCodecContext* m_pCodecContext; + SwrContext* m_pConvert; + enum AVSampleFormat m_iSampleFormat; +diff --git a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +index 6161962..1e5d2b9 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +@@ -43,6 +43,7 @@ + #include "linux/RBP.h" + #include "cores/AudioEngine/AEFactory.h" + #include "cores/DataCacheCore.h" ++#include "ServiceBroker.h" + + #include <algorithm> + #include <iomanip> +@@ -101,7 +102,7 @@ bool OMXPlayerAudio::OpenStream(CDVDStreamInfo &hints) + m_bad_state = false; + + m_processInfo.ResetAudioCodecInfo(); +- COMXAudioCodecOMX *codec = new COMXAudioCodecOMX(); ++ COMXAudioCodecOMX *codec = new COMXAudioCodecOMX(m_processInfo); + + if(!codec || !codec->Open(hints)) + { +@@ -143,7 +144,7 @@ void OMXPlayerAudio::OpenStream(CDVDStreamInfo &hints, COMXAudioCodecOMX *codec) + m_format.m_sampleRate = 0; + m_format.m_channelLayout = 0; + +- g_dataCacheCore.SignalAudioInfoChange(); ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); + } + + void OMXPlayerAudio::CloseStream(bool 
bWaitForBuffers) +@@ -188,6 +189,7 @@ bool OMXPlayerAudio::CodecChange() + { + m_hints.channels = m_pAudioCodec->GetChannels(); + m_hints.samplerate = m_pAudioCodec->GetSampleRate(); ++ m_hints.bitspersample = m_pAudioCodec->GetBitsPerSample(); + } + + /* only check bitrate changes on AV_CODEC_ID_DTS, AV_CODEC_ID_AC3, AV_CODEC_ID_EAC3 */ +@@ -204,7 +206,11 @@ bool OMXPlayerAudio::CodecChange() + (!m_passthrough && minor_change) || !m_DecoderOpen) + { + m_hints_current = m_hints; +- g_dataCacheCore.SignalAudioInfoChange(); ++ ++ m_processInfo.SetAudioSampleRate(m_hints.samplerate); ++ m_processInfo.SetAudioBitsPerSample(m_hints.bitspersample); ++ ++ CServiceBroker::GetDataCacheCore().SignalAudioInfoChange(); + return true; + } + +@@ -562,11 +568,23 @@ bool OMXPlayerAudio::OpenDecoder() + + CAEChannelInfo channelMap; + if (m_pAudioCodec && !m_passthrough) ++ { + channelMap = m_pAudioCodec->GetChannelMap(); ++ } + else if (m_passthrough) ++ { + // we just want to get the channel count right to stop OMXAudio.cpp rejecting stream + // the actual layout is not used + channelMap = AE_CH_LAYOUT_5_1; ++ ++ if (m_hints.codec == AV_CODEC_ID_AC3) ++ m_processInfo.SetAudioDecoderName("PT_AC3"); ++ else if (m_hints.codec == AV_CODEC_ID_EAC3) ++ m_processInfo.SetAudioDecoderName("PT_EAC3"); ++ else ++ m_processInfo.SetAudioDecoderName("PT_DTS"); ++ } ++ m_processInfo.SetAudioChannels(channelMap); + bool bAudioRenderOpen = m_omxAudio.Initialize(m_format, m_av_clock, m_hints, channelMap, m_passthrough); + + m_codec_name = ""; +diff --git a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +index 0ec7f15..6efd0d5 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +@@ -73,8 +73,7 @@ OMXPlayerVideo::OMXPlayerVideo(OMXClock *av_clock, + : CThread("OMXPlayerVideo") + , IDVDStreamPlayerVideo(processInfo) + , m_messageQueue("video") +-, m_omxVideo(renderManager) +-, m_codecname("") ++, 
m_omxVideo(renderManager, processInfo) + , m_messageParent(parent) + , m_renderManager(renderManager) + { +@@ -471,7 +470,7 @@ void OMXPlayerVideo::Process() + + if (m_syncState == IDVDStreamPlayer::SYNC_STARTING && !bRequestDrop && settings_changed) + { +- m_codecname = m_omxVideo.GetDecoderName(); ++ m_processInfo.SetVideoDecoderName(m_omxVideo.GetDecoderName(), true); + m_syncState = IDVDStreamPlayer::SYNC_WAITSYNC; + SStartMsg msg; + msg.player = VideoPlayer_VIDEO; +@@ -548,7 +547,7 @@ bool OMXPlayerVideo::OpenDecoder() + CLog::Log(LOGINFO, "OMXPlayerVideo::OpenDecoder : Video codec %s width %d height %d profile %d fps %f\n", + m_omxVideo.GetDecoderName().c_str() , m_hints.width, m_hints.height, m_hints.profile, m_fFrameRate); + +- m_codecname = m_omxVideo.GetDecoderName(); ++ m_processInfo.SetVideoDecoderName(m_omxVideo.GetDecoderName(), true); + } + + return bVideoDecoderOpen; +@@ -710,7 +709,7 @@ void OMXPlayerVideo::ResolutionUpdateCallBack(uint32_t width, uint32_t height, f + } + + m_processInfo.SetVideoDimensions(width, height); +- m_processInfo.SetVideoAspectRatio(display_aspect); ++ m_processInfo.SetVideoDAR(display_aspect); + + unsigned int iDisplayWidth = width; + unsigned int iDisplayHeight = height; +@@ -722,6 +721,7 @@ void OMXPlayerVideo::ResolutionUpdateCallBack(uint32_t width, uint32_t height, f + iDisplayWidth = (int) (iDisplayHeight * display_aspect); + + m_fFrameRate = DVD_TIME_BASE / CDVDCodecUtils::NormalizeFrameduration((double)DVD_TIME_BASE / framerate); ++ m_processInfo.SetVideoFps(m_fFrameRate); + + CLog::Log(LOGDEBUG,"%s - change configuration. video:%dx%d. framerate: %4.2f. 
%dx%d format: BYPASS", + __FUNCTION__, video_width, video_height, m_fFrameRate, iDisplayWidth, iDisplayHeight); +diff --git a/xbmc/cores/omxplayer/OMXVideo.cpp b/xbmc/cores/omxplayer/OMXVideo.cpp +index 4c165bf..b2bb0a8 100644 +--- a/xbmc/cores/omxplayer/OMXVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXVideo.cpp +@@ -65,8 +65,9 @@ + + #define MAX_TEXT_LENGTH 1024 + +-COMXVideo::COMXVideo(CRenderManager& renderManager) : m_video_codec_name("") ++COMXVideo::COMXVideo(CRenderManager& renderManager, CProcessInfo &processInfo) : m_video_codec_name("") + , m_renderManager(renderManager) ++, m_processInfo(processInfo) + { + m_is_open = false; + m_extradata = NULL; +@@ -244,6 +245,19 @@ bool COMXVideo::PortSettingsChanged(ResolutionUpdateInfo &resinfo) + EINTERLACEMETHOD interlace_method = m_renderManager.AutoInterlaceMethod(CMediaSettings::GetInstance().GetCurrentVideoSettings().m_InterlaceMethod); + bool advanced_deinterlace = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED || interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF; + bool half_framerate = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF || interlace_method == VS_INTERLACEMETHOD_MMAL_BOB_HALF; ++ ++ if (advanced_deinterlace && !half_framerate) ++ m_processInfo.SetVideoDeintMethod("adv(x2)"); ++ else if (advanced_deinterlace && half_framerate) ++ m_processInfo.SetVideoDeintMethod("adv(x1)"); ++ else if (!advanced_deinterlace && !half_framerate) ++ m_processInfo.SetVideoDeintMethod("bob(x2)"); ++ else if (!advanced_deinterlace && half_framerate) ++ m_processInfo.SetVideoDeintMethod("bob(x1)"); ++ ++ if (!half_framerate) ++ resinfo.framerate *= 2.0f; ++ + if (!advanced_deinterlace) + { + // Image_fx assumed 3 frames of context. 
simple deinterlace doesn't require this +@@ -280,6 +294,10 @@ bool COMXVideo::PortSettingsChanged(ResolutionUpdateInfo &resinfo) + return false; + } + } ++ else ++ { ++ m_processInfo.SetVideoDeintMethod("none"); ++ } + + if(m_deinterlace) + { +diff --git a/xbmc/cores/omxplayer/OMXVideo.h b/xbmc/cores/omxplayer/OMXVideo.h +index 46e79cb..fd101e7 100644 +--- a/xbmc/cores/omxplayer/OMXVideo.h ++++ b/xbmc/cores/omxplayer/OMXVideo.h +@@ -34,6 +34,7 @@ + #include "threads/CriticalSection.h" + #include "xbmc/rendering/RenderSystem.h" + #include "cores/VideoPlayer/VideoRenderers/RenderManager.h" ++#include "cores/VideoPlayer/Process/ProcessInfo.h" + #include <string> + + #define VIDEO_BUFFERS 60 +@@ -53,7 +54,7 @@ struct ResolutionUpdateInfo { + class COMXVideo + { + public: +- COMXVideo(CRenderManager& renderManager); ++ COMXVideo(CRenderManager& renderManager, CProcessInfo &processInfo); + ~COMXVideo(); + + // Required overrides +@@ -112,6 +113,7 @@ class COMXVideo + OMX_DISPLAYTRANSFORMTYPE m_transform; + bool m_settings_changed; + CRenderManager& m_renderManager; ++ CProcessInfo& m_processInfo; + static bool NaluFormatStartCodes(enum AVCodecID codec, uint8_t *in_extradata, int in_extrasize); + CCriticalSection m_critSection; + }; diff --git a/projects/RPi2/patches/kodi/kodi-001-backport.patch b/projects/RPi2/patches/kodi/kodi-001-backport.patch new file mode 100644 index 0000000000..1f88c2f0db --- /dev/null +++ b/projects/RPi2/patches/kodi/kodi-001-backport.patch @@ -0,0 +1,80614 @@ +From 1c8dd52e7185c555335c927aa16102e7b758e54d Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 5 May 2015 17:27:39 +0100 +Subject: [PATCH 01/67] build: Allow installed links to be overwritten + +--- + tools/depends/target/Makefile | 72 +++++++++++++++++++-------------------- + tools/depends/xbmc-addons.include | 36 ++++++++++---------- + 2 files changed, 54 insertions(+), 54 deletions(-) + +diff --git a/tools/depends/target/Makefile 
b/tools/depends/target/Makefile +index 2f0c83141034374ec5742a96a282391405ec8125..89f24641fd341336545fbdc6024e88eaacc805e7 100644 +--- a/tools/depends/target/Makefile ++++ b/tools/depends/target/Makefile +@@ -129,41 +129,41 @@ distclean:: + for d in $(DEPENDS); do $(MAKE) -C $$d distclean; done + + linux-system-libs-egl: +- [ -f $(PREFIX)/lib/pkgconfig/egl.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/egl.pc $(PREFIX)/lib/pkgconfig/egl.pc +- [ -f $(PREFIX)/lib/pkgconfig/damageproto.pc ] || ln -s /usr/share/pkgconfig/damageproto.pc $(PREFIX)/lib/pkgconfig/damageproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/fixesproto.pc ] || ln -s /usr/share/pkgconfig/fixesproto.pc $(PREFIX)/lib/pkgconfig/fixesproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/x11-xcb.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/x11-xcb.pc $(PREFIX)/lib/pkgconfig/x11-xcb.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-dri2.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-dri2.pc $(PREFIX)/lib/pkgconfig/xcb-dri2.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-dri3.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-dri3.pc $(PREFIX)/lib/pkgconfig/xcb-dri3.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-glx.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-glx.pc $(PREFIX)/lib/pkgconfig/xcb-glx.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-xfixes.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-xfixes.pc $(PREFIX)/lib/pkgconfig/xcb-xfixes.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-present.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-present.pc $(PREFIX)/lib/pkgconfig/xcb-present.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-randr.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-randr.pc $(PREFIX)/lib/pkgconfig/xcb-randr.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-render.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-render.pc $(PREFIX)/lib/pkgconfig/xcb-render.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-shape.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-shape.pc $(PREFIX)/lib/pkgconfig/xcb-shape.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb-sync.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb-sync.pc 
$(PREFIX)/lib/pkgconfig/xcb-sync.pc +- [ -f $(PREFIX)/lib/pkgconfig/xdamage.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xdamage.pc $(PREFIX)/lib/pkgconfig/xdamage.pc +- [ -f $(PREFIX)/lib/pkgconfig/xf86vidmodeproto.pc ] || ln -s /usr/share/pkgconfig/xf86vidmodeproto.pc $(PREFIX)/lib/pkgconfig/xf86vidmodeproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/xfixes.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xfixes.pc $(PREFIX)/lib/pkgconfig/xfixes.pc +- [ -f $(PREFIX)/lib/pkgconfig/xshmfence.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xshmfence.pc $(PREFIX)/lib/pkgconfig/xshmfence.pc +- [ -f $(PREFIX)/lib/pkgconfig/xxf86vm.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xxf86vm.pc $(PREFIX)/lib/pkgconfig/xxf86vm.pc ++ [ -f $(PREFIX)/lib/pkgconfig/egl.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/egl.pc $(PREFIX)/lib/pkgconfig/egl.pc ++ [ -f $(PREFIX)/lib/pkgconfig/damageproto.pc ] || ln -sf /usr/share/pkgconfig/damageproto.pc $(PREFIX)/lib/pkgconfig/damageproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/fixesproto.pc ] || ln -sf /usr/share/pkgconfig/fixesproto.pc $(PREFIX)/lib/pkgconfig/fixesproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/x11-xcb.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/x11-xcb.pc $(PREFIX)/lib/pkgconfig/x11-xcb.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-dri2.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-dri2.pc $(PREFIX)/lib/pkgconfig/xcb-dri2.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-dri3.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-dri3.pc $(PREFIX)/lib/pkgconfig/xcb-dri3.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-glx.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-glx.pc $(PREFIX)/lib/pkgconfig/xcb-glx.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-xfixes.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-xfixes.pc $(PREFIX)/lib/pkgconfig/xcb-xfixes.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-present.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-present.pc $(PREFIX)/lib/pkgconfig/xcb-present.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-randr.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-randr.pc 
$(PREFIX)/lib/pkgconfig/xcb-randr.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-render.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-render.pc $(PREFIX)/lib/pkgconfig/xcb-render.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-shape.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-shape.pc $(PREFIX)/lib/pkgconfig/xcb-shape.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb-sync.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb-sync.pc $(PREFIX)/lib/pkgconfig/xcb-sync.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xdamage.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xdamage.pc $(PREFIX)/lib/pkgconfig/xdamage.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xf86vidmodeproto.pc ] || ln -sf /usr/share/pkgconfig/xf86vidmodeproto.pc $(PREFIX)/lib/pkgconfig/xf86vidmodeproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xfixes.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xfixes.pc $(PREFIX)/lib/pkgconfig/xfixes.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xshmfence.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xshmfence.pc $(PREFIX)/lib/pkgconfig/xshmfence.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xxf86vm.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xxf86vm.pc $(PREFIX)/lib/pkgconfig/xxf86vm.pc + + linux-system-libs: linux-system-libs-egl +- [ -f $(PREFIX)/lib/pkgconfig/x11.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/x11.pc $(PREFIX)/lib/pkgconfig/x11.pc +- [ -f $(PREFIX)/lib/pkgconfig/xproto.pc ] || ln -s /usr/share/pkgconfig/xproto.pc $(PREFIX)/lib/pkgconfig/xproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/kbproto.pc ] || ln -s /usr/share/pkgconfig/kbproto.pc $(PREFIX)/lib/pkgconfig/kbproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/xcb.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xcb.pc $(PREFIX)/lib/pkgconfig/xcb.pc +- [ -f $(PREFIX)/lib/pkgconfig/pthread-stubs.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/pthread-stubs.pc $(PREFIX)/lib/pkgconfig/pthread-stubs.pc +- [ -f $(PREFIX)/lib/pkgconfig/xau.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xau.pc $(PREFIX)/lib/pkgconfig/xau.pc +- [ -f $(PREFIX)/lib/pkgconfig/xdmcp.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xdmcp.pc $(PREFIX)/lib/pkgconfig/xdmcp.pc 
+- [ -f $(PREFIX)/lib/pkgconfig/xext.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xext.pc $(PREFIX)/lib/pkgconfig/xext.pc +- [ -f $(PREFIX)/lib/pkgconfig/xextproto.pc ] || ln -s /usr/share/pkgconfig/xextproto.pc $(PREFIX)/lib/pkgconfig/xextproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/xrandr.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xrandr.pc $(PREFIX)/lib/pkgconfig/xrandr.pc +- [ -f $(PREFIX)/lib/pkgconfig/xrender.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xrender.pc $(PREFIX)/lib/pkgconfig/xrender.pc +- [ -f $(PREFIX)/lib/pkgconfig/randrproto.pc ] || ln -s /usr/share/pkgconfig/randrproto.pc $(PREFIX)/lib/pkgconfig/randrproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/renderproto.pc ] || ln -s /usr/share/pkgconfig/renderproto.pc $(PREFIX)/lib/pkgconfig/renderproto.pc +- [ -f $(PREFIX)/lib/pkgconfig/xt.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xt.pc $(PREFIX)/lib/pkgconfig/xt.pc +- [ -f $(PREFIX)/lib/pkgconfig/ice.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/ice.pc $(PREFIX)/lib/pkgconfig/ice.pc +- [ -f $(PREFIX)/lib/pkgconfig/sm.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/sm.pc $(PREFIX)/lib/pkgconfig/sm.pc +- [ -f $(PREFIX)/lib/pkgconfig/xmu.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/xmu.pc $(PREFIX)/lib/pkgconfig/xmu.pc +- [ -f $(PREFIX)/lib/pkgconfig/libdrm.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/libdrm.pc $(PREFIX)/lib/pkgconfig/libdrm.pc ++ [ -f $(PREFIX)/lib/pkgconfig/x11.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/x11.pc $(PREFIX)/lib/pkgconfig/x11.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xproto.pc ] || ln -sf /usr/share/pkgconfig/xproto.pc $(PREFIX)/lib/pkgconfig/xproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/kbproto.pc ] || ln -sf /usr/share/pkgconfig/kbproto.pc $(PREFIX)/lib/pkgconfig/kbproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xcb.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xcb.pc $(PREFIX)/lib/pkgconfig/xcb.pc ++ [ -f $(PREFIX)/lib/pkgconfig/pthread-stubs.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/pthread-stubs.pc $(PREFIX)/lib/pkgconfig/pthread-stubs.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xau.pc ] || 
ln -sf /usr/lib/$(HOST)/pkgconfig/xau.pc $(PREFIX)/lib/pkgconfig/xau.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xdmcp.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xdmcp.pc $(PREFIX)/lib/pkgconfig/xdmcp.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xext.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xext.pc $(PREFIX)/lib/pkgconfig/xext.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xextproto.pc ] || ln -sf /usr/share/pkgconfig/xextproto.pc $(PREFIX)/lib/pkgconfig/xextproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xrandr.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xrandr.pc $(PREFIX)/lib/pkgconfig/xrandr.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xrender.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xrender.pc $(PREFIX)/lib/pkgconfig/xrender.pc ++ [ -f $(PREFIX)/lib/pkgconfig/randrproto.pc ] || ln -sf /usr/share/pkgconfig/randrproto.pc $(PREFIX)/lib/pkgconfig/randrproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/renderproto.pc ] || ln -sf /usr/share/pkgconfig/renderproto.pc $(PREFIX)/lib/pkgconfig/renderproto.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xt.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xt.pc $(PREFIX)/lib/pkgconfig/xt.pc ++ [ -f $(PREFIX)/lib/pkgconfig/ice.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/ice.pc $(PREFIX)/lib/pkgconfig/ice.pc ++ [ -f $(PREFIX)/lib/pkgconfig/sm.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/sm.pc $(PREFIX)/lib/pkgconfig/sm.pc ++ [ -f $(PREFIX)/lib/pkgconfig/xmu.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/xmu.pc $(PREFIX)/lib/pkgconfig/xmu.pc ++ [ -f $(PREFIX)/lib/pkgconfig/libdrm.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/libdrm.pc $(PREFIX)/lib/pkgconfig/libdrm.pc +diff --git a/tools/depends/xbmc-addons.include b/tools/depends/xbmc-addons.include +index e5cb842d9f61578efe5df95dfa3a938cf5346663..3ddba3cefb1ca785f7a17c72f42aacbbaada7b6c 100644 +--- a/tools/depends/xbmc-addons.include ++++ b/tools/depends/xbmc-addons.include +@@ -77,23 +77,23 @@ $(TOOLCHAIN_FILE): $(abs_top_srcdir)/target/Toolchain_binaddons.cmake + + linux-system-libs: + mkdir -p $(ADDON_DEPS_DIR)/lib/pkgconfig $(ADDON_DEPS_DIR)/include +- [ -f 
$(ADDON_DEPS_DIR)/lib/pkgconfig/x11.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/x*.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ +- [ -f $(ADDON_DEPS_DIR)/lib/libX11.so ] || ln -s /usr/lib/$(HOST)/libX11.so* $(ADDON_DEPS_DIR)/lib/ +- [ -L $(ADDON_DEPS_DIR)/include/X11 ] || ln -s /usr/include/X11 $(ADDON_DEPS_DIR)/include/X11 +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/xproto.pc ] || ln -s /usr/share/pkgconfig/x*.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/kbproto.pc ] || ln -s /usr/share/pkgconfig/kbproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/kbproto.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/damageproto.pc ] || ln -s /usr/share/pkgconfig/damageproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/damageproto.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/fixesproto.pc ] || ln -s /usr/share/pkgconfig/fixesproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/fixesproto.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/pthread-stubs.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/pthread-stubs.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/pthread-stubs.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/ice.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/ice.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ice.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/sm.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/sm.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/sm.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/libdrm.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/libdrm.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/libdrm.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/gl.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/gl.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/gl.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/glu.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/glu.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/glu.pc +- [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/glew.pc ] || ln -s /usr/lib/$(HOST)/pkgconfig/glew.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/glew.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/x11.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/x*.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ ++ [ -f $(ADDON_DEPS_DIR)/lib/libX11.so ] || ln -sf 
/usr/lib/$(HOST)/libX11.so* $(ADDON_DEPS_DIR)/lib/ ++ [ -L $(ADDON_DEPS_DIR)/include/X11 ] || ln -sf /usr/include/X11 $(ADDON_DEPS_DIR)/include/X11 ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/xproto.pc ] || ln -sf /usr/share/pkgconfig/x*.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/kbproto.pc ] || ln -sf /usr/share/pkgconfig/kbproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/kbproto.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/damageproto.pc ] || ln -sf /usr/share/pkgconfig/damageproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/damageproto.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/fixesproto.pc ] || ln -sf /usr/share/pkgconfig/fixesproto.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/fixesproto.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/pthread-stubs.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/pthread-stubs.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/pthread-stubs.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/ice.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/ice.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/ice.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/sm.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/sm.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/sm.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/libdrm.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/libdrm.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/libdrm.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/gl.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/gl.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/gl.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/glu.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/glu.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/glu.pc ++ [ -f $(ADDON_DEPS_DIR)/lib/pkgconfig/glew.pc ] || ln -sf /usr/lib/$(HOST)/pkgconfig/glew.pc $(ADDON_DEPS_DIR)/lib/pkgconfig/glew.pc + [ -f $(ADDON_DEPS_DIR)/lib/libGL.so ] || \ +- (ln -s /usr/lib/$(HOST)/mesa $(ADDON_DEPS_DIR)/lib/mesa && ln -s $(ADDON_DEPS_DIR)/lib/mesa/libGL.so $(ADDON_DEPS_DIR)/lib/libGL.so) +- [ -f $(ADDON_DEPS_DIR)/lib/libGLEW.so ] || ln -s /usr/lib/$(HOST)/libGLEW.so* $(ADDON_DEPS_DIR)/lib/ +- [ -L $(ADDON_DEPS_DIR)/include/GL ] || ln -s 
/usr/include/GL $(ADDON_DEPS_DIR)/include/GL +- [ -f $(ADDON_DEPS_DIR)/lib/libm.so ] || ln -s /usr/lib/$(HOST)/libm.so $(ADDON_DEPS_DIR)/lib/ ++ (ln -sf /usr/lib/$(HOST)/mesa $(ADDON_DEPS_DIR)/lib/mesa && ln -sf $(ADDON_DEPS_DIR)/lib/mesa/libGL.so $(ADDON_DEPS_DIR)/lib/libGL.so) ++ [ -f $(ADDON_DEPS_DIR)/lib/libGLEW.so ] || ln -sf /usr/lib/$(HOST)/libGLEW.so* $(ADDON_DEPS_DIR)/lib/ ++ [ -L $(ADDON_DEPS_DIR)/include/GL ] || ln -sf /usr/include/GL $(ADDON_DEPS_DIR)/include/GL ++ [ -f $(ADDON_DEPS_DIR)/lib/libm.so ] || ln -sf /usr/lib/$(HOST)/libm.so $(ADDON_DEPS_DIR)/lib/ + + +From 9e113927dc8591c51d7cebc3e13d97c5db19f1d4 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Mon, 7 Apr 2014 18:19:32 +0100 +Subject: [PATCH 02/67] [rbp/omxplayer] When opening a stream don't try to + update gui so often + +--- + xbmc/dialogs/GUIDialogBusy.cpp | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/xbmc/dialogs/GUIDialogBusy.cpp b/xbmc/dialogs/GUIDialogBusy.cpp +index 8ea5161637b4e66ddd222859f058521dbc8922b9..811019a39a10acc21b83f0b0c70d5500055e7f98 100644 +--- a/xbmc/dialogs/GUIDialogBusy.cpp ++++ b/xbmc/dialogs/GUIDialogBusy.cpp +@@ -69,7 +69,11 @@ bool CGUIDialogBusy::WaitOnEvent(CEvent &event, unsigned int displaytime /* = 10 + { + dialog->Open(); + ++#ifdef TARGET_RASPBERRY_PI ++ while(!event.WaitMSec(100)) ++#else + while(!event.WaitMSec(1)) ++#endif + { + dialog->ProcessRenderLoop(false); + if (allowCancel && dialog->IsCanceled()) + +From 13bfba5171501299fc0d21ef4c5b1407807242e2 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 8 Mar 2014 15:36:06 +0000 +Subject: [PATCH 03/67] [hifiberry] Hack: force it to be recognised as IEC958 + capable to enable passthrough options + +--- + xbmc/cores/AudioEngine/Sinks/AESinkALSA.cpp | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/xbmc/cores/AudioEngine/Sinks/AESinkALSA.cpp b/xbmc/cores/AudioEngine/Sinks/AESinkALSA.cpp +index 
6a9066b2dbe8d505d636b3638c1d35c7c8a698ed..9c6ac5d4cc9bf21b2d48619cc6fb5d274f1c3928 100644 +--- a/xbmc/cores/AudioEngine/Sinks/AESinkALSA.cpp ++++ b/xbmc/cores/AudioEngine/Sinks/AESinkALSA.cpp +@@ -1352,6 +1352,10 @@ void CAESinkALSA::EnumerateDevice(AEDeviceInfoList &list, const std::string &dev + if (snd_card_get_name(cardNr, &cardName) == 0) + info.m_displayName = cardName; + ++ // hack: hifiberry digi doesn't correctly report as iec958 device. Needs fixing in kernel driver ++ if (info.m_displayName == "snd_rpi_hifiberry_digi") ++ info.m_deviceType = AE_DEVTYPE_IEC958; ++ + if (info.m_deviceType == AE_DEVTYPE_HDMI && info.m_displayName.size() > 5 && + info.m_displayName.substr(info.m_displayName.size()-5) == " HDMI") + { + +From c89b8b2588ffc2fb3022bb2debc09648e66f01d1 Mon Sep 17 00:00:00 2001 +From: Ben Avison <bavison@riscosopen.org> +Date: Thu, 1 May 2014 16:28:39 +0100 +Subject: [PATCH 04/67] Improved file buffering in CArchive + +Even though memcpy is typically inlined by the compiler into byte/word loads +and stores (at least for release builds), the frequency with which 1, 2 and 4 +byte loads/stores are encountered in cases where the size is *not* +determinable at compile time is still high enough that it's worth handling +these specially. On the ARM1176JZF-S in the Raspberry Pi, this improves the +total time to open a library (in the case where it's fetched from a CArchive) +by around 4%. + +It should be noted that this code uses 16-bit and 32-bit word loads and +stores that are not necessarily aligned to their respective widths. It is +possible that there are some architectures out there which do not support +this, although all ARMs since ARMv6 have supported it (and ARMs earlier than +that are probably not powerful enough to be good targets for XBMC). 
+--- + xbmc/utils/Archive.h | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/xbmc/utils/Archive.h b/xbmc/utils/Archive.h +index 6ed0f8fe37950306bb6ac369082dd024f032ab66..8506d9593de4c913a3c1469cf9cec89475d8dd30 100644 +--- a/xbmc/utils/Archive.h ++++ b/xbmc/utils/Archive.h +@@ -154,9 +154,17 @@ protected: + * than waiting until we attempt to put more data into an already full buffer */ + if (m_BufferRemain > size) + { ++ switch (size) ++ { ++ case 1: *m_BufferPos++ = *ptr; m_BufferRemain--; break; ++ case 2: *(uint16_t *) m_BufferPos = *(const uint16_t *) ptr; m_BufferPos += 2; m_BufferRemain -= 2; break; ++ case 4: *(uint32_t *) m_BufferPos = *(const uint32_t *) ptr; m_BufferPos += 4; m_BufferRemain -= 4; break; ++ default: + memcpy(m_BufferPos, ptr, size); + m_BufferPos += size; + m_BufferRemain -= size; ++ break; ++ } + return *this; + } + else +@@ -171,9 +179,17 @@ protected: + /* Note, refilling the buffer is deferred until we know we need to read more from it */ + if (m_BufferRemain >= size) + { ++ switch (size) ++ { ++ case 1: *ptr = *m_BufferPos++; m_BufferRemain--; break; ++ case 2: *(uint16_t *) ptr = *(const uint16_t *) m_BufferPos; m_BufferPos += 2; m_BufferRemain -= 2; break; ++ case 4: *(uint32_t *) ptr = *(const uint32_t *) m_BufferPos; m_BufferPos += 4; m_BufferRemain -= 4; break; ++ default: + memcpy(ptr, m_BufferPos, size); + m_BufferPos += size; + m_BufferRemain -= size; ++ break; ++ } + return *this; + } + else + +From afe3081bcf63939850a753200650570d04ed8aaa Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sun, 10 Aug 2014 16:45:16 +0100 +Subject: [PATCH 05/67] filesystem: Make support of browsing into archives + optional + +The ability to browse, scan and play content in archives can cause problems on low powered/low memory devices. +It's quite common to see reports of a large rar file that causes xbmc to crash with an out-of-memory error when browsing or scanning. 
+It also can be slow as any archive in the directory is opened and extracted. + +This causes issues for people who scan library with archives disabled, then subsequently enable it. +The library has the .rar files in which don't play without removing and re-adding. + +We'll let people who don't use archives disable it manually +--- + addons/resource.language.en_gb/resources/strings.po | 9 +++++++++ + system/settings/rbp.xml | 11 +++++++++++ + xbmc/Util.cpp | 4 ++-- + xbmc/filesystem/FileDirectoryFactory.cpp | 4 ++++ + 4 files changed, 26 insertions(+), 2 deletions(-) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index f0cfe2bc13ec3f333af83df21d0185448896719b..8860129ce3d4fd3426f6ba65d0c8cb8df18be8b2 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -19131,6 +19131,15 @@ msgstr "" + #: system/settings/rbp.xml + msgctxt "#38010" + msgid "GPU accelerated" ++ ++#: system/settings/settings.xml ++msgctxt "#38040" ++msgid "Support browsing into archives" ++msgstr "" ++ ++#: system/settings/settings.xml ++msgctxt "#38041" ++msgid "Allow viewing and playing files in archives (e.g. zip, rar)" + msgstr "" + + #. 
Setting #38011 "Show All Items entry" +diff --git a/system/settings/rbp.xml b/system/settings/rbp.xml +index 806eadf44d73cea70fdbd8b723770a7f828e0633..7e6e52f82fde4c91fdc004c4b4b46e86091bcc87 100644 +--- a/system/settings/rbp.xml ++++ b/system/settings/rbp.xml +@@ -99,4 +99,15 @@ + </group> + </category> + </section> ++ <section id="library"> ++ <category id="filelists"> ++ <group id="1"> ++ <setting id="filelists.browsearchives" type="boolean" label="38040" help="38041"> ++ <level>1</level> ++ <default>true</default> ++ <control type="toggle" /> ++ </setting> ++ </group> ++ </category> ++ </section> + </settings> +diff --git a/xbmc/Util.cpp b/xbmc/Util.cpp +index b0de1c8f1046e094191f19ecd52334ddc6d1b4d1..446d8df2993423a2f80d88f82fbb7f767b11cf1b 100644 +--- a/xbmc/Util.cpp ++++ b/xbmc/Util.cpp +@@ -1773,7 +1773,7 @@ void CUtil::ScanPathsForAssociatedItems(const std::string& videoName, + URIUtils::RemoveExtension(strCandidate); + if (StringUtils::StartsWithNoCase(strCandidate, videoName)) + { +- if (URIUtils::IsRAR(pItem->GetPath()) || URIUtils::IsZIP(pItem->GetPath())) ++ if (CSettings::GetInstance().GetBool("filelists.browsearchives") && (URIUtils::IsRAR(pItem->GetPath()) || URIUtils::IsZIP(pItem->GetPath()))) + CUtil::ScanArchiveForAssociatedItems(pItem->GetPath(), "", item_exts, associatedFiles); + else + { +@@ -1783,7 +1783,7 @@ void CUtil::ScanPathsForAssociatedItems(const std::string& videoName, + } + else + { +- if (URIUtils::IsRAR(pItem->GetPath()) || URIUtils::IsZIP(pItem->GetPath())) ++ if (CSettings::GetInstance().GetBool("filelists.browsearchives") && (URIUtils::IsRAR(pItem->GetPath()) || URIUtils::IsZIP(pItem->GetPath()))) + CUtil::ScanArchiveForAssociatedItems(pItem->GetPath(), videoName, item_exts, associatedFiles); + } + } +diff --git a/xbmc/filesystem/FileDirectoryFactory.cpp b/xbmc/filesystem/FileDirectoryFactory.cpp +index a0fd0a9011e71f4af1535110c696b6ea5c4b37db..688b71a297c7c617c6764bfe6be157d727eb49d3 100644 +--- 
a/xbmc/filesystem/FileDirectoryFactory.cpp ++++ b/xbmc/filesystem/FileDirectoryFactory.cpp +@@ -40,6 +40,7 @@ + #include "playlists/PlayListFactory.h" + #include "Directory.h" + #include "File.h" ++#include "settings/Settings.h" + #include "FileItem.h" + #include "utils/StringUtils.h" + #include "URL.h" +@@ -116,6 +117,8 @@ IFileDirectory* CFileDirectoryFactory::Create(const CURL& url, CFileItem* pItem, + return NULL; + } + #endif ++ if (CSettings::GetInstance().GetBool("filelists.browsearchives")) ++ { + if (url.IsFileType("zip")) + { + CURL zipURL = URIUtils::CreateArchivePath("zip", url); +@@ -189,6 +192,7 @@ IFileDirectory* CFileDirectoryFactory::Create(const CURL& url, CFileItem* pItem, + } + return NULL; + } ++ } + if (url.IsFileType("xbt")) + { + CURL xbtUrl = URIUtils::CreateArchivePath("xbt", url); + +From b38f7abd72691bb2eb87892e6619a7eba7ebea77 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Mon, 27 Oct 2014 13:06:57 +0000 +Subject: [PATCH 06/67] [rbp] Make cachemembuffersize default depend on memory + size + +--- + xbmc/linux/RBP.cpp | 10 ++++++++++ + xbmc/linux/RBP.h | 1 + + xbmc/settings/AdvancedSettings.cpp | 12 +++++++++++- + 3 files changed, 22 insertions(+), 1 deletion(-) + +diff --git a/xbmc/linux/RBP.cpp b/xbmc/linux/RBP.cpp +index d101638cc38468c3d9673bc48f6603d414bcb7f5..ddbe27061f8192b7f6c830a4c22652a731537079 100644 +--- a/xbmc/linux/RBP.cpp ++++ b/xbmc/linux/RBP.cpp +@@ -23,6 +23,7 @@ + + #include <assert.h> + #include "settings/Settings.h" ++#include "settings/AdvancedSettings.h" + #include "utils/log.h" + + #include "cores/omxplayer/OMXImage.h" +@@ -58,6 +59,12 @@ CRBP::~CRBP() + delete m_DllBcmHost; + } + ++void CRBP::InitializeSettings() ++{ ++ if (m_initialized && g_advancedSettings.m_cacheMemSize == ~0U) ++ g_advancedSettings.m_cacheMemSize = m_arm_mem < 256 ? 
1024 * 1024 * 2 : 1024 * 1024 * 20; ++} ++ + bool CRBP::Initialize() + { + CSingleLock lock(m_critSection); +@@ -97,6 +104,8 @@ bool CRBP::Initialize() + if (!m_gui_resolution_limit) + m_gui_resolution_limit = m_gpu_mem < 128 ? 720:1080; + ++ InitializeSettings(); ++ + g_OMXImage.Initialize(); + m_omx_image_init = true; + return true; +@@ -109,6 +118,7 @@ void CRBP::LogFirmwareVerison() + response[sizeof(response) - 1] = '\0'; + CLog::Log(LOGNOTICE, "Raspberry PI firmware version: %s", response); + CLog::Log(LOGNOTICE, "ARM mem: %dMB GPU mem: %dMB MPG2:%d WVC1:%d", m_arm_mem, m_gpu_mem, m_codec_mpg2_enabled, m_codec_wvc1_enabled); ++ CLog::Log(LOGNOTICE, "cache.memorysize: %dMB", g_advancedSettings.m_cacheMemSize >> 20); + m_DllBcmHost->vc_gencmd(response, sizeof response, "get_config int"); + response[sizeof(response) - 1] = '\0'; + CLog::Log(LOGNOTICE, "Config:\n%s", response); +diff --git a/xbmc/linux/RBP.h b/xbmc/linux/RBP.h +index a35a509a91483f13e2cf0e688fc7e9528f254290..fffa5182126159f6dfcf750b21fa0464e229e545 100644 +--- a/xbmc/linux/RBP.h ++++ b/xbmc/linux/RBP.h +@@ -62,6 +62,7 @@ public: + ~CRBP(); + + bool Initialize(); ++ void InitializeSettings(); + void LogFirmwareVerison(); + void Deinitialize(); + int GetArmMem() { return m_arm_mem; } +diff --git a/xbmc/settings/AdvancedSettings.cpp b/xbmc/settings/AdvancedSettings.cpp +index e7f13a73e5ce6d5fe9864fe76dccc9d3e1fdbc27..446293308010f3b8cd8d325fa6d0285fcc9f892d 100644 +--- a/xbmc/settings/AdvancedSettings.cpp ++++ b/xbmc/settings/AdvancedSettings.cpp +@@ -50,6 +50,9 @@ + #if defined(TARGET_DARWIN_IOS) + #include "platform/darwin/DarwinUtils.h" + #endif ++#if defined(TARGET_RASPBERRY_PI) ++#include "linux/RBP.h" ++#endif + + using namespace ADDON; + using namespace XFILE; +@@ -356,7 +359,12 @@ void CAdvancedSettings::Initialize() + m_bPVRAutoScanIconsUserSet = false; + m_iPVRNumericChannelSwitchTimeout = 1000; + ++#ifdef TARGET_RASPBERRY_PI ++ // want default to be memory dependent, but interface to gpu 
not available yet, so set in RBP.cpp ++ m_cacheMemSize = ~0; ++#else + m_cacheMemSize = 1024 * 1024 * 20; ++#endif + m_cacheBufferMode = CACHE_BUFFER_MODE_INTERNET; // Default (buffer all internet streams/filesystems) + // the following setting determines the readRate of a player data + // as multiply of the default data read rate +@@ -405,7 +413,9 @@ void CAdvancedSettings::Initialize() + m_extraLogLevels = 0; + + m_userAgent = g_sysinfo.GetUserAgent(); +- ++#ifdef TARGET_RASPBERRY_PI ++ g_RBP.InitializeSettings(); ++#endif + m_initialized = true; + } + + +From 444ff3630cfa2ff69f1f41150158175ed7d8a549 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 30 May 2014 14:58:43 +0100 +Subject: [PATCH 07/67] [settings] Experiment: Report DESKTOP resolution in + video settings + +--- + xbmc/settings/DisplaySettings.cpp | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xbmc/settings/DisplaySettings.cpp b/xbmc/settings/DisplaySettings.cpp +index c1cca7efdd5d119b07308b947c569911f2a9bdc9..e03f3c8ef21ba824c0d707042e5a735ac74a86b8 100644 +--- a/xbmc/settings/DisplaySettings.cpp ++++ b/xbmc/settings/DisplaySettings.cpp +@@ -704,6 +704,9 @@ void CDisplaySettings::SettingOptionsResolutionsFiller(const CSetting *setting, + std::vector<RESOLUTION_WHR> resolutions = g_Windowing.ScreenResolutions(info.iScreen, info.fRefreshRate); + for (std::vector<RESOLUTION_WHR>::const_iterator resolution = resolutions.begin(); resolution != resolutions.end(); ++resolution) + { ++if (resolution->ResInfo_Index == RES_DESKTOP) ++ list.push_back(std::make_pair(StringUtils::Format("DESKTOP"), resolution->ResInfo_Index)); ++else + list.push_back(std::make_pair( + StringUtils::Format("%dx%d%s", resolution->width, resolution->height, + ModeFlagsToString(resolution->flags, false).c_str()), + +From 03a66653809c1494b57bc1644af53c1c111a4765 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 24 Sep 2014 23:13:52 +0100 +Subject: [PATCH 08/67] [audio] 
Add settings option to boost centre channel + when downmixing + +This allows a dB volume increase to be added to centre channel. +This can help improve dialogue in the presence of background music/effects. +It can go up to 30dB for testing purposes, but value of 6 is probably more reasonable. +It is recommended to ensure "Normalise levels on downmix" is enabled when boosting by large values to avoid clipping. + +Should work with Pi Sink (dvdplayer/paplayer) and omxplayer +--- + addons/resource.language.en_gb/resources/strings.po | 15 +++++++++++++++ + system/settings/settings.xml | 12 ++++++++++++ + .../Engines/ActiveAE/ActiveAEResampleFFMPEG.cpp | 7 +++++++ + .../AudioEngine/Engines/ActiveAE/ActiveAEResamplePi.cpp | 6 ++++++ + xbmc/cores/omxplayer/OMXAudio.cpp | 6 ++++++ + 5 files changed, 46 insertions(+) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index 8860129ce3d4fd3426f6ba65d0c8cb8df18be8b2..f646446b73b2e8a3a783b2e52b3257c6ad6da2bd 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -19305,6 +19305,21 @@ msgstr "" + + #empty strings from id 38043 to 38099 + ++#: system/settings/settings.xml ++msgctxt "#38007" ++msgid "Boost centre channel when downmixing" ++msgstr "" ++ ++#: system/settings/settings.xml ++msgctxt "#38008" ++msgid "Increase this value to make the dialogue louder compared to background sounds when downmixing multichannel audio" ++msgstr "" ++ ++#: system/settings/settings.xml ++msgctxt "#38009" ++msgid "%i dB" ++msgstr "" ++ + #. 
Description of section #14200 "Player"" + #: system/settings/settings.xml + msgctxt "#38100" +diff --git a/system/settings/settings.xml b/system/settings/settings.xml +index c9d3c9dbe5bc5d41c8eb54babf78f9fe4046dd5c..2fad528a2f7ad57db8476c1879f853b8485d08e4 100644 +--- a/system/settings/settings.xml ++++ b/system/settings/settings.xml +@@ -2261,6 +2261,18 @@ + </dependencies> + <control type="toggle" /> + </setting> ++ <setting id="audiooutput.boostcenter" type="integer" label="38007" help="38008"> ++ <level>2</level> ++ <default>0</default> ++ <constraints> ++ <minimum>0</minimum> ++ <step>1</step> ++ <maximum>30</maximum> ++ </constraints> ++ <control type="spinner" format="string"> ++ <formatlabel>38009</formatlabel> ++ </control> ++ </setting> + <setting id="audiooutput.processquality" type="integer" label="13505" help="36169"> + <requirement>HAS_AE_QUALITY_LEVELS</requirement> + <level>2</level> +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResampleFFMPEG.cpp b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResampleFFMPEG.cpp +index 0cef1c58fae68f5a74d9ca31073282eb13abb037..23cd1eb96c2515eb5022f5b0220e67785b8aa4de 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResampleFFMPEG.cpp ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResampleFFMPEG.cpp +@@ -20,6 +20,7 @@ + + #include "cores/AudioEngine/Utils/AEUtil.h" + #include "ActiveAEResampleFFMPEG.h" ++#include "settings/Settings.h" + #include "utils/log.h" + + extern "C" { +@@ -104,6 +105,12 @@ bool CActiveAEResampleFFMPEG::Init(uint64_t dst_chan_layout, int dst_channels, i + { + av_opt_set_double(m_pContext, "rematrix_maxval", 1.0, 0); + } ++ int boost_center = CSettings::GetInstance().GetInt("audiooutput.boostcenter"); ++ if (boost_center) ++ { ++ float gain = pow(10.0f, ((float)(-3 + boost_center))/20.0f); ++ av_opt_set_double(m_pContext, "center_mix_level", gain, 0); ++ } + + if (remapLayout) + { +diff --git 
a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResamplePi.cpp b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResamplePi.cpp +index 78071493fca4756c6741d7085e35cbe2f27038e6..698a6ae1e2bc0cc9256caec42c0dcfb0893301b5 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResamplePi.cpp ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEResamplePi.cpp +@@ -164,6 +164,12 @@ bool CActiveAEResamplePi::Init(uint64_t dst_chan_layout, int dst_channels, int d + { + av_opt_set_double(m_pContext, "rematrix_maxval", 1.0, 0); + } ++ int boost_center = CSettings::GetInstance().GetInt("audiooutput.boostcenter"); ++ if (boost_center) ++ { ++ float gain = pow(10.0f, ((float)(-3 + boost_center))/20.0f); ++ av_opt_set_double(m_pContext, "center_mix_level", gain, 0); ++ } + + if (remapLayout) + { +diff --git a/xbmc/cores/omxplayer/OMXAudio.cpp b/xbmc/cores/omxplayer/OMXAudio.cpp +index f16b822ed7b4aebe18b5d339b3f71ee66e97c23f..993d4b33a294e88c2c004b7943895ba55558c2d0 100644 +--- a/xbmc/cores/omxplayer/OMXAudio.cpp ++++ b/xbmc/cores/omxplayer/OMXAudio.cpp +@@ -633,6 +633,12 @@ bool COMXAudio::Initialize(AEAudioFormat format, OMXClock *clock, CDVDStreamInfo + { + av_opt_set_double(m_pContext, "rematrix_maxval", 1.0, 0); + } ++ int boost_center = CSettings::GetInstance().GetInt("audiooutput.boostcenter"); ++ if (boost_center) ++ { ++ float gain = pow(10.0f, ((float)(-3 + boost_center))/20.0f); ++ av_opt_set_double(m_pContext, "center_mix_level", gain, 0); ++ } + + // stereo upmix + if (upmix && m_src_channels == 2 && m_dst_channels > 2) + +From db58404d482592303a170a3519ed43e552f3034a Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Mon, 27 Oct 2014 15:23:51 +0000 +Subject: [PATCH 09/67] [rbp] Default extract thumbnails to false + +It can take 80 seconds for a single file on a Pi. It can cause crashes with out-of-memory errors. +It generates a lot of support issues. 
Best to default to disabled and let users enable it if they must +--- + system/settings/rbp.xml | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/system/settings/rbp.xml b/system/settings/rbp.xml +index 7e6e52f82fde4c91fdc004c4b4b46e86091bcc87..737ec4e0c7f0feb98a6dd008b53e238c41dde8af 100644 +--- a/system/settings/rbp.xml ++++ b/system/settings/rbp.xml +@@ -43,6 +43,12 @@ + <setting id="myvideos.extractchapterthumbs"> + <default>false</default> + </setting> ++ <setting id="myvideos.extractflags"> ++ <default>false</default> ++ </setting> ++ <setting id="myvideos.extractthumb"> ++ <default>false</default> ++ </setting> + </group> + </category> + </section> + +From e2a04cad01c0fe85bec84480d05a58fe55f84bb2 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Thu, 27 Nov 2014 16:31:56 +0000 +Subject: [PATCH 10/67] [languageinvoker] Reduce priority of python threads + +--- + xbmc/interfaces/generic/LanguageInvokerThread.cpp | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/xbmc/interfaces/generic/LanguageInvokerThread.cpp b/xbmc/interfaces/generic/LanguageInvokerThread.cpp +index fcdd0633f30cd9595ae6cc4ed293677cdcb1f422..16f0c8916b5e0a9e90973d194cf2ebd12b5a81fd 100644 +--- a/xbmc/interfaces/generic/LanguageInvokerThread.cpp ++++ b/xbmc/interfaces/generic/LanguageInvokerThread.cpp +@@ -50,6 +50,11 @@ bool CLanguageInvokerThread::execute(const std::string &script, const std::vecto + m_args = arguments; + + Create(); ++ #ifdef TARGET_RASPBERRY_PI ++ /* low prio */ ++ SetPriority(GetPriority()-1); ++ #endif ++ + return true; + } + + +From e34bc9595b6b789d3b13165d7abcec3b25c83bfd Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 14 Dec 2013 16:55:05 +0000 +Subject: [PATCH 11/67] logging: Add microsecond timer to log messages + +--- + xbmc/utils/log.cpp | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +diff --git a/xbmc/utils/log.cpp b/xbmc/utils/log.cpp +index 
3443f1293d86018830269ed992c90a4e69c0430c..d330320842243df6f5ff256e608dddfa946e8773 100644 +--- a/xbmc/utils/log.cpp ++++ b/xbmc/utils/log.cpp +@@ -24,6 +24,7 @@ + #include "threads/Thread.h" + #include "utils/StringUtils.h" + #include "CompileInfo.h" ++#include "utils/TimeUtils.h" + + static const char* const levelNames[] = + {"DEBUG", "INFO", "NOTICE", "WARNING", "ERROR", "SEVERE", "FATAL", "NONE"}; +@@ -198,19 +199,29 @@ void CLog::PrintDebugString(const std::string& line) + + bool CLog::WriteLogString(int logLevel, const std::string& logString) + { ++#if defined(TARGET_LINUX) ++ static const char* prefixFormat = "%02.2d:%02.2d:%02.2d %10.6f T:%" PRIu64" %7s: "; ++#else + static const char* prefixFormat = "%02.2d:%02.2d:%02.2d T:%" PRIu64" %7s: "; +- ++#endif + std::string strData(logString); + /* fixup newline alignment, number of spaces should equal prefix length */ + StringUtils::Replace(strData, "\n", "\n "); + + int hour, minute, second; + s_globals.m_platform.GetCurrentLocalTime(hour, minute, second); +- ++ ++#if defined(TARGET_LINUX) ++ double Now = CurrentHostCounter() * 1e-9; // double: a float's 24-bit significand cannot hold the 6 decimals printed by %10.6f ++#endif ++ + strData = StringUtils::Format(prefixFormat, + hour, + minute, + second, ++#if defined(TARGET_LINUX) ++ Now, ++#endif + (uint64_t)CThread::GetCurrentThreadId(), + levelNames[logLevel]) + strData; + + +From 6a9154ceb989a8ca0f2c5f50c6746ade14125267 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 29 Nov 2014 15:25:16 +0000 +Subject: [PATCH 12/67] [rbp] hack: wait for splash to complete before changing + hdmi mode + +--- + xbmc/windowing/egl/EGLNativeTypeRaspberryPI.cpp | 52 +++++++++++++++++++++++++ + 1 file changed, 52 insertions(+) + +diff --git a/xbmc/windowing/egl/EGLNativeTypeRaspberryPI.cpp b/xbmc/windowing/egl/EGLNativeTypeRaspberryPI.cpp +index ee297700f8583dbb15cbe53baf8c887b36bd2ea0..bbe501d40c5e101f1d0d64b8b59b1928ae12d52f 100644 +--- a/xbmc/windowing/egl/EGLNativeTypeRaspberryPI.cpp ++++ 
b/xbmc/windowing/egl/EGLNativeTypeRaspberryPI.cpp +@@ -32,6 +32,9 @@ + #include "guilib/StereoscopicsManager.h" + #include "rendering/RenderSystem.h" + #include <cassert> ++#ifdef TARGET_POSIX ++#include "linux/XTimeUtils.h" ++#endif + + #ifndef __VIDEOCORE4__ + #define __VIDEOCORE4__ +@@ -221,12 +224,61 @@ int CEGLNativeTypeRaspberryPI::AddUniqueResolution(RESOLUTION_INFO &res, std::ve + } + #endif + ++#include <dirent.h> ++ ++pid_t proc_find(const char* name) ++{ ++ DIR* dir; ++ struct dirent* ent; ++ char buf[512]; ++ ++ long pid; ++ char pname[100] = {0,}; ++ char state; ++ FILE *fp=NULL; ++ ++ if (!(dir = opendir("/proc"))) { ++ perror("can't open /proc"); ++ return -1; ++ } ++ ++ while((ent = readdir(dir)) != NULL) { ++ long lpid = atol(ent->d_name); ++ if(lpid <= 0) /* atol() yields 0 for non-numeric /proc entries: not a pid */ ++ continue; ++ snprintf(buf, sizeof(buf), "/proc/%ld/stat", lpid); ++ fp = fopen(buf, "r"); ++ ++ if (fp) { ++ if ( (fscanf(fp, "%ld (%99[^)]) %c", &pid, pname, &state)) != 3 ){ /* %99: never write past pname[100] */ ++ /* entry vanished or is malformed (e.g. process exited mid-read): */ ++ /* skip it and keep scanning instead of reporting "not found" */ ++ fclose(fp); ++ continue; ++ } ++ if (!strcmp(pname, name)) { ++ fclose(fp); ++ closedir(dir); ++ return (pid_t)lpid; ++ } ++ fclose(fp); ++ } ++ } ++ ++ closedir(dir); ++ return -1; ++} ++ ++ + bool CEGLNativeTypeRaspberryPI::SetNativeResolution(const RESOLUTION_INFO &res) + { + #if defined(TARGET_RASPBERRY_PI) + if(!m_DllBcmHost || !m_nativeWindow) + return false; + ++ while (proc_find("hello_video.bin") >= 0) ++ Sleep(100); ++ + DestroyDispmaxWindow(); + + RENDER_STEREO_MODE stereo_mode = g_graphicsContext.GetStereoMode(); + +From 6aa85041e715484b032f9e905db8c65388acfe17 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Thu, 11 Dec 2014 17:00:57 +0000 +Subject: [PATCH 13/67] Fix for UI not showing both extractflags and + extractthumb + +--- + addons/resource.language.en_gb/resources/strings.po | 10 +++++++--- + system/settings/settings.xml | 4 ++-- + 2 files changed, 9 insertions(+), 5 deletions(-) + +diff --git 
a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index f646446b73b2e8a3a783b2e52b3257c6ad6da2bd..f1100b4238139b15799ddf1dba86265a1eaa53f3 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -12348,7 +12348,7 @@ msgstr "" + + #: system/settings/settings.xml + msgctxt "#20433" +-msgid "Extract thumbnails and video information" ++msgid "Extract video information from files" + msgstr "" + + #: xbmc/dialogs/GUIDialogSmartPlaylistRule.cpp +@@ -16783,7 +16783,7 @@ msgstr "" + #. Description of setting with label #20433 "Extract thumbnails and video information" + #: system/settings/settings.xml + msgctxt "#36178" +-msgid "Extract thumbnails and metadata information such as codec and aspect ratio from videos." ++msgid "Extract metadata information such as codec and aspect ratio from videos." + msgstr "" + + #. Description of setting with label #20419 "Replace file names with library titles" +@@ -16795,7 +16795,7 @@ msgstr "" + #. Description of setting with label #20433 "Extract thumbnails and video information" + #: system/settings/settings.xml + msgctxt "#36180" +-msgid "Extract thumbnails and information, such as codecs and aspect ratio, to display in library mode." ++msgid "Extract thumbnails, to display in library Mode." 
+ msgstr "" + + #: system/settings/settings.xml +@@ -19413,3 +19413,7 @@ msgstr "" + msgctxt "#39003" + msgid "Accelerate h264" + msgstr "" ++ ++msgctxt "#38190" ++msgid "Extract thumbnails from video files" ++msgstr "" +diff --git a/system/settings/settings.xml b/system/settings/settings.xml +index 2fad528a2f7ad57db8476c1879f853b8485d08e4..ca7e8892606782e54d4883c5b2f0e6686b1ae280 100644 +--- a/system/settings/settings.xml ++++ b/system/settings/settings.xml +@@ -919,8 +919,8 @@ + <default>true</default> + <control type="toggle" /> + </setting> +- <setting id="myvideos.extractthumb" type="boolean" label="20433" help="36180"> +- <level>4</level> ++ <setting id="myvideos.extractthumb" type="boolean" label="38190" help="36180"> ++ <level>1</level> + <default>true</default> + <control type="toggle" /> + </setting> + +From 2900f0dc9fa9b7271efc13dfd219ee62a8737f6c Mon Sep 17 00:00:00 2001 +From: anaconda <anaconda@menakite.eu> +Date: Thu, 11 Sep 2014 21:30:43 +0200 +Subject: [PATCH 14/67] Disable autoscrolling while on screensaver and while + opening streams. 
+ +--- + xbmc/Application.cpp | 10 ++++++++++ + xbmc/Application.h | 2 ++ + xbmc/guilib/GUIFadeLabelControl.cpp | 4 +++- + xbmc/guilib/GUIFont.cpp | 4 ++++ + xbmc/guilib/GUILabel.cpp | 4 +++- + xbmc/guilib/GUITextBox.cpp | 3 ++- + 6 files changed, 24 insertions(+), 3 deletions(-) + +diff --git a/xbmc/Application.cpp b/xbmc/Application.cpp +index 39c5731cc13c028212c4776511ea978fa2cb6776..bf2f2d3e73cbc88ab9d89f91baa11f983f36ee10 100644 +--- a/xbmc/Application.cpp ++++ b/xbmc/Application.cpp +@@ -5198,3 +5198,13 @@ bool CApplication::NotifyActionListeners(const CAction &action) const + + return false; + } ++ ++bool CApplication::ScreenSaverDisablesAutoScrolling() ++{ ++ bool onBlackDimScreenSaver = IsInScreenSaver() && m_screenSaver && ++ (m_screenSaver->ID() == "screensaver.xbmc.builtin.black" || ++ m_screenSaver->ID() == "screensaver.xbmc.builtin.dim"); ++ bool openingStreams = m_pPlayer->IsPlaying() && g_windowManager.IsWindowActive(WINDOW_DIALOG_BUSY); ++ ++ return onBlackDimScreenSaver || openingStreams; ++} +diff --git a/xbmc/Application.h b/xbmc/Application.h +index 5d38663767a70875d9459a2f4a65979a203edc7b..1aca9fe67fea8436a15a5e2c07b6558b2bdf3ab7 100644 +--- a/xbmc/Application.h ++++ b/xbmc/Application.h +@@ -394,6 +394,8 @@ public: + */ + void UnregisterActionListener(IActionListener *listener); + ++ bool ScreenSaverDisablesAutoScrolling(); ++ + std::unique_ptr<CServiceManager> m_ServiceManager; + + /*! 
+diff --git a/xbmc/guilib/GUIFadeLabelControl.cpp b/xbmc/guilib/GUIFadeLabelControl.cpp +index 01826a5f7ca2ccb104f897ca0670571a9b04b83d..553a6458a71009dd592c8a843eeb3bc336864d61 100644 +--- a/xbmc/guilib/GUIFadeLabelControl.cpp ++++ b/xbmc/guilib/GUIFadeLabelControl.cpp +@@ -21,6 +21,8 @@ + #include "GUIFadeLabelControl.h" + #include "utils/Random.h" + ++#include "Application.h" ++ + CGUIFadeLabelControl::CGUIFadeLabelControl(int parentID, int controlID, float posX, float posY, float width, float height, const CLabelInfo& labelInfo, bool scrollOut, unsigned int timeToDelayAtEnd, bool resetOnLabelChange, bool randomized) + : CGUIControl(parentID, controlID, posX, posY, width, height), m_label(labelInfo), m_scrollInfo(50, labelInfo.offsetX, labelInfo.scrollSpeed) + , m_textLayout(labelInfo.font, false) +@@ -106,7 +108,7 @@ void CGUIFadeLabelControl::Process(unsigned int currentTime, CDirtyRegionList &d + m_lastLabel = m_currentLabel; + } + +- if (m_infoLabels.size() > 1 || !m_shortText) ++ if ((m_infoLabels.size() > 1 || !m_shortText) && !g_application.ScreenSaverDisablesAutoScrolling()) + { // have scrolling text + bool moveToNextLabel = false; + if (!m_scrollOut) +diff --git a/xbmc/guilib/GUIFont.cpp b/xbmc/guilib/GUIFont.cpp +index 7f1108939a63162024c7a055403a58e395f090b6..1192b74675b79d1a862de2949a60163abb916035 100644 +--- a/xbmc/guilib/GUIFont.cpp ++++ b/xbmc/guilib/GUIFont.cpp +@@ -22,6 +22,7 @@ + #include "GUIFontTTF.h" + #include "GraphicContext.h" + ++#include "Application.h" + #include "threads/SingleLock.h" + #include "utils/TimeUtils.h" + #include "utils/MathUtils.h" +@@ -128,6 +129,9 @@ bool CGUIFont::UpdateScrollInfo(const vecText &text, CScrollInfo &scrollInfo) + // If the string is smaller than the viewport, then it may be plotted even + // more times than that. 
+ // ++ if (g_application.ScreenSaverDisablesAutoScrolling()) ++ return false; ++ + if (scrollInfo.waitTime) + { + scrollInfo.waitTime--; +diff --git a/xbmc/guilib/GUILabel.cpp b/xbmc/guilib/GUILabel.cpp +index db201317a1f8d93dcf0641a28b7688cbd1a70734..1c8c30dcb9d55b7240af93b5e46c620320ef410a 100644 +--- a/xbmc/guilib/GUILabel.cpp ++++ b/xbmc/guilib/GUILabel.cpp +@@ -21,6 +21,8 @@ + #include "GUILabel.h" + #include <limits> + ++#include "Application.h" ++ + CGUILabel::CGUILabel(float posX, float posY, float width, float height, const CLabelInfo& labelInfo, CGUILabel::OVER_FLOW overflow) + : m_label(labelInfo) + , m_textLayout(labelInfo.font, overflow == OVER_FLOW_WRAP, height) +@@ -104,7 +106,7 @@ void CGUILabel::Render() + color_t color = GetColor(); + bool renderSolid = (m_color == COLOR_DISABLED); + bool overFlows = (m_renderRect.Width() + 0.5f < m_textLayout.GetTextWidth()); // 0.5f to deal with floating point rounding issues +- if (overFlows && m_scrolling && !renderSolid) ++ if (overFlows && m_scrolling && !renderSolid && !g_application.ScreenSaverDisablesAutoScrolling()) + m_textLayout.RenderScrolling(m_renderRect.x1, m_renderRect.y1, m_label.angle, color, m_label.shadowColor, 0, m_renderRect.Width(), m_scrollInfo); + else + { +diff --git a/xbmc/guilib/GUITextBox.cpp b/xbmc/guilib/GUITextBox.cpp +index d7bc1c5ba6067af9a460589920367288c640a915..ac766293f1c47c7f145cb46f6b152144b303f15f 100644 +--- a/xbmc/guilib/GUITextBox.cpp ++++ b/xbmc/guilib/GUITextBox.cpp +@@ -24,6 +24,7 @@ + #include "utils/MathUtils.h" + #include "utils/StringUtils.h" + #include "guiinfo/GUIInfoLabels.h" ++#include "Application.h" + + #include <algorithm> + +@@ -133,7 +134,7 @@ void CGUITextBox::Process(unsigned int currentTime, CDirtyRegionList &dirtyregio + // update our auto-scrolling as necessary + if (m_autoScrollTime && m_lines.size() > m_itemsPerPage) + { +- if (!m_autoScrollCondition || m_autoScrollCondition->Get()) ++ if ((!m_autoScrollCondition || m_autoScrollCondition->Get()) 
&& !g_application.ScreenSaverDisablesAutoScrolling()) + { + if (m_lastRenderTime) + m_autoScrollDelayTime += currentTime - m_lastRenderTime; + +From 91f06fc770b8d9dee8086ab20a7111dc75664229 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 13 Dec 2014 18:35:20 +0000 +Subject: [PATCH 15/67] [demuxer] Avoid memcpy on every demuxer packet + +Avoids an unnecessary memcpy on every demuxer packet which for +high bitrate videos can be significant. +--- + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 17 ++++++++++++----- + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxPacket.h | 3 +++ + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxUtils.cpp | 7 ++++++- + 3 files changed, 21 insertions(+), 6 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 0b3643c70a9f0d18ccdbb04619d90f82e3b2f232..b9131402dff3a6d538a188794096bad5784dbb63 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -851,7 +851,7 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + { + if(m_pkt.pkt.stream_index == (int)m_pFormatContext->programs[m_program]->stream_index[i]) + { +- pPacket = CDVDDemuxUtils::AllocateDemuxPacket(m_pkt.pkt.size); ++ pPacket = CDVDDemuxUtils::AllocateDemuxPacket(0); + break; + } + } +@@ -860,7 +860,7 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + bReturnEmpty = true; + } + else +- pPacket = CDVDDemuxUtils::AllocateDemuxPacket(m_pkt.pkt.size); ++ pPacket = CDVDDemuxUtils::AllocateDemuxPacket(0); + } + else + bReturnEmpty = true; +@@ -890,9 +890,13 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + // copy contents into our own packet + pPacket->iSize = m_pkt.pkt.size; + +- // maybe we can avoid a memcpy here by detecting where pkt.destruct is pointing too? 
+ if (m_pkt.pkt.data) +- memcpy(pPacket->pData, m_pkt.pkt.data, pPacket->iSize); ++ { ++ pPacket->pData = m_pkt.pkt.data; ++ // so we can free AVPacket when DemuxPacket is freed ++ pPacket->pkt = new AVPacket(m_pkt.pkt); ++ } ++ + + pPacket->pts = ConvertTimestamp(m_pkt.pkt.pts, stream->time_base.den, stream->time_base.num); + pPacket->dts = ConvertTimestamp(m_pkt.pkt.dts, stream->time_base.den, stream->time_base.num); +@@ -946,7 +950,10 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + pPacket->iStreamId = m_pkt.pkt.stream_index; + } + m_pkt.result = -1; +- av_packet_unref(&m_pkt.pkt); ++ if (pPacket && pPacket->pkt) ++ memset(&m_pkt.pkt, 0, sizeof(AVPacket)); ++ else ++ av_packet_unref(&m_pkt.pkt); + } + } + } // end of lock scope +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxPacket.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxPacket.h +index 4f471188c133deb91516311f0082e8741d9dee79..22805781c4d5a957d10fdf74ffa34387f67a25e9 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxPacket.h ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxPacket.h +@@ -25,6 +25,8 @@ + #define DMX_SPECIALID_STREAMINFO -10 + #define DMX_SPECIALID_STREAMCHANGE -11 + ++struct AVPacket; ++ + typedef struct DemuxPacket + { + unsigned char* pData; // data +@@ -36,6 +38,7 @@ typedef struct DemuxPacket + double pts; // pts in DVD_TIME_BASE + double dts; // dts in DVD_TIME_BASE + double duration; // duration in DVD_TIME_BASE if available ++ AVPacket *pkt; // to allow packet to be freed + + int dispTime; + } DemuxPacket; +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxUtils.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxUtils.cpp +index df0f35bd49c65b302de4ccd110d859e8b881ea5f..b4b591ae4c4dd4fb0b36d4d00fedca966f86000f 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxUtils.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxUtils.cpp +@@ -39,7 +39,12 @@ void CDVDDemuxUtils::FreeDemuxPacket(DemuxPacket* pPacket) + if (pPacket) + { + try { +- if (pPacket->pData) 
_aligned_free(pPacket->pData); ++ if (pPacket->pkt) ++ { ++ av_free_packet(pPacket->pkt); ++ delete pPacket->pkt; ++ } ++ else if (pPacket->pData) _aligned_free(pPacket->pData); + delete pPacket; + } + catch(...) { + +From a1f9425d9d9417c7f83806f41b724554653a1be6 Mon Sep 17 00:00:00 2001 +From: anaconda <anaconda@menakite.eu> +Date: Wed, 25 Feb 2015 18:22:21 +0100 +Subject: [PATCH 16/67] Load OSD dialogs on startup. + +Fixes skipped frames the first time they're loaded in memory on less powered +devices, like a Raspberry Pi, when using DVDPlayer. +See http://forum.kodi.tv/showthread.php?tid=211501&pid=1938811#pid1938811 +--- + xbmc/pvr/dialogs/GUIDialogPVRChannelsOSD.cpp | 1 + + xbmc/pvr/dialogs/GUIDialogPVRGuideOSD.cpp | 1 + + xbmc/video/dialogs/GUIDialogAudioSubtitleSettings.cpp | 4 +++- + xbmc/video/dialogs/GUIDialogSubtitles.cpp | 2 +- + xbmc/video/dialogs/GUIDialogVideoOSD.cpp | 2 +- + xbmc/video/dialogs/GUIDialogVideoSettings.cpp | 4 +++- + 6 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/xbmc/pvr/dialogs/GUIDialogPVRChannelsOSD.cpp b/xbmc/pvr/dialogs/GUIDialogPVRChannelsOSD.cpp +index 1f72db5b0726434505ee7f52296b909b98a5d133..bb2dd07f8c18e6e72c31feb6273b84b599265e0e 100644 +--- a/xbmc/pvr/dialogs/GUIDialogPVRChannelsOSD.cpp ++++ b/xbmc/pvr/dialogs/GUIDialogPVRChannelsOSD.cpp +@@ -50,6 +50,7 @@ CGUIDialogPVRChannelsOSD::CGUIDialogPVRChannelsOSD() : + CGUIDialog(WINDOW_DIALOG_PVR_OSD_CHANNELS, "DialogPVRChannelsOSD.xml"), + Observer() + { ++ m_loadType = LOAD_ON_GUI_INIT; + m_vecItems = new CFileItemList; + } + +diff --git a/xbmc/pvr/dialogs/GUIDialogPVRGuideOSD.cpp b/xbmc/pvr/dialogs/GUIDialogPVRGuideOSD.cpp +index 8b472435e26e455249637faf5120055b415fc49e..be1f64d552161f8a86a5c5d89c1bc23328574fb6 100644 +--- a/xbmc/pvr/dialogs/GUIDialogPVRGuideOSD.cpp ++++ b/xbmc/pvr/dialogs/GUIDialogPVRGuideOSD.cpp +@@ -36,6 +36,7 @@ using namespace PVR; + CGUIDialogPVRGuideOSD::CGUIDialogPVRGuideOSD() + : CGUIDialog(WINDOW_DIALOG_PVR_OSD_GUIDE, 
"DialogPVRGuideOSD.xml") + { ++ m_loadType = LOAD_ON_GUI_INIT; + m_vecItems = new CFileItemList; + } + +diff --git a/xbmc/video/dialogs/GUIDialogAudioSubtitleSettings.cpp b/xbmc/video/dialogs/GUIDialogAudioSubtitleSettings.cpp +index eb67552344f59b8857b16c882c29e3fa62bed75c..f31572b34d376e70a35003a8c2e175b45daf8070 100644 +--- a/xbmc/video/dialogs/GUIDialogAudioSubtitleSettings.cpp ++++ b/xbmc/video/dialogs/GUIDialogAudioSubtitleSettings.cpp +@@ -68,7 +68,9 @@ CGUIDialogAudioSubtitleSettings::CGUIDialogAudioSubtitleSettings() + : CGUIDialogSettingsManualBase(WINDOW_DIALOG_AUDIO_OSD_SETTINGS, "DialogSettings.xml"), + m_passthrough(false), + m_dspEnabled(false) +-{ } ++{ ++ m_loadType = LOAD_ON_GUI_INIT; ++} + + CGUIDialogAudioSubtitleSettings::~CGUIDialogAudioSubtitleSettings() + { } +diff --git a/xbmc/video/dialogs/GUIDialogSubtitles.cpp b/xbmc/video/dialogs/GUIDialogSubtitles.cpp +index 398558e4d5d0cae30ee1c73e2b70e3b2f787e8fc..4e8a9b1e307a89d3a7b68402e2ff11b57e7dccd4 100644 +--- a/xbmc/video/dialogs/GUIDialogSubtitles.cpp ++++ b/xbmc/video/dialogs/GUIDialogSubtitles.cpp +@@ -103,7 +103,7 @@ CGUIDialogSubtitles::CGUIDialogSubtitles(void) + , m_pausedOnRun(false) + , m_updateSubsList(false) + { +- m_loadType = KEEP_IN_MEMORY; ++ m_loadType = LOAD_ON_GUI_INIT; + } + + CGUIDialogSubtitles::~CGUIDialogSubtitles(void) +diff --git a/xbmc/video/dialogs/GUIDialogVideoOSD.cpp b/xbmc/video/dialogs/GUIDialogVideoOSD.cpp +index e498e1fd476d9ab5300bb00bc39946a22cfd93cb..a6648d016b07e2eb3e52f8d927697cc53a42fd7b 100644 +--- a/xbmc/video/dialogs/GUIDialogVideoOSD.cpp ++++ b/xbmc/video/dialogs/GUIDialogVideoOSD.cpp +@@ -30,7 +30,7 @@ using namespace PVR; + CGUIDialogVideoOSD::CGUIDialogVideoOSD(void) + : CGUIDialog(WINDOW_DIALOG_VIDEO_OSD, "VideoOSD.xml") + { +- m_loadType = KEEP_IN_MEMORY; ++ m_loadType = LOAD_ON_GUI_INIT; + } + + CGUIDialogVideoOSD::~CGUIDialogVideoOSD(void) +diff --git a/xbmc/video/dialogs/GUIDialogVideoSettings.cpp 
b/xbmc/video/dialogs/GUIDialogVideoSettings.cpp +index afbe2032b9b2235cd524263d8a730eb3402eb07f..89f685e5dc791a64dd74fa25356d62bbb74f5b58 100644 +--- a/xbmc/video/dialogs/GUIDialogVideoSettings.cpp ++++ b/xbmc/video/dialogs/GUIDialogVideoSettings.cpp +@@ -66,7 +66,9 @@ + CGUIDialogVideoSettings::CGUIDialogVideoSettings() + : CGUIDialogSettingsManualBase(WINDOW_DIALOG_VIDEO_OSD_SETTINGS, "DialogSettings.xml"), + m_viewModeChanged(false) +-{ } ++{ ++ m_loadType = LOAD_ON_GUI_INIT; ++} + + CGUIDialogVideoSettings::~CGUIDialogVideoSettings() + { } + +From be39b1d7f8f1c217bb78888b18f2a27acc793031 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 14 Apr 2015 20:51:14 +0100 +Subject: [PATCH 17/67] [gui] Also limit GUI updates when in non full-screen + video mode + +--- + xbmc/Application.cpp | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/xbmc/Application.cpp b/xbmc/Application.cpp +index bf2f2d3e73cbc88ab9d89f91baa11f983f36ee10..3ecce5b0ac4c1b9d3c4fc0dd759b31f1600ac7fa 100644 +--- a/xbmc/Application.cpp ++++ b/xbmc/Application.cpp +@@ -2707,7 +2707,7 @@ void CApplication::FrameMove(bool processEvents, bool processGUI) + #if defined(TARGET_RASPBERRY_PI) || defined(HAS_IMXVPU) + // This code reduces rendering fps of the GUI layer when playing videos in fullscreen mode + // it makes only sense on architectures with multiple layers +- if (g_graphicsContext.IsFullScreenVideo() && !m_pPlayer->IsPausedPlayback() && m_pPlayer->IsRenderingVideoLayer()) ++ if (m_pPlayer->IsPlayingVideo() && !m_pPlayer->IsPausedPlayback() && m_pPlayer->IsRenderingVideoLayer()) + fps = CSettings::GetInstance().GetInt(CSettings::SETTING_VIDEOPLAYER_LIMITGUIUPDATE); + #endif + +@@ -2720,6 +2720,8 @@ void CApplication::FrameMove(bool processEvents, bool processGUI) + { + if (!m_skipGuiRender) + g_windowManager.Process(CTimeUtils::GetFrameTime()); ++ else if (!g_graphicsContext.IsFullScreenVideo()) ++ g_windowManager.FrameMove(); + } + 
g_windowManager.FrameMove(); + } + +From 3dea2824fdcfe2448b5b6fd348569c34c5c12f84 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 21 Apr 2015 14:32:07 +0100 +Subject: [PATCH 18/67] [mmalrenderer] Add sharpness control + +--- + addons/resource.language.en_gb/resources/strings.po | 2 +- + .../VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp | 13 ++++++++++++- + .../VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h | 1 + + 3 files changed, 14 insertions(+), 2 deletions(-) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index f1100b4238139b15799ddf1dba86265a1eaa53f3..085e2a195d2e52ce6bea3ed791bf817f5be23b15 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -8631,7 +8631,7 @@ msgstr "" + + #: xbmc/video/dialogs/GUIDialogVideoSettings.cpp + msgctxt "#16313" +-msgid "VDPAU - Sharpness" ++msgid "Sharpness" + msgstr "" + + #: xbmc/video/dialogs/GUIDialogVideoSettings.cpp +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +index cd573128fdc7e24b5ecf19730b40ef35d1c67a14..d65857779628debfc85b47b8dd283513edb5a319 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +@@ -237,6 +237,7 @@ CMMALRenderer::CMMALRenderer() : CThread("MMALRenderer") + m_inflight = 0; + m_queue = nullptr; + m_error = 0.0; ++ m_sharpness = -2.0f; + } + + CMMALRenderer::~CMMALRenderer() +@@ -419,6 +420,15 @@ void CMMALRenderer::RenderUpdate(bool clear, DWORD flags, DWORD alpha) + + ManageRenderArea(); + ++ // if sharpness setting has changed, we should update it ++ if (m_sharpness != CMediaSettings::GetInstance().GetCurrentVideoSettings().m_Sharpness) ++ { ++ m_sharpness = 
CMediaSettings::GetInstance().GetCurrentVideoSettings().m_Sharpness; ++ char command[80], response[80]; ++ sprintf(command, "scaling_sharpness %d", ((int)(50.0f * (m_sharpness + 1.0f) + 0.5f))); ++ vc_gencmd(response, sizeof response, command); ++ } ++ + if (m_format != RENDER_FMT_MMAL) + { + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) +@@ -608,7 +618,8 @@ bool CMMALRenderer::Supports(ERENDERFEATURE feature) + feature == RENDERFEATURE_ZOOM || + feature == RENDERFEATURE_ROTATION || + feature == RENDERFEATURE_VERTICAL_SHIFT || +- feature == RENDERFEATURE_PIXEL_RATIO) ++ feature == RENDERFEATURE_PIXEL_RATIO || ++ feature == RENDERFEATURE_SHARPNESS) + return true; + + return false; +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h +index e2c0223836af4171715a3907a5f7ac2511930f5f..ae0ce625c619910530f0b62ea8921aca0a3a7f63 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.h +@@ -116,6 +116,7 @@ protected: + bool m_StereoInvert; + int m_inflight; + bool m_opaque; ++ float m_sharpness; + AVPixelFormat m_pixfmt; + + CCriticalSection m_sharedSection; + +From 121a372d0e98284ede602670609158fc26f8a5be Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 5 May 2015 23:58:06 +0100 +Subject: [PATCH 19/67] [screensaver] Leave GUI contents available for + screensaver + +--- + xbmc/guilib/GUIWindowManager.cpp | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/xbmc/guilib/GUIWindowManager.cpp b/xbmc/guilib/GUIWindowManager.cpp +index 5835280d07f049329b05494cd30744c9c1f7a258..93f646e2b28efca6a4bdebbf458127ab597024eb 100644 +--- a/xbmc/guilib/GUIWindowManager.cpp ++++ b/xbmc/guilib/GUIWindowManager.cpp +@@ -789,7 +789,16 @@ void CGUIWindowManager::ActivateWindow_Internal(int iWindowID, const std::vector + int currentWindow = 
GetActiveWindow(); + CGUIWindow *pWindow = GetWindow(currentWindow); + if (pWindow) +- CloseWindowSync(pWindow, iWindowID); ++ { ++ if (iWindowID == WINDOW_SCREENSAVER) ++ { ++ pWindow->Close(true, iWindowID); ++ } ++ else ++ { ++ CloseWindowSync(pWindow, iWindowID); ++ } ++ } + g_infoManager.SetNextWindow(WINDOW_INVALID); + + // Add window to the history list (we must do this before we activate it, + +From d0dac94c4e36e2c8d60311137194573b49ca3c9a Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 6 Jun 2015 18:43:57 +0100 +Subject: [PATCH 20/67] ffmpeg: Automatic switch to software decode for GMC + with more than one warp point + +--- + ...Signal-unsupported-GMC-with-more-than-one.patch | 48 ++++++++++++++++++++++ + tools/depends/target/ffmpeg/Makefile | 4 +- + tools/depends/target/ffmpeg/autobuild.sh | 2 + + .../VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 6 +++ + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemux.h | 2 + + .../VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 2 +- + xbmc/cores/VideoPlayer/DVDStreamInfo.cpp | 3 ++ + xbmc/cores/VideoPlayer/DVDStreamInfo.h | 1 + + xbmc/cores/omxplayer/OMXHelper.cpp | 8 +++- + 9 files changed, 73 insertions(+), 3 deletions(-) + create mode 100644 tools/depends/target/ffmpeg/0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch + +diff --git a/tools/depends/target/ffmpeg/0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch b/tools/depends/target/ffmpeg/0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..4cb8dd8fc466220e5d2539120de79ab123e65713 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch +@@ -0,0 +1,48 @@ ++From 84e9a1784bbd3182b68cefa5e5feae8da8b9e184 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Fri, 5 Jun 2015 22:48:33 +0100 ++Subject: [PATCH] mpeg4video: Signal unsupported GMC with more than one warp 
++ point ++ ++--- ++ libavcodec/avcodec.h | 1 + ++ libavcodec/mpeg4videodec.c | 4 ++++ ++ 2 files changed, 5 insertions(+) ++ ++diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h ++index 8c7c420..e63dc2d 100644 ++--- a/libavcodec/avcodec.h +++++ b/libavcodec/avcodec.h ++@@ -2527,6 +2527,7 @@ typedef struct AVCodecContext { ++ #define FF_BUG_DC_CLIP 4096 ++ #define FF_BUG_MS 8192 ///< Work around various bugs in Microsoft's broken decoders. ++ #define FF_BUG_TRUNCATED 16384 +++#define FF_BUG_GMC_UNSUPPORTED 32768 ++ ++ /** ++ * strictly follow the standard (MPEG4, ...). ++diff --git a/libavcodec/mpeg4videodec.c b/libavcodec/mpeg4videodec.c ++index 9bf33dd..0b5d3b9 100644 ++--- a/libavcodec/mpeg4videodec.c +++++ b/libavcodec/mpeg4videodec.c ++@@ -2179,6 +2179,9 @@ int ff_mpeg4_workaround_bugs(AVCodecContext *avctx) ++ ++ if (ctx->divx_version >= 0) ++ s->workaround_bugs |= FF_BUG_HPEL_CHROMA; +++ +++ if (ctx->num_sprite_warping_points > 1) +++ s->workaround_bugs |= FF_BUG_GMC_UNSUPPORTED; ++ } ++ ++ if (s->workaround_bugs & FF_BUG_STD_QPEL) { ++@@ -2203,6 +2206,7 @@ int ff_mpeg4_workaround_bugs(AVCodecContext *avctx) ++ s->workaround_bugs, ctx->lavc_build, ctx->xvid_build, ++ ctx->divx_version, ctx->divx_build, s->divx_packed ? 
"p" : ""); ++ +++ avctx->workaround_bugs = s->workaround_bugs; ++ if (CONFIG_MPEG4_DECODER && ctx->xvid_build >= 0 && ++ s->codec_id == AV_CODEC_ID_MPEG4 && ++ avctx->idct_algo == FF_IDCT_AUTO) { ++-- ++1.9.1 ++ +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index c3998be2f3a5f1dbde2498be624fa8b48de7339f..dffe2da1dfd09e06c5f15c362f7cbe3cf2a26f75 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -1,6 +1,7 @@ + include ../../Makefile.include + include FFMPEG-VERSION +-DEPS= ../../Makefile.include FFMPEG-VERSION Makefile ++DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ ++ 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -72,6 +73,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + rm -rf $(PLATFORM); mkdir -p $(PLATFORM) + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); sed -i".bak" -e "s%pkg_config_default=pkg-config%export PKG_CONFIG_LIBDIR=$(PREFIX)/lib/pkgconfig \&\& pkg_config_default=$(NATIVEPREFIX)/bin/pkg-config%" configure ++ cd $(PLATFORM); patch -p1 < ../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ + ./configure $(ffmpg_config) +diff --git a/tools/depends/target/ffmpeg/autobuild.sh b/tools/depends/target/ffmpeg/autobuild.sh +index 6bbebfca1c7189fec6650932d7292f17af60db62..9c26b239c2b2c1221bed7c4d99c46e909a4a5c5d 100755 +--- a/tools/depends/target/ffmpeg/autobuild.sh ++++ b/tools/depends/target/ffmpeg/autobuild.sh +@@ -127,6 +127,8 @@ mkdir -p "ffmpeg-${VERSION}" + cd "ffmpeg-${VERSION}" || exit 2 + tar --strip-components=1 -xf $MYDIR/${ARCHIVE} + ++patch -p1 < ../../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch ++ + 
CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" \ + ./configure --prefix=$FFMPEG_PREFIX \ + --extra-version="kodi-${VERSION}" \ +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index 51ded6b236418b7ff31b15b59e5da1b196f31fc2..c0e553ca060749edff28bcbb880ed3e149b9f751 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -47,6 +47,10 @@ + + #include "linux/RBP.h" + ++#ifndef FF_BUG_GMC_UNSUPPORTED ++#define FF_BUG_GMC_UNSUPPORTED 0 ++#endif ++ + using namespace KODI::MESSAGING; + + #define CLASSNAME "CMMALVideoBuffer" +@@ -540,6 +544,8 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + // we always qualify even if DVDFactoryCodec does this too. + if (!CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEMMAL) || hints.software) + return false; ++ if (hints.workaround_bugs & FF_BUG_GMC_UNSUPPORTED) ++ return false; + + m_processInfo.SetVideoDeintMethod("none"); + +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemux.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemux.h +index 23cd50ce4643d32fc8f97bc612e9e911169f32d1..86ac5175b0ff1481571beaf0617471e122ee05a1 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemux.h ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemux.h +@@ -157,6 +157,7 @@ public: + type = STREAM_VIDEO; + iOrientation = 0; + iBitsPerPixel = 0; ++ workaround_bugs = 0; + } + + virtual ~CDemuxStreamVideo() {} +@@ -171,6 +172,7 @@ public: + int iOrientation; // orientation of the video in degress counter clockwise + int iBitsPerPixel; + std::string stereo_mode; // expected stereo mode ++ int workaround_bugs; // info for decoder + }; + + class CDemuxStreamAudio : public CDemuxStream +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 
b9131402dff3a6d538a188794096bad5784dbb63..84310bbda6440dd10f9aa0711859f4dc0bb1fd1a 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -1310,7 +1310,7 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + if (!stereoMode.empty()) + st->stereo_mode = stereoMode; + +- ++ st->workaround_bugs = pStream->codec->workaround_bugs; + if ( m_pInput->IsStreamType(DVDSTREAM_TYPE_DVD) ) + { + if (pStream->codec->codec_id == AV_CODEC_ID_PROBE) +diff --git a/xbmc/cores/VideoPlayer/DVDStreamInfo.cpp b/xbmc/cores/VideoPlayer/DVDStreamInfo.cpp +index e59c84c32ff6f108b52955523321f37bd3885986..28dbdd344473338762927f5f2d01425243187a7c 100644 +--- a/xbmc/cores/VideoPlayer/DVDStreamInfo.cpp ++++ b/xbmc/cores/VideoPlayer/DVDStreamInfo.cpp +@@ -74,6 +74,7 @@ void CDVDStreamInfo::Clear() + channellayout = 0; + + orientation = 0; ++ workaround_bugs = 0; + } + + bool CDVDStreamInfo::Equal(const CDVDStreamInfo& right, bool withextradata) +@@ -175,6 +176,7 @@ void CDVDStreamInfo::Assign(const CDVDStreamInfo& right, bool withextradata) + vfr = right.vfr; + software = right.software; + stereo_mode = right.stereo_mode; ++ workaround_bugs = right.workaround_bugs; + + // AUDIO + channels = right.channels; +@@ -233,6 +235,7 @@ void CDVDStreamInfo::Assign(const CDemuxStream& right, bool withextradata) + orientation = stream->iOrientation; + bitsperpixel = stream->iBitsPerPixel; + stereo_mode = stream->stereo_mode; ++ workaround_bugs = stream->workaround_bugs; + } + else if( right.type == STREAM_SUBTITLE ) + { +diff --git a/xbmc/cores/VideoPlayer/DVDStreamInfo.h b/xbmc/cores/VideoPlayer/DVDStreamInfo.h +index f14170850673ebf746df0acf8f5cf5977feae684..85e402bb4e1ddd61bdb657802cc7347c95b9a302 100644 +--- a/xbmc/cores/VideoPlayer/DVDStreamInfo.h ++++ b/xbmc/cores/VideoPlayer/DVDStreamInfo.h +@@ -73,6 +73,7 @@ public: + int orientation; // orientation of the video in degress counter clockwise + int bitsperpixel; + 
std::string stereo_mode; // stereoscopic 3d mode ++ int workaround_bugs; // info for decoder + + // AUDIO + int channels; +diff --git a/xbmc/cores/omxplayer/OMXHelper.cpp b/xbmc/cores/omxplayer/OMXHelper.cpp +index b5db1c4ec03e4b5809a14c541329ee11aa7df04f..344f393cfa2230b21a8dba42ef3cf79ce428dac2 100644 +--- a/xbmc/cores/omxplayer/OMXHelper.cpp ++++ b/xbmc/cores/omxplayer/OMXHelper.cpp +@@ -30,6 +30,10 @@ + #include "cores/omxplayer/OMXPlayerVideo.h" + #include "threads/SystemClock.h" + ++#ifndef FF_BUG_GMC_UNSUPPORTED ++#define FF_BUG_GMC_UNSUPPORTED 0 ++#endif ++ + #define PREDICATE_RETURN(lh, rh) \ + do { \ + if((lh) != (rh)) \ +@@ -81,7 +85,9 @@ bool OMXPlayerUnsuitable(bool m_HasVideo, bool m_HasAudio, CDVDDemux* m_pDemuxer + CDVDStreamInfo hint(*stream, true); + + bool supported = false; +- if ((hint.codec == AV_CODEC_ID_MPEG1VIDEO || hint.codec == AV_CODEC_ID_MPEG2VIDEO) && g_RBP.GetCodecMpg2()) ++ if (hint.workaround_bugs & FF_BUG_GMC_UNSUPPORTED) ++ ; ++ else if ((hint.codec == AV_CODEC_ID_MPEG1VIDEO || hint.codec == AV_CODEC_ID_MPEG2VIDEO) && g_RBP.GetCodecMpg2()) + supported = true; + else if ((hint.codec == AV_CODEC_ID_VC1 || hint.codec == AV_CODEC_ID_WMV3) && g_RBP.GetCodecWvc1()) + supported = true; + +From e7ca15df03877b289fcaed9838e49758982ecacf Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Thu, 10 Mar 2016 17:56:11 +0000 +Subject: [PATCH 21/67] [rbp] HW mouse pointer + +Updating the mouse point provokes a complete screen update which can make it feel laggy +and results in high cpu. + +Render the mouse with an overlay to avoid redrawing the normal gui. 
+--- + xbmc/guilib/GUIWindowManager.cpp | 2 + + xbmc/linux/RBP.cpp | 135 ++++++++++++++++++++++++++++++++++++++ + xbmc/linux/RBP.h | 9 +++ + xbmc/windowing/WinEventsLinux.cpp | 125 +++++++++++++++++++++++++++++++++++ + xbmc/windowing/WinEventsLinux.h | 11 ++++ + 5 files changed, 282 insertions(+) + +diff --git a/xbmc/guilib/GUIWindowManager.cpp b/xbmc/guilib/GUIWindowManager.cpp +index 93f646e2b28efca6a4bdebbf458127ab597024eb..4bedbdde8c9b226e86a0c37378597bd524dbe66e 100644 +--- a/xbmc/guilib/GUIWindowManager.cpp ++++ b/xbmc/guilib/GUIWindowManager.cpp +@@ -198,7 +198,9 @@ void CGUIWindowManager::CreateWindows() + Add(new CGUIWindowAddonBrowser); + Add(new CGUIWindowScreensaverDim); + Add(new CGUIWindowDebugInfo); ++#ifndef TARGET_RASPBERRY_PI + Add(new CGUIWindowPointer); ++#endif + Add(new CGUIDialogYesNo); + Add(new CGUIDialogProgress); + Add(new CGUIDialogExtendedProgressBar); +diff --git a/xbmc/linux/RBP.cpp b/xbmc/linux/RBP.cpp +index ddbe27061f8192b7f6c830a4c22652a731537079..fbffa3a952d920cb41412f00f59d5c1c91f98740 100644 +--- a/xbmc/linux/RBP.cpp ++++ b/xbmc/linux/RBP.cpp +@@ -28,6 +28,9 @@ + + #include "cores/omxplayer/OMXImage.h" + ++#include "guilib/GraphicContext.h" ++#include "settings/DisplaySettings.h" ++ + #include <sys/ioctl.h> + #include "rpi/rpi_user_vcsm.h" + #include "utils/TimeUtils.h" +@@ -46,6 +49,10 @@ CRBP::CRBP() + m_DllBcmHost = new DllBcmHost(); + m_OMX = new COMXCore(); + m_display = DISPMANX_NO_HANDLE; ++ m_p = NULL; ++ m_x = 0; ++ m_y = 0; ++ m_enabled = 0; + m_mb = mbox_open(); + vcsm_init(); + m_vsync_count = 0; +@@ -141,6 +148,7 @@ DISPMANX_DISPLAY_HANDLE_T CRBP::OpenDisplay(uint32_t device) + m_display = vc_dispmanx_display_open( 0 /*screen*/ ); + int s = vc_dispmanx_vsync_callback(m_display, vsync_callback_static, (void *)this); + assert(s == 0); ++ init_cursor(); + } + return m_display; + } +@@ -148,6 +156,7 @@ DISPMANX_DISPLAY_HANDLE_T CRBP::OpenDisplay(uint32_t device) + void CRBP::CloseDisplay(DISPMANX_DISPLAY_HANDLE_T 
display) + { + CSingleLock lock(m_critSection); ++ uninit_cursor(); + assert(display == m_display); + int s = vc_dispmanx_vsync_callback(m_display, NULL, NULL); + assert(s == 0); +@@ -266,6 +275,9 @@ void CRBP::Deinitialize() + m_omx_image_init = false; + m_initialized = false; + m_omx_initialized = false; ++ uninit_cursor(); ++ delete m_p; ++ m_p = NULL; + if (m_mb) + mbox_close(m_mb); + m_mb = 0; +@@ -338,6 +350,52 @@ unsigned mem_unlock(int file_desc, unsigned handle) + return p[5]; + } + ++unsigned int mailbox_set_cursor_info(int file_desc, int width, int height, int format, uint32_t buffer, int hotspotx, int hotspoty) ++{ ++ int i=0; ++ unsigned int p[32]; ++ p[i++] = 0; // size ++ p[i++] = 0x00000000; // process request ++ p[i++] = 0x00008010; // set cursor state ++ p[i++] = 24; // buffer size ++ p[i++] = 24; // data size ++ ++ p[i++] = width; ++ p[i++] = height; ++ p[i++] = format; ++ p[i++] = buffer; // ptr to VC memory buffer. Doesn't work in 64bit.... ++ p[i++] = hotspotx; ++ p[i++] = hotspoty; ++ ++ p[i++] = 0x00000000; // end tag ++ p[0] = i*sizeof(*p); // actual size ++ ++ mbox_property(file_desc, p); ++ return p[5]; ++ ++} ++ ++unsigned int mailbox_set_cursor_position(int file_desc, int enabled, int x, int y) ++{ ++ int i=0; ++ unsigned p[32]; ++ p[i++] = 0; // size ++ p[i++] = 0x00000000; // process request ++ p[i++] = 0x00008011; // set cursor state ++ p[i++] = 12; // buffer size ++ p[i++] = 12; // data size ++ ++ p[i++] = enabled; ++ p[i++] = x; ++ p[i++] = y; ++ ++ p[i++] = 0x00000000; // end tag ++ p[0] = i*sizeof *p; // actual size ++ ++ mbox_property(file_desc, p); ++ return p[5]; ++} ++ + CGPUMEM::CGPUMEM(unsigned int numbytes, bool cached) + { + m_numbytes = numbytes; +@@ -369,4 +427,81 @@ void CGPUMEM::Flush() + vcsm_clean_invalid( &iocache ); + } + ++#define T 0 ++#define W 0xffffffff ++#define B 0xff000000 ++ ++const static uint32_t default_cursor_pixels[] = ++{ ++ B,B,B,B,B,B,B,B,B,T,T,T,T,T,T,T, ++ B,W,W,W,W,W,W,B,T,T,T,T,T,T,T,T, ++ 
B,W,W,W,W,W,B,T,T,T,T,T,T,T,T,T, ++ B,W,W,W,W,B,T,T,T,T,T,T,T,T,T,T, ++ B,W,W,W,W,W,B,T,T,T,T,T,T,T,T,T, ++ B,W,W,B,W,W,W,B,T,T,T,T,T,T,T,T, ++ B,W,B,T,B,W,W,W,B,T,T,T,T,T,T,T, ++ B,B,T,T,T,B,W,W,W,B,T,T,T,T,T,T, ++ B,T,T,T,T,T,B,W,W,W,B,T,T,T,T,T, ++ T,T,T,T,T,T,T,B,W,W,W,B,T,T,T,T, ++ T,T,T,T,T,T,T,T,B,W,W,W,B,T,T,T, ++ T,T,T,T,T,T,T,T,T,B,W,W,W,B,T,T, ++ T,T,T,T,T,T,T,T,T,T,B,W,W,W,B,T, ++ T,T,T,T,T,T,T,T,T,T,T,B,W,W,W,B, ++ T,T,T,T,T,T,T,T,T,T,T,T,B,W,B,T, ++ T,T,T,T,T,T,T,T,T,T,T,T,T,B,T,T ++}; ++ ++#undef T ++#undef W ++#undef B ++ ++void CRBP::init_cursor() ++{ ++ if (!m_mb) ++ return; ++ if (!m_p) ++ m_p = new CGPUMEM(64 * 64 * 4, false); ++ if (m_p && m_p->m_arm && m_p->m_vc) ++ set_cursor(default_cursor_pixels, 16, 16, 0, 0); ++} ++ ++void CRBP::set_cursor(const void *pixels, int width, int height, int hotspot_x, int hotspot_y) ++{ ++ if (!m_mb || !m_p || !m_p->m_arm || !m_p->m_vc || !pixels || width * height > 64 * 64) ++ return; ++ memcpy(m_p->m_arm, pixels, width * height * 4); ++ unsigned int s = mailbox_set_cursor_info(m_mb, width, height, 0, m_p->m_vc, hotspot_x, hotspot_y); ++ assert(s == 0); ++} ++ ++void CRBP::update_cursor(int x, int y, bool enabled) ++{ ++ if (!m_mb || !m_p || !m_p->m_arm || !m_p->m_vc) ++ return; ++ ++ RESOLUTION res = g_graphicsContext.GetVideoResolution(); ++ CRect gui(0, 0, CDisplaySettings::GetInstance().GetResolutionInfo(res).iWidth, CDisplaySettings::GetInstance().GetResolutionInfo(res).iHeight); ++ CRect display(0, 0, CDisplaySettings::GetInstance().GetResolutionInfo(res).iScreenWidth, CDisplaySettings::GetInstance().GetResolutionInfo(res).iScreenHeight); ++ ++ int x2 = x * display.Width() / gui.Width(); ++ int y2 = y * display.Height() / gui.Height(); ++ ++ if (g_graphicsContext.GetStereoMode() == RENDER_STEREO_MODE_SPLIT_HORIZONTAL) ++ y2 *= 2; ++ else if (g_graphicsContext.GetStereoMode() == RENDER_STEREO_MODE_SPLIT_VERTICAL) ++ x2 *= 2; ++ if (m_x != x2 || m_y != y2 || m_enabled != enabled) ++ 
mailbox_set_cursor_position(m_mb, enabled, x2, y2); ++ m_x = x2; ++ m_y = y2; ++ m_enabled = enabled; ++} ++ ++void CRBP::uninit_cursor() ++{ ++ if (!m_mb || !m_p || !m_p->m_arm || !m_p->m_vc) ++ return; ++ mailbox_set_cursor_position(m_mb, 0, 0, 0); ++} ++ + #endif +diff --git a/xbmc/linux/RBP.h b/xbmc/linux/RBP.h +index fffa5182126159f6dfcf750b21fa0464e229e545..90b04db5405058be2ff20aeaa6af2d2ac651586f 100644 +--- a/xbmc/linux/RBP.h ++++ b/xbmc/linux/RBP.h +@@ -103,6 +103,15 @@ private: + CCriticalSection m_critSection; + + int m_mb; ++ CGPUMEM *m_p; ++ int m_x; ++ int m_y; ++ bool m_enabled; ++ public: ++ void init_cursor(); ++ void set_cursor(const void *pixels, int width, int height, int hotspot_x, int hotspot_y); ++ void update_cursor(int x, int y, bool enabled); ++ void uninit_cursor(); + }; + + extern CRBP g_RBP; +diff --git a/xbmc/windowing/WinEventsLinux.cpp b/xbmc/windowing/WinEventsLinux.cpp +index a958a23d7185a1dce59fc6c3f8854d177068ace4..70f0e4d3f2ac6c706e7c477d0a6e5ee2999dc88b 100644 +--- a/xbmc/windowing/WinEventsLinux.cpp ++++ b/xbmc/windowing/WinEventsLinux.cpp +@@ -30,11 +30,26 @@ + #include "utils/log.h" + #include "powermanagement/PowerManager.h" + ++#ifdef TARGET_RASPBERRY_PI ++#include "utils/TimeUtils.h" ++#include "guilib/Resolution.h" ++#include "addons/Skin.h" ++#include "utils/XMLUtils.h" ++#include "utils/StringUtils.h" ++#include "guilib/Texture.h" ++#include "linux/RBP.h" ++#include "input/InputManager.h" ++#endif ++ + bool CWinEventsLinux::m_initialized = false; + CLinuxInputDevices CWinEventsLinux::m_devices; + + CWinEventsLinux::CWinEventsLinux() + { ++#ifdef TARGET_RASPBERRY_PI ++ m_last_mouse_move_time = 0; ++ m_mouse_state = -1; ++#endif + } + + void CWinEventsLinux::RefreshDevices() +@@ -48,6 +63,72 @@ bool CWinEventsLinux::IsRemoteLowBattery() + return false; + } + ++#ifdef TARGET_RASPBERRY_PI ++bool CWinEventsLinux::LoadXML(const std::string strFileName) ++{ ++ RESOLUTION_INFO m_coordsRes; // resolution that the window 
coordinates are in. ++ // Find appropriate skin folder + resolution to load from ++ std::string strFileNameLower = strFileName; ++ StringUtils::ToLower(strFileNameLower); ++ std::string strLowerPath = g_SkinInfo->GetSkinPath(strFileNameLower, &m_coordsRes); ++ std::string strPath = g_SkinInfo->GetSkinPath(strFileName, &m_coordsRes); ++ ++ TiXmlElement* pRootElement = NULL; ++ CXBMCTinyXML xmlDoc; ++ std::string strPathLower = strPath; ++ StringUtils::ToLower(strPathLower); ++ if (!xmlDoc.LoadFile(strPath) && !xmlDoc.LoadFile(strPathLower) && !xmlDoc.LoadFile(strLowerPath)) ++ { ++ CLog::Log(LOGERROR, "unable to load:%s, Line %d\n%s", strPath.c_str(), xmlDoc.ErrorRow(), xmlDoc.ErrorDesc()); ++ return false; ++ } ++ pRootElement = (TiXmlElement*)xmlDoc.RootElement()->Clone(); ++ ++ if (!pRootElement) ++ return false; ++ ++ if (strcmpi(pRootElement->Value(), "window")) ++ { ++ CLog::Log(LOGERROR, "file : XML file doesnt contain <window>"); ++ return false; ++ } ++ ++ TiXmlElement *pChild = pRootElement->FirstChildElement(); ++ while (pChild) ++ { ++ if (strcmpi(pChild->Value(), "controls") == 0) ++ { ++ TiXmlElement *pControl = pChild->FirstChildElement(); ++ while (pControl) ++ { ++ if (strcmpi(pControl->Value(), "control") == 0) ++ { ++ std::string strStringValue; ++ if (XMLUtils::GetString(pControl, "texture", strStringValue)) ++ { ++ const char* idAttr = pControl->Attribute("id"); ++ int index = idAttr ? 
atoi(idAttr)-1 : -1; ++ if (index >= 0 && index < (int)(sizeof m_cursors/sizeof *m_cursors)) ++ { ++ if (m_cursors[index].m_filename.size()) ++ g_TextureManager.ReleaseTexture(m_cursors[index].m_filename, true); ++ m_cursors[index].m_filename.clear(); ++ m_cursors[index].m_texture = g_TextureManager.Load(strStringValue); ++ if (m_cursors[index].m_texture.size()) ++ m_cursors[index].m_filename = strStringValue; ++ } ++ } ++ } ++ pControl = pControl->NextSiblingElement(); ++ } ++ } ++ pChild = pChild->NextSiblingElement(); ++ } ++ delete pRootElement; ++ return true; ++} ++#endif ++ + bool CWinEventsLinux::MessagePump() + { + if (!m_initialized) +@@ -55,13 +136,50 @@ bool CWinEventsLinux::MessagePump() + m_devices.InitAvailable(); + m_checkHotplug = std::unique_ptr<CLinuxInputDevicesCheckHotplugged>(new CLinuxInputDevicesCheckHotplugged(m_devices)); + m_initialized = true; ++#ifdef TARGET_RASPBERRY_PI ++ LoadXML("Pointer.xml"); ++#endif + } + + bool ret = false; + XBMC_Event event = {0}; ++#ifdef TARGET_RASPBERRY_PI ++ bool active = CInputManager::GetInstance().IsMouseActive(); ++ int64_t Now = CurrentHostCounter(); ++ if (!active) ++ { ++ if (m_mouse_state != -1) ++ { ++ g_RBP.update_cursor(0, 0, 0); ++ m_mouse_state = -1; ++ } ++ } ++ else ++ { ++ int state = CInputManager::GetInstance().GetMouseState() - 1; ++ if (m_mouse_state != state) ++ { ++ if (state >= 0 && state < (int)(sizeof m_cursors/sizeof *m_cursors) && !m_cursors[state].m_texture.m_textures.empty()) ++ { ++ CBaseTexture *t = (m_cursors[state].m_texture.m_textures)[0]; ++ if (t) ++ g_RBP.set_cursor((const void *)t->GetPixels(), t->GetPitch()>>2, t->GetRows(), 0, 0); ++ } ++ m_mouse_state = state; ++ } ++ } ++#endif + while (1) + { + event = m_devices.ReadEvent(); ++#ifdef TARGET_RASPBERRY_PI ++ if (active && (event.type == XBMC_MOUSEMOTION || event.type == XBMC_MOUSEBUTTONDOWN || event.type == XBMC_MOUSEBUTTONUP)) ++ { ++ if (event.type == XBMC_MOUSEMOTION) ++ g_RBP.update_cursor(event.motion.x, 
event.motion.y, 1); ++ m_last_mouse_move_time = Now; ++ } ++#endif + if (event.type != XBMC_NOEVENT) + { + ret |= g_application.OnEvent(event); +@@ -72,6 +190,13 @@ bool CWinEventsLinux::MessagePump() + } + } + ++#ifdef TARGET_RASPBERRY_PI ++ if (active && Now - m_last_mouse_move_time > 5 * 1000000000LL) ++ { ++ g_RBP.update_cursor(0, 0, 0); ++ m_mouse_state = -1; ++ } ++#endif + return ret; + } + +diff --git a/xbmc/windowing/WinEventsLinux.h b/xbmc/windowing/WinEventsLinux.h +index 1b1d2f2e60334ed0f3a9964d106957f58e69f1b3..c82ba84625fe3556ff49764d40ceb3ec220114e1 100644 +--- a/xbmc/windowing/WinEventsLinux.h ++++ b/xbmc/windowing/WinEventsLinux.h +@@ -25,6 +25,7 @@ + #include <memory> + #include "windowing/WinEvents.h" + #include "input/linux/LinuxInputDevices.h" ++#include "guilib/TextureManager.h" + + class CWinEventsLinux : public IWinEvents + { +@@ -45,6 +46,16 @@ private: + static bool m_initialized; + static CLinuxInputDevices m_devices; + std::unique_ptr<CLinuxInputDevicesCheckHotplugged> m_checkHotplug; ++#ifdef TARGET_RASPBERRY_PI ++ bool LoadXML(const std::string strFileName); ++ int64_t m_last_mouse_move_time; ++ struct ++ { ++ std::string m_filename; ++ CTextureArray m_texture; ++ } m_cursors[4]; ++ int m_mouse_state; ++#endif + }; + + #endif + +From f5e09c6ab9f5544d67f94305998b8a3b13f27b9a Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 10 Feb 2015 16:39:12 +0000 +Subject: [PATCH 22/67] [librtmp] Update to 15-Dec-2015 from + http://stream-recorder.com/forum/customized-rtmpdump-binaries-patch-file-t16103.html + +--- + tools/depends/target/librtmp/Makefile | 5 +- + tools/depends/target/librtmp/Patch.diff | 4066 ++++++++++++++++++++++ + tools/depends/target/librtmp/UpdateToLatest.diff | 257 ++ + tools/depends/target/librtmp/libm.patch | 11 - + 4 files changed, 4326 insertions(+), 13 deletions(-) + create mode 100644 tools/depends/target/librtmp/Patch.diff + create mode 100644 tools/depends/target/librtmp/UpdateToLatest.diff 
+ delete mode 100644 tools/depends/target/librtmp/libm.patch + +diff --git a/tools/depends/target/librtmp/Makefile b/tools/depends/target/librtmp/Makefile +index e78d375b1284957036a549a65b8493582cea82e6..03fee99576ab943c72bfb1f5c5b1ccc88450a63a 100644 +--- a/tools/depends/target/librtmp/Makefile ++++ b/tools/depends/target/librtmp/Makefile +@@ -1,5 +1,5 @@ + include ../../Makefile.include +-DEPS= ../../Makefile.include Makefile prefix.patch ++DEPS= ../../Makefile.include Makefile prefix.patch UpdateToLatest.diff Patch.diff + + # lib name, version + LIBNAME=rtmpdump +@@ -27,7 +27,8 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + rm -rf $(PLATFORM)/*; mkdir -p $(PLATFORM) + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); patch -p0 < ../prefix.patch +- cd $(PLATFORM)/librtmp; patch -p0 < ../../libm.patch ++ cd $(PLATFORM); patch -p1 < ../UpdateToLatest.diff ++ cd $(PLATFORM); patch -p0 < ../Patch.diff + sed -i -e 's|CC=|#CC=|' $(PLATFORM)/librtmp/Makefile + sed -i -e 's|LD=|#LD=|' $(PLATFORM)/librtmp/Makefile + sed -i -e 's|AR=|#AR=|' $(PLATFORM)/librtmp/Makefile +diff --git a/tools/depends/target/librtmp/Patch.diff b/tools/depends/target/librtmp/Patch.diff +new file mode 100644 +index 0000000000000000000000000000000000000000..62c1e990e73f61dd205028c3acae0e57d5953f76 +--- /dev/null ++++ b/tools/depends/target/librtmp/Patch.diff +@@ -0,0 +1,4066 @@ ++diff --git Makefile Makefile ++index a1595a8..9fe7584 100644 ++--- Makefile +++++ Makefile ++@@ -32,7 +32,7 @@ BINDIR=$(DESTDIR)$(bindir) ++ SBINDIR=$(DESTDIR)$(sbindir) ++ MANDIR=$(DESTDIR)$(mandir) ++ ++-LIBS_posix= +++LIBS_posix=-lm ++ LIBS_darwin= ++ LIBS_mingw=-lws2_32 -lwinmm -lgdi32 ++ LIB_RTMP=-Llibrtmp -lrtmp ++diff --git librtmp/Makefile librtmp/Makefile ++index 2c1c790..e367535 100644 ++--- librtmp/Makefile +++++ librtmp/Makefile ++@@ -26,7 +26,7 @@ REQ_GNUTLS=gnutls,hogweed,nettle ++ REQ_OPENSSL=libssl,libcrypto ++ PUB_GNUTLS=-lgmp ++ LIBZ=-lz 
++-LIBS_posix= +++LIBS_posix=-lm ++ LIBS_darwin= ++ LIBS_mingw=-lws2_32 -lwinmm -lgdi32 ++ LIB_GNUTLS=-lgnutls -lhogweed -lnettle -lgmp $(LIBZ) ++diff --git librtmp/amf.c librtmp/amf.c ++index 1c5f99f..1310cbe 100644 ++--- librtmp/amf.c +++++ librtmp/amf.c ++@@ -319,6 +319,13 @@ AMFProp_SetName(AMFObjectProperty *prop, AVal *name) ++ prop->p_name = *name; ++ } ++ +++void +++AMFProp_SetString(AMFObjectProperty *prop, AVal *str) +++{ +++ prop->p_type = AMF_STRING; +++ prop->p_vu.p_aval = *str; +++} +++ ++ AMFDataType ++ AMFProp_GetType(AMFObjectProperty *prop) ++ { ++@@ -503,6 +510,9 @@ AMF3Prop_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ return -1; ++ } ++ +++ if (*pBuffer == AMF3_NULL) +++ bDecodeName = FALSE; +++ ++ /* decode name */ ++ if (bDecodeName) ++ { ++@@ -586,7 +596,7 @@ AMF3Prop_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ } ++ case AMF3_OBJECT: ++ { ++- int nRes = AMF3_Decode(&prop->p_vu.p_object, pBuffer, nSize, TRUE); +++ int nRes = AMF3_Decode(&prop->p_vu.p_object, pBuffer, nSize, FALSE); ++ if (nRes == -1) ++ return -1; ++ nSize -= nRes; ++@@ -620,6 +630,9 @@ AMFProp_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ return -1; ++ } ++ +++ if (*pBuffer == AMF_NULL) +++ bDecodeName = FALSE; +++ ++ if (bDecodeName && nSize < 4) ++ { /* at least name (length + at least 1 byte) and 1 byte of data */ ++ RTMP_Log(RTMP_LOGDEBUG, ++@@ -649,9 +662,8 @@ AMFProp_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ return -1; ++ } ++ ++- nSize--; ++- ++ prop->p_type = *pBuffer++; +++ nSize--; ++ switch (prop->p_type) ++ { ++ case AMF_NUMBER: ++@@ -697,9 +709,13 @@ AMFProp_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ break; ++ case AMF_REFERENCE: ++ { ++- RTMP_Log(RTMP_LOGERROR, "AMF_REFERENCE not supported!"); ++- return -1; ++- break; +++ RTMP_Log(RTMP_LOGDEBUG, "AMF_REFERENCE is not fully supported!"); +++ if (nSize < 2) +++ return -1; +++ prop->p_type = 
AMF_NUMBER; +++ prop->p_vu.p_number = AMF_DecodeInt16(pBuffer); +++ nSize -= 2; +++ break; ++ } ++ case AMF_ECMA_ARRAY: ++ { ++@@ -731,13 +747,13 @@ AMFProp_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ } ++ case AMF_DATE: ++ { ++- RTMP_Log(RTMP_LOGDEBUG, "AMF_DATE"); ++- ++ if (nSize < 10) ++ return -1; ++ ++ prop->p_vu.p_number = AMF_DecodeNumber(pBuffer); ++ prop->p_UTCoffset = AMF_DecodeInt16(pBuffer + 8); +++ RTMP_Log(RTMP_LOGDEBUG, "AMF_DATE: %f, UTC offset: %d", prop->p_vu.p_number, +++ prop->p_UTCoffset); ++ ++ nSize -= 10; ++ break; ++@@ -809,8 +825,8 @@ AMFProp_Dump(AMFObjectProperty *prop) ++ } ++ else ++ { ++- name.av_val = "no-name."; ++- name.av_len = sizeof("no-name.") - 1; +++ name.av_val = "no-name"; +++ name.av_len = sizeof ("no-name") - 1; ++ } ++ if (name.av_len > 18) ++ name.av_len = 18; ++@@ -1021,11 +1037,18 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ obj->o_props = NULL; ++ if (bAMFData) ++ { ++- if (*pBuffer != AMF3_OBJECT) ++- RTMP_Log(RTMP_LOGERROR, ++- "AMF3 Object encapsulated in AMF stream does not start with AMF3_OBJECT!"); ++- pBuffer++; ++- nSize--; +++ // Decode only if it's an AMF3 object +++ if (*pBuffer == AMF3_OBJECT) +++ { +++ pBuffer++; +++ nSize--; +++ } +++ else +++ { +++ RTMP_Log(RTMP_LOGERROR, "AMF3 Object encapsulated in AMF stream does not start with AMF3_OBJECT!"); +++ pBuffer += nOriginalSize; +++ return nOriginalSize; +++ } ++ } ++ ++ ref = 0; ++@@ -1043,8 +1066,12 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ { ++ int32_t classRef = (ref >> 1); ++ ++- AMF3ClassDef cd = { {0, 0} ++- }; +++ AMF3ClassDef cd; +++ cd.cd_name.av_len = 0; +++ cd.cd_name.av_val = 0; +++ cd.cd_externalizable = FALSE; +++ cd.cd_dynamic = TRUE; +++ cd.cd_num = 0; ++ AMFObjectProperty prop; ++ ++ if ((classRef & 0x1) == 0) ++@@ -1061,6 +1088,7 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ cd.cd_dynamic = ((classExtRef >> 
1) & 0x1) == 1; ++ ++ cdnum = classExtRef >> 2; +++ cd.cd_num = cdnum; ++ ++ /* class name */ ++ ++@@ -1070,24 +1098,25 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ ++ /*std::string str = className; */ ++ ++- RTMP_Log(RTMP_LOGDEBUG, ++- "Class name: %s, externalizable: %d, dynamic: %d, classMembers: %d", ++- cd.cd_name.av_val, cd.cd_externalizable, cd.cd_dynamic, ++- cd.cd_num); +++ RTMP_Log(RTMP_LOGDEBUG, "Class name: %.*s, externalizable: %d, dynamic: %d, classMembers: %d", +++ cd.cd_name.av_len, cd.cd_name.av_val, cd.cd_externalizable, cd.cd_dynamic, cd.cd_num); ++ ++ for (i = 0; i < cdnum; i++) ++- { ++- AVal memberName; ++- if (nSize <=0) +++ { +++ AVal memberName = {NULL, 0}; +++ if (nSize <= 0) ++ { ++ invalid: ++ RTMP_Log(RTMP_LOGDEBUG, "%s, invalid class encoding!", ++ __FUNCTION__); ++ return nOriginalSize; ++- } ++- len = AMF3ReadString(pBuffer, &memberName); ++- RTMP_Log(RTMP_LOGDEBUG, "Member: %s", memberName.av_val); ++- AMF3CD_AddProp(&cd, &memberName); +++ } +++ len = AMF3ReadString(pBuffer, &memberName); +++ if (memberName.av_val) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "Member: %.*s", memberName.av_len, memberName.av_val); +++ AMF3CD_AddProp(&cd, &memberName); +++ } ++ nSize -= len; ++ pBuffer += len; ++ } ++@@ -1118,10 +1147,10 @@ invalid: ++ else ++ { ++ int nRes, i; ++- for (i = 0; i < cd.cd_num; i++) /* non-dynamic */ ++- { ++- if (nSize <=0) ++- goto invalid; +++ for (i = 0; i < cd.cd_num; i++) /* non-dynamic */ +++ { +++ if (nSize <= 0) +++ goto invalid; ++ nRes = AMF3Prop_Decode(&prop, pBuffer, nSize, FALSE); ++ if (nRes == -1) ++ RTMP_Log(RTMP_LOGDEBUG, "%s, failed to decode AMF3 property!", ++@@ -1138,9 +1167,9 @@ invalid: ++ int len = 0; ++ ++ do ++- { ++- if (nSize <=0) ++- goto invalid; +++ { +++ if (nSize <= 0) +++ goto invalid; ++ nRes = AMF3Prop_Decode(&prop, pBuffer, nSize, TRUE); ++ AMF_AddProp(obj, &prop); ++ ++@@ -1154,7 +1183,15 @@ invalid: ++ } ++ RTMP_Log(RTMP_LOGDEBUG, "class object!"); ++ } ++- 
return nOriginalSize - nSize; +++ +++ /** +++ * In case of switch to AMF3 serialization consume rest of the unprocessed +++ * packet data to make sure it's not later processed as AMF0 data. +++ */ +++ if (bAMFData) +++ return nOriginalSize; +++ else +++ return nOriginalSize - nSize; ++ } ++ ++ int ++@@ -1272,7 +1309,8 @@ AMF3CD_AddProp(AMF3ClassDef *cd, AVal *prop) ++ { ++ if (!(cd->cd_num & 0x0f)) ++ cd->cd_props = realloc(cd->cd_props, (cd->cd_num + 16) * sizeof(AVal)); ++- cd->cd_props[cd->cd_num++] = *prop; +++ if (cd->cd_props) +++ cd->cd_props[cd->cd_num++] = *prop; ++ } ++ ++ AVal * ++diff --git librtmp/handshake.h librtmp/handshake.h ++index 0438486..104af28 100644 ++--- librtmp/handshake.h +++++ librtmp/handshake.h ++@@ -707,7 +707,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ uint32_t uptime; ++ ++ uint8_t clientbuf[RTMP_SIG_SIZE + 4], *clientsig=clientbuf+4; ++- uint8_t serversig[RTMP_SIG_SIZE], client2[RTMP_SIG_SIZE], *reply; +++ uint8_t serversig[RTMP_SIG_SIZE], serversig1[RTMP_SIG_SIZE], client2[RTMP_SIG_SIZE], *reply; ++ uint8_t type; ++ getoff *getdh = NULL, *getdig = NULL; ++ ++@@ -760,7 +760,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ #else ++ ip = (int32_t *)(clientsig+8); ++ for (i = 2; i < RTMP_SIG_SIZE/4; i++) ++- *ip++ = rand(); +++ *ip++ = ((rand() & 0xFFFF) << 16) | (rand() & 0xFFFF); ++ #endif ++ ++ /* set handshake digest */ ++@@ -825,6 +825,8 @@ HandShake(RTMP * r, int FP9HandShake) ++ ++ if (ReadN(r, (char *)serversig, RTMP_SIG_SIZE) != RTMP_SIG_SIZE) ++ return FALSE; +++ if (ReadN(r, (char *) serversig1, RTMP_SIG_SIZE) != RTMP_SIG_SIZE) +++ return FALSE; ++ ++ /* decode server response */ ++ memcpy(&uptime, serversig, 4); ++@@ -834,7 +836,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ RTMP_Log(RTMP_LOGDEBUG, "%s: FMS Version : %d.%d.%d.%d", __FUNCTION__, serversig[4], ++ serversig[5], serversig[6], serversig[7]); ++ ++- if (FP9HandShake && type == 3 && !serversig[4]) +++ if (FP9HandShake && type == 3 && (!serversig[4] || 
!serversig1[4])) ++ FP9HandShake = FALSE; ++ ++ #ifdef _DEBUG ++@@ -914,7 +916,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ #else ++ ip = (int32_t *)reply; ++ for (i = 0; i < RTMP_SIG_SIZE/4; i++) ++- *ip++ = rand(); +++ *ip++ = ((rand() & 0xFFFF) << 16) | (rand() & 0xFFFF); ++ #endif ++ /* calculate response now */ ++ signatureResp = reply+RTMP_SIG_SIZE-SHA256_DIGEST_LENGTH; ++@@ -965,16 +967,22 @@ HandShake(RTMP * r, int FP9HandShake) ++ __FUNCTION__); ++ RTMP_LogHex(RTMP_LOGDEBUG, reply, RTMP_SIG_SIZE); ++ #endif ++- if (!WriteN(r, (char *)reply, RTMP_SIG_SIZE)) ++- return FALSE; ++- ++- /* 2nd part of handshake */ ++- if (ReadN(r, (char *)serversig, RTMP_SIG_SIZE) != RTMP_SIG_SIZE) ++- return FALSE; +++ if (r->Link.CombineConnectPacket) +++ { +++ char *HandshakeResponse = malloc(RTMP_SIG_SIZE); +++ memcpy(HandshakeResponse, (char *) reply, RTMP_SIG_SIZE); +++ r->Link.HandshakeResponse.av_val = HandshakeResponse; +++ r->Link.HandshakeResponse.av_len = RTMP_SIG_SIZE; +++ } +++ else +++ { +++ if (!WriteN(r, (char *) reply, RTMP_SIG_SIZE)) +++ return FALSE; +++ } ++ ++ #ifdef _DEBUG ++ RTMP_Log(RTMP_LOGDEBUG, "%s: 2nd handshake: ", __FUNCTION__); ++- RTMP_LogHex(RTMP_LOGDEBUG, serversig, RTMP_SIG_SIZE); +++ RTMP_LogHex(RTMP_LOGDEBUG, serversig1, RTMP_SIG_SIZE); ++ #endif ++ ++ if (FP9HandShake) ++@@ -982,21 +990,21 @@ HandShake(RTMP * r, int FP9HandShake) ++ uint8_t signature[SHA256_DIGEST_LENGTH]; ++ uint8_t digest[SHA256_DIGEST_LENGTH]; ++ ++- if (serversig[4] == 0 && serversig[5] == 0 && serversig[6] == 0 ++- && serversig[7] == 0) +++ if (serversig1[4] == 0 && serversig1[5] == 0 && serversig1[6] == 0 +++ && serversig1[7] == 0) ++ { ++ RTMP_Log(RTMP_LOGDEBUG, ++ "%s: Wait, did the server just refuse signed authentication?", ++ __FUNCTION__); ++ } ++ RTMP_Log(RTMP_LOGDEBUG, "%s: Server sent signature:", __FUNCTION__); ++- RTMP_LogHex(RTMP_LOGDEBUG, &serversig[RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH], +++ RTMP_LogHex(RTMP_LOGDEBUG, &serversig1[RTMP_SIG_SIZE - 
SHA256_DIGEST_LENGTH], ++ SHA256_DIGEST_LENGTH); ++ ++ /* verify server response */ ++ HMACsha256(&clientsig[digestPosClient], SHA256_DIGEST_LENGTH, ++ GenuineFMSKey, sizeof(GenuineFMSKey), digest); ++- HMACsha256(serversig, RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH, digest, +++ HMACsha256(serversig1, RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH, digest, ++ SHA256_DIGEST_LENGTH, signature); ++ ++ /* show some information */ ++@@ -1024,7 +1032,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ RTMP_Log(RTMP_LOGDEBUG, "%s: Signature calculated:", __FUNCTION__); ++ RTMP_LogHex(RTMP_LOGDEBUG, signature, SHA256_DIGEST_LENGTH); ++ if (memcmp ++- (signature, &serversig[RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH], +++ (signature, &serversig1[RTMP_SIG_SIZE - SHA256_DIGEST_LENGTH], ++ SHA256_DIGEST_LENGTH) != 0) ++ { ++ RTMP_Log(RTMP_LOGWARNING, "%s: Server not genuine Adobe!", __FUNCTION__); ++@@ -1057,7 +1065,7 @@ HandShake(RTMP * r, int FP9HandShake) ++ } ++ else ++ { ++- if (memcmp(serversig, clientsig, RTMP_SIG_SIZE) != 0) +++ if (memcmp(serversig1, clientsig, RTMP_SIG_SIZE) != 0) ++ { ++ RTMP_Log(RTMP_LOGWARNING, "%s: client signature does not match!", ++ __FUNCTION__); ++@@ -1099,7 +1107,7 @@ SHandShake(RTMP * r) ++ { ++ encrypted = FALSE; ++ } ++- else if (type == 6 || type == 8) +++ else if (type == 6 || type == 8 || type == 9) ++ { ++ offalg = 1; ++ encrypted = TRUE; ++@@ -1148,7 +1156,7 @@ SHandShake(RTMP * r) ++ #else ++ ip = (int32_t *)(serversig+8); ++ for (i = 2; i < RTMP_SIG_SIZE/4; i++) ++- *ip++ = rand(); +++ *ip++ = ((rand() & 0xFFFF) << 16) | (rand() & 0xFFFF); ++ #endif ++ ++ /* set handshake digest */ ++diff --git librtmp/hashswf.c librtmp/hashswf.c ++index 9f4e2c0..01b97e2 100644 ++--- librtmp/hashswf.c +++++ librtmp/hashswf.c ++@@ -70,7 +70,7 @@ extern TLS_CTX RTMP_TLS_ctx; ++ ++ #endif /* CRYPTO */ ++ ++-#define AGENT "Mozilla/5.0" +++#define AGENT "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0" ++ ++ HTTPResult ++ HTTP_get(struct HTTP_ctx *http, const 
char *url, HTTP_read_callback *cb) ++@@ -116,6 +116,8 @@ HTTP_get(struct HTTP_ctx *http, const char *url, HTTP_read_callback *cb) ++ ++ host = p1 + 3; ++ path = strchr(host, '/'); +++ if (!path) +++ return HTTPRES_BAD_REQUEST; ++ hlen = path - host; ++ strncpy(hbuf, host, hlen); ++ hbuf[hlen] = '\0'; ++@@ -200,7 +202,7 @@ HTTP_get(struct HTTP_ctx *http, const char *url, HTTP_read_callback *cb) ++ } ++ ++ p1 = strchr(sb.sb_buf, ' '); ++- rc = atoi(p1 + 1); +++ rc = p1 ? atoi(p1 + 1) : 400; ++ http->status = rc; ++ ++ if (rc >= 300) ++@@ -379,13 +381,13 @@ make_unix_time(char *s) ++ if (fmt) ++ { ++ /* Day, DD-MMM-YYYY HH:MM:SS GMT */ ++- time.tm_mday = strtol(n + 1, &n, 0); +++ time.tm_mday = strtol(n + 1, &n, 10); ++ month = n + 1; ++ n = strchr(month, ' '); ++- time.tm_year = strtol(n + 1, &n, 0); ++- time.tm_hour = strtol(n + 1, &n, 0); ++- time.tm_min = strtol(n + 1, &n, 0); ++- time.tm_sec = strtol(n + 1, NULL, 0); +++ time.tm_year = strtol(n + 1, &n, 10); +++ time.tm_hour = strtol(n + 1, &n, 10); +++ time.tm_min = strtol(n + 1, &n, 10); +++ time.tm_sec = strtol(n + 1, NULL, 10); ++ } ++ else ++ { ++@@ -395,11 +397,11 @@ make_unix_time(char *s) ++ n = strchr(month, ' '); ++ while (isspace(*n)) ++ n++; ++- time.tm_mday = strtol(n, &n, 0); ++- time.tm_hour = strtol(n + 1, &n, 0); ++- time.tm_min = strtol(n + 1, &n, 0); ++- time.tm_sec = strtol(n + 1, &n, 0); ++- time.tm_year = strtol(n + 1, NULL, 0); +++ time.tm_mday = strtol(n, &n, 10); +++ time.tm_hour = strtol(n + 1, &n, 10); +++ time.tm_min = strtol(n + 1, &n, 10); +++ time.tm_sec = strtol(n + 1, &n, 10); +++ time.tm_year = strtol(n + 1, NULL, 10); ++ } ++ if (time.tm_year > 100) ++ time.tm_year -= ysub; ++@@ -528,9 +530,11 @@ RTMP_HashSWF(const char *url, unsigned int *size, unsigned char *hash, ++ ++ if (strncmp(buf, "url: ", 5)) ++ continue; ++- if (strncmp(buf + 5, url, hlen)) +++ if (strncmp(buf + 5, url, strlen(buf + 5) - 1)) ++ continue; ++ r1 = strrchr(buf, '/'); +++ if (!r1) +++ continue; ++ i = 
strlen(r1); ++ r1[--i] = '\0'; ++ if (strncmp(r1, file, i)) ++@@ -640,7 +644,7 @@ RTMP_HashSWF(const char *url, unsigned int *size, unsigned char *hash, ++ HMAC_finish(in.ctx, hash, hlen); ++ *size = in.size; ++ ++- fprintf(f, "date: %s\n", date); +++ fprintf(f, "date: %s\n", date[0] ? date : cctim); ++ fprintf(f, "size: %08x\n", in.size); ++ fprintf(f, "hash: "); ++ for (i = 0; i < SHA256_DIGEST_LENGTH; i++) ++diff --git librtmp/log.c librtmp/log.c ++index 1b52000..7564a15 100644 ++--- librtmp/log.c +++++ librtmp/log.c ++@@ -52,8 +52,8 @@ static void rtmp_log_default(int level, const char *format, va_list vl) ++ vsnprintf(str, MAX_PRINT_LEN-1, format, vl); ++ ++ /* Filter out 'no-name' */ ++- if ( RTMP_debuglevel<RTMP_LOGALL && strstr(str, "no-name" ) != NULL ) ++- return; +++ if (RTMP_debuglevel < RTMP_LOGDEBUG && strstr(str, "no-name") != NULL) +++ return; ++ ++ if ( !fmsg ) fmsg = stderr; ++ ++diff --git librtmp/parseurl.c librtmp/parseurl.c ++index 646c70c..a0a83e6 100644 ++--- librtmp/parseurl.c +++++ librtmp/parseurl.c ++@@ -34,6 +34,7 @@ int RTMP_ParseURL(const char *url, int *protocol, AVal *host, unsigned int *port ++ AVal *playpath, AVal *app) ++ { ++ char *p, *end, *col, *ques, *slash; +++ int doubleSlash = FALSE; ++ ++ RTMP_Log(RTMP_LOGDEBUG, "Parsing..."); ++ ++@@ -140,11 +141,19 @@ parsehost: ++ char *slash2, *slash3 = NULL, *slash4 = NULL; ++ int applen, appnamelen; ++ ++- slash2 = strchr(p, '/'); ++- if(slash2) ++- slash3 = strchr(slash2+1, '/'); ++- if(slash3) ++- slash4 = strchr(slash3+1, '/'); +++ if ((slash2 = strstr(p, "//"))) +++ { +++ doubleSlash = TRUE; +++ slash2 += 1; +++ } +++ else +++ { +++ slash2 = strchr(p, '/'); +++ if (slash2) +++ slash3 = strchr(slash2 + 1, '/'); +++ if (slash3) +++ slash4 = strchr(slash3 + 1, '/'); +++ } ++ ++ applen = end-p; /* ondemand, pass all parameters as app */ ++ appnamelen = applen; /* ondemand length */ ++@@ -168,6 +177,8 @@ parsehost: ++ applen = appnamelen; ++ } ++ +++ if (doubleSlash) +++ applen -= 1; 
++ app->av_val = p; ++ app->av_len = applen; ++ RTMP_Log(RTMP_LOGDEBUG, "Parsed app : %.*s", applen, p); ++diff --git librtmp/rtmp.c librtmp/rtmp.c ++index ca7db6a..c652cff 100644 ++--- librtmp/rtmp.c +++++ librtmp/rtmp.c ++@@ -28,6 +28,7 @@ ++ #include <string.h> ++ #include <assert.h> ++ #include <time.h> +++#include <math.h> ++ ++ #include "rtmp_sys.h" ++ #include "log.h" ++@@ -68,6 +69,7 @@ TLS_CTX RTMP_TLS_ctx; ++ ++ #define RTMP_SIG_SIZE 1536 ++ #define RTMP_LARGE_HEADER_SIZE 12 +++#define HEX2BIN(a) (((a)&0x40)?((a)&0xf)+9:((a)&0xf)) ++ ++ static const int packetSize[] = { 12, 8, 4, 1 }; ++ ++@@ -108,18 +110,25 @@ typedef enum { ++ RTMPT_OPEN=0, RTMPT_SEND, RTMPT_IDLE, RTMPT_CLOSE ++ } RTMPTCmd; ++ +++static int ConnectSocket(RTMP *r); ++ static int DumpMetaData(AMFObject *obj); ++ static int HandShake(RTMP *r, int FP9HandShake); ++ static int SocksNegotiate(RTMP *r); ++ +++static int SendBytesReceived(RTMP *r); +++static int SendCommand(RTMP *r, char *method, int queue); ++ static int SendConnectPacket(RTMP *r, RTMPPacket *cp); ++ static int SendCheckBW(RTMP *r); ++ static int SendCheckBWResult(RTMP *r, double txn); ++ static int SendDeleteStream(RTMP *r, double dStreamId); ++ static int SendFCSubscribe(RTMP *r, AVal *subscribepath); +++static int SendGetStreamLength(RTMP *r); +++static int SendInvoke(RTMP *r, AVal *command, int queue); ++ static int SendPlay(RTMP *r); ++-static int SendBytesReceived(RTMP *r); ++ static int SendUsherToken(RTMP *r, AVal *usherToken); +++static void TransformRot13(AMFObject *obj, AVal *rindex, AVal *r); +++static void __TeaCrypt(uint32_t *block, uint32_t len, uint32_t *key); +++static AVal TeaEncrypt(AVal *srcData, AVal *srcKey); ++ ++ #if 0 /* unused */ ++ static int SendBGHasStream(RTMP *r, double dId, AVal *playpath); ++@@ -338,10 +347,15 @@ RTMP_Init(RTMP *r) ++ r->m_nClientBW = 2500000; ++ r->m_nClientBW2 = 2; ++ r->m_nServerBW = 2500000; ++- r->m_fAudioCodecs = 3191.0; +++ r->m_fAudioCodecs = 3575.0; ++ 
r->m_fVideoCodecs = 252.0; +++ r->m_fEncoding = 3.0; ++ r->Link.timeout = 30; ++ r->Link.swfAge = 30; +++ r->Link.CombineConnectPacket = TRUE; +++ r->Link.ConnectPacket = FALSE; +++ r->Link.publishId = 0; +++ r->Link.dynamicPublish = FALSE; ++ } ++ ++ void ++@@ -359,6 +373,8 @@ RTMP_GetDuration(RTMP *r) ++ int ++ RTMP_IsConnected(RTMP *r) ++ { +++ if (r->m_sb.sb_size > 0) +++ return TRUE; ++ return r->m_sb.sb_socket != -1; ++ } ++ ++@@ -445,6 +461,8 @@ RTMP_SetupStream(RTMP *r, ++ AVal *flashVer, ++ AVal *subscribepath, ++ AVal *usherToken, +++ AVal *WeebToken, +++ AVal *ccomm, ++ int dStart, ++ int dStop, int bLiveStream, long int timeout) ++ { ++@@ -467,6 +485,8 @@ RTMP_SetupStream(RTMP *r, ++ RTMP_Log(RTMP_LOGDEBUG, "subscribepath : %s", subscribepath->av_val); ++ if (usherToken && usherToken->av_val) ++ RTMP_Log(RTMP_LOGDEBUG, "NetStream.Authenticate.UsherToken : %s", usherToken->av_val); +++ if (WeebToken && WeebToken->av_val) +++ RTMP_Log(RTMP_LOGDEBUG, "WeebToken: %s", WeebToken->av_val); ++ if (flashVer && flashVer->av_val) ++ RTMP_Log(RTMP_LOGDEBUG, "flashVer : %s", flashVer->av_val); ++ if (dStart > 0) ++@@ -515,6 +535,10 @@ RTMP_SetupStream(RTMP *r, ++ r->Link.subscribepath = *subscribepath; ++ if (usherToken && usherToken->av_len) ++ r->Link.usherToken = *usherToken; +++ if (WeebToken && WeebToken->av_len) +++ r->Link.WeebToken = *WeebToken; +++ if (ccomm && ccomm->av_len) +++ r->Link.ccomm = *ccomm; ++ r->Link.seekTime = dStart; ++ r->Link.stopTime = dStop; ++ if (bLiveStream) ++@@ -572,14 +596,24 @@ static struct urlopt { ++ "Stream is live, no seeking possible" }, ++ { AVC("subscribe"), OFF(Link.subscribepath), OPT_STR, 0, ++ "Stream to subscribe to" }, ++- { AVC("jtv"), OFF(Link.usherToken), OPT_STR, 0, ++- "Justin.tv authentication token" }, ++- { AVC("token"), OFF(Link.token), OPT_STR, 0, +++ { AVC("jtv"), OFF(Link.usherToken), OPT_STR, 0, +++ "Justin.tv authentication token"}, +++ { AVC("weeb"), OFF(Link.WeebToken), OPT_STR, 0, +++ "Weeb.tv 
authentication token"}, +++ { AVC("token"), OFF(Link.token), OPT_STR, 0, ++ "Key for SecureToken response" }, +++ { AVC("ccommand"), OFF(Link.ccomm), OPT_STR, 0, +++ "Send custom command before play" }, ++ { AVC("swfVfy"), OFF(Link.lFlags), OPT_BOOL, RTMP_LF_SWFV, ++ "Perform SWF Verification" }, ++ { AVC("swfAge"), OFF(Link.swfAge), OPT_INT, 0, ++ "Number of days to use cached SWF hash" }, +++#ifdef CRYPTO +++ { AVC("swfsize"), OFF(Link.swfSize), OPT_INT, 0, +++ "Size of the decompressed SWF file"}, +++ { AVC("swfhash"), OFF(Link.swfHash), OPT_STR, 0, +++ "SHA256 hash of the decompressed SWF file"}, +++#endif ++ { AVC("start"), OFF(Link.seekTime), OPT_INT, 0, ++ "Stream start position in milliseconds" }, ++ { AVC("stop"), OFF(Link.stopTime), OPT_INT, 0, ++@@ -685,6 +719,9 @@ parseAMF(AMFObject *obj, AVal *av, int *depth) ++ case 'O': ++ prop.p_type = AMF_OBJECT; ++ break; +++ case 'Z': +++ prop.p_type = AMF_NULL; +++ break; ++ default: ++ return -1; ++ } ++@@ -722,7 +759,7 @@ int RTMP_SetOpt(RTMP *r, const AVal *opt, AVal *arg) ++ *aptr = *arg; } ++ break; ++ case OPT_INT: { ++- long l = strtol(arg->av_val, NULL, 0); +++ long l = strtol(arg->av_val, NULL, 10); ++ *(int *)v = l; } ++ break; ++ case OPT_BOOL: { ++@@ -767,7 +804,7 @@ int RTMP_SetupURL(RTMP *r, char *url) ++ if (!ret) ++ return ret; ++ r->Link.port = port; ++- r->Link.playpath = r->Link.playpath0; +++ r->Link.playpath = AVcopy(r->Link.playpath0); ++ ++ while (ptr) { ++ *ptr++ = '\0'; ++@@ -844,9 +881,16 @@ int RTMP_SetupURL(RTMP *r, char *url) ++ } ++ ++ #ifdef CRYPTO ++- if ((r->Link.lFlags & RTMP_LF_SWFV) && r->Link.swfUrl.av_len) ++- RTMP_HashSWF(r->Link.swfUrl.av_val, &r->Link.SWFSize, ++- (unsigned char *)r->Link.SWFHash, r->Link.swfAge); +++ RTMP_Log(RTMP_LOGDEBUG, "Khalsa: %d %d %s", r->Link.swfSize, r->Link.swfHash.av_len, r->Link.swfHash.av_val); +++ if (r->Link.swfSize && r->Link.swfHash.av_len) +++ { +++ int i, j = 0; +++ for (i = 0; i < r->Link.swfHash.av_len; i += 2) +++ 
r->Link.SWFHash[j++] = (HEX2BIN(r->Link.swfHash.av_val[i]) << 4) | HEX2BIN(r->Link.swfHash.av_val[i + 1]); +++ r->Link.SWFSize = (uint32_t) r->Link.swfSize; +++ } +++ else if ((r->Link.lFlags & RTMP_LF_SWFV) && r->Link.swfUrl.av_len) +++ RTMP_HashSWF(r->Link.swfUrl.av_val, &r->Link.SWFSize, (unsigned char *) r->Link.SWFHash, r->Link.swfAge); ++ #endif ++ ++ SocksSetup(r, &r->Link.sockshost); ++@@ -949,6 +993,8 @@ RTMP_Connect0(RTMP *r, struct sockaddr * service) ++ } ++ ++ setsockopt(r->m_sb.sb_socket, IPPROTO_TCP, TCP_NODELAY, (char *) &on, sizeof(on)); +++ if (r->Link.protocol & RTMP_FEATURE_HTTP) +++ setsockopt(r->m_sb.sb_socket, SOL_SOCKET, SO_KEEPALIVE, (char *) &on, sizeof (on)); ++ ++ return TRUE; ++ } ++@@ -1399,41 +1445,96 @@ ReadN(RTMP *r, char *buffer, int n) ++ ptr = buffer; ++ while (n > 0) ++ { ++- int nBytes = 0, nRead; +++ int nBytes = 0, nRead, status = 0, retries = 0; ++ if (r->Link.protocol & RTMP_FEATURE_HTTP) ++ { ++- int refill = 0; ++- while (!r->m_resplen) ++- { ++- int ret; ++- if (r->m_sb.sb_size < 13 || refill) ++- { ++- if (!r->m_unackd) ++- HTTP_Post(r, RTMPT_IDLE, "", 1); ++- if (RTMPSockBuf_Fill(&r->m_sb) < 1) ++- { ++- if (!r->m_sb.sb_timedout) ++- RTMP_Close(r); ++- return 0; ++- } ++- } ++- if ((ret = HTTP_read(r, 0)) == -1) ++- { ++- RTMP_Log(RTMP_LOGDEBUG, "%s, No valid HTTP response found", __FUNCTION__); ++- RTMP_Close(r); ++- return 0; ++- } ++- else if (ret == -2) +++ while (!r->m_resplen) +++ { +++ /* Refill if socket buffer is empty */ +++ if (!r->m_sb.sb_size) ++ { ++- refill = 1; +++ if (retries > 30) +++ { +++ RTMP_Close(r); +++ return 0; +++ } +++ +++ if (!r->m_unackd) +++ { +++ if (retries > 0) +++ { +++ HTTP_Post(r, RTMPT_IDLE, "", 1); +++ r->m_unackd = TRUE; +++ } +++ retries++; +++ +++ if (!r->m_bPlaying) +++ sleep(.25); +++ } +++ +++ RTMP_Log(RTMP_LOGDEBUG, "Trying to fill HTTP buffer, Retries: %d", retries); +++ status = RTMPSockBuf_Fill(&r->m_sb); +++ /* Reconnect socket when closed by some moronic servers after 
+++ * every HTTP data packet */ +++ if (status < 1) +++ { +++ /* Close connection on connection reset */ +++ if (status == -1) +++ { +++ RTMP_Close(r); +++ return 0; +++ } +++ +++ RTMP_Log(RTMP_LOGDEBUG, "Reconnecting socket, Status: %d", status); +++ if (ConnectSocket(r)) +++ { +++ HTTP_Post(r, RTMPT_IDLE, "", 1); +++ r->m_unackd = TRUE; +++ retries++; +++ } +++ else +++ { +++ RTMP_Close(r); +++ return 0; +++ } +++ } ++ } ++- else +++ +++ RTMP_Log(RTMP_LOGDEBUG, "Trying to read HTTP response, Bytes Available: %d", r->m_sb.sb_size); +++ status = HTTP_read(r, 0); +++ if (status == -1) ++ { ++- refill = 0; +++ RTMP_Log(RTMP_LOGDEBUG, "%s, No valid HTTP response found", __FUNCTION__); +++ RTMP_Close(r); +++ return 0; ++ } ++- } ++- if (r->m_resplen && !r->m_sb.sb_size) ++- RTMPSockBuf_Fill(&r->m_sb); +++ else if (status == -2) +++ { +++ if (RTMPSockBuf_Fill(&r->m_sb) < 1) +++ if (!r->m_sb.sb_timedout) +++ { +++ RTMP_Close(r); +++ return 0; +++ } +++ } +++ else if (status == -3) +++ { +++ RTMP_Close(r); +++ return 0; +++ } +++ else +++ r->m_unackd = FALSE; +++ } +++ +++ /* Refill when there is still some data to be read and socket buffer +++ * is empty */ +++ if (r->m_resplen && (!r->m_sb.sb_size)) +++ { +++ if (RTMPSockBuf_Fill(&r->m_sb) < 1) +++ if (!r->m_sb.sb_timedout) +++ RTMP_Close(r); +++ } +++ ++ avail = r->m_sb.sb_size; ++ if (avail > r->m_resplen) ++ avail = r->m_resplen; ++@@ -1460,10 +1561,11 @@ ReadN(RTMP *r, char *buffer, int n) ++ r->m_sb.sb_size -= nRead; ++ nBytes = nRead; ++ r->m_nBytesIn += nRead; ++- if (r->m_bSendCounter ++- && r->m_nBytesIn > ( r->m_nBytesInSent + r->m_nClientBW / 10)) ++- if (!SendBytesReceived(r)) ++- return FALSE; +++ if (r->m_nBytesIn > 0xF0000000) +++ r->m_nBytesIn -= 0xF0000000; +++ if (r->m_bSendCounter && (r->m_nBytesIn > (r->m_nBytesInSent + r->m_nClientBW / 10))) +++ if (!SendBytesReceived(r)) +++ return FALSE; ++ } ++ /*RTMP_Log(RTMP_LOGDEBUG, "%s: %d bytes\n", __FUNCTION__, nBytes); */ ++ #ifdef _DEBUG ++@@ -1474,7 
+1576,8 @@ ReadN(RTMP *r, char *buffer, int n) ++ { ++ RTMP_Log(RTMP_LOGDEBUG, "%s, RTMP socket closed by peer", __FUNCTION__); ++ /*goto again; */ ++- RTMP_Close(r); +++ if (!r->m_sb.sb_timedout) +++ RTMP_Close(r); ++ break; ++ } ++ ++@@ -1499,6 +1602,7 @@ static int ++ WriteN(RTMP *r, const char *buffer, int n) ++ { ++ const char *ptr = buffer; +++ char *ConnectPacket = 0; ++ #ifdef CRYPTO ++ char *encrypted = 0; ++ char buf[RTMP_BUFFER_CACHE_SIZE]; ++@@ -1514,6 +1618,15 @@ WriteN(RTMP *r, const char *buffer, int n) ++ } ++ #endif ++ +++ if (r->Link.ConnectPacket) +++ { +++ char *ConnectPacket = malloc(r->Link.HandshakeResponse.av_len + n); +++ memcpy(ConnectPacket, r->Link.HandshakeResponse.av_val, r->Link.HandshakeResponse.av_len); +++ memcpy(ConnectPacket + r->Link.HandshakeResponse.av_len, ptr, n); +++ ptr = ConnectPacket; +++ n += r->Link.HandshakeResponse.av_len; +++ } +++ ++ while (n > 0) ++ { ++ int nBytes; ++@@ -1550,6 +1663,14 @@ WriteN(RTMP *r, const char *buffer, int n) ++ free(encrypted); ++ #endif ++ +++ if (r->Link.ConnectPacket) +++ { +++ if (r->Link.HandshakeResponse.av_val) +++ free(r->Link.HandshakeResponse.av_val); +++ free(ConnectPacket); +++ r->Link.ConnectPacket = FALSE; +++ } +++ ++ return n == 0; ++ } ++ ++@@ -1579,6 +1700,9 @@ SendConnectPacket(RTMP *r, RTMPPacket *cp) ++ char pbuf[4096], *pend = pbuf + sizeof(pbuf); ++ char *enc; ++ +++ if (r->Link.CombineConnectPacket) +++ r->Link.ConnectPacket = TRUE; +++ ++ if (cp) ++ return RTMP_SendPacket(r, cp, TRUE); ++ ++@@ -1627,7 +1751,7 @@ SendConnectPacket(RTMP *r, RTMPPacket *cp) ++ enc = AMF_EncodeNamedBoolean(enc, pend, &av_fpad, FALSE); ++ if (!enc) ++ return FALSE; ++- enc = AMF_EncodeNamedNumber(enc, pend, &av_capabilities, 15.0); +++ enc = AMF_EncodeNamedNumber(enc, pend, &av_capabilities, 239.0); ++ if (!enc) ++ return FALSE; ++ enc = AMF_EncodeNamedNumber(enc, pend, &av_audioCodecs, r->m_fAudioCodecs); ++@@ -1791,7 +1915,7 @@ SendUsherToken(RTMP *r, AVal *usherToken) ++ 
packet.m_hasAbsTimestamp = 0; ++ packet.m_body = pbuf + RTMP_MAX_HEADER_SIZE; ++ ++- RTMP_Log(RTMP_LOGDEBUG, "UsherToken: %s", usherToken->av_val); +++ RTMP_Log(RTMP_LOGDEBUG, "UsherToken: %.*s", usherToken->av_len, usherToken->av_val); ++ enc = packet.m_body; ++ enc = AMF_EncodeString(enc, pend, &av_NetStream_Authenticate_UsherToken); ++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); ++@@ -1934,6 +2058,26 @@ SendPublish(RTMP *r) ++ return RTMP_SendPacket(r, &packet, TRUE); ++ } ++ +++static int +++SendDynamicPublish(RTMP *r, double publishId) +++{ +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf), *enc; +++ AVal av_command, av_publishId; +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_publish); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ av_publishId.av_val = malloc(128 * sizeof (char)); +++ av_publishId.av_len = sprintf(av_publishId.av_val, "%.0f", publishId); +++ enc = AMF_EncodeString(enc, pend, &av_publishId); +++ enc = AMF_EncodeString(enc, pend, &av_live); +++ av_command.av_val = pbuf; +++ av_command.av_len = enc - pbuf; +++ +++ return SendInvoke(r, &av_command, FALSE); +++} +++ ++ SAVC(deleteStream); ++ ++ static int ++@@ -2097,6 +2241,7 @@ SendBytesReceived(RTMP *r) ++ } ++ ++ SAVC(_checkbw); +++SAVC(checkBandwidth); ++ ++ static int ++ SendCheckBW(RTMP *r) ++@@ -2114,7 +2259,7 @@ SendCheckBW(RTMP *r) ++ packet.m_body = pbuf + RTMP_MAX_HEADER_SIZE; ++ ++ enc = packet.m_body; ++- enc = AMF_EncodeString(enc, pend, &av__checkbw); +++ enc = AMF_EncodeString(enc, pend, &av_checkBandwidth); ++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); ++ *enc++ = AMF_NULL; ++ ++@@ -2221,10 +2366,8 @@ SendPlay(RTMP *r) ++ enc = AMF_EncodeNumber(enc, pend, -1000.0); ++ else ++ { ++- if (r->Link.seekTime > 0.0) ++- enc = AMF_EncodeNumber(enc, pend, r->Link.seekTime); /* resume from here */ ++- else ++- enc = AMF_EncodeNumber(enc, pend, 0.0); /*-2000.0);*/ /* recorded as default, -2000.0 is not reliable since 
that freezes the player if the stream is not found */ +++ if (r->Link.seekTime > 0.0 || r->Link.stopTime) +++ enc = AMF_EncodeNumber(enc, pend, r->Link.seekTime); /* resume from here */ ++ } ++ if (!enc) ++ return FALSE; ++@@ -2340,7 +2483,7 @@ RTMP_SendCtrl(RTMP *r, short nType, unsigned int nObject, unsigned int nTime) ++ int nSize; ++ char *buf; ++ ++- RTMP_Log(RTMP_LOGDEBUG, "sending ctrl. type: 0x%04x", (unsigned short)nType); +++ RTMP_Log(RTMP_LOGDEBUG, "sending ctrl, type: 0x%04x", (unsigned short)nType); ++ ++ packet.m_nChannel = 0x02; /* control channel (ping) */ ++ packet.m_headerType = RTMP_PACKET_SIZE_MEDIUM; ++@@ -2372,8 +2515,8 @@ RTMP_SendCtrl(RTMP *r, short nType, unsigned int nObject, unsigned int nTime) ++ } ++ else if (nType == 0x1A) ++ { ++- *buf = nObject & 0xff; ++- } +++ *buf = nObject & 0xff; +++ } ++ else ++ { ++ if (nSize > 2) ++@@ -2873,6 +3016,7 @@ PublisherAuth(RTMP *r, AVal *description) ++ #endif ++ ++ +++SAVC(onBWCheck); ++ SAVC(onBWDone); ++ SAVC(onFCSubscribe); ++ SAVC(onFCUnsubscribe); ++@@ -2885,24 +3029,25 @@ SAVC(level); ++ SAVC(description); ++ SAVC(onStatus); ++ SAVC(playlist_ready); +++SAVC(cps); +++SAVC(disneyToken); +++SAVC(getStreamLength); +++SAVC(sendStatus); +++SAVC(verifyClient); ++ static const AVal av_NetStream_Failed = AVC("NetStream.Failed"); ++ static const AVal av_NetStream_Play_Failed = AVC("NetStream.Play.Failed"); ++-static const AVal av_NetStream_Play_StreamNotFound = ++-AVC("NetStream.Play.StreamNotFound"); ++-static const AVal av_NetConnection_Connect_InvalidApp = ++-AVC("NetConnection.Connect.InvalidApp"); +++static const AVal av_NetStream_Play_StreamNotFound = AVC("NetStream.Play.StreamNotFound"); +++static const AVal av_NetConnection_Connect_InvalidApp = AVC("NetConnection.Connect.InvalidApp"); ++ static const AVal av_NetStream_Play_Start = AVC("NetStream.Play.Start"); ++ static const AVal av_NetStream_Play_Complete = AVC("NetStream.Play.Complete"); ++ static const AVal av_NetStream_Play_Stop = 
AVC("NetStream.Play.Stop"); ++ static const AVal av_NetStream_Seek_Notify = AVC("NetStream.Seek.Notify"); ++ static const AVal av_NetStream_Pause_Notify = AVC("NetStream.Pause.Notify"); ++-static const AVal av_NetStream_Play_PublishNotify = ++-AVC("NetStream.Play.PublishNotify"); ++-static const AVal av_NetStream_Play_UnpublishNotify = ++-AVC("NetStream.Play.UnpublishNotify"); +++static const AVal av_NetStream_Play_PublishNotify = AVC("NetStream.Play.PublishNotify"); +++static const AVal av_NetStream_Play_UnpublishNotify = AVC("NetStream.Play.UnpublishNotify"); ++ static const AVal av_NetStream_Publish_Start = AVC("NetStream.Publish.Start"); ++-static const AVal av_NetConnection_Connect_Rejected = ++-AVC("NetConnection.Connect.Rejected"); +++static const AVal av_NetConnection_Connect_Rejected = AVC("NetConnection.Connect.Rejected"); +++static const AVal av_NetConnection_confStream = AVC("NetConnection.confStream"); ++ ++ /* Returns 0 for OK/Failed/error, 1 for 'Stop or Complete' */ ++ static int ++@@ -2912,6 +3057,11 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ AVal method; ++ double txn; ++ int ret = 0, nRes; +++ char pbuf[512], *pend = pbuf + sizeof (pbuf), *enc, **params = NULL; +++ char *host = r->Link.hostname.av_len ? r->Link.hostname.av_val : ""; +++ char *pageUrl = r->Link.pageUrl.av_len ? r->Link.pageUrl.av_val : ""; +++ int param_count; +++ AVal av_Command, av_Response; ++ if (body[0] != 0x02) /* make sure it is a string method name we start with */ ++ { ++ RTMP_Log(RTMP_LOGWARNING, "%s, Sanity failed. 
no string method in invoke packet", ++@@ -2952,7 +3102,14 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ RTMP_Log(RTMP_LOGDEBUG, "%s, received result for method call <%s>", __FUNCTION__, ++ methodInvoked.av_val); ++ ++- if (AVMATCH(&methodInvoked, &av_connect)) +++ if ((r->Link.dynamicPublish == TRUE) && AVMATCH(&methodInvoked, &r->Link.dynamicCommand)) +++ { +++ r->Link.dynamicPublish = FALSE; +++ r->Link.publishId = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 3)); +++ RTMP_Log(RTMP_LOGDEBUG, "server returned dynamic publish id: %.0f", r->Link.publishId); +++ RTMP_SendCreateStream(r); +++ } +++ else if (AVMATCH(&methodInvoked, &av_connect)) ++ { ++ if (r->Link.token.av_len) ++ { ++@@ -2973,46 +3130,360 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ RTMP_SendServerBW(r); ++ RTMP_SendCtrl(r, 3, 0, 300); ++ } ++- RTMP_SendCreateStream(r); +++ if (r->Link.ccomm.av_len) +++ { +++ param_count = strsplit(r->Link.ccomm.av_val, FALSE, ';', &params); +++ if ((param_count > 1) && (strcasecmp(params[1], "TRUE") == 0)) +++ SendCommand(r, params[0], TRUE); +++ else +++ SendCommand(r, params[0], FALSE); +++ if ((param_count > 2) && (strcasecmp(params[2], "TRUE") == 0)) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "overriding inbuilt dynamic publish command with -K (ccommand) switch"); +++ r->Link.dynamicPublish = TRUE; +++ r->Link.dynamicCommand.av_val = params[0]; +++ r->Link.dynamicCommand.av_len = strlen(params[0]); +++ } +++ else +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "overriding inbuilt site specific authentication with -K (ccommand) switch"); +++ r->Link.dynamicPublish = FALSE; +++ RTMP_SendCreateStream(r); +++ } +++ } +++ else if (strstr(host, "3dbuzz.com") || strstr(pageUrl, "3dbuzz.com")) +++ { +++ AVal r1, r3; +++ AVal av_r1 = AVC("r1"); +++ AVal av_r3 = AVC("r3"); +++ AVal r1_key = AVC("4V?c6k7Y`(6~rMjp6S6!xT04]8m$g2"); +++ AVal r3_key = AVC("aB`d^+8?9;36]Lw2#rg?PDMcX?lCw2"); +++ TransformRot13(&obj, &av_r1, &r1); +++ TransformRot13(&obj, 
&av_r3, &r3); +++ if (r1.av_val && r3.av_val) +++ { +++ AVal av_qq = AVC("qq"); +++ AVal av_tos = AVC("http://www.3dbuzz.com/home/tos"); +++ AVal av_warning = AVC("Stream capturing is a violation of our terms, and may result in immediate cancellation of your account without refund"); +++ AVal r1_response; +++ +++ RTMP_Log(RTMP_LOGDEBUG, "3DBuzz SecureToken r1 request - %.*s", r1.av_len, r1.av_val); +++ RTMP_Log(RTMP_LOGDEBUG, "3DBuzz SecureToken r3 request - %.*s", r3.av_len, r3.av_val); +++ DecodeTEA(&r1_key, &r1); +++ DecodeTEA(&r3_key, &r3); +++ r1_response = TeaEncrypt(&av_tos, &r1); +++ RTMP_Log(RTMP_LOGDEBUG, "3DBuzz SecureToken r1 response - %.*s", r1_response.av_len, r1_response.av_val); +++ RTMP_Log(RTMP_LOGDEBUG, "3DBuzz SecureToken r3 response - %.*s", r3.av_len, r3.av_val); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_qq); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &r3); +++ enc = AMF_EncodeString(enc, pend, &av_tos); +++ enc = AMF_EncodeString(enc, pend, &r1_response); +++ enc = AMF_EncodeString(enc, pend, &av_warning); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ SendInvoke(r, &av_Command, FALSE); +++ } ++ ++- if (!(r->Link.protocol & RTMP_FEATURE_WRITE)) ++- { ++- /* Authenticate on Justin.tv legacy servers before sending FCSubscribe */ ++- if (r->Link.usherToken.av_len) ++- SendUsherToken(r, &r->Link.usherToken); ++- /* Send the FCSubscribe if live stream or if subscribepath is set */ ++- if (r->Link.subscribepath.av_len) ++- SendFCSubscribe(r, &r->Link.subscribepath); ++- else if (r->Link.lFlags & RTMP_LF_LIVE) ++- SendFCSubscribe(r, &r->Link.playpath); ++- } ++- } +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "cam4")) +++ { +++ AMFObject obj2, response; +++ AMFObjectProperty p; +++ AVal Host, ID, IP, av_ChallengeResponse; +++ AVal av_receiveRTMPResponse = AVC("receiveRTMPResponse"); +++ AVal av_client = AVC("client"); +++ AVal 
av_result = AVC("result"); +++ char ChallengeResponse[16] = {0}; +++ SAVC(application); +++ SAVC(Host); +++ SAVC(ID); +++ SAVC(IP); +++ +++ AMFProp_GetObject(AMF_GetProp(&obj, NULL, 3), &obj2); +++ if (RTMP_FindFirstMatchingProperty(&obj2, &av_application, &p)) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "sending cam4 authentication"); +++ AMFProp_GetObject(&p, &obj2); +++ RTMP_FindFirstMatchingProperty(&obj2, &av_Host, &p); +++ AMFProp_GetString(&p, &Host); +++ RTMP_FindFirstMatchingProperty(&obj2, &av_ID, &p); +++ AMFProp_GetString(&p, &ID); +++ RTMP_FindFirstMatchingProperty(&obj2, &av_IP, &p); +++ AMFProp_GetString(&p, &IP); +++ RTMP_Log(RTMP_LOGDEBUG, "Cam4 Host: %.*s", Host.av_len, Host.av_val); +++ RTMP_Log(RTMP_LOGDEBUG, "Cam4 ID : %.*s", ID.av_len, ID.av_val); +++ RTMP_Log(RTMP_LOGDEBUG, "Cam4 IP : %.*s", IP.av_len, IP.av_val); +++ snprintf(ChallengeResponse, 15, "%d", Host.av_len + ID.av_len + IP.av_len); +++ av_ChallengeResponse.av_val = ChallengeResponse; +++ av_ChallengeResponse.av_len = strlen(av_ChallengeResponse.av_val); +++ AMFProp_SetName(&p, &av_client); +++ AMFProp_SetString(&p, &ID); +++ AMF_AddProp(&response, &p); +++ AMFProp_SetName(&p, &av_result); +++ AMFProp_SetString(&p, &av_ChallengeResponse); +++ AMF_AddProp(&response, &p); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_receiveRTMPResponse); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_Encode(&response, enc, pend); +++ enc = AMF_EncodeBoolean(enc, pend, TRUE); +++ av_Response.av_val = pbuf; +++ av_Response.av_len = enc - pbuf; +++ +++ AMF_Decode(&obj, av_Response.av_val, av_Response.av_len, FALSE); +++ AMF_Dump(&obj); +++ SendInvoke(r, &av_Response, TRUE); +++ } +++ +++ RTMP_SendCreateStream(r); +++ } +++ else if ((strstr(host, "highwebmedia.com") || strstr(pageUrl, "chaturbate.com")) +++ && (!strstr(host, "origin"))) +++ { +++ AVal av_ModelName; +++ SAVC(CheckPublicStatus); +++ +++ if (strlen(pageUrl) > 7) +++ { +++ 
strsplit(pageUrl + 7, FALSE, '/', &params); +++ av_ModelName.av_val = params[1]; +++ av_ModelName.av_len = strlen(params[1]); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_CheckPublicStatus); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &av_ModelName); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ +++ SendInvoke(r, &av_Command, FALSE); +++ } +++ else +++ { +++ RTMP_Log(RTMP_LOGERROR, "you must specify the pageUrl"); +++ RTMP_Close(r); +++ } +++ } +++ else if (strstr(host, "featve.com") || strstr(pageUrl, "featve.com")) +++ { +++ AVal av_auth = AVC("yes"); +++ SAVC(youCannotPlayMe); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_youCannotPlayMe); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &av_auth); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ SendInvoke(r, &av_Command, FALSE); +++ +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(host, "tv-stream.to") || strstr(pageUrl, "tv-stream.to")) +++ { +++ static char auth[] = {'h', 0xC2, 0xA7, '4', 'j', 'h', 'H', '4', '3', 'd'}; +++ AVal av_auth; +++ SAVC(requestAccess); +++ av_auth.av_val = auth; +++ av_auth.av_len = sizeof (auth); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_requestAccess); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &av_auth); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ SendInvoke(r, &av_Command, FALSE); +++ +++ SendCommand(r, "getConnectionCount", FALSE); +++ SendGetStreamLength(r); +++ RTMP_SendCreateStream(r); +++ } +++ else if (r->Link.WeebToken.av_len) +++ { +++ AVal av_Token, av_Username, av_Password; +++ SAVC(determineAccess); +++ +++ param_count = strsplit(r->Link.WeebToken.av_val, FALSE, ';', &params); +++ if (param_count >= 1) +++ { +++ av_Token.av_val = 
params[0]; +++ av_Token.av_len = strlen(params[0]); +++ } +++ if (param_count >= 2) +++ { +++ av_Username.av_val = params[1]; +++ av_Username.av_len = strlen(params[1]); +++ } +++ if (param_count >= 3) +++ { +++ av_Password.av_val = params[2]; +++ av_Password.av_len = strlen(params[2]); +++ } +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_determineAccess); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &av_Token); +++ enc = AMF_EncodeString(enc, pend, &av_Username); +++ enc = AMF_EncodeString(enc, pend, &av_Password); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ +++ RTMP_Log(RTMP_LOGDEBUG, "WeebToken: %s", r->Link.WeebToken.av_val); +++ SendInvoke(r, &av_Command, FALSE); +++ } +++ else if (strstr(host, "wfctv.com") || strstr(pageUrl, "wfctv.com")) +++ { +++ AVal av_auth1 = AVC("zoivid"); +++ AVal av_auth2 = AVC("yePi4jee"); +++ SAVC(stream_login); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_stream_login); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &av_auth1); +++ enc = AMF_EncodeString(enc, pend, &av_auth2); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ SendInvoke(r, &av_Command, FALSE); +++ +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(host, "pc3oot.us.to")) +++ { +++ SendCommand(r, "UIUIUINASOWAS", TRUE); +++ SendGetStreamLength(r); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(host, "streamscene.cc") || strstr(pageUrl, "streamscene.cc") +++ || strstr(host, "tsboard.tv") || strstr(pageUrl, "teamstream.in") +++ || strstr(host, "hdstreams.tv") || strstr(pageUrl, "teamstream.to") +++ || strstr(pageUrl, "istreams.to")) +++ { +++ SendCommand(r, "r", FALSE); +++ SendGetStreamLength(r); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "axcast.com")) +++ { +++ SendCommand(r, "requestData", FALSE); +++ 
RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "dhmediahosting.com")) +++ { +++ SendCommand(r, "netStreamEnable", FALSE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "ezcast.tv")) +++ { +++ SendCommand(r, "iUsteJaSakamCarevataKerka", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "janjua.tv")) +++ { +++ SendCommand(r, "soLagaDaSeStoriAga", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "liveflash.tv")) +++ { +++ char *command = "kaskatijaEkonomista"; +++ r->Link.dynamicPublish = TRUE; +++ r->Link.dynamicCommand.av_val = command; +++ r->Link.dynamicCommand.av_len = strlen(command); +++ SendCommand(r, command, TRUE); +++ } +++ else if (strstr(pageUrl, "mips.tv") || strstr(pageUrl, "mipsplayer.com")) +++ { +++ char *command = "gaolVanusPobeleVoKosata"; +++ r->Link.dynamicPublish = TRUE; +++ r->Link.dynamicCommand.av_val = command; +++ r->Link.dynamicCommand.av_len = strlen(command); +++ SendCommand(r, command, TRUE); +++ } +++ else if (strstr(pageUrl, "streamify.tv")) +++ { +++ SendCommand(r, "keGoVidishStambolSoseBardovci", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "ucaster.eu")) +++ { +++ SendCommand(r, "vujkoMiLazarBarakovOdMonospitovo", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "yukons.net")) +++ { +++ SendCommand(r, "trxuwaaLahRKnaechb", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "yycast.com")) +++ { +++ SendCommand(r, "trajkoProkopiev", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else if (strstr(pageUrl, "zenex.tv")) +++ { +++ SendCommand(r, "goVideStambolSoseBardovci", TRUE); +++ RTMP_SendCreateStream(r); +++ } +++ else +++ RTMP_SendCreateStream(r); +++ } ++ else if (AVMATCH(&methodInvoked, &av_createStream)) ++- { ++- r->m_stream_id = (int)AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 3)); +++ { +++ r->m_stream_id = (int) AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 3)); ++ ++- if 
(r->Link.protocol & RTMP_FEATURE_WRITE) ++- { ++- SendPublish(r); ++- } ++- else ++- { ++- if (r->Link.lFlags & RTMP_LF_PLST) ++- SendPlaylist(r); ++- SendPlay(r); ++- RTMP_SendCtrl(r, 3, r->m_stream_id, r->m_nBufferMS); ++- } ++- } +++ if (!(r->Link.protocol & RTMP_FEATURE_WRITE)) +++ { +++ /* Authenticate on Justin.tv legacy servers before sending FCSubscribe */ +++ if (r->Link.usherToken.av_len) +++ SendUsherToken(r, &r->Link.usherToken); +++ if (r->Link.publishId > 0) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "sending dynamic publish id: %.0f", r->Link.publishId); +++ SendDynamicPublish(r, r->Link.publishId); +++ } +++ /* Send the FCSubscribe if live stream or if subscribepath is set */ +++ if (r->Link.subscribepath.av_len) +++ SendFCSubscribe(r, &r->Link.subscribepath); +++ else if ((r->Link.lFlags & RTMP_LF_LIVE) && (!r->Link.WeebToken.av_len)) +++ SendFCSubscribe(r, &r->Link.playpath); +++ } +++ +++ if (r->Link.protocol & RTMP_FEATURE_WRITE) +++ { +++ SendPublish(r); +++ } +++ else +++ { +++ if (r->Link.lFlags & RTMP_LF_PLST) +++ SendPlaylist(r); +++ SendPlay(r); +++ RTMP_SendCtrl(r, 3, r->m_stream_id, r->m_nBufferMS); +++ } +++ } ++ else if (AVMATCH(&methodInvoked, &av_play) || ++- AVMATCH(&methodInvoked, &av_publish)) ++- { ++- r->m_bPlaying = TRUE; ++- } +++ AVMATCH(&methodInvoked, &av_publish)) +++ { +++ r->m_bPlaying = TRUE; +++ } ++ free(methodInvoked.av_val); ++ } ++ else if (AVMATCH(&method, &av_onBWDone)) ++ { ++- if (!r->m_nBWCheckCounter) +++ if (!r->m_nBWCheckCounter) ++ SendCheckBW(r); ++ } ++ else if (AVMATCH(&method, &av_onFCSubscribe)) ++@@ -3036,21 +3507,22 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ { ++ int i; ++ for (i = 0; i < r->m_numCalls; i++) ++- if (AVMATCH(&r->m_methodCalls[i].name, &av__checkbw)) ++- { ++- AV_erase(r->m_methodCalls, &r->m_numCalls, i, TRUE); ++- break; ++- } +++ if (AVMATCH(&r->m_methodCalls[i].name, &av__checkbw)) +++ { +++ AV_erase(r->m_methodCalls, &r->m_numCalls, i, TRUE); +++ break; +++ } ++ 
} ++ else if (AVMATCH(&method, &av__error)) ++ { +++ int handled = FALSE; ++ #ifdef CRYPTO ++ AVal methodInvoked = {0}; ++ int i; ++ ++ if (r->Link.protocol & RTMP_FEATURE_WRITE) ++ { ++- for (i=0; i<r->m_numCalls; i++) +++ for (i = 0; i < r->m_numCalls; i++) ++ { ++ if (r->m_methodCalls[i].num == txn) ++ { ++@@ -3062,12 +3534,12 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ if (!methodInvoked.av_val) ++ { ++ RTMP_Log(RTMP_LOGDEBUG, "%s, received result id %f without matching request", ++- __FUNCTION__, txn); +++ __FUNCTION__, txn); ++ goto leave; ++ } ++ ++ RTMP_Log(RTMP_LOGDEBUG, "%s, received error for method call <%s>", __FUNCTION__, ++- methodInvoked.av_val); +++ methodInvoked.av_val); ++ ++ if (AVMATCH(&methodInvoked, &av_connect)) ++ { ++@@ -3086,34 +3558,96 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ goto leave; ++ } ++ } ++- } ++- else ++- { ++- RTMP_Log(RTMP_LOGERROR, "rtmp server sent error"); +++ handled = TRUE; ++ } ++ free(methodInvoked.av_val); ++-#else ++- RTMP_Log(RTMP_LOGERROR, "rtmp server sent error"); ++ #endif +++ double code = 0.0; +++ unsigned int parsedPort = 0; +++ AMFObject obj2; +++ AMFObjectProperty p; +++ AVal redirect; +++ SAVC(ex); +++ SAVC(redirect); +++ +++ AMFProp_GetObject(AMF_GetProp(&obj, NULL, 3), &obj2); +++ if (RTMP_FindFirstMatchingProperty(&obj2, &av_ex, &p)) +++ { +++ AMFProp_GetObject(&p, &obj2); +++ if (RTMP_FindFirstMatchingProperty(&obj2, &av_code, &p)) +++ code = AMFProp_GetNumber(&p); +++ if (code == 302 && RTMP_FindFirstMatchingProperty(&obj2, &av_redirect, &p)) +++ { +++ AMFProp_GetString(&p, &redirect); +++ r->Link.redirected = TRUE; +++ +++ char *playpath = "//playpath"; +++ int len = redirect.av_len + strlen(playpath); +++ char *url = malloc(len + 1); +++ memcpy(url, redirect.av_val, redirect.av_len); +++ memcpy(url + redirect.av_len, playpath, strlen(playpath)); +++ url[len] = '\0'; +++ r->Link.tcUrl.av_val = url; +++ r->Link.tcUrl.av_len = redirect.av_len; +++ 
if (r->Link.lFlags & RTMP_LF_FTCU) +++ r->Link.lFlags ^= RTMP_LF_FTCU; +++ RTMP_ParseURL(url, &r->Link.protocol, &r->Link.hostname, &parsedPort, &r->Link.playpath0, &r->Link.app); +++ if (parsedPort) +++ r->Link.port = parsedPort; +++ } +++ } +++ if (r->Link.redirected) +++ { +++ handled = TRUE; +++ RTMP_Log(RTMP_LOGINFO, "rtmp server sent redirect"); +++ } +++ +++ if (!handled) +++ RTMP_Log(RTMP_LOGERROR, "rtmp server sent error"); ++ } ++ else if (AVMATCH(&method, &av_close)) ++ { ++- RTMP_Log(RTMP_LOGERROR, "rtmp server requested close"); ++- RTMP_Close(r); +++ if (r->Link.redirected) +++ { +++ r->Link.redirected = FALSE; +++ RTMP_Close(r); +++ RTMP_Log(RTMP_LOGINFO, "trying to connect with redirected url"); +++ if (r->Link.port == 0) +++ { +++ if (r->Link.protocol & RTMP_FEATURE_SSL) +++ r->Link.port = 443; +++ else if (r->Link.protocol & RTMP_FEATURE_HTTP) +++ r->Link.port = 80; +++ else +++ r->Link.port = 1935; +++ } +++ RTMP_Connect(r, NULL); +++ } +++ else +++ { +++ +++ RTMP_Log(RTMP_LOGERROR, "rtmp server requested close"); +++ if (r->m_bPlaying && (strstr(pageUrl, "streamlive.to") || strstr(pageUrl, "uk-iptv.co.uk"))) +++ RTMP_Log(RTMP_LOGINFO, "ignoring close request"); +++ else +++ RTMP_Close(r); +++ } ++ } ++ else if (AVMATCH(&method, &av_onStatus)) ++ { ++ AMFObject obj2; ++- AVal code, level; +++ AVal code, level, description; ++ AMFProp_GetObject(AMF_GetProp(&obj, NULL, 3), &obj2); ++ AMFProp_GetString(AMF_GetProp(&obj2, &av_code, -1), &code); ++ AMFProp_GetString(AMF_GetProp(&obj2, &av_level, -1), &level); +++ AMFProp_GetString(AMF_GetProp(&obj2, &av_description, -1), &description); ++ ++ RTMP_Log(RTMP_LOGDEBUG, "%s, onStatus: %s", __FUNCTION__, code.av_val); ++ if (AVMATCH(&code, &av_NetStream_Failed) ++- || AVMATCH(&code, &av_NetStream_Play_Failed) ++- || AVMATCH(&code, &av_NetStream_Play_StreamNotFound) ++- || AVMATCH(&code, &av_NetConnection_Connect_InvalidApp)) +++ || AVMATCH(&code, &av_NetStream_Play_Failed) +++ || AVMATCH(&code, 
&av_NetStream_Play_StreamNotFound) +++ || AVMATCH(&code, &av_NetConnection_Connect_Rejected) +++ || AVMATCH(&code, &av_NetConnection_Connect_InvalidApp)) ++ { ++ r->m_stream_id = -1; ++ RTMP_Close(r); ++@@ -3171,6 +3705,46 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ r->m_pausing = 3; ++ } ++ } +++ +++ else if (AVMATCH(&code, &av_NetConnection_confStream)) +++ { +++#ifdef CRYPTO +++ static const char hexdig[] = "0123456789abcdef"; +++ AVal auth; +++ SAVC(cf_stream); +++ int i; +++ char hash_hex[33] = {0}; +++ unsigned char hash[16]; +++ +++ param_count = strsplit(description.av_val, description.av_len, ':', &params); +++ if (param_count >= 3) +++ { +++ char *buf = malloc(strlen(params[0]) + r->Link.playpath.av_len + 1); +++ strcpy(buf, params[0]); +++ strncat(buf, r->Link.playpath.av_val, r->Link.playpath.av_len); +++ md5_hash((unsigned char *) buf, strlen(buf), hash); +++ for (i = 0; i < 16; i++) +++ { +++ hash_hex[i * 2] = hexdig[0x0f & (hash[i] >> 4)]; +++ hash_hex[i * 2 + 1] = hexdig[0x0f & (hash[i])]; +++ } +++ auth.av_val = &hash_hex[atoi(params[1]) - 1]; +++ auth.av_len = atoi(params[2]); +++ RTMP_Log(RTMP_LOGDEBUG, "Khalsa: %.*s", auth.av_len, auth.av_val); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_cf_stream); +++ enc = AMF_EncodeNumber(enc, pend, txn); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &auth); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ +++ SendInvoke(r, &av_Command, FALSE); +++ free(buf); +++ } +++#endif +++ } ++ } ++ else if (AVMATCH(&method, &av_playlist_ready)) ++ { ++@@ -3184,6 +3758,109 @@ HandleInvoke(RTMP *r, const char *body, unsigned int nBodySize) ++ } ++ } ++ } +++ else if (AVMATCH(&method, &av_cps)) +++ { +++ if (obj.o_num >= 4) +++ { +++ int Status = AMFProp_GetBoolean(AMF_GetProp(&obj, NULL, 3)); +++ if (Status == FALSE) +++ { +++ AVal Message; +++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 4), &Message); +++ RTMP_Log(RTMP_LOGINFO, "Model status is 
%.*s", Message.av_len, Message.av_val); +++ RTMP_Close(r); +++ } +++ else +++ { +++ if (obj.o_num >= 7) +++ { +++ AVal Playpath, Server; +++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 5), &Playpath); +++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 6), &Server); +++ if (strncasecmp(&Playpath.av_val[Playpath.av_len - 4], ".mp4", 4) != 0) +++ { +++ char *playpath = calloc(Server.av_len + Playpath.av_len + 25, sizeof (char)); +++ strcat(playpath, "rtmp://"); +++ strncat(playpath, Server.av_val, Server.av_len); +++ strcat(playpath, "/live-origin/"); +++ strncat(playpath, Playpath.av_val, Playpath.av_len); +++ strcat(playpath, ".mp4"); +++ Playpath.av_val = playpath; +++ Playpath.av_len = strlen(playpath); +++ } +++ RTMP_ParsePlaypath(&Playpath, &r->Link.playpath); +++ RTMP_SendCreateStream(r); +++ } +++ } +++ } +++ } +++ else if (AVMATCH(&method, &av_disneyToken)) +++ { +++ double FirstNumber = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 3)); +++ double SecondNumber = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 4)); +++ RTMP_Log(RTMP_LOGDEBUG, "FirstNumber: %.2f, SecondNumber: %.2f", FirstNumber, SecondNumber); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av__result); +++ enc = AMF_EncodeNumber(enc, pend, txn); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeNumber(enc, pend, FirstNumber * SecondNumber); +++ av_Response.av_val = pbuf; +++ av_Response.av_len = enc - pbuf; +++ +++ AMF_Decode(&obj, av_Response.av_val, av_Response.av_len, FALSE); +++ AMF_Dump(&obj); +++ SendInvoke(r, &av_Response, FALSE); +++ } +++ else if (AVMATCH(&method, &av_verifyClient)) +++ { +++ double VerificationNumber = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 3)); +++ RTMP_Log(RTMP_LOGDEBUG, "VerificationNumber: %.2f", VerificationNumber); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av__result); +++ enc = AMF_EncodeNumber(enc, pend, txn); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeNumber(enc, pend, exp(atan(sqrt(VerificationNumber))) + 1); +++ av_Response.av_val = pbuf; +++ 
av_Response.av_len = enc - pbuf; +++ +++ AMF_Decode(&obj, av_Response.av_val, av_Response.av_len, FALSE); +++ AMF_Dump(&obj); +++ SendInvoke(r, &av_Response, FALSE); +++ } +++ else if (AVMATCH(&method, &av_sendStatus)) +++ { +++ if (r->Link.WeebToken.av_len) +++ { +++ AVal av_Authorized = AVC("User.hasAccess"); +++ AVal av_TransferLimit = AVC("User.noPremium.limited"); +++ AVal av_UserLimit = AVC("User.noPremium.tooManyUsers"); +++ AVal av_TimeLeft = AVC("timeLeft"); +++ AVal av_Status, av_ReconnectionTime; +++ +++ AMFObject Status; +++ AMFProp_GetObject(AMF_GetProp(&obj, NULL, 3), &Status); +++ AMFProp_GetString(AMF_GetProp(&Status, &av_code, -1), &av_Status); +++ RTMP_Log(RTMP_LOGINFO, "%.*s", av_Status.av_len, av_Status.av_val); +++ if (AVMATCH(&av_Status, &av_Authorized)) +++ { +++ RTMP_Log(RTMP_LOGINFO, "Weeb.tv authentication successful"); +++ RTMP_SendCreateStream(r); +++ } +++ else if (AVMATCH(&av_Status, &av_UserLimit)) +++ { +++ RTMP_Log(RTMP_LOGINFO, "No free slots available"); +++ RTMP_Close(r); +++ } +++ else if (AVMATCH(&av_Status, &av_TransferLimit)) +++ { +++ AMFProp_GetString(AMF_GetProp(&Status, &av_TimeLeft, -1), &av_ReconnectionTime); +++ RTMP_Log(RTMP_LOGINFO, "Viewing limit exceeded. 
try again in %.*s minutes.", av_ReconnectionTime.av_len, av_ReconnectionTime.av_val); +++ RTMP_Close(r); +++ } +++ } +++ } ++ else ++ { ++ ++@@ -3209,7 +3886,8 @@ RTMP_FindFirstMatchingProperty(AMFObject *obj, const AVal *name, ++ return TRUE; ++ } ++ ++- if (prop->p_type == AMF_OBJECT || prop->p_type == AMF_ECMA_ARRAY) +++ if (prop->p_type == AMF_OBJECT || prop->p_type == AMF_ECMA_ARRAY +++ || prop->p_type == AMF_STRICT_ARRAY) ++ { ++ if (RTMP_FindFirstMatchingProperty(&prop->p_vu.p_object, name, p)) ++ return TRUE; ++@@ -3235,7 +3913,8 @@ RTMP_FindPrefixProperty(AMFObject *obj, const AVal *name, ++ return TRUE; ++ } ++ ++- if (prop->p_type == AMF_OBJECT) +++ if (prop->p_type == AMF_OBJECT || prop->p_type == AMF_ECMA_ARRAY +++ || prop->p_type == AMF_STRICT_ARRAY) ++ { ++ if (RTMP_FindPrefixProperty(&prop->p_vu.p_object, name, p)) ++ return TRUE; ++@@ -3269,6 +3948,7 @@ DumpMetaData(AMFObject *obj) ++ snprintf(str, 255, "%s", ++ prop->p_vu.p_number != 0. ? "TRUE" : "FALSE"); ++ break; +++ case AMF_NULL: ++ case AMF_STRING: ++ len = snprintf(str, 255, "%.*s", prop->p_vu.p_aval.av_len, ++ prop->p_vu.p_aval.av_val); ++@@ -3284,7 +3964,7 @@ DumpMetaData(AMFObject *obj) ++ } ++ if (str[0] && prop->p_name.av_len) ++ { ++- RTMP_Log(RTMP_LOGINFO, " %-22.*s%s", prop->p_name.av_len, +++ RTMP_Log(RTMP_LOGINFO, " %-24.*s%s", prop->p_name.av_len, ++ prop->p_name.av_val, str); ++ } ++ } ++@@ -3366,7 +4046,7 @@ HandleCtrl(RTMP *r, const RTMPPacket *packet) ++ unsigned int tmp; ++ if (packet->m_body && packet->m_nBodySize >= 2) ++ nType = AMF_DecodeInt16(packet->m_body); ++- RTMP_Log(RTMP_LOGDEBUG, "%s, received ctrl. 
type: %d, len: %d", __FUNCTION__, nType, +++ RTMP_Log(RTMP_LOGDEBUG, "%s, received ctrl, type: %d, len: %d", __FUNCTION__, nType, ++ packet->m_nBodySize); ++ /*RTMP_LogHex(packet.m_body, packet.m_nBodySize); */ ++ ++@@ -3475,15 +4155,15 @@ HandleCtrl(RTMP *r, const RTMPPacket *packet) ++ RTMP_Log(RTMP_LOGDEBUG, "%s, SWFVerification ping received: ", __FUNCTION__); ++ if (packet->m_nBodySize > 2 && packet->m_body[2] > 0x01) ++ { ++- RTMP_Log(RTMP_LOGERROR, ++- "%s: SWFVerification Type %d request not supported! Patches welcome...", ++- __FUNCTION__, packet->m_body[2]); +++ RTMP_Log(RTMP_LOGERROR, +++ "%s: SWFVerification Type %d request not supported, attempting to use SWFVerification Type 1! Patches welcome...", +++ __FUNCTION__, packet->m_body[2]); ++ } ++ #ifdef CRYPTO ++ /*RTMP_LogHex(packet.m_body, packet.m_nBodySize); */ ++ ++ /* respond with HMAC SHA256 of decompressed SWF, key is the 30byte player key, also the last 30 bytes of the server handshake are applied */ ++- else if (r->Link.SWFSize) +++ if (r->Link.SWFSize) ++ { ++ RTMP_SendCtrl(r, 0x1B, 0, 0); ++ } ++@@ -3788,8 +4468,18 @@ HandShake(RTMP *r, int FP9HandShake) ++ serversig[4], serversig[5], serversig[6], serversig[7]); ++ ++ /* 2nd part of handshake */ ++- if (!WriteN(r, serversig, RTMP_SIG_SIZE)) ++- return FALSE; +++ if (r->Link.CombineConnectPacket) +++ { +++ char *HandshakeResponse = malloc(RTMP_SIG_SIZE); +++ memcpy(HandshakeResponse, (char *) serversig, RTMP_SIG_SIZE); +++ r->Link.HandshakeResponse.av_val = HandshakeResponse; +++ r->Link.HandshakeResponse.av_len = RTMP_SIG_SIZE; +++ } +++ else +++ { +++ if (!WriteN(r, (char *) serversig, RTMP_SIG_SIZE)) +++ return FALSE; +++ } ++ ++ if (ReadN(r, serversig, RTMP_SIG_SIZE) != RTMP_SIG_SIZE) ++ return FALSE; ++@@ -3942,7 +4632,7 @@ RTMP_SendPacket(RTMP *r, RTMPPacket *packet, int queue) ++ ++ nSize = packetSize[packet->m_headerType]; ++ hSize = nSize; cSize = 0; ++- t = packet->m_nTimeStamp - last; +++ t = packet->m_nTimeStamp ? 
packet->m_nTimeStamp - last : 0; ++ ++ if (packet->m_body) ++ { ++@@ -4251,8 +4941,13 @@ RTMPSockBuf_Fill(RTMPSockBuf *sb) ++ { ++ int nBytes; ++ ++- if (!sb->sb_size) ++- sb->sb_start = sb->sb_buf; +++ /* Copy unprocessed bytes to the start of buffer to make optimum use of +++ * available buffer */ +++ if (sb->sb_start != sb->sb_buf) +++ { +++ memcpy(sb->sb_buf, sb->sb_start, sb->sb_size); +++ sb->sb_start = sb->sb_buf; +++ } ++ ++ while (1) ++ { ++@@ -4266,8 +4961,10 @@ RTMPSockBuf_Fill(RTMPSockBuf *sb) ++ #endif ++ { ++ nBytes = recv(sb->sb_socket, sb->sb_start + sb->sb_size, nBytes, 0); ++- } ++- if (nBytes != -1) +++ if (!nBytes) +++ RTMP_Log(RTMP_LOGDEBUG, "Socket closed by server, nBytes: %d", nBytes); +++ } +++ if (nBytes >= 0) ++ { ++ sb->sb_size += nBytes; ++ } ++@@ -4405,21 +5102,19 @@ static int ++ HTTP_Post(RTMP *r, RTMPTCmd cmd, const char *buf, int len) ++ { ++ char hbuf[512]; ++- int hlen = snprintf(hbuf, sizeof(hbuf), "POST /%s%s/%d HTTP/1.1\r\n" ++- "Host: %.*s:%d\r\n" ++- "Accept: */*\r\n" ++- "User-Agent: Shockwave Flash\r\n" ++- "Connection: Keep-Alive\r\n" ++- "Cache-Control: no-cache\r\n" ++- "Content-type: application/x-fcs\r\n" ++- "Content-length: %d\r\n\r\n", RTMPT_cmds[cmd], ++- r->m_clientID.av_val ? r->m_clientID.av_val : "", ++- r->m_msgCounter, r->Link.hostname.av_len, r->Link.hostname.av_val, ++- r->Link.port, len); +++ int hlen = snprintf(hbuf, sizeof (hbuf), "POST /%s%s/%d HTTP/1.1\r\n" +++ "Content-Type: application/x-fcs\r\n" +++ "User-Agent: Shockwave Flash\r\n" +++ "Host: %.*s:%d\r\n" +++ "Content-Length: %d\r\n" +++ "Connection: Keep-Alive\r\n" +++ "Cache-Control: no-cache\r\n\r\n", RTMPT_cmds[cmd], +++ r->m_clientID.av_val ? 
r->m_clientID.av_val : "", +++ r->m_msgCounter, r->Link.hostname.av_len, r->Link.hostname.av_val, +++ r->Link.port, len); ++ RTMPSockBuf_Send(&r->m_sb, hbuf, hlen); ++ hlen = RTMPSockBuf_Send(&r->m_sb, buf, len); ++ r->m_msgCounter++; ++- r->m_unackd++; ++ return hlen; ++ } ++ ++@@ -4429,22 +5124,17 @@ HTTP_read(RTMP *r, int fill) ++ char *ptr; ++ int hlen; ++ ++-restart: ++ if (fill) ++ RTMPSockBuf_Fill(&r->m_sb); ++- if (r->m_sb.sb_size < 13) { ++- if (fill) ++- goto restart; +++ +++ /* Check if socket buffer is empty or HTTP header isn't completely received */ +++ memset(r->m_sb.sb_start + r->m_sb.sb_size, '\0', 1); +++ if ((!r->m_sb.sb_size) || (!strstr(r->m_sb.sb_start, "\r\n\r\n"))) ++ return -2; ++- } +++ ++ if (strncmp(r->m_sb.sb_start, "HTTP/1.1 200 ", 13)) ++ return -1; ++ r->m_sb.sb_start[r->m_sb.sb_size] = '\0'; ++- if (!strstr(r->m_sb.sb_start, "\r\n\r\n")) { ++- if (fill) ++- goto restart; ++- return -2; ++- } ++ ++ ptr = r->m_sb.sb_start + sizeof("HTTP/1.1 200"); ++ while ((ptr = strstr(ptr, "Content-"))) { ++@@ -4452,21 +5142,31 @@ restart: ++ ptr += 8; ++ } ++ if (!ptr) ++- return -1; ++- hlen = atoi(ptr+16); +++ { +++ ptr = r->m_sb.sb_start + sizeof ("HTTP/1.1 200"); +++ RTMP_Log(RTMP_LOGDEBUG, "No Content-Length header found, assuming continuous stream"); +++ hlen = 2147483648UL; // 2 GB +++ } +++ else +++ hlen = atoi(ptr + 16); ++ ptr = strstr(ptr+16, "\r\n\r\n"); ++ if (!ptr) ++ return -1; ++ ptr += 4; ++- if (ptr + (r->m_clientID.av_val ? 
1 : hlen) > r->m_sb.sb_start + r->m_sb.sb_size) ++- { ++- if (fill) ++- goto restart; ++- return -2; ++- } ++ r->m_sb.sb_size -= ptr - r->m_sb.sb_start; ++ r->m_sb.sb_start = ptr; ++- r->m_unackd--; +++ +++ /* Stop processing if content length is 0 */ +++ if (!hlen) +++ return -3; +++ +++ /* Refill buffer if no payload is received */ +++ if (hlen && (!r->m_sb.sb_size)) +++ { +++ RTMPSockBuf_Fill(&r->m_sb); +++ ptr = r->m_sb.sb_buf; +++ r->m_sb.sb_start = ptr; +++ } ++ ++ if (!r->m_clientID.av_val) ++ { ++@@ -4486,10 +5186,17 @@ restart: ++ r->m_sb.sb_start++; ++ r->m_sb.sb_size--; ++ } +++ +++ /* Following values shouldn't be negative in any case */ +++ if (r->m_resplen < 0) +++ r->m_resplen = 0; +++ if (r->m_sb.sb_size < 0) +++ r->m_sb.sb_size = 0; +++ ++ return 0; ++ } ++ ++-#define MAX_IGNORED_FRAMES 50 +++#define MAX_IGNORED_FRAMES 100 ++ ++ /* Read from the stream until we get a media packet. ++ * Returns -3 if Play.Close/Stop, -2 if fatal error, -1 if no more media ++@@ -4557,162 +5264,156 @@ Read_1_Packet(RTMP *r, char *buf, unsigned int buflen) ++ #endif ++ ++ if (r->m_read.flags & RTMP_READ_RESUME) ++- { ++- /* check the header if we get one */ ++- if (packet.m_nTimeStamp == 0) ++- { ++- if (r->m_read.nMetaHeaderSize > 0 ++- && packet.m_packetType == RTMP_PACKET_TYPE_INFO) ++- { ++- AMFObject metaObj; ++- int nRes = ++- AMF_Decode(&metaObj, packetBody, nPacketLen, FALSE); ++- if (nRes >= 0) ++- { ++- AVal metastring; ++- AMFProp_GetString(AMF_GetProp(&metaObj, NULL, 0), ++- &metastring); ++- ++- if (AVMATCH(&metastring, &av_onMetaData)) ++- { ++- /* compare */ ++- if ((r->m_read.nMetaHeaderSize != nPacketLen) || ++- (memcmp ++- (r->m_read.metaHeader, packetBody, ++- r->m_read.nMetaHeaderSize) != 0)) ++- { ++- ret = RTMP_READ_ERROR; ++- } ++- } ++- AMF_Reset(&metaObj); ++- if (ret == RTMP_READ_ERROR) ++- break; ++- } ++- } +++ { +++ RTMP_Log(RTMP_LOGDEBUG2, "Received timestamp: %d, type %d", +++ packet.m_nTimeStamp, packet.m_packetType); +++ if 
(packet.m_nTimeStamp > 0 && r->m_read.nResumeDriftTS > 0) +++ packet.m_nTimeStamp -= r->m_read.nResumeDriftTS; +++ RTMP_Log(RTMP_LOGDEBUG2, "Adjusted timestamp: %d", packet.m_nTimeStamp); +++ +++ /* check the header if we get one */ +++ if (r->m_read.nMetaHeaderSize > 0 +++ && packet.m_packetType == RTMP_PACKET_TYPE_INFO) +++ { +++ AMFObject metaObj; +++ int nRes = AMF_Decode(&metaObj, packetBody, nPacketLen, FALSE); +++ if (nRes >= 0) +++ { +++ AVal metastring; +++ AMFProp_GetString(AMF_GetProp(&metaObj, NULL, 0), &metastring); +++ +++ if (AVMATCH(&metastring, &av_onMetaData)) +++ { +++ /* compare */ +++ if ((r->m_read.nMetaHeaderSize != nPacketLen) || +++ (memcmp(r->m_read.metaHeader, packetBody, r->m_read.nMetaHeaderSize) != 0)) +++ { +++ ret = RTMP_READ_ERROR; +++ } +++ } +++ AMF_Reset(&metaObj); +++ if (ret == RTMP_READ_ERROR) +++ break; +++ } +++ } ++ ++- /* check first keyframe to make sure we got the right position ++- * in the stream! (the first non ignored frame) ++- */ ++- if (r->m_read.nInitialFrameSize > 0) ++- { ++- /* video or audio data */ ++- if (packet.m_packetType == r->m_read.initialFrameType ++- && r->m_read.nInitialFrameSize == nPacketLen) ++- { ++- /* we don't compare the sizes since the packet can ++- * contain several FLV packets, just make sure the ++- * first frame is our keyframe (which we are going ++- * to rewrite) ++- */ ++- if (memcmp ++- (r->m_read.initialFrame, packetBody, ++- r->m_read.nInitialFrameSize) == 0) ++- { ++- RTMP_Log(RTMP_LOGDEBUG, "Checked keyframe successfully!"); ++- r->m_read.flags |= RTMP_READ_GOTKF; ++- /* ignore it! (what about audio data after it? it is ++- * handled by ignoring all 0ms frames, see below) ++- */ ++- ret = RTMP_READ_IGNORE; ++- break; ++- } ++- } +++ /* check first keyframe to make sure we got the right position +++ * in the stream! 
(the first non ignored frame) +++ */ +++ RTMP_Log(RTMP_LOGDEBUG2, "Required packet length: %d, Packet length: %d", +++ r->m_read.nInitialFrameSize, nPacketLen); +++ if (r->m_read.nInitialFrameSize > 0) +++ { +++ /* video or audio data */ +++ if (packet.m_packetType == r->m_read.initialFrameType +++ && r->m_read.nInitialFrameSize == nPacketLen) +++ { +++ /* we don't compare the sizes since the packet can +++ * contain several FLV packets, just make sure the +++ * first frame is our keyframe (which we are going +++ * to rewrite) +++ */ +++ RTMP_Log(RTMP_LOGDEBUG2, "Comparing keyframe data"); +++ if (memcmp(r->m_read.initialFrame, packetBody, +++ r->m_read.nInitialFrameSize) == 0) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "Checked keyframe successfully!"); +++ r->m_read.flags |= RTMP_READ_GOTKF; +++ r->m_read.nResumeDriftTS = packet.m_nTimeStamp; +++ /* ignore it! (what about audio data after it? it is +++ * handled by ignoring all 0ms frames, see below) +++ */ +++ ret = RTMP_READ_IGNORE; +++ break; +++ } +++ } ++ ++- /* hande FLV streams, even though the server resends the ++- * keyframe as an extra video packet it is also included ++- * in the first FLV stream chunk and we have to compare ++- * it and filter it out !! ++- */ ++- if (packet.m_packetType == RTMP_PACKET_TYPE_FLASH_VIDEO) ++- { ++- /* basically we have to find the keyframe with the ++- * correct TS being nResumeTS ++- */ ++- unsigned int pos = 0; ++- uint32_t ts = 0; ++- ++- while (pos + 11 < nPacketLen) ++- { ++- /* size without header (11) and prevTagSize (4) */ ++- uint32_t dataSize = ++- AMF_DecodeInt24(packetBody + pos + 1); ++- ts = AMF_DecodeInt24(packetBody + pos + 4); ++- ts |= (packetBody[pos + 7] << 24); +++ /* hande FLV streams, even though the server resends the +++ * keyframe as an extra video packet it is also included +++ * in the first FLV stream chunk and we have to compare +++ * it and filter it out !! 
+++ */ +++ if (packet.m_packetType == RTMP_PACKET_TYPE_FLASH_VIDEO) +++ { +++ /* basically we have to find the keyframe with the +++ * correct TS being nResumeTS +++ */ +++ unsigned int pos = 0; +++ uint32_t ts = 0; +++ +++ while (pos + 11 < nPacketLen) +++ { +++ /* size without header (11) and prevTagSize (4) */ +++ uint32_t dataSize = AMF_DecodeInt24(packetBody + pos + 1); +++ ts = AMF_DecodeInt24(packetBody + pos + 4); +++ ts |= (packetBody[pos + 7] << 24); ++ ++ #ifdef _DEBUG ++- RTMP_Log(RTMP_LOGDEBUG, ++- "keyframe search: FLV Packet: type %02X, dataSize: %d, timeStamp: %d ms", ++- packetBody[pos], dataSize, ts); +++ RTMP_Log(RTMP_LOGDEBUG, +++ "keyframe search: FLV Packet: type %02X, dataSize: %d, timeStamp: %d ms", +++ packetBody[pos], dataSize, ts); ++ #endif ++- /* ok, is it a keyframe?: ++- * well doesn't work for audio! ++- */ ++- if (packetBody[pos /*6928, test 0 */ ] == ++- r->m_read.initialFrameType ++- /* && (packetBody[11]&0xf0) == 0x10 */ ) ++- { ++- if (ts == r->m_read.nResumeTS) ++- { ++- RTMP_Log(RTMP_LOGDEBUG, ++- "Found keyframe with resume-keyframe timestamp!"); ++- if (r->m_read.nInitialFrameSize != dataSize ++- || memcmp(r->m_read.initialFrame, ++- packetBody + pos + 11, ++- r->m_read. ++- nInitialFrameSize) != 0) ++- { ++- RTMP_Log(RTMP_LOGERROR, ++- "FLV Stream: Keyframe doesn't match!"); ++- ret = RTMP_READ_ERROR; ++- break; ++- } ++- r->m_read.flags |= RTMP_READ_GOTFLVK; ++- ++- /* skip this packet? 
++- * check whether skippable: ++- */ ++- if (pos + 11 + dataSize + 4 > nPacketLen) ++- { ++- RTMP_Log(RTMP_LOGWARNING, ++- "Non skipable packet since it doesn't end with chunk, stream corrupt!"); ++- ret = RTMP_READ_ERROR; ++- break; ++- } ++- packetBody += (pos + 11 + dataSize + 4); ++- nPacketLen -= (pos + 11 + dataSize + 4); ++- ++- goto stopKeyframeSearch; ++- ++- } ++- else if (r->m_read.nResumeTS < ts) ++- { ++- /* the timestamp ts will only increase with ++- * further packets, wait for seek ++- */ ++- goto stopKeyframeSearch; ++- } ++- } ++- pos += (11 + dataSize + 4); ++- } ++- if (ts < r->m_read.nResumeTS) ++- { ++- RTMP_Log(RTMP_LOGERROR, ++- "First packet does not contain keyframe, all " ++- "timestamps are smaller than the keyframe " ++- "timestamp; probably the resume seek failed?"); ++- } ++- stopKeyframeSearch: ++- ; ++- if (!(r->m_read.flags & RTMP_READ_GOTFLVK)) ++- { ++- RTMP_Log(RTMP_LOGERROR, ++- "Couldn't find the seeked keyframe in this chunk!"); ++- ret = RTMP_READ_IGNORE; ++- break; ++- } ++- } ++- } ++- } +++ /* ok, is it a keyframe?: +++ * well doesn't work for audio! +++ */ +++ if (packetBody[pos /*6928, test 0 */ ] == r->m_read.initialFrameType +++ /* && (packetBody[11]&0xf0) == 0x10 */) +++ { +++ if (ts == r->m_read.nResumeTS) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "Found keyframe with resume-keyframe timestamp!"); +++ if (r->m_read.nInitialFrameSize != dataSize || +++ memcmp(r->m_read.initialFrame, packetBody + pos + 11, +++ r->m_read.nInitialFrameSize) != 0) +++ { +++ RTMP_Log(RTMP_LOGERROR, "FLV Stream: Keyframe doesn't match!"); +++ ret = RTMP_READ_ERROR; +++ break; +++ } +++ r->m_read.flags |= RTMP_READ_GOTFLVK; +++ +++ /* skip this packet? 
+++ * check whether skippable: +++ */ +++ if (pos + 11 + dataSize + 4 > nPacketLen) +++ { +++ RTMP_Log(RTMP_LOGWARNING, "Non skipable packet since it doesn't " +++ "end with chunk, stream corrupt!"); +++ ret = RTMP_READ_ERROR; +++ break; +++ } +++ packetBody += (pos + 11 + dataSize + 4); +++ nPacketLen -= (pos + 11 + dataSize + 4); +++ +++ goto stopKeyframeSearch; +++ +++ } +++ else if (r->m_read.nResumeTS < ts) +++ { +++ /* the timestamp ts will only increase with +++ * further packets, wait for seek +++ */ +++ goto stopKeyframeSearch; +++ } +++ } +++ pos += (11 + dataSize + 4); +++ } +++ if (ts < r->m_read.nResumeTS) +++ { +++ RTMP_Log(RTMP_LOGERROR, +++ "First packet does not contain keyframe, all " +++ "timestamps are smaller than the keyframe " +++ "timestamp; probably the resume seek failed?"); +++ } +++ stopKeyframeSearch: +++ if (!(r->m_read.flags & RTMP_READ_GOTFLVK)) +++ { +++ RTMP_Log(RTMP_LOGERROR, "Couldn't find the seeked keyframe in this chunk!"); +++ ret = RTMP_READ_IGNORE; +++ break; +++ } +++ } +++ } ++ ++ if (packet.m_nTimeStamp > 0 ++ && (r->m_read.flags & (RTMP_READ_GOTKF|RTMP_READ_GOTFLVK))) ++@@ -4972,7 +5673,7 @@ static const char flvHeader[] = { 'F', 'L', 'V', 0x01, ++ 0x00, 0x00, 0x00, 0x00 ++ }; ++ ++-#define HEADERBUF (128*1024) +++#define HEADERBUF (1024*1024) ++ int ++ RTMP_Read(RTMP *r, char *buf, int size) ++ { ++@@ -5175,3 +5876,395 @@ RTMP_Write(RTMP *r, const char *buf, int size) ++ } ++ return size+s2; ++ } +++ +++AVal +++AVcopy(AVal src) +++{ +++ AVal dst; +++ if (src.av_len) +++ { +++ dst.av_val = malloc(src.av_len + 1); +++ memcpy(dst.av_val, src.av_val, src.av_len); +++ dst.av_val[src.av_len] = '\0'; +++ dst.av_len = src.av_len; +++ } +++ else +++ { +++ dst.av_val = NULL; +++ dst.av_len = 0; +++ } +++ return dst; +++} +++ +++static int +++ConnectSocket(RTMP *r) +++{ +++ int on = 1; +++ struct sockaddr_in service; +++ if (!r->Link.hostname.av_len) +++ return FALSE; +++ +++ memset(&service, 0, sizeof (struct sockaddr_in)); +++ 
service.sin_family = AF_INET; +++ +++ if (r->Link.socksport) +++ { +++ /* Connect via SOCKS */ +++ if (!add_addr_info(&service, &r->Link.sockshost, r->Link.socksport)) +++ return FALSE; +++ } +++ else +++ { +++ /* Connect directly */ +++ if (!add_addr_info(&service, &r->Link.hostname, r->Link.port)) +++ return FALSE; +++ } +++ +++ r->m_sb.sb_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); +++ if (r->m_sb.sb_socket != -1) +++ { +++ if (connect(r->m_sb.sb_socket, (struct sockaddr *) &service, sizeof (struct sockaddr)) < 0) +++ { +++ int err = GetSockError(); +++ RTMP_Log(RTMP_LOGERROR, "%s, failed to connect socket. %d (%s)", +++ __FUNCTION__, err, strerror(err)); +++ RTMP_Close(r); +++ return FALSE; +++ } +++ +++ if (r->Link.socksport) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "%s ... SOCKS negotiation", __FUNCTION__); +++ if (!SocksNegotiate(r)) +++ { +++ RTMP_Log(RTMP_LOGERROR, "%s, SOCKS negotiation failed.", __FUNCTION__); +++ RTMP_Close(r); +++ return FALSE; +++ } +++ } +++ } +++ else +++ { +++ RTMP_Log(RTMP_LOGERROR, "%s, failed to create socket. 
Error: %d", +++ __FUNCTION__, GetSockError()); +++ return FALSE; +++ } +++ +++ /* set timeout */ +++ SET_RCVTIMEO(tv, r->Link.timeout); +++ if (setsockopt(r->m_sb.sb_socket, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof (tv))) +++ { +++ RTMP_Log(RTMP_LOGERROR, "%s, Setting socket timeout to %d failed!", +++ __FUNCTION__, r->Link.timeout); +++ } +++ +++ setsockopt(r->m_sb.sb_socket, IPPROTO_TCP, TCP_NODELAY, (char *) &on, sizeof (on)); +++ if (r->Link.protocol & RTMP_FEATURE_HTTP) +++ setsockopt(r->m_sb.sb_socket, SOL_SOCKET, SO_KEEPALIVE, (char *) &on, sizeof (on)); +++ +++ return TRUE; +++} +++ +++static int +++SendCommand(RTMP *r, char *method, int queue) +++{ +++ char pbuf[256], *pend = pbuf + sizeof (pbuf), *enc; +++ AVal av_command, methodName; +++ +++ enc = pbuf; +++ methodName.av_val = method; +++ methodName.av_len = strlen(method); +++ enc = AMF_EncodeString(enc, pend, &methodName); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ av_command.av_val = pbuf; +++ av_command.av_len = enc - pbuf; +++ +++ return SendInvoke(r, &av_command, queue); +++} +++ +++static int +++SendGetStreamLength(RTMP *r) +++{ +++ char pbuf[256], *pend = pbuf + sizeof (pbuf), *enc; +++ AVal av_Command; +++ SAVC(getStreamLength); +++ +++ enc = pbuf; +++ enc = AMF_EncodeString(enc, pend, &av_getStreamLength); +++ enc = AMF_EncodeNumber(enc, pend, ++r->m_numInvokes); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeString(enc, pend, &r->Link.playpath); +++ av_Command.av_val = pbuf; +++ av_Command.av_len = enc - pbuf; +++ +++ return SendInvoke(r, &av_Command, TRUE); +++} +++ +++static int +++SendInvoke(RTMP *r, AVal *command, int queue) +++{ +++ RTMPPacket packet; +++ char pbuf[512], *enc; +++ +++ packet.m_nChannel = 0x03; /* control channel (invoke) */ +++ packet.m_headerType = RTMP_PACKET_SIZE_MEDIUM; +++ packet.m_packetType = RTMP_PACKET_TYPE_INVOKE; +++ packet.m_nTimeStamp = 0; +++ packet.m_nInfoField2 = 0; +++ packet.m_hasAbsTimestamp = 0; +++ 
packet.m_body = pbuf + RTMP_MAX_HEADER_SIZE; +++ +++ enc = packet.m_body; +++ if (command->av_len) +++ { +++ memcpy(enc, command->av_val, command->av_len); +++ enc += command->av_len; +++ } +++ else +++ return FALSE; +++ packet.m_nBodySize = enc - packet.m_body; +++ +++ return RTMP_SendPacket(r, &packet, queue); +++} +++ +++AVal +++StripParams(AVal *src) +++{ +++ AVal str; +++ if (src->av_val) +++ { +++ str.av_val = calloc(src->av_len + 1, sizeof (char)); +++ strncpy(str.av_val, src->av_val, src->av_len); +++ str.av_len = src->av_len; +++ char *start = str.av_val; +++ char *end = start + str.av_len; +++ char *ptr = start; +++ +++ while (ptr < end) +++ { +++ if (*ptr == '?') +++ { +++ str.av_len = ptr - start; +++ break; +++ } +++ ptr++; +++ } +++ memset(start + str.av_len, 0, 1); +++ +++ char *dynamic = strstr(start, "[[DYNAMIC]]"); +++ if (dynamic) +++ { +++ dynamic -= 1; +++ memset(dynamic, 0, 1); +++ str.av_len = dynamic - start; +++ end = start + str.av_len; +++ } +++ +++ char *import = strstr(start, "[[IMPORT]]"); +++ if (import) +++ { +++ str.av_val = import + 11; +++ strcpy(start, "http://"); +++ str.av_val = strcat(start, str.av_val); +++ str.av_len = strlen(str.av_val); +++ } +++ return str; +++ } +++ str = *src; +++ return str; +++} +++ +++char * +++strreplace(char *srcstr, int srclen, char *orig, char *repl, int didAlloc) +++{ +++ char *ptr = NULL, *sptr = srcstr; +++ int origlen = strlen(orig); +++ int repllen = strlen(repl); +++ if (!srclen) +++ srclen = strlen(srcstr); +++ char *srcend = srcstr + srclen; +++ int dstbuffer = srclen / origlen * repllen; +++ if (dstbuffer < srclen) +++ dstbuffer = srclen; +++ char *dststr = calloc(dstbuffer + 1, sizeof (char)); +++ char *dptr = dststr; +++ +++ if ((ptr = strstr(srcstr, orig))) +++ { +++ while (ptr < srcend && (ptr = strstr(sptr, orig))) +++ { +++ int len = ptr - sptr; +++ memcpy(dptr, sptr, len); +++ sptr += len + origlen; +++ dptr += len; +++ memcpy(dptr, repl, repllen); +++ dptr += repllen; +++ } +++ 
memcpy(dptr, sptr, srcend - sptr); +++ if (didAlloc) +++ free(srcstr); +++ return dststr; +++ } +++ +++ memcpy(dststr, srcstr, srclen); +++ if (didAlloc) +++ free(srcstr); +++ return dststr; +++} +++ +++int +++strsplit(char *src, int srclen, char delim, char ***params) +++{ +++ char *sptr, *srcbeg, *srcend, *dstr; +++ int count = 1, i = 0, len = 0; +++ +++ if (src == NULL) +++ return 0; +++ if (!srclen) +++ srclen = strlen(src); +++ srcbeg = src; +++ srcend = srcbeg + srclen; +++ sptr = srcbeg; +++ +++ /* count the delimiters */ +++ while (sptr < srcend) +++ { +++ if (*sptr++ == delim) +++ count++; +++ } +++ sptr = srcbeg; +++ *params = malloc(count * sizeof (size_t)); +++ char **param = *params; +++ +++ for (i = 0; i < (count - 1); i++) +++ { +++ dstr = strchr(sptr, delim); +++ len = dstr - sptr; +++ param[i] = malloc((len + 1) * sizeof (char)); +++ memcpy(param[i], sptr, len); +++ *(param[i] + len) = '\0'; +++ sptr += len + 1; +++ } +++ +++ /* copy the last string */ +++ if (sptr <= srcend) +++ { +++ len = srclen - (sptr - srcbeg); +++ param[i] = malloc((len + 1) * sizeof (char)); +++ memcpy(param[i], sptr, len); +++ *(param[i] + len) = '\0'; +++ } +++ return count; +++} +++ +++void +++TransformRot13(AMFObject *obj, AVal *rindex, AVal *r) +++{ +++ char *chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMabcdefghijklmnopqrstuvwxyzabcdefghijklm"; +++ int i = 0, pos = 0; +++ AMFObject obj2; +++ +++ AMFProp_GetObject(AMF_GetProp(obj, NULL, 3), &obj2); +++ AMFProp_GetString(AMF_GetProp(&obj2, rindex, -1), r); +++ +++ for (i = 0; i < r->av_len; i++) +++ { +++ char *chr = &r->av_val[i]; +++ chr = strchr(chars, *chr); +++ pos = chr ? 
chr - chars : -1; +++ if (pos > -1) +++ r->av_val[i] = chars[pos + 13]; +++ } +++} +++ +++void +++__TeaCrypt(uint32_t *block, uint32_t len, uint32_t *key) +++{ +++ uint32_t z = block[len - 1], y = block[0], sum = 0, e, DELTA = 0x9e3779b9; +++ int32_t p, q; +++ +++ q = 6 + 52 / len; +++ while (q-- > 0) +++ { +++ sum += DELTA; +++ e = (sum >> 2) & 3; +++ for (p = 0; p < len - 1; p++) +++ { +++ y = block[p + 1]; +++ block[p] += ((z >> 5^y << 2) + (y >> 3^z << 4)) ^ ((sum^y) + (key[(p & 3)^e] ^ z)); +++ z = block[p]; +++ } +++ y = block[0]; +++ block[len - 1] += ((z >> 5^y << 2) + (y >> 3^z << 4)) ^ ((sum^y) + (key[(p & 3)^e] ^ z)); +++ z = block[len - 1]; +++ } +++} +++ +++AVal +++TeaEncrypt(AVal *srcData, AVal *srcKey) +++{ +++ int i, reqPadding, longKeyBlocks, longDataBlocks; +++ unsigned char *key, *data; +++ +++ // Prepare key +++ int srcKeyLen = srcKey->av_len; +++ int reqKeyLen = 16; +++ reqPadding = reqKeyLen - srcKeyLen; +++ if (reqPadding < 0) +++ { +++ reqPadding = 0; +++ srcKeyLen = reqKeyLen; +++ } +++ key = calloc((srcKeyLen + reqPadding + 1), sizeof (char)); +++ memcpy(key, srcKey->av_val, srcKeyLen); +++ longKeyBlocks = reqKeyLen / 4; +++ uint32_t *longKeyBuf = (uint32_t *) malloc(longKeyBlocks * sizeof (uint32_t)); +++ for (i = 0; i < longKeyBlocks; i++) +++ { +++ longKeyBuf[i] = 0; +++ longKeyBuf[i] |= (key[i * 4 + 0]) | (key[i * 4 + 1] << 8) | (key[i * 4 + 2] << 16) | (key[i * 4 + 3] << 24); +++ } +++ +++ // Prepare data +++ int srcDataLen = srcData->av_len; +++ reqPadding = ((int) ((srcDataLen + 3) / 4))*4 - srcDataLen; +++ if ((srcDataLen + reqPadding) < 8) +++ reqPadding = 8 - srcDataLen; +++ data = calloc((srcDataLen + reqPadding + 1), sizeof (char)); +++ memcpy(data, srcData->av_val, srcDataLen); +++ longDataBlocks = (srcDataLen + reqPadding) / 4; +++ uint32_t *longDataBuf = malloc(longDataBlocks * sizeof (uint32_t)); +++ for (i = 0; i < longDataBlocks; i++) +++ { +++ longDataBuf[i] = 0; +++ longDataBuf[i] |= (data[i * 4 + 0]) | (data[i * 4 + 1] 
<< 8) | (data[i * 4 + 2] << 16) | (data[i * 4 + 3] << 24); +++ } +++ +++ // Encrypt data +++ __TeaCrypt(longDataBuf, longDataBlocks, longKeyBuf); +++ +++ // Convert data back to char array +++ for (i = 0; i < longDataBlocks; i++) +++ { +++ data[i * 4 + 0] = longDataBuf[i] & 0xFF; +++ data[i * 4 + 1] = (longDataBuf[i] >> 8) & 0xFF; +++ data[i * 4 + 2] = (longDataBuf[i] >> 16) & 0xFF; +++ data[i * 4 + 3] = (longDataBuf[i] >> 24) & 0xFF; +++ } +++ +++ // Convert to hex string +++ AVal hexData; +++ hexData.av_val = calloc((longDataBlocks * 4 * 2) + 1, sizeof (char)); +++ for (i = 0; i < (longDataBlocks * 4); i++) +++ sprintf(&hexData.av_val[i * 2], "%.2X", data[i]); +++ hexData.av_len = strlen(hexData.av_val); +++ +++ // Free allocated resources +++ free(key); +++ free(longKeyBuf); +++ free(data); +++ free(longDataBuf); +++ +++ return hexData; +++} ++diff --git librtmp/rtmp.h librtmp/rtmp.h ++index 0248913..3e573da 100644 ++--- librtmp/rtmp.h +++++ librtmp/rtmp.h ++@@ -150,12 +150,15 @@ extern "C" ++ AVal playpath; /* passed in explicitly */ ++ AVal tcUrl; ++ AVal swfUrl; +++ AVal swfHash; ++ AVal pageUrl; ++ AVal app; ++ AVal auth; ++ AVal flashVer; ++ AVal subscribepath; +++ AVal ccomm; ++ AVal usherToken; +++ AVal WeebToken; ++ AVal token; ++ AVal pubUser; ++ AVal pubPasswd; ++@@ -175,9 +178,18 @@ extern "C" ++ int lFlags; ++ ++ int swfAge; +++ int swfSize; ++ ++ int protocol; +++ int ConnectPacket; +++ int CombineConnectPacket; +++ int redirected; ++ int timeout; /* connection timeout in seconds */ +++ int dynamicPublish; +++ AVal dynamicCommand; +++ AVal Extras; +++ AVal HandshakeResponse; +++ double publishId; ++ ++ int pFlags; /* unused, but kept to avoid breaking ABI */ ++ ++@@ -220,6 +232,7 @@ extern "C" ++ /* if bResume == TRUE */ ++ uint8_t initialFrameType; ++ uint32_t nResumeTS; +++ uint32_t nResumeDriftTS; ++ char *metaHeader; ++ char *initialFrame; ++ uint32_t nMetaHeaderSize; ++@@ -306,6 +319,8 @@ extern "C" ++ AVal *flashVer, ++ AVal *subscribepath, ++ 
AVal *usherToken, +++ AVal *WeebToken, +++ AVal *ccomm, ++ int dStart, ++ int dStop, int bLiveStream, long int timeout); ++ ++@@ -371,6 +386,11 @@ extern "C" ++ int RTMP_HashSWF(const char *url, unsigned int *size, unsigned char *hash, ++ int age); ++ +++ AVal AVcopy(AVal src); +++ AVal StripParams(AVal *src); +++ char *strreplace(char *srcstr, int srclen, char *orig, char *repl, int didAlloc); +++ int strsplit(char *src, int srclen, char delim, char ***params); +++ ++ #ifdef __cplusplus ++ }; ++ #endif ++diff --git librtmp/rtmp_sys.h librtmp/rtmp_sys.h ++index 85d7e53..b2a3438 100644 ++--- librtmp/rtmp_sys.h +++++ librtmp/rtmp_sys.h ++@@ -65,6 +65,7 @@ ++ #include <polarssl/net.h> ++ #include <polarssl/ssl.h> ++ #include <polarssl/havege.h> +++#include <polarssl/md5.h> ++ #if POLARSSL_VERSION_NUMBER < 0x01010000 ++ #define havege_random havege_rand ++ #endif ++@@ -105,6 +106,7 @@ typedef struct tls_server_ctx { ++ #define TLS_write(s,b,l) ssl_write(s,(unsigned char *)b,l) ++ #define TLS_shutdown(s) ssl_close_notify(s) ++ #define TLS_close(s) ssl_free(s); free(s) +++#define md5_hash(i, ilen, o) md5(i, ilen, o) ++ ++ #elif defined(USE_GNUTLS) ++ #include <gnutls/gnutls.h> ++@@ -122,6 +124,8 @@ typedef struct tls_ctx { ++ #define TLS_write(s,b,l) gnutls_record_send(s,b,l) ++ #define TLS_shutdown(s) gnutls_bye(s, GNUTLS_SHUT_RDWR) ++ #define TLS_close(s) gnutls_deinit(s) +++#define md5_hash(i, ilen, o) gnutls_digest_algorithm_t algorithm = GNUTLS_DIG_MD5;\ +++ gnutls_hash_fast(algorithm, i, ilen, o); ++ ++ #else /* USE_OPENSSL */ ++ #define TLS_CTX SSL_CTX * ++@@ -134,6 +138,7 @@ typedef struct tls_ctx { ++ #define TLS_write(s,b,l) SSL_write(s,b,l) ++ #define TLS_shutdown(s) SSL_shutdown(s) ++ #define TLS_close(s) SSL_free(s) +++#define md5_hash(i, ilen, o) MD5(i, ilen, o) ++ ++ #endif ++ #endif ++diff --git rtmpdump.c rtmpdump.c ++index 13741a7..b3ae33f 100644 ++--- rtmpdump.c +++++ rtmpdump.c ++@@ -36,6 +36,9 @@ ++ #ifdef WIN32 ++ #define fseeko fseeko64 ++ #define 
ftello ftello64 +++#ifdef __MINGW32__ +++#define off_t off64_t +++#endif ++ #include <io.h> ++ #include <fcntl.h> ++ #define SET_BINMODE(f) setmode(fileno(f), O_BINARY) ++@@ -67,7 +70,7 @@ InitSockets() ++ #endif ++ } ++ ++-inline void +++static inline void ++ CleanupSockets() ++ { ++ #ifdef WIN32 ++@@ -148,9 +151,9 @@ OpenResumeFile(const char *flvFile, // file name [in] ++ if (!*file) ++ return RD_SUCCESS; // RD_SUCCESS, because we go to fresh file mode instead of quiting ++ ++- fseek(*file, 0, SEEK_END); +++ fseeko(*file, 0, SEEK_END); ++ *size = ftello(*file); ++- fseek(*file, 0, SEEK_SET); +++ fseeko(*file, 0, SEEK_SET); ++ ++ if (*size > 0) ++ { ++@@ -178,7 +181,7 @@ OpenResumeFile(const char *flvFile, // file name [in] ++ } ++ ++ uint32_t dataOffset = AMF_DecodeInt32(hbuf + 5); ++- fseek(*file, dataOffset, SEEK_SET); +++ fseeko(*file, dataOffset, SEEK_SET); ++ ++ if (fread(hbuf, 1, 4, *file) != 4) ++ { ++@@ -283,18 +286,24 @@ GetLastKeyframe(FILE * file, // output file [in] ++ uint8_t dataType; ++ int bAudioOnly; ++ off_t size; +++ char *syncbuf, *p; ++ ++- fseek(file, 0, SEEK_END); +++ fseeko(file, 0, SEEK_END); ++ size = ftello(file); +++ if (size <= 0) +++ { +++ dSeek = 0; +++ return RD_SUCCESS; +++ } ++ ++- fseek(file, 4, SEEK_SET); +++ fseeko(file, 4, SEEK_SET); ++ if (fread(&dataType, sizeof(uint8_t), 1, file) != 1) ++ return RD_FAILED; ++ ++ bAudioOnly = (dataType & 0x4) && !(dataType & 0x1); ++ ++- RTMP_Log(RTMP_LOGDEBUG, "bAudioOnly: %d, size: %llu", bAudioOnly, ++- (unsigned long long) size); +++ RTMP_Log(RTMP_LOGDEBUG, "bAudioOnly: %d, size: %lu", bAudioOnly, +++ (unsigned long) size); ++ ++ // ok, we have to get the timestamp of the last keyframe (only keyframes are seekable) / last audio frame (audio only streams) ++ ++@@ -326,6 +335,51 @@ GetLastKeyframe(FILE * file, // output file [in] ++ prevTagSize = AMF_DecodeInt32(buffer); ++ //RTMP_Log(RTMP_LOGDEBUG, "Last packet: prevTagSize: %d", prevTagSize); ++ +++ if (prevTagSize <= 0 || prevTagSize 
> size - 4 - 13) +++ { +++ /* Last packet was not fully received - try to sync to last tag */ +++ prevTagSize = 0; +++ tsize = size > 0x100000 ? 0x100000 : size; /* 1MB should be enough for 3500K bitrates */ +++ if (tsize > 13 + 15) +++ { +++ tsize -= 13; // do not read header +++ syncbuf = (char *) malloc(tsize); +++ if (syncbuf) +++ { +++ fseeko(file, size - tsize, SEEK_SET); +++ if (fread(syncbuf, 1, tsize, file) == tsize) +++ { +++ p = syncbuf + tsize; +++ while (p >= syncbuf + 15) +++ { +++ /* Check for StreamID */ +++ if (AMF_DecodeInt24(p - 7) == 0) +++ { +++ /* Check for Audio/Video/Script */ +++ dataType = p[-15] & 0x1F; +++ if (dataType == 8 || dataType == 9 || dataType == 18) +++ { +++ prevTagSize = AMF_DecodeInt24(p - 14); +++ if ((prevTagSize < tsize) && (p + prevTagSize + 11 <= syncbuf + tsize - 4) +++ && (AMF_DecodeInt32(p - 4 + prevTagSize) == prevTagSize + 11)) +++ { +++ prevTagSize = syncbuf + tsize - p + 15; +++ RTMP_Log(RTMP_LOGDEBUG, "Sync success - found last tag at 0x%x", (uint32_t) (size - prevTagSize)); +++ prevTagSize -= 4; +++ tsize = 0; +++ break; +++ } +++ else +++ prevTagSize = 0; +++ } +++ } +++ --p; +++ } +++ } +++ free(syncbuf); +++ } +++ } +++ } ++ if (prevTagSize == 0) ++ { ++ RTMP_Log(RTMP_LOGERROR, "Couldn't find keyframe to resume from!"); ++@@ -703,8 +757,12 @@ void usage(char *prog) ++ RTMP_LogPrintf ++ ("--token|-T key Key for SecureToken response\n"); ++ RTMP_LogPrintf +++ ("--ccommand|-K key Send custom command before play\n"); +++ RTMP_LogPrintf ++ ("--jtv|-j JSON Authentication token for Justin.tv legacy servers\n"); ++ RTMP_LogPrintf +++ ("--weeb|-J string Authentication token for weeb.tv servers\n"); +++ RTMP_LogPrintf ++ ("--hashes|-# Display progress with hashes, not with the byte counter\n"); ++ RTMP_LogPrintf ++ ("--buffer|-b Buffer time in milliseconds (default: %u)\n", ++@@ -751,7 +809,9 @@ main(int argc, char **argv) ++ AVal hostname = { 0, 0 }; ++ AVal playpath = { 0, 0 }; ++ AVal subscribepath = { 0, 0 }; ++- 
AVal usherToken = { 0, 0 }; //Justin.tv auth token +++ AVal usherToken = { 0, 0 }; // Justin.tv auth token +++ AVal WeebToken = { 0, 0 }; // Weeb.tv auth token +++ AVal ccomm = { 0, 0 }; ++ int port = -1; ++ int protocol = RTMP_PROTOCOL_UNDEFINED; ++ int retries = 0; ++@@ -853,17 +913,19 @@ main(int argc, char **argv) ++ {"start", 1, NULL, 'A'}, ++ {"stop", 1, NULL, 'B'}, ++ {"token", 1, NULL, 'T'}, +++ {"ccommand", 1, NULL, 'K'}, ++ {"hashes", 0, NULL, '#'}, ++ {"debug", 0, NULL, 'z'}, ++ {"quiet", 0, NULL, 'q'}, ++ {"verbose", 0, NULL, 'V'}, ++ {"jtv", 1, NULL, 'j'}, +++ {"weeb", 1, NULL, 'J'}, ++ {0, 0, 0, 0} ++ }; ++ ++ while ((opt = ++ getopt_long(argc, argv, ++- "hVveqzRr:s:t:i:p:a:b:f:o:u:C:n:c:l:y:Ym:k:d:A:B:T:w:x:W:X:S:#j:", +++ "hVveqzRr:s:t:i:p:a:b:f:o:u:C:n:c:l:y:Ym:k:d:A:B:T:K:w:x:W:X:S:#j:J:", ++ longopts, NULL)) != -1) ++ { ++ switch (opt) ++@@ -995,7 +1057,7 @@ main(int argc, char **argv) ++ port = parsedPort; ++ if (playpath.av_len == 0 && parsedPlaypath.av_len) ++ { ++- playpath = parsedPlaypath; +++ playpath = AVcopy(parsedPlaypath); ++ } ++ if (protocol == RTMP_PROTOCOL_UNDEFINED) ++ protocol = parsedProtocol; ++@@ -1061,6 +1123,9 @@ main(int argc, char **argv) ++ RTMP_SetOpt(&rtmp, &av_token, &token); ++ } ++ break; +++ case 'K': +++ STR2AVAL(ccomm, optarg); +++ break; ++ case '#': ++ bHashes = TRUE; ++ break; ++@@ -1079,6 +1144,9 @@ main(int argc, char **argv) ++ case 'j': ++ STR2AVAL(usherToken, optarg); ++ break; +++ case 'J': +++ STR2AVAL(WeebToken, optarg); +++ break; ++ default: ++ RTMP_LogPrintf("unknown option: %c\n", opt); ++ usage(argv[0]); ++@@ -1170,14 +1238,14 @@ main(int argc, char **argv) ++ ++ if (tcUrl.av_len == 0) ++ { ++- tcUrl.av_len = strlen(RTMPProtocolStringsLower[protocol]) + ++- hostname.av_len + app.av_len + sizeof("://:65535/"); +++ tcUrl.av_len = strlen(RTMPProtocolStringsLower[protocol]) + +++ hostname.av_len + app.av_len + sizeof ("://:65535/"); ++ tcUrl.av_val = (char *) malloc(tcUrl.av_len); ++- if 
(!tcUrl.av_val) ++- return RD_FAILED; +++ if (!tcUrl.av_val) +++ return RD_FAILED; ++ tcUrl.av_len = snprintf(tcUrl.av_val, tcUrl.av_len, "%s://%.*s:%d/%.*s", ++- RTMPProtocolStringsLower[protocol], hostname.av_len, ++- hostname.av_val, port, app.av_len, app.av_val); +++ RTMPProtocolStringsLower[protocol], hostname.av_len, +++ hostname.av_val, port, app.av_len, app.av_val); ++ } ++ ++ int first = 1; ++@@ -1197,8 +1265,9 @@ main(int argc, char **argv) ++ if (!fullUrl.av_len) ++ { ++ RTMP_SetupStream(&rtmp, protocol, &hostname, port, &sockshost, &playpath, ++- &tcUrl, &swfUrl, &pageUrl, &app, &auth, &swfHash, swfSize, ++- &flashVer, &subscribepath, &usherToken, dSeek, dStopOffset, bLiveStream, timeout); +++ &tcUrl, &swfUrl, &pageUrl, &app, &auth, &swfHash, swfSize, +++ &flashVer, &subscribepath, &usherToken, &WeebToken, &ccomm, +++ dSeek, dStopOffset, bLiveStream, timeout); ++ } ++ else ++ { ++diff --git rtmpgw.c rtmpgw.c ++index 3e47602..e56b855 100644 ++--- rtmpgw.c +++++ rtmpgw.c ++@@ -96,7 +96,9 @@ typedef struct ++ AVal flashVer; ++ AVal token; ++ AVal subscribepath; ++- AVal usherToken; //Justin.tv auth token +++ AVal ccomm; +++ AVal usherToken; // Justin.tv auth token +++ AVal WeebToken; // Weeb.tv auth token ++ AVal sockshost; ++ AMFObject extras; ++ int edepth; ++@@ -556,8 +558,8 @@ void processTCPrequest(STREAMING_SERVER * server, // server socket and state (ou ++ if (!req.fullUrl.av_len) ++ { ++ RTMP_SetupStream(&rtmp, req.protocol, &req.hostname, req.rtmpport, &req.sockshost, ++- &req.playpath, &req.tcUrl, &req.swfUrl, &req.pageUrl, &req.app, &req.auth, &req.swfHash, req.swfSize, &req.flashVer, &req.subscribepath, &req.usherToken, dSeek, req.dStopOffset, ++- req.bLiveStream, req.timeout); +++ &req.playpath, &req.tcUrl, &req.swfUrl, &req.pageUrl, &req.app, &req.auth, &req.swfHash, req.swfSize, &req.flashVer, &req.subscribepath, +++ &req.usherToken, &req.WeebToken, &req.ccomm, dSeek, req.dStopOffset, req.bLiveStream, req.timeout); ++ } ++ else ++ { ++@@ 
-972,6 +974,12 @@ ParseOption(char opt, char *arg, RTMP_REQUEST * req) ++ case 'j': ++ STR2AVAL(req->usherToken, arg); ++ break; +++ case 'J': +++ STR2AVAL(req->WeebToken, arg); +++ break; +++ case 'K': +++ STR2AVAL(req->ccomm, arg); +++ break; ++ default: ++ RTMP_LogPrintf("unknown option: %c, arg: %s\n", opt, arg); ++ return FALSE; ++@@ -1044,6 +1052,8 @@ main(int argc, char **argv) ++ {"quiet", 0, NULL, 'q'}, ++ {"verbose", 0, NULL, 'V'}, ++ {"jtv", 1, NULL, 'j'}, +++ {"weeb", 1, NULL, 'J'}, +++ {"ccommand", 1, NULL, 'K'}, ++ {0, 0, 0, 0} ++ }; ++ ++@@ -1056,7 +1066,7 @@ main(int argc, char **argv) ++ ++ while ((opt = ++ getopt_long(argc, argv, ++- "hvqVzr:s:t:i:p:a:f:u:n:c:l:y:m:d:D:A:B:T:g:w:x:W:X:S:j:", longopts, +++ "hvqVzr:s:t:i:p:a:f:u:n:c:l:y:m:d:D:A:B:T:g:w:x:W:X:S:j:J:", longopts, ++ NULL)) != -1) ++ { ++ switch (opt) ++@@ -1119,8 +1129,12 @@ main(int argc, char **argv) ++ RTMP_LogPrintf ++ ("--token|-T key Key for SecureToken response\n"); ++ RTMP_LogPrintf +++ ("--ccommand|-K key Send custom command before play\n"); +++ RTMP_LogPrintf ++ ("--jtv|-j JSON Authentication token for Justin.tv legacy servers\n"); ++ RTMP_LogPrintf +++ ("--weeb|-J string Authentication token for weeb.tv servers\n"); +++ RTMP_LogPrintf ++ ("--buffer|-b Buffer time in milliseconds (default: %u)\n\n", ++ defaultRTMPRequest.bufferTime); ++ ++diff --git rtmpsrv.c rtmpsrv.c ++index 5df4d3a..eccaa9c 100644 ++--- rtmpsrv.c +++++ rtmpsrv.c ++@@ -25,9 +25,13 @@ ++ */ ++ ++ #include <stdlib.h> +++#ifdef __MINGW_H +++#include <unistd.h> +++#endif ++ #include <string.h> ++ #include <math.h> ++ #include <limits.h> +++#include <time.h> ++ ++ #include <signal.h> ++ #include <getopt.h> ++@@ -94,12 +98,19 @@ typedef struct ++ STREAMING_SERVER *rtmpServer = 0; // server structure pointer ++ void *sslCtx = NULL; ++ +++int file_exists(const char *fname); ++ STREAMING_SERVER *startStreaming(const char *address, int port); ++ void stopStreaming(STREAMING_SERVER * server); ++ void AVreplace(AVal 
*src, const AVal *orig, const AVal *repl); ++ ++ static const AVal av_dquote = AVC("\""); ++ static const AVal av_escdquote = AVC("\\\""); +++#ifdef WIN32 +++static const AVal av_caret = AVC("^"); +++static const AVal av_esccaret = AVC("^^"); +++static const AVal av_pipe = AVC("|"); +++static const AVal av_escpipe = AVC("^|"); +++#endif ++ ++ typedef struct ++ { ++@@ -168,6 +179,12 @@ SAVC(level); ++ SAVC(code); ++ SAVC(description); ++ SAVC(secureToken); +++SAVC(_checkbw); +++SAVC(_onbwdone); +++SAVC(checkBandwidth); +++SAVC(onBWDone); +++SAVC(FCSubscribe); +++SAVC(onFCSubscribe); ++ ++ static int ++ SendConnectResult(RTMP *r, double txn) ++@@ -191,7 +208,7 @@ SendConnectResult(RTMP *r, double txn) ++ enc = AMF_EncodeNumber(enc, pend, txn); ++ *enc++ = AMF_OBJECT; ++ ++- STR2AVAL(av, "FMS/3,5,1,525"); +++ STR2AVAL(av, "FMS/3,5,7,7009"); ++ enc = AMF_EncodeNamedString(enc, pend, &av_fmsVer, &av); ++ enc = AMF_EncodeNamedNumber(enc, pend, &av_capabilities, 31.0); ++ enc = AMF_EncodeNamedNumber(enc, pend, &av_mode, 1.0); ++@@ -213,7 +230,7 @@ SendConnectResult(RTMP *r, double txn) ++ enc = AMF_EncodeNamedString(enc, pend, &av_secureToken, &av); ++ #endif ++ STR2AVAL(p.p_name, "version"); ++- STR2AVAL(p.p_vu.p_aval, "3,5,1,525"); +++ STR2AVAL(p.p_vu.p_aval, "3,5,7,7009"); ++ p.p_type = AMF_STRING; ++ obj.o_num = 1; ++ obj.o_props = &p; ++@@ -234,7 +251,7 @@ static int ++ SendResultNumber(RTMP *r, double txn, double ID) ++ { ++ RTMPPacket packet; ++- char pbuf[256], *pend = pbuf+sizeof(pbuf); +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf); ++ ++ packet.m_nChannel = 0x03; // control channel (invoke) ++ packet.m_headerType = 1; /* RTMP_PACKET_SIZE_MEDIUM; */ ++@@ -264,12 +281,13 @@ static const AVal av_Stopped_playing = AVC("Stopped playing"); ++ SAVC(details); ++ SAVC(clientid); ++ static const AVal av_NetStream_Authenticate_UsherToken = AVC("NetStream.Authenticate.UsherToken"); +++static const AVal av_FCSubscribe_message = AVC("FCSubscribe to stream"); ++ ++ static 
int ++ SendPlayStart(RTMP *r) ++ { ++ RTMPPacket packet; ++- char pbuf[512], *pend = pbuf+sizeof(pbuf); +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf); ++ ++ packet.m_nChannel = 0x03; // control channel (invoke) ++ packet.m_headerType = 1; /* RTMP_PACKET_SIZE_MEDIUM; */ ++@@ -301,7 +319,7 @@ static int ++ SendPlayStop(RTMP *r) ++ { ++ RTMPPacket packet; ++- char pbuf[512], *pend = pbuf+sizeof(pbuf); +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf); ++ ++ packet.m_nChannel = 0x03; // control channel (invoke) ++ packet.m_headerType = 1; /* RTMP_PACKET_SIZE_MEDIUM; */ ++@@ -329,6 +347,83 @@ SendPlayStop(RTMP *r) ++ return RTMP_SendPacket(r, &packet, FALSE); ++ } ++ +++static int +++SendCheckBWResponse(RTMP *r, int oldMethodType, int onBWDoneInit) +++{ +++ RTMPPacket packet; +++ char pbuf[1024], *pend = pbuf + sizeof (pbuf); +++ char *enc; +++ +++ packet.m_nChannel = 0x03; /* control channel (invoke) */ +++ packet.m_headerType = RTMP_PACKET_SIZE_MEDIUM; +++ packet.m_packetType = RTMP_PACKET_TYPE_INVOKE; +++ packet.m_nTimeStamp = 0; +++ packet.m_nInfoField2 = 0; +++ packet.m_hasAbsTimestamp = 0; +++ packet.m_body = pbuf + RTMP_MAX_HEADER_SIZE; +++ +++ enc = packet.m_body; +++ if (oldMethodType) +++ { +++ enc = AMF_EncodeString(enc, pend, &av__onbwdone); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ *enc++ = AMF_NULL; +++ enc = AMF_EncodeNumber(enc, pend, 10240); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ } +++ else +++ { +++ enc = AMF_EncodeString(enc, pend, &av_onBWDone); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ *enc++ = AMF_NULL; +++ if (!onBWDoneInit) +++ { +++ enc = AMF_EncodeNumber(enc, pend, 10240); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ enc = AMF_EncodeNumber(enc, pend, 20); +++ } +++ } +++ +++ packet.m_nBodySize = enc - packet.m_body; +++ +++ return RTMP_SendPacket(r, &packet, FALSE); +++} +++ +++static int +++SendOnFCSubscribe(RTMP *r) +++{ +++ RTMPPacket packet; +++ char pbuf[1024], *pend = pbuf + 
sizeof (pbuf); +++ char *enc; +++ +++ packet.m_nChannel = 0x03; /* control channel (invoke) */ +++ packet.m_headerType = RTMP_PACKET_SIZE_MEDIUM; +++ packet.m_packetType = RTMP_PACKET_TYPE_INVOKE; +++ packet.m_nTimeStamp = 0; +++ packet.m_nInfoField2 = 0; +++ packet.m_hasAbsTimestamp = 0; +++ packet.m_body = pbuf + RTMP_MAX_HEADER_SIZE; +++ +++ enc = packet.m_body; +++ enc = AMF_EncodeString(enc, pend, &av_onFCSubscribe); +++ enc = AMF_EncodeNumber(enc, pend, 0); +++ *enc++ = AMF_NULL; +++ +++ *enc++ = AMF_OBJECT; +++ enc = AMF_EncodeNamedString(enc, pend, &av_level, &av_status); +++ enc = AMF_EncodeNamedString(enc, pend, &av_code, &av_NetStream_Play_Start); +++ enc = AMF_EncodeNamedString(enc, pend, &av_description, &av_FCSubscribe_message); +++ enc = AMF_EncodeNamedNumber(enc, pend, &av_clientid, 0); +++ *enc++ = 0; +++ *enc++ = 0; +++ *enc++ = AMF_OBJECT_END; +++ +++ packet.m_nBodySize = enc - packet.m_body; +++ +++ return RTMP_SendPacket(r, &packet, FALSE); +++} +++ ++ static void ++ spawn_dumper(int argc, AVal *av, char *cmd) ++ { ++@@ -389,6 +484,8 @@ countAMF(AMFObject *obj, int *argc) ++ len += 40; ++ break; ++ case AMF_OBJECT: +++ case AMF_ECMA_ARRAY: +++ case AMF_STRICT_ARRAY: ++ len += 9; ++ len += countAMF(&p->p_vu.p_object, argc); ++ (*argc) += 2; ++@@ -407,9 +504,11 @@ dumpAMF(AMFObject *obj, char *ptr, AVal *argv, int *argc) ++ int i, ac = *argc; ++ const char opt[] = "NBSO Z"; ++ ++- for (i=0; i < obj->o_num; i++) +++ for (i = 0; i < obj->o_num; i++) ++ { ++ AMFObjectProperty *p = &obj->o_props[i]; +++ if ((p->p_type == AMF_ECMA_ARRAY) || (p->p_type == AMF_STRICT_ARRAY)) +++ p->p_type = AMF_OBJECT; ++ argv[ac].av_val = ptr+1; ++ argv[ac++].av_len = 2; ++ ptr += sprintf(ptr, " -C "); ++@@ -569,6 +668,7 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ server->arglen += countAMF(&r->Link.extras, &server->argc); ++ } ++ SendConnectResult(r, txn); +++ SendCheckBWResponse(r, FALSE, TRUE); ++ } ++ else if 
(AVMATCH(&method, &av_createStream)) ++ { ++@@ -583,10 +683,26 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ AVal usherToken; ++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 3), &usherToken); ++ AVreplace(&usherToken, &av_dquote, &av_escdquote); +++#ifdef WIN32 +++ AVreplace(&usherToken, &av_caret, &av_esccaret); +++ AVreplace(&usherToken, &av_pipe, &av_escpipe); +++#endif ++ server->arglen += 6 + usherToken.av_len; ++ server->argc += 2; ++ r->Link.usherToken = usherToken; ++ } +++ else if (AVMATCH(&method, &av__checkbw)) +++ { +++ SendCheckBWResponse(r, TRUE, FALSE); +++ } +++ else if (AVMATCH(&method, &av_checkBandwidth)) +++ { +++ SendCheckBWResponse(r, FALSE, FALSE); +++ } +++ else if (AVMATCH(&method, &av_FCSubscribe)) +++ { +++ SendOnFCSubscribe(r); +++ } ++ else if (AVMATCH(&method, &av_play)) ++ { ++ char *file, *p, *q, *cmd, *ptr; ++@@ -602,6 +718,17 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ if (obj.o_num > 5) ++ r->Link.length = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 5)); ++ */ +++ double StartFlag = 0; +++ AMFObjectProperty *Start = AMF_GetProp(&obj, NULL, 4); +++ if (!(Start->p_type == AMF_INVALID)) +++ StartFlag = AMFProp_GetNumber(Start); +++ r->Link.app = AVcopy(r->Link.app); +++ if (StartFlag == -1000 || (r->Link.app.av_val && strstr(r->Link.app.av_val, "live"))) +++ { +++ StartFlag = -1000; +++ server->arglen += 7; +++ server->argc += 1; +++ } ++ if (r->Link.tcUrl.av_len) ++ { ++ len = server->arglen + r->Link.playpath.av_len + 4 + ++@@ -619,6 +746,7 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ argv[argc].av_val = ptr + 1; ++ argv[argc++].av_len = 2; ++ argv[argc].av_val = ptr + 5; +++ r->Link.tcUrl = StripParams(&r->Link.tcUrl); ++ ptr += sprintf(ptr," -r \"%s\"", r->Link.tcUrl.av_val); ++ argv[argc++].av_len = r->Link.tcUrl.av_len; ++ ++@@ -643,6 +771,7 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket 
*packet, unsigned int ++ argv[argc].av_val = ptr + 1; ++ argv[argc++].av_len = 2; ++ argv[argc].av_val = ptr + 5; +++ r->Link.swfUrl = StripParams(&r->Link.swfUrl); ++ ptr += sprintf(ptr, " -W \"%s\"", r->Link.swfUrl.av_val); ++ argv[argc++].av_len = r->Link.swfUrl.av_len; ++ } ++@@ -665,10 +794,17 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ r->Link.usherToken.av_val = NULL; ++ r->Link.usherToken.av_len = 0; ++ } ++- if (r->Link.extras.o_num) { ++- ptr = dumpAMF(&r->Link.extras, ptr, argv, &argc); ++- AMF_Reset(&r->Link.extras); ++- } +++ if (StartFlag == -1000) +++ { +++ argv[argc].av_val = ptr + 1; +++ argv[argc++].av_len = 6; +++ ptr += sprintf(ptr, " --live"); +++ } +++ if (r->Link.extras.o_num) +++ { +++ ptr = dumpAMF(&r->Link.extras, ptr, argv, &argc); +++ AMF_Reset(&r->Link.extras); +++ } ++ argv[argc].av_val = ptr + 1; ++ argv[argc++].av_len = 2; ++ argv[argc].av_val = ptr + 5; ++@@ -676,7 +812,13 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ r->Link.playpath.av_len, r->Link.playpath.av_val); ++ argv[argc++].av_len = r->Link.playpath.av_len; ++ ++- av = r->Link.playpath; +++ if (r->Link.playpath.av_len) +++ av = r->Link.playpath; +++ else +++ { +++ av.av_val = "file"; +++ av.av_len = 4; +++ } ++ /* strip trailing URL parameters */ ++ q = memchr(av.av_val, '?', av.av_len); ++ if (q) ++@@ -710,25 +852,82 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ ++ memcpy(file, av.av_val, av.av_len); ++ file[av.av_len] = '\0'; ++- for (p=file; *p; p++) ++- if (*p == ':') ++- *p = '_'; ++ ++- /* Add extension if none present */ ++- if (file[av.av_len - 4] != '.') ++- { ++- av.av_len += 4; ++- } ++- /* Always use flv extension, regardless of original */ ++- if (strcmp(file+av.av_len-4, ".flv")) ++- { ++- strcpy(file+av.av_len-4, ".flv"); ++- } +++ if (strlen(file) < 128) +++ { +++ /* Add extension if none present */ +++ if (file[av.av_len - 4] != '.') 
+++ { +++ av.av_len += 4; +++ } +++ +++ /* Always use flv extension, regardless of original */ +++ if (strcmp(file + av.av_len - 4, ".flv")) +++ { +++ strcpy(file + av.av_len - 4, ".flv"); +++ } +++ +++ /* Remove invalid characters from filename */ +++ file = strreplace(file, 0, ":", "_", TRUE); +++ file = strreplace(file, 0, "&", "_", TRUE); +++ file = strreplace(file, 0, "^", "_", TRUE); +++ file = strreplace(file, 0, "|", "_", TRUE); +++ } +++ else +++ { +++ /* Filename too long - generate unique name */ +++ strcpy(file, "vXXXXXX"); +++ mkstemp(file); +++ strcat(file, ".flv"); +++ } +++ +++ /* Add timestamp to the filename */ +++ char *filename, *pfilename, timestamp[21]; +++ int filename_len, timestamp_len; +++ time_t current_time; +++ +++ time(&current_time); +++ timestamp_len = strftime(&timestamp[0], sizeof (timestamp), "%Y-%m-%d_%I-%M-%S_", localtime(&current_time)); +++ timestamp[timestamp_len] = '\0'; +++ filename_len = strlen(file); +++ filename = malloc(timestamp_len + filename_len + 1); +++ pfilename = filename; +++ memcpy(pfilename, timestamp, timestamp_len); +++ pfilename += timestamp_len; +++ memcpy(pfilename, file, filename_len); +++ pfilename += filename_len; +++ *pfilename++ = '\0'; +++ file = filename; +++ ++ argv[argc].av_val = ptr + 1; ++ argv[argc++].av_len = 2; ++ argv[argc].av_val = file; ++ argv[argc].av_len = av.av_len; ++- ptr += sprintf(ptr, " -o %s", file); +++#ifdef VLC +++ char *vlc; +++ int didAlloc = FALSE; +++ +++ if (getenv("VLC")) +++ vlc = getenv("VLC"); +++ else if (getenv("ProgramFiles")) +++ { +++ vlc = malloc(512 * sizeof (char)); +++ didAlloc = TRUE; +++ char *ProgramFiles = getenv("ProgramFiles"); +++ sprintf(vlc, "\"%s%s", ProgramFiles, " (x86)\\VideoLAN\\VLC\\vlc.exe"); +++ if (!file_exists(vlc + 1)) +++ sprintf(vlc + 1, "%s%s", ProgramFiles, "\\VideoLAN\\VLC\\vlc.exe"); +++ strcpy(vlc + strlen(vlc), "\" -"); +++ } +++ else +++ vlc = "vlc -"; +++ +++ ptr += sprintf(ptr, " | %s", vlc); +++ if (didAlloc) +++ free(vlc); +++#else +++ ptr += 
sprintf(ptr, " -o \"%s\"", file); +++#endif ++ now = RTMP_GetTime(); ++ if (now - server->filetime < DUPTIME && AVMATCH(&argv[argc], &server->filename)) ++ { ++@@ -742,7 +941,21 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ server->filetime = now; ++ free(server->filename.av_val); ++ server->filename = argv[argc++]; ++- spawn_dumper(argc, argv, cmd); +++#ifdef VLC +++ FILE *vlc_cmdfile = fopen("VLC.bat", "w"); +++ char *vlc_batchcmd = strreplace(cmd, 0, "%", "%%", FALSE); +++ fprintf(vlc_cmdfile, "%s\n", vlc_batchcmd); +++ fclose(vlc_cmdfile); +++ free(vlc_batchcmd); +++ spawn_dumper(argc, argv, "VLC.bat"); +++#else +++ spawn_dumper(argc, argv, cmd); +++#endif +++ +++ /* Save command to text file */ +++ FILE *cmdfile = fopen("Command.txt", "a"); +++ fprintf(cmdfile, "%s\n", cmd); +++ fclose(cmdfile); ++ } ++ ++ free(cmd); ++@@ -861,12 +1074,18 @@ controlServerThread(void *unused) ++ { ++ case 'q': ++ RTMP_LogPrintf("Exiting\n"); ++- stopStreaming(rtmpServer); ++- exit(0); +++ if (rtmpServer) +++ stopStreaming(rtmpServer); ++ break; ++ default: ++ RTMP_LogPrintf("Unknown command \'%c\', ignoring\n", ich); ++ } +++ sleep(1); +++ if (rtmpServer && (rtmpServer->state == STREAMING_STOPPED)) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "Exiting text UI thread"); +++ break; +++ } ++ } ++ TFRET(); ++ } ++@@ -1054,7 +1273,6 @@ stopStreaming(STREAMING_SERVER * server) ++ } ++ } ++ ++- ++ void ++ sigIntHandler(int sig) ++ { ++@@ -1191,3 +1409,15 @@ AVreplace(AVal *src, const AVal *orig, const AVal *repl) ++ src->av_val = dest; ++ src->av_len = dptr - dest; ++ } +++ +++int +++file_exists(const char *fname) +++{ +++ FILE *file; +++ if ((file = fopen(fname, "r"))) +++ { +++ fclose(file); +++ return TRUE; +++ } +++ return FALSE; +++} ++diff --git rtmpsuck.c rtmpsuck.c ++index e886179..0abdba4 100644 ++--- rtmpsuck.c +++++ rtmpsuck.c ++@@ -25,10 +25,13 @@ ++ */ ++ ++ #include <stdlib.h> +++#ifdef __MINGW_H +++#include <unistd.h> +++#endif ++ #include 
<string.h> ++ #include <math.h> ++ #include <limits.h> ++- +++#include <time.h> ++ #include <signal.h> ++ #include <getopt.h> ++ ++@@ -141,18 +144,21 @@ SAVC(code); ++ SAVC(secureToken); ++ SAVC(onStatus); ++ SAVC(close); +++SAVC(play2); ++ static const AVal av_NetStream_Failed = AVC("NetStream.Failed"); ++ static const AVal av_NetStream_Play_Failed = AVC("NetStream.Play.Failed"); ++-static const AVal av_NetStream_Play_StreamNotFound = ++-AVC("NetStream.Play.StreamNotFound"); ++-static const AVal av_NetConnection_Connect_InvalidApp = ++-AVC("NetConnection.Connect.InvalidApp"); +++static const AVal av_NetStream_Play_StreamNotFound = AVC("NetStream.Play.StreamNotFound"); +++static const AVal av_NetConnection_Connect_InvalidApp = AVC("NetConnection.Connect.InvalidApp"); +++static const AVal av_NetConnection_Connect_Rejected = AVC("NetConnection.Connect.Rejected"); ++ static const AVal av_NetStream_Play_Start = AVC("NetStream.Play.Start"); ++ static const AVal av_NetStream_Play_Complete = AVC("NetStream.Play.Complete"); ++ static const AVal av_NetStream_Play_Stop = AVC("NetStream.Play.Stop"); +++static const AVal av_NetStream_Authenticate_UsherToken = AVC("NetStream.Authenticate.UsherToken"); ++ ++ static const char *cst[] = { "client", "server" }; ++ +++char *dumpAMF(AMFObject *obj, char *ptr); +++ ++ // Returns 0 for OK/Failed/error, 1 for 'Stop or Complete' ++ int ++ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *body) ++@@ -198,26 +204,28 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ if (cobj.o_props[i].p_type == AMF_STRING) ++ { ++ pval = cobj.o_props[i].p_vu.p_aval; ++- RTMP_LogPrintf("%.*s: %.*s\n", pname.av_len, pname.av_val, pval.av_len, pval.av_val); +++ RTMP_LogPrintf("%10.*s : %.*s\n", pname.av_len, pname.av_val, pval.av_len, pval.av_val); ++ } ++ if (AVMATCH(&pname, &av_app)) ++ { ++- server->rc.Link.app = pval; +++ server->rc.Link.app = AVcopy(pval); ++ pval.av_val = NULL; ++ } ++ 
else if (AVMATCH(&pname, &av_flashVer)) ++ { ++- server->rc.Link.flashVer = pval; +++ server->rc.Link.flashVer = AVcopy(pval); ++ pval.av_val = NULL; ++ } ++ else if (AVMATCH(&pname, &av_swfUrl)) ++ { ++ #ifdef CRYPTO ++ if (pval.av_val) ++- RTMP_HashSWF(pval.av_val, &server->rc.Link.SWFSize, ++- (unsigned char *)server->rc.Link.SWFHash, 30); +++ { +++ AVal swfUrl = StripParams(&pval); +++ RTMP_HashSWF(swfUrl.av_val, &server->rc.Link.SWFSize, (unsigned char *) server->rc.Link.SWFHash, 30); +++ } ++ #endif ++- server->rc.Link.swfUrl = pval; +++ server->rc.Link.swfUrl = AVcopy(pval); ++ pval.av_val = NULL; ++ } ++ else if (AVMATCH(&pname, &av_tcUrl)) ++@@ -225,7 +233,7 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ char *r1 = NULL, *r2; ++ int len; ++ ++- server->rc.Link.tcUrl = pval; +++ server->rc.Link.tcUrl = AVcopy(pval); ++ if ((pval.av_val[0] | 0x40) == 'r' && ++ (pval.av_val[1] | 0x40) == 't' && ++ (pval.av_val[2] | 0x40) == 'm' && ++@@ -267,7 +275,7 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ } ++ else if (AVMATCH(&pname, &av_pageUrl)) ++ { ++- server->rc.Link.pageUrl = pval; +++ server->rc.Link.pageUrl = AVcopy(pval); ++ pval.av_val = NULL; ++ } ++ else if (AVMATCH(&pname, &av_audioCodecs)) ++@@ -287,14 +295,21 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ if (pval.av_val) ++ free(pval.av_val); ++ } +++ ++ if (obj.o_num > 3) ++ { ++- if (AMFProp_GetBoolean(&obj.o_props[3])) ++- server->rc.Link.lFlags |= RTMP_LF_AUTH; ++- if (obj.o_num > 4) ++- { ++- AMFProp_GetString(&obj.o_props[4], &server->rc.Link.auth); ++- } +++ int i = obj.o_num - 3; +++ server->rc.Link.extras.o_num = i; +++ server->rc.Link.extras.o_props = malloc(i * sizeof (AMFObjectProperty)); +++ memcpy(server->rc.Link.extras.o_props, obj.o_props + 3, i * sizeof (AMFObjectProperty)); +++ obj.o_num = 3; +++ } +++ +++ if (server->rc.Link.extras.o_num) +++ { +++ 
server->rc.Link.Extras.av_val = calloc(2048, sizeof (char)); +++ dumpAMF(&server->rc.Link.extras, server->rc.Link.Extras.av_val); +++ server->rc.Link.Extras.av_len = strlen(server->rc.Link.Extras.av_val); ++ } ++ ++ if (!RTMP_Connect(&server->rc, pack)) ++@@ -303,6 +318,37 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ return 1; ++ } ++ server->rc.m_bSendCounter = FALSE; +++ +++ if (server->rc.Link.extras.o_props) +++ { +++ AMF_Reset(&server->rc.Link.extras); +++ } +++ } +++ else if (AVMATCH(&method, &av_NetStream_Authenticate_UsherToken)) +++ { +++ AVal usherToken = {0}; +++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 3), &usherToken); +++ server->rc.Link.usherToken = AVcopy(usherToken); +++ RTMP_LogPrintf("%10s : %.*s\n", "usherToken", server->rc.Link.usherToken.av_len, server->rc.Link.usherToken.av_val); +++ } +++ else if (AVMATCH(&method, &av_play2)) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "%s: Detected play2 request\n", __FUNCTION__); +++ if (body && nBodySize > 0) +++ { +++ char* pCmd = (char*) body; +++ char* pEnd = pCmd + nBodySize - 4; +++ while (pCmd < pEnd) +++ { +++ if (pCmd[0] == 'p' && pCmd[1] == 'l' && pCmd[2] == 'a' && pCmd[3] == 'y' && pCmd[4] == '2') +++ { +++ /* Disable bitrate transition by sending invalid command */ +++ pCmd[4] = 'z'; +++ break; +++ } +++ ++pCmd; +++ } +++ } ++ } ++ else if (AVMATCH(&method, &av_play)) ++ { ++@@ -323,6 +369,14 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ if (!av.av_val) ++ goto out; ++ +++ double StartFlag = 0; +++ AMFObjectProperty *Start = AMF_GetProp(&obj, NULL, 4); +++ if (!(Start->p_type == AMF_INVALID)) +++ StartFlag = AMFProp_GetNumber(Start); +++ if (StartFlag == -1000 || (server->rc.Link.app.av_val && strstr(server->rc.Link.app.av_val, "live"))) +++ StartFlag = -1000; +++ RTMP_LogPrintf("%10s : %s\n", "live", (StartFlag == -1000) ? 
"yes" : "no"); +++ ++ /* check for duplicates */ ++ for (fl = server->f_head; fl; fl=fl->f_next) ++ { ++@@ -362,19 +416,104 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ /* hope there aren't more than 255 dups */ ++ if (count) ++ flen += 2; ++- file = malloc(flen+1); +++ file = malloc(flen + 5); ++ ++ memcpy(file, av.av_val, av.av_len); ++ if (count) ++ sprintf(file+av.av_len, "%02x", count); ++ else ++ file[av.av_len] = '\0'; ++- for (p=file; *p; p++) ++- if (*p == ':') ++- *p = '_'; ++- RTMP_LogPrintf("Playpath: %.*s\nSaving as: %s\n", ++- server->rc.Link.playpath.av_len, server->rc.Link.playpath.av_val, ++- file); +++ +++ if (strlen(file) < 128) +++ { +++ /* Add extension if none present */ +++ if (file[av.av_len - 4] != '.') +++ { +++ av.av_len += 4; +++ } +++ +++ /* Always use flv extension, regardless of original */ +++ if (strcmp(file + av.av_len - 4, ".flv")) +++ { +++ strcpy(file + av.av_len - 4, ".flv"); +++ } +++ +++ /* Remove invalid characters from filename */ +++ file = strreplace(file, 0, ":", "_", TRUE); +++ file = strreplace(file, 0, "&", "_", TRUE); +++ file = strreplace(file, 0, "^", "_", TRUE); +++ file = strreplace(file, 0, "|", "_", TRUE); +++ } +++ else +++ { +++ /* Filename too long - generate unique name */ +++ strcpy(file, "vXXXXXX"); +++ mkstemp(file); +++ strcat(file, ".flv"); +++ } +++ +++ /* Add timestamp to the filename */ +++ char *filename, *pfilename, timestamp[21]; +++ int filename_len, timestamp_len; +++ time_t current_time; +++ +++ time(&current_time); +++ timestamp_len = strftime(&timestamp[0], sizeof (timestamp), "%Y-%m-%d_%I-%M-%S_", localtime(&current_time)); +++ timestamp[timestamp_len] = '\0'; +++ filename_len = strlen(file); +++ filename = malloc(timestamp_len + filename_len + 1); +++ pfilename = filename; +++ memcpy(pfilename, timestamp, timestamp_len); +++ pfilename += timestamp_len; +++ memcpy(pfilename, file, filename_len); +++ pfilename += filename_len; +++ *pfilename++ = '\0'; +++ file = 
filename; +++ +++ RTMP_LogPrintf("%10s : %.*s\n%10s : %s\n", "Playpath", server->rc.Link.playpath.av_len, +++ server->rc.Link.playpath.av_val, "Saving as", file); +++ +++ /* Save command to text file */ +++ char *cmd = NULL, *ptr = NULL; +++ AVal swfUrl, tcUrl; +++ +++ cmd = calloc(4096, sizeof (char)); +++ ptr = cmd; +++ tcUrl = StripParams(&server->rc.Link.tcUrl); +++ swfUrl = StripParams(&server->rc.Link.swfUrl); +++ ptr += sprintf(ptr, "rtmpdump -r \"%.*s\" -a \"%.*s\" -f \"%.*s\" -W \"%.*s\" -p \"%.*s\"", +++ tcUrl.av_len, tcUrl.av_val, +++ server->rc.Link.app.av_len, server->rc.Link.app.av_val, +++ server->rc.Link.flashVer.av_len, server->rc.Link.flashVer.av_val, +++ swfUrl.av_len, swfUrl.av_val, +++ server->rc.Link.pageUrl.av_len, server->rc.Link.pageUrl.av_val); +++ +++ if (server->rc.Link.usherToken.av_val) +++ { +++ char *usherToken = strreplace(server->rc.Link.usherToken.av_val, server->rc.Link.usherToken.av_len, "\"", "\\\"", TRUE); +++#ifdef WIN32 +++ usherToken = strreplace(usherToken, 0, "^", "^^", TRUE); +++ usherToken = strreplace(usherToken, 0, "|", "^|", TRUE); +++#endif +++ ptr += sprintf(ptr, " --jtv \"%s\"", usherToken); +++ free(usherToken); +++ } +++ +++ if (server->rc.Link.Extras.av_len) +++ { +++ ptr += sprintf(ptr, "%.*s", server->rc.Link.Extras.av_len, server->rc.Link.Extras.av_val); +++ } +++ +++ if (StartFlag == -1000) +++ ptr += sprintf(ptr, "%s", " --live"); +++ ptr += sprintf(ptr, " -y \"%.*s\"", server->rc.Link.playpath.av_len, server->rc.Link.playpath.av_val); +++ ptr += sprintf(ptr, " -o \"%s\"\n", file); +++ +++ FILE *cmdfile = fopen("Command.txt", "a"); +++ fprintf(cmdfile, "%s", cmd); +++ fclose(cmdfile); +++ free(cmd); +++ ++ out = fopen(file, "wb"); ++ free(file); ++ if (!out) ++@@ -407,9 +546,10 @@ ServeInvoke(STREAMING_SERVER *server, int which, RTMPPacket *pack, const char *b ++ ++ RTMP_Log(RTMP_LOGDEBUG, "%s, onStatus: %s", __FUNCTION__, code.av_val); ++ if (AVMATCH(&code, &av_NetStream_Failed) ++- || AVMATCH(&code, 
&av_NetStream_Play_Failed) ++- || AVMATCH(&code, &av_NetStream_Play_StreamNotFound) ++- || AVMATCH(&code, &av_NetConnection_Connect_InvalidApp)) +++ || AVMATCH(&code, &av_NetStream_Play_Failed) +++ || AVMATCH(&code, &av_NetStream_Play_StreamNotFound) +++ || AVMATCH(&code, &av_NetConnection_Connect_Rejected) +++ || AVMATCH(&code, &av_NetConnection_Connect_InvalidApp)) ++ { ++ ret = 1; ++ } ++@@ -719,13 +859,18 @@ controlServerThread(void *unused) ++ { ++ case 'q': ++ RTMP_LogPrintf("Exiting\n"); ++- stopStreaming(rtmpServer); ++- free(rtmpServer); ++- exit(0); +++ if (rtmpServer) +++ stopStreaming(rtmpServer); ++ break; ++ default: ++ RTMP_LogPrintf("Unknown command \'%c\', ignoring\n", ich); ++ } +++ sleep(1); +++ if (rtmpServer && (rtmpServer->state == STREAMING_STOPPED)) +++ { +++ RTMP_Log(RTMP_LOGDEBUG, "Exiting text UI thread"); +++ break; +++ } ++ } ++ TFRET(); ++ } ++@@ -815,7 +960,7 @@ TFTYPE doServe(void *arg) // server socket and state (our listening socket) ++ ++ if (select(n + 1, &rfds, NULL, NULL, &tv) <= 0) ++ { ++- if (server->f_cur && server->rc.m_mediaChannel && !paused) +++ if (server->f_cur && server->rc.m_mediaChannel && !paused && server->rc.m_channelTimestamp) ++ { ++ server->rc.m_pauseStamp = server->rc.m_channelTimestamp[server->rc.m_mediaChannel]; ++ if (RTMP_ToggleStream(&server->rc)) ++@@ -1123,7 +1268,6 @@ stopStreaming(STREAMING_SERVER * server) ++ } ++ } ++ ++- ++ void ++ sigIntHandler(int sig) ++ { ++@@ -1196,3 +1340,48 @@ main(int argc, char **argv) ++ #endif ++ return nStatus; ++ } +++ +++char * +++dumpAMF(AMFObject *obj, char *ptr) +++{ +++ int i; +++ const char opt[] = "NBSO Z"; +++ +++ for (i = 0; i < obj->o_num; i++) +++ { +++ AMFObjectProperty *p = &obj->o_props[i]; +++ if ((p->p_type == AMF_ECMA_ARRAY) || (p->p_type == AMF_STRICT_ARRAY)) +++ p->p_type = AMF_OBJECT; +++ if (p->p_type > 5) +++ continue; +++ ptr += sprintf(ptr, " -C "); +++ if (p->p_name.av_val) +++ *ptr++ = 'N'; +++ *ptr++ = opt[p->p_type]; +++ *ptr++ = ':'; +++ 
if (p->p_name.av_val) +++ ptr += sprintf(ptr, "%.*s:", p->p_name.av_len, p->p_name.av_val); +++ switch (p->p_type) +++ { +++ case AMF_BOOLEAN: +++ *ptr++ = p->p_vu.p_number != 0 ? '1' : '0'; +++ break; +++ case AMF_STRING: +++ memcpy(ptr, p->p_vu.p_aval.av_val, p->p_vu.p_aval.av_len); +++ ptr += p->p_vu.p_aval.av_len; +++ break; +++ case AMF_NUMBER: +++ ptr += sprintf(ptr, "%f", p->p_vu.p_number); +++ break; +++ case AMF_OBJECT: +++ *ptr++ = '1'; +++ ptr = dumpAMF(&p->p_vu.p_object, ptr); +++ ptr += sprintf(ptr, " -C O:0"); +++ break; +++ case AMF_NULL: +++ default: +++ break; +++ } +++ } +++ return ptr; +++} ++diff --git thread.c thread.c ++index 0913c98..13d624a 100644 ++--- thread.c +++++ thread.c ++@@ -32,7 +32,7 @@ ThreadCreate(thrfunc *routine, void *args) ++ HANDLE thd; ++ ++ thd = (HANDLE) _beginthread(routine, 0, args); ++- if (thd == -1L) +++ if (thd == INVALID_HANDLE_VALUE) ++ RTMP_LogPrintf("%s, _beginthread failed with %d\n", __FUNCTION__, errno); ++ ++ return thd; +diff --git a/tools/depends/target/librtmp/UpdateToLatest.diff b/tools/depends/target/librtmp/UpdateToLatest.diff +new file mode 100644 +index 0000000000000000000000000000000000000000..d9d5f6b8e4869efaba4b03abef4ccb534c4e8beb +--- /dev/null ++++ b/tools/depends/target/librtmp/UpdateToLatest.diff +@@ -0,0 +1,257 @@ ++diff --git b/ChangeLog a/ChangeLog ++index c3b1a14..b027e31 100644 ++--- b/ChangeLog +++++ a/ChangeLog ++@@ -1,6 +1,6 @@ ++ RTMPDump ++ Copyright 2008-2009 Andrej Stepanchuk; Distributed under the GPL v2 ++-Copyright 2009-2011 Howard Chu +++Copyright 2009-2015 Howard Chu ++ Copyright 2009 The Flvstreamer Team ++ http://rtmpdump.mplayerhq.hu/ ++ ++diff --git b/librtmp/amf.c a/librtmp/amf.c ++index 73d1486..7954144 100644 ++--- b/librtmp/amf.c +++++ a/librtmp/amf.c ++@@ -33,6 +33,7 @@ ++ #include "bytes.h" ++ ++ static const AMFObjectProperty AMFProp_Invalid = { {0, 0}, AMF_INVALID }; +++static const AMFObject AMFObj_Invalid = { 0, 0 }; ++ static const AVal AV_empty = { 0, 0 }; ++ 
++ /* Data is Big-Endian */ ++@@ -340,13 +341,19 @@ AMFProp_GetBoolean(AMFObjectProperty *prop) ++ void ++ AMFProp_GetString(AMFObjectProperty *prop, AVal *str) ++ { ++- *str = prop->p_vu.p_aval; +++ if (prop->p_type == AMF_STRING) +++ *str = prop->p_vu.p_aval; +++ else +++ *str = AV_empty; ++ } ++ ++ void ++ AMFProp_GetObject(AMFObjectProperty *prop, AMFObject *obj) ++ { ++- *obj = prop->p_vu.p_object; +++ if (prop->p_type == AMF_OBJECT) +++ *obj = prop->p_vu.p_object; +++ else +++ *obj = AMFObj_Invalid; ++ } ++ ++ int ++@@ -471,6 +478,8 @@ AMF3ReadString(const char *data, AVal *str) ++ RTMP_Log(RTMP_LOGDEBUG, ++ "%s, string reference, index: %d, not supported, ignoring!", ++ __FUNCTION__, refIndex); +++ str->av_val = NULL; +++ str->av_len = 0; ++ return len; ++ } ++ else ++@@ -510,9 +519,11 @@ AMF3Prop_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ if (name.av_len <= 0) ++ return nRes; ++ +++ nSize -= nRes; +++ if (nSize <= 0) +++ return -1; ++ prop->p_name = name; ++ pBuffer += nRes; ++- nSize -= nRes; ++ } ++ ++ /* decode */ ++@@ -598,6 +609,8 @@ AMF3Prop_Decode(AMFObjectProperty *prop, const char *pBuffer, int nSize, ++ __FUNCTION__, (unsigned char)(*pBuffer), pBuffer); ++ return -1; ++ } +++ if (nSize < 0) +++ return -1; ++ ++ return nOriginalSize - nSize; ++ } ++@@ -992,9 +1005,17 @@ AMF_DecodeArray(AMFObject *obj, const char *pBuffer, int nSize, ++ int nRes; ++ nArrayLen--; ++ +++ if (nSize <= 0) +++ { +++ bError = TRUE; +++ break; +++ } ++ nRes = AMFProp_Decode(&prop, pBuffer, nSize, bDecodeName); ++ if (nRes == -1) ++- bError = TRUE; +++ { +++ bError = TRUE; +++ break; +++ } ++ else ++ { ++ nSize -= nRes; ++@@ -1053,12 +1074,12 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ else ++ { ++ int32_t classExtRef = (classRef >> 1); ++- int i; +++ int i, cdnum; ++ ++ cd.cd_externalizable = (classExtRef & 0x1) == 1; ++ cd.cd_dynamic = ((classExtRef >> 1) & 0x1) == 1; ++ ++- cd.cd_num = classExtRef >> 2; +++ 
cdnum = classExtRef >> 2; ++ ++ /* class name */ ++ ++@@ -1073,9 +1094,16 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ cd.cd_name.av_val, cd.cd_externalizable, cd.cd_dynamic, ++ cd.cd_num); ++ ++- for (i = 0; i < cd.cd_num; i++) +++ for (i = 0; i < cdnum; i++) ++ { ++ AVal memberName; +++ if (nSize <=0) +++ { +++invalid: +++ RTMP_Log(RTMP_LOGDEBUG, "%s, invalid class encoding!", +++ __FUNCTION__); +++ return nOriginalSize; +++ } ++ len = AMF3ReadString(pBuffer, &memberName); ++ RTMP_Log(RTMP_LOGDEBUG, "Member: %s", memberName.av_val); ++ AMF3CD_AddProp(&cd, &memberName); ++@@ -1111,6 +1139,8 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ int nRes, i; ++ for (i = 0; i < cd.cd_num; i++) /* non-dynamic */ ++ { +++ if (nSize <=0) +++ goto invalid; ++ nRes = AMF3Prop_Decode(&prop, pBuffer, nSize, FALSE); ++ if (nRes == -1) ++ RTMP_Log(RTMP_LOGDEBUG, "%s, failed to decode AMF3 property!", ++@@ -1128,6 +1158,8 @@ AMF3_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bAMFData) ++ ++ do ++ { +++ if (nSize <=0) +++ goto invalid; ++ nRes = AMF3Prop_Decode(&prop, pBuffer, nSize, TRUE); ++ AMF_AddProp(obj, &prop); ++ ++@@ -1175,10 +1207,18 @@ AMF_Decode(AMFObject *obj, const char *pBuffer, int nSize, int bDecodeName) ++ ++ nRes = AMFProp_Decode(&prop, pBuffer, nSize, bDecodeName); ++ if (nRes == -1) ++- bError = TRUE; +++ { +++ bError = TRUE; +++ break; +++ } ++ else ++ { ++ nSize -= nRes; +++ if (nSize < 0) +++ { +++ bError = TRUE; +++ break; +++ } ++ pBuffer += nRes; ++ AMF_AddProp(obj, &prop); ++ } ++diff --git b/librtmp/log.c a/librtmp/log.c ++index 0012985..1b52000 100644 ++--- b/librtmp/log.c +++++ a/librtmp/log.c ++@@ -92,6 +92,10 @@ RTMP_LogLevel RTMP_LogGetLevel() ++ void RTMP_Log(int level, const char *format, ...) 
++ { ++ va_list args; +++ +++ if ( level > RTMP_debuglevel ) +++ return; +++ ++ va_start(args, format); ++ cb(level, format, args); ++ va_end(args); ++diff --git b/librtmp/rtmp.c a/librtmp/rtmp.c ++index ca7db6a..a2863b0 100644 ++--- b/librtmp/rtmp.c +++++ a/librtmp/rtmp.c ++@@ -186,9 +186,12 @@ RTMPPacket_Reset(RTMPPacket *p) ++ } ++ ++ int ++-RTMPPacket_Alloc(RTMPPacket *p, int nSize) +++RTMPPacket_Alloc(RTMPPacket *p, uint32_t nSize) ++ { ++- char *ptr = calloc(1, nSize + RTMP_MAX_HEADER_SIZE); +++ char *ptr; +++ if (nSize > SIZE_MAX - RTMP_MAX_HEADER_SIZE) +++ return FALSE; +++ ptr = calloc(1, nSize + RTMP_MAX_HEADER_SIZE); ++ if (!ptr) ++ return FALSE; ++ p->m_body = ptr + RTMP_MAX_HEADER_SIZE; ++@@ -1180,7 +1183,7 @@ RTMP_GetNextMediaPacket(RTMP *r, RTMPPacket *packet) ++ while (!bHasMediaPacket && RTMP_IsConnected(r) ++ && RTMP_ReadPacket(r, packet)) ++ { ++- if (!RTMPPacket_IsReady(packet)) +++ if (!RTMPPacket_IsReady(packet) || !packet->m_nBodySize) ++ { ++ continue; ++ } ++@@ -3643,7 +3646,6 @@ RTMP_ReadPacket(RTMP *r, RTMPPacket *packet) ++ { ++ packet->m_nBodySize = AMF_DecodeInt24(header + 3); ++ packet->m_nBytesRead = 0; ++- RTMPPacket_Free(packet); ++ ++ if (nSize > 6) ++ { ++diff --git b/librtmp/rtmp.h a/librtmp/rtmp.h ++index 0248913..6d7dd89 100644 ++--- b/librtmp/rtmp.h +++++ a/librtmp/rtmp.h ++@@ -136,7 +136,7 @@ extern "C" ++ ++ void RTMPPacket_Reset(RTMPPacket *p); ++ void RTMPPacket_Dump(RTMPPacket *p); ++- int RTMPPacket_Alloc(RTMPPacket *p, int nSize); +++ int RTMPPacket_Alloc(RTMPPacket *p, uint32_t nSize); ++ void RTMPPacket_Free(RTMPPacket *p); ++ ++ #define RTMPPacket_IsReady(a) ((a)->m_nBytesRead == (a)->m_nBodySize) ++diff --git b/rtmpsrv.c a/rtmpsrv.c ++index a9e9045..5df4d3a 100644 ++--- b/rtmpsrv.c +++++ a/rtmpsrv.c ++@@ -404,10 +404,10 @@ countAMF(AMFObject *obj, int *argc) ++ static char * ++ dumpAMF(AMFObject *obj, char *ptr, AVal *argv, int *argc) ++ { ++- int i, len, ac = *argc; +++ int i, ac = *argc; ++ const char opt[] = 
"NBSO Z"; ++ ++- for (i=0, len=0; i < obj->o_num; i++) +++ for (i=0; i < obj->o_num; i++) ++ { ++ AMFObjectProperty *p = &obj->o_props[i]; ++ argv[ac].av_val = ptr+1; ++@@ -595,6 +595,8 @@ ServeInvoke(STREAMING_SERVER *server, RTMP * r, RTMPPacket *packet, unsigned int ++ uint32_t now; ++ RTMPPacket pc = {0}; ++ AMFProp_GetString(AMF_GetProp(&obj, NULL, 3), &r->Link.playpath); +++ if (!r->Link.playpath.av_len) +++ return 0; ++ /* ++ r->Link.seekTime = AMFProp_GetNumber(AMF_GetProp(&obj, NULL, 4)); ++ if (obj.o_num > 5) +diff --git a/tools/depends/target/librtmp/libm.patch b/tools/depends/target/librtmp/libm.patch +deleted file mode 100644 +index d86485b584920d3b8e7d775196e50f9b7fe3b297..0000000000000000000000000000000000000000 +--- a/tools/depends/target/librtmp/libm.patch ++++ /dev/null +@@ -1,11 +0,0 @@ +---- Makefile.old 2013-06-04 17:35:58.000000000 +0200 +-+++ Makefile 2013-06-04 17:36:13.000000000 +0200 +-@@ -25,7 +25,7 @@ +- REQ_GNUTLS=gnutls +- REQ_OPENSSL=libssl,libcrypto +- LIBZ=-lz +--LIBS_posix= +-+LIBS_posix=-lm +- LIBS_darwin= +- LIBS_mingw=-lws2_32 -lwinmm -lgdi32 +- LIB_GNUTLS=-lgnutls -lhogweed -lnettle -lgmp $(LIBZ) + +From 2c9b195f2c8cf3559bba7d7b21d35ee0d0bca59c Mon Sep 17 00:00:00 2001 +From: Claudio-Sjo <Claudio.Porfiri@gmail.com> +Date: Mon, 16 Feb 2015 14:51:26 +0100 +Subject: [PATCH 23/67] - allow reads < CDIO_CD_FRAMESIZE_RAW by using a buffer + - fixes #15794 + +--- + xbmc/filesystem/CDDAFile.cpp | 120 ++++++++++++++++++++++++++++++++----------- + xbmc/filesystem/CDDAFile.h | 3 ++ + 2 files changed, 92 insertions(+), 31 deletions(-) + +diff --git a/xbmc/filesystem/CDDAFile.cpp b/xbmc/filesystem/CDDAFile.cpp +index 722e62602084923bd040803f0e5a5c336a42fa3b..b0f53e5d44e108d88d7af0e46913a7f29328cd31 100644 +--- a/xbmc/filesystem/CDDAFile.cpp ++++ b/xbmc/filesystem/CDDAFile.cpp +@@ -42,10 +42,14 @@ CFileCDDA::CFileCDDA(void) + m_lsnEnd = CDIO_INVALID_LSN; + m_cdio = CLibcdio::GetInstance(); + m_iSectorCount = 52; ++ m_TrackBuf = (uint8_t *) 
malloc(CDIO_CD_FRAMESIZE_RAW); ++ p_TrackBuf = 0; ++ f_TrackBuf = 0; + } + + CFileCDDA::~CFileCDDA(void) + { ++ free(m_TrackBuf); + Close(); + } + +@@ -53,6 +57,9 @@ bool CFileCDDA::Open(const CURL& url) + { + std::string strURL = url.GetWithoutFilename(); + ++ // Flag TrackBuffer = FALSE, TrackBuffer is empty ++ f_TrackBuf = 0; ++ + if (!g_mediaManager.IsDiscInDrive(strURL) || !IsValidFile(url)) + return false; + +@@ -117,50 +124,98 @@ int CFileCDDA::Stat(const CURL& url, struct __stat64* buffer) + + ssize_t CFileCDDA::Read(void* lpBuf, size_t uiBufSize) + { +- if (!m_pCdIo || !g_mediaManager.IsDiscInDrive()) +- return -1; + +- if (uiBufSize > SSIZE_MAX) +- uiBufSize = SSIZE_MAX; ++ ssize_t returnValue; ++ int iSectorCount; ++ void *destBuf; + +- // limit number of sectors that fits in buffer by m_iSectorCount +- int iSectorCount = std::min((int)uiBufSize / CDIO_CD_FRAMESIZE_RAW, m_iSectorCount); + +- if (iSectorCount <= 0) ++ if (!m_pCdIo || !g_mediaManager.IsDiscInDrive()) ++ { ++ CLog::Log(LOGERROR, "file cdda: Aborted because no disc in drive or no m_pCdIo"); + return -1; ++ } + +- // Are there enough sectors left to read +- if (m_lsnCurrent + iSectorCount > m_lsnEnd) +- iSectorCount = m_lsnEnd - m_lsnCurrent; ++ uiBufSize = std::min( uiBufSize, (size_t)SSIZE_MAX ); + +- // The loop tries to solve read error problem by lowering number of sectors to read (iSectorCount). +- // When problem is solved the proper number of sectors is stored in m_iSectorCount +- int big_iSectorCount = iSectorCount; +- while (iSectorCount > 0) ++ // If we have data in the TrackBuffer, they must be used first ++ if (f_TrackBuf) + { +- int iret = m_cdio->cdio_read_audio_sectors(m_pCdIo, lpBuf, m_lsnCurrent, iSectorCount); ++ // Get at most the remaining data in m_TrackBuf ++ uiBufSize = std::min(uiBufSize, CDIO_CD_FRAMESIZE_RAW - p_TrackBuf); ++ memcpy(lpBuf, m_TrackBuf + p_TrackBuf, uiBufSize); ++ // Update the data offset ++ p_TrackBuf += uiBufSize; ++ // Is m_TrackBuf empty? 
++    f_TrackBuf = (CDIO_CD_FRAMESIZE_RAW != p_TrackBuf); // flag stays set only while unread bytes remain in m_TrackBuf
++    // All done, return read bytes
++    return uiBufSize;
++  }
++
++  // No data left in buffer
++
++  // Is this a short read?
++  if (uiBufSize < CDIO_CD_FRAMESIZE_RAW)
++  {
++    // short request, buffer one full sector
++    iSectorCount = 1;
++    destBuf = m_TrackBuf;
++  }
++  else // normal request
++  {
++    // limit number of sectors that fits in buffer by m_iSectorCount
++    iSectorCount = std::min((int)uiBufSize / CDIO_CD_FRAMESIZE_RAW, m_iSectorCount);
++    destBuf = lpBuf;
++  }
+ 
++  // Are there enough sectors left to read?
++  iSectorCount = std::min(iSectorCount, m_lsnEnd - m_lsnCurrent);
++
++  // Have we reached EOF?
++  if (iSectorCount <= 0) // negative when m_lsnCurrent is already past m_lsnEnd; never issue a read with a non-positive count
++  {
++    CLog::Log(LOGNOTICE, "file cdda: Read EoF");
++    return 0; // Success, but nothing read
++  } // Reached EoF
++
++  // At least one sector to read
++  int retries;
++  int iret;
++  // Try reading a decreasing number of sectors, then 3 times with 1 sector
++  for (retries = 3; retries > 0; iSectorCount>1 ? 
iSectorCount-- : retries--) ++ { ++ iret = m_cdio->cdio_read_audio_sectors(m_pCdIo, destBuf, m_lsnCurrent, iSectorCount); + if (iret == DRIVER_OP_SUCCESS) ++ break; // Get out from the loop ++ else + { +- // If lower iSectorCount solved the problem limit it's value +- if (iSectorCount < big_iSectorCount) +- { +- m_iSectorCount = iSectorCount; +- } +- break; +- } +- +- // iSectorCount is low so it cannot solve read problem +- if (iSectorCount <= 10) +- { +- CLog::Log(LOGERROR, "file cdda: Reading %d sectors of audio data starting at lsn %d failed with error code %i", iSectorCount, m_lsnCurrent, iret); +- return -1; +- } +- +- iSectorCount = 10; ++ CLog::Log(LOGERROR, "file cdda: Read cdio error when reading track "); ++ } // Errors when reading file + } ++ // retries == 0 only if failed reading at least one sector ++ if (retries == 0) ++ { ++ CLog::Log(LOGERROR, "file cdda: Reading %d sectors of audio data starting at lsn %d failed with error code %i", iSectorCount, m_lsnCurrent, iret); ++ return -1; ++ } ++ ++ // Update position in file + m_lsnCurrent += iSectorCount; + ++ // Was it a short request? 
++ if (uiBufSize < CDIO_CD_FRAMESIZE_RAW) ++ { ++ // We copy the amount if requested data into the destination buffer ++ memcpy(lpBuf, m_TrackBuf, uiBufSize); ++ // and keep track of the first available data ++ p_TrackBuf = uiBufSize; ++ // Finally, we set the buffer flag as TRUE ++ f_TrackBuf = true; ++ // We will return uiBufSize ++ return uiBufSize; ++ } ++ ++ // Otherwise, just return the size of read data + return iSectorCount*CDIO_CD_FRAMESIZE_RAW; + } + +@@ -194,6 +249,9 @@ int64_t CFileCDDA::Seek(int64_t iFilePosition, int iWhence /*=SEEK_SET*/) + + void CFileCDDA::Close() + { ++ // Flag TrackBuffer = FALSE, TrackBuffer is empty ++ f_TrackBuf = 0; ++ + if (m_pCdIo) + { + m_cdio->cdio_destroy(m_pCdIo); +diff --git a/xbmc/filesystem/CDDAFile.h b/xbmc/filesystem/CDDAFile.h +index 0427af4534bfe59a343f0518c7f4242d93299836..e99236294fa8b9b613e465a8ecaf3ad3ba8b5a6f 100644 +--- a/xbmc/filesystem/CDDAFile.h ++++ b/xbmc/filesystem/CDDAFile.h +@@ -50,6 +50,9 @@ protected: + + protected: + CdIo_t* m_pCdIo; ++ uint8_t *m_TrackBuf; ++ size_t p_TrackBuf; ++ int f_TrackBuf; + lsn_t m_lsnStart; // Start of m_iTrack in logical sector number + lsn_t m_lsnCurrent; // Position inside the track in logical sector number + lsn_t m_lsnEnd; // End of m_iTrack in logical sector number + +From d83b2890c3b4135d505f202f9081f59a1fd7b065 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 24 Jun 2016 19:38:13 +0100 +Subject: [PATCH 24/67] codecoverlay: Include codec name in overlay + +--- + xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp | 4 ++++ + xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp | 5 ++++- + xbmc/cores/omxplayer/OMXPlayerAudio.cpp | 4 ++++ + xbmc/cores/omxplayer/OMXPlayerVideo.cpp | 8 +++++--- + xbmc/cores/omxplayer/OMXPlayerVideo.h | 1 - + 5 files changed, 17 insertions(+), 5 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +index 
24228154ecf99911f74407d73d280778e6f98fcd..188b85b12b86f887324cdcfda3c3aa4cd90d3a11 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +@@ -210,6 +210,10 @@ void CVideoPlayerAudio::UpdatePlayerInfo() + std::ostringstream s; + s << "aq:" << std::setw(2) << std::min(99,m_messageQueue.GetLevel()) << "%"; + s << ", Kb/s:" << std::fixed << std::setprecision(2) << (double)GetAudioBitrate() / 1024.0; ++ s << ", ac:" << m_processInfo.GetAudioDecoderName().c_str(); ++ if (!m_info.passthrough) ++ s << ", chan:" << m_processInfo.GetAudioChannels().c_str(); ++ s << ", " << m_streaminfo.samplerate/1000 << " kHz"; + + //print the inverse of the resample ratio, since that makes more sense + //if the resample ratio is 0.5, then we're playing twice as fast +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +index fd260d4378f6b13a158a57a2493e59cbab1f7d9d..f6d1b8572c6a4a8b4a193ebfc9d36d85ccd2d819 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +@@ -909,10 +909,13 @@ int CVideoPlayerVideo::OutputPicture(const DVDVideoPicture* src, double pts) + + std::string CVideoPlayerVideo::GetPlayerInfo() + { ++ int width, height; ++ m_processInfo.GetVideoDimensions(width, height); + std::ostringstream s; + s << "vq:" << std::setw(2) << std::min(99,GetLevel()) << "%"; + s << ", Mb/s:" << std::fixed << std::setprecision(2) << (double)GetVideoBitrate() / (1024.0*1024.0); +- s << ", fr:" << std::fixed << std::setprecision(3) << m_fFrameRate; ++ s << ", dc:" << m_processInfo.GetVideoDecoderName().c_str(); ++ s << ", " << width << "x" << height << "[" << std::setprecision(2) << m_processInfo.GetVideoDAR() << "]@" << std::fixed << std::setprecision(3) << m_processInfo.GetVideoFps() << ", deint:" << m_processInfo.GetVideoDeintMethod(); + s << ", drop:" << m_iDroppedFrames; + s << ", skip:" << m_renderManager.GetSkippedFrames(); + +diff 
--git a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +index 1e5d2b98bbef15b47994c3e4735873a9946b58c7..d43350fa0eefb5960475a02c1327efc24d138e0f 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerAudio.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerAudio.cpp +@@ -659,6 +659,10 @@ std::string OMXPlayerAudio::GetPlayerInfo() + std::ostringstream s; + s << "aq:" << std::setw(2) << std::min(99,m_messageQueue.GetLevel() + MathUtils::round_int(100.0/8.0*GetCacheTime())) << "%"; + s << ", Kb/s:" << std::fixed << std::setprecision(2) << (double)GetAudioBitrate() / 1024.0; ++ s << ", ac:" << m_processInfo.GetAudioDecoderName().c_str(); ++ if (!m_passthrough) ++ s << ", chan:" << m_processInfo.GetAudioChannels().c_str(); ++ s << ", " << m_processInfo.GetAudioSampleRate()/1000 << " kHz"; + + return s.str(); + } +diff --git a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +index 6efd0d51df46a530dd05b3add639f38a939cf92d..d61dc4f2668f8aca91bce79cfb631034061c491c 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +@@ -583,12 +583,14 @@ void OMXPlayerVideo::SetSpeed(int speed) + + std::string OMXPlayerVideo::GetPlayerInfo() + { ++ int width, height; ++ m_processInfo.GetVideoDimensions(width, height); + double match = 0.0f, phase = 0.0f, pll = 0.0f; + std::ostringstream s; +- s << "fr:" << std::fixed << std::setprecision(3) << m_fFrameRate; +- s << ", vq:" << std::setw(2) << std::min(99,GetLevel()) << "%"; +- s << ", dc:" << m_codecname; ++ s << "vq:" << std::setw(2) << std::min(99,GetLevel()) << "%"; + s << ", Mb/s:" << std::fixed << std::setprecision(2) << (double)GetVideoBitrate() / (1024.0*1024.0); ++ s << ", dc:" << m_processInfo.GetVideoDecoderName().c_str(); ++ s << ", " << width << "x" << height << "[" << std::setprecision(2) << m_processInfo.GetVideoDAR() << "]@" << std::fixed << std::setprecision(3) << m_processInfo.GetVideoFps() << ", deint:" << 
m_processInfo.GetVideoDeintMethod(); + if (m_omxVideo.GetPlayerInfo(match, phase, pll)) + { + s << ", match:" << std::fixed << std::setprecision(2) << match; +diff --git a/xbmc/cores/omxplayer/OMXPlayerVideo.h b/xbmc/cores/omxplayer/OMXPlayerVideo.h +index 0df7e72cc9d1947173c2bac5e72eb09976b51aa5..b5050081c360d29b1b478c27e6b88291e20ecdac 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerVideo.h ++++ b/xbmc/cores/omxplayer/OMXPlayerVideo.h +@@ -55,7 +55,6 @@ protected: + bool m_stalled; + IDVDStreamPlayer::ESyncState m_syncState; + bool m_flush; +- std::string m_codecname; + std::atomic_bool m_bAbortOutput; + double m_iSubtitleDelay; + bool m_bRenderSubs; + +From e46e93d403ab9f6cb6f61a5b7ac39f347bcf6089 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin <afedchin@ruswizards.com> +Date: Tue, 8 Mar 2016 21:20:58 +0300 +Subject: [PATCH 25/67] [DebugInfo] Add cpu usage info. + +--- + .../VideoPlayer/VideoRenderers/DebugRenderer.cpp | 56 ++++++++-------------- + .../VideoPlayer/VideoRenderers/DebugRenderer.h | 9 ++-- + .../VideoPlayer/VideoRenderers/RenderManager.cpp | 7 ++- + 3 files changed, 30 insertions(+), 42 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.cpp +index 846868967e15309c22b4ee46795a914230dc65d1..36be6ce222cfae9680af6834e934d88495a55950 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.cpp +@@ -27,7 +27,7 @@ using namespace OVERLAY; + + CDebugRenderer::CDebugRenderer() + { +- for (int i=0; i<4; i++) ++ for (int i = 0; i<DEBUG_OVERLAY_COUNT_MAX; i++) + { + m_overlay[i] = nullptr; + m_strDebug[i] = " "; +@@ -36,54 +36,36 @@ CDebugRenderer::CDebugRenderer() + + CDebugRenderer::~CDebugRenderer() + { +- for (int i=0; i<4; i++) ++ for (int i = 0; i<DEBUG_OVERLAY_COUNT_MAX; i++) + { + if (m_overlay[i]) + m_overlay[i]->Release(); + } + } + +-void CDebugRenderer::SetInfo(std::string &info1, std::string 
&info2, std::string &info3, std::string &info4) ++void CDebugRenderer::SetInfo(std::vector<std::string> &infos) + { + m_overlayRenderer.Release(0); + +- if (info1 != m_strDebug[0]) ++ for (size_t i = 0; i < std::min(infos.size(), (size_t)DEBUG_OVERLAY_COUNT_MAX); i++) + { +- m_strDebug[0] = info1; +- if (m_overlay[0]) +- m_overlay[0]->Release(); +- m_overlay[0] = new CDVDOverlayText(); +- m_overlay[0]->AddElement(new CDVDOverlayText::CElementText(m_strDebug[0])); +- } +- if (info2 != m_strDebug[1]) +- { +- m_strDebug[1] = info2; +- if (m_overlay[1]) +- m_overlay[1]->Release(); +- m_overlay[1] = new CDVDOverlayText(); +- m_overlay[1]->AddElement(new CDVDOverlayText::CElementText(m_strDebug[1])); +- } +- if (info3 != m_strDebug[2]) +- { +- m_strDebug[2] = info3; +- if (m_overlay[2]) +- m_overlay[2]->Release(); +- m_overlay[2] = new CDVDOverlayText(); +- m_overlay[2]->AddElement(new CDVDOverlayText::CElementText(m_strDebug[2])); ++ if (infos[i] != m_strDebug[i]) ++ { ++ if (infos[i].empty()) ++ continue; ++ m_strDebug[i] = infos[i]; ++ if (m_overlay[i]) ++ m_overlay[i]->Release(); ++ m_overlay[i] = new CDVDOverlayText(); ++ m_overlay[i]->AddElement(new CDVDOverlayText::CElementText(m_strDebug[i])); ++ } ++ + } +- if (info4 != m_strDebug[3]) ++ for (size_t i = 0; i < DEBUG_OVERLAY_COUNT_MAX; i++) + { +- m_strDebug[3] = info4; +- if (m_overlay[3]) +- m_overlay[3]->Release(); +- m_overlay[3] = new CDVDOverlayText(); +- m_overlay[3]->AddElement(new CDVDOverlayText::CElementText(m_strDebug[3])); ++ if (m_overlay[i]) ++ m_overlayRenderer.AddOverlay(m_overlay[i], 0, 0); + } +- +- m_overlayRenderer.AddOverlay(m_overlay[0], 0, 0); +- m_overlayRenderer.AddOverlay(m_overlay[1], 0, 0); +- m_overlayRenderer.AddOverlay(m_overlay[2], 0, 0); +- m_overlayRenderer.AddOverlay(m_overlay[3], 0, 0); + } + + void CDebugRenderer::Render(CRect &src, CRect &dst, CRect &view) +@@ -120,7 +102,7 @@ void CDebugRenderer::CRenderer::Render(int idx) + + COverlayText *text = 
dynamic_cast<COverlayText*>(o); + if (text) +- text->PrepareRender("arial.ttf", 1, 16, 0, m_font, m_fontBorder); ++ text->PrepareRender("arial.ttf", 1, 12, 0, m_font, m_fontBorder); + + o->m_pos = COverlay::POSITION_ABSOLUTE; + o->m_align = COverlay::ALIGN_SCREEN; +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.h b/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.h +index 85aefaace73994730f7d2bdff9de85c79e99b2a2..8005a13bc220be0c5c596d276197c11ed938ffb0 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.h ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/DebugRenderer.h +@@ -22,6 +22,9 @@ + + #include "OverlayRenderer.h" + #include <string> ++#include <vector> ++ ++#define DEBUG_OVERLAY_COUNT_MAX 6 + + class CDVDOverlayText; + +@@ -30,7 +33,7 @@ class CDebugRenderer + public: + CDebugRenderer(); + virtual ~CDebugRenderer(); +- void SetInfo(std::string &info1, std::string &info2, std::string &info3, std::string &info4); ++ void SetInfo(std::vector<std::string> &infos); + void Render(CRect &src, CRect &dst, CRect &view); + void Flush(); + +@@ -43,7 +46,7 @@ protected: + void Render(int idx) override; + }; + +- std::string m_strDebug[4]; +- CDVDOverlayText *m_overlay[4]; ++ std::string m_strDebug[DEBUG_OVERLAY_COUNT_MAX]; ++ CDVDOverlayText *m_overlay[DEBUG_OVERLAY_COUNT_MAX]; + CRenderer m_overlayRenderer; + }; +\ No newline at end of file +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp +index 93f8d6f292accf34e153fa4d3dd982e5a4b4fded..db537d33a5d55fc856bbd3ec0a7846df3bb060be 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp +@@ -24,6 +24,7 @@ + #include "guilib/GraphicContext.h" + #include "utils/MathUtils.h" + #include "threads/SingleLock.h" ++#include "utils/CPUInfo.h" + #include "utils/log.h" + #include "utils/StringUtils.h" + #include "windowing/WindowingFactory.h" +@@ 
-926,7 +927,7 @@ void CRenderManager::Render(bool clear, DWORD flags, DWORD alpha, bool gui) + + if (m_renderDebug) + { +- std::string audio, video, player, vsync; ++ std::string audio, video, player, vsync, cpu; + + m_playerPort->GetDebugInfo(audio, video, player); + +@@ -940,8 +941,10 @@ void CRenderManager::Render(bool clear, DWORD flags, DWORD alpha, bool gui) + missedvblanks, + clockspeed - 100.0); + } ++ cpu = g_cpuInfo.GetCoresUsageString(); + +- m_debugRenderer.SetInfo(audio, video, player, vsync); ++ std::vector<std::string> infos = { audio, video, player, vsync, cpu }; ++ m_debugRenderer.SetInfo(infos); + m_debugRenderer.Render(src, dst, view); + + m_debugTimer.Set(1000); + +From e674b4137eb3ffe70a0bae619e24862ceae51b25 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 22 May 2015 13:56:29 +0100 +Subject: [PATCH 26/67] ffmpeg: Allow neon to be enabled in unified builds + +--- + tools/depends/target/ffmpeg/Makefile | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index dffe2da1dfd09e06c5f15c362f7cbe3cf2a26f75..4081dddb6bc2db53559d35506cad6af4cd668362 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -23,7 +23,11 @@ ffmpg_config += --enable-gnutls + ffmpg_config += --enable-encoder=png --enable-encoder=mjpeg + + ifeq ($(CROSS_COMPILING), yes) ++ ifeq ($(CPU), cortex-a7) ++ ffmpg_config += --arch=arm --enable-cross-compile ++ else + ffmpg_config += --arch=$(CPU) --enable-cross-compile ++ endif + endif + ifeq ($(OS), linux) + ffmpg_config += --target-os=$(OS) --cpu=$(CPU) + +From c575384e34c27d1ffb70e0181e45bc4078b1d2ac Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 27 Feb 2015 14:37:27 +0000 +Subject: [PATCH 27/67] ffmpeg: Add some upstream HEVC optimisations + +--- + tools/depends/target/ffmpeg/Makefile | 6 +- + .../added_ARM_NEON_optimized_SAO_patches.patch | 3328 
++++++++++++++++++++ + tools/depends/target/ffmpeg/autobuild.sh | 3 + + ...hevcdsp_ARM_NEON_optimized_epel_functions.patch | 409 +++ + 4 files changed, 3745 insertions(+), 1 deletion(-) + create mode 100644 tools/depends/target/ffmpeg/added_ARM_NEON_optimized_SAO_patches.patch + create mode 100644 tools/depends/target/ffmpeg/hevcdsp_ARM_NEON_optimized_epel_functions.patch + +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index 4081dddb6bc2db53559d35506cad6af4cd668362..d9db534dd8c59a4993a3509737d901fbb3923de8 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -1,7 +1,8 @@ + include ../../Makefile.include + include FFMPEG-VERSION + DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ +- 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch ++ 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch \ ++ hevcdsp_ARM_NEON_optimized_epel_functions.patch added_ARM_NEON_optimized_SAO_patches.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -78,6 +79,9 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); sed -i".bak" -e "s%pkg_config_default=pkg-config%export PKG_CONFIG_LIBDIR=$(PREFIX)/lib/pkgconfig \&\& pkg_config_default=$(NATIVEPREFIX)/bin/pkg-config%" configure + cd $(PLATFORM); patch -p1 < ../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch ++ cd $(PLATFORM); patch -p1 < ../hevcdsp_ARM_NEON_optimized_epel_functions.patch ++ cd $(PLATFORM); patch -p1 < ../added_ARM_NEON_optimized_SAO_patches.patch ++ + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ + ./configure $(ffmpg_config) +diff --git a/tools/depends/target/ffmpeg/added_ARM_NEON_optimized_SAO_patches.patch 
b/tools/depends/target/ffmpeg/added_ARM_NEON_optimized_SAO_patches.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..792b5fea581613a6fe9108443357f97518f4b4db +--- /dev/null ++++ b/tools/depends/target/ffmpeg/added_ARM_NEON_optimized_SAO_patches.patch +@@ -0,0 +1,3328 @@ ++From b0cb307c253d2c9f4b94a54dfc74ddb83af984cc Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi <seppo.tomperi@vtt.fi> ++Date: Mon, 8 Dec 2014 13:24:40 +0200 ++Subject: [PATCH 1/9] added ARM NEON optimized SAO band offset ++ ++--- ++ libavcodec/arm/Makefile | 3 +- ++ libavcodec/arm/hevcdsp_init_neon.c | 47 +++++++++ ++ libavcodec/arm/hevcdsp_sao_neon.S | 204 +++++++++++++++++++++++++++++++++++++ ++ 3 files changed, 253 insertions(+), 1 deletion(-) ++ create mode 100644 libavcodec/arm/hevcdsp_sao_neon.S ++ ++diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile ++index 6051ec8..093a2e8 100644 ++--- a/libavcodec/arm/Makefile +++++ b/libavcodec/arm/Makefile ++@@ -133,7 +133,8 @@ NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_neon.o \ ++ arm/hevcdsp_deblock_neon.o \ ++ arm/hevcdsp_epel_neon.o \ ++ arm/hevcdsp_idct_neon.o \ ++- arm/hevcdsp_qpel_neon.o +++ arm/hevcdsp_qpel_neon.o \ +++ arm/hevcdsp_sao_neon.o ++ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o ++ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \ ++ arm/rv40dsp_neon.o ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 733ff08..69e2b2c 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -22,6 +22,7 @@ ++ #include "libavutil/arm/cpu.h" ++ #include "libavcodec/hevcdsp.h" ++ #include "hevcdsp_arm.h" +++#include "../bit_depth_template.c" ++ ++ void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ++ void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ++@@ -43,6 
+44,11 @@ void ff_hevc_transform_add_16x16_neon_8(uint8_t *_dst, int16_t *coeffs, ++ void ff_hevc_transform_add_32x32_neon_8(uint8_t *_dst, int16_t *coeffs, ++ ptrdiff_t stride); ++ +++void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++ ++ #define PUT_PIXELS(name) \ ++ void name(int16_t *dst, uint8_t *src, \ ++ ptrdiff_t srcstride, int height, \ ++@@ -151,6 +157,44 @@ void ff_hevc_put_qpel_bi_neon_wrapper(uint8_t *dst, ptrdiff_t dststride, uint8_t ++ put_hevc_qpel_uw_neon[my][mx](dst, dststride, src, srcstride, width, height, src2, MAX_PB_SIZE); ++ } ++ +++static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, +++ int16_t *sao_offset_val, int sao_left_class, int width, int height) +++{ +++ pixel *dst = (pixel *)_dst; +++ pixel *src = (pixel *)_src; +++ int8_t offset_table[32] = { 0 }; +++ int k, y, x; +++ int shift = 3; // BIT_DEPTH - 5 +++ +++ stride_src /= sizeof(pixel); +++ stride_dst /= sizeof(pixel); +++ +++ for (k = 0; k < 4; k++) +++ offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1]; +++ +++ switch(width){ +++ case 8: +++ ff_hevc_sao_band_w8_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ break; +++ case 16: +++ ff_hevc_sao_band_w16_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ break; +++ case 32: +++ ff_hevc_sao_band_w32_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ break; +++ case 64: +++ 
ff_hevc_sao_band_w64_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ break; +++ default: +++ for (y = 0; y < height; y++) { +++ for (x = 0; x < width; x++) +++ dst[x] = av_clip_pixel(src[x] + offset_table[src[x] >> shift]); +++ dst += stride_dst; +++ src += stride_src; +++ } +++ } +++} +++ ++ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ { ++ if (bit_depth == 8) { ++@@ -170,6 +214,9 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ c->transform_add[2] = ff_hevc_transform_add_16x16_neon_8; ++ c->transform_add[3] = ff_hevc_transform_add_32x32_neon_8; ++ c->idct_4x4_luma = ff_hevc_transform_luma_4x4_neon_8; +++ for (x = 0; x < sizeof c->sao_band_filter / sizeof *c->sao_band_filter; x++) { +++ c->sao_band_filter[x] = ff_hevc_sao_band_neon_wrapper; +++ } ++ put_hevc_qpel_neon[1][0] = ff_hevc_put_qpel_v1_neon_8; ++ put_hevc_qpel_neon[2][0] = ff_hevc_put_qpel_v2_neon_8; ++ put_hevc_qpel_neon[3][0] = ff_hevc_put_qpel_v3_neon_8; ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++new file mode 100644 ++index 0000000..1f0ad64 ++--- /dev/null +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -0,0 +1,204 @@ +++/* +++ * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi> +++ * +++ * This file is part of FFmpeg. +++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. 
+++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++#include "libavutil/arm/asm.S" +++#include "neon.S" +++ +++function ff_hevc_sao_band_w8_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // offset_table +++ vpush {d8-d15} +++ vld1.8 {q0, q1}, [r5] // offset table +++ +++1: subs r4, #1 +++ vld1.8 {d24}, [r1], r3 +++ vshr.u8 d16, d24, #3 +++ vtbl.8 d16, {q0, q1}, d16 +++ vmovl.s8 q2, d16 +++ vmovl.u8 q6, d24 +++ vadd.s16 q2, q6 +++ vqmovun.s16 d4, q2 +++ vst1.8 {d4}, [r0], r2 +++ bne 1b +++ +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_band_w16_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // offset_table +++ vpush {d8-d15} +++ vld1.8 {q0, q1}, [r5] // offset table +++ +++1: subs r4, #1 +++ vld1.8 {q12}, [r1], r3 +++ +++ vshr.u8 q8, q12, #3 +++ +++ vtbl.8 d16, {q0, q1}, d16 +++ vtbl.8 d17, {q0, q1}, d17 +++ +++ vmovl.s8 q2, d16 +++ vmovl.s8 q3, d17 +++ +++ vmovl.u8 q6, d24 +++ vmovl.u8 q7, d25 +++ +++ vadd.s16 q2, q6 +++ vadd.s16 q3, q7 +++ +++ vqmovun.s16 d4, q2 +++ vqmovun.s16 d5, q3 +++ +++ vstm.8 r0, {q2} +++ add r0, r2 +++ bne 1b +++ +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_band_w32_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // offset_table +++ vpush {d8-d15} +++ vld1.8 {q0, q1}, [r5] // offset table +++ +++1: subs r4, #1 +++ vld1.8 {q12-q13}, [r1], r3 +++ +++ vshr.u8 q8, q12, #3 +++ vshr.u8 q9, q13, #3 +++ +++ vtbl.8 d16, {q0, q1}, d16 +++ vtbl.8 d17, {q0, q1}, d17 +++ vtbl.8 d18, {q0, q1}, d18 +++ vtbl.8 d19, {q0, q1}, d19 +++ +++ vmovl.s8 q2, d16 +++ vmovl.s8 q3, d17 // q8 free +++ vmovl.s8 q4, d18 +++ vmovl.s8 q5, d19 // q9 free +++ +++ vmovl.u8 q6, d24 +++ vmovl.u8 
q7, d25 // q12 free +++ vmovl.u8 q8, d26 +++ vmovl.u8 q9, d27 // q13 free +++ +++ vadd.s16 q2, q6 +++ vadd.s16 q3, q7 +++ vadd.s16 q4, q8 +++ vadd.s16 q5, q9 +++ +++ vqmovun.s16 d4, q2 +++ vqmovun.s16 d5, q3 +++ vqmovun.s16 d6, q4 // q4 free +++ vqmovun.s16 d7, q5 // q5 free +++ +++ vst1.8 {q2-q3}, [r0], r2 +++ bne 1b +++ +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_band_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // offset_table +++ vpush {d8-d15} +++ vld1.8 {q0, q1}, [r5] // offset table +++ +++1: subs r4, #1 +++ vld1.8 {q12-q13}, [r1]! +++ vld1.8 {q14-q15}, [r1], r3 +++ sub r1, #32 +++ +++ vshr.u8 q8, q12, #3 +++ vshr.u8 q9, q13, #3 +++ vshr.u8 q10, q14, #3 +++ vshr.u8 q11, q15, #3 +++ +++ vtbl.8 d16, {q0, q1}, d16 +++ vtbl.8 d17, {q0, q1}, d17 +++ vtbl.8 d18, {q0, q1}, d18 +++ vtbl.8 d19, {q0, q1}, d19 +++ vtbl.8 d20, {q0, q1}, d20 +++ vtbl.8 d21, {q0, q1}, d21 +++ vtbl.8 d22, {q0, q1}, d22 +++ vtbl.8 d23, {q0, q1}, d23 +++ +++ vmovl.s8 q2, d16 +++ vmovl.s8 q3, d17 // q8 free +++ vmovl.s8 q4, d18 +++ vmovl.s8 q5, d19 // q9 free +++ +++ vmovl.u8 q6, d24 +++ vmovl.u8 q7, d25 // q12 free +++ vmovl.u8 q8, d26 +++ vmovl.u8 q9, d27 // q13 free +++ +++ vadd.s16 q2, q6 +++ vadd.s16 q3, q7 +++ vadd.s16 q4, q8 +++ vadd.s16 q5, q9 +++ +++ vqmovun.s16 d4, q2 +++ vqmovun.s16 d5, q3 +++ vqmovun.s16 d6, q4 // q4 free +++ vqmovun.s16 d7, q5 // q5 free +++ +++ // free q4 -q9, q12 - q13 +++ vmovl.s8 q4, d20 +++ vmovl.s8 q5, d21 // q10 free +++ vmovl.s8 q6, d22 +++ vmovl.s8 q7, d23 // q11 free +++ +++ vmovl.u8 q8, d28 +++ vmovl.u8 q9, d29 // q14 free +++ vmovl.u8 q10, d30 +++ vmovl.u8 q11, d31 // q15 free +++ +++ vadd.s16 q4, q8 +++ vadd.s16 q5, q9 +++ vadd.s16 q6, q10 +++ vadd.s16 q7, q11 +++ +++ vqmovun.s16 d8, q4 +++ vqmovun.s16 d9, q5 +++ vqmovun.s16 d10, q6 +++ vqmovun.s16 d11, q7 +++ +++ vstm.8 r0, {q2-q5} +++ add r0, r2 +++ bne 1b +++ +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc 
+++ ++-- ++2.5.0 ++ ++ ++From 8429b1de64bb871d57651ecfe3b084e2dfe0af51 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi <seppo.tomperi@vtt.fi> ++Date: Wed, 27 May 2015 18:10:20 +0100 ++Subject: [PATCH 2/9] added NEON optimized sao edge for eo1 width 64 ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 47 ++++++++++++ ++ libavcodec/arm/hevcdsp_sao_neon.S | 147 +++++++++++++++++++++++++++++++++++++ ++ 2 files changed, 194 insertions(+) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 69e2b2c..c7b5404 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -22,6 +22,7 @@ ++ #include "libavutil/arm/cpu.h" ++ #include "libavcodec/hevcdsp.h" ++ #include "hevcdsp_arm.h" +++#include "libavcodec/avcodec.h" ++ #include "../bit_depth_template.c" ++ ++ void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ++@@ -48,6 +49,7 @@ void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_d ++ void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++ void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++ void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++void ff_hevc_sao_edge_eo1_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++ ++ #define PUT_PIXELS(name) \ ++ void name(int16_t *dst, uint8_t *src, \ ++@@ -195,6 +197,50 @@ static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_ ++ } ++ } ++ +++#define CMP(a, b) ((a) > (b) ? 1 : ((a) == (b) ? 
0 : -1)) +++static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t *_src /* align 32 */, ptrdiff_t stride_dst, +++ int16_t *_sao_offset_val, int eo, int width, int height) +++{ +++ static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; +++ static const int8_t pos[4][2][2] = { +++ { { -1, 0 }, { 1, 0 } }, // horizontal +++ { { 0, -1 }, { 0, 1 } }, // vertical +++ { { -1, -1 }, { 1, 1 } }, // 45 degree +++ { { 1, -1 }, { -1, 1 } }, // 135 degree +++ }; +++ int8_t sao_offset_val[8]; // padding of 3 for vld +++ ptrdiff_t stride_src = (2*MAX_PB_SIZE + FF_INPUT_BUFFER_PADDING_SIZE); +++ pixel *dst = (pixel *)_dst; +++ pixel *src = (pixel *)_src; +++ int a_stride, b_stride; +++ int x, y; +++ +++ for (x = 0; x < 5; x++) { +++ sao_offset_val[x] = _sao_offset_val[x]; +++ } +++ +++ stride_src /= sizeof(pixel); +++ stride_dst /= sizeof(pixel); +++ +++ if (eo == 1 && width == 64) { +++ ff_hevc_sao_edge_eo1_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ } else { +++ a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src; +++ b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src; +++ for (y = 0; y < height; y++) { +++ for (x = 0; x < width; x++) { +++ int diff0 = CMP(src[x], src[x + a_stride]); +++ int diff1 = CMP(src[x], src[x + b_stride]); +++ int offset_val = edge_idx[2 + diff0 + diff1]; +++ dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]); +++ } +++ src += stride_src; +++ dst += stride_dst; +++ } +++ } +++} +++#undef CMP +++ ++ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ { ++ if (bit_depth == 8) { ++@@ -216,6 +262,7 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ c->idct_4x4_luma = ff_hevc_transform_luma_4x4_neon_8; ++ for (x = 0; x < sizeof c->sao_band_filter / sizeof *c->sao_band_filter; x++) { ++ c->sao_band_filter[x] = ff_hevc_sao_band_neon_wrapper; +++ c->sao_edge_filter[x] = ff_hevc_sao_edge_neon_wrapper; ++ } ++ put_hevc_qpel_neon[1][0] = 
ff_hevc_put_qpel_v1_neon_8; ++ put_hevc_qpel_neon[2][0] = ff_hevc_put_qpel_v2_neon_8; ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 1f0ad64..5ec2de9 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -202,3 +202,150 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ bx lr ++ endfunc ++ +++function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x02 +++ vpush {d8-d15} +++1: subs r4, #1 +++ // load a +++ sub r1, r3 +++ vld1.8 {q0-q1}, [r1]! +++ vld1.8 {q2-q3}, [r1], r3 +++ sub r1, #32 +++ // load c +++ vld1.8 {q4-q5}, [r1]! +++ vld1.8 {q6-q7}, [r1], r3 +++ sub r1, #32 +++ // load b +++ vld1.8 {q8-q9}, [r1]! +++ vld1.8 {q10-q11}, [r1], r3 +++ sub r1, #32 +++ +++ vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 +++ vcgt.u8 q13, q5, q1 +++ vcgt.u8 q1, q1, q5 +++ vcgt.u8 q14, q6, q2 +++ vcgt.u8 q2, q2, q6 +++ vcgt.u8 q15, q7, q3 +++ vcgt.u8 q3, q3, q7 +++ +++ vsub.s8 q12, q0, q12 // diff0 +++ vsub.s8 q13, q1, q13 +++ vsub.s8 q14, q2, q14 +++ vsub.s8 q15, q3, q15 +++ +++ vcgt.u8 q0, q4, q8 // c > b +++ vcgt.u8 q8, q8, q4 // b > c +++ vcgt.u8 q1, q5, q9 +++ vcgt.u8 q9, q9, q5 +++ vcgt.u8 q2, q6, q10 +++ vcgt.u8 q10, q10, q6 +++ vcgt.u8 q3, q7, q11 +++ vcgt.u8 q11, q11, q7 +++ +++ vsub.s8 q0, q8, q0 // diff1 +++ vsub.s8 q1, q9, q1 +++ vsub.s8 q2, q10, q2 +++ vsub.s8 q3, q11, q3 +++ +++ veor.u8 q8, q8 // zero register +++ vdup.s8 q9, r6 // 2 to all elements +++ add r6, #1 +++ vdup.s8 q10, r6 // 3 to all elements +++ sub r6, #1 +++ +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 +++ vadd.s8 q2, q14 +++ vadd.s8 q3, q15 +++ +++ vcgt.s8 q4, q0, q8 // diff0 + diff1 > 0 +++ vcgt.s8 q5, q1, q8 +++ vcgt.s8 q6, q2, q8 +++ vcgt.s8 q7, q3, q8 +++ +++ vclt.s8 q11, q0, q8 // diff0 + diff1 < 0 +++ vclt.s8 q12, q1, q8 +++ vclt.s8 q13, 
q2, q8 +++ vclt.s8 q14, q3, q8 +++ +++ vadd.s8 q8, q0, q9 // diff0 + diff1 + 2 +++ vand.8 q15, q8, q4 +++ vadd.s8 q8, q0, q10 // diff0 + diff1 + 3 +++ vand.8 q8, q8, q11 +++ vadd.s8 q0, q15, q8 // offset_idx +++ +++ vadd.s8 q8, q1, q9 // diff0 + diff1 + 2 +++ vand.8 q15, q8, q5 +++ vadd.s8 q8, q1, q10 // diff0 + diff1 + 3 +++ vand.8 q8, q8, q12 +++ vadd.s8 q1, q15, q8 // offset_idx +++ +++ vadd.s8 q8, q2, q9 // diff0 + diff1 + 2 + 2 +++ vand.8 q15, q8, q6 +++ vadd.s8 q8, q2, q10 // diff0 + diff1 + 2 + 3 +++ vand.8 q8, q8, q13 +++ vadd.s8 q2, q15, q8 // offset_idx +++ +++ vadd.s8 q8, q3, q9 // diff0 + diff1 + 2 + 2 +++ vand.8 q15, q8, q7 +++ vadd.s8 q8, q3, q10 // diff0 + diff1 + 2 + 3 +++ vand.8 q8, q8, q14 +++ vadd.s8 q3, q15, q8 // offset_idx +++ // TODO: load only once +++ vld1.8 d16, [r5] +++ +++ vtbl.8 d0, {d16}, d0 +++ vtbl.8 d1, {d16}, d1 +++ vtbl.8 d2, {d16}, d2 +++ vtbl.8 d3, {d16}, d3 +++ vtbl.8 d4, {d16}, d4 +++ vtbl.8 d5, {d16}, d5 +++ vtbl.8 d6, {d16}, d6 +++ vtbl.8 d7, {d16}, d7 +++ +++ // TODO: load only once +++ // load c again +++ sub r1, r3 +++ sub r1, r3 +++ vld1.8 {q4-q5}, [r1]! 
+++ vld1.8 {q6-q7}, [r1], r3 +++ sub r1, #32 +++ +++ vmovl.u8 q8, d8 +++ vmovl.u8 q9, d9 +++ vmovl.u8 q10, d10 +++ vmovl.u8 q11, d11 +++ vmovl.u8 q12, d12 +++ vmovl.u8 q13, d13 +++ vmovl.u8 q14, d14 +++ vmovl.u8 q15, d15 +++ +++ vaddw.s8 q8, d0 +++ vaddw.s8 q9, d1 +++ vaddw.s8 q10, d2 +++ vaddw.s8 q11, d3 +++ vaddw.s8 q12, d4 +++ vaddw.s8 q13, d5 +++ vaddw.s8 q14, d6 +++ vaddw.s8 q15, d7 +++ +++ vqmovun.s16 d0, q8 +++ vqmovun.s16 d1, q9 +++ vqmovun.s16 d2, q10 +++ vqmovun.s16 d3, q11 +++ vqmovun.s16 d4, q12 +++ vqmovun.s16 d5, q13 +++ vqmovun.s16 d6, q14 +++ vqmovun.s16 d7, q15 +++ +++ vstm r0, {q0-q3} +++ add r0, r2 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc ++-- ++2.5.0 ++ ++ ++From 402e2bd1c5ad659c757bf9734abe6331904fb9e2 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi <seppo.tomperi@vtt.fi> ++Date: Tue, 16 Dec 2014 16:28:25 +0200 ++Subject: [PATCH 3/9] Added SAO edge offset for ARM NEON w32 and w64 ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 46 +++- ++ libavcodec/arm/hevcdsp_sao_neon.S | 510 +++++++++++++++++++++++++++++++------ ++ 2 files changed, 474 insertions(+), 82 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index c7b5404..c32940e 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -49,7 +49,16 @@ void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_d ++ void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++ void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++ void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++ +++void ff_hevc_sao_edge_eo0_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, 
int height, int8_t *sao_offset_table); +++void ff_hevc_sao_edge_eo1_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); +++void ff_hevc_sao_edge_eo2_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); +++void ff_hevc_sao_edge_eo3_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); +++ +++void ff_hevc_sao_edge_eo0_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++ void ff_hevc_sao_edge_eo1_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); +++void ff_hevc_sao_edge_eo2_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); +++void ff_hevc_sao_edge_eo3_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++ ++ #define PUT_PIXELS(name) \ ++ void name(int16_t *dst, uint8_t *src, \ ++@@ -222,9 +231,40 @@ static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t ++ stride_src /= sizeof(pixel); ++ stride_dst /= sizeof(pixel); ++ ++- if (eo == 1 && width == 64) { ++- ff_hevc_sao_edge_eo1_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); ++- } else { +++ switch (width) { +++ case 32: +++ switch(eo) { +++ case 0: +++ ff_hevc_sao_edge_eo0_w32_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 1: +++ ff_hevc_sao_edge_eo1_w32_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 2: +++ ff_hevc_sao_edge_eo2_w32_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 3: +++ ff_hevc_sao_edge_eo3_w32_neon_8(dst, src, stride_dst, stride_src, height, 
sao_offset_val); +++ break; +++ } +++ break; +++ case 64: +++ switch(eo) { +++ case 0: +++ ff_hevc_sao_edge_eo0_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 1: +++ ff_hevc_sao_edge_eo1_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 2: +++ ff_hevc_sao_edge_eo2_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ case 3: +++ ff_hevc_sao_edge_eo3_w64_neon_8(dst, src, stride_dst, stride_src, height, sao_offset_val); +++ break; +++ } +++ break; +++ default: ++ a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src; ++ b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src; ++ for (y = 0; y < height; y++) { ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 5ec2de9..4687012 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -202,27 +202,7 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ bx lr ++ endfunc ++ ++-function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x02 ++- vpush {d8-d15} ++-1: subs r4, #1 ++- // load a ++- sub r1, r3 ++- vld1.8 {q0-q1}, [r1]! ++- vld1.8 {q2-q3}, [r1], r3 ++- sub r1, #32 ++- // load c ++- vld1.8 {q4-q5}, [r1]! ++- vld1.8 {q6-q7}, [r1], r3 ++- sub r1, #32 ++- // load b ++- vld1.8 {q8-q9}, [r1]! 
++- vld1.8 {q10-q11}, [r1], r3 ++- sub r1, #32 ++- +++.macro edge_w64_body ++ vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 ++ vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 ++ vcgt.u8 q13, q5, q1 ++@@ -251,69 +231,61 @@ function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 ++ vsub.s8 q2, q10, q2 ++ vsub.s8 q3, q11, q3 ++ ++- veor.u8 q8, q8 // zero register ++- vdup.s8 q9, r6 // 2 to all elements ++- add r6, #1 ++- vdup.s8 q10, r6 // 3 to all elements ++- sub r6, #1 ++- ++ vadd.s8 q0, q12 //diff0 + diff1 ++ vadd.s8 q1, q13 ++ vadd.s8 q2, q14 ++ vadd.s8 q3, q15 ++ ++- vcgt.s8 q4, q0, q8 // diff0 + diff1 > 0 ++- vcgt.s8 q5, q1, q8 ++- vcgt.s8 q6, q2, q8 ++- vcgt.s8 q7, q3, q8 ++- ++- vclt.s8 q11, q0, q8 // diff0 + diff1 < 0 ++- vclt.s8 q12, q1, q8 ++- vclt.s8 q13, q2, q8 ++- vclt.s8 q14, q3, q8 ++- ++- vadd.s8 q8, q0, q9 // diff0 + diff1 + 2 ++- vand.8 q15, q8, q4 ++- vadd.s8 q8, q0, q10 // diff0 + diff1 + 3 ++- vand.8 q8, q8, q11 ++- vadd.s8 q0, q15, q8 // offset_idx ++- ++- vadd.s8 q8, q1, q9 // diff0 + diff1 + 2 ++- vand.8 q15, q8, q5 ++- vadd.s8 q8, q1, q10 // diff0 + diff1 + 3 ++- vand.8 q8, q8, q12 ++- vadd.s8 q1, q15, q8 // offset_idx ++- ++- vadd.s8 q8, q2, q9 // diff0 + diff1 + 2 + 2 ++- vand.8 q15, q8, q6 ++- vadd.s8 q8, q2, q10 // diff0 + diff1 + 2 + 3 ++- vand.8 q8, q8, q13 ++- vadd.s8 q2, q15, q8 // offset_idx ++- ++- vadd.s8 q8, q3, q9 // diff0 + diff1 + 2 + 2 ++- vand.8 q15, q8, q7 ++- vadd.s8 q8, q3, q10 // diff0 + diff1 + 2 + 3 ++- vand.8 q8, q8, q14 ++- vadd.s8 q3, q15, q8 // offset_idx ++- // TODO: load only once ++- vld1.8 d16, [r5] ++- ++- vtbl.8 d0, {d16}, d0 ++- vtbl.8 d1, {d16}, d1 ++- vtbl.8 d2, {d16}, d2 ++- vtbl.8 d3, {d16}, d3 ++- vtbl.8 d4, {d16}, d4 ++- vtbl.8 d5, {d16}, d5 ++- vtbl.8 d6, {d16}, d6 ++- vtbl.8 d7, {d16}, d7 ++- ++- // TODO: load only once ++- // load c again ++- sub r1, r3 ++- sub r1, r3 ++- vld1.8 {q4-q5}, [r1]! 
++- vld1.8 {q6-q7}, [r1], r3 ++- sub r1, #32 +++ vdup.s8 q9, r6 // 3 to all elements +++ sub r6, #1 +++ +++ vclt.s8 q12, q0, #0 // diff0 + diff1 < 0 +++ vclt.s8 q13, q1, #0 +++ vclt.s8 q14, q2, #0 +++ vclt.s8 q15, q3, #0 +++ +++ vadd.s8 q8, q0, q9 // diff0 + diff1 + 3 +++ vadd.s8 q10, q1, q9 +++ vand.8 q12, q8, q12 // if (diff0 + diff1 < 0) then (diff0 + diff1 + 3) else 0 +++ vand.8 q13, q10, q13 +++ vadd.s8 q8, q2, q9 +++ vadd.s8 q10, q3, q9 +++ vand.8 q14, q8, q14 +++ vand.8 q15, q10, q15 +++ +++ vdup.s8 q9, r6 // 2 to all elements +++ add r6, #1 +++ +++ vcgt.s8 q10, q0, #0 // diff0 + diff1 > 0 +++ vadd.s8 q8, q0, q9 // diff0 + diff1 + 2 +++ vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vcgt.s8 q10, q1, #0 +++ vadd.s8 q0, q11, q12 // offset_idx +++ +++ vadd.s8 q8, q1, q9 // diff0 + diff1 + 2 +++ vcgt.s8 q12, q2, #0 +++ vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vadd.s8 q8, q2, q9 // diff0 + diff1 + 2 +++ vadd.s8 q1, q11, q13 +++ +++ vand.8 q11, q8, q12 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vcgt.s8 q10, q3, #0 +++ vadd.s8 q2, q11, q14 +++ +++ vadd.s8 q8, q3, q9 // diff0 + diff1 + 2 +++ vmov.32 d18[0], r7 // load offset table from general registers +++ vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vmov.32 d18[1], r5 // load rest of offset table +++ vadd.s8 q3, q11, q15 +++ +++ vtbl.8 d0, {d18}, d0 +++ vtbl.8 d1, {d18}, d1 +++ vtbl.8 d2, {d18}, d2 +++ vtbl.8 d3, {d18}, d3 +++ vtbl.8 d4, {d18}, d4 +++ vtbl.8 d5, {d18}, d5 +++ vtbl.8 d6, {d18}, d6 +++ vtbl.8 d7, {d18}, d7 ++ ++ vmovl.u8 q8, d8 ++ vmovl.u8 q9, d9 ++@@ -344,8 +316,388 @@ function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 ++ ++ vstm r0, {q0-q3} ++ add r0, r2 +++.endm +++ +++.macro edge_w32_body +++ vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 +++ vcgt.u8 q13, q5, q1 +++ vcgt.u8 q1, q1, q5 +++ +++ vsub.s8 q12, q0, q12 
// diff0 +++ vcgt.u8 q0, q4, q8 // c > b +++ vsub.s8 q13, q1, q13 // diff0 part 2 +++ +++ vcgt.u8 q6, q8, q4 // b > c +++ vcgt.u8 q1, q5, q9 +++ vcgt.u8 q7, q9, q5 +++ +++ vsub.s8 q0, q6, q0 // diff1 +++ vsub.s8 q1, q7, q1 // diff1 part 2 +++ vadd.s8 q0, q12 //diff0 + diff1 +++ +++ vdup.s8 q7, r6 // 3 to all elements +++ sub r6, #1 +++ vadd.s8 q1, q13 +++ +++ vclt.s8 q12, q0, #0 // diff0 + diff1 < 0 +++ vclt.s8 q13, q1, #0 +++ +++ vadd.s8 q6, q0, q7 // diff0 + diff1 + 3 +++ vadd.s8 q10, q1, q7 +++ vdup.s8 q7, r6 // 2 to all elements +++ add r6, #1 +++ vand.8 q12, q6, q12 // if (diff0 + diff1 < 0) then (diff0 + diff1 + 3) else 0 +++ vand.8 q13, q10, q13 +++ +++ +++ vcgt.s8 q10, q0, #0 // diff0 + diff1 > 0 +++ vadd.s8 q6, q0, q7 // diff0 + diff1 + 2 +++ vand.8 q11, q6, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vcgt.s8 q10, q1, #0 +++ vadd.s8 q0, q11, q12 // offset_idx +++ +++ vadd.s8 q6, q1, q7 // diff0 + diff1 + 2 +++ vmov.32 d14[0], r7 // load offset table from general registers +++ vand.8 q11, q6, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 +++ vmov.32 d14[1], r5 // load rest of offset table +++ vadd.s8 q1, q11, q13 +++ +++ vtbl.8 d0, {d14}, d0 +++ vtbl.8 d1, {d14}, d1 +++ vtbl.8 d2, {d14}, d2 +++ vtbl.8 d3, {d14}, d3 +++ +++ vmovl.u8 q6, d8 +++ vmovl.u8 q7, d9 +++ vmovl.u8 q10, d10 +++ vmovl.u8 q11, d11 +++ +++ vaddw.s8 q6, d0 +++ vaddw.s8 q7, d1 +++ vaddw.s8 q10, d2 +++ vaddw.s8 q11, d3 +++ +++ vqmovun.s16 d0, q6 +++ vqmovun.s16 d1, q7 +++ vqmovun.s16 d2, q10 +++ vqmovun.s16 d3, q11 +++ +++ vstm r0, {q0-q1} +++ add r0, r2 +++.endm +++ +++function ff_hevc_sao_edge_eo0_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++ sub r1, #8 +++1: subs r4, #1 +++ vld1.64 {q10-q11}, [r1]! +++ vld1.64 {q12-q13}, [r1]! 
+++ vld1.64 {q14}, [r1], r3 +++ sub r1, #64 +++ // load a +++ vext.8 q0, q10, q11, #7 +++ vext.8 q1, q11, q12, #7 +++ vext.8 q2, q12, q13, #7 +++ vext.8 q3, q13, q14, #7 +++ // load c +++ vext.8 q4, q10, q11, #8 +++ vext.8 q5, q11, q12, #8 +++ vext.8 q6, q12, q13, #8 +++ vext.8 q7, q13, q14, #8 +++ // load b +++ vext.8 q8, q10, q11, #9 +++ vext.8 q9, q11, q12, #9 +++ vext.8 q10, q12, q13, #9 +++ vext.8 q11, q13, q14, #9 +++ edge_w64_body +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++ sub r1, r3 +++ // load a +++ vld1.8 {q0-q1}, [r1]! +++ vld1.8 {q2-q3}, [r1], r3 +++ sub r1, #32 +++1: subs r4, #1 +++ // load c +++ vld1.8 {q4-q5}, [r1]! +++ vld1.8 {q6-q7}, [r1], r3 +++ sub r1, #32 +++ // load b +++ vld1.8 {q8-q9}, [r1]! +++ vld1.8 {q10-q11}, [r1] +++ sub r1, #32 +++ edge_w64_body +++ // copy c to a +++ vmov.64 q0, q4 +++ vmov.64 q1, q5 +++ vmov.64 q2, q6 +++ vmov.64 q3, q7 ++ bne 1b ++ vpop {d8-d15} ++ pop {r4-r8} ++ bx lr ++ endfunc +++ +++function ff_hevc_sao_edge_eo2_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++1: sub r1, r3 +++ // load a +++ // TODO: fix unaligned load +++ // don't reload a like in eo1 +++ sub r1, #1 +++ vld1.8 {q0-q1}, [r1]! +++ vld1.8 {q2-q3}, [r1], r3 +++ sub r1, #31 +++ subs r4, #1 +++ // load c +++ vld1.8 {q4-q5}, [r1]! +++ vld1.8 {q6-q7}, [r1], r3 +++ sub r1, #32 +++ // load b +++ add r1, #1 +++ vld1.8 {q8-q9}, [r1]! 
+++ vld1.8 {q10-q11}, [r1] +++ sub r1, #33 +++ edge_w64_body +++ // copy c to a +++ vmov.64 q0, q4 +++ vmov.64 q1, q5 +++ vmov.64 q2, q6 +++ vmov.64 q3, q7 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo3_w64_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++1: sub r1, r3 +++ // load a +++ // TODO: fix unaligned load +++ // don't reload a like in eo1 +++ add r1, #1 +++ vld1.8 {q0-q1}, [r1]! +++ vld1.8 {q2-q3}, [r1], r3 +++ sub r1, #33 +++ subs r4, #1 +++ // load c +++ vld1.8 {q4-q5}, [r1]! +++ vld1.8 {q6-q7}, [r1], r3 +++ sub r1, #32 +++ // load b +++ sub r1, #1 +++ vld1.8 {q8-q9}, [r1]! +++ vld1.8 {q10-q11}, [r1] +++ sub r1, #31 +++ edge_w64_body +++ // copy c to a +++ vmov.64 q0, q4 +++ vmov.64 q1, q5 +++ vmov.64 q2, q6 +++ vmov.64 q3, q7 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo0_w32_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++ sub r1, #8 // load 8 extra bytes +++1: subs r4, #1 +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 // only first 9 bytes are used +++ sub r1, #32 +++ // a +++ vext.8 q0, q10, q11, #7 +++ vext.8 q1, q11, q12, #7 +++ // c +++ vext.8 q4, q10, q11, #8 +++ vext.8 q5, q11, q12, #8 +++ // b +++ vext.8 q8, q10, q11, #9 +++ vext.8 q9, q11, q12, #9 +++ edge_w32_body +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo1_w32_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++ // load a +++ sub r1, r3 +++ vld1.8 {q0-q1}, 
[r1], r3 +++ // load c +++ vld1.8 {q4-q5}, [r1], r3 +++1: subs r4, #1 +++ // load b +++ vld1.8 {q8-q9}, [r1], r3 +++ edge_w32_body +++ // inputs for next loop iteration +++ // a +++ vmov.64 q0, q4 +++ vmov.64 q1, q5 +++ // c +++ vmov.64 q4, q8 +++ vmov.64 q5, q9 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo2_w32_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ ldr r5, [r5] +++ vpush {d8-d15} +++ // load a +++ sub r1, r3 +++ sub r1, #8 +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q0, q10, q11, #7 +++ vext.8 q1, q11, q12, #7 +++ // load c +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q4, q10, q11, #8 +++ vext.8 q5, q11, q12, #8 +++ vext.8 q2, q10, q11, #7 +++1: subs r4, #1 +++ // load b +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q8, q10, q11, #9 +++ vext.8 q9, q11, q12, #9 +++ vext.8 q14, q10, q11, #8 +++ vext.8 q15, q11, q12, #8 +++ vext.8 q3, q10, q11, #7 +++ edge_w32_body +++ // inputs for next loop iteration +++ // a +++ vmov.8 q0, q2 +++ vext.8 q1, q4, q5, #15 +++ // c +++ vmov.8 q4, q14 +++ vmov.8 q5, q15 +++ vmov.8 q2, q3 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ +++function ff_hevc_sao_edge_eo3_w32_neon_8, export=1 +++ push {r4-r8} +++ ldr r4, [sp, #20] // height +++ ldr r5, [sp, #24] // sao_offset_val_table +++ ldr r6, =0x03 +++ ldr r7, [r5] +++ add r5, #4 +++ sub r1, r3 +++ ldr r5, [r5] +++ sub r1, #8 +++ vpush {d8-d15} +++ // load a +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q0, q10, q11, #9 +++ vext.8 q1, q11, q12, #9 +++ // load c +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q4, q10, q11, #8 +++ 
vext.8 q5, q11, q12, #8 +++ vext.8 q2, q12, q11, #8 +++1: subs r4, #1 +++ // load b +++ vld1.8 {q10-q11}, [r1] +++ add r1, #32 +++ vld1.8 {q12}, [r1], r3 +++ sub r1, #32 +++ vext.8 q8, q10, q11, #7 +++ vext.8 q9, q11, q12, #7 +++ vext.8 q3, q12, q10, #7 +++ edge_w32_body +++ // inputs for next loop iteration +++ // a +++ vext.8 q0, q4, q5, #1 +++ vext.8 q1, q5, q2, #1 +++ // c +++ vext.8 q4, q8, q9, #1 +++ vext.8 q5, q9, q3, #1 +++ vext.8 q2, q3, q1, #1 +++ bne 1b +++ vpop {d8-d15} +++ pop {r4-r8} +++ bx lr +++endfunc +++ ++-- ++2.5.0 ++ ++ ++From 1898d052a73370166d57e17cc7c52b7275887df3 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi <seppo.tomperi@vtt.fi> ++Date: Fri, 19 Dec 2014 09:44:10 +0200 ++Subject: [PATCH 4/9] Improved SAO band NEON opimizations made SAO buffer 16 ++ byte aligned added alignment hints to loads and stores optimized register ++ usage in SAO band neon assembly ++ ++--- ++ libavcodec/arm/hevcdsp_sao_neon.S | 212 +++++++++++++++----------------------- ++ 1 file changed, 82 insertions(+), 130 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 4687012..ac21013 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -22,120 +22,84 @@ ++ #include "neon.S" ++ ++ function ff_hevc_sao_band_w8_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // offset_table ++- vpush {d8-d15} ++- vld1.8 {q0, q1}, [r5] // offset table +++ ldr r12, [sp, #4] // offset_table address +++ vld1.8 {q0, q1}, [r12] // offset table +++ ldr r12, [sp, #0] // height ++ ++-1: subs r4, #1 ++- vld1.8 {d24}, [r1], r3 +++1: subs r12, #1 +++ vld1.8 {d24}, [r1,:64], r3 ++ vshr.u8 d16, d24, #3 ++ vtbl.8 d16, {q0, q1}, d16 ++- vmovl.s8 q2, d16 ++ vmovl.u8 q6, d24 ++- vadd.s16 q2, q6 +++ vaddw.s8 q6, d16 ++ vqmovun.s16 d4, q2 ++- vst1.8 {d4}, [r0], r2 +++ vst1.8 {d4}, [r0,:64], r2 ++ bne 1b ++ ++- vpop {d8-d15} ++- pop {r4-r8} ++ bx lr ++ endfunc ++ ++ 
function ff_hevc_sao_band_w16_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // offset_table ++- vpush {d8-d15} ++- vld1.8 {q0, q1}, [r5] // offset table ++- ++-1: subs r4, #1 ++- vld1.8 {q12}, [r1], r3 +++ ldr r12, [sp, #4] // offset_table address +++ vld1.8 {q0, q1}, [r12] // offset table +++ ldr r12, [sp, #0] // height ++ +++1: subs r12, #1 +++ vld1.8 {q12}, [r1,:128], r3 ++ vshr.u8 q8, q12, #3 ++- ++ vtbl.8 d16, {q0, q1}, d16 ++ vtbl.8 d17, {q0, q1}, d17 ++- ++- vmovl.s8 q2, d16 ++- vmovl.s8 q3, d17 ++- ++- vmovl.u8 q6, d24 ++- vmovl.u8 q7, d25 ++- ++- vadd.s16 q2, q6 ++- vadd.s16 q3, q7 ++- ++- vqmovun.s16 d4, q2 ++- vqmovun.s16 d5, q3 ++- ++- vstm.8 r0, {q2} ++- add r0, r2 +++ vmovl.u8 q10, d24 +++ vmovl.u8 q11, d25 +++ vaddw.s8 q10, d16 +++ vaddw.s8 q11, d17 +++ vqmovun.s16 d4, q10 +++ vqmovun.s16 d5, q11 +++ vst1.8 {q2}, [r0,:128], r2 ++ bne 1b ++ ++- vpop {d8-d15} ++- pop {r4-r8} ++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_band_w32_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // offset_table ++- vpush {d8-d15} ++- vld1.8 {q0, q1}, [r5] // offset table ++- ++-1: subs r4, #1 ++- vld1.8 {q12-q13}, [r1], r3 ++- ++- vshr.u8 q8, q12, #3 ++- vshr.u8 q9, q13, #3 ++- ++- vtbl.8 d16, {q0, q1}, d16 ++- vtbl.8 d17, {q0, q1}, d17 ++- vtbl.8 d18, {q0, q1}, d18 ++- vtbl.8 d19, {q0, q1}, d19 ++- ++- vmovl.s8 q2, d16 ++- vmovl.s8 q3, d17 // q8 free ++- vmovl.s8 q4, d18 ++- vmovl.s8 q5, d19 // q9 free ++- ++- vmovl.u8 q6, d24 ++- vmovl.u8 q7, d25 // q12 free ++- vmovl.u8 q8, d26 ++- vmovl.u8 q9, d27 // q13 free ++- ++- vadd.s16 q2, q6 ++- vadd.s16 q3, q7 ++- vadd.s16 q4, q8 ++- vadd.s16 q5, q9 ++- ++- vqmovun.s16 d4, q2 ++- vqmovun.s16 d5, q3 ++- vqmovun.s16 d6, q4 // q4 free ++- vqmovun.s16 d7, q5 // q5 free ++- ++- vst1.8 {q2-q3}, [r0], r2 ++- bne 1b ++- ++- vpop {d8-d15} ++- pop {r4-r8} ++- bx lr +++ ldr r12, [sp, #4] // offset_table address +++ vld1.8 {q0, q1}, [r12] // offset table 
+++ ldr r12, [sp, #0] // height +++ +++1: subs r12, #1 +++ vld1.8 {q2-q3}, [r1,:128], r3 +++ vshr.u8 q8, q2, #3 +++ vshr.u8 q9, q3, #3 +++ vtbl.8 d16, {q0, q1}, d16 +++ vtbl.8 d17, {q0, q1}, d17 +++ vtbl.8 d18, {q0, q1}, d18 +++ vtbl.8 d19, {q0, q1}, d19 +++ vmovl.u8 q12, d4 +++ vmovl.u8 q13, d5 +++ vmovl.u8 q14, d6 +++ vmovl.u8 q15, d7 +++ vaddw.s8 q12, d16 +++ vaddw.s8 q13, d17 +++ vaddw.s8 q14, d18 +++ vaddw.s8 q15, d19 +++ vqmovun.s16 d4, q12 +++ vqmovun.s16 d5, q13 +++ vqmovun.s16 d6, q14 +++ vqmovun.s16 d7, q15 +++ vst1.8 {q2-q3}, [r0,:128], r2 +++ bne 1b +++ +++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_band_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // offset_table ++- vpush {d8-d15} ++- vld1.8 {q0, q1}, [r5] // offset table +++ ldr r12, [sp, #4] // offset_table address +++ vld1.8 {q0, q1}, [r12] // offset table +++ ldr r12, [sp, #0] // height ++ ++-1: subs r4, #1 ++- vld1.8 {q12-q13}, [r1]! ++- vld1.8 {q14-q15}, [r1], r3 +++1: subs r12, #1 +++ vld1.8 {q12-q13}, [r1,:128]! 
+++ vld1.8 {q14-q15}, [r1,:128], r3 ++ sub r1, #32 ++ ++ vshr.u8 q8, q12, #3 ++@@ -152,53 +116,41 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ vtbl.8 d22, {q0, q1}, d22 ++ vtbl.8 d23, {q0, q1}, d23 ++ ++- vmovl.s8 q2, d16 ++- vmovl.s8 q3, d17 // q8 free ++- vmovl.s8 q4, d18 ++- vmovl.s8 q5, d19 // q9 free +++ vmovl.u8 q2, d24 +++ vmovl.u8 q3, d25 +++ vmovl.u8 q12, d26 +++ vmovl.u8 q13, d27 ++ ++- vmovl.u8 q6, d24 ++- vmovl.u8 q7, d25 // q12 free ++- vmovl.u8 q8, d26 ++- vmovl.u8 q9, d27 // q13 free ++- ++- vadd.s16 q2, q6 ++- vadd.s16 q3, q7 ++- vadd.s16 q4, q8 ++- vadd.s16 q5, q9 +++ vaddw.s8 q2, d16 +++ vaddw.s8 q3, d17 +++ vaddw.s8 q12, d18 +++ vaddw.s8 q13, d19 ++ ++ vqmovun.s16 d4, q2 ++ vqmovun.s16 d5, q3 ++- vqmovun.s16 d6, q4 // q4 free ++- vqmovun.s16 d7, q5 // q5 free ++- ++- // free q4 -q9, q12 - q13 ++- vmovl.s8 q4, d20 ++- vmovl.s8 q5, d21 // q10 free ++- vmovl.s8 q6, d22 ++- vmovl.s8 q7, d23 // q11 free ++- ++- vmovl.u8 q8, d28 ++- vmovl.u8 q9, d29 // q14 free ++- vmovl.u8 q10, d30 ++- vmovl.u8 q11, d31 // q15 free ++- ++- vadd.s16 q4, q8 ++- vadd.s16 q5, q9 ++- vadd.s16 q6, q10 ++- vadd.s16 q7, q11 ++- ++- vqmovun.s16 d8, q4 ++- vqmovun.s16 d9, q5 ++- vqmovun.s16 d10, q6 ++- vqmovun.s16 d11, q7 ++- ++- vstm.8 r0, {q2-q5} ++- add r0, r2 +++ vqmovun.s16 d6, q12 +++ vqmovun.s16 d7, q13 +++ +++ vmovl.u8 q12, d28 +++ vmovl.u8 q13, d29 +++ vmovl.u8 q14, d30 +++ vmovl.u8 q15, d31 +++ +++ vaddw.s8 q12, d20 +++ vaddw.s8 q13, d21 +++ vaddw.s8 q14, d22 +++ vaddw.s8 q15, d23 +++ +++ vqmovun.s16 d8, q12 +++ vqmovun.s16 d9, q13 +++ vqmovun.s16 d10, q14 +++ vqmovun.s16 d11, q15 +++ +++ vst1.8 {q2-q3}, [r0,:128]! 
+++ vst1.8 {q4-q5}, [r0,:128], r2 +++ sub r0, #32 ++ bne 1b ++ ++- vpop {d8-d15} ++- pop {r4-r8} ++ bx lr ++ endfunc ++ ++-- ++2.5.0 ++ ++ ++From 26bd536800db2f50ff6a021e1fda0d0394d1ea01 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi <seppo.tomperi@vtt.fi> ++Date: Mon, 29 Dec 2014 15:00:49 +0200 ++Subject: [PATCH 5/9] better code reuse in NEON SAO band ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 16 ++-- ++ libavcodec/arm/hevcdsp_sao_neon.S | 155 +++++++++++++------------------------ ++ 2 files changed, 61 insertions(+), 110 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index c32940e..6379810 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -45,10 +45,10 @@ void ff_hevc_transform_add_16x16_neon_8(uint8_t *_dst, int16_t *coeffs, ++ void ff_hevc_transform_add_32x32_neon_8(uint8_t *_dst, int16_t *coeffs, ++ ptrdiff_t stride); ++ ++-void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++-void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++-void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); ++-void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t * offset_table); +++void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); +++void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); +++void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); +++void 
ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); ++ ++ void ff_hevc_sao_edge_eo0_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++ void ff_hevc_sao_edge_eo1_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++@@ -185,16 +185,16 @@ static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_ ++ ++ switch(width){ ++ case 8: ++- ff_hevc_sao_band_w8_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ ff_hevc_sao_band_w8_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); ++ break; ++ case 16: ++- ff_hevc_sao_band_w16_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ ff_hevc_sao_band_w16_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); ++ break; ++ case 32: ++- ff_hevc_sao_band_w32_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ ff_hevc_sao_band_w32_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); ++ break; ++ case 64: ++- ff_hevc_sao_band_w64_neon_8(_dst, _src, stride_dst, stride_src, height, offset_table); +++ ff_hevc_sao_band_w64_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); ++ break; ++ default: ++ for (y = 0; y < height; y++) { ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index ac21013..8852550 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -21,53 +21,13 @@ ++ #include "libavutil/arm/asm.S" ++ #include "neon.S" ++ ++-function ff_hevc_sao_band_w8_neon_8, export=1 ++- ldr r12, [sp, #4] // offset_table address +++.macro init_sao_band +++ ldr r12, [sp, #0] // offset_table address ++ vld1.8 {q0, q1}, [r12] // offset table ++- ldr r12, [sp, #0] // height ++- ++-1: subs r12, #1 ++- vld1.8 {d24}, 
[r1,:64], r3 ++- vshr.u8 d16, d24, #3 ++- vtbl.8 d16, {q0, q1}, d16 ++- vmovl.u8 q6, d24 ++- vaddw.s8 q6, d16 ++- vqmovun.s16 d4, q2 ++- vst1.8 {d4}, [r0,:64], r2 ++- bne 1b ++- ++- bx lr ++-endfunc ++- ++-function ff_hevc_sao_band_w16_neon_8, export=1 ++- ldr r12, [sp, #4] // offset_table address ++- vld1.8 {q0, q1}, [r12] // offset table ++- ldr r12, [sp, #0] // height ++- ++-1: subs r12, #1 ++- vld1.8 {q12}, [r1,:128], r3 ++- vshr.u8 q8, q12, #3 ++- vtbl.8 d16, {q0, q1}, d16 ++- vtbl.8 d17, {q0, q1}, d17 ++- vmovl.u8 q10, d24 ++- vmovl.u8 q11, d25 ++- vaddw.s8 q10, d16 ++- vaddw.s8 q11, d17 ++- vqmovun.s16 d4, q10 ++- vqmovun.s16 d5, q11 ++- vst1.8 {q2}, [r0,:128], r2 ++- bne 1b ++- ++- bx lr ++-endfunc ++- ++-function ff_hevc_sao_band_w32_neon_8, export=1 ++- ldr r12, [sp, #4] // offset_table address ++- vld1.8 {q0, q1}, [r12] // offset table ++- ldr r12, [sp, #0] // height +++ ldr r12, [sp, #4] // height +++.endm ++ ++-1: subs r12, #1 ++- vld1.8 {q2-q3}, [r1,:128], r3 +++.macro sao_band_32 ++ vshr.u8 q8, q2, #3 ++ vshr.u8 q9, q3, #3 ++ vtbl.8 d16, {q0, q1}, d16 ++@@ -86,6 +46,43 @@ function ff_hevc_sao_band_w32_neon_8, export=1 ++ vqmovun.s16 d5, q13 ++ vqmovun.s16 d6, q14 ++ vqmovun.s16 d7, q15 +++.endm +++ +++function ff_hevc_sao_band_w8_neon_8, export=1 +++ init_sao_band +++1: subs r12, #4 +++ vld1.8 {d4}, [r1,:64], r3 +++ vld1.8 {d5}, [r1,:64], r3 +++ vld1.8 {d6}, [r1,:64], r3 +++ vld1.8 {d7}, [r1,:64], r3 +++ sao_band_32 +++ vst1.8 {d4}, [r0,:64], r2 +++ vst1.8 {d5}, [r0,:64], r2 +++ vst1.8 {d6}, [r0,:64], r2 +++ vst1.8 {d7}, [r0,:64], r2 +++ bne 1b +++ +++ bx lr +++endfunc +++ +++function ff_hevc_sao_band_w16_neon_8, export=1 +++ init_sao_band +++1: subs r12, #2 +++ vld1.8 {q2}, [r1,:128], r3 +++ vld1.8 {q3}, [r1,:128], r3 +++ sao_band_32 +++ vst1.8 {q2}, [r0,:128], r2 +++ vst1.8 {q3}, [r0,:128], r2 +++ bne 1b +++ +++ bx lr +++endfunc +++ +++function ff_hevc_sao_band_w32_neon_8, export=1 +++ init_sao_band +++1: subs r12, #1 +++ vld1.8 {q2-q3}, [r1,:128], 
r3 +++ sao_band_32 ++ vst1.8 {q2-q3}, [r0,:128], r2 ++ bne 1b ++ ++@@ -93,63 +90,17 @@ function ff_hevc_sao_band_w32_neon_8, export=1 ++ endfunc ++ ++ function ff_hevc_sao_band_w64_neon_8, export=1 ++- ldr r12, [sp, #4] // offset_table address ++- vld1.8 {q0, q1}, [r12] // offset table ++- ldr r12, [sp, #0] // height ++- ++-1: subs r12, #1 ++- vld1.8 {q12-q13}, [r1,:128]! ++- vld1.8 {q14-q15}, [r1,:128], r3 ++- sub r1, #32 ++- ++- vshr.u8 q8, q12, #3 ++- vshr.u8 q9, q13, #3 ++- vshr.u8 q10, q14, #3 ++- vshr.u8 q11, q15, #3 ++- ++- vtbl.8 d16, {q0, q1}, d16 ++- vtbl.8 d17, {q0, q1}, d17 ++- vtbl.8 d18, {q0, q1}, d18 ++- vtbl.8 d19, {q0, q1}, d19 ++- vtbl.8 d20, {q0, q1}, d20 ++- vtbl.8 d21, {q0, q1}, d21 ++- vtbl.8 d22, {q0, q1}, d22 ++- vtbl.8 d23, {q0, q1}, d23 ++- ++- vmovl.u8 q2, d24 ++- vmovl.u8 q3, d25 ++- vmovl.u8 q12, d26 ++- vmovl.u8 q13, d27 ++- ++- vaddw.s8 q2, d16 ++- vaddw.s8 q3, d17 ++- vaddw.s8 q12, d18 ++- vaddw.s8 q13, d19 ++- ++- vqmovun.s16 d4, q2 ++- vqmovun.s16 d5, q3 ++- vqmovun.s16 d6, q12 ++- vqmovun.s16 d7, q13 ++- ++- vmovl.u8 q12, d28 ++- vmovl.u8 q13, d29 ++- vmovl.u8 q14, d30 ++- vmovl.u8 q15, d31 ++- ++- vaddw.s8 q12, d20 ++- vaddw.s8 q13, d21 ++- vaddw.s8 q14, d22 ++- vaddw.s8 q15, d23 ++- ++- vqmovun.s16 d8, q12 ++- vqmovun.s16 d9, q13 ++- vqmovun.s16 d10, q14 ++- vqmovun.s16 d11, q15 ++- ++- vst1.8 {q2-q3}, [r0,:128]! ++- vst1.8 {q4-q5}, [r0,:128], r2 ++- sub r0, #32 ++- bne 1b +++ init_sao_band +++1: subs r12, #1 +++ vld1.8 {q2-q3}, [r1,:128]! +++ sao_band_32 +++ vst1.8 {q2-q3}, [r0,:128]! 
+++ vld1.8 {q2-q3}, [r1,:128], r3 +++ sub r1, #32 +++ sao_band_32 +++ vst1.8 {q2-q3}, [r0,:128], r2 +++ sub r0, #32 +++ bne 1b ++ ++ bx lr ++ endfunc ++-- ++2.5.0 ++ ++ ++From f93646a97bc885b81759e774d04be3781916a3e7 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi <seppo.tomperi@vtt.fi> ++Date: Wed, 7 Jan 2015 15:27:38 +0200 ++Subject: [PATCH 6/9] More SAO NEON optimizations Now uses only 8 bit integers ++ for SAO calculations ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 7 +- ++ libavcodec/arm/hevcdsp_sao_neon.S | 664 +++++++++++++++---------------------- ++ 2 files changed, 272 insertions(+), 399 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 6379810..8d6e863 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -225,7 +225,7 @@ static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t ++ int x, y; ++ ++ for (x = 0; x < 5; x++) { ++- sao_offset_val[x] = _sao_offset_val[x]; +++ sao_offset_val[x] = _sao_offset_val[edge_idx[x]]; ++ } ++ ++ stride_src /= sizeof(pixel); ++@@ -271,8 +271,9 @@ static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t ++ for (x = 0; x < width; x++) { ++ int diff0 = CMP(src[x], src[x + a_stride]); ++ int diff1 = CMP(src[x], src[x + b_stride]); ++- int offset_val = edge_idx[2 + diff0 + diff1]; ++- dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]); +++ int idx = diff0 + diff1; +++ if (idx) +++ dst[x] = av_clip_pixel(src[x] + sao_offset_val[idx+2]); ++ } ++ src += stride_src; ++ dst += stride_dst; ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 8852550..5fc482b 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -1,5 +1,5 @@ ++ /* ++- * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi> +++ * Copyright (c) 2014 - 2015 Seppo Tomperi <seppo.tomperi@vtt.fi> ++ * ++ * This file is part of 
FFmpeg. ++ * ++@@ -23,6 +23,7 @@ ++ ++ .macro init_sao_band ++ ldr r12, [sp, #0] // offset_table address +++ pld [r1] ++ vld1.8 {q0, q1}, [r12] // offset table ++ ldr r12, [sp, #4] // height ++ .endm ++@@ -30,36 +31,31 @@ ++ .macro sao_band_32 ++ vshr.u8 q8, q2, #3 ++ vshr.u8 q9, q3, #3 +++ vmov.u8 q14, #128 ++ vtbl.8 d16, {q0, q1}, d16 ++ vtbl.8 d17, {q0, q1}, d17 ++ vtbl.8 d18, {q0, q1}, d18 ++ vtbl.8 d19, {q0, q1}, d19 ++- vmovl.u8 q12, d4 ++- vmovl.u8 q13, d5 ++- vmovl.u8 q14, d6 ++- vmovl.u8 q15, d7 ++- vaddw.s8 q12, d16 ++- vaddw.s8 q13, d17 ++- vaddw.s8 q14, d18 ++- vaddw.s8 q15, d19 ++- vqmovun.s16 d4, q12 ++- vqmovun.s16 d5, q13 ++- vqmovun.s16 d6, q14 ++- vqmovun.s16 d7, q15 +++ vadd.s8 q2, q14 +++ vadd.s8 q3, q14 +++ vqadd.s8 q2, q8 +++ vqadd.s8 q3, q9 +++ vsub.s8 q2, q14 +++ vsub.s8 q3, q14 ++ .endm ++ ++ function ff_hevc_sao_band_w8_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #4 ++- vld1.8 {d4}, [r1,:64], r3 ++- vld1.8 {d5}, [r1,:64], r3 ++- vld1.8 {d6}, [r1,:64], r3 ++- vld1.8 {d7}, [r1,:64], r3 +++ vld1.8 {d4}, [r1, :64], r3 +++ vld1.8 {d5}, [r1, :64], r3 +++ vld1.8 {d6}, [r1, :64], r3 +++ vld1.8 {d7}, [r1, :64], r3 ++ sao_band_32 ++- vst1.8 {d4}, [r0,:64], r2 ++- vst1.8 {d5}, [r0,:64], r2 ++- vst1.8 {d6}, [r0,:64], r2 ++- vst1.8 {d7}, [r0,:64], r2 +++ vst1.8 {d4}, [r0, :64], r2 +++ vst1.8 {d5}, [r0, :64], r2 +++ vst1.8 {d6}, [r0, :64], r2 +++ vst1.8 {d7}, [r0, :64], r2 ++ bne 1b ++ ++ bx lr ++@@ -68,11 +64,11 @@ endfunc ++ function ff_hevc_sao_band_w16_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #2 ++- vld1.8 {q2}, [r1,:128], r3 ++- vld1.8 {q3}, [r1,:128], r3 +++ vld1.8 {q2}, [r1, :128], r3 +++ vld1.8 {q3}, [r1, :128], r3 ++ sao_band_32 ++- vst1.8 {q2}, [r0,:128], r2 ++- vst1.8 {q3}, [r0,:128], r2 +++ vst1.8 {q2}, [r0, :128], r2 +++ vst1.8 {q3}, [r0, :128], r2 ++ bne 1b ++ ++ bx lr ++@@ -81,9 +77,9 @@ endfunc ++ function ff_hevc_sao_band_w32_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #1 ++- vld1.8 {q2-q3}, [r1,:128], r3 +++ vld1.8 
{q2-q3}, [r1, :128], r3 ++ sao_band_32 ++- vst1.8 {q2-q3}, [r0,:128], r2 +++ vst1.8 {q2-q3}, [r0, :128], r2 ++ bne 1b ++ ++ bx lr ++@@ -92,263 +88,153 @@ endfunc ++ function ff_hevc_sao_band_w64_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #1 ++- vld1.8 {q2-q3}, [r1,:128]! +++ pld [r1, r3] +++ vld1.8 {q2-q3}, [r1, :128]! ++ sao_band_32 ++- vst1.8 {q2-q3}, [r0,:128]! ++- vld1.8 {q2-q3}, [r1,:128], r3 +++ vst1.8 {q2-q3}, [r0, :128]! +++ vld1.8 {q2-q3}, [r1, :128], r3 ++ sub r1, #32 ++ sao_band_32 ++- vst1.8 {q2-q3}, [r0,:128], r2 +++ vst1.8 {q2-q3}, [r0, :128], r2 ++ sub r0, #32 ++ bne 1b ++ ++ bx lr ++ endfunc ++- +++// input +++// a in q0 - q3 +++// c in q4 - q7 +++// b in q8 - q11 +++// offset table in r7 and r5 +++// output in q0 - q3 +++// clobbers q12 - q15 ++ .macro edge_w64_body ++- vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 ++- vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 ++- vcgt.u8 q13, q5, q1 ++- vcgt.u8 q1, q1, q5 ++- vcgt.u8 q14, q6, q2 ++- vcgt.u8 q2, q2, q6 ++- vcgt.u8 q15, q7, q3 ++- vcgt.u8 q3, q3, q7 ++- ++- vsub.s8 q12, q0, q12 // diff0 ++- vsub.s8 q13, q1, q13 ++- vsub.s8 q14, q2, q14 ++- vsub.s8 q15, q3, q15 ++- +++ vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 +++ vcgt.u8 q13, q5, q1 +++ vcgt.u8 q1, q1, q5 +++ vsub.s8 q12, q0, q12 // diff0 ++ vcgt.u8 q0, q4, q8 // c > b ++- vcgt.u8 q8, q8, q4 // b > c +++ vsub.s8 q13, q1, q13 +++ +++ vcgt.u8 q14, q8, q4 // b > c ++ vcgt.u8 q1, q5, q9 ++- vcgt.u8 q9, q9, q5 ++- vcgt.u8 q2, q6, q10 ++- vcgt.u8 q10, q10, q6 ++- vcgt.u8 q3, q7, q11 ++- vcgt.u8 q11, q11, q7 +++ vcgt.u8 q15, q9, q5 +++ vsub.s8 q0, q14, q0 // diff1 ++ ++- vsub.s8 q0, q8, q0 // diff1 ++- vsub.s8 q1, q9, q1 ++- vsub.s8 q2, q10, q2 ++- vsub.s8 q3, q11, q3 +++ vsub.s8 q1, q15, q1 ++ ++- vadd.s8 q0, q12 //diff0 + diff1 ++- vadd.s8 q1, q13 ++- vadd.s8 q2, q14 ++- vadd.s8 q3, q15 ++- ++- vdup.s8 q9, r6 // 3 to all elements ++- sub r6, #1 ++- ++- vclt.s8 q12, q0, #0 // diff0 
+ diff1 < 0 ++- vclt.s8 q13, q1, #0 ++- vclt.s8 q14, q2, #0 ++- vclt.s8 q15, q3, #0 ++- ++- vadd.s8 q8, q0, q9 // diff0 + diff1 + 3 ++- vadd.s8 q10, q1, q9 ++- vand.8 q12, q8, q12 // if (diff0 + diff1 < 0) then (diff0 + diff1 + 3) else 0 ++- vand.8 q13, q10, q13 ++- vadd.s8 q8, q2, q9 ++- vadd.s8 q10, q3, q9 ++- vand.8 q14, q8, q14 ++- vand.8 q15, q10, q15 ++- ++- vdup.s8 q9, r6 // 2 to all elements ++- add r6, #1 ++- ++- vcgt.s8 q10, q0, #0 // diff0 + diff1 > 0 ++- vadd.s8 q8, q0, q9 // diff0 + diff1 + 2 ++- vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vcgt.s8 q10, q1, #0 ++- vadd.s8 q0, q11, q12 // offset_idx ++- ++- vadd.s8 q8, q1, q9 // diff0 + diff1 + 2 ++- vcgt.s8 q12, q2, #0 ++- vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vadd.s8 q8, q2, q9 // diff0 + diff1 + 2 ++- vadd.s8 q1, q11, q13 ++- ++- vand.8 q11, q8, q12 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vcgt.s8 q10, q3, #0 ++- vadd.s8 q2, q11, q14 ++- ++- vadd.s8 q8, q3, q9 // diff0 + diff1 + 2 ++- vmov.32 d18[0], r7 // load offset table from general registers ++- vand.8 q11, q8, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vmov.32 d18[1], r5 // load rest of offset table ++- vadd.s8 q3, q11, q15 ++- ++- vtbl.8 d0, {d18}, d0 ++- vtbl.8 d1, {d18}, d1 ++- vtbl.8 d2, {d18}, d2 ++- vtbl.8 d3, {d18}, d3 ++- vtbl.8 d4, {d18}, d4 ++- vtbl.8 d5, {d18}, d5 ++- vtbl.8 d6, {d18}, d6 ++- vtbl.8 d7, {d18}, d7 ++- ++- vmovl.u8 q8, d8 ++- vmovl.u8 q9, d9 ++- vmovl.u8 q10, d10 ++- vmovl.u8 q11, d11 ++- vmovl.u8 q12, d12 ++- vmovl.u8 q13, d13 ++- vmovl.u8 q14, d14 ++- vmovl.u8 q15, d15 ++- ++- vaddw.s8 q8, d0 ++- vaddw.s8 q9, d1 ++- vaddw.s8 q10, d2 ++- vaddw.s8 q11, d3 ++- vaddw.s8 q12, d4 ++- vaddw.s8 q13, d5 ++- vaddw.s8 q14, d6 ++- vaddw.s8 q15, d7 ++- ++- vqmovun.s16 d0, q8 ++- vqmovun.s16 d1, q9 ++- vqmovun.s16 d2, q10 ++- vqmovun.s16 d3, q11 ++- vqmovun.s16 d4, q12 ++- vqmovun.s16 d5, q13 ++- vqmovun.s16 
d6, q14 ++- vqmovun.s16 d7, q15 ++- ++- vstm r0, {q0-q3} ++- add r0, r2 ++-.endm +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 ++ ++-.macro edge_w32_body ++- vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 ++- vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 ++- vcgt.u8 q13, q5, q1 ++- vcgt.u8 q1, q1, q5 +++ vcgt.u8 q14, q6, q2 +++ vcgt.u8 q2, q2, q6 +++ vcgt.u8 q15, q7, q3 +++ vcgt.u8 q3, q3, q7 ++ ++- vsub.s8 q12, q0, q12 // diff0 ++- vcgt.u8 q0, q4, q8 // c > b ++- vsub.s8 q13, q1, q13 // diff0 part 2 +++ vsub.s8 q14, q2, q14 +++ vcgt.u8 q2, q6, q10 +++ vsub.s8 q15, q3, q15 ++ ++- vcgt.u8 q6, q8, q4 // b > c ++- vcgt.u8 q1, q5, q9 ++- vcgt.u8 q7, q9, q5 +++ vcgt.u8 q12, q10, q6 +++ vcgt.u8 q3, q7, q11 +++ vcgt.u8 q13, q11, q7 +++ vsub.s8 q2, q12, q2 +++ vsub.s8 q3, q13, q3 ++ ++- vsub.s8 q0, q6, q0 // diff1 ++- vsub.s8 q1, q7, q1 // diff1 part 2 ++- vadd.s8 q0, q12 //diff0 + diff1 +++ vmov.s8 q13, #2 // 2 to all elements ++ ++- vdup.s8 q7, r6 // 3 to all elements ++- sub r6, #1 ++- vadd.s8 q1, q13 +++ vadd.s8 q2, q14 +++ vadd.s8 q3, q15 +++ +++ vmov.32 d24[0], r4 // load offset table from general registers +++ vmov.32 d24[1], r5 // load rest of offset table ++ ++- vclt.s8 q12, q0, #0 // diff0 + diff1 < 0 ++- vclt.s8 q13, q1, #0 ++- ++- vadd.s8 q6, q0, q7 // diff0 + diff1 + 3 ++- vadd.s8 q10, q1, q7 ++- vdup.s8 q7, r6 // 2 to all elements ++- add r6, #1 ++- vand.8 q12, q6, q12 // if (diff0 + diff1 < 0) then (diff0 + diff1 + 3) else 0 ++- vand.8 q13, q10, q13 ++- ++- ++- vcgt.s8 q10, q0, #0 // diff0 + diff1 > 0 ++- vadd.s8 q6, q0, q7 // diff0 + diff1 + 2 ++- vand.8 q11, q6, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vcgt.s8 q10, q1, #0 ++- vadd.s8 q0, q11, q12 // offset_idx ++- ++- vadd.s8 q6, q1, q7 // diff0 + diff1 + 2 ++- vmov.32 d14[0], r7 // load offset table from general registers ++- vand.8 q11, q6, q10 // if (diff0 + diff1 > 0) then (diff0 + diff1 + 2) else 0 ++- vmov.32 d14[1], r5 // load rest of offset table ++- vadd.s8 
q1, q11, q13 ++- ++- vtbl.8 d0, {d14}, d0 ++- vtbl.8 d1, {d14}, d1 ++- vtbl.8 d2, {d14}, d2 ++- vtbl.8 d3, {d14}, d3 ++- ++- vmovl.u8 q6, d8 ++- vmovl.u8 q7, d9 ++- vmovl.u8 q10, d10 ++- vmovl.u8 q11, d11 ++- ++- vaddw.s8 q6, d0 ++- vaddw.s8 q7, d1 ++- vaddw.s8 q10, d2 ++- vaddw.s8 q11, d3 ++- ++- vqmovun.s16 d0, q6 ++- vqmovun.s16 d1, q7 ++- vqmovun.s16 d2, q10 ++- vqmovun.s16 d3, q11 ++- ++- vstm r0, {q0-q1} ++- add r0, r2 +++ vadd.s8 q0, q13 +++ vadd.s8 q1, q13 +++ vadd.s8 q2, q13 +++ vadd.s8 q3, q13 +++ +++ vmov.u8 q15, #128 // s8 #-128 +++ vtbl.8 d0, {d24}, d0 +++ vtbl.8 d1, {d24}, d1 +++ vtbl.8 d2, {d24}, d2 +++ vtbl.8 d3, {d24}, d3 +++ vtbl.8 d4, {d24}, d4 +++ vtbl.8 d5, {d24}, d5 +++ vtbl.8 d6, {d24}, d6 +++ vtbl.8 d7, {d24}, d7 +++ +++ vadd.s8 q12, q4, q15 +++ vadd.s8 q13, q5, q15 +++ vadd.s8 q14, q6, q15 +++ vadd.s8 q15, q7, q15 +++ vqadd.s8 q12, q0 +++ vqadd.s8 q15, q3 +++ vmov.u8 q3, #128 // s8 #-128 +++ vqadd.s8 q13, q1 +++ vqadd.s8 q14, q2 +++ vsub.s8 q0, q12, q3 +++ vsub.s8 q1, q13, q3 +++ vsub.s8 q2, q14, q3 +++ vsub.s8 q3, q15, q3 +++ vst1.8 {q0-q1}, [r0, :128]! +++ vst1.8 {q2-q3}, [r0, :128], r2 +++ sub r0, #32 ++ .endm ++ ++-function ff_hevc_sao_edge_eo0_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] +++.macro init_edge_64 +++ push {r4-r5} +++ ldr r12, [sp, #8] // height +++ ldr r5, [sp, #12] // sao_offset_val_table +++ ldr r4, [r5] ++ add r5, #4 ++ ldr r5, [r5] +++.endm +++ +++function ff_hevc_sao_edge_eo0_w64_neon_8, export=1 +++ init_edge_64 ++ vpush {d8-d15} ++ sub r1, #8 ++-1: subs r4, #1 ++- vld1.64 {q10-q11}, [r1]! ++- vld1.64 {q12-q13}, [r1]! ++- vld1.64 {q14}, [r1], r3 ++- sub r1, #64 +++1: subs r12, #1 +++ vld1.64 {d7}, [r1, :64]! +++ vld1.64 {q4-q5}, [r1, :128]! // load c +++ vld1.64 {q6-q7}, [r1, :128]! 
+++ vld1.64 {d24}, [r1, :64], r3 +++ sub r1, #72 ++ // load a ++- vext.8 q0, q10, q11, #7 ++- vext.8 q1, q11, q12, #7 ++- vext.8 q2, q12, q13, #7 ++- vext.8 q3, q13, q14, #7 ++- // load c ++- vext.8 q4, q10, q11, #8 ++- vext.8 q5, q11, q12, #8 ++- vext.8 q6, q12, q13, #8 ++- vext.8 q7, q13, q14, #8 +++ vext.8 q0, q3, q4, #15 +++ vext.8 q1, q4, q5, #15 +++ vext.8 q2, q5, q6, #15 +++ vext.8 q3, q6, q7, #15 ++ // load b ++- vext.8 q8, q10, q11, #9 ++- vext.8 q9, q11, q12, #9 ++- vext.8 q10, q12, q13, #9 ++- vext.8 q11, q13, q14, #9 +++ vext.8 q8, q4, q5, #1 +++ vext.8 q9, q5, q6, #1 +++ vext.8 q10, q6, q7, #1 +++ vext.8 q11, q7, q12, #1 ++ edge_w64_body ++ bne 1b ++ vpop {d8-d15} ++- pop {r4-r8} +++ pop {r4-r5} ++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] +++ init_edge_64 ++ vpush {d8-d15} ++ sub r1, r3 ++ // load a ++- vld1.8 {q0-q1}, [r1]! ++- vld1.8 {q2-q3}, [r1], r3 +++ vld1.8 {q0-q1}, [r1, :128]! +++ vld1.8 {q2-q3}, [r1, :128], r3 ++ sub r1, #32 ++-1: subs r4, #1 ++ // load c ++- vld1.8 {q4-q5}, [r1]! ++- vld1.8 {q6-q7}, [r1], r3 +++ vld1.8 {q4-q5}, [r1, :128]! +++ vld1.8 {q6-q7}, [r1, :128], r3 ++ sub r1, #32 +++1: subs r12, #1 ++ // load b ++- vld1.8 {q8-q9}, [r1]! ++- vld1.8 {q10-q11}, [r1] +++ vld1.8 {q8-q9}, [r1, :128]! 
+++ vld1.8 {q10-q11}, [r1, :128], r3 ++ sub r1, #32 ++ edge_w64_body ++ // copy c to a ++@@ -356,20 +242,19 @@ function ff_hevc_sao_edge_eo1_w64_neon_8, export=1 ++ vmov.64 q1, q5 ++ vmov.64 q2, q6 ++ vmov.64 q3, q7 +++ // copy b to c +++ vmov.64 q4, q8 +++ vmov.64 q5, q9 +++ vmov.64 q6, q10 +++ vmov.64 q7, q11 ++ bne 1b ++ vpop {d8-d15} ++- pop {r4-r8} +++ pop {r4-r5} ++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo2_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] +++ init_edge_64 ++ vpush {d8-d15} ++ 1: sub r1, r3 ++ // load a ++@@ -379,10 +264,10 @@ function ff_hevc_sao_edge_eo2_w64_neon_8, export=1 ++ vld1.8 {q0-q1}, [r1]! ++ vld1.8 {q2-q3}, [r1], r3 ++ sub r1, #31 ++- subs r4, #1 +++ subs r12, #1 ++ // load c ++- vld1.8 {q4-q5}, [r1]! ++- vld1.8 {q6-q7}, [r1], r3 +++ vld1.8 {q4-q5}, [r1, :128]! +++ vld1.8 {q6-q7}, [r1, :128], r3 ++ sub r1, #32 ++ // load b ++ add r1, #1 ++@@ -390,25 +275,14 @@ function ff_hevc_sao_edge_eo2_w64_neon_8, export=1 ++ vld1.8 {q10-q11}, [r1] ++ sub r1, #33 ++ edge_w64_body ++- // copy c to a ++- vmov.64 q0, q4 ++- vmov.64 q1, q5 ++- vmov.64 q2, q6 ++- vmov.64 q3, q7 ++ bne 1b ++ vpop {d8-d15} ++- pop {r4-r8} +++ pop {r4-r5} ++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo3_w64_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] +++ init_edge_64 ++ vpush {d8-d15} ++ 1: sub r1, r3 ++ // load a ++@@ -418,10 +292,10 @@ function ff_hevc_sao_edge_eo3_w64_neon_8, export=1 ++ vld1.8 {q0-q1}, [r1]! ++ vld1.8 {q2-q3}, [r1], r3 ++ sub r1, #33 ++- subs r4, #1 +++ subs r12, #1 ++ // load c ++- vld1.8 {q4-q5}, [r1]! ++- vld1.8 {q6-q7}, [r1], r3 +++ vld1.8 {q4-q5}, [r1, :128]! 
+++ vld1.8 {q6-q7}, [r1, :128], r3 ++ sub r1, #32 ++ // load b ++ sub r1, #1 ++@@ -429,178 +303,176 @@ function ff_hevc_sao_edge_eo3_w64_neon_8, export=1 ++ vld1.8 {q10-q11}, [r1] ++ sub r1, #31 ++ edge_w64_body ++- // copy c to a ++- vmov.64 q0, q4 ++- vmov.64 q1, q5 ++- vmov.64 q2, q6 ++- vmov.64 q3, q7 ++ bne 1b ++ vpop {d8-d15} ++- pop {r4-r8} +++ pop {r4-r5} ++ bx lr ++ endfunc ++ +++// inputs: +++// a in q0, q1 +++// c in q2, q3 +++// b in q8, q9 +++// offset table in d31 +++// clobbered registers q0, q1, q10, q11, q12, q13 +++// output q0, q1 +++.macro edge_w32_body +++ vcgt.u8 q12, q2, q0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 q0, q0, q2 // a > c -> -1 , otherwise 0 +++ vcgt.u8 q13, q3, q1 +++ vcgt.u8 q1, q1, q3 +++ +++ vsub.s8 q12, q0, q12 // diff0 +++ vcgt.u8 q0, q2, q8 // c > b +++ vsub.s8 q13, q1, q13 // diff0 part 2 +++ +++ vcgt.u8 q10, q8, q2 // b > c +++ vcgt.u8 q1, q3, q9 +++ vcgt.u8 q11, q9, q3 +++ +++ vsub.s8 q0, q10, q0 // diff1 +++ +++ vmov.s8 q10, #2 // 2 to all elements +++ vsub.s8 q1, q11, q1 // diff1 part 2 +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 +++ +++ vadd.s8 q0, q10 +++ vadd.s8 q1, q10 +++ +++ vmov.u8 q10, #128 +++ vtbl.8 d0, {d31}, d0 +++ vtbl.8 d1, {d31}, d1 +++ vtbl.8 d2, {d31}, d2 +++ vtbl.8 d3, {d31}, d3 +++ +++ vadd.s8 q11, q2, q10 +++ vadd.s8 q12, q3, q10 +++ vqadd.s8 q11, q0 +++ vqadd.s8 q12, q1 +++ vsub.s8 q0, q11, q10 +++ vsub.s8 q1, q12, q10 +++ vst1.8 {q0-q1}, [r0, :128], r2 +++.endm +++ +++.macro init_edge_32 +++ ldr r12, [sp, #4] // sao_offset_val_table +++ vld1.32 {d31}, [r12] +++ ldr r12, [sp] // height +++.endm +++ ++ function ff_hevc_sao_edge_eo0_w32_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] ++- vpush {d8-d15} ++- sub r1, #8 // load 8 extra bytes ++-1: subs r4, #1 ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 // only first 9 bytes are used ++- 
sub r1, #32 +++ init_edge_32 +++ sub r1, #4 // load 4 extra bytes +++1: subs r12, #1 +++ vld1.32 d3[1], [r1]! +++ vld1.8 {q2-q3}, [r1, :128]! // c +++ vld1.32 d20[0], [r1], r3 +++ sub r1, #36 ++ // a ++- vext.8 q0, q10, q11, #7 ++- vext.8 q1, q11, q12, #7 ++- // c ++- vext.8 q4, q10, q11, #8 ++- vext.8 q5, q11, q12, #8 +++ vext.8 q0, q1, q2, #15 +++ vext.8 q1, q2, q3, #15 ++ // b ++- vext.8 q8, q10, q11, #9 ++- vext.8 q9, q11, q12, #9 +++ vext.8 q8, q2, q3, #1 +++ vext.8 q9, q3, q10, #1 ++ edge_w32_body ++- bne 1b ++- vpop {d8-d15} ++- pop {r4-r8} ++- bx lr +++ bne 1b +++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo1_w32_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] ++- vpush {d8-d15} +++ init_edge_32 ++ // load a ++ sub r1, r3 ++- vld1.8 {q0-q1}, [r1], r3 +++ vld1.8 {q0-q1}, [r1, :128], r3 ++ // load c ++- vld1.8 {q4-q5}, [r1], r3 ++-1: subs r4, #1 +++ vld1.8 {q2-q3}, [r1, :128], r3 +++1: subs r12, #1 ++ // load b ++- vld1.8 {q8-q9}, [r1], r3 +++ vld1.8 {q8-q9}, [r1, :128], r3 ++ edge_w32_body ++ // inputs for next loop iteration ++ // a ++- vmov.64 q0, q4 ++- vmov.64 q1, q5 +++ vmov.64 q0, q2 +++ vmov.64 q1, q3 ++ // c ++- vmov.64 q4, q8 ++- vmov.64 q5, q9 ++- bne 1b ++- vpop {d8-d15} ++- pop {r4-r8} ++- bx lr +++ vmov.64 q2, q8 +++ vmov.64 q3, q9 +++ bne 1b +++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo2_w32_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- ldr r5, [r5] ++- vpush {d8-d15} +++ init_edge_32 +++ vpush {d8-d15} ++ // load a ++ sub r1, r3 ++- sub r1, #8 ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 +++ sub r1, #8 +++ vld1.8 {q10-q11}, [r1, :64]! 
+++ vld1.8 {d24}, [r1, :64], r3 +++ sub r1, #32 ++ vext.8 q0, q10, q11, #7 ++ vext.8 q1, q11, q12, #7 ++ // load c ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 ++- vext.8 q4, q10, q11, #8 ++- vext.8 q5, q11, q12, #8 ++- vext.8 q2, q10, q11, #7 ++-1: subs r4, #1 +++ vld1.8 {d9}, [r1, :64]! +++ vld1.8 {q2-q3}, [r1, :64], r3 +++ sub r1, #8 +++ vext.8 q4, q4, q2, #15 +++1: subs r12, #1 ++ // load b ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 +++ vld1.8 {q10-q11}, [r1, :64]! +++ vld1.8 {q12}, [r1, :64], r3 +++ sub r1, #32 ++ vext.8 q8, q10, q11, #9 ++ vext.8 q9, q11, q12, #9 ++- vext.8 q14, q10, q11, #8 ++- vext.8 q15, q11, q12, #8 ++- vext.8 q3, q10, q11, #7 +++ vext.8 q6, q10, q11, #8 +++ vext.8 q7, q11, q12, #8 +++ vext.8 q5, q10, q11, #7 ++ edge_w32_body ++ // inputs for next loop iteration ++ // a ++- vmov.8 q0, q2 ++- vext.8 q1, q4, q5, #15 +++ vmov.8 q0, q4 +++ vext.8 q1, q2, q3, #15 ++ // c ++- vmov.8 q4, q14 ++- vmov.8 q5, q15 ++- vmov.8 q2, q3 ++- bne 1b ++- vpop {d8-d15} ++- pop {r4-r8} ++- bx lr +++ vmov.8 q2, q6 +++ vmov.8 q3, q7 +++ vmov.8 q4, q5 +++ bne 1b +++ vpop {d8-d15} +++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo3_w32_neon_8, export=1 ++- push {r4-r8} ++- ldr r4, [sp, #20] // height ++- ldr r5, [sp, #24] // sao_offset_val_table ++- ldr r6, =0x03 ++- ldr r7, [r5] ++- add r5, #4 ++- sub r1, r3 ++- ldr r5, [r5] ++- sub r1, #8 ++- vpush {d8-d15} +++ init_edge_32 +++ sub r1, r3 ++ // load a ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 ++- vext.8 q0, q10, q11, #9 ++- vext.8 q1, q11, q12, #9 +++ vld1.8 {q10-q11}, [r1, :64]! 
+++ vld1.8 {d24}, [r1, :64], r3 +++ sub r1, #32 +++ vext.8 q0, q10, q11, #1 +++ vext.8 q1, q11, q12, #1 ++ // load c ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 ++- vext.8 q4, q10, q11, #8 ++- vext.8 q5, q11, q12, #8 ++- vext.8 q2, q12, q11, #8 ++-1: subs r4, #1 +++ vld1.8 {q2-q3}, [r1, :64]! +++ vld1.8 {d30}, [r1, :64], r3 +++ sub r1, #40 +++1: subs r12, #1 ++ // load b ++- vld1.8 {q10-q11}, [r1] ++- add r1, #32 ++- vld1.8 {q12}, [r1], r3 ++- sub r1, #32 +++ vld1.8 {q10-q11}, [r1, :64]! +++ vld1.8 {q12}, [r1, :64], r3 +++ sub r1, #32 ++ vext.8 q8, q10, q11, #7 ++ vext.8 q9, q11, q12, #7 ++- vext.8 q3, q12, q10, #7 +++ vext.8 q14, q12, q10, #7 ++ edge_w32_body ++ // inputs for next loop iteration ++ // a ++- vext.8 q0, q4, q5, #1 ++- vext.8 q1, q5, q2, #1 +++ vext.8 q0, q2, q3, #1 +++ vext.8 q1, q3, q15, #1 ++ // c ++- vext.8 q4, q8, q9, #1 ++- vext.8 q5, q9, q3, #1 ++- vext.8 q2, q3, q1, #1 ++- bne 1b ++- vpop {d8-d15} ++- pop {r4-r8} ++- bx lr +++ vext.8 q2, q8, q9, #1 +++ vext.8 q3, q9, q14, #1 +++ vext.8 d30, d28, d2, #1 +++ bne 1b +++ bx lr ++ endfunc ++ ++-- ++2.5.0 ++ ++ ++From 016c39d46b86830204a4519590332d2a38f7ee51 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi <seppo.tomperi@vtt.fi> ++Date: Thu, 8 Jan 2015 09:58:55 +0200 ++Subject: [PATCH 7/9] small optimization to SAO BAND. 
correct path for ++ bit_depth_template.c ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 2 +- ++ libavcodec/arm/hevcdsp_sao_neon.S | 2 +- ++ 2 files changed, 2 insertions(+), 2 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 8d6e863..385c35d 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -23,7 +23,7 @@ ++ #include "libavcodec/hevcdsp.h" ++ #include "hevcdsp_arm.h" ++ #include "libavcodec/avcodec.h" ++-#include "../bit_depth_template.c" +++#include "libavcodec/bit_depth_template.c" ++ ++ void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ++ void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q); ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 5fc482b..710b32b 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -26,12 +26,12 @@ ++ pld [r1] ++ vld1.8 {q0, q1}, [r12] // offset table ++ ldr r12, [sp, #4] // height +++ vmov.u8 q14, #128 ++ .endm ++ ++ .macro sao_band_32 ++ vshr.u8 q8, q2, #3 ++ vshr.u8 q9, q3, #3 ++- vmov.u8 q14, #128 ++ vtbl.8 d16, {q0, q1}, d16 ++ vtbl.8 d17, {q0, q1}, d17 ++ vtbl.8 d18, {q0, q1}, d18 ++-- ++2.5.0 ++ ++ ++From 579f1584d688e1ac24fb7d22697e2a7b64f62e8e Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi <seppo.tomperi@vtt.fi> ++Date: Fri, 9 Jan 2015 10:28:52 +0200 ++Subject: [PATCH 8/9] Added height check for SAO NEON optimizations. 
Faster SAO ++ band NEON Some reordering to use NEON pipelines more efficiently ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 12 +++- ++ libavcodec/arm/hevcdsp_sao_neon.S | 142 ++++++++++++++++++++++--------------- ++ 2 files changed, 93 insertions(+), 61 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 385c35d..6d0689c 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -176,6 +176,7 @@ static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_ ++ int8_t offset_table[32] = { 0 }; ++ int k, y, x; ++ int shift = 3; // BIT_DEPTH - 5 +++ int cwidth = 0; ++ ++ stride_src /= sizeof(pixel); ++ stride_dst /= sizeof(pixel); ++@@ -183,7 +184,10 @@ static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_ ++ for (k = 0; k < 4; k++) ++ offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1]; ++ ++- switch(width){ +++ if (height % 8 == 0) +++ cwidth = width; +++ +++ switch(cwidth){ ++ case 8: ++ ff_hevc_sao_band_w8_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); ++ break; ++@@ -223,15 +227,19 @@ static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t ++ pixel *src = (pixel *)_src; ++ int a_stride, b_stride; ++ int x, y; +++ int cwidth = 0; ++ ++ for (x = 0; x < 5; x++) { ++ sao_offset_val[x] = _sao_offset_val[edge_idx[x]]; ++ } ++ +++ if (height % 8 == 0) +++ cwidth = width; +++ ++ stride_src /= sizeof(pixel); ++ stride_dst /= sizeof(pixel); ++ ++- switch (width) { +++ switch (cwidth) { ++ case 32: ++ switch(eo) { ++ case 0: ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 710b32b..08f50b8 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -26,36 +26,59 @@ ++ pld [r1] ++ vld1.8 {q0, q1}, [r12] // offset table ++ ldr r12, [sp, #4] // height ++- vmov.u8 q14, #128 +++ vmov.u8 q3, #128 ++ 
.endm ++ ++-.macro sao_band_32 ++- vshr.u8 q8, q2, #3 ++- vshr.u8 q9, q3, #3 ++- vtbl.8 d16, {q0, q1}, d16 ++- vtbl.8 d17, {q0, q1}, d17 ++- vtbl.8 d18, {q0, q1}, d18 ++- vtbl.8 d19, {q0, q1}, d19 ++- vadd.s8 q2, q14 ++- vadd.s8 q3, q14 ++- vqadd.s8 q2, q8 ++- vqadd.s8 q3, q9 ++- vsub.s8 q2, q14 ++- vsub.s8 q3, q14 +++// 128 in q3 +++// input q8 - q11 +++// 32 cycles +++.macro sao_band_64 +++ vshr.u8 q12, q8, #3 +++ vshr.u8 q13, q9, #3 +++ vshr.u8 q14, q10, #3 +++ vshr.u8 q15, q11, #3 +++ vtbl.8 d24, {d0, d1, d2, d3}, d24 +++ vadd.s8 q8, q3 +++ vtbl.8 d25, {d0, d1, d2, d3}, d25 +++ vadd.s8 q9, q3 +++ vtbl.8 d26, {d0, d1, d2, d3}, d26 +++ vadd.s8 q10, q3 +++ vtbl.8 d27, {d0, d1, d2, d3}, d27 +++ vadd.s8 q11, q3 +++ vtbl.8 d28, {d0, d1, d2, d3}, d28 +++ vqadd.s8 q8, q12 +++ vtbl.8 d29, {d0, d1, d2, d3}, d29 +++ vqadd.s8 q9, q13 +++ vtbl.8 d30, {d0, d1, d2, d3}, d30 +++ vqadd.s8 q10, q14 +++ vtbl.8 d31, {d0, d1, d2, d3}, d31 +++ vqadd.s8 q11, q15 +++ vsub.s8 q8, q3 +++ vsub.s8 q9, q3 +++ vsub.s8 q10, q3 +++ vsub.s8 q11, q3 ++ .endm ++ ++ function ff_hevc_sao_band_w8_neon_8, export=1 ++ init_sao_band ++-1: subs r12, #4 ++- vld1.8 {d4}, [r1, :64], r3 ++- vld1.8 {d5}, [r1, :64], r3 ++- vld1.8 {d6}, [r1, :64], r3 ++- vld1.8 {d7}, [r1, :64], r3 ++- sao_band_32 ++- vst1.8 {d4}, [r0, :64], r2 ++- vst1.8 {d5}, [r0, :64], r2 ++- vst1.8 {d6}, [r0, :64], r2 ++- vst1.8 {d7}, [r0, :64], r2 +++1: subs r12, #8 +++ vld1.8 {d16}, [r1, :64], r3 +++ vld1.8 {d17}, [r1, :64], r3 +++ vld1.8 {d18}, [r1, :64], r3 +++ vld1.8 {d19}, [r1, :64], r3 +++ vld1.8 {d20}, [r1, :64], r3 +++ vld1.8 {d21}, [r1, :64], r3 +++ vld1.8 {d22}, [r1, :64], r3 +++ vld1.8 {d23}, [r1, :64], r3 +++ sao_band_64 +++ vst1.8 {d16}, [r0, :64], r2 +++ vst1.8 {d17}, [r0, :64], r2 +++ vst1.8 {d18}, [r0, :64], r2 +++ vst1.8 {d19}, [r0, :64], r2 +++ vst1.8 {d20}, [r0, :64], r2 +++ vst1.8 {d21}, [r0, :64], r2 +++ vst1.8 {d22}, [r0, :64], r2 +++ vst1.8 {d23}, [r0, :64], r2 ++ bne 1b ++ ++ bx lr ++@@ -63,12 +86,16 @@ endfunc ++ 
++ function ff_hevc_sao_band_w16_neon_8, export=1 ++ init_sao_band ++-1: subs r12, #2 ++- vld1.8 {q2}, [r1, :128], r3 ++- vld1.8 {q3}, [r1, :128], r3 ++- sao_band_32 ++- vst1.8 {q2}, [r0, :128], r2 ++- vst1.8 {q3}, [r0, :128], r2 +++1: subs r12, #4 +++ vld1.8 {q8}, [r1, :128], r3 +++ vld1.8 {q9}, [r1, :128], r3 +++ vld1.8 {q10}, [r1, :128], r3 +++ vld1.8 {q11}, [r1, :128], r3 +++ sao_band_64 +++ vst1.8 {q8}, [r0, :128], r2 +++ vst1.8 {q9}, [r0, :128], r2 +++ vst1.8 {q10}, [r0, :128], r2 +++ vst1.8 {q11}, [r0, :128], r2 ++ bne 1b ++ ++ bx lr ++@@ -76,10 +103,12 @@ endfunc ++ ++ function ff_hevc_sao_band_w32_neon_8, export=1 ++ init_sao_band ++-1: subs r12, #1 ++- vld1.8 {q2-q3}, [r1, :128], r3 ++- sao_band_32 ++- vst1.8 {q2-q3}, [r0, :128], r2 +++1: subs r12, #2 +++ vld1.8 {q8-q9}, [r1, :128], r3 +++ vld1.8 {q10-q11}, [r1, :128], r3 +++ sao_band_64 +++ vst1.8 {q8-q9}, [r0, :128], r2 +++ vst1.8 {q10-q11}, [r0, :128], r2 ++ bne 1b ++ ++ bx lr ++@@ -89,13 +118,12 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #1 ++ pld [r1, r3] ++- vld1.8 {q2-q3}, [r1, :128]! ++- sao_band_32 ++- vst1.8 {q2-q3}, [r0, :128]! ++- vld1.8 {q2-q3}, [r1, :128], r3 +++ vld1.8 {q8-q9}, [r1, :128]! +++ vld1.8 {q10-q11}, [r1, :128], r3 ++ sub r1, #32 ++- sao_band_32 ++- vst1.8 {q2-q3}, [r0, :128], r2 +++ sao_band_64 +++ vst1.8 {q8-q9}, [r0, :128]! 
+++ vst1.8 {q10-q11}, [r0, :128], r2 ++ sub r0, #32 ++ bne 1b ++ ++@@ -121,7 +149,6 @@ endfunc ++ vcgt.u8 q1, q5, q9 ++ vcgt.u8 q15, q9, q5 ++ vsub.s8 q0, q14, q0 // diff1 ++- ++ vsub.s8 q1, q15, q1 ++ ++ vadd.s8 q0, q12 //diff0 + diff1 ++@@ -157,27 +184,25 @@ endfunc ++ ++ vmov.u8 q15, #128 // s8 #-128 ++ vtbl.8 d0, {d24}, d0 +++ vadd.s8 q13, q4, q15 ++ vtbl.8 d1, {d24}, d1 +++ vadd.s8 q14, q5, q15 ++ vtbl.8 d2, {d24}, d2 +++ vqadd.s8 q0, q13 ++ vtbl.8 d3, {d24}, d3 +++ vqadd.s8 q1, q14 ++ vtbl.8 d4, {d24}, d4 +++ vadd.s8 q13, q6, q15 ++ vtbl.8 d5, {d24}, d5 +++ vadd.s8 q14, q7, q15 ++ vtbl.8 d6, {d24}, d6 +++ vqadd.s8 q2, q13 ++ vtbl.8 d7, {d24}, d7 ++- ++- vadd.s8 q12, q4, q15 ++- vadd.s8 q13, q5, q15 ++- vadd.s8 q14, q6, q15 ++- vadd.s8 q15, q7, q15 ++- vqadd.s8 q12, q0 ++- vqadd.s8 q15, q3 ++- vmov.u8 q3, #128 // s8 #-128 ++- vqadd.s8 q13, q1 ++- vqadd.s8 q14, q2 ++- vsub.s8 q0, q12, q3 ++- vsub.s8 q1, q13, q3 ++- vsub.s8 q2, q14, q3 ++- vsub.s8 q3, q15, q3 +++ vqadd.s8 q3, q14 +++ vsub.s8 q0, q15 +++ vsub.s8 q1, q15 +++ vsub.s8 q2, q15 +++ vsub.s8 q3, q15 ++ vst1.8 {q0-q1}, [r0, :128]! 
++ vst1.8 {q2-q3}, [r0, :128], r2 ++ sub r0, #32 ++@@ -342,13 +367,12 @@ endfunc ++ ++ vmov.u8 q10, #128 ++ vtbl.8 d0, {d31}, d0 +++ vadd.s8 q11, q2, q10 ++ vtbl.8 d1, {d31}, d1 +++ vadd.s8 q12, q3, q10 ++ vtbl.8 d2, {d31}, d2 +++ vqadd.s8 q11, q0 ++ vtbl.8 d3, {d31}, d3 ++- ++- vadd.s8 q11, q2, q10 ++- vadd.s8 q12, q3, q10 ++- vqadd.s8 q11, q0 ++ vqadd.s8 q12, q1 ++ vsub.s8 q0, q11, q10 ++ vsub.s8 q1, q12, q10 ++-- ++2.5.0 ++ ++ ++From 026bac1824e4936e948e6b1efec82868c520ea66 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi <seppo.tomperi@vtt.fi> ++Date: Mon, 2 Feb 2015 16:08:27 +0200 ++Subject: [PATCH 9/9] Further SAO NEON optimisations ++ ++--- ++ libavcodec/arm/hevcdsp_init_neon.c | 16 +-- ++ libavcodec/arm/hevcdsp_sao_neon.S | 224 +++++++++++++++++++------------------ ++ 2 files changed, 124 insertions(+), 116 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 6d0689c..e5da7e9 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -45,10 +45,10 @@ void ff_hevc_transform_add_16x16_neon_8(uint8_t *_dst, int16_t *coeffs, ++ void ff_hevc_transform_add_32x32_neon_8(uint8_t *_dst, int16_t *coeffs, ++ ptrdiff_t stride); ++ ++-void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); ++-void ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); ++-void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); ++-void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int8_t * offset_table, int height); +++void ff_hevc_sao_band_w8_neon_8(uint8_t *_dst, uint8_t *_src, int8_t * offset_table, ptrdiff_t stride_src, ptrdiff_t stride_dst, int height); +++void 
ff_hevc_sao_band_w16_neon_8(uint8_t *_dst, uint8_t *_src, int8_t * offset_table, ptrdiff_t stride_src, ptrdiff_t stride_dst, int height); +++void ff_hevc_sao_band_w32_neon_8(uint8_t *_dst, uint8_t *_src, int8_t * offset_table, ptrdiff_t stride_src, ptrdiff_t stride_dst, int height); +++void ff_hevc_sao_band_w64_neon_8(uint8_t *_dst, uint8_t *_src, int8_t * offset_table, ptrdiff_t stride_src, ptrdiff_t stride_dst, int height); ++ ++ void ff_hevc_sao_edge_eo0_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++ void ff_hevc_sao_edge_eo1_w32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int height, int8_t *sao_offset_table); ++@@ -189,16 +189,16 @@ static void ff_hevc_sao_band_neon_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_ ++ ++ switch(cwidth){ ++ case 8: ++- ff_hevc_sao_band_w8_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); +++ ff_hevc_sao_band_w8_neon_8(_dst, _src, offset_table, stride_src, stride_dst, height); ++ break; ++ case 16: ++- ff_hevc_sao_band_w16_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); +++ ff_hevc_sao_band_w16_neon_8(_dst, _src, offset_table, stride_src, stride_dst, height); ++ break; ++ case 32: ++- ff_hevc_sao_band_w32_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); +++ ff_hevc_sao_band_w32_neon_8(_dst, _src, offset_table, stride_src, stride_dst, height); ++ break; ++ case 64: ++- ff_hevc_sao_band_w64_neon_8(_dst, _src, stride_dst, stride_src, offset_table, height); +++ ff_hevc_sao_band_w64_neon_8(_dst, _src, offset_table, stride_src, stride_dst, height); ++ break; ++ default: ++ for (y = 0; y < height; y++) { ++diff --git a/libavcodec/arm/hevcdsp_sao_neon.S b/libavcodec/arm/hevcdsp_sao_neon.S ++index 08f50b8..9c7808d 100644 ++--- a/libavcodec/arm/hevcdsp_sao_neon.S +++++ b/libavcodec/arm/hevcdsp_sao_neon.S ++@@ -22,21 +22,16 @@ ++ #include "neon.S" ++ ++ .macro init_sao_band ++- 
ldr r12, [sp, #0] // offset_table address ++ pld [r1] ++- vld1.8 {q0, q1}, [r12] // offset table ++- ldr r12, [sp, #4] // height +++ vld1.8 {q0, q1}, [r2] // offset table +++ ldr r2, [sp, #0] // stride_dst +++ ldr r12, [sp, #4] // height ++ vmov.u8 q3, #128 ++ .endm ++ ++ // 128 in q3 ++ // input q8 - q11 ++-// 32 cycles ++ .macro sao_band_64 ++- vshr.u8 q12, q8, #3 ++- vshr.u8 q13, q9, #3 ++- vshr.u8 q14, q10, #3 ++- vshr.u8 q15, q11, #3 ++ vtbl.8 d24, {d0, d1, d2, d3}, d24 ++ vadd.s8 q8, q3 ++ vtbl.8 d25, {d0, d1, d2, d3}, d25 ++@@ -52,8 +47,8 @@ ++ vtbl.8 d30, {d0, d1, d2, d3}, d30 ++ vqadd.s8 q10, q14 ++ vtbl.8 d31, {d0, d1, d2, d3}, d31 ++- vqadd.s8 q11, q15 ++ vsub.s8 q8, q3 +++ vqadd.s8 q11, q15 ++ vsub.s8 q9, q3 ++ vsub.s8 q10, q3 ++ vsub.s8 q11, q3 ++@@ -64,12 +59,16 @@ function ff_hevc_sao_band_w8_neon_8, export=1 ++ 1: subs r12, #8 ++ vld1.8 {d16}, [r1, :64], r3 ++ vld1.8 {d17}, [r1, :64], r3 +++ vshr.u8 q12, q8, #3 ++ vld1.8 {d18}, [r1, :64], r3 ++ vld1.8 {d19}, [r1, :64], r3 +++ vshr.u8 q13, q9, #3 ++ vld1.8 {d20}, [r1, :64], r3 ++ vld1.8 {d21}, [r1, :64], r3 +++ vshr.u8 q14, q10, #3 ++ vld1.8 {d22}, [r1, :64], r3 ++ vld1.8 {d23}, [r1, :64], r3 +++ vshr.u8 q15, q11, #3 ++ sao_band_64 ++ vst1.8 {d16}, [r0, :64], r2 ++ vst1.8 {d17}, [r0, :64], r2 ++@@ -88,9 +87,13 @@ function ff_hevc_sao_band_w16_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #4 ++ vld1.8 {q8}, [r1, :128], r3 +++ vshr.u8 q12, q8, #3 ++ vld1.8 {q9}, [r1, :128], r3 +++ vshr.u8 q13, q9, #3 ++ vld1.8 {q10}, [r1, :128], r3 +++ vshr.u8 q14, q10, #3 ++ vld1.8 {q11}, [r1, :128], r3 +++ vshr.u8 q15, q11, #3 ++ sao_band_64 ++ vst1.8 {q8}, [r0, :128], r2 ++ vst1.8 {q9}, [r0, :128], r2 ++@@ -105,7 +108,11 @@ function ff_hevc_sao_band_w32_neon_8, export=1 ++ init_sao_band ++ 1: subs r12, #2 ++ vld1.8 {q8-q9}, [r1, :128], r3 +++ vshr.u8 q12, q8, #3 +++ vshr.u8 q13, q9, #3 ++ vld1.8 {q10-q11}, [r1, :128], r3 +++ vshr.u8 q14, q10, #3 +++ vshr.u8 q15, q11, #3 ++ sao_band_64 ++ vst1.8 {q8-q9}, [r0, 
:128], r2 ++ vst1.8 {q10-q11}, [r0, :128], r2 ++@@ -119,7 +126,11 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ 1: subs r12, #1 ++ pld [r1, r3] ++ vld1.8 {q8-q9}, [r1, :128]! +++ vshr.u8 q12, q8, #3 +++ vshr.u8 q13, q9, #3 ++ vld1.8 {q10-q11}, [r1, :128], r3 +++ vshr.u8 q14, q10, #3 +++ vshr.u8 q15, q11, #3 ++ sub r1, #32 ++ sao_band_64 ++ vst1.8 {q8-q9}, [r0, :128]! ++@@ -129,51 +140,18 @@ function ff_hevc_sao_band_w64_neon_8, export=1 ++ ++ bx lr ++ endfunc ++-// input ++-// a in q0 - q3 ++-// c in q4 - q7 ++-// b in q8 - q11 ++-// offset table in r7 and r5 ++-// output in q0 - q3 ++-// clobbers q12 - q15 ++-.macro edge_w64_body ++- vcgt.u8 q12, q4, q0 // c > a -> -1 , otherwise 0 ++- vcgt.u8 q0, q0, q4 // a > c -> -1 , otherwise 0 ++- vcgt.u8 q13, q5, q1 ++- vcgt.u8 q1, q1, q5 ++- vsub.s8 q12, q0, q12 // diff0 ++- vcgt.u8 q0, q4, q8 // c > b ++- vsub.s8 q13, q1, q13 ++- ++- vcgt.u8 q14, q8, q4 // b > c ++- vcgt.u8 q1, q5, q9 ++- vcgt.u8 q15, q9, q5 ++- vsub.s8 q0, q14, q0 // diff1 ++- vsub.s8 q1, q15, q1 ++ ++- vadd.s8 q0, q12 //diff0 + diff1 ++- vadd.s8 q1, q13 ++- ++- vcgt.u8 q14, q6, q2 ++- vcgt.u8 q2, q2, q6 ++- vcgt.u8 q15, q7, q3 ++- vcgt.u8 q3, q3, q7 ++- ++- vsub.s8 q14, q2, q14 ++- vcgt.u8 q2, q6, q10 ++- vsub.s8 q15, q3, q15 ++- ++- vcgt.u8 q12, q10, q6 ++- vcgt.u8 q3, q7, q11 ++- vcgt.u8 q13, q11, q7 ++- vsub.s8 q2, q12, q2 ++- vsub.s8 q3, q13, q3 +++.macro diff32 out0, out1, tmp0, tmp1, in0, in1, in2, in3 +++ vcgt.u8 \out0, \in2, \in0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 \tmp0, \in0, \in2 // a > c -> -1 , otherwise 0 +++ vcgt.u8 \out1, \in3, \in1 // c > a -> -1 , otherwise 0 part 2 +++ vcgt.u8 \tmp1, \in1, \in3 // a > c -> -1 , otherwise 0 part 2 +++ vsub.s8 \out0, \tmp0, \out0 // diff0 +++ vsub.s8 \out1, \tmp1, \out1 // diff0 part 2 +++.endm ++ +++.macro table64 ++ vmov.s8 q13, #2 // 2 to all elements ++- ++- vadd.s8 q2, q14 ++- vadd.s8 q3, q15 ++- ++ vmov.32 d24[0], r4 // load offset table from general registers ++ vmov.32 d24[1], r5 // 
load rest of offset table ++ ++@@ -208,6 +186,28 @@ endfunc ++ sub r0, #32 ++ .endm ++ +++// input +++// a in q0 - q3 +++// c in q4 - q7 +++// b in q8 - q11 +++// offset table in r7 and r5 +++// output in q0 - q3 +++// clobbers q12 - q15 +++.macro edge_w64_body +++ diff32 q12, q13, q0, q1, q0, q1, q4, q5 +++ diff32 q0, q1, q14, q15, q8, q9, q4, q5 +++ +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 +++ +++ diff32 q14, q15, q2, q3, q2, q3, q6, q7 +++ diff32 q2, q3, q12, q13, q10, q11, q6, q7 +++ +++ vadd.s8 q2, q14 +++ vadd.s8 q3, q15 +++ table64 +++.endm +++ ++ .macro init_edge_64 ++ push {r4-r5} ++ ldr r12, [sp, #8] // height ++@@ -334,38 +334,23 @@ function ff_hevc_sao_edge_eo3_w64_neon_8, export=1 ++ bx lr ++ endfunc ++ ++-// inputs: ++-// a in q0, q1 ++-// c in q2, q3 ++-// b in q8, q9 ++-// offset table in d31 ++-// clobbered registers q0, q1, q10, q11, q12, q13 ++-// output q0, q1 ++-.macro edge_w32_body ++- vcgt.u8 q12, q2, q0 // c > a -> -1 , otherwise 0 ++- vcgt.u8 q0, q0, q2 // a > c -> -1 , otherwise 0 ++- vcgt.u8 q13, q3, q1 ++- vcgt.u8 q1, q1, q3 ++- ++- vsub.s8 q12, q0, q12 // diff0 ++- vcgt.u8 q0, q2, q8 // c > b ++- vsub.s8 q13, q1, q13 // diff0 part 2 ++- ++- vcgt.u8 q10, q8, q2 // b > c ++- vcgt.u8 q1, q3, q9 ++- vcgt.u8 q11, q9, q3 ++- ++- vsub.s8 q0, q10, q0 // diff1 ++- ++- vmov.s8 q10, #2 // 2 to all elements ++- vsub.s8 q1, q11, q1 // diff1 part 2 ++- vadd.s8 q0, q12 //diff0 + diff1 ++- vadd.s8 q1, q13 +++.macro init_edge_32 +++ ldr r12, [sp, #4] // sao_offset_val_table +++ vld1.32 {d31}, [r12] +++ ldr r12, [sp] // height +++.endm ++ ++- vadd.s8 q0, q10 ++- vadd.s8 q1, q10 +++.macro diff out0, tmp0, in0, in1 +++ vcgt.u8 \out0, \in1, \in0 // c > a -> -1 , otherwise 0 +++ vcgt.u8 \tmp0, \in0, \in1 // a > c -> -1 , otherwise 0 +++ vsub.s8 \out0, \tmp0, \out0 // diff0 +++.endm ++ ++- vmov.u8 q10, #128 +++.macro table32 +++ vmov.s8 q10, #2 +++ vadd.s8 q0, q10 +++ vadd.s8 q1, q10 +++ vmov.s8 q10, #128 ++ vtbl.8 d0, {d31}, d0 ++ vadd.s8 q11, 
q2, q10 ++ vtbl.8 d1, {d31}, d1 ++@@ -373,56 +358,68 @@ endfunc ++ vtbl.8 d2, {d31}, d2 ++ vqadd.s8 q11, q0 ++ vtbl.8 d3, {d31}, d3 ++- vqadd.s8 q12, q1 ++- vsub.s8 q0, q11, q10 ++- vsub.s8 q1, q12, q10 +++ vqadd.s8 q12, q1 +++ vsub.s8 q0, q11, q10 +++ vsub.s8 q1, q12, q10 ++ vst1.8 {q0-q1}, [r0, :128], r2 ++ .endm ++ ++-.macro init_edge_32 ++- ldr r12, [sp, #4] // sao_offset_val_table ++- vld1.32 {d31}, [r12] ++- ldr r12, [sp] // height ++-.endm ++- ++ function ff_hevc_sao_edge_eo0_w32_neon_8, export=1 ++ init_edge_32 ++- sub r1, #4 // load 4 extra bytes +++ vpush {q4-q7} +++ sub r1, #4 ++ 1: subs r12, #1 ++- vld1.32 d3[1], [r1]! ++- vld1.8 {q2-q3}, [r1, :128]! // c ++- vld1.32 d20[0], [r1], r3 ++- sub r1, #36 +++ vld1.8 {q13-q14}, [r1]! +++ vld1.32 d30, [r1], r3 +++ sub r1, #32 ++ // a ++- vext.8 q0, q1, q2, #15 ++- vext.8 q1, q2, q3, #15 ++- // b ++- vext.8 q8, q2, q3, #1 ++- vext.8 q9, q3, q10, #1 ++- edge_w32_body +++ vext.8 q0, q13, q14, #3 +++ vext.8 q1, q14, q15, #3 +++ vshr.u64 d24, d30, #24 +++ // c +++ vext.8 q2, q13, q14, #4 +++ vext.8 q3, q14, q15, #4 +++ vshr.u64 d16, d30, #32 +++ // diff0 +++ diff32 q13, q14, q4, q5, q0, q1, q2, q3 +++ diff d18, d25, d24, d16 +++ // -diff1 +++ vext.s8 q0, q13, q14, #1 +++ vext.s8 q1, q14, q9, #1 +++ +++ vsub.s8 q0, q13, q0 //diff0 + diff1 +++ vsub.s8 q1, q14, q1 +++ table32 ++ bne 1b +++ vpop {q4-q7} +++ ++ bx lr ++ endfunc ++ ++ function ff_hevc_sao_edge_eo1_w32_neon_8, export=1 ++ init_edge_32 +++ vpush {q4-q7} ++ // load a ++ sub r1, r3 ++ vld1.8 {q0-q1}, [r1, :128], r3 ++ // load c ++ vld1.8 {q2-q3}, [r1, :128], r3 +++ diff32 q12, q13, q0, q1, q0, q1, q2, q3 // CMP ( c, a ) ++ 1: subs r12, #1 ++ // load b ++ vld1.8 {q8-q9}, [r1, :128], r3 ++- edge_w32_body ++- // inputs for next loop iteration ++- // a ++- vmov.64 q0, q2 ++- vmov.64 q1, q3 +++ diff32 q4, q5, q10, q11, q8, q9, q2, q3 // CMP ( c, b ) +++ vadd.s8 q0, q4, q12 //diff0 + diff1 +++ vadd.s8 q1, q5, q13 +++ table32 +++ // CMP ( c, a ) +++ vneg.s8 q12, q4 
+++ vneg.s8 q13, q5 ++ // c ++ vmov.64 q2, q8 ++ vmov.64 q3, q9 ++ bne 1b +++ vpop {q4-q7} ++ bx lr ++ endfunc ++ ++@@ -452,7 +449,11 @@ function ff_hevc_sao_edge_eo2_w32_neon_8, export=1 ++ vext.8 q6, q10, q11, #8 ++ vext.8 q7, q11, q12, #8 ++ vext.8 q5, q10, q11, #7 ++- edge_w32_body +++ diff32 q12, q13, q0, q1, q0, q1, q2, q3 +++ diff32 q0, q1, q10, q11, q8, q9, q2, q3 +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 +++ table32 ++ // inputs for next loop iteration ++ // a ++ vmov.8 q0, q4 ++@@ -487,7 +488,14 @@ function ff_hevc_sao_edge_eo3_w32_neon_8, export=1 ++ vext.8 q8, q10, q11, #7 ++ vext.8 q9, q11, q12, #7 ++ vext.8 q14, q12, q10, #7 ++- edge_w32_body +++ +++ diff32 q12, q13, q0, q1, q0, q1, q2, q3 +++ diff32 q0, q1, q10, q11, q8, q9, q2, q3 +++ +++ vadd.s8 q0, q12 //diff0 + diff1 +++ vadd.s8 q1, q13 +++ table32 +++ ++ // inputs for next loop iteration ++ // a ++ vext.8 q0, q2, q3, #1 ++-- ++2.5.0 ++ +diff --git a/tools/depends/target/ffmpeg/autobuild.sh b/tools/depends/target/ffmpeg/autobuild.sh +index 9c26b239c2b2c1221bed7c4d99c46e909a4a5c5d..b9590d7b200a2ccf0fe3aa660e3b08b82d2133fc 100755 +--- a/tools/depends/target/ffmpeg/autobuild.sh ++++ b/tools/depends/target/ffmpeg/autobuild.sh +@@ -128,6 +128,9 @@ cd "ffmpeg-${VERSION}" || exit 2 + tar --strip-components=1 -xf $MYDIR/${ARCHIVE} + + patch -p1 < ../../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch ++patch -p1 < ../../0001-Discard-data-before-VO-VOL-in-mpeg-4-over-mpegts.patch ++patch -p1 < ../../hevcdsp_ARM_NEON_optimized_epel_functions.patch ++patch -p1 < ../../added_ARM_NEON_optimized_SAO_patches.patch + + CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" \ + ./configure --prefix=$FFMPEG_PREFIX \ +diff --git a/tools/depends/target/ffmpeg/hevcdsp_ARM_NEON_optimized_epel_functions.patch b/tools/depends/target/ffmpeg/hevcdsp_ARM_NEON_optimized_epel_functions.patch +new file mode 100644 +index 
0000000000000000000000000000000000000000..5e8e07d407f045fc99554f0f061d1e818716ac62 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/hevcdsp_ARM_NEON_optimized_epel_functions.patch +@@ -0,0 +1,409 @@ ++From 29c3327a0d72a7e872ff170363cfe5ed13bca5d0 Mon Sep 17 00:00:00 2001 ++From: Seppo Tomperi <seppo.tomperi@vtt.fi> ++Date: Tue, 22 Dec 2015 18:10:24 +0000 ++Subject: [PATCH] hevcdsp: ARM NEON optimized epel functions ++ ++--- ++ libavcodec/arm/Makefile | 1 + ++ libavcodec/arm/hevcdsp_epel_neon.S | 334 +++++++++++++++++++++++++++++++++++++ ++ libavcodec/arm/hevcdsp_init_neon.c | 23 +++ ++ 3 files changed, 358 insertions(+) ++ create mode 100644 libavcodec/arm/hevcdsp_epel_neon.S ++ ++diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile ++index cdd35b0..6051ec8 100644 ++--- a/libavcodec/arm/Makefile +++++ b/libavcodec/arm/Makefile ++@@ -131,6 +131,7 @@ NEON-OBJS-$(CONFIG_DCA_DECODER) += arm/dcadsp_neon.o \ ++ arm/synth_filter_neon.o ++ NEON-OBJS-$(CONFIG_HEVC_DECODER) += arm/hevcdsp_init_neon.o \ ++ arm/hevcdsp_deblock_neon.o \ +++ arm/hevcdsp_epel_neon.o \ ++ arm/hevcdsp_idct_neon.o \ ++ arm/hevcdsp_qpel_neon.o ++ NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o ++diff --git a/libavcodec/arm/hevcdsp_epel_neon.S b/libavcodec/arm/hevcdsp_epel_neon.S ++new file mode 100644 ++index 0000000..516ae5b ++--- /dev/null +++++ b/libavcodec/arm/hevcdsp_epel_neon.S ++@@ -0,0 +1,334 @@ +++/* +++ * Copyright (c) 2014 - 2015 Seppo Tomperi <seppo.tomperi@vtt.fi> +++ * +++ * This file is part of FFmpeg. +++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++#include "libavutil/arm/asm.S" +++#include "neon.S" +++ +++#define MAX_PB_SIZE #64 +++ +++.macro vextin_d4 +++ vld1.8 {q10}, [r1], r2 +++ vmov d16, d20 +++ vext.8 d17, d20, d21, #1 +++ vext.8 d18, d20, d21, #2 +++ vext.8 d19, d20, d21, #3 +++.endm +++ +++.macro vextin_d4_8 +++ vld1.8 d16, [r1], r2 +++ vext.8 d17, d16, d16, #1 +++ vext.8 d18, d16, d16, #2 +++ vext.8 d19, d16, d16, #3 +++.endm +++ +++.macro load_coeffs_16b coeffs +++ ldr \coeffs, [\coeffs] +++ vdup.i8 d0, \coeffs +++ lsr \coeffs, #8 +++ vdup.i8 d1, \coeffs +++ lsr \coeffs, #8 +++ vdup.i8 d2, \coeffs +++ lsr \coeffs, #8 +++ vdup.i8 d3, \coeffs +++.endm +++ +++.macro epel_filter_16b out=q12 +++ vmull.u8 q3, d16, d0 +++ vmull.u8 q11, d19, d3 +++ vmull.u8 \out, d17, d1 +++ vmull.u8 q10, d18, d2 +++ vadd.s16 q3, q11 +++ vadd.s16 \out, q10 +++ vsub.s16 \out, q3 +++.endm +++ +++.macro load_coeffs_32b coeffs +++ ldr \coeffs, [\coeffs] +++ vmov.i64 d4, #0 +++ vmov.8 d4[0], \coeffs +++ lsr \coeffs, #8 +++ vmov.8 d4[2], \coeffs +++ lsr \coeffs, #8 +++ vmov.8 d4[4], \coeffs +++ lsr \coeffs, #8 +++ vmov.8 d4[6], \coeffs +++.endm +++ +++.macro epel_filter_32b +++ vmull.s16 q3, d24, d4[0] //q12 +++ vmull.s16 q4, d25, d4[0] +++ vmull.s16 q5, d30, d4[3] //q15 +++ vmull.s16 q6, d31, d4[3] +++ +++ vmull.s16 q7, d26, d4[1] // q13 +++ vmull.s16 q8, d27, d4[1] +++ vmull.s16 q9, d28, d4[2] // q14 +++ vmull.s16 q10, d29, d4[2] +++ vadd.s32 q3, q5 +++ vadd.s32 q4, q6 +++ vadd.s32 q7, q9 +++ vadd.s32 q8, q10 +++ vsub.s32 q7, q3 +++ vsub.s32 q8, q4 +++ vqshrn.s32 d6, q7, #6 +++ vqshrn.s32 d7, q8, #6 +++.endm +++ +++.macro epel_filter_32b_4 +++ vmull.s16 q3, d24, d4[0] //q12 +++ vmull.s16 q5, d30, d4[3] //q15 +++ vmull.s16 
q7, d26, d4[1] // q13 +++ vmull.s16 q9, d28, d4[2] // q14 +++ vadd.s32 q3, q5 +++ vadd.s32 q7, q9 +++ vsub.s32 q7, q3 +++ vqshrn.s32 d6, q7, #6 +++.endm +++ +++function ff_hevc_put_epel_h_neon_8, export=1 +++ push {r4-r7} +++ mov r4, MAX_PB_SIZE +++ ldr r7, [sp, #16] // mx +++ ldr r5, [sp, #24] // width +++ sub r7, #1 +++ lsl r7, #2 +++ vpush {d8-d15} +++ adrl r12, epel_coeffs +++ add r7, r12 +++ sub r1, #1 +++ lsl r4, #1 +++ load_coeffs_16b r7 +++ mov r12, r3 +++ mov r6, r0 +++ mov r7, r1 +++ cmp r5, #6 +++ bgt 8f +++ cmp r5, #4 +++ blt 2f +++ b 4f +++8: subs r3, #1 +++ pld [r1] +++ vextin_d4 +++ epel_filter_16b +++ vst1.16 {q12}, [r0], r4 +++ bne 8b +++ subs r5, #8 +++ beq 99f +++ mov r3, r12 +++ add r6, #16 +++ mov r0, r6 +++ add r7, #8 +++ mov r1, r7 +++ cmp r5, #4 +++ bgt 8b +++4: subs r3, #1 +++ pld [r1] +++ vextin_d4_8 +++ epel_filter_16b +++ vst1.16 d24, [r0], r4 +++ bne 4b +++ subs r5, #4 +++ beq 99f +++ mov r3, r12 +++ add r6, #8 +++ mov r0, r6 +++ add r7, #4 +++ mov r1, r7 +++2: subs r3, #1 +++ pld [r1] +++ vextin_d4_8 +++ epel_filter_16b +++ vst1.32 d24[0], [r0], r4 +++ bne 2b +++99: vpop {d8-d15} +++ pop {r4-r7} +++ bx lr +++endfunc +++ +++function ff_hevc_put_epel_v_neon_8, export=1 +++ push {r4-r7} +++ mov r4, MAX_PB_SIZE +++ ldr r7, [sp, #20] // my +++ ldr r5, [sp, #24] // width +++ sub r7, #1 +++ lsl r7, #2 +++ vpush {d8-d15} +++ adrl r12, epel_coeffs +++ add r7, r12 +++ load_coeffs_16b r7 +++ sub r1, r2 +++ lsl r4, #1 +++ mov r12, r3 +++ mov r6, r0 +++ mov r7, r1 +++0: pld [r1] +++ vld1.8 {d16}, [r1], r2 +++ pld [r1] +++ vld1.8 {d17}, [r1], r2 +++ pld [r1] +++ vld1.8 {d18}, [r1], r2 +++ cmp r5, #6 +++ bgt 8f +++ cmp r5, #4 +++ blt 2f +++ b 4f +++8: pld [r1] +++ vld1.8 {d19}, [r1], r2 +++ subs r3, #1 +++ epel_filter_16b +++ vst1.16 {q12}, [r0], r4 +++ vmov d16, d17 +++ vmov d17, d18 +++ vmov d18, d19 +++ bne 8b +++ subs r5, #8 +++ beq 99f +++ mov r3, r12 +++ add r6, #16 +++ mov r0, r6 +++ add r7, #8 +++ mov r1, r7 +++ b 0b +++4: pld [r1] +++ vld1.8 
{d19}, [r1], r2 +++ subs r3, #1 +++ epel_filter_16b +++ vst1.16 d24, [r0], r4 +++ vmov d16, d17 +++ vmov d17, d18 +++ vmov d18, d19 +++ bne 4b +++ subs r5, #4 +++ beq 99f +++ mov r3, r12 +++ add r6, #8 +++ mov r0, r6 +++ add r7, #4 +++ mov r1, r7 +++ b 0b +++2: pld [r1] +++ vld1.8 {d19}, [r1], r2 +++ subs r3, #1 +++ epel_filter_16b +++ vst1.32 d24[0], [r0], r4 +++ vmov d16, d17 +++ vmov d17, d18 +++ vmov d18, d19 +++ bne 2b +++99: vpop {d8-d15} +++ pop {r4-r7} +++ bx lr +++endfunc +++ +++function ff_hevc_put_epel_hv_neon_8, export=1 +++ push {r4-r7} +++ mov r4, MAX_PB_SIZE +++ ldr r6, [sp, #16] // mx +++ ldr r7, [sp, #20] // my +++ ldr r5, [sp, #24] // width +++ sub r7, #1 +++ lsl r7, #2 +++ vpush {d8-d15} +++ adrl r12, epel_coeffs +++ sub r6, #1 +++ lsl r6, #2 +++ add r6, r12 // mx epel coeff offset +++ add r7, r12 +++ sub r1, #1 +++ sub r1, r2 +++ lsl r4, #1 +++ load_coeffs_16b r6 +++ load_coeffs_32b r7 +++ mov r12, r3 +++ mov r6, r0 +++ mov r7, r1 +++0: pld [r1] +++ vextin_d4 +++ epel_filter_16b q12 +++ pld [r1] +++ vextin_d4 +++ epel_filter_16b q13 +++ pld [r1] +++ vextin_d4 +++ epel_filter_16b q14 +++ cmp r5, #6 +++ bgt 8f +++ cmp r5, #4 +++ blt 2f +++ b 4f +++8: pld [r1] +++ vextin_d4 +++ epel_filter_16b q15 +++ subs r3, #1 +++ epel_filter_32b +++ vst1.16 {q3}, [r0], r4 +++ vmov q12, q13 +++ vmov q13, q14 +++ vmov q14, q15 +++ bne 8b +++ subs r5, #8 +++ beq 99f +++ mov r3, r12 +++ add r6, #16 +++ mov r0, r6 +++ add r7, #8 +++ mov r1, r7 +++ b 0b +++4: pld [r1] +++ vextin_d4_8 +++ epel_filter_16b q15 +++ subs r3, #1 +++ epel_filter_32b_4 +++ vst1.16 d6, [r0], r4 +++ vmov q12, q13 +++ vmov q13, q14 +++ vmov q14, q15 +++ bne 4b +++ subs r5, #4 +++ beq 99f +++ mov r3, r12 +++ add r6, #8 +++ mov r0, r6 +++ add r7, #4 +++ mov r1, r7 +++ b 0b +++2: pld [r1] +++ vextin_d4_8 +++ epel_filter_16b q15 +++ subs r3, #1 +++ epel_filter_32b_4 +++ vst1.32 d6[0], [r0], r4 +++ vmov q12, q13 +++ vmov q13, q14 +++ vmov q14, q15 +++ bne 2b +++99: vpop {d8-d15} +++ pop {r4-r7} +++ 
bx lr +++endfunc +++ +++epel_coeffs: +++ .byte 2, 58, 10, 2 +++ .byte 4, 54, 16, 2 +++ .byte 6, 46, 28, 4 +++ .byte 4, 36, 36, 4 +++ .byte 4, 28, 46, 6 +++ .byte 2, 16, 54, 4 +++ .byte 2, 10, 58, 2 ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index 5591807..733ff08 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -58,6 +58,15 @@ PUT_PIXELS(ff_hevc_put_pixels_w32_neon_8); ++ PUT_PIXELS(ff_hevc_put_pixels_w48_neon_8); ++ PUT_PIXELS(ff_hevc_put_pixels_w64_neon_8); ++ #undef PUT_PIXELS +++void ff_hevc_put_epel_h_neon_8(int16_t *dst, uint8_t *src, +++ ptrdiff_t srcstride, int height, +++ intptr_t mx, intptr_t my, int width); +++void ff_hevc_put_epel_v_neon_8(int16_t *dst, uint8_t *src, +++ ptrdiff_t srcstride, int height, +++ intptr_t mx, intptr_t my, int width); +++void ff_hevc_put_epel_hv_neon_8(int16_t *dst, uint8_t *src, +++ ptrdiff_t srcstride, int height, +++ intptr_t mx, intptr_t my, int width); ++ ++ static void (*put_hevc_qpel_neon[4][4])(int16_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, ++ int height, int width); ++@@ -201,7 +210,21 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ c->put_hevc_qpel_bi[x][1][0] = ff_hevc_put_qpel_bi_neon_wrapper; ++ c->put_hevc_qpel_bi[x][0][1] = ff_hevc_put_qpel_bi_neon_wrapper; ++ c->put_hevc_qpel_bi[x][1][1] = ff_hevc_put_qpel_bi_neon_wrapper; +++ c->put_hevc_epel[x][1][0] = ff_hevc_put_epel_v_neon_8; +++ c->put_hevc_epel[x][0][1] = ff_hevc_put_epel_h_neon_8; +++ c->put_hevc_epel[x][1][1] = ff_hevc_put_epel_hv_neon_8; ++ } +++ c->put_hevc_epel[0][0][0] = ff_hevc_put_pixels_w2_neon_8; +++ c->put_hevc_epel[1][0][0] = ff_hevc_put_pixels_w4_neon_8; +++ c->put_hevc_epel[2][0][0] = ff_hevc_put_pixels_w6_neon_8; +++ c->put_hevc_epel[3][0][0] = ff_hevc_put_pixels_w8_neon_8; +++ c->put_hevc_epel[4][0][0] = ff_hevc_put_pixels_w12_neon_8; +++ c->put_hevc_epel[5][0][0] = 
ff_hevc_put_pixels_w16_neon_8; +++ c->put_hevc_epel[6][0][0] = ff_hevc_put_pixels_w24_neon_8; +++ c->put_hevc_epel[7][0][0] = ff_hevc_put_pixels_w32_neon_8; +++ c->put_hevc_epel[8][0][0] = ff_hevc_put_pixels_w48_neon_8; +++ c->put_hevc_epel[9][0][0] = ff_hevc_put_pixels_w64_neon_8; +++ ++ c->put_hevc_qpel[0][0][0] = ff_hevc_put_pixels_w2_neon_8; ++ c->put_hevc_qpel[1][0][0] = ff_hevc_put_pixels_w4_neon_8; ++ c->put_hevc_qpel[2][0][0] = ff_hevc_put_pixels_w6_neon_8; ++-- ++2.5.0 ++ + +From 641013389142290475c0c053cf2cbd3a4866eae0 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Thu, 7 May 2015 14:04:18 +0100 +Subject: [PATCH 28/67] [ffmpeg] Add GPU acceleration to hevc + +--- + tools/depends/target/ffmpeg/Makefile | 4 +- + tools/depends/target/ffmpeg/autobuild.sh | 1 + + .../target/ffmpeg/pfcd_hevc_optimisations.patch | 38136 +++++++++++++++++++ + 3 files changed, 38140 insertions(+), 1 deletion(-) + create mode 100644 tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch + +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index d9db534dd8c59a4993a3509737d901fbb3923de8..2dc4addea504d142eb74385653584bf39b253156 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -2,7 +2,8 @@ include ../../Makefile.include + include FFMPEG-VERSION + DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ + 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch \ +- hevcdsp_ARM_NEON_optimized_epel_functions.patch added_ARM_NEON_optimized_SAO_patches.patch ++ hevcdsp_ARM_NEON_optimized_epel_functions.patch added_ARM_NEON_optimized_SAO_patches.patch \ ++ pfcd_hevc_optimisations.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -81,6 +82,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); patch -p1 < ../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch + cd 
$(PLATFORM); patch -p1 < ../hevcdsp_ARM_NEON_optimized_epel_functions.patch + cd $(PLATFORM); patch -p1 < ../added_ARM_NEON_optimized_SAO_patches.patch ++ cd $(PLATFORM); patch -p1 < ../pfcd_hevc_optimisations.patch + + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ +diff --git a/tools/depends/target/ffmpeg/autobuild.sh b/tools/depends/target/ffmpeg/autobuild.sh +index b9590d7b200a2ccf0fe3aa660e3b08b82d2133fc..b6bd57731bca6dfe5f814a4043b3e08d1bb08318 100755 +--- a/tools/depends/target/ffmpeg/autobuild.sh ++++ b/tools/depends/target/ffmpeg/autobuild.sh +@@ -131,6 +131,7 @@ patch -p1 < ../../0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patc + patch -p1 < ../../0001-Discard-data-before-VO-VOL-in-mpeg-4-over-mpegts.patch + patch -p1 < ../../hevcdsp_ARM_NEON_optimized_epel_functions.patch + patch -p1 < ../../added_ARM_NEON_optimized_SAO_patches.patch ++patch -p1 < ../../pfcd_hevc_optimisations.patch + + CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" \ + ./configure --prefix=$FFMPEG_PREFIX \ +diff --git a/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch b/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..e172ebf157aebffe1ae50b4a2b25fd71bc708c93 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch +@@ -0,0 +1,38136 @@ ++From b9b5434c61afd492a54dad5158b4d56ecbf7f01d Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Tue, 28 Apr 2015 16:18:40 +0100 ++Subject: [PATCH 01/68] Added display output ++ ++--- ++ ffmpeg.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ++ 1 file changed, 159 insertions(+) ++ ++diff --git a/ffmpeg.c b/ffmpeg.c ++index 9ffd833..50c6e86 100644 ++--- a/ffmpeg.c +++++ b/ffmpeg.c ++@@ -23,6 +23,11 @@ ++ * multimedia converter based on the FFmpeg libraries ++ */ ++ +++#ifdef RPI +++#define RPI_DISPLAY 
+++//#define RPI_ZERO_COPY +++#endif +++ ++ #include "config.h" ++ #include <ctype.h> ++ #include <string.h> ++@@ -66,6 +71,20 @@ ++ # include "libavfilter/buffersrc.h" ++ # include "libavfilter/buffersink.h" ++ +++#ifdef RPI_DISPLAY +++#include <bcm_host.h> +++#include <interface/mmal/mmal.h> +++#include <interface/mmal/mmal_parameters_camera.h> +++#include <interface/mmal/mmal_buffer.h> +++#include <interface/mmal/util/mmal_util.h> +++#include <interface/mmal/util/mmal_default_components.h> +++#include <interface/mmal/util/mmal_connection.h> +++#include <interface/mmal/util/mmal_util_params.h> +++#ifdef RPI_ZERO_COPY +++#include "libavcodec/rpi_qpu.h" +++#endif +++#endif +++ ++ #if HAVE_SYS_RESOURCE_H ++ #include <sys/time.h> ++ #include <sys/types.h> ++@@ -158,6 +177,134 @@ static int restore_tty; ++ static void free_input_threads(void); ++ #endif ++ +++#ifdef RPI_DISPLAY +++ +++#define NUM_BUFFERS 4 +++ +++static MMAL_COMPONENT_T* rpi_display = NULL; +++static MMAL_POOL_T *rpi_pool = NULL; +++ +++#ifdef RPI_ZERO_COPY +++static uint8_t *get_vc_handle(AVBufferRef *bref) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); +++ return (uint8_t *)p->vc_handle; +++} +++#endif +++ +++static MMAL_POOL_T* display_alloc_pool(MMAL_PORT_T* port, size_t w, size_t h) +++{ +++ MMAL_POOL_T* pool; +++ size_t i; +++ size_t size = (w*h*3)/2; +++#ifdef RPI_ZERO_COPY +++ mmal_port_parameter_set_boolean(port, MMAL_PARAMETER_ZERO_COPY, MMAL_TRUE); // Does this mark that the buffer contains a vc_handle? Would have expected a vc_image? 
+++ pool = mmal_port_pool_create(port, NUM_BUFFERS, 0); +++ assert(pool); +++#else +++ pool = mmal_port_pool_create(port, NUM_BUFFERS, size); +++ +++ for (i = 0; i < NUM_BUFFERS; ++i) +++ { +++ MMAL_BUFFER_HEADER_T* buffer = pool->header[i]; +++ void* bufPtr = buffer->data; +++ memset(bufPtr, i*30, w*h); +++ memset(bufPtr+w*h, 128, (w*h)/2); +++ } +++#endif +++ +++ return pool; +++} +++ +++static void display_cb_input(MMAL_PORT_T *port,MMAL_BUFFER_HEADER_T *buffer) { +++ mmal_buffer_header_release(buffer); +++} +++ +++static MMAL_COMPONENT_T* display_init(size_t x, size_t y, size_t w, size_t h) +++{ +++ MMAL_COMPONENT_T* display; +++ int w2 = (w+31)&~31; // width rounded up to a multiple of 32 +++ int h2 = (h+15)&~15; // height rounded up to a multiple of 16 +++ MMAL_DISPLAYREGION_T region = +++ { +++ {MMAL_PARAMETER_DISPLAYREGION, sizeof(region)}, +++ .set = MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_FULLSCREEN | MMAL_DISPLAY_SET_DEST_RECT, +++ .layer = 2, +++ .fullscreen = 0, +++ .dest_rect = {x, y, w, h} +++ }; +++ bcm_host_init(); // TODO is this needed? +++ mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &display); +++ assert(display); +++ +++ mmal_port_parameter_set(display->input[0], &region.hdr); +++ +++ MMAL_ES_FORMAT_T* format = display->input[0]->format; +++ format->encoding = MMAL_ENCODING_I420; +++ format->es->video.width = w2; +++ format->es->video.height = h2; +++ format->es->video.crop.x = 0; +++ format->es->video.crop.y = 0; +++ format->es->video.crop.width = w; +++ format->es->video.crop.height = h; +++ mmal_port_format_commit(display->input[0]); +++ +++ mmal_component_enable(display); +++ +++ rpi_pool = display_alloc_pool(display->input[0], w2, h2); +++ +++ mmal_port_enable(display->input[0],display_cb_input); +++ mmal_port_enable(display->control,display_cb_input); +++ +++ printf("Allocated display %zu %zu\n",w,h); +++ +++ return display; +++} +++ +++static void display_frame(MMAL_COMPONENT_T* display,AVFrame* fr) +++{ +++ int w = fr->width; +++ int h = fr->height; +++ int w2 = (w+31)&~31; +++ int h2 = (h+15)&~15; +++
if (!display || !rpi_pool) +++ return; +++ MMAL_BUFFER_HEADER_T* buf = mmal_queue_get(rpi_pool->queue); +++ if (!buf) { +++ // Running too fast so drop the frame +++ return; +++ } +++ assert(buf); +++ buf->cmd = 0; +++ buf->length = (w2 * h2 * 3)/2; +++ buf->offset = 0; // Offset to valid data +++ buf->flags = 0; +++#ifdef RPI_ZERO_COPY +++ buf->data = get_vc_handle(fr->buf[0]); +++ buf->alloc_size = (w2*h2*3)/2; +++#else +++ //mmal_buffer_header_mem_lock(buf); +++ memcpy(buf->data, fr->data[0], w2 * h); +++ memcpy(buf->data+w2*h2, fr->data[1], w2 * h / 4); +++ memcpy(buf->data+w2*h2*5/4, fr->data[2], w2 * h / 4); +++ //mmal_buffer_header_mem_unlock(buf); +++#endif +++ +++ mmal_port_send_buffer(display->input[0], buf); // I assume this will automatically get released +++} +++ +++static void display_exit(MMAL_COMPONENT_T* display) +++{ +++ if (display && rpi_pool) { // pool teardown dereferences display, so it needs a non-NULL display +++ mmal_port_pool_destroy(display->input[0], rpi_pool); // must run before the component is destroyed +++ } +++ if (display) { +++ mmal_component_destroy(display); +++ } +++} +++ +++#endif +++ +++ ++ /* sub2video hack: ++ Convert subtitles to video with alpha to insert them in filter graphs. ++ This is a temporary solution until libavfilter gets real subtitles support.
++@@ -581,6 +728,10 @@ static void ffmpeg_cleanup(int ret) ++ } ++ term_exit(); ++ ffmpeg_exited = 1; +++ +++#ifdef RPI_DISPLAY +++ display_exit(rpi_display); +++#endif ++ } ++ ++ void remove_avoptions(AVDictionary **a, AVDictionary *b) ++@@ -940,6 +1091,14 @@ static void do_video_out(AVFormatContext *s, ++ int frame_size = 0; ++ InputStream *ist = NULL; ++ AVFilterContext *filter = ost->filter->filter; +++#ifdef RPI_DISPLAY +++ if (next_picture) +++ { +++ if (!rpi_display) +++ rpi_display = display_init(0,0,next_picture->width,next_picture->height); +++ display_frame(rpi_display,next_picture); +++ } +++#endif ++ ++ if (ost->source_index >= 0) ++ ist = input_streams[ost->source_index]; ++-- ++2.7.4 ++ ++ ++From b90a5aff7bf9112ebd2a07949c8d79a49fcafe48 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 29 Apr 2015 16:49:43 +0100 ++Subject: [PATCH 02/68] Split transform and intra prediction into commands ++ ++--- ++ libavcodec/hevc.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++- ++ libavcodec/hevc.h | 58 +++++++++++++++++++++++ ++ libavcodec/hevc_cabac.c | 15 ++++++ ++ 3 files changed, 191 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index b478065..aa45dd6 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -931,6 +931,25 @@ static int hls_cross_component_pred(HEVCContext *s, int idx) { ++ return 0; ++ } ++ +++#ifdef RPI +++static void rpi_intra_pred(HEVCContext *s, int log2_trafo_size, int x0, int y0, int c_idx) +++{ +++ if (s->enable_rpi) { +++ HEVCLocalContext *lc = s->HEVClc; +++ HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; +++ cmd->type = RPI_PRED_INTRA; +++ cmd->size = log2_trafo_size; +++ cmd->c_idx = c_idx; +++ cmd->x = x0; +++ cmd->y = y0; +++ cmd->na = (lc->na.cand_bottom_left<<4) + (lc->na.cand_left<<3) + (lc->na.cand_up_left<<2) + (lc->na.cand_up<<1) + lc->na.cand_up_right; +++ cmd->mode = c_idx ? 
lc->tu.intra_pred_mode_c : lc->tu.intra_pred_mode; +++ } else { +++ s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, c_idx); +++ } +++} +++#endif +++ ++ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ int xBase, int yBase, int cb_xBase, int cb_yBase, ++ int log2_cb_size, int log2_trafo_size, ++@@ -943,8 +962,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ if (lc->cu.pred_mode == MODE_INTRA) { ++ int trafo_size = 1 << log2_trafo_size; ++ ff_hevc_set_neighbour_available(s, x0, y0, trafo_size, trafo_size); ++- +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size, x0, y0, 0); +++#else ++ s->hpc.intra_pred[log2_trafo_size - 2](s, x0, y0, 0); +++#endif ++ } ++ ++ if (cbf_luma || cbf_cb[0] || cbf_cr[0] || ++@@ -1030,7 +1052,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) { ++ if (lc->cu.pred_mode == MODE_INTRA) { ++ ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0 + (i << log2_trafo_size_c), 1); +++#else ++ s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 1); +++#endif ++ } ++ if (cbf_cb[i]) ++ ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c), ++@@ -1059,7 +1085,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 
2 : 1); i++) { ++ if (lc->cu.pred_mode == MODE_INTRA) { ++ ff_hevc_set_neighbour_available(s, x0, y0 + (i << log2_trafo_size_c), trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0 + (i << log2_trafo_size_c), 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (i << log2_trafo_size_c), 2); +++#endif ++ } ++ if (cbf_cr[i]) ++ ff_hevc_hls_residual_coding(s, x0, y0 + (i << log2_trafo_size_c), ++@@ -1088,7 +1118,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ if (lc->cu.pred_mode == MODE_INTRA) { ++ ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size), ++ trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase + (i << log2_trafo_size), 1); +++#else ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 1); +++#endif ++ } ++ if (cbf_cb[i]) ++ ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size), ++@@ -1098,7 +1132,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ if (lc->cu.pred_mode == MODE_INTRA) { ++ ff_hevc_set_neighbour_available(s, xBase, yBase + (i << log2_trafo_size), ++ trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase + (i << log2_trafo_size), 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (i << log2_trafo_size), 2); +++#endif ++ } ++ if (cbf_cr[i]) ++ ff_hevc_hls_residual_coding(s, xBase, yBase + (i << log2_trafo_size), ++@@ -1110,26 +1148,46 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ int trafo_size_h = 1 << (log2_trafo_size_c + s->ps.sps->hshift[1]); ++ int trafo_size_v = 1 << (log2_trafo_size_c + s->ps.sps->vshift[1]); ++ ff_hevc_set_neighbour_available(s, x0, y0, trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0, 1); +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0, 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size_c - 
2](s, x0, y0, 1); ++ s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0, 2); +++#endif ++ if (s->ps.sps->chroma_format_idc == 2) { ++ ff_hevc_set_neighbour_available(s, x0, y0 + (1 << log2_trafo_size_c), ++ trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0 + (1 << log2_trafo_size_c), 1); +++ rpi_intra_pred(s, log2_trafo_size_c, x0, y0 + (1 << log2_trafo_size_c), 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 1); ++ s->hpc.intra_pred[log2_trafo_size_c - 2](s, x0, y0 + (1 << log2_trafo_size_c), 2); +++#endif ++ } ++ } else if (blk_idx == 3) { ++ int trafo_size_h = 1 << (log2_trafo_size + 1); ++ int trafo_size_v = 1 << (log2_trafo_size + s->ps.sps->vshift[1]); ++ ff_hevc_set_neighbour_available(s, xBase, yBase, ++ trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase, 1); +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase, 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 1); ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase, 2); +++#endif ++ if (s->ps.sps->chroma_format_idc == 2) { ++ ff_hevc_set_neighbour_available(s, xBase, yBase + (1 << (log2_trafo_size)), ++ trafo_size_h, trafo_size_v); +++#ifdef RPI +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase + (1 << (log2_trafo_size)), 1); +++ rpi_intra_pred(s, log2_trafo_size, xBase, yBase + (1 << (log2_trafo_size)), 2); +++#else ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 1); ++ s->hpc.intra_pred[log2_trafo_size - 2](s, xBase, yBase + (1 << (log2_trafo_size)), 2); +++#endif ++ } ++ } ++ } ++@@ -2304,6 +2362,31 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, ++ lc->ctb_up_left_flag = ((x_ctb > 0) && (y_ctb > 0) && (ctb_addr_in_slice-1 >= s->ps.sps->ctb_width) && (s->ps.pps->tile_id[ctb_addr_ts] == s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1 - s->ps.sps->ctb_width]])); ++ } ++ 
+++#ifdef RPI +++static void rpi_execute_pred_cmds(HEVCContext *s) +++{ +++ int i; +++ HEVCPredCmd *cmd = s->univ_pred_cmds; +++ HEVCLocalContext *lc = s->HEVClc; +++ +++ for(i = s->num_pred_cmds; i > 0; i--, cmd++) { +++ if (cmd->type == RPI_PRED_INTRA) { +++ lc->tu.intra_pred_mode_c = lc->tu.intra_pred_mode = cmd->mode; +++ lc->na.cand_bottom_left = (cmd->na >> 4) & 1; +++ lc->na.cand_left = (cmd->na >> 3) & 1; +++ lc->na.cand_up_left = (cmd->na >> 2) & 1; +++ lc->na.cand_up = (cmd->na >> 1) & 1; +++ lc->na.cand_up_right = (cmd->na >> 0) & 1; +++ s->hpc.intra_pred[cmd->size - 2](s, cmd->x, cmd->y, cmd->c_idx); +++ } else { +++ s->hevcdsp.transform_add[cmd->size-2](cmd->dst, cmd->buf, cmd->stride); +++ } +++ } +++ s->num_pred_cmds = 0; +++ s->num_coeffs = 0; +++} +++#endif +++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ { ++ HEVCContext *s = avctxt->priv_data; ++@@ -2313,6 +2396,10 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ int y_ctb = 0; ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; ++ +++#ifdef RPI +++ s->enable_rpi = 1; // TODO this should depend on cross component and frame width etc. 
+++#endif +++ ++ if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { ++ av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n"); ++ return AVERROR_INVALIDDATA; ++@@ -2342,6 +2429,9 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag; ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); +++#ifdef RPI +++ rpi_execute_pred_cmds(s); +++#endif ++ if (more_data < 0) { ++ s->tab_slice_address[ctb_addr_rs] = -1; ++ return more_data; ++@@ -2387,6 +2477,10 @@ static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int ++ s = s1->sList[self_id]; ++ lc = s->HEVClc; ++ +++#ifdef RPI +++ s->enable_rpi = 0; +++#endif +++ ++ if(ctb_row) { ++ ret = init_get_bits8(&lc->gb, s->data + s->sh.offset[ctb_row - 1], s->sh.size[ctb_row - 1]); ++ ++@@ -3075,6 +3169,13 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ ++ av_freep(&s->cabac_state); ++ +++#ifdef RPI +++ av_freep(&s->unif_mv_cmds); +++ av_freep(&s->unif_xfm_cmds); +++ av_freep(&s->univ_pred_cmds); +++ av_freep(&s->coeffs_buf); +++#endif +++ ++ for (i = 0; i < 3; i++) { ++ av_freep(&s->sao_pixel_buffer_h[i]); ++ av_freep(&s->sao_pixel_buffer_v[i]); ++@@ -3129,6 +3230,22 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->HEVClcList[0] = s->HEVClc; ++ s->sList[0] = s; ++ +++#ifdef RPI +++ s->unif_mv_cmds = av_mallocz(sizeof(HEVCMvCmd)*RPI_MAX_MV_CMDS); +++ if (!s->unif_mv_cmds) +++ goto fail; +++ s->unif_xfm_cmds = av_mallocz(sizeof(HEVCXfmCmd)*RPI_MAX_XFM_CMDS); +++ if (!s->unif_xfm_cmds) +++ goto fail; +++ s->univ_pred_cmds = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); +++ if (!s->univ_pred_cmds) +++ goto fail; +++ s->coeffs_buf = av_mallocz(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16); +++ if (!s->coeffs_buf) +++ goto fail; +++ s->enable_rpi = 0; +++#endif +++ ++ s->cabac_state = av_malloc(HEVC_CONTEXTS); ++ if 
(!s->cabac_state) ++ goto fail; ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index be91010..7a1c35f 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -23,6 +23,9 @@ ++ #ifndef AVCODEC_HEVC_H ++ #define AVCODEC_HEVC_H ++ +++// define RPI to split the CABAC/prediction/transform into separate stages +++#include "config.h" +++ ++ #include "libavutil/buffer.h" ++ #include "libavutil/md5.h" ++ ++@@ -790,6 +793,49 @@ typedef struct HEVCLocalContext { ++ int boundary_flags; ++ } HEVCLocalContext; ++ +++#ifdef RPI +++ +++// RPI_MAX_WIDTH is maximum width in pixels supported by the accelerated code +++#define RPI_MAX_WIDTH 2048 +++ +++// Worst case is for 4:4:4 4x4 blocks with 64 high coding tree blocks, so 16 MV cmds per 4 pixels across for each colour plane +++#define RPI_MAX_MV_CMDS (16*3*(RPI_MAX_WIDTH/4)) +++#define RPI_MAX_XFM_CMDS (16*3*(RPI_MAX_WIDTH/4)) +++// Each block can have an intra prediction and a transform_add command +++#define RPI_MAX_PRED_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) +++ +++// Command for inter prediction +++typedef struct HEVCMvCmd { +++} HEVCMvCmd; +++ +++// Command for transform to process a block of coefficients +++typedef struct HEVCXfmCmd { +++} HEVCXfmCmd; +++ +++// Command for intra prediction and transform_add of predictions to coefficients +++#define RPI_PRED_TRANSFORM_ADD 0 +++#define RPI_PRED_INTRA 1 +++typedef struct HEVCPredCmd { +++ uint8_t size; +++ uint8_t type; +++ uint8_t na; +++ uint8_t c_idx; +++ union { +++ uint8_t *dst; // RPI_PRED_TRANSFORM_ADD +++ uint32_t x; // RPI_PRED_INTRA +++ }; +++ union { +++ int16_t *buf; // RPI_PRED_TRANSFORM_ADD +++ uint32_t y; // RPI_PRED_INTRA +++ }; +++ union { +++ enum IntraPredMode mode; // RPI_PRED_INTRA +++ uint32_t stride; // RPI_PRED_TRANSFORM_ADD +++ }; +++} HEVCPredCmd; +++ +++#endif +++ ++ typedef struct HEVCContext { ++ const AVClass *c; // needed by private avoptions ++ AVCodecContext *avctx; ++@@ -805,6 +851,18 @@ typedef struct HEVCContext { ++ int width; ++ int
height; ++ +++#ifdef RPI +++ int enable_rpi; +++ HEVCMvCmd *unif_mv_cmds; +++ HEVCXfmCmd *unif_xfm_cmds; +++ HEVCPredCmd *univ_pred_cmds; +++ int16_t *coeffs_buf; +++ int num_mv_cmds; +++ int num_xfm_cmds; +++ int num_pred_cmds; +++ int num_coeffs; +++#endif +++ ++ uint8_t *cabac_state; ++ ++ /** 1 if the independent slice segment header was successfully parsed */ ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 05b2821..4e97f06 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1510,6 +1510,21 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ coeffs[i] = coeffs[i] + ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); ++ } ++ } +++#ifdef RPI +++ if (s->enable_rpi) { +++ int16_t *c = s->coeffs_buf + s->num_coeffs; +++ int n = trafo_size * trafo_size; +++ HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; +++ memcpy(c, coeffs, n * sizeof(int16_t)); // TODO change pointer earlier and we can avoid this copy +++ s->num_coeffs += n; +++ cmd->type = RPI_PRED_TRANSFORM_ADD; +++ cmd->size = log2_trafo_size; +++ cmd->buf = c; +++ cmd->dst = dst; +++ cmd->stride = stride; +++ return; +++ } +++#endif ++ s->hevcdsp.transform_add[log2_trafo_size-2](dst, coeffs, stride); ++ } ++ ++-- ++2.7.4 ++ ++ ++From f8293de11dc040d9fa2a558762a357c0c353d2c9 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 30 Apr 2015 15:23:22 +0100 ++Subject: [PATCH 03/68] Added simple VPU test code ++ ++--- ++ libavcodec/Makefile | 7 + ++ libavcodec/hevc.c | 33 +- ++ libavcodec/rpi_hevc_transform.h | 212 ++++++ ++ libavcodec/rpi_hevc_transform.s | 147 ++++ ++ libavcodec/rpi_mailbox.c | 293 ++++++++ ++ libavcodec/rpi_mailbox.h | 20 + ++ libavcodec/rpi_qpu.c | 652 ++++++++++++++++++ ++ libavcodec/rpi_qpu.h | 45 ++ ++ libavcodec/rpi_shader.c | 818 ++++++++++++++++++++++ ++ libavcodec/rpi_shader.h | 20 + ++ libavcodec/rpi_shader.qasm | 1413 +++++++++++++++++++++++++++++++++++++++ ++ 
libavcodec/rpi_user_vcsm.h | 425 ++++++++++++ ++ 12 files changed, 4084 insertions(+), 1 deletion(-) ++ create mode 100644 libavcodec/rpi_hevc_transform.h ++ create mode 100644 libavcodec/rpi_hevc_transform.s ++ create mode 100644 libavcodec/rpi_mailbox.c ++ create mode 100644 libavcodec/rpi_mailbox.h ++ create mode 100644 libavcodec/rpi_qpu.c ++ create mode 100644 libavcodec/rpi_qpu.h ++ create mode 100644 libavcodec/rpi_shader.c ++ create mode 100644 libavcodec/rpi_shader.h ++ create mode 100644 libavcodec/rpi_shader.qasm ++ create mode 100644 libavcodec/rpi_user_vcsm.h ++ ++diff --git a/libavcodec/Makefile b/libavcodec/Makefile ++index fd0d1f0..03065cd 100644 ++--- a/libavcodec/Makefile +++++ b/libavcodec/Makefile ++@@ -5,6 +5,10 @@ NAME = avcodec ++ HEADERS = avcodec.h \ ++ avdct.h \ ++ avfft.h \ +++ rpi_qpu.h \ +++ rpi_shader.h \ +++ rpi_mailbox.h \ +++ rpi_hevc_transform.h \ ++ d3d11va.h \ ++ dirac.h \ ++ dv_profile.h \ ++@@ -43,6 +47,9 @@ OBJS = allcodecs.o \ ++ resample.o \ ++ resample2.o \ ++ utils.o \ +++ rpi_qpu.o \ +++ rpi_shader.o \ +++ rpi_mailbox.o \ ++ vorbis_parser.o \ ++ xiph.o \ ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index aa45dd6..ab55df1 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -41,6 +41,10 @@ ++ #include "hevc.h" ++ #include "profiles.h" ++ +++#ifdef RPI +++#include "rpi_qpu.h" +++#endif +++ ++ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 }; ++ ++ /** ++@@ -2430,7 +2434,9 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ #ifdef RPI ++- rpi_execute_pred_cmds(s); +++ if (x_ctb + ctb_size >= s->ps.sps->width) { +++ rpi_execute_pred_cmds(s); +++ } ++ #endif ++ if (more_data < 0) { ++ s->tab_slice_address[ctb_addr_rs] = -1; ++@@ -3244,6 +3250,31 @@ static av_cold int hevc_init_context(AVCodecContext 
*avctx) ++ if (!s->coeffs_buf) ++ goto fail; ++ s->enable_rpi = 0; +++ +++ // A little test program +++ { +++ GPU_MEM_PTR_T p; +++ int err = gpu_malloc_cached(16, &p); +++ short *q = (short *)p.arm; +++ int i; +++ int r; +++ printf("Allocated memory %d ARM 0x%x, VC 0x%x, Code 0x%x\n",err,(int)p.arm,p.vc,(int)vpu_get_fn()); +++ printf("Allocated memory %d ARM 0x%x, VC 0x%x\n",err,(int)p.arm,p.vc); +++ printf("Preparing data %p\n",q); +++ for(i=0;i<16;i++) +++ q[i] = i; +++ printf("Flush cache\n"); +++ gpu_cache_flush(&p); +++ printf("Executing code\n"); +++ r = vpu_execute_code( vpu_get_fn(), p.vc, 0, 0, 0, 0, 0); +++ printf("Return value %d (",r); +++ for(i=0;i<16;i++) +++ printf("%d ",q[i]); +++ printf(")\n"); +++ gpu_free(&p); +++ goto fail; // Early out +++ } +++ ++ #endif ++ ++ s->cabac_state = av_malloc(HEVC_CONTEXTS); ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++new file mode 100644 ++index 0000000..85a9102 ++--- /dev/null +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -0,0 +1,212 @@ +++unsigned char rpi_hevc_transform [] = { +++169, +++3, +++3, +++232, +++128, +++0, +++0, +++0, +++20, +++248, +++0, +++136, +++0, +++0, +++192, +++248, +++0, +++0, +++0, +++96, +++3, +++232, +++32, +++0, +++0, +++0, +++7, +++232, +++0, +++2, +++0, +++0, +++8, +++232, +++0, +++4, +++0, +++0, +++12, +++248, +++0, +++128, +++0, +++0, +++192, +++8, +++4, +++0, +++4, +++232, +++64, +++0, +++0, +++0, +++5, +++232, +++0, +++0, +++8, +++0, +++128, +++69, +++113, +++66, +++12, +++248, +++0, +++128, +++0, +++0, +++192, +++8, +++4, +++0, +++128, +++69, +++113, +++70, +++128, +++144, +++39, +++0, +++4, +++255, +++48, +++192, +++128, +++3, +++32, +++8, +++16, +++0, +++76, +++254, +++48, +++192, +++9, +++4, +++32, +++8, +++0, +++0, +++4, +++254, +++0, +++144, +++128, +++2, +++0, +++248, +++62, +++0, +++128, +++144, +++22, +++0, +++4, +++255, +++48, +++192, +++128, +++3, +++32, +++8, +++16, +++0, +++76, +++254, +++48, +++192, +++9, +++4, +++32, +++8, +++0, 
+++0, +++140, +++248, +++44, +++0, +++0, +++0, +++32, +++48, +++4, +++0, +++128, +++69, +++113, +++66, +++242, +++140, +++211, +++192, +++41, +++3, +++68, +++192, +++80, +++7, +++164, +++255, +++36, +++220, +++96, +++2, +++0, +++248, +++62, +++0, +++3, +++255, +++55, +++208, +++120, +++3, +++224, +++3, +++190, +++11, +++16, +++139, +++246, +++83, +++0, +++103, +++90, +++0, +++8, +++240, +++0, +++128, +++128, +++3, +++0, +++247, +++32, +++128, +++10, +++4, +++136, +++240, +++32, +++0, +++128, +++3, +++112, +++96, +++90, +++0, +++}; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++new file mode 100644 ++index 0000000..5e2728d ++--- /dev/null +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -0,0 +1,147 @@ +++# ****************************************************************************** +++# Argon Design Ltd. +++# (c) Copyright 2015 Argon Design Ltd. All rights reserved. +++# +++# Module : HEVC +++# Author : Peter de Rivaz +++# ****************************************************************************** +++ +++# HEVC VPU Transform +++# +++# Transform matrix can be thought of as +++# output row vector = input row vector * transMatrix2 +++# +++# The even rows of the matrix are symmetric +++# The odd rows of the matrix are antisymmetric +++# +++# So only need to compute the first half of the results, then can compute the remainder with a butterfly +++# +++# EXAMPLE +++# (a b c d) (1 2 2 1) +++# (3 4 -4 -3) +++# (5 6 6 5) +++# (7 8 -8 -7) +++# +++# x=(a c)(1 2) = 1a+5c 2a+6c +++# (5 6) +++# +++# y=(b d)(3 4) = 3b+7d 4b+8d +++# (7 8) +++# +++# u=x+y = 1a+5c+3b+7d 2a+4b+6c+8d +++# v=x-y = 1a+5c-3b-7d 2a+6c-4b-8d +++# +++# Final results are (u , v[::-1]) +++# +++# +++# For 32x1 input, load even rows into HX(0++,0), odd rows into HX(16++,0) +++# Apply the even matrix first and stop before rounding +++# Then apply the odd matrix in a full manner: +++# +++# First step is to compute partial products with the first input (16 cycles) +++# 1a 3b 5c 
7d 16x1 input coefficients produce 16x16 output +++# 2a 4b 6c 8d +++# 2a -4b 6c -8d +++# 1a -3b 5c -7d +++# +++# Second step is to sum partial products into final position (8 cycles) +++# 1a+3b+5c+7d +++# 2a+4b+6c+8d +++# 2a-4b+6c-8d +++# 1a-3b+5c-7d +++# +++# Then can apply butterfly to combine even results and odd results + rounding to produce 16 rows of output at a time (need to save in transposed format) +++# +++# For 16x16 no butterfly is required and can store final results in original location (Could do 2 16x16s in parallel to make use of the trick - saves on the adds) +++# +++# For 8x8 we could compute two in parallel. +++# +++# +++ +++test_add: +++ vldh HX(0,0),(r0) +++ vadd HX(0,0),HX(0,0),10 +++ vsth HX(0,0),(r0) +++ mov r0,7 # return value +++ b lr +++ +++# Columns are transformed first +++# +++# Store top left half of transMatrix2 in +++# Store bottom left half of transMatrix2 in HX(32,32) +++# +++# For 16x16 +++# HX(0:15,0) contains input data before transform +++# HY(0:15,0) contains 32bit output data after transform +++# HX(32,0) contains even rows of left half of transMatrix2 +++# HX(32,32) contains odd rows of left half of transMatrix2 +++# HY(48,0) contains partial products ready for summing +++# +++ +++ +++# hevc_trans_16x16(short *transMatrix2, short *coeffs, int num) +++# transMatrix2: address of the constant matrix (must be at 32 byte aligned address in Videocore memory) +++# coeffs: address of the transform coefficients (must be at 32 byte aligned address in Videocore memory) +++# num: number of 16x16 transforms to be done +++# +++hevc_trans_16x16: +++ push r6-r15, lr # TODO cut down number of used registers +++ +++ mov r3, 2*32*2 # Twice Stride of transMatrix2 in bytes +++ vld HX(32++,0),(r0 += r3) REP 16 # This is the 16x16 matrix, a transform is equivalent to multiplying input row vector * matrix +++ # Now use r0 to describe which matrix we are working on. +++ # Allows us to prefetch the next block of coefficients for efficiency. 
+++ mov r0,0 # This describes the location where we read our coefficients from +++ mov r3,16*2 # Stride of coefficients in bytes +++ mov r7,16*16*2 # Total block size +++ mov r8,64*16 # Value used to swap from current to next VRF location +++ vldh HX(0++,0)+r0,(r1 += r3) REP 16 +++ mov r4,64 # Constant used for rounding first pass +++ mov r5,1<<19 # Constant used for rounding second pass +++ +++ # At start of block r0,r1 point to the current block (that has already been loaded) +++block_loop: +++ eor r0,r8 +++ add r1,r7 +++ # Prefetch the next block +++ vldh HX(0++,0)+r0,(r1 += r3) REP 16 +++ eor r0,r8 +++ sub r1,r7 +++ +++ # Transform the current block +++ bl col_trans_16 +++ vadd HY(0++,0)+r0,HY(0++,0)+r0,r4 REP 16 # Now add on rounding, shift down by 7, and saturate +++ #vsasls HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # 9+7=16 so this ends up with the output saturated and in the top half of the word. +++ vasl HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # This should be saturating, but the instruction above does not assemble? +++ vmov VX(0,0++), HX(0++,32) REP 16 # For simplicity transpose this back to the original position +++ +++ bl col_trans_16 +++ vadd HY(0++,0)+r0,HY(0++,0)+r0,r4 REP 16 # Now add on rounding, shift down by 7, and saturate +++ #vsasls HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # 9+7=16 so this ends up with the output saturated and in the top half of the word. +++ vasl HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # This should be saturating, but the instruction above does not assemble? 
+++ +++ # Save results - note there has been a transposition during the processing so we save columns +++ vsth VX(0,32++)+r0, (r1 += r3) REP 16 +++ +++ # Move onto next block +++ eor r0,r8 +++ add r1,r7 +++ +++ addcmpbgt r2,-1,0,block_loop +++ pop r6-r15, pc +++ +++# r1,r2,r3 r7,r8 should be preserved +++# HX(0++,0)+r0 is the block to be transformed +++# HX(32++,0) is the 16x16 matrix of transform coefficients +++# Use HY(48,0) for intermediate results +++# r0 can be used, but should be returned to its original value at the end +++col_trans_16: +++ add r4,r0,16 # Final value for this loop +++col_trans_16_loop: +++ # First compute partial products for a single column +++ vmul32s VY(48,0++), VX(0,0)+r0, VX(32,0++) REP 16 +++ # Then sum up the results and place back +++ vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC +++ addcmpblt r0,1,r4,col_trans_16_loop +++ sub r0,16 # put r0 back to its original value +++ b lr ++diff --git a/libavcodec/rpi_mailbox.c b/libavcodec/rpi_mailbox.c ++new file mode 100644 ++index 0000000..536896f ++--- /dev/null +++++ b/libavcodec/rpi_mailbox.c ++@@ -0,0 +1,293 @@ +++/* +++Copyright (c) 2012, Broadcom Europe Ltd. +++All rights reserved. +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. +++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission.
+++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +++*/ +++ +++#include <stdio.h> +++#include <string.h> +++#include <stdlib.h> +++#include <fcntl.h> +++#include <unistd.h> +++#include <assert.h> +++#include <stdint.h> +++#include <sys/mman.h> +++#include <sys/ioctl.h> +++ +++#include <linux/ioctl.h> +++ +++#define MAJOR_NUM 100 +++#define IOCTL_MBOX_PROPERTY _IOWR(MAJOR_NUM, 0, char *) +++#define DEVICE_FILE_NAME "/dev/char_dev" +++ +++#include "rpi_mailbox.h" +++ +++#define PAGE_SIZE (4*1024) +++ +++// Shared memory will not be cached in ARM cache +++void *mapmem_shared(unsigned base, unsigned size) +++{ +++ int mem_fd; +++ unsigned offset = base % PAGE_SIZE; +++ base = base - offset; +++ /* open /dev/mem */ +++ if ((mem_fd = open("/dev/mem", O_RDWR|O_SYNC) ) < 0) { +++ printf("can't open /dev/mem\nThis program should be run as root. 
Try prefixing command with: sudo\n"); +++ return NULL; +++ } +++ void *mem = mmap( +++ 0, +++ size, +++ PROT_READ|PROT_WRITE, +++ MAP_SHARED/*|MAP_FIXED*/, +++ mem_fd, +++ base); +++#ifdef DEBUG +++ printf("base=0x%x, mem=%p\n", base, mem); +++#endif +++ close(mem_fd); // a mapping stays valid after its descriptor is closed, so release it on every path +++ if (mem == MAP_FAILED) { +++ printf("mmap error %d\n", (int)mem); +++ return NULL; +++ } +++ return (char *)mem + offset; +++} +++ +++// Unshared memory will be faster as lives in ARM cache, but requires cache flushing +++void *mapmem_private(unsigned base, unsigned size) +++{ +++ int mem_fd; +++ unsigned offset = base % PAGE_SIZE; +++ base = base - offset; +++ /* open /dev/mem */ +++ if ((mem_fd = open("/dev/mem", O_RDWR|O_SYNC) ) < 0) { +++ printf("can't open /dev/mem\nThis program should be run as root. Try prefixing command with: sudo\n"); +++ return NULL; +++ } +++ void *mem = mmap( +++ 0, +++ size, +++ PROT_READ|PROT_WRITE, +++ MAP_PRIVATE/*|MAP_FIXED*/, +++ mem_fd, +++ base); +++#ifdef DEBUG +++ printf("base=0x%x, mem=%p\n", base, mem); +++#endif +++ close(mem_fd); // a mapping stays valid after its descriptor is closed, so release it on every path +++ if (mem == MAP_FAILED) { +++ printf("mmap error %d\n", (int)mem); +++ return NULL; +++ } +++ return (char *)mem + offset; +++} +++ +++void unmapmem(void *addr, unsigned size) +++{ +++ int s = munmap(addr, size); +++ if (s != 0) { +++ printf("munmap error %d\n", s); +++ exit (-1); +++ } +++} +++ +++/* +++ * use ioctl to send mbox property message +++ */ +++ +++static int mbox_property(int file_desc, void *buf) +++{ +++ int ret_val = ioctl(file_desc, IOCTL_MBOX_PROPERTY, buf); +++ +++ if (ret_val < 0) { +++ printf("ioctl_set_msg failed:%d\n", ret_val); +++ } +++ +++#ifdef DEBUG +++ unsigned *p = buf; int i; unsigned size = *(unsigned *)buf; +++ for (i=0; i<size/4; i++) +++ printf("%04x: 0x%08x\n", i*sizeof *p, p[i]); +++#endif +++ return ret_val; +++} +++ +++unsigned mem_alloc(int file_desc, unsigned size, unsigned align, unsigned flags) +++{ +++ int i=0; +++ unsigned p[32]; +++ p[i++] = 0; // size +++ p[i++] = 0x00000000; //
process request +++ +++ p[i++] = 0x3000c; // (the tag id) +++ p[i++] = 12; // (size of the buffer) +++ p[i++] = 12; // (size of the data) +++ p[i++] = size; // (num bytes? or pages?) +++ p[i++] = align; // (alignment) +++ p[i++] = flags; // (MEM_FLAG_L1_NONALLOCATING) +++ +++ p[i++] = 0x00000000; // end tag +++ p[0] = i*sizeof *p; // actual size +++ +++ mbox_property(file_desc, p); +++ return p[5]; +++} +++ +++unsigned mem_free(int file_desc, unsigned handle) +++{ +++ int i=0; +++ unsigned p[32]; +++ p[i++] = 0; // size +++ p[i++] = 0x00000000; // process request +++ +++ p[i++] = 0x3000f; // (the tag id) +++ p[i++] = 4; // (size of the buffer) +++ p[i++] = 4; // (size of the data) +++ p[i++] = handle; +++ +++ p[i++] = 0x00000000; // end tag +++ p[0] = i*sizeof *p; // actual size +++ +++ mbox_property(file_desc, p); +++ return p[5]; +++} +++ +++unsigned mem_lock(int file_desc, unsigned handle) +++{ +++ int i=0; +++ unsigned p[32]; +++ p[i++] = 0; // size +++ p[i++] = 0x00000000; // process request +++ +++ p[i++] = 0x3000d; // (the tag id) +++ p[i++] = 4; // (size of the buffer) +++ p[i++] = 4; // (size of the data) +++ p[i++] = handle; +++ +++ p[i++] = 0x00000000; // end tag +++ p[0] = i*sizeof *p; // actual size +++ +++ mbox_property(file_desc, p); +++ return p[5]; +++} +++ +++unsigned mem_unlock(int file_desc, unsigned handle) +++{ +++ int i=0; +++ unsigned p[32]; +++ p[i++] = 0; // size +++ p[i++] = 0x00000000; // process request +++ +++ p[i++] = 0x3000e; // (the tag id) +++ p[i++] = 4; // (size of the buffer) +++ p[i++] = 4; // (size of the data) +++ p[i++] = handle; +++ +++ p[i++] = 0x00000000; // end tag +++ p[0] = i*sizeof *p; // actual size +++ +++ mbox_property(file_desc, p); +++ return p[5]; +++} +++ +++unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5) +++{ +++ int i=0; +++ unsigned p[32]; +++ p[i++] = 0; // size +++ p[i++] = 0x00000000; // process request +++ +++ p[i++] = 
0x30010; // (the tag id) +++ p[i++] = 28; // (size of the buffer) +++ p[i++] = 28; // (size of the data) +++ p[i++] = code; +++ p[i++] = r0; +++ p[i++] = r1; +++ p[i++] = r2; +++ p[i++] = r3; +++ p[i++] = r4; +++ p[i++] = r5; +++ +++ p[i++] = 0x00000000; // end tag +++ p[0] = i*sizeof *p; // actual size +++ +++ mbox_property(file_desc, p); +++ return p[5]; +++} +++ +++unsigned qpu_enable(int file_desc, unsigned enable) +++{ +++ int i=0; +++ unsigned p[32]; +++ +++ p[i++] = 0; // size +++ p[i++] = 0x00000000; // process request +++ +++ p[i++] = 0x30012; // (the tag id) +++ p[i++] = 4; // (size of the buffer) +++ p[i++] = 4; // (size of the data) +++ p[i++] = enable; +++ +++ p[i++] = 0x00000000; // end tag +++ p[0] = i*sizeof *p; // actual size +++ +++ mbox_property(file_desc, p); +++ return p[5]; +++} +++ +++unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout) { +++ int i=0; +++ unsigned p[32]; +++ +++ p[i++] = 0; // size +++ p[i++] = 0x00000000; // process request +++ p[i++] = 0x30011; // (the tag id) +++ p[i++] = 16; // (size of the buffer) +++ p[i++] = 16; // (size of the data) +++ p[i++] = num_qpus; +++ p[i++] = control; +++ p[i++] = noflush; +++ p[i++] = timeout; // ms +++ +++ p[i++] = 0x00000000; // end tag +++ p[0] = i*sizeof *p; // actual size +++ +++ mbox_property(file_desc, p); +++ return p[5]; +++} +++ +++int mbox_open() { +++ int file_desc; +++ +++ // open a char device file used for communicating with kernel mbox driver +++ file_desc = open(DEVICE_FILE_NAME, 0); +++ if (file_desc < 0) { +++ printf("Can't open device file: %s\n", DEVICE_FILE_NAME); +++ printf("Try creating a device file with: sudo mknod %s c %d 0\n", DEVICE_FILE_NAME, MAJOR_NUM); +++ } +++ return file_desc; +++} +++ +++void mbox_close(int file_desc) { +++ close(file_desc); +++} ++diff --git a/libavcodec/rpi_mailbox.h b/libavcodec/rpi_mailbox.h ++new file mode 100644 ++index 0000000..c264d2e ++--- /dev/null +++++ 
b/libavcodec/rpi_mailbox.h ++@@ -0,0 +1,20 @@ +++#ifndef RPI_MAILBOX_H +++#define RPI_MAILBOX_H +++ +++extern int mbox_open(void); +++extern void mbox_close(int file_desc); +++ +++extern unsigned get_version(int file_desc); +++extern unsigned mem_alloc(int file_desc, unsigned size, unsigned align, unsigned flags); +++extern unsigned mem_free(int file_desc, unsigned handle); +++extern unsigned mem_lock(int file_desc, unsigned handle); +++extern unsigned mem_unlock(int file_desc, unsigned handle); +++extern void *mapmem_shared(unsigned base, unsigned size); +++extern void *mapmem_private(unsigned base, unsigned size); +++extern void unmapmem(void *addr, unsigned size); +++ +++extern unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); +++extern unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout); +++extern unsigned qpu_enable(int file_desc, unsigned enable); +++ +++#endif ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++new file mode 100644 ++index 0000000..b1f50ee ++--- /dev/null +++++ b/libavcodec/rpi_qpu.c ++@@ -0,0 +1,652 @@ +++#ifdef RPI +++// Use the vcsm device for shared memory +++// This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. +++#define RPI_USE_VCSM +++#define RPI_TIME_TOTAL_QPU +++ +++#include <stdio.h> +++#include <stdlib.h> +++#include <string.h> +++#include <stddef.h> +++#include <assert.h> +++ +++#include "config.h" +++ +++#include <pthread.h> +++#include <time.h> +++ +++#include "rpi_mailbox.h" +++#include "rpi_qpu.h" +++#include "rpi_shader.h" +++#include "rpi_hevc_transform.h" +++ +++#ifdef RPI_USE_VCSM +++#include "rpi_user_vcsm.h" +++#endif +++ +++// On Pi2 there is no way to access the VPU L2 cache +++// GPU_MEM_FLG should be 4 for uncached memory. 
+++// However, if using VCSM allocated buffers, need to use 0xC at the moment because VCSM does not allocate uncached memory correctly
+++// The QPU crashes if we mix L2 cached and L2 uncached accesses due to a HW bug.
+++#define GPU_MEM_FLG 0xC
+++#define GPU_MEM_MAP 0x0
+++
+++#define vcos_verify(x) ((x)>=0)
+++
+++typedef unsigned char uint8_t;
+++typedef signed char int8_t;
+++typedef unsigned short uint16_t;
+++typedef unsigned int uint32_t;
+++typedef int int32_t;
+++
+++/*static const unsigned code[] =
+++{
+++  #include "rpi_shader.hex"
+++};*/
+++
+++// Size in 32bit words
+++#define QPU_CODE_SIZE 2048
+++#define VPU_CODE_SIZE 2048
+++
+++struct GPU
+++{
+++  unsigned int qpu_code[QPU_CODE_SIZE];
+++  unsigned int vpu_code[VPU_CODE_SIZE];
+++  int open_count; // Number of allocated video buffers
+++  unsigned int vc_handle; // Handle of this memory
+++  int mb; // Mailbox handle
+++  int vc; // Address in GPU memory
+++  int mail[12*2]; // One (unifs, code) pair per QPU; qpu_run_shader12 fills all 12 pairs
+++};
+++
+++// Stop more than one thread trying to allocate memory or use the processing resources at once
+++static pthread_mutex_t gpu_mutex = PTHREAD_MUTEX_INITIALIZER;
+++static volatile struct GPU* gpu = NULL;
+++
+++#ifdef RPI_TIME_TOTAL_QPU
+++static unsigned int Microseconds(void) { // Microseconds since the first call, modulo 2^32
+++  struct timespec ts;
+++  unsigned int x;
+++  static unsigned int base = 0;
+++  clock_gettime(CLOCK_REALTIME, &ts);
+++  x = (unsigned int)ts.tv_sec*1000000u + (unsigned int)(ts.tv_nsec/1000); // unsigned: wraps instead of overflowing
+++  if (base==0) base=x;
+++  return x-base;
+++}
+++#endif
+++
+++// Connect to QPU, returns 0 on success.
+++static int gpu_init(volatile struct GPU **gpu) {
+++  int mb = mbox_open();
+++  int vc;
+++  int handle;
+++  volatile struct GPU* ptr;
+++  if (mb < 0)
+++    return -1;
+++
+++  if (qpu_enable(mb, 1)) { mbox_close(mb); return -2; } // Don't leak the mailbox fd on failure
+++
+++#ifdef RPI_USE_VCSM
+++  vcsm_init();
+++#endif
+++
+++  handle = mem_alloc(mb, sizeof(struct GPU), 4096, GPU_MEM_FLG);
+++  if (!handle) {
+++    qpu_enable(mb, 0);
+++    mbox_close(mb);
+++    return -3;
+++  }
+++  vc = mem_lock(mb, handle);
+++  ptr = mapmem_shared((vc+GPU_MEM_MAP)&~0xc0000000, sizeof(struct GPU));
+++  if (ptr == NULL) {
+++    mem_unlock(mb, handle); // Unlock before releasing, mirroring gpu_term()
+++    mem_free(mb, handle);
+++    qpu_enable(mb, 0);
+++    mbox_close(mb);
+++    return -4;
+++  }
+++  ptr->mb = mb;
+++  ptr->vc_handle = handle;
+++  ptr->vc = vc;
+++
+++  *gpu = ptr; // Must be published before qpu_get_fn() below, which reads gpu->vc
+++
+++  // Now copy over the QPU code into GPU memory
+++  {
+++    int num_bytes = qpu_get_fn(QPU_MC_END) - qpu_get_fn(QPU_MC_SETUP);
+++    assert(num_bytes<=QPU_CODE_SIZE*sizeof(unsigned int));
+++    memcpy((void*)ptr->qpu_code, rpi_shader, num_bytes);
+++  }
+++  // And the VPU code
+++  {
+++    int num_bytes = sizeof(rpi_hevc_transform);
+++    assert(num_bytes<=VPU_CODE_SIZE*sizeof(unsigned int));
+++    memcpy((void*)ptr->vpu_code, rpi_hevc_transform, num_bytes);
+++  }
+++
+++  return 0;
+++}
+++
+++// Make sure we have exclusive access to the mailbox, and enable qpu if necessary.
+++static void gpu_lock(void) {
+++  pthread_mutex_lock(&gpu_mutex);
+++  if (gpu==NULL) {
+++    gpu_init(&gpu); // NOTE: result is ignored; gpu stays NULL if initialisation fails
+++  }
+++}
+++
+++static void gpu_unlock(void) {
+++  pthread_mutex_unlock(&gpu_mutex);
+++}
+++
+++// Allocate memory on GPU
+++// Fills in structure <p> containing ARM pointer, videocore handle, videocore memory address, numbytes
+++// Returns 0 on success.
+++// This allocates memory that will not be cached in ARM's data cache.
+++// Therefore safe to use without data cache flushing.
+++int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) {
+++  gpu_lock();
+++  p->vc_handle = mem_alloc(gpu->mb, numbytes, 4096, GPU_MEM_FLG);
+++  p->vcsm_handle = 0;
+++  if (!p->vc_handle)
+++  { qpu_enable(gpu->mb, 0);
+++    gpu_unlock(); // Every exit path must release gpu_mutex
+++    return -3;
+++  }
+++  p->vc = mem_lock(gpu->mb, p->vc_handle);
+++  p->arm = mapmem_shared((p->vc+GPU_MEM_MAP)&~0xc0000000,numbytes);
+++  p->numbytes = numbytes;
+++  if (p->arm == NULL)
+++  {
+++    mem_unlock(gpu->mb, p->vc_handle); // Unlock before releasing the handle
+++    mem_free(gpu->mb, p->vc_handle);
+++    qpu_enable(gpu->mb, 0); // Use the mailbox while gpu_mutex is still held
+++    gpu_unlock();
+++    return -4;
+++  }
+++  gpu->open_count++;
+++  gpu_unlock();
+++  return 0;
+++}
+++
+++void gpu_cache_flush(GPU_MEM_PTR_T *p)
+++{
+++  // This only works when using RPI_USE_VCSM
+++  void *tmp = vcsm_lock(p->vcsm_handle);
+++  vcsm_unlock_ptr(tmp);
+++}
+++
+++// This allocates data that will be
+++// Cached in ARM L2
+++// Uncached in VPU L2
+++int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p) {
+++  gpu_lock();
+++#ifdef RPI_USE_VCSM
+++  { p->numbytes = numbytes; // Record the size here too, as gpu_malloc_uncached() does
+++    p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST, (char *)"Video Frame" ); // f....... locks up for VP9 - retest this?
+++    //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); // 3b...... works
+++    //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); //fb...... locks up
+++    //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST_AND_VC, (char *)"Video Frame" ); // 3b works (but corrupted due to caching)
+++    p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle);
+++    p->arm = vcsm_lock(p->vcsm_handle);
+++    p->vc = mem_lock(gpu->mb, p->vc_handle);
+++  }
+++#else
+++  p->vc_handle = mem_alloc(gpu->mb, numbytes, 4096, GPU_MEM_FLG);
+++  p->vcsm_handle = 0;
+++  if (!p->vc_handle)
+++  { qpu_enable(gpu->mb, 0);
+++    gpu_unlock(); // Every exit path must release gpu_mutex
+++    return -3;
+++  }
+++  p->vc = mem_lock(gpu->mb, p->vc_handle);
+++  printf("This mapmem_private does not seem to work\n");
+++  exit(-1);
+++  p->arm = mapmem_private((p->vc+GPU_MEM_MAP)&~0xc0000000,numbytes);
+++  p->numbytes = numbytes;
+++  if (p->arm == NULL)
+++  {
+++    mem_unlock(gpu->mb, p->vc_handle); // Unlock before releasing the handle
+++    mem_free(gpu->mb, p->vc_handle);
+++    qpu_enable(gpu->mb, 0);
+++    gpu_unlock();
+++    return -4;
+++  }
+++#endif
+++  gpu->open_count++;
+++  gpu_unlock();
+++  return 0;
+++}
+++
+++static void gpu_term(void)
+++{
+++  int mb;
+++  unsigned handle;
+++  if (gpu==NULL) return; // Check before touching any field of gpu
+++  mb = gpu->mb;
+++  handle = gpu->vc_handle;
+++  unmapmem((void*)gpu, sizeof(struct GPU));
+++  mem_unlock(mb, handle);
+++  mem_free(mb, handle);
+++  qpu_enable(mb, 0);
+++#ifdef RPI_USE_VCSM
+++  vcsm_exit();
+++#endif
+++  mbox_close(mb);
+++  gpu = NULL;
+++}
+++
+++void gpu_free(GPU_MEM_PTR_T *p) {
+++  int mb = gpu->mb;
+++  unsigned handle = p->vc_handle;
+++  gpu_lock();
+++#ifdef RPI_USE_VCSM
+++  if (p->vcsm_handle) {
+++    mem_unlock(mb,p->vc_handle);
+++    vcsm_unlock_ptr(p->arm);
+++    vcsm_free(p->vcsm_handle);
+++  } else {
+++    unmapmem((void*)p->arm, p->numbytes); // Unmap the buffer's own size, not sizeof(struct GPU)
+++    mem_unlock(mb, handle);
+++    mem_free(mb, handle);
+++  }
+++#else
+++  unmapmem((void*)p->arm, p->numbytes); // Unmap the buffer's own size, not sizeof(struct GPU)
+++  mem_unlock(mb, handle);
+++  mem_free(mb, handle);
+++#endif
+++
+++  gpu->open_count--;
+++  if (gpu->open_count==0) {
+++    printf("Closing GPU\n");
+++    gpu_term(); // Also resets gpu to NULL
+++  }
+++  gpu_unlock();
+++}
+++
+++unsigned int vpu_get_fn(void) {
+++  // Make sure that the gpu is initialized
+++  if (gpu==NULL) {
+++    printf("Preparing gpu\n");
+++    gpu_lock();
+++    gpu_unlock();
+++  }
+++  return gpu->vc + offsetof(struct GPU,vpu_code);
+++}
+++
+++unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5)
+++{
+++  unsigned r;
+++  gpu_lock();
+++  r = execute_code(gpu->mb, code, r0, r1, r2, r3, r4, r5);
+++  gpu_unlock();
+++  return r;
+++}
+++
+++// Run a program on a QPU with the given code and uniform stream (given in GPU addresses)
+++// The first num QPUs will start at code, the next num2 QPUs will start at code2
+++void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, int unifs9, int unifs10, int unifs11, int unifs12)
+++{
+++  int i;
+++#ifdef RPI_TIME_TOTAL_QPU
+++  static int last_time=0;
+++  static long long on_time=0;
+++  static long long off_time=0;
+++  int start_time;
+++  int end_time;
+++  static int count=0;
+++#endif
+++
+++  gpu_lock();
+++#ifdef RPI_TIME_TOTAL_QPU
+++  start_time = Microseconds();
+++  if (last_time==0)
+++    last_time = start_time;
+++  off_time += start_time-last_time;
+++#endif
+++  for(i=0;i<num;i++) {
+++    gpu->mail[i*2 + 1] = code;
+++  }
+++  for(;i<num+num2;i++) {
+++    gpu->mail[i*2 + 1] = code2;
+++  }
+++  gpu->mail[0 ] = unifs1;
+++  gpu->mail[2 ] = unifs2;
+++  gpu->mail[4 ] = unifs3;
+++  gpu->mail[6 ] = unifs4;
+++  gpu->mail[8 ] = unifs5;
+++  gpu->mail[10] = unifs6;
+++  gpu->mail[12] = unifs7;
+++  gpu->mail[14] = unifs8;
+++  gpu->mail[16] = unifs9;
+++  gpu->mail[18] = unifs10;
+++  gpu->mail[20] = unifs11;
+++  gpu->mail[22] = unifs12;
+++  execute_qpu(
+++    gpu->mb,
+++    12 /* Number of QPUs */,
+++    gpu->vc + offsetof(struct GPU, mail),
+++    1 /* no flush */, // Don't flush VPU L1 cache
+++    5000 /* timeout ms */);
+++#ifdef RPI_TIME_TOTAL_QPU
+++  end_time = Microseconds();
+++  last_time = end_time;
+++  on_time += end_time - start_time;
+++  count++;
+++  if ((count&0x7f)==0)
+++    printf("On=%dms, Off=%dms\n",(int)(on_time/1000),(int)(off_time/1000));
+++#endif
+++  gpu_unlock();
+++}
+++
+++unsigned int qpu_get_fn(int num) {
+++  // Make sure that the gpu is initialized
+++  unsigned int *fn;
+++  if (gpu==NULL) {
+++    printf("Preparing gpu\n");
+++    gpu_lock();
+++    gpu_unlock();
+++  }
+++  switch(num) {
+++  case QPU_MC_SETUP:
+++    fn = mc_setup;
+++    break;
+++  case QPU_MC_FILTER:
+++    fn = mc_filter;
+++    break;
+++  case QPU_MC_EXIT:
+++    fn = mc_exit;
+++    break;
+++  case QPU_MC_INTERRUPT_EXIT:
+++    fn = mc_interrupt_exit;
+++    break;
+++  case QPU_MC_FILTER_B:
+++    fn = mc_filter_b;
+++    break;
+++  case QPU_MC_FILTER_HONLY:
+++    fn = mc_filter_honly;
+++    break;
+++  case QPU_MC_SETUP_UV:
+++    fn = mc_setup_uv;
+++    break;
+++  case QPU_MC_FILTER_UV:
+++    fn = mc_filter_uv;
+++    break;
+++  case QPU_MC_FILTER_UV_B:
+++    fn = mc_filter_uv_b;
+++    break;
+++  case QPU_MC_END:
+++    fn = mc_end;
+++    break;
+++  default:
+++    printf("Unknown function\n");
+++    exit(-1);
+++  }
+++  return gpu->vc + 4*(int)(fn-rpi_shader);
+++  //return code[num] + gpu->vc;
+++}
+++
+++#if 0
+++
+++int32_t hcoeffs[] = {-4, 10, -21, 70, 90, -24, 11, -4};
+++//int32_t hcoeffs[] = {1, 1, 1, 1, 1, 1, 1, 1};
+++int32_t vcoeffs[] = {-2, 6, -13, 37, 115, -20, 9, -4};
+++//int32_t vcoeffs[] = {1, 1, 1, 1, 1, 1, 1, 1};
+++
+++#define ENCODE_COEFFS(c0, c1, c2, c3) (((c0-1) & 0xff) | ((c1-1) & 0xff) << 8 | ((c2-1) & 0xff) << 16 | ((c3-1) & 0xff) << 24);
+++
+++static uint8_t av_clip_uint8(int32_t a)
+++{
+++  if (a&(~255)) return (-a)>>31;
+++  else return a;
+++}
+++
+++static int32_t filter8(const uint8_t *data, int pitch)
+++{
+++  int32_t vsum = 0;
+++  int x, y;
+++
+++  for (y = 0; y < 8; y++) {
+++    int32_t hsum = 0;
+++
+++    for (x = 0; x < 8; x++)
+++      hsum += hcoeffs[x]*data[x + y * pitch];
+++
+++    vsum += vcoeffs[y]*av_clip_uint8( (hsum + 64) >> 7); // Added brackets to stop compiler warning
+++  }
+++
+++  return av_clip_uint8( (vsum + 64) >> 7);
+++}
+++
+++// Note regression
changes coefficients so is not thread safe +++//#define REGRESSION +++#ifdef REGRESSION +++#define CMAX 100 +++#else +++#define CMAX 2 +++#endif +++#define YMAX 16 +++ +++int rpi_test_shader(void) +++{ +++ int i, c; +++ +++ uint32_t *unifs; +++ +++ uint8_t *in_buffer; +++ uint8_t *out_buffer[2]; +++ +++ GPU_MEM_PTR_T unifs_ptr; +++ GPU_MEM_PTR_T in_buffer_ptr; +++ GPU_MEM_PTR_T out_buffer_ptr[2]; +++ +++ // Addresses in GPU memory of filter programs +++ uint32_t mc_setup = 0; +++ uint32_t mc_filter = 0; +++ uint32_t mc_exit = 0; +++ +++ int pitch = 0x500; +++ +++ if (gpu==NULL) { +++ gpu_lock(); +++ gpu_unlock(); +++ } +++ +++ printf("This needs to change to reflect new assembler\n"); +++ // Use table to compute locations of program start points +++ mc_setup = code[0] + gpu->vc; +++ mc_filter = code[1] + gpu->vc; +++ mc_exit = code[2] + gpu->vc; +++ +++ if (!vcos_verify(gpu_malloc_uncached(4*64,&unifs_ptr))) { +++ return -2; +++ } +++ unifs = (uint32_t*)unifs_ptr.arm; +++ +++ if (!vcos_verify(gpu_malloc_uncached(64*23,&in_buffer_ptr))) { +++ return -3; +++ } +++ in_buffer = (uint8_t*)in_buffer_ptr.arm; +++ +++ if (!vcos_verify(gpu_malloc_uncached(16*pitch,&out_buffer_ptr[0])) || !vcos_verify(gpu_malloc_uncached(16*pitch,&out_buffer_ptr[1]))) { +++ return -4; +++ } +++ out_buffer[0] = (uint8_t*)out_buffer_ptr[0].arm; +++ out_buffer[1] = (uint8_t*)out_buffer_ptr[1].arm; +++ +++ for (c = 0; c < CMAX; c++) { +++ int xo[] = {rand()&31, rand()&31}; +++ +++#ifdef REGRESSION +++ for (i = 0; i < 8; i++) { +++ hcoeffs[i] = (int8_t)rand(); +++ vcoeffs[i] = (int8_t)rand(); +++ if (hcoeffs[i]==-128) +++ hcoeffs[i]++; +++ if (vcoeffs[i]==-128) +++ vcoeffs[i]++; +++ } +++#endif +++ +++ for (i = 0; i < 64*23; i++) { +++ //printf("%d %d %p\n",i,gpu->mb,&in_buffer[i]); +++ in_buffer[i] = rand(); +++ } +++ +++ // Clear output array +++ { +++ int b; +++ for(b=0;b<2;b++) { +++ for(i=0;i<16*16;i++) { +++ out_buffer[b][i] = 3; +++ } +++ } +++ } +++ +++ unifs[0] = mc_filter; +++ unifs[1] 
= in_buffer_ptr.vc+xo[0]+16; +++ unifs[2] = 64; // src pitch +++ unifs[3] = pitch; // dst pitch +++ unifs[4] = 0; // Padding +++ unifs[5] = 0; +++ unifs[6] = 0; +++ unifs[7 ] = mc_filter; +++ unifs[8 ] = in_buffer_ptr.vc+xo[1]+16; +++ unifs[9 ] = ENCODE_COEFFS(hcoeffs[0], hcoeffs[1], hcoeffs[2], hcoeffs[3]); +++ unifs[10] = ENCODE_COEFFS(hcoeffs[4], hcoeffs[5], hcoeffs[6], hcoeffs[7]); +++ unifs[11] = ENCODE_COEFFS(vcoeffs[0], vcoeffs[1], vcoeffs[2], vcoeffs[3]); +++ unifs[12] = ENCODE_COEFFS(vcoeffs[4], vcoeffs[5], vcoeffs[6], vcoeffs[7]); +++ unifs[13] = out_buffer_ptr[0].vc; +++ unifs[14] = mc_exit; +++ unifs[15] = in_buffer_ptr.vc+xo[1]+16; // dummy +++ unifs[16] = ENCODE_COEFFS(hcoeffs[0], hcoeffs[1], hcoeffs[2], hcoeffs[3]); +++ unifs[17] = ENCODE_COEFFS(hcoeffs[4], hcoeffs[5], hcoeffs[6], hcoeffs[7]); +++ unifs[18] = ENCODE_COEFFS(vcoeffs[0], vcoeffs[1], vcoeffs[2], vcoeffs[3]); +++ unifs[19] = ENCODE_COEFFS(vcoeffs[4], vcoeffs[5], vcoeffs[6], vcoeffs[7]); +++ unifs[20] = out_buffer_ptr[1].vc; +++ +++ printf("Gpu->vc=%x Code=%x dst=%x\n",gpu->vc, mc_filter,out_buffer_ptr[1].vc); +++ +++ // flush_dcache(); TODO is this needed on ARM side? 
- tried to use the direct alias to avoid this problem +++ +++ //qpu_run_shader(mc_setup, unifs_ptr.vc); +++ //qpu_run_shader(gpu, gpu->vc, unifs_ptr.vc); +++ rpi_do_block(in_buffer_ptr.vc+xo[0]+16, 64, out_buffer_ptr[0].vc, pitch,out_buffer[0]); +++ rpi_do_block(in_buffer_ptr.vc+xo[1]+16, 64, out_buffer_ptr[1].vc, pitch,out_buffer[1]); +++ +++ if (1) +++ { +++ int x, y, b; +++ int bad = 0; +++ +++ for (b=0; b<2; ++b) +++ for (y=0; y<YMAX; ++y) +++ for (x=0; x<16; ++x) { +++ int32_t ref = filter8(in_buffer+x+y*64+xo[b], 64); +++ +++ if (out_buffer[b][x+y*pitch] != ref) { +++ bad = 1; +++// printf("%d, %d, %d, %d\n", c, b, x, y); +++ } +++#ifndef REGRESSION +++ //printf("%08x %08x\n", out_buffer[b][x+y*pitch], ref); +++#endif +++ } +++ if (bad) +++ printf("Failed dst=%x test=%d\n",out_buffer_ptr[1].vc,c); +++ else +++ printf("Passed dst=%x test=%d\n",out_buffer_ptr[1].vc,c); +++ } +++ //printf("%d\n", simpenrose_get_qpu_tick_count()); +++ } +++ +++ gpu_free(&out_buffer_ptr[0]); +++ gpu_free(&out_buffer_ptr[1]); +++ gpu_free(&in_buffer_ptr); +++ gpu_free(&unifs_ptr); +++ +++ return 0; +++} +++ +++void rpi_do_block_arm(const uint8_t *in_buffer, int src_pitch, uint8_t *dst, int dst_pitch) +++{ +++ int x,y; +++ for (y=0; y<16; ++y) { +++ for (x=0; x<16; ++x) { +++ dst[x+y*dst_pitch] = filter8(in_buffer+x+y*src_pitch, src_pitch); +++ } +++ } +++} +++ +++void rpi_do_block(const uint8_t *in_buffer_vc, int src_pitch, uint8_t *dst_vc, int dst_pitch, uint8_t *dst) +++{ +++ uint32_t *unifs; +++ +++ GPU_MEM_PTR_T unifs_ptr; +++ //uint8_t *out_buffer; +++ //GPU_MEM_PTR_T out_buffer_ptr; +++ +++ // Addresses in GPU memory of filter programs +++ uint32_t mc_setup = 0; +++ uint32_t mc_filter = 0; +++ uint32_t mc_exit = 0; +++ //int x,y; +++ +++ if (gpu==NULL) { +++ gpu_lock(); +++ gpu_unlock(); +++ } +++ +++ // Use table to compute locations of program start points +++ mc_setup = code[0] + gpu->vc; +++ mc_filter = code[1] + gpu->vc; +++ mc_exit = code[2] + gpu->vc; +++ +++ if 
(!vcos_verify(gpu_malloc_uncached(4*64,&unifs_ptr))) { +++ return; +++ } +++ //gpu_malloc_uncached(16*dst_pitch,&out_buffer_ptr); +++ //out_buffer = (uint8_t*)out_buffer_ptr.arm; +++ +++ /*for (y=0; y<16; ++y) { +++ for (x=0; x<16; ++x) { +++ out_buffer[x+y*dst_pitch] = 7; +++ } +++ }*/ +++ +++ unifs = (uint32_t*)unifs_ptr.arm; +++ +++ unifs[0] = mc_filter; +++ unifs[1] = (int)in_buffer_vc; +++ unifs[2] = src_pitch; // src pitch +++ unifs[3] = dst_pitch; // dst pitch +++ unifs[4] = 0; // Padding +++ unifs[5] = 0; +++ unifs[6] = 0; +++ unifs[7 ] = mc_exit; +++ unifs[8 ] = (int)in_buffer_vc; +++ unifs[9 ] = ENCODE_COEFFS(hcoeffs[0], hcoeffs[1], hcoeffs[2], hcoeffs[3]); +++ unifs[10] = ENCODE_COEFFS(hcoeffs[4], hcoeffs[5], hcoeffs[6], hcoeffs[7]); +++ unifs[11] = ENCODE_COEFFS(vcoeffs[0], vcoeffs[1], vcoeffs[2], vcoeffs[3]); +++ unifs[12] = ENCODE_COEFFS(vcoeffs[4], vcoeffs[5], vcoeffs[6], vcoeffs[7]); +++ unifs[13] = (int)dst_vc; +++ //unifs[13] = (int)out_buffer_ptr.vc; +++ +++ //printf("Gpu->vc=%x Code=%x dst=%x\n",gpu->vc, mc_filter,out_buffer_ptr[1].vc); +++ +++ qpu_run_shader(mc_setup, unifs_ptr.vc); +++ +++ /*for (y=0; y<16; ++y) { +++ for (x=0; x<16; ++x) { +++ dst[x+y*dst_pitch] = out_buffer[x+y*dst_pitch]; +++ } +++ }*/ +++ +++ gpu_free(&unifs_ptr); +++ //gpu_free(&out_buffer_ptr); +++} +++ +++ +++#endif +++ +++#endif // RPI ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++new file mode 100644 ++index 0000000..4e3c35c ++--- /dev/null +++++ b/libavcodec/rpi_qpu.h ++@@ -0,0 +1,45 @@ +++#ifndef RPI_QPU_H +++#define RPI_QPU_H +++ +++typedef struct gpu_mem_ptr_s { +++ unsigned char *arm; // Pointer to memory mapped on ARM side +++ int vc_handle; // Videocore handle of relocatable memory +++ int vcsm_handle; // Handle for use by VCSM +++ int vc; // Address for use in GPU code +++ int numbytes; // Size of memory block +++} GPU_MEM_PTR_T; +++ +++// General GPU functions +++extern int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p); +++extern int 
gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p); +++extern void gpu_free(GPU_MEM_PTR_T *p); +++extern void gpu_cache_flush(GPU_MEM_PTR_T *p); +++ +++// QPU specific functions +++extern void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, int unifs9, int unifs10, int unifs11, int unifs12); +++ +++enum { +++ QPU_MC_SETUP, +++ QPU_MC_FILTER, +++ QPU_MC_EXIT, +++ QPU_MC_INTERRUPT_EXIT, +++ QPU_MC_FILTER_B, +++ QPU_MC_FILTER_HONLY, +++ QPU_MC_SETUP_UV, +++ QPU_MC_FILTER_UV, +++ QPU_MC_FILTER_UV_B, +++ QPU_MC_END +++ }; +++extern unsigned int qpu_get_fn(int num); +++ +++// VPU specific functions +++extern unsigned int vpu_get_fn(void); +++extern unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); +++ +++// Simple test of shader code +++extern int rpi_test_shader(void); +++ +++extern void rpi_do_block(const unsigned char *in_buffer_vc, int src_pitch, unsigned char *dst_vc, int dst_pitch, unsigned char *dst); +++extern void rpi_do_block_arm(const unsigned char *in_buffer, int src_pitch, unsigned char *dst, int dst_pitch); +++ +++#endif ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++new file mode 100644 ++index 0000000..41cc2e1 ++--- /dev/null +++++ b/libavcodec/rpi_shader.c ++@@ -0,0 +1,818 @@ +++#include "rpi_shader.h" +++ +++#ifdef _MSC_VER +++ #include <stdint.h> +++ /* cast through uintptr_t to avoid warnings */ +++ #define POINTER_TO_UINT(X) ((unsigned int)(uintptr_t)(X)) +++#else +++ #define POINTER_TO_UINT(X) ((unsigned int)(X)) +++#endif +++ +++#ifdef __cplusplus +++extern "C" { /* the types are probably wrong... 
*/ +++#endif +++#ifdef __cplusplus +++} +++#endif +++ +++#ifdef _MSC_VER +++__declspec(align(8)) +++#elif defined(__GNUC__) +++__attribute__((aligned(8))) +++#endif +++unsigned int rpi_shader[] = { +++// ::mc_setup +++/* [0x00000000] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000008] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num +++/* [0x00000010] */ 0x15827d80, 0x10020767, // mov ra_y, unif +++/* [0x00000018] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif +++/* [0x00000020] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00000028] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 +++/* [0x00000030] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x00000038] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000040] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x00000048] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x00000050] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x00000058] */ 0x00000040, 0xe0020567, // mov ra21, 64 +++/* [0x00000060] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000068] */ 0x00000008, 0xe00205e7, // mov ra23, 8 +++/* [0x00000070] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000078] */ 0x00000040, 0xe0021567, // mov rb21, 64 +++/* [0x00000080] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000088] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000090] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00000098] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x000000a0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x000000a8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x000000b0] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x000000b8] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x000000c0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x000000c8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x000000d0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000000d8] */ 0x149cf5c0, 0xd00208a7, // and r2, 
r2, 15 +++/* [0x000000e0] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x000000e8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x000000f0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x000000f8] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000100] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000108] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000110] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000118] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000120] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000128] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000130] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000138] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000140] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000148] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000150] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000158] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000160] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000168] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000170] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000178] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x00000180] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x00000188] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x00000190] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x00000198] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000001a0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x000001a8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000001b0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x000001b8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x000001c0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* 
[0x000001c8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x000001d0] */ 0x4c9d00cf, 0x10024821, // add r0, r0, r3; mul24 r1, r1, rb_pitch +++/* [0x000001d8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x000001e0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001e8] */ 0x949dc5c0, 0xd0025890, // and r2, r2, ~3; mov ra_x_base, r0 +++/* [0x000001f0] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x000001f8] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x00000200] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000208] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000210] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000218] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000220] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000228] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000230] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000238] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000240] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++// ::mc_filter_uv +++/* [0x00000248] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000250] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000258] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000260] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000268] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000270] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000278] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000280] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000288] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000290] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000298] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, 
r2 +++/* [0x000002a0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002b0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002d0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000002d8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000002e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002e8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002f0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000002f8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000300] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000330] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000340] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000350] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000360] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000368] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000370] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, 
rb23; mul24 r0, r0, ra22 +++/* [0x00000378] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000380] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000388] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000390] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000398] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003a0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :uvloop +++/* [0x000003a8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003b0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000003b8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003c0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003c8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003d0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003d8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003e0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000400] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000408] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++/* [0x00000410] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000420] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000428] */ 
0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000430] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000438] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000440] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000448] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000450] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000458] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000460] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000468] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000470] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000478] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000480] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000488] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000490] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000498] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x000004a0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x000004a8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000004b0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000004b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000004c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000004c8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004d0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x000004d8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x000004e0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x000004e8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x000004f0] */ 0x4d34d237, 
0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x000004f8] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000500] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000508] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000510] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000518] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000520] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000528] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000530] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000538] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000540] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x00000548] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000550] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000558] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000560] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000568] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000570] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000578] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000580] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000588] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000590] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000598] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter +++/* [0x000005a0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000005a8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000005b0] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x000005b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000005c0] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* 
[0x000005c8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000005d0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x000005d8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000005e0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x000005e8] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x000005f0] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x000005f8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000600] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x00000608] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000610] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000618] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000620] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000628] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000630] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000638] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000640] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000648] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000650] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000658] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000660] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000668] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000670] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000678] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000680] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000688] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000690] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000698] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a8] 
*/ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000006b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000006d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000006f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000700] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000708] */ 0x000001d0, 0xf07809e7, // brr.anynn -, r:fast_path +++/* [0x00000710] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000718] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000720] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000728] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :loop +++/* [0x00000730] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000738] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000740] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000748] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000750] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000758] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000760] */ 0x129de5c0, 
0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000768] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000770] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000778] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000780] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000788] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000790] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000798] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000007a0] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000007a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000007b0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000007b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000007c0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000007c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000007d0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000007d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000007e0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000007e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000007f0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000007f8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000800] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000808] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000810] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000818] */ 
0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000820] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000828] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000830] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000838] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000840] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000848] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:loop +++/* [0x00000850] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00000858] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00000860] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00000868] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000870] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000878] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000880] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000888] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000890] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000898] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x000008a0] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000008a8] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x000008b0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000008b8] */ 0xfffffe58, 0xf06809e7, // brr.anyn -, r:loop +++/* [0x000008c0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x000008c8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000008d0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000008d8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000008e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000008e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, 
rb29 +++/* [0x000008f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// :fast_path +++/* [0x000008f8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :fast_loop +++/* [0x00000900] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000908] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000910] */ 0x95727d9b, 0x1004475f, // mov.ifz ra_y, ra_y_next ; mov rb31, r3 +++/* [0x00000918] */ 0x95690dbf, 0x10044623, // mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch +++/* [0x00000920] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000928] */ 0x929de5e4, 0x100248a1, // min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 +++/* [0x00000930] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000938] */ 0xec414c87, 0x10024e20, // add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 +++/* [0x00000940] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000948] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000950] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000958] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000960] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000968] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000970] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000978] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000980] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000988] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 +++/* [0x00000990] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000998] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x000009a0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 
+++/* [0x000009a8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000009b0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000009b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000009c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000009c8] */ 0xffffff18, 0xf06809e7, // brr.anyn -, r:fast_loop +++/* [0x000009d0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x000009d8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x000009e0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x000009e8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x000009f0] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x000009f8] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000a00] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000a08] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000a10] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000a18] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000a20] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000a28] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000a30] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000a38] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:fast_loop +++/* [0x00000a40] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x00000a48] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a50] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a60] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter_b +++/* 
[0x00000a78] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000a80] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000a88] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x00000a90] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000a98] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x00000aa0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000aa8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x00000ab0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000ab8] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x00000ac0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x00000ac8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x00000ad0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000ad8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x00000ae0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000ae8] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000af0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000af8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000b00] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000b08] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000b10] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000b18] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000b20] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000b28] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000b30] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000b38] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000b40] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000b48] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000b50] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000b58] */ 0x0c9e7040, 0x10020827, // add 
r0, r0, r1 +++/* [0x00000b60] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000b68] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000b70] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00000b78] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000b80] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000b88] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000b90] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000b98] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000ba0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ba8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bb0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bb8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000bc0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bc8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bd0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bd8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000be0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000be8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bf0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bf8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000c00] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000c08] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000c10] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :bloop +++/* [0x00000c18] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* 
[0x00000c20] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000c28] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000c30] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000c38] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000c40] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000c48] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000c50] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000c58] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000c60] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000c68] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000c70] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000c78] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000c80] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000c88] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000c90] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000c98] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000ca0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000ca8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000cb0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000cb8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000cc0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000cc8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 
<< 6 +++/* [0x00000cd0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000cd8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000ce0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000ce8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000cf0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000cf8] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000d00] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000d08] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000d10] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000d18] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000d20] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000d28] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000d30] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:bloop +++/* [0x00000d38] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00000d40] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00000d48] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00000d50] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000d58] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000d60] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000d68] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000d70] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000d78] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000d80] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000d88] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000d90] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000d98] */ 0x4d5927ce, 0x100269e1, // sub.setf 
-, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000da0] */ 0x8fc8f3f6, 0xd0020867, // asr r1, r1, 15 ; mov -, vr_wait +++/* [0x00000da8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000db0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x00000db8] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:bloop +++/* [0x00000dc0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000dc8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00000dd0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x00000dd8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000de0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000de8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000df0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter_honly +++/* [0x00000df8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000e00] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000e08] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x00000e10] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000e18] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x00000e20] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000e28] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x00000e30] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000e38] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x00000e40] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x00000e48] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x00000e50] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000e58] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x00000e60] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000e68] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000e70] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000e78] */ 
0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000e80] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000e88] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e90] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000e98] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000ea0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000ea8] */ 0x0c9de1c0, 0xd0021467, // add rb17, r0, -2 +++/* [0x00000eb0] */ 0x919c71c0, 0xd0024812, // shl r0, r0, 7 ; mov rb18,r0 +++/* [0x00000eb8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000ec0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000ec8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000ed0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000ed8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ee0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ee8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ef0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000ef8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f00] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f08] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f10] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000f18] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000f20] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000f28] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000f30] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :loop_honly +++/* [0x00000f38] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000f40] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000f48] 
*/ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000f50] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000f58] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000f60] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000f68] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000f70] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000f78] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000f80] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000f88] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000f90] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000f98] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000fa0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000fa8] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000fb0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000fb8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000fc0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000fc8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000fd0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000fd8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000fe0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000fe8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000ff0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000ff8] */ 0x4d1f94f0, 
0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001000] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001008] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 +++/* [0x00001010] */ 0x8d5927f6, 0x100269e1, // sub.setf -, r3, rb18 ; mov r1, ra22 +++/* [0x00001018] */ 0x559f2fc1, 0x100049e0, // mov -, vw_wait ; mul24 r0, r0, r1 +++/* [0x00001020] */ 0xfffffef8, 0xf06809e7, // brr.anyn -, r:loop_honly +++/* [0x00001028] */ 0x0f9cf1c0, 0xd0020827, // asr r0, r0, 15 +++/* [0x00001030] */ 0x129d61c0, 0x10020827, // min r0, r0, rb22 +++/* [0x00001038] */ 0x139c01c0, 0xd0020c27, // max vpm, r0, 0 +++/* [0x00001040] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001048] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001050] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001058] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_exit +++/* [0x00001060] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001068] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00001070] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001078] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001080] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001088] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001090] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001098] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000010a0] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_exit1 +++/* [0x000010a8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000010b0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010b8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010c0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010c8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000010d8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000010e0] */ 0x009e7000, 0x100009e7, // nop ; nop +++// 
::mc_interrupt_exit +++/* [0x000010e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000010f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001100] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001108] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001110] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001118] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001120] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001128] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001130] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001138] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001140] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001148] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001150] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001158] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001160] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001168] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001170] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00001178] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_interrupt_exit4 +++/* [0x00001180] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001188] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001190] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001198] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011a0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011c0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000011c8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000011d0] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_interrupt_exit8 +++/* [0x000011d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* 
[0x000011e0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011e8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001200] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001208] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001210] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001218] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001220] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001228] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001230] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001238] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001240] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00001248] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_setup_uv +++/* [0x00001250] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00001258] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num +++/* [0x00001260] */ 0x15827d80, 0x10020767, // mov ra_y, unif +++/* [0x00001268] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif +++/* [0x00001270] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00001278] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base +++/* [0x00001280] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00001288] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 +++/* [0x00001290] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x00001298] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000012a0] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x000012a8] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x000012b0] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x000012b8] */ 0x00000040, 0xe0020567, // mov ra21, 64 +++/* [0x000012c0] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x000012c8] */ 0x00000008, 0xe00205e7, // mov ra23, 8 +++/* [0x000012d0] */ 0xffffff00, 0xe0021527, // mov rb20, 
0xffffff00 +++/* [0x000012d8] */ 0x00000040, 0xe0021567, // mov rb21, 64 +++/* [0x000012e0] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x000012e8] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x000012f0] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x000012f8] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x00001300] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x00001308] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x00001310] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x00001318] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x00001320] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x00001328] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x00001330] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00001338] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00001340] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00001348] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00001350] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00001358] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00001360] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00001368] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001370] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00001378] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00001380] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00001388] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00001390] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00001398] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x000013a0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x000013a8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x000013b0] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x000013b8] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x000013c0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000013c8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* 
[0x000013d0] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x000013d8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x000013e0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000013e8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x000013f0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000013f8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x00001400] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00001408] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00001410] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 +++/* [0x00001418] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00001420] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x00001428] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x00001430] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x00001438] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001440] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001448] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001450] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00001458] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00001460] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00001468] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001470] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00001478] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00001480] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++// ::mc_filter_uv_b +++/* [0x00001488] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00001490] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00001498] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000014a0] */ 0x938001f6, 0xd0024821, // max r0, r0, 
0; mov r1, unif +++/* [0x000014a8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000014b0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000014b8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000014c0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000014c8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000014d0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000014d8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000014e0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000014e8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000014f0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000014f8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00001500] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00001508] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00001510] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00001518] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00001520] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00001528] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00001530] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00001538] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00001540] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001548] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001550] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00001558] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00001560] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00001568] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001570] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001578] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001580] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001588] 
*/ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00001590] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001598] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015a0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015a8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000015b0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015b8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015c0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015c8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000015d0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015d8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015e0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015e8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x000015f0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000015f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001600] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :uvloop_b +++/* [0x00001608] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001610] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00001618] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00001620] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001628] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00001630] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001638] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00001640] 
*/ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00001648] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00001650] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00001658] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001660] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00001668] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++/* [0x00001670] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00001678] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00001680] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00001688] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00001690] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00001698] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000016a0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000016a8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000016b0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000016b8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000016c0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000016c8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000016d0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000016d8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x000016e0] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x000016e8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x000016f0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* 
[0x000016f8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001700] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001708] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001710] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001718] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001720] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00001728] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00001730] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00001738] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00001740] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00001748] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00001750] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00001758] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00001760] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00001768] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00001770] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00001778] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00001780] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00001788] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00001790] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00001798] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x000017a0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000017a8] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x000017b0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000017b8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x000017c0] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x000017c8] */ 0x0e9c13c0, 
0xd0020c27, // shr vpm, r1, 1 +++/* [0x000017d0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000017d8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000017e0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000017e8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000017f0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000017f8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001800] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00001808] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001810] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_end +++}; +++#ifdef __HIGHC__ +++#pragma Align_to(8, rpi_shader) +++#endif ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++new file mode 100644 ++index 0000000..db971f4 ++--- /dev/null +++++ b/libavcodec/rpi_shader.h ++@@ -0,0 +1,20 @@ +++#ifndef rpi_shader_H +++#define rpi_shader_H +++ +++extern unsigned int rpi_shader[]; /* assembled QPU code; the entry points below are offsets in 32-bit words (listing byte address / 4) */ +++ +++#define mc_setup (rpi_shader + 0) +++#define mc_filter_uv (rpi_shader + 146) +++#define mc_filter (rpi_shader + 360) +++#define mc_filter_b (rpi_shader + 670) +++#define mc_filter_honly (rpi_shader + 894) +++#define mc_exit (rpi_shader + 1048) +++#define mc_exit1 (rpi_shader + 1066) +++#define mc_interrupt_exit (rpi_shader + 1082) +++#define mc_interrupt_exit4 (rpi_shader + 1120) +++#define mc_interrupt_exit8 (rpi_shader + 1142) +++#define mc_setup_uv (rpi_shader + 1172) +++#define mc_filter_uv_b (rpi_shader + 1314) +++#define mc_end (rpi_shader + 1542) +++ +++#endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++new file mode 100644 ++index 0000000..6851e83 ++--- /dev/null +++++ b/libavcodec/rpi_shader.qasm ++@@ -0,0 +1,1413 @@ +++# register allocation +++# +++# ra0...ra7 eight horizontal filter coefficients +++# +++# rb1...rb7 seven shifted copies of the current unfiltered row +++# +++# ra8...ra15 eight filtered rows of context (ra15 == most recent) +++# +++# (ra15
isn't clamped to zero - this happens during the +++# copy to ra14, and during its use in the vertical filter) +++# +++# rb8...rb15 eight vertical filter coefficients +++# +++# ra16 clipped(row start address+elem_num)&~3 +++# ra17 per-channel shifts +++# ra19 next ra17 +++# +++# rb16 pitch +++# rb17 height + 5 +++# rb18 height + 7 +++# rb19 next ra16 +++# +++# ra20 1 +++# ra21 64 +++# ra22 256 +++# ra23 8 +++# +++# rb20 0xffffff00 +++# rb21 64 +++# rb22 255 +++# rb23 24 +++# +++# rb24 vdw_setup_1(dst_pitch) +++# rb25 frame width-1 +++# rb26 height<<23 + width<<16 + vdw_setup_0 +++# rb27 vdw_setup_0 (depends on QPU number) +++# rb28 vpm_setup (depends on QPU number) +++# rb29 vdw_setup_1(dst_pitch-width) +++# rb30 frame height-1 +++# rb31 used as temp to count loop iterations +++# +++# ra24...ra30 15, 14, 13, 12, 11, 10, 9 (NOTE(review): looks stale - ra24...ra29 are described individually below) +++# ra24 clipped(row start address+8+elem_num)&~3 +++# ra25 per-channel shifts 2 (also aliased as ra_u2v_ref_offset) +++# ra26 next ra24 +++# ra27 next ra25 (also aliased as ra_u2v_dst_offset) +++# ra28 next y +++# ra29 y for next texture access +++# +++# ra31 next kernel address +++ +++.set rb_frame_width_minus_1, rb25 +++.set rb_frame_height_minus_1, rb30 +++.set rb_pitch, rb16 +++.set ra_x_base, ra16 +++.set rb_x_base_next, rb19 +++.set ra_x2_base, ra24 +++.set ra_x2_base_next, ra26 +++.set ra_xshift, ra17 +++ +++.set ra_x2shift, ra25 +++.set ra_u2v_ref_offset, ra25 +++ +++.set ra_xshift_next, ra19 +++ +++.set ra_x2shift_next, ra27 +++.set ra_u2v_dst_offset, ra27 +++ +++.set ra_y_next, ra28 +++.set ra_y, ra29 +++ +++.set rb_const_64, rb21 +++ +++# mc_setup(next_kernel, x, y, ref_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1) +++::mc_setup +++ +++# Read starting kernel (address branched to once setup is done) +++mov ra31, unif +++ +++# Load first request location +++add ra_x_base, unif, elem_num # Store x +++mov ra_y, unif # Store y +++mov ra_x2_base, unif # Store frame base (held here only until the second fetch address replaces it below) +++ +++# Read image dimensions +++sub rb25,unif,1 # rb_frame_width_minus_1 = frame_width - 1 +++sub rb30,unif,1 # rb_frame_height_minus_1 = frame_height - 1 +++ +++# get source pitch +++mov rb16, unif # rb_pitch +++ +++# get destination pitch +++mov r0, unif +++mov
r1, vdw_setup_1(0) +++add rb24, r1, r0 +++ +++# load constants +++ +++mov ra20, 1 +++mov ra21, 64 +++mov ra22, 256 +++mov ra23, 8 +++ +++mov rb20, 0xffffff00 +++mov rb21, 64 +++mov rb22, 255 +++mov rb23, 24 +++ +++# touch vertical context to keep simulator happy +++ +++mov ra8, 0 +++mov ra9, 0 +++mov ra10, 0 +++mov ra11, 0 +++mov ra12, 0 +++mov ra13, 0 +++mov ra14, 0 +++mov ra15, 0 +++ +++# Compute part of VPM to use for DMA output +++mov r2, qpu_num +++and r2, r2, 15 +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later +++shl r0, r0, 5 +++add rb27, r0, r1 +++ +++# Compute part of VPM to save data into +++mov r2, qpu_num +++and r2, r2, 15 +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vpm_setup(0, 4, h8p(0, 0)) +++add rb28, r0, r1 +++ +++# Compute base address for first and second access +++#add r0, unif, elem_num # x +++mov r0, ra_x_base # Load x +++add r2, r0, 8 # x+8 +++max r0, r0, 0; mov r1, ra_y # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base # Load the frame base +++shl ra_xshift_next, r0, 3 +++max r2, r2, 0 +++add ra_y, r1, 1 +++min r2, r2, rb_frame_width_minus_1 +++shl ra_x2shift_next, r2, 3 +++max r1, r1, 0 # y, clamped to 0..frame height-1 by this max and the following min +++min r1, r1, rb_frame_height_minus_1 +++add r0, r0, r3; mul24 r1, r1, rb_pitch +++add r2, r2, r3 +++and r0, r0, ~3 +++and r2, r2, ~3; mov ra_x_base, r0 # both fetch addresses have their low two bits cleared +++# submit texture requests for first line +++add t0s, r0, r1 ; mov ra_x2_base, r2 +++add t0s, r2, r1 +++ +++# Dump padding words (pad0, pad1) +++mov r0, unif +++mov r0, unif +++ +++# submit texture requests for second line +++max r1, ra_y, 0 +++min r1, r1, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 +++bra -, ra31 # jump to the next kernel; the three instructions below are its delay slots +++nop ; mul24 r1, r1, rb_pitch +++add t0s, r1, ra_x_base +++add t0s, r1, ra_x2_base +++ +++################################################################################ +++ +++#
mc_filter_uv(next_kernel, x, y, frame_u_base, frame_v_base, width_height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) +++ +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x_base, ra_x2_base point to the current coordinates for this block (U row and V row respectively) +++::mc_filter_uv +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_u_base +++shl ra_xshift_next, r0, 3 +++sub r2, unif, r3 # compute offset from frame base u to frame base v +++add r0, r0, r3 +++and rb_x_base_next, r0, ~3 +++mov ra_y_next, r1 +++add ra_x2_base_next, rb_x_base_next, r2 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block (one uniform: width in the upper 16 bits, height in the low 8 bits) +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height (rb22 == 255) +++add rb17, r0, 5 # rb17 = height + 5 +++add rb18, r0, 7 # rb18 = height + 7 +++shl r0, r0, 7 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code (NOTE(review): no fast path in this kernel - these flags are overwritten by the mov.setf below before any branch) +++ +++# get filter coefficients (four 8-bit taps per uniform: asr by rb23 (24) sign-extends the top byte, mul24 by ra22 (256) moves the next byte up) +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++asr rb15, r0, rb23; mul24 r0, r0, ra22 +++asr rb14,
r0, rb23; mul24 r0, r0, ra22 +++asr rb13, r0, rb23; mul24 r0, r0, ra22 +++asr rb12, r0, rb23 +++ +++# r2 is scratch (it holds 16 at this point, not elem_num) +++# r3 is loop counter +++ +++mov r5rep, -8 +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# start the row loop with the counter at zero +++# (r3 is incremented by ra20 == 1 at the top of every iteration) +++ +++mov r3, 0 +++ +++:uvloop +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 # Z is set when the counter equals rb17 (height + 5): switch fetches to the next block; rb31 saves the counter +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] # non-zero on elements 8-15, so the .ifnz multiplies below replace those lanes with taps on r1 (the second fetch) +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 # r2 accumulates 64 - sum(tap * pixel) +++nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++sub r0, r2, r3 +++ +++mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++
+++sub.setf -, r3, 8 ; mov r1, ra22 # negative while fewer than 8 rows have been filtered +++ +++# apply horizontal filter +++brr.anyn -, r:uvloop # loop without output until the vertical context is full; the next three instructions are delay slots +++max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr r0, r0, 15 ; mov r1, ra21 +++min.setf ra15, r0, rb22 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r0, ra14, rb14 +++sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++sub.ifnn r1, r1, r0 ; mov -, vw_wait # flags come from the min.setf on ra15 above: the ra15 term is skipped where ra15 is negative (its clamp to zero) +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 # negative while the counter is below rb18 (height + 7) +++brr.anyn -, r:uvloop # the next three instructions are delay slots +++asr r1, r1, 15 +++min r1, r1, rb22 +++max vpm, r1, 0 +++ +++# DMA out for U +++ +++mov vw_setup, rb26 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++# DMA out for V +++# We need to wait for the U to complete first, but have nothing useful to compute while we wait. +++# Could potentially push this write into the start of the next pipeline stage.
+++mov r0, 16 +++mov -, vw_wait +++ +++bra -, ra31 # jump to the next kernel; the three instructions below are its delay slots +++add vw_setup, rb26, r0 # VDW setup 0 (rb26 + 16) +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++################################################################################ +++ +++ +++# mc_filter(next_kernel, x, y, frame_base, width_height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) +++ +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x_base, ra_x2_base point to the current coordinates for this block +++::mc_filter +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++mov ra_x2shift, ra_x2shift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++add r2, r0, 8 # x+8 +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base +++shl ra_xshift_next, r0, 3 +++max r2, r2, 0 +++min r2, r2, rb_frame_width_minus_1 +++shl ra_x2shift_next, r2, 3 +++add r0, r0, r3 +++add r2, r2, r3 +++and rb_x_base_next, r0, ~3 +++and ra_x2_base_next, r2, ~3 +++mov ra_y_next, r1 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24
r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++asr rb15, r0, rb23; mul24 r0, r0, ra22 +++asr rb14, r0, rb23; mul24 r0, r0, ra22 +++asr rb13, r0, rb23; mul24 r0, r0, ra22 +++brr.anynn -, r:fast_path +++asr rb12, r0, rb23 # delay slot 1 +++ +++# r2 is elem_num +++# r3 is loop counter +++ +++mov r5rep, -8 # delay slot 2 +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] # delay slot 3 +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++## nop ; ldtmu0 # loop counter increment +++## shr r0, r4, ra17 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, rb16 ; mov t0s, ra16 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 ; mul24 r3, r0, ra0 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++## sub r2, r2, r3 ; ldtmu0 +++## +++## mov r0, ra22 +++## shr r0, r4, ra17 ; mul24 r2, r2, r0 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, rb16 ; mov t0s, ra16 +++## +++## # 
apply horizontal filter +++## +++## asr r2, r2, 15 ; mul24 r3, r0, ra0 +++## min r2, r2, rb22 +++## max ra13, r2, 0 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++## sub r0, r2, r3 +++## +++## # apply horizontal filter +++## +++## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero +++## asr r0, r0, 15 +++## min r0, r0, rb22 +++## max ra14, r0, 0 +++## +++## +++## +++## +++## nop ; ldtmu0 # loop counter increment +++## shr r0, r4, ra17 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, rb16 ; mov t0s, ra16 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 ; mul24 r3, r0, ra0 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## nop ; mul24.ifnz r3, ra5 << 13, r1 << 
13 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++## sub r0, r2, r3 +++## +++## # apply horizontal filter +++## +++## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero +++## asr r0, r0, 15 +++## min r0, r0, rb22 +++## max ra15, r0, 0 +++ +++ +++ +++ +++mov r3, 0 +++ +++:loop +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++sub r0, r2, r3 +++ +++mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, 
ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter +++brr.anyn -, r:loop +++max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr r0, r0, 15 ; mov r1, ra21 +++min.setf ra15, r0, rb22 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r0, ra14, rb14 +++sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++sub.ifnn r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++brr.anyn -, r:loop +++asr r1, r1, 15 +++min r1, r1, rb22 +++max vpm, r1, 0 +++ +++# DMA out +++ +++bra -, ra31 +++mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long +++mov vw_setup, rb29 +++mov vw_addr, unif # start the VDW +++ +++#################################################### +++ +++:fast_path +++## nop ; ldtmu0 # loop counter increment +++## shr r0, r4, ra17 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, rb16 ; mov t0s, ra16 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 ; mul24 r3, r0, ra0 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## sub r2, r2, r3 ; ldtmu0 +++## +++## mov r0, ra22 +++## shr r0, r4, ra17 ; mul24 r2, r2, r0 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, 
rb16 ; mov t0s, ra16 +++## +++## # apply horizontal filter +++## +++## asr r2, r2, 15 ; mul24 r3, r0, ra0 +++## min r2, r2, rb22 +++## max ra13, r2, 0 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## sub r0, r2, r3 +++## +++## # apply horizontal filter +++## +++## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero +++## asr r0, r0, 15 +++## min r0, r0, rb22 +++## max ra14, r0, 0 +++## +++## +++## +++## +++## nop ; ldtmu0 # loop counter increment +++## shr r0, r4, ra17 ; ldtmu0 +++## shr r1, r4, ra17 ; v8subs r0, r0, rb20 +++## add t0s, ra16, r5 ; v8subs r1, r1, rb20 +++## add ra16, ra16, rb16 ; mov t0s, ra16 +++## +++## # generate seven shifted versions +++## # interleave with scroll of vertical context +++## +++## mov r2, rb21 ; mul24 r3, r0, ra0 +++## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++## sub r0, r2, r3 +++## +++## # apply horizontal filter +++## +++## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero +++## asr r0, r0, 15 +++## min r0, r0, rb22 +++## max ra15, r0, 0 +++ +++ +++mov r3, 0 # This signifies the amount of unrolling +++ +++:fast_loop +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++# Due to pipelining we 
can only skip second pipeline instructions related to the fetched pixels +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_y, ra_y_next ; mov rb31, r3 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch +++ +++max r2, ra_y, 0 +++min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 # discard texture read +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++sub r0, r2, r3 ; mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter +++ +++brr.anyn -, r:fast_loop +++max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr r0, r0, 15 ; mov r1, ra21 +++min.setf ra15, r0, rb22 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r0, ra14, rb14 +++sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++sub.ifnn r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++brr.anyn -, r:fast_loop +++asr r1, r1, 15 +++min r1, r1, rb22 +++max vpm, r1, 0 +++ +++# DMA out +++ +++bra -, ra31 +++mov vw_setup, rb26 # VDW: 
height rows, 16 8-bit units long +++mov vw_setup, rb29 +++mov vw_addr, unif # start the VDW +++ +++################################################################################ +++ +++# mc_filter_b(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) +++ +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x_base, ra_x16_base point to the current coordinates for this block +++::mc_filter_b +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++mov ra_x2shift, ra_x2shift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++add r2, r0, 8 # x+8 +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base +++shl ra_xshift_next, r0, 3 +++max r2, r2, 0 +++min r2, r2, rb_frame_width_minus_1 +++shl ra_x2shift_next, r2, 3 +++add r0, r0, r3 +++add r2, r2, r3 +++and rb_x_base_next, r0, ~3 +++and ra_x2_base_next, r2, ~3 +++mov ra_y_next, r1 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++# r0 is currently height<<7 +++# For vr_setup we want height<<20 (so 20-7=13 additional bits) +++shl r3, r0, 13 +++shl r3, r3, 8 # Mask off top 8 bits +++shr r3, r3, 8 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++# In a B frame, so also set up VPM read +++add vr_setup, r3, rb28 +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, 
r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++asr rb15, r0, rb23; mul24 r0, r0, ra22 +++asr rb14, r0, rb23; mul24 r0, r0, ra22 +++asr rb13, r0, rb23; mul24 r0, r0, ra22 +++asr rb12, r0, rb23 +++ +++# r2 is elem_num +++# r3 is loop counter +++ +++mov r5rep, -8 +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++mov r3, 0 +++ +++:bloop +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 
14, r1 << 14 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++sub r0, r2, r3 +++ +++mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter +++brr.anyn -, r:bloop +++max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr r0, r0, 15 ; mov r1, ra21 +++min.setf ra15, r0, rb22 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r0, ra14, rb14 +++sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++sub.ifnn r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++asr r1, r1, 15 ; mov -, vr_wait +++min r1, r1, rb22 +++add r0, vpm, 1 # Blend in previous VPM contents at this location +++brr.anyn -, r:bloop +++max r1, r1, 0 +++add r1, r1, r0 +++shr vpm, r1, 1 +++ +++# DMA out +++ +++bra -, ra31 +++mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long +++mov vw_setup, rb29 +++mov vw_addr, unif # start the VDW +++ +++################################################################################ +++ +++# mc_filter_honly(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) +++# This filter only does horizontal filtering. +++# It is assumed that the region to fetch does not include extra rows above. 
+++ +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x_base, ra_x16_base point to the current coordinates for this block +++::mc_filter_honly +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++mov ra_x2shift, ra_x2shift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++add r2, r0, 8 # x+8 +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base +++shl ra_xshift_next, r0, 3 +++max r2, r2, 0 +++min r2, r2, rb_frame_width_minus_1 +++shl ra_x2shift_next, r2, 3 +++add r0, r0, r3 +++add r2, r2, r3 +++and rb_x_base_next, r0, ~3 +++and ra_x2_base_next, r2, ~3 +++mov ra_y_next, r1 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, -2 # Pipelining means we move data across 2 iterations early +++shl r0, r0, 7 ; mov rb18,r0 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++mov r0, unif +++ +++# r2 is elem_num +++# r3 is loop counter +++mov r5rep, -8 +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] # delay slot 3 +++mov r3, 0 +++ +++:loop_honly +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf 
-, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++sub r0, r2, r3 ; mov r3, rb31 +++ +++sub.setf -, r3, rb18 ; mov r1, ra22 +++ +++mov -, vw_wait ; mul24 r0, r0, r1 +++brr.anyn -, r:loop_honly +++asr r0, r0, 15 # delay 1 +++min r0, r0, rb22 # delay 2 +++max vpm, r0, 0 # delay 3 +++ +++# DMA out +++bra -, ra31 +++mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long +++mov vw_setup, rb29 +++mov vw_addr, unif # start the VDW +++ +++ +++################################################################################ +++ +++# mc_exit() +++ +++::mc_exit +++mov -, vw_wait # wait on the VDW +++ +++mov -,srel(0) +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++nop ; nop ; thrend +++nop ; nop # 
delay slot 1 +++nop ; nop # delay slot 2 +++ +++::mc_exit1 +++mov -, vw_wait # wait on the VDW +++ +++#mov -,srel(1) +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++nop ; nop ; thrend +++mov interrupt, 1; nop # delay slot 1 +++nop ; nop # delay slot 2 +++ +++# mc_interrupt_exit() +++::mc_interrupt_exit +++mov -, vw_wait # wait on the VDW +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++mov -,sacq(0) # 1 +++mov -,sacq(0) # 2 +++mov -,sacq(0) # 3 +++mov -,sacq(0) # 4 +++mov -,sacq(0) # 5 +++mov -,sacq(0) # 6 +++mov -,sacq(0) # 7 +++mov -,sacq(0) # 8 +++mov -,sacq(0) # 9 +++mov -,sacq(0) # 10 +++mov -,sacq(0) # 11 +++ +++nop ; nop ; thrend +++mov interrupt, 1; nop # delay slot 1 +++nop ; nop # delay slot 2 +++ +++# mc_interrupt_exit4() +++::mc_interrupt_exit4 +++mov -, vw_wait # wait on the VDW +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++mov -,sacq(0) # 1 +++mov -,sacq(0) # 2 +++mov -,sacq(0) # 3 +++ +++nop ; nop ; thrend +++mov interrupt, 1; nop # delay slot 1 +++nop ; nop # delay slot 2 +++ +++# mc_interrupt_exit8() +++::mc_interrupt_exit8 +++mov -, vw_wait # wait on the VDW +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++mov -,sacq(0) # 1 +++mov -,sacq(0) # 2 +++mov -,sacq(0) # 3 +++mov -,sacq(0) # 4 +++mov -,sacq(0) # 5 +++mov -,sacq(0) # 6 +++mov -,sacq(0) # 7 +++ +++nop ; nop ; thrend +++mov interrupt, 1; nop # delay slot 1 +++nop ; nop # delay slot 2 +++ +++################################################################################ +++# mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1, pad2) +++::mc_setup_uv +++ +++# Read starting kernel +++mov ra31, unif +++ +++# Load first request location +++add ra_x_base, unif, elem_num # Store x +++mov ra_y, unif # Store y +++mov ra_x2_base, unif # Store frame u base +++nop +++sub ra_u2v_ref_offset, unif, ra_x2_base # Store offset to add to move from u to v in reference frame +++ +++# Read image dimensions +++sub rb25,unif,1 +++sub 
rb30,unif,1 +++ +++# get source pitch +++mov rb16, unif +++ +++# get destination pitch +++mov r0, unif +++mov r1, vdw_setup_1(0) +++add rb24, r1, r0 +++ +++# load constants +++ +++mov ra20, 1 +++mov ra21, 64 +++mov ra22, 256 +++mov ra23, 8 +++ +++mov rb20, 0xffffff00 +++mov rb21, 64 +++mov rb22, 255 +++mov rb23, 24 +++ +++# touch vertical context to keep simulator happy +++ +++mov ra8, 0 +++mov ra9, 0 +++mov ra10, 0 +++mov ra11, 0 +++mov ra12, 0 +++mov ra13, 0 +++mov ra14, 0 +++mov ra15, 0 +++ +++# Compute part of VPM to use for DMA output +++mov r2, qpu_num +++and r2, r2, 15 +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later +++shl r0, r0, 5 +++add rb27, r0, r1 +++ +++# Compute part of VPM to save data into +++mov r2, qpu_num +++and r2, r2, 15 +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vpm_setup(0, 4, h8p(0, 0)) +++add rb28, r0, r1 +++ +++# Compute base address for first and second access +++mov r0, ra_x_base # Load x +++max r0, r0, 0; mov r1, ra_y # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base # Load the frame base +++shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++add ra_y, r1, 1 +++add r0, r0, r3 +++and r0, r0, ~3 +++max r1, r1, 0 ; mov ra_x_base, r0 # y +++min r1, r1, rb_frame_height_minus_1 +++# submit texture requests for first line +++add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++add t0s, r0, r1 ; mov ra_x2_base, r2 +++add t0s, r2, r1 +++ +++# Dump padding words +++mov r0, unif +++mov r0, unif +++mov r0, unif +++ +++# submit texture requests for second line +++max r1, ra_y, 0 +++min r1, r1, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 +++bra -, ra31 +++nop ; mul24 r1, r1, rb_pitch +++add t0s, r1, ra_x_base +++add t0s, r1, ra_x2_base +++ +++ +++ +++################################################################################ +++ +++::mc_filter_uv_b 
+++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base +++shl ra_xshift_next, r0, 3 +++sub r2, unif, r3 # compute offset from frame base u to frame base v +++add r0, r0, r3 +++and rb_x_base_next, r0, ~3 +++mov ra_y_next, r1 +++add ra_x2_base_next, rb_x_base_next, r2 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++ +++# r0 is currently height<<7 +++# For vr_setup we want height<<20 (so 20-7=13 additional bits) +++shl r3, r0, 13 +++shl r3, r3, 8 # Mask off top 8 bits +++shr r3, r3, 8 +++ +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++# In a B frame, so also set up VPM read +++add vr_setup, r3, rb28 +++ +++sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++asr rb15, r0, rb23; mul24 r0, r0, ra22 +++asr rb14, r0, rb23; mul24 r0, r0, ra22 +++asr rb13, r0, rb23; mul24 r0, r0, ra22 +++asr 
rb12, r0, rb23 +++ +++# r2 is elem_num +++# r3 is loop counter +++ +++mov r5rep, -8 +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++mov r3, 0 +++ +++:uvloop_b +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r2, rb21 ; mul24 r3, r0, ra0 +++nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++sub r0, r2, r3 +++ +++mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter 
+++brr.anyn -, r:uvloop_b +++max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr r0, r0, 15 ; mov r1, ra21 +++min.setf ra15, r0, rb22 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r0, ra14, rb14 +++sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++sub.ifnn r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++asr r1, r1, 15 +++min r1, r1, rb22 +++add r0, vpm, 1 # Blend in previous VPM contents at this location +++brr.anyn -, r:uvloop_b +++max r1, r1, 0 +++add r1, r1, r0 +++shr vpm, r1, 1 +++ +++ +++# DMA out for U +++ +++mov vw_setup, rb26 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++# DMA out for V +++# We need to wait for the U to complete first, but have nothing useful to compute while we wait. +++# Could potentially push this write into the start of the next pipeline stage. +++mov r0, 16 +++mov -, vw_wait +++ +++bra -, ra31 +++add vw_setup, rb26, r0 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++::mc_end ++diff --git a/libavcodec/rpi_user_vcsm.h b/libavcodec/rpi_user_vcsm.h ++new file mode 100644 ++index 0000000..fbebbbe ++--- /dev/null +++++ b/libavcodec/rpi_user_vcsm.h ++@@ -0,0 +1,425 @@ +++/* +++Copyright (c) 2012, Broadcom Europe Ltd +++All rights reserved. +++ +++Redistribution and use in source and binary forms, with or without +++modification, are permitted provided that the following conditions are met: +++ * Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. 
+++ * Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ * Neither the name of the copyright holder nor the +++ names of its contributors may be used to endorse or promote products +++ derived from this software without specific prior written permission. +++ +++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY +++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +++*/ +++ +++#ifndef __USER_VCSM__H__INCLUDED__ +++#define __USER_VCSM__H__INCLUDED__ +++ +++/* VideoCore Shared Memory - user interface library. +++** +++** This library provides all the necessary abstraction for any application to +++** make use of the shared memory service which is distributed across a kernel +++** driver and a videocore service. +++** +++** It is an application design decision to choose or not to use this service. +++** +++** The logical flow of operations that a user application needs to follow when +++** using this service is: +++** +++** 1) Initialize the service. +++** 2) Allocate shared memory blocks. +++** 3) Start using the allocated blocks.
+++** - In order to gain ownership on a block, lock the allocated block, +++** locking a block returns a valid address that the user application +++** can access. +++** - When finished with using the block for the current execution cycle +++** or function, and so when giving up the ownership, unlock the block. +++** 4) A block can be locked/unlocked as many times required - within or outside +++** of - a specific execution context. +++** 5) To completely release an allocated block, free it. +++** 6) If the service is no longer required, terminate it. +++** +++** +++** Some generic considerations: +++ +++** Allocating memory blocks. +++** +++** Memory blocks can be allocated in different manners depending on the cache +++** behavior desired. A given block can either be: +++ +++** - Allocated in a non cached fashion all the way through host and videocore. +++** - Allocated in a cached fashion on host OR videocore. +++** - Allocated in a cached fashion on host AND videocore. +++** +++** It is an application decision to determine how to allocate a block. Evidently +++** if the application will be doing substantial read/write accesses to a given block, +++** it is recommended to allocate the block at least in a 'host cached' fashion for +++** better results. +++** +++** +++** Locking memory blocks. +++** +++** When the memory block has been allocated in a host cached fashion, locking the +++** memory block (and so taking ownership of it) will trigger a cache invalidation. +++** +++** For the above reason and when using host cached allocation, it is important that +++** an application properly implements the lock/unlock mechanism to ensure cache will +++** stay coherent, otherwise there is no guarantee it will at all be. +++** +++** It is possible to dynamically change the host cache behavior (ie cached or non +++** cached) of a given allocation without needing to free and re-allocate the block. 
+++** This feature can be useful for such application which requires access to the block +++** only at certain times and not otherwise. By changing the cache behavior dynamically +++** the application can optimize performance for a given duration of use. +++** Such dynamic cache behavior remapping only applies to host cache and not videocore +++** cache. If one requires to change the videocore cache behavior, then a new block +++** must be created to replace the old one. +++** +++** On successful locking, a valid pointer is returned that the application can use +++** to access data inside the block. There is no guarantee that the pointer will +++** stay valid following the unlock action corresponding to this lock. +++** +++** +++** Unlocking memory blocks. +++** +++** When the memory block has been allocated in a host cached fashion, unlocking the +++** memory block (and so giving up its ownership) will trigger a cache flush unless +++** explicitly asked not to flush the cache for performance reasons. +++** +++** For the above reason and when using host cached allocation, it is important that +++** an application properly implements the lock/unlock mechanism to ensure cache will +++** stay coherent, otherwise there is no guarantee it will at all be. +++** +++** +++** A complete API is defined below. +++*/ +++ +++#ifdef __cplusplus +++extern "C" +++{ +++#endif +++ +++/* Different status that can be dumped. +++*/ +++typedef enum +++{ +++ VCSM_STATUS_VC_WALK_ALLOC = 0, // Walks *all* the allocation on videocore. +++ // Result of the walk is seen in the videocore +++ // log. +++ VCSM_STATUS_HOST_WALK_MAP, // Walks the *full* mapping allocation on host +++ // driver (ie for all processes). Result of +++ // the walk is seen in the kernel log. +++ VCSM_STATUS_HOST_WALK_PID_MAP, // Walks the per process mapping allocation on host +++ // driver (for current process). Result of +++ // the walk is seen in the kernel log.
+++ VCSM_STATUS_HOST_WALK_PID_ALLOC, // Walks the per process host allocation on host +++ // driver (for current process). Result of +++ // the walk is seen in the kernel log. +++ VCSM_STATUS_VC_MAP_ALL, // Equivalent to both VCSM_STATUS_VC_WALK_ALLOC and +++ // VCSM_STATUS_HOST_WALK_MAP. +++ // +++ VCSM_STATUS_NONE, // Must be last - invalid. +++ +++} VCSM_STATUS_T; +++ +++/* Different kind of cache behavior. +++*/ +++typedef enum +++{ +++ VCSM_CACHE_TYPE_NONE = 0, // No caching applies. +++ VCSM_CACHE_TYPE_HOST, // Allocation is cached on host (user space). +++ VCSM_CACHE_TYPE_VC, // Allocation is cached on videocore. +++ VCSM_CACHE_TYPE_HOST_AND_VC, // Allocation is cached on both host and videocore. +++ +++} VCSM_CACHE_TYPE_T; +++ +++/* Initialize the vcsm processing. +++** +++** Must be called once before attempting to do anything else. +++** +++** Returns 0 on success, -1 on error. +++*/ +++int vcsm_init( void ); +++ +++ +++/* Terminates the vcsm processing. +++** +++** Must be called when vcsm services are no longer needed, it will +++** take care of removing any allocation under the current process +++** control if deemed necessary. +++*/ +++void vcsm_exit( void ); +++ +++ +++/* Queries the status of the vcsm. +++** +++** Triggers dump of various kind of information, see the +++** different variants specified in VCSM_STATUS_T. +++** +++** Pid is optional. +++*/ +++void vcsm_status( VCSM_STATUS_T status, int pid ); +++ +++ +++/* Allocates a non-cached block of memory of size 'size' via the vcsm memory +++** allocator. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success. +++** +++** On success, the user must invoke vcsm_lock with the returned opaque +++** handle to gain access to the memory associated with the opaque handle. +++** When finished using the memory, the user calls vcsm_unlock_xx (see those +++** function definition for more details on the one that can be used).
+++** +++** A well behaved application should make every attempt to lock/unlock +++** only for the duration it needs to access the memory data associated with +++** the opaque handle. +++*/ +++unsigned int vcsm_malloc( unsigned int size, char *name ); +++ +++ +++/* Allocates a cached block of memory of size 'size' via the vcsm memory +++** allocator, the type of caching requested is passed as argument of the +++** function call. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success. +++** +++** On success, the user must invoke vcsm_lock with the returned opaque +++** handle to gain access to the memory associated with the opaque handle. +++** When finished using the memory, the user calls vcsm_unlock_xx (see those +++** function definition for more details on the one that can be used). +++** +++** A well behaved application should make every attempt to lock/unlock +++** only for the duration it needs to access the memory data associated with +++** the opaque handle. +++*/ +++unsigned int vcsm_malloc_cache( unsigned int size, VCSM_CACHE_TYPE_T cache, char *name ); +++ +++ +++/* Shares an allocated block of memory via the vcsm memory allocator. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success. +++** +++** On success, the user must invoke vcsm_lock with the returned opaque +++** handle to gain access to the memory associated with the opaque handle. +++** When finished using the memory, the user calls vcsm_unlock_xx (see those +++** function definition for more details on the one that can be used). +++** +++** A well behaved application should make every attempt to lock/unlock +++** only for the duration it needs to access the memory data associated with +++** the opaque handle. +++*/ +++unsigned int vcsm_malloc_share( unsigned int handle ); +++ +++ +++/* Resizes a block of memory allocated previously by vcsm_malloc. +++** +++** Returns: 0 on success +++** -errno on error.
+++** +++** The handle must be unlocked by user prior to attempting any +++** resize action. +++** +++** On error, the original size allocated against the handle +++** remains available the same way it would be following a +++** successful vcsm_malloc. +++*/ +++int vcsm_resize( unsigned int handle, unsigned int new_size ); +++ +++ +++/* Frees a block of memory that was successfully allocated by +++** a prior call to vcsm_malloc. +++** +++** The handle should be considered invalid upon return from this +++** call. +++** +++** Whether any memory is actually freed up or not as the result of +++** this call will depend on many factors, if all goes well it will +++** be freed. If something goes wrong, the memory will likely end up +++** being freed up as part of the vcsm_exit process. In the end the +++** memory is guaranteed to be freed one way or another. +++*/ +++void vcsm_free( unsigned int handle ); +++ +++ +++/* Retrieves a videocore opaque handle from a mapped user address +++** pointer. The videocore handle will correspond to the actual +++** memory mapped in videocore. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success. +++** +++** Note: the videocore opaque handle is distinct from the user +++** opaque handle (allocated via vcsm_malloc) and it is only +++** significant for such application which knows what to do +++** with it, for the others it is just a number with little +++** use since nothing can be done with it (in particular +++** for safety reason it cannot be used to map anything). +++*/ +++unsigned int vcsm_vc_hdl_from_ptr( void *usr_ptr ); +++ +++ +++/* Retrieves a videocore opaque handle from a user opaque +++** handle. The videocore handle will correspond to the actual +++** memory mapped in videocore. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success.
+++** +++** Note: the videocore opaque handle is distinct from the user +++** opaque handle (allocated via vcsm_malloc) and it is only +++** significant for such application which knows what to do +++** with it, for the others it is just a number with little +++** use since nothing can be done with it (in particular +++** for safety reason it cannot be used to map anything). +++*/ +++unsigned int vcsm_vc_hdl_from_hdl( unsigned int handle ); +++ +++ +++/* Retrieves a user opaque handle from a mapped user address +++** pointer. +++** +++** Returns: 0 on error +++** a non-zero opaque handle on success. +++*/ +++unsigned int vcsm_usr_handle( void *usr_ptr ); +++ +++ +++/* Retrieves a mapped user address from an opaque user +++** handle. +++** +++** Returns: 0 on error +++** a non-zero address on success. +++** +++** On success, the address corresponds to the pointer +++** which can access the data allocated via the vcsm_malloc +++** call. +++*/ +++void *vcsm_usr_address( unsigned int handle ); +++ +++ +++/* Locks the memory associated with this opaque handle. +++** +++** Returns: NULL on error +++** a valid pointer on success. +++** +++** A user MUST lock the handle received from vcsm_malloc +++** in order to be able to use the memory associated with it. +++** +++** On success, the pointer returned is only valid within +++** the lock content (ie until a corresponding vcsm_unlock_xx +++** is invoked). +++*/ +++void *vcsm_lock( unsigned int handle ); +++ +++ +++/* Locks the memory associated with this opaque handle. The lock +++** also gives a chance to update the *host* cache behavior of the +++** allocated buffer if so desired. The *videocore* cache behavior +++** of the allocated buffer cannot be changed by this call and such +++** attempt will be ignored. +++** +++** The system will attempt to honour the cache_update mode request, +++** the cache_result mode will provide the final answer on which cache +++** mode is really in use. 
Failing to change the cache mode will not +++** result in a failure to lock the buffer as it is an application +++** decision to choose what to do if (cache_result != cache_update) +++** +++** The value returned in cache_result can only be considered valid if +++** the returned pointer is non NULL. The cache_result pointer may be +++** NULL if the application does not care about the actual outcome of +++** its action with regards to the cache behavior change. +++** +++** Returns: NULL on error +++** a valid pointer on success. +++** +++** A user MUST lock the handle received from vcsm_malloc +++** in order to be able to use the memory associated with it. +++** +++** On success, the pointer returned is only valid within +++** the lock content (ie until a corresponding vcsm_unlock_xx +++** is invoked). +++*/ +++void *vcsm_lock_cache( unsigned int handle, +++ VCSM_CACHE_TYPE_T cache_update, +++ VCSM_CACHE_TYPE_T *cache_result ); +++ +++ +++/* Unlocks the memory associated with this user mapped address. +++** +++** Returns: 0 on success +++** -errno on error. +++** +++** After unlocking a mapped address, the user should no longer +++** attempt to reference it. +++*/ +++int vcsm_unlock_ptr( void *usr_ptr ); +++ +++ +++/* Unlocks the memory associated with this user mapped address. +++** Apply special processing that would override the otherwise +++** default behavior. +++** +++** If 'cache_no_flush' is specified: +++** Do not flush cache as the result of the unlock (if cache +++** flush was otherwise applicable in this case). +++** +++** Returns: 0 on success +++** -errno on error. +++** +++** After unlocking a mapped address, the user should no longer +++** attempt to reference it. +++*/ +++int vcsm_unlock_ptr_sp( void *usr_ptr, int cache_no_flush ); +++ +++ +++/* Unlocks the memory associated with this user opaque handle. +++** +++** Returns: 0 on success +++** -errno on error. 
+++** +++** After unlocking an opaque handle, the user should no longer +++** attempt to reference the mapped addressed once associated +++** with it. +++*/ +++int vcsm_unlock_hdl( unsigned int handle ); +++ +++ +++/* Unlocks the memory associated with this user opaque handle. +++** Apply special processing that would override the otherwise +++** default behavior. +++** +++** If 'cache_no_flush' is specified: +++** Do not flush cache as the result of the unlock (if cache +++** flush was otherwise applicable in this case). +++** +++** Returns: 0 on success +++** -errno on error. +++** +++** After unlocking an opaque handle, the user should no longer +++** attempt to reference the mapped addressed once associated +++** with it. +++*/ +++int vcsm_unlock_hdl_sp( unsigned int handle, int cache_no_flush ); +++ +++#ifdef __cplusplus +++} +++#endif +++ +++#endif /* __USER_VCSM__H__INCLUDED__ */ ++-- ++2.7.4 ++ ++ ++From 6cfa5910be47865aaaf58c185587189c332765a6 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@argondesign.com> ++Date: Sat, 2 May 2015 21:15:37 +0100 ++Subject: [PATCH 04/68] First working version with uncached memory ++ ++--- ++ libavcodec/hevc.c | 61 +++++- ++ libavcodec/hevc.h | 12 +- ++ libavcodec/hevc_cabac.c | 39 +++- ++ libavcodec/hevc_filter.c | 16 ++ ++ libavcodec/hevcpred_template.c | 6 + ++ libavcodec/rpi_hevc_transform.h | 422 +++++++++++++++++++++++++++++++++++++++- ++ libavcodec/rpi_hevc_transform.s | 153 +++++++++++++-- ++ libavcodec/rpi_qpu.c | 72 +++++++ ++ libavcodec/rpi_qpu.h | 1 + ++ 9 files changed, 736 insertions(+), 46 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index ab55df1..94ff709 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -45,6 +45,8 @@ ++ #include "rpi_qpu.h" ++ #endif ++ +++// #define DISABLE_MC +++ ++ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 }; ++ ++ /** ++@@ -1079,11 +1081,15 
@@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (size * size); i++) { ++ coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); ++ } +++ printf("Cross component not supported\n"); // TODO +++ exit(-1); ++ s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride); ++ } ++ } ++ ++ if (lc->tu.cross_pf) { +++ printf("Cross component not supported\n"); // TODO +++ exit(-1); ++ hls_cross_component_pred(s, 1); ++ } ++ for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 2 : 1); i++) { ++@@ -1112,6 +1118,8 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (size * size); i++) { ++ coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); ++ } +++ printf("Cross component not supported\n"); // TODO +++ exit(-1); ++ s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride); ++ } ++ } ++@@ -1409,6 +1417,10 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ int idx = ff_hevc_pel_weight[block_w]; ++ +++#ifdef DISABLE_MC +++ return; +++#endif +++ ++ x_off += mv->x >> 2; ++ y_off += mv->y >> 2; ++ src += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift)); ++@@ -1479,6 +1491,10 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ uint8_t *src0 = ref0->data[0] + y_off0 * src0stride + (int)((unsigned)x_off0 << s->ps.sps->pixel_shift); ++ uint8_t *src1 = ref1->data[0] + y_off1 * src1stride + (int)((unsigned)x_off1 << s->ps.sps->pixel_shift); ++ +++#ifdef DISABLE_MC +++ return; +++#endif +++ ++ if (x_off0 < QPEL_EXTRA_BEFORE || y_off0 < QPEL_EXTRA_AFTER || ++ x_off0 >= pic_width - block_w - QPEL_EXTRA_AFTER || ++ y_off0 >= pic_height - block_h - QPEL_EXTRA_AFTER) { ++@@ -1564,6 +1580,10 @@ static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0, ++ intptr_t _mx = mx << (1 - hshift); ++ intptr_t _my = my << (1 - vshift); ++ +++#ifdef DISABLE_MC +++ return; +++#endif 
+++ ++ x_off += mv->x >> (2 + hshift); ++ y_off += mv->y >> (2 + vshift); ++ src0 += y_off * srcstride + (x_off * (1 << s->ps.sps->pixel_shift)); ++@@ -1628,6 +1648,10 @@ static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVF ++ int hshift = s->ps.sps->hshift[1]; ++ int vshift = s->ps.sps->vshift[1]; ++ +++#ifdef DISABLE_MC +++ return; +++#endif +++ ++ intptr_t mx0 = av_mod_uintp2(mv0->x, 2 + hshift); ++ intptr_t my0 = av_mod_uintp2(mv0->y, 2 + vshift); ++ intptr_t mx1 = av_mod_uintp2(mv1->x, 2 + hshift); ++@@ -2367,6 +2391,22 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, ++ } ++ ++ #ifdef RPI +++static void rpi_execute_transform(HEVCContext *s) +++{ +++ int i=2; +++ //int j; +++ //int16_t *coeffs = s->coeffs_buf_arm[i]; +++ //for(j=s->num_coeffs[i]; j > 0; j-= 16*16, coeffs+=16*16) { +++ // s->hevcdsp.idct[4-2](coeffs, 16); +++ //} +++ +++ //gpu_cache_flush(&s->coeffs_buf[i]); +++ vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf[i].vc, s->num_coeffs[i] >> 8, 0, 0, 0); +++ +++ for(i=0;i<4;i++) +++ s->num_coeffs[i] = 0; +++} +++ ++ static void rpi_execute_pred_cmds(HEVCContext *s) ++ { ++ int i; ++@@ -2387,7 +2427,6 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ } ++ } ++ s->num_pred_cmds = 0; ++- s->num_coeffs = 0; ++ } ++ #endif ++ ++@@ -2434,7 +2473,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ #ifdef RPI ++- if (x_ctb + ctb_size >= s->ps.sps->width) { +++ if (1 || x_ctb + ctb_size >= s->ps.sps->width) { // TODO watch out for deblocking! 
+++ rpi_execute_transform(s); ++ rpi_execute_pred_cmds(s); ++ } ++ #endif ++@@ -3179,7 +3219,9 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ av_freep(&s->unif_mv_cmds); ++ av_freep(&s->unif_xfm_cmds); ++ av_freep(&s->univ_pred_cmds); ++- av_freep(&s->coeffs_buf); +++ for(i = 0; i < 4; i++) { +++ gpu_free(&s->coeffs_buf[i]); +++ } ++ #endif ++ ++ for (i = 0; i < 3; i++) { ++@@ -3246,13 +3288,16 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->univ_pred_cmds = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); ++ if (!s->univ_pred_cmds) ++ goto fail; ++- s->coeffs_buf = av_mallocz(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16); ++- if (!s->coeffs_buf) ++- goto fail; +++ for(i = 0; i < 4; i++) { +++ gpu_malloc_uncached(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16, &s->coeffs_buf[i]); // TODO slim this down and share across sizes +++ s->coeffs_buf_arm[i] = (int16_t*) s->coeffs_buf[i].arm; +++ if (!s->coeffs_buf_arm[i]) +++ goto fail; +++ } ++ s->enable_rpi = 0; ++ ++ // A little test program ++- { +++ /*{ ++ GPU_MEM_PTR_T p; ++ int err = gpu_malloc_cached(16, &p); ++ short *q = (short *)p.arm; ++@@ -3273,7 +3318,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ printf(")\n"); ++ gpu_free(&p); ++ goto fail; // Early out ++- } +++ }*/ ++ ++ #endif ++ ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 7a1c35f..4167985 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -40,6 +40,11 @@ ++ #include "thread.h" ++ #include "videodsp.h" ++ +++// define RPI to split the CABAC/prediction/transform into separate stages +++#ifdef RPI +++#include "rpi_qpu.h" +++#endif +++ ++ #define MAX_DPB_SIZE 16 // A.4.1 ++ #define MAX_REFS 16 ++ ++@@ -856,11 +861,12 @@ typedef struct HEVCContext { ++ HEVCMvCmd *unif_mv_cmds; ++ HEVCXfmCmd *unif_xfm_cmds; ++ HEVCPredCmd *univ_pred_cmds; ++- int16_t *coeffs_buf; ++- int num_mv_cmds; +++ GPU_MEM_PTR_T coeffs_buf[4]; +++ int16_t *coeffs_buf_arm[4]; +++ int num_coeffs[4]; ++ int 
num_xfm_cmds; +++ int num_mv_cmds; ++ int num_pred_cmds; ++- int num_coeffs; ++ #endif ++ ++ uint8_t *cabac_state; ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 4e97f06..d1cba86 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1031,6 +1031,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ int vshift = s->ps.sps->vshift[c_idx]; ++ uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride + ++ ((x0 >> hshift) << s->ps.sps->pixel_shift)]; +++ int use_vpu = s->enable_rpi && !lc->cu.cu_transquant_bypass_flag && !transform_skip_flag && !lc->tu.cross_pf && log2_trafo_size==4; ++ int16_t *coeffs = (int16_t*)(c_idx ? lc->edge_emu_buffer2 : lc->edge_emu_buffer); ++ uint8_t significant_coeff_group_flag[8][8] = {{0}}; ++ int explicit_rdpcm_flag = 0; ++@@ -1044,6 +1045,18 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ uint8_t dc_scale; ++ int pred_mode_intra = (c_idx == 0) ? lc->tu.intra_pred_mode : ++ lc->tu.intra_pred_mode_c; +++#ifdef RPI +++ if (s->enable_rpi) { +++ int n = trafo_size * trafo_size; +++ if (use_vpu) { +++ coeffs = s->coeffs_buf_arm[log2_trafo_size - 2] + s->num_coeffs[log2_trafo_size - 2]; +++ s->num_coeffs[log2_trafo_size - 2] += n; +++ } else { +++ coeffs = s->coeffs_buf_arm[0] + s->num_coeffs[0]; +++ s->num_coeffs[0] += n; +++ } +++ } +++#endif ++ ++ memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); ++ ++@@ -1488,6 +1501,24 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && log2_trafo_size == 2) { ++ s->hevcdsp.idct_4x4_luma(coeffs); ++ } else { +++#ifdef RPI +++ if (!use_vpu) { +++ int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y); +++ if (max_xy == 0) +++ s->hevcdsp.idct_dc[log2_trafo_size-2](coeffs); +++ else { +++ int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4; +++ if (max_xy < 4) +++ col_limit = FFMIN(4, 
col_limit); +++ else if (max_xy < 8) +++ col_limit = FFMIN(8, col_limit); +++ else if (max_xy < 12) +++ col_limit = FFMIN(24, col_limit); +++ +++ s->hevcdsp.idct[log2_trafo_size-2](coeffs, col_limit); +++ } +++ } +++#else ++ int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y); ++ if (max_xy == 0) ++ s->hevcdsp.idct_dc[log2_trafo_size-2](coeffs); ++@@ -1501,6 +1532,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ col_limit = FFMIN(24, col_limit); ++ s->hevcdsp.idct[log2_trafo_size-2](coeffs, col_limit); ++ } +++#endif ++ } ++ } ++ if (lc->tu.cross_pf) { ++@@ -1512,14 +1544,11 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } ++ #ifdef RPI ++ if (s->enable_rpi) { ++- int16_t *c = s->coeffs_buf + s->num_coeffs; ++- int n = trafo_size * trafo_size; ++ HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; ++- memcpy(c, coeffs, n * sizeof(int16_t)); // TODO change pointer earlier and we can avoid this copy ++- s->num_coeffs += n; +++ //memcpy(coeffs2, coeffs, sizeof(int16_t) * trafo_size * trafo_size); // TODO ++ cmd->type = RPI_PRED_TRANSFORM_ADD; ++ cmd->size = log2_trafo_size; ++- cmd->buf = c; +++ cmd->buf = coeffs; ++ cmd->dst = dst; ++ cmd->stride = stride; ++ return; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 1f33b0c..e4c3da7 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -22,6 +22,10 @@ ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +++//#define DISABLE_SAO +++//#define DISABLE_DEBLOCK +++//#define DISABLE_STRENGTHS +++ ++ #include "libavutil/common.h" ++ #include "libavutil/internal.h" ++ ++@@ -273,6 +277,10 @@ static void sao_filter_CTB(HEVCContext *s, int x, int y) ++ edges[2] = x_ctb == s->ps.sps->ctb_width - 1; ++ edges[3] = y_ctb == s->ps.sps->ctb_height - 1; ++ +++#ifdef DISABLE_SAO +++ return; +++#endif +++ ++ if (restore) { ++ if (!edges[0]) { ++ left_tile_edge = no_tile_filter 
&& s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]]; ++@@ -496,6 +504,10 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->ps.sps->pcm.loop_filter_disable_flag) || ++ s->ps.pps->transquant_bypass_enable_flag; ++ +++#ifdef DISABLE_DEBLOCK +++ return; +++#endif +++ ++ if (x0) { ++ left_tc_offset = s->deblock[ctb - 1].tc_offset; ++ left_beta_offset = s->deblock[ctb - 1].beta_offset; ++@@ -726,6 +738,10 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ int boundary_upper, boundary_left; ++ int i, j, bs; ++ +++#ifdef DISABLE_STRENGTHS +++ return; +++#endif +++ ++ boundary_upper = y0 > 0 && !(y0 & 7); ++ if (boundary_upper && ++ ((!s->sh.slice_loop_filter_across_slices_enabled_flag && ++diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c ++index 6ae87cc..71c6d52 100644 ++--- a/libavcodec/hevcpred_template.c +++++ b/libavcodec/hevcpred_template.c ++@@ -20,6 +20,8 @@ ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +++//#define DISABLE_INTRA +++ ++ #include "libavutil/pixdesc.h" ++ ++ #include "bit_depth_template.c" ++@@ -114,6 +116,10 @@ do { \ ++ int top_right_size = (FFMIN(x0 + 2 * size_in_luma_h, s->ps.sps->width) - ++ (x0 + size_in_luma_h)) >> hshift; ++ +++#ifdef DISABLE_INTRA +++ return; +++#endif +++ ++ if (s->ps.pps->constrained_intra_pred_flag == 1) { ++ int size_in_luma_pu_v = PU(size_in_luma_v); ++ int size_in_luma_pu_h = PU(size_in_luma_h); ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++index 85a9102..c0c279f 100644 ++--- a/libavcodec/rpi_hevc_transform.h +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -3,11 +3,11 @@ unsigned char rpi_hevc_transform [] = { ++ 3, ++ 3, ++ 232, ++-128, +++32, ++ 0, ++ 0, ++ 0, ++-20, +++12, ++ 248, ++ 0, ++ 136, ++@@ -56,9 +56,9 @@ unsigned char rpi_hevc_transform [] = { ++ 5, ++ 232, ++ 0, ++-0, ++ 8, ++ 0, +++0, ++ 128, ++ 
69, ++ 113, ++@@ -108,8 +108,8 @@ unsigned char rpi_hevc_transform [] = { ++ 128, ++ 2, ++ 0, ++-248, ++-62, +++8, +++2, ++ 0, ++ 128, ++ 144, ++@@ -123,13 +123,13 @@ unsigned char rpi_hevc_transform [] = { ++ 3, ++ 32, ++ 8, ++-16, +++20, ++ 0, ++ 76, ++ 254, ++ 48, ++ 192, ++-9, +++4, ++ 4, ++ 32, ++ 8, ++@@ -155,14 +155,46 @@ unsigned char rpi_hevc_transform [] = { ++ 192, ++ 41, ++ 3, ++-68, +++70, +++192, +++80, +++7, +++164, +++255, +++36, +++204, +++96, +++2, +++0, +++248, +++62, +++0, +++3, +++255, +++55, +++208, +++120, +++3, +++224, +++3, +++190, +++11, +++16, +++139, +++246, +++91, +++0, +++103, +++90, +++0, +++70, ++ 192, ++ 80, ++ 7, ++ 164, ++ 255, ++ 36, ++-220, +++204, ++ 96, ++ 2, ++ 0, ++@@ -182,7 +214,7 @@ unsigned char rpi_hevc_transform [] = { ++ 16, ++ 139, ++ 246, ++-83, +++91, ++ 0, ++ 103, ++ 90, ++@@ -209,4 +241,374 @@ unsigned char rpi_hevc_transform [] = { ++ 96, ++ 90, ++ 0, +++169, +++3, +++3, +++232, +++32, +++0, +++0, +++0, +++12, +++248, +++0, +++136, +++0, +++0, +++192, +++248, +++0, +++0, +++64, +++232, +++0, +++2, +++0, +++0, +++12, +++248, +++0, +++168, +++0, +++0, +++192, +++248, +++0, +++0, +++3, +++232, +++128, +++0, +++0, +++0, +++7, +++232, +++0, +++2, +++0, +++0, +++4, +++232, +++64, +++0, +++0, +++0, +++5, +++232, +++0, +++8, +++0, +++0, +++57, +++239, +++224, +++247, +++255, +++255, +++72, +++192, +++95, +++207, +++88, +++122, +++88, +++124, +++137, +++64, +++26, +++64, +++161, +++64, +++152, +++64, +++128, +++144, +++31, +++0, +++72, +++232, +++32, +++0, +++0, +++0, +++65, +++232, +++32, +++0, +++0, +++0, +++128, +++144, +++23, +++0, +++145, +++64, +++168, +++64, +++128, +++144, +++19, +++0, +++72, +++232, +++32, +++0, +++0, +++0, +++65, +++232, +++32, +++0, +++0, +++0, +++128, +++144, +++11, +++0, +++74, +++232, +++0, +++8, +++0, +++0, +++242, +++140, +++229, +++192, +++57, +++239, +++32, +++8, +++0, +++0, +++41, +++3, +++12, +++248, +++0, +++128, +++0, +++0, +++192, +++8, +++4, +++0, +++12, +++248, +++0, +++132, 
+++64, +++0, +++192, +++8, +++4, +++0, +++0, +++96, +++255, +++159, +++131, +++255, +++0, +++232, +++0, +++4, +++0, +++0, +++255, +++159, +++142, +++255, +++4, +++255, +++48, +++204, +++16, +++3, +++224, +++251, +++62, +++0, +++5, +++255, +++51, +++204, +++128, +++3, +++224, +++251, +++16, +++0, +++77, +++254, +++51, +++204, +++9, +++4, +++224, +++251, +++0, +++0, +++128, +++64, +++6, +++232, +++64, +++0, +++0, +++0, +++140, +++248, +++47, +++0, +++0, +++0, +++224, +++99, +++0, +++0, +++4, +++254, +++0, +++144, +++128, +++2, +++0, +++8, +++2, +++0, +++32, +++247, +++240, +++207, +++16, +++3, +++32, +++247, +++176, +++207, +++17, +++3, +++32, +++247, +++112, +++207, +++18, +++3, +++32, +++247, +++48, +++207, +++19, +++3, +++32, +++247, +++240, +++206, +++20, +++3, +++32, +++247, +++176, +++206, +++21, +++3, +++32, +++247, +++112, +++206, +++22, +++3, +++32, +++247, +++48, +++206, +++23, +++3, +++32, +++247, +++240, +++205, +++24, +++3, +++32, +++247, +++176, +++205, +++25, +++3, +++32, +++247, +++112, +++205, +++26, +++3, +++32, +++247, +++48, +++205, +++27, +++3, +++32, +++247, +++240, +++204, +++28, +++3, +++32, +++247, +++176, +++204, +++29, +++3, +++32, +++247, +++112, +++204, +++30, +++3, +++32, +++247, +++48, +++204, +++31, +++3, +++5, +++255, +++51, +++204, +++128, +++3, +++224, +++251, +++16, +++0, +++77, +++254, +++51, +++204, +++9, +++4, +++224, +++251, +++0, +++0, +++0, +++237, +++0, +++4, +++0, +++0, +++140, +++248, +++47, +++0, +++0, +++0, +++224, +++99, +++0, +++0, +++90, +++0, ++ }; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++index 5e2728d..1e389c7 100644 ++--- a/libavcodec/rpi_hevc_transform.s +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -58,13 +58,6 @@ ++ # ++ # ++ ++-test_add: ++- vldh HX(0,0),(r0) ++- vadd HX(0,0),HX(0,0),10 ++- vsth HX(0,0),(r0) ++- mov r0,7 # return value ++- b lr ++- ++ # Columns are transformed first ++ # ++ # Store top left half of transMatrix2 in ++@@ -79,7 +72,7 @@ test_add: ++ # ++ ++ 
++-# hevc_trans_16x16(short *transMatrix2, short *coeffs, int num) +++# hevc_trans_16x16(short *transMatrix2, short *coeffs, int num) # TODO add size so we can branch to correct implementation (or perhaps have coeffs32 and num32 as secondary inputs!) ++ # transMatrix2: address of the constant matrix (must be at 32 byte aligned address in Videocore memory) ++ # coeffs: address of the transform coefficients (must be at 32 byte aligned address in Videocore memory) ++ # num: number of 16x16 transforms to be done ++@@ -87,17 +80,17 @@ test_add: ++ hevc_trans_16x16: ++ push r6-r15, lr # TODO cut down number of used registers ++ ++- mov r3, 2*32*2 # Twice Stride of transMatrix2 in bytes ++- vld HX(32++,0),(r0 += r3) REP 16 # This is the 16x16 matrix, a transform is equivalent to multiplying input row vector * matrix +++ mov r3, 16*2 # Stride of transMatrix2 in bytes +++ vldh HX(32++,0),(r0 += r3) REP 16 # This is the 16x16 matrix, a transform is equivalent to multiplying input row vector * matrix ++ # Now use r0 to describe which matrix we are working on. ++ # Allows us to prefetch the next block of coefficients for efficiency. 
++ mov r0,0 # This describes the location where we read our coefficients from ++- mov r3,16*2 # Stride of coefficients in bytes +++ mov r3,16*2 # Stride of coefficients in bytes (TODO remove) ++ mov r7,16*16*2 # Total block size ++ mov r8,64*16 # Value used to swap from current to next VRF location ++ vldh HX(0++,0)+r0,(r1 += r3) REP 16 ++ mov r4,64 # Constant used for rounding first pass ++- mov r5,1<<19 # Constant used for rounding second pass +++ mov r5,1<<11 # Constant used for rounding second pass ++ ++ # At start of block r0,r1 point to the current block (that has already been loaded) ++ block_loop: ++@@ -113,12 +106,12 @@ block_loop: ++ vadd HY(0++,0)+r0,HY(0++,0)+r0,r4 REP 16 # Now add on rounding, shift down by 7, and saturate ++ #vsasls HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # 9+7=16 so this ends up with the output saturated and in the top half of the word. ++ vasl HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # This should be saturating, but the instruction above does not assemble? ++- vmov VX(0,0++), HX(0++,32) REP 16 # For simplicity transpose this back to the original position +++ vmov VX(0,0++)+r0, HX(0++,32)+r0 REP 16 # For simplicity transpose this back to the original position ++ ++ bl col_trans_16 ++- vadd HY(0++,0)+r0,HY(0++,0)+r0,r4 REP 16 # Now add on rounding, shift down by 7, and saturate ++- #vsasls HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # 9+7=16 so this ends up with the output saturated and in the top half of the word. ++- vasl HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # This should be saturating, but the instruction above does not assemble? +++ vadd HY(0++,0)+r0,HY(0++,0)+r0,r5 REP 16 # Now add on rounding, shift down by 7, and saturate +++ #vsasls HY(0++,0)+r0,HY(0++,0)+r0,4 REP 16 # 4+12=16 so this ends up with the output saturated and in the top half of the word. +++ vasl HY(0++,0)+r0,HY(0++,0)+r0,4 REP 16 # This should be saturating, but the instruction above does not assemble? 
(Probably because it ends with ls which is interpreted as a condition flag) ++ ++ # Save results - note there has been a transposition during the processing so we save columns ++ vsth VX(0,32++)+r0, (r1 += r3) REP 16 ++@@ -132,16 +125,136 @@ block_loop: ++ ++ # r1,r2,r3 r7,r8 should be preserved ++ # HX(0++,0)+r0 is the block to be transformed ++-# HX(32++,0) is the 16x16 matrix of transform coefficients +++# HX(32++,0)+r6 is the 16x16 matrix of transform coefficients ++ # Use HY(48,0) for intermediate results ++ # r0 can be used, but should be returned to its original value at the end ++ col_trans_16: ++- add r4,r0,16 # Final value for this loop +++ add r6,r0,16 # Final value for this loop ++ col_trans_16_loop: ++ # First compute partial products for a single column ++- vmul32s VY(48,0++), VX(0,0)+r0, VX(32,0++) REP 16 +++ vmul32s HY(48++,0), VX(0,0)+r0, VX(32,0++) REP 16 ++ # Then sum up the results and place back ++ vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC ++- addcmpblt r0,1,r4,col_trans_16_loop +++ addcmpblt r0,1,r6,col_trans_16_loop ++ sub r0,16 # but r0 back to its original value ++ b lr +++ +++col_trans_odd_16: +++ add r6,r0,16 # Final value for this loop +++col_trans_odd_16_loop: +++ # First compute partial products for a single column +++ vmul32s HY(48++,0), VX(0,0)+r0, VX(32,0++) REP 16 +++ # Then sum up the results and place back +++ vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC +++ addcmpblt r0,1,r6,col_trans_odd_16_loop +++ sub r0,16 # but r0 back to its original value +++ b lr +++ +++ +++test_add: +++ vldh HX(0,0),(r0) +++ vadd HX(0,0),HX(0,0),10 +++ vsth HX(0,0),(r0) +++ mov r0,7 # return value +++ b lr +++ +++# hevc_trans_32x32(short *transMatrix2, short *coeffs, int num) +++# transMatrix2: address of the constant matrix (must be at 32 byte aligned address in Videocore memory) Even followed by odd +++# coeffs: address of the transform coefficients (must be at 32 byte aligned address in Videocore memory) +++# num: number of 
16x16 transforms to be done +++# +++hevc_trans_32x32: +++ push r6-r15, lr # TODO cut down number of used registers +++ +++ # Fetch transform matrices +++ mov r3, 16*2 # Stride of transMatrix2 in bytes (and of coefficients) +++ vldh HX(32++,0),(r0 += r3) REP 16 # This is the even 16x16 matrix +++ add r0, 16*16*2 +++ vldh HX(32++,32),(r0 += r3) REP 16 # This is the odd 16x16 matrix +++ +++ mov r3, 32*2*2 # Stride used to fetch alternate rows of our input coefficient buffer +++ mov r7, 16*16*2 # Total block size +++ mov r4, 64 # Constant used for rounding first pass +++ mov r5, 1<<11 # Constant used for rounding second pass +++ sub sp,sp,32*32*2+32 # Allocate some space on the stack for us to store 32*32 shorts as temporary results (needs to be aligned) +++ # set r8 to 32byte aligned stack pointer +++ add r8,sp,31 +++ lsr r8,5 +++ lsl r8,5 +++ mov r9,r8 # Backup of the temporary storage +++ mov r10,r1 # Backup of the coefficient buffer +++block_loop32: +++ +++ # COLUMN TRANSFORM +++ # Transform the first 16 columns +++ mov r1,r10 # Input Coefficient buffer +++ mov r8,r9 # Output temporary storage +++ bl trans32 +++ # Transform the second 16 columns +++ add r8,32 +++ add r1,32 +++ bl trans32 +++ +++ # ROW TRANSFORM +++ mov r1,r9 # Input temporary storage +++ mov r8,r10 # Output Coefficient buffer +++ bl trans32 +++ # Transform the second 16 columns +++ add r8,32 +++ add r1,32 +++ bl trans32 +++ +++ add r10, 32*32*2 # move onto next block of coefficients +++ addcmpbgt r2,-1,0,block_loop32 +++ +++ add sp,sp,32*32*2+32 # Restore stack +++ +++ pop r6-r15, pc +++ +++trans32: +++ # We can no longer afford the VRF space to do prefetching when doing 32x32 +++ # Fetch the even rows +++ vldh HX(0++,0)+r0,(r1 += r3) REP 16 +++ # Fetch the odd rows +++ vldh HX(16++,0)+r0,64(r1 += r3) REP 16 # First odd row is 32 shorts ahead of r1 +++ +++ # Transform the even rows using even matrix +++ mov r0, 0 # Even rows +++ bl col_trans_16 +++ +++ # Now transform the odd rows using odd matrix 
+++ mov r0, 64*16 # Odd rows +++ bl col_trans_odd_16 +++ +++ # Now apply butterfly to compute the first 16 results +++ vadd HY(48++,0),HY(0++,0),HY(16++,0) REP 16 +++ vadd HY(48++,0),HY(48++,0),r4 REP 32 # add on rounding, +++ vasl HY(48++,0),HY(48++,0),9 REP 32 # shift down by 7, and saturate +++ # 16bit results now in HX(48,32) +++ mov r0,r8 +++ mov r6,32*2 +++ vsth VX(48,32++),(r0+=r6) REP 16 +++ vmov VX(0,0++)+r0, HX(0++,32)+r0 REP 16 # Store transposed +++ +++ # Now apply butterfly to compute the second 16 results (in reverse order) +++ vsub HY(63,0),HY(0,0),HY(16,0) +++ vsub HY(62,0),HY(0,0),HY(17,0) +++ vsub HY(61,0),HY(0,0),HY(18,0) +++ vsub HY(60,0),HY(0,0),HY(19,0) +++ vsub HY(59,0),HY(0,0),HY(20,0) +++ vsub HY(58,0),HY(0,0),HY(21,0) +++ vsub HY(57,0),HY(0,0),HY(22,0) +++ vsub HY(56,0),HY(0,0),HY(23,0) +++ vsub HY(55,0),HY(0,0),HY(24,0) +++ vsub HY(54,0),HY(0,0),HY(25,0) +++ vsub HY(53,0),HY(0,0),HY(26,0) +++ vsub HY(52,0),HY(0,0),HY(27,0) +++ vsub HY(51,0),HY(0,0),HY(28,0) +++ vsub HY(50,0),HY(0,0),HY(29,0) +++ vsub HY(49,0),HY(0,0),HY(30,0) +++ vsub HY(48,0),HY(0,0),HY(31,0) +++ vadd HY(48++,0),HY(48++,0),r4 REP 32 # add on rounding, +++ vasl HY(48++,0),HY(48++,0),9 REP 32 # shift down by 7, and saturate +++ add r0,r8,16*32*2 # Move to 16th row +++ vsth VX(48,32++),(r0+=r6) REP 16 +++ b lr ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index b1f50ee..d720546 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -3,6 +3,7 @@ ++ // This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. 
++ #define RPI_USE_VCSM ++ #define RPI_TIME_TOTAL_QPU +++#define RPI_TIME_TOTAL_VPU ++ ++ #include <stdio.h> ++ #include <stdlib.h> ++@@ -48,10 +49,47 @@ typedef int int32_t; ++ #define QPU_CODE_SIZE 2048 ++ #define VPU_CODE_SIZE 2048 ++ +++const short rpi_transMatrix2even[32][16] = { // Even rows first +++{64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64}, +++{90, 87, 80, 70, 57, 43, 25, 9, -9, -25, -43, -57, -70, -80, -87, -90}, +++{89, 75, 50, 18, -18, -50, -75, -89, -89, -75, -50, -18, 18, 50, 75, 89}, +++{87, 57, 9, -43, -80, -90, -70, -25, 25, 70, 90, 80, 43, -9, -57, -87}, +++{83, 36, -36, -83, -83, -36, 36, 83, 83, 36, -36, -83, -83, -36, 36, 83}, +++{80, 9, -70, -87, -25, 57, 90, 43, -43, -90, -57, 25, 87, 70, -9, -80}, +++{75, -18, -89, -50, 50, 89, 18, -75, -75, 18, 89, 50, -50, -89, -18, 75}, +++{70, -43, -87, 9, 90, 25, -80, -57, 57, 80, -25, -90, -9, 87, 43, -70}, +++{64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64, 64, -64, -64, 64}, +++{57, -80, -25, 90, -9, -87, 43, 70, -70, -43, 87, 9, -90, 25, 80, -57}, +++{50, -89, 18, 75, -75, -18, 89, -50, -50, 89, -18, -75, 75, 18, -89, 50}, +++{43, -90, 57, 25, -87, 70, 9, -80, 80, -9, -70, 87, -25, -57, 90, -43}, +++{36, -83, 83, -36, -36, 83, -83, 36, 36, -83, 83, -36, -36, 83, -83, 36}, +++{25, -70, 90, -80, 43, 9, -57, 87, -87, 57, -9, -43, 80, -90, 70, -25}, +++{18, -50, 75, -89, 89, -75, 50, -18, -18, 50, -75, 89, -89, 75, -50, 18}, +++{ 9, -25, 43, -57, 70, -80, 87, -90, 90, -87, 80, -70, 57, -43, 25, -9}, +++// Odd rows +++{90, 90, 88, 85, 82, 78, 73, 67, 61, 54, 46, 38, 31, 22, 13, 4}, +++{90, 82, 67, 46, 22, -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13}, +++{88, 67, 31, -13, -54, -82, -90, -78, -46, -4, 38, 73, 90, 85, 61, 22}, +++{85, 46, -13, -67, -90, -73, -22, 38, 82, 88, 54, -4, -61, -90, -78, -31}, +++{82, 22, -54, -90, -61, 13, 78, 85, 31, -46, -90, -67, 4, 73, 88, 38}, +++{78, -4, -82, -73, 13, 85, 67, -22, -88, -61, 31, 90, 54, -38, -90, -46}, +++{73, -31, -90, 
-22, 78, 67, -38, -90, -13, 82, 61, -46, -88, -4, 85, 54}, +++{67, -54, -78, 38, 85, -22, -90, 4, 90, 13, -88, -31, 82, 46, -73, -61}, +++{61, -73, -46, 82, 31, -88, -13, 90, -4, -90, 22, 85, -38, -78, 54, 67}, +++{54, -85, -4, 88, -46, -61, 82, 13, -90, 38, 67, -78, -22, 90, -31, -73}, +++{46, -90, 38, 54, -90, 31, 61, -88, 22, 67, -85, 13, 73, -82, 4, 78}, +++{38, -88, 73, -4, -67, 90, -46, -31, 85, -78, 13, 61, -90, 54, 22, -82}, +++{31, -78, 90, -61, 4, 54, -88, 82, -38, -22, 73, -90, 67, -13, -46, 85}, +++{22, -61, 85, -90, 73, -38, -4, 46, -78, 90, -82, 54, -13, -31, 67, -88}, +++{13, -38, 61, -78, 88, -90, 85, -73, 54, -31, 4, 22, -46, 67, -82, 90}, +++{ 4, -13, 22, -31, 38, -46, 54, -61, 67, -73, 78, -82, 85, -88, 90, -90} +++}; +++ ++ struct GPU ++ { ++ unsigned int qpu_code[QPU_CODE_SIZE]; ++ unsigned int vpu_code[VPU_CODE_SIZE]; +++ short transMatrix2even[16*16]; ++ int open_count; // Number of allocated video buffers ++ unsigned int vc_handle; // Handle of this memory ++ int mb; // Mailbox handle ++@@ -123,6 +161,8 @@ static int gpu_init(volatile struct GPU **gpu) { ++ assert(num_bytes<=VPU_CODE_SIZE*sizeof(unsigned int)); ++ memcpy((void*)ptr->vpu_code, rpi_hevc_transform, num_bytes); ++ } +++ // And the transform coefficients +++ memcpy((void*)ptr->transMatrix2even, rpi_transMatrix2even, 16*16*sizeof(short)); ++ ++ return 0; ++ } ++@@ -274,11 +314,43 @@ unsigned int vpu_get_fn(void) { ++ return gpu->vc + offsetof(struct GPU,vpu_code); ++ } ++ +++unsigned int vpu_get_constants(void) { +++ if (gpu==NULL) { +++ gpu_lock(); +++ gpu_unlock(); +++ } +++ return gpu->vc + offsetof(struct GPU,transMatrix2even); +++} +++ ++ unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5) ++ { ++ unsigned r; +++#ifdef RPI_TIME_TOTAL_VPU +++ static int last_time=0; +++ static long long on_time=0; +++ static long long off_time=0; +++ int start_time; +++ int end_time; +++ static int count=0; +++ static long 
long countr2=0; +++#endif ++ gpu_lock(); +++#ifdef RPI_TIME_TOTAL_VPU +++ start_time = Microseconds(); +++ if (last_time==0) +++ last_time = start_time; +++ off_time += start_time-last_time; +++#endif ++ r = execute_code(gpu->mb, code, r0, r1, r2, r3, r4, r5); +++#ifdef RPI_TIME_TOTAL_VPU +++ end_time = Microseconds(); +++ last_time = end_time; +++ on_time += end_time - start_time; +++ count++; +++ countr2 += r2; +++ if ((count&0x7f)==0) +++ printf("VPU %d %lld On=%dms, Off=%dms\n",count,countr2,(int)(on_time/1000),(int)(off_time/1000)); +++#endif ++ gpu_unlock(); ++ return r; ++ } ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 4e3c35c..814fc3c 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -34,6 +34,7 @@ extern unsigned int qpu_get_fn(int num); ++ ++ // VPU specific functions ++ extern unsigned int vpu_get_fn(void); +++extern unsigned int vpu_get_constants(void); ++ extern unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); ++ ++ // Simple test of shader code ++-- ++2.7.4 ++ ++ ++From 4bb0a7ba6723650e74d63cec2123f76da4c3eb0e Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Tue, 5 May 2015 09:41:23 +0100 ++Subject: [PATCH 05/68] Fixed deblocking ++ ++--- ++ libavcodec/hevc.c | 20 +++++++++++++++++--- ++ 1 file changed, 17 insertions(+), 3 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 94ff709..391c57a 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2400,8 +2400,9 @@ static void rpi_execute_transform(HEVCContext *s) ++ // s->hevcdsp.idct[4-2](coeffs, 16); ++ //} ++ ++- //gpu_cache_flush(&s->coeffs_buf[i]); +++ gpu_cache_flush(&s->coeffs_buf[i]); ++ vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf[i].vc, s->num_coeffs[i] >> 8, 0, 0, 0); +++ gpu_cache_flush(&s->coeffs_buf[i]); ++ ++ for(i=0;i<4;i++) ++ s->num_coeffs[i] = 0; ++@@ -2440,6 +2441,7 @@ static int 
hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; ++ ++ #ifdef RPI +++ int start_ctb_x = (s->sh.slice_ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size; ++ s->enable_rpi = 1; // TODO this should depend on cross component and frame width etc. ++ #endif ++ ++@@ -2473,9 +2475,17 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ #ifdef RPI ++- if (1 || x_ctb + ctb_size >= s->ps.sps->width) { // TODO watch out for deblocking! +++ if (s->enable_rpi && x_ctb + ctb_size >= s->ps.sps->width) { +++ int x; +++ // Transform all blocks ++ rpi_execute_transform(s); +++ // Perform intra prediction and residual reconstruction ++ rpi_execute_pred_cmds(s); +++ // Perform deblocking for CTBs in this row +++ for(x = start_ctb_x; x <= x_ctb; x += ctb_size) { // TODO this will fail for tiles +++ ff_hevc_hls_filters(s, x, y_ctb, ctb_size); +++ } +++ start_ctb_x = 0; ++ } ++ #endif ++ if (more_data < 0) { ++@@ -2486,6 +2496,10 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ ctb_addr_ts++; ++ ff_hevc_save_states(s, ctb_addr_ts); +++#ifdef RPI +++ if (s->enable_rpi) +++ continue; +++#endif ++ ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size); ++ } ++ ++@@ -3289,7 +3303,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ if (!s->univ_pred_cmds) ++ goto fail; ++ for(i = 0; i < 4; i++) { ++- gpu_malloc_uncached(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16, &s->coeffs_buf[i]); // TODO slim this down and share across sizes +++ gpu_malloc_cached(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16, &s->coeffs_buf[i]); // TODO slim this down and share across sizes ++ s->coeffs_buf_arm[i] = (int16_t*) s->coeffs_buf[i].arm; ++ if (!s->coeffs_buf_arm[i]) ++ goto fail; ++-- ++2.7.4 ++ ++ ++From 
9079ef888e3d81a69f3c802ddc3c5134679e74a6 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Tue, 5 May 2015 11:32:30 +0100 ++Subject: [PATCH 06/68] Added 32x32 transform ++ ++--- ++ libavcodec/hevc.c | 8 +- ++ libavcodec/hevc_cabac.c | 4 +- ++ libavcodec/rpi_hevc_transform.h | 200 +++++++++++++++++----------------------- ++ libavcodec/rpi_hevc_transform.s | 102 ++++++++++---------- ++ libavcodec/rpi_qpu.c | 4 +- ++ 5 files changed, 148 insertions(+), 170 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 391c57a..0dde6f2 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2400,9 +2400,11 @@ static void rpi_execute_transform(HEVCContext *s) ++ // s->hevcdsp.idct[4-2](coeffs, 16); ++ //} ++ ++- gpu_cache_flush(&s->coeffs_buf[i]); ++- vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf[i].vc, s->num_coeffs[i] >> 8, 0, 0, 0); ++- gpu_cache_flush(&s->coeffs_buf[i]); +++ gpu_cache_flush(&s->coeffs_buf[2]); +++ gpu_cache_flush(&s->coeffs_buf[3]); +++ vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf[2].vc, s->num_coeffs[2] >> 8, s->coeffs_buf[3].vc, s->num_coeffs[3] >> 10, 0); +++ gpu_cache_flush(&s->coeffs_buf[2]); +++ gpu_cache_flush(&s->coeffs_buf[3]); ++ ++ for(i=0;i<4;i++) ++ s->num_coeffs[i] = 0; ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index d1cba86..88aa959 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1031,7 +1031,9 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ int vshift = s->ps.sps->vshift[c_idx]; ++ uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride + ++ ((x0 >> hshift) << s->ps.sps->pixel_shift)]; ++- int use_vpu = s->enable_rpi && !lc->cu.cu_transquant_bypass_flag && !transform_skip_flag && !lc->tu.cross_pf && log2_trafo_size==4; +++#ifdef RPI +++ int use_vpu = s->enable_rpi && !lc->cu.cu_transquant_bypass_flag && !transform_skip_flag && !lc->tu.cross_pf && 
log2_trafo_size>=4; +++#endif ++ int16_t *coeffs = (int16_t*)(c_idx ? lc->edge_emu_buffer2 : lc->edge_emu_buffer); ++ uint8_t significant_coeff_group_flag[8][8] = {{0}}; ++ int explicit_rdpcm_flag = 0; ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++index c0c279f..6d772d7 100644 ++--- a/libavcodec/rpi_hevc_transform.h +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -1,6 +1,10 @@ ++ unsigned char rpi_hevc_transform [] = { ++ 169, ++ 3, +++62, +++64, +++79, +++64, ++ 3, ++ 232, ++ 32, ++@@ -17,6 +21,22 @@ unsigned char rpi_hevc_transform [] = { ++ 248, ++ 0, ++ 0, +++64, +++232, +++0, +++2, +++0, +++0, +++12, +++248, +++0, +++168, +++0, +++0, +++192, +++248, +++0, +++0, ++ 0, ++ 96, ++ 3, ++@@ -79,7 +99,7 @@ unsigned char rpi_hevc_transform [] = { ++ 70, ++ 128, ++ 144, ++-39, +++40, ++ 0, ++ 4, ++ 255, ++@@ -113,7 +133,7 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 128, ++ 144, ++-22, +++23, ++ 0, ++ 4, ++ 255, ++@@ -153,6 +173,8 @@ unsigned char rpi_hevc_transform [] = { ++ 140, ++ 211, ++ 192, +++34, +++31, ++ 41, ++ 3, ++ 70, ++@@ -195,7 +217,7 @@ unsigned char rpi_hevc_transform [] = { ++ 255, ++ 36, ++ 204, ++-96, +++224, ++ 2, ++ 0, ++ 248, ++@@ -219,62 +241,10 @@ unsigned char rpi_hevc_transform [] = { ++ 103, ++ 90, ++ 0, ++-8, ++-240, ++-0, ++-128, ++-128, ++-3, ++-0, ++-247, ++-32, ++-128, ++-10, ++-4, ++-136, ++-240, ++-32, ++-0, ++-128, ++-3, ++-112, ++-96, ++-90, ++-0, ++-169, ++-3, ++-3, ++-232, ++-32, ++-0, ++-0, ++-0, ++-12, ++-248, ++-0, ++-136, ++-0, ++-0, ++-192, ++-248, ++-0, ++-0, +++225, +++64, +++242, ++ 64, ++-232, ++-0, ++-2, ++-0, ++-0, ++-12, ++-248, ++-0, ++-168, ++-0, ++-0, ++-192, ++-248, ++-0, ++-0, ++ 3, ++ 232, ++ 128, ++@@ -287,18 +257,6 @@ unsigned char rpi_hevc_transform [] = { ++ 2, ++ 0, ++ 0, ++-4, ++-232, ++-64, ++-0, ++-0, ++-0, ++-5, ++-232, ++-0, ++-8, ++-0, ++-0, ++ 57, ++ 239, ++ 224, ++@@ -317,18 +275,26 @@ unsigned char rpi_hevc_transform [] = { ++ 64, ++ 26, ++ 64, +++4, +++232, 
+++64, +++0, +++0, +++0, +++149, +++96, ++ 161, ++ 64, ++ 152, ++ 64, ++ 128, ++ 144, ++-31, +++35, ++ 0, ++ 72, ++ 232, ++-32, ++ 0, +++4, ++ 0, ++ 0, ++ 65, ++@@ -339,8 +305,16 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 128, ++ 144, ++-23, +++27, +++0, +++4, +++232, +++0, +++8, ++ 0, +++0, +++69, +++96, ++ 145, ++ 64, ++ 168, ++@@ -351,8 +325,8 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 72, ++ 232, ++-32, ++ 0, +++4, ++ 0, ++ 0, ++ 65, ++@@ -373,7 +347,7 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 242, ++ 140, ++-229, +++221, ++ 192, ++ 57, ++ 239, ++@@ -383,6 +357,8 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 41, ++ 3, +++239, +++3, ++ 12, ++ 248, ++ 0, ++@@ -390,7 +366,7 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 0, ++ 192, ++-8, +++248, ++ 4, ++ 0, ++ 12, ++@@ -400,14 +376,14 @@ unsigned char rpi_hevc_transform [] = { ++ 64, ++ 0, ++ 192, ++-8, +++248, ++ 4, ++ 0, ++ 0, ++ 96, ++ 255, ++ 159, ++-131, +++154, ++ 255, ++ 0, ++ 232, ++@@ -417,7 +393,7 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 255, ++ 159, ++-142, +++165, ++ 255, ++ 4, ++ 255, ++@@ -429,7 +405,7 @@ unsigned char rpi_hevc_transform [] = { ++ 251, ++ 62, ++ 0, ++-5, +++4, ++ 255, ++ 51, ++ 204, ++@@ -439,15 +415,15 @@ unsigned char rpi_hevc_transform [] = { ++ 251, ++ 16, ++ 0, ++-77, +++76, ++ 254, ++ 51, ++ 204, ++-9, ++-4, +++128, +++3, ++ 224, ++ 251, ++-0, +++20, ++ 0, ++ 128, ++ 64, ++@@ -467,16 +443,6 @@ unsigned char rpi_hevc_transform [] = { ++ 99, ++ 0, ++ 0, ++-4, ++-254, ++-0, ++-144, ++-128, ++-2, ++-0, ++-8, ++-2, ++-0, ++ 32, ++ 247, ++ 240, ++@@ -488,92 +454,92 @@ unsigned char rpi_hevc_transform [] = { ++ 176, ++ 207, ++ 17, ++-3, +++19, ++ 32, ++ 247, ++ 112, ++ 207, ++ 18, ++-3, +++35, ++ 32, ++ 247, ++ 48, ++ 207, ++ 19, ++-3, +++51, ++ 32, ++ 247, ++ 240, ++ 206, ++ 20, ++-3, +++67, ++ 32, ++ 247, ++ 176, ++ 206, ++ 21, ++-3, +++83, ++ 32, ++ 247, ++ 112, ++ 206, ++ 22, ++-3, +++99, ++ 32, ++ 247, ++ 48, ++ 206, ++ 23, ++-3, 
+++115, ++ 32, ++ 247, ++ 240, ++ 205, ++ 24, ++-3, +++131, ++ 32, ++ 247, ++ 176, ++ 205, ++ 25, ++-3, +++147, ++ 32, ++ 247, ++ 112, ++ 205, ++ 26, ++-3, +++163, ++ 32, ++ 247, ++ 48, ++ 205, ++ 27, ++-3, +++179, ++ 32, ++ 247, ++ 240, ++ 204, ++ 28, ++-3, +++195, ++ 32, ++ 247, ++ 176, ++ 204, ++ 29, ++-3, +++211, ++ 32, ++ 247, ++ 112, ++ 204, ++ 30, ++-3, +++227, ++ 32, ++ 247, ++ 48, ++ 204, ++ 31, ++-3, ++-5, +++243, +++4, ++ 255, ++ 51, ++ 204, ++@@ -583,20 +549,20 @@ unsigned char rpi_hevc_transform [] = { ++ 251, ++ 16, ++ 0, ++-77, +++76, ++ 254, ++ 51, ++ 204, ++-9, ++-4, +++128, +++3, ++ 224, ++ 251, ++-0, +++20, ++ 0, ++ 0, ++ 237, +++32, ++ 0, ++-4, ++ 0, ++ 0, ++ 140, ++@@ -609,6 +575,6 @@ unsigned char rpi_hevc_transform [] = { ++ 99, ++ 0, ++ 0, ++-90, ++-0, +++111, +++3, ++ }; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++index 1e389c7..afdb32a 100644 ++--- a/libavcodec/rpi_hevc_transform.s +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -76,12 +76,19 @@ ++ # transMatrix2: address of the constant matrix (must be at 32 byte aligned address in Videocore memory) ++ # coeffs: address of the transform coefficients (must be at 32 byte aligned address in Videocore memory) ++ # num: number of 16x16 transforms to be done +++# coeffs32 +++# num32: number of 32x32 transforms ++ # ++ hevc_trans_16x16: ++ push r6-r15, lr # TODO cut down number of used registers ++- +++ mov r14,r3 # coeffs32 +++ mov r15,r4 # num32 ++ mov r3, 16*2 # Stride of transMatrix2 in bytes ++ vldh HX(32++,0),(r0 += r3) REP 16 # This is the 16x16 matrix, a transform is equivalent to multiplying input row vector * matrix +++ +++ add r0, 16*16*2 # For 32x32 transforms we also need this matrix +++ vldh HX(32++,32),(r0 += r3) REP 16 # This is the odd 16x16 matrix +++ ++ # Now use r0 to describe which matrix we are working on. ++ # Allows us to prefetch the next block of coefficients for efficiency. 
++ mov r0,0 # This describes the location where we read our coefficients from ++@@ -121,6 +128,10 @@ block_loop: ++ add r1,r7 ++ ++ addcmpbgt r2,-1,0,block_loop +++ +++ # Now go and do any 32x32 transforms +++ b hevc_trans_32x32 +++ ++ pop r6-r15, pc ++ ++ # r1,r2,r3 r7,r8 should be preserved ++@@ -136,26 +147,18 @@ col_trans_16_loop: ++ # Then sum up the results and place back ++ vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC ++ addcmpblt r0,1,r6,col_trans_16_loop ++- sub r0,16 # but r0 back to its original value +++ sub r0,16 # put r0 back to its original value ++ b lr ++ ++ col_trans_odd_16: ++ add r6,r0,16 # Final value for this loop ++ col_trans_odd_16_loop: ++ # First compute partial products for a single column ++- vmul32s HY(48++,0), VX(0,0)+r0, VX(32,0++) REP 16 +++ vmul32s HY(48++,0), VX(0,0)+r0, VX(32,32++) REP 16 ++ # Then sum up the results and place back ++ vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC ++ addcmpblt r0,1,r6,col_trans_odd_16_loop ++- sub r0,16 # but r0 back to its original value ++- b lr ++- ++- ++-test_add: ++- vldh HX(0,0),(r0) ++- vadd HX(0,0),HX(0,0),10 ++- vsth HX(0,0),(r0) ++- mov r0,7 # return value +++ sub r0,16 # put r0 back to its original value ++ b lr ++ ++ # hevc_trans_32x32(short *transMatrix2, short *coeffs, int num) ++@@ -164,18 +167,17 @@ test_add: ++ # num: number of 16x16 transforms to be done ++ # ++ hevc_trans_32x32: ++- push r6-r15, lr # TODO cut down number of used registers +++ mov r1,r14 # coeffs +++ mov r2,r15 # num ++ ++- # Fetch transform matrices ++- mov r3, 16*2 # Stride of transMatrix2 in bytes (and of coefficients) ++- vldh HX(32++,0),(r0 += r3) REP 16 # This is the even 16x16 matrix ++- add r0, 16*16*2 ++- vldh HX(32++,32),(r0 += r3) REP 16 # This is the odd 16x16 matrix +++ # Fetch odd transform matrix +++ #mov r3, 16*2 # Stride of transMatrix2 in bytes (and of coefficients) +++ #vldh HX(32++,0),(r0 += r3) REP 16 # This is the even 16x16 matrix +++ #add r0, 16*16*2 +++ #vldh 
HX(32++,32),(r0 += r3) REP 16 # This is the odd 16x16 matrix ++ ++ mov r3, 32*2*2 # Stride used to fetch alternate rows of our input coefficient buffer ++ mov r7, 16*16*2 # Total block size ++- mov r4, 64 # Constant used for rounding first pass ++- mov r5, 1<<11 # Constant used for rounding second pass ++ sub sp,sp,32*32*2+32 # Allocate some space on the stack for us to store 32*32 shorts as temporary results (needs to be aligned) ++ # set r8 to 32byte aligned stack pointer ++ add r8,sp,31 ++@@ -186,21 +188,27 @@ hevc_trans_32x32: ++ block_loop32: ++ ++ # COLUMN TRANSFORM +++ mov r4, 64 # Constant used for rounding first pass +++ mov r5, 9 # left shift used for rounding first pass +++ ++ # Transform the first 16 columns ++ mov r1,r10 # Input Coefficient buffer ++ mov r8,r9 # Output temporary storage ++ bl trans32 ++ # Transform the second 16 columns ++- add r8,32 +++ add r8,32*16*2 ++ add r1,32 ++ bl trans32 ++ ++ # ROW TRANSFORM +++ mov r4, 1<<11 # Constant used for rounding second pass +++ mov r5, 4 # left shift used for rounding second pass +++ ++ mov r1,r9 # Input temporary storage ++ mov r8,r10 # Output Coefficient buffer ++ bl trans32 ++ # Transform the second 16 columns ++- add r8,32 +++ add r8,32*16*2 ++ add r1,32 ++ bl trans32 ++ ++@@ -212,11 +220,12 @@ block_loop32: ++ pop r6-r15, pc ++ ++ trans32: +++ push lr ++ # We can no longer afford the VRF space to do prefetching when doing 32x32 ++ # Fetch the even rows ++- vldh HX(0++,0)+r0,(r1 += r3) REP 16 +++ vldh HX(0++,0),(r1 += r3) REP 16 ++ # Fetch the odd rows ++- vldh HX(16++,0)+r0,64(r1 += r3) REP 16 # First odd row is 32 shorts ahead of r1 +++ vldh HX(16++,0),64(r1 += r3) REP 16 # First odd row is 32 shorts ahead of r1 ++ ++ # Transform the even rows using even matrix ++ mov r0, 0 # Even rows ++@@ -228,33 +237,32 @@ trans32: ++ ++ # Now apply butterfly to compute the first 16 results ++ vadd HY(48++,0),HY(0++,0),HY(16++,0) REP 16 ++- vadd HY(48++,0),HY(48++,0),r4 REP 32 # add on rounding, ++- vasl 
HY(48++,0),HY(48++,0),9 REP 32 # shift down by 7, and saturate +++ vadd HY(48++,0),HY(48++,0),r4 REP 16 # add on rounding, +++ vasl HY(48++,0),HY(48++,0),r5 REP 16 # shift down by 7, and saturate ++ # 16bit results now in HX(48,32) ++ mov r0,r8 ++ mov r6,32*2 ++ vsth VX(48,32++),(r0+=r6) REP 16 ++- vmov VX(0,0++)+r0, HX(0++,32)+r0 REP 16 # Store transposed ++ ++ # Now apply butterfly to compute the second 16 results (in reverse order) ++- vsub HY(63,0),HY(0,0),HY(16,0) ++- vsub HY(62,0),HY(0,0),HY(17,0) ++- vsub HY(61,0),HY(0,0),HY(18,0) ++- vsub HY(60,0),HY(0,0),HY(19,0) ++- vsub HY(59,0),HY(0,0),HY(20,0) ++- vsub HY(58,0),HY(0,0),HY(21,0) ++- vsub HY(57,0),HY(0,0),HY(22,0) ++- vsub HY(56,0),HY(0,0),HY(23,0) ++- vsub HY(55,0),HY(0,0),HY(24,0) ++- vsub HY(54,0),HY(0,0),HY(25,0) ++- vsub HY(53,0),HY(0,0),HY(26,0) ++- vsub HY(52,0),HY(0,0),HY(27,0) ++- vsub HY(51,0),HY(0,0),HY(28,0) ++- vsub HY(50,0),HY(0,0),HY(29,0) ++- vsub HY(49,0),HY(0,0),HY(30,0) ++- vsub HY(48,0),HY(0,0),HY(31,0) ++- vadd HY(48++,0),HY(48++,0),r4 REP 32 # add on rounding, ++- vasl HY(48++,0),HY(48++,0),9 REP 32 # shift down by 7, and saturate ++- add r0,r8,16*32*2 # Move to 16th row +++ vsub HY(63,0),HY(0 ,0),HY(16,0) +++ vsub HY(62,0),HY(1 ,0),HY(17,0) +++ vsub HY(61,0),HY(2 ,0),HY(18,0) +++ vsub HY(60,0),HY(3 ,0),HY(19,0) +++ vsub HY(59,0),HY(4 ,0),HY(20,0) +++ vsub HY(58,0),HY(5 ,0),HY(21,0) +++ vsub HY(57,0),HY(6 ,0),HY(22,0) +++ vsub HY(56,0),HY(7 ,0),HY(23,0) +++ vsub HY(55,0),HY(8 ,0),HY(24,0) +++ vsub HY(54,0),HY(9 ,0),HY(25,0) +++ vsub HY(53,0),HY(10,0),HY(26,0) +++ vsub HY(52,0),HY(11,0),HY(27,0) +++ vsub HY(51,0),HY(12,0),HY(28,0) +++ vsub HY(50,0),HY(13,0),HY(29,0) +++ vsub HY(49,0),HY(14,0),HY(30,0) +++ vsub HY(48,0),HY(15,0),HY(31,0) +++ vadd HY(48++,0),HY(48++,0),r4 REP 16 # add on rounding, +++ vasl HY(48++,0),HY(48++,0),r5 REP 16 # shift down by 7, and saturate +++ add r0,r8,32 ++ vsth VX(48,32++),(r0+=r6) REP 16 ++- b lr +++ pop pc ++diff --git a/libavcodec/rpi_qpu.c 
b/libavcodec/rpi_qpu.c ++index d720546..12ad5fb 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -89,7 +89,7 @@ struct GPU ++ { ++ unsigned int qpu_code[QPU_CODE_SIZE]; ++ unsigned int vpu_code[VPU_CODE_SIZE]; ++- short transMatrix2even[16*16]; +++ short transMatrix2even[16*16*2]; ++ int open_count; // Number of allocated video buffers ++ unsigned int vc_handle; // Handle of this memory ++ int mb; // Mailbox handle ++@@ -162,7 +162,7 @@ static int gpu_init(volatile struct GPU **gpu) { ++ memcpy((void*)ptr->vpu_code, rpi_hevc_transform, num_bytes); ++ } ++ // And the transform coefficients ++- memcpy((void*)ptr->transMatrix2even, rpi_transMatrix2even, 16*16*sizeof(short)); +++ memcpy((void*)ptr->transMatrix2even, rpi_transMatrix2even, sizeof(rpi_transMatrix2even)); ++ ++ return 0; ++ } ++-- ++2.7.4 ++ ++ ++From 6c2ed6109c4dd5c8ab16bf16e0ae3be6ae166e50 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Tue, 5 May 2015 16:57:03 +0100 ++Subject: [PATCH 07/68] Clear coefficients in advance ++ ++--- ++ libavcodec/hevc.c | 129 ++++++++++++++++++++++++++++------------ ++ libavcodec/hevc.h | 6 +- ++ libavcodec/hevc_cabac.c | 7 ++- ++ libavcodec/rpi_hevc_transform.h | 50 ++++++++++++++++ ++ libavcodec/rpi_hevc_transform.s | 16 +++++ ++ 5 files changed, 168 insertions(+), 40 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 0dde6f2..1424007 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -43,6 +43,8 @@ ++ ++ #ifdef RPI ++ #include "rpi_qpu.h" +++// For some unknown reason, the code seems to crash if I do a late malloc +++#define EARLY_MALLOC ++ #endif ++ ++ // #define DISABLE_MC ++@@ -61,6 +63,20 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ /* free everything allocated by pic_arrays_init() */ ++ static void pic_arrays_free(HEVCContext *s) ++ { +++#ifdef RPI +++#ifdef EARLY_MALLOC +++#else +++ printf("pic_arrays_free\n"); +++ if 
(s->coeffs_buf_arm[0]) { +++ gpu_free(&s->coeffs_buf_default); +++ s->coeffs_buf_arm[0] = 0; +++ } +++ if (s->coeffs_buf_arm[2]) { +++ gpu_free(&s->coeffs_buf_accelerated); +++ s->coeffs_buf_arm[2] = 0; +++ } +++#endif +++#endif ++ av_freep(&s->sao); ++ av_freep(&s->deblock); ++ ++@@ -97,6 +113,28 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ int ctb_count = sps->ctb_width * sps->ctb_height; ++ int min_pu_size = sps->min_pu_width * sps->min_pu_height; ++ +++#ifdef RPI +++#ifdef EARLY_MALLOC +++#else +++ int coeffs_in_ctb = (1 << s->ps.sps->log2_ctb_size) * (1 << s->ps.sps->log2_ctb_size); +++ int coefs_per_row = sps->ctb_width * coeffs_in_ctb * 3; // Allow space for chroma +++ printf("pic_arrays_init\n"); +++ printf("Allocated %d\n",coefs_per_row); +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); +++ s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; +++ if (!s->coeffs_buf_arm[0]) +++ goto fail; +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); +++ s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; +++ s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; +++ if (!s->coeffs_buf_arm[2]) +++ goto fail; +++ s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; +++ s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; +++ printf("Done\n"); +++#endif +++#endif +++ ++ s->bs_width = (width >> 2) + 1; ++ s->bs_height = (height >> 2) + 1; ++ ++@@ -2400,11 +2438,10 @@ static void rpi_execute_transform(HEVCContext *s) ++ // s->hevcdsp.idct[4-2](coeffs, 16); ++ //} ++ ++- gpu_cache_flush(&s->coeffs_buf[2]); ++- gpu_cache_flush(&s->coeffs_buf[3]); ++- vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf[2].vc, s->num_coeffs[2] >> 8, s->coeffs_buf[3].vc, s->num_coeffs[3] >> 10, 0); ++- gpu_cache_flush(&s->coeffs_buf[2]); ++- gpu_cache_flush(&s->coeffs_buf[3]); +++ +++ gpu_cache_flush(&s->coeffs_buf_accelerated); +++ 
vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); +++ //gpu_cache_flush(&s->coeffs_buf_accelerated); ++ ++ for(i=0;i<4;i++) ++ s->num_coeffs[i] = 0; ++@@ -2426,7 +2463,9 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ lc->na.cand_up_right = (cmd->na >> 0) & 1; ++ s->hpc.intra_pred[cmd->size - 2](s, cmd->x, cmd->y, cmd->c_idx); ++ } else { +++ int trafo_size = 1 << cmd->size; ++ s->hevcdsp.transform_add[cmd->size-2](cmd->dst, cmd->buf, cmd->stride); +++ memset(cmd->buf, 0, trafo_size * trafo_size * sizeof(int16_t)); // Clear coefficients here while they are in the cache ++ } ++ } ++ s->num_pred_cmds = 0; ++@@ -3235,10 +3274,18 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ av_freep(&s->unif_mv_cmds); ++ av_freep(&s->unif_xfm_cmds); ++ av_freep(&s->univ_pred_cmds); ++- for(i = 0; i < 4; i++) { ++- gpu_free(&s->coeffs_buf[i]); +++ +++#ifdef EARLY_MALLOC +++ if (s->coeffs_buf_arm[0]) { +++ gpu_free(&s->coeffs_buf_default); +++ s->coeffs_buf_arm[0] = 0; +++ } +++ if (s->coeffs_buf_arm[2]) { +++ gpu_free(&s->coeffs_buf_accelerated); +++ s->coeffs_buf_arm[2] = 0; ++ } ++ #endif +++#endif ++ ++ for (i = 0; i < 3; i++) { ++ av_freep(&s->sao_pixel_buffer_h[i]); ++@@ -3281,6 +3328,16 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ return 0; ++ } ++ +++#ifdef RPI +++static av_cold void memclear16(int16_t *p, int n) +++{ +++ vpu_execute_code( vpu_get_fn(), p, n, 0, 0, 0, 1); +++ //int i; +++ //for(i=0;i<n;i++) +++ // p[i] = 0; +++} +++#endif +++ ++ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ { ++ HEVCContext *s = avctx->priv_data; ++@@ -3304,37 +3361,35 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->univ_pred_cmds = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); ++ if (!s->univ_pred_cmds) ++ goto fail; ++- for(i = 0; i < 4; i++) { ++- gpu_malloc_cached(sizeof(int16_t)*RPI_MAX_XFM_CMDS*16, 
&s->coeffs_buf[i]); // TODO slim this down and share across sizes ++- s->coeffs_buf_arm[i] = (int16_t*) s->coeffs_buf[i].arm; ++- if (!s->coeffs_buf_arm[i]) ++- goto fail; ++- } ++- s->enable_rpi = 0; ++ ++- // A little test program ++- /*{ ++- GPU_MEM_PTR_T p; ++- int err = gpu_malloc_cached(16, &p); ++- short *q = (short *)p.arm; ++- int i; ++- int r; ++- printf("Allocated memory %d ARM 0x%x, VC 0x%x, Code 0x%x\n",err,(int)p.arm,p.vc,(int)vpu_get_fn()); ++- printf("Allocated memory %d ARM 0x%x, VC 0x%x\n",err,(int)p.arm,p.vc); ++- printf("Preparing data %p\n",q); ++- for(i=0;i<16;i++) ++- q[i] = i; ++- printf("Flush cache\n"); ++- gpu_cache_flush(&p); ++- printf("Executing code\n"); ++- r = vpu_execute_code( vpu_get_fn(), p.vc, 0, 0, 0, 0, 0); ++- printf("Return value %d (",r); ++- for(i=0;i<16;i++) ++- printf("%d ",q[i]); ++- printf(")\n"); ++- gpu_free(&p); ++- goto fail; // Early out ++- }*/ +++ s->coeffs_buf_arm[0] = 0; +++ s->coeffs_buf_arm[2] = 0; +++ +++#ifdef EARLY_MALLOC +++ int coeffs_in_ctb = 64*64; +++ int coefs_per_row = (2048/64) * coeffs_in_ctb * 3; // Allow space for chroma +++ printf("Allocated %d\n",coefs_per_row); +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); +++ s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; +++ if (!s->coeffs_buf_arm[0]) +++ goto fail; +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); +++ s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; +++ s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; +++ if (!s->coeffs_buf_arm[2]) +++ goto fail; +++ s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; +++ s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; +++ printf("Done\n"); +++ //memset(s->coeffs_buf_arm[0],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[0], coefs_per_row); +++ //memset(s->coeffs_buf_arm[2],0, sizeof(int16_t) * coefs_per_row); +++ 
memclear16(s->coeffs_buf_arm[2], coefs_per_row); +++ //memset(s->coeffs_buf_arm[3],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[3], coefs_per_row); +++#endif +++ +++ s->enable_rpi = 0; ++ ++ #endif ++ ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 4167985..9a228f6 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -861,8 +861,12 @@ typedef struct HEVCContext { ++ HEVCMvCmd *unif_mv_cmds; ++ HEVCXfmCmd *unif_xfm_cmds; ++ HEVCPredCmd *univ_pred_cmds; ++- GPU_MEM_PTR_T coeffs_buf[4]; +++ int buf_width; +++ GPU_MEM_PTR_T coeffs_buf_default; +++ GPU_MEM_PTR_T coeffs_buf_accelerated; ++ int16_t *coeffs_buf_arm[4]; +++ unsigned int coeffs_buf_vc[4]; +++ ++ int num_coeffs[4]; ++ int num_xfm_cmds; ++ int num_mv_cmds; ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 88aa959..dbfee85 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1058,9 +1058,13 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ s->num_coeffs[0] += n; ++ } ++ } +++ // We now do the memset after transform_add while we know the data is cached. 
+++ //memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); +++#else +++ memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); ++ #endif ++ ++- memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); +++ ++ ++ // Derive QP for dequant ++ if (!lc->cu.cu_transquant_bypass_flag) { ++@@ -1547,7 +1551,6 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ #ifdef RPI ++ if (s->enable_rpi) { ++ HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; ++- //memcpy(coeffs2, coeffs, sizeof(int16_t) * trafo_size * trafo_size); // TODO ++ cmd->type = RPI_PRED_TRANSFORM_ADD; ++ cmd->size = log2_trafo_size; ++ cmd->buf = coeffs; ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++index 6d772d7..4f13622 100644 ++--- a/libavcodec/rpi_hevc_transform.h +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -1,4 +1,10 @@ ++ unsigned char rpi_hevc_transform [] = { +++21, +++106, +++0, +++144, +++35, +++1, ++ 169, ++ 3, ++ 62, ++@@ -577,4 +583,48 @@ unsigned char rpi_hevc_transform [] = { ++ 0, ++ 111, ++ 3, +++4, +++254, +++0, +++128, +++0, +++4, +++0, +++248, +++0, +++0, +++2, +++232, +++32, +++0, +++0, +++0, +++140, +++248, +++32, +++0, +++0, +++0, +++224, +++35, +++0, +++0, +++64, +++232, +++0, +++2, +++0, +++0, +++193, +++232, +++0, +++1, +++0, +++0, +++1, +++106, +++116, +++30, +++90, +++0, ++ }; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++index afdb32a..fd159bc 100644 ++--- a/libavcodec/rpi_hevc_transform.s +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -78,8 +78,11 @@ ++ # num: number of 16x16 transforms to be done ++ # coeffs32 ++ # num32: number of 32x32 transforms +++# command 0 for transform, 1 for memclear16(int16_t *dst,num16) ++ # ++ hevc_trans_16x16: +++ cmp r5,1 +++ beq memclear16 ++ push r6-r15, lr # TODO cut down number of used registers ++ mov r14,r3 # coeffs32 ++ mov r15,r4 # num32 ++@@ -266,3 +269,16 @@ trans32: ++ add r0,r8,32 ++ vsth VX(48,32++),(r0+=r6) REP 16 ++ 
pop pc +++ +++memclear16: +++ # r0 is address +++ # r1 is number of 16bits values to set to 0 (may overrun past end and clear more than specified) +++ vmov HX(0++,0),0 REP 16 +++ mov r2,32 +++loop: +++ vsth HX(0++,0),(r0+=r2) REP 16 +++ add r0,16*16*2 +++ sub r1,16*16 +++ cmp r1,0 +++ bgt loop +++ b lr ++-- ++2.7.4 ++ ++ ++From 48282c2fb55c0d9a72222f384c03c432f78a3016 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 6 May 2015 09:56:43 +0100 ++Subject: [PATCH 08/68] Prepared inter offload ++ ++--- ++ libavcodec/hevc.c | 116 +++++++++++++++++++++++++++++++++++++++++++----- ++ libavcodec/hevc.h | 29 +++++++++++- ++ libavcodec/hevc_cabac.c | 5 ++- ++ 3 files changed, 137 insertions(+), 13 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 1424007..8215201 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -45,6 +45,8 @@ ++ #include "rpi_qpu.h" ++ // For some unknown reason, the code seems to crash if I do a late malloc ++ #define EARLY_MALLOC +++// Move Inter prediction into separate pass +++//#define RPI_INTER ++ #endif ++ ++ // #define DISABLE_MC ++@@ -1440,6 +1442,95 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) ++ * @param luma_offset additive offset applied to the luma prediction value ++ */ ++ +++#ifdef RPI_INTER +++#define RPI_REDIRECT(fn) rpi_ ## fn +++static void rpi_luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, +++ AVFrame *ref, const Mv *mv, int x_off, int y_off, +++ int block_w, int block_h, int luma_weight, int luma_offset) +++{ +++ HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ cmd->cmd = RPI_CMD_LUMA_UNI; +++ cmd->dst = dst; +++ cmd->dststride = dststride; +++ cmd->src = ref->data[0]; +++ cmd->srcstride = ref->linesize[0]; +++ cmd->mv = *mv; +++ cmd->x_off = x_off; +++ cmd->y_off = y_off; +++ cmd->block_w = block_w; +++ cmd->block_h = block_h; +++ cmd->weight = luma_weight; +++ cmd->offset = luma_offset; +++} +++ 
+++static void rpi_luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, +++ AVFrame *ref0, const Mv *mv0, int x_off, int y_off, +++ int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) +++{ +++ HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ cmd->cmd = RPI_CMD_LUMA_BI; +++ cmd->dst = dst; +++ cmd->dststride = dststride; +++ cmd->src = ref->data[0]; +++ cmd->srcstride = ref->linesize[0]; +++ cmd->mv = *mv; +++ cmd->x_off = x_off; +++ cmd->y_off = y_off; +++ cmd->block_w = block_w; +++ cmd->block_h = block_h; +++ cmd->weight = luma_weight; +++ cmd->offset = luma_offset; +++ cmd->src1 = ref1->data[]; +++ cmd->srcstride1 = ref1->linesize[0]; +++ cmd->mv1 = *mv1; +++ cmd->ref_idx[0] = current_mv->ref_idx[0]; +++ cmd->ref_idx[1] = current_mv->ref_idx[1]; +++} +++ +++static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0, +++ ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist, +++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset) +++{ +++ HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ cmd->cmd = RPI_CMD_CHROMA_UNI; +++ cmd->dst = dst0; +++ cmd->dststride = dststride; +++ cmd->src = src0; +++ cmd->srcstride = srcstride; +++ cmd->mv = current_mv->mv[reflist]; +++ cmd->x_off = x_off; +++ cmd->y_off = y_off; +++ cmd->block_w = block_w; +++ cmd->block_h = block_h; +++ cmd->weight = chroma_weight; +++ cmd->offset = chroma_offset; +++} +++ +++static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1, +++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx) +++{ +++ HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ cmd->cmd = RPI_CMD_CHROMA_BI+cidx; +++ cmd->dst = dst0; +++ cmd->dststride = dststride; +++ cmd->src = ref0->data[cidx+1]; +++ cmd->srcstride = ref0->linesize[cidx+1]; +++ cmd->mv = current_mv->mv[reflist]; +++ cmd->x_off = x_off; +++ cmd->y_off = 
y_off; +++ cmd->block_w = block_w; +++ cmd->block_h = block_h; +++ cmd->weight = chroma_weight; +++ cmd->offset = chroma_offset; +++ cmd->src = ref1->data[cidx+1]; +++ cmd->srcstride1 = ref1->linesize[cidx+1]; +++ cmd->ref_idx[0] = current_mv->ref_idx[0]; +++ cmd->ref_idx[1] = current_mv->ref_idx[1]; +++} +++#else +++#define RPI_REDIRECT(fn) fn +++#endif +++ ++ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref, const Mv *mv, int x_off, int y_off, ++ int block_w, int block_h, int luma_weight, int luma_offset) ++@@ -1505,7 +1596,7 @@ static void luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ * @param mv1 motion vector1 (relative to block position) to get pixel data from ++ * @param current_mv current motion vector structure ++ */ ++- static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, +++static void luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref0, const Mv *mv0, int x_off, int y_off, ++ int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) ++ { ++@@ -1887,16 +1978,16 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- luma_mc_uni(s, dst0, s->frame->linesize[0], ref0->frame, +++ RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref0->frame, ++ ¤t_mv.mv[0], x0, y0, nPbW, nPbH, ++ s->sh.luma_weight_l0[current_mv.ref_idx[0]], ++ s->sh.luma_offset_l0[current_mv.ref_idx[0]]); ++ ++ if (s->ps.sps->chroma_format_idc) { ++- chroma_mc_uni(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1], +++ RPI_REDIRECT(chroma_mc_uni)(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1], ++ 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, ++ s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]); ++- chroma_mc_uni(s, dst2, 
s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2], +++ RPI_REDIRECT(chroma_mc_uni)(s, dst2, s->frame->linesize[2], ref0->frame->data[2], ref0->frame->linesize[2], ++ 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, ++ s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1]); ++ } ++@@ -1906,17 +1997,17 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- luma_mc_uni(s, dst0, s->frame->linesize[0], ref1->frame, +++ RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref1->frame, ++ ¤t_mv.mv[1], x0, y0, nPbW, nPbH, ++ s->sh.luma_weight_l1[current_mv.ref_idx[1]], ++ s->sh.luma_offset_l1[current_mv.ref_idx[1]]); ++ ++ if (s->ps.sps->chroma_format_idc) { ++- chroma_mc_uni(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1], +++ RPI_REDIRECT(chroma_mc_uni)(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1], ++ 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, ++ s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]); ++ ++- chroma_mc_uni(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2], +++ RPI_REDIRECT(chroma_mc_uni)(s, dst2, s->frame->linesize[2], ref1->frame->data[2], ref1->frame->linesize[2], ++ 1, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, ++ s->sh.chroma_weight_l1[current_mv.ref_idx[1]][1], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][1]); ++ } ++@@ -1926,15 +2017,15 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- luma_mc_bi(s, dst0, s->frame->linesize[0], ref0->frame, +++ RPI_REDIRECT(luma_mc_bi)(s, dst0, s->frame->linesize[0], ref0->frame, ++ ¤t_mv.mv[0], x0, y0, nPbW, nPbH, ++ ref1->frame, ¤t_mv.mv[1], ¤t_mv); ++ ++ if (s->ps.sps->chroma_format_idc) { ++- 
chroma_mc_bi(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame, +++ RPI_REDIRECT(chroma_mc_bi)(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame, ++ x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 0); ++ ++- chroma_mc_bi(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame, +++ RPI_REDIRECT(chroma_mc_bi)(s, dst2, s->frame->linesize[2], ref0->frame, ref1->frame, ++ x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, 1); ++ } ++ } ++@@ -2465,7 +2556,9 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ } else { ++ int trafo_size = 1 << cmd->size; ++ s->hevcdsp.transform_add[cmd->size-2](cmd->dst, cmd->buf, cmd->stride); +++#ifdef RPI_PRECLEAR ++ memset(cmd->buf, 0, trafo_size * trafo_size * sizeof(int16_t)); // Clear coefficients here while they are in the cache +++#endif ++ } ++ } ++ s->num_pred_cmds = 0; ++@@ -3381,6 +3474,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; ++ s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; ++ printf("Done\n"); +++#ifdef RPI_PRECLEAR ++ //memset(s->coeffs_buf_arm[0],0, sizeof(int16_t) * coefs_per_row); ++ memclear16(s->coeffs_buf_arm[0], coefs_per_row); ++ //memset(s->coeffs_buf_arm[2],0, sizeof(int16_t) * coefs_per_row); ++@@ -3389,6 +3483,8 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ memclear16(s->coeffs_buf_arm[3], coefs_per_row); ++ #endif ++ +++#endif +++ ++ s->enable_rpi = 0; ++ ++ #endif ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 9a228f6..1ac119a 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -803,14 +803,39 @@ typedef struct HEVCLocalContext { ++ // RPI_MAX_WIDTH is maximum width in pixels supported by the accelerated code ++ #define RPI_MAX_WIDTH 2048 ++ ++-// Worst case is for 4:4:4 4x4 blocks with 64 high coding tree blocks, so 16 MV cmds per 4 pixels across for each colour plane ++-#define RPI_MAX_MV_CMDS (16*3*(RPI_MAX_WIDTH/4)) +++// Worst case is for 4:4:4 4x4 
blocks with 64 high coding tree blocks, so 16 MV cmds per 4 pixels across for each colour plane, * 2 for bi +++#define RPI_MAX_MV_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) ++ #define RPI_MAX_XFM_CMDS (16*3*(RPI_MAX_WIDTH/4)) ++ // Each block can have an intra prediction and a transform_add command ++ #define RPI_MAX_PRED_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) ++ +++#define RPI_CMD_LUMA_UNI 0 +++#define RPI_CMD_CHROMA_UNI 1 +++#define RPI_CMD_LUMA_BI 2 +++#define RPI_CMD_U_BI 3 +++#define RPI_CMD_V_BI 4 +++ +++// RPI_PRECLEAR is not working yet - perhaps clearing on VPUs is flawed? +++// #define RPI_PRECLEAR +++ ++ // Command for inter prediction ++ typedef struct HEVCMvCmd { +++ int cmd; +++ uint8_t *dst; +++ ptrdiff_t dststride; +++ uint8_t *src; +++ ptrdiff_t srcstride; +++ Mv mv; +++ int x_off; +++ int y_off; +++ int block_w; +++ int block_h; +++ int weight; +++ int offset; +++ uint8_t *src1; +++ ptrdiff_t srcstride1; +++ Mv mv1; +++ int8_t ref_idx[2]; ++ } HEVCMvCmd; ++ ++ // Command for transform to process a block of coefficients ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index dbfee85..4f072be 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1059,7 +1059,10 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } ++ } ++ // We now do the memset after transform_add while we know the data is cached. 
++- //memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); +++ #ifdef RPI_PRECLEAR +++ #else +++ memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); +++ #endif ++ #else ++ memset(coeffs, 0, trafo_size * trafo_size * sizeof(int16_t)); ++ #endif ++-- ++2.7.4 ++ ++ ++From 25d3b4e876febe08302a01abd85d5009160ead3e Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 6 May 2015 11:08:50 +0100 ++Subject: [PATCH 09/68] Inter prediction in separate pass ++ ++--- ++ libavcodec/hevc.c | 93 +++++++++++++++++++++++++++++++++++++++++++++---------- ++ libavcodec/hevc.h | 2 +- ++ 2 files changed, 77 insertions(+), 18 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 8215201..b7bc6ad 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -46,7 +46,7 @@ ++ // For some unknown reason, the code seems to crash if I do a late malloc ++ #define EARLY_MALLOC ++ // Move Inter prediction into separate pass ++-//#define RPI_INTER +++#define RPI_INTER ++ #endif ++ ++ // #define DISABLE_MC ++@@ -1448,7 +1448,7 @@ static void rpi_luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref, const Mv *mv, int x_off, int y_off, ++ int block_w, int block_h, int luma_weight, int luma_offset) ++ { ++- HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; ++ cmd->cmd = RPI_CMD_LUMA_UNI; ++ cmd->dst = dst; ++ cmd->dststride = dststride; ++@@ -1467,31 +1467,29 @@ static void rpi_luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref0, const Mv *mv0, int x_off, int y_off, ++ int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) ++ { ++- HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; ++ cmd->cmd = RPI_CMD_LUMA_BI; ++ cmd->dst = dst; ++ cmd->dststride = dststride; ++- cmd->src = ref->data[0]; ++- cmd->srcstride = ref->linesize[0]; ++- 
cmd->mv = *mv; +++ cmd->src = ref0->data[0]; +++ cmd->srcstride = ref0->linesize[0]; +++ cmd->mv = *mv0; ++ cmd->x_off = x_off; ++ cmd->y_off = y_off; ++ cmd->block_w = block_w; ++ cmd->block_h = block_h; ++- cmd->weight = luma_weight; ++- cmd->offset = luma_offset; ++- cmd->src1 = ref1->data[]; +++ cmd->src1 = ref1->data[0]; ++ cmd->srcstride1 = ref1->linesize[0]; ++ cmd->mv1 = *mv1; ++ cmd->ref_idx[0] = current_mv->ref_idx[0]; ++ cmd->ref_idx[1] = current_mv->ref_idx[1]; ++ } ++ ++-static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0, +++static void rpi_chroma_mc_uni(HEVCContext *s, uint8_t *dst0, ++ ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist, ++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset) ++ { ++- HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; ++ cmd->cmd = RPI_CMD_CHROMA_UNI; ++ cmd->dst = dst0; ++ cmd->dststride = dststride; ++@@ -1506,27 +1504,27 @@ static void chroma_mc_uni(HEVCContext *s, uint8_t *dst0, ++ cmd->offset = chroma_offset; ++ } ++ ++-static void chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1, +++static void rpi_chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1, ++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx) ++ { ++- HEVCMvCmd *cmd = unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; ++ cmd->cmd = RPI_CMD_CHROMA_BI+cidx; ++ cmd->dst = dst0; ++ cmd->dststride = dststride; ++ cmd->src = ref0->data[cidx+1]; ++ cmd->srcstride = ref0->linesize[cidx+1]; ++- cmd->mv = current_mv->mv[reflist]; +++ cmd->mv = current_mv->mv[0]; +++ cmd->mv1 = current_mv->mv[1]; ++ cmd->x_off = x_off; ++ cmd->y_off = y_off; ++ cmd->block_w = block_w; ++ cmd->block_h = block_h; ++- cmd->weight = chroma_weight; ++- cmd->offset = chroma_offset; ++- 
cmd->src = ref1->data[cidx+1]; +++ cmd->src1 = ref1->data[cidx+1]; ++ cmd->srcstride1 = ref1->linesize[cidx+1]; ++ cmd->ref_idx[0] = current_mv->ref_idx[0]; ++ cmd->ref_idx[1] = current_mv->ref_idx[1]; ++ } +++ ++ #else ++ #define RPI_REDIRECT(fn) fn ++ #endif ++@@ -2554,7 +2552,9 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ lc->na.cand_up_right = (cmd->na >> 0) & 1; ++ s->hpc.intra_pred[cmd->size - 2](s, cmd->x, cmd->y, cmd->c_idx); ++ } else { +++#ifdef RPI_PRECLEAR ++ int trafo_size = 1 << cmd->size; +++#endif ++ s->hevcdsp.transform_add[cmd->size-2](cmd->dst, cmd->buf, cmd->stride); ++ #ifdef RPI_PRECLEAR ++ memset(cmd->buf, 0, trafo_size * trafo_size * sizeof(int16_t)); // Clear coefficients here while they are in the cache ++@@ -2563,6 +2563,61 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ } ++ s->num_pred_cmds = 0; ++ } +++ +++static void rpi_execute_inter_cmds(HEVCContext *s) +++{ +++ HEVCMvCmd *cmd = s->unif_mv_cmds; +++ int n,cidx; +++ AVFrame myref; +++ AVFrame myref1; +++ struct MvField mymv; +++ if (s->num_mv_cmds > RPI_MAX_MV_CMDS) { +++ printf("Overflow inter_cmds\n"); +++ exit(-1); +++ } +++ for(n = s->num_mv_cmds; n>0 ; n--, cmd++) { +++ switch(cmd->cmd) { +++ case RPI_CMD_LUMA_UNI: +++ myref.data[0] = cmd->src; +++ myref.linesize[0] = cmd->srcstride; +++ luma_mc_uni(s, cmd->dst, cmd->dststride, &myref, &cmd->mv, cmd->x_off, cmd->y_off, cmd->block_w, cmd->block_h, cmd->weight, cmd->offset); +++ break; +++ case RPI_CMD_LUMA_BI: +++ myref.data[0] = cmd->src; +++ myref.linesize[0] = cmd->srcstride; +++ myref1.data[0] = cmd->src1; +++ myref1.linesize[0] = cmd->srcstride1; +++ mymv.ref_idx[0] = cmd->ref_idx[0]; +++ mymv.ref_idx[1] = cmd->ref_idx[1]; +++ luma_mc_bi(s, cmd->dst, cmd->dststride, +++ &myref, &cmd->mv, cmd->x_off, cmd->y_off, cmd->block_w, cmd->block_h, +++ &myref1, &cmd->mv1, &mymv); +++ break; +++ case RPI_CMD_CHROMA_UNI: +++ mymv.mv[0] = cmd->mv; +++ chroma_mc_uni(s, cmd->dst, +++ cmd->dststride, cmd->src, 
cmd->srcstride, 0, +++ cmd->x_off, cmd->y_off, cmd->block_w, cmd->block_h, &mymv, cmd->weight, cmd->offset); +++ break; +++ case RPI_CMD_CHROMA_BI: +++ case RPI_CMD_CHROMA_BI+1: +++ cidx = cmd->cmd - RPI_CMD_CHROMA_BI; +++ myref.data[cidx+1] = cmd->src; +++ myref.linesize[cidx+1] = cmd->srcstride; +++ myref1.data[cidx+1] = cmd->src1; +++ myref1.linesize[cidx+1] = cmd->srcstride1; +++ mymv.ref_idx[0] = cmd->ref_idx[0]; +++ mymv.ref_idx[1] = cmd->ref_idx[1]; +++ mymv.mv[0] = cmd->mv; +++ mymv.mv[1] = cmd->mv1; +++ chroma_mc_bi(s, cmd->dst, cmd->dststride, &myref, &myref1, +++ cmd->x_off, cmd->y_off, cmd->block_w, cmd->block_h, &mymv, cidx); +++ break; +++ } +++ } +++ s->num_mv_cmds = 0; +++} +++ ++ #endif ++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++@@ -2611,6 +2666,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI ++ if (s->enable_rpi && x_ctb + ctb_size >= s->ps.sps->width) { ++ int x; +++ // Perform inter prediction +++ rpi_execute_inter_cmds(s); ++ // Transform all blocks ++ rpi_execute_transform(s); ++ // Perform intra prediction and residual reconstruction ++@@ -3422,6 +3479,7 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ } ++ ++ #ifdef RPI +++#ifdef RPI_PRECLEAR ++ static av_cold void memclear16(int16_t *p, int n) ++ { ++ vpu_execute_code( vpu_get_fn(), p, n, 0, 0, 0, 1); ++@@ -3430,6 +3488,7 @@ static av_cold void memclear16(int16_t *p, int n) ++ // p[i] = 0; ++ } ++ #endif +++#endif ++ ++ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ { ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 1ac119a..a0eb71b 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -812,7 +812,7 @@ typedef struct HEVCLocalContext { ++ #define RPI_CMD_LUMA_UNI 0 ++ #define RPI_CMD_CHROMA_UNI 1 ++ #define RPI_CMD_LUMA_BI 2 ++-#define RPI_CMD_U_BI 3 +++#define RPI_CMD_CHROMA_BI 3 ++ #define RPI_CMD_V_BI 4 ++ ++ // RPI_PRECLEAR is not working yet - 
perhaps clearing on VPUs is flawed? ++-- ++2.7.4 ++ ++ ++From 8af0a0a036e4bb3883f144d0567bc527772dd65b Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 6 May 2015 13:03:50 +0100 ++Subject: [PATCH 10/68] Added VPU thread ++ ++--- ++ libavcodec/hevc.c | 11 +++-- ++ libavcodec/hevc.h | 1 + ++ libavcodec/rpi_qpu.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++-- ++ libavcodec/rpi_qpu.h | 2 + ++ 4 files changed, 133 insertions(+), 6 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index b7bc6ad..98dbd69 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2529,8 +2529,10 @@ static void rpi_execute_transform(HEVCContext *s) ++ ++ ++ gpu_cache_flush(&s->coeffs_buf_accelerated); ++- vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); +++ s->vpu_id = vpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, &s->coeffs_buf_accelerated); +++ //vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); ++ //gpu_cache_flush(&s->coeffs_buf_accelerated); +++ //vpu_wait(s->vpu_id); ++ ++ for(i=0;i<4;i++) ++ s->num_coeffs[i] = 0; ++@@ -2666,10 +2668,12 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI ++ if (s->enable_rpi && x_ctb + ctb_size >= s->ps.sps->width) { ++ int x; ++- // Perform inter prediction ++- rpi_execute_inter_cmds(s); ++ // Transform all blocks ++ rpi_execute_transform(s); +++ // Perform inter prediction +++ rpi_execute_inter_cmds(s); +++ // Wait for transform completion +++ vpu_wait(s->vpu_id); ++ // Perform intra prediction and residual reconstruction ++ rpi_execute_pred_cmds(s); ++ // Perform deblocking for CTBs in this row ++@@ -3426,6 +3430,7 @@ static av_cold int 
hevc_decode_free(AVCodecContext *avctx) ++ av_freep(&s->univ_pred_cmds); ++ ++ #ifdef EARLY_MALLOC +++ printf("hevc_decode_free\n"); ++ if (s->coeffs_buf_arm[0]) { ++ gpu_free(&s->coeffs_buf_default); ++ s->coeffs_buf_arm[0] = 0; ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index a0eb71b..0d8dfe9 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -896,6 +896,7 @@ typedef struct HEVCContext { ++ int num_xfm_cmds; ++ int num_mv_cmds; ++ int num_pred_cmds; +++ int vpu_id; ++ #endif ++ ++ uint8_t *cabac_state; ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 12ad5fb..378dd74 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -1,9 +1,13 @@ ++ #ifdef RPI ++-// Use the vcsm device for shared memory +++// define RPI_USE_VCSM to use the vcsm device for shared memory ++ // This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. ++ #define RPI_USE_VCSM ++-#define RPI_TIME_TOTAL_QPU ++-#define RPI_TIME_TOTAL_VPU +++// define RPI_TIME_TOTAL_QPU to print out how much time is spent in the QPU code +++//#define RPI_TIME_TOTAL_QPU +++// define RPI_TIME_TOTAL_VPU to print out how much time is spent in the VPI code +++//#define RPI_TIME_TOTAL_VPU +++// define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion +++#define RPI_ASYNC ++ ++ #include <stdio.h> ++ #include <stdlib.h> ++@@ -113,6 +117,19 @@ static unsigned int Microseconds(void) { ++ } ++ #endif ++ +++#ifdef RPI_ASYNC +++pthread_t vpu_thread; +++static void *vpu_start(void *arg); +++ +++#define MAXCMDS 128 +++static pthread_cond_t post_cond = PTHREAD_COND_INITIALIZER; +++static pthread_mutex_t post_mutex = PTHREAD_MUTEX_INITIALIZER; +++ +++static int vpu_cmds[MAXCMDS][8]; +++static volatile int vpu_async_tail=0; // Contains the number of posted jobs +++static volatile int vpu_async_head=0; +++#endif +++ ++ // Connect to QPU, returns 0 on success. 
++ static int gpu_init(volatile struct GPU **gpu) { ++ int mb = mbox_open(); ++@@ -164,12 +181,27 @@ static int gpu_init(volatile struct GPU **gpu) { ++ // And the transform coefficients ++ memcpy((void*)ptr->transMatrix2even, rpi_transMatrix2even, sizeof(rpi_transMatrix2even)); ++ +++#ifdef RPI_ASYNC +++ { +++ int err; +++ vpu_async_tail = 0; +++ vpu_async_head = 0; +++ err = pthread_create(&vpu_thread, NULL, vpu_start, NULL); +++ //printf("Created thread\n"); +++ if (err) { +++ printf("Failed to create vpu thread\n"); +++ return -4; +++ } +++ } +++#endif +++ ++ return 0; ++ } ++ ++ // Make sure we have exclusive access to the mailbox, and enable qpu if necessary. ++ static void gpu_lock(void) { ++ pthread_mutex_lock(&gpu_mutex); +++ ++ if (gpu==NULL) { ++ gpu_init(&gpu); ++ } ++@@ -264,6 +296,16 @@ static void gpu_term(void) ++ unsigned handle = gpu->vc_handle; ++ if (gpu==NULL) ++ return; +++ +++#ifdef RPI_ASYNC +++ { +++ void *res; +++ vpu_post_code(0, 0, 0, 0, 0, 0, -1, NULL); +++ pthread_join(vpu_thread, &res); +++ } +++#endif +++ +++ ++ unmapmem((void*)gpu, sizeof(struct GPU)); ++ mem_unlock(mb, handle); ++ mem_free(mb, handle); ++@@ -322,6 +364,79 @@ unsigned int vpu_get_constants(void) { ++ return gpu->vc + offsetof(struct GPU,transMatrix2even); ++ } ++ +++#ifdef RPI_ASYNC +++ +++static void *vpu_start(void *arg) { +++ while(1) { +++ pthread_mutex_lock(&post_mutex); +++ while( vpu_async_tail - vpu_async_head <= 0) +++ { +++ //printf("Checking number %d %d\n",vpu_async_head,vpu_async_tail); +++ pthread_cond_wait(&post_cond, &post_mutex); +++ } +++ int *p = vpu_cmds[vpu_async_head%MAXCMDS]; +++ pthread_mutex_unlock(&post_mutex); +++ +++ if (p[6] == -1) { +++ break; // Last job +++ } +++ if (p[7]) { +++ GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; +++ //gpu_cache_flush(buf); +++ } +++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); +++ +++ pthread_mutex_lock(&post_mutex); +++ vpu_async_head++; +++ pthread_cond_broadcast(&post_cond); +++ 
pthread_mutex_unlock(&post_mutex); +++ } +++ +++ return NULL; +++} +++ +++// Post a command to the queue +++// Returns an id which we can use to wait for completion +++int vpu_post_code(unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, GPU_MEM_PTR_T *buf) +++{ +++ pthread_mutex_lock(&post_mutex); +++ { +++ int id = vpu_async_tail++; +++ int *p = vpu_cmds[id%MAXCMDS]; +++ int num = vpu_async_tail - vpu_async_head; +++ if (num>MAXCMDS) { +++ printf("Too many commands submitted\n"); +++ exit(-1); +++ } +++ p[0] = code; +++ p[1] = r0; +++ p[2] = r1; +++ p[3] = r2; +++ p[4] = r3; +++ p[5] = r4; +++ p[6] = r5; +++ p[7] = (int) buf; +++ if (num<=1) +++ pthread_cond_broadcast(&post_cond); // Otherwise the vpu thread must already be awake +++ pthread_mutex_unlock(&post_mutex); +++ return id; +++ } +++} +++ +++// Wait for completion of the given command +++void vpu_wait(int id) +++{ +++ pthread_mutex_lock(&post_mutex); +++ while( id + 1 - vpu_async_head > 0) +++ { +++ pthread_cond_wait(&post_cond, &post_mutex); +++ } +++ pthread_mutex_unlock(&post_mutex); +++} +++ +++#endif +++ +++ ++ unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5) ++ { ++ unsigned r; ++@@ -334,7 +449,9 @@ unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, ++ static int count=0; ++ static long long countr2=0; ++ #endif +++#ifndef RPI_ASYNC ++ gpu_lock(); +++#endif ++ #ifdef RPI_TIME_TOTAL_VPU ++ start_time = Microseconds(); ++ if (last_time==0) ++@@ -351,7 +468,9 @@ unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, ++ if ((count&0x7f)==0) ++ printf("VPU %d %lld On=%dms, Off=%dms\n",count,countr2,(int)(on_time/1000),(int)(off_time/1000)); ++ #endif +++#ifndef RPI_ASYNC ++ gpu_unlock(); +++#endif ++ return r; ++ } ++ ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 814fc3c..3526fce 100644 ++--- a/libavcodec/rpi_qpu.h 
+++++ b/libavcodec/rpi_qpu.h ++@@ -36,6 +36,8 @@ extern unsigned int qpu_get_fn(int num); ++ extern unsigned int vpu_get_fn(void); ++ extern unsigned int vpu_get_constants(void); ++ extern unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); +++extern int vpu_post_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, GPU_MEM_PTR_T *buf); +++extern void vpu_wait( int id); ++ ++ // Simple test of shader code ++ extern int rpi_test_shader(void); ++-- ++2.7.4 ++ ++ ++From 016d3db644e60fbe272bfcf1d7c3670c82422317 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 6 May 2015 15:03:37 +0100 ++Subject: [PATCH 11/68] Added different signal when tail moves ++ ++--- ++ libavcodec/rpi_qpu.c | 11 ++++++----- ++ 1 file changed, 6 insertions(+), 5 deletions(-) ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 378dd74..d1c3e20 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -122,7 +122,8 @@ pthread_t vpu_thread; ++ static void *vpu_start(void *arg); ++ ++ #define MAXCMDS 128 ++-static pthread_cond_t post_cond = PTHREAD_COND_INITIALIZER; +++static pthread_cond_t post_cond_head = PTHREAD_COND_INITIALIZER; +++static pthread_cond_t post_cond_tail = PTHREAD_COND_INITIALIZER; ++ static pthread_mutex_t post_mutex = PTHREAD_MUTEX_INITIALIZER; ++ ++ static int vpu_cmds[MAXCMDS][8]; ++@@ -372,7 +373,7 @@ static void *vpu_start(void *arg) { ++ while( vpu_async_tail - vpu_async_head <= 0) ++ { ++ //printf("Checking number %d %d\n",vpu_async_head,vpu_async_tail); ++- pthread_cond_wait(&post_cond, &post_mutex); +++ pthread_cond_wait(&post_cond_tail, &post_mutex); ++ } ++ int *p = vpu_cmds[vpu_async_head%MAXCMDS]; ++ pthread_mutex_unlock(&post_mutex); ++@@ -388,7 +389,7 @@ static void *vpu_start(void *arg) { ++ ++ pthread_mutex_lock(&post_mutex); ++ vpu_async_head++; ++- pthread_cond_broadcast(&post_cond); 
+++ pthread_cond_broadcast(&post_cond_head); ++ pthread_mutex_unlock(&post_mutex); ++ } ++ ++@@ -417,7 +418,7 @@ int vpu_post_code(unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned ++ p[6] = r5; ++ p[7] = (int) buf; ++ if (num<=1) ++- pthread_cond_broadcast(&post_cond); // Otherwise the vpu thread must already be awake +++ pthread_cond_broadcast(&post_cond_tail); // Otherwise the vpu thread must already be awake ++ pthread_mutex_unlock(&post_mutex); ++ return id; ++ } ++@@ -429,7 +430,7 @@ void vpu_wait(int id) ++ pthread_mutex_lock(&post_mutex); ++ while( id + 1 - vpu_async_head > 0) ++ { ++- pthread_cond_wait(&post_cond, &post_mutex); +++ pthread_cond_wait(&post_cond_head, &post_mutex); ++ } ++ pthread_mutex_unlock(&post_mutex); ++ } ++-- ++2.7.4 ++ ++ ++From b04a72641253dc89fd1ec688035c3e2a946aa370 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 7 May 2015 08:57:11 +0100 ++Subject: [PATCH 12/68] Add option to test for gpu_idle ++ ++--- ++ libavcodec/hevc.c | 3 ++- ++ libavcodec/rpi_qpu.c | 18 ++++++++++++++++++ ++ 2 files changed, 20 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 98dbd69..2e269b6 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2527,7 +2527,6 @@ static void rpi_execute_transform(HEVCContext *s) ++ // s->hevcdsp.idct[4-2](coeffs, 16); ++ //} ++ ++- ++ gpu_cache_flush(&s->coeffs_buf_accelerated); ++ s->vpu_id = vpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, &s->coeffs_buf_accelerated); ++ //vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); ++@@ -2669,6 +2668,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ if (s->enable_rpi && x_ctb + ctb_size >= s->ps.sps->width) { ++ int x; ++ // Transform all blocks +++ 
//printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); +++ ++ rpi_execute_transform(s); ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index d1c3e20..85f49db 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -199,6 +199,17 @@ static int gpu_init(volatile struct GPU **gpu) { ++ return 0; ++ } ++ +++// Returns 1 if the gpu is currently idle +++static int gpu_idle(void) +++{ +++ int ret = pthread_mutex_trylock(&gpu_mutex); +++ if (ret==0) { +++ pthread_mutex_unlock(&gpu_mutex); +++ return 1; +++ } +++ return 0; +++} +++ ++ // Make sure we have exclusive access to the mailbox, and enable qpu if necessary. ++ static void gpu_lock(void) { ++ pthread_mutex_lock(&gpu_mutex); ++@@ -400,6 +411,13 @@ static void *vpu_start(void *arg) { ++ // Returns an id which we can use to wait for completion ++ int vpu_post_code(unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, GPU_MEM_PTR_T *buf) ++ { +++ // If the gpu is idle then just run the command immediately +++ // This works, but doesn't seem to give any benefit +++ // if (gpu_idle()) { +++ // vpu_execute_code( code, r0, r1, r2, r3, r4, r5); +++ // return -1; // TODO perhaps a wraparound bug here? 
+++ // } +++ ++ pthread_mutex_lock(&post_mutex); ++ { ++ int id = vpu_async_tail++; ++-- ++2.7.4 ++ ++ ++From e7b457e683d4ca92bf2677b69708fbfc3849847b Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 7 May 2015 11:01:35 +0100 ++Subject: [PATCH 13/68] Added deblocking pass ++ ++--- ++ libavcodec/hevc.c | 33 +++++++++++++++++++++++++++------ ++ libavcodec/hevc.h | 7 ++++++- ++ libavcodec/hevc_filter.c | 6 +++++- ++ libavcodec/rpi_qpu.c | 2 +- ++ 4 files changed, 39 insertions(+), 9 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 2e269b6..29f8415 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2518,6 +2518,17 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, ++ } ++ ++ #ifdef RPI +++static void rpi_execute_dblk_cmds(HEVCContext *s) +++{ +++ int n; +++ int ctb_size = 1 << s->ps.sps->log2_ctb_size; +++ int (*p)[2] = s->dblk_cmds; +++ for(n = s->num_dblk_cmds; n>0 ;n--,p++) { +++ ff_hevc_hls_filters(s, (*p)[0], (*p)[1], ctb_size); +++ } +++ s->num_dblk_cmds = 0; +++} +++ ++ static void rpi_execute_transform(HEVCContext *s) ++ { ++ int i=2; ++@@ -2631,7 +2642,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; ++ ++ #ifdef RPI ++- int start_ctb_x = (s->sh.slice_ctb_addr_rs % ((s->ps.sps->width + ctb_size - 1) >> s->ps.sps->log2_ctb_size)) << s->ps.sps->log2_ctb_size; ++ s->enable_rpi = 1; // TODO this should depend on cross component and frame width etc. 
++ #endif ++ ++@@ -2665,7 +2675,10 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ #ifdef RPI ++- if (s->enable_rpi && x_ctb + ctb_size >= s->ps.sps->width) { +++ if (s->enable_rpi) { +++ s->dblk_cmds[s->num_dblk_cmds][0] = x_ctb; +++ s->dblk_cmds[s->num_dblk_cmds++][1] = y_ctb; +++ if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= s->ps.sps->width) { ++ int x; ++ // Transform all blocks ++ //printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++@@ -2678,10 +2691,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ // Perform intra prediction and residual reconstruction ++ rpi_execute_pred_cmds(s); ++ // Perform deblocking for CTBs in this row ++- for(x = start_ctb_x; x <= x_ctb; x += ctb_size) { // TODO this will fail for tiles ++- ff_hevc_hls_filters(s, x, y_ctb, ctb_size); ++- } ++- start_ctb_x = 0; +++ rpi_execute_dblk_cmds(s); +++ } ++ } ++ #endif ++ if (more_data < 0) { ++@@ -2699,6 +2710,16 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ff_hevc_hls_filters(s, x_ctb, y_ctb, ctb_size); ++ } ++ +++#ifdef RPI +++ if (s->enable_rpi && s->num_dblk_cmds) { +++ rpi_execute_transform(s); +++ rpi_execute_inter_cmds(s); +++ vpu_wait(s->vpu_id); +++ rpi_execute_pred_cmds(s); +++ rpi_execute_dblk_cmds(s); +++ } +++#endif +++ ++ if (x_ctb + ctb_size >= s->ps.sps->width && ++ y_ctb + ctb_size >= s->ps.sps->height) ++ ff_hevc_hls_filter(s, x_ctb, y_ctb, ctb_size); ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 0d8dfe9..990bd8c 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -808,6 +808,8 @@ typedef struct HEVCLocalContext { ++ #define RPI_MAX_XFM_CMDS (16*3*(RPI_MAX_WIDTH/4)) ++ // Each block can have an intra prediction and a transform_add command ++ #define RPI_MAX_PRED_CMDS 
(2*16*3*(RPI_MAX_WIDTH/4)) +++// Worst case is 16x16 CTUs +++#define RPI_MAX_DEBLOCK_CMDS (RPI_MAX_WIDTH*4/16) ++ ++ #define RPI_CMD_LUMA_UNI 0 ++ #define RPI_CMD_CHROMA_UNI 1 ++@@ -867,6 +869,9 @@ typedef struct HEVCPredCmd { ++ #endif ++ ++ typedef struct HEVCContext { +++#ifdef RPI +++ int dblk_cmds[RPI_MAX_DEBLOCK_CMDS][2]; +++#endif ++ const AVClass *c; // needed by private avoptions ++ AVCodecContext *avctx; ++ ++@@ -891,11 +896,11 @@ typedef struct HEVCContext { ++ GPU_MEM_PTR_T coeffs_buf_accelerated; ++ int16_t *coeffs_buf_arm[4]; ++ unsigned int coeffs_buf_vc[4]; ++- ++ int num_coeffs[4]; ++ int num_xfm_cmds; ++ int num_mv_cmds; ++ int num_pred_cmds; +++ int num_dblk_cmds; ++ int vpu_id; ++ #endif ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index e4c3da7..ea0af91 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -877,8 +877,12 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ if (s->threads_type & FF_THREAD_FRAME ) ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0); ++ } ++- } else if (s->threads_type & FF_THREAD_FRAME && x_end) +++ } else if (s->threads_type & FF_THREAD_FRAME && x_end) { +++ int newh = y + ctb_size - 4; +++ //int currh = s->ref->tf.progress->data[0]; +++ //if (((y + ctb_size)&63)==0) ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); +++ } ++ } ++ ++ void ff_hevc_hls_filters(HEVCContext *s, int x_ctb, int y_ctb, int ctb_size) ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 85f49db..3b6dae7 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -105,7 +105,7 @@ struct GPU ++ static pthread_mutex_t gpu_mutex = PTHREAD_MUTEX_INITIALIZER; ++ static volatile struct GPU* gpu = NULL; ++ ++-#ifdef RPI_TIME_TOTAL_QPU +++#if defined(RPI_TIME_TOTAL_QPU) || defined(RPI_TIME_TOTAL_VPU) ++ static unsigned int Microseconds(void) { ++ struct timespec ts; ++ unsigned int x; ++-- ++2.7.4 ++ ++ ++From 
7a443df9115f21b4428de378bd146dcdba3dd42a Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 7 May 2015 16:47:47 +0100 ++Subject: [PATCH 14/68] Added option to disable deblocking for non-ref frames ++ ++--- ++ libavcodec/hevc_filter.c | 10 ++++++++++ ++ 1 file changed, 10 insertions(+) ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index ea0af91..2cdd621 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -25,6 +25,8 @@ ++ //#define DISABLE_SAO ++ //#define DISABLE_DEBLOCK ++ //#define DISABLE_STRENGTHS +++// define DISABLE_DEBLOCK_NONREF for a 6% speed boost (by skipping deblocking on unimportant frames) +++//#define DISABLE_DEBLOCK_NONREF ++ ++ #include "libavutil/common.h" ++ #include "libavutil/internal.h" ++@@ -504,6 +506,14 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->ps.sps->pcm.loop_filter_disable_flag) || ++ s->ps.pps->transquant_bypass_enable_flag; ++ +++#ifdef DISABLE_DEBLOCK_NONREF +++ if ( s->nal_unit_type == NAL_TRAIL_N || +++ s->nal_unit_type == NAL_TSA_N || +++ s->nal_unit_type == NAL_STSA_N || +++ s->nal_unit_type == NAL_RADL_N || +++ s->nal_unit_type == NAL_RASL_N ) +++ return; // Don't deblock non-reference frames +++#endif ++ #ifdef DISABLE_DEBLOCK ++ return; ++ #endif ++-- ++2.7.4 ++ ++ ++From 9606e160a582db64ccf981d971cdc258d8cc02f7 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Mon, 11 May 2015 10:00:27 +0100 ++Subject: [PATCH 15/68] Moved buffers to VPU memory ++ ++--- ++ libavcodec/hevc_filter.c | 17 +++++++++++++- ++ libavcodec/utils.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ ++ libavutil/buffer.c | 6 +++++ ++ libavutil/buffer.h | 3 +++ ++ 4 files changed, 84 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 2cdd621..e1b32d4 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -866,6 +866,13 @@ 
void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ #undef CB ++ #undef CR ++ +++#ifdef RPI_INTER_QPU +++static void flush_buffer(AVBufferRef *bref) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); +++ gpu_cache_flush(p); +++} +++#endif +++ ++ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ { ++ int x_end = x >= s->ps.sps->width - ctb_size; ++@@ -888,9 +895,17 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0); ++ } ++ } else if (s->threads_type & FF_THREAD_FRAME && x_end) { ++- int newh = y + ctb_size - 4; +++ //int newh = y + ctb_size - 4; ++ //int currh = s->ref->tf.progress->data[0]; ++ //if (((y + ctb_size)&63)==0) +++ if (!( s->nal_unit_type == NAL_TRAIL_N || +++ s->nal_unit_type == NAL_TSA_N || +++ s->nal_unit_type == NAL_STSA_N || +++ s->nal_unit_type == NAL_RADL_N || +++ s->nal_unit_type == NAL_RASL_N )) { +++ flush_buffer(s->frame->buf[1]); +++ flush_buffer(s->frame->buf[2]); +++ } ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++ } ++diff --git a/libavcodec/utils.c b/libavcodec/utils.c ++index f7adb52..708526e 100644 ++--- a/libavcodec/utils.c +++++ b/libavcodec/utils.c ++@@ -26,6 +26,12 @@ ++ */ ++ ++ #include "config.h" +++ +++#ifdef RPI +++// Move video buffers to GPU memory +++#define RPI_GPU_BUFFERS +++#endif +++ ++ #include "libavutil/atomic.h" ++ #include "libavutil/attributes.h" ++ #include "libavutil/avassert.h" ++@@ -64,6 +70,10 @@ ++ #include "libavutil/ffversion.h" ++ const char av_codec_ffversion[] = "FFmpeg version " FFMPEG_VERSION; ++ +++#ifdef RPI_GPU_BUFFERS +++#include "rpi_qpu.h" +++#endif +++ ++ #if HAVE_PTHREADS || HAVE_W32THREADS || HAVE_OS2THREADS ++ static int default_lockmgr_cb(void **arg, enum AVLockOp op) ++ { ++@@ -503,6 +513,47 @@ int avcodec_fill_audio_frame(AVFrame *frame, int nb_channels, ++ return ret; ++ } ++ +++#ifdef RPI_GPU_BUFFERS +++static void 
rpi_buffer_default_free(void *opaque, uint8_t *data) +++{ +++ GPU_MEM_PTR_T *p = opaque; +++ gpu_free(p); +++ av_free(p); +++} +++ +++static AVBufferRef *rpi_buffer_alloc(int size) +++{ +++ AVBufferRef *ret = NULL; +++ uint8_t *data = NULL; +++ GPU_MEM_PTR_T *p; +++ +++ static int total=0; +++ total+=size; +++ +++ p = av_malloc(sizeof *p); +++ if (!p) +++ return NULL; +++ +++ if (gpu_malloc_cached(size,p)<0) // Change this line to choose cached or uncached memory. The caching here refers to the ARM data cache. +++ return NULL; +++ +++ data = p->arm; +++ printf("Rpi alloc %d/%d ARM=%p VC=%x->%x\n",size,total,p->arm,p->vc,p->vc+size); +++ //memset(data, 64, size); +++ +++ if (!data) +++ return NULL; +++ +++ ret = av_buffer_create(data, size, rpi_buffer_default_free, p, 0); +++ if (!ret) { +++ gpu_free(p); +++ av_freep(&p); +++ } +++ +++ return ret; +++} +++#endif +++ ++ static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame) ++ { ++ FramePool *pool = avctx->internal->pool; ++@@ -550,6 +601,14 @@ static int update_frame_pool(AVCodecContext *avctx, AVFrame *frame) ++ av_buffer_pool_uninit(&pool->pools[i]); ++ pool->linesize[i] = linesize[i]; ++ if (size[i]) { +++#ifdef RPI_GPU_BUFFERS +++ if (avctx->codec_id == AV_CODEC_ID_HEVC) +++ pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1, +++ CONFIG_MEMORY_POISONING ? +++ NULL : +++ rpi_buffer_alloc); +++ else +++#endif ++ pool->pools[i] = av_buffer_pool_init(size[i] + 16 + STRIDE_ALIGN - 1, ++ CONFIG_MEMORY_POISONING ? 
++ NULL : ++diff --git a/libavutil/buffer.c b/libavutil/buffer.c ++index 694e116..203ca7b 100644 ++--- a/libavutil/buffer.c +++++ b/libavutil/buffer.c ++@@ -425,3 +425,9 @@ AVBufferRef *av_buffer_pool_get(AVBufferPool *pool) ++ ++ return ret; ++ } +++ +++// Return the opaque for the underlying frame (gives us a GPU_MEM_PTR_T) +++void *av_buffer_pool_opaque(AVBufferRef *ref) { +++ BufferPoolEntry *buf = av_buffer_get_opaque(ref); +++ return buf->opaque; +++} ++diff --git a/libavutil/buffer.h b/libavutil/buffer.h ++index 0c0ce12..82e0bc3 100644 ++--- a/libavutil/buffer.h +++++ b/libavutil/buffer.h ++@@ -283,6 +283,9 @@ void av_buffer_pool_uninit(AVBufferPool **pool); ++ */ ++ AVBufferRef *av_buffer_pool_get(AVBufferPool *pool); ++ +++// Return the opaque for the underlying frame +++void *av_buffer_pool_opaque(AVBufferRef *ref); +++ ++ /** ++ * @} ++ */ ++-- ++2.7.4 ++ ++ ++From f56515b9a720c829ba3ddf6da4232a91b13e0f03 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Mon, 11 May 2015 14:04:37 +0100 ++Subject: [PATCH 16/68] Prepared QPU execute code ++ ++--- ++ libavcodec/hevc.c | 227 ++++++++++++++++++++++++++++++++++++++++------- ++ libavcodec/hevc.h | 22 ++++- ++ libavcodec/hevc_filter.c | 7 +- ++ libavcodec/rpi_qpu.c | 55 +++++++++++- ++ libavcodec/rpi_qpu.h | 2 + ++ 5 files changed, 276 insertions(+), 37 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 29f8415..66ed37a 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -42,17 +42,45 @@ ++ #include "profiles.h" ++ ++ #ifdef RPI ++-#include "rpi_qpu.h" ++-// For some unknown reason, the code seems to crash if I do a late malloc ++-#define EARLY_MALLOC ++-// Move Inter prediction into separate pass ++-#define RPI_INTER +++ #include "rpi_qpu.h" +++ // For some unknown reason, the code seems to crash if I do a late malloc +++ #define EARLY_MALLOC +++ // Move Inter prediction into separate pass +++ #define RPI_INTER ++ #endif ++ ++ // #define 
DISABLE_MC ++ ++ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 }; ++ +++ +++#ifdef RPI_INTER_QPU +++ +++#define RPI_CHROMA_COMMAND_WORDS 12 +++// The QPU code for UV blocks only works up to a block width of 8 +++#define RPI_CHROMA_BLOCK_WIDTH 8 +++ +++#define ENCODE_COEFFS(c0, c1, c2, c3) (((-c0) & 0xff) | ((-c1) & 0xff) << 8 | ((-c2) & 0xff) << 16 | ((-c3) & 0xff) << 24) +++ +++// TODO Chroma only needs 4 taps +++static uint32_t rpi_filter_coefs[8][2] = { +++ { ENCODE_COEFFS( 0, 0, 0, 128), ENCODE_COEFFS( 0, 0, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -2, 58), ENCODE_COEFFS( 10, -2, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -4, 54), ENCODE_COEFFS( 16, -2, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -6, 46), ENCODE_COEFFS( 28, -4, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -4, 36), ENCODE_COEFFS( 36, -4, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -4, 28), ENCODE_COEFFS( 46, -6, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -2, 16), ENCODE_COEFFS( 54, -4, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, -2, 10), ENCODE_COEFFS( 58, -2, 0, 0 ) } +++}; +++ +++static uint32_t get_vc_address(AVBufferRef *bref) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); +++ return p->vc; +++} +++ +++#endif +++ ++ /** ++ * NOTE: Each function hls_foo correspond to the function foo in the ++ * specification (HLS stands for High Level Syntax). 
++@@ -66,6 +94,7 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ static void pic_arrays_free(HEVCContext *s) ++ { ++ #ifdef RPI +++ ++ #ifdef EARLY_MALLOC ++ #else ++ printf("pic_arrays_free\n"); ++@@ -1982,6 +2011,43 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ s->sh.luma_offset_l0[current_mv.ref_idx[0]]); ++ ++ if (s->ps.sps->chroma_format_idc) { +++#ifdef RPI_INTER_QPU +++ if (s->enable_rpi) { +++ int reflist = 0; +++ int hshift = s->ps.sps->hshift[1]; +++ int vshift = s->ps.sps->vshift[1]; +++ const Mv *mv = ¤t_mv.mv[reflist]; +++ intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift); +++ intptr_t my = av_mod_uintp2(mv->y, 2 + vshift); +++ intptr_t _mx = mx << (1 - hshift); +++ intptr_t _my = my << (1 - vshift); // Fractional part of motion vector +++ +++ int x1_c = x0_c + (mv->x >> (2 + hshift)); +++ int y1_c = y0_c + (mv->y >> (2 + hshift)); +++ int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width +++ +++ uint32_t *u = s->u_mvs[chan & 7]; +++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { +++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); +++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); +++ // TODO chroma weight and offset... 
s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] +++ *u++ = rpi_filter_coefs[_mx][0]; +++ *u++ = rpi_filter_coefs[_mx][1]; +++ *u++ = rpi_filter_coefs[_my][0]; +++ *u++ = rpi_filter_coefs[_my][1]; +++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ } +++ } +++ s->u_mvs[chan & 7] = u; +++ return; +++ } +++#endif ++ RPI_REDIRECT(chroma_mc_uni)(s, dst1, s->frame->linesize[1], ref0->frame->data[1], ref0->frame->linesize[1], ++ 0, x0_c, y0_c, nPbW_c, nPbH_c, ¤t_mv, ++ s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0]); ++@@ -2632,6 +2698,54 @@ static void rpi_execute_inter_cmds(HEVCContext *s) ++ ++ #endif ++ +++#ifdef RPI_INTER_QPU +++static void rpi_inter_clear(HEVCContext *s) +++{ +++ int i; +++ int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1]; +++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1]; +++ for(i=0;i<8;i++) { +++ s->u_mvs[i] = s->mvs_base[i]; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = pic_width; +++ *s->u_mvs[i]++ = pic_height; +++ *s->u_mvs[i]++ = s->frame->linesize[1]; +++ *s->u_mvs[i]++ = s->frame->linesize[2]; +++ s->u_mvs[i] += 3; // Padding words +++ } +++} +++ +++static void rpi_execute_inter_qpu(HEVCContext *s) +++{ +++ int k; +++ uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr.vc; +++ +++ if (s->sh.slice_type == I_SLICE) +++ return; +++ for(k=0;k<8;k++) { +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ } +++ +++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = 
qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore +++ +++ qpu_run_shader8(qpu_get_fn(QPU_MC_SETUP_UV), +++ (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[2 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[3 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) +++ ); +++} +++#endif +++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ { ++ HEVCContext *s = avctxt->priv_data; ++@@ -2658,6 +2772,10 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ } ++ } ++ +++#ifdef RPI_INTER_QPU +++ rpi_inter_clear(s); +++#endif +++ ++ while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) { ++ int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts]; ++ ++@@ -2679,19 +2797,30 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->dblk_cmds[s->num_dblk_cmds][0] = x_ctb; ++ s->dblk_cmds[s->num_dblk_cmds++][1] = y_ctb; ++ if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= s->ps.sps->width) { ++- int x; +++#ifdef RPI_INTER_QPU +++ // Kick off inter prediction on QPUs +++ rpi_execute_inter_qpu(s); +++#endif ++ // Transform all blocks ++ //printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++- ++ rpi_execute_transform(s); ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); ++ // Wait for transform completion ++ vpu_wait(s->vpu_id); +++ +++ // Copy back reconstructed data +++ 
//memcpy(s->frame->data[0],s->dummy.arm,2048*64); +++ //memcpy(s->frame->data[1],s->dummy.arm,1024*32); +++ //memcpy(s->frame->data[2],s->dummy.arm,1024*32); +++ ++ // Perform intra prediction and residual reconstruction ++ rpi_execute_pred_cmds(s); ++ // Perform deblocking for CTBs in this row ++ rpi_execute_dblk_cmds(s); +++#ifdef RPI_INTER_QPU +++ rpi_inter_clear(s); +++#endif ++ } ++ } ++ #endif ++@@ -2712,6 +2841,9 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ ++ #ifdef RPI ++ if (s->enable_rpi && s->num_dblk_cmds) { +++#ifdef RPI_INTER_QPU +++ rpi_execute_inter_qpu(s); +++#endif ++ rpi_execute_transform(s); ++ rpi_execute_inter_cmds(s); ++ vpu_wait(s->vpu_id); ++@@ -3451,6 +3583,14 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ av_freep(&s->unif_xfm_cmds); ++ av_freep(&s->univ_pred_cmds); ++ +++#ifdef RPI_INTER_QPU +++ if (s->unif_mvs) { +++ gpu_free( &s->unif_mvs_ptr ); +++ s->unif_mvs = 0; +++ } +++#endif +++ //gpu_free(&s->dummy); +++ ++ #ifdef EARLY_MALLOC ++ printf("hevc_decode_free\n"); ++ if (s->coeffs_buf_arm[0]) { ++@@ -3541,34 +3681,59 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ if (!s->univ_pred_cmds) ++ goto fail; ++ ++- s->coeffs_buf_arm[0] = 0; ++- s->coeffs_buf_arm[2] = 0; +++#ifdef RPI_INTER_QPU +++ // We divide the image into blocks 256 wide and 64 high +++ // We support up to 2048 widths +++ // We compute the number of chroma motion vector commands for 4:4:4 format and 4x4 chroma blocks - assuming all blocks are B predicted +++ // Also add space for the startup command for each stream. 
+++ +++ { +++ int uv_commands_per_qpu = (1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS; +++ uint32_t *p; +++ gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++ s->unif_mvs = (uint32_t *) s->unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC +++ +++ // Set up initial locations for uniform streams +++ p = s->unif_mvs; +++ for(i = 0; i < 8; i++) { +++ s->mvs_base[i] = p; +++ p += uv_commands_per_qpu; +++ } +++ s->mc_filter_uv = qpu_get_fn(QPU_MC_FILTER_UV); +++ s->mc_filter_uv_b = qpu_get_fn(QPU_MC_FILTER_UV_B); +++ +++ } +++#endif +++ //gpu_malloc_uncached(2048*64,&s->dummy); ++ ++ #ifdef EARLY_MALLOC ++- int coeffs_in_ctb = 64*64; ++- int coefs_per_row = (2048/64) * coeffs_in_ctb * 3; // Allow space for chroma ++- printf("Allocated %d\n",coefs_per_row); ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); ++- s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; ++- if (!s->coeffs_buf_arm[0]) ++- goto fail; ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); ++- s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; ++- s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; ++- if (!s->coeffs_buf_arm[2]) ++- goto fail; ++- s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; ++- s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; ++- printf("Done\n"); +++ { +++ int coeffs_in_ctb = 64*64; +++ int coefs_per_row = (2048/64) * coeffs_in_ctb * 3; // Allow space for chroma +++ s->coeffs_buf_arm[0] = 0; +++ s->coeffs_buf_arm[2] = 0; +++ printf("Allocated %d\n",coefs_per_row); +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); +++ s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; +++ if (!s->coeffs_buf_arm[0]) +++ goto fail; +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); +++ s->coeffs_buf_arm[2] = (int16_t*) 
s->coeffs_buf_accelerated.arm; +++ s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; +++ if (!s->coeffs_buf_arm[2]) +++ goto fail; +++ s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; +++ s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; +++ printf("Done\n"); ++ #ifdef RPI_PRECLEAR ++- //memset(s->coeffs_buf_arm[0],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[0], coefs_per_row); ++- //memset(s->coeffs_buf_arm[2],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[2], coefs_per_row); ++- //memset(s->coeffs_buf_arm[3],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[3], coefs_per_row); +++ //memset(s->coeffs_buf_arm[0],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[0], coefs_per_row); +++ //memset(s->coeffs_buf_arm[2],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[2], coefs_per_row); +++ //memset(s->coeffs_buf_arm[3],0, sizeof(int16_t) * coefs_per_row); +++ memclear16(s->coeffs_buf_arm[3], coefs_per_row); ++ #endif ++- +++ } ++ #endif ++ ++ s->enable_rpi = 0; ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 990bd8c..da345f6 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -42,7 +42,11 @@ ++ ++ // define RPI to split the CABAC/prediction/transform into separate stages ++ #ifdef RPI ++-#include "rpi_qpu.h" +++ +++ #include "rpi_qpu.h" +++ // Use QPU for inter prediction +++ //#define RPI_INTER_QPU +++ ++ #endif ++ ++ #define MAX_DPB_SIZE 16 // A.4.1 ++@@ -888,7 +892,7 @@ typedef struct HEVCContext { ++ ++ #ifdef RPI ++ int enable_rpi; ++- HEVCMvCmd *unif_mv_cmds; +++ HEVCMvCmd *unif_mv_cmds; // TODO rename ++ HEVCXfmCmd *unif_xfm_cmds; ++ HEVCPredCmd *univ_pred_cmds; ++ int buf_width; ++@@ -902,6 +906,20 @@ typedef struct HEVCContext { ++ int num_pred_cmds; ++ int num_dblk_cmds; ++ int vpu_id; +++ //GPU_MEM_PTR_T dummy; +++#ifdef RPI_INTER_QPU +++ GPU_MEM_PTR_T unif_mvs_ptr; +++ uint32_t 
*unif_mvs; // Base of memory for motion vector commands +++ +++ // _base pointers are to the start of the row +++ uint32_t *mvs_base[8]; +++ // these pointers are to the next free space +++ uint32_t *u_mvs[8]; +++ // Function pointers +++ uint32_t mc_filter_uv; +++ uint32_t mc_filter_uv_b; +++#endif +++ ++ #endif ++ ++ uint8_t *cabac_state; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index e1b32d4..5b3d759 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -903,8 +903,11 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ s->nal_unit_type == NAL_STSA_N || ++ s->nal_unit_type == NAL_RADL_N || ++ s->nal_unit_type == NAL_RASL_N )) { ++- flush_buffer(s->frame->buf[1]); ++- flush_buffer(s->frame->buf[2]); +++ //flush_buffer(s->frame->buf[1]); +++ //flush_buffer(s->frame->buf[2]); +++ //memcpy(s->dummy.arm,s->frame->data[0],2048*64); +++ //memcpy(s->dummy.arm,s->frame->data[1],1024*32); +++ //memcpy(s->dummy.arm,s->frame->data[2],1024*32); ++ } ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 3b6dae7..e4dd58a 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -3,7 +3,7 @@ ++ // This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. ++ #define RPI_USE_VCSM ++ // define RPI_TIME_TOTAL_QPU to print out how much time is spent in the QPU code ++-//#define RPI_TIME_TOTAL_QPU +++#define RPI_TIME_TOTAL_QPU ++ // define RPI_TIME_TOTAL_VPU to print out how much time is spent in the VPI code ++ //#define RPI_TIME_TOTAL_VPU ++ // define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion ++@@ -30,7 +30,7 @@ ++ #endif ++ ++ // On Pi2 there is no way to access the VPU L2 cache ++-// GPU_MEM_FLG should be 4 for uncached memory. +++// GPU_MEM_FLG should be 4 for uncached memory. 
(Or C for alias to allocate in the VPU L2 cache) ++ // However, if using VCSM allocated buffers, need to use C at the moment because VCSM does not allocate uncached memory correctly ++ // The QPU crashes if we mix L2 cached and L2 uncached accesses due to a HW bug. ++ #define GPU_MEM_FLG 0xC ++@@ -549,6 +549,54 @@ void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int un ++ gpu_unlock(); ++ } ++ +++// Run a program on 8 QPUs with the given code and uniform stream (given in GPU addresses) +++void qpu_run_shader8(int code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8) +++{ +++ int i; +++#ifdef RPI_TIME_TOTAL_QPU +++ static int last_time=0; +++ static long long on_time=0; +++ static long long off_time=0; +++ int start_time; +++ int end_time; +++ static int count=0; +++#endif +++ +++ gpu_lock(); +++#ifdef RPI_TIME_TOTAL_QPU +++ start_time = Microseconds(); +++ if (last_time==0) +++ last_time = start_time; +++ off_time += start_time-last_time; +++#endif +++ for(i=0;i<8;i++) { +++ gpu->mail[i*2 + 1] = code; +++ } +++ gpu->mail[0 ] = unifs1; +++ gpu->mail[2 ] = unifs2; +++ gpu->mail[4 ] = unifs3; +++ gpu->mail[6 ] = unifs4; +++ gpu->mail[8 ] = unifs5; +++ gpu->mail[10] = unifs6; +++ gpu->mail[12] = unifs7; +++ gpu->mail[14] = unifs8; +++ execute_qpu( +++ gpu->mb, +++ 8 /* Number of QPUs */, +++ gpu->vc + offsetof(struct GPU, mail), +++ 1 /* no flush */, // Don't flush VPU L1 cache +++ 5000 /* timeout ms */); +++#ifdef RPI_TIME_TOTAL_QPU +++ end_time = Microseconds(); +++ last_time = end_time; +++ on_time += end_time - start_time; +++ count++; +++ if ((count&0x7f)==0) +++ printf("On=%dms, Off=%dms\n",(int)(on_time/1000),(int)(off_time/1000)); +++#endif +++ gpu_unlock(); +++} +++ ++ unsigned int qpu_get_fn(int num) { ++ // Make sure that the gpu is initialized ++ unsigned int *fn; ++@@ -585,6 +633,9 @@ unsigned int qpu_get_fn(int num) { ++ case QPU_MC_FILTER_UV_B: ++ fn = mc_filter_uv_b; ++ break; +++ 
case QPU_MC_INTERRUPT_EXIT8: +++ fn = mc_interrupt_exit8; +++ break; ++ case QPU_MC_END: ++ fn = mc_end; ++ break; ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 3526fce..2b22d98 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -16,6 +16,7 @@ extern void gpu_free(GPU_MEM_PTR_T *p); ++ extern void gpu_cache_flush(GPU_MEM_PTR_T *p); ++ ++ // QPU specific functions +++extern void qpu_run_shader8(int code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); ++ extern void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, int unifs9, int unifs10, int unifs11, int unifs12); ++ ++ enum { ++@@ -28,6 +29,7 @@ enum { ++ QPU_MC_SETUP_UV, ++ QPU_MC_FILTER_UV, ++ QPU_MC_FILTER_UV_B, +++ QPU_MC_INTERRUPT_EXIT8, ++ QPU_MC_END ++ }; ++ extern unsigned int qpu_get_fn(int num); ++-- ++2.7.4 ++ ++ ++From bd651e1569ebe0cdc41a6be169e139758cce069d Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 13 May 2015 11:47:23 +0100 ++Subject: [PATCH 17/68] Drafted chroma interpolation on QPUs ++ ++--- ++ libavcodec/hevc.c | 5 ++- ++ libavcodec/hevc.h | 2 +- ++ libavcodec/hevc_filter.c | 6 ++- ++ libavcodec/rpi_qpu.c | 101 +++++++++++++++++++++++++++++++++++++++++++-- ++ libavcodec/rpi_qpu.h | 1 + ++ libavcodec/rpi_shader.c | 42 +++++++++---------- ++ libavcodec/rpi_shader.qasm | 42 +++++++++---------- ++ 7 files changed, 149 insertions(+), 50 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 66ed37a..d5ea45e 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -60,11 +60,11 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++ ++-#define ENCODE_COEFFS(c0, c1, c2, c3) (((-c0) & 0xff) | ((-c1) & 0xff) 
<< 8 | ((-c2) & 0xff) << 16 | ((-c3) & 0xff) << 24) +++#define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++ // TODO Chroma only needs 4 taps ++ static uint32_t rpi_filter_coefs[8][2] = { ++- { ENCODE_COEFFS( 0, 0, 0, 128), ENCODE_COEFFS( 0, 0, 0, 0 ) }, +++ { ENCODE_COEFFS( 0, 0, 0, 64), ENCODE_COEFFS( 0, 0, 0, 0 ) }, ++ { ENCODE_COEFFS( 0, 0, -2, 58), ENCODE_COEFFS( 10, -2, 0, 0 ) }, ++ { ENCODE_COEFFS( 0, 0, -4, 54), ENCODE_COEFFS( 16, -2, 0, 0 ) }, ++ { ENCODE_COEFFS( 0, 0, -6, 46), ENCODE_COEFFS( 28, -4, 0, 0 ) }, ++@@ -2729,6 +2729,7 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ for(k=0;k<8;k++) { ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP); // Also need a dummy for V ++ } ++ ++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index da345f6..2497c47 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -45,7 +45,7 @@ ++ ++ #include "rpi_qpu.h" ++ // Use QPU for inter prediction ++- //#define RPI_INTER_QPU +++ // #define RPI_INTER_QPU ++ ++ #endif ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 5b3d759..9b6e26d 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -903,8 +903,10 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ s->nal_unit_type == NAL_STSA_N || ++ s->nal_unit_type == NAL_RADL_N || ++ s->nal_unit_type == NAL_RASL_N )) { ++- //flush_buffer(s->frame->buf[1]); ++- //flush_buffer(s->frame->buf[2]); +++#ifdef RPI_INTER_QPU +++ 
flush_buffer(s->frame->buf[1]); +++ flush_buffer(s->frame->buf[2]); +++#endif ++ //memcpy(s->dummy.arm,s->frame->data[0],2048*64); ++ //memcpy(s->dummy.arm,s->frame->data[1],1024*32); ++ //memcpy(s->dummy.arm,s->frame->data[2],1024*32); ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index e4dd58a..4d9eda8 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -33,7 +33,8 @@ ++ // GPU_MEM_FLG should be 4 for uncached memory. (Or C for alias to allocate in the VPU L2 cache) ++ // However, if using VCSM allocated buffers, need to use C at the moment because VCSM does not allocate uncached memory correctly ++ // The QPU crashes if we mix L2 cached and L2 uncached accesses due to a HW bug. ++-#define GPU_MEM_FLG 0xC +++#define GPU_MEM_FLG 0x4 +++// GPU_MEM_MAP is meaningless on the Pi2 and should be left at 0 (On Pi1 it allows ARM to access VPU L2 cache) ++ #define GPU_MEM_MAP 0x0 ++ ++ #define vcos_verify(x) ((x)>=0) ++@@ -165,6 +166,8 @@ static int gpu_init(volatile struct GPU **gpu) { ++ ptr->vc_handle = handle; ++ ptr->vc = vc; ++ +++ printf("GPU allocated at 0x%x\n",vc); +++ ++ *gpu = ptr; ++ ++ // Now copy over the QPU code into GPU memory ++@@ -304,10 +307,13 @@ int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p) { ++ ++ static void gpu_term(void) ++ { ++- int mb = gpu->mb; ++- unsigned handle = gpu->vc_handle; +++ int mb; +++ unsigned handle; +++ ++ if (gpu==NULL) ++ return; +++ mb = gpu->mb; +++ handle = gpu->vc_handle; ++ ++ #ifdef RPI_ASYNC ++ { ++@@ -648,6 +654,95 @@ unsigned int qpu_get_fn(int num) { ++ } ++ ++ #if 0 +++typedef unsigned int uint32_t; +++ +++typedef struct mvs_s { +++ GPU_MEM_PTR_T unif_mvs_ptr; +++ uint32_t *unif_mvs; // Base of memory for motion vector commands +++ +++ // _base pointers are to the start of the row +++ uint32_t *mvs_base[8]; +++ // these pointers are to the next free space +++ uint32_t *u_mvs[8]; +++ +++} HEVCContext; +++ +++#define RPI_CHROMA_COMMAND_WORDS 12 +++ +++static void 
rpi_inter_clear(HEVCContext *s) +++{ +++ int i; +++ for(i=0;i<8;i++) { +++ s->u_mvs[i] = s->mvs_base[i]; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 0; +++ *s->u_mvs[i]++ = 128; // w +++ *s->u_mvs[i]++ = 128; // h +++ *s->u_mvs[i]++ = 128; // stride u +++ *s->u_mvs[i]++ = 128; // stride v +++ s->u_mvs[i] += 3; // Padding words +++ } +++} +++ +++static void rpi_execute_inter_qpu(HEVCContext *s) +++{ +++ int k; +++ uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr.vc; +++ +++ for(k=0;k<8;k++) { +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP); // dummy location for V +++ } +++ +++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore +++ +++ qpu_run_shader8(qpu_get_fn(QPU_MC_SETUP_UV), +++ (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[2 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[3 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) +++ ); +++} +++ +++void rpi_test_qpu(void) +++{ +++ HEVCContext mvs; +++ HEVCContext *s = &mvs; +++ int i; +++ int uv_commands_per_qpu = (1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS; +++ uint32_t *p; +++ printf("Allocate 
memory\n"); +++ gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++ s->unif_mvs = (uint32_t *) s->unif_mvs_ptr.arm; +++ +++ // Set up initial locations for uniform streams +++ p = s->unif_mvs; +++ for(i = 0; i < 8; i++) { +++ s->mvs_base[i] = p; +++ p += uv_commands_per_qpu; +++ } +++ // Now run a simple program that should just quit immediately after a single texture fetch +++ rpi_inter_clear(s); +++ for(i=0;i<4;i++) { +++ printf("Launch QPUs\n"); +++ rpi_execute_inter_qpu(s); +++ printf("Done\n"); +++ } +++ printf("Free memory\n"); +++ gpu_free(&s->unif_mvs_ptr); +++ return; +++} +++#endif +++ +++#if 0 ++ ++ int32_t hcoeffs[] = {-4, 10, -21, 70, 90, -24, 11, -4}; ++ //int32_t hcoeffs[] = {1, 1, 1, 1, 1, 1, 1, 1}; ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 2b22d98..f9ad333 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -18,6 +18,7 @@ extern void gpu_cache_flush(GPU_MEM_PTR_T *p); ++ // QPU specific functions ++ extern void qpu_run_shader8(int code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); ++ extern void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, int unifs9, int unifs10, int unifs11, int unifs12); +++extern void rpi_test_qpu(void); ++ ++ enum { ++ QPU_MC_SETUP, ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 41cc2e1..d7ed297 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -152,23 +152,23 @@ unsigned int rpi_shader[] = { ++ /* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++ /* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++ /* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000400] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, 
ra0 ++-/* [0x00000408] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-/* [0x00000410] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000400] */ 0x55015fc6, 0x100248a2, // mov r2, rb21 ; mul24 r2, r0, ra0 +++/* [0x00000408] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000410] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++ /* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000420] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000420] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ /* [0x00000428] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000430] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000430] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ /* [0x00000438] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000440] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000440] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++ /* [0x00000448] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000450] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000450] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++ /* [0x00000458] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000460] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000460] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++ /* [0x00000468] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000470] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 
7, r0 << 7 +++/* [0x00000470] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++ /* [0x00000478] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000480] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000480] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++ /* [0x00000488] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++ /* [0x00000490] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++ /* [0x00000498] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++@@ -179,20 +179,20 @@ unsigned int rpi_shader[] = { ++ /* [0x000004c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++ /* [0x000004c8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++ /* [0x000004d0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x000004d8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x000004d8] */ 0x8f54e1f6, 0xd0024821, // asr r0, r0, 14 ; mov r1, ra21 ++ /* [0x000004e0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x000004e8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x000004f0] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x000004f8] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000500] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000508] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000510] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000518] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000520] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000528] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x000004e8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x000004f0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x000004f8] */ 0x4c30c237, 0x10024860, 
// add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000500] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000508] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000510] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000518] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000520] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000528] */ 0x8c9f223f, 0x100a0867, // add.ifnn r1, r1, r0 ; mov -, vw_wait ++ /* [0x00000530] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ /* [0x00000538] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000540] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x00000540] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++ /* [0x00000548] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++ /* [0x00000550] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++ /* [0x00000558] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 6851e83..02fdcb2 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -270,23 +270,23 @@ add t0s, ra_x2_base, r2 ++ ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++mov r2, rb21 ; mul24 r2, r0, ra0 +++nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++nop ; mul24 r3, ra1 << 1, r0 << 1 ++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++ 
nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++ nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++ nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++ nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-sub r0, r2, r3 +++add r0, r2, r3 ++ ++ mov r3, rb31 ++ ++@@ -302,23 +302,23 @@ sub.setf -, r3, 8 ; mov r1, ra22 ++ # apply horizontal filter ++ brr.anyn -, r:uvloop ++ max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++-asr r0, r0, 15 ; mov r1, ra21 +++asr r0, r0, 14 ; mov r1, ra21 ++ min.setf ra15, r0, rb22 ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r0, ra14, rb14 ++-sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-sub.ifnn r1, r1, r0 ; mov -, vw_wait +++nop ; mul24 r1, ra14, rb14 +++nop ; mul24 r0, ra13, rb13 +++add r1, r1, r0 ; mul24 r0, ra12, rb12 +++add r1, r1, r0 ; mul24 r0, ra11, rb11 +++add r1, r1, r0 ; mul24 r0, ra10, rb10 +++add r1, r1, r0 ; mul24 r0, ra9, rb9 +++add r1, r1, r0 ; mul24 r0, ra8, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb15 +++add.ifnn r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ brr.anyn -, r:uvloop ++-asr r1, r1, 15 +++asr r1, r1, 14 ++ min r1, r1, rb22 ++ max vpm, r1, 0 ++ ++-- ++2.7.4 ++ ++ ++From 61628063461ee5d891af6dbedfd495efcf464012 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 13 May 2015 13:54:11 +0100 ++Subject: [PATCH 18/68] Fixed chroma inter prediction ++ ++--- ++ libavcodec/hevc.c | 8 +- ++ 
libavcodec/hevc.h | 2 +- ++ libavcodec/rpi_shader.c | 1170 ++++++++++++++++++++++---------------------- ++ libavcodec/rpi_shader.h | 22 +- ++ libavcodec/rpi_shader.qasm | 24 +- ++ 5 files changed, 617 insertions(+), 609 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index d5ea45e..d6d78ee 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -57,9 +57,11 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ #ifdef RPI_INTER_QPU ++ ++ #define RPI_CHROMA_COMMAND_WORDS 12 +++#define UV_COMMANDS_PER_QPU ((1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS) ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++ +++ ++ #define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++ // TODO Chroma only needs 4 taps ++@@ -2024,7 +2026,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ ++ int x1_c = x0_c + (mv->x >> (2 + hshift)); ++ int y1_c = y0_c + (mv->y >> (2 + hshift)); ++- int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width +++ //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width +++ int chan = x0>>8; ++ ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++@@ -2730,6 +2733,7 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP); // Also need a dummy for V +++ assert(s->u_mvs[k] - s->mvs_base[k] < UV_COMMANDS_PER_QPU); ++ } ++ ++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++@@ -3689,7 +3693,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ // Also add space for the startup command for each stream. ++ ++ { ++- int uv_commands_per_qpu = (1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS; +++ int uv_commands_per_qpu = UV_COMMANDS_PER_QPU; ++ uint32_t *p; ++ gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); ++ s->unif_mvs = (uint32_t *) s->unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 2497c47..d513579 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -45,7 +45,7 @@ ++ ++ #include "rpi_qpu.h" ++ // Use QPU for inter prediction ++- // #define RPI_INTER_QPU +++ #define RPI_INTER_QPU ++ ++ #endif ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index d7ed297..831633b 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -33,7 +33,7 @@ unsigned int rpi_shader[] = { ++ /* [0x00000040] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++ /* [0x00000048] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++ /* [0x00000050] */ 
0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x00000058] */ 0x00000040, 0xe0020567, // mov ra21, 64 +++/* [0x00000058] */ 0x00000020, 0xe0020567, // mov ra21, 32 ++ /* [0x00000060] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++ /* [0x00000068] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++ /* [0x00000070] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++@@ -152,7 +152,7 @@ unsigned int rpi_shader[] = { ++ /* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++ /* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++ /* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000400] */ 0x55015fc6, 0x100248a2, // mov r2, rb21 ; mul24 r2, r0, ra0 +++/* [0x00000400] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++ /* [0x00000408] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++ /* [0x00000410] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++ /* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++@@ -178,9 +178,9 @@ unsigned int rpi_shader[] = { ++ /* [0x000004b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++ /* [0x000004c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++ /* [0x000004c8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004d0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x000004d8] */ 0x8f54e1f6, 0xd0024821, // asr r0, r0, 14 ; mov r1, ra21 ++-/* [0x000004e0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x000004d0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000004d8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000004e0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ /* [0x000004e8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++ /* [0x000004f0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++ /* [0x000004f8] */ 
0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++@@ -189,400 +189,400 @@ unsigned int rpi_shader[] = { ++ /* [0x00000510] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++ /* [0x00000518] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++ /* [0x00000520] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000528] */ 0x8c9f223f, 0x100a0867, // add.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000528] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++ /* [0x00000530] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000538] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000540] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000548] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000550] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000558] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000560] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000568] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000570] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000578] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000580] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000588] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000590] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000598] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000538] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000540] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000548] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000550] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000558] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000560] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000568] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000570] */ 0x159ddfc0, 
0x10021c67, // mov vw_setup, rb29 +++/* [0x00000578] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000580] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000588] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000590] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000598] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000005a0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000005a8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter ++-/* [0x000005a0] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000005a8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000005b0] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x000005b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000005c0] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x000005c8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000005d0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x000005d8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000005e0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x000005e8] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x000005f0] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x000005f8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000600] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000608] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000610] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000618] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000620] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000628] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000630] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000638] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000640] */ 0x0d9d8e40, 0x10021767, // sub 
rb29, rb24, r1 ++-/* [0x00000648] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000650] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000658] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000660] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000668] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000670] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000678] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000680] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000688] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000690] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000698] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000006b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000006d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000006f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000700] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000708] */ 0x000001d0, 0xf07809e7, // brr.anynn -, r:fast_path ++-/* [0x00000710] */ 0x0f9d71c0, 
0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000718] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000720] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000728] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000005b0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000005b8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000005c0] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x000005c8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000005d0] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x000005d8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000005e0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x000005e8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000005f0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x000005f8] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x00000600] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x00000608] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000610] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x00000618] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000620] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000628] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000630] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000638] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000640] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000648] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000650] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000658] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000660] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000668] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000670] */ 0x119c71c0, 
0xd0020827, // shl r0, r0, 7 +++/* [0x00000678] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000680] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000688] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000690] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000698] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000006a0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000006c0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000006e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000700] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000708] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000710] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000718] */ 0x000001d0, 0xf07809e7, // brr.anynn -, r:fast_path +++/* [0x00000720] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000728] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000730] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000738] */ 0x00000000, 
0xe00208e7, // mov r3, 0 ++ // :loop ++-/* [0x00000730] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000738] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000740] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000748] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000750] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000758] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000760] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000768] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000770] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000778] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000780] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000788] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000790] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000798] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000007a0] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000007a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000007b0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000007b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000007c0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000007c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000007d0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000007d8] */ 0x40173031, 
0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000007e0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000007e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000007f0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000007f8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000800] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x00000808] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000810] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000818] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000820] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000828] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000830] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000838] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000840] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000848] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:loop ++-/* [0x00000850] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00000858] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00000860] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00000868] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000870] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000878] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000880] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000888] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000890] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000898] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x000008a0] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 
++-/* [0x000008a8] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x000008b0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000008b8] */ 0xfffffe58, 0xf06809e7, // brr.anyn -, r:loop ++-/* [0x000008c0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x000008c8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000008d0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000008d8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000008e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000008e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000008f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000740] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000748] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000750] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000758] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000760] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000768] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000770] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000778] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000780] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000788] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000790] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000798] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x000007a0] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000007a8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* 
[0x000007b0] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000007b8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000007c0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000007c8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000007d0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000007d8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000007e0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000007e8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000007f0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000007f8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000800] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000808] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000810] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000818] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000820] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000828] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000830] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000838] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000840] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000848] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000850] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000858] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:loop +++/* [0x00000860] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00000868] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00000870] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* 
[0x00000878] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000880] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000888] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000890] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000898] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x000008a0] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x000008a8] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x000008b0] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000008b8] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x000008c0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000008c8] */ 0xfffffe58, 0xf06809e7, // brr.anyn -, r:loop +++/* [0x000008d0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x000008d8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000008e0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000008e8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000008f0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000008f8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000900] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // :fast_path ++-/* [0x000008f8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000908] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :fast_loop ++-/* [0x00000900] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000908] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000910] */ 0x95727d9b, 0x1004475f, // mov.ifz ra_y, ra_y_next ; mov rb31, r3 ++-/* [0x00000918] */ 0x95690dbf, 0x10044623, // mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch ++-/* [0x00000920] */ 0x13740dc0, 0xd00208a7, // max r2, 
ra_y, 0 ++-/* [0x00000928] */ 0x929de5e4, 0x100248a1, // min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 ++-/* [0x00000930] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000938] */ 0xec414c87, 0x10024e20, // add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000940] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000948] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000950] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000958] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000960] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000968] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000970] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000978] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000980] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000988] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000990] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000998] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x000009a0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x000009a8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000009b0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000009b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000009c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000009c8] */ 0xffffff18, 0xf06809e7, // brr.anyn -, r:fast_loop ++-/* [0x000009d0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x000009d8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x000009e0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x000009e8] */ 0x4038e037, 0x100049e0, 
// nop ; mul24 r0, ra14, rb14 ++-/* [0x000009f0] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x000009f8] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000a00] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000a08] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000a10] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000a18] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000a20] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000a28] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000a30] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000a38] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:fast_loop ++-/* [0x00000a40] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x00000a48] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a50] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a60] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000910] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000918] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000920] */ 0x95727d9b, 0x1004475f, // mov.ifz ra_y, ra_y_next ; mov rb31, r3 +++/* [0x00000928] */ 0x95690dbf, 0x10044623, // mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch +++/* [0x00000930] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000938] */ 0x929de5e4, 0x100248a1, // min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 +++/* [0x00000940] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, 
r3 +++/* [0x00000948] */ 0xec414c87, 0x10024e20, // add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 +++/* [0x00000950] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000958] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000960] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000968] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000970] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000978] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000980] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000988] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000990] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000998] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 +++/* [0x000009a0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x000009a8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x000009b0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x000009b8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000009c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000009c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000009d0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000009d8] */ 0xffffff18, 0xf06809e7, // brr.anyn -, r:fast_loop +++/* [0x000009e0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x000009e8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x000009f0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x000009f8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000a00] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000a08] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 
r0, ra12, rb12 +++/* [0x00000a10] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000a18] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000a20] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000a28] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000a30] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000a38] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000a40] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000a48] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:fast_loop +++/* [0x00000a50] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x00000a58] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a60] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a68] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a70] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a78] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a80] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_b ++-/* [0x00000a78] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000a80] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000a88] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x00000a90] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000a98] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x00000aa0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000aa8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x00000ab0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000ab8] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x00000ac0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x00000ac8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, 
r2, 3 ++-/* [0x00000ad0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000ad8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000ae0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000ae8] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000af0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000af8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000b00] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000b08] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000b10] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000b18] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000b20] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000b28] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000b30] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000b38] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000b40] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000b48] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000b50] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000b58] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000b60] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000b68] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000b70] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 ++-/* [0x00000b78] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000b80] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000b88] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000b90] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000b98] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000ba0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ba8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bb0] */ 
0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bb8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000bc0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bc8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bd0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bd8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000be0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000be8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bf0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bf8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000c00] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000c08] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000c10] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000a88] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000a90] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000a98] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x00000aa0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000aa8] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x00000ab0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000ab8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x00000ac0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000ac8] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x00000ad0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x00000ad8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x00000ae0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000ae8] */ 0x0c9e74c0, 0x100208a7, // 
add r2, r2, r3 +++/* [0x00000af0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000af8] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000b00] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000b08] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000b10] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000b18] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000b20] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000b28] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000b30] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000b38] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000b40] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000b48] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000b50] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000b58] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000b60] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000b68] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000b70] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000b78] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000b80] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00000b88] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000b90] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000b98] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ba0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ba8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000bb0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bb8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bc0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bc8] */ 0x8f8171f6, 0x10024120, // asr ra4, 
r0, rb23; mov r0, unif +++/* [0x00000bd0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bd8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000be0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000be8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000bf0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000bf8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000c00] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000c08] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000c10] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000c18] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000c20] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :bloop ++-/* [0x00000c18] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000c20] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000c28] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000c30] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000c38] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000c40] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000c48] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000c50] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000c58] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000c60] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000c68] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000c70] */ 0x55015fc6, 
0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000c78] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000c80] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000c88] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000c90] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000c98] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000ca0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000ca8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000cb0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000cb8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000cc0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000cc8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000cd0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000cd8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000ce0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000ce8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x00000cf0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000cf8] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000d00] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000d08] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000d10] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000d18] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000d20] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000d28] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000d30] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:bloop ++-/* 
[0x00000d38] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00000d40] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00000d48] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00000d50] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000d58] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000d60] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000d68] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000d70] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000d78] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000d80] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000d88] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000d90] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000d98] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000da0] */ 0x8fc8f3f6, 0xd0020867, // asr r1, r1, 15 ; mov -, vr_wait ++-/* [0x00000da8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000db0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x00000db8] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:bloop ++-/* [0x00000dc0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000dc8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x00000dd0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x00000dd8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000de0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000de8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000df0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000c28] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000c30] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; 
mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000c38] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000c40] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000c48] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000c50] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000c58] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000c60] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000c68] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000c70] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000c78] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000c80] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000c88] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000c90] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000c98] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000ca0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000ca8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000cb0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000cb8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000cc0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000cc8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000cd0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000cd8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000ce0] */ 0x401b2031, 0xd000c9e3, // nop ; 
mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000ce8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000cf0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000cf8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000d00] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000d08] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000d10] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000d18] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000d20] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000d28] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000d30] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000d38] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000d40] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:bloop +++/* [0x00000d48] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00000d50] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00000d58] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00000d60] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000d68] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000d70] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000d78] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000d80] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000d88] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000d90] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000d98] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000da0] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000da8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000db0] */ 
0x8fc8f3f6, 0xd0020867, // asr r1, r1, 15 ; mov -, vr_wait +++/* [0x00000db8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000dc0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x00000dc8] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:bloop +++/* [0x00000dd0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000dd8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00000de0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x00000de8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000df0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000df8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000e00] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_honly ++-/* [0x00000df8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000e00] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000e08] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x00000e10] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000e18] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x00000e20] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000e28] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x00000e30] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000e38] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x00000e40] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x00000e48] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x00000e50] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000e58] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000e60] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000e68] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000e70] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000e78] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* 
[0x00000e80] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000e88] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000e90] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000e98] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000ea0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000ea8] */ 0x0c9de1c0, 0xd0021467, // add rb17, r0, -2 ++-/* [0x00000eb0] */ 0x919c71c0, 0xd0024812, // shl r0, r0, 7 ; mov rb18,r0 ++-/* [0x00000eb8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000ec0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000ec8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000ed0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000ed8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ee0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ee8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ef0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000ef8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f00] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f08] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f10] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000f18] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000f20] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000f28] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000f30] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000e08] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000e10] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000e18] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next +++/* [0x00000e20] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* 
[0x00000e28] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 +++/* [0x00000e30] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000e38] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif +++/* [0x00000e40] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000e48] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x00000e50] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 +++/* [0x00000e58] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 +++/* [0x00000e60] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000e68] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 +++/* [0x00000e70] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000e78] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 +++/* [0x00000e80] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000e88] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000e90] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000e98] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000ea0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000ea8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000eb0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000eb8] */ 0x0c9de1c0, 0xd0021467, // add rb17, r0, -2 +++/* [0x00000ec0] */ 0x919c71c0, 0xd0024812, // shl r0, r0, 7 ; mov rb18,r0 +++/* [0x00000ec8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000ed0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000ed8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000ee0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000ee8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ef0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ef8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f00] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov 
r0, unif +++/* [0x00000f08] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f10] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f18] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f20] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000f28] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000f30] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000f38] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000f40] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :loop_honly ++-/* [0x00000f38] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000f40] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000f48] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000f50] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000f58] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000f60] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000f68] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000f70] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000f78] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000f80] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000f88] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000f90] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000f98] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000fa0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000fa8] */ 0x4d0be4f0, 0xd00248a3, // sub r2, 
r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000fb0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000fb8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000fc0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000fc8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000fd0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000fd8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000fe0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000fe8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000ff0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000ff8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001000] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001008] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001010] */ 0x8d5927f6, 0x100269e1, // sub.setf -, r3, rb18 ; mov r1, ra22 ++-/* [0x00001018] */ 0x559f2fc1, 0x100049e0, // mov -, vw_wait ; mul24 r0, r0, r1 ++-/* [0x00001020] */ 0xfffffef8, 0xf06809e7, // brr.anyn -, r:loop_honly ++-/* [0x00001028] */ 0x0f9cf1c0, 0xd0020827, // asr r0, r0, 15 ++-/* [0x00001030] */ 0x129d61c0, 0x10020827, // min r0, r0, rb22 ++-/* [0x00001038] */ 0x139c01c0, 0xd0020c27, // max vpm, r0, 0 ++-/* [0x00001040] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001048] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001050] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001058] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000f48] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000f50] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; 
mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000f58] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000f60] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000f68] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 +++/* [0x00000f70] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000f78] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000f80] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000f88] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000f90] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000f98] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000fa0] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000fa8] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000fb0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000fb8] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000fc0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000fc8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000fd0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000fd8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000fe0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000fe8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000ff0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000ff8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001000] */ 0x401b2031, 0xd000c9e3, // nop ; 
mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001008] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001010] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001018] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 +++/* [0x00001020] */ 0x8d5927f6, 0x100269e1, // sub.setf -, r3, rb18 ; mov r1, ra22 +++/* [0x00001028] */ 0x559f2fc1, 0x100049e0, // mov -, vw_wait ; mul24 r0, r0, r1 +++/* [0x00001030] */ 0xfffffef8, 0xf06809e7, // brr.anyn -, r:loop_honly +++/* [0x00001038] */ 0x0f9cf1c0, 0xd0020827, // asr r0, r0, 15 +++/* [0x00001040] */ 0x129d61c0, 0x10020827, // min r0, r0, rb22 +++/* [0x00001048] */ 0x139c01c0, 0xd0020c27, // max vpm, r0, 0 +++/* [0x00001050] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001058] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001060] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001068] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00001060] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001068] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00001070] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001078] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001070] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001078] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++ /* [0x00001080] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00001088] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001090] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00001098] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000010a0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00001090] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001098] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010a0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000010a8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000010b0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_exit1 ++-/* 
[0x000010a8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000010b0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010b8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010b8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x000010c0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x000010c8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000010d8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000010e0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000010d0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010d8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010e0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000010e8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000010f0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit ++-/* [0x000010e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000010f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000010f8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x00001100] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00001108] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001110] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001118] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001110] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001118] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00001120] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001128] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001130] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++@@ -592,225 +592,227 @@ unsigned int rpi_shader[] = { ++ /* [0x00001150] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001158] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001160] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001168] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00001170] */ 
0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00001178] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00001168] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001170] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001178] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001180] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00001188] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit4 ++-/* [0x00001180] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001188] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001190] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001190] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x00001198] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x000011a0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000011b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011a8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011b0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x000011b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000011c0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000011c8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000011d0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000011c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000011d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000011d8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000011e0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x000011d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000011e0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011e8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000011e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x000011f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x000011f8] */ 0x009e7000, 
0xa00009e7, // ldtmu0 ++-/* [0x00001200] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001208] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001200] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001208] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00001210] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001218] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001220] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001228] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001230] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001238] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00001240] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00001248] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00001238] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001240] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001248] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001250] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00001258] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_setup_uv ++-/* [0x00001250] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00001258] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num ++-/* [0x00001260] */ 0x15827d80, 0x10020767, // mov ra_y, unif ++-/* [0x00001268] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif ++-/* [0x00001270] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00001278] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base ++-/* [0x00001280] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 ++-/* [0x00001288] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++-/* [0x00001290] */ 0x15827d80, 0x10021427, // mov rb16, unif ++-/* [0x00001298] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000012a0] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++-/* [0x000012a8] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++-/* [0x000012b0] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* 
[0x000012b8] */ 0x00000040, 0xe0020567, // mov ra21, 64 ++-/* [0x000012c0] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x000012c8] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++-/* [0x000012d0] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x000012d8] */ 0x00000040, 0xe0021567, // mov rb21, 64 ++-/* [0x000012e0] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x000012e8] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x000012f0] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x000012f8] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x00001300] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x00001308] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x00001310] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x00001318] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x00001320] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x00001328] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x00001330] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00001338] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00001340] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00001348] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00001350] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00001358] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00001360] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00001368] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001370] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00001378] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00001380] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00001388] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00001390] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00001398] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x000013a0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x000013a8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x000013b0] */ 0x159e7480, 
0x10020827, // mov r0, r2 ++-/* [0x000013b8] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x000013c0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000013c8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x000013d0] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x000013d8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x000013e0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000013e8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x000013f0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000013f8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x00001400] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00001408] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00001410] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++-/* [0x00001418] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00001420] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x00001428] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x00001430] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00001438] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001440] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001260] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00001268] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num +++/* [0x00001270] */ 0x15827d80, 0x10020767, // mov ra_y, unif +++/* [0x00001278] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif +++/* [0x00001280] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00001288] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base +++/* [0x00001290] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00001298] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 +++/* [0x000012a0] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* 
[0x000012a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000012b0] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x000012b8] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x000012c0] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x000012c8] */ 0x00000020, 0xe0020567, // mov ra21, 32 +++/* [0x000012d0] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x000012d8] */ 0x00000008, 0xe00205e7, // mov ra23, 8 +++/* [0x000012e0] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x000012e8] */ 0x00000040, 0xe0021567, // mov rb21, 64 +++/* [0x000012f0] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x000012f8] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00001300] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00001308] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x00001310] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x00001318] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x00001320] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x00001328] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x00001330] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x00001338] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x00001340] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00001348] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00001350] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00001358] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00001360] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00001368] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00001370] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00001378] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001380] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00001388] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00001390] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00001398] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000013a0] */ 
0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x000013a8] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x000013b0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x000013b8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x000013c0] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x000013c8] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x000013d0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000013d8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x000013e0] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x000013e8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x000013f0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000013f8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x00001400] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x00001408] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x00001410] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00001418] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00001420] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 +++/* [0x00001428] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00001430] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x00001438] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x00001440] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++ /* [0x00001448] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001450] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00001458] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00001460] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00001468] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001470] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00001478] */ 0x0c427380, 0x10020e27, // add t0s, r1, 
ra_x_base ++-/* [0x00001480] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00001450] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001458] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001460] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00001468] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00001470] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00001478] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001480] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00001488] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00001490] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv_b ++-/* [0x00001488] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00001490] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00001498] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000014a0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000014a8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000014b0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000014b8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000014c0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000014c8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000014d0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000014d8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000014e0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000014e8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000014f0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000014f8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00001500] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00001508] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00001510] */ 0x0c9c51c0, 0xd0021467, // add 
rb17, r0, 5 ++-/* [0x00001518] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00001520] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00001528] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00001530] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00001538] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00001540] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001548] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001550] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00001558] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 ++-/* [0x00001560] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00001568] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001570] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001578] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001580] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001588] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00001590] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001598] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015a0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015a8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000015b0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015b8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015c0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015c8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000015d0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015d8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015e0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; 
mul24 r0, r0, ra22 ++-/* [0x000015e8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x000015f0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000015f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001600] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00001498] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000014a0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000014a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000014b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000014b8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000014c0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000014c8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000014d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000014d8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000014e0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000014e8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000014f0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000014f8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00001500] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001508] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00001510] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00001518] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00001520] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00001528] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00001530] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00001538] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00001540] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00001548] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00001550] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* 
[0x00001558] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001560] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00001568] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00001570] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00001578] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001580] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001588] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001590] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001598] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000015a0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015a8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015b0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015b8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000015c0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015c8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015d0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015d8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000015e0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015e8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015f0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000015f8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00001600] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00001608] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001610] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00001608] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, 
rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00001610] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00001618] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00001620] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00001628] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00001630] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00001638] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00001640] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00001648] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00001650] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00001658] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001660] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00001668] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-/* [0x00001670] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00001678] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00001680] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00001688] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00001690] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00001698] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000016a0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000016a8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000016b0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000016b8] */ 0x40173031, 0xd000c9e3, // 
nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000016c0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000016c8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000016d0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000016d8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x000016e0] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x000016e8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x000016f0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x000016f8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001700] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001708] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001710] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001718] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001720] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00001728] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00001730] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00001738] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00001740] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00001748] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00001750] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00001758] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00001760] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00001768] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00001770] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00001778] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00001780] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* 
[0x00001788] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x00001790] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00001798] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x000017a0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000017a8] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x000017b0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000017b8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x000017c0] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x000017c8] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x000017d0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000017d8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000017e0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000017e8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000017f0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000017f8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001800] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00001808] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001810] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00001618] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001620] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00001628] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00001630] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001638] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00001640] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001648] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00001650] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00001658] */ 0xec414c8f, 
0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00001660] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00001668] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001670] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00001678] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++/* [0x00001680] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00001688] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00001690] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00001698] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000016a0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000016a8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000016b0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000016b8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000016c0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000016c8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000016d0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000016d8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000016e0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000016e8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x000016f0] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x000016f8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00001700] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00001708] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001710] */ 0x152e7d80, 0x100202a7, // 
mov ra10, ra11 +++/* [0x00001718] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001720] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001728] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001730] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00001738] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00001740] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00001748] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00001750] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00001758] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00001760] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00001768] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00001770] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00001778] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00001780] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00001788] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00001790] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00001798] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x000017a0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000017a8] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x000017b0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000017b8] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x000017c0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000017c8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x000017d0] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x000017d8] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x000017e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* 
[0x000017e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000017f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000017f8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00001800] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001808] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001810] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00001818] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001820] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index db971f4..3464cdb 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -5,16 +5,16 @@ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 146) ++-#define mc_filter (rpi_shader + 360) ++-#define mc_filter_b (rpi_shader + 670) ++-#define mc_filter_honly (rpi_shader + 894) ++-#define mc_exit (rpi_shader + 1048) ++-#define mc_exit1 (rpi_shader + 1066) ++-#define mc_interrupt_exit (rpi_shader + 1082) ++-#define mc_interrupt_exit4 (rpi_shader + 1120) ++-#define mc_interrupt_exit8 (rpi_shader + 1142) ++-#define mc_setup_uv (rpi_shader + 1172) ++-#define mc_filter_uv_b (rpi_shader + 1314) ++-#define mc_end (rpi_shader + 1542) +++#define mc_filter (rpi_shader + 364) +++#define mc_filter_b (rpi_shader + 674) +++#define mc_filter_honly (rpi_shader + 898) +++#define mc_exit (rpi_shader + 1052) +++#define mc_exit1 (rpi_shader + 1070) +++#define mc_interrupt_exit (rpi_shader + 1086) +++#define mc_interrupt_exit4 (rpi_shader + 1124) +++#define mc_interrupt_exit8 (rpi_shader + 1146) +++#define mc_setup_uv (rpi_shader + 1176) +++#define mc_filter_uv_b (rpi_shader + 1318) +++#define mc_end (rpi_shader + 1546) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 02fdcb2..4809e1d 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ 
b/libavcodec/rpi_shader.qasm ++@@ -21,7 +21,7 @@ ++ # rb19 next ra16 ++ # ++ # ra20 1 ++-# ra21 64 +++# ra21 32 ++ # ra22 256 ++ # ra23 8 ++ # ++@@ -97,7 +97,7 @@ add rb24, r1, r0 ++ # load constants ++ ++ mov ra20, 1 ++-mov ra21, 64 +++mov ra21, 32 ++ mov ra22, 256 ++ mov ra23, 8 ++ ++@@ -270,7 +270,7 @@ add t0s, ra_x2_base, r2 ++ ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++-mov r2, rb21 ; mul24 r2, r0, ra0 +++nop ; mul24 r2, r0, ra0 ++ nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++ nop ; mul24 r3, ra1 << 1, r0 << 1 ++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++@@ -301,9 +301,9 @@ sub.setf -, r3, 8 ; mov r1, ra22 ++ ++ # apply horizontal filter ++ brr.anyn -, r:uvloop ++-max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++-asr r0, r0, 14 ; mov r1, ra21 ++-min.setf ra15, r0, rb22 +++mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll +++asr ra15, r0, 8 ; nop +++nop ; nop # Delay slot 3 (TODO move more of the context scroll into here) ++ ++ # apply vertical filter and write to VPM ++ ++@@ -315,12 +315,14 @@ add r1, r1, r0 ; mul24 r0, ra10, rb10 ++ add r1, r1, r0 ; mul24 r0, ra9, rb9 ++ add r1, r1, r0 ; mul24 r0, ra8, rb8 ++ add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-add.ifnn r1, r1, r0 ; mov -, vw_wait +++add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-brr.anyn -, r:uvloop ++ asr r1, r1, 14 ++-min r1, r1, rb22 ++-max vpm, r1, 0 +++add r1, r1, ra21 +++brr.anyn -, r:uvloop +++asr r1, r1, 6 # Delay 1 +++min r1, r1, rb22 # Delay 2 +++max vpm, r1, 0 # Delay 3 ++ ++ # DMA out for U ++ ++@@ -1161,7 +1163,7 @@ add rb24, r1, r0 ++ # load constants ++ ++ mov ra20, 1 ++-mov ra21, 64 +++mov ra21, 32 ++ mov ra22, 256 ++ mov ra23, 8 ++ ++-- ++2.7.4 ++ ++ ++From b7321192751956ed7deceeb3dabe22ccedb8e08d Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 13 May 2015 14:37:32 +0100 ++Subject: [PATCH 19/68] Removed unused luma functions ++ ++--- ++ 
libavcodec/hevc.c | 4 +- ++ libavcodec/rpi_qpu.c | 32 +- ++ libavcodec/rpi_shader.c | 1097 +++++++++++++------------------------------- ++ libavcodec/rpi_shader.h | 19 +- ++ libavcodec/rpi_shader.qasm | 970 +++------------------------------------ ++ 5 files changed, 396 insertions(+), 1726 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index d6d78ee..31b8b2f 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2731,8 +2731,8 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ return; ++ for(k=0;k<8;k++) { ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++- s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++- s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP); // Also need a dummy for V +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for V ++ assert(s->u_mvs[k] - s->mvs_base[k] < UV_COMMANDS_PER_QPU); ++ } ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 4d9eda8..4e90cc1 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -172,7 +172,7 @@ static int gpu_init(volatile struct GPU **gpu) { ++ ++ // Now copy over the QPU code into GPU memory ++ { ++- int num_bytes = qpu_get_fn(QPU_MC_END) - qpu_get_fn(QPU_MC_SETUP); +++ int num_bytes = qpu_get_fn(QPU_MC_END) - qpu_get_fn(QPU_MC_SETUP_UV); ++ assert(num_bytes<=QPU_CODE_SIZE*sizeof(unsigned int)); ++ memcpy((void*)ptr->qpu_code, rpi_shader, num_bytes); ++ } ++@@ -612,24 +612,24 @@ unsigned int qpu_get_fn(int num) { ++ gpu_unlock(); ++ } ++ switch(num) { ++- case QPU_MC_SETUP: ++- fn = mc_setup; ++- break; ++- case QPU_MC_FILTER: ++- fn = 
mc_filter; ++- break; +++ //case QPU_MC_SETUP: +++ // fn = mc_setup; +++ // break; +++ //case QPU_MC_FILTER: +++ // fn = mc_filter; +++ // break; ++ case QPU_MC_EXIT: ++ fn = mc_exit; ++ break; ++- case QPU_MC_INTERRUPT_EXIT: ++- fn = mc_interrupt_exit; ++- break; ++- case QPU_MC_FILTER_B: ++- fn = mc_filter_b; ++- break; ++- case QPU_MC_FILTER_HONLY: ++- fn = mc_filter_honly; ++- break; +++ //case QPU_MC_INTERRUPT_EXIT: +++ // fn = mc_interrupt_exit; +++ // break; +++ //case QPU_MC_FILTER_B: +++ // fn = mc_filter_b; +++ // break; +++ //case QPU_MC_FILTER_HONLY: +++ // fn = mc_filter_honly; +++ // break; ++ case QPU_MC_SETUP_UV: ++ fn = mc_setup_uv; ++ break; ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 831633b..170e8ac 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -21,798 +21,331 @@ __declspec(align(8)) ++ __attribute__((aligned(8))) ++ #endif ++ unsigned int rpi_shader[] = { ++-// ::mc_setup +++// ::mc_setup_uv ++ /* [0x00000000] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++ /* [0x00000008] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num ++ /* [0x00000010] */ 0x15827d80, 0x10020767, // mov ra_y, unif ++ /* [0x00000018] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif ++-/* [0x00000020] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 ++-/* [0x00000028] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++-/* [0x00000030] */ 0x15827d80, 0x10021427, // mov rb16, unif ++-/* [0x00000038] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000040] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++-/* [0x00000048] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++-/* [0x00000050] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x00000058] */ 0x00000020, 0xe0020567, // mov ra21, 32 ++-/* [0x00000060] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x00000068] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++-/* [0x00000070] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000078] */ 
0x00000040, 0xe0021567, // mov rb21, 64 ++-/* [0x00000080] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000088] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x00000090] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x00000098] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x000000a0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x000000a8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x000000b0] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x000000b8] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x000000c0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x000000c8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000d0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x000000d8] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x000000e0] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x000000e8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x000000f0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x000000f8] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000100] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000108] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000110] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000118] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000120] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000128] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000130] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000138] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000140] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000148] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000150] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000158] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000160] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000168] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000170] */ 0x0c9e7040, 
0x10021727, // add rb28, r0, r1 ++-/* [0x00000178] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x00000180] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x00000188] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x00000190] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x00000198] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000001a0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 +++/* [0x00000020] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000028] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base +++/* [0x00000030] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00000038] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 +++/* [0x00000040] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x00000048] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000050] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x00000058] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x00000060] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x00000068] */ 0x00000020, 0xe0020567, // mov ra21, 32 +++/* [0x00000070] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000078] */ 0x00000008, 0xe00205e7, // mov ra23, 8 +++/* [0x00000080] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000088] */ 0x00000040, 0xe0021567, // mov rb21, 64 +++/* [0x00000090] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000098] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x000000a0] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x000000a8] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x000000b0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x000000b8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x000000c0] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x000000c8] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x000000d0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x000000d8] */ 0x00000000, 0xe00203e7, // mov 
ra15, 0 +++/* [0x000000e0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000000e8] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x000000f0] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x000000f8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000100] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000108] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000110] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000118] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000120] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000128] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000130] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000138] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000140] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000148] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000150] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000158] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000160] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000168] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000170] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000178] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000180] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000188] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x00000190] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x00000198] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x000001a0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++ /* [0x000001a8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001b0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x000001b8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x000001c0] */ 0x139c03c0, 
0xd0020867, // max r1, r1, 0 +++/* [0x000001b0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000001b8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001c0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++ /* [0x000001c8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001d0] */ 0x4c9d00cf, 0x10024821, // add r0, r0, r3; mul24 r1, r1, rb_pitch ++-/* [0x000001d8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x000001e0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001e8] */ 0x949dc5c0, 0xd0025890, // and r2, r2, ~3; mov ra_x_base, r0 ++-/* [0x000001f0] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x000001f8] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00000200] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000208] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000210] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000218] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000220] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000228] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000230] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000238] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000240] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x000001d0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x000001d8] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x000001e0] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x000001e8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000001f0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000001f8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000200] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000208] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000210] */ 
0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000218] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000220] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000228] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000230] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv ++-/* [0x00000248] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000250] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000258] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000260] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000268] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000270] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000278] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000280] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000288] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000290] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000298] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000002a0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002b0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002d0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000002d8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x000002e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002e8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000002f0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000002f8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000300] */ 0x0d9c8e40, 
0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000330] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000340] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000350] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000360] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000368] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000370] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000378] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000380] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000388] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000390] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000398] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003a0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000238] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000240] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000248] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000250] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000258] */ 
0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000260] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000268] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000270] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000278] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000280] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000288] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000290] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000298] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002a0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002a8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002b0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002b8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002c0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000002c8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000002d0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000002e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000002f0] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x000002f8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000300] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000308] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000310] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000318] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000320] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, 
rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000340] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000350] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000360] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000368] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000370] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000378] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000380] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000388] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000390] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x000003a8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003b0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000003b8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003c0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003c8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003d0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003d8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003e0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 
0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000400] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000408] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000410] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000420] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000428] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000430] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000438] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000440] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000448] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000450] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000458] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000460] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000468] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000470] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000478] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000480] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000488] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000490] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000498] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x000004a0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x000004a8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000004b0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000004b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* 
[0x000004c0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000004c8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004d0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000004d8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000004e0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000004e8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x000004f0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x000004f8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000500] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000508] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000510] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000518] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000520] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000528] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000530] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000538] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000540] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000548] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000550] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000558] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000560] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000568] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000570] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000578] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000580] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000588] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000590] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* 
[0x00000598] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000005a0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000005a8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// ::mc_filter ++-/* [0x000005b0] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000005b8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000005c0] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x000005c8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000005d0] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x000005d8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000005e0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x000005e8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000005f0] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x000005f8] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x00000600] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x00000608] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000610] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000618] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000620] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000628] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000630] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000638] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000640] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000648] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000650] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000658] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000660] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000668] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000670] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000678] 
*/ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000680] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000688] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000690] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000698] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000006a0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006b0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006b8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000006c0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000006e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000700] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000708] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000710] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000718] */ 0x000001d0, 0xf07809e7, // brr.anynn -, r:fast_path ++-/* [0x00000720] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000728] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000730] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000738] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++-// :loop ++-/* 
[0x00000740] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000748] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000750] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000758] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000760] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000768] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000770] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000778] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000780] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000788] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000790] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000798] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x000007a0] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000007a8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000007b0] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000007b8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000007c0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000007c8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000007d0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000007d8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000007e0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000007e8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 
13, r1 << 13 ++-/* [0x000007f0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000007f8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000800] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000808] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000810] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x00000818] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000820] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000828] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000830] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000838] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000840] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000848] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000850] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000858] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:loop ++-/* [0x00000860] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00000868] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00000870] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00000878] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000880] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000888] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000890] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000898] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x000008a0] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x000008a8] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x000008b0] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000008b8] */ 0x8d9f223f, 
0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x000008c0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000008c8] */ 0xfffffe58, 0xf06809e7, // brr.anyn -, r:loop ++-/* [0x000008d0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x000008d8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000008e0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000008e8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000008f0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000008f8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000900] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// :fast_path ++-/* [0x00000908] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++-// :fast_loop ++-/* [0x00000910] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000918] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000920] */ 0x95727d9b, 0x1004475f, // mov.ifz ra_y, ra_y_next ; mov rb31, r3 ++-/* [0x00000928] */ 0x95690dbf, 0x10044623, // mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch ++-/* [0x00000930] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000938] */ 0x929de5e4, 0x100248a1, // min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 ++-/* [0x00000940] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000948] */ 0xec414c87, 0x10024e20, // add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000950] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000958] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000960] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000968] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000970] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000978] */ 0x4d13c4f0, 0xd00248a3, // 
sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000980] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000988] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000990] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000998] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 ++-/* [0x000009a0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x000009a8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x000009b0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x000009b8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000009c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000009c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000009d0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000009d8] */ 0xffffff18, 0xf06809e7, // brr.anyn -, r:fast_loop ++-/* [0x000009e0] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x000009e8] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x000009f0] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x000009f8] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000a00] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000a08] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000a10] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000a18] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000a20] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000a28] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000a30] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000a38] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000a40] */ 0x4d5927ce, 0x100269e1, // 
sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000a48] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:fast_loop ++-/* [0x00000a50] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x00000a58] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a60] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a68] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a70] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a78] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a80] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// ::mc_filter_b ++-/* [0x00000a88] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000a90] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000a98] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x00000aa0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000aa8] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x00000ab0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000ab8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x00000ac0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000ac8] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x00000ad0] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x00000ad8] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x00000ae0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000ae8] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000af0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000af8] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000b00] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000b08] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000b10] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000b18] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000b20] */ 0x0e9e7080, 
0x10020867, // shr r1, r0, r2 ++-/* [0x00000b28] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000b30] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000b38] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000b40] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000b48] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000b50] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000b58] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000b60] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000b68] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000b70] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000b78] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000b80] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 ++-/* [0x00000b88] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000b90] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000b98] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ba0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ba8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000bb0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bb8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bc0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bc8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000bd0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000bd8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000be0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000be8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000bf0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* 
[0x00000bf8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000c00] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000c08] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000c10] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000c18] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000c20] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++-// :bloop ++-/* [0x00000c28] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000c30] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000c38] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000c40] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000c48] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000c50] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000c58] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000c60] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000c68] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000c70] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000c78] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000c80] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000c88] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000c90] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000c98] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000ca0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000ca8] */ 0x4d0fd4f0, 0xd00248a3, // sub 
r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000cb0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000cb8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000cc0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000cc8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000cd0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000cd8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000ce0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000ce8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000cf0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000cf8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x00000d00] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000d08] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000d10] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000d18] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000d20] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000d28] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000d30] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000d38] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000d40] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:bloop ++-/* [0x00000d48] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00000d50] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00000d58] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00000d60] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000d68] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000d70] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 
r0, ra12, rb12 ++-/* [0x00000d78] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000d80] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000d88] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000d90] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000d98] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000da0] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000da8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000db0] */ 0x8fc8f3f6, 0xd0020867, // asr r1, r1, 15 ; mov -, vr_wait ++-/* [0x00000db8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000dc0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x00000dc8] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:bloop ++-/* [0x00000dd0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000dd8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x00000de0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x00000de8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000df0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000df8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000e00] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// ::mc_filter_honly ++-/* [0x00000e08] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000e10] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000e18] */ 0x156e7d80, 0x10020667, // mov ra_x2shift, ra_x2shift_next ++-/* [0x00000e20] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000e28] */ 0x0c9c81c0, 0xd00208a7, // add r2, r0, 8 ++-/* [0x00000e30] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000e38] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3,unif ++-/* [0x00000e40] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* 
[0x00000e48] */ 0x139c05c0, 0xd00208a7, // max r2, r2, 0 ++-/* [0x00000e50] */ 0x129d95c0, 0x100208a7, // min r2, r2, rb_frame_width_minus_1 ++-/* [0x00000e58] */ 0x119c35c0, 0xd00206e7, // shl ra_x2shift_next, r2, 3 ++-/* [0x00000e60] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000e68] */ 0x0c9e74c0, 0x100208a7, // add r2, r2, r3 ++-/* [0x00000e70] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000e78] */ 0x149dc5c0, 0xd00206a7, // and ra_x2_base_next, r2, ~3 ++-/* [0x00000e80] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000e88] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000e90] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000e98] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000ea0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000ea8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000eb0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000eb8] */ 0x0c9de1c0, 0xd0021467, // add rb17, r0, -2 ++-/* [0x00000ec0] */ 0x919c71c0, 0xd0024812, // shl r0, r0, 7 ; mov rb18,r0 ++-/* [0x00000ec8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000ed0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000ed8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000ee0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000ee8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ef0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ef8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f00] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000f08] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f10] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f18] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f20] */ 0x8f8171f6, 
0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000f28] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000f30] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000f38] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000f40] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++-// :loop_honly ++-/* [0x00000f48] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000f50] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000f58] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000f60] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000f68] */ 0xee654987, 0x10024860, // shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 ++-/* [0x00000f70] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000f78] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000f80] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000f88] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000f90] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000f98] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000fa0] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000fa8] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000fb0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000fb8] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000fc0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000fc8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000fd0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, 
r1 << 11 ++-/* [0x00000fd8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000fe0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000fe8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000ff0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000ff8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00001000] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00001008] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001010] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001018] */ 0x8d9df4ff, 0x10024823, // sub r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001020] */ 0x8d5927f6, 0x100269e1, // sub.setf -, r3, rb18 ; mov r1, ra22 ++-/* [0x00001028] */ 0x559f2fc1, 0x100049e0, // mov -, vw_wait ; mul24 r0, r0, r1 ++-/* [0x00001030] */ 0xfffffef8, 0xf06809e7, // brr.anyn -, r:loop_honly ++-/* [0x00001038] */ 0x0f9cf1c0, 0xd0020827, // asr r0, r0, 15 ++-/* [0x00001040] */ 0x129d61c0, 0x10020827, // min r0, r0, rb22 ++-/* [0x00001048] */ 0x139c01c0, 0xd0020c27, // max vpm, r0, 0 ++-/* [0x00001050] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001058] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001060] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001068] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// ::mc_exit ++-/* [0x00001070] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001078] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00001080] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001088] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001090] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001098] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010a0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000010a8] */ 0x009e7000, 
0x100009e7, // nop ; nop ++-/* [0x000010b0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_exit1 ++-/* [0x000010b8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000010c0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010c8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010d0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010d8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000010e0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000010e8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000010f0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_interrupt_exit ++-/* [0x000010f8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001100] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001108] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001110] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001118] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001120] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001128] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001130] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001138] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001140] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001148] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001150] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001158] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001160] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001168] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001170] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001178] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00001180] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00001188] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_interrupt_exit4 ++-/* [0x00001190] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001198] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011a0] */ 0x009e7000, 0xa00009e7, // ldtmu0 
++-/* [0x000011a8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011b0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000011c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000011c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000011d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000011d8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000011e0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_interrupt_exit8 ++-/* [0x000011e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000011f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000011f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001200] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001208] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001210] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001218] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001220] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001228] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001230] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001238] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001240] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001248] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00001250] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00001258] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_setup_uv ++-/* [0x00001260] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00001268] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num ++-/* [0x00001270] */ 0x15827d80, 0x10020767, // mov ra_y, unif ++-/* [0x00001278] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif ++-/* [0x00001280] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00001288] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base ++-/* [0x00001290] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 ++-/* [0x00001298] */ 0x0d801dc0, 
0xd00217a7, // sub rb30,unif,1 ++-/* [0x000012a0] */ 0x15827d80, 0x10021427, // mov rb16, unif ++-/* [0x000012a8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000012b0] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++-/* [0x000012b8] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++-/* [0x000012c0] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x000012c8] */ 0x00000020, 0xe0020567, // mov ra21, 32 ++-/* [0x000012d0] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x000012d8] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++-/* [0x000012e0] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x000012e8] */ 0x00000040, 0xe0021567, // mov rb21, 64 ++-/* [0x000012f0] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x000012f8] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x00001300] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x00001308] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x00001310] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x00001318] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x00001320] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x00001328] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x00001330] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x00001338] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x00001340] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00001348] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00001350] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00001358] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00001360] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00001368] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00001370] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00001378] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001380] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00001388] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00001390] */ 0x0c9e7040, 0x100216e7, // add rb27, 
r0, r1 ++-/* [0x00001398] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x000013a0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x000013a8] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x000013b0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x000013b8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x000013c0] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x000013c8] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x000013d0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000013d8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x000013e0] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x000013e8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x000013f0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000013f8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x00001400] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x00001408] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x00001410] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00001418] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00001420] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++-/* [0x00001428] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00001430] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x00001438] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x00001440] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00001448] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001450] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001458] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001460] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00001468] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00001470] */ 0x0c741dc0, 0xd0020767, // 
add ra_y, ra_y, 1 ++-/* [0x00001478] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001480] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00001488] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00001490] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000398] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003a0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000003a8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003b0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003b8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003c0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003c8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003d0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003d8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000003e0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000003e8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003f0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003f8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000400] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000408] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000410] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000418] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000420] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000428] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, 
ra3 << 11, r1 << 11 +++/* [0x00000430] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000438] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000440] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000448] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000450] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000458] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000460] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000468] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000470] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000478] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000480] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000488] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000490] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000498] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000004a0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000004a8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000004b0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000004b8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004c0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000004c8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000004d0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000004d8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x000004e0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x000004e8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x000004f0] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x000004f8] */ 0x4c28a237, 0x10024860, 
// add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000500] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000508] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000510] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000518] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000520] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000528] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000530] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000538] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000540] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000548] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000550] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000558] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000560] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000568] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000570] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000578] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000580] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000588] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000590] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000598] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x00001498] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000014a0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000014a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000014b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000014b8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000014c0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000014c8] */ 0x0d827cc0, 0x100208a7, // sub 
r2, unif, r3 ++-/* [0x000014d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000014d8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000014e0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000014e8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000014f0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000014f8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00001500] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001508] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00001510] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00001518] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00001520] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00001528] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00001530] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00001538] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00001540] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00001548] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00001550] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001558] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001560] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00001568] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 ++-/* [0x00001570] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00001578] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001580] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001588] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001590] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001598] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000015a0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015a8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* 
[0x000015b0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015b8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000015c0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015c8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015d0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015d8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000015e0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015e8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015f0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000015f8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00001600] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00001608] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001610] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000005a0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000005a8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000005b0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000005b8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000005c0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000005c8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000005d0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000005d8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000005e0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000005e8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000005f0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000005f8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000600] */ 0x00000010, 
0xe00208a7, // mov r2, 16 +++/* [0x00000608] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000610] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000618] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000620] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000628] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000630] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000638] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000640] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000648] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000650] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000658] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000660] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000668] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000670] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00000678] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000680] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000688] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000690] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000698] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000006a8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000006c8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 
r0, r0, ra22 +++/* [0x000006e0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000006e8] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f0] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006f8] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000700] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000708] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000710] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000718] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00001618] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00001620] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00001628] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00001630] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00001638] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00001640] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00001648] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00001650] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00001658] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00001660] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00001668] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001670] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00001678] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-/* [0x00001680] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00001688] */ 0x40077031, 0xd000c9e3, 
// nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00001690] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00001698] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000016a0] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000016a8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000016b0] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000016b8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000016c0] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000016c8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000016d0] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000016d8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000016e0] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000016e8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x000016f0] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 ++-/* [0x000016f8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00001700] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00001708] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001710] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001718] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001720] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001728] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001730] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00001738] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00001740] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00001748] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00001750] */ 
0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00001758] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00001760] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00001768] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00001770] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00001778] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00001780] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00001788] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00001790] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00001798] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-/* [0x000017a0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000017a8] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x000017b0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000017b8] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x000017c0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000017c8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x000017d0] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x000017d8] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x000017e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000017e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000017f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000017f8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00001800] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001808] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001810] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00001818] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001820] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000720] */ 0xcd5117de, 
0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000728] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000730] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000738] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000740] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000748] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000750] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000758] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000760] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000768] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000770] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000778] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 +++/* [0x00000780] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 +++/* [0x00000788] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000790] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000798] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000007a0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000007a8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000007b0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000007b8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000007c0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000007c8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000007d0] 
*/ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000007d8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000007e0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000007e8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000007f0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x000007f8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x00000800] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000808] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000810] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000818] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000820] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000828] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000830] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000838] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000840] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000848] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 +++/* [0x00000850] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 +++/* [0x00000858] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 +++/* [0x00000860] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 +++/* [0x00000868] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 +++/* [0x00000870] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000878] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000880] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000888] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000890] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000898] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; 
mul24 r0, ra15, rb15 +++/* [0x000008a0] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x000008a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000008b0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x000008b8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000008c0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x000008c8] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000008d0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x000008d8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x000008e0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x000008e8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000008f0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000008f8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000900] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000908] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000910] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000918] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000920] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000928] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_exit +++/* [0x00000930] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000938] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000940] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000948] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000950] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000958] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000960] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000968] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000970] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_interrupt_exit8 +++/* [0x00000978] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000980] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000988] */ 0x009e7000, 0xa00009e7, // 
ldtmu0 +++/* [0x00000990] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000998] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000009a0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009d0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009d8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000009e0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000009e8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 3464cdb..9de4535 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -3,18 +3,11 @@ ++ ++ extern unsigned int rpi_shader[]; ++ ++-#define mc_setup (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 146) ++-#define mc_filter (rpi_shader + 364) ++-#define mc_filter_b (rpi_shader + 674) ++-#define mc_filter_honly (rpi_shader + 898) ++-#define mc_exit (rpi_shader + 1052) ++-#define mc_exit1 (rpi_shader + 1070) ++-#define mc_interrupt_exit (rpi_shader + 1086) ++-#define mc_interrupt_exit4 (rpi_shader + 1124) ++-#define mc_interrupt_exit8 (rpi_shader + 1146) ++-#define mc_setup_uv (rpi_shader + 1176) ++-#define mc_filter_uv_b (rpi_shader + 1318) ++-#define mc_end (rpi_shader + 1546) +++#define mc_setup_uv (rpi_shader + 0) +++#define mc_filter_uv (rpi_shader + 142) +++#define mc_filter_uv_b (rpi_shader + 360) +++#define mc_exit (rpi_shader + 588) +++#define mc_interrupt_exit8 (rpi_shader + 606) +++#define mc_end (rpi_shader + 636) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 4809e1d..cd7346d 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ 
b/libavcodec/rpi_shader.qasm ++@@ -71,8 +71,10 @@ ++ ++ .set rb_const_64, rb21 ++ ++-# mc_setup(next_kernel, x, y, ref_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1) ++-::mc_setup +++ +++################################################################################ +++# mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1, pad2) +++::mc_setup_uv ++ ++ # Read starting kernel ++ mov ra31, unif ++@@ -80,7 +82,9 @@ mov ra31, unif ++ # Load first request location ++ add ra_x_base, unif, elem_num # Store x ++ mov ra_y, unif # Store y ++-mov ra_x2_base, unif # Store frame base +++mov ra_x2_base, unif # Store frame u base +++nop +++sub ra_u2v_ref_offset, unif, ra_x2_base # Store offset to add to move from u to v in reference frame ++ ++ # Read image dimensions ++ sub rb25,unif,1 ++@@ -143,29 +147,24 @@ mov r1, vpm_setup(0, 4, h8p(0, 0)) ++ add rb28, r0, r1 ++ ++ # Compute base address for first and second access ++-#add r0, unif, elem_num # x ++ mov r0, ra_x_base # Load x ++-add r2, r0, 8 # x+8 ++ max r0, r0, 0; mov r1, ra_y # Load y ++ min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base # Load the frame base ++-shl ra_xshift_next, r0, 3 ++-max r2, r2, 0 +++shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++ add ra_y, r1, 1 ++-min r2, r2, rb_frame_width_minus_1 ++-shl ra_x2shift_next, r2, 3 ++-max r1, r1, 0 # y ++-min r1, r1, rb_frame_height_minus_1 ++-add r0, r0, r3; mul24 r1, r1, rb_pitch ++-add r2, r2, r3 +++add r0, r0, r3 ++ and r0, r0, ~3 ++-and r2, r2, ~3; mov ra_x_base, r0 +++max r1, r1, 0 ; mov ra_x_base, r0 # y +++min r1, r1, rb_frame_height_minus_1 ++ # submit texture requests for first line +++add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++ add t0s, r0, r1 ; mov ra_x2_base, r2 ++ add t0s, r2, r1 ++ ++ # Dump padding words ++ mov r0, unif ++ mov r0, unif +++mov r0, unif ++ ++ # submit texture requests for second line ++ max r1, ra_y, 0 ++@@ -176,6 +175,8 @@ nop ; mul24 r1, r1, 
rb_pitch ++ add t0s, r1, ra_x_base ++ add t0s, r1, ra_x2_base ++ +++ +++ ++ ################################################################################ ++ ++ # mc_filter_uv(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) ++@@ -341,453 +342,26 @@ add vw_setup, rb26, r0 # VDW setup 0 ++ mov vw_setup, rb29 # Stride ++ mov vw_addr, unif # start the VDW ++ ++-################################################################################ ++- ++- ++-# mc_filter(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) ++- ++-# At this point we have already issued two pairs of texture requests for the current block ++-# ra_x_base, ra_x16_base point to the current coordinates for this block ++-::mc_filter ++-mov ra31, unif ++- ++-# per-channel shifts were calculated on the *previous* invocation ++- ++-mov ra_xshift, ra_xshift_next ++-mov ra_x2shift, ra_x2shift_next ++- ++-# get base addresses and per-channel shifts for *next* invocation ++-add r0, unif, elem_num # x ++-add r2, r0, 8 # x+8 ++-max r0, r0, 0; mov r1, unif # y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base ++-shl ra_xshift_next, r0, 3 ++-max r2, r2, 0 ++-min r2, r2, rb_frame_width_minus_1 ++-shl ra_x2shift_next, r2, 3 ++-add r0, r0, r3 ++-add r2, r2, r3 ++-and rb_x_base_next, r0, ~3 ++-and ra_x2_base_next, r2, ~3 ++-mov ra_y_next, r1 ++- ++-# set up VPM write ++-mov vw_setup, rb28 ++- ++-# get width,height of block ++-mov r2, 16 ++-mov r0, unif ++-shr r1, r0, r2 # Extract width ++-sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++-and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 ++-shl r0, r0, 7 ++-add r0, r0, r1 # Combine width and height of destination area ++-shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++-add rb26, r0, rb27 ++- ++-sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++- 
++-# get filter coefficients ++- ++-mov r0, unif ++-asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif ++-asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif ++-asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-brr.anynn -, r:fast_path ++-asr rb12, r0, rb23 # delay slot 1 ++- ++-# r2 is elem_num ++-# r3 is loop counter ++- ++-mov r5rep, -8 # delay slot 2 ++- ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] # delay slot 3 ++- ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-## nop ; ldtmu0 # loop counter increment ++-## shr r0, r4, ra17 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ; mul24 r3, r0, ra0 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## nop ; mul24.ifnz r3, 
ra7 << 15, r1 << 15 ++-## sub r2, r2, r3 ; ldtmu0 ++-## ++-## mov r0, ra22 ++-## shr r0, r4, ra17 ; mul24 r2, r2, r0 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # apply horizontal filter ++-## ++-## asr r2, r2, 15 ; mul24 r3, r0, ra0 ++-## min r2, r2, rb22 ++-## max ra13, r2, 0 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-## sub r0, r2, r3 ++-## ++-## # apply horizontal filter ++-## ++-## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero ++-## asr r0, r0, 15 ++-## min r0, r0, rb22 ++-## max ra14, r0, 0 ++-## ++-## ++-## ++-## ++-## nop ; ldtmu0 # loop counter increment ++-## shr r0, r4, ra17 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ; mul24 r3, r0, ra0 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-## sub r2, r2, 
r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-## sub r0, r2, r3 ++-## ++-## # apply horizontal filter ++-## ++-## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero ++-## asr r0, r0, 15 ++-## min r0, r0, rb22 ++-## max ra15, r0, 0 ++- ++- ++- ++- ++-mov r3, 0 ++- ++-:loop ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++- ++-max r2, ra_y, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 ++- ++-# generate seven shifted versions ++-# interleave with scroll of vertical context ++- ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++- ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 
++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-sub r0, r2, r3 ++- ++-mov r3, rb31 ++- ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 8 ; mov r1, ra22 ++- ++-# apply horizontal filter ++-brr.anyn -, r:loop ++-max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++-asr r0, r0, 15 ; mov r1, ra21 ++-min.setf ra15, r0, rb22 ++- ++-# apply vertical filter and write to VPM ++- ++-nop ; mul24 r0, ra14, rb14 ++-sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-brr.anyn -, r:loop ++-asr r1, r1, 15 ++-min r1, r1, rb22 ++-max vpm, r1, 0 ++- ++-# DMA out ++- ++-bra -, ra31 ++-mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long ++-mov vw_setup, rb29 ++-mov vw_addr, unif # start the VDW ++- ++-#################################################### ++- ++-:fast_path ++-## nop ; ldtmu0 # loop counter increment ++-## shr r0, r4, ra17 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ; mul24 r3, r0, ra0 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 
6 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## sub r2, r2, r3 ; ldtmu0 ++-## ++-## mov r0, ra22 ++-## shr r0, r4, ra17 ; mul24 r2, r2, r0 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # apply horizontal filter ++-## ++-## asr r2, r2, 15 ; mul24 r3, r0, ra0 ++-## min r2, r2, rb22 ++-## max ra13, r2, 0 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## sub r0, r2, r3 ++-## ++-## # apply horizontal filter ++-## ++-## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to zero ++-## asr r0, r0, 15 ++-## min r0, r0, rb22 ++-## max ra14, r0, 0 ++-## ++-## ++-## ++-## ++-## nop ; ldtmu0 # loop counter increment ++-## shr r0, r4, ra17 ; ldtmu0 ++-## shr r1, r4, ra17 ; v8subs r0, r0, rb20 ++-## add t0s, ra16, r5 ; v8subs r1, r1, rb20 ++-## add ra16, ra16, rb16 ; mov t0s, ra16 ++-## ++-## # generate seven shifted versions ++-## # interleave with scroll of vertical context ++-## ++-## mov r2, rb21 ; mul24 r3, r0, ra0 ++-## sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-## sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-## sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-## sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-## sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-## sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-## sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-## sub r0, r2, r3 ++-## ++-## # apply horizontal filter ++-## ++-## nop ; mul24 r0, r0, ra22 # last bit of context scroll, including clamp to 
zero ++-## asr r0, r0, 15 ++-## min r0, r0, rb22 ++-## max ra15, r0, 0 ++- ++- ++-mov r3, 0 # This signifies the amount of unrolling ++- ++-:fast_loop ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-# Due to pipelining we can only skip second pipeline instructions related to the fetched pixels ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_y, ra_y_next ; mov rb31, r3 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov r3, rb_pitch ++- ++-max r2, ra_y, 0 ++-min r2, r2, rb_frame_height_minus_1 ; mov r1, r4 # discard texture read ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r0, r0, rb20 ++-add t0s, ra_x2_base, r2 ++- ++-# generate seven shifted versions ++-# interleave with scroll of vertical context ++- ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-sub r0, r2, r3 ; mov r3, rb31 ++- ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 8 ; mov r1, ra22 ++- ++-# apply horizontal filter ++- ++-brr.anyn -, r:fast_loop ++-max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++-asr r0, r0, 15 ; mov r1, ra21 ++-min.setf ra15, r0, rb22 ++- ++-# apply vertical filter and write to VPM ++- ++-nop ; mul24 r0, ra14, rb14 ++-sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-sub r1, r1, r0 ; mul24 r0, ra8, rb8 
++-sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-brr.anyn -, r:fast_loop ++-asr r1, r1, 15 ++-min r1, r1, rb22 ++-max vpm, r1, 0 ++- ++-# DMA out ++- ++-bra -, ra31 ++-mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long ++-mov vw_setup, rb29 ++-mov vw_addr, unif # start the VDW ++ ++ ################################################################################ ++ ++-# mc_filter_b(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) ++- ++-# At this point we have already issued two pairs of texture requests for the current block ++-# ra_x_base, ra_x16_base point to the current coordinates for this block ++-::mc_filter_b +++::mc_filter_uv_b ++ mov ra31, unif ++ ++ # per-channel shifts were calculated on the *previous* invocation ++ ++ mov ra_xshift, ra_xshift_next ++-mov ra_x2shift, ra_x2shift_next ++ ++ # get base addresses and per-channel shifts for *next* invocation ++ add r0, unif, elem_num # x ++-add r2, r0, 8 # x+8 ++ max r0, r0, 0; mov r1, unif # y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base +++min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base ++ shl ra_xshift_next, r0, 3 ++-max r2, r2, 0 ++-min r2, r2, rb_frame_width_minus_1 ++-shl ra_x2shift_next, r2, 3 +++sub r2, unif, r3 # compute offset from frame base u to frame base v ++ add r0, r0, r3 ++-add r2, r2, r3 ++ and rb_x_base_next, r0, ~3 ++-and ra_x2_base_next, r2, ~3 ++ mov ra_y_next, r1 +++add ra_x2_base_next, rb_x_base_next, r2 ++ ++ # set up VPM write ++ mov vw_setup, rb28 ++@@ -801,17 +375,22 @@ and r0, r0, rb22 # Extract height ++ add rb17, r0, 5 ++ add rb18, r0, 7 ++ shl r0, r0, 7 +++ ++ # r0 is currently height<<7 ++ # For vr_setup we want height<<20 (so 20-7=13 additional bits) ++ shl r3, r0, 13 ++ shl r3, r3, 8 # Mask off top 8 bits ++ shr r3, r3, 8 +++ ++ add r0, r0, r1 # Combine width and height of destination area ++ shl r0, r0, 
r2 # Shift into bits 16 upwards of the vdw_setup0 register ++ add rb26, r0, rb27 +++ ++ # In a B frame, so also set up VPM read ++ add vr_setup, r3, rb28 ++ +++sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code +++ ++ # get filter coefficients ++ ++ mov r0, unif ++@@ -837,9 +416,13 @@ asr rb12, r0, rb23 ++ ++ mov r5rep, -8 ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ ++ mov r3, 0 ++ ++-:bloop +++:uvloop_b ++ # retrieve texture results and pick out bytes ++ # then submit two more texture requests ++ ++@@ -847,7 +430,7 @@ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++ shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++ mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ ++ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++@@ -861,6 +444,7 @@ add t0s, ra_x2_base, r2 ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++ mov r2, rb21 ; mul24 r3, r0, ra0 +++nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++ sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++ sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++@@ -889,7 +473,7 @@ mov ra13, ra14 ++ sub.setf -, r3, 8 ; mov r1, ra22 ++ ++ # apply horizontal filter ++-brr.anyn -, r:bloop +++brr.anyn -, r:uvloop_b ++ max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++ asr r0, r0, 15 ; mov r1, ra21 ++ min.setf ra15, r0, rb22 ++@@ -906,213 +490,50 @@ sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++ sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++ sub.ifnn r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 15 ; mov -, vr_wait +++asr r1, 
r1, 15 ++ min r1, r1, rb22 ++ add r0, vpm, 1 # Blend in previous VPM contents at this location ++-brr.anyn -, r:bloop +++brr.anyn -, r:uvloop_b ++ max r1, r1, 0 ++ add r1, r1, r0 ++ shr vpm, r1, 1 ++ ++-# DMA out +++ +++# DMA out for U +++ +++mov vw_setup, rb26 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++# DMA out for V +++# We need to wait for the U to complete first, but have nothing useful to compute while we wait. +++# Could potentially push this write into the start of the next pipeline stage. +++mov r0, 16 +++mov -, vw_wait ++ ++ bra -, ra31 ++-mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long ++-mov vw_setup, rb29 +++add vw_setup, rb26, r0 # VDW setup 0 +++mov vw_setup, rb29 # Stride ++ mov vw_addr, unif # start the VDW ++ ++ ################################################################################ ++ ++-# mc_filter_honly(next_kernel, x, y, frame_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_dst) ++-# This filter only does horizontal filtering. ++-# It is assumed that the region to fetch does not include extra rows above. 
+++# mc_exit() ++ ++-# At this point we have already issued two pairs of texture requests for the current block ++-# ra_x_base, ra_x16_base point to the current coordinates for this block ++-::mc_filter_honly ++-mov ra31, unif +++::mc_exit +++mov -, vw_wait # wait on the VDW ++ ++-# per-channel shifts were calculated on the *previous* invocation +++mov -,srel(0) ++ ++-mov ra_xshift, ra_xshift_next ++-mov ra_x2shift, ra_x2shift_next ++- ++-# get base addresses and per-channel shifts for *next* invocation ++-add r0, unif, elem_num # x ++-add r2, r0, 8 # x+8 ++-max r0, r0, 0; mov r1, unif # y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3,unif # frame_base ++-shl ra_xshift_next, r0, 3 ++-max r2, r2, 0 ++-min r2, r2, rb_frame_width_minus_1 ++-shl ra_x2shift_next, r2, 3 ++-add r0, r0, r3 ++-add r2, r2, r3 ++-and rb_x_base_next, r0, ~3 ++-and ra_x2_base_next, r2, ~3 ++-mov ra_y_next, r1 ++- ++-# set up VPM write ++-mov vw_setup, rb28 ++- ++-# get width,height of block ++-mov r2, 16 ++-mov r0, unif ++-shr r1, r0, r2 # Extract width ++-sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++-and r0, r0, rb22 # Extract height ++-add rb17, r0, -2 # Pipelining means we move data across 2 iterations early ++-shl r0, r0, 7 ; mov rb18,r0 ++-add r0, r0, r1 # Combine width and height of destination area ++-shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++-add rb26, r0, rb27 ++- ++-# get filter coefficients ++- ++-mov r0, unif ++-asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif ++-mov r0, unif ++- ++-# r2 is elem_num ++-# r3 is loop counter ++-mov r5rep, -8 ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] # delay slot 3 ++-mov r3, 0 ++- ++-:loop_honly ++-# retrieve texture results 
and pick out bytes ++-# then submit two more texture requests ++- ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, ra_x2shift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++- ++-max r2, ra_y, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 ++- ++-# generate seven shifted versions ++-# interleave with scroll of vertical context ++- ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++- ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-sub r0, r2, r3 ; mov r3, rb31 ++- ++-sub.setf -, r3, rb18 ; mov r1, ra22 ++- ++-mov -, vw_wait ; mul24 r0, r0, r1 ++-brr.anyn -, r:loop_honly ++-asr r0, r0, 15 # delay 1 ++-min r0, r0, rb22 # delay 2 ++-max vpm, r0, 0 # delay 3 ++- ++-# DMA out ++-bra -, ra31 ++-mov vw_setup, rb26 # VDW: height rows, 16 8-bit units long ++-mov vw_setup, rb29 ++-mov vw_addr, unif # start the VDW ++- ++- ++-################################################################################ ++- ++-# mc_exit() ++- ++-::mc_exit ++-mov -, vw_wait # wait on the VDW ++- ++-mov -,srel(0) ++- 
++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 ++ ++ nop ; nop ; thrend ++ nop ; nop # delay slot 1 ++ nop ; nop # delay slot 2 ++ ++-::mc_exit1 ++-mov -, vw_wait # wait on the VDW ++- ++-#mov -,srel(1) ++- ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++- ++-nop ; nop ; thrend ++-mov interrupt, 1; nop # delay slot 1 ++-nop ; nop # delay slot 2 ++- ++-# mc_interrupt_exit() ++-::mc_interrupt_exit ++-mov -, vw_wait # wait on the VDW ++- ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++- ++-mov -,sacq(0) # 1 ++-mov -,sacq(0) # 2 ++-mov -,sacq(0) # 3 ++-mov -,sacq(0) # 4 ++-mov -,sacq(0) # 5 ++-mov -,sacq(0) # 6 ++-mov -,sacq(0) # 7 ++-mov -,sacq(0) # 8 ++-mov -,sacq(0) # 9 ++-mov -,sacq(0) # 10 ++-mov -,sacq(0) # 11 ++- ++-nop ; nop ; thrend ++-mov interrupt, 1; nop # delay slot 1 ++-nop ; nop # delay slot 2 ++- ++-# mc_interrupt_exit4() ++-::mc_interrupt_exit4 ++-mov -, vw_wait # wait on the VDW ++- ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++- ++-mov -,sacq(0) # 1 ++-mov -,sacq(0) # 2 ++-mov -,sacq(0) # 3 ++- ++-nop ; nop ; thrend ++-mov interrupt, 1; nop # delay slot 1 ++-nop ; nop # delay slot 2 ++- ++ # mc_interrupt_exit8() ++ ::mc_interrupt_exit8 ++ mov -, vw_wait # wait on the VDW ++@@ -1134,282 +555,5 @@ nop ; nop ; thrend ++ mov interrupt, 1; nop # delay slot 1 ++ nop ; nop # delay slot 2 ++ ++-################################################################################ ++-# mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1, pad2) ++-::mc_setup_uv ++- ++-# Read starting kernel ++-mov ra31, unif ++- ++-# Load first request location ++-add ra_x_base, unif, elem_num # Store x ++-mov ra_y, unif # Store y ++-mov ra_x2_base, unif # Store frame u base ++-nop ++-sub ra_u2v_ref_offset, unif, ra_x2_base # Store offset to add to move from u to v in reference frame ++- ++-# Read image dimensions ++-sub rb25,unif,1 ++-sub rb30,unif,1 ++- ++-# get source pitch ++-mov rb16, unif ++- ++-# get 
destination pitch ++-mov r0, unif ++-mov r1, vdw_setup_1(0) ++-add rb24, r1, r0 ++- ++-# load constants ++- ++-mov ra20, 1 ++-mov ra21, 32 ++-mov ra22, 256 ++-mov ra23, 8 ++- ++-mov rb20, 0xffffff00 ++-mov rb21, 64 ++-mov rb22, 255 ++-mov rb23, 24 ++- ++-# touch vertical context to keep simulator happy ++- ++-mov ra8, 0 ++-mov ra9, 0 ++-mov ra10, 0 ++-mov ra11, 0 ++-mov ra12, 0 ++-mov ra13, 0 ++-mov ra14, 0 ++-mov ra15, 0 ++- ++-# Compute part of VPM to use for DMA output ++-mov r2, qpu_num ++-and r2, r2, 15 ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later ++-shl r0, r0, 5 ++-add rb27, r0, r1 ++- ++-# Compute part of VPM to save data into ++-mov r2, qpu_num ++-and r2, r2, 15 ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-add rb28, r0, r1 ++- ++-# Compute base address for first and second access ++-mov r0, ra_x_base # Load x ++-max r0, r0, 0; mov r1, ra_y # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base # Load the frame base ++-shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-add ra_y, r1, 1 ++-add r0, r0, r3 ++-and r0, r0, ~3 ++-max r1, r1, 0 ; mov ra_x_base, r0 # y ++-min r1, r1, rb_frame_height_minus_1 ++-# submit texture requests for first line ++-add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-add t0s, r0, r1 ; mov ra_x2_base, r2 ++-add t0s, r2, r1 ++- ++-# Dump padding words ++-mov r0, unif ++-mov r0, unif ++-mov r0, unif ++- ++-# submit texture requests for second line ++-max r1, ra_y, 0 ++-min r1, r1, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ++-bra -, ra31 ++-nop ; mul24 r1, r1, rb_pitch ++-add t0s, r1, ra_x_base ++-add t0s, r1, ra_x2_base ++- ++- ++- ++-################################################################################ ++- ++-::mc_filter_uv_b ++-mov ra31, unif ++- ++-# per-channel shifts were calculated on the 
*previous* invocation ++- ++-mov ra_xshift, ra_xshift_next ++- ++-# get base addresses and per-channel shifts for *next* invocation ++-add r0, unif, elem_num # x ++-max r0, r0, 0; mov r1, unif # y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base ++-shl ra_xshift_next, r0, 3 ++-sub r2, unif, r3 # compute offset from frame base u to frame base v ++-add r0, r0, r3 ++-and rb_x_base_next, r0, ~3 ++-mov ra_y_next, r1 ++-add ra_x2_base_next, rb_x_base_next, r2 ++- ++-# set up VPM write ++-mov vw_setup, rb28 ++- ++-# get width,height of block ++-mov r2, 16 ++-mov r0, unif ++-shr r1, r0, r2 # Extract width ++-sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++-and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 ++-shl r0, r0, 7 ++- ++-# r0 is currently height<<7 ++-# For vr_setup we want height<<20 (so 20-7=13 additional bits) ++-shl r3, r0, 13 ++-shl r3, r3, 8 # Mask off top 8 bits ++-shr r3, r3, 8 ++- ++-add r0, r0, r1 # Combine width and height of destination area ++-shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++-add rb26, r0, rb27 ++- ++-# In a B frame, so also set up VPM read ++-add vr_setup, r3, rb28 ++- ++-sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++- ++-# get filter coefficients ++- ++-mov r0, unif ++-asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif ++-asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif ++-asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-asr rb12, r0, rb23 ++- ++-# r2 is elem_num ++-# r3 is loop counter ++- 
++-mov r5rep, -8 ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++- ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-mov r3, 0 ++- ++-:uvloop_b ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++- ++-max r2, ra_y, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 ++- ++-# generate seven shifted versions ++-# interleave with scroll of vertical context ++- ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++- ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 ++-nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-sub r0, r2, r3 ++- ++-mov r3, rb31 ++- ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 8 ; mov r1, ra22 ++- ++-# apply horizontal filter ++-brr.anyn -, r:uvloop_b ++-max ra14, ra15, 0 ; mul24 r0, r0, r1 # last 
bit of context scroll, including clamp to zero ++-asr r0, r0, 15 ; mov r1, ra21 ++-min.setf ra15, r0, rb22 ++- ++-# apply vertical filter and write to VPM ++- ++-nop ; mul24 r0, ra14, rb14 ++-sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-sub.ifnn r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 15 ++-min r1, r1, rb22 ++-add r0, vpm, 1 # Blend in previous VPM contents at this location ++-brr.anyn -, r:uvloop_b ++-max r1, r1, 0 ++-add r1, r1, r0 ++-shr vpm, r1, 1 ++- ++- ++-# DMA out for U ++- ++-mov vw_setup, rb26 # VDW setup 0 ++-mov vw_setup, rb29 # Stride ++-mov vw_addr, unif # start the VDW ++- ++-# DMA out for V ++-# We need to wait for the U to complete first, but have nothing useful to compute while we wait. ++-# Could potentially push this write into the start of the next pipeline stage. ++-mov r0, 16 ++-mov -, vw_wait ++- ++-bra -, ra31 ++-add vw_setup, rb26, r0 # VDW setup 0 ++-mov vw_setup, rb29 # Stride ++-mov vw_addr, unif # start the VDW ++- ++ ::mc_end +++# Do not add code here because mc_end must appear after all other code. 
++-- ++2.7.4 ++ ++ ++From d40d59de0f09fd1a6e7146532418b63d8e2711b7 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 13 May 2015 14:54:25 +0100 ++Subject: [PATCH 20/68] Moved chroma P1 to QPUs ++ ++--- ++ libavcodec/hevc.c | 38 ++++++++++++++++++++++++++++++++++++++ ++ 1 file changed, 38 insertions(+) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 31b8b2f..391d139 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2070,6 +2070,44 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ s->sh.luma_offset_l1[current_mv.ref_idx[1]]); ++ ++ if (s->ps.sps->chroma_format_idc) { +++#ifdef RPI_INTER_QPU +++ if (s->enable_rpi) { +++ int reflist = 1; +++ int hshift = s->ps.sps->hshift[1]; +++ int vshift = s->ps.sps->vshift[1]; +++ const Mv *mv = &current_mv.mv[reflist]; +++ intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift); +++ intptr_t my = av_mod_uintp2(mv->y, 2 + vshift); +++ intptr_t _mx = mx << (1 - hshift); +++ intptr_t _my = my << (1 - vshift); // Fractional part of motion vector +++ +++ int x1_c = x0_c + (mv->x >> (2 + hshift)); +++ int y1_c = y0_c + (mv->y >> (2 + hshift)); +++ //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width +++ int chan = x0>>8; +++ +++ uint32_t *u = s->u_mvs[chan & 7]; +++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { +++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); +++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); +++ // TODO chroma weight and offset...
s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] +++ *u++ = rpi_filter_coefs[_mx][0]; +++ *u++ = rpi_filter_coefs[_mx][1]; +++ *u++ = rpi_filter_coefs[_my][0]; +++ *u++ = rpi_filter_coefs[_my][1]; +++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ } +++ } +++ s->u_mvs[chan & 7] = u; +++ return; +++ } +++#endif ++ RPI_REDIRECT(chroma_mc_uni)(s, dst1, s->frame->linesize[1], ref1->frame->data[1], ref1->frame->linesize[1], ++ 1, x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, ++ s->sh.chroma_weight_l1[current_mv.ref_idx[1]][0], s->sh.chroma_offset_l1[current_mv.ref_idx[1]][0]); ++-- ++2.7.4 ++ ++ ++From 75777ba7927086e862104b14f6446e81bc789611 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 13 May 2015 15:13:47 +0100 ++Subject: [PATCH 21/68] Added B prediction - not quite right ++ ++--- ++ libavcodec/hevc.c | 58 ++++++++++++++++++++++++ ++ libavcodec/rpi_shader.c | 108 +++++++++++++++++++++++---------------------- ++ libavcodec/rpi_shader.h | 6 +-- ++ libavcodec/rpi_shader.qasm | 48 ++++++++++---------- ++ 4 files changed, 141 insertions(+), 79 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 391d139..47ddfff 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2127,6 +2127,64 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ ref1->frame, &current_mv.mv[1], &current_mv); ++ ++ if (s->ps.sps->chroma_format_idc) { +++#ifdef RPI_INTER_QPU +++ if (s->enable_rpi) { +++ int hshift = s->ps.sps->hshift[1]; +++ int vshift = s->ps.sps->vshift[1]; +++ const Mv *mv = &current_mv.mv[0]; +++ intptr_t mx = av_mod_uintp2(mv->x, 2 + hshift); +++ intptr_t my = av_mod_uintp2(mv->y, 2 + vshift); +++ intptr_t _mx = mx << (1 - hshift); +++ intptr_t _my = my << (1 - vshift); // Fractional part of motion vector
+++ int x1_c = x0_c + (mv->x >> (2 + hshift)); +++ int y1_c = y0_c + (mv->y >> (2 + hshift)); +++ +++ const Mv *mv2 = &current_mv.mv[1]; +++ intptr_t mx2 = av_mod_uintp2(mv2->x, 2 + hshift); +++ intptr_t my2 = av_mod_uintp2(mv2->y, 2 + vshift); +++ intptr_t _mx2 = mx2 << (1 - hshift); +++ intptr_t _my2 = my2 << (1 - vshift); // Fractional part of motion vector +++ +++ int x2_c = x0_c + (mv2->x >> (2 + hshift)); +++ int y2_c = y0_c + (mv2->y >> (2 + hshift)); +++ +++ int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width +++ +++ uint32_t *u = s->u_mvs[chan & 7]; +++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { +++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); +++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ?
nPbH_c : 16); +++ *u++ = rpi_filter_coefs[_mx][0]; +++ *u++ = rpi_filter_coefs[_mx][1]; +++ *u++ = rpi_filter_coefs[_my][0]; +++ *u++ = rpi_filter_coefs[_my][1]; +++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); // TODO this will become unused once we have a dedicated pass0 filter +++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ +++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x2_c - 3 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y2_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); +++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); +++ // TODO chroma weight and offset... s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] +++ *u++ = rpi_filter_coefs[_mx2][0]; +++ *u++ = rpi_filter_coefs[_mx2][1]; +++ *u++ = rpi_filter_coefs[_my2][0]; +++ *u++ = rpi_filter_coefs[_my2][1]; +++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ } +++ } +++ s->u_mvs[chan & 7] = u; +++ return; +++ } +++#endif ++ RPI_REDIRECT(chroma_mc_bi)(s, dst1, s->frame->linesize[1], ref0->frame, ref1->frame, ++ x0_c, y0_c, nPbW_c, nPbH_c, &current_mv, 0); ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 170e8ac..5d00cb2 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -265,23 +265,23 @@ unsigned int rpi_shader[] = { ++ /* [0x00000760] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++ /* [0x00000768] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++ /* [0x00000770] */
0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000778] */ 0x55015fc6, 0x100248a3, // mov r2, rb21 ; mul24 r3, r0, ra0 ++-/* [0x00000780] */ 0x40038031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-/* [0x00000788] */ 0x4d07f4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000778] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000780] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000788] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++ /* [0x00000790] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000798] */ 0x4d0be4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000798] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ /* [0x000007a0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000007a8] */ 0x4d0fd4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000007a8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ /* [0x000007b0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000007b8] */ 0x4d13c4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000007b8] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++ /* [0x000007c0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000007c8] */ 0x4d17b4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000007c8] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++ /* [0x000007d0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000007d8] */ 0x4d1ba4f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000007d8] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++ /* 
[0x000007e0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000007e8] */ 0x4d1f94f0, 0xd00248a3, // sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000007e8] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++ /* [0x000007f0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x000007f8] */ 0x0d9e74c0, 0x10020827, // sub r0, r2, r3 +++/* [0x000007f8] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++ /* [0x00000800] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++ /* [0x00000808] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++ /* [0x00000810] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++@@ -291,61 +291,63 @@ unsigned int rpi_shader[] = { ++ /* [0x00000830] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++ /* [0x00000838] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++ /* [0x00000840] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000848] */ 0x533c0dc1, 0xd00243a0, // max ra14, ra15, 0 ; mul24 r0, r0, r1 ++-/* [0x00000850] */ 0x8f54f1f6, 0xd0024821, // asr r0, r0, 15 ; mov r1, ra21 ++-/* [0x00000858] */ 0x129d61c0, 0x100223e7, // min.setf ra15, r0, rb22 ++-/* [0x00000860] */ 0x4038e037, 0x100049e0, // nop ; mul24 r0, ra14, rb14 ++-/* [0x00000868] */ 0x4d34d237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-/* [0x00000870] */ 0x4d30c237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000878] */ 0x4d2cb237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000880] */ 0x4d28a237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000888] */ 0x4d249237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000890] */ 0x4d208237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000898] */ 0x4d3cf237, 0x10024860, // sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000008a0] */ 0x8d9f223f, 0x100a0867, // sub.ifnn r1, r1, r0 ; mov -, vw_wait +++/* [0x00000848] */ 0x553e7d81, 0x100243a0, 
// mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000850] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000858] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000860] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000868] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000870] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000878] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000880] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000888] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000890] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000898] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000008a0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++ /* [0x000008a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000008b0] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 ++-/* [0x000008b8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000008c0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x000008c8] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000008d0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x000008d8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x000008e0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x000008e8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000008f0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000008f8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000900] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000908] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000910] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000918] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000920] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* 
[0x00000928] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008b0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000008b8] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x000008c0] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x000008c8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000008d0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x000008d8] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000008e0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x000008e8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x000008f0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x000008f8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000900] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000908] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000910] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000918] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000920] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000928] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000930] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000938] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000930] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000938] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000940] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000948] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000940] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000948] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++ /* [0x00000950] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000958] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000960] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000968] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000970] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000960] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000968] */ 
0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000970] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000978] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000980] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000978] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000980] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000988] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000988] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x00000990] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000998] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009a0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009a0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000009a8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x000009b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000009b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000009c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000009c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000009d0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009d8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000009e0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000009e8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000009d8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009e0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000009e8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000009f0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000009f8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 9de4535..e36c4ae 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -6,8 +6,8 @@ extern unsigned int rpi_shader[]; ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv 
(rpi_shader + 142) ++ #define mc_filter_uv_b (rpi_shader + 360) ++-#define mc_exit (rpi_shader + 588) ++-#define mc_interrupt_exit8 (rpi_shader + 606) ++-#define mc_end (rpi_shader + 636) +++#define mc_exit (rpi_shader + 592) +++#define mc_interrupt_exit8 (rpi_shader + 610) +++#define mc_end (rpi_shader + 640) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index cd7346d..870437d2 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -443,23 +443,23 @@ add t0s, ra_x2_base, r2 ++ ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++-mov r2, rb21 ; mul24 r3, r0, ra0 ++-nop ; mul24.ifnz r3, ra0 << 8, r1 << 8 ++-sub r2, r2, r3 ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24 r2, r0, ra0 +++nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++nop ; mul24 r3, ra1 << 1, r0 << 1 ++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-sub r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-sub r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-sub r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++ nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-sub r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++ nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-sub r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++ nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-sub r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++ nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-sub r0, r2, r3 +++add r0, r2, r3 ++ ++ mov r3, rb31 ++ ++@@ -474,23 +474,25 @@ sub.setf -, r3, 8 ; mov r1, ra22 ++ ++ # apply horizontal filter ++ brr.anyn -, r:uvloop_b ++-max ra14, ra15, 0 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero 
++-asr r0, r0, 15 ; mov r1, ra21 ++-min.setf ra15, r0, rb22 +++mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero +++asr ra15, r0, 8 ; nop +++nop ; nop ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r0, ra14, rb14 ++-sub r1, r1, r0 ; mul24 r0, ra13, rb13 ++-sub r1, r1, r0 ; mul24 r0, ra12, rb12 ++-sub r1, r1, r0 ; mul24 r0, ra11, rb11 ++-sub r1, r1, r0 ; mul24 r0, ra10, rb10 ++-sub r1, r1, r0 ; mul24 r0, ra9, rb9 ++-sub r1, r1, r0 ; mul24 r0, ra8, rb8 ++-sub r1, r1, r0 ; mul24 r0, ra15, rb15 ++-sub.ifnn r1, r1, r0 ; mov -, vw_wait +++nop ; mul24 r1, ra14, rb14 +++nop ; mul24 r0, ra13, rb13 +++add r1, r1, r0 ; mul24 r0, ra12, rb12 +++add r1, r1, r0 ; mul24 r0, ra11, rb11 +++add r1, r1, r0 ; mul24 r0, ra10, rb10 +++add r1, r1, r0 ; mul24 r0, ra9, rb9 +++add r1, r1, r0 ; mul24 r0, ra8, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb15 +++add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 15 +++asr r1, r1, 14 +++add r1, r1, ra21 +++asr r1, r1, 6 ++ min r1, r1, rb22 ++ add r0, vpm, 1 # Blend in previous VPM contents at this location ++ brr.anyn -, r:uvloop_b ++-- ++2.7.4 ++ ++ ++From 3d4e94b8f0b08fe4c0b582fc7f1dbe9d1d9d60ed Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 08:15:55 +0100 ++Subject: [PATCH 22/68] Added flush for SAO ++ ++--- ++ libavcodec/hevc.c | 2 +- ++ libavcodec/hevc_filter.c | 39 ++++++++++++++++++++++++++------------- ++ 2 files changed, 27 insertions(+), 14 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 47ddfff..93e1eba 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2903,7 +2903,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ rpi_execute_inter_qpu(s); ++ #endif ++ // Transform all blocks ++- //printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); +++ 
// printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++ rpi_execute_transform(s); ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 9b6e26d..92a8271 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -871,6 +871,21 @@ static void flush_buffer(AVBufferRef *bref) { ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++ gpu_cache_flush(p); ++ } +++ +++static void ff_hevc_flush_chroma(HEVCContext *s) +++{ +++ if (s->enable_rpi && !( s->nal_unit_type == NAL_TRAIL_N || +++ s->nal_unit_type == NAL_TSA_N || +++ s->nal_unit_type == NAL_STSA_N || +++ s->nal_unit_type == NAL_RADL_N || +++ s->nal_unit_type == NAL_RASL_N )) { +++ flush_buffer(s->frame->buf[1]); +++ flush_buffer(s->frame->buf[2]); +++ //memcpy(s->dummy.arm,s->frame->data[0],2048*64); +++ //memcpy(s->dummy.arm,s->frame->data[1],1024*32); +++ //memcpy(s->dummy.arm,s->frame->data[2],1024*32); +++ } +++} ++ #endif ++ ++ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++@@ -886,31 +901,29 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ sao_filter_CTB(s, x - ctb_size, y); ++ if (y && x_end) { ++ sao_filter_CTB(s, x, y - ctb_size); ++- if (s->threads_type & FF_THREAD_FRAME ) +++ if (s->threads_type & FF_THREAD_FRAME ) { +++#ifdef RPI_INTER_QPU +++ ff_hevc_flush_chroma(s); +++#endif ++ ff_thread_report_progress(&s->ref->tf, y, 0); +++ } ++ } ++ if (x_end && y_end) { ++ sao_filter_CTB(s, x , y); ++- if (s->threads_type & FF_THREAD_FRAME ) +++ if (s->threads_type & FF_THREAD_FRAME ) { +++#ifdef RPI_INTER_QPU +++ ff_hevc_flush_chroma(s); +++#endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0); +++ } ++ } ++ } else if (s->threads_type & FF_THREAD_FRAME && x_end) { ++ //int newh = y + ctb_size - 4; ++ //int currh = s->ref->tf.progress->data[0]; ++ //if (((y + 
ctb_size)&63)==0) ++- if (!( s->nal_unit_type == NAL_TRAIL_N || ++- s->nal_unit_type == NAL_TSA_N || ++- s->nal_unit_type == NAL_STSA_N || ++- s->nal_unit_type == NAL_RADL_N || ++- s->nal_unit_type == NAL_RASL_N )) { ++ #ifdef RPI_INTER_QPU ++- flush_buffer(s->frame->buf[1]); ++- flush_buffer(s->frame->buf[2]); +++ ff_hevc_flush_chroma(s); ++ #endif ++- //memcpy(s->dummy.arm,s->frame->data[0],2048*64); ++- //memcpy(s->dummy.arm,s->frame->data[1],1024*32); ++- //memcpy(s->dummy.arm,s->frame->data[2],1024*32); ++- } ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++ } ++-- ++2.7.4 ++ ++ ++From 3e337b9c4ef0c356a0259be2254ad1bc4d5bbe29 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 09:17:28 +0100 ++Subject: [PATCH 23/68] Stopped using acceleration in unsupported cases ++ ++--- ++ libavcodec/hevc.c | 14 +++++++------- ++ libavcodec/hevc_cabac.c | 4 ++-- ++ 2 files changed, 9 insertions(+), 9 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 93e1eba..bfd5a55 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -1152,15 +1152,11 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (size * size); i++) { ++ coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); ++ } ++- printf("Cross component not supported\n"); // TODO ++- exit(-1); ++ s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride); ++ } ++ } ++ ++ if (lc->tu.cross_pf) { ++- printf("Cross component not supported\n"); // TODO ++- exit(-1); ++ hls_cross_component_pred(s, 1); ++ } ++ for (i = 0; i < (s->ps.sps->chroma_format_idc == 2 ? 
2 : 1); i++) { ++@@ -1189,8 +1185,6 @@ static int hls_transform_unit(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < (size * size); i++) { ++ coeffs[i] = ((lc->tu.res_scale_val * coeffs_y[i]) >> 3); ++ } ++- printf("Cross component not supported\n"); // TODO ++- exit(-1); ++ s->hevcdsp.transform_add[log2_trafo_size_c-2](dst, coeffs, stride); ++ } ++ } ++@@ -2857,7 +2851,13 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; ++ ++ #ifdef RPI ++- s->enable_rpi = 1; // TODO this should depend on cross component and frame width etc. +++ s->enable_rpi = s->ps.sps->bit_depth == 8 +++ && s->ps.sps->width <= RPI_MAX_WIDTH +++ && !s->ps.pps->cross_component_prediction_enabled_flag +++ && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1 +++ && !(s->ps.pps->weighted_pred_flag && s->sh.slice_type == P_SLICE) +++ && !(s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE); +++ ++ #endif ++ ++ if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 4f072be..38f53de 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1513,9 +1513,9 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ #ifdef RPI ++ if (!use_vpu) { ++ int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y); ++- if (max_xy == 0) +++ if (max_xy == 0) { ++ s->hevcdsp.idct_dc[log2_trafo_size-2](coeffs); ++- else { +++ } else { ++ int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4; ++ if (max_xy < 4) ++ col_limit = FFMIN(4, col_limit); ++-- ++2.7.4 ++ ++ ++From 3941d3e4c2305fa037e8aba5a14cf698ac8673db Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 09:42:16 +0100 ++Subject: [PATCH 24/68] Split B prediction into two passes ++ ++--- ++ libavcodec/hevc.c | 1 + ++ libavcodec/hevc.h | 1 + ++ 
libavcodec/rpi_qpu.c | 3 + ++ libavcodec/rpi_qpu.h | 1 + ++ libavcodec/rpi_shader.c | 559 +++++++++++++++++++++++++++------------------ ++ libavcodec/rpi_shader.h | 11 +- ++ libavcodec/rpi_shader.qasm | 196 ++++++++++++++-- ++ 7 files changed, 531 insertions(+), 241 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index bfd5a55..4b133d2 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3801,6 +3801,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ p += uv_commands_per_qpu; ++ } ++ s->mc_filter_uv = qpu_get_fn(QPU_MC_FILTER_UV); +++ s->mc_filter_uv_b0 = qpu_get_fn(QPU_MC_FILTER_UV_B0); ++ s->mc_filter_uv_b = qpu_get_fn(QPU_MC_FILTER_UV_B); ++ ++ } ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index d513579..4a39e39 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -917,6 +917,7 @@ typedef struct HEVCContext { ++ uint32_t *u_mvs[8]; ++ // Function pointers ++ uint32_t mc_filter_uv; +++ uint32_t mc_filter_uv_b0; ++ uint32_t mc_filter_uv_b; ++ #endif ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 4e90cc1..60bf079 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -636,6 +636,9 @@ unsigned int qpu_get_fn(int num) { ++ case QPU_MC_FILTER_UV: ++ fn = mc_filter_uv; ++ break; +++ case QPU_MC_FILTER_UV_B0: +++ fn = mc_filter_uv_b0; +++ break; ++ case QPU_MC_FILTER_UV_B: ++ fn = mc_filter_uv_b; ++ break; ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index f9ad333..543c84b 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -29,6 +29,7 @@ enum { ++ QPU_MC_FILTER_HONLY, ++ QPU_MC_SETUP_UV, ++ QPU_MC_FILTER_UV, +++ QPU_MC_FILTER_UV_B0, ++ QPU_MC_FILTER_UV_B, ++ QPU_MC_INTERRUPT_EXIT8, ++ QPU_MC_END ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 5d00cb2..88ad20b 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -39,18 +39,18 @@ unsigned int rpi_shader[] = { ++ /* [0x00000070] */ 
0x00000100, 0xe00205a7, // mov ra22, 256 ++ /* [0x00000078] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++ /* [0x00000080] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000088] */ 0x00000040, 0xe0021567, // mov rb21, 64 ++-/* [0x00000090] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000098] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x000000a0] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x000000a8] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x000000b0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x000000b8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x000000c0] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x000000c8] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x000000d0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x000000d8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000e0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000088] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000090] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000098] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x000000a0] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x000000a8] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x000000b0] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x000000b8] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x000000c0] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x000000c8] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x000000d0] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x000000d8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000000e0] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++ /* [0x000000e8] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++ /* [0x000000f0] */ 0x159e7480, 0x10020867, // mov r1, r2 ++ /* [0x000000f8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++@@ -62,176 +62,176 @@ unsigned int rpi_shader[] = { ++ /* [0x00000128] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++ /* [0x00000130] */ 0x0c9e7040, 0x100216e7, 
// add rb27, r0, r1 ++ /* [0x00000138] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000140] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000148] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000150] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000158] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000160] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000168] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000170] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000178] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000180] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000188] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x00000190] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x00000198] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x000001a0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000001a8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001b0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000001b8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001c0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++-/* [0x000001c8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001d0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x000001d8] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x000001e0] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x000001e8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000001f0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000001f8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000200] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000208] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000210] */ 0x0c741dc0, 
0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000218] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000220] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000228] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000230] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000140] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x00000148] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000150] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000158] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000160] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000168] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000170] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000178] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000180] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000188] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000190] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 +++/* [0x00000198] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) +++/* [0x000001a0] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 +++/* [0x000001a8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x000001b0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000001b8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x000001c0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000001c8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000001d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000001d8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001e0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 +++/* [0x000001e8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x000001f0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* 
[0x000001f8] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x00000200] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x00000208] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000218] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000220] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000228] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000230] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000238] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000240] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000248] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000250] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv ++-/* [0x00000238] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000240] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000248] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000250] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000258] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000260] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000268] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000270] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000278] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000280] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000288] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000290] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000298] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002a0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002a8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002b0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, 
r1 ++-/* [0x000002b8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002c0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000002c8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x000002d0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000002e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000002e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000002f0] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x000002f8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000300] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000308] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000310] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000318] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000320] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000340] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000350] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000360] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000368] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000370] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000378] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000380] */ 0xfffffff8, 0xe0021967, // mov 
r5rep, -8 ++-/* [0x00000388] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000390] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000258] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000260] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000268] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000270] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000278] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000280] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000288] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000290] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000298] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000002a0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000002a8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000002b0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002b8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002c0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002c8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002d0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002d8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002e0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000002e8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000002f0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002f8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000300] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000308] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000310] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 
+++/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000340] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000350] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000360] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000368] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000370] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000378] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000380] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000388] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000390] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000398] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x000003a0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000003a8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003b0] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x00000398] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003a0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000003a8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003b0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003b8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift 
; v8subs r0, r0, rb20 ++-/* [0x000003c0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003c8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003d0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003d8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003e0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000003e8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003f0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000003f8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000400] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000408] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000410] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000418] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000420] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000428] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000430] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000438] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000440] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000448] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000450] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000458] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000460] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000468] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000470] */ 
0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000478] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000480] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000488] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000490] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000498] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000004a0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000004a8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000004b0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000004b8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004c0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000004c8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000004d0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000004d8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x000004e0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x000004e8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x000004f0] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x000004f8] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000500] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000508] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000510] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000518] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000520] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000528] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000530] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000538] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000540] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000548] */ 0x129d63c0, 
0x10020867, // min r1, r1, rb22 ++-/* [0x00000550] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000558] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000560] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000568] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000570] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000578] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000580] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000588] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000590] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000598] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-// ::mc_filter_uv_b ++-/* [0x000005a0] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000005a8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000005b0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000005b8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000005c0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000005c8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000005d0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000005d8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000005e0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000005e8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000005f0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000005f8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000600] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000608] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000610] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000618] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000620] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000628] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 
++-/* [0x00000630] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000638] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000640] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000648] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000650] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000658] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000660] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000668] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000670] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x000003b8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003c0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000003c8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003d0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003d8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003e0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003e8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003f0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003f8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000400] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000408] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000410] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000418] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000420] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000428] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000430] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; 
mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000438] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000440] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000448] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000450] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000458] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000460] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000468] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000470] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000478] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000480] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000488] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000490] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000498] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x000004a0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x000004a8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x000004b0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x000004b8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000004c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000004c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000004d0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000004d8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004e0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000004e8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000004f0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000004f8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* 
[0x00000500] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000508] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000510] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000518] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000520] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000528] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000530] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000538] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000540] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000548] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000550] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000558] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000560] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000568] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000570] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000578] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000580] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000588] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000590] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000598] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000005a0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000005a8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000005b0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000005b8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter_uv_b0 +++/* [0x000005c0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000005c8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000005d0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* 
[0x000005d8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000005e0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000005e8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000005f0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000005f8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000600] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000608] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000610] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000618] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000620] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000628] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000630] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000638] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000640] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000648] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000650] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000658] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000660] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000668] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000670] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++ /* [0x00000678] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++ /* [0x00000680] */ 0x15827d80, 0x10020827, // mov r0, unif ++ /* [0x00000688] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++@@ -253,7 +253,7 @@ unsigned int rpi_shader[] = { ++ /* [0x00000708] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++ /* [0x00000710] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ /* [0x00000718] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++-// :uvloop_b +++// :uvloop_b0 ++ /* [0x00000720] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, 
r3, ra20 ; ldtmu0 ++ /* [0x00000728] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++ /* [0x00000730] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++@@ -290,7 +290,7 @@ unsigned int rpi_shader[] = { ++ /* [0x00000828] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++ /* [0x00000830] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++ /* [0x00000838] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000840] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000840] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++ /* [0x00000848] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++ /* [0x00000850] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++ /* [0x00000858] */ 0x009e7000, 0x100009e7, // nop ; nop ++@@ -306,48 +306,163 @@ unsigned int rpi_shader[] = { ++ /* [0x000008a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ /* [0x000008b0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++ /* [0x000008b8] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x000008c0] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x000008c8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000008d0] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x000008d8] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000008e0] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x000008e8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x000008f0] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x000008f8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000900] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000908] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000910] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000918] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000920] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000928] */ 
0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000930] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000938] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008c0] */ 0xfffffad8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000008c8] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x000008d0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000008d8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000008e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000008e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000008f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008f8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000900] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000908] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000910] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000918] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000920] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter_uv_b +++/* [0x00000928] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000930] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000938] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000940] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000948] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000950] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000958] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000960] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000968] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000970] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000978] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000980] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000988] */ 0x00000010, 
0xe00208a7, // mov r2, 16 +++/* [0x00000990] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000998] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000009a0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000009a8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000009b0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000009b8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000009c0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000009c8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x000009d0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x000009d8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x000009e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000009e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000009f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000009f8] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x00000a00] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000a08] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000a10] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a18] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a20] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a28] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000a30] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a38] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a40] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a48] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000a50] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a58] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a60] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 
r0, r0, ra22 +++/* [0x00000a68] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000a70] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a78] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a80] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a88] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000a90] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000a98] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000aa0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :uvloop_b +++/* [0x00000aa8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000ab0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000ab8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000ac0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000ac8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000ad0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000ad8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000ae0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000ae8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000af0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000af8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000b00] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000b08] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000b10] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000b18] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz 
r3, ra1 << 9, r1 << 9 +++/* [0x00000b20] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000b28] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000b30] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000b38] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000b40] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000b48] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000b50] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000b58] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000b60] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000b68] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000b70] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000b78] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000b80] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000b88] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000b90] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000b98] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000ba0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000ba8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000bb0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000bb8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000bc0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000bc8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000bd0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000bd8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000be0] */ 0x009e7000, 0x100009e7, // nop ; nop 
+++/* [0x00000be8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000bf0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000bf8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000c00] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000c08] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000c10] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000c18] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000c20] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000c28] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000c30] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000c38] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000c40] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000c48] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000c50] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000c58] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 +++/* [0x00000c60] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000c68] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000c70] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00000c78] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 +++/* [0x00000c80] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000c88] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000c90] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000c98] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000ca0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ca8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000cb0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000cb8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000cc0] */ 0x15827d80, 
0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000940] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000948] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000950] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000958] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000960] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000968] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000970] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000978] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000980] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000cc8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000cd0] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000cd8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ce0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ce8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000cf0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000cf8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000d00] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000d08] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000988] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000990] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000998] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009a0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009a8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009d0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009d8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009e0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000009e8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000009f0] */ 0x00000001, 0xe00209a7, // mov 
interrupt, 1; nop ++-/* [0x000009f8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000d10] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000d18] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d20] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d28] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d30] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d38] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d40] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d48] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d50] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d58] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d60] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d70] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000d78] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000d80] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index e36c4ae..809e582 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,10 +4,11 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 142) ++-#define mc_filter_uv_b (rpi_shader + 360) ++-#define mc_exit (rpi_shader + 592) ++-#define mc_interrupt_exit8 (rpi_shader + 610) ++-#define mc_end (rpi_shader + 640) +++#define mc_filter_uv (rpi_shader + 150) +++#define mc_filter_uv_b0 (rpi_shader + 368) +++#define mc_filter_uv_b (rpi_shader + 586) +++#define mc_exit (rpi_shader + 818) +++#define mc_interrupt_exit8 (rpi_shader + 836) +++#define mc_end (rpi_shader + 866) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 870437d2..635b894 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -26,7 +26,7 @@ ++ # ra23 8 ++ # 
++ # rb20 0xffffff00 ++-# rb21 64 +++# rb21 vpm_setup for writing 16bit results into VPM ++ # rb22 255 ++ # rb23 24 ++ # ++@@ -34,7 +34,7 @@ ++ # rb25 frame width-1 ++ # rb26 height<<23 + width<<16 + vdw_setup_0 ++ # rb27 vdw_setup_0 (depends on QPU number) ++-# rb28 vpm_setup (depends on QPU number) +++# rb28 vpm_setup (depends on QPU number) for writing 8bit results into VPM ++ # rb29 vdw_setup_1(dst_pitch-width) ++ # rb30 frame height-1 ++ # rb31 used as temp to count loop iterations ++@@ -69,8 +69,6 @@ ++ .set ra_y_next, ra28 ++ .set ra_y, ra29 ++ ++-.set rb_const_64, rb21 ++- ++ ++ ################################################################################ ++ # mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1, pad2) ++@@ -106,7 +104,6 @@ mov ra22, 256 ++ mov ra23, 8 ++ ++ mov rb20, 0xffffff00 ++-mov rb21, 64 ++ mov rb22, 255 ++ mov rb23, 24 ++ ++@@ -123,6 +120,7 @@ mov ra15, 0 ++ ++ # Compute part of VPM to use for DMA output ++ mov r2, qpu_num +++shl r2, r2, 1 # Convert QPU numbers to be even (this means we can only use 8 QPUs, but is necessary as we need to save 16bit intermediate results) ++ and r2, r2, 15 ++ mov r1, r2 ++ asr r1, r1, 2 ++@@ -135,16 +133,21 @@ shl r0, r0, 5 ++ add rb27, r0, r1 ++ ++ # Compute part of VPM to save data into ++-mov r2, qpu_num ++-and r2, r2, 15 ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vpm_setup(0, 4, h8p(0, 0)) +++mov r2, qpu_num # qpu_num = abcd +++shl r2, r2, 1 +++and r2, r2, 15 # r2 = bcd0 +++mov r1, r2 # r1 = bcd0 +++asr r1, r1, 2 # r1 = bc +++shl r1, r1, 6 # r1 = bc000000 +++mov r0, r2 # r0 = bcd0 +++and r0, r0, 3 # r0 = d0 +++add r0, r0, r1 # r0 = bc0000d0 +++mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit ++ add rb28, r0, r1 +++asr r0, r0, 1 # r0 = bc0000d +++# Prepare VPM command for 16bit intermediates +++mov r1, vpm_setup(0, 2, 
h16p(0, 0)) # 2 is stride - stride acts on ADDR which is Y[5:0],H[0] for 16 bit +++add rb21, r0, r1 ++ ++ # Compute base address for first and second access ++ mov r0, ra_x_base # Load x ++@@ -345,6 +348,171 @@ mov vw_addr, unif # start the VDW ++ ++ ################################################################################ ++ +++# mc_filter_uv_b0(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) +++ +++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x_base, ra_x16_base point to the current coordinates for this block +++::mc_filter_uv_b0 +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # x +++max r0, r0, 0; mov r1, unif # y +++min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base +++shl ra_xshift_next, r0, 3 +++sub r2, unif, r3 # compute offset from frame base u to frame base v +++add r0, r0, r3 +++and rb_x_base_next, r0, ~3 +++mov ra_y_next, r1 +++add ra_x2_base_next, rb_x_base_next, r2 +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code +++ +++# get filter coefficients +++ +++mov r0, unif +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++asr ra7, r0, rb23; 
mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++asr rb15, r0, rb23; mul24 r0, r0, ra22 +++asr rb14, r0, rb23; mul24 r0, r0, ra22 +++asr rb13, r0, rb23; mul24 r0, r0, ra22 +++asr rb12, r0, rb23 +++ +++# r2 is elem_num +++# r3 is loop counter +++ +++mov r5rep, -8 +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++mov r3, 0 +++ +++:uvloop_b0 +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment +++shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_x2_base, r2 +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++nop ; mul24 r2, r0, ra0 +++nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++nop ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 
+++add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++add r0, r2, r3 +++ +++mov r3, rb31 +++ +++mov ra8, ra9 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++mov ra13, ra14 +++ +++sub.setf -, r3, 8 ; mov r1, ra22 +++ +++# apply horizontal filter +++brr.anyn -, r:uvloop_b0 +++mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll +++asr ra15, r0, 8 ; nop +++nop ; nop # Delay slot 3 (TODO move more of the context scroll into here) +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r1, ra14, rb14 +++nop ; mul24 r0, ra13, rb13 +++add r1, r1, r0 ; mul24 r0, ra12, rb12 +++add r1, r1, r0 ; mul24 r0, ra11, rb11 +++add r1, r1, r0 ; mul24 r0, ra10, rb10 +++add r1, r1, r0 ; mul24 r0, ra9, rb9 +++add r1, r1, r0 ; mul24 r0, ra8, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb15 +++add r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++asr r1, r1, 14 +++add r1, r1, ra21 +++brr.anyn -, r:uvloop +++asr r1, r1, 6 # Delay 1 +++min r1, r1, rb22 # Delay 2 +++max vpm, r1, 0 # Delay 3 +++ +++# DMA out for U +++ +++mov vw_setup, rb26 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++# DMA out for V +++# We need to wait for the U to complete first, but have nothing useful to compute while we wait. +++# Could potentially push this write into the start of the next pipeline stage. 
+++mov r0, 16 +++mov -, vw_wait +++ +++bra -, ra31 +++add vw_setup, rb26, r0 # VDW setup 0 +++mov vw_setup, rb29 # Stride +++mov vw_addr, unif # start the VDW +++ +++################################################################################ +++ ++ ::mc_filter_uv_b ++ mov ra31, unif ++ ++-- ++2.7.4 ++ ++ ++From 85d0ffa2bcf6a2b94c1a0c8f84241cda9ac92ce2 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 10:04:55 +0100 ++Subject: [PATCH 25/68] Switch to using 16bit temp buffers ++ ++--- ++ libavcodec/hevc.c | 2 +- ++ libavcodec/rpi_shader.c | 4 ++-- ++ libavcodec/rpi_shader.qasm | 10 +++++----- ++ 3 files changed, 8 insertions(+), 8 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 4b133d2..28a6660 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2147,7 +2147,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++- u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b0; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 88ad20b..ffd3a07 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -220,7 +220,7 @@ unsigned int rpi_shader[] = { ++ /* [0x00000600] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++ /* [0x00000608] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++ /* [0x00000610] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000618] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000618] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++ /* [0x00000620] */ 
0x00000010, 0xe00208a7, // mov r2, 16 ++ /* [0x00000628] */ 0x15827d80, 0x10020827, // mov r0, unif ++ /* [0x00000630] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++@@ -346,7 +346,7 @@ unsigned int rpi_shader[] = { ++ /* [0x000009e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++ /* [0x000009e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++ /* [0x000009f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000009f8] */ 0x0c9dc7c0, 0x10020c67, // add vr_setup, r3, rb28 +++/* [0x000009f8] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++ /* [0x00000a00] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++ /* [0x00000a08] */ 0x15827d80, 0x10020827, // mov r0, unif ++ /* [0x00000a10] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 635b894..9577121 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -26,7 +26,7 @@ ++ # ra23 8 ++ # ++ # rb20 0xffffff00 ++-# rb21 vpm_setup for writing 16bit results into VPM +++# rb21 vpm_setup for reading/writing 16bit results into VPM ++ # rb22 255 ++ # rb23 24 ++ # ++@@ -370,8 +370,8 @@ and rb_x_base_next, r0, ~3 ++ mov ra_y_next, r1 ++ add ra_x2_base_next, rb_x_base_next, r2 ++ ++-# set up VPM write ++-mov vw_setup, rb28 +++# set up VPM write, we need to save 16bit precision +++mov vw_setup, rb21 ++ ++ # get width,height of block ++ mov r2, 16 ++@@ -554,8 +554,8 @@ add r0, r0, r1 # Combine width and height of destination area ++ shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++ add rb26, r0, rb27 ++ ++-# In a B frame, so also set up VPM read ++-add vr_setup, r3, rb28 +++# In a B frame, so also set up VPM read (reading back 16bit precision) +++add vr_setup, r3, rb21 ++ ++ sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++ ++-- ++2.7.4 ++ ++ ++From abc51bf61df597082fbd7cf1bba5031e4d44318b Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz 
<peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 10:30:44 +0100 ++Subject: [PATCH 26/68] Corrected B prediction: matching md5 sum for hobbit50 ++ ++--- ++ libavcodec/rpi_shader.c | 815 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 12 +- ++ libavcodec/rpi_shader.qasm | 36 +- ++ 3 files changed, 429 insertions(+), 434 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index ffd3a07..77cca46 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -38,431 +38,428 @@ unsigned int rpi_shader[] = { ++ /* [0x00000068] */ 0x00000020, 0xe0020567, // mov ra21, 32 ++ /* [0x00000070] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++ /* [0x00000078] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++-/* [0x00000080] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000088] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000090] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x00000098] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x000000a0] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x000000a8] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x000000b0] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x000000b8] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x000000c0] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x000000c8] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x000000d0] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000d8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x000000e0] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-/* [0x000000e8] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x000000f0] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x000000f8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000100] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000108] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000110] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000118] */ 0x0c9e7040, 0x10020827, // 
add r0, r0, r1 ++-/* [0x00000120] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000128] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000130] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000138] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000140] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-/* [0x00000148] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000150] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000158] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000160] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000168] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000170] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000178] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000180] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000188] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000190] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 ++-/* [0x00000198] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) ++-/* [0x000001a0] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 ++-/* [0x000001a8] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x000001b0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000001b8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x000001c0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000001c8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000001d8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001e0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++-/* [0x000001e8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001f0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x000001f8] */ 0x8c9e7052, 
0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x00000200] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00000208] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000080] */ 0x00000040, 0xe00207a7, // mov ra30, 64 +++/* [0x00000088] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000090] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000098] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x000000a0] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x000000a8] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x000000b0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x000000b8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x000000c0] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x000000c8] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x000000d0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x000000d8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x000000e0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000000e8] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x000000f0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x000000f8] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000100] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000108] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000110] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000118] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000120] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000128] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000130] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000138] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000140] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000148] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x00000150] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000158] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000160] */ 0x0f9c23c0, 
0xd0020867, // asr r1, r1, 2 +++/* [0x00000168] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000170] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000178] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000180] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000188] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000190] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000198] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 +++/* [0x000001a0] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) +++/* [0x000001a8] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 +++/* [0x000001b0] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base +++/* [0x000001b8] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000001c0] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base +++/* [0x000001c8] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000001d0] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000001d8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000001e0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001e8] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 +++/* [0x000001f0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x000001f8] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x00000200] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 +++/* [0x00000208] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++ /* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif ++ /* [0x00000218] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000220] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000228] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000230] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000238] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 
++-/* [0x00000240] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000248] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000250] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000220] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000228] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000230] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000238] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000240] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000248] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000250] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000258] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv ++-/* [0x00000258] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000260] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000268] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000270] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000278] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000280] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000288] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000290] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000298] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000002a0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000002a8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000002b0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002b8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002c0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002c8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002d0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002d8] */ 0x149d61c0, 
0x10020827, // and r0, r0, rb22 ++-/* [0x000002e0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000002e8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x000002f0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002f8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000300] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000308] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000310] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000340] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000350] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000360] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000368] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000370] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000378] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000380] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000388] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000390] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000398] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x000003a0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000003a8] */ 
0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003b0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000260] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000268] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000270] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000278] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000280] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000288] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000290] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000298] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000002a0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000002a8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000002b0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000002b8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002c0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002c8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002d0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002d8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002e0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002e8] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000002f0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000002f8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000310] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000318] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000320] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000328] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 
0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000340] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000348] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000350] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000360] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000368] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000370] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000378] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000380] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000388] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000390] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000398] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000003a0] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x000003a8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000003b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003b8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x000003b8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003c0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000003c8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003d0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003d8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* 
[0x000003e0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003e8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003f0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003f8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000400] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000408] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000410] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000418] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000420] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000428] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000430] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000438] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000440] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000448] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000450] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000458] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000460] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000468] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000470] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000478] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000480] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000488] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000490] */ 0x0c9e74c0, 0x10020827, // add r0, 
r2, r3 ++-/* [0x00000498] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x000004a0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x000004a8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x000004b0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x000004b8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000004c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000004c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000004d0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000004d8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004e0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000004e8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000004f0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000004f8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000500] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000508] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000510] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000518] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000520] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000528] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000530] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000538] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000540] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000548] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000550] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000558] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000560] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000568] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* 
[0x00000570] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000578] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000580] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000588] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000590] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000598] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000005a0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000005a8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000005b0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000005b8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000003c0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003c8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000003d0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003d8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003e0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003e8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003f0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003f8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000400] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000408] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000410] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000418] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000420] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000428] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000430] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 
+++/* [0x00000438] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000440] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000448] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000450] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000458] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000460] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000468] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000470] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000478] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000480] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000488] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000490] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000498] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x000004a0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x000004a8] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x000004b0] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x000004b8] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x000004c0] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x000004c8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000004d0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000004d8] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x000004e0] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004e8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000004f0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000004f8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000500] */ 
0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000508] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000510] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000518] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000520] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000528] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000530] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000538] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000540] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000548] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000550] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000558] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000560] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000568] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000570] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000578] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000580] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000588] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000590] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000598] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000005a0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000005a8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000005b0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000005b8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000005c0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x000005c0] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000005c8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000005d0] */ 
0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000005d8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000005e0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000005e8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000005f0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000005f8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000600] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000608] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000610] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000618] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000620] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000628] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000630] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000638] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000640] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000648] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000650] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000658] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000660] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000668] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000670] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000678] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000680] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000688] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000690] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000698] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000006a8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 
++-/* [0x000006b0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006b8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000006c8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000006e8] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f0] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f8] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000700] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000708] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000710] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000718] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000005c8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000005d0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000005d8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000005e0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000005e8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000005f0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000005f8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000600] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000608] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000610] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000618] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* 
[0x00000620] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000628] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000630] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000638] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000640] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000648] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000650] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000658] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000660] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000668] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000670] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000678] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000680] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000688] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000690] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000698] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000006b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000006d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000006e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000006f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; 
mul24 r0, r0, ra22 +++/* [0x000006f8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000700] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000708] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000710] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000718] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000720] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000720] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000728] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000730] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000738] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000740] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000748] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000750] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000758] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000760] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000768] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000770] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000778] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000780] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000788] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000790] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000798] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000007a0] */ 0x400b6031, 0xd000c9e3, 
// nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000007a8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000007b0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000007b8] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000007c0] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000007c8] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000007d0] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000007d8] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000007e0] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000007e8] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000007f0] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x000007f8] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000800] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000808] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000810] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000818] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000820] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000828] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000830] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000838] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000840] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000848] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000850] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000858] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000860] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000868] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* 
[0x00000870] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000878] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000880] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000888] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000890] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000898] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000008a0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000008a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000008b0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000008b8] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x000008c0] */ 0xfffffad8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000008c8] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x000008d0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000008d8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000008e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000008e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000008f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000008f8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000900] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000908] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000910] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000918] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000920] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000728] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000730] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000738] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* 
[0x00000740] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000748] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000750] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000758] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000760] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000768] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000770] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000778] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000780] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000788] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000790] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000798] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000007a0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000007a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000007b0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000007b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000007c0] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000007c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000007d0] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000007d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000007e0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000007e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000007f0] */ 0x4c1f94f0, 0xd00248a3, // add 
r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000007f8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000800] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000808] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000810] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000818] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000820] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000828] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000830] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000838] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000840] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000848] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000850] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000858] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000860] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000868] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000870] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000878] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000880] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000888] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000890] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000898] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x000008a0] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000008a8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000008b0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000008b8] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000008c0] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 +++/* 
[0x000008c8] */ 0x009e7000, 0x100009e7, // nop +++/* [0x000008d0] */ 0x009e7000, 0x100009e7, // nop +++/* [0x000008d8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000008e0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000008e8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008f0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000008f8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000900] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000908] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000910] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000918] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x00000928] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000930] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000938] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000940] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000948] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000950] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000958] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000960] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000968] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000970] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000978] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000980] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000988] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000990] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000998] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000009a0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000009a8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000009b0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 
++-/* [0x000009b8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x000009c0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000009c8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x000009d0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x000009d8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x000009e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000009e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000009f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000009f8] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000a00] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000a08] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000a10] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a18] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a20] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a28] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000a30] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a38] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a40] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a48] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000a50] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a58] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a60] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a68] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000a70] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a78] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a80] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, 
ra22 ++-/* [0x00000a88] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000a90] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000a98] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000aa0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000920] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000928] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000930] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000938] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000940] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000948] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000950] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000958] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000960] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000968] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000970] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000978] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000980] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000988] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000990] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000998] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000009a0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000009a8] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000009b0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000009b8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000009c0] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x000009c8] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x000009d0] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x000009d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000009e0] */ 
0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000009e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000009f0] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x000009f8] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000a00] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000a08] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a10] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a18] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a20] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000a28] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a30] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a38] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a40] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000a48] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a50] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a58] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a60] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000a68] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a70] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a78] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000a80] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000a88] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000a90] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000a98] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000aa8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, 
r3, ra20 ; ldtmu0 ++-/* [0x00000ab0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000ab8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000ac0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000ac8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000ad0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000ad8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000ae0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000ae8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000af0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000af8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000b00] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000b08] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000b10] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000b18] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000b20] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000b28] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000b30] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000b38] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000b40] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000b48] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000b50] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000b58] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 
++-/* [0x00000b60] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000b68] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000b70] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000b78] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000b80] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000b88] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000b90] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000b98] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000ba0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000ba8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000bb0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000bb8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000bc0] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000bc8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000bd0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000bd8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000be0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000be8] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000bf0] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000bf8] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000c00] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000c08] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000c10] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000c18] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000c20] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000c28] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* 
[0x00000c30] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000c38] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000c40] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000c48] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000c50] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000c58] */ 0x0cc01dc0, 0xd0020827, // add r0, vpm, 1 ++-/* [0x00000c60] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000c68] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000c70] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x00000c78] */ 0x0e9c13c0, 0xd0020c27, // shr vpm, r1, 1 ++-/* [0x00000c80] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000c88] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000c90] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000c98] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000ca0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000ca8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000cb0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000cb8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000cc0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000aa0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000aa8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000ab0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000ab8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000ac0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000ac8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000ad0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000ad8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, 
r2, r3 +++/* [0x00000ae0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000ae8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000af0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000af8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000b00] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000b08] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000b10] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000b18] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000b20] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000b28] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000b30] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000b38] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000b40] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000b48] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000b50] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000b58] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000b60] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000b68] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000b70] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000b78] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000b80] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000b88] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000b90] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000b98] */ 
0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000ba0] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000ba8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000bb0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000bb8] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000bc0] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000bc8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000bd0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000bd8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000be0] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000be8] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000bf0] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000bf8] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000c00] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000c08] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000c10] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000c18] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000c20] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000c28] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000c30] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000c38] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000c40] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000c48] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000c50] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000c58] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000c60] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000c68] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000c70] */ 
0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000c78] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000c80] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000c88] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000c90] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000c98] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000ca0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000ca8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000cc8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000cd0] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000cb0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000cb8] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000cc0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000cc8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000cd0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000cd8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ce0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ce8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000cf0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000cf8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000d00] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000d08] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000ce0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ce8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000cf0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000d10] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000cf8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000d00] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d08] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d10] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000d18] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d20] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d28] */ 
0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d30] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000d20] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d28] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000d30] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000d38] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000d40] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000d48] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000d50] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d58] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d60] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d70] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000d78] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000d80] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000d58] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000d60] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000d68] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 809e582..6562fa9 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,11 +4,11 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 150) ++-#define mc_filter_uv_b0 (rpi_shader + 368) ++-#define mc_filter_uv_b (rpi_shader + 586) ++-#define mc_exit (rpi_shader + 818) ++-#define mc_interrupt_exit8 (rpi_shader + 836) ++-#define mc_end (rpi_shader + 866) +++#define mc_filter_uv (rpi_shader + 152) +++#define mc_filter_uv_b0 (rpi_shader + 370) +++#define mc_filter_uv_b (rpi_shader + 584) +++#define mc_exit (rpi_shader + 812) +++#define mc_interrupt_exit8 (rpi_shader + 830) +++#define mc_end (rpi_shader + 860) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm 
b/libavcodec/rpi_shader.qasm ++index 9577121..562dc35 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -39,13 +39,13 @@ ++ # rb30 frame height-1 ++ # rb31 used as temp to count loop iterations ++ # ++-# ra24...ra30 15, 14, 13, 12, 11, 10, 9 ++ # ra24 clipped(row start address+8+elem_num)&~3 ++ # ra25 per-channel shifts 2 ++ # ra26 next ra24 ++ # ra27 next ra25 ++ # ra28 next y ++ # ra29 y for next texture access +++# ra30 64 ++ # ++ # ra31 next kernel address ++ ++@@ -102,6 +102,7 @@ mov ra20, 1 ++ mov ra21, 32 ++ mov ra22, 256 ++ mov ra23, 8 +++mov ra30, 64 ++ ++ mov rb20, 0xffffff00 ++ mov rb22, 255 ++@@ -472,7 +473,7 @@ sub.setf -, r3, 8 ; mov r1, ra22 ++ # apply horizontal filter ++ brr.anyn -, r:uvloop_b0 ++ mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll ++-asr ra15, r0, 8 ; nop +++asr ra15, r0, 8 ; nop # TODO isn't ra15 already in 24bit precision, may not need the sign extension here? ++ nop ; nop # Delay slot 3 (TODO move more of the context scroll into here) ++ ++ # apply vertical filter and write to VPM ++@@ -487,18 +488,18 @@ add r1, r1, r0 ; mul24 r0, ra8, rb8 ++ add r1, r1, r0 ; mul24 r0, ra15, rb15 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 14 ++-add r1, r1, ra21 ++-brr.anyn -, r:uvloop ++-asr r1, r1, 6 # Delay 1 ++-min r1, r1, rb22 # Delay 2 ++-max vpm, r1, 0 # Delay 3 +++#asr r1, r1, 14 +++#add r1, r1, ra21 +++brr.anyn -, r:uvloop_b0 +++asr vpm, r1, 14 # Delay 1 shifts down by shift2=6, but results are still in 16bit precision TODO may be able to avoid the mul24 and use more delay slots +++nop # Delay 2 +++nop # Delay 3 ++ ++ # DMA out for U ++ ++ mov vw_setup, rb26 # VDW setup 0 ++ mov vw_setup, rb29 # Stride ++-mov vw_addr, unif # start the VDW +++mov vw_addr, unif # start the VDW # TODO in pass0 we don't need to save any results ++ ++ # DMA out for V ++ # We need to wait for the U to complete first, but have nothing useful to compute while we wait. 
++@@ -639,12 +640,11 @@ mov ra12, ra13 ++ mov ra13, ra14 ++ ++ sub.setf -, r3, 8 ; mov r1, ra22 ++- ++ # apply horizontal filter ++ brr.anyn -, r:uvloop_b ++ mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++ asr ra15, r0, 8 ; nop ++-nop ; nop +++nop ; nop # TODO improve use of delay slots ++ ++ # apply vertical filter and write to VPM ++ ++@@ -658,15 +658,13 @@ add r1, r1, r0 ; mul24 r0, ra8, rb8 ++ add r1, r1, r0 ; mul24 r0, ra15, rb15 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 14 ++-add r1, r1, ra21 ++-asr r1, r1, 6 ++-min r1, r1, rb22 ++-add r0, vpm, 1 # Blend in previous VPM contents at this location +++asr r1, r1, 14 # shift2=6 +++add r1, r1, vpm # Blend in previous VPM contents at this location +++add r1, r1, ra30 ++ brr.anyn -, r:uvloop_b ++-max r1, r1, 0 ++-add r1, r1, r0 ++-shr vpm, r1, 1 +++asr r1, r1, 7 # Delay 1 +++min r1, r1, rb22 # Delay 2 +++max vpm, r1, 0 # Delay 3 ++ ++ ++ # DMA out for U ++-- ++2.7.4 ++ ++ ++From ea60373134f98099c4ebaf0d23cca666008b4bba Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 10:55:07 +0100 ++Subject: [PATCH 27/68] P prediction uses 4 tap filters ++ ++--- ++ libavcodec/hevc.c | 50 ++-- ++ libavcodec/rpi_shader.c | 631 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 10 +- ++ libavcodec/rpi_shader.qasm | 43 +-- ++ 4 files changed, 344 insertions(+), 390 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 28a6660..a47ebc5 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -65,15 +65,15 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ #define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++ // TODO Chroma only needs 4 taps ++-static uint32_t rpi_filter_coefs[8][2] = { ++- { ENCODE_COEFFS( 0, 0, 0, 64), ENCODE_COEFFS( 
0, 0, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -2, 58), ENCODE_COEFFS( 10, -2, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -4, 54), ENCODE_COEFFS( 16, -2, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -6, 46), ENCODE_COEFFS( 28, -4, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -4, 36), ENCODE_COEFFS( 36, -4, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -4, 28), ENCODE_COEFFS( 46, -6, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -2, 16), ENCODE_COEFFS( 54, -4, 0, 0 ) }, ++- { ENCODE_COEFFS( 0, 0, -2, 10), ENCODE_COEFFS( 58, -2, 0, 0 ) } +++static uint32_t rpi_filter_coefs[8][1] = { +++ { ENCODE_COEFFS( 0, 64, 0, 0) }, +++ { ENCODE_COEFFS( -2, 58, 10, -2) }, +++ { ENCODE_COEFFS( -4, 54, 16, -2) }, +++ { ENCODE_COEFFS( -6, 46, 28, -4) }, +++ { ENCODE_COEFFS( -4, 36, 36, -4) }, +++ { ENCODE_COEFFS( -4, 28, 46, -6) }, +++ { ENCODE_COEFFS( -2, 16, 54, -4) }, +++ { ENCODE_COEFFS( -2, 10, 58, -2) } ++ }; ++ ++ static uint32_t get_vc_address(AVBufferRef *bref) { ++@@ -2027,16 +2027,16 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); ++ // TODO chroma weight and offset... 
s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++- *u++ = rpi_filter_coefs[_mx][1]; +++ u++; ++ *u++ = rpi_filter_coefs[_my][0]; ++- *u++ = rpi_filter_coefs[_my][1]; +++ u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2084,16 +2084,16 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); ++ // TODO chroma weight and offset... 
s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++- *u++ = rpi_filter_coefs[_mx][1]; +++ u++; ++ *u++ = rpi_filter_coefs[_my][0]; ++- *u++ = rpi_filter_coefs[_my][1]; +++ u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2148,29 +2148,29 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b0; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 3 + start_x; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? 
nPbH_c : 16); ++ *u++ = rpi_filter_coefs[_mx][0]; ++- *u++ = rpi_filter_coefs[_mx][1]; +++ u++; ++ *u++ = rpi_filter_coefs[_my][0]; ++- *u++ = rpi_filter_coefs[_my][1]; +++ u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); // TODO this will become unused once we have a dedicated pass0 filter ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = x2_c - 3 + start_x; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = y2_c - 3 + start_y; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = x2_c - 1 + start_x; +++ u++[-RPI_CHROMA_COMMAND_WORDS] = y2_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); ++ // TODO chroma weight and offset... 
s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx2][0]; ++- *u++ = rpi_filter_coefs[_mx2][1]; +++ u++; ++ *u++ = rpi_filter_coefs[_my2][0]; ++- *u++ = rpi_filter_coefs[_my2][1]; +++ u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 77cca46..c8d0728 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -116,8 +116,8 @@ unsigned int rpi_shader[] = { ++ /* [0x000002d0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++ /* [0x000002d8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++ /* [0x000002e0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002e8] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000002f0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000002e8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000002f0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++ /* [0x000002f8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++ /* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++ /* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++@@ -128,338 +128,315 @@ unsigned int rpi_shader[] = { ++ /* [0x00000330] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ /* [0x00000338] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ /* [0x00000340] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000348] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000350] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000360] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif 
++-/* [0x00000368] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000370] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000378] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000380] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000388] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000390] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000398] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000003a0] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x000003a8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000003b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003b8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000348] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000350] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000360] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000368] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000370] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000378] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000380] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x000003c0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003c8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000003d0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003d8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003e0] */ 
0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003e8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003f0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003f8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000400] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000408] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000410] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000418] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000420] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000428] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000430] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000438] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000440] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000448] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000450] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000458] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000460] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000468] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000470] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000478] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000480] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000488] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000490] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, 
ra7 << 15, r1 << 15 ++-/* [0x00000498] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x000004a0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x000004a8] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x000004b0] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x000004b8] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x000004c0] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x000004c8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000004d0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000004d8] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x000004e0] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004e8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000004f0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000004f8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000500] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000508] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000510] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000518] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000520] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000528] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000530] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000538] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000540] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000548] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000550] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000558] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000560] */ 0xfffffe40, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000568] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 
++-/* [0x00000570] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000578] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000580] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000588] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000590] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000598] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000005a0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000005a8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000005b0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000005b8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000005c0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000388] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000390] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000398] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003a0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003a8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003b0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003c0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003c8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000003d0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000003d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003e0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003e8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000003f0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* 
[0x000003f8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000400] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000408] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000410] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000418] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000420] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000428] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000430] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000438] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000440] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 +++/* [0x00000448] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000450] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000458] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000460] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000468] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000470] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000478] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000480] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000488] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000490] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000498] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000004a0] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x000004a8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004b0] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x000004b8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000004c0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000004c8] */ 0x159dafc0, 
0x10021c67, // mov vw_setup, rb26 +++/* [0x000004d0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004d8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000004e0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000004e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000004f0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000004f8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000500] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000508] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x000005c8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000005d0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000005d8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000005e0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000005e8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000005f0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000005f8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000600] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000608] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000610] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000618] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000620] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000628] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000630] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000638] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000640] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000648] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000650] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000658] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000660] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* 
[0x00000668] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000670] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000678] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000680] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000688] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000690] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000698] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000006b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000006d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000006f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000006f8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000700] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000708] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000710] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000718] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000720] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000510] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000518] */ 
0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000520] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000528] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000530] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000538] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000540] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000548] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000550] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000558] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000560] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000568] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000570] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000578] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000580] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000588] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000590] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000598] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000005a0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x000005a8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000005b0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000005b8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000005c0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000005c8] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x000005d0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005d8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005f0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* 
[0x000005f8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000600] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000608] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000610] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000618] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000620] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000628] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000630] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000638] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000640] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000648] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000650] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000658] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000668] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000728] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000730] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000738] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000740] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000748] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000750] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000758] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000760] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 
++-/* [0x00000768] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000770] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000778] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000780] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000788] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000790] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000798] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000007a0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000007a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000007b0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000007b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000007c0] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000007c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000007d0] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000007d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000007e0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000007e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000007f0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000007f8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000800] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000808] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000810] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000818] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000820] */ 0x152e7d80, 
0x100202a7, // mov ra10, ra11 ++-/* [0x00000828] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000830] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000838] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000840] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000848] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000850] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000858] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000860] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000868] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000870] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000878] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000880] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000888] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000890] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000898] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x000008a0] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000008a8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000008b0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000008b8] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000008c0] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 ++-/* [0x000008c8] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x000008d0] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x000008d8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000008e0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000008e8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000008f0] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000008f8] */ 0x159f2fc0, 0x100009e7, // mov -, 
vw_wait ++-/* [0x00000900] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000908] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000910] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000918] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000670] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000678] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000680] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000688] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000690] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000698] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000006a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000006a8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000006b0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000006b8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000006c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000006c8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000006d0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000006d8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000006e0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000006e8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000006f0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000006f8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000700] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* 
[0x00000708] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000710] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000718] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000720] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000728] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000730] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000738] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000740] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000748] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000750] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000758] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000760] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000768] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000770] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000778] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000780] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000788] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000790] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000798] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000007a0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000007a8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000007b0] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x000007b8] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x000007c0] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x000007c8] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x000007d0] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; 
mul24 r0, ra10, rb10 +++/* [0x000007d8] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x000007e0] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x000007e8] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x000007f0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000007f8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000800] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000808] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 +++/* [0x00000810] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000818] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000820] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000828] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000830] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000838] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000840] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000848] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000850] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000858] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000860] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x00000920] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000928] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000930] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000938] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000940] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000948] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000950] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000958] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000960] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* 
[0x00000968] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000970] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000978] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000980] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000988] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000990] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000998] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000009a0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000009a8] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000009b0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x000009b8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000009c0] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x000009c8] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x000009d0] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x000009d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000009e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000009e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000009f0] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x000009f8] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000a00] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000a08] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a10] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a18] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a20] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000a28] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a30] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a38] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a40] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif 
++-/* [0x00000a48] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a50] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a58] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a60] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000a68] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a70] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a78] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000a80] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000a88] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000a90] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000a98] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000868] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000870] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000878] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000880] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000888] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000890] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000898] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000008a0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000008a8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000008b0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000008b8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000008c0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000008c8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000008d0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008d8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* 
[0x000008e0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000008e8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000008f0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x000008f8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000900] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000908] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000910] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000918] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000920] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000928] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000930] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000938] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000940] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000948] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000950] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000958] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000960] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000968] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000970] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000978] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000980] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000988] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000990] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000998] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000009a0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000009a8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000009b0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, 
rb23; mul24 r0, r0, ra22 +++/* [0x000009b8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000009c0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000009c8] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x000009d0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000009d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000009e0] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000aa0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000aa8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000ab0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000ab8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000ac0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000ac8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000ad0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000ad8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000ae0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000ae8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000af0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000af8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000b00] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000b08] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000b10] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000b18] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000b20] */ 0x400b6031, 
0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000b28] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000b30] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000b38] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000b40] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000b48] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000b50] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000b58] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000b60] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000b68] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000b70] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000b78] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000b80] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000b88] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000b90] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000b98] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000ba0] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000ba8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000bb0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000bb8] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000bc0] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000bc8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000bd0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000bd8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000be0] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000be8] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, 
rb13 ++-/* [0x00000bf0] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000bf8] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000c00] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000c08] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000c10] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000c18] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000c20] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000c28] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000c30] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000c38] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000c40] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000c48] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000c50] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000c58] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000c60] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000c68] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000c70] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000c78] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000c80] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000c88] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000c90] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000c98] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000ca0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000ca8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000009e8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000009f0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000009f8] */ 
0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000a00] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000a08] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000a10] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000a18] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000a20] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000a28] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000a30] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000a38] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000a40] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000a48] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000a50] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000a58] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000a60] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000a68] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000a70] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000a78] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000a80] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00000a88] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00000a90] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00000a98] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00000aa0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00000aa8] */ 0x401b2031, 0xd000c9e3, // nop ; 
mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00000ab0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00000ab8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000ac0] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000ac8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000ad0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000ad8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000ae0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000ae8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000af0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000af8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000b00] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000b08] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000b10] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000b18] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000b20] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b28] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000b30] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000b38] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000b40] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000b48] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000b50] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000b58] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000b60] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000b68] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000b70] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000b78] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 
+++/* [0x00000b80] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000b88] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000b90] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000b98] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000ba0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000ba8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000bb0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000bb8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000bc0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000bc8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000bd0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000bd8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000be0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000be8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000bf0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000cb0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000cb8] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000cc0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000cc8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000cd0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000cd8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ce0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ce8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000cf0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000bf8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000c00] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000c08] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c10] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c18] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c20] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c28] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000c30] */ 0x009e7000, 
0x100009e7, // nop ; nop +++/* [0x00000c38] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000cf8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000d00] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d08] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d10] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d18] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000d20] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d28] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d30] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d38] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d40] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d48] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d50] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000d58] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000d60] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000d68] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000c40] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000c48] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c50] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c58] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c60] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000c68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c80] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c88] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c90] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000c98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ca0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ca8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000cb0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef 
__HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 6562fa9..1bf7a68 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -5,10 +5,10 @@ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 152) ++-#define mc_filter_uv_b0 (rpi_shader + 370) ++-#define mc_filter_uv_b (rpi_shader + 584) ++-#define mc_exit (rpi_shader + 812) ++-#define mc_interrupt_exit8 (rpi_shader + 830) ++-#define mc_end (rpi_shader + 860) +++#define mc_filter_uv_b0 (rpi_shader + 324) +++#define mc_filter_uv_b (rpi_shader + 538) +++#define mc_exit (rpi_shader + 766) +++#define mc_interrupt_exit8 (rpi_shader + 784) +++#define mc_end (rpi_shader + 814) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 562dc35..8e4f18f 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -16,8 +16,8 @@ ++ # ra19 next ra17 ++ # ++ # rb16 pitch ++-# rb17 height + 5 ++-# rb18 height + 7 +++# rb17 height + 1 +++# rb18 height + 3 ++ # rb19 next ra16 ++ # ++ # ra20 1 ++@@ -214,8 +214,8 @@ mov r0, unif ++ shr r1, r0, r2 # Extract width ++ sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++ and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 +++add rb17, r0, 1 +++add rb18, r0, 3 ++ shl r0, r0, 7 ++ add r0, r0, r1 # Combine width and height of destination area ++ shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++@@ -230,18 +230,11 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif +++ mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, 
r0, rb23; mov r0, unif ++-asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-asr rb12, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++@@ -283,26 +276,14 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++ add r0, r2, r3 ++ ++ mov r3, rb31 ++ ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++ mov ra12, ra13 ++ mov ra13, ra14 ++ ++-sub.setf -, r3, 8 ; mov r1, ra22 +++sub.setf -, r3, 4 ; mov r1, ra22 ++ ++ # apply horizontal filter ++ brr.anyn -, r:uvloop ++@@ -312,14 +293,10 @@ nop ; nop # Delay slot 3 (TODO move more of the context scr ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r1, ra14, rb14 ++-nop ; mul24 r0, ra13, rb13 ++-add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-add r1, r1, r0 ; mul24 r0, ra15, rb15 +++nop ; mul24 r1, ra14, rb10 +++nop ; mul24 r0, ra13, rb9 +++add r1, r1, r0 ; mul24 r0, ra12, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb11 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ asr r1, r1, 14 ++-- ++2.7.4 ++ ++ ++From e4bdd110d4640519b751ab428e7976a1e9a15802 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 11:03:51 +0100 ++Subject: [PATCH 28/68] Optimised B0 pass ++ ++--- ++ 
libavcodec/rpi_shader.c | 424 +++++++++++++++++++++------------------------ ++ libavcodec/rpi_shader.h | 8 +- ++ libavcodec/rpi_shader.qasm | 43 +---- ++ 3 files changed, 212 insertions(+), 263 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index c8d0728..1f63ee0 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -204,239 +204,215 @@ unsigned int rpi_shader[] = { ++ /* [0x00000580] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++ /* [0x00000588] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++ /* [0x00000590] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000598] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000005a0] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000598] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000005a0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++ /* [0x000005a8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++ /* [0x000005b0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++ /* [0x000005b8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++ /* [0x000005c0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000005c8] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x000005d0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005d8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005f0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000005f8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000600] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000608] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000610] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000618] */ 0x4f5971c6, 
0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000620] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000628] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000630] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000638] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000640] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000648] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000650] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000658] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000668] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000005c8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005d0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005d8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005f0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005f8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000600] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000608] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000610] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000618] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000620] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000628] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000670] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, 
rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000678] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000680] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000688] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000690] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000698] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000006a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000006a8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000006b0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000006b8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000006c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000006c8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000006d0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000006d8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000006e0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000006e8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000006f0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000006f8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000700] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000708] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000710] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000718] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000720] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, 
ra5 << 13, r1 << 13 ++-/* [0x00000728] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000730] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000738] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000740] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000748] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000750] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000758] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000760] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000768] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000770] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000778] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000780] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000788] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000790] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000798] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000007a0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000007a8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000007b0] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x000007b8] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x000007c0] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x000007c8] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x000007d0] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x000007d8] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x000007e0] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x000007e8] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x000007f0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; 
mov -, vw_wait ++-/* [0x000007f8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000800] */ 0xfffffe50, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000808] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 ++-/* [0x00000810] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000818] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000820] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000828] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000830] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000838] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000840] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000848] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000850] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000858] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000860] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000630] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000638] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000640] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000648] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000650] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000658] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000660] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000668] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000670] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000678] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000680] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000688] */ 0x40027006, 
0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000690] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000698] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000006a0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000006a8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000006b0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000006b8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000006c0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000006c8] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x000006d0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x000006d8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000006e0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000006e8] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 +++/* [0x000006f0] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006f8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000700] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000708] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000710] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000718] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000720] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000728] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000730] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000738] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000740] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000748] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 +++/* [0x00000750] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000758] */ 
0x009e7000, 0x100009e7, // nop +++/* [0x00000760] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000768] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000770] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000778] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000780] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000788] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000790] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000798] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000007a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x00000868] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000870] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000878] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000880] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000888] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000890] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000898] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000008a0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000008a8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000008b0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000008b8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000008c0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000008c8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000008d0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008d8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000008e0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000008e8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000008f0] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x000008f8] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 
++-/* [0x00000900] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000908] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000910] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000918] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000920] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000928] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000930] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000938] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000940] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000948] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000950] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000958] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000960] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000968] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000970] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000978] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000980] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000988] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000990] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000998] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000009a0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000009a8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000009b0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000009b8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000009c0] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000009c8] */ 0x0f9d71c0, 0x10021327, // asr rb12, 
r0, rb23 ++-/* [0x000009d0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000009d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000009e0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000007a8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000007b0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000007b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000007c0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000007c8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000007d0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007d8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007e0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007e8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000007f0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000007f8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000800] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000808] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000810] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000818] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000820] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000828] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000830] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000838] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000840] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000848] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000850] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000858] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000860] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000868] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000870] */ 
0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000878] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000880] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 +++/* [0x00000888] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000890] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000898] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000008b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000008d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000008f0] */ 0x4f5971c6, 0x100253e0, // asr rb15, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008f8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000900] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000908] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 +++/* [0x00000910] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000918] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000920] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000009e8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000009f0] */ 0x8e4539bf, 0xa0029810, 
// shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000009f8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000a00] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000a08] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000a10] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000a18] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000a20] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000a28] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000a30] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000a38] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000a40] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000a48] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000a50] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000a58] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000a60] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000a68] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000a70] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000a78] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000a80] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00000a88] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00000a90] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00000a98] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00000aa0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 
; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00000aa8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00000ab0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00000ab8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00000ac0] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000ac8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000ad0] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000ad8] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000ae0] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000ae8] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000af0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000af8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000b00] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000b08] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000b10] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000b18] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000b20] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000b28] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000b30] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000b38] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000b40] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000b48] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000b50] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000b58] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000b60] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000b68] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000b70] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 
; mul24 r1, r1, ra22 ++-/* [0x00000b78] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000b80] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000b88] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000b90] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000b98] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000ba0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000ba8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000bb0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000bb8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000bc0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000bc8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000bd0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000bd8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000be0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000be8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000bf0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000928] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000930] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000938] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000940] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000948] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000950] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000958] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000960] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000968] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000970] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000978] 
*/ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000980] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000988] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000990] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000998] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000009a0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000009a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000009b0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000009b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000009c0] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x000009c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x000009d0] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x000009d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x000009e0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x000009e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x000009f0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x000009f8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00000a00] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000a08] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000a10] */ 0x15267d80, 0x10020227, // mov ra8, ra9 +++/* [0x00000a18] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00000a20] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00000a28] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00000a30] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000a38] */ 
0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000a40] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 +++/* [0x00000a48] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000a50] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x00000a58] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x00000a60] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a68] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 +++/* [0x00000a70] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 +++/* [0x00000a78] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 +++/* [0x00000a80] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 +++/* [0x00000a88] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 +++/* [0x00000a90] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 +++/* [0x00000a98] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 +++/* [0x00000aa0] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 +++/* [0x00000aa8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000ab0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000ab8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000ac0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000ac8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000ad0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000ad8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000ae0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000ae8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000af0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000af8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000b00] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000b08] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000b10] */ 
0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000b18] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000b20] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000b28] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000b30] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000bf8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000c00] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000c08] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c10] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c18] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c20] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c28] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000c30] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000c38] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b38] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000b40] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000b48] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b50] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b58] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b60] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b68] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b70] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b78] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000c40] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000c48] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c50] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c58] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c60] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000c68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c80] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c88] */ 
0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c90] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000c98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ca0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ca8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000cb0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b80] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000b88] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b90] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000b98] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ba0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ba8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bb0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bb8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bc0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bc8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bd0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000bd8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000be0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000be8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000bf0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 1bf7a68..cb74887 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -6,9 +6,9 @@ extern unsigned int rpi_shader[]; ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 152) ++ #define mc_filter_uv_b0 (rpi_shader + 324) ++-#define mc_filter_uv_b (rpi_shader + 538) ++-#define mc_exit (rpi_shader + 766) ++-#define mc_interrupt_exit8 (rpi_shader + 784) ++-#define mc_end (rpi_shader + 814) +++#define mc_filter_uv_b (rpi_shader + 490) +++#define mc_exit (rpi_shader + 718) +++#define mc_interrupt_exit8 (rpi_shader + 736) +++#define mc_end 
(rpi_shader + 766) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 8e4f18f..faa5755 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -357,15 +357,13 @@ mov r0, unif ++ shr r1, r0, r2 # Extract width ++ sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++ and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 +++add rb17, r0, 1 +++add rb18, r0, 3 ++ shl r0, r0, 7 ++ add r0, r0, r1 # Combine width and height of destination area ++ shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++ add rb26, r0, rb27 ++ ++-sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++- ++ # get filter coefficients ++ ++ mov r0, unif ++@@ -373,18 +371,11 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif +++ mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23; mov r0, unif ++-asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-asr rb12, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++@@ -426,26 +417,14 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 
++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++ add r0, r2, r3 ++ ++ mov r3, rb31 ++ ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++ mov ra12, ra13 ++ mov ra13, ra14 ++ ++-sub.setf -, r3, 8 ; mov r1, ra22 +++sub.setf -, r3, 4 ; mov r1, ra22 ++ ++ # apply horizontal filter ++ brr.anyn -, r:uvloop_b0 ++@@ -455,18 +434,12 @@ nop ; nop # Delay slot 3 (TODO move more of the context scr ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r1, ra14, rb14 ++-nop ; mul24 r0, ra13, rb13 ++-add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-add r1, r1, r0 ; mul24 r0, ra15, rb15 +++nop ; mul24 r1, ra14, rb10 +++nop ; mul24 r0, ra13, rb9 +++add r1, r1, r0 ; mul24 r0, ra12, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb11 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-#asr r1, r1, 14 ++-#add r1, r1, ra21 ++ brr.anyn -, r:uvloop_b0 ++ asr vpm, r1, 14 # Delay 1 shifts down by shift2=6, but results are still in 16bit precision TODO may be able to avoid the mul24 and use more delay slots ++ nop # Delay 2 ++-- ++2.7.4 ++ ++ ++From 93805e78a13d36e28ed84a0e8456da2eac45be89 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 11:12:43 +0100 ++Subject: [PATCH 29/68] Optimised B pass ++ ++--- ++ libavcodec/rpi_shader.c | 202 ++++++++++++++++++++------------------------- ++ libavcodec/rpi_shader.h | 6 +- ++ libavcodec/rpi_shader.qasm | 41 ++------- ++ 3 files changed, 100 insertions(+), 149 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 1f63ee0..4e6c5ea 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -289,8 +289,8 @@ unsigned int rpi_shader[] = { ++ /* [0x00000818] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++ /* [0x00000820] */ 0x0d9d8e40, 0x10021767, // sub 
rb29, rb24, r1 ++ /* [0x00000828] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000830] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000838] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000830] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000838] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++ /* [0x00000840] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++ /* [0x00000848] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++ /* [0x00000850] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++@@ -299,120 +299,96 @@ unsigned int rpi_shader[] = { ++ /* [0x00000868] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++ /* [0x00000870] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++ /* [0x00000878] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000880] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000888] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000890] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000898] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000008b0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b8] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c8] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000008d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000008f0] */ 0x4f5971c6, 0x100253e0, // asr 
rb15, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008f8] */ 0x4f5971c6, 0x100253a0, // asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000900] */ 0x4f5971c6, 0x10025360, // asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000908] */ 0x0f9d71c0, 0x10021327, // asr rb12, r0, rb23 ++-/* [0x00000910] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000918] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000920] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000880] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000888] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000890] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000898] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000008a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008b0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000008d0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000008d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008e0] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000928] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000930] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000938] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000940] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000948] */ 
0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000950] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000958] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000960] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000968] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000970] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000978] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000980] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000988] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000990] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000998] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000009a0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000009a8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000009b0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000009b8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000009c0] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x000009c8] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x000009d0] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x000009d8] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x000009e0] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x000009e8] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x000009f0] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x000009f8] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, 
ra7 << 15, r1 << 15 ++-/* [0x00000a00] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000a08] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000a10] */ 0x15267d80, 0x10020227, // mov ra8, ra9 ++-/* [0x00000a18] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00000a20] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00000a28] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00000a30] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000a38] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000a40] */ 0x8d5887f6, 0xd00269e1, // sub.setf -, r3, 8 ; mov r1, ra22 ++-/* [0x00000a48] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000a50] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000a58] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000a60] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000a68] */ 0x4038e037, 0x100049e1, // nop ; mul24 r1, ra14, rb14 ++-/* [0x00000a70] */ 0x4034d037, 0x100049e0, // nop ; mul24 r0, ra13, rb13 ++-/* [0x00000a78] */ 0x4c30c237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-/* [0x00000a80] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-/* [0x00000a88] */ 0x4c28a237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-/* [0x00000a90] */ 0x4c249237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-/* [0x00000a98] */ 0x4c208237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-/* [0x00000aa0] */ 0x4c3cf237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb15 ++-/* [0x00000aa8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000ab0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000ab8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000ac0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000ac8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000ad0] */ 0xfffffe38, 0xf06809e7, // brr.anyn -, 
r:uvloop_b ++-/* [0x00000ad8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000ae0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000ae8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000af0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000af8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000b00] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000b08] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000b10] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000b18] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000b20] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000b28] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000b30] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008e8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000008f0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000008f8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000900] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000908] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000910] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000918] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000920] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000928] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000930] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000938] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000940] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000948] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000950] */ 
0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000958] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000960] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000968] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000970] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000978] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000980] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000988] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000990] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000998] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000009a0] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 +++/* [0x000009a8] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009b0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000009b8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000009c0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000009c8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000009d0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000009d8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000009e0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000009e8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009f0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009f8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000a00] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000a08] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000a10] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000a18] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000a20] */ 0x129d63c0, 
0x10020867, // min r1, r1, rb22 +++/* [0x00000a28] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a30] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a38] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a40] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000a48] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a50] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a60] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000b38] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000b40] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000b48] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b50] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b58] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b60] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b68] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000b70] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000b78] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a78] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a80] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a98] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000aa0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ab0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000b80] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000b88] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000b90] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* 
[0x00000b98] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ba0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ba8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bb0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bb8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bc0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bc8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bd0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000bd8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000be0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000be8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000bf0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000ac0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ae0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b08] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b10] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b18] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b20] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b28] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000b30] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index cb74887..53da629 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -7,8 +7,8 @@ extern unsigned int rpi_shader[]; ++ #define mc_filter_uv (rpi_shader + 152) ++ #define mc_filter_uv_b0 
(rpi_shader + 324) ++ #define mc_filter_uv_b (rpi_shader + 490) ++-#define mc_exit (rpi_shader + 718) ++-#define mc_interrupt_exit8 (rpi_shader + 736) ++-#define mc_end (rpi_shader + 766) +++#define mc_exit (rpi_shader + 670) +++#define mc_interrupt_exit8 (rpi_shader + 688) +++#define mc_end (rpi_shader + 718) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index faa5755..f38c926 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -491,8 +491,8 @@ mov r0, unif ++ shr r1, r0, r2 # Extract width ++ sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++ and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 +++add rb17, r0, 1 +++add rb18, r0, 3 ++ shl r0, r0, 7 ++ ++ # r0 is currently height<<7 ++@@ -508,8 +508,6 @@ add rb26, r0, rb27 ++ # In a B frame, so also set up VPM read (reading back 16bit precision) ++ add vr_setup, r3, rb21 ++ ++-sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++- ++ # get filter coefficients ++ ++ mov r0, unif ++@@ -517,18 +515,11 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif +++ mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23; mov r0, unif ++-asr rb15, r0, rb23; mul24 r0, r0, ra22 ++-asr rb14, r0, rb23; mul24 r0, r0, ra22 ++-asr rb13, r0, rb23; mul24 r0, r0, ra22 ++-asr rb12, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++@@ -570,26 +561,14 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r2, r2, r3 ; 
mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++ add r0, r2, r3 ++ ++ mov r3, rb31 ++ ++-mov ra8, ra9 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++ mov ra12, ra13 ++ mov ra13, ra14 ++ ++-sub.setf -, r3, 8 ; mov r1, ra22 +++sub.setf -, r3, 4 ; mov r1, ra22 ++ # apply horizontal filter ++ brr.anyn -, r:uvloop_b ++ mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++@@ -598,14 +577,10 @@ nop ; nop # TODO improve use of delay slots ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r1, ra14, rb14 ++-nop ; mul24 r0, ra13, rb13 ++-add r1, r1, r0 ; mul24 r0, ra12, rb12 ++-add r1, r1, r0 ; mul24 r0, ra11, rb11 ++-add r1, r1, r0 ; mul24 r0, ra10, rb10 ++-add r1, r1, r0 ; mul24 r0, ra9, rb9 ++-add r1, r1, r0 ; mul24 r0, ra8, rb8 ++-add r1, r1, r0 ; mul24 r0, ra15, rb15 +++nop ; mul24 r1, ra14, rb10 +++nop ; mul24 r0, ra13, rb9 +++add r1, r1, r0 ; mul24 r0, ra12, rb8 +++add r1, r1, r0 ; mul24 r0, ra15, rb11 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ asr r1, r1, 14 # shift2=6 ++-- ++2.7.4 ++ ++ ++From e48df43c16de74dddbc7c702d64dd01eaf8e6b39 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 11:17:09 +0100 ++Subject: [PATCH 30/68] Used P delay slots more efficiently ++ ++--- ++ libavcodec/rpi_shader.c | 437 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 10 +- ++ libavcodec/rpi_shader.qasm | 19 +- ++ 3 files changed, 228 insertions(+), 238 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 4e6c5ea..a1af4e3 100644 ++--- a/libavcodec/rpi_shader.c +++++ 
b/libavcodec/rpi_shader.c ++@@ -156,239 +156,236 @@ unsigned int rpi_shader[] = { ++ /* [0x00000408] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ /* [0x00000410] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ /* [0x00000418] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000420] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000428] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000430] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000420] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000428] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000430] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++ /* [0x00000438] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000440] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 ++-/* [0x00000448] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000450] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000458] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000460] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000468] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000470] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000478] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000480] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000488] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000490] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000498] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000004a0] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x000004a8] */ 0xfffffec0, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004b0] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x000004b8] */ 
0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000004c0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000004c8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000004d0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004d8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000004e0] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000004e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000004f0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000004f8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000500] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000508] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000440] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000448] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000450] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000458] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000460] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000468] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000470] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000478] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000480] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000488] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000490] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000498] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x000004a0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000004a8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000004b0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000004b8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004c0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000004c8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000004d0] 
*/ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000004d8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000004e0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000004e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x00000510] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000518] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000520] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000528] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000530] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000538] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000540] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000548] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000550] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000558] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000560] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000568] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000570] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000578] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000580] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000588] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000590] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000598] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000005a0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000005a8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000005b0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000005b8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000005c0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000005c8] */ 0x15827d80, 0x10020827, // mov r0, 
unif ++-/* [0x000005d0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005d8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000005f0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005f8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000600] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000608] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000610] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000618] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000620] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000628] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000004f8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000500] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000508] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000510] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000518] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000520] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000528] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000530] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000538] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000540] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000548] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000550] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000558] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000560] */ 0x15827d80, 0x10020827, // mov r0, 
unif +++/* [0x00000568] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000570] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000578] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000580] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000588] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000590] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000598] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000005a0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000005a8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000005b0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005b8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005d0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005d8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000600] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000608] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000610] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000630] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000638] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000640] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000648] */ 
0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000650] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000658] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000660] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000668] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000670] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000678] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000680] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000688] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000690] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000698] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000006a0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000006a8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000006b0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000006b8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000006c0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000006c8] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x000006d0] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x000006d8] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000006e0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000006e8] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 ++-/* [0x000006f0] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000006f8] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x00000700] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x00000708] */ 0x009e7000, 0x100009e7, // nop ; nop 
++-/* [0x00000710] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000718] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000720] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000728] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000730] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000738] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000740] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000748] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 ++-/* [0x00000750] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000758] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000760] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000768] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000770] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000778] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000780] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000788] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000790] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000798] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000007a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000618] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000620] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000628] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000630] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000638] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000640] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000648] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* 
[0x00000650] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000658] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000660] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000668] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000670] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000678] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000680] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000688] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000690] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000698] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000006a0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000006a8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000006b0] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x000006b8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x000006c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x000006c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000006d0] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 +++/* [0x000006d8] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006e0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000006e8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000006f0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000006f8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000700] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000708] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000710] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, 
rb11 +++/* [0x00000718] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000720] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000728] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000730] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 +++/* [0x00000738] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000740] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000748] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000750] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000758] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000760] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000768] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000770] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000778] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000780] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000788] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x000007a8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000007b0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000007b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000007c0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000007c8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000007d0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007d8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000007e0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007e8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000007f0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000007f8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000800] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000808] */ 0x00000010, 0xe00208a7, // mov r2, 16 
++-/* [0x00000810] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000818] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000820] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000828] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000830] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000838] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000840] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000848] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000850] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000858] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000860] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000868] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000870] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000878] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000880] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000888] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000890] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000898] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000008a8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008b0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000008d0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008e0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000790] */ 0x15827d80, 0x100207e7, 
// mov ra31, unif +++/* [0x00000798] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000007a0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000007a8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000007b0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000007b8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007c0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007c8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007d0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000007d8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000007e0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000007e8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000007f0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000007f8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000800] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000808] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000810] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000818] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000820] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000828] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000830] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000838] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000840] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000848] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000850] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000858] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000860] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000868] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000870] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000878] */ 
0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000880] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000888] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000890] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000898] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000008b8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000008c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008c8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000008e8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000008f0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000008f8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000900] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000908] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000910] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000918] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000920] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000928] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000930] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000938] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000940] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000948] */ 
0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000950] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000958] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000960] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000968] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000970] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000978] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000980] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000988] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000990] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000998] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000009a0] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 ++-/* [0x000009a8] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009b0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000009b8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000009c0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000009c8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000009d0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000009d8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000009e0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000009e8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009f0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009f8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000a00] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000a08] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000a10] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* 
[0x00000a18] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000a20] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a28] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a30] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a38] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a40] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000a48] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a50] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a60] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008d0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000008d8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x000008e0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000008e8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000008f0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000008f8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000900] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000908] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000910] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000918] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000920] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000928] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000930] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000938] */ 0x4007f030, 
0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000940] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000948] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000950] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000958] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000960] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000968] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 +++/* [0x00000970] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 +++/* [0x00000978] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00000980] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000988] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 +++/* [0x00000990] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000998] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 +++/* [0x000009a0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop +++/* [0x000009a8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000009b0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000009b8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000009c0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000009c8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000009d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000009e8] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x000009f0] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x000009f8] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000a00] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000a08] */ 0x129d63c0, 0x10020867, 
// min r1, r1, rb22 +++/* [0x00000a10] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a18] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a20] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a28] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000a30] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a38] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a40] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a48] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a50] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a58] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a78] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a80] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a60] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a68] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a70] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a98] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000aa0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ab0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a90] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a98] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000aa0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000ac0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000aa8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ab0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ab8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ac0] 
*/ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ad0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ad8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ae0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ad8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ae0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b08] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b10] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b18] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b20] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000b28] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000b30] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b08] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b10] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000b18] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 53da629..1fb3e37 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -5,10 +5,10 @@ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 152) ++-#define mc_filter_uv_b0 (rpi_shader + 324) ++-#define mc_filter_uv_b (rpi_shader + 490) ++-#define mc_exit (rpi_shader + 670) ++-#define mc_interrupt_exit8 (rpi_shader + 688) ++-#define mc_end (rpi_shader + 718) +++#define mc_filter_uv_b0 (rpi_shader + 318) +++#define mc_filter_uv_b (rpi_shader + 484) +++#define mc_exit (rpi_shader + 664) +++#define 
mc_interrupt_exit8 (rpi_shader + 682) +++#define mc_end (rpi_shader + 712) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index f38c926..02e95dd 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -268,6 +268,7 @@ add t0s, ra_x2_base, r2 ++ ++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ +++# apply horizontal filter ++ nop ; mul24 r2, r0, ra0 ++ nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++ nop ; mul24 r3, ra1 << 1, r0 << 1 ++@@ -276,20 +277,12 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r0, r2, r3 ++- ++-mov r3, rb31 ++- ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 4 ; mov r1, ra22 ++- ++-# apply horizontal filter +++add r0, r2, r3 ; mov r3, rb31 +++sub.setf -, r3, 4 ; mov ra12, ra13 ++ brr.anyn -, r:uvloop ++-mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll ++-asr ra15, r0, 8 ; nop ++-nop ; nop # Delay slot 3 (TODO move more of the context scroll into here) +++mov ra13, ra14 # Delay slot 1 +++mov ra14, ra15 # Delay slot 2 +++mov ra15, r0 # Delay slot 3 ++ ++ # apply vertical filter and write to VPM ++ ++-- ++2.7.4 ++ ++ ++From b33dfc243ff5509299685add3c532ab7f207fd73 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 11:22:25 +0100 ++Subject: [PATCH 31/68] Improved use of delay slots ++ ++--- ++ libavcodec/rpi_shader.c | 503 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 10 +- ++ libavcodec/rpi_shader.qasm | 41 ++-- ++ 3 files changed, 265 insertions(+), 289 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index a1af4e3..c498f28 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -122,270 +122,263 @@ unsigned int rpi_shader[] = { ++ /* [0x00000300] */ 0x0c9e7040, 0x10020827, // 
add r0, r0, r1 ++ /* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++ /* [0x00000310] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000318] */ 0x0d9c8e40, 0xd00229e7, // sub.setf -,8,r1 ++-/* [0x00000320] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000328] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000340] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000348] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000350] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000360] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000368] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000370] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000378] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000380] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000340] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000348] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000350] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* 
[0x00000360] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000368] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000370] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000378] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x00000388] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000390] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000398] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003a0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003a8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003b0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003c0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003c8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003d0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000003d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003e0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000003e8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000003f0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000003f8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000400] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000408] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000410] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000418] */ 0x400f5031, 0xd000c9e3, // nop 
; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000420] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000428] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000430] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000438] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000440] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000448] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000450] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000458] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000460] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000468] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000470] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000478] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000480] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000488] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000490] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000498] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x000004a0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000004a8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000004b0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000004b8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004c0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000004c8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000004d0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000004d8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000004e0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000004e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000380] */ 0xcd5117de, 0xa00269e3, // 
sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000388] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000390] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000398] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003a0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003a8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003b8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003c0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000003c8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000003d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003d8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003e0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000003e8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000003f0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000003f8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000400] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000408] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000410] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000418] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000420] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000428] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000430] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000438] */ 0x153e7d80, 
0x100203a7, // mov ra14, ra15 +++/* [0x00000440] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000448] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000450] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000458] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000460] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000468] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000470] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000478] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000480] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000488] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000490] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000498] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000004a0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000004a8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000004b0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004b8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000004c0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000004c8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000004d0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000004d8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000004e0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004e8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x000004f8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000500] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000508] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000510] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000518] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* 
[0x00000520] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000528] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000530] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000538] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000540] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000548] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000550] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000558] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000560] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000568] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000570] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000578] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000580] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000588] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000590] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000598] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000005a0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000005a8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000005b0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005b8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005c0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005c8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005d0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000005d8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, 
rb23; mov r0, unif ++-/* [0x00000600] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000608] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000610] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000004f0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000004f8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000500] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000508] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000510] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000518] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000520] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000528] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000530] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000538] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000540] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000548] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000550] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000558] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000560] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000568] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000570] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000578] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000580] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000588] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000590] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000598] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000005a0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000005a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005b0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, 
ra22 +++/* [0x000005b8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005d0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005d8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005e8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005f0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000005f8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000600] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000608] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000618] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000620] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000628] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000630] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000638] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000640] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000648] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000650] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000658] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000660] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000668] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000670] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, 
ra0 ++-/* [0x00000678] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000680] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000688] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000690] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000698] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000006a0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000006a8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000006b0] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x000006b8] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x000006c0] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x000006c8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000006d0] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 ++-/* [0x000006d8] */ 0xffffff20, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000006e0] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000006e8] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000006f0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000006f8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000700] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000708] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000710] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000718] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000720] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000728] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000730] */ 0x0f9ce3c0, 0xd0020c27, // asr vpm, r1, 14 ++-/* [0x00000738] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000740] */ 0x009e7000, 0x100009e7, // nop ++-/* 
[0x00000748] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000750] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000758] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000760] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000768] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000770] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000778] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000780] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000788] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000610] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000618] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000620] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000628] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000630] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000638] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000640] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000648] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000650] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000658] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000668] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000670] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000678] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000680] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000688] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 
; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000690] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000698] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000006a0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000006a8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x000006b0] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x000006b8] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006c0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000006c8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000006d0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000006d8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000006e0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000006e8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000006f0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000006f8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000700] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x00000708] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000710] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* [0x00000718] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000720] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000728] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000730] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000738] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000740] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000748] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000750] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000758] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000760] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000768] */ 
0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b ++-/* [0x00000790] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000798] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000007a0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000007a8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000007b0] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000007b8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007c0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000007c8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007d0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000007d8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000007e0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000007e8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000007f0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000007f8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000800] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000808] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000810] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000818] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000820] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000828] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000830] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000838] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000840] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000848] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000850] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000858] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000860] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000868] */ 0x15827d80, 0x10020827, // mov r0, unif 
++-/* [0x00000870] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000878] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000880] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000888] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000890] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000898] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000008b8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008c8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000770] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000778] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000780] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000788] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000790] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000798] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007a0] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007a8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007b0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000007b8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000007c0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000007c8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000007d0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000007d8] */ 0x15827d80, 0x10020827, // mov r0, unif 
+++/* [0x000007e0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000007e8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000007f0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000007f8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000800] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000808] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000810] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000818] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000820] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000828] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000830] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000838] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000840] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000848] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000850] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000858] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000860] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000868] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000870] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000878] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000880] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000888] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000890] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000898] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000008a0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008a8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000008d0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 
; ldtmu0 ++-/* [0x000008d8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000008e0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000008e8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000008f0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000008f8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000900] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000908] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000910] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000918] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000920] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000928] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000930] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000938] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000940] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000948] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000950] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000958] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000960] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000968] */ 0x0c9e74c0, 0x10020827, // add r0, r2, r3 ++-/* [0x00000970] */ 0x159dffc0, 0x100208e7, // mov r3, rb31 ++-/* [0x00000978] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00000980] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000988] */ 0x8d5847f6, 0xd00269e1, // sub.setf -, r3, 4 ; mov r1, ra22 ++-/* [0x00000990] */ 0xffffff20, 
0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000998] */ 0x553e7d81, 0x100243a0, // mov ra14, ra15 ; mul24 r0, r0, r1 ++-/* [0x000009a0] */ 0x0f9c81c0, 0xd00203e7, // asr ra15, r0, 8 ; nop ++-/* [0x000009a8] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x000009b0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000009b8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000009c0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000009c8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000009d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000009e8] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x000009f0] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x000009f8] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000a00] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000a08] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a10] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a18] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a20] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a28] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000a30] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a38] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a40] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a48] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a50] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a58] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008b0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000008b8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, 
rb_x_base_next ; ldtmu0 +++/* [0x000008c0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000008c8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000008d0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000008d8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000008e0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000008e8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000008f0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000008f8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000900] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000908] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000910] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000918] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000920] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000928] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000930] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000938] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000940] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000948] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000950] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000958] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000960] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000968] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000970] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000978] */ 0x4038a037, 0x100049e1, // nop 
; mul24 r1, ra14, rb10 +++/* [0x00000980] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000988] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000990] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000998] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009a0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009a8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000009b0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x000009b8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x000009c0] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009c8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x000009d0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000009d8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000009e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000009e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000009f8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a00] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a08] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a10] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a18] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a20] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a60] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a68] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000a70] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a28] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a30] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a38] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a40] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a48] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* 
[0x00000a50] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a58] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a60] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a68] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_interrupt_exit8 +++/* [0x00000a70] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++ /* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a90] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a98] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000aa0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-// ::mc_interrupt_exit8 ++-/* [0x00000aa8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000ab0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ab8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ac0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ad0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ad8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ae0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b08] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000b10] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000b18] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000aa0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000aa8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ab0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ab8] */ 0x00000010, 0xe80009e7, // mov 
-,sacq(0) +++/* [0x00000ac0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ac8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ad0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ad8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000ae0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 1fb3e37..3fac45f 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -5,10 +5,10 @@ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 152) ++-#define mc_filter_uv_b0 (rpi_shader + 318) ++-#define mc_filter_uv_b (rpi_shader + 484) ++-#define mc_exit (rpi_shader + 664) ++-#define mc_interrupt_exit8 (rpi_shader + 682) ++-#define mc_end (rpi_shader + 712) +++#define mc_filter_uv_b0 (rpi_shader + 316) +++#define mc_filter_uv_b (rpi_shader + 476) +++#define mc_exit (rpi_shader + 650) +++#define mc_interrupt_exit8 (rpi_shader + 668) +++#define mc_end (rpi_shader + 698) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 02e95dd..10f5113 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -221,8 +221,6 @@ add r0, r0, r1 # Combine width and height of destination area ++ shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++ add rb26, r0, rb27 ++ ++-sub.setf -,8,r1 # 8-r1, so if <0 (negative) we need to use the full code ++- ++ # get filter coefficients ++ ++ mov r0, unif ++@@ -410,20 +408,12 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r0, r2, r3 ++- ++-mov r3, rb31 ++- ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 4 ; mov r1, ra22 ++- ++-# apply horizontal filter +++add r0, r2, r3 ; mov r3, rb31 
+++sub.setf -, r3, 4 ; mov ra12, ra13 ++ brr.anyn -, r:uvloop_b0 ++-mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll ++-asr ra15, r0, 8 ; nop # TODO isn't ra15 already in 24bit precision, may not need the sign extension here? ++-nop ; nop # Delay slot 3 (TODO move more of the context scroll into here) +++mov ra13, ra14 # Delay slot 1 +++mov ra14, ra15 # Delay slot 2 +++mov ra15, r0 # Delay slot 3 ++ ++ # apply vertical filter and write to VPM ++ ++@@ -432,9 +422,9 @@ nop ; mul24 r0, ra13, rb9 ++ add r1, r1, r0 ; mul24 r0, ra12, rb8 ++ add r1, r1, r0 ; mul24 r0, ra15, rb11 ++ add r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++sub.setf -, r3, rb18 ++ brr.anyn -, r:uvloop_b0 ++-asr vpm, r1, 14 # Delay 1 shifts down by shift2=6, but results are still in 16bit precision TODO may be able to avoid the mul24 and use more delay slots +++asr vpm, r1, 6 # Delay 1 shifts down by shift2=6, but results are still in 16bit precision ++ nop # Delay 2 ++ nop # Delay 3 ++ ++@@ -554,19 +544,12 @@ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r0, r2, r3 ++- ++-mov r3, rb31 ++- ++-mov ra12, ra13 ++-mov ra13, ra14 ++- ++-sub.setf -, r3, 4 ; mov r1, ra22 ++-# apply horizontal filter +++add r0, r2, r3 ; mov r3, rb31 +++sub.setf -, r3, 4 ; mov ra12, ra13 ++ brr.anyn -, r:uvloop_b ++-mov ra14, ra15 ; mul24 r0, r0, r1 # last bit of context scroll, including clamp to zero ++-asr ra15, r0, 8 ; nop ++-nop ; nop # TODO improve use of delay slots +++mov ra13, ra14 # Delay slot 1 +++mov ra14, ra15 # Delay slot 2 +++mov ra15, r0 # Delay slot 3 ++ ++ # apply vertical filter and write to VPM ++ ++-- ++2.7.4 ++ ++ ++From af59f8e00eb977e97debc5e72ba47e0077db1787 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 11:31:23 +0100 ++Subject: [PATCH 32/68] Avoid writeback of 
first B results ++ ++--- ++ libavcodec/rpi_shader.c | 229 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 8 +- ++ libavcodec/rpi_shader.qasm | 18 +--- ++ 3 files changed, 121 insertions(+), 134 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index c498f28..ba453a2 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -255,130 +255,125 @@ unsigned int rpi_shader[] = { ++ /* [0x00000710] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++ /* [0x00000718] */ 0x009e7000, 0x100009e7, // nop ++ /* [0x00000720] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000728] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000730] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000738] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000740] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000748] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000750] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000758] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000760] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000768] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000728] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000730] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000738] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000740] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x00000770] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000778] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000780] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000788] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000790] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000798] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007a0] */ 0x0d827cc0, 0x100208a7, // sub 
r2, unif, r3 ++-/* [0x000007a8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007b0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000007b8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000007c0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000007c8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000007d0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000007d8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000007e0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000007e8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000007f0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000007f8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000800] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000808] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000810] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000818] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000820] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000828] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000830] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000838] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000840] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000748] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000750] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000758] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000760] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000768] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000770] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000778] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000780] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000788] */ 0x149dc1c0, 0xd00214e7, // and 
rb_x_base_next, r0, ~3 +++/* [0x00000790] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000798] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000007a0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000007a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000007b0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000007c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000007c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000007d0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000007d8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000007e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000007e8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x000007f0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x000007f8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000800] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000808] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000810] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000818] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000820] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000828] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000830] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000838] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000840] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++ /* [0x00000848] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000850] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000858] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000860] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000868] */ 0x8f8171f6, 0x10024020, // asr 
ra0, r0, rb23; mov r0, unif ++-/* [0x00000870] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000878] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000880] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000888] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000890] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000898] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008a0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008a8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000850] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000858] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000860] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000868] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000870] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000878] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000880] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000008b0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000008b8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000008c0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000008c8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000008d0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000008d8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000008e0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000008e8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 
++-/* [0x000008f0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000008f8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000900] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000908] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000910] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000918] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000920] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000928] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000930] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000938] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000940] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000948] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000950] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000958] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000960] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000968] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000970] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000978] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000980] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000988] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000990] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000998] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009a0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009a8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000009b0] */ 0x0cc27380, 
0x10020867, // add r1, r1, vpm ++-/* [0x000009b8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x000009c0] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009c8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x000009d0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000009d8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000009e0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000009e8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000009f0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000009f8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a00] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a08] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a10] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a18] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a20] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000888] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000890] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000898] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000008a0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000008a8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000008b0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000008b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000008c0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000008c8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000008d0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000008d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008e0] */ 
0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000008e8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000008f0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000008f8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000900] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000908] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000910] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000918] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000920] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000928] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000930] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000938] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000940] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000948] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000950] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000958] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000960] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000968] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000970] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000978] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000980] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000988] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000990] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000998] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009a0] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x000009a8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 
+++/* [0x000009b0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000009b8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000009c0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009c8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000009d0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000009d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000009e0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000009e8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000009f0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009f8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a28] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a30] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000a38] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a40] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a48] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a50] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a58] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a60] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000a68] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a00] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a08] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a10] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a18] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a20] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a28] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a30] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a38] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a40] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000a70] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a88] */ 0x009e7000, 0xa00009e7, 
// ldtmu0 ++-/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a48] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a50] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a58] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a60] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a68] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a80] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a88] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a90] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000a98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000aa0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000aa8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ab0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ab8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ac0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ac8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ad0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ad8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000ae0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ab0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 3fac45f..45dbe0e 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -6,9 +6,9 @@ extern unsigned int rpi_shader[]; ++ #define mc_setup_uv (rpi_shader + 0) ++ #define mc_filter_uv (rpi_shader + 152) ++ #define mc_filter_uv_b0 (rpi_shader + 316) ++-#define mc_filter_uv_b (rpi_shader + 476) ++-#define mc_exit (rpi_shader + 650) ++-#define 
mc_interrupt_exit8 (rpi_shader + 668) ++-#define mc_end (rpi_shader + 698) +++#define mc_filter_uv_b (rpi_shader + 466) +++#define mc_exit (rpi_shader + 640) +++#define mc_interrupt_exit8 (rpi_shader + 658) +++#define mc_end (rpi_shader + 688) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 10f5113..e138c95 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -428,22 +428,14 @@ asr vpm, r1, 6 # Delay 1 shifts down by shift2=6, but results are still ++ nop # Delay 2 ++ nop # Delay 3 ++ +++# in pass0 we don't really need to save any results, but need to discard the uniforms ++ # DMA out for U ++ ++-mov vw_setup, rb26 # VDW setup 0 ++-mov vw_setup, rb29 # Stride ++-mov vw_addr, unif # start the VDW # TODO in pass0 we don't need to save any results ++- ++-# DMA out for V ++-# We need to wait for the U to complete first, but have nothing useful to compute while we wait. ++-# Could potentially push this write into the start of the next pipeline stage. 
++-mov r0, 16 ++-mov -, vw_wait ++- ++ bra -, ra31 ++-add vw_setup, rb26, r0 # VDW setup 0 ++-mov vw_setup, rb29 # Stride ++-mov vw_addr, unif # start the VDW +++mov r0, unif # Delay 1 +++mov r0, unif # Delay 2 +++nop # Delay 3 +++ ++ ++ ################################################################################ ++ ++-- ++2.7.4 ++ ++ ++From 12e57278cb19a769d2e1488e8e94003027493d09 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 14 May 2015 11:36:24 +0100 ++Subject: [PATCH 33/68] Cutdown size of chroma prediction commands ++ ++--- ++ libavcodec/hevc.c | 17 +- ++ libavcodec/rpi_shader.c | 543 ++++++++++++++++++++++----------------------- ++ libavcodec/rpi_shader.h | 12 +- ++ libavcodec/rpi_shader.qasm | 11 +- ++ 4 files changed, 281 insertions(+), 302 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index a47ebc5..32b89d5 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -56,7 +56,7 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ ++ #ifdef RPI_INTER_QPU ++ ++-#define RPI_CHROMA_COMMAND_WORDS 12 +++#define RPI_CHROMA_COMMAND_WORDS 10 ++ #define UV_COMMANDS_PER_QPU ((1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS) ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++@@ -2032,11 +2032,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); ++- // TODO chroma weight and offset... 
s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++- u++; ++ *u++ = rpi_filter_coefs[_my][0]; ++- u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2091,9 +2088,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); ++ // TODO chroma weight and offset... s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++- u++; ++ *u++ = rpi_filter_coefs[_my][0]; ++- u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2154,11 +2149,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? 
nPbH_c : 16); ++ *u++ = rpi_filter_coefs[_mx][0]; ++- u++; ++ *u++ = rpi_filter_coefs[_my][0]; ++- u++; ++- *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); // TODO this will become unused once we have a dedicated pass0 filter ++- *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ u+=2; // Intermediate results are not written back in first pass of B filtering ++ ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x2_c - 1 + start_x; ++@@ -2166,11 +2158,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); ++- // TODO chroma weight and offset... s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx2][0]; ++- u++; ++ *u++ = rpi_filter_coefs[_my2][0]; ++- u++; ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2808,7 +2797,7 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = pic_height; ++ *s->u_mvs[i]++ = s->frame->linesize[1]; ++ *s->u_mvs[i]++ = s->frame->linesize[2]; ++- s->u_mvs[i] += 3; // Padding words +++ s->u_mvs[i] += 1; // Padding words ++ } ++ } ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index ba453a2..b0b93b5 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -89,291 +89,286 @@ unsigned int rpi_shader[] = { ++ /* [0x00000200] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++ /* [0x00000208] */ 0x0c9e7440, 
0x10020e27, // add t0s, r2, r1 ++ /* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000218] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000220] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000228] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000230] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000238] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000240] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000248] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000250] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000258] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000218] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000220] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000228] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000230] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000238] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000240] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000248] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv ++-/* [0x00000260] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000268] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000270] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000278] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000280] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000288] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000290] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000298] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000002a0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000002a8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000002b0] 
*/ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000002b8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002c0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002c8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002d0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002d8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002e0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002e8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000002f0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000002f8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000310] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000340] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000348] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000350] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000360] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000368] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000370] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000378] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000250] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000258] */ 0x154e7d80, 0x10020467, 
// mov ra_xshift, ra_xshift_next +++/* [0x00000260] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000268] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000270] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000278] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000280] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000288] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000290] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000298] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000002a0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000002a8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002b0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002b8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002c0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002c8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002d0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002d8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000002e0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000002e8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002f0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002f8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000300] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000330] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* 
[0x00000338] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000340] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000350] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000358] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000360] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x00000380] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000388] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000390] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000398] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003a0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003a8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003b8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003c0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003c8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000003d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003d8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000003e0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000003e8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000003f0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000003f8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000400] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 
10, r1 << 10 ++-/* [0x00000408] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000410] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000418] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000420] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000428] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000430] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000438] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000440] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000448] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000450] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000458] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000460] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000468] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000470] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000478] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000480] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000488] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000490] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000498] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000004a0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000004a8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000004b0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004b8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000004c0] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000004c8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000004d0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000004d8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000004e0] */ 
0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004e8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000368] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000370] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000378] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000380] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000388] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000390] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000398] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003a0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003a8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000003b0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000003b8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003c0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003c8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000003d0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000003d8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000003e0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000003e8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000003f0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000003f8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000400] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000408] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* 
[0x00000410] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000418] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000420] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000428] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000430] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000438] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000440] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000448] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000450] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000458] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000460] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000468] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 +++/* [0x00000470] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000478] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00000480] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000488] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000490] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000498] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000004a8] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000004b0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000004b8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000004c0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000004c8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004d0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x000004f0] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000004f8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000500] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000508] */ 
0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000510] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000518] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000520] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000528] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000530] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000538] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000540] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000548] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000550] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000558] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000560] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000568] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000570] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000578] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000580] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000588] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000590] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000598] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000005a0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000005a8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005b0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005b8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005c0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005c8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000005d0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005d8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005e0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 
r0, r0, ra22 ++-/* [0x000005e8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005f0] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000005f8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000600] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000608] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000004d8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000004e0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000004e8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000004f0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000004f8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000500] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000508] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000510] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000518] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000520] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000528] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000530] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000538] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000540] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000548] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000550] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000558] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000560] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000568] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000570] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000578] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000580] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000588] */ 0x0c9db1c0, 0x100216a7, // 
add rb26, r0, rb27 +++/* [0x00000590] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000598] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005a0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005a8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005b0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005b8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005d0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x000005d8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000005e0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000005e8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000610] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000618] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000620] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000628] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000630] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000638] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000640] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000648] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000650] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000658] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 
1, 1, 1, 1, 1, 1] ++-/* [0x00000668] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000670] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000678] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000680] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000688] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000690] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000698] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000006a0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000006a8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x000006b0] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x000006b8] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000006c0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000006c8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x000006d0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000006d8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000006e0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000006e8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000006f0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000006f8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000700] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 ++-/* [0x00000708] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000710] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++-/* [0x00000718] */ 0x009e7000, 0x100009e7, // nop +++/* [0x000005f0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000005f8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz 
ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000600] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000608] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000610] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000618] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000620] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000628] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000630] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000638] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000640] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000648] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000650] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000658] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000660] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000668] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000670] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000678] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000680] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000688] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000690] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000698] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006a0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000006a8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000006b0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000006b8] */ 0x4038a037, 
0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000006c0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000006c8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000006d0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000006d8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000006e0] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x000006e8] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006f0] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* [0x000006f8] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000700] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000708] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000710] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000718] */ 0x15827d80, 0x10020827, // mov r0, unif ++ /* [0x00000720] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000728] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000730] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000738] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000740] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x00000748] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000750] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000758] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000760] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000768] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000770] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000778] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000780] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000788] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000790] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000798] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, 
rb_x_base_next, r2 ++-/* [0x000007a0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000007a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000007b0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000007b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000007c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000007c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000007d0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000007d8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000007e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000007e8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x000007f0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x000007f8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000800] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000808] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000810] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000818] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000820] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000828] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000830] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000838] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000840] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000848] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000850] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000858] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000860] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000868] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000870] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000878] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000880] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000728] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000730] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000738] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000740] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000748] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000750] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000758] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000760] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000768] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000770] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000778] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000780] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000788] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000790] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000798] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000007a0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000007a8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000007b0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000007b8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000007c0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000007c8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x000007d0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x000007d8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x000007e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000007e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000007f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000007f8] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000800] */ 
0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000808] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000810] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000818] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000820] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000828] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000830] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000838] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000840] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000848] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000850] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000858] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000888] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000890] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000898] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000008a0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000008a8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000008b0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000008b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000008c0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000008c8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000008d0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000008d8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008e0] */ 
0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000008e8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000008f0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000008f8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000900] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000908] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000910] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000918] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000920] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000928] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000930] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000938] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000940] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000948] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000950] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000958] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000960] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000968] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000970] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000978] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000980] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000988] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000990] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000998] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009a0] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x000009a8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 
++-/* [0x000009b0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000009b8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000009c0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000009c8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000009d0] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000009d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000009e0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000009e8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000009f0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000009f8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000860] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000868] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000870] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000878] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000880] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000888] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000890] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000898] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000008a0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000008a8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000008b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008b8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000008c0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000008c8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000008d0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, 
r1 << 9 +++/* [0x000008d8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000008e0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000008e8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000008f0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000008f8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000900] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000908] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000910] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000918] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000920] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000928] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000930] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000938] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000940] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000948] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000950] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000958] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000960] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000968] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000970] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000978] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000980] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000988] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000990] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000998] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000009a8] */ 0x00000010, 
0xe0020827, // mov r0, 16 +++/* [0x000009b0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000009b8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000009c0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000009c8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009d0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a00] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a08] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000a10] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a18] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a20] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a28] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a30] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a38] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000a40] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000009d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000009e0] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x000009e8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000009f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x000009f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a00] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a08] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a10] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a18] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000a48] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a50] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a58] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a60] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a68] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a20] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a28] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a30] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a38] */ 0x009e7000, 0xa00009e7, // ldtmu0 
+++/* [0x00000a40] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a48] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a50] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a58] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a60] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000a70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000a78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a80] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a88] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a90] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000aa0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ab0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a80] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a88] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000a90] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 45dbe0e..99927c4 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,11 +4,11 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 152) ++-#define mc_filter_uv_b0 (rpi_shader + 316) ++-#define mc_filter_uv_b (rpi_shader + 466) ++-#define mc_exit (rpi_shader + 640) ++-#define mc_interrupt_exit8 (rpi_shader + 658) ++-#define mc_end (rpi_shader + 688) +++#define mc_filter_uv (rpi_shader + 148) +++#define mc_filter_uv_b0 (rpi_shader + 310) +++#define mc_filter_uv_b (rpi_shader + 458) +++#define mc_exit (rpi_shader + 630) +++#define mc_interrupt_exit8 (rpi_shader + 648) +++#define 
mc_end (rpi_shader + 678) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index e138c95..d9ffcda 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -167,8 +167,6 @@ add t0s, r2, r1 ++ ++ # Dump padding words ++ mov r0, unif ++-mov r0, unif ++-mov r0, unif ++ ++ # submit texture requests for second line ++ max r1, ra_y, 0 ++@@ -228,11 +226,10 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++- mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif +++asr rb8, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++@@ -362,11 +359,10 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++- mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif +++asr rb8, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++@@ -490,11 +486,10 @@ asr ra3, r0, rb23; mul24 r0, r0, ra22 ++ asr ra2, r0, rb23; mul24 r0, r0, ra22 ++ asr ra1, r0, rb23; mul24 r0, r0, ra22 ++ asr ra0, r0, rb23; mov r0, unif ++- mov r0, unif ++ asr rb11, r0, rb23; mul24 r0, r0, ra22 ++ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif +++asr rb8, r0, rb23 ++ ++ # r2 is elem_num ++ # r3 is loop counter ++-- ++2.7.4 ++ ++ ++From 3e8f02cf9d3e4bfcd07a5fcf321ace07c4f2e6f3 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Thu, 14 May 2015 15:21:49 +0100 ++Subject: [PATCH 34/68] hevc: don't redirect when not rpi_enabled ++ ++--- ++ libavcodec/hevc.c | 2 +- ++ 1 file changed, 1 insertion(+), 1 deletion(-) ++ 
++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 32b89d5..2459e34 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -1468,7 +1468,7 @@ static int hls_pcm_sample(HEVCContext *s, int x0, int y0, int log2_cb_size) ++ */ ++ ++ #ifdef RPI_INTER ++-#define RPI_REDIRECT(fn) rpi_ ## fn +++#define RPI_REDIRECT(fn) (s->enable_rpi ? rpi_ ## fn : fn) ++ static void rpi_luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref, const Mv *mv, int x_off, int y_off, ++ int block_w, int block_h, int luma_weight, int luma_offset) ++-- ++2.7.4 ++ ++ ++From 6da455b382b28c3c1f4e98c1703a695cdb946ad3 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Thu, 14 May 2015 15:22:02 +0100 ++Subject: [PATCH 35/68] Use /dev/vcio for mailbox access ++ ++--- ++ libavcodec/rpi_mailbox.c | 2 +- ++ 1 file changed, 1 insertion(+), 1 deletion(-) ++ ++diff --git a/libavcodec/rpi_mailbox.c b/libavcodec/rpi_mailbox.c ++index 536896f..77a56dd 100644 ++--- a/libavcodec/rpi_mailbox.c +++++ b/libavcodec/rpi_mailbox.c ++@@ -39,7 +39,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++ ++ #define MAJOR_NUM 100 ++ #define IOCTL_MBOX_PROPERTY _IOWR(MAJOR_NUM, 0, char *) ++-#define DEVICE_FILE_NAME "/dev/char_dev" +++#define DEVICE_FILE_NAME "/dev/vcio" ++ ++ #include "rpi_mailbox.h" ++ ++-- ++2.7.4 ++ ++ ++From f96ef6131f16a4c03b8e2882bdf7319c3b646a6c Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Thu, 14 May 2015 15:25:25 +0100 ++Subject: [PATCH 36/68] Use vcsm for all memory allocations ++ ++--- ++ libavcodec/rpi_qpu.c | 174 +++++++++++++++++++-------------------------------- ++ 1 file changed, 64 insertions(+), 110 deletions(-) ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 60bf079..f62051f 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -1,7 +1,5 @@ ++ #ifdef RPI ++-// define RPI_USE_VCSM to use the vcsm device for shared memory ++ // This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. ++-#define RPI_USE_VCSM ++ // define RPI_TIME_TOTAL_QPU to print out how much time is spent in the QPU code ++ #define RPI_TIME_TOTAL_QPU ++ // define RPI_TIME_TOTAL_VPU to print out how much time is spent in the VPI code ++@@ -25,9 +23,7 @@ ++ #include "rpi_shader.h" ++ #include "rpi_hevc_transform.h" ++ ++-#ifdef RPI_USE_VCSM ++ #include "rpi_user_vcsm.h" ++-#endif ++ ++ // On Pi2 there is no way to access the VPU L2 cache ++ // GPU_MEM_FLG should be 4 for uncached memory. 
(Or C for alias to allocate in the VPU L2 cache) ++@@ -96,7 +92,6 @@ struct GPU ++ unsigned int vpu_code[VPU_CODE_SIZE]; ++ short transMatrix2even[16*16*2]; ++ int open_count; // Number of allocated video buffers ++- unsigned int vc_handle; // Handle of this memory ++ int mb; // Mailbox handle ++ int vc; // Address in GPU memory ++ int mail[12]; // These are used to pass pairs of code/unifs to the QPUs ++@@ -105,6 +100,7 @@ struct GPU ++ // Stop more than one thread trying to allocate memory or use the processing resources at once ++ static pthread_mutex_t gpu_mutex = PTHREAD_MUTEX_INITIALIZER; ++ static volatile struct GPU* gpu = NULL; +++static GPU_MEM_PTR_T gpu_mem_ptr; ++ ++ #if defined(RPI_TIME_TOTAL_QPU) || defined(RPI_TIME_TOTAL_VPU) ++ static unsigned int Microseconds(void) { ++@@ -132,39 +128,27 @@ static volatile int vpu_async_tail=0; // Contains the number of posted jobs ++ static volatile int vpu_async_head=0; ++ #endif ++ +++static int gpu_malloc_uncached_internal(int numbytes, GPU_MEM_PTR_T *p, int mb); +++static void gpu_free_internal(GPU_MEM_PTR_T *p); +++ ++ // Connect to QPU, returns 0 on success. 
++ static int gpu_init(volatile struct GPU **gpu) { ++ int mb = mbox_open(); ++ int vc; ++- int handle; ++ volatile struct GPU* ptr; ++ if (mb < 0) ++ return -1; ++ ++ if (qpu_enable(mb, 1)) return -2; ++ ++-#ifdef RPI_USE_VCSM ++ vcsm_init(); ++-#endif +++ gpu_malloc_uncached_internal(sizeof(struct GPU), &gpu_mem_ptr, mb); +++ ptr = (volatile struct GPU*)gpu_mem_ptr.arm; +++ memset(ptr, 0, sizeof *ptr); +++ vc = gpu_mem_ptr.vc; ++ ++- handle = mem_alloc(mb, sizeof(struct GPU), 4096, GPU_MEM_FLG); ++- if (!handle) ++- { ++- qpu_enable(mb, 0); ++- return -3; ++- } ++- vc = mem_lock(mb, handle); ++- ptr = mapmem_shared((vc+GPU_MEM_MAP)&~0xc0000000, sizeof(struct GPU)); ++- if (ptr == NULL) ++- { mem_free(mb, handle); ++- mem_unlock(mb, handle); ++- qpu_enable(mb, 0); ++- return -4; ++- } ++- ++- ptr->mb = mb; ++- ptr->vc_handle = handle; ++- ptr->vc = vc; +++ ptr->mb = mb; +++ ptr->vc = vc; ++ ++ printf("GPU allocated at 0x%x\n",vc); ++ ++@@ -226,94 +210,74 @@ static void gpu_unlock(void) { ++ pthread_mutex_unlock(&gpu_mutex); ++ } ++ +++static int gpu_malloc_uncached_internal(int numbytes, GPU_MEM_PTR_T *p, int mb) { +++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); +++ assert(p->vcsm_handle); +++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); +++ assert(p->vc_handle); +++ p->arm = vcsm_lock(p->vcsm_handle); +++ assert(p->arm); +++ p->vc = mem_lock(mb, p->vc_handle); +++ assert(p->vc); +++ return 0; +++} +++ ++ // Allocate memory on GPU ++ // Fills in structure <p> containing ARM pointer, videocore handle, videocore memory address, numbytes ++ // Returns 0 on success. ++ // This allocates memory that will not be cached in ARM's data cache. ++ // Therefore safe to use without data cache flushing. 
++-int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) { +++int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) +++{ +++ int r; ++ gpu_lock(); ++- p->vc_handle = mem_alloc(gpu->mb, numbytes, 4096, GPU_MEM_FLG); ++- p->vcsm_handle = 0; ++- if (!p->vc_handle) ++- { ++- qpu_enable(gpu->mb, 0); ++- return -3; ++- } ++- p->vc = mem_lock(gpu->mb, p->vc_handle); ++- p->arm = mapmem_shared((p->vc+GPU_MEM_MAP)&~0xc0000000,numbytes); ++- p->numbytes = numbytes; ++- if (p->arm == NULL) ++- { ++- mem_free(gpu->mb, p->vc_handle); ++- mem_unlock(gpu->mb, p->vc_handle); ++- gpu_unlock(); ++- qpu_enable(gpu->mb, 0); ++- return -4; ++- } +++ r = gpu_malloc_uncached_internal(numbytes, p, gpu->mb); ++ gpu->open_count++; ++ gpu_unlock(); ++- return 0; +++ return r; ++ } ++ ++ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ { ++- // This only works when using RPI_USE_VCSM ++ void *tmp = vcsm_lock(p->vcsm_handle); ++ vcsm_unlock_ptr(tmp); ++ } ++ +++static int gpu_malloc_cached_internal(int numbytes, GPU_MEM_PTR_T *p) { +++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST, (char *)"Video Frame" ); +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); +++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST_AND_VC, (char *)"Video Frame" ); +++ assert(p->vcsm_handle); +++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); +++ assert(p->vc_handle); +++ p->arm = vcsm_lock(p->vcsm_handle); +++ assert(p->arm); +++ p->vc = mem_lock(gpu->mb, p->vc_handle); +++ assert(p->vc); +++ return 0; +++} +++ ++ // This allocates data that will be ++ // Cached in ARM L2 ++ // Uncached in VPU L2 ++-int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p) { +++int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p) +++{ +++ int r; ++ gpu_lock(); ++-#ifdef RPI_USE_VCSM ++- { ++- p->vcsm_handle = vcsm_malloc_cache(numbytes, 
VCSM_CACHE_TYPE_HOST, (char *)"Video Frame" ); // f....... locks up for VP9 - retest this? ++- //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); // 3b...... works ++- //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); //fb...... locks up ++- //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST_AND_VC, (char *)"Video Frame" ); // 3b works (but corrupted due to caching) ++- p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); ++- p->arm = vcsm_lock(p->vcsm_handle); ++- p->vc = mem_lock(gpu->mb, p->vc_handle); ++- } ++-#else ++- p->vc_handle = mem_alloc(gpu->mb, numbytes, 4096, GPU_MEM_FLG); ++- p->vcsm_handle = 0; ++- if (!p->handle) ++- { ++- qpu_enable(gpu->mb, 0); ++- return -3; ++- } ++- p->vc = mem_lock(gpu->mb, p->vc_handle); ++- printf("This mapmem_private does not seem to work\n"); ++- exit(-1); ++- p->arm = mapmem_private((p->vc+GPU_MEM_MAP)&~0xc0000000,numbytes); ++- p->numbytes = numbytes; ++- if (p->arm == NULL) ++- { ++- mem_free(gpu->mb, p->handle); ++- mem_unlock(gpu->mb, p->handle); ++- gpu_unlock(); ++- qpu_enable(gpu->mb, 0); ++- return -4; ++- } ++-#endif +++ r = gpu_malloc_cached_internal(numbytes, p); ++ gpu->open_count++; ++ gpu_unlock(); ++- return 0; +++ return r; ++ } ++ ++ static void gpu_term(void) ++ { ++- int mb; ++- unsigned handle; +++ int mb; ++ ++ if (gpu==NULL) ++ return; ++ mb = gpu->mb; ++- handle = gpu->vc_handle; ++ ++ #ifdef RPI_ASYNC ++ { ++@@ -323,37 +287,26 @@ static void gpu_term(void) ++ } ++ #endif ++ +++ qpu_enable(mb, 0); +++ gpu_free_internal(&gpu_mem_ptr); ++ ++- unmapmem((void*)gpu, sizeof(struct GPU)); ++- mem_unlock(mb, handle); ++- mem_free(mb, handle); ++- qpu_enable(mb, 0); ++-#ifdef RPI_USE_VCSM ++ vcsm_exit(); ++-#endif ++- mbox_close(mb); +++ +++ mbox_close(mb); ++ gpu = NULL; ++ } ++ ++-void gpu_free(GPU_MEM_PTR_T *p) { +++void gpu_free_internal(GPU_MEM_PTR_T *p) { ++ int mb = gpu->mb; ++- unsigned handle = 
p->vc_handle; +++ mem_unlock(mb,p->vc_handle); +++ vcsm_unlock_ptr(p->arm); +++ vcsm_free(p->vcsm_handle); +++} +++ +++void gpu_free(GPU_MEM_PTR_T *p) { ++ gpu_lock(); ++-#ifdef RPI_USE_VCSM ++- if (p->vcsm_handle) { ++- mem_unlock(mb,p->vc_handle); ++- vcsm_unlock_ptr(p->arm); ++- vcsm_free(p->vcsm_handle); ++- } else { ++- unmapmem((void*)p->arm, sizeof(struct GPU)); ++- mem_unlock(mb, handle); ++- mem_free(mb, handle); ++- } ++-#else ++- unmapmem((void*)p->arm, sizeof(struct GPU)); ++- mem_unlock(mb, handle); ++- mem_free(mb, handle); ++-#endif +++ +++ gpu_free_internal(p); ++ ++ gpu->open_count--; ++ if (gpu->open_count==0) { ++@@ -386,20 +339,21 @@ unsigned int vpu_get_constants(void) { ++ ++ static void *vpu_start(void *arg) { ++ while(1) { +++ int *p; ++ pthread_mutex_lock(&post_mutex); ++ while( vpu_async_tail - vpu_async_head <= 0) ++ { ++ //printf("Checking number %d %d\n",vpu_async_head,vpu_async_tail); ++ pthread_cond_wait(&post_cond_tail, &post_mutex); ++ } ++- int *p = vpu_cmds[vpu_async_head%MAXCMDS]; +++ p = vpu_cmds[vpu_async_head%MAXCMDS]; ++ pthread_mutex_unlock(&post_mutex); ++ ++ if (p[6] == -1) { ++ break; // Last job ++ } ++ if (p[7]) { ++- GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; +++ //GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; ++ //gpu_cache_flush(buf); ++ } ++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); ++-- ++2.7.4 ++ ++ ++From 7c94b833b48a455d27d82eb2ca1b53a162705caf Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Thu, 14 May 2015 15:43:17 +0100 ++Subject: [PATCH 37/68] Enable EARLY_MALLOC and fix sps access bug ++ ++--- ++ libavcodec/hevc.c | 5 +++-- ++ 1 file changed, 3 insertions(+), 2 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 2459e34..4e82a15 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -44,7 +44,7 @@ ++ #ifdef RPI ++ #include "rpi_qpu.h" ++ // For some unknown reason, the code seems to crash if I do a late malloc ++- #define 
EARLY_MALLOC +++ //#define EARLY_MALLOC ++ // Move Inter prediction into separate pass ++ #define RPI_INTER ++ #endif ++@@ -149,7 +149,8 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ #ifdef RPI ++ #ifdef EARLY_MALLOC ++ #else ++- int coeffs_in_ctb = (1 << s->ps.sps->log2_ctb_size) * (1 << s->ps.sps->log2_ctb_size); +++ assert(sps); +++ int coeffs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); ++ int coefs_per_row = sps->ctb_width * coeffs_in_ctb * 3; // Allow space for chroma ++ printf("pic_arrays_init\n"); ++ printf("Allocated %d\n",coefs_per_row); ++-- ++2.7.4 ++ ++ ++From 0a0a92817a7959d213dca9c75a242b6ad88d6b80 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Thu, 14 May 2015 16:40:51 +0100 ++Subject: [PATCH 38/68] Add copy of av_mod_uintp2 for use with stable ffmpeg ++ ++--- ++ libavcodec/hevc.c | 8 ++++++++ ++ 1 file changed, 8 insertions(+) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 4e82a15..80db603 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -51,6 +51,14 @@ ++ ++ // #define DISABLE_MC ++ +++#ifndef av_mod_uintp2 +++static av_always_inline av_const unsigned av_mod_uintp2_c(unsigned a, unsigned p) +++{ +++ return a & ((1 << p) - 1); +++} +++# define av_mod_uintp2 av_mod_uintp2_c +++#endif +++ ++ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 }; ++ ++ ++-- ++2.7.4 ++ ++ ++From c48d08e968b24c2e260b0cc76c7901a1b4d75bbf Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Mon, 18 May 2015 11:11:02 +0100 ++Subject: [PATCH 39/68] Added support for weighted prediction in P frames ++ ++--- ++ libavcodec/hevc.c | 52 ++++- ++ libavcodec/rpi_shader.c | 566 +++++++++++++++++++++++---------------------- ++ libavcodec/rpi_shader.h | 12 +- ++ libavcodec/rpi_shader.qasm | 39 +++- ++ 4 files changed, 384 insertions(+), 285 deletions(-) ++ ++diff --git 
a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 80db603..9668ef8 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -64,7 +64,7 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ ++ #ifdef RPI_INTER_QPU ++ ++-#define RPI_CHROMA_COMMAND_WORDS 10 +++#define RPI_CHROMA_COMMAND_WORDS 12 ++ #define UV_COMMANDS_PER_QPU ((1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS) ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++@@ -2031,6 +2031,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int y1_c = y0_c + (mv->y >> (2 + hshift)); ++ //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width ++ int chan = x0>>8; +++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || +++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++@@ -2043,6 +2045,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? 
nPbH_c : 16); ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; +++ if (weight_flag) { +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0] & 0xffff); +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[0]][1] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[0]][1] & 0xffff); +++ } else { +++ *u++ = 1; // Weight of 1 and offset of 0 +++ *u++ = 1; +++ } ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2085,6 +2094,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int y1_c = y0_c + (mv->y >> (2 + hshift)); ++ //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. This is optimised for images around 1920 width ++ int chan = x0>>8; +++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || +++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++@@ -2098,6 +2109,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ // TODO chroma weight and offset... 
s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; +++ if (weight_flag) { +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[1]][0] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[1]][0] & 0xffff); +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[1]][1] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[1]][1] & 0xffff); +++ } else { +++ *u++ = 1; // Weight of 1 and offset of 0 +++ *u++ = 1; +++ } ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2159,6 +2177,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; +++ u+=2; // Weights not supported in B slices ++ u+=2; // Intermediate results are not written back in first pass of B filtering ++ ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b; ++@@ -2169,6 +2188,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? 
nPbH_c : 16); ++ *u++ = rpi_filter_coefs[_mx2][0]; ++ *u++ = rpi_filter_coefs[_my2][0]; +++ u+=2; // Weights not supported in B slices ++ *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++@@ -2795,6 +2815,9 @@ static void rpi_inter_clear(HEVCContext *s) ++ int i; ++ int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1]; ++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1]; +++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || +++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); +++ ++ for(i=0;i<8;i++) { ++ s->u_mvs[i] = s->mvs_base[i]; ++ *s->u_mvs[i]++ = 0; ++@@ -2806,6 +2829,13 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = pic_height; ++ *s->u_mvs[i]++ = s->frame->linesize[1]; ++ *s->u_mvs[i]++ = s->frame->linesize[2]; +++ if (weight_flag) { +++ *s->u_mvs[i]++ = 1 << (s->sh.chroma_log2_weight_denom + 6 - 1); +++ *s->u_mvs[i]++ = s->sh.chroma_log2_weight_denom + 6; +++ } else { +++ *s->u_mvs[i]++ = 1 << 5; +++ *s->u_mvs[i]++ = 6; +++ } ++ s->u_mvs[i] += 1; // Padding words ++ } ++ } ++@@ -2849,12 +2879,29 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs]; ++ ++ #ifdef RPI +++#ifdef RPI_INTER_QPU ++ s->enable_rpi = s->ps.sps->bit_depth == 8 ++ && s->ps.sps->width <= RPI_MAX_WIDTH ++ && !s->ps.pps->cross_component_prediction_enabled_flag ++ && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1 ++- && !(s->ps.pps->weighted_pred_flag && s->sh.slice_type == P_SLICE) ++ && !(s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE); +++#else +++ s->enable_rpi = s->ps.sps->bit_depth == 8 +++ && s->ps.sps->width <= RPI_MAX_WIDTH +++ && !s->ps.pps->cross_component_prediction_enabled_flag +++ && 
s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1; +++#endif +++ +++ /*if (!s->enable_rpi) { +++ if (s->ps.pps->cross_component_prediction_enabled_flag) +++ printf("Cross component\n"); +++ if (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1) +++ printf("Tiles\n"); +++ if (s->ps.pps->weighted_pred_flag && s->sh.slice_type == P_SLICE) +++ printf("Weighted P slice\n"); +++ if (s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE) +++ printf("Weighted B slice\n"); +++ }*/ ++ ++ #endif ++ ++@@ -2987,6 +3034,7 @@ static int hls_decode_entry_wpp(AVCodecContext *avctxt, void *input_ctb_row, int ++ ++ #ifdef RPI ++ s->enable_rpi = 0; +++ //printf("Wavefront\n"); ++ #endif ++ ++ if(ctb_row) { ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index b0b93b5..3f04d80 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -88,287 +88,307 @@ unsigned int rpi_shader[] = { ++ /* [0x000001f8] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++ /* [0x00000200] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++ /* [0x00000208] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000218] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000220] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000228] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000230] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000238] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000240] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000248] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000210] */ 0x15827d80, 0x10021327, // mov rb12,unif +++/* [0x00000218] */ 0x15827d80, 0x10021367, // mov rb13,unif +++/* [0x00000220] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000228] */ 0x13740dc0, 0xd0020867, // max r1, 
ra_y, 0 +++/* [0x00000230] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000238] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000240] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000248] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000250] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base +++/* [0x00000258] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base ++ // ::mc_filter_uv ++-/* [0x00000250] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000258] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000260] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000268] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000270] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000278] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000280] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000288] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000290] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000298] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000002a0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000002a8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002b0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002b8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002c0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002c8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002d0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002d8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000002e0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000002e8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002f0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000002f8] */ 0x119e7080, 0x10020827, // shl r0, r0, 
r2 ++-/* [0x00000300] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000330] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000340] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000350] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000358] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000360] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000260] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000268] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000270] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000278] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000280] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000288] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000290] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000298] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000002a0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000002a8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000002b0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000002b8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002c0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* 
[0x000002c8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002d0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002d8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002e0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002e8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000002f0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000002f8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000310] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000340] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000350] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000358] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000360] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000368] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000370] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000378] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000380] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00000388] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000390] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 +++/* [0x00000398] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000003a0] */ 0x0f9e7080, 0x100613a7, // asr.ifnz 
rb14, r0, r2 +++/* [0x000003a8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000003b0] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x00000368] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000370] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000378] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000380] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000388] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000390] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000398] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003a0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003a8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003b0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000003b8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003c0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000003c8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000003d0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000003d8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000003e0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000003e8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000003f0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000003f8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000400] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000408] */ 0x8d3447f6, 0xd00279cc, // 
sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000410] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000418] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000420] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000428] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000430] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000438] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000440] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000448] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000450] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000458] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000460] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000468] */ 0x0c567380, 0x10020867, // add r1, r1, ra21 ++-/* [0x00000470] */ 0xfffffed8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000478] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 ++-/* [0x00000480] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000488] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000490] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000498] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000004a8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000004b0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000004b8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000004c0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000004c8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000004d0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000003b8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003c0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 
+++/* [0x000003c8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x000003d0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003d8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003e0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003e8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003f0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003f8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000400] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000408] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000410] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000418] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000420] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000428] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000430] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000438] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000440] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000448] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000450] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000458] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000460] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000468] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000470] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000478] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000480] */ 0x00000020, 0xe0021327, // mov rb12,32 +++/* 
[0x00000488] */ 0x00000006, 0xe0021367, // mov rb13,6 +++/* [0x00000490] */ 0x00000001, 0xe00213a7, // mov rb14,1 +++/* [0x00000498] */ 0x00000000, 0xe00213e7, // mov rb15,0 +++/* [0x000004a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000004a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000004b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000004b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000004c0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000004c8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000004d0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000004d8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x000004e0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x000004e8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x000004f0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004f8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x00000500] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000508] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000510] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000518] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000520] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000528] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000530] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000538] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000540] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000548] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000550] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x000004d8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000004e0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000004e8] */ 
0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000004f0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000004f8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000500] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000508] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000510] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000518] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000520] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000528] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000530] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000538] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000540] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000548] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000550] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000558] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000560] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000568] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000570] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000578] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000580] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000588] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000590] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000598] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005a0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005a8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005b0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000005b8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005c0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, 
rb23; mul24 r0, r0, ra22 ++-/* [0x000005c8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000005d0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x000005d8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000005e0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000005e8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000558] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000560] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000568] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000570] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000578] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000580] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000588] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000590] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000598] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x000005a0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000005a8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x000005b0] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x000005b8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000005c0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005c8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000005d0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000005d8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000005e0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000005e8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000005f0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000005f8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000600] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000608] */ 0x0c9db1c0, 0x100216a7, // 
add rb26, r0, rb27 +++/* [0x00000610] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000618] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000620] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000628] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000630] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000638] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000640] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000648] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000650] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000658] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000660] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000668] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000670] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000678] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x000005f0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000005f8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000600] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000608] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000610] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000618] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000620] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000628] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000630] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000638] */ 0x0c627c80, 
0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000640] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000648] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000650] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000658] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000660] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000668] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000670] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000678] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000680] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000688] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000690] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000698] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000006a0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000006a8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x000006b0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000006b8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000006c0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000006c8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000006d0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000006d8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000006e0] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 ++-/* [0x000006e8] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x000006f0] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++-/* [0x000006f8] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000700] */ 0x009e7000, 0x100009e7, // nop ++-/* 
[0x00000708] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000710] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000718] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000720] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000680] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000688] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000690] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000698] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000006a0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000006a8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000006b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000006b8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000006c0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x000006c8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x000006d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000006d8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000006e0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000006e8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000006f0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000006f8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000700] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000708] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000710] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000718] */ 0x8c9df4ff, 0x10024823, // add 
r0, r2, r3 ; mov r3, rb31 +++/* [0x00000720] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000728] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000730] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000738] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000740] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000748] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000750] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000758] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000760] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000768] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000770] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x00000778] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000780] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* [0x00000788] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000790] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000798] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000007a0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007b0] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x00000728] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000730] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000738] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000740] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000748] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000750] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000758] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000760] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000768] */ 0x149dc1c0, 0xd00214e7, // 
and rb_x_base_next, r0, ~3 ++-/* [0x00000770] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000778] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000780] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000788] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000790] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000798] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000007a0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000007a8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000007b0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000007b8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000007c0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000007c8] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x000007d0] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x000007d8] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x000007e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000007e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000007f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000007f8] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000800] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000808] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000810] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000818] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000820] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000828] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000830] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000838] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000840] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000848] */ 0xfffffff8, 
0xe0021967, // mov r5rep, -8 ++-/* [0x00000850] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000858] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000007b8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000007c0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000007c8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000007d0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000007d8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000007e0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007e8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007f0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007f8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 +++/* [0x00000800] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000808] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 +++/* [0x00000810] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000818] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000820] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000828] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000830] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000838] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000840] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000848] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000850] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000858] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000860] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000868] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000870] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000878] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000880] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* 
[0x00000888] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000890] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000898] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000008b8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008d0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x000008d8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008e0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008e8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000008f0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008f8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000860] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000868] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000870] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000878] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000880] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000888] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000890] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000898] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000008a0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, 
rb20 ++-/* [0x000008a8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000008b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008b8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000008c0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000008c8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000008d0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000008d8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000008e0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000008e8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000008f0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000008f8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000900] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000908] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000910] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000918] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000920] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000928] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000930] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000938] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000940] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000948] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000950] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x00000958] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000960] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000968] */ 0x0c7a7380, 0x10020867, // add r1, r1, 
ra30 ++-/* [0x00000970] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000978] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000980] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000988] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000990] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000998] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000009a0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x000009a8] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x000009b0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000009b8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000009c0] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x000009c8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x000009d0] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000900] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000908] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 +++/* [0x00000910] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++/* [0x00000918] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000920] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000928] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000930] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000938] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000940] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 +++/* [0x00000948] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 +++/* [0x00000950] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000958] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000960] */ 0x40038031, 0xd000c9e2, 
// nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000968] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000970] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000978] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000980] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000988] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000990] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000998] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x000009a0] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x000009a8] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009b0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000009b8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000009c0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000009c8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000009d0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000009d8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000009e0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000009e8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009f0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009f8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000a00] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000a08] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000a10] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000a18] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000a20] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a28] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a30] */ 0x159dafc0, 
0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a38] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a40] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000a48] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a50] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a60] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x000009d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x000009e0] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x000009e8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009f0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x000009f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a00] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a08] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a10] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000a18] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a78] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a80] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a98] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000aa0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ab0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000a20] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a28] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a30] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a38] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a40] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a48] */ 0x00000010, 
0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a50] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a58] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a60] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a68] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000a80] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a88] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000a90] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000ac0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ae0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b08] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b10] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b18] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000b20] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b28] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000b30] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 99927c4..cec9901 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,11 +4,11 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 148) ++-#define mc_filter_uv_b0 (rpi_shader + 310) ++-#define mc_filter_uv_b (rpi_shader + 458) ++-#define mc_exit (rpi_shader 
+ 630) ++-#define mc_interrupt_exit8 (rpi_shader + 648) ++-#define mc_end (rpi_shader + 678) +++#define mc_filter_uv (rpi_shader + 152) +++#define mc_filter_uv_b0 (rpi_shader + 342) +++#define mc_filter_uv_b (rpi_shader + 494) +++#define mc_exit (rpi_shader + 670) +++#define mc_interrupt_exit8 (rpi_shader + 688) +++#define mc_end (rpi_shader + 718) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index d9ffcda..97c4c02 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -9,7 +9,12 @@ ++ # (ra15 isn't clamped to zero - this happens during the ++ # copy to ra14, and during its use in the vertical filter) ++ # ++-# rb8...rb15 eight vertical filter coefficients +++# rb8...rb11 eight vertical filter coefficients +++ +++# rb12 offset to add before shift +++# rb13 shift +++# rb14 weight (U on left, V on right) +++# rb15 offset (U on left, V on right) ++ # ++ # ra16 clipped(row start address+elem_num)&~3 ++ # ra17 per-channel shifts ++@@ -165,6 +170,9 @@ add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++ add t0s, r0, r1 ; mov ra_x2_base, r2 ++ add t0s, r2, r1 ++ +++mov rb12,unif # offset before shift +++mov rb13,unif # offset after shift +++ ++ # Dump padding words ++ mov r0, unif ++ ++@@ -231,11 +239,21 @@ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23 ++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++mov r0, unif # U offset/weight +++asr rb15, r0, r2 # Compute offset from MSBs +++shl r0, r0, r2 +++asr rb14, r0, r2 # Compute weight from LSBs +++mov r0, unif # V offset/weight +++asr.ifnz rb15, r0, r2 +++shl r0, r0, r2 +++asr.ifnz rb14, r0, r2 +++ ++ # r2 is elem_num ++ # r3 is loop counter ++ ++ mov r5rep, -8 ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++ # retrieve texture results and pick out bytes ++ # then submit two more texture requests ++@@ -279,6 +297,11 @@ mov ra13, ra14 # Delay slot 1 ++ mov ra14, ra15 # 
Delay slot 2 ++ mov ra15, r0 # Delay slot 3 ++ +++mov rb12,32 +++mov rb13,6 +++mov rb14,1 +++mov rb15,0 +++ ++ # apply vertical filter and write to VPM ++ ++ nop ; mul24 r1, ra14, rb10 ++@@ -288,9 +311,11 @@ add r1, r1, r0 ; mul24 r0, ra15, rb11 ++ add r1, r1, r0 ; mov -, vw_wait ++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++ asr r1, r1, 14 ++-add r1, r1, ra21 +++nop ; mul24 r1, r1, rb14 +++add r1, r1, rb12 +++asr r1, r1, rb13 ++ brr.anyn -, r:uvloop ++-asr r1, r1, 6 # Delay 1 +++add r1, r1, rb15 # Delay 1 ++ min r1, r1, rb22 # Delay 2 ++ max vpm, r1, 0 # Delay 3 ++ ++@@ -364,6 +389,9 @@ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23 ++ +++mov r0, unif # U offset/weight +++mov r0, unif # V offset/weight +++ ++ # r2 is elem_num ++ # r3 is loop counter ++ ++@@ -491,6 +519,9 @@ asr rb10, r0, rb23; mul24 r0, r0, ra22 ++ asr rb9, r0, rb23; mul24 r0, r0, ra22 ++ asr rb8, r0, rb23 ++ +++mov r0, unif # U offset/weight +++mov r0, unif # V offset/weight +++ ++ # r2 is elem_num ++ # r3 is loop counter ++ ++-- ++2.7.4 ++ ++ ++From 310d994ea39e29b41a6a013abc4d94e6b90487b2 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Tue, 19 May 2015 08:43:30 +0100 ++Subject: [PATCH 40/68] Improved ordering of tasks ++ ++--- ++ libavcodec/hevc.c | 8 ++++---- ++ 1 file changed, 4 insertions(+), 4 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 9668ef8..951e2d3 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2943,15 +2943,15 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->dblk_cmds[s->num_dblk_cmds][0] = x_ctb; ++ s->dblk_cmds[s->num_dblk_cmds++][1] = y_ctb; ++ if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= s->ps.sps->width) { ++-#ifdef RPI_INTER_QPU ++- // Kick off inter prediction on QPUs ++- rpi_execute_inter_qpu(s); ++-#endif ++ // Transform all blocks ++ // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, 
y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++ rpi_execute_transform(s); ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); +++#ifdef RPI_INTER_QPU +++ // Kick off inter prediction on QPUs +++ rpi_execute_inter_qpu(s); +++#endif ++ // Wait for transform completion ++ vpu_wait(s->vpu_id); ++ ++-- ++2.7.4 ++ ++ ++From d6e1ce7898196e49e52a6223c12979b3d0014588 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 20 May 2015 19:58:19 +0100 ++Subject: [PATCH 41/68] Drafted Luma inter prediction ++ ++--- ++ libavcodec/rpi_shader.qasm | 594 ++++++++++++++++++++++++++++++++++++++++++--- ++ 1 file changed, 554 insertions(+), 40 deletions(-) ++ ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 97c4c02..9cfc0d9 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -2,7 +2,10 @@ ++ # ++ # ra0...ra7 eight horizontal filter coefficients ++ # ++-# rb1...rb7 seven shifted copies of the current unfiltered row +++# rb0 rx_shift2 +++# rb1 ra_y2_next +++# +++# rb4...rb7 ++ # ++ # ra8...ra15 eight filtered rows of context (rb15 == most recent) ++ # ++@@ -26,9 +29,9 @@ ++ # rb19 next ra16 ++ # ++ # ra20 1 ++-# ra21 32 +++# ra21 ra_21 ++ # ra22 256 ++-# ra23 8 +++# ra23 rx_shift2_next ++ # ++ # rb20 0xffffff00 ++ # rb21 vpm_setup for reading/writing 16bit results into VPM ++@@ -57,16 +60,23 @@ ++ .set rb_frame_width_minus_1, rb25 ++ .set rb_frame_height_minus_1, rb30 ++ .set rb_pitch, rb16 ++-.set ra_x_base, ra16 ++-.set rb_x_base_next, rb19 ++-.set ra_x2_base, ra24 ++-.set ra_x2_base_next, ra26 +++.set ra_x, ra16 +++.set ra_y2, ra21 +++.set ra_y2_next, rb1 +++ +++.set rb_x_next, rb19 +++.set rx_frame_base2_next, rb19 +++ +++.set ra_frame_base, ra24 +++.set ra_frame_base_next, ra26 ++ .set ra_xshift, ra17 ++ ++-.set ra_x2shift, ra25 ++ .set ra_u2v_ref_offset, ra25 +++.set ra_frame_base2, ra25 ++ ++ .set ra_xshift_next, ra19 +++.set rx_xshift2, rb0 
+++.set rx_xshift2_next, ra23 ++ ++ .set ra_x2shift_next, ra27 ++ .set ra_u2v_dst_offset, ra27 ++@@ -83,11 +93,11 @@ ++ mov ra31, unif ++ ++ # Load first request location ++-add ra_x_base, unif, elem_num # Store x +++add ra_x, unif, elem_num # Store x ++ mov ra_y, unif # Store y ++-mov ra_x2_base, unif # Store frame u base +++mov ra_frame_base, unif # Store frame u base ++ nop ++-sub ra_u2v_ref_offset, unif, ra_x2_base # Store offset to add to move from u to v in reference frame +++sub ra_u2v_ref_offset, unif, ra_frame_base # Store offset to add to move from u to v in reference frame ++ ++ # Read image dimensions ++ sub rb25,unif,1 ++@@ -104,9 +114,7 @@ add rb24, r1, r0 ++ # load constants ++ ++ mov ra20, 1 ++-mov ra21, 32 ++ mov ra22, 256 ++-mov ra23, 8 ++ mov ra30, 64 ++ ++ mov rb20, 0xffffff00 ++@@ -156,18 +164,18 @@ mov r1, vpm_setup(0, 2, h16p(0, 0)) # 2 is stride - stride acts on ADDR which i ++ add rb21, r0, r1 ++ ++ # Compute base address for first and second access ++-mov r0, ra_x_base # Load x +++mov r0, ra_x # Load x ++ max r0, r0, 0; mov r1, ra_y # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base # Load the frame base +++min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base # Load the frame base ++ shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++ add ra_y, r1, 1 ++ add r0, r0, r3 ++ and r0, r0, ~3 ++-max r1, r1, 0 ; mov ra_x_base, r0 # y +++max r1, r1, 0 ; mov ra_x, r0 # y ++ min r1, r1, rb_frame_height_minus_1 ++ # submit texture requests for first line ++ add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-add t0s, r0, r1 ; mov ra_x2_base, r2 +++add t0s, r0, r1 ; mov ra_frame_base, r2 ++ add t0s, r2, r1 ++ ++ mov rb12,unif # offset before shift ++@@ -182,8 +190,8 @@ min r1, r1, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ++ bra -, ra31 ++ nop ; mul24 r1, r1, rb_pitch ++-add t0s, r1, ra_x_base ++-add t0s, r1, ra_x2_base +++add t0s, r1, ra_x +++add t0s, r1, ra_frame_base ++ ++ ++ ++@@ -192,7 +200,7 @@ add t0s, r1, ra_x2_base ++ # 
mc_filter_uv(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) ++ ++ # At this point we have already issued two pairs of texture requests for the current block ++-# ra_x_base, ra_x16_base point to the current coordinates for this block +++# ra_x, ra_x16_base point to the current coordinates for this block ++ ::mc_filter_uv ++ mov ra31, unif ++ ++@@ -207,9 +215,9 @@ min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base ++ shl ra_xshift_next, r0, 3 ++ sub r2, unif, r3 # compute offset from frame base u to frame base v ++ add r0, r0, r3 ++-and rb_x_base_next, r0, ~3 +++and rb_x_next, r0, ~3 ++ mov ra_y_next, r1 ++-add ra_x2_base_next, rb_x_base_next, r2 +++add ra_frame_base_next, rb_x_next, r2 ++ ++ # set up VPM write ++ mov vw_setup, rb28 ++@@ -265,16 +273,16 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ ++ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 +++add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -297,7 +305,7 @@ mov ra13, ra14 # Delay slot 1 ++ mov ra14, ra15 # Delay slot 2 ++ mov ra15, r0 # Delay slot 3 ++ ++-mov rb12,32 +++mov rb12,32 # TODO remove these to make P weighted prediction work properly ++ mov rb13,6 ++ mov rb14,1 ++ mov rb15,0 ++@@ -342,7 +350,7 @@ mov vw_addr, 
unif # start the VDW ++ # mc_filter_uv_b0(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) ++ ++ # At this point we have already issued two pairs of texture requests for the current block ++-# ra_x_base, ra_x16_base point to the current coordinates for this block +++# ra_x, ra_x16_base point to the current coordinates for this block ++ ::mc_filter_uv_b0 ++ mov ra31, unif ++ ++@@ -357,9 +365,9 @@ min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base ++ shl ra_xshift_next, r0, 3 ++ sub r2, unif, r3 # compute offset from frame base u to frame base v ++ add r0, r0, r3 ++-and rb_x_base_next, r0, ~3 +++and rb_x_next, r0, ~3 ++ mov ra_y_next, r1 ++-add ra_x2_base_next, rb_x_base_next, r2 +++add ra_frame_base_next, rb_x_next, r2 ++ ++ # set up VPM write, we need to save 16bit precision ++ mov vw_setup, rb21 ++@@ -408,16 +416,16 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ ++ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 +++add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -477,9 +485,9 @@ min r0, r0, rb_frame_width_minus_1 ; mov r3, unif # frame_base ++ shl ra_xshift_next, r0, 3 ++ sub r2, unif, r3 # compute offset from frame base u to frame base v ++ add r0, r0, r3 ++-and 
rb_x_base_next, r0, ~3 +++and rb_x_next, r0, ~3 ++ mov ra_y_next, r1 ++-add ra_x2_base_next, rb_x_base_next, r2 +++add ra_frame_base_next, rb_x_next, r2 ++ ++ # set up VPM write ++ mov vw_setup, rb28 ++@@ -538,16 +546,16 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ ++ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_x2_base, r2 +++add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++add t0s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -642,5 +650,511 @@ nop ; nop ; thrend ++ mov interrupt, 1; nop # delay slot 1 ++ nop ; nop # delay slot 2 ++ +++ +++ +++ +++ +++# LUMA CODE +++ +++# The idea is to form B predictions by doing 8 pixels from ref0 in parallel with 8 pixels from ref1. 
+++# For P frames we make the second x,y coordinates offset by +8 +++ +++################################################################################ +++# mc_setup(next_kernel, x, y, ref_y_base, x2, y2, ref_y2_base, frame_width, frame_height, pitch, dst_pitch, offset, shift, pad2) +++::mc_setup +++ +++# Read starting kernel +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++mov ra31, unif +++ +++# Compute base address for first and second access +++add r0, unif, elem_num # Load x +++max r0, r0, 0; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl ra_xshift_next, r0, 3 # Compute shifts +++add ra_y, r1, 1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add r2, r2, r0 # r2 is address for frame0 (not including y offset) +++max r1, r1, 0 +++min r1, r1, rb_frame_height_minus_1 +++nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 +++add t0s, r2, r1 ; mov ra_frame_base, r2 +++ +++add r0, unif, elem_num # Load x +++max r0, r0, 0; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl rx_xshift2_next, r0, 3 # Compute shifts +++add ra_y2, r1, 1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add r2, r2, r0 # r2 is address for frame1 (not including y offset) +++max r1, r1, 0 +++min r1, r1, rb_frame_height_minus_1 +++nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 +++add t0s, r2, r1 ; mov ra_frame_base2, r2 +++ +++ +++# Read image dimensions +++sub rb25,unif,1 +++sub rb30,unif,1 +++ +++# get source pitch +++mov rb16, unif +++ +++# get destination pitch +++mov r0, unif +++mov r1, vdw_setup_1(0) +++add rb24, r1, r0 +++ +++# load constants +++ +++mov ra20, 1 +++mov ra22, 256 +++mov ra30, 64 +++ +++mov rb20, 0xffffff00 +++mov rb22, 255 +++mov rb23, 24 +++ +++# touch vertical context to keep simulator happy +++ +++mov ra8, 0 +++mov 
ra9, 0 +++mov ra10, 0 +++mov ra11, 0 +++mov ra12, 0 +++mov ra13, 0 +++mov ra14, 0 +++mov ra15, 0 +++ +++# Compute part of VPM to use for DMA output +++mov r2, qpu_num +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later +++shl r0, r0, 5 +++add rb27, r0, r1 +++ +++# Compute part of VPM to save data into +++mov r2, qpu_num # qpu_num = abcd +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 +++mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit +++add rb28, r0, r1 +++ +++mov rb12,unif # offset before shift +++mov rb13,unif # shift +++ +++# Dump padding words +++mov r0, unif +++ +++# submit texture requests for second line +++max r1, ra_y, 0 +++min r1, r1, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 +++nop ; mul24 r1, r1, rb_pitch +++add t0s, r1, ra_frame_base +++ +++max r1, ra_y2, 0 +++min r1, r1, rb_frame_height_minus_1 +++bra -, ra31 +++add ra_y2, ra_y2, 1 # Delay 1 +++nop ; mul24 r1, r1, rb_pitch # Delay 2 +++add t0s, r1, ra_frame_base2 # Delay 3 +++ +++ +++################################################################################ +++ +++# mc_filter(next_kernel, x, y, frame_base, x2, y2, frame_base2, height, hcoeffs[0], hcoeffs2[0], hcoeffs[1], hcoeffs2[1], vcoeffs[0], vcoeffs2[0], vcoeffs[1], vcoeffs2[1], offsetweight0, offsetweight1, this_dst) +++# In a P block, only the first half of coefficients contain used information. 
+++# At this point we have already issued two pairs of texture requests for the current block +++# ra_x, ra_x16_base point to the current coordinates for this block +++::mc_filter +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++mov rx_xshift2, rx_xshift2_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # Load x +++max r0, r0, 0; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl ra_xshift_next, r0, 3 # Compute shifts +++mov ra_y_next, r1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add ra_frame_base_next, r2, r0 # r2 is address for frame0 (not including y offset) +++ +++add r0, unif, elem_num # Load x +++max r0, r0, 0 ; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl rx_xshift2_next, r0, 3 # Compute shifts +++add ra_y2_next, r1, 1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add rx_frame_base2_next, r2, r0 # r2 is address for frame1 (not including y offset) +++ +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++# get filter coefficients and discard unused B frame values +++mov r0, unif +++mov.ifnz -, unif # Alternate coefficients are unused for P frames +++asr ra3, r0, rb23; mul24 r0, r0, ra22 # These may need some pre-rotation to be used in B frames correctly +++asr ra2, r0, rb23; mul24 r0, r0, ra22 
+++asr ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++mov.ifnz -, unif +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++asr ra4, r0, rb23; mov r0, unif +++mov.ifnz -, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++mov.ifnz -, unif +++asr rb7, r0, rb23; mul24 r0, r0, ra22 +++asr rb6, r0, rb23; mul24 r0, r0, ra22 +++asr rb5, r0, rb23; mul24 r0, r0, ra22 +++asr rb4, r0, rb23 +++ +++mov r0, unif # Frame0 offset/weight +++mov.ifnz -, unif # Frame1 offset/weight unused +++asr rb15, r0, r2 # Compute offset from MSBs +++shl r0, r0, r2 +++asr rb14, r0, r2 # Compute weight from LSBs +++ +++# r3 is loop counter +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++mov r3, 0 +++ +++:yloop +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++# If we knew there was no clipping then this code would get simpler. +++# Perhaps we could add on the pitch and clip using larger values? 
+++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, rx_xshift2 +++mov.ifz ra_y2, ra_y2_next +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y2, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++ +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# apply horizontal filter +++nop ; mul24 r2, r0, ra0 +++nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++nop ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++add r0, r2, r3 ; mov r3, rb31 +++sub.setf -, r3, 8 ; mov ra12, ra13 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++brr.anyn -, r:yloop +++mov ra13, ra14 # Delay slot 1 +++mov ra14, ra15 # Delay slot 2 +++mov ra15, r0 # Delay slot 3 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r1, ra14, rb10 +++nop ; mul24 r0, ra13, rb9 +++add r1, r1, r0 ; mul24 r0, ra12, rb8 +++add r1, r1, r0 ; mul24 r0, 
ra15, rb11 +++add r1, r1, r0 ; mul24 r0, ra8, rb4 +++add r1, r1, r0 ; mul24 r0, ra9, rb5 +++add r1, r1, r0 ; mul24 r0, ra10, rb6 +++add r1, r1, r0 ; mul24 r0, ra11, rb7 +++ +++add r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++asr r1, r1, 14 +++nop ; mul24 r1, r1, rb14 +++add r1, r1, rb12 +++asr r1, r1, rb13 +++brr.anyn -, r:yloop +++add r1, r1, rb15 # Delay 1 +++min r1, r1, rb22 # Delay 2 +++max vpm, r1, 0 # Delay 3 +++ +++# DMA out +++ +++bra -, ra31 +++mov vw_setup, rb26 # VDW setup 0 Delay 1 +++mov vw_setup, rb29 # Stride Delay 2 +++mov vw_addr, unif # start the VDW Delay 3 +++ +++ +++ +++################################################################################ +++ +++# mc_filter_b(next_kernel, x, y, frame_base, x2, y2, frame_base2, width_height, hcoeffs[0], hcoeffs2[0], hcoeffs[1], hcoeffs2[1], vcoeffs[0], vcoeffs2[0], vcoeffs[1], vcoeffs2[1], offsetweight0, offsetweight1, this_dst) +++# In a P block, only the first half of coefficients contain used information. +++# At this point we have already issued two pairs of texture requests for the current block +++# May be better to just send 16.16 motion vector and figure out the coefficients inside this block (only 4 cases so can compute hcoeffs in around 24 cycles?) +++# Can fill in the coefficients so only +++# Can also assume default weighted prediction for B frames. +++# Perhaps can unpack coefficients in a more efficient manner by doing H/V for a and b at the same time? +++# Or possibly by taking advantage of symmetry? +++# From 19->7 32bits per command. 
+++::mc_filter_b +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++mov ra31, unif +++ +++# per-channel shifts were calculated on the *previous* invocation +++ +++mov ra_xshift, ra_xshift_next +++mov rx_xshift2, rx_xshift2_next +++ +++# get base addresses and per-channel shifts for *next* invocation +++add r0, unif, elem_num # Load x +++max r0, r0, 0; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl ra_xshift_next, r0, 3 # Compute shifts +++mov ra_y_next, r1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add ra_frame_base_next, r2, r0 # r2 is address for frame0 (not including y offset) +++ +++add r0, unif, elem_num # Load x +++max r0, r0, 0 ; mov r1, unif # Load y +++min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++shl rx_xshift2_next, r0, 3 # Compute shifts +++add ra_y2_next, r1, 1 +++and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++add rx_frame_base2_next, r2, r0 # r2 is address for frame1 (not including y offset) +++ +++ +++# set up VPM write +++mov vw_setup, rb28 +++ +++# get width,height of block +++mov r2, 16 +++mov r0, unif +++shr r1, r0, r2 # Extract width +++sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++and r0, r0, rb22 # Extract height +++add rb17, r0, 5 +++add rb18, r0, 7 +++shl r0, r0, 7 +++add r0, r0, r1 # Combine width and height of destination area +++shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register +++add rb26, r0, rb27 +++ +++# get filter coefficients and discard unused B frame values +++mov r0, unif +++mov r1, 1 +++mov.ifnz r0, unif # Alternate coefficients are unused for P frames +++nop ; mul24 r0, r0 << 13, r1 << 13 +++asr ra3, r0, rb23; mul24 r0, r0, ra22 +++nop ; mul24 r0, r0 << 14, r1 << 14 +++asr ra2, r0, rb23; mul24 r0, r0, ra22 +++nop ; mul24 r0, r0 << 15, r1 << 15 # Adjust such that a rotate of 1 will produce the values with first 8 on left, second 8 on right +++asr 
ra1, r0, rb23; mul24 r0, r0, ra22 +++asr ra0, r0, rb23; mov r0, unif +++mov.ifnz r0, unif +++nop ; mul24 r0, r0 << 9, r1 << 9 +++asr ra7, r0, rb23; mul24 r0, r0, ra22 +++nop ; mul24 r0, r0 << 10, r1 << 10 +++asr ra6, r0, rb23; mul24 r0, r0, ra22 +++nop ; mul24 r0, r0 << 11, r1 << 11 +++asr ra5, r0, rb23; mul24 r0, r0, ra22 +++nop ; mul24 r0, r0 << 12, r1 << 12 +++asr ra4, r0, rb23; mov r0, unif +++mov.ifnz r0, unif +++asr rb11, r0, rb23; mul24 r0, r0, ra22 +++asr rb10, r0, rb23; mul24 r0, r0, ra22 +++asr rb9, r0, rb23; mul24 r0, r0, ra22 +++asr rb8, r0, rb23; mov r0, unif +++mov.ifnz r0, unif +++asr rb7, r0, rb23; mul24 r0, r0, ra22 +++asr rb6, r0, rb23; mul24 r0, r0, ra22 +++asr rb5, r0, rb23; mul24 r0, r0, ra22 +++asr rb4, r0, rb23 +++ +++mov r0, unif # Frame0 offset/weight +++mov.ifnz r0, unif # Frame1 offset/weight unused +++asr rb15, r0, r2 # Compute offset from MSBs +++shl r0, r0, r2 +++asr rb14, r0, r2 # Compute weight from LSBs +++ +++# r3 is loop counter +++ +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++mov r3, 0 +++ +++:yloopb +++# retrieve texture results and pick out bytes +++# then submit two more texture requests +++ +++# If we knew there was no clipping then this code would get simpler. +++# Perhaps we could add on the pitch and clip using larger values? 
+++ +++sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++shr r1, r4, rx_xshift2 +++mov.ifz ra_y2, ra_y2_next +++ +++max r2, ra_y, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte +++ +++max r2, ra_y2, 0 # y +++min r2, r2, rb_frame_height_minus_1 +++add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++ +++ +++# generate seven shifted versions +++# interleave with scroll of vertical context +++ +++mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ +++# apply horizontal filter +++nop ; mul24 r2, r0, ra0 +++nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++nop ; mul24 r3, ra1 << 1, r0 << 1 +++nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++add r0, r2, r3 ; mov r3, rb31 +++sub.setf -, r3, 8 ; mov ra12, ra13 +++mov ra9, ra10 +++mov ra10, ra11 +++mov ra11, ra12 +++mov ra12, ra13 +++brr.anyn -, r:yloopb +++mov ra13, ra14 # Delay slot 1 +++mov ra14, ra15 # Delay slot 2 +++mov ra15, r0 # Delay slot 3 +++ +++# apply vertical filter and write to VPM +++ +++nop ; mul24 r1, ra14, rb10 +++nop ; mul24 r0, ra13, rb9 +++add r1, r1, r0 ; mul24 r0, ra12, rb8 +++add r1, r1, r0 ; mul24 r0, 
ra15, rb11 +++add r1, r1, r0 ; mul24 r0, ra8, rb4 +++add r1, r1, r0 ; mul24 r0, ra9, rb5 +++add r1, r1, r0 ; mul24 r0, ra10, rb6 +++add r1, r1, r0 ; mul24 r0, ra11, rb7 +++ +++add r1, r1, r0 ; mov -, vw_wait +++sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++asr r1, r1, 14 +++nop ; mul24 r1, r1 << 8, ra20 << 8 # Rotate to align left and right halves +++add r1, r1, ra30 ; mul24 r0, r1, rb14 +++add r1, r1, r0 +++brr.anyn -, r:yloopb +++asr r1, r1, 7 # Delay 1 +++min r1, r1, rb22 # Delay 2 +++max vpm, r1, 0 # Delay 3 +++ +++# DMA out +++bra -, ra31 +++mov vw_setup, rb26 # VDW setup 0 Delay 1 +++mov vw_setup, rb29 # Stride Delay 2 +++mov vw_addr, unif # start the VDW Delay 3 +++ +++################################################################################ +++ +++# mc_interrupt_exit12() +++::mc_interrupt_exit12 +++mov -, vw_wait # wait on the VDW +++ +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ldtmu0 +++ +++mov -,sacq(0) # 1 +++mov -,sacq(0) # 2 +++mov -,sacq(0) # 3 +++mov -,sacq(0) # 4 +++mov -,sacq(0) # 5 +++mov -,sacq(0) # 6 +++mov -,sacq(0) # 7 +++mov -,sacq(0) # 8 +++mov -,sacq(0) # 9 +++mov -,sacq(0) # 10 +++mov -,sacq(0) # 11 +++ +++nop ; nop ; thrend +++mov interrupt, 1; nop # delay slot 1 +++nop ; nop # delay slot 2 +++ +++ ++ ::mc_end ++ # Do not add code here because mc_end must appear after all other code. 
++-- ++2.7.4 ++ ++ ++From f2ffe4186fa49cb27579953c276b51728a08a8b5 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 20 May 2015 19:58:30 +0100 ++Subject: [PATCH 42/68] Added support for fast cache flush in deblocker ++ ++--- ++ libavcodec/hevc_filter.c | 44 +- ++ libavcodec/rpi_qpu.c | 6 + ++ libavcodec/rpi_qpu.h | 2 + ++ libavcodec/rpi_shader.c | 1028 +++++++++++++++++++++++++++++--------------- ++ libavcodec/rpi_shader.h | 16 +- ++ libavcodec/rpi_user_vcsm.h | 22 + ++ 6 files changed, 768 insertions(+), 350 deletions(-) ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 92a8271..186317a 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -37,6 +37,11 @@ ++ ++ #include "bit_depth_template.c" ++ +++#ifdef RPI +++#include "rpi_user_vcsm.h" +++#include "rpi_qpu.h" +++#endif +++ ++ #define LUMA 0 ++ #define CB 1 ++ #define CR 2 ++@@ -872,15 +877,46 @@ static void flush_buffer(AVBufferRef *bref) { ++ gpu_cache_flush(p); ++ } ++ ++-static void ff_hevc_flush_chroma(HEVCContext *s) +++// Return Physical address for this image +++static int ff_hevc_buf_base(AVBufferRef *bref) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); +++ return p->vc & 0x3fffffff; +++} +++ +++static void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n) ++ { ++ if (s->enable_rpi && !( s->nal_unit_type == NAL_TRAIL_N || ++ s->nal_unit_type == NAL_TSA_N || ++ s->nal_unit_type == NAL_STSA_N || ++ s->nal_unit_type == NAL_RADL_N || ++ s->nal_unit_type == NAL_RASL_N )) { +++#define RPI_FAST_CACHEFLUSH +++#ifdef RPI_FAST_CACHEFLUSH +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ int curr_y = f->progress->data[0]; +++ int sz,base; +++ if (curr_y < 0) curr_y = 0; +++ if (n<=curr_y) return; // Should not happen +++ sz = s->frame->linesize[1] * (n-curr_y); +++ base = s->frame->linesize[1] * curr_y; +++ iocache.s[0].cmd = 3; // Flush L1 cache +++ iocache.s[0].addr = 0; +++ iocache.s[0].size = 0; +++ 
+++ iocache.s[1].cmd = 2; +++ iocache.s[1].addr = ff_hevc_buf_base(s->frame->buf[1]) + base; +++ iocache.s[1].size = sz; +++ +++ iocache.s[2].cmd = 2; +++ iocache.s[2].addr = ff_hevc_buf_base(s->frame->buf[2]) + base; +++ iocache.s[2].size = sz; +++ +++ vcsm_clean_invalid( gpu_get_mailbox(), &iocache ); +++ +++#else ++ flush_buffer(s->frame->buf[1]); ++ flush_buffer(s->frame->buf[2]); +++#endif ++ //memcpy(s->dummy.arm,s->frame->data[0],2048*64); ++ //memcpy(s->dummy.arm,s->frame->data[1],1024*32); ++ //memcpy(s->dummy.arm,s->frame->data[2],1024*32); ++@@ -903,7 +939,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ sao_filter_CTB(s, x, y - ctb_size); ++ if (s->threads_type & FF_THREAD_FRAME ) { ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s); +++ ff_hevc_flush_chroma(s,&s->ref->tf, y); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y, 0); ++ } ++@@ -912,7 +948,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ sao_filter_CTB(s, x , y); ++ if (s->threads_type & FF_THREAD_FRAME ) { ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s); +++ ff_hevc_flush_chroma(s, &s->ref->tf, y + ctb_size); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0); ++ } ++@@ -922,7 +958,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ //int currh = s->ref->tf.progress->data[0]; ++ //if (((y + ctb_size)&63)==0) ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s); +++ ff_hevc_flush_chroma(s, &s->ref->tf, y + ctb_size - 4); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index f62051f..fd8a276 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -237,6 +237,12 @@ int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) ++ return r; ++ } ++ +++int gpu_get_mailbox(void) +++{ +++ assert(gpu); +++ return gpu->mb; +++} +++ ++ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ { ++ void *tmp = 
vcsm_lock(p->vcsm_handle); ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 543c84b..88965e5 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -49,4 +49,6 @@ extern int rpi_test_shader(void); ++ extern void rpi_do_block(const unsigned char *in_buffer_vc, int src_pitch, unsigned char *dst_vc, int dst_pitch, unsigned char *dst); ++ extern void rpi_do_block_arm(const unsigned char *in_buffer, int src_pitch, unsigned char *dst, int dst_pitch); ++ +++extern int gpu_get_mailbox(void); +++ ++ #endif ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 3f04d80..9c30e32 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -23,11 +23,11 @@ __attribute__((aligned(8))) ++ unsigned int rpi_shader[] = { ++ // ::mc_setup_uv ++ /* [0x00000000] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000008] */ 0x0c9a0f80, 0x10020427, // add ra_x_base, unif, elem_num +++/* [0x00000008] */ 0x0c9a0f80, 0x10020427, // add ra_x, unif, elem_num ++ /* [0x00000010] */ 0x15827d80, 0x10020767, // mov ra_y, unif ++-/* [0x00000018] */ 0x15827d80, 0x10020627, // mov ra_x2_base, unif +++/* [0x00000018] */ 0x15827d80, 0x10020627, // mov ra_frame_base, unif ++ /* [0x00000020] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000028] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_x2_base +++/* [0x00000028] */ 0x0d620f80, 0x10020667, // sub ra_u2v_ref_offset, unif, ra_frame_base ++ /* [0x00000030] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 ++ /* [0x00000038] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++ /* [0x00000040] */ 0x15827d80, 0x10021427, // mov rb16, unif ++@@ -35,360 +35,708 @@ unsigned int rpi_shader[] = { ++ /* [0x00000050] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++ /* [0x00000058] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++ /* [0x00000060] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x00000068] */ 0x00000020, 0xe0020567, // mov ra21, 32 ++-/* [0x00000070] */ 
0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x00000078] */ 0x00000008, 0xe00205e7, // mov ra23, 8 ++-/* [0x00000080] */ 0x00000040, 0xe00207a7, // mov ra30, 64 ++-/* [0x00000088] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000090] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000098] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x000000a0] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x000000a8] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x000000b0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x000000b8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x000000c0] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x000000c8] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x000000d0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x000000d8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000e0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x000000e8] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-/* [0x000000f0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x000000f8] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000100] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000108] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000110] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000118] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000120] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000128] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000130] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000138] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000140] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000148] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-/* [0x00000150] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000158] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000160] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000168] */ 0x119c63c0, 0xd0020867, // shl 
r1, r1, 6 ++-/* [0x00000170] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000178] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000180] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000188] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000190] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000198] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 ++-/* [0x000001a0] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) ++-/* [0x000001a8] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 ++-/* [0x000001b0] */ 0x15427d80, 0x10020827, // mov r0, ra_x_base ++-/* [0x000001b8] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000001c0] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_x2_base ++-/* [0x000001c8] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000001d0] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001d8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000001e0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001e8] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x_base, r0 ++-/* [0x000001f0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001f8] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x00000200] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_x2_base, r2 ++-/* [0x00000208] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00000210] */ 0x15827d80, 0x10021327, // mov rb12,unif ++-/* [0x00000218] */ 0x15827d80, 0x10021367, // mov rb13,unif ++-/* [0x00000220] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000228] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000230] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000238] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000240] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000248] 
*/ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000250] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x_base ++-/* [0x00000258] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_x2_base +++/* [0x00000068] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000070] */ 0x00000040, 0xe00207a7, // mov ra30, 64 +++/* [0x00000078] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000080] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000088] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000090] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00000098] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x000000a0] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x000000a8] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x000000b0] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x000000b8] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x000000c0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x000000c8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x000000d0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x000000d8] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x000000e0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x000000e8] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x000000f0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x000000f8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000100] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000108] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000110] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000118] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000120] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000128] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000130] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000138] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x00000140] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* 
[0x00000148] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000150] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000158] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000160] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000168] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000170] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000178] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000180] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000188] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 +++/* [0x00000190] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) +++/* [0x00000198] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 +++/* [0x000001a0] */ 0x15427d80, 0x10020827, // mov r0, ra_x +++/* [0x000001a8] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000001b0] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base +++/* [0x000001b8] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000001c0] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000001c8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000001d0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001d8] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x, r0 +++/* [0x000001e0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x000001e8] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x000001f0] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 +++/* [0x000001f8] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x00000200] */ 0x15827d80, 0x10021327, // mov rb12,unif +++/* [0x00000208] */ 0x15827d80, 0x10021367, // mov rb13,unif +++/* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000218] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000220] */ 0x129de3c0, 0x10020867, // min r1, r1, 
rb_frame_height_minus_1 +++/* [0x00000228] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000230] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000238] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000240] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x +++/* [0x00000248] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base ++ // ::mc_filter_uv ++-/* [0x00000260] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000268] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000270] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000278] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000280] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000288] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000290] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000298] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000002a0] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000002a8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000002b0] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000002b8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002c0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002c8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002d0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002d8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002e0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002e8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000002f0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000002f8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000300] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000308] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000310] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 
++-/* [0x00000318] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000320] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000340] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000350] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000358] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000360] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000368] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000370] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00000378] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000380] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00000388] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000390] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 ++-/* [0x00000398] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000003a0] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 ++-/* [0x000003a8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000003b0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000250] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000258] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000260] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000268] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000270] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000278] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000280] */ 0x0d827cc0, 
0x100208a7, // sub r2, unif, r3 +++/* [0x00000288] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000290] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000298] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000002a0] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x000002a8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002b0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002b8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002c0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002c8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002d0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002d8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000002e0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000002e8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002f0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002f8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000300] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000330] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000340] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000348] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000350] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000358] */ 0x15827d80, 0x10020827, // mov r0, 
unif +++/* [0x00000360] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000368] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000370] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00000378] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000380] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 +++/* [0x00000388] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000390] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 +++/* [0x00000398] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000003a0] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x000003b8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003c0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x000003c8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x000003d0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003d8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003e0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003e8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003f0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003f8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000400] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000408] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000410] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000418] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000420] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000428] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000430] */ 0x4c0be4f0, 0xd00248a3, 
// add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000438] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000440] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000448] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000450] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000458] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000460] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000468] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000470] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000478] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000480] */ 0x00000020, 0xe0021327, // mov rb12,32 ++-/* [0x00000488] */ 0x00000006, 0xe0021367, // mov rb13,6 ++-/* [0x00000490] */ 0x00000001, 0xe00213a7, // mov rb14,1 ++-/* [0x00000498] */ 0x00000000, 0xe00213e7, // mov rb15,0 ++-/* [0x000004a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000004a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000004b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000004b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000004c0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000004c8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000004d0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000004d8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 ++-/* [0x000004e0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 ++-/* [0x000004e8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 ++-/* [0x000004f0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004f8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 ++-/* [0x00000500] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000508] */ 0x139c03c0, 0xd0020c27, 
// max vpm, r1, 0 ++-/* [0x00000510] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000518] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000520] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000528] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000530] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000538] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000540] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000548] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000550] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000003a8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003b0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x000003b8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x000003c0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003c8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003d0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003d8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003e0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000400] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000408] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000410] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000418] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000420] */ 0x4c0be4f0, 0xd00248a3, 
// add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000428] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000430] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000438] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000440] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000448] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000450] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000458] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000460] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000468] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000470] */ 0x00000020, 0xe0021327, // mov rb12,32 +++/* [0x00000478] */ 0x00000006, 0xe0021367, // mov rb13,6 +++/* [0x00000480] */ 0x00000001, 0xe00213a7, // mov rb14,1 +++/* [0x00000488] */ 0x00000000, 0xe00213e7, // mov rb15,0 +++/* [0x00000490] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000498] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000004a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000004a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000004b0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000004b8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000004c0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000004c8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x000004d0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x000004d8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x000004e0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004e8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x000004f0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000004f8] */ 0x139c03c0, 0xd0020c27, 
// max vpm, r1, 0 +++/* [0x00000500] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000508] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000510] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000518] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000520] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000528] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000530] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000538] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000540] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x00000558] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000560] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000568] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000570] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000578] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000580] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000588] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000590] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000598] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x000005a0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000005a8] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x000005b0] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x000005b8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000005c0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005c8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000005d0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000005d8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000005e0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000005e8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* 
[0x000005f0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000005f8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000600] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000608] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000610] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000618] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000620] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000628] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000630] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000638] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000640] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000648] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000650] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000658] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000660] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000668] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000670] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000678] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000548] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000550] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000558] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000560] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000568] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000570] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000578] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000580] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000588] */ 0x149dc1c0, 0xd00214e7, // and 
rb_x_next, r0, ~3 +++/* [0x00000590] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000598] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x000005a0] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x000005a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000005b0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000005c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000005c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000005d0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000005d8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000005e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000005e8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000005f0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000005f8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000600] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000608] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000610] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000618] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000620] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000628] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000630] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000638] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000640] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000648] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000650] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000658] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000668] 
*/ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000680] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000688] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000690] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000698] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000006a0] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000006a8] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000006b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000006b8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000006c0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x000006c8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x000006d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000006d8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000006e0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000006e8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000006f0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000006f8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000700] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000708] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000710] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000718] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000720] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000728] */ 0xffffff38, 0xf06809e7, // brr.anyn -, 
r:uvloop_b0 ++-/* [0x00000730] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000738] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000740] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000748] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000750] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000758] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000760] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000768] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000770] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 ++-/* [0x00000778] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000780] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++-/* [0x00000788] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000790] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000798] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x000007a0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000007a8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000007b0] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000670] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000678] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x00000680] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000688] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000690] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000698] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000006a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000006a8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000006b0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000006b8] 
*/ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x000006c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000006c8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000006d0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000006d8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000006e0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000006e8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000006f0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000006f8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000700] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000708] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000710] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000718] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000720] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000728] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000730] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000738] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000740] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000748] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000750] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000758] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000760] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x00000768] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000770] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* [0x00000778] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000780] */ 0x009e7000, 
0x100009e7, // nop +++/* [0x00000788] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000790] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000798] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007a0] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x000007b8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000007c0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000007c8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000007d0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000007d8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000007e0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007e8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000007f0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007f8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_base_next, r0, ~3 ++-/* [0x00000800] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000808] */ 0x0c9d3e80, 0x100206a7, // add ra_x2_base_next, rb_x_base_next, r2 ++-/* [0x00000810] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000818] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000820] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000828] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000830] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000838] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000840] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000848] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000850] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000858] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000860] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000868] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000870] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000878] */ 0x119e7080, 0x10020827, // shl r0, 
r0, r2 ++-/* [0x00000880] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000888] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000890] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000898] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000008b8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008d0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x000008d8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008e0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008e8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008f0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008f8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000007a8] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000007b0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000007b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000007c0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000007c8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000007d0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007d8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007e0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007e8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x000007f0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000007f8] */ 
0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000800] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000808] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000810] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000818] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000820] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000828] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000830] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000838] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000840] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000848] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000850] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000858] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000860] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000868] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000870] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000878] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000880] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000888] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000890] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000898] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000008a8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008c0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x000008c8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008d0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008d8] */ 0xfffffff8, 
0xe0021967, // mov r5rep, -8 +++/* [0x000008e0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008e8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x00000900] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000908] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x_base, rb_x_base_next ; ldtmu0 ++-/* [0x00000910] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_x2_base, ra_x2_base_next ; mov rb31, r3 ++-/* [0x00000918] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000920] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000928] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000930] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000938] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000940] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x_base, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000948] */ 0x0c627c80, 0x10020e27, // add t0s, ra_x2_base, r2 ++-/* [0x00000950] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000958] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000960] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000968] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000970] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000978] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000980] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000988] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000990] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000998] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov 
r3, rb31 ++-/* [0x000009a0] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x000009a8] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009b0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000009b8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x000009c0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000009c8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000009d0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000009d8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000009e0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000009e8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009f0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009f8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x00000a00] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x00000a08] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000a10] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000a18] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000a20] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a28] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a30] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a38] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a40] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000a48] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a50] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a58] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a60] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a68] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a70] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008f0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; 
ldtmu0 +++/* [0x000008f8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x00000900] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000908] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000910] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000918] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000920] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000928] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000930] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x00000938] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x00000940] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000948] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000950] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000958] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000960] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000968] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000970] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000978] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000980] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000988] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000990] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000998] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009a0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000009a8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000009b0] */ 
0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000009b8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000009c0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000009c8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000009d0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000009d8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009e0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009e8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000009f0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x000009f8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000a00] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000a08] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000a10] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a18] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a20] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a28] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a30] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000a38] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a40] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a48] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a50] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a58] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a60] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a78] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a80] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a68] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a70] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a88] */ 
0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a98] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000aa0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000ab0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a98] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000aa0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000aa8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000ac0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ab0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000ab8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ac0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000ad0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ad8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ae0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ad8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ae0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000b08] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b10] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b18] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b20] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000b28] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000b30] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b10] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b18] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000b20] */ 
0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_setup +++/* [0x00000b28] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000b30] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000b38] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000b40] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000b48] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000b50] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000b58] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x00000b60] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000b68] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000b70] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000b78] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000b80] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000b88] */ 0x8c9e7452, 0x10025e18, // add t0s, r2, r1 ; mov ra_frame_base, r2 +++/* [0x00000b90] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000b98] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000ba0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000ba8] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000bb0] */ 0x0c9c13c0, 0xd0020567, // add ra_y2, r1, 1 +++/* [0x00000bb8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000bc0] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000bc8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000bd0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000bd8] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000be0] */ 0x8c9e7452, 0x10025e19, // add t0s, r2, r1 ; mov ra_frame_base2, r2 +++/* [0x00000be8] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00000bf0] */ 0x0d801dc0, 
0xd00217a7, // sub rb30,unif,1 +++/* [0x00000bf8] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x00000c00] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000c08] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x00000c10] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x00000c18] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x00000c20] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000c28] */ 0x00000040, 0xe00207a7, // mov ra30, 64 +++/* [0x00000c30] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000c38] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000c40] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000c48] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00000c50] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x00000c58] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x00000c60] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x00000c68] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x00000c70] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x00000c78] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x00000c80] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x00000c88] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000c90] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000c98] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000ca0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000ca8] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000cb0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000cb8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000cc0] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000cc8] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000cd0] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000cd8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000ce0] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000ce8] */ 0x0f9c23c0, 0xd0020867, // asr 
r1, r1, 2 +++/* [0x00000cf0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000cf8] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000d00] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000d08] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000d10] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000d18] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000d20] */ 0x15827d80, 0x10021327, // mov rb12,unif +++/* [0x00000d28] */ 0x15827d80, 0x10021367, // mov rb13,unif +++/* [0x00000d30] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000d38] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000d40] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000d48] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000d50] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000d58] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base +++/* [0x00000d60] */ 0x13540dc0, 0xd0020867, // max r1, ra_y2, 0 +++/* [0x00000d68] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000d70] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000d78] */ 0x0c541dc0, 0xd0020567, // add ra_y2, ra_y2, 1 +++/* [0x00000d80] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000d88] */ 0x0c667380, 0x10020e27, // add t0s, r1, ra_frame_base2 +++// ::mc_filter +++/* [0x00000d90] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000d98] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000da0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000da8] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next +++/* [0x00000db0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000db8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000dc0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, 
unif +++/* [0x00000dc8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000dd0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000dd8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000de0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 +++/* [0x00000de8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000df0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif +++/* [0x00000df8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000e00] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000e08] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 +++/* [0x00000e10] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000e18] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 +++/* [0x00000e20] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000e28] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000e30] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e38] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000e40] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000e48] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000e50] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000e58] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000e60] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000e68] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000e70] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000e78] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000e80] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e88] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000e90] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000e98] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ea0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 
+++/* [0x00000ea8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000eb0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000eb8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ec0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ec8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ed0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000ed8] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000ee0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ee8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ef0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ef8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000f00] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000f08] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f10] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f18] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f20] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 +++/* [0x00000f28] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000f30] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000f38] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000f40] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000f48] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00000f50] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :yloop +++/* [0x00000f58] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000f60] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++/* [0x00000f68] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, 
ra_frame_base_next ; mov rb31, r3 +++/* [0x00000f70] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000f78] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00000f80] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00000f88] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000f90] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000f98] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000fa0] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x00000fa8] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x00000fb0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000fb8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x00000fc0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++/* [0x00000fc8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000fd0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000fd8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000fe0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000fe8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000ff0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000ff8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00001000] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00001008] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001010] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001018] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00001020] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, 
r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001028] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001030] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001038] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001040] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001048] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001050] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001058] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 +++/* [0x00001060] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001068] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001070] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001078] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001080] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001088] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001090] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00001098] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000010a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000010a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000010b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000010b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000010c0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000010c8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000010d0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000010d8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000010e0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000010e8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, 
r1, ra22 +++/* [0x000010f0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000010f8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x00001100] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x00001108] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x00001110] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001118] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x00001120] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001128] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001130] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001138] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001140] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001148] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_filter_b +++/* [0x00001150] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001158] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00001160] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00001168] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next +++/* [0x00001170] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00001178] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00001180] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00001188] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00001190] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00001198] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000011a0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 +++/* [0x000011a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000011b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif +++/* [0x000011b8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x000011c0] */ 0x119c31c0, 0xd00205e7, // shl 
rx_xshift2_next, r0, 3 +++/* [0x000011c8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 +++/* [0x000011d0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000011d8] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 +++/* [0x000011e0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000011e8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000011f0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000011f8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00001200] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00001208] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00001210] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00001218] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00001220] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00001228] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001230] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001238] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00001240] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001248] */ 0x00000001, 0xe0020867, // mov r1, 1 +++/* [0x00001250] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001258] */ 0x409f3001, 0xd00049e0, // nop ; mul24 r0, r0 << 13, r1 << 13 +++/* [0x00001260] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001268] */ 0x409f2001, 0xd00049e0, // nop ; mul24 r0, r0 << 14, r1 << 14 +++/* [0x00001270] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001278] */ 0x409f1001, 0xd00049e0, // nop ; mul24 r0, r0 << 15, r1 << 15 +++/* [0x00001280] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001288] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00001290] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001298] */ 0x409f7001, 0xd00049e0, // nop ; mul24 r0, r0 << 9, r1 << 9 +++/* [0x000012a0] */ 0x4f5971c6, 0x100241e0, // asr 
ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012a8] */ 0x409f6001, 0xd00049e0, // nop ; mul24 r0, r0 << 10, r1 << 10 +++/* [0x000012b0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012b8] */ 0x409f5001, 0xd00049e0, // nop ; mul24 r0, r0 << 11, r1 << 11 +++/* [0x000012c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012c8] */ 0x409f4001, 0xd00049e0, // nop ; mul24 r0, r0 << 12, r1 << 12 +++/* [0x000012d0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000012d8] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x000012e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00001300] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001308] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001310] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001318] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001320] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 +++/* [0x00001328] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001330] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001338] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00001340] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001348] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00001350] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++// :yloopb +++/* [0x00001358] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001360] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++/* [0x00001368] */ 
0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00001370] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001378] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00001380] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00001388] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001390] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00001398] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000013a0] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x000013a8] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x000013b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000013b8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x000013c0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++/* [0x000013c8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000013d0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000013d8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000013e0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000013e8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000013f0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000013f8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00001400] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00001408] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001410] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001418] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* 
[0x00001420] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001428] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001430] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001438] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001440] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001448] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001450] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001458] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 +++/* [0x00001460] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001468] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001470] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001478] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001480] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001488] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001490] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00001498] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000014a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000014a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000014b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000014b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000014c0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000014c8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000014d0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000014d8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000014e0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000014e8] */ 
0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000014f0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000014f8] */ 0x4053800e, 0xd00049e1, // nop ; mul24 r1, r1 << 8, ra20 << 8 +++/* [0x00001500] */ 0x4c78e38f, 0x10024860, // add r1, r1, ra30 ; mul24 r0, r1, rb14 +++/* [0x00001508] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00001510] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001518] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00001520] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001528] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001530] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001538] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001540] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001548] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++// ::mc_interrupt_exit12 +++/* [0x00001550] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001558] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001560] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001568] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001570] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001578] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001580] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001588] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001590] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001598] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015a0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000015d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000015d8] */ 
0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000015e0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index cec9901..3fa8531 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,11 +4,15 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 152) ++-#define mc_filter_uv_b0 (rpi_shader + 342) ++-#define mc_filter_uv_b (rpi_shader + 494) ++-#define mc_exit (rpi_shader + 670) ++-#define mc_interrupt_exit8 (rpi_shader + 688) ++-#define mc_end (rpi_shader + 718) +++#define mc_filter_uv (rpi_shader + 148) +++#define mc_filter_uv_b0 (rpi_shader + 338) +++#define mc_filter_uv_b (rpi_shader + 490) +++#define mc_exit (rpi_shader + 666) +++#define mc_interrupt_exit8 (rpi_shader + 684) +++#define mc_setup (rpi_shader + 714) +++#define mc_filter (rpi_shader + 868) +++#define mc_filter_b (rpi_shader + 1108) +++#define mc_interrupt_exit12 (rpi_shader + 1364) +++#define mc_end (rpi_shader + 1402) ++ ++ #endif ++diff --git a/libavcodec/rpi_user_vcsm.h b/libavcodec/rpi_user_vcsm.h ++index fbebbbe..95e6de1 100644 ++--- a/libavcodec/rpi_user_vcsm.h +++++ b/libavcodec/rpi_user_vcsm.h ++@@ -418,6 +418,28 @@ int vcsm_unlock_hdl( unsigned int handle ); ++ */ ++ int vcsm_unlock_hdl_sp( unsigned int handle, int cache_no_flush ); ++ +++/* Clean and/or invalidate the memory associated with this user opaque handle +++** +++** Returns: non-zero on error +++** +++** structure contains a list of flush/invalidate commands. 
Commands are: +++** 0: nop +++** 1: invalidate given physical range in L2 +++** 2: clean given physical range in L2 +++** 3: clean+invalidate all of L1 +++** 4: flush all of L2 and all of L1 +++*/ +++struct vcsm_user_clean_invalid_s { +++ struct { +++ unsigned int cmd; +++ unsigned int addr; +++ unsigned int size; +++ } s[8]; +++}; +++ +++int vcsm_clean_invalid( unsigned int handle, struct vcsm_user_clean_invalid_s *s ); +++ +++ ++ #ifdef __cplusplus ++ } ++ #endif ++-- ++2.7.4 ++ ++ ++From 09685ab55aecb9400e354522894e0fbbb6381ca9 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 20 May 2015 21:12:55 +0100 ++Subject: [PATCH 43/68] Added multi mailbox - not working ++ ++--- ++ libavcodec/hevc.c | 40 ++++++++++++++++++++++++++++--- ++ libavcodec/rpi_mailbox.c | 47 +++++++++++++++++++++++++++++++++++++ ++ libavcodec/rpi_mailbox.h | 5 ++++ ++ libavcodec/rpi_qpu.c | 61 ++++++++++++++++++++++++++++++++++++++++++++---- ++ libavcodec/rpi_qpu.h | 2 ++ ++ 5 files changed, 147 insertions(+), 8 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 951e2d3..ab63efd 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -47,6 +47,11 @@ ++ //#define EARLY_MALLOC ++ // Move Inter prediction into separate pass ++ #define RPI_INTER +++ +++ #ifdef RPI_INTER_QPU +++ // Define RPI_MULTI_MAILBOX to use the updated mailbox that can launch both QPU and VPU +++ #define RPI_MULTI_MAILBOX +++ #endif ++ #endif ++ ++ // #define DISABLE_MC ++@@ -2843,10 +2848,14 @@ static void rpi_inter_clear(HEVCContext *s) ++ static void rpi_execute_inter_qpu(HEVCContext *s) ++ { ++ int k; +++ int i; ++ uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr.vc; ++- ++- if (s->sh.slice_type == I_SLICE) ++- return; +++ if (s->sh.slice_type == I_SLICE) { +++#ifdef RPI_MULTI_MAILBOX +++ rpi_execute_transform(s); +++ return; +++#endif +++ } ++ for(k=0;k<8;k++) { ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++ 
s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++@@ -2856,6 +2865,22 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ ++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++ +++#ifdef RPI_MULTI_MAILBOX +++ gpu_cache_flush(&s->coeffs_buf_accelerated); +++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, +++ qpu_get_fn(QPU_MC_SETUP_UV), +++ (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[2 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[3 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) +++ ); +++ for(i=0;i<4;i++) +++ s->num_coeffs[i] = 0; +++#else ++ qpu_run_shader8(qpu_get_fn(QPU_MC_SETUP_UV), ++ (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++ (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++@@ -2866,6 +2891,7 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) ++ ); +++#endif ++ } ++ #endif ++ ++@@ -2945,6 +2971,12 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= 
s->ps.sps->width) { ++ // Transform all blocks ++ // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); +++#ifdef RPI_MULTI_MAILBOX +++ // Kick off inter prediction on QPUs +++ rpi_execute_inter_qpu(s); +++ // Perform luma inter prediction +++ rpi_execute_inter_cmds(s); +++#else ++ rpi_execute_transform(s); ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); ++@@ -2952,6 +2984,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ // Kick off inter prediction on QPUs ++ rpi_execute_inter_qpu(s); ++ #endif +++#endif +++ ++ // Wait for transform completion ++ vpu_wait(s->vpu_id); ++ ++diff --git a/libavcodec/rpi_mailbox.c b/libavcodec/rpi_mailbox.c ++index 77a56dd..3904efc 100644 ++--- a/libavcodec/rpi_mailbox.c +++++ b/libavcodec/rpi_mailbox.c ++@@ -276,6 +276,53 @@ unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigne ++ return p[5]; ++ } ++ +++void execute_multi(int file_desc, +++ unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout, +++ unsigned num_qpus_2, unsigned control_2, unsigned noflush_2, unsigned timeout_2, +++ unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, +++ unsigned code_2, unsigned r0_2, unsigned r1_2, unsigned r2_2, unsigned r3_2, unsigned r4_2, unsigned r5_2) { +++ int i=0; +++ unsigned p[32]; +++ +++ p[i++] = 0; // size +++ p[i++] = 0x00000000; // process request +++ p[i++] = 0x30018; // (the tag id) +++ p[i++] = 88; // (size of the buffer) +++ p[i++] = 88; // (size of the data) +++ +++ p[i++] = num_qpus; +++ p[i++] = control; +++ p[i++] = noflush; +++ p[i++] = timeout; // ms +++ +++ p[i++] = num_qpus_2; +++ p[i++] = control_2; +++ p[i++] = noflush_2; +++ p[i++] = timeout_2; // ms +++ +++ p[i++] = code; +++ p[i++] = r0; +++ p[i++] = r1; +++ p[i++] = r2; +++ p[i++] = r3; +++ p[i++] = r4; +++ p[i++] = r5; +++ +++ p[i++] = code_2; +++ p[i++] 
= r0_2; +++ p[i++] = r1_2; +++ p[i++] = r2_2; +++ p[i++] = r3_2; +++ p[i++] = r4_2; +++ p[i++] = r5_2; +++ +++ p[i++] = 0x00000000; // end tag +++ p[0] = i*sizeof *p; // actual size +++ +++ mbox_property(file_desc, p); +++ return; +++} +++ ++ int mbox_open() { ++ int file_desc; ++ ++diff --git a/libavcodec/rpi_mailbox.h b/libavcodec/rpi_mailbox.h ++index c264d2e..5898102 100644 ++--- a/libavcodec/rpi_mailbox.h +++++ b/libavcodec/rpi_mailbox.h ++@@ -15,6 +15,11 @@ extern void unmapmem(void *addr, unsigned size); ++ ++ extern unsigned execute_code(int file_desc, unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); ++ extern unsigned execute_qpu(int file_desc, unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout); +++extern void execute_multi(int file_desc, +++ unsigned num_qpus, unsigned control, unsigned noflush, unsigned timeout, +++ unsigned num_qpus_2, unsigned control_2, unsigned noflush_2, unsigned timeout_2, +++ unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, +++ unsigned code_2, unsigned r0_2, unsigned r1_2, unsigned r2_2, unsigned r3_2, unsigned r4_2, unsigned r5_2); ++ extern unsigned qpu_enable(int file_desc, unsigned enable); ++ ++ #endif ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index fd8a276..feb3284 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -123,7 +123,7 @@ static pthread_cond_t post_cond_head = PTHREAD_COND_INITIALIZER; ++ static pthread_cond_t post_cond_tail = PTHREAD_COND_INITIALIZER; ++ static pthread_mutex_t post_mutex = PTHREAD_MUTEX_INITIALIZER; ++ ++-static int vpu_cmds[MAXCMDS][8]; +++static int vpu_cmds[MAXCMDS][16]; ++ static volatile int vpu_async_tail=0; // Contains the number of posted jobs ++ static volatile int vpu_async_head=0; ++ #endif ++@@ -346,6 +346,7 @@ unsigned int vpu_get_constants(void) { ++ static void *vpu_start(void *arg) { ++ while(1) { ++ int *p; +++ int qpu_code; ++ 
pthread_mutex_lock(&post_mutex); ++ while( vpu_async_tail - vpu_async_head <= 0) ++ { ++@@ -358,12 +359,25 @@ static void *vpu_start(void *arg) { ++ if (p[6] == -1) { ++ break; // Last job ++ } ++- if (p[7]) { +++ qpu_code = p[7]; +++ //if (p[7]) { ++ //GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; ++ //gpu_cache_flush(buf); ++- } ++- vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); +++ //} +++ if (!qpu_code) { +++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); +++ } else { +++ int i; +++ for(i=0;i<8;i++) { +++ gpu->mail[i*2] = p[8+i]; +++ gpu->mail[i*2 + 1] = qpu_code; +++ } ++ +++ execute_multi(gpu->mb,8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, +++ 0, 0, 0, 0, +++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 +++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 +++ } ++ pthread_mutex_lock(&post_mutex); ++ vpu_async_head++; ++ pthread_cond_broadcast(&post_cond_head); ++@@ -400,7 +414,43 @@ int vpu_post_code(unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned ++ p[4] = r3; ++ p[5] = r4; ++ p[6] = r5; ++- p[7] = (int) buf; +++ p[7] = 0; +++ if (num<=1) +++ pthread_cond_broadcast(&post_cond_tail); // Otherwise the vpu thread must already be awake +++ pthread_mutex_unlock(&post_mutex); +++ return id; +++ } +++} +++ +++int vpu_qpu_post_code(unsigned vpu_code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, +++ int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8) +++{ +++ +++ pthread_mutex_lock(&post_mutex); +++ { +++ int id = vpu_async_tail++; +++ int *p = vpu_cmds[id%MAXCMDS]; +++ int num = vpu_async_tail - vpu_async_head; +++ if (num>MAXCMDS) { +++ printf("Too many commands submitted\n"); +++ exit(-1); +++ } +++ p[0] = vpu_code; +++ p[1] = r0; +++ p[2] = r1; +++ p[3] = r2; +++ p[4] = r3; +++ p[5] = r4; +++ p[6] = r5; +++ p[7] = qpu_code; +++ p[8 ] = unifs1; +++ p[9 ] = unifs2; +++ p[10] = unifs3; +++ p[11] = unifs4; +++ 
p[12] = unifs5; +++ p[13] = unifs6; +++ p[14] = unifs7; +++ p[15] = unifs8; ++ if (num<=1) ++ pthread_cond_broadcast(&post_cond_tail); // Otherwise the vpu thread must already be awake ++ pthread_mutex_unlock(&post_mutex); ++@@ -966,6 +1016,7 @@ void rpi_do_block(const uint8_t *in_buffer_vc, int src_pitch, uint8_t *dst_vc, i ++ } ++ ++ +++ ++ #endif ++ ++ #endif // RPI ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 88965e5..2f08f03 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -41,6 +41,8 @@ extern unsigned int vpu_get_fn(void); ++ extern unsigned int vpu_get_constants(void); ++ extern unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); ++ extern int vpu_post_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, GPU_MEM_PTR_T *buf); +++int vpu_qpu_post_code(unsigned vpu_code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, +++ int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); ++ extern void vpu_wait( int id); ++ ++ // Simple test of shader code ++-- ++2.7.4 ++ ++ ++From 311f2da06d13a98d9bdda2df8684d7cf55b9a08e Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Thu, 21 May 2015 16:50:02 +0100 ++Subject: [PATCH 44/68] Pass qpu number in as uniform ++ ++--- ++ libavcodec/hevc.c | 2 +- ++ libavcodec/rpi_shader.c | 1288 ++++++++++++++++++++++---------------------- ++ libavcodec/rpi_shader.h | 20 +- ++ libavcodec/rpi_shader.qasm | 10 +- ++ 4 files changed, 657 insertions(+), 663 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index ab63efd..caadfaa 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -2834,6 +2834,7 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = pic_height; ++ *s->u_mvs[i]++ = s->frame->linesize[1]; ++ *s->u_mvs[i]++ = 
s->frame->linesize[2]; +++ *s->u_mvs[i]++ = i; ++ if (weight_flag) { ++ *s->u_mvs[i]++ = 1 << (s->sh.chroma_log2_weight_denom + 6 - 1); ++ *s->u_mvs[i]++ = s->sh.chroma_log2_weight_denom + 6; ++@@ -2841,7 +2842,6 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = 1 << 5; ++ *s->u_mvs[i]++ = 6; ++ } ++- s->u_mvs[i] += 1; // Padding words ++ } ++ } ++ ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index 9c30e32..a0f0282 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -48,8 +48,8 @@ unsigned int rpi_shader[] = { ++ /* [0x000000b8] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++ /* [0x000000c0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++ /* [0x000000c8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000d0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x000000d8] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x000000d0] */ 0x15827d80, 0x100208e7, // mov r3, unif +++/* [0x000000d8] */ 0x119c17c0, 0xd00208a7, // shl r2, r3, 1 ++ /* [0x000000e0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++ /* [0x000000e8] */ 0x159e7480, 0x10020867, // mov r1, r2 ++ /* [0x000000f0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++@@ -60,669 +60,669 @@ unsigned int rpi_shader[] = { ++ /* [0x00000118] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++ /* [0x00000120] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++ /* [0x00000128] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000130] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000138] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-/* [0x00000140] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000148] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000150] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000158] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000160] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000168] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000170] */ 
0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000178] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000180] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000188] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 ++-/* [0x00000190] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) ++-/* [0x00000198] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 ++-/* [0x000001a0] */ 0x15427d80, 0x10020827, // mov r0, ra_x ++-/* [0x000001a8] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000001b0] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base ++-/* [0x000001b8] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000001c0] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001c8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000001d0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001d8] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x, r0 ++-/* [0x000001e0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001e8] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x000001f0] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 ++-/* [0x000001f8] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x00000200] */ 0x15827d80, 0x10021327, // mov rb12,unif ++-/* [0x00000208] */ 0x15827d80, 0x10021367, // mov rb13,unif ++-/* [0x00000210] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000218] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000220] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000228] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000230] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000238] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000240] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x ++-/* [0x00000248] */ 0x0c627380, 
0x10020e27, // add t0s, r1, ra_frame_base +++/* [0x00000130] */ 0x119c17c0, 0xd00208a7, // shl r2, r3, 1 +++/* [0x00000138] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000140] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000148] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000150] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000158] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000160] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000168] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000170] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000178] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000180] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 +++/* [0x00000188] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) +++/* [0x00000190] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 +++/* [0x00000198] */ 0x15427d80, 0x10020827, // mov r0, ra_x +++/* [0x000001a0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000001a8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base +++/* [0x000001b0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000001b8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000001c0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000001c8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000001d0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x, r0 +++/* [0x000001d8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x000001e0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x000001e8] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 +++/* [0x000001f0] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 +++/* [0x000001f8] */ 0x15827d80, 0x10021327, // mov rb12,unif +++/* [0x00000200] */ 0x15827d80, 0x10021367, // mov rb13,unif +++/* [0x00000208] 
*/ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000210] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000218] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000220] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000228] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000230] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x +++/* [0x00000238] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base ++ // ::mc_filter_uv ++-/* [0x00000250] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000258] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000260] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000268] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000270] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000278] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000280] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000288] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000290] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x00000298] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000002a0] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x000002a8] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002b0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002b8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002c0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002c8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002d0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002d8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000002e0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000002e8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002f0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000002f8] */ 
0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000300] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000308] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000310] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000318] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000320] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000330] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000340] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000348] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000350] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000358] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000360] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00000368] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000370] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00000378] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000380] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 ++-/* [0x00000388] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000390] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 ++-/* [0x00000398] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000003a0] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000240] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000248] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000250] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000258] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000260] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* 
[0x00000268] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000270] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000278] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000280] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000288] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000290] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000298] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000002a0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000002a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002b0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000002b8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000002c0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000002c8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000002d0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000002d8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000002f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000002f8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000300] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000308] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000310] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000318] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000320] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000328] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000330] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000338] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000340] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 
0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000348] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000350] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000358] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000360] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00000368] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000370] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 +++/* [0x00000378] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000380] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 +++/* [0x00000388] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000390] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop ++-/* [0x000003a8] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003b0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x000003b8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x000003c0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003c8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003d0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003d8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003e0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003e8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003f0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x000003f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000400] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000408] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000410] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000418] */ 0x40077031, 
0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000420] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000428] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000430] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000438] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000440] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000448] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000450] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000458] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000460] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000468] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000470] */ 0x00000020, 0xe0021327, // mov rb12,32 ++-/* [0x00000478] */ 0x00000006, 0xe0021367, // mov rb13,6 ++-/* [0x00000480] */ 0x00000001, 0xe00213a7, // mov rb14,1 ++-/* [0x00000488] */ 0x00000000, 0xe00213e7, // mov rb15,0 ++-/* [0x00000490] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000498] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000004a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000004a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000004b0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000004b8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000004c0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000004c8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 ++-/* [0x000004d0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 ++-/* [0x000004d8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 ++-/* [0x000004e0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004e8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 ++-/* 
[0x000004f0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000004f8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000500] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000508] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000510] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000518] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000520] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000528] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000530] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000538] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000540] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000398] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000003a0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x000003a8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x000003b0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000003b8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000003c0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000003c8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000003d0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003d8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000003e0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x000003e8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003f0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003f8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000400] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000408] */ 
0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000410] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000418] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000420] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000428] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000430] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000438] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000440] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000448] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000450] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000458] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000460] */ 0x00000020, 0xe0021327, // mov rb12,32 +++/* [0x00000468] */ 0x00000006, 0xe0021367, // mov rb13,6 +++/* [0x00000470] */ 0x00000001, 0xe00213a7, // mov rb14,1 +++/* [0x00000478] */ 0x00000000, 0xe00213e7, // mov rb15,0 +++/* [0x00000480] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000488] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000490] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000498] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000004a0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000004a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000004b0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000004b8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x000004c0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x000004c8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x000004d0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x000004d8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 
+++/* [0x000004e0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000004e8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000004f0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000004f8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000500] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000508] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000510] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000518] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000520] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000528] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000530] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x00000548] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000550] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000558] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000560] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000568] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000570] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000578] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000580] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000588] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x00000590] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000598] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x000005a0] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x000005a8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000005b0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005b8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000005c0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000005c8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000005d0] */ 0x0c9c11c0, 
0xd0021467, // add rb17, r0, 1 ++-/* [0x000005d8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000005e0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000005e8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000005f0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000005f8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000600] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000608] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000610] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000618] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000620] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000628] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000630] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000638] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000640] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000648] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000650] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000658] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000660] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000668] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000538] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000540] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000548] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000550] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000558] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000560] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000568] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000570] */ 
0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000578] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000580] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000588] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000590] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000598] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000005a0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005a8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000005b0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000005b8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000005c0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000005c8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000005d0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000005d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000005e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000005e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000005f0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005f8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000600] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000608] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000610] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000618] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000620] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000628] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000630] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000638] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000640] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000648] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000650] */ 0x0000ff00, 
0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000658] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000670] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000678] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x00000680] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00000688] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000690] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000698] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000006a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000006a8] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000006b0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x000006b8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x000006c0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000006c8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000006d0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000006d8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000006e0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000006e8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000006f0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000006f8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000700] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000708] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000710] */ 0x8d3447f6, 0xd00279cc, // sub.setf 
-, r3, 4 ; mov ra12, ra13 ++-/* [0x00000718] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000720] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000728] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000730] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000738] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000740] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000748] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000750] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000758] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000760] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 ++-/* [0x00000768] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000770] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++-/* [0x00000778] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000780] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000788] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000790] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000798] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000007a0] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000660] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000668] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x00000670] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000678] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000680] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000688] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000690] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000698] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000006a0] 
*/ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000006a8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x000006b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000006b8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000006c0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000006c8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000006d0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000006d8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000006e0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000006e8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000006f0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000006f8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000700] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000708] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000710] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000718] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000720] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000728] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000730] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000738] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000740] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000748] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000750] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x00000758] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000760] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* 
[0x00000768] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000770] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000778] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000780] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000788] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000790] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x000007a8] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000007b0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000007b8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000007c0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000007c8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000007d0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007d8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000007e0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007e8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x000007f0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000007f8] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x00000800] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000808] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000810] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000818] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000820] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000828] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000830] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000838] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000840] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000848] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000850] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000858] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000860] */ 0x0c9e7040, 0x10020827, 
// add r0, r0, r1 ++-/* [0x00000868] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000870] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000878] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000880] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000888] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000890] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000898] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x000008a8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008c0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x000008c8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008d0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008d8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008e0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008e8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000798] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000007a0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000007a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000007b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x000007b8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x000007c0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x000007c8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x000007d0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x000007d8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x000007e0] 
*/ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x000007e8] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x000007f0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000007f8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000800] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000808] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000810] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000818] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000820] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000828] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000830] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000838] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x00000840] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x00000848] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* [0x00000850] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000858] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000860] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000868] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000870] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000878] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000880] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000888] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000890] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000898] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008a8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000008b0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x000008b8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008c0] */ 
0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000008c8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000008d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008d8] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000008f0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000008f8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x00000900] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00000908] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000910] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000918] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000920] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000928] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000930] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000938] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x00000940] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000948] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000950] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000958] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000960] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000968] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000970] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000978] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000980] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 
++-/* [0x00000988] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000990] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000998] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009a0] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x000009a8] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x000009b0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000009b8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000009c0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000009c8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000009d0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000009d8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009e0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000009e8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000009f0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x000009f8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x00000a00] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000a08] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000a10] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a18] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a20] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a28] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a30] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000a38] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a40] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a48] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a50] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a58] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a60] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000008e0] */ 
0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x000008e8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++/* [0x000008f0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x000008f8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000900] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000908] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000910] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000918] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000920] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x00000928] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 +++/* [0x00000930] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000938] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000940] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000948] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000950] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000958] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000960] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000968] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000970] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000978] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000980] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000988] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000990] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* 
[0x00000998] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000009a0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000009a8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000009b0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000009b8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000009c0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000009c8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000009d0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000009d8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000009e0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x000009e8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x000009f0] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009f8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00000a00] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000a08] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000a10] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00000a18] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a20] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000a28] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x00000a30] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a38] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000a40] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x00000a48] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00000a50] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a68] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a70] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a58] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a60] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a68] */ 0x009e7000, 0xa00009e7, // ldtmu0 
+++/* [0x00000a70] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a88] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a90] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a98] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000aa0] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000aa8] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a88] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a90] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a98] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000ab0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000aa0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000aa8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ab0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000ab8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00000ac0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ac8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ad0] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000ac8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000ad0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000ad8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000ae0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b00] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b08] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b10] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000b18] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000b20] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000b00] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000b08] */ 
0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000b10] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_setup ++-/* [0x00000b28] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000b30] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000b38] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000b40] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000b48] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000b50] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000b58] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x00000b60] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000b68] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 ++-/* [0x00000b70] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000b78] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000b80] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000b88] */ 0x8c9e7452, 0x10025e18, // add t0s, r2, r1 ; mov ra_frame_base, r2 ++-/* [0x00000b90] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000b98] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000ba0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000ba8] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x00000bb0] */ 0x0c9c13c0, 0xd0020567, // add ra_y2, r1, 1 ++-/* [0x00000bb8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000bc0] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 ++-/* [0x00000bc8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000bd0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000bd8] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000be0] */ 0x8c9e7452, 0x10025e19, // add t0s, r2, r1 ; mov ra_frame_base2, r2 ++-/* [0x00000be8] */ 0x0d801dc0, 
0xd0021667, // sub rb25,unif,1 ++-/* [0x00000bf0] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++-/* [0x00000bf8] */ 0x15827d80, 0x10021427, // mov rb16, unif ++-/* [0x00000c00] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000c08] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++-/* [0x00000c10] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++-/* [0x00000c18] */ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x00000c20] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x00000c28] */ 0x00000040, 0xe00207a7, // mov ra30, 64 ++-/* [0x00000c30] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000c38] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000c40] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x00000c48] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x00000c50] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x00000c58] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x00000c60] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x00000c68] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x00000c70] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x00000c78] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x00000c80] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x00000c88] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000c90] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000c98] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000ca0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000ca8] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000cb0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000cb8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000cc0] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000cc8] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000cd0] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000cd8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000ce0] */ 0x159e7480, 0x10020867, // 
mov r1, r2 ++-/* [0x00000ce8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000cf0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000cf8] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000d00] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000d08] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000d10] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000d18] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000d20] */ 0x15827d80, 0x10021327, // mov rb12,unif ++-/* [0x00000d28] */ 0x15827d80, 0x10021367, // mov rb13,unif ++-/* [0x00000d30] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000d38] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000d40] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000d48] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000d50] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000d58] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base ++-/* [0x00000d60] */ 0x13540dc0, 0xd0020867, // max r1, ra_y2, 0 ++-/* [0x00000d68] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000d70] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000d78] */ 0x0c541dc0, 0xd0020567, // add ra_y2, ra_y2, 1 ++-/* [0x00000d80] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000d88] */ 0x0c667380, 0x10020e27, // add t0s, r1, ra_frame_base2 +++/* [0x00000b18] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000b20] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000b28] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000b30] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000b38] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000b40] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000b48] */ 0x0c9c13c0, 
0xd0020767, // add ra_y, r1, 1 +++/* [0x00000b50] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000b58] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000b60] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000b68] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000b70] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000b78] */ 0x8c9e7452, 0x10025e18, // add t0s, r2, r1 ; mov ra_frame_base, r2 +++/* [0x00000b80] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000b88] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000b90] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000b98] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000ba0] */ 0x0c9c13c0, 0xd0020567, // add ra_y2, r1, 1 +++/* [0x00000ba8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000bb0] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000bb8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000bc0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000bc8] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000bd0] */ 0x8c9e7452, 0x10025e19, // add t0s, r2, r1 ; mov ra_frame_base2, r2 +++/* [0x00000bd8] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 +++/* [0x00000be0] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 +++/* [0x00000be8] */ 0x15827d80, 0x10021427, // mov rb16, unif +++/* [0x00000bf0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000bf8] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x00000c00] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x00000c08] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x00000c10] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000c18] */ 0x00000040, 0xe00207a7, // mov ra30, 64 +++/* [0x00000c20] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000c28] */ 0x000000ff, 
0xe00215a7, // mov rb22, 255 +++/* [0x00000c30] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000c38] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00000c40] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x00000c48] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x00000c50] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x00000c58] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x00000c60] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x00000c68] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x00000c70] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x00000c78] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000c80] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000c88] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000c90] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000c98] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000ca0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000ca8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000cb0] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000cb8] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000cc0] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000cc8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000cd0] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000cd8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000ce0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000ce8] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000cf0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000cf8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000d00] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000d08] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000d10] */ 0x15827d80, 0x10021327, // mov rb12,unif +++/* [0x00000d18] */ 0x15827d80, 0x10021367, // mov rb13,unif +++/* [0x00000d20] */ 0x15827d80, 0x10020827, // 
mov r0, unif +++/* [0x00000d28] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000d30] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000d38] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000d40] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000d48] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base +++/* [0x00000d50] */ 0x13540dc0, 0xd0020867, // max r1, ra_y2, 0 +++/* [0x00000d58] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000d60] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000d68] */ 0x0c541dc0, 0xd0020567, // add ra_y2, ra_y2, 1 +++/* [0x00000d70] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000d78] */ 0x0c667380, 0x10020e27, // add t0s, r1, ra_frame_base2 ++ // ::mc_filter ++-/* [0x00000d90] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000d98] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000da0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000da8] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next ++-/* [0x00000db0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000db8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000dc0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000dc8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000dd0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000dd8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000de0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 ++-/* [0x00000de8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000df0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif ++-/* [0x00000df8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000e00] */ 0x119c31c0, 
0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x00000e08] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 ++-/* [0x00000e10] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000e18] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 ++-/* [0x00000e20] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000e28] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000e30] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000e38] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000e40] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000e48] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000e50] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000e58] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000e60] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000e68] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000e70] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000e78] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000e80] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000e88] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000e90] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000e98] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ea0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ea8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000eb0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000eb8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ec0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ec8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ed0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000ed8] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000ee0] 
*/ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ee8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ef0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ef8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000f00] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000f08] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f10] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f18] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f20] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 ++-/* [0x00000f28] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000f30] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000f38] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00000f40] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000f48] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00000f50] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000d80] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000d88] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000d90] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000d98] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next +++/* [0x00000da0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000da8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000db0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000db8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000dc0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000dc8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000dd0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 +++/* [0x00000dd8] */ 
0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000de0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif +++/* [0x00000de8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000df0] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000df8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 +++/* [0x00000e00] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000e08] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 +++/* [0x00000e10] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000e18] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000e20] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e28] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000e30] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000e38] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000e40] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000e48] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000e50] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000e58] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000e60] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000e68] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000e70] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e78] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000e80] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000e88] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000e90] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000e98] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000ea0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000ea8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000eb0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, 
r0, ra22 +++/* [0x00000eb8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ec0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x00000ec8] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000ed0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ed8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ee0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ee8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x00000ef0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000ef8] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f00] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f08] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000f10] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 +++/* [0x00000f18] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000f20] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif +++/* [0x00000f28] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000f30] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000f38] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00000f40] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :yloop ++-/* [0x00000f58] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000f60] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-/* [0x00000f68] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00000f70] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000f78] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 ++-/* [0x00000f80] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next ++-/* [0x00000f88] */ 0x13740dc0, 0xd00208a7, // 
max r2, ra_y, 0 ++-/* [0x00000f90] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000f98] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000fa0] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000fa8] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 ++-/* [0x00000fb0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000fb8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-/* [0x00000fc0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000fc8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000fd0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000fd8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000fe0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000fe8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000ff0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000ff8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00001000] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00001008] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00001010] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00001018] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00001020] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00001028] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00001030] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00001038] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* 
[0x00001040] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001048] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001050] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001058] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 ++-/* [0x00001060] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001068] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001070] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001078] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001080] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloop ++-/* [0x00001088] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001090] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00001098] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000010a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000010a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000010b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000010b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000010c0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-/* [0x000010c8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-/* [0x000010d0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-/* [0x000010d8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 ++-/* [0x000010e0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000010e8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000010f0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000010f8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 ++-/* [0x00001100] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 ++-/* [0x00001108] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 ++-/* [0x00001110] */ 0xfffffe28, 
0xf06809e7, // brr.anyn -, r:yloop ++-/* [0x00001118] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 ++-/* [0x00001120] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00001128] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00001130] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001138] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001140] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001148] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000f48] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000f50] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++/* [0x00000f58] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000f60] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000f68] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00000f70] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00000f78] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000f80] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000f88] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000f90] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x00000f98] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x00000fa0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000fa8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x00000fb0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++/* [0x00000fb8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000fc0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000fc8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 
8, r1 << 8 +++/* [0x00000fd0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000fd8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000fe0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000fe8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000ff0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000ff8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001000] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001008] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00001010] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001018] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001020] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001028] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001030] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001038] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001040] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001048] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 +++/* [0x00001050] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001058] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001060] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001068] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001070] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001078] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001080] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00001088] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00001090] */ 0x4038a037, 
0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00001098] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000010a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000010a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000010b0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000010b8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000010c0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000010c8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000010d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000010d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000010e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000010e8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x000010f0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x000010f8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x00001100] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001108] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x00001110] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001118] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001120] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001128] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001130] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001138] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_b ++-/* [0x00001150] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001158] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00001160] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00001168] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next ++-/* [0x00001170] */ 0x0c9a0f80, 
0x10020827, // add r0, unif, elem_num ++-/* [0x00001178] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00001180] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00001188] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00001190] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00001198] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000011a0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 ++-/* [0x000011a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000011b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif ++-/* [0x000011b8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x000011c0] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x000011c8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 ++-/* [0x000011d0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000011d8] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 ++-/* [0x000011e0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000011e8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000011f0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000011f8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00001200] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00001208] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00001210] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00001218] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00001220] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00001228] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001230] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001238] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00001240] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001248] */ 0x00000001, 0xe0020867, // mov r1, 1 ++-/* [0x00001250] */ 0x15827d80, 0x10060827, // mov.ifnz 
r0, unif ++-/* [0x00001258] */ 0x409f3001, 0xd00049e0, // nop ; mul24 r0, r0 << 13, r1 << 13 ++-/* [0x00001260] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001268] */ 0x409f2001, 0xd00049e0, // nop ; mul24 r0, r0 << 14, r1 << 14 ++-/* [0x00001270] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001278] */ 0x409f1001, 0xd00049e0, // nop ; mul24 r0, r0 << 15, r1 << 15 ++-/* [0x00001280] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001288] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00001290] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001298] */ 0x409f7001, 0xd00049e0, // nop ; mul24 r0, r0 << 9, r1 << 9 ++-/* [0x000012a0] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012a8] */ 0x409f6001, 0xd00049e0, // nop ; mul24 r0, r0 << 10, r1 << 10 ++-/* [0x000012b0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012b8] */ 0x409f5001, 0xd00049e0, // nop ; mul24 r0, r0 << 11, r1 << 11 ++-/* [0x000012c0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012c8] */ 0x409f4001, 0xd00049e0, // nop ; mul24 r0, r0 << 12, r1 << 12 ++-/* [0x000012d0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000012d8] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x000012e0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012e8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012f0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012f8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00001300] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001308] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001310] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 
++-/* [0x00001318] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001320] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 ++-/* [0x00001328] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001330] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001338] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00001340] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001348] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00001350] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00001140] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00001148] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00001150] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00001158] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next +++/* [0x00001160] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00001168] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00001170] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00001178] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00001180] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00001188] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00001190] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 +++/* [0x00001198] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000011a0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif +++/* [0x000011a8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x000011b0] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x000011b8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 +++/* [0x000011c0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x000011c8] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 +++/* [0x000011d0] */ 0x159dcfc0, 0x10021c67, // mov 
vw_setup, rb28 +++/* [0x000011d8] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000011e0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000011e8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000011f0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000011f8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00001200] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00001208] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00001210] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00001218] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00001220] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001228] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00001230] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001238] */ 0x00000001, 0xe0020867, // mov r1, 1 +++/* [0x00001240] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001248] */ 0x409f3001, 0xd00049e0, // nop ; mul24 r0, r0 << 13, r1 << 13 +++/* [0x00001250] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001258] */ 0x409f2001, 0xd00049e0, // nop ; mul24 r0, r0 << 14, r1 << 14 +++/* [0x00001260] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001268] */ 0x409f1001, 0xd00049e0, // nop ; mul24 r0, r0 << 15, r1 << 15 +++/* [0x00001270] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001278] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00001280] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001288] */ 0x409f7001, 0xd00049e0, // nop ; mul24 r0, r0 << 9, r1 << 9 +++/* [0x00001290] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001298] */ 0x409f6001, 0xd00049e0, // nop ; mul24 r0, r0 << 10, r1 << 10 +++/* [0x000012a0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012a8] */ 0x409f5001, 0xd00049e0, // nop ; mul24 r0, r0 << 11, r1 << 11 
+++/* [0x000012b0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012b8] */ 0x409f4001, 0xd00049e0, // nop ; mul24 r0, r0 << 12, r1 << 12 +++/* [0x000012c0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif +++/* [0x000012c8] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x000012d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000012e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif +++/* [0x000012f0] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x000012f8] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001300] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001308] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00001310] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 +++/* [0x00001318] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001320] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif +++/* [0x00001328] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00001330] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00001338] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 +++/* [0x00001340] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :yloopb ++-/* [0x00001358] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00001360] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-/* [0x00001368] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00001370] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00001378] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 ++-/* [0x00001380] */ 0x159c1fc0, 0x10040567, 
// mov.ifz ra_y2, ra_y2_next ++-/* [0x00001388] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00001390] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00001398] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000013a0] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x000013a8] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 ++-/* [0x000013b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000013b8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-/* [0x000013c0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 ++-/* [0x000013c8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000013d0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000013d8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000013e0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000013e8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000013f0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000013f8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00001400] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00001408] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00001410] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00001418] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00001420] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00001428] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00001430] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00001438] */ 
0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00001440] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001448] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001450] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001458] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 ++-/* [0x00001460] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001468] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001470] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001478] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001480] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloopb ++-/* [0x00001488] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001490] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00001498] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000014a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000014a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000014b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000014b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000014c0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-/* [0x000014c8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-/* [0x000014d0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-/* [0x000014d8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 ++-/* [0x000014e0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000014e8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000014f0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000014f8] */ 0x4053800e, 0xd00049e1, // nop ; mul24 r1, r1 << 8, ra20 << 8 ++-/* [0x00001500] */ 0x4c78e38f, 0x10024860, // add r1, r1, ra30 ; mul24 r0, r1, 
rb14 ++-/* [0x00001508] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x00001510] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloopb ++-/* [0x00001518] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00001520] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00001528] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00001530] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001538] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001540] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001548] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00001348] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001350] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 +++/* [0x00001358] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00001360] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001368] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00001370] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00001378] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001380] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00001388] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00001390] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x00001398] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x000013a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000013a8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x000013b0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++/* [0x000013b8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000013c0] */ 0x40027006, 0x100049e2, // 
nop ; mul24 r2, r0, ra0 +++/* [0x000013c8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000013d0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000013d8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000013e0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000013e8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000013f0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000013f8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001400] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001408] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00001410] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001418] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001420] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001428] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001430] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001438] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001440] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001448] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 +++/* [0x00001450] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001458] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001460] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001468] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001470] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001478] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001480] */ 0x153e7d80, 0x100203a7, // mov ra14, 
ra15 +++/* [0x00001488] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00001490] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00001498] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000014a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000014a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000014b0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000014b8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000014c0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000014c8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000014d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000014d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000014e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x000014e8] */ 0x4053800e, 0xd00049e1, // nop ; mul24 r1, r1 << 8, ra20 << 8 +++/* [0x000014f0] */ 0x4c78e38f, 0x10024860, // add r1, r1, ra30 ; mul24 r0, r1, rb14 +++/* [0x000014f8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00001500] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001508] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 +++/* [0x00001510] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001518] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001520] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001528] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001530] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001538] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_interrupt_exit12 ++-/* [0x00001550] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001540] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001548] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001550] */ 0x009e7000, 0xa00009e7, // ldtmu0 
++ /* [0x00001558] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++ /* [0x00001560] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001568] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001570] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001568] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001570] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001578] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001580] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x00001588] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++@@ -732,11 +732,9 @@ unsigned int rpi_shader[] = { ++ /* [0x000015a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000015b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++ /* [0x000015b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015d0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000015d8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000015e0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x000015c0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000015c8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000015d0] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 3fa8531..6e552d9 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,15 +4,15 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 148) ++-#define mc_filter_uv_b0 (rpi_shader + 338) ++-#define mc_filter_uv_b (rpi_shader + 490) ++-#define mc_exit (rpi_shader + 666) ++-#define mc_interrupt_exit8 (rpi_shader + 684) ++-#define mc_setup (rpi_shader + 714) ++-#define mc_filter (rpi_shader + 868) ++-#define mc_filter_b (rpi_shader + 1108) ++-#define mc_interrupt_exit12 (rpi_shader + 1364) 
++-#define mc_end (rpi_shader + 1402) +++#define mc_filter_uv (rpi_shader + 144) +++#define mc_filter_uv_b0 (rpi_shader + 334) +++#define mc_filter_uv_b (rpi_shader + 486) +++#define mc_exit (rpi_shader + 662) +++#define mc_interrupt_exit8 (rpi_shader + 680) +++#define mc_setup (rpi_shader + 710) +++#define mc_filter (rpi_shader + 864) +++#define mc_filter_b (rpi_shader + 1104) +++#define mc_interrupt_exit12 (rpi_shader + 1360) +++#define mc_end (rpi_shader + 1398) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 9cfc0d9..a0b8e5a 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -133,8 +133,8 @@ mov ra14, 0 ++ mov ra15, 0 ++ ++ # Compute part of VPM to use for DMA output ++-mov r2, qpu_num ++-shl r2, r2, 1 # Convert QPU numbers to be even (this means we can only use 8 QPUs, but is necessary as we need to save 16bit intermediate results) +++mov r3, unif +++shl r2, r3, 1 # Convert QPU numbers to be even (this means we can only use 8 QPUs, but is necessary as we need to save 16bit intermediate results) ++ and r2, r2, 15 ++ mov r1, r2 ++ asr r1, r1, 2 ++@@ -147,8 +147,7 @@ shl r0, r0, 5 ++ add rb27, r0, r1 ++ ++ # Compute part of VPM to save data into ++-mov r2, qpu_num # qpu_num = abcd ++-shl r2, r2, 1 +++shl r2, r3, 1 ++ and r2, r2, 15 # r2 = bcd0 ++ mov r1, r2 # r1 = bcd0 ++ asr r1, r1, 2 # r1 = bc ++@@ -181,9 +180,6 @@ add t0s, r2, r1 ++ mov rb12,unif # offset before shift ++ mov rb13,unif # offset after shift ++ ++-# Dump padding words ++-mov r0, unif ++- ++ # submit texture requests for second line ++ max r1, ra_y, 0 ++ min r1, r1, rb_frame_height_minus_1 ++-- ++2.7.4 ++ ++ ++From db6fe49d50e42c444b5833acc6206c0bbfaacef4 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Sat, 23 May 2015 13:20:21 +0100 ++Subject: [PATCH 45/68] Add new cache flushing routine ++ ++--- ++ libavcodec/hevc.c | 8 +++-- ++ libavcodec/hevc_filter.c | 39 ++++++++++----------- ++ 
libavcodec/rpi_qpu.c | 17 +++++++-- ++ libavcodec/rpi_qpu.h | 2 ++ ++ libavcodec/rpi_user_vcsm.h | 86 ++++++++++++++++++++++++++-------------------- ++ 5 files changed, 91 insertions(+), 61 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index caadfaa..9d12583 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3575,9 +3575,13 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length) ++ } ++ ++ fail: ++- if (s->ref && s->threads_type == FF_THREAD_FRAME) +++ if (s->ref && s->threads_type == FF_THREAD_FRAME) { +++#ifdef RPI_INTER_QPU +++ void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n); +++ ff_hevc_flush_chroma(s, &s->ref->tf, s->ps.sps->height); +++#endif ++ ff_thread_report_progress(&s->ref->tf, INT_MAX, 0); ++- +++ } ++ return ret; ++ } ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 186317a..ec84e8a 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -883,36 +883,35 @@ static int ff_hevc_buf_base(AVBufferRef *bref) { ++ return p->vc & 0x3fffffff; ++ } ++ ++-static void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n) +++void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n); +++void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n) ++ { ++ if (s->enable_rpi && !( s->nal_unit_type == NAL_TRAIL_N || ++ s->nal_unit_type == NAL_TSA_N || ++ s->nal_unit_type == NAL_STSA_N || ++ s->nal_unit_type == NAL_RADL_N || ++ s->nal_unit_type == NAL_RASL_N )) { ++-#define RPI_FAST_CACHEFLUSH ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++- int curr_y = f->progress->data[0]; +++ int curr_y = ((int *)f->progress->data)[0]; +++ int curr_uv = curr_y >> s->ps.sps->vshift[1]; +++ int n_uv = n >> s->ps.sps->vshift[1]; ++ int sz,base; ++- if (curr_y < 0) curr_y = 0; ++- if (n<=curr_y) return; // Should not happen ++- sz = s->frame->linesize[1] * (n-curr_y); ++- base = s->frame->linesize[1] * 
curr_y; ++- iocache.s[0].cmd = 3; // Flush L1 cache ++- iocache.s[0].addr = 0; ++- iocache.s[0].size = 0; ++- ++- iocache.s[1].cmd = 2; ++- iocache.s[1].addr = ff_hevc_buf_base(s->frame->buf[1]) + base; +++ if (curr_uv < 0) curr_uv = 0; +++ if (n_uv<=curr_uv) { assert(0); return; } // Should not happen +++ sz = s->frame->linesize[1] * (n_uv-curr_uv); +++ base = s->frame->linesize[1] * curr_uv; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(s->frame->buf[1]); +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = p->arm + base; +++ iocache.s[0].size = sz; +++ p = av_buffer_pool_opaque(s->frame->buf[2]); +++ iocache.s[1].handle = p->vcsm_handle; +++ iocache.s[1].cmd = 3; // clean+invalidate +++ iocache.s[1].addr = p->arm + base; ++ iocache.s[1].size = sz; ++- ++- iocache.s[2].cmd = 2; ++- iocache.s[2].addr = ff_hevc_buf_base(s->frame->buf[2]) + base; ++- iocache.s[2].size = sz; ++- ++- vcsm_clean_invalid( gpu_get_mailbox(), &iocache ); ++- +++ vcsm_clean_invalid( &iocache ); ++ #else ++ flush_buffer(s->frame->buf[1]); ++ flush_buffer(s->frame->buf[2]); ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index feb3284..aa65a77 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -211,6 +211,7 @@ static void gpu_unlock(void) { ++ } ++ ++ static int gpu_malloc_uncached_internal(int numbytes, GPU_MEM_PTR_T *p, int mb) { +++ p->numbytes = numbytes; ++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); ++ assert(p->vcsm_handle); ++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); ++@@ -243,13 +244,25 @@ int gpu_get_mailbox(void) ++ return gpu->mb; ++ } ++ +++// Call this to clean and invalidate a region of memory ++ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ { ++- void *tmp = vcsm_lock(p->vcsm_handle); ++- vcsm_unlock_ptr(tmp); +++#define RPI_FAST_CACHEFLUSH +++#ifdef RPI_FAST_CACHEFLUSH +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ 
iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = p->arm; +++ iocache.s[0].size = p->numbytes; +++ vcsm_clean_invalid( &iocache ); +++#else +++ void *tmp = vcsm_lock(p->vcsm_handle); +++ vcsm_unlock_ptr(tmp); +++#endif ++ } ++ ++ static int gpu_malloc_cached_internal(int numbytes, GPU_MEM_PTR_T *p) { +++ p->numbytes = numbytes; ++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST, (char *)"Video Frame" ); ++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); ++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 2f08f03..0565a60 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -1,6 +1,8 @@ ++ #ifndef RPI_QPU_H ++ #define RPI_QPU_H ++ +++#define RPI_FAST_CACHEFLUSH +++ ++ typedef struct gpu_mem_ptr_s { ++ unsigned char *arm; // Pointer to memory mapped on ARM side ++ int vc_handle; // Videocore handle of relocatable memory ++diff --git a/libavcodec/rpi_user_vcsm.h b/libavcodec/rpi_user_vcsm.h ++index 95e6de1..db41a4d 100644 ++--- a/libavcodec/rpi_user_vcsm.h +++++ b/libavcodec/rpi_user_vcsm.h ++@@ -1,29 +1,41 @@ ++-/* ++-Copyright (c) 2012, Broadcom Europe Ltd ++-All rights reserved. ++- ++-Redistribution and use in source and binary forms, with or without ++-modification, are permitted provided that the following conditions are met: ++- * Redistributions of source code must retain the above copyright ++- notice, this list of conditions and the following disclaimer. ++- * Redistributions in binary form must reproduce the above copyright ++- notice, this list of conditions and the following disclaimer in the ++- documentation and/or other materials provided with the distribution. 
++- * Neither the name of the copyright holder nor the ++- names of its contributors may be used to endorse or promote products ++- derived from this software without specific prior written permission. ++- ++-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ++-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ++-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY ++-DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES ++-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; ++-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ++-ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ++-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++-*/ +++/***************************************************************************** +++* Copyright 2001 - 2011 Broadcom Corporation. All rights reserved. +++* +++* This program is the proprietary software of Broadcom Corporation and/or +++* its licensors, and may only be used, duplicated, modified or distributed +++* pursuant to the terms and conditions of a separate, written license +++* agreement executed between you and Broadcom (an "Authorized License"). +++* Except as set forth in an Authorized License, Broadcom grants no license +++* (express or implied), right to use, or waiver of any kind with respect to +++* the Software, and Broadcom expressly reserves all rights in and to the +++* Software and all intellectual property rights therein. IF YOU HAVE NO +++* AUTHORIZED LICENSE, THEN YOU HAVE NO RIGHT TO USE THIS SOFTWARE IN ANY +++* WAY, AND SHOULD IMMEDIATELY NOTIFY BROADCOM AND DISCONTINUE ALL USE OF +++* THE SOFTWARE. 
+++* +++* Except as expressly set forth in the Authorized License, +++* 1. This program, including its structure, sequence and organization, +++* constitutes the valuable trade secrets of Broadcom, and you shall use +++* all reasonable efforts to protect the confidentiality thereof, and to +++* use this information only in connection with your use of Broadcom +++* integrated circuit products. +++* 2. TO THE MAXIMUM EXTENT PERMITTED BY LAW, THE SOFTWARE IS PROVIDED "AS IS" +++* AND WITH ALL FAULTS AND BROADCOM MAKES NO PROMISES, REPRESENTATIONS OR +++* WARRANTIES, EITHER EXPRESS, IMPLIED, STATUTORY, OR OTHERWISE, WITH +++* RESPECT TO THE SOFTWARE. BROADCOM SPECIFICALLY DISCLAIMS ANY AND ALL +++* IMPLIED WARRANTIES OF TITLE, MERCHANTABILITY, NONINFRINGEMENT, FITNESS +++* FOR A PARTICULAR PURPOSE, LACK OF VIRUSES, ACCURACY OR COMPLETENESS, +++* QUIET ENJOYMENT, QUIET POSSESSION OR CORRESPONDENCE TO DESCRIPTION. YOU +++* ASSUME THE ENTIRE RISK ARISING OUT OF USE OR PERFORMANCE OF THE SOFTWARE. +++* 3. TO THE MAXIMUM EXTENT PERMITTED BY LAW, IN NO EVENT SHALL BROADCOM OR ITS +++* LICENSORS BE LIABLE FOR (i) CONSEQUENTIAL, INCIDENTAL, SPECIAL, INDIRECT, +++* OR EXEMPLARY DAMAGES WHATSOEVER ARISING OUT OF OR IN ANY WAY RELATING TO +++* YOUR USE OF OR INABILITY TO USE THE SOFTWARE EVEN IF BROADCOM HAS BEEN +++* ADVISED OF THE POSSIBILITY OF SUCH DAMAGES; OR (ii) ANY AMOUNT IN EXCESS +++* OF THE AMOUNT ACTUALLY PAID FOR THE SOFTWARE ITSELF OR U.S. $1, WHICHEVER +++* IS GREATER. THESE LIMITATIONS SHALL APPLY NOTWITHSTANDING ANY FAILURE OF +++* ESSENTIAL PURPOSE OF ANY LIMITED REMEDY. +++*****************************************************************************/ ++ ++ #ifndef __USER_VCSM__H__INCLUDED__ ++ #define __USER_VCSM__H__INCLUDED__ ++@@ -424,21 +436,21 @@ int vcsm_unlock_hdl_sp( unsigned int handle, int cache_no_flush ); ++ ** ++ ** structure contains a list of flush/invalidate commands. 
Commands are: ++ ** 0: nop ++-** 1: invalidate given physical range in L2 ++-** 2: clean given physical range in L2 ++-** 3: clean+invalidate all of L1 ++-** 4: flush all of L2 and all of L1 +++** 1: invalidate given virtual range in L1/L2 +++** 2: clean given virtual range in L1/L2 +++** 3: clean+invalidate given virtual range in L1/L2 +++** 4: flush all L1/L2 ++ */ ++ struct vcsm_user_clean_invalid_s { ++- struct { ++- unsigned int cmd; ++- unsigned int addr; ++- unsigned int size; ++- } s[8]; +++ struct { +++ unsigned int cmd; +++ unsigned int handle; +++ unsigned int addr; +++ unsigned int size; +++ } s[8]; ++ }; ++ ++-int vcsm_clean_invalid( unsigned int handle, struct vcsm_user_clean_invalid_s *s ); ++- +++int vcsm_clean_invalid( struct vcsm_user_clean_invalid_s *s ); ++ ++ #ifdef __cplusplus ++ } ++-- ++2.7.4 ++ ++ ++From 87a6cb3a4f7189e711c85de6d20077b6453b2ebe Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Sat, 23 May 2015 21:10:10 +0100 ++Subject: [PATCH 46/68] Fix multi mailbox extra transform call ++ ++--- ++ libavcodec/hevc.c | 2 ++ ++ 1 file changed, 2 insertions(+) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 9d12583..30f5834 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3024,7 +3024,9 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI_INTER_QPU ++ rpi_execute_inter_qpu(s); ++ #endif +++#ifndef RPI_MULTI_MAILBOX ++ rpi_execute_transform(s); +++#endif ++ rpi_execute_inter_cmds(s); ++ vpu_wait(s->vpu_id); ++ rpi_execute_pred_cmds(s); ++-- ++2.7.4 ++ ++ ++From 2a3672a1bda0296453953bebe8b17d69445260b4 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 27 May 2015 16:44:29 +0100 ++Subject: [PATCH 47/68] Added support for running luma prediction on QPUs ++ ++--- ++ libavcodec/hevc.c | 237 +++++++- ++ libavcodec/hevc.h | 26 +- ++ libavcodec/hevc_filter.c | 23 +- ++ libavcodec/rpi_qpu.c | 156 ++++-- ++ 
libavcodec/rpi_qpu.h | 8 +- ++ libavcodec/rpi_shader.c | 1313 ++++++++++++++++++++++---------------------- ++ libavcodec/rpi_shader.h | 21 +- ++ libavcodec/rpi_shader.qasm | 883 ++++++++++++++--------------- ++ 8 files changed, 1464 insertions(+), 1203 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 30f5834..2da88ec 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -52,6 +52,11 @@ ++ // Define RPI_MULTI_MAILBOX to use the updated mailbox that can launch both QPU and VPU ++ #define RPI_MULTI_MAILBOX ++ #endif +++ +++ // Define RPI_CACHE_UNIF_MVS to write motion vector uniform stream to cached memory +++ // RPI_CACHE_UNIF_MVS doesn't seem to make much difference, so left undefined. +++ +++ ++ #endif ++ ++ // #define DISABLE_MC ++@@ -74,6 +79,13 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++ +++// Split image of 2048 into parts 64 wide +++// So some QPUs will have 3 blocks of 64 to do, and others 2 blocks for an image 2048 wide with 32 blocks across +++// Each block of 64*64 +++// Smallest CTU size is 16x16, so smallest block is 8x8 +++// Corresponds to a total of 83kbytes over all 12 QPUs +++#define RPI_LUMA_COMMAND_WORDS 9 +++#define Y_COMMANDS_PER_QPU ((1+3*(64*64)/(8*8)) * RPI_LUMA_COMMAND_WORDS) ++ ++ #define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++@@ -2015,10 +2027,46 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref0->frame, +++#ifdef RPI_LUMA_QPU +++ if (s->enable_rpi) { +++ int reflist = 0; +++ const Mv *mv = ¤t_mv.mv[reflist]; +++ int mx = mv->x & 3; +++ int my = mv->y & 3; +++ int my_mx = (my<<8) + mx; +++ int my2_mx2_my_mx 
= (my_mx << 16) + my_mx; +++ int x1 = x0 + (mv->x >> 2); +++ int y1 = y0 + (mv->y >> 2); +++ int chan = x0>>6; // 64 wide blocks per QPU +++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || +++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); +++ uint32_t *y = s->y_mvs[chan % 12]; +++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go +++ for(int start_x=0;start_x < nPbW;start_x+=16) { +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ *y++ = ( (nPbW<16 ? nPbW : 16) << 16 ) + (nPbH<16 ? nPbH : 16); +++ *y++ = my2_mx2_my_mx; +++ if (weight_flag) { +++ *y++ = (s->sh.luma_offset_l0[current_mv.ref_idx[reflist]] << 16) + (s->sh.luma_weight_l0[current_mv.ref_idx[reflist]] & 0xffff); +++ } else { +++ *y++ = 1; // Weight of 1 and offset of 0 +++ } +++ *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; +++ } +++ } +++ s->y_mvs[chan % 12] = y; +++ } else +++#endif +++ { +++ RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref0->frame, ++ ¤t_mv.mv[0], x0, y0, nPbW, nPbH, ++ s->sh.luma_weight_l0[current_mv.ref_idx[0]], ++ s->sh.luma_offset_l0[current_mv.ref_idx[0]]); +++ } ++ ++ if (s->ps.sps->chroma_format_idc) { ++ #ifdef RPI_INTER_QPU ++@@ -2078,10 +2126,47 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref1->frame, +++#ifdef RPI_LUMA_QPU +++ if (s->enable_rpi) { +++ int reflist 
= 1; +++ const Mv *mv = ¤t_mv.mv[reflist]; +++ int mx = mv->x & 3; +++ int my = mv->y & 3; +++ int my_mx = (my<<8) + mx; +++ int my2_mx2_my_mx = (my_mx << 16) + my_mx; +++ int x1 = x0 + (mv->x >> 2); +++ int y1 = y0 + (mv->y >> 2); +++ int chan = x0>>6; // 64 wide blocks per QPU +++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || +++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); +++ uint32_t *y = s->y_mvs[chan % 12]; +++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go +++ for(int start_x=0;start_x < nPbW;start_x+=16) { +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ *y++ = ( (nPbW<16 ? nPbW : 16) << 16 ) + (nPbH<16 ? 
nPbH : 16); +++ *y++ = my2_mx2_my_mx; +++ if (weight_flag) { +++ *y++ = (s->sh.luma_offset_l0[current_mv.ref_idx[reflist]] << 16) + (s->sh.luma_weight_l0[current_mv.ref_idx[reflist]] & 0xffff); +++ } else { +++ *y++ = 1; // Weight of 1 and offset of 0 +++ } +++ *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; +++ } +++ } +++ s->y_mvs[chan % 12] = y; +++ } else +++#endif +++ +++ { +++ RPI_REDIRECT(luma_mc_uni)(s, dst0, s->frame->linesize[0], ref1->frame, ++ ¤t_mv.mv[1], x0, y0, nPbW, nPbH, ++ s->sh.luma_weight_l1[current_mv.ref_idx[1]], ++ s->sh.luma_offset_l1[current_mv.ref_idx[1]]); +++ } ++ ++ if (s->ps.sps->chroma_format_idc) { ++ #ifdef RPI_INTER_QPU ++@@ -2115,8 +2200,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; ++ if (weight_flag) { ++- *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[1]][0] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[1]][0] & 0xffff); ++- *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[1]][1] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[1]][1] & 0xffff); +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[reflist]][0] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[reflist]][0] & 0xffff); +++ *u++ = (s->sh.chroma_offset_l0[current_mv.ref_idx[reflist]][1] << 16) + (s->sh.chroma_weight_l0[current_mv.ref_idx[reflist]][1] & 0xffff); ++ } else { ++ *u++ = 1; // Weight of 1 and offset of 0 ++ *u++ = 1; ++@@ -2143,9 +2228,44 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int nPbW_c = nPbW >> s->ps.sps->hshift[1]; ++ int nPbH_c = nPbH >> s->ps.sps->vshift[1]; ++ ++- RPI_REDIRECT(luma_mc_bi)(s, dst0, s->frame->linesize[0], ref0->frame, +++#ifdef RPI_LUMA_QPU +++ if (s->enable_rpi) { +++ const Mv *mv = ¤t_mv.mv[0]; +++ int mx = mv->x & 3; +++ int my = mv->y & 3; +++ int my_mx = (my<<8) + mx; +++ const Mv *mv2 = 
¤t_mv.mv[1]; +++ int mx2 = mv2->x & 3; +++ int my2 = mv2->y & 3; +++ int my2_mx2 = (my2<<8) + mx2; +++ int my2_mx2_my_mx = (my2_mx2 << 16) + my_mx; +++ int x1 = x0 + (mv->x >> 2); +++ int y1 = y0 + (mv->y >> 2); +++ int x2 = x0 + (mv2->x >> 2); +++ int y2 = y0 + (mv2->y >> 2); +++ int chan = x0>>6; // 64 wide blocks per QPU +++ uint32_t *y = s->y_mvs[chan % 12]; +++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go +++ for(int start_x=0;start_x < nPbW;start_x+=8) { // B blocks work 8 at a time +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y2 - 3 + start_y) << 16) + ( (x2 - 3 + start_x) & 0xffff); // Second fetch is for ref1 +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ *y++ = ( (nPbW<8 ? nPbW : 8) << 16 ) + (nPbH<16 ? nPbH : 16); +++ *y++ = my2_mx2_my_mx; +++ *y++ = 1; // B frame weighted prediction not supported +++ *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter_b; +++ } +++ } +++ s->y_mvs[chan % 12] = y; +++ } else +++#endif +++ { +++ RPI_REDIRECT(luma_mc_bi)(s, dst0, s->frame->linesize[0], ref0->frame, ++ ¤t_mv.mv[0], x0, y0, nPbW, nPbH, ++ ref1->frame, ¤t_mv.mv[1], ¤t_mv); +++ } ++ ++ if (s->ps.sps->chroma_format_idc) { ++ #ifdef RPI_INTER_QPU ++@@ -2834,7 +2954,6 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = pic_height; ++ *s->u_mvs[i]++ = s->frame->linesize[1]; ++ *s->u_mvs[i]++ = s->frame->linesize[2]; ++- *s->u_mvs[i]++ = i; ++ if (weight_flag) { ++ *s->u_mvs[i]++ = 1 << (s->sh.chroma_log2_weight_denom + 6 - 1); ++ *s->u_mvs[i]++ = s->sh.chroma_log2_weight_denom + 6; ++@@ -2842,7 +2961,31 @@ static void rpi_inter_clear(HEVCContext *s) ++ *s->u_mvs[i]++ = 1 << 5; ++ 
*s->u_mvs[i]++ = 6; ++ } +++ *s->u_mvs[i]++ = i; // Select section of VPM (avoid collisions with 3d unit) +++ } +++ +++#ifdef RPI_LUMA_QPU +++ for(i=0;i<12;i++) { +++ s->y_mvs[i] = s->y_mvs_base[i]; +++ *s->y_mvs[i]++ = 0; // y_x +++ *s->y_mvs[i]++ = 0; // ref_y_base +++ *s->y_mvs[i]++ = 0; // y2_x2 +++ *s->y_mvs[i]++ = 0; // ref_y2_base +++ *s->y_mvs[i]++ = (s->ps.sps->width << 16) + s->ps.sps->height; +++ *s->y_mvs[i]++ = s->frame->linesize[0]; // pitch +++ *s->y_mvs[i]++ = s->frame->linesize[0]; // dst_pitch +++ if (weight_flag) { +++ int offset = 1 << (s->sh.luma_log2_weight_denom + 6 - 1); +++ int shift = s->sh.luma_log2_weight_denom + 6; +++ *s->y_mvs[i]++ = (offset << 16) + shift; +++ } else { +++ int offset = 1 << 5; +++ int shift = 6; +++ *s->y_mvs[i]++ = (offset << 16) + shift; +++ } +++ *s->y_mvs[i]++ = 0; // Next kernel ++ } +++#endif ++ } ++ ++ static void rpi_execute_inter_qpu(HEVCContext *s) ++@@ -2850,6 +2993,9 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ int k; ++ int i; ++ uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr.vc; +++#ifdef RPI_LUMA_QPU +++ uint32_t *y_unif_vc = (uint32_t *)s->y_unif_mvs_ptr.vc; +++#endif ++ if (s->sh.slice_type == I_SLICE) { ++ #ifdef RPI_MULTI_MAILBOX ++ rpi_execute_transform(s); ++@@ -2865,8 +3011,23 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ ++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++ +++#ifdef RPI_LUMA_QPU +++ for(k=0;k<12;k++) { +++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+1] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for second request +++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_EXIT); // Add exit command +++ assert(s->y_mvs[k] - s->y_mvs_base[k] < 
Y_COMMANDS_PER_QPU); +++ } +++ s->y_mvs[12-1][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT12); // This QPU will signal interrupt when all others are done and have acquired a semaphore +++#endif +++ +++ ++ #ifdef RPI_MULTI_MAILBOX +++#ifdef RPI_CACHE_UNIF_MVS +++ gpu_cache_flush3(&s->coeffs_buf_accelerated,&s->y_unif_mvs_ptr, &s->unif_mvs_ptr); +++#else ++ gpu_cache_flush(&s->coeffs_buf_accelerated); +++#endif ++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++ (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++@@ -2876,7 +3037,27 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++ (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++ (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) +++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++#ifdef RPI_LUMA_QPU +++ qpu_get_fn(QPU_MC_SETUP), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[0 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[1 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[2 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[3 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[4 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[5 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[6 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[7 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[8 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[9 ] - 
(uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[10 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[11 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)) +++#else +++ 0, +++ 0,0,0,0, +++ 0,0,0,0, +++ 0,0,0,0 +++#endif ++ ); ++ for(i=0;i<4;i++) ++ s->num_coeffs[i] = 0; ++@@ -2892,6 +3073,8 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) ++ ); ++ #endif +++ +++ ++ } ++ #endif ++ ++@@ -3579,8 +3762,7 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length) ++ fail: ++ if (s->ref && s->threads_type == FF_THREAD_FRAME) { ++ #ifdef RPI_INTER_QPU ++- void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n); ++- ff_hevc_flush_chroma(s, &s->ref->tf, s->ps.sps->height); +++ ff_hevc_flush_buffer(s, &s->ref->tf, s->ps.sps->height); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, INT_MAX, 0); ++ } ++@@ -3767,7 +3949,6 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ ++ #ifdef RPI ++ av_freep(&s->unif_mv_cmds); ++- av_freep(&s->unif_xfm_cmds); ++ av_freep(&s->univ_pred_cmds); ++ ++ #ifdef RPI_INTER_QPU ++@@ -3776,7 +3957,12 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ s->unif_mvs = 0; ++ } ++ #endif ++- //gpu_free(&s->dummy); +++#ifdef RPI_LUMA_QPU +++ if (s->y_unif_mvs) { +++ gpu_free( &s->y_unif_mvs_ptr ); +++ s->y_unif_mvs = 0; +++ } +++#endif ++ ++ #ifdef EARLY_MALLOC ++ printf("hevc_decode_free\n"); ++@@ -3861,9 +4047,6 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->unif_mv_cmds = av_mallocz(sizeof(HEVCMvCmd)*RPI_MAX_MV_CMDS); ++ if (!s->unif_mv_cmds) ++ goto fail; ++- s->unif_xfm_cmds = av_mallocz(sizeof(HEVCXfmCmd)*RPI_MAX_XFM_CMDS); ++- if (!s->unif_xfm_cmds) ++- goto fail; ++ s->univ_pred_cmds = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); ++ if (!s->univ_pred_cmds) ++ goto fail; ++@@ -3877,7 +4060,11 @@ static av_cold int 
hevc_init_context(AVCodecContext *avctx) ++ { ++ int uv_commands_per_qpu = UV_COMMANDS_PER_QPU; ++ uint32_t *p; +++#ifdef RPI_CACHE_UNIF_MVS +++ gpu_malloc_cached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++#else ++ gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++#endif ++ s->unif_mvs = (uint32_t *) s->unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC ++ ++ // Set up initial locations for uniform streams ++@@ -3892,6 +4079,28 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ ++ } ++ #endif +++#ifdef RPI_LUMA_QPU +++ { +++ int y_commands_per_qpu = Y_COMMANDS_PER_QPU; +++ uint32_t *p; +++#ifdef RPI_CACHE_UNIF_MVS +++ gpu_malloc_cached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr ); +++#else +++ gpu_malloc_uncached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr ); +++#endif +++ s->y_unif_mvs = (uint32_t *) s->y_unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC +++ +++ // Set up initial locations for uniform streams +++ p = s->y_unif_mvs; +++ for(i = 0; i < 12; i++) { +++ s->y_mvs_base[i] = p; +++ p += y_commands_per_qpu; +++ } +++ s->mc_filter = qpu_get_fn(QPU_MC_FILTER); +++ s->mc_filter_b = qpu_get_fn(QPU_MC_FILTER_B); +++ +++ } +++#endif ++ //gpu_malloc_uncached(2048*64,&s->dummy); ++ ++ #ifdef EARLY_MALLOC ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 4a39e39..5df9dcd 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -44,9 +44,13 @@ ++ #ifdef RPI ++ ++ #include "rpi_qpu.h" ++- // Use QPU for inter prediction +++ // Define RPI_INTER_QPU to use QPU for chroma inter prediction ++ #define RPI_INTER_QPU ++ +++ #ifdef RPI_INTER_QPU +++ // Define RPI_LUMA_QPU to also use QPU for luma inter prediction +++ #define RPI_LUMA_QPU +++ #endif ++ #endif ++ ++ #define MAX_DPB_SIZE 16 // A.4.1 ++@@ -809,7 +813,6 @@ typedef struct HEVCLocalContext { ++ ++ // Worst case is for 4:4:4 4x4 blocks with 64 
high coding tree blocks, so 16 MV cmds per 4 pixels across for each colour plane, * 2 for bi ++ #define RPI_MAX_MV_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) ++-#define RPI_MAX_XFM_CMDS (16*3*(RPI_MAX_WIDTH/4)) ++ // Each block can have an intra prediction and a transform_add command ++ #define RPI_MAX_PRED_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) ++ // Worst case is 16x16 CTUs ++@@ -844,9 +847,6 @@ typedef struct HEVCMvCmd { ++ int8_t ref_idx[2]; ++ } HEVCMvCmd; ++ ++-// Command for transform to process a block of coefficients ++-typedef struct HEVCXfmCmd { ++-} HEVCXfmCmd; ++ ++ // Command for intra prediction and transform_add of predictions to coefficients ++ #define RPI_PRED_TRANSFORM_ADD 0 ++@@ -892,8 +892,7 @@ typedef struct HEVCContext { ++ ++ #ifdef RPI ++ int enable_rpi; ++- HEVCMvCmd *unif_mv_cmds; // TODO rename ++- HEVCXfmCmd *unif_xfm_cmds; +++ HEVCMvCmd *unif_mv_cmds; ++ HEVCPredCmd *univ_pred_cmds; ++ int buf_width; ++ GPU_MEM_PTR_T coeffs_buf_default; ++@@ -920,6 +919,15 @@ typedef struct HEVCContext { ++ uint32_t mc_filter_uv_b0; ++ uint32_t mc_filter_uv_b; ++ #endif +++#ifdef RPI_LUMA_QPU +++ GPU_MEM_PTR_T y_unif_mvs_ptr; +++ uint32_t *y_unif_mvs; // Base of memory for motion vector commands +++ uint32_t *y_mvs_base[12]; +++ uint32_t *y_mvs[12]; +++ // Function pointers +++ uint32_t mc_filter; +++ uint32_t mc_filter_b; +++#endif ++ ++ #endif ++ ++@@ -1166,6 +1174,10 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ int log2_trafo_size, enum ScanType scan_idx, ++ int c_idx); ++ +++#ifdef RPI_INTER_QPU +++extern void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n); +++#endif +++ ++ void ff_hevc_hls_mvd_coding(HEVCContext *s, int x0, int y0, int log2_cb_size); ++ ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index ec84e8a..11629e4 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -883,8 +883,7 @@ static int ff_hevc_buf_base(AVBufferRef *bref) { ++ return p->vc & 0x3fffffff; ++ } ++ 
++-void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n); ++-void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n) +++void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ { ++ if (s->enable_rpi && !( s->nal_unit_type == NAL_TRAIL_N || ++ s->nal_unit_type == NAL_TSA_N || ++@@ -911,10 +910,24 @@ void ff_hevc_flush_chroma(HEVCContext *s, ThreadFrame *f, int n) ++ iocache.s[1].cmd = 3; // clean+invalidate ++ iocache.s[1].addr = p->arm + base; ++ iocache.s[1].size = sz; +++ +++#ifdef RPI_LUMA_QPU +++ p = av_buffer_pool_opaque(s->frame->buf[0]); +++ sz = s->frame->linesize[0] * (n-curr_y); +++ base = s->frame->linesize[0] * curr_y; +++ iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].cmd = 3; // clean+invalidate +++ iocache.s[2].addr = p->arm + base; +++ iocache.s[2].size = sz; +++#endif ++ vcsm_clean_invalid( &iocache ); ++ #else ++ flush_buffer(s->frame->buf[1]); ++ flush_buffer(s->frame->buf[2]); +++#ifdef RPI_LUMA_QPU +++ flush_buffer(s->frame->buf[0]); // luma plane; the chroma planes buf[1]/buf[2] are flushed above +++#endif +++ ++ #endif ++ //memcpy(s->dummy.arm,s->frame->data[0],2048*64); ++ //memcpy(s->dummy.arm,s->frame->data[1],1024*32); ++@@ -938,7 +951,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ sao_filter_CTB(s, x, y - ctb_size); ++ if (s->threads_type & FF_THREAD_FRAME ) { ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s,&s->ref->tf, y); +++ ff_hevc_flush_buffer(s,&s->ref->tf, y); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y, 0); ++ } ++@@ -947,7 +960,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ sao_filter_CTB(s, x , y); ++ if (s->threads_type & FF_THREAD_FRAME ) { ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s, &s->ref->tf, y + ctb_size); +++ ff_hevc_flush_buffer(s, &s->ref->tf, y + ctb_size); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size, 0); ++ } ++@@ -957,7 +970,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ //int currh = 
s->ref->tf.progress->data[0]; ++ //if (((y + ctb_size)&63)==0) ++ #ifdef RPI_INTER_QPU ++- ff_hevc_flush_chroma(s, &s->ref->tf, y + ctb_size - 4); +++ ff_hevc_flush_buffer(s, &s->ref->tf, y + ctb_size - 4); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index aa65a77..e12304b 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -1,9 +1,11 @@ ++ #ifdef RPI ++ // This works better than the mmap in that the memory can be cached, but requires a kernel modification to enable the device. ++ // define RPI_TIME_TOTAL_QPU to print out how much time is spent in the QPU code ++-#define RPI_TIME_TOTAL_QPU +++//#define RPI_TIME_TOTAL_QPU ++ // define RPI_TIME_TOTAL_VPU to print out how much time is spent in the VPI code ++ //#define RPI_TIME_TOTAL_VPU +++// define RPI_TIME_TOTAL_POSTED to print out how much time is spent in the multi execute QPU/VPU combined +++//#define RPI_TIME_TOTAL_POSTED ++ // define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion ++ #define RPI_ASYNC ++ ++@@ -94,7 +96,8 @@ struct GPU ++ int open_count; // Number of allocated video buffers ++ int mb; // Mailbox handle ++ int vc; // Address in GPU memory ++- int mail[12]; // These are used to pass pairs of code/unifs to the QPUs +++ int mail[12*2]; // These are used to pass pairs of code/unifs to the QPUs for the first QPU task +++ int mail2[12*2]; // These are used to pass pairs of code/unifs to the QPUs for the second QPU task ++ }; ++ ++ // Stop more than one thread trying to allocate memory or use the processing resources at once ++@@ -102,7 +105,7 @@ static pthread_mutex_t gpu_mutex = PTHREAD_MUTEX_INITIALIZER; ++ static volatile struct GPU* gpu = NULL; ++ static GPU_MEM_PTR_T gpu_mem_ptr; ++ ++-#if defined(RPI_TIME_TOTAL_QPU) || defined(RPI_TIME_TOTAL_VPU) +++#if defined(RPI_TIME_TOTAL_QPU) || defined(RPI_TIME_TOTAL_VPU) || 
defined(RPI_TIME_TOTAL_POSTED) ++ static unsigned int Microseconds(void) { ++ struct timespec ts; ++ unsigned int x; ++@@ -123,7 +126,7 @@ static pthread_cond_t post_cond_head = PTHREAD_COND_INITIALIZER; ++ static pthread_cond_t post_cond_tail = PTHREAD_COND_INITIALIZER; ++ static pthread_mutex_t post_mutex = PTHREAD_MUTEX_INITIALIZER; ++ ++-static int vpu_cmds[MAXCMDS][16]; +++static int vpu_cmds[MAXCMDS][32]; ++ static volatile int vpu_async_tail=0; // Contains the number of posted jobs ++ static volatile int vpu_async_head=0; ++ #endif ++@@ -247,7 +250,6 @@ int gpu_get_mailbox(void) ++ // Call this to clean and invalidate a region of memory ++ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ { ++-#define RPI_FAST_CACHEFLUSH ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++ iocache.s[0].handle = p->vcsm_handle; ++@@ -261,6 +263,34 @@ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ #endif ++ } ++ +++void gpu_cache_flush3(GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2) +++{ +++#ifdef RPI_FAST_CACHEFLUSH +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ iocache.s[0].handle = p0->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = (int) p0->arm; +++ iocache.s[0].size = p0->numbytes; +++ iocache.s[1].handle = p1->vcsm_handle; +++ iocache.s[1].cmd = 3; // clean+invalidate +++ iocache.s[1].addr = (int) p1->arm; +++ iocache.s[1].size = p1->numbytes; +++ iocache.s[2].handle = p2->vcsm_handle; +++ iocache.s[2].cmd = 3; // clean+invalidate +++ iocache.s[2].addr = (int) p2->arm; +++ iocache.s[2].size = p2->numbytes; +++ vcsm_clean_invalid( &iocache ); +++#else +++ void *tmp; +++ tmp = vcsm_lock(p0->vcsm_handle); +++ vcsm_unlock_ptr(tmp); +++ tmp = vcsm_lock(p1->vcsm_handle); +++ vcsm_unlock_ptr(tmp); +++ tmp = vcsm_lock(p2->vcsm_handle); +++ vcsm_unlock_ptr(tmp); +++#endif +++} +++ ++ static int gpu_malloc_cached_internal(int numbytes, GPU_MEM_PTR_T *p) { ++ p->numbytes = numbytes; ++ p->vcsm_handle = 
vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST, (char *)"Video Frame" ); ++@@ -357,9 +387,19 @@ unsigned int vpu_get_constants(void) { ++ #ifdef RPI_ASYNC ++ ++ static void *vpu_start(void *arg) { +++#ifdef RPI_TIME_TOTAL_POSTED +++ int last_time=0; +++ long long on_time=0; +++ long long off_time=0; +++ int start_time; +++ int end_time; +++ int count=0; +++#endif ++ while(1) { +++ int i; ++ int *p; ++ int qpu_code; +++ int qpu_codeb; ++ pthread_mutex_lock(&post_mutex); ++ while( vpu_async_tail - vpu_async_head <= 0) ++ { ++@@ -373,24 +413,49 @@ static void *vpu_start(void *arg) { ++ break; // Last job ++ } ++ qpu_code = p[7]; +++ qpu_codeb = p[16]; ++ //if (p[7]) { ++ //GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; ++ //gpu_cache_flush(buf); ++ //} +++ +++#ifdef RPI_TIME_TOTAL_POSTED +++ start_time = Microseconds(); +++ if (last_time==0) +++ last_time = start_time; +++ off_time += start_time-last_time; +++#endif +++ ++ if (!qpu_code) { ++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); ++ } else { ++- int i; ++ for(i=0;i<8;i++) { ++ gpu->mail[i*2] = p[8+i]; ++ gpu->mail[i*2 + 1] = qpu_code; ++ } ++- ++- execute_multi(gpu->mb,8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, ++- 0, 0, 0, 0, +++ for(i=0;i<12;i++) { +++ gpu->mail2[i*2] = p[17+i]; +++ gpu->mail2[i*2 + 1] = qpu_codeb; +++ } +++#if (0) +++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); +++ execute_qpu(gpu->mb,8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */); +++#else +++ execute_multi(gpu->mb, +++ 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, ++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 ++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 +++#endif ++ } +++#ifdef RPI_TIME_TOTAL_POSTED +++ end_time = Microseconds(); +++ last_time = end_time; +++ on_time += end_time - start_time; +++ count++; +++ if ((count&0x7f)==0) +++ printf("Posted %d 
On=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(off_time/1000)); +++#endif ++ pthread_mutex_lock(&post_mutex); ++ vpu_async_head++; ++ pthread_cond_broadcast(&post_cond_head); ++@@ -436,7 +501,9 @@ int vpu_post_code(unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned ++ } ++ ++ int vpu_qpu_post_code(unsigned vpu_code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, ++- int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8) +++ int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, +++ int qpu_codeb, int unifs1b, int unifs2b, int unifs3b, int unifs4b, int unifs5b, int unifs6b, int unifs7b, int unifs8b, int unifs9b, int unifs10b, int unifs11b, int unifs12b +++ ) ++ { ++ ++ pthread_mutex_lock(&post_mutex); ++@@ -464,6 +531,21 @@ int vpu_qpu_post_code(unsigned vpu_code, unsigned r0, unsigned r1, unsigned r2, ++ p[13] = unifs6; ++ p[14] = unifs7; ++ p[15] = unifs8; +++ +++ p[16] = qpu_codeb; +++ p[17] = unifs1b; +++ p[18] = unifs2b; +++ p[19] = unifs3b; +++ p[20] = unifs4b; +++ p[21] = unifs5b; +++ p[22] = unifs6b; +++ p[23] = unifs7b; +++ p[24] = unifs8b; +++ p[25] = unifs9b; +++ p[26] = unifs10b; +++ p[27] = unifs11b; +++ p[28] = unifs12b; +++ ++ if (num<=1) ++ pthread_cond_broadcast(&post_cond_tail); // Otherwise the vpu thread must already be awake ++ pthread_mutex_unlock(&post_mutex); ++@@ -544,27 +626,27 @@ void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int un ++ off_time += start_time-last_time; ++ #endif ++ for(i=0;i<num;i++) { ++- gpu->mail[i*2 + 1] = code; +++ gpu->mail2[i*2 + 1] = code; ++ } ++ for(;i<num+num2;i++) { ++- gpu->mail[i*2 + 1] = code2; +++ gpu->mail2[i*2 + 1] = code2; ++ } ++- gpu->mail[0 ] = unifs1; ++- gpu->mail[2 ] = unifs2; ++- gpu->mail[4 ] = unifs3; ++- gpu->mail[6 ] = unifs4; ++- gpu->mail[8 ] = unifs5; ++- gpu->mail[10] = unifs6; ++- gpu->mail[12] = unifs7; 
++- gpu->mail[14] = unifs8; ++- gpu->mail[16] = unifs9; ++- gpu->mail[18] = unifs10; ++- gpu->mail[20] = unifs11; ++- gpu->mail[22] = unifs12; +++ gpu->mail2[0 ] = unifs1; +++ gpu->mail2[2 ] = unifs2; +++ gpu->mail2[4 ] = unifs3; +++ gpu->mail2[6 ] = unifs4; +++ gpu->mail2[8 ] = unifs5; +++ gpu->mail2[10] = unifs6; +++ gpu->mail2[12] = unifs7; +++ gpu->mail2[14] = unifs8; +++ gpu->mail2[16] = unifs9; +++ gpu->mail2[18] = unifs10; +++ gpu->mail2[20] = unifs11; +++ gpu->mail2[22] = unifs12; ++ execute_qpu( ++ gpu->mb, ++ 12 /* Number of QPUs */, ++- gpu->vc + offsetof(struct GPU, mail), +++ gpu->vc + offsetof(struct GPU, mail2), ++ 1 /* no flush */, // Don't flush VPU L1 cache ++ 5000 /* timeout ms */); ++ #ifdef RPI_TIME_TOTAL_QPU ++@@ -635,21 +717,21 @@ unsigned int qpu_get_fn(int num) { ++ gpu_unlock(); ++ } ++ switch(num) { ++- //case QPU_MC_SETUP: ++- // fn = mc_setup; ++- // break; ++- //case QPU_MC_FILTER: ++- // fn = mc_filter; ++- // break; +++ case QPU_MC_SETUP: +++ fn = mc_setup; +++ break; +++ case QPU_MC_FILTER: +++ fn = mc_filter; +++ break; ++ case QPU_MC_EXIT: ++ fn = mc_exit; ++ break; ++- //case QPU_MC_INTERRUPT_EXIT: ++- // fn = mc_interrupt_exit; ++- // break; ++- //case QPU_MC_FILTER_B: ++- // fn = mc_filter_b; ++- // break; +++ case QPU_MC_INTERRUPT_EXIT12: +++ fn = mc_interrupt_exit12; +++ break; +++ case QPU_MC_FILTER_B: +++ fn = mc_filter_b; +++ break; ++ //case QPU_MC_FILTER_HONLY: ++ // fn = mc_filter_honly; ++ // break; ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 0565a60..81c2bb1 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -1,6 +1,7 @@ ++ #ifndef RPI_QPU_H ++ #define RPI_QPU_H ++ +++// Define RPI_FAST_CACHEFLUSH to use the VCSM cache flush code ++ #define RPI_FAST_CACHEFLUSH ++ ++ typedef struct gpu_mem_ptr_s { ++@@ -16,6 +17,7 @@ extern int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p); ++ extern int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p); ++ extern void 
gpu_free(GPU_MEM_PTR_T *p); ++ extern void gpu_cache_flush(GPU_MEM_PTR_T *p); +++extern void gpu_cache_flush3(GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2); ++ ++ // QPU specific functions ++ extern void qpu_run_shader8(int code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); ++@@ -26,7 +28,7 @@ enum { ++ QPU_MC_SETUP, ++ QPU_MC_FILTER, ++ QPU_MC_EXIT, ++- QPU_MC_INTERRUPT_EXIT, +++ QPU_MC_INTERRUPT_EXIT12, ++ QPU_MC_FILTER_B, ++ QPU_MC_FILTER_HONLY, ++ QPU_MC_SETUP_UV, ++@@ -44,7 +46,9 @@ extern unsigned int vpu_get_constants(void); ++ extern unsigned vpu_execute_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5); ++ extern int vpu_post_code( unsigned code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, GPU_MEM_PTR_T *buf); ++ int vpu_qpu_post_code(unsigned vpu_code, unsigned r0, unsigned r1, unsigned r2, unsigned r3, unsigned r4, unsigned r5, ++- int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8); +++ int qpu_code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, +++ int qpu_codeb, int unifs1b, int unifs2b, int unifs3b, int unifs4b, int unifs5b, int unifs6b, int unifs7b, int unifs8b, int unifs9b, int unifs10b, int unifs11b, int unifs12b +++ ); ++ extern void vpu_wait( int id); ++ ++ // Simple test of shader code ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index a0f0282..e86eb30 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -48,693 +48,674 @@ unsigned int rpi_shader[] = { ++ /* [0x000000b8] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++ /* [0x000000c0] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++ /* [0x000000c8] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x000000d0] */ 0x15827d80, 0x100208e7, // mov r3, unif ++-/* [0x000000d8] */ 0x119c17c0, 0xd00208a7, // shl r2, r3, 
1 ++-/* [0x000000e0] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x000000e8] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x000000f0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x000000f8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000100] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000108] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000110] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000118] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000120] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000128] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000130] */ 0x119c17c0, 0xd00208a7, // shl r2, r3, 1 ++-/* [0x00000138] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 ++-/* [0x00000140] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000148] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000150] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000158] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000160] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000168] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000170] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000178] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000180] */ 0x0f9c11c0, 0xd0020827, // asr r0, r0, 1 ++-/* [0x00000188] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) ++-/* [0x00000190] */ 0x0c9e7040, 0x10021567, // add rb21, r0, r1 ++-/* [0x00000198] */ 0x15427d80, 0x10020827, // mov r0, ra_x ++-/* [0x000001a0] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y ++-/* [0x000001a8] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base ++-/* [0x000001b0] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset ++-/* [0x000001b8] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x000001c0] */ 0x0c9e70c0, 0x10020827, // add r0, r0, 
r3 ++-/* [0x000001c8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000001d0] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x, r0 +++/* [0x000000d0] */ 0x15427d80, 0x10020827, // mov r0, ra_x +++/* [0x000000d8] */ 0x937401f6, 0xd0024821, // max r0, r0, 0; mov r1, ra_y +++/* [0x000000e0] */ 0x926191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, ra_frame_base +++/* [0x000000e8] */ 0x916431f6, 0xd00244e2, // shl ra_xshift_next, r0, 3 ; mov r2, ra_u2v_ref_offset +++/* [0x000000f0] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x000000f8] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000100] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000108] */ 0x939c03c0, 0xd0025850, // max r1, r1, 0 ; mov ra_x, r0 +++/* [0x00000110] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000118] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch +++/* [0x00000120] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 +++/* [0x00000128] */ 0x0c9e7440, 0x10020f27, // add t1s, r2, r1 +++/* [0x00000130] */ 0x00000008, 0xe00208a7, // mov r2,8 +++/* [0x00000138] */ 0x11827c80, 0x10021327, // shl rb12,unif, r2 +++/* [0x00000140] */ 0x0c827c80, 0x10021367, // add rb13,unif,r2 +++/* [0x00000148] */ 0x15827d80, 0x100208a7, // mov r2, unif +++/* [0x00000150] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 +++/* [0x00000158] */ 0x149cf5c0, 0xd00208a7, // and r2, r2, 15 +++/* [0x00000160] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000168] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000170] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000178] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000180] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000188] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000190] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000198] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 
+++/* [0x000001a0] */ 0x0f9c11c0, 0xd00208a7, // asr r2, r0, 1 +++/* [0x000001a8] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0)) +++/* [0x000001b0] */ 0x0c9e7440, 0x10021567, // add rb21, r2, r1 +++/* [0x000001b8] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x000001c0] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x000001c8] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x000001d0] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++ /* [0x000001d8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x000001e0] */ 0x4c9d040f, 0x100248a1, // add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++-/* [0x000001e8] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 ++-/* [0x000001f0] */ 0x0c9e7440, 0x10020e27, // add t0s, r2, r1 ++-/* [0x000001f8] */ 0x15827d80, 0x10021327, // mov rb12,unif ++-/* [0x00000200] */ 0x15827d80, 0x10021367, // mov rb13,unif ++-/* [0x00000208] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000210] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000218] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000220] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000228] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000230] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x ++-/* [0x00000238] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base +++/* [0x000001e0] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x000001e8] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000001f0] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x000001f8] */ 0x0c427380, 0x10020e27, // add t0s, r1, ra_x +++/* [0x00000200] */ 0x0c627380, 0x10020f27, // add t1s, r1, ra_frame_base ++ // ::mc_filter_uv ++-/* [0x00000240] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000248] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000250] */ 0x0c9a0f80, 
0x10020827, // add r0, unif, elem_num ++-/* [0x00000258] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000260] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000268] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000270] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000278] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000280] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x00000288] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000290] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x00000298] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000002a0] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000002a8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000002b0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000002b8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000002c0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000002c8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000002d0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000002d8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000002e0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000002e8] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000002f0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000002f8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000300] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000308] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000310] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000318] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000320] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000328] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, 
ra22 ++-/* [0x00000330] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000338] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000340] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000348] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000350] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00000358] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000360] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00000368] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000370] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 ++-/* [0x00000378] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000380] */ 0x0f9e7080, 0x100613a7, // asr.ifnz rb14, r0, r2 ++-/* [0x00000388] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000390] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000208] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000210] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000218] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000220] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000228] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000230] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000238] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000240] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000248] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000250] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000258] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000260] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000268] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000270] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000278] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* 
[0x00000280] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000288] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000290] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000298] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000002a0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000002a8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x000002b0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x000002b8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x000002c0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000002c8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000002d0] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000002d8] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000002e0] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000002e8] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000002f0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000002f8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000300] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000308] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000310] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000318] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 +++/* [0x00000320] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000328] */ 0x0f9e7080, 0x100208e7, // asr r3, r0, r2 +++/* [0x00000330] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000338] */ 0x0f9e7080, 0x100613e7, // asr.ifnz rb15, r0, r2 +++/* [0x00000340] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000348] */ 0x0f9e7080, 0x100608e7, // asr.ifnz r3, r0, r2 +++/* [0x00000350] */ 0x119c87c0, 0xd00213a7, // shl rb14,r3,8 +++/* [0x00000358] */ 0x00000000, 0xe00208e7, // mov 
r3, 0 ++ // :uvloop ++-/* [0x00000398] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000003a0] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x000003a8] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x000003b0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x000003b8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x000003c0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x000003c8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000003d0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000003d8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x000003e0] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x000003e8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000003f0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000003f8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000400] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000408] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000410] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000418] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000420] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000428] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000430] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000438] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000440] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x00000448] */ 0x153a7d80, 
0x10020367, // mov ra13, ra14 ++-/* [0x00000450] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000458] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000460] */ 0x00000020, 0xe0021327, // mov rb12,32 ++-/* [0x00000468] */ 0x00000006, 0xe0021367, // mov rb13,6 ++-/* [0x00000470] */ 0x00000001, 0xe00213a7, // mov rb14,1 ++-/* [0x00000478] */ 0x00000000, 0xe00213e7, // mov rb15,0 ++-/* [0x00000480] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000488] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000490] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000498] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000004a0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000004a8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000004b0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000004b8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 ++-/* [0x000004c0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 ++-/* [0x000004c8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 ++-/* [0x000004d0] */ 0xfffffea8, 0xf06809e7, // brr.anyn -, r:uvloop ++-/* [0x000004d8] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 ++-/* [0x000004e0] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x000004e8] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x000004f0] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x000004f8] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000500] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000508] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000510] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000518] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000520] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000528] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000530] */ 0x15827d80, 
0x10021ca7, // mov vw_addr, unif +++/* [0x00000360] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000368] */ 0x8e4539bf, 0xb0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 +++/* [0x00000370] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000378] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000380] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000388] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000390] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000398] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000003a0] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000003a8] */ 0x0c627c80, 0x10020f27, // add t1s, ra_frame_base, r2 +++/* [0x000003b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000003b8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000003c0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000003c8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000003d0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000003d8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000003e0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x000003e8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x000003f0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000003f8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000400] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000408] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000410] */ 
0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000418] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000420] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000428] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000430] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000438] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000440] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000448] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000450] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000458] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000460] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x00000468] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x00000470] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x00000478] */ 0xfffffec8, 0xf06809e7, // brr.anyn -, r:uvloop +++/* [0x00000480] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x00000488] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00000490] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00000498] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000004a0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004a8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000004b0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000004b8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000004c0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000004c8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000004d0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000004d8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_uv_b0 ++-/* [0x00000538] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000540] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00000548] */ 0x0c9a0f80, 
0x10020827, // add r0, unif, elem_num ++-/* [0x00000550] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000558] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x00000560] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000568] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x00000570] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x00000578] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x00000580] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000588] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x00000590] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 ++-/* [0x00000598] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000005a0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005a8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000005b0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000005b8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x000005c0] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x000005c8] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x000005d0] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x000005d8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x000005e0] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x000005e8] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x000005f0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000005f8] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000600] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000608] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000610] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000618] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000620] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, 
ra22 ++-/* [0x00000628] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000630] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x00000638] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000640] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000648] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x00000650] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000658] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x000004e0] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x000004e8] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x000004f0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x000004f8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000500] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000508] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000510] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000518] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000520] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000528] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000530] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000538] */ 0x159d5fc0, 0x10021c67, // mov vw_setup, rb21 +++/* [0x00000540] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x00000548] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000550] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x00000558] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000560] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000568] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x00000570] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x00000578] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000580] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000588] 
*/ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000590] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000598] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005a0] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005a8] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005b0] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005b8] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x000005c0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005c8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005d0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x000005d8] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x000005e0] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005e8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000005f0] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x000005f8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000600] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b0 ++-/* [0x00000660] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000668] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x00000670] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00000678] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000680] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000688] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000690] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000698] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x000006a0] */ 0xec414c8f, 0x10024e21, 
// add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x000006a8] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x000006b0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000006b8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000006c0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000006c8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000006d0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000006d8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000006e0] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000006e8] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000006f0] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x000006f8] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000700] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000708] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000710] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000718] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00000720] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00000728] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00000730] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x00000738] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x00000740] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x00000748] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x00000750] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 ++-/* [0x00000758] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 ++-/* [0x00000760] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 ++-/* [0x00000768] */ 
0x009e7000, 0x100009e7, // nop ++-/* [0x00000770] */ 0x009e7000, 0x100009e7, // nop ++-/* [0x00000778] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000780] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000788] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000790] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000608] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000610] */ 0x8e4539bf, 0xb0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 +++/* [0x00000618] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00000620] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00000628] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x00000630] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00000638] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00000640] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x00000648] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x00000650] */ 0x0c627c80, 0x10020f27, // add t1s, ra_frame_base, r2 +++/* [0x00000658] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000660] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x00000668] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x00000670] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x00000678] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000680] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000688] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000690] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000698] */ 0x400f5031, 0xd000c9e3, // nop ; 
mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x000006a0] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x000006a8] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x000006b0] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x000006b8] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x000006c0] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000006c8] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000006d0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000006d8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000006e0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000006e8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000006f0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000006f8] */ 0x0d9d27c0, 0x100229e7, // sub.setf -, r3, rb18 +++/* [0x00000700] */ 0xfffffee8, 0xf06809e7, // brr.anyn -, r:uvloop_b0 +++/* [0x00000708] */ 0x0f9c63c0, 0xd0020c27, // asr vpm, r1, 6 +++/* [0x00000710] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000718] */ 0x009e7000, 0x100009e7, // nop +++/* [0x00000720] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000728] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000730] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000738] */ 0x009e7000, 0x100009e7, // nop ++ // ::mc_filter_uv_b ++-/* [0x00000798] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x000007a0] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x000007a8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000007b0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x000007b8] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif ++-/* [0x000007c0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x000007c8] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 ++-/* [0x000007d0] */ 
0x0c9e70c0, 0x10020827, // add r0, r0, r3 ++-/* [0x000007d8] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 ++-/* [0x000007e0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x000007e8] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 ++-/* [0x000007f0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000007f8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000800] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000808] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000810] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000818] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000820] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 ++-/* [0x00000828] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 ++-/* [0x00000830] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000838] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 ++-/* [0x00000840] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 ++-/* [0x00000848] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 ++-/* [0x00000850] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000858] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000860] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000868] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 ++-/* [0x00000870] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000878] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000880] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000888] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000890] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000898] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a0] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008a8] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000008b0] */ 
0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 ++-/* [0x000008b8] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008c0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000008c8] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 ++-/* [0x000008d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000008d8] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000740] */ 0x15827d80, 0x100207e7, // mov ra31, unif +++/* [0x00000748] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next +++/* [0x00000750] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num +++/* [0x00000758] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif +++/* [0x00000760] */ 0x928191f6, 0x10024823, // min r0, r0, rb_frame_width_minus_1 ; mov r3, unif +++/* [0x00000768] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000770] */ 0x0d827cc0, 0x100208a7, // sub r2, unif, r3 +++/* [0x00000778] */ 0x0c9e70c0, 0x10020827, // add r0, r0, r3 +++/* [0x00000780] */ 0x149dc1c0, 0xd00214e7, // and rb_x_next, r0, ~3 +++/* [0x00000788] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000790] */ 0x0c9d3e80, 0x100206a7, // add ra_frame_base_next, rb_x_next, r2 +++/* [0x00000798] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x000007a0] */ 0x00000010, 0xe00208a7, // mov r2, 16 +++/* [0x000007a8] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x000007b0] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 +++/* [0x000007b8] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x000007c0] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x000007c8] */ 0x0c9c11c0, 0xd0021467, // add rb17, r0, 1 +++/* [0x000007d0] */ 0x0c9c31c0, 0xd00214a7, // add rb18, r0, 3 +++/* [0x000007d8] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x000007e0] */ 0x119cd1c0, 0xd00208e7, // shl r3, r0, 13 +++/* [0x000007e8] */ 0x119c87c0, 0xd00208e7, // shl r3, r3, 8 +++/* [0x000007f0] */ 0x0e9c87c0, 0xd00208e7, // shr r3, r3, 8 +++/* 
[0x000007f8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000800] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 +++/* [0x00000808] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000810] */ 0x0c9d57c0, 0x10020c67, // add vr_setup, r3, rb21 +++/* [0x00000818] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000820] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000828] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000830] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000838] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif +++/* [0x00000840] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000848] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000850] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000858] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000860] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000868] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000870] */ 0xfffffff8, 0xe0021967, // mov r5rep, -8 +++/* [0x00000878] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x00000880] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :uvloop_b ++-/* [0x000008e0] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x000008e8] */ 0x8e4539bf, 0xa0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 ++-/* [0x000008f0] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x000008f8] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000900] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 ++-/* [0x00000908] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000910] */ 0x129de5c0, 0x100208a7, // min r2, r2, 
rb_frame_height_minus_1 ++-/* [0x00000918] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00000920] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-/* [0x00000928] */ 0x0c627c80, 0x10020e27, // add t0s, ra_frame_base, r2 ++-/* [0x00000930] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000938] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000940] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000948] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000950] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000958] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000960] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000968] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000970] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00000978] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00000980] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 ++-/* [0x00000988] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x00000990] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00000998] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x000009a0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x000009a8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x000009b0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000009b8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000009c0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000009c8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000009d0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, 
r1, ra22 ++-/* [0x000009d8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000009e0] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm ++-/* [0x000009e8] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 ++-/* [0x000009f0] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b ++-/* [0x000009f8] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00000a00] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00000a08] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00000a10] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00000a18] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a20] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++-/* [0x00000a28] */ 0x00000010, 0xe0020827, // mov r0, 16 ++-/* [0x00000a30] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a38] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00000a40] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 ++-/* [0x00000a48] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00000a50] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00000888] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00000890] */ 0x8e4539bf, 0xb0029810, // shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 +++/* [0x00000898] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x000008a0] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x000008a8] */ 0xee454987, 0x10024860, // shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 +++/* [0x000008b0] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x000008b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000008c0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000008c8] */ 0xec414c8f, 0x10024e21, // add t0s, ra_x, r2 ; v8subs r1, r1, rb20 +++/* [0x000008d0] */ 0x0c627c80, 0x10020f27, // add t1s, ra_frame_base, r2 +++/* [0x000008d8] */ 0x0000ff00, 
0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000008e0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000008e8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000008f0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000008f8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x00000900] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00000908] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00000910] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00000918] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00000920] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00000928] */ 0x8d3447f6, 0xd00279cc, // sub.setf -, r3, 4 ; mov ra12, ra13 +++/* [0x00000930] */ 0xffffff38, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x00000938] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00000940] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00000948] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x00000950] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x00000958] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x00000960] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x00000968] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x00000970] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x00000978] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x00000980] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00000988] */ 0x0cc27380, 0x10020867, // add r1, r1, vpm +++/* [0x00000990] */ 0x0c7a7380, 0x10020867, // add r1, r1, ra30 +++/* [0x00000998] */ 0xfffffed0, 0xf06809e7, // brr.anyn -, r:uvloop_b +++/* [0x000009a0] */ 0x0f9c73c0, 
0xd0020867, // asr r1, r1, 7 +++/* [0x000009a8] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x000009b0] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x000009b8] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x000009c0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009c8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x000009d0] */ 0x00000010, 0xe0020827, // mov r0, 16 +++/* [0x000009d8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000009e0] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x000009e8] */ 0x0c9dae00, 0x10021c67, // add vw_setup, rb26, r0 +++/* [0x000009f0] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x000009f8] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_exit ++-/* [0x00000a58] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000a60] */ 0x00000000, 0xe80009e7, // mov -,srel(0) ++-/* [0x00000a68] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a70] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a78] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a80] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000a88] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000a90] */ 0x009e7000, 0x100009e7, // nop ; nop ++-/* [0x00000a98] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a00] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a08] */ 0x00000000, 0xe80009e7, // mov -,srel(0) +++/* [0x00000a10] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a18] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00000a20] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a28] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00000a30] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000a38] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a40] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_interrupt_exit8 ++-/* [0x00000aa0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00000aa8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* 
[0x00000ab0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ab8] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ac0] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00000ac8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ad0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ad8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ae0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000ae8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000af0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000af8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00000b00] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x00000b08] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x00000b10] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00000a48] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00000a50] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a58] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00000a60] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00000a68] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00000a70] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a78] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a80] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a88] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a90] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000a98] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000aa0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00000aa8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00000ab0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_setup ++-/* [0x00000b18] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000b20] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00000b28] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* 
[0x00000b30] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000b38] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000b40] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000b48] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 ++-/* [0x00000b50] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000b58] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 ++-/* [0x00000b60] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000b68] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000b70] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000b78] */ 0x8c9e7452, 0x10025e18, // add t0s, r2, r1 ; mov ra_frame_base, r2 ++-/* [0x00000b80] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000b88] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000b90] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000b98] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x00000ba0] */ 0x0c9c13c0, 0xd0020567, // add ra_y2, r1, 1 ++-/* [0x00000ba8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000bb0] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 ++-/* [0x00000bb8] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 ++-/* [0x00000bc0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000bc8] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000bd0] */ 0x8c9e7452, 0x10025e19, // add t0s, r2, r1 ; mov ra_frame_base2, r2 ++-/* [0x00000bd8] */ 0x0d801dc0, 0xd0021667, // sub rb25,unif,1 ++-/* [0x00000be0] */ 0x0d801dc0, 0xd00217a7, // sub rb30,unif,1 ++-/* [0x00000be8] */ 0x15827d80, 0x10021427, // mov rb16, unif ++-/* [0x00000bf0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000bf8] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) ++-/* [0x00000c00] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 ++-/* [0x00000c08] 
*/ 0x00000001, 0xe0020527, // mov ra20, 1 ++-/* [0x00000c10] */ 0x00000100, 0xe00205a7, // mov ra22, 256 ++-/* [0x00000c18] */ 0x00000040, 0xe00207a7, // mov ra30, 64 ++-/* [0x00000c20] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 ++-/* [0x00000c28] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 ++-/* [0x00000c30] */ 0x00000018, 0xe00215e7, // mov rb23, 24 ++-/* [0x00000c38] */ 0x00000000, 0xe0020227, // mov ra8, 0 ++-/* [0x00000c40] */ 0x00000000, 0xe0020267, // mov ra9, 0 ++-/* [0x00000c48] */ 0x00000000, 0xe00202a7, // mov ra10, 0 ++-/* [0x00000c50] */ 0x00000000, 0xe00202e7, // mov ra11, 0 ++-/* [0x00000c58] */ 0x00000000, 0xe0020327, // mov ra12, 0 ++-/* [0x00000c60] */ 0x00000000, 0xe0020367, // mov ra13, 0 ++-/* [0x00000c68] */ 0x00000000, 0xe00203a7, // mov ra14, 0 ++-/* [0x00000c70] */ 0x00000000, 0xe00203e7, // mov ra15, 0 ++-/* [0x00000c78] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000c80] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000c88] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000c90] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000c98] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000ca0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000ca8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000cb0] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) ++-/* [0x00000cb8] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 ++-/* [0x00000cc0] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 ++-/* [0x00000cc8] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num ++-/* [0x00000cd0] */ 0x159e7480, 0x10020867, // mov r1, r2 ++-/* [0x00000cd8] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 ++-/* [0x00000ce0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 ++-/* [0x00000ce8] */ 0x159e7480, 0x10020827, // mov r0, r2 ++-/* [0x00000cf0] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 ++-/* [0x00000cf8] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000d00] */ 0x00004800, 0xe0020867, // mov 
r1, vpm_setup(0, 4, h8p(0, 0)) ++-/* [0x00000d08] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 ++-/* [0x00000d10] */ 0x15827d80, 0x10021327, // mov rb12,unif ++-/* [0x00000d18] */ 0x15827d80, 0x10021367, // mov rb13,unif ++-/* [0x00000d20] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000d28] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 ++-/* [0x00000d30] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000d38] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 ++-/* [0x00000d40] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000d48] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base ++-/* [0x00000d50] */ 0x13540dc0, 0xd0020867, // max r1, ra_y2, 0 ++-/* [0x00000d58] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++-/* [0x00000d60] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00000ac0] */ 0x00000010, 0xe00208e7, // mov r3, 16 +++/* [0x00000ac8] */ 0x15827d80, 0x10020227, // mov ra8, unif +++/* [0x00000ad0] */ 0x15827d80, 0x10020267, // mov ra9, unif +++/* [0x00000ad8] */ 0x15827d80, 0x100202a7, // mov ra10, unif +++/* [0x00000ae0] */ 0x15827d80, 0x100202e7, // mov ra11, unif +++/* [0x00000ae8] */ 0x15827d80, 0x10020867, // mov r1, unif +++/* [0x00000af0] */ 0x119e72c0, 0x10020827, // shl r0,r1,r3 +++/* [0x00000af8] */ 0x0f9e72c0, 0x10020867, // asr r1,r1,r3 +++/* [0x00000b00] */ 0x0f9e70c0, 0x10020827, // asr r0,r0,r3 +++/* [0x00000b08] */ 0x0d9c13c0, 0xd0021667, // sub rb_frame_width_minus_1,r1,1 +++/* [0x00000b10] */ 0x0d9c11c0, 0xd00217a7, // sub rb_frame_height_minus_1,r0,1 +++/* [0x00000b18] */ 0x15827d80, 0x10021427, // mov rb_pitch, unif +++/* [0x00000b20] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000b28] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0) +++/* [0x00000b30] */ 0x0c9e7200, 0x10021627, // add rb24, r1, r0 +++/* [0x00000b38] */ 0x15227d80, 0x10020867, // mov r1, ra8 +++/* [0x00000b40] */ 0x119e72c0, 0x10020827, // shl r0,r1,r3 +++/* 
[0x00000b48] */ 0x0f9e72c0, 0x10020867, // asr r1,r1,r3 +++/* [0x00000b50] */ 0x0f9e70c0, 0x10020827, // asr r0,r0,r3 +++/* [0x00000b58] */ 0x0c9a7180, 0x10020827, // add r0, r0, elem_num +++/* [0x00000b60] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0 +++/* [0x00000b68] */ 0x922591f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, ra9 +++/* [0x00000b70] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000b78] */ 0x0c9c13c0, 0xd0020767, // add ra_y, r1, 1 +++/* [0x00000b80] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000b88] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000b90] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000b98] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000ba0] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000ba8] */ 0x8c9e7452, 0x10025e18, // add t0s, r2, r1 ; mov ra_frame_base, r2 +++/* [0x00000bb0] */ 0x152a7d80, 0x10020867, // mov r1, ra10 +++/* [0x00000bb8] */ 0x119e72c0, 0x10020827, // shl r0,r1,r3 +++/* [0x00000bc0] */ 0x0f9e72c0, 0x10020867, // asr r1,r1,r3 +++/* [0x00000bc8] */ 0x0f9e70c0, 0x10020827, // asr r0,r0,r3 +++/* [0x00000bd0] */ 0x0c9a7180, 0x10020827, // add r0, r0, elem_num +++/* [0x00000bd8] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0 +++/* [0x00000be0] */ 0x922d91f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, ra11 +++/* [0x00000be8] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000bf0] */ 0x0c9c13c0, 0xd0020567, // add ra_y2, r1, 1 +++/* [0x00000bf8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000c00] */ 0x0c9e7400, 0x100208a7, // add r2, r2, r0 +++/* [0x00000c08] */ 0x139c03c0, 0xd0020867, // max r1, r1, 0 +++/* [0x00000c10] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000c18] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000c20] */ 0x8c9e7452, 0x10025f19, // add t1s, r2, r1 ; mov ra_frame_base2, r2 
+++/* [0x00000c28] */ 0x00000001, 0xe0020527, // mov ra20, 1 +++/* [0x00000c30] */ 0x00000100, 0xe00205a7, // mov ra22, 256 +++/* [0x00000c38] */ 0x00000040, 0xe00207a7, // mov ra30, 64 +++/* [0x00000c40] */ 0xffffff00, 0xe0021527, // mov rb20, 0xffffff00 +++/* [0x00000c48] */ 0x000000ff, 0xe00215a7, // mov rb22, 255 +++/* [0x00000c50] */ 0x00000018, 0xe00215e7, // mov rb23, 24 +++/* [0x00000c58] */ 0x00000000, 0xe0020227, // mov ra8, 0 +++/* [0x00000c60] */ 0x00000000, 0xe0020267, // mov ra9, 0 +++/* [0x00000c68] */ 0x00000000, 0xe00202a7, // mov ra10, 0 +++/* [0x00000c70] */ 0x00000000, 0xe00202e7, // mov ra11, 0 +++/* [0x00000c78] */ 0x00000000, 0xe0020327, // mov ra12, 0 +++/* [0x00000c80] */ 0x00000000, 0xe0020367, // mov ra13, 0 +++/* [0x00000c88] */ 0x00000000, 0xe00203a7, // mov ra14, 0 +++/* [0x00000c90] */ 0x00000000, 0xe00203e7, // mov ra15, 0 +++/* [0x00000c98] */ 0x00004000, 0xe00204a7, // mov ra18, 0x4000 +++/* [0x00000ca0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num +++/* [0x00000ca8] */ 0x159e7480, 0x10020867, // mov r1, r2 +++/* [0x00000cb0] */ 0x0f9c23c0, 0xd0020867, // asr r1, r1, 2 +++/* [0x00000cb8] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6 +++/* [0x00000cc0] */ 0x159e7480, 0x10020827, // mov r0, r2 +++/* [0x00000cc8] */ 0x149c31c0, 0xd0020827, // and r0, r0, 3 +++/* [0x00000cd0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000cd8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0)) +++/* [0x00000ce0] */ 0x0c9e7040, 0x10021727, // add rb28, r0, r1 +++/* [0x00000ce8] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) +++/* [0x00000cf0] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5 +++/* [0x00000cf8] */ 0x0c9e7040, 0x100216e7, // add rb27, r0, r1 +++/* [0x00000d00] */ 0x15827d80, 0x10020867, // mov r1, unif +++/* [0x00000d08] */ 0x919c82ff, 0xd0024822, // shl r0,r1,r3 ; mov r2,8 +++/* [0x00000d10] */ 0x0f9e70c0, 0x10021367, // asr rb13,r0,r3 +++/* [0x00000d18] */ 0x0f9e72c0, 0x10021327, // asr 
rb12,r1,r3 +++/* [0x00000d20] */ 0x0c9cde80, 0x10021367, // add rb13,rb13,r2 +++/* [0x00000d28] */ 0x119cce80, 0x10021327, // shl rb12, rb12, r2 +++/* [0x00000d30] */ 0x13740dc0, 0xd0020867, // max r1, ra_y, 0 +++/* [0x00000d38] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 +++/* [0x00000d40] */ 0x0c741dc0, 0xd0020767, // add ra_y, ra_y, 1 +++/* [0x00000d48] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch +++/* [0x00000d50] */ 0x0c627380, 0x10020e27, // add t0s, r1, ra_frame_base +++/* [0x00000d58] */ 0x13540dc0, 0xd0020867, // max r1, ra_y2, 0 +++/* [0x00000d60] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_frame_height_minus_1 ++ /* [0x00000d68] */ 0x0c541dc0, 0xd0020567, // add ra_y2, ra_y2, 1 ++ /* [0x00000d70] */ 0x409d000f, 0x100049e1, // nop ; mul24 r1, r1, rb_pitch ++-/* [0x00000d78] */ 0x0c667380, 0x10020e27, // add t0s, r1, ra_frame_base2 ++-// ::mc_filter +++/* [0x00000d78] */ 0x0c667380, 0x10020f27, // add t1s, r1, ra_frame_base2 +++// :per_block_setup ++ /* [0x00000d80] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ /* [0x00000d88] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++ /* [0x00000d90] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++ /* [0x00000d98] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next ++-/* [0x00000da0] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000da8] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00000db0] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000db8] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00000dc0] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00000dc8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000dd0] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 ++-/* [0x00000dd8] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00000de0] */ 0x938001f6, 0xd0024821, // 
max r0, r0, 0 ; mov r1, unif ++-/* [0x00000de8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00000df0] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x00000df8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 ++-/* [0x00000e00] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00000e08] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 ++-/* [0x00000e10] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x00000e18] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x00000e20] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000e28] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x00000e30] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x00000e38] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00000e40] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00000e48] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00000e50] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00000e58] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00000e60] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000e68] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00000e70] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000e78] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000e80] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000e88] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000e90] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000e98] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00000ea0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000ea8] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000eb0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000eb8] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* 
[0x00000ec0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x00000ec8] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000ed0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ed8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ee0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000ee8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x00000ef0] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000ef8] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f00] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00000f08] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000da0] */ 0x00000010, 0xe00208e7, // mov r3, 16 +++/* [0x00000da8] */ 0x15827d80, 0x10020867, // mov r1, unif +++/* [0x00000db0] */ 0x119e72c0, 0x10020827, // shl r0,r1,r3 +++/* [0x00000db8] */ 0x0f9e72c0, 0x10020867, // asr r1,r1,r3 +++/* [0x00000dc0] */ 0x0f9e70c0, 0x10020827, // asr r0,r0,r3 +++/* [0x00000dc8] */ 0x0c9a7180, 0x10020827, // add r0, r0, elem_num +++/* [0x00000dd0] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0 +++/* [0x00000dd8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000de0] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 +++/* [0x00000de8] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 +++/* [0x00000df0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000df8] */ 0x8c827436, 0x100246a1, // add ra_frame_base_next, r2, r0 ; mov r1, unif +++/* [0x00000e00] */ 0x119e72c0, 0x10020827, // shl r0,r1,r3 +++/* [0x00000e08] */ 0x0f9e72c0, 0x10020867, // asr r1,r1,r3 +++/* [0x00000e10] */ 0x0f9e70c0, 0x10020827, // asr r0,r0,r3 +++/* [0x00000e18] */ 0x0c9a7180, 0x10020827, // add r0, r0, elem_num +++/* [0x00000e20] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0 +++/* [0x00000e28] */ 
0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif +++/* [0x00000e30] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 +++/* [0x00000e38] */ 0x159e7240, 0x10021067, // mov ra_y2_next, r1 +++/* [0x00000e40] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 +++/* [0x00000e48] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 +++/* [0x00000e50] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 +++/* [0x00000e58] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00000e60] */ 0x0e9e70c0, 0x10020867, // shr r1, r0, r3 +++/* [0x00000e68] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 +++/* [0x00000e70] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 +++/* [0x00000e78] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 +++/* [0x00000e80] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 +++/* [0x00000e88] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 +++/* [0x00000e90] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 +++/* [0x00000e98] */ 0x119e70c0, 0x10020827, // shl r0, r0, r3 +++/* [0x00000ea0] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 +++/* [0x00000ea8] */ 0x95801dbf, 0xd0024821, // mov r0, unif ; mov r1,1 +++/* [0x00000eb0] */ 0x4f5971c6, 0x10024260, // asr ra9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000eb8] */ 0x4f5971c6, 0x10024220, // asr ra8, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ec0] */ 0x4f5971c6, 0x10044260, // asr.ifz ra9, r0, rb23; mul24 r0, r0, ra22 +++/* [0x00000ec8] */ 0x0f9d71c0, 0x10040227, // asr.ifz ra8, r0, rb23 +++/* [0x00000ed0] */ 0x0d243f80, 0xd0020267, // sub ra9,3,ra9 +++/* [0x00000ed8] */ 0x0d203f80, 0xd0020227, // sub ra8,3,ra8 +++/* [0x00000ee0] */ 0x11243dc0, 0xd0020267, // shl ra9,ra9,3 +++/* [0x00000ee8] */ 0x11203dc0, 0xd0020227, // shl ra8,ra8,3 +++/* [0x00000ef0] */ 0x00ffff00, 0xe0020867, // mov r1,0xffff00 +++/* [0x00000ef8] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000f00] */ 0x0f9d71c0, 0x10020027, // asr ra0, r0, rb23 +++/* [0x00000f08] */ 0x11267380, 0x10020827, // shl r0, 
r1, ra9 ++ /* [0x00000f10] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 ++-/* [0x00000f18] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00000f20] */ 0x15827d80, 0x100009e7, // mov.ifnz -, unif ++-/* [0x00000f28] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00000f30] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00000f38] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00000f40] */ 0x00000000, 0xe00208e7, // mov r3, 0 +++/* [0x00000f18] */ 0x01040400, 0xe0020867, // mov r1,0x1040400 +++/* [0x00000f20] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000f28] */ 0x0f9d71c0, 0x10020067, // asr ra1, r0, rb23 +++/* [0x00000f30] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00000f38] */ 0x0f9d71c0, 0x10021167, // asr rb5, r0, rb23 +++/* [0x00000f40] */ 0xfbf5f600, 0xe0020867, // mov r1,0xfbf5f600 +++/* [0x00000f48] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000f50] */ 0x0f9d71c0, 0x100200a7, // asr ra2, r0, rb23 +++/* [0x00000f58] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00000f60] */ 0x0f9d71c0, 0x100211a7, // asr rb6, r0, rb23 +++/* [0x00000f68] */ 0x11283a40, 0xe0020867, // mov r1,0x11283a40 +++/* [0x00000f70] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000f78] */ 0x0f9d71c0, 0x100200e7, // asr ra3, r0, rb23 +++/* [0x00000f80] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00000f88] */ 0x0f9d71c0, 0x100211e7, // asr rb7, r0, rb23 +++/* [0x00000f90] */ 0x3a281100, 0xe0020867, // mov r1,0x3a281100 +++/* [0x00000f98] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000fa0] */ 0x0f9d71c0, 0x10020127, // asr ra4, r0, rb23 +++/* [0x00000fa8] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00000fb0] */ 0x0f9d71c0, 0x10021227, // asr rb8, r0, rb23 +++/* [0x00000fb8] */ 0xf6f5fb00, 0xe0020867, // mov r1,0xf6f5fb00 +++/* [0x00000fc0] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000fc8] */ 0x0f9d71c0, 0x10020167, // asr ra5, r0, rb23 +++/* [0x00000fd0] 
*/ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00000fd8] */ 0x0f9d71c0, 0x10021267, // asr rb9, r0, rb23 +++/* [0x00000fe0] */ 0x04040100, 0xe0020867, // mov r1,0x4040100 +++/* [0x00000fe8] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00000ff0] */ 0x0f9d71c0, 0x100201a7, // asr ra6, r0, rb23 +++/* [0x00000ff8] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00001000] */ 0x0f9d71c0, 0x100212a7, // asr rb10, r0, rb23 +++/* [0x00001008] */ 0xffff0000, 0xe0020867, // mov r1,0xffff0000 +++/* [0x00001010] */ 0x11227380, 0x10020827, // shl r0, r1, ra8 +++/* [0x00001018] */ 0x0f9d71c0, 0x100201e7, // asr ra7, r0, rb23 +++/* [0x00001020] */ 0x11267380, 0x10020827, // shl r0, r1, ra9 +++/* [0x00001028] */ 0x0f9d71c0, 0x100212e7, // asr rb11, r0, rb23 +++/* [0x00001030] */ 0x15827d80, 0x10020827, // mov r0, unif +++/* [0x00001038] */ 0x0f9e70c0, 0x100213e7, // asr rb15, r0, r3 +++/* [0x00001040] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 +++/* [0x00001048] */ 0x119e70c0, 0x10020827, // shl r0, r0, r3 +++/* [0x00001050] */ 0x8f9c00ff, 0xd0024823, // asr r0, r0, r3 ; mov r3, 0 +++/* [0x00001058] */ 0x119c81c0, 0xd00213a7, // shl rb14, r0, 8 +++// ::mc_filter ++ // :yloop ++-/* [0x00000f48] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00000f50] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-/* [0x00000f58] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00000f60] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00000f68] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 ++-/* [0x00000f70] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next ++-/* [0x00000f78] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00000f80] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000f88] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 
r2, r2, r3 ++-/* [0x00000f90] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000f98] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 ++-/* [0x00000fa0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00000fa8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-/* [0x00000fb0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 ++-/* [0x00000fb8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00000fc0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x00000fc8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x00000fd0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x00000fd8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x00000fe0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x00000fe8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x00000ff0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x00000ff8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00001000] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00001008] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00001010] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00001018] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00001020] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00001028] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00001030] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001038] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* 
[0x00001040] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001048] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 ++-/* [0x00001050] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001058] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001060] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001068] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001070] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloop ++-/* [0x00001078] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001080] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00001088] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00001090] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00001098] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 ++-/* [0x000010a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000010a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000010b0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-/* [0x000010b8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-/* [0x000010c0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-/* [0x000010c8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 ++-/* [0x000010d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000010d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000010e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000010e8] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 ++-/* [0x000010f0] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 ++-/* [0x000010f8] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 ++-/* [0x00001100] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloop ++-/* [0x00001108] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 ++-/* [0x00001110] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00001118] 
*/ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00001120] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001128] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001130] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001138] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00001060] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001068] */ 0x8e4539bf, 0xb0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu1 +++/* [0x00001070] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00001078] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001080] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 +++/* [0x00001088] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00001090] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001098] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000010a0] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000010a8] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x000010b0] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x000010b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000010c0] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x000010c8] */ 0xec654c8f, 0x10024f21, // add t1s, ra_frame_base2, r2 ; v8subs r1, r1, rb20 +++/* [0x000010d0] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000010d8] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000010e0] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000010e8] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000010f0] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 
+++/* [0x000010f8] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x00001100] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00001108] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00001110] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001118] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001120] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00001128] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001130] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001138] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001140] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001148] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001150] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001158] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001160] */ 0x8d2487f6, 0xd00279c8, // sub.setf -, r3, 8 ; mov ra8, ra9 +++/* [0x00001168] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001170] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001178] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001180] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001188] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001190] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001198] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x000011a0] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000011a8] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000011b0] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000011b8] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; 
mul24 r0, ra12, rb8 +++/* [0x000011c0] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000011c8] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000011d0] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000011d8] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000011e0] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000011e8] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000011f0] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000011f8] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 +++/* [0x00001200] */ 0x409ce00f, 0x100049e1, // nop ; mul24 r1, r1, rb14 +++/* [0x00001208] */ 0x0c9cc3c0, 0x10020867, // add r1, r1, rb12 +++/* [0x00001210] */ 0x0f9cd3c0, 0x10020867, // asr r1, r1, rb13 +++/* [0x00001218] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloop +++/* [0x00001220] */ 0x0c9cf3c0, 0x10020867, // add r1, r1, rb15 +++/* [0x00001228] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001230] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001238] */ 0xfffffb28, 0xf0f809e7, // brr -, r:per_block_setup +++/* [0x00001240] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001248] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001250] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_filter_b ++-/* [0x00001140] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x00001148] */ 0x15827d80, 0x100207e7, // mov ra31, unif ++-/* [0x00001150] */ 0x154e7d80, 0x10020467, // mov ra_xshift, ra_xshift_next ++-/* [0x00001158] */ 0x155e7d80, 0x10021027, // mov rx_xshift2, rx_xshift2_next ++-/* [0x00001160] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x00001168] */ 0x938001f6, 0xd0024821, // max r0, r0, 0; mov r1, unif ++-/* [0x00001170] */ 0x928191f6, 0x10024822, // min r0, r0, 
rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x00001178] */ 0x119c31c0, 0xd00204e7, // shl ra_xshift_next, r0, 3 ++-/* [0x00001180] */ 0x159e7240, 0x10020727, // mov ra_y_next, r1 ++-/* [0x00001188] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x00001190] */ 0x0c9e7400, 0x100206a7, // add ra_frame_base_next, r2, r0 ++-/* [0x00001198] */ 0x0c9a0f80, 0x10020827, // add r0, unif, elem_num ++-/* [0x000011a0] */ 0x938001f6, 0xd0024821, // max r0, r0, 0 ; mov r1, unif ++-/* [0x000011a8] */ 0x928191f6, 0x10024822, // min r0, r0, rb_frame_width_minus_1 ; mov r2, unif ++-/* [0x000011b0] */ 0x119c31c0, 0xd00205e7, // shl rx_xshift2_next, r0, 3 ++-/* [0x000011b8] */ 0x0c9c13c0, 0xd0021067, // add ra_y2_next, r1, 1 ++-/* [0x000011c0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, ~3 ++-/* [0x000011c8] */ 0x0c9e7400, 0x100214e7, // add rx_frame_base2_next, r2, r0 ++-/* [0x000011d0] */ 0x159dcfc0, 0x10021c67, // mov vw_setup, rb28 ++-/* [0x000011d8] */ 0x00000010, 0xe00208a7, // mov r2, 16 ++-/* [0x000011e0] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x000011e8] */ 0x0e9e7080, 0x10020867, // shr r1, r0, r2 ++-/* [0x000011f0] */ 0x0d9d8e40, 0x10021767, // sub rb29, rb24, r1 ++-/* [0x000011f8] */ 0x149d61c0, 0x10020827, // and r0, r0, rb22 ++-/* [0x00001200] */ 0x0c9c51c0, 0xd0021467, // add rb17, r0, 5 ++-/* [0x00001208] */ 0x0c9c71c0, 0xd00214a7, // add rb18, r0, 7 ++-/* [0x00001210] */ 0x119c71c0, 0xd0020827, // shl r0, r0, 7 ++-/* [0x00001218] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1 ++-/* [0x00001220] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001228] */ 0x0c9db1c0, 0x100216a7, // add rb26, r0, rb27 ++-/* [0x00001230] */ 0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001238] */ 0x00000001, 0xe0020867, // mov r1, 1 ++-/* [0x00001240] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001248] */ 0x409f3001, 0xd00049e0, // nop ; mul24 r0, r0 << 13, r1 << 13 ++-/* [0x00001250] */ 0x4f5971c6, 0x100240e0, // asr ra3, r0, rb23; mul24 r0, r0, 
ra22 ++-/* [0x00001258] */ 0x409f2001, 0xd00049e0, // nop ; mul24 r0, r0 << 14, r1 << 14 ++-/* [0x00001260] */ 0x4f5971c6, 0x100240a0, // asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001268] */ 0x409f1001, 0xd00049e0, // nop ; mul24 r0, r0 << 15, r1 << 15 ++-/* [0x00001270] */ 0x4f5971c6, 0x10024060, // asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001278] */ 0x8f8171f6, 0x10024020, // asr ra0, r0, rb23; mov r0, unif ++-/* [0x00001280] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001288] */ 0x409f7001, 0xd00049e0, // nop ; mul24 r0, r0 << 9, r1 << 9 ++-/* [0x00001290] */ 0x4f5971c6, 0x100241e0, // asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001298] */ 0x409f6001, 0xd00049e0, // nop ; mul24 r0, r0 << 10, r1 << 10 ++-/* [0x000012a0] */ 0x4f5971c6, 0x100241a0, // asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012a8] */ 0x409f5001, 0xd00049e0, // nop ; mul24 r0, r0 << 11, r1 << 11 ++-/* [0x000012b0] */ 0x4f5971c6, 0x10024160, // asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012b8] */ 0x409f4001, 0xd00049e0, // nop ; mul24 r0, r0 << 12, r1 << 12 ++-/* [0x000012c0] */ 0x8f8171f6, 0x10024120, // asr ra4, r0, rb23; mov r0, unif ++-/* [0x000012c8] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x000012d0] */ 0x4f5971c6, 0x100252e0, // asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012d8] */ 0x4f5971c6, 0x100252a0, // asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012e0] */ 0x4f5971c6, 0x10025260, // asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x000012e8] */ 0x8f8171f6, 0x10025220, // asr rb8, r0, rb23; mov r0, unif ++-/* [0x000012f0] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x000012f8] */ 0x4f5971c6, 0x100251e0, // asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001300] */ 0x4f5971c6, 0x100251a0, // asr rb6, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001308] */ 0x4f5971c6, 0x10025160, // asr rb5, r0, rb23; mul24 r0, r0, ra22 ++-/* [0x00001310] */ 0x0f9d71c0, 0x10021127, // asr rb4, r0, rb23 ++-/* [0x00001318] */ 
0x15827d80, 0x10020827, // mov r0, unif ++-/* [0x00001320] */ 0x15827d80, 0x10060827, // mov.ifnz r0, unif ++-/* [0x00001328] */ 0x0f9e7080, 0x100213e7, // asr rb15, r0, r2 ++-/* [0x00001330] */ 0x119e7080, 0x10020827, // shl r0, r0, r2 ++-/* [0x00001338] */ 0x0f9e7080, 0x100213a7, // asr rb14, r0, r2 ++-/* [0x00001340] */ 0x00000000, 0xe00208e7, // mov r3, 0 ++ // :yloopb ++-/* [0x00001348] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-/* [0x00001350] */ 0x8e4539bf, 0xa0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-/* [0x00001358] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-/* [0x00001360] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-/* [0x00001368] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, rx_xshift2 ++-/* [0x00001370] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next ++-/* [0x00001378] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 ++-/* [0x00001380] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x00001388] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-/* [0x00001390] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 ++-/* [0x00001398] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 ++-/* [0x000013a0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 ++-/* [0x000013a8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-/* [0x000013b0] */ 0xec654c87, 0x10024e20, // add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 ++-/* [0x000013b8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-/* [0x000013c0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 ++-/* [0x000013c8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-/* [0x000013d0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 ++-/* [0x000013d8] */ 
0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-/* [0x000013e0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-/* [0x000013e8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-/* [0x000013f0] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-/* [0x000013f8] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-/* [0x00001400] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-/* [0x00001408] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-/* [0x00001410] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-/* [0x00001418] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-/* [0x00001420] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-/* [0x00001428] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-/* [0x00001430] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-/* [0x00001438] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-/* [0x00001440] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 ++-/* [0x00001448] */ 0x8d3487f6, 0xd00279cc, // sub.setf -, r3, 8 ; mov ra12, ra13 ++-/* [0x00001450] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 ++-/* [0x00001458] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 ++-/* [0x00001460] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 ++-/* [0x00001468] */ 0x15367d80, 0x10020327, // mov ra12, ra13 ++-/* [0x00001470] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloopb ++-/* [0x00001478] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 ++-/* [0x00001480] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 ++-/* [0x00001488] */ 0x159e7000, 0x100203e7, // mov ra15, r0 ++-/* [0x00001490] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 ++-/* [0x00001498] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 
++-/* [0x000014a0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-/* [0x000014a8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-/* [0x000014b0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-/* [0x000014b8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-/* [0x000014c0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-/* [0x000014c8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 ++-/* [0x000014d0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait ++-/* [0x000014d8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-/* [0x000014e0] */ 0x0f9ce3c0, 0xd0020867, // asr r1, r1, 14 ++-/* [0x000014e8] */ 0x4053800e, 0xd00049e1, // nop ; mul24 r1, r1 << 8, ra20 << 8 ++-/* [0x000014f0] */ 0x4c78e38f, 0x10024860, // add r1, r1, ra30 ; mul24 r0, r1, rb14 ++-/* [0x000014f8] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 ++-/* [0x00001500] */ 0xfffffe28, 0xf06809e7, // brr.anyn -, r:yloopb ++-/* [0x00001508] */ 0x0f9c73c0, 0xd0020867, // asr r1, r1, 7 ++-/* [0x00001510] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 ++-/* [0x00001518] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 ++-/* [0x00001520] */ 0x00000000, 0xf0f7e9e7, // bra -, ra31 ++-/* [0x00001528] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 ++-/* [0x00001530] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 ++-/* [0x00001538] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif +++/* [0x00001258] */ 0xcd5117de, 0xa00269e3, // sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++/* [0x00001260] */ 0x8e4539bf, 0xb0029819, // shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu1 +++/* [0x00001268] */ 0x956a7d9b, 0x1004461f, // mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++/* [0x00001270] */ 0x95710dbf, 0x10044763, // mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++/* [0x00001278] */ 0x0e9c09c0, 0x10020867, // shr r1, r4, 
rx_xshift2 +++/* [0x00001280] */ 0x159c1fc0, 0x10040567, // mov.ifz ra_y2, ra_y2_next +++/* [0x00001288] */ 0x13740dc0, 0xd00208a7, // max r2, ra_y, 0 +++/* [0x00001290] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x00001298] */ 0x4c741dd3, 0xd0024762, // add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++/* [0x000012a0] */ 0xec614c87, 0x10024e20, // add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 +++/* [0x000012a8] */ 0x13540dc0, 0xd00208a7, // max r2, ra_y2, 0 +++/* [0x000012b0] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_frame_height_minus_1 +++/* [0x000012b8] */ 0x4c541dd3, 0xd0024562, // add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++/* [0x000012c0] */ 0xec654c8f, 0x10024f21, // add t1s, ra_frame_base2, r2 ; v8subs r1, r1, rb20 +++/* [0x000012c8] */ 0x0000ff00, 0xe20229e7, // mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++/* [0x000012d0] */ 0x40027006, 0x100049e2, // nop ; mul24 r2, r0, ra0 +++/* [0x000012d8] */ 0x40038031, 0xd000c9e2, // nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++/* [0x000012e0] */ 0x4007f030, 0xd00049e3, // nop ; mul24 r3, ra1 << 1, r0 << 1 +++/* [0x000012e8] */ 0x40077031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++/* [0x000012f0] */ 0x4c0be4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++/* [0x000012f8] */ 0x400b6031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++/* [0x00001300] */ 0x4c0fd4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++/* [0x00001308] */ 0x400f5031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++/* [0x00001310] */ 0x4c13c4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++/* [0x00001318] */ 0x40134031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++/* [0x00001320] */ 0x4c17b4f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++/* [0x00001328] */ 0x40173031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++/* [0x00001330] */ 0x4c1ba4f0, 0xd00248a3, // add r2, r2, r3 
; mul24 r3, ra6 << 6, r0 << 6 +++/* [0x00001338] */ 0x401b2031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++/* [0x00001340] */ 0x4c1f94f0, 0xd00248a3, // add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++/* [0x00001348] */ 0x401f1031, 0xd000c9e3, // nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++/* [0x00001350] */ 0x8c9df4ff, 0x10024823, // add r0, r2, r3 ; mov r3, rb31 +++/* [0x00001358] */ 0x8d2487f6, 0xd00279c8, // sub.setf -, r3, 8 ; mov ra8, ra9 +++/* [0x00001360] */ 0x152a7d80, 0x10020267, // mov ra9, ra10 +++/* [0x00001368] */ 0x152e7d80, 0x100202a7, // mov ra10, ra11 +++/* [0x00001370] */ 0x15327d80, 0x100202e7, // mov ra11, ra12 +++/* [0x00001378] */ 0x15367d80, 0x10020327, // mov ra12, ra13 +++/* [0x00001380] */ 0xfffffeb8, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001388] */ 0x153a7d80, 0x10020367, // mov ra13, ra14 +++/* [0x00001390] */ 0x153e7d80, 0x100203a7, // mov ra14, ra15 +++/* [0x00001398] */ 0x159e7000, 0x100203e7, // mov ra15, r0 +++/* [0x000013a0] */ 0x4038a037, 0x100049e1, // nop ; mul24 r1, ra14, rb10 +++/* [0x000013a8] */ 0x40349037, 0x100049e0, // nop ; mul24 r0, ra13, rb9 +++/* [0x000013b0] */ 0x4c308237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra12, rb8 +++/* [0x000013b8] */ 0x4c3cb237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra15, rb11 +++/* [0x000013c0] */ 0x4c204237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra8, rb4 +++/* [0x000013c8] */ 0x4c245237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra9, rb5 +++/* [0x000013d0] */ 0x4c286237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra10, rb6 +++/* [0x000013d8] */ 0x4c2c7237, 0x10024860, // add r1, r1, r0 ; mul24 r0, ra11, rb7 +++/* [0x000013e0] */ 0x8c9f223f, 0x10020867, // add r1, r1, r0 ; mov -, vw_wait +++/* [0x000013e8] */ 0x4d5927ce, 0x100269e1, // sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++/* [0x000013f0] */ 0x0f9ce3c0, 0xd0020827, // asr r0, r1, 14 +++/* [0x000013f8] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6 +++/* [0x00001400] */ 0x405b8006, 0xd00049e0, // nop ; 
mul24 r0, r0 << 8, ra22 << 8 +++/* [0x00001408] */ 0x0c4a7380, 0x10020867, // add r1, r1, ra18 +++/* [0x00001410] */ 0x0c9e7200, 0x10020867, // add r1, r1, r0 +++/* [0x00001418] */ 0xfffffe20, 0xf06809e7, // brr.anyn -, r:yloopb +++/* [0x00001420] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, 15 +++/* [0x00001428] */ 0x129d63c0, 0x10020867, // min r1, r1, rb22 +++/* [0x00001430] */ 0x139c03c0, 0xd0020c27, // max vpm, r1, 0 +++/* [0x00001438] */ 0xfffff928, 0xf0f809e7, // brr -, r:per_block_setup +++/* [0x00001440] */ 0x159dafc0, 0x10021c67, // mov vw_setup, rb26 +++/* [0x00001448] */ 0x159ddfc0, 0x10021c67, // mov vw_setup, rb29 +++/* [0x00001450] */ 0x15827d80, 0x10021ca7, // mov vw_addr, unif ++ // ::mc_interrupt_exit12 ++-/* [0x00001540] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait ++-/* [0x00001548] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001550] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001558] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001560] */ 0x009e7000, 0xa00009e7, // ldtmu0 ++-/* [0x00001568] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001570] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001578] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001580] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001588] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001590] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x00001598] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015a0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) ++-/* [0x000015c0] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend ++-/* [0x000015c8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop ++-/* [0x000015d0] */ 0x009e7000, 0x100009e7, // nop ; nop +++/* [0x00001458] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x00001460] */ 
0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001468] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001470] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00001478] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00001480] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001488] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001490] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x00001498] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014a0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014a8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014b0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014b8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014c0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014c8] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014d0] */ 0x00000010, 0xe80009e7, // mov -,sacq(0) +++/* [0x000014d8] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x000014e0] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x000014e8] */ 0x009e7000, 0x100009e7, // nop ; nop +++// ::mc_exit1 +++/* [0x000014f0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait +++/* [0x000014f8] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001500] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00001508] */ 0x009e7000, 0xa00009e7, // ldtmu0 +++/* [0x00001510] */ 0x009e7000, 0xb00009e7, // ldtmu1 +++/* [0x00001518] */ 0x009e7000, 0x300009e7, // nop ; nop ; thrend +++/* [0x00001520] */ 0x00000001, 0xe00209a7, // mov interrupt, 1; nop +++/* [0x00001528] */ 0x009e7000, 0x100009e7, // nop ; nop ++ // ::mc_end ++ }; ++ #ifdef __HIGHC__ ++diff --git a/libavcodec/rpi_shader.h b/libavcodec/rpi_shader.h ++index 6e552d9..760bd17 100644 ++--- a/libavcodec/rpi_shader.h +++++ b/libavcodec/rpi_shader.h ++@@ -4,15 +4,16 @@ ++ extern unsigned int rpi_shader[]; ++ ++ #define mc_setup_uv (rpi_shader + 0) ++-#define mc_filter_uv (rpi_shader + 144) ++-#define mc_filter_uv_b0 (rpi_shader + 334) ++-#define 
mc_filter_uv_b (rpi_shader + 486) ++-#define mc_exit (rpi_shader + 662) ++-#define mc_interrupt_exit8 (rpi_shader + 680) ++-#define mc_setup (rpi_shader + 710) ++-#define mc_filter (rpi_shader + 864) ++-#define mc_filter_b (rpi_shader + 1104) ++-#define mc_interrupt_exit12 (rpi_shader + 1360) ++-#define mc_end (rpi_shader + 1398) +++#define mc_filter_uv (rpi_shader + 130) +++#define mc_filter_uv_b0 (rpi_shader + 312) +++#define mc_filter_uv_b (rpi_shader + 464) +++#define mc_exit (rpi_shader + 640) +++#define mc_interrupt_exit8 (rpi_shader + 658) +++#define mc_setup (rpi_shader + 688) +++#define mc_filter (rpi_shader + 1048) +++#define mc_filter_b (rpi_shader + 1174) +++#define mc_interrupt_exit12 (rpi_shader + 1302) +++#define mc_exit1 (rpi_shader + 1340) +++#define mc_end (rpi_shader + 1356) ++ ++ #endif ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index a0b8e5a..60d1ec2 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -21,6 +21,7 @@ ++ # ++ # ra16 clipped(row start address+elem_num)&~3 ++ # ra17 per-channel shifts +++# ra18 0x4000 ++ # ra19 next ra17 ++ # ++ # rb16 pitch ++@@ -86,7 +87,7 @@ ++ ++ ++ ################################################################################ ++-# mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, pad0, pad1, pad2) +++# mc_setup_uv(next_kernel, x, y, ref_u_base, ref_v_base, frame_width, frame_height, pitch, dst_pitch, offset, denom, vpm_id) ++ ::mc_setup_uv ++ ++ # Read starting kernel ++@@ -132,36 +133,6 @@ mov ra13, 0 ++ mov ra14, 0 ++ mov ra15, 0 ++ ++-# Compute part of VPM to use for DMA output ++-mov r3, unif ++-shl r2, r3, 1 # Convert QPU numbers to be even (this means we can only use 8 QPUs, but is necessary as we need to save 16bit intermediate results) ++-and r2, r2, 15 ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) 
# height,width added later ++-shl r0, r0, 5 ++-add rb27, r0, r1 ++- ++-# Compute part of VPM to save data into ++-shl r2, r3, 1 ++-and r2, r2, 15 # r2 = bcd0 ++-mov r1, r2 # r1 = bcd0 ++-asr r1, r1, 2 # r1 = bc ++-shl r1, r1, 6 # r1 = bc000000 ++-mov r0, r2 # r0 = bcd0 ++-and r0, r0, 3 # r0 = d0 ++-add r0, r0, r1 # r0 = bc0000d0 ++-mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit ++-add rb28, r0, r1 ++-asr r0, r0, 1 # r0 = bc0000d ++-# Prepare VPM command for 16bit intermediates ++-mov r1, vpm_setup(0, 2, h16p(0, 0)) # 2 is stride - stride acts on ADDR which is Y[5:0],H[0] for 16 bit ++-add rb21, r0, r1 ++- ++ # Compute base address for first and second access ++ mov r0, ra_x # Load x ++ max r0, r0, 0; mov r1, ra_y # Load y ++@@ -175,10 +146,31 @@ min r1, r1, rb_frame_height_minus_1 ++ # submit texture requests for first line ++ add r2, r2, r0 ; mul24 r1, r1, rb_pitch ++ add t0s, r0, r1 ; mov ra_frame_base, r2 ++-add t0s, r2, r1 +++add t1s, r2, r1 +++ +++mov r2,8 +++shl rb12,unif, r2 # offset before shift +++add rb13,unif,r2 # offset after shift +++ +++# Compute part of VPM to use for DMA output +++mov r2, unif +++shl r2, r2, 1 # Convert QPU numbers to be even (this means we can only use 8 QPUs, but is necessary as we need to save 16bit intermediate results) +++and r2, r2, 15 +++mov r1, r2 +++asr r1, r1, 2 +++shl r1, r1, 6 +++mov r0, r2 +++and r0, r0, 3 +++add r0, r0, r1 ++ ++-mov rb12,unif # offset before shift ++-mov rb13,unif # offset after shift +++mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit +++add rb28, r0, r1 # VPM 8bit storage +++asr r2, r0, 1 # r0 = bc0000d +++mov r1, vpm_setup(0, 2, h16p(0, 0)) # 2 is stride - stride acts on ADDR which is Y[5:0],H[0] for 16 bit +++add rb21, r2, r1 # VPM for 16bit intermediates +++mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later +++shl r0, r0, 5 +++add rb27, r0, r1 # DMA out ++ ++ # submit 
texture requests for second line ++ max r1, ra_y, 0 ++@@ -187,7 +179,7 @@ add ra_y, ra_y, 1 ++ bra -, ra31 ++ nop ; mul24 r1, r1, rb_pitch ++ add t0s, r1, ra_x ++-add t0s, r1, ra_frame_base +++add t1s, r1, ra_frame_base ++ ++ ++ ++@@ -248,17 +240,15 @@ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ mov r0, unif # U offset/weight ++ asr rb15, r0, r2 # Compute offset from MSBs ++ shl r0, r0, r2 ++-asr rb14, r0, r2 # Compute weight from LSBs +++asr r3, r0, r2 # Compute weight from LSBs ++ mov r0, unif # V offset/weight ++ asr.ifnz rb15, r0, r2 ++ shl r0, r0, r2 ++-asr.ifnz rb14, r0, r2 +++asr.ifnz r3, r0, r2 +++shl rb14,r3,8 # Scale up weights so we can use mul24 in signed fashion ++ ++ # r2 is elem_num ++ # r3 is loop counter ++- ++-mov r5rep, -8 ++- ++ # retrieve texture results and pick out bytes ++ # then submit two more texture requests ++ ++@@ -269,7 +259,7 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 ++ mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++@@ -278,7 +268,7 @@ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++ add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_frame_base, r2 +++add t1s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -301,11 +291,6 @@ mov ra13, ra14 # Delay slot 1 ++ mov ra14, ra15 # Delay slot 2 ++ mov ra15, r0 # Delay slot 3 ++ ++-mov rb12,32 # TODO remove these to make P weighted prediction work properly ++-mov rb13,6 ++-mov rb14,1 ++-mov rb15,0 ++- ++ # apply vertical filter and write to VPM ++ ++ nop ; mul24 r1, ra14, rb10 ++@@ -412,7 +397,7 @@ 
mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 ++ mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++@@ -421,7 +406,7 @@ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++ add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_frame_base, r2 +++add t1s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -542,7 +527,7 @@ mov r3, 0 ++ # then submit two more texture requests ++ ++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 # loop counter increment ++-shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu0 +++shr r0, r4, ra_xshift ; mov.ifz ra_x, rb_x_next ; ldtmu1 ++ mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++ shr r1, r4, ra_xshift ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++@@ -551,7 +536,7 @@ max r2, ra_y, 0 # y ++ min r2, r2, rb_frame_height_minus_1 ++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++ add t0s, ra_x, r2 ; v8subs r1, r1, rb20 ++-add t0s, ra_frame_base, r2 +++add t1s, ra_frame_base, r2 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++@@ -617,9 +602,9 @@ mov -, vw_wait # wait on the VDW ++ mov -,srel(0) ++ ++ ldtmu0 +++ldtmu1 ++ ldtmu0 ++-ldtmu0 ++-ldtmu0 +++ldtmu1 ++ ++ nop ; nop ; thrend ++ nop ; nop # delay slot 1 ++@@ -630,9 +615,9 @@ nop ; nop # delay slot 2 ++ mov -, vw_wait # wait on the VDW ++ ++ ldtmu0 +++ldtmu1 ++ ldtmu0 ++-ldtmu0 ++-ldtmu0 +++ldtmu1 ++ ++ mov -,sacq(0) # 1 ++ mov -,sacq(0) # 2 ++@@ -656,200 +641,249 @@ nop ; nop # delay slot 2 ++ # For P frames we make 
the second x,y coordinates offset by +8 ++ ++ ################################################################################ ++-# mc_setup(next_kernel, x, y, ref_y_base, x2, y2, ref_y2_base, frame_width, frame_height, pitch, dst_pitch, offset, shift, pad2) +++# mc_setup(y_x, ref_y_base, y2_x2, ref_y2_base, frame_width_height, pitch, dst_pitch, offset_shift, next_kernel) ++ ::mc_setup +++ mov r3, 16 ++ ++-# Read starting kernel ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-mov ra31, unif ++- ++-# Compute base address for first and second access ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl ra_xshift_next, r0, 3 # Compute shifts ++-add ra_y, r1, 1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add r2, r2, r0 # r2 is address for frame0 (not including y offset) ++-max r1, r1, 0 ++-min r1, r1, rb_frame_height_minus_1 ++-nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 ++-add t0s, r2, r1 ; mov ra_frame_base, r2 ++- ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl rx_xshift2_next, r0, 3 # Compute shifts ++-add ra_y2, r1, 1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add r2, r2, r0 # r2 is address for frame1 (not including y offset) ++-max r1, r1, 0 ++-min r1, r1, rb_frame_height_minus_1 ++-nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 ++-add t0s, r2, r1 ; mov ra_frame_base2, r2 ++- +++ # Need to save these because we need to know the frame dimensions before computing texture coordinates +++ mov ra8, unif +++ mov ra9, unif +++ mov ra10, unif +++ mov ra11, unif ++ ++ # Read image dimensions ++-sub rb25,unif,1 ++-sub rb30,unif,1 +++ mov r1, unif # width_height +++ shl r0,r1,r3 +++ asr r1,r1,r3 # width 
+++ asr r0,r0,r3 # height +++ sub rb_frame_width_minus_1,r1,1 +++ sub rb_frame_height_minus_1,r0,1 ++ ++ # get source pitch ++-mov rb16, unif +++ mov rb_pitch, unif ++ ++ # get destination pitch ++-mov r0, unif ++-mov r1, vdw_setup_1(0) ++-add rb24, r1, r0 +++ mov r0, unif +++ mov r1, vdw_setup_1(0) +++ add rb24, r1, r0 ++ ++-# load constants ++- ++-mov ra20, 1 ++-mov ra22, 256 ++-mov ra30, 64 ++- ++-mov rb20, 0xffffff00 ++-mov rb22, 255 ++-mov rb23, 24 +++# Compute base address for first and second access +++ mov r1, ra8 # y_x +++ shl r0,r1,r3 # r0 is x<<16 +++ asr r1,r1,r3 # r1 is y +++ asr r0,r0,r3 # r0 is x +++ add r0, r0, elem_num # Load x +++ max r0, r0, 0 +++ min r0, r0, rb_frame_width_minus_1 ; mov r2, ra9 # Load the frame base +++ shl ra_xshift_next, r0, 3 # Compute shifts +++ add ra_y, r1, 1 +++ and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++ add r2, r2, r0 # r2 is address for frame0 (not including y offset) +++ max r1, r1, 0 +++ min r1, r1, rb_frame_height_minus_1 +++ nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 +++ add t0s, r2, r1 ; mov ra_frame_base, r2 +++ +++ mov r1, ra10 # y_x +++ shl r0,r1,r3 # r0 is x<<16 +++ asr r1,r1,r3 # r1 is y +++ asr r0,r0,r3 # r0 is x +++ add r0, r0, elem_num # Load x +++ max r0, r0, 0 +++ min r0, r0, rb_frame_width_minus_1 ; mov r2, ra11 # Load the frame base +++ shl rx_xshift2_next, r0, 3 # Compute shifts +++ add ra_y2, r1, 1 +++ and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++ add r2, r2, r0 # r2 is address for frame1 (not including y offset) +++ max r1, r1, 0 +++ min r1, r1, rb_frame_height_minus_1 +++ nop ; mul24 r1, r1, rb_pitch # r2 contains the addresses (not including y offset) for frame0 +++ add t1s, r2, r1 ; mov ra_frame_base2, r2 ++ ++-# touch vertical context to keep simulator happy ++ ++-mov ra8, 0 ++-mov ra9, 0 ++-mov ra10, 0 ++-mov ra11, 0 ++-mov ra12, 0 ++-mov ra13, 0 ++-mov ra14, 0 ++-mov ra15, 0 +++# load constants ++ 
++-# Compute part of VPM to use for DMA output ++-mov r2, qpu_num ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later ++-shl r0, r0, 5 ++-add rb27, r0, r1 +++ mov ra20, 1 +++ mov ra22, 256 +++ mov ra30, 64 ++ ++-# Compute part of VPM to save data into ++-mov r2, qpu_num # qpu_num = abcd ++-mov r1, r2 ++-asr r1, r1, 2 ++-shl r1, r1, 6 ++-mov r0, r2 ++-and r0, r0, 3 ++-add r0, r0, r1 ++-mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit ++-add rb28, r0, r1 +++ mov rb20, 0xffffff00 +++ mov rb22, 255 +++ mov rb23, 24 ++ ++-mov rb12,unif # offset before shift ++-mov rb13,unif # shift +++# touch vertical context to keep simulator happy ++ ++-# Dump padding words ++-mov r0, unif +++ mov ra8, 0 +++ mov ra9, 0 +++ mov ra10, 0 +++ mov ra11, 0 +++ mov ra12, 0 +++ mov ra13, 0 +++ mov ra14, 0 +++ mov ra15, 0 +++ mov ra18, 0x4000 +++ +++# Compute part of VPM to use +++ mov r2, qpu_num +++ mov r1, r2 +++ asr r1, r1, 2 +++ shl r1, r1, 6 +++ mov r0, r2 +++ and r0, r0, 3 +++ add r0, r0, r1 +++ mov r1, vpm_setup(0, 4, h8p(0, 0)) # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit +++ add rb28, r0, r1 # VPM for saving data +++ mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later +++ shl r0, r0, 5 +++ add rb27, r0, r1 # Command for dma output +++ +++# Weighted prediction denom +++ +++ mov r1, unif # offset_shift +++ shl r0,r1,r3 ; mov r2,8 +++ asr rb13,r0,r3 # shift +++ asr rb12,r1,r3 # offset +++ add rb13,rb13,r2 # mul24 is unsigned so scale up into high bits +++ shl rb12, rb12, r2 # Account for larger shift ++ ++ # submit texture requests for second line ++-max r1, ra_y, 0 ++-min r1, r1, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ++-nop ; mul24 r1, r1, rb_pitch ++-add t0s, r1, ra_frame_base ++- ++-max r1, ra_y2, 0 ++-min r1, r1, rb_frame_height_minus_1 ++-bra -, ra31 ++-add ra_y2, 
ra_y2, 1 # Delay 1 ++-nop ; mul24 r1, r1, rb_pitch # Delay 2 ++-add t0s, r1, ra_frame_base2 # Delay 3 ++- ++- ++-################################################################################ ++- ++-# mc_filter(next_kernel, x, y, frame_base, x2, y2, frame_base2, height, hcoeffs[0], hcoeffs2[0], hcoeffs[1], hcoeffs2[1], vcoeffs[0], vcoeffs2[0], vcoeffs[1], vcoeffs2[1], offsetweight0, offsetweight1, this_dst) ++-# In a P block, only the first half of coefficients contain used information. ++-# At this point we have already issued two pairs of texture requests for the current block ++-# ra_x, ra_x16_base point to the current coordinates for this block ++-::mc_filter ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-mov ra31, unif +++ max r1, ra_y, 0 +++ min r1, r1, rb_frame_height_minus_1 +++ add ra_y, ra_y, 1 +++ nop ; mul24 r1, r1, rb_pitch +++ add t0s, r1, ra_frame_base +++ +++ max r1, ra_y2, 0 +++ min r1, r1, rb_frame_height_minus_1 +++ add ra_y2, ra_y2, 1 +++ nop ; mul24 r1, r1, rb_pitch +++ add t1s, r1, ra_frame_base2 +++ +++# FALL THROUGHT TO PER-BLOCK SETUP +++ +++# Start of per-block setup code +++# P and B blocks share the same setup code to save on Icache space +++:per_block_setup +++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ mov ra31, unif ++ ++ # per-channel shifts were calculated on the *previous* invocation ++- ++-mov ra_xshift, ra_xshift_next ++-mov rx_xshift2, rx_xshift2_next +++ mov ra_xshift, ra_xshift_next +++ mov rx_xshift2, rx_xshift2_next ++ ++ # get base addresses and per-channel shifts for *next* invocation ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl ra_xshift_next, r0, 3 # Compute shifts ++-mov ra_y_next, r1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add ra_frame_base_next, r2, r0 # r2 is address for frame0 (not including y offset) ++- ++-add r0, unif, elem_num # 
Load x ++-max r0, r0, 0 ; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl rx_xshift2_next, r0, 3 # Compute shifts ++-add ra_y2_next, r1, 1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add rx_frame_base2_next, r2, r0 # r2 is address for frame1 (not including y offset) ++- +++ mov r3, 16 +++ mov r1, unif # y_x +++ shl r0,r1,r3 # r0 is x<<16 +++ asr r1,r1,r3 # r1 is y +++ asr r0,r0,r3 # r0 is x +++ add r0, r0, elem_num # Load x +++ max r0, r0, 0 +++ min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++ shl ra_xshift_next, r0, 3 # Compute shifts +++ mov ra_y_next, r1 +++ and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++ add ra_frame_base_next, r2, r0 ; mov r1, unif # y2_x2 +++ +++ shl r0,r1,r3 # r0 is x2<<16 +++ asr r1,r1,r3 # r1 is y2 +++ asr r0,r0,r3 # r0 is x2 +++ add r0, r0, elem_num # Load x +++ max r0, r0, 0 +++ min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base +++ shl rx_xshift2_next, r0, 3 # Compute shifts +++ mov ra_y2_next, r1 +++ and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate +++ add rx_frame_base2_next, r2, r0 # r2 is address for frame1 (not including y offset) ++ ++ # set up VPM write ++-mov vw_setup, rb28 +++ mov vw_setup, rb28 ++ ++ # get width,height of block ++-mov r2, 16 ++-mov r0, unif ++-shr r1, r0, r2 # Extract width ++-sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++-and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 ++-shl r0, r0, 7 ++-add r0, r0, r1 # Combine width and height of destination area ++-shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++-add rb26, r0, rb27 +++ mov r0, unif +++ shr r1, r0, r3 # Extract width +++ sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) +++ and r0, r0, rb22 # Extract height +++ add rb17, r0, 5 +++ add rb18, r0, 7 +++ shl r0, r0, 7 +++ add r0, r0, r1 # Combine width and height of destination area 
+++ shl r0, r0, r3 # Shift into bits 16 upwards of the vdw_setup0 register +++ add rb26, r0, rb27 ++ ++ # get filter coefficients and discard unused B frame values ++-mov r0, unif ++-mov.ifnz -, unif # Alternate coefficients are unused for P frames ++-asr ra3, r0, rb23; mul24 r0, r0, ra22 # These may need some pre-rotation to be used in B frames correctly ++-asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-asr ra0, r0, rb23; mov r0, unif ++-mov.ifnz -, unif ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-asr ra4, r0, rb23; mov r0, unif ++-mov.ifnz -, unif ++-asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif ++-mov.ifnz -, unif ++-asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-asr rb6, r0, rb23; mul24 r0, r0, ra22 ++-asr rb5, r0, rb23; mul24 r0, r0, ra22 ++-asr rb4, r0, rb23 ++- ++-mov r0, unif # Frame0 offset/weight ++-mov.ifnz -, unif # Frame1 offset/weight unused ++-asr rb15, r0, r2 # Compute offset from MSBs ++-shl r0, r0, r2 ++-asr rb14, r0, r2 # Compute weight from LSBs ++- ++-# r3 is loop counter +++ mov r0, unif ; mov r1,1 # Packed filter offsets, unpack into ra8... 
(to be used for vertical context later) +++ asr ra9, r0, rb23; mul24 r0, r0, ra22 # my2 +++ asr ra8, r0, rb23; mul24 r0, r0, ra22 # mx2 +++ asr.ifz ra9, r0, rb23; mul24 r0, r0, ra22 # my:my2 +++ asr.ifz ra8, r0, rb23 # mx:mx2 +++ sub ra9,3,ra9 +++ sub ra8,3,ra8 +++ shl ra9,ra9,3 # Scale up by 8 +++ shl ra8,ra8,3 # Scale up by 8 +++# Now if we want aligned we have a mul of 1, so put 0 coefficients at the top +++ mov r1,0xffff00 +++ shl r0, r1, ra8 +++ asr ra0, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb4, r0, rb23 +++ +++ mov r1,0x1040400 +++ shl r0, r1, ra8 +++ asr ra1, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb5, r0, rb23 +++ +++ mov r1,0xfbf5f600 +++ shl r0, r1, ra8 +++ asr ra2, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb6, r0, rb23 +++ +++ mov r1,0x11283a40 +++ shl r0, r1, ra8 +++ asr ra3, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb7, r0, rb23 +++ +++ mov r1,0x3a281100 +++ shl r0, r1, ra8 +++ asr ra4, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb8, r0, rb23 +++ +++ mov r1,0xf6f5fb00 +++ shl r0, r1, ra8 +++ asr ra5, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb9, r0, rb23 +++ +++ mov r1,0x4040100 +++ shl r0, r1, ra8 +++ asr ra6, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb10, r0, rb23 +++ +++ mov r1,0xffff0000 +++ shl r0, r1, ra8 +++ asr ra7, r0, rb23 +++ shl r0, r1, ra9 +++ asr rb11, r0, rb23 +++ +++# Extract weighted prediction information +++ mov r0, unif # offset/weight TODO move up +++ asr rb15, r0, r3 # Compute offset from MSBs +++ bra -, ra31 +++ shl r0, r0, r3 # Delay 1 +++ asr r0, r0, r3 ; mov r3, 0 # Compute weight from LSBs and reset loop counter Delay 2 +++ shl rb14, r0, 8 # Use a larger shift to avoid unsigned multiply problem Delay 3 ++ ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests +++################################################################################ +++# mc_filter(y_x, frame_base, y2_x2, frame_base2, width_height, my2_mx2_my_mx, offsetweight0, this_dst, next_kernel) +++# In a P block, y2_x2 should be y_x+8 +++# At this 
point we have already issued two pairs of texture requests for the current block ++ ++-mov r3, 0 +++::mc_filter ++ ++ :yloop ++ # retrieve texture results and pick out bytes ++@@ -858,91 +892,90 @@ mov r3, 0 ++ # If we knew there was no clipping then this code would get simpler. ++ # Perhaps we could add on the pitch and clip using larger values? ++ ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, rx_xshift2 ++-mov.ifz ra_y2, ra_y2_next +++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++ shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu1 +++ mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++ shr r1, r4, rx_xshift2 +++ mov.ifz ra_y2, ra_y2_next ++ ++-max r2, ra_y, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++- ++-max r2, ra_y2, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++ max r2, ra_y, 0 # y +++ min r2, r2, rb_frame_height_minus_1 +++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++ add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ +++ max r2, ra_y2, 0 # y +++ min r2, r2, rb_frame_height_minus_1 +++ add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++ add t1s, ra_frame_base2, r2 ; v8subs r1, r1, rb20 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++ ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++ # apply horizontal filter ++-nop ; mul24 r2, r0, ra0 ++-nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-nop ; 
mul24 r3, ra1 << 1, r0 << 1 ++-nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 ++-add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-add r0, r2, r3 ; mov r3, rb31 ++-sub.setf -, r3, 8 ; mov ra12, ra13 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++-mov ra12, ra13 ++-brr.anyn -, r:yloop ++-mov ra13, ra14 # Delay slot 1 ++-mov ra14, ra15 # Delay slot 2 ++-mov ra15, r0 # Delay slot 3 +++ nop ; mul24 r2, r0, ra0 +++ nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++ nop ; mul24 r3, ra1 << 1, r0 << 1 +++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++ add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++ nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++ add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++ nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++ add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++ nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++ add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++ nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++ add r0, r2, r3 ; mov r3, rb31 +++ sub.setf -, r3, 8 ; mov ra8, ra9 +++ mov ra9, ra10 +++ mov ra10, ra11 +++ mov ra11, ra12 +++ mov ra12, ra13 +++ brr.anyn -, r:yloop +++ mov ra13, ra14 # Delay slot 1 +++ mov ra14, ra15 # Delay slot 2 +++ mov ra15, r0 # Delay slot 3 ++ ++ # apply vertical filter and write to VPM ++ ++-nop ; mul24 r1, ra14, rb10 ++-nop ; mul24 r0, ra13, rb9 ++-add r1, r1, r0 ; 
mul24 r0, ra12, rb8 ++-add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-add r1, r1, r0 ; mul24 r0, ra11, rb7 ++- ++-add r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 14 ++-nop ; mul24 r1, r1, rb14 ++-add r1, r1, rb12 ++-asr r1, r1, rb13 ++-brr.anyn -, r:yloop ++-add r1, r1, rb15 # Delay 1 ++-min r1, r1, rb22 # Delay 2 ++-max vpm, r1, 0 # Delay 3 +++ nop ; mul24 r1, ra14, rb10 +++ nop ; mul24 r0, ra13, rb9 +++ add r1, r1, r0 ; mul24 r0, ra12, rb8 +++ add r1, r1, r0 ; mul24 r0, ra15, rb11 +++ add r1, r1, r0 ; mul24 r0, ra8, rb4 +++ add r1, r1, r0 ; mul24 r0, ra9, rb5 +++ add r1, r1, r0 ; mul24 r0, ra10, rb6 +++ add r1, r1, r0 ; mul24 r0, ra11, rb7 +++ +++ add r1, r1, r0 ; mov -, vw_wait +++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++ asr r1, r1, 14 +++ nop ; mul24 r1, r1, rb14 +++ add r1, r1, rb12 +++ asr r1, r1, rb13 +++ brr.anyn -, r:yloop +++ add r1, r1, rb15 # Delay 1 +++ min r1, r1, rb22 # Delay 2 +++ max vpm, r1, 0 # Delay 3 ++ ++ # DMA out ++ ++-bra -, ra31 ++-mov vw_setup, rb26 # VDW setup 0 Delay 1 ++-mov vw_setup, rb29 # Stride Delay 2 ++-mov vw_addr, unif # start the VDW Delay 3 +++ brr -, r:per_block_setup +++ mov vw_setup, rb26 # VDW setup 0 Delay 1 +++ mov vw_setup, rb29 # Stride Delay 2 +++ mov vw_addr, unif # start the VDW Delay 3 ++ ++ ++ ++ ################################################################################ ++ ++-# mc_filter_b(next_kernel, x, y, frame_base, x2, y2, frame_base2, width_height, hcoeffs[0], hcoeffs2[0], hcoeffs[1], hcoeffs2[1], vcoeffs[0], vcoeffs2[0], vcoeffs[1], vcoeffs2[1], offsetweight0, offsetweight1, this_dst) +++# mc_filter_b(y_x, frame_base, y2_x2, frame_base2, width_height, my2_mx2_my_mx, offsetweight0, this_dst, next_kernel) ++ # In a P block, only the first half of coefficients contain used information. 
++ # At this point we have already issued two pairs of texture requests for the current block ++ # May be better to just send 16.16 motion vector and figure out the coefficients inside this block (only 4 cases so can compute hcoeffs in around 24 cycles?) ++@@ -952,92 +985,6 @@ mov vw_addr, unif # start the VDW Delay 3 ++ # Or possibly by taking advantage of symmetry? ++ # From 19->7 32bits per command. ++ ::mc_filter_b ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++-mov ra31, unif ++- ++-# per-channel shifts were calculated on the *previous* invocation ++- ++-mov ra_xshift, ra_xshift_next ++-mov rx_xshift2, rx_xshift2_next ++- ++-# get base addresses and per-channel shifts for *next* invocation ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl ra_xshift_next, r0, 3 # Compute shifts ++-mov ra_y_next, r1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add ra_frame_base_next, r2, r0 # r2 is address for frame0 (not including y offset) ++- ++-add r0, unif, elem_num # Load x ++-max r0, r0, 0 ; mov r1, unif # Load y ++-min r0, r0, rb_frame_width_minus_1 ; mov r2, unif # Load the frame base ++-shl rx_xshift2_next, r0, 3 # Compute shifts ++-add ra_y2_next, r1, 1 ++-and r0, r0, ~3 # r0 gives the clipped and aligned x coordinate ++-add rx_frame_base2_next, r2, r0 # r2 is address for frame1 (not including y offset) ++- ++- ++-# set up VPM write ++-mov vw_setup, rb28 ++- ++-# get width,height of block ++-mov r2, 16 ++-mov r0, unif ++-shr r1, r0, r2 # Extract width ++-sub rb29, rb24, r1 # Compute vdw_setup1(dst_pitch-width) ++-and r0, r0, rb22 # Extract height ++-add rb17, r0, 5 ++-add rb18, r0, 7 ++-shl r0, r0, 7 ++-add r0, r0, r1 # Combine width and height of destination area ++-shl r0, r0, r2 # Shift into bits 16 upwards of the vdw_setup0 register ++-add rb26, r0, rb27 ++- ++-# get filter coefficients and discard unused B frame 
values ++-mov r0, unif ++-mov r1, 1 ++-mov.ifnz r0, unif # Alternate coefficients are unused for P frames ++-nop ; mul24 r0, r0 << 13, r1 << 13 ++-asr ra3, r0, rb23; mul24 r0, r0, ra22 ++-nop ; mul24 r0, r0 << 14, r1 << 14 ++-asr ra2, r0, rb23; mul24 r0, r0, ra22 ++-nop ; mul24 r0, r0 << 15, r1 << 15 # Adjust such that a rotate of 1 will produce the values with first 8 on left, second 8 on right ++-asr ra1, r0, rb23; mul24 r0, r0, ra22 ++-asr ra0, r0, rb23; mov r0, unif ++-mov.ifnz r0, unif ++-nop ; mul24 r0, r0 << 9, r1 << 9 ++-asr ra7, r0, rb23; mul24 r0, r0, ra22 ++-nop ; mul24 r0, r0 << 10, r1 << 10 ++-asr ra6, r0, rb23; mul24 r0, r0, ra22 ++-nop ; mul24 r0, r0 << 11, r1 << 11 ++-asr ra5, r0, rb23; mul24 r0, r0, ra22 ++-nop ; mul24 r0, r0 << 12, r1 << 12 ++-asr ra4, r0, rb23; mov r0, unif ++-mov.ifnz r0, unif ++-asr rb11, r0, rb23; mul24 r0, r0, ra22 ++-asr rb10, r0, rb23; mul24 r0, r0, ra22 ++-asr rb9, r0, rb23; mul24 r0, r0, ra22 ++-asr rb8, r0, rb23; mov r0, unif ++-mov.ifnz r0, unif ++-asr rb7, r0, rb23; mul24 r0, r0, ra22 ++-asr rb6, r0, rb23; mul24 r0, r0, ra22 ++-asr rb5, r0, rb23; mul24 r0, r0, ra22 ++-asr rb4, r0, rb23 ++- ++-mov r0, unif # Frame0 offset/weight ++-mov.ifnz r0, unif # Frame1 offset/weight unused ++-asr rb15, r0, r2 # Compute offset from MSBs ++-shl r0, r0, r2 ++-asr rb14, r0, r2 # Compute weight from LSBs ++- ++-# r3 is loop counter ++- ++-# retrieve texture results and pick out bytes ++-# then submit two more texture requests ++- ++-mov r3, 0 ++- ++ :yloopb ++ # retrieve texture results and pick out bytes ++ # then submit two more texture requests ++@@ -1045,111 +992,123 @@ mov r3, 0 ++ # If we knew there was no clipping then this code would get simpler. ++ # Perhaps we could add on the pitch and clip using larger values? 
++ ++-sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 ++-shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu0 ++-mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 ++-mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch ++-shr r1, r4, rx_xshift2 ++-mov.ifz ra_y2, ra_y2_next +++ sub.setf -, r3, rb17 ; v8adds r3, r3, ra20 ; ldtmu0 +++ shr r0, r4, ra_xshift ; mov.ifz ra_frame_base2, rx_frame_base2_next ; ldtmu1 +++ mov.ifz ra_frame_base, ra_frame_base_next ; mov rb31, r3 +++ mov.ifz ra_y, ra_y_next ; mov r3, rb_pitch +++ shr r1, r4, rx_xshift2 +++ mov.ifz ra_y2, ra_y2_next ++ ++-max r2, ra_y, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y, ra_y, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++- ++-max r2, ra_y2, 0 # y ++-min r2, r2, rb_frame_height_minus_1 ++-add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 ++-add t0s, ra_frame_base2, r2 ; v8subs r0, r0, rb20 +++ max r2, ra_y, 0 # y +++ min r2, r2, rb_frame_height_minus_1 +++ add ra_y, ra_y, 1 ; mul24 r2, r2, r3 +++ add t0s, ra_frame_base, r2 ; v8subs r0, r0, rb20 # v8subs masks out all but bottom byte ++ +++ max r2, ra_y2, 0 # y +++ min r2, r2, rb_frame_height_minus_1 +++ add ra_y2, ra_y2, 1 ; mul24 r2, r2, r3 +++ add t1s, ra_frame_base2, r2 ; v8subs r1, r1, rb20 ++ ++ # generate seven shifted versions ++ # interleave with scroll of vertical context ++ ++-mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] +++ mov.setf -, [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1] ++ ++ # apply horizontal filter ++-nop ; mul24 r2, r0, ra0 ++-nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 ++-nop ; mul24 r3, ra1 << 1, r0 << 1 ++-nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 ++-add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 ++-nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 ++-add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 ++-nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 ++-add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 ++-nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 
++-add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 ++-nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 ++-add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 ++-nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 ++-add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 ++-nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 ++-add r0, r2, r3 ; mov r3, rb31 ++-sub.setf -, r3, 8 ; mov ra12, ra13 ++-mov ra9, ra10 ++-mov ra10, ra11 ++-mov ra11, ra12 ++-mov ra12, ra13 ++-brr.anyn -, r:yloopb ++-mov ra13, ra14 # Delay slot 1 ++-mov ra14, ra15 # Delay slot 2 ++-mov ra15, r0 # Delay slot 3 ++- ++-# apply vertical filter and write to VPM ++- ++-nop ; mul24 r1, ra14, rb10 ++-nop ; mul24 r0, ra13, rb9 ++-add r1, r1, r0 ; mul24 r0, ra12, rb8 ++-add r1, r1, r0 ; mul24 r0, ra15, rb11 ++-add r1, r1, r0 ; mul24 r0, ra8, rb4 ++-add r1, r1, r0 ; mul24 r0, ra9, rb5 ++-add r1, r1, r0 ; mul24 r0, ra10, rb6 ++-add r1, r1, r0 ; mul24 r0, ra11, rb7 ++- ++-add r1, r1, r0 ; mov -, vw_wait ++-sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 ++-asr r1, r1, 14 ++-nop ; mul24 r1, r1 << 8, ra20 << 8 # Rotate to align left and right halves ++-add r1, r1, ra30 ; mul24 r0, r1, rb14 ++-add r1, r1, r0 ++-brr.anyn -, r:yloopb ++-asr r1, r1, 7 # Delay 1 ++-min r1, r1, rb22 # Delay 2 ++-max vpm, r1, 0 # Delay 3 +++ nop ; mul24 r2, r0, ra0 +++ nop ; mul24.ifnz r2, ra0 << 8, r1 << 8 +++ nop ; mul24 r3, ra1 << 1, r0 << 1 +++ nop ; mul24.ifnz r3, ra1 << 9, r1 << 9 +++ add r2, r2, r3 ; mul24 r3, ra2 << 2, r0 << 2 +++ nop ; mul24.ifnz r3, ra2 << 10, r1 << 10 +++ add r2, r2, r3 ; mul24 r3, ra3 << 3, r0 << 3 +++ nop ; mul24.ifnz r3, ra3 << 11, r1 << 11 +++ add r2, r2, r3 ; mul24 r3, ra4 << 4, r0 << 4 +++ nop ; mul24.ifnz r3, ra4 << 12, r1 << 12 +++ add r2, r2, r3 ; mul24 r3, ra5 << 5, r0 << 5 +++ nop ; mul24.ifnz r3, ra5 << 13, r1 << 13 +++ add r2, r2, r3 ; mul24 r3, ra6 << 6, r0 << 6 +++ nop ; mul24.ifnz r3, ra6 << 14, r1 << 14 +++ add r2, r2, r3 ; mul24 r3, ra7 << 7, r0 << 7 +++ nop ; mul24.ifnz r3, ra7 << 15, r1 << 15 +++ add r0, r2, r3 ; mov r3, rb31 +++ sub.setf 
-, r3, 8 ; mov ra8, ra9 +++ mov ra9, ra10 +++ mov ra10, ra11 +++ mov ra11, ra12 +++ mov ra12, ra13 +++ brr.anyn -, r:yloopb +++ mov ra13, ra14 # Delay slot 1 +++ mov ra14, ra15 # Delay slot 2 +++ mov ra15, r0 # Delay slot 3 +++ +++ # apply vertical filter and write to VPM +++ +++ nop ; mul24 r1, ra14, rb10 +++ nop ; mul24 r0, ra13, rb9 +++ add r1, r1, r0 ; mul24 r0, ra12, rb8 +++ add r1, r1, r0 ; mul24 r0, ra15, rb11 +++ add r1, r1, r0 ; mul24 r0, ra8, rb4 +++ add r1, r1, r0 ; mul24 r0, ra9, rb5 +++ add r1, r1, r0 ; mul24 r0, ra10, rb6 +++ add r1, r1, r0 ; mul24 r0, ra11, rb7 +++ +++ add r1, r1, r0 ; mov -, vw_wait +++ sub.setf -, r3, rb18 ; mul24 r1, r1, ra22 +++ asr r0, r1, 14 +++ asr r1, r1, 6 # Wait state so we can use the rotate instruction +++ nop ; mul24 r0, r0 << 8, ra22 << 8 # Rotate to align left and right halves +++ add r1, r1, ra18 +++ add r1, r1, r0 +++ brr.anyn -, r:yloopb +++ asr r1, r1, 15 # Delay 1 +++ min r1, r1, rb22 # Delay 2 +++ max vpm, r1, 0 # Delay 3 ++ ++ # DMA out ++-bra -, ra31 ++-mov vw_setup, rb26 # VDW setup 0 Delay 1 ++-mov vw_setup, rb29 # Stride Delay 2 ++-mov vw_addr, unif # start the VDW Delay 3 +++ brr -, r:per_block_setup +++ mov vw_setup, rb26 # VDW setup 0 Delay 1 +++ mov vw_setup, rb29 # Stride Delay 2 +++ mov vw_addr, unif # start the VDW Delay 3 ++ ++ ################################################################################ ++ ++ # mc_interrupt_exit12() ++ ::mc_interrupt_exit12 ++-mov -, vw_wait # wait on the VDW ++- ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++-ldtmu0 ++- ++-mov -,sacq(0) # 1 ++-mov -,sacq(0) # 2 ++-mov -,sacq(0) # 3 ++-mov -,sacq(0) # 4 ++-mov -,sacq(0) # 5 ++-mov -,sacq(0) # 6 ++-mov -,sacq(0) # 7 ++-mov -,sacq(0) # 8 ++-mov -,sacq(0) # 9 ++-mov -,sacq(0) # 10 ++-mov -,sacq(0) # 11 ++- ++-nop ; nop ; thrend ++-mov interrupt, 1; nop # delay slot 1 ++-nop ; nop # delay slot 2 +++ mov -, vw_wait # wait on the VDW +++ +++ ldtmu0 +++ ldtmu0 +++ ldtmu1 +++ ldtmu1 +++ +++ mov -,sacq(0) # 1 +++ mov -,sacq(0) # 2 +++ 
mov -,sacq(0) # 3 +++ mov -,sacq(0) # 4 +++ mov -,sacq(0) # 5 +++ mov -,sacq(0) # 6 +++ mov -,sacq(0) # 7 +++ mov -,sacq(0) # 8 +++ mov -,sacq(0) # 9 +++ mov -,sacq(0) # 10 +++ mov -,sacq(0) # 11 +++ +++ nop ; nop ; thrend +++ mov interrupt, 1; nop # delay slot 1 +++ nop ; nop # delay slot 2 +++ +++ +++::mc_exit1 +++ mov -, vw_wait # wait on the VDW +++ +++ ldtmu0 +++ ldtmu1 +++ ldtmu0 +++ ldtmu1 +++ nop ; nop ; thrend +++ mov interrupt, 1; nop # delay slot 1 +++ nop ; nop # delay slot 2 ++ ++ ++ ::mc_end ++-- ++2.7.4 ++ ++ ++From f02ec34c772aad3caa17432c6a4860f9ed0d5dc6 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Tue, 2 Jun 2015 10:58:25 +0100 ++Subject: [PATCH 48/68] Added option to simulate QPUs ++ ++--- ++ libavcodec/hevc.c | 288 +++++++++++++++++++++++++++++++++++++++++++-- ++ libavcodec/rpi_qpu.c | 24 ++-- ++ libavcodec/rpi_shader.qasm | 6 +- ++ 3 files changed, 295 insertions(+), 23 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 2da88ec..34d92e2 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -56,6 +56,8 @@ ++ // Define RPI_CACHE_UNIF_MVS to write motion vector uniform stream to cached memory ++ // RPI_CACHE_UNIF_MVS doesn't seem to make much difference, so left undefined. 
++ +++ // Define RPI_SIMULATE_QPUS for debugging to run QPU code on the ARMs +++ //#define RPI_SIMULATE_QPUS ++ ++ #endif ++ ++@@ -124,7 +126,6 @@ static void pic_arrays_free(HEVCContext *s) ++ ++ #ifdef EARLY_MALLOC ++ #else ++- printf("pic_arrays_free\n"); ++ if (s->coeffs_buf_arm[0]) { ++ gpu_free(&s->coeffs_buf_default); ++ s->coeffs_buf_arm[0] = 0; ++@@ -174,11 +175,9 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ #ifdef RPI ++ #ifdef EARLY_MALLOC ++ #else ++- assert(sps); +++ av_assert0(sps); ++ int coeffs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); ++ int coefs_per_row = sps->ctb_width * coeffs_in_ctb * 3; // Allow space for chroma ++- printf("pic_arrays_init\n"); ++- printf("Allocated %d\n",coefs_per_row); ++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); ++ s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; ++ if (!s->coeffs_buf_arm[0]) ++@@ -2988,6 +2987,274 @@ static void rpi_inter_clear(HEVCContext *s) ++ #endif ++ } ++ +++ +++#ifdef RPI_SIMULATE_QPUS +++ +++static int32_t clipx(int x,int FRAME_WIDTH) +++{ +++ if (x<=0) return 0; +++ if (x>=FRAME_WIDTH) return FRAME_WIDTH-1; +++ return x; +++} +++ +++static int32_t clipy(int y,int FRAME_HEIGHT) +++{ +++ if (y<=0) return 0; +++ if (y>=FRAME_HEIGHT) return FRAME_HEIGHT-1; +++ return y; +++} +++ +++/*static int32_t filter8(uint8_t *data, int x0, int y0, int pitch, int mx, int my,int round,int denom,int weight,int offset) +++{ +++ int32_t vsum = 0; +++ int x, y; +++ +++ for (y = 0; y < 8; y++) { +++ int32_t hsum = 0; +++ +++ for (x = 0; x < 8; x++) +++ hsum += lumaFilter[mx][x]*data[clipx(x + x0) + clipy(y + y0) * pitch]; +++ +++ vsum += lumaFilter[my][y]*hsum; +++ } +++ vsum >>= 6; +++ vsum = (((vsum*weight)+round)>>denom)+offset; +++ +++ return av_clip_uint8( vsum ); +++}*/ +++ +++static int32_t filter8_chroma(uint8_t *data, int x0, int y0, int pitch, int hcoeffs, int vcoeffs,int offset_weight,int offset_before,int denom,int 
pic_width, int pic_height) +++{ +++ int32_t vsum = 0; +++ int x, y; +++ int chromaFilterH[4]; +++ int chromaFilterV[4]; +++ int i; +++ int offset_after = offset_weight>>16; +++ int weight = (offset_weight<<16)>>16; +++ for(i=0;i<4;i++) { +++ chromaFilterH[i] = ((hcoeffs>>(8*i))<<24)>>24; +++ chromaFilterV[i] = ((vcoeffs>>(8*i))<<24)>>24; +++ } +++ +++ for (y = 0; y < 4; y++) { +++ int32_t hsum = 0; +++ +++ for (x = 0; x < 4; x++) +++ hsum += chromaFilterH[x]*data[clipx(x + x0,pic_width) + clipy(y + y0,pic_height) * pitch]; +++ +++ vsum += chromaFilterV[y]*hsum; +++ } +++ vsum >>= 6; +++ vsum = (((vsum*weight)+offset_before)>>denom)+offset_after; +++ +++ return vsum; +++} +++ +++int lumaFilter[4][8]={ {0,0,0,64,0,0,0,0},{-1,4,-10,58,17,-5,1,0},{-1,4,-11,40,40,-11,4,-1},{0,1,-5,17,58,-10,4,-1} }; +++ +++static int32_t filter8_luma(uint8_t *data, int x0, int y0, int pitch, int my_mx,int offset_weight,int offset_before,int denom,int pic_width, int pic_height) +++{ +++ int32_t vsum = 0; +++ int x, y; +++ int i; +++ int offset_after = offset_weight>>16; +++ int weight = (offset_weight<<16)>>16; +++ +++ for (y = 0; y < 8; y++) { +++ int32_t hsum = 0; +++ +++ for (x = 0; x < 8; x++) +++ hsum += lumaFilter[my_mx&3][x]*data[clipx(x + x0,pic_width) + clipy(y + y0,pic_height) * pitch]; +++ +++ vsum += lumaFilter[(my_mx>>8)&3][y]*hsum; +++ } +++ vsum >>= 6; +++ vsum = (((vsum*weight)+offset_before)>>denom)+offset_after; +++ +++ return vsum; +++} +++ +++static uint8_t *test_frame(HEVCContext *s,uint32_t p, AVFrame *frame, int cIdx) +++{ +++ //int pic_width = s->ps.sps->width >> s->ps.sps->hshift[cIdx]; +++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[cIdx]; +++ int pitch = frame->linesize[cIdx]; +++ uint32_t base = get_vc_address(frame->buf[cIdx]); +++ if (p>=base && p<base+pitch*pic_height) { +++ return frame->data[cIdx] + (p-base); +++ } +++ return NULL; +++} +++ +++static uint8_t *compute_arm_addr(HEVCContext *s,uint32_t p, int cIdx) +++{ +++ SliceHeader *sh = 
&s->sh; +++ uint8_t *arm = test_frame(s,p,s->frame,cIdx); +++ int i; +++ if (arm) return arm; +++ if (sh->slice_type == P_SLICE || sh->slice_type == B_SLICE) +++ { +++ for(i=0;i<sh->nb_refs[L0];i++) { +++ arm = test_frame(s,p,s->ref->refPicList[0].ref[i]->frame,cIdx); +++ if (arm) return arm; +++ } +++ } +++ if (sh->slice_type == B_SLICE) { +++ for(i=0;i<sh->nb_refs[L1];i++) { +++ arm = test_frame(s,p,s->ref->refPicList[1].ref[i]->frame,cIdx); +++ if (arm) return arm; +++ } +++ } +++ printf("Frame 0x%x not found! Exit=%x\n",p,qpu_get_fn(QPU_MC_EXIT)); +++ exit(-1); +++ return NULL; +++} +++ +++static void rpi_simulate_inter_chroma(HEVCContext *s,uint32_t *p) +++{ +++ uint32_t next_kernel; +++ uint32_t x0; +++ uint32_t y0; +++ uint8_t *ref_u_base; +++ uint8_t *ref_v_base; +++ uint32_t frame_width = p[5]; +++ uint32_t frame_height = p[6]; +++ uint32_t pitch = p[7]; +++ uint32_t dst_pitch = p[8]; +++ int32_t offset_before = p[9]; +++ int32_t denom = p[10]; +++ uint32_t vpm_id = p[11]; +++ uint32_t tmp_u_dst[256]; +++ uint32_t tmp_v_dst[256]; +++ while(1) { +++ p += 12; +++ next_kernel = p[0-12]; +++ x0 = p[1-12]; +++ y0 = p[2-12]; +++ if (next_kernel==s->mc_filter_uv || next_kernel==s->mc_filter_uv_b0 || next_kernel==s->mc_filter_uv_b) { +++ int x,y; +++ uint32_t width_height = p[5]; +++ uint32_t hcoeffs = p[6]; +++ uint32_t vcoeffs = p[7]; +++ uint32_t offset_weight_u = p[8]; +++ uint32_t offset_weight_v = p[9]; +++ uint8_t *this_u_dst; +++ uint8_t *this_v_dst; +++ uint32_t width = width_height >> 16; +++ uint32_t height = (width_height << 16) >> 16; +++ ref_u_base = compute_arm_addr(s,p[3-12],1); +++ ref_v_base = compute_arm_addr(s,p[4-12],2); +++ if (next_kernel!=s->mc_filter_uv_b0) +++ { +++ this_u_dst = compute_arm_addr(s,p[10],1); +++ this_v_dst = compute_arm_addr(s,p[11],2); +++ } +++ for (y=0; y<height; ++y) { +++ for (x=0; x<width; ++x) { +++ if (next_kernel==s->mc_filter_uv) { +++ int32_t refa = filter8_chroma(ref_u_base,x+x0, y+y0, pitch, hcoeffs, vcoeffs, 
offset_weight_u,offset_before,denom,frame_width,frame_height); +++ int32_t refb = filter8_chroma(ref_v_base,x+x0, y+y0, pitch, hcoeffs, vcoeffs, offset_weight_v,offset_before,denom,frame_width,frame_height); +++ this_u_dst[x+y*dst_pitch] = av_clip_uint8(refa); +++ this_v_dst[x+y*dst_pitch] = av_clip_uint8(refb); +++ } else if (next_kernel==s->mc_filter_uv_b0) { +++ int32_t refa = filter8_chroma(ref_u_base, x+x0, y+y0, pitch, hcoeffs, vcoeffs, 1,0,0,frame_width,frame_height); +++ int32_t refb = filter8_chroma(ref_v_base, x+x0, y+y0, pitch, hcoeffs, vcoeffs, 1,0,0,frame_width,frame_height); +++ tmp_u_dst[x+y*16] = refa; +++ tmp_v_dst[x+y*16] = refb; +++ } else { +++ int32_t refa = filter8_chroma(ref_u_base, x+x0, y+y0, pitch, hcoeffs, vcoeffs, 1, 64 + tmp_u_dst[x+y*16], 7, frame_width, frame_height); +++ int32_t refb = filter8_chroma(ref_v_base, x+x0, y+y0, pitch, hcoeffs, vcoeffs, 1, 64 + tmp_v_dst[x+y*16], 7, frame_width, frame_height); +++ this_u_dst[x+y*dst_pitch] = av_clip_uint8(refa); +++ this_v_dst[x+y*dst_pitch] = av_clip_uint8(refb); +++ } +++ } +++ } +++ } else { +++ av_assert0(next_kernel==qpu_get_fn(QPU_MC_INTERRUPT_EXIT8) || next_kernel==qpu_get_fn(QPU_MC_EXIT) ); +++ break; +++ } +++ } +++} +++ +++// mc_setup(y_x, ref_y_base, y2_x2, ref_y2_base, frame_width_height, pitch, dst_pitch, offset_shift, next_kernel) +++static void rpi_simulate_inter_luma(HEVCContext *s,uint32_t *p) +++{ +++ uint32_t next_kernel; +++ int y_x,y2_x2; +++ uint32_t x0; +++ uint32_t y0; +++ uint32_t x2; +++ uint32_t y2; +++ uint8_t *ref_y_base; +++ uint8_t *ref_y2_base; +++ uint32_t frame_width_height = p[4]; +++ uint32_t frame_width = frame_width_height>>16; +++ uint32_t frame_height = (frame_width_height<<16)>>16; +++ uint32_t pitch = p[5]; +++ uint32_t dst_pitch = p[6]; +++ int offset_shift = p[7]; +++ int32_t offset_before = offset_shift>>16; +++ int32_t denom = (offset_shift<<16)>>16; +++ while(1) { +++ p += 9; +++ next_kernel = p[8-9]; +++ y_x = p[0-9]; +++ x0 = (y_x<<16)>>16; 
+++ y0 = y_x>>16; +++ y2_x2 = p[2-9]; +++ x2 = (y2_x2<<16)>>16; +++ y2 = y2_x2>>16; +++ +++ if (next_kernel==s->mc_filter || next_kernel==s->mc_filter_b) { +++ // y_x, frame_base, y2_x2, frame_base2, width_height, my2_mx2_my_mx, offsetweight0, this_dst, next_kernel) +++ int x,y; +++ uint32_t width_height = p[4]; +++ uint32_t my2_mx2_my_mx = p[5]; +++ uint32_t offset_weight = p[6]; +++ uint8_t *this_dst = compute_arm_addr(s,p[7],0); +++ uint32_t width = width_height >> 16; +++ uint32_t height = (width_height << 16) >> 16; +++ ref_y_base = compute_arm_addr(s,p[1-9],0); +++ ref_y2_base = compute_arm_addr(s,p[3-9],0); +++ for (y=0; y<height; ++y) { +++ for (x=0; x<width; ++x) { +++ if (next_kernel==s->mc_filter) { +++ int32_t refa = filter8_luma(ref_y_base,x+x0, y+y0, pitch, my2_mx2_my_mx, offset_weight,offset_before,denom,frame_width,frame_height); +++ this_dst[x+y*dst_pitch] = av_clip_uint8(refa); +++ } +++ else { +++ int32_t refa = filter8_luma(ref_y_base, x+x0, y+y0, pitch, my2_mx2_my_mx, 1, 0, 0, frame_width, frame_height); +++ int32_t refb = filter8_luma(ref_y2_base, x+x2, y+y2, pitch, my2_mx2_my_mx>>16, 1, 64 + refa, 7, frame_width, frame_height); +++ this_dst[x+y*dst_pitch] = av_clip_uint8(refb); +++ } +++ } +++ } +++ } else { +++ av_assert0(next_kernel==qpu_get_fn(QPU_MC_INTERRUPT_EXIT12) || next_kernel==qpu_get_fn(QPU_MC_EXIT) ); +++ break; +++ } +++ } +++} +++ +++static void rpi_simulate_inter_qpu(HEVCContext *s) +++{ +++ // First run the transform as normal +++ int i; +++ rpi_execute_transform(s); +++ for(i=0;i<8;i++) +++ { +++ rpi_simulate_inter_chroma(s,s->mvs_base[i]); +++ } +++ for(i=0;i<12;i++) +++ { +++ rpi_simulate_inter_luma(s,s->y_mvs_base[i]); +++ } +++} +++ +++#endif +++ +++ ++ static void rpi_execute_inter_qpu(HEVCContext *s) ++ { ++ int k; ++@@ -3006,7 +3273,7 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = 
qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++ s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for V ++- assert(s->u_mvs[k] - s->mvs_base[k] < UV_COMMANDS_PER_QPU); +++ av_assert0(s->u_mvs[k] - s->mvs_base[k] < UV_COMMANDS_PER_QPU); ++ } ++ ++ s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++@@ -3016,11 +3283,16 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+1] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for second request ++ s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++- assert(s->y_mvs[k] - s->y_mvs_base[k] < Y_COMMANDS_PER_QPU); +++ av_assert0(s->y_mvs[k] - s->y_mvs_base[k] < Y_COMMANDS_PER_QPU); ++ } ++ s->y_mvs[12-1][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT12); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++ #endif ++ +++#ifdef RPI_SIMULATE_QPUS +++ rpi_simulate_inter_qpu(s); +++ s->vpu_id = -1; +++ return; +++#endif ++ ++ #ifdef RPI_MULTI_MAILBOX ++ #ifdef RPI_CACHE_UNIF_MVS ++@@ -3101,7 +3373,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1; ++ #endif ++ ++- /*if (!s->enable_rpi) { +++ if (!s->enable_rpi) { ++ if (s->ps.pps->cross_component_prediction_enabled_flag) ++ printf("Cross component\n"); ++ if (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1) ++@@ -3110,7 +3382,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ 
printf("Weighted P slice\n"); ++ if (s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE) ++ printf("Weighted B slice\n"); ++- }*/ +++ } ++ ++ #endif ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index e12304b..4480f72 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -13,7 +13,7 @@ ++ #include <stdlib.h> ++ #include <string.h> ++ #include <stddef.h> ++-#include <assert.h> +++#include "libavutil/avassert.h" ++ ++ #include "config.h" ++ ++@@ -160,13 +160,13 @@ static int gpu_init(volatile struct GPU **gpu) { ++ // Now copy over the QPU code into GPU memory ++ { ++ int num_bytes = qpu_get_fn(QPU_MC_END) - qpu_get_fn(QPU_MC_SETUP_UV); ++- assert(num_bytes<=QPU_CODE_SIZE*sizeof(unsigned int)); +++ av_assert0(num_bytes<=QPU_CODE_SIZE*sizeof(unsigned int)); ++ memcpy((void*)ptr->qpu_code, rpi_shader, num_bytes); ++ } ++ // And the VPU code ++ { ++ int num_bytes = sizeof(rpi_hevc_transform); ++- assert(num_bytes<=VPU_CODE_SIZE*sizeof(unsigned int)); +++ av_assert0(num_bytes<=VPU_CODE_SIZE*sizeof(unsigned int)); ++ memcpy((void*)ptr->vpu_code, rpi_hevc_transform, num_bytes); ++ } ++ // And the transform coefficients ++@@ -216,13 +216,13 @@ static void gpu_unlock(void) { ++ static int gpu_malloc_uncached_internal(int numbytes, GPU_MEM_PTR_T *p, int mb) { ++ p->numbytes = numbytes; ++ p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); ++- assert(p->vcsm_handle); +++ av_assert0(p->vcsm_handle); ++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); ++- assert(p->vc_handle); +++ av_assert0(p->vc_handle); ++ p->arm = vcsm_lock(p->vcsm_handle); ++- assert(p->arm); +++ av_assert0(p->arm); ++ p->vc = mem_lock(mb, p->vc_handle); ++- assert(p->vc); +++ av_assert0(p->vc); ++ return 0; ++ } ++ ++@@ -243,7 +243,7 @@ int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p) ++ ++ int gpu_get_mailbox(void) ++ { ++- assert(gpu); +++ av_assert0(gpu); ++ return gpu->mb; ++ } ++ ++@@ -297,13 +297,13 @@ 
static int gpu_malloc_cached_internal(int numbytes, GPU_MEM_PTR_T *p) { ++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_VC, (char *)"Video Frame" ); ++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_NONE, (char *)"Video Frame" ); ++ //p->vcsm_handle = vcsm_malloc_cache(numbytes, VCSM_CACHE_TYPE_HOST_AND_VC, (char *)"Video Frame" ); ++- assert(p->vcsm_handle); +++ av_assert0(p->vcsm_handle); ++ p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle); ++- assert(p->vc_handle); +++ av_assert0(p->vc_handle); ++ p->arm = vcsm_lock(p->vcsm_handle); ++- assert(p->arm); +++ av_assert0(p->arm); ++ p->vc = mem_lock(gpu->mb, p->vc_handle); ++- assert(p->vc); +++ av_assert0(p->vc); ++ return 0; ++ } ++ ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 60d1ec2..0686249 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ b/libavcodec/rpi_shader.qasm ++@@ -149,8 +149,8 @@ add t0s, r0, r1 ; mov ra_frame_base, r2 ++ add t1s, r2, r1 ++ ++ mov r2,8 ++-shl rb12,unif, r2 # offset before shift ++-add rb13,unif,r2 # offset after shift +++shl rb12,unif,r2 # offset before shift +++add rb13,unif,r2 # denominator ++ ++ # Compute part of VPM to use for DMA output ++ mov r2, unif ++@@ -185,7 +185,7 @@ add t1s, r1, ra_frame_base ++ ++ ################################################################################ ++ ++-# mc_filter_uv(next_kernel, x, y, frame_u_base, frame_v_base, height, hcoeffs[0], hcoeffs[1], vcoeffs[0], vcoeffs[1], this_u_dst, this_v_dst) +++# mc_filter_uv(next_kernel, x, y, frame_u_base, frame_v_base, width_height, hcoeffs, vcoeffs, offset_weight_u, offset_weight_v, this_u_dst, this_v_dst) ++ ++ # At this point we have already issued two pairs of texture requests for the current block ++ # ra_x, ra_x16_base point to the current coordinates for this block ++-- ++2.7.4 ++ ++ ++From 8bdf6b06c612ff4971c2ce99a62d093cf92468ca Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Tue, 2 Jun 2015 
13:17:50 +0100 ++Subject: [PATCH 49/68] Increased motion vector memory and fixed block size ++ computation for non-multiple of 2 block sizes ++ ++--- ++ libavcodec/hevc.c | 50 +++++++++++++++++++++++++++++++------------------- ++ 1 file changed, 31 insertions(+), 19 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 34d92e2..3fb1e2a 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -83,11 +83,9 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ ++ // Split image of 2048 into parts 64 wide ++ // So some QPUs will have 3 blocks of 64 to do, and others 2 blocks for an image 2048 wide with 32 blocks across ++-// Each block of 64*64 ++-// Smallest CTU size is 16x16, so smallest block is 8x8 ++-// Corresponds to a total of 83kbytes over all 12 QPUs +++// For each block of 64*64 the smallest block size is 8x4 ++ #define RPI_LUMA_COMMAND_WORDS 9 ++-#define Y_COMMANDS_PER_QPU ((1+3*(64*64)/(8*8)) * RPI_LUMA_COMMAND_WORDS) +++#define Y_COMMANDS_PER_QPU ((1+3*(64*64)/(8*4)) * RPI_LUMA_COMMAND_WORDS) ++ ++ #define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++@@ -2042,11 +2040,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *y = s->y_mvs[chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { +++ int bw = nPbW-start_x; +++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); ++- *y++ = ( (nPbW<16 ? nPbW : 16) << 16 ) + (nPbH<16 ? 
nPbH : 16); +++ *y++ = ( (bw<16 ? bw : 16) << 16 ) + (bh<16 ? bh : 16); ++ *y++ = my2_mx2_my_mx; ++ if (weight_flag) { ++ *y++ = (s->sh.luma_offset_l0[current_mv.ref_idx[reflist]] << 16) + (s->sh.luma_weight_l0[current_mv.ref_idx[reflist]] & 0xffff); ++@@ -2089,12 +2089,14 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ int bw = nPbW_c-start_x; +++ int bh = nPbH_c-start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++- *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); +++ *u++ = ( (bw<RPI_CHROMA_BLOCK_WIDTH ? bw : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (bh<16 ? bh : 16); ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; ++ if (weight_flag) { ++@@ -2141,11 +2143,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *y = s->y_mvs[chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { +++ int bw = nPbW-start_x; +++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); ++- *y++ = ( (nPbW<16 ? nPbW : 16) << 16 ) + (nPbH<16 ? nPbH : 16); +++ *y++ = ( (bw<16 ? 
bw : 16) << 16 ) + (bh<16 ? bh : 16); ++ *y++ = my2_mx2_my_mx; ++ if (weight_flag) { ++ *y++ = (s->sh.luma_offset_l0[current_mv.ref_idx[reflist]] << 16) + (s->sh.luma_weight_l0[current_mv.ref_idx[reflist]] & 0xffff); ++@@ -2189,12 +2193,14 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ int bw = nPbW_c-start_x; +++ int bh = nPbH_c-start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); ++- *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); +++ *u++ = ( (bw<RPI_CHROMA_BLOCK_WIDTH ? bw : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (bh<16 ? bh : 16); ++ // TODO chroma weight and offset... 
s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; ++@@ -2246,11 +2252,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *y = s->y_mvs[chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=8) { // B blocks work 8 at a time +++ int bw = nPbW-start_x; +++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y2 - 3 + start_y) << 16) + ( (x2 - 3 + start_x) & 0xffff); // Second fetch is for ref1 ++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); ++- *y++ = ( (nPbW<8 ? nPbW : 8) << 16 ) + (nPbH<16 ? nPbH : 16); +++ *y++ = ( (bw<8 ? bw : 8) << 16 ) + (bh<16 ? bh : 16); ++ *y++ = my2_mx2_my_mx; ++ *y++ = 1; // B frame weighted prediction not supported ++ *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); ++@@ -2293,12 +2301,14 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ uint32_t *u = s->u_mvs[chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { +++ int bw = nPbW_c-start_x; +++ int bh = nPbH_c-start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b0; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); ++- *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? 
nPbH_c : 16); +++ *u++ = ( (bw<RPI_CHROMA_BLOCK_WIDTH ? bw : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (bh<16 ? bh : 16); ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; ++ u+=2; // Weights not supported in B slices ++@@ -2309,7 +2319,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y2_c - 1 + start_y; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); ++- *u++ = ( (nPbW_c<RPI_CHROMA_BLOCK_WIDTH ? nPbW_c : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (nPbH_c<16 ? nPbH_c : 16); +++ *u++ = ( (bw<RPI_CHROMA_BLOCK_WIDTH ? bw : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (bh<16 ? bh : 16); ++ *u++ = rpi_filter_coefs[_mx2][0]; ++ *u++ = rpi_filter_coefs[_my2][0]; ++ u+=2; // Weights not supported in B slices ++@@ -3178,14 +3188,15 @@ static void rpi_simulate_inter_chroma(HEVCContext *s,uint32_t *p) ++ } ++ ++ // mc_setup(y_x, ref_y_base, y2_x2, ref_y2_base, frame_width_height, pitch, dst_pitch, offset_shift, next_kernel) ++-static void rpi_simulate_inter_luma(HEVCContext *s,uint32_t *p) +++static void rpi_simulate_inter_luma(HEVCContext *s,uint32_t *p,int chan) ++ { ++ uint32_t next_kernel; ++ int y_x,y2_x2; ++- uint32_t x0; ++- uint32_t y0; ++- uint32_t x2; ++- uint32_t y2; +++ int x0; +++ int y0; +++ int x2; +++ int y2; +++ uint32_t *p0 = p; ++ uint8_t *ref_y_base; ++ uint8_t *ref_y2_base; ++ uint32_t frame_width_height = p[4]; ++@@ -3215,13 +3226,15 @@ static void rpi_simulate_inter_luma(HEVCContext *s,uint32_t *p) ++ uint8_t *this_dst = compute_arm_addr(s,p[7],0); ++ uint32_t width = width_height >> 16; ++ uint32_t height = (width_height << 16) >> 16; +++ uint8_t *dst_base = s->frame->data[0]; ++ ref_y_base = compute_arm_addr(s,p[1-9],0); ++ ref_y2_base = compute_arm_addr(s,p[3-9],0); ++ for (y=0; y<height; ++y) { ++ for (x=0; x<width; ++x) { ++ if (next_kernel==s->mc_filter) { ++ int32_t refa = 
filter8_luma(ref_y_base,x+x0, y+y0, pitch, my2_mx2_my_mx, offset_weight,offset_before,denom,frame_width,frame_height); ++- this_dst[x+y*dst_pitch] = av_clip_uint8(refa); +++ refa = av_clip_uint8(refa); +++ this_dst[x+y*dst_pitch] = refa; ++ } ++ else { ++ int32_t refa = filter8_luma(ref_y_base, x+x0, y+y0, pitch, my2_mx2_my_mx, 1, 0, 0, frame_width, frame_height); ++@@ -3248,7 +3261,7 @@ static void rpi_simulate_inter_qpu(HEVCContext *s) ++ } ++ for(i=0;i<12;i++) ++ { ++- rpi_simulate_inter_luma(s,s->y_mvs_base[i]); +++ rpi_simulate_inter_luma(s,s->y_mvs_base[i],i); ++ } ++ } ++ ++@@ -3290,7 +3303,6 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ ++ #ifdef RPI_SIMULATE_QPUS ++ rpi_simulate_inter_qpu(s); ++- s->vpu_id = -1; ++ return; ++ #endif ++ ++-- ++2.7.4 ++ ++ ++From da5ae7e96dd961ccc7bc162c8acf336d54a50092 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Tue, 2 Jun 2015 14:36:54 +0100 ++Subject: [PATCH 50/68] Added support for skip deblock ++ ++--- ++ libavcodec/hevc.c | 5 +++++ ++ libavcodec/hevc.h | 2 ++ ++ libavcodec/hevc_filter.c | 14 ++++---------- ++ 3 files changed, 11 insertions(+), 10 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 3fb1e2a..0ac4f4c 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3397,6 +3397,11 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ } ++ ++ #endif +++ s->used_for_ref = !(s->nal_unit_type == NAL_TRAIL_N || +++ s->nal_unit_type == NAL_TSA_N || +++ s->nal_unit_type == NAL_STSA_N || +++ s->nal_unit_type == NAL_RADL_N || +++ s->nal_unit_type == NAL_RASL_N); ++ ++ if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { ++ av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n"); ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 5df9dcd..5cb90b5 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -890,6 +890,8 @@ typedef struct HEVCContext { ++ int width; ++ int height; ++ +++ int 
used_for_ref; +++ ++ #ifdef RPI ++ int enable_rpi; ++ HEVCMvCmd *unif_mv_cmds; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 11629e4..14a0952 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -512,16 +512,14 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->ps.pps->transquant_bypass_enable_flag; ++ ++ #ifdef DISABLE_DEBLOCK_NONREF ++- if ( s->nal_unit_type == NAL_TRAIL_N || ++- s->nal_unit_type == NAL_TSA_N || ++- s->nal_unit_type == NAL_STSA_N || ++- s->nal_unit_type == NAL_RADL_N || ++- s->nal_unit_type == NAL_RASL_N ) +++ if (!s->used_for_ref) ++ return; // Don't deblock non-reference frames ++ #endif ++ #ifdef DISABLE_DEBLOCK ++ return; ++ #endif +++ if (!s->used_for_ref && s->avctx->skip_loop_filter >= AVDISCARD_NONREF) +++ return; ++ ++ if (x0) { ++ left_tc_offset = s->deblock[ctb - 1].tc_offset; ++@@ -885,11 +883,7 @@ static int ff_hevc_buf_base(AVBufferRef *bref) { ++ ++ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ { ++- if (s->enable_rpi && !( s->nal_unit_type == NAL_TRAIL_N || ++- s->nal_unit_type == NAL_TSA_N || ++- s->nal_unit_type == NAL_STSA_N || ++- s->nal_unit_type == NAL_RADL_N || ++- s->nal_unit_type == NAL_RASL_N )) { +++ if (s->enable_rpi && s->used_for_ref) { ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++ int curr_y = ((int *)f->progress->data)[0]; ++-- ++2.7.4 ++ ++ ++From 6401d88c310cd3bfec7be94bf3ceb6d0c5736c7e Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Tue, 2 Jun 2015 15:22:52 +0100 ++Subject: [PATCH 51/68] Added support for skip_frame ++ ++--- ++ libavcodec/hevc.c | 15 ++++++++++----- ++ 1 file changed, 10 insertions(+), 5 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 0ac4f4c..639e4df 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3397,11 +3397,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void 
*isFilterThread) ++ } ++ ++ #endif ++- s->used_for_ref = !(s->nal_unit_type == NAL_TRAIL_N || ++- s->nal_unit_type == NAL_TSA_N || ++- s->nal_unit_type == NAL_STSA_N || ++- s->nal_unit_type == NAL_RADL_N || ++- s->nal_unit_type == NAL_RASL_N); ++ ++ if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { ++ av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n"); ++@@ -3925,6 +3920,16 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal) ++ if (ret < 0) ++ return ret; ++ +++ s->used_for_ref = !(s->nal_unit_type == NAL_TRAIL_N || +++ s->nal_unit_type == NAL_TSA_N || +++ s->nal_unit_type == NAL_STSA_N || +++ s->nal_unit_type == NAL_RADL_N || +++ s->nal_unit_type == NAL_RASL_N); +++ +++ if (!s->used_for_ref && s->avctx->skip_frame >= AVDISCARD_NONREF) { +++ s->is_decoded = 0; +++ break; +++ } ++ if (s->max_ra == INT_MAX) { ++ if (s->nal_unit_type == NAL_CRA_NUT || IS_BLA(s)) { ++ s->max_ra = s->poc; ++-- ++2.7.4 ++ ++ ++From d2951e2ca73e234d1b775621e3993948a4a2c8ea Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 3 Jun 2015 09:15:38 +0100 ++Subject: [PATCH 52/68] Fixed cache flushing of luma when using old method ++ ++--- ++ libavcodec/hevc_filter.c | 2 +- ++ 1 file changed, 1 insertion(+), 1 deletion(-) ++ ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 14a0952..b286bbf 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -919,7 +919,7 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ flush_buffer(s->frame->buf[1]); ++ flush_buffer(s->frame->buf[2]); ++ #ifdef RPI_LUMA_QPU ++- flush_buffer(s->frame->buf[1]); +++ flush_buffer(s->frame->buf[0]); ++ #endif ++ ++ #endif ++-- ++2.7.4 ++ ++ ++From 7ae612e69c1cabcc7d0b37b65efa8c5bdcfa7bf5 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 3 Jun 2015 11:37:27 +0100 ++Subject: [PATCH 53/68] Option to parallelise coefficient decode and inter ++ prediction and 
deblock for each frame ++ ++--- ++ libavcodec/hevc.c | 701 +++++++++++++++++++++++++++-------------- ++ libavcodec/hevc.h | 74 +++-- ++ libavcodec/hevc_cabac.c | 12 +- ++ libavcodec/hevcpred_template.c | 5 +- ++ 4 files changed, 522 insertions(+), 270 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 639e4df..12aacc5 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -43,8 +43,6 @@ ++ ++ #ifdef RPI ++ #include "rpi_qpu.h" ++- // For some unknown reason, the code seems to crash if I do a late malloc ++- //#define EARLY_MALLOC ++ // Move Inter prediction into separate pass ++ #define RPI_INTER ++ ++@@ -58,6 +56,21 @@ ++ ++ // Define RPI_SIMULATE_QPUS for debugging to run QPU code on the ARMs ++ //#define RPI_SIMULATE_QPUS +++ #ifdef RPI_WORKER +++ #include "pthread.h" +++ #endif +++ +++ static void rpi_execute_dblk_cmds(HEVCContext *s); +++ static void rpi_execute_transform(HEVCContext *s); +++ static void rpi_execute_inter_qpu(HEVCContext *s); +++ static void rpi_execute_pred_cmds(HEVCContext *s); +++ static void rpi_execute_inter_cmds(HEVCContext *s); +++ static void rpi_inter_clear(HEVCContext *s); +++ +++ // Define INTER_PASS0 to do inter prediction in first pass +++ //#define INTER_PASS0 +++ // Define LAUNCH_PASS0 to launch QPU/VPU from pass0 +++ //#define LAUNCH_PASS0 ++ ++ #endif ++ ++@@ -105,6 +118,143 @@ static uint32_t get_vc_address(AVBufferRef *bref) { ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++ return p->vc; ++ } +++#endif +++ +++ +++#ifdef RPI_WORKER +++ +++//#define LOG_ENTER printf("Enter %s: p0=%d p1=%d (%d jobs) %p\n", __func__,s->pass0_job,s->pass1_job,s->worker_tail-s->worker_head,s); +++//#define LOG_EXIT printf("Exit %s: p0=%d p1=%d (%d jobs) %p\n", __func__,s->pass0_job,s->pass1_job,s->worker_tail-s->worker_head,s); +++ +++#define LOG_ENTER +++#define LOG_EXIT +++ +++// Call this when we have completed pass0 and wish to trigger pass1 for the current job +++static void 
worker_submit_job(HEVCContext *s) +++{ +++ LOG_ENTER +++ //pthread_mutex_lock(&s->worker_mutex); +++ s->worker_tail++; // This is the only place that can change tail so we do not need the mutex +++ s->pass0_job = (s->pass0_job + 1) % RPI_MAX_JOBS; // Move onto the next slot +++ pthread_cond_broadcast(&s->worker_cond_tail); // Let people know that the tail has moved +++ //pthread_mutex_unlock(&s->worker_mutex); +++ LOG_EXIT +++} +++ +++// Call this to say we have completed pass1 +++static void worker_complete_middle_job(HEVCContext *s) +++{ +++ LOG_ENTER +++ //pthread_mutex_lock(&s->worker_mutex); +++ s->worker_middle++; // This is the only place that can change head so we do not need the mutex +++ s->pass1_job = (s->pass1_job + 1) % RPI_MAX_JOBS; // Move onto the next slot +++ pthread_cond_broadcast(&s->worker_cond_middle); // Let people know that the tail has moved +++ //pthread_mutex_unlock(&s->worker_mutex); +++ LOG_EXIT +++} +++ +++// Call this to say we have completed pass2 +++static void worker_complete_job(HEVCContext *s) +++{ +++ LOG_ENTER +++ //pthread_mutex_lock(&s->worker_mutex); +++ s->worker_head++; // This is the only place that can change head so we do not need the mutex +++ s->pass2_job = (s->pass2_job + 1) % RPI_MAX_JOBS; // Move onto the next slot +++ pthread_cond_broadcast(&s->worker_cond_head); // Let people know that the tail has moved +++ //pthread_mutex_unlock(&s->worker_mutex); +++ LOG_EXIT +++} +++ +++// Call this to wait for all jobs to have completed at the end of a frame +++static void worker_wait(HEVCContext *s) +++{ +++ LOG_ENTER +++ pthread_mutex_lock(&s->worker_mutex); +++ while( s->worker_head !=s->worker_tail) +++ { +++ pthread_cond_wait(&s->worker_cond_head, &s->worker_mutex); +++ } +++ pthread_mutex_unlock(&s->worker_mutex); +++ LOG_EXIT +++} +++ +++// Call worker_pass0_ready to wait until the s->pass0_job slot becomes +++// available to receive the next job. 
+++static void worker_pass0_ready(HEVCContext *s) +++{ +++ LOG_ENTER +++ pthread_mutex_lock(&s->worker_mutex); +++ // tail is number of submitted jobs +++ // head is number of completed jobs +++ // tail-head is number of outstanding jobs in the queue +++ // we need to ensure there is at least 1 space left for us to use +++ while( s->worker_tail - s->worker_head >= RPI_MAX_JOBS) +++ { +++ // Wait until another job is completed +++ pthread_cond_wait(&s->worker_cond_head, &s->worker_mutex); +++ } +++ pthread_mutex_unlock(&s->worker_mutex); +++ LOG_EXIT +++} +++ +++static void *worker_start(void *arg) +++{ +++ HEVCContext *s = (HEVCContext *)arg; +++ while(1) { +++ pthread_mutex_lock(&s->worker_mutex); +++ +++ while( !s->kill_worker && s->worker_tail - s->worker_middle <= 0) +++ { +++ pthread_cond_wait(&s->worker_cond_tail, &s->worker_mutex); +++ } +++ pthread_mutex_unlock(&s->worker_mutex); +++ +++ if (s->kill_worker) { +++ break; +++ } +++ LOG_ENTER +++ // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); +++#ifndef LAUNCH_PASS0 +++ rpi_execute_inter_qpu(s); +++#endif +++#ifndef INTER_PASS0 +++ // Perform inter prediction +++ rpi_execute_inter_cmds(s); +++#endif +++ // Wait for transform completion +++ vpu_wait(s->vpu_id); +++ +++ worker_complete_middle_job(s); +++ LOG_EXIT +++ } +++ return NULL; +++} +++ +++static void *worker_deblock_start(void *arg) +++{ +++ HEVCContext *s = (HEVCContext *)arg; +++ while(1) { +++ pthread_mutex_lock(&s->worker_mutex); +++ while( !s->kill_worker && s->worker_middle - s->worker_head <= 0) +++ { +++ pthread_cond_wait(&s->worker_cond_middle, &s->worker_mutex); +++ } +++ pthread_mutex_unlock(&s->worker_mutex); +++ +++ if (s->kill_worker) { +++ break; +++ } +++ LOG_ENTER +++ // Perform intra prediction and residual reconstruction +++ rpi_execute_pred_cmds(s); +++ // Perform deblocking for CTBs in this row +++ rpi_execute_dblk_cmds(s); +++ +++ 
worker_complete_job(s); +++ LOG_EXIT +++ } +++ return NULL; +++} ++ ++ #endif ++ ++@@ -121,19 +271,18 @@ static uint32_t get_vc_address(AVBufferRef *bref) { ++ static void pic_arrays_free(HEVCContext *s) ++ { ++ #ifdef RPI ++- ++-#ifdef EARLY_MALLOC ++-#else ++- if (s->coeffs_buf_arm[0]) { ++- gpu_free(&s->coeffs_buf_default); ++- s->coeffs_buf_arm[0] = 0; ++- } ++- if (s->coeffs_buf_arm[2]) { ++- gpu_free(&s->coeffs_buf_accelerated); ++- s->coeffs_buf_arm[2] = 0; +++ int job; +++ for(job=0;job<RPI_MAX_JOBS;job++) { +++ if (s->coeffs_buf_arm[job][0]) { +++ gpu_free(&s->coeffs_buf_default[job]); +++ s->coeffs_buf_arm[job][0] = 0; +++ } +++ if (s->coeffs_buf_arm[job][2]) { +++ gpu_free(&s->coeffs_buf_accelerated[job]); +++ s->coeffs_buf_arm[job][2] = 0; +++ } ++ } ++ #endif ++-#endif ++ av_freep(&s->sao); ++ av_freep(&s->deblock); ++ ++@@ -171,24 +320,26 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ int min_pu_size = sps->min_pu_width * sps->min_pu_height; ++ ++ #ifdef RPI ++-#ifdef EARLY_MALLOC ++-#else ++ av_assert0(sps); ++ int coeffs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); ++ int coefs_per_row = sps->ctb_width * coeffs_in_ctb * 3; // Allow space for chroma ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); ++- s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; ++- if (!s->coeffs_buf_arm[0]) ++- goto fail; ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); ++- s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; ++- s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; ++- if (!s->coeffs_buf_arm[2]) ++- goto fail; ++- s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; ++- s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; ++- printf("Done\n"); ++-#endif +++ int job; +++ for(job=0;job<RPI_MAX_JOBS;job++) { +++ printf("Allocated %d\n",coefs_per_row); +++ for(job=0;job<RPI_MAX_JOBS;job++) { +++ 
gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default[job]); +++ s->coeffs_buf_arm[job][0] = (int16_t*) s->coeffs_buf_default[job].arm; +++ if (!s->coeffs_buf_arm[job][0]) +++ goto fail; +++ gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated[job]); +++ s->coeffs_buf_arm[job][2] = (int16_t*) s->coeffs_buf_accelerated[job].arm; +++ s->coeffs_buf_vc[job][2] = s->coeffs_buf_accelerated[job].vc; +++ if (!s->coeffs_buf_arm[job][2]) +++ goto fail; +++ s->coeffs_buf_arm[job][3] = coefs_per_row + s->coeffs_buf_arm[job][2]; +++ s->coeffs_buf_vc[job][3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[job][2]; +++ } +++ } ++ #endif ++ ++ s->bs_width = (width >> 2) + 1; ++@@ -1036,7 +1187,7 @@ static void rpi_intra_pred(HEVCContext *s, int log2_trafo_size, int x0, int y0, ++ { ++ if (s->enable_rpi) { ++ HEVCLocalContext *lc = s->HEVClc; ++- HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; +++ HEVCPredCmd *cmd = s->univ_pred_cmds[s->pass0_job] + s->num_pred_cmds[s->pass0_job]++; ++ cmd->type = RPI_PRED_INTRA; ++ cmd->size = log2_trafo_size; ++ cmd->c_idx = c_idx; ++@@ -1496,7 +1647,7 @@ static void rpi_luma_mc_uni(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref, const Mv *mv, int x_off, int y_off, ++ int block_w, int block_h, int luma_weight, int luma_offset) ++ { ++- HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds[s->pass0_job] + s->num_mv_cmds[s->pass0_job]++; ++ cmd->cmd = RPI_CMD_LUMA_UNI; ++ cmd->dst = dst; ++ cmd->dststride = dststride; ++@@ -1515,7 +1666,7 @@ static void rpi_luma_mc_bi(HEVCContext *s, uint8_t *dst, ptrdiff_t dststride, ++ AVFrame *ref0, const Mv *mv0, int x_off, int y_off, ++ int block_w, int block_h, AVFrame *ref1, const Mv *mv1, struct MvField *current_mv) ++ { ++- HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds[s->pass0_job] + s->num_mv_cmds[s->pass0_job]++; ++ cmd->cmd = 
RPI_CMD_LUMA_BI; ++ cmd->dst = dst; ++ cmd->dststride = dststride; ++@@ -1537,7 +1688,7 @@ static void rpi_chroma_mc_uni(HEVCContext *s, uint8_t *dst0, ++ ptrdiff_t dststride, uint8_t *src0, ptrdiff_t srcstride, int reflist, ++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int chroma_weight, int chroma_offset) ++ { ++- HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds[s->pass0_job] + s->num_mv_cmds[s->pass0_job]++; ++ cmd->cmd = RPI_CMD_CHROMA_UNI; ++ cmd->dst = dst0; ++ cmd->dststride = dststride; ++@@ -1555,7 +1706,7 @@ static void rpi_chroma_mc_uni(HEVCContext *s, uint8_t *dst0, ++ static void rpi_chroma_mc_bi(HEVCContext *s, uint8_t *dst0, ptrdiff_t dststride, AVFrame *ref0, AVFrame *ref1, ++ int x_off, int y_off, int block_w, int block_h, struct MvField *current_mv, int cidx) ++ { ++- HEVCMvCmd *cmd = s->unif_mv_cmds + s->num_mv_cmds++; +++ HEVCMvCmd *cmd = s->unif_mv_cmds[s->pass0_job] + s->num_mv_cmds[s->pass0_job]++; ++ cmd->cmd = RPI_CMD_CHROMA_BI+cidx; ++ cmd->dst = dst0; ++ cmd->dststride = dststride; ++@@ -2037,7 +2188,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int chan = x0>>6; // 64 wide blocks per QPU ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++- uint32_t *y = s->y_mvs[chan % 12]; +++ uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { ++ int bw = nPbW-start_x; ++@@ -2057,7 +2208,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++- s->y_mvs[chan % 12] = y; +++ s->y_mvs[s->pass0_job][chan % 12] = y; ++ } else ++ #endif ++ { ++@@ -2086,7 +2237,7 @@ static void 
hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++- uint32_t *u = s->u_mvs[chan & 7]; +++ uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2110,7 +2261,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[chan & 7] = u; +++ s->u_mvs[s->pass0_job][chan & 7] = u; ++ return; ++ } ++ #endif ++@@ -2140,7 +2291,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int chan = x0>>6; // 64 wide blocks per QPU ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++- uint32_t *y = s->y_mvs[chan % 12]; +++ uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { ++ int bw = nPbW-start_x; ++@@ -2160,7 +2311,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++- s->y_mvs[chan % 12] = y; +++ s->y_mvs[s->pass0_job][chan % 12] = y; ++ } else ++ #endif ++ ++@@ -2190,7 +2341,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++- uint32_t *u = s->u_mvs[chan & 7]; +++ uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int 
start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2215,7 +2366,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[chan & 7] = u; +++ s->u_mvs[s->pass0_job][chan & 7] = u; ++ return; ++ } ++ #endif ++@@ -2249,7 +2400,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int x2 = x0 + (mv2->x >> 2); ++ int y2 = y0 + (mv2->y >> 2); ++ int chan = x0>>6; // 64 wide blocks per QPU ++- uint32_t *y = s->y_mvs[chan % 12]; +++ uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=8) { // B blocks work 8 at a time ++ int bw = nPbW-start_x; ++@@ -2265,7 +2416,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter_b; ++ } ++ } ++- s->y_mvs[chan % 12] = y; +++ s->y_mvs[s->pass0_job][chan % 12] = y; ++ } else ++ #endif ++ { ++@@ -2298,7 +2449,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ ++ int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width ++ ++- uint32_t *u = s->u_mvs[chan & 7]; +++ uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2327,7 +2478,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[chan & 7] = u; +++ s->u_mvs[s->pass0_job][chan & 7] = u; ++ return; ++ } ++ #endif ++@@ -2832,40 +2983,54 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, ++ static void rpi_execute_dblk_cmds(HEVCContext *s) ++ { ++ int n; +++ int job = s->pass2_job; ++ int ctb_size = 1 << s->ps.sps->log2_ctb_size; ++- int (*p)[2] = s->dblk_cmds; ++- for(n = s->num_dblk_cmds; n>0 ;n--,p++) { +++ int (*p)[2] = s->dblk_cmds[job]; +++ for(n = s->num_dblk_cmds[job]; n>0 ;n--,p++) { ++ ff_hevc_hls_filters(s, (*p)[0], (*p)[1], ctb_size); ++ } ++- s->num_dblk_cmds = 0; +++ s->num_dblk_cmds[job] = 0; ++ } ++ ++ static void rpi_execute_transform(HEVCContext *s) ++ { ++ int i=2; +++#ifdef LAUNCH_PASS0 +++ int job = s->pass0_job; +++#else +++ int job = s->pass1_job; +++#endif ++ //int j; ++ //int16_t *coeffs = s->coeffs_buf_arm[i]; ++ //for(j=s->num_coeffs[i]; j > 0; j-= 16*16, coeffs+=16*16) { ++ // s->hevcdsp.idct[4-2](coeffs, 16); ++ //} ++ ++- gpu_cache_flush(&s->coeffs_buf_accelerated); ++- s->vpu_id = vpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, &s->coeffs_buf_accelerated); +++ gpu_cache_flush(&s->coeffs_buf_accelerated[job]); +++ s->vpu_id = vpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], +++ s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], +++ s->num_coeffs[job][3] >> 10, 0, &s->coeffs_buf_accelerated[job]); ++ //vpu_execute_code( 
vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); ++ //gpu_cache_flush(&s->coeffs_buf_accelerated); ++ //vpu_wait(s->vpu_id); ++ ++ for(i=0;i<4;i++) ++- s->num_coeffs[i] = 0; +++ s->num_coeffs[job][i] = 0; ++ } ++ ++ static void rpi_execute_pred_cmds(HEVCContext *s) ++ { ++ int i; ++- HEVCPredCmd *cmd = s->univ_pred_cmds; +++ int job = s->pass2_job; +++ HEVCPredCmd *cmd = s->univ_pred_cmds[job]; +++#ifdef RPI_WORKER +++ HEVCLocalContextIntra *lc = &s->HEVClcIntra; +++#else ++ HEVCLocalContext *lc = s->HEVClc; +++#endif ++ ++- for(i = s->num_pred_cmds; i > 0; i--, cmd++) { +++ for(i = s->num_pred_cmds[job]; i > 0; i--, cmd++) { +++ //printf("i=%d cmd=%p job1=%d job0=%d\n",i,cmd,s->pass1_job,s->pass0_job); ++ if (cmd->type == RPI_PRED_INTRA) { ++ lc->tu.intra_pred_mode_c = lc->tu.intra_pred_mode = cmd->mode; ++ lc->na.cand_bottom_left = (cmd->na >> 4) & 1; ++@@ -2884,21 +3049,26 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ #endif ++ } ++ } ++- s->num_pred_cmds = 0; +++ s->num_pred_cmds[job] = 0; ++ } ++ ++ static void rpi_execute_inter_cmds(HEVCContext *s) ++ { ++- HEVCMvCmd *cmd = s->unif_mv_cmds; +++#ifdef INTER_PASS0 +++ int job = s->pass0_job; +++#else +++ int job = s->pass1_job; +++#endif +++ HEVCMvCmd *cmd = s->unif_mv_cmds[job]; ++ int n,cidx; ++ AVFrame myref; ++ AVFrame myref1; ++ struct MvField mymv; ++- if (s->num_mv_cmds > RPI_MAX_MV_CMDS) { +++ if (s->num_mv_cmds[job] > RPI_MAX_MV_CMDS) { ++ printf("Overflow inter_cmds\n"); ++ exit(-1); ++ } ++- for(n = s->num_mv_cmds; n>0 ; n--, cmd++) { +++ for(n = s->num_mv_cmds[job]; n>0 ; n--, cmd++) { ++ switch(cmd->cmd) { ++ case RPI_CMD_LUMA_UNI: ++ myref.data[0] = cmd->src; ++@@ -2938,7 +3108,28 @@ static void rpi_execute_inter_cmds(HEVCContext *s) ++ break; ++ } ++ } ++- s->num_mv_cmds = 0; +++ s->num_mv_cmds[job] = 0; +++} +++ +++static void rpi_do_all_passes(HEVCContext *s) +++{ +++#ifdef RPI_INTER_QPU +++ // Kick off 
inter prediction on QPUs +++ rpi_execute_inter_qpu(s); +++#else +++ rpi_execute_transform(s); +++#endif +++ // Perform luma inter prediction +++ rpi_execute_inter_cmds(s); +++ // Wait for transform completion +++ vpu_wait(s->vpu_id); +++ // Perform intra prediction and residual reconstruction +++ rpi_execute_pred_cmds(s); +++ // Perform deblocking for CTBs in this row +++ rpi_execute_dblk_cmds(s); +++#ifdef RPI_INTER_QPU +++ rpi_inter_clear(s); +++#endif ++ } ++ ++ #endif ++@@ -2946,6 +3137,7 @@ static void rpi_execute_inter_cmds(HEVCContext *s) ++ #ifdef RPI_INTER_QPU ++ static void rpi_inter_clear(HEVCContext *s) ++ { +++ int job = s->pass0_job; ++ int i; ++ int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1]; ++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1]; ++@@ -2953,51 +3145,50 @@ static void rpi_inter_clear(HEVCContext *s) ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++ for(i=0;i<8;i++) { ++- s->u_mvs[i] = s->mvs_base[i]; ++- *s->u_mvs[i]++ = 0; ++- *s->u_mvs[i]++ = 0; ++- *s->u_mvs[i]++ = 0; ++- *s->u_mvs[i]++ = 0; ++- *s->u_mvs[i]++ = 0; ++- *s->u_mvs[i]++ = pic_width; ++- *s->u_mvs[i]++ = pic_height; ++- *s->u_mvs[i]++ = s->frame->linesize[1]; ++- *s->u_mvs[i]++ = s->frame->linesize[2]; +++ s->u_mvs[job][i] = s->mvs_base[job][i]; +++ *s->u_mvs[job][i]++ = 0; +++ *s->u_mvs[job][i]++ = 0; +++ *s->u_mvs[job][i]++ = 0; +++ *s->u_mvs[job][i]++ = 0; +++ *s->u_mvs[job][i]++ = 0; +++ *s->u_mvs[job][i]++ = pic_width; +++ *s->u_mvs[job][i]++ = pic_height; +++ *s->u_mvs[job][i]++ = s->frame->linesize[1]; +++ *s->u_mvs[job][i]++ = s->frame->linesize[2]; ++ if (weight_flag) { ++- *s->u_mvs[i]++ = 1 << (s->sh.chroma_log2_weight_denom + 6 - 1); ++- *s->u_mvs[i]++ = s->sh.chroma_log2_weight_denom + 6; +++ *s->u_mvs[job][i]++ = 1 << (s->sh.chroma_log2_weight_denom + 6 - 1); +++ *s->u_mvs[job][i]++ = s->sh.chroma_log2_weight_denom + 6; ++ } else { ++- *s->u_mvs[i]++ = 1 << 5; ++- *s->u_mvs[i]++ = 6; +++ *s->u_mvs[job][i]++ = 1 
<< 5; +++ *s->u_mvs[job][i]++ = 6; ++ } ++- *s->u_mvs[i]++ = i; // Select section of VPM (avoid collisions with 3d unit) +++ *s->u_mvs[job][i]++ = i; // Select section of VPM (avoid collisions with 3d unit) ++ } ++ ++ #ifdef RPI_LUMA_QPU ++ for(i=0;i<12;i++) { ++- s->y_mvs[i] = s->y_mvs_base[i]; ++- *s->y_mvs[i]++ = 0; // y_x ++- *s->y_mvs[i]++ = 0; // ref_y_base ++- *s->y_mvs[i]++ = 0; // y2_x2 ++- *s->y_mvs[i]++ = 0; // ref_y2_base ++- *s->y_mvs[i]++ = (s->ps.sps->width << 16) + s->ps.sps->height; ++- *s->y_mvs[i]++ = s->frame->linesize[0]; // pitch ++- *s->y_mvs[i]++ = s->frame->linesize[0]; // dst_pitch +++ s->y_mvs[job][i] = s->y_mvs_base[job][i]; +++ *s->y_mvs[job][i]++ = 0; // y_x +++ *s->y_mvs[job][i]++ = 0; // ref_y_base +++ *s->y_mvs[job][i]++ = 0; // y2_x2 +++ *s->y_mvs[job][i]++ = 0; // ref_y2_base +++ *s->y_mvs[job][i]++ = (s->ps.sps->width << 16) + s->ps.sps->height; +++ *s->y_mvs[job][i]++ = s->frame->linesize[0]; // pitch +++ *s->y_mvs[job][i]++ = s->frame->linesize[0]; // dst_pitch ++ if (weight_flag) { ++ int offset = 1 << (s->sh.luma_log2_weight_denom + 6 - 1); ++ int shift = s->sh.luma_log2_weight_denom + 6; ++- *s->y_mvs[i]++ = (offset << 16) + shift; +++ *s->y_mvs[job][i]++ = (offset << 16) + shift; ++ } else { ++ int offset = 1 << 5; ++ int shift = 6; ++- *s->y_mvs[i]++ = (offset << 16) + shift; +++ *s->y_mvs[job][i]++ = (offset << 16) + shift; ++ } ++- *s->y_mvs[i]++ = 0; // Next kernel +++ *s->y_mvs[job][i]++ = 0; // Next kernel ++ } ++ #endif ++ } ++ ++- ++ #ifdef RPI_SIMULATE_QPUS ++ ++ static int32_t clipx(int x,int FRAME_WIDTH) ++@@ -3271,10 +3462,15 @@ static void rpi_simulate_inter_qpu(HEVCContext *s) ++ static void rpi_execute_inter_qpu(HEVCContext *s) ++ { ++ int k; +++#ifdef LAUNCH_PASS0 +++ int job = s->pass0_job; +++#else +++ int job = s->pass1_job; +++#endif ++ int i; ++- uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr.vc; +++ uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr[job].vc; ++ #ifdef RPI_LUMA_QPU ++- uint32_t *y_unif_vc 
= (uint32_t *)s->y_unif_mvs_ptr.vc; +++ uint32_t *y_unif_vc = (uint32_t *)s->y_unif_mvs_ptr[job].vc; ++ #endif ++ if (s->sh.slice_type == I_SLICE) { ++ #ifdef RPI_MULTI_MAILBOX ++@@ -3283,22 +3479,22 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ #endif ++ } ++ for(k=0;k<8;k++) { ++- s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++- s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++- s->u_mvs[k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for V ++- av_assert0(s->u_mvs[k] - s->mvs_base[k] < UV_COMMANDS_PER_QPU); +++ s->u_mvs[job][k][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_EXIT); // Add exit command +++ s->u_mvs[job][k][-RPI_CHROMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->u_mvs[job][k][-RPI_CHROMA_COMMAND_WORDS+4] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for V +++ av_assert0(s->u_mvs[job][k] - s->mvs_base[job][k] < UV_COMMANDS_PER_QPU); ++ } ++ ++- s->u_mvs[8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore +++ s->u_mvs[job][8-1][-RPI_CHROMA_COMMAND_WORDS] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT8); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++ ++ #ifdef RPI_LUMA_QPU ++ for(k=0;k<12;k++) { ++- s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+1] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined ++- s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for second request ++- s->y_mvs[k][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_EXIT); // Add exit command ++- 
av_assert0(s->y_mvs[k] - s->y_mvs_base[k] < Y_COMMANDS_PER_QPU); +++ s->y_mvs[job][k][-RPI_LUMA_COMMAND_WORDS+1] = qpu_get_fn(QPU_MC_SETUP_UV); // A dummy texture location (maps to our code) - this is needed as the texture requests are pipelined +++ s->y_mvs[job][k][-RPI_LUMA_COMMAND_WORDS+3] = qpu_get_fn(QPU_MC_SETUP_UV); // Also need a dummy for second request +++ s->y_mvs[job][k][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_EXIT); // Add exit command +++ av_assert0(s->y_mvs[job][k] - s->y_mvs_base[job][k] < Y_COMMANDS_PER_QPU); ++ } ++- s->y_mvs[12-1][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT12); // This QPU will signal interrupt when all others are done and have acquired a semaphore +++ s->y_mvs[job][12-1][-RPI_LUMA_COMMAND_WORDS+8] = qpu_get_fn(QPU_MC_INTERRUPT_EXIT12); // This QPU will signal interrupt when all others are done and have acquired a semaphore ++ #endif ++ ++ #ifdef RPI_SIMULATE_QPUS ++@@ -3308,34 +3504,34 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ ++ #ifdef RPI_MULTI_MAILBOX ++ #ifdef RPI_CACHE_UNIF_MVS ++- gpu_cache_flush3(&s->coeffs_buf_accelerated,&s->y_unif_mvs_ptr, &s->unif_mvs_ptr); +++ gpu_cache_flush3(&s->coeffs_buf_accelerated[job],&s->y_unif_mvs_ptr[job], &s->unif_mvs_ptr[job]); ++ #else ++- gpu_cache_flush(&s->coeffs_buf_accelerated); +++ gpu_cache_flush(&s->coeffs_buf_accelerated[job]); ++ #endif ++- s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0, +++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], s->num_coeffs[job][3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++- (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[2 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- 
(uint32_t)(unif_vc+(s->mvs_base[3 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[6 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][0 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][1 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][2 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][3 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][4 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][5 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][6 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][7 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), ++ #ifdef RPI_LUMA_QPU ++ qpu_get_fn(QPU_MC_SETUP), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[0 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[1 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[2 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[3 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[4 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[5 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[6 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[7 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[8 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[9 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[10 ] - 
(uint32_t*)s->y_unif_mvs_ptr.arm)), ++- (uint32_t)(y_unif_vc+(s->y_mvs_base[11 ] - (uint32_t*)s->y_unif_mvs_ptr.arm)) +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][0 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][1 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][2 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][3 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][4 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][5 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][6 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][7 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][8 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][9 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][10 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)), +++ (uint32_t)(y_unif_vc+(s->y_mvs_base[job][11 ] - (uint32_t*)s->y_unif_mvs_ptr[job].arm)) ++ #else ++ 0, ++ 0,0,0,0, ++@@ -3344,17 +3540,17 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ #endif ++ ); ++ for(i=0;i<4;i++) ++- s->num_coeffs[i] = 0; +++ s->num_coeffs[job][i] = 0; ++ #else ++ qpu_run_shader8(qpu_get_fn(QPU_MC_SETUP_UV), ++- (uint32_t)(unif_vc+(s->mvs_base[0 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[1 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[2 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[3 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[4 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[5 ] - (uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[6 ] - 
(uint32_t*)s->unif_mvs_ptr.arm)), ++- (uint32_t)(unif_vc+(s->mvs_base[7 ] - (uint32_t*)s->unif_mvs_ptr.arm)) +++ (uint32_t)(unif_vc+(s->mvs_base[job][0 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][1 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][2 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][3 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][4 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][5 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][6 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), +++ (uint32_t)(unif_vc+(s->mvs_base[job][7 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)) ++ ); ++ #endif ++ ++@@ -3411,6 +3607,11 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ } ++ } ++ +++#ifdef RPI_WORKER +++ s->pass0_job = 0; +++ s->pass1_job = 0; +++ s->pass2_job = 0; +++#endif ++ #ifdef RPI_INTER_QPU ++ rpi_inter_clear(s); ++ #endif ++@@ -3431,46 +3632,42 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag; ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); +++ ++ #ifdef RPI ++ if (s->enable_rpi) { ++- s->dblk_cmds[s->num_dblk_cmds][0] = x_ctb; ++- s->dblk_cmds[s->num_dblk_cmds++][1] = y_ctb; +++ s->dblk_cmds[s->pass0_job][s->num_dblk_cmds[s->pass0_job]][0] = x_ctb; +++ s->dblk_cmds[s->pass0_job][s->num_dblk_cmds[s->pass0_job]++][1] = y_ctb; ++ if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= s->ps.sps->width) { ++- // Transform all blocks ++- // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++-#ifdef RPI_MULTI_MAILBOX ++- // Kick off inter prediction on QPUs ++- rpi_execute_inter_qpu(s); 
++- // Perform luma inter prediction ++- rpi_execute_inter_cmds(s); ++-#else ++- rpi_execute_transform(s); ++- // Perform inter prediction ++- rpi_execute_inter_cmds(s); ++-#ifdef RPI_INTER_QPU ++- // Kick off inter prediction on QPUs ++- rpi_execute_inter_qpu(s); ++-#endif ++-#endif ++- ++- // Wait for transform completion ++- vpu_wait(s->vpu_id); ++- ++- // Copy back reconstructed data ++- //memcpy(s->frame->data[0],s->dummy.arm,2048*64); ++- //memcpy(s->frame->data[1],s->dummy.arm,1024*32); ++- //memcpy(s->frame->data[2],s->dummy.arm,1024*32); +++#ifdef RPI_WORKER +++ if (s->used_for_ref) { +++ // Split work load onto separate threads so we make as rapid progress as possible with this frame +++ #ifdef INTER_PASS0 +++ rpi_execute_inter_cmds(s); +++ #endif +++ #ifdef LAUNCH_PASS0 +++ rpi_execute_inter_qpu(s); +++ #endif +++ // Pass on this job to worker thread +++ worker_submit_job(s); +++ // Make sure we have space to prepare the next job +++ worker_pass0_ready(s); ++ ++- // Perform intra prediction and residual reconstruction ++- rpi_execute_pred_cmds(s); ++- // Perform deblocking for CTBs in this row ++- rpi_execute_dblk_cmds(s); +++ // Prepare the next batch of commands ++ #ifdef RPI_INTER_QPU ++- rpi_inter_clear(s); +++ rpi_inter_clear(s); +++#endif +++ } else { +++ // Non-ref frame so do it all on this thread +++ rpi_do_all_passes(s); +++ } +++#else +++ rpi_do_all_passes(s); ++ #endif ++ } ++ } ++ #endif +++ +++ ++ if (more_data < 0) { ++ s->tab_slice_address[ctb_addr_rs] = -1; ++ return more_data; ++@@ -3487,18 +3684,21 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ } ++ ++ #ifdef RPI ++- if (s->enable_rpi && s->num_dblk_cmds) { ++-#ifdef RPI_INTER_QPU ++- rpi_execute_inter_qpu(s); ++-#endif ++-#ifndef RPI_MULTI_MAILBOX ++- rpi_execute_transform(s); +++ +++#ifdef RPI_WORKER +++ // Wait for the worker to finish all its jobs +++ if (s->enable_rpi) { +++ worker_wait(s); +++ av_assert0(s->pass0_job==s->pass1_job); +++ 
av_assert0(s->pass1_job==s->pass2_job); +++ } ++ #endif ++- rpi_execute_inter_cmds(s); ++- vpu_wait(s->vpu_id); ++- rpi_execute_pred_cmds(s); ++- rpi_execute_dblk_cmds(s); +++ +++ // Finish off any half-completed rows +++ if (s->enable_rpi && s->num_dblk_cmds[s->pass0_job]) { +++ rpi_do_all_passes(s); ++ } +++ ++ #endif ++ ++ if (x_ctb + ctb_size >= s->ps.sps->width && ++@@ -4230,6 +4430,48 @@ fail: ++ return AVERROR(ENOMEM); ++ } ++ +++#ifdef RPI_WORKER +++static av_cold void hevc_init_worker(HEVCContext *s) +++{ +++ int err; +++ pthread_cond_init(&s->worker_cond_head, NULL); +++ pthread_cond_init(&s->worker_cond_middle, NULL); +++ pthread_cond_init(&s->worker_cond_tail, NULL); +++ pthread_mutex_init(&s->worker_mutex, NULL); +++ +++ s->worker_tail=0; +++ s->worker_middle=0; +++ s->worker_head=0; +++ s->kill_worker=0; +++ err = pthread_create(&s->worker_thread, NULL, worker_start, s); +++ err = pthread_create(&s->worker_deblock_thread, NULL, worker_deblock_start, s); +++ if (err) { +++ printf("Failed to create worker thread\n"); +++ exit(-1); +++ } +++} +++ +++static av_cold void hevc_exit_worker(HEVCContext *s) +++{ +++ void *res; +++ s->kill_worker=1; +++ pthread_cond_broadcast(&s->worker_cond_tail); +++ pthread_cond_broadcast(&s->worker_cond_middle); +++ pthread_join(s->worker_thread, &res); +++ pthread_join(s->worker_deblock_thread, &res); +++ +++ pthread_cond_destroy(&s->worker_cond_head); +++ pthread_cond_destroy(&s->worker_cond_middle); +++ pthread_cond_destroy(&s->worker_cond_tail); +++ pthread_mutex_destroy(&s->worker_mutex); +++ +++ s->worker_tail=0; +++ s->worker_middle=0; +++ s->worker_head=0; +++ s->kill_worker=0; +++} +++#endif +++ ++ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ { ++ HEVCContext *s = avctx->priv_data; ++@@ -4242,33 +4484,29 @@ static av_cold int hevc_decode_free(AVCodecContext *avctx) ++ av_freep(&s->cabac_state); ++ ++ #ifdef RPI ++- av_freep(&s->unif_mv_cmds); ++- av_freep(&s->univ_pred_cmds); +++ +++#ifdef 
RPI_WORKER +++ hevc_exit_worker(s); +++#endif +++ +++ for(i=0;i<RPI_MAX_JOBS;i++) { +++ av_freep(&s->unif_mv_cmds[i]); +++ av_freep(&s->univ_pred_cmds[i]); ++ ++ #ifdef RPI_INTER_QPU ++- if (s->unif_mvs) { ++- gpu_free( &s->unif_mvs_ptr ); ++- s->unif_mvs = 0; ++- } +++ if (s->unif_mvs[i]) { +++ gpu_free( &s->unif_mvs_ptr[i] ); +++ s->unif_mvs[i] = 0; +++ } ++ #endif ++ #ifdef RPI_LUMA_QPU ++- if (s->y_unif_mvs) { ++- gpu_free( &s->y_unif_mvs_ptr ); ++- s->y_unif_mvs = 0; ++- } +++ if (s->y_unif_mvs[i]) { +++ gpu_free( &s->y_unif_mvs_ptr[i] ); +++ s->y_unif_mvs[i] = 0; +++ } ++ #endif ++- ++-#ifdef EARLY_MALLOC ++- printf("hevc_decode_free\n"); ++- if (s->coeffs_buf_arm[0]) { ++- gpu_free(&s->coeffs_buf_default); ++- s->coeffs_buf_arm[0] = 0; ++- } ++- if (s->coeffs_buf_arm[2]) { ++- gpu_free(&s->coeffs_buf_accelerated); ++- s->coeffs_buf_arm[2] = 0; ++ } ++-#endif +++ ++ #endif ++ ++ for (i = 0; i < 3; i++) { ++@@ -4328,6 +4566,7 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ { ++ HEVCContext *s = avctx->priv_data; ++ int i; +++ int job; ++ ++ s->avctx = avctx; ++ ++@@ -4338,12 +4577,14 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ s->sList[0] = s; ++ ++ #ifdef RPI ++- s->unif_mv_cmds = av_mallocz(sizeof(HEVCMvCmd)*RPI_MAX_MV_CMDS); ++- if (!s->unif_mv_cmds) ++- goto fail; ++- s->univ_pred_cmds = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); ++- if (!s->univ_pred_cmds) ++- goto fail; +++ for(job=0;job<RPI_MAX_JOBS;job++) { +++ s->unif_mv_cmds[job] = av_mallocz(sizeof(HEVCMvCmd)*RPI_MAX_MV_CMDS); +++ if (!s->unif_mv_cmds[job]) +++ goto fail; +++ s->univ_pred_cmds[job] = av_mallocz(sizeof(HEVCPredCmd)*RPI_MAX_PRED_CMDS); +++ if (!s->univ_pred_cmds[job]) +++ goto fail; +++ } ++ ++ #ifdef RPI_INTER_QPU ++ // We divide the image into blocks 256 wide and 64 high ++@@ -4354,18 +4595,20 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ { ++ int uv_commands_per_qpu = UV_COMMANDS_PER_QPU; ++ uint32_t *p; +++ 
for(job=0;job<RPI_MAX_JOBS;job++) { ++ #ifdef RPI_CACHE_UNIF_MVS ++- gpu_malloc_cached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++ gpu_malloc_cached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr[job] ); ++ #else ++- gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr ); +++ gpu_malloc_uncached( 8 * uv_commands_per_qpu * sizeof(uint32_t), &s->unif_mvs_ptr[job] ); ++ #endif ++- s->unif_mvs = (uint32_t *) s->unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC +++ s->unif_mvs[job] = (uint32_t *) s->unif_mvs_ptr[job].arm; ++ ++- // Set up initial locations for uniform streams ++- p = s->unif_mvs; ++- for(i = 0; i < 8; i++) { ++- s->mvs_base[i] = p; +++ // Set up initial locations for uniform streams +++ p = s->unif_mvs[job]; +++ for(i = 0; i < 8; i++) { +++ s->mvs_base[job][i] = p; ++ p += uv_commands_per_qpu; +++ } ++ } ++ s->mc_filter_uv = qpu_get_fn(QPU_MC_FILTER_UV); ++ s->mc_filter_uv_b0 = qpu_get_fn(QPU_MC_FILTER_UV_B0); ++@@ -4374,61 +4617,35 @@ static av_cold int hevc_init_context(AVCodecContext *avctx) ++ } ++ #endif ++ #ifdef RPI_LUMA_QPU +++ for(job=0;job<RPI_MAX_JOBS;job++) ++ { ++ int y_commands_per_qpu = Y_COMMANDS_PER_QPU; ++ uint32_t *p; ++ #ifdef RPI_CACHE_UNIF_MVS ++- gpu_malloc_cached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr ); +++ gpu_malloc_cached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr[job] ); ++ #else ++- gpu_malloc_uncached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr ); +++ gpu_malloc_uncached( 12 * y_commands_per_qpu * sizeof(uint32_t), &s->y_unif_mvs_ptr[job] ); ++ #endif ++- s->y_unif_mvs = (uint32_t *) s->y_unif_mvs_ptr.arm; // TODO support this allocation in non EARLY_MALLOC +++ s->y_unif_mvs[job] = (uint32_t *) s->y_unif_mvs_ptr[job].arm; ++ ++ // Set up initial locations for uniform streams ++- p = s->y_unif_mvs; +++ p = s->y_unif_mvs[job]; ++ for(i = 0; i < 12; i++) { ++- 
s->y_mvs_base[i] = p; +++ s->y_mvs_base[job][i] = p; ++ p += y_commands_per_qpu; ++ } ++- s->mc_filter = qpu_get_fn(QPU_MC_FILTER); ++- s->mc_filter_b = qpu_get_fn(QPU_MC_FILTER_B); ++- ++ } +++ s->mc_filter = qpu_get_fn(QPU_MC_FILTER); +++ s->mc_filter_b = qpu_get_fn(QPU_MC_FILTER_B); ++ #endif ++ //gpu_malloc_uncached(2048*64,&s->dummy); ++ ++-#ifdef EARLY_MALLOC ++- { ++- int coeffs_in_ctb = 64*64; ++- int coefs_per_row = (2048/64) * coeffs_in_ctb * 3; // Allow space for chroma ++- s->coeffs_buf_arm[0] = 0; ++- s->coeffs_buf_arm[2] = 0; ++- printf("Allocated %d\n",coefs_per_row); ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row, &s->coeffs_buf_default); ++- s->coeffs_buf_arm[0] = (int16_t*) s->coeffs_buf_default.arm; ++- if (!s->coeffs_buf_arm[0]) ++- goto fail; ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated); ++- s->coeffs_buf_arm[2] = (int16_t*) s->coeffs_buf_accelerated.arm; ++- s->coeffs_buf_vc[2] = s->coeffs_buf_accelerated.vc; ++- if (!s->coeffs_buf_arm[2]) ++- goto fail; ++- s->coeffs_buf_arm[3] = coefs_per_row + s->coeffs_buf_arm[2]; ++- s->coeffs_buf_vc[3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[2]; ++- printf("Done\n"); ++-#ifdef RPI_PRECLEAR ++- //memset(s->coeffs_buf_arm[0],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[0], coefs_per_row); ++- //memset(s->coeffs_buf_arm[2],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[2], coefs_per_row); ++- //memset(s->coeffs_buf_arm[3],0, sizeof(int16_t) * coefs_per_row); ++- memclear16(s->coeffs_buf_arm[3], coefs_per_row); ++-#endif ++- } ++-#endif ++- ++ s->enable_rpi = 0; ++ +++#ifdef RPI_WORKER +++ hevc_init_worker(s); +++#endif +++ ++ #endif ++ ++ s->cabac_state = av_malloc(HEVC_CONTEXTS); ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 5cb90b5..7bd295a 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -51,6 +51,12 @@ ++ // Define RPI_LUMA_QPU to also use QPU for luma inter 
prediction ++ #define RPI_LUMA_QPU ++ #endif +++ +++ // By passing jobs to a worker thread we hope to be able to catch up during slow frames +++ #define RPI_MAX_JOBS 2 +++ // Define RPI_WORKER to launch a worker thread for pixel processing tasks +++ #define RPI_WORKER +++ ++ #endif ++ ++ #define MAX_DPB_SIZE 16 // A.4.1 ++@@ -806,6 +812,13 @@ typedef struct HEVCLocalContext { ++ int boundary_flags; ++ } HEVCLocalContext; ++ +++#ifdef RPI_WORKER +++typedef struct HEVCLocalContextIntra { +++ TransformUnit tu; +++ NeighbourAvailable na; +++} HEVCLocalContextIntra; +++#endif +++ ++ #ifdef RPI ++ ++ // RPI_MAX_WIDTH is maximum width in pixels supported by the accelerated code ++@@ -874,7 +887,7 @@ typedef struct HEVCPredCmd { ++ ++ typedef struct HEVCContext { ++ #ifdef RPI ++- int dblk_cmds[RPI_MAX_DEBLOCK_CMDS][2]; +++ int dblk_cmds[RPI_MAX_JOBS][RPI_MAX_DEBLOCK_CMDS][2]; ++ #endif ++ const AVClass *c; // needed by private avoptions ++ AVCodecContext *avctx; ++@@ -883,7 +896,9 @@ typedef struct HEVCContext { ++ ++ HEVCLocalContext *HEVClcList[MAX_NB_THREADS]; ++ HEVCLocalContext *HEVClc; ++- +++#ifdef RPI_WORKER +++ HEVCLocalContextIntra HEVClcIntra; +++#endif ++ uint8_t threads_type; ++ uint8_t threads_number; ++ ++@@ -894,43 +909,60 @@ typedef struct HEVCContext { ++ ++ #ifdef RPI ++ int enable_rpi; ++- HEVCMvCmd *unif_mv_cmds; ++- HEVCPredCmd *univ_pred_cmds; +++ HEVCMvCmd *unif_mv_cmds[RPI_MAX_JOBS]; +++ HEVCPredCmd *univ_pred_cmds[RPI_MAX_JOBS]; ++ int buf_width; ++- GPU_MEM_PTR_T coeffs_buf_default; ++- GPU_MEM_PTR_T coeffs_buf_accelerated; ++- int16_t *coeffs_buf_arm[4]; ++- unsigned int coeffs_buf_vc[4]; ++- int num_coeffs[4]; ++- int num_xfm_cmds; ++- int num_mv_cmds; ++- int num_pred_cmds; ++- int num_dblk_cmds; +++ GPU_MEM_PTR_T coeffs_buf_default[RPI_MAX_JOBS]; +++ GPU_MEM_PTR_T coeffs_buf_accelerated[RPI_MAX_JOBS]; +++ int16_t *coeffs_buf_arm[RPI_MAX_JOBS][4]; +++ unsigned int coeffs_buf_vc[RPI_MAX_JOBS][4]; +++ int num_coeffs[RPI_MAX_JOBS][4]; +++ int 
num_xfm_cmds[RPI_MAX_JOBS]; +++ int num_mv_cmds[RPI_MAX_JOBS]; +++ int num_pred_cmds[RPI_MAX_JOBS]; +++ int num_dblk_cmds[RPI_MAX_JOBS]; ++ int vpu_id; ++ //GPU_MEM_PTR_T dummy; +++ int pass0_job; // Pass0 does coefficient decode +++ int pass1_job; // Pass1 does pixel processing +++ int pass2_job; // Pass2 does reconstruction and deblocking ++ #ifdef RPI_INTER_QPU ++- GPU_MEM_PTR_T unif_mvs_ptr; ++- uint32_t *unif_mvs; // Base of memory for motion vector commands +++ GPU_MEM_PTR_T unif_mvs_ptr[RPI_MAX_JOBS]; +++ uint32_t *unif_mvs[RPI_MAX_JOBS]; // Base of memory for motion vector commands ++ ++ // _base pointers are to the start of the row ++- uint32_t *mvs_base[8]; +++ uint32_t *mvs_base[RPI_MAX_JOBS][8]; ++ // these pointers are to the next free space ++- uint32_t *u_mvs[8]; +++ uint32_t *u_mvs[RPI_MAX_JOBS][8]; ++ // Function pointers ++ uint32_t mc_filter_uv; ++ uint32_t mc_filter_uv_b0; ++ uint32_t mc_filter_uv_b; ++ #endif ++ #ifdef RPI_LUMA_QPU ++- GPU_MEM_PTR_T y_unif_mvs_ptr; ++- uint32_t *y_unif_mvs; // Base of memory for motion vector commands ++- uint32_t *y_mvs_base[12]; ++- uint32_t *y_mvs[12]; +++ GPU_MEM_PTR_T y_unif_mvs_ptr[RPI_MAX_JOBS]; +++ uint32_t *y_unif_mvs[RPI_MAX_JOBS]; // Base of memory for motion vector commands +++ uint32_t *y_mvs_base[RPI_MAX_JOBS][12]; +++ uint32_t *y_mvs[RPI_MAX_JOBS][12]; ++ // Function pointers ++ uint32_t mc_filter; ++ uint32_t mc_filter_b; ++ #endif ++ +++#ifdef RPI_WORKER +++ pthread_t worker_thread; +++ pthread_t worker_deblock_thread; +++ pthread_cond_t worker_cond_head; +++ pthread_cond_t worker_cond_tail; +++ pthread_cond_t worker_cond_middle; +++ pthread_mutex_t worker_mutex; +++ +++ int worker_tail; // Contains the number of posted jobs +++ int worker_head; // Contains the number of completed jobs +++ int worker_middle; // Contains the number of completed jobs +++ int kill_worker; // set to 1 to terminate the worker +++#endif +++ ++ #endif ++ ++ uint8_t *cabac_state; ++diff --git a/libavcodec/hevc_cabac.c 
b/libavcodec/hevc_cabac.c ++index 38f53de..f0982cd 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1051,11 +1051,11 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ if (s->enable_rpi) { ++ int n = trafo_size * trafo_size; ++ if (use_vpu) { ++- coeffs = s->coeffs_buf_arm[log2_trafo_size - 2] + s->num_coeffs[log2_trafo_size - 2]; ++- s->num_coeffs[log2_trafo_size - 2] += n; +++ coeffs = s->coeffs_buf_arm[s->pass0_job][log2_trafo_size - 2] + s->num_coeffs[s->pass0_job][log2_trafo_size - 2]; +++ s->num_coeffs[s->pass0_job][log2_trafo_size - 2] += n; ++ } else { ++- coeffs = s->coeffs_buf_arm[0] + s->num_coeffs[0]; ++- s->num_coeffs[0] += n; +++ coeffs = s->coeffs_buf_arm[s->pass0_job][0] + s->num_coeffs[s->pass0_job][0]; +++ s->num_coeffs[s->pass0_job][0] += n; ++ } ++ } ++ // We now do the memset after transform_add while we know the data is cached. ++@@ -1508,7 +1508,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode); ++ } ++ } else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && log2_trafo_size == 2) { ++- s->hevcdsp.idct_4x4_luma(coeffs); +++ s->hevcdsp.idct_4x4_luma(coeffs); ++ } else { ++ #ifdef RPI ++ if (!use_vpu) { ++@@ -1553,7 +1553,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } ++ #ifdef RPI ++ if (s->enable_rpi) { ++- HEVCPredCmd *cmd = s->univ_pred_cmds + s->num_pred_cmds++; +++ HEVCPredCmd *cmd = s->univ_pred_cmds[s->pass0_job] + s->num_pred_cmds[s->pass0_job]++; ++ cmd->type = RPI_PRED_TRANSFORM_ADD; ++ cmd->size = log2_trafo_size; ++ cmd->buf = coeffs; ++diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c ++index 71c6d52..344e021 100644 ++--- a/libavcodec/hevcpred_template.c +++++ b/libavcodec/hevcpred_template.c ++@@ -71,8 +71,11 @@ do { \ ++ AV_WN4P(&ptr[i], a); \ ++ else \ ++ a = PIXEL_SPLAT_X4(ptr[i + 3]) ++- +++#ifdef RPI_WORKER +++ HEVCLocalContextIntra *lc = 
&s->HEVClcIntra; +++#else ++ HEVCLocalContext *lc = s->HEVClc; +++#endif ++ int i; ++ int hshift = s->ps.sps->hshift[c_idx]; ++ int vshift = s->ps.sps->vshift[c_idx]; ++-- ++2.7.4 ++ ++ ++From 1e0885f8d98175777fff65b4cedd708176c2abcf Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 3 Jun 2015 13:43:48 +0100 ++Subject: [PATCH 54/68] Avoid lockup bug with RPI_WORKER enabled ++ ++--- ++ libavcodec/hevc.c | 22 +++++++++++----------- ++ libavcodec/hevc_cabac.c | 1 - ++ 2 files changed, 11 insertions(+), 12 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 12aacc5..182a82f 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -133,11 +133,11 @@ static uint32_t get_vc_address(AVBufferRef *bref) { ++ static void worker_submit_job(HEVCContext *s) ++ { ++ LOG_ENTER ++- //pthread_mutex_lock(&s->worker_mutex); ++- s->worker_tail++; // This is the only place that can change tail so we do not need the mutex +++ pthread_mutex_lock(&s->worker_mutex); +++ s->worker_tail++; ++ s->pass0_job = (s->pass0_job + 1) % RPI_MAX_JOBS; // Move onto the next slot ++ pthread_cond_broadcast(&s->worker_cond_tail); // Let people know that the tail has moved ++- //pthread_mutex_unlock(&s->worker_mutex); +++ pthread_mutex_unlock(&s->worker_mutex); ++ LOG_EXIT ++ } ++ ++@@ -145,11 +145,11 @@ static void worker_submit_job(HEVCContext *s) ++ static void worker_complete_middle_job(HEVCContext *s) ++ { ++ LOG_ENTER ++- //pthread_mutex_lock(&s->worker_mutex); ++- s->worker_middle++; // This is the only place that can change head so we do not need the mutex +++ pthread_mutex_lock(&s->worker_mutex); +++ s->worker_middle++; ++ s->pass1_job = (s->pass1_job + 1) % RPI_MAX_JOBS; // Move onto the next slot ++- pthread_cond_broadcast(&s->worker_cond_middle); // Let people know that the tail has moved ++- //pthread_mutex_unlock(&s->worker_mutex); +++ pthread_cond_broadcast(&s->worker_cond_middle); // Let people know that the middle has 
moved +++ pthread_mutex_unlock(&s->worker_mutex); ++ LOG_EXIT ++ } ++ ++@@ -157,11 +157,11 @@ static void worker_complete_middle_job(HEVCContext *s) ++ static void worker_complete_job(HEVCContext *s) ++ { ++ LOG_ENTER ++- //pthread_mutex_lock(&s->worker_mutex); ++- s->worker_head++; // This is the only place that can change head so we do not need the mutex +++ pthread_mutex_lock(&s->worker_mutex); +++ s->worker_head++; ++ s->pass2_job = (s->pass2_job + 1) % RPI_MAX_JOBS; // Move onto the next slot ++- pthread_cond_broadcast(&s->worker_cond_head); // Let people know that the tail has moved ++- //pthread_mutex_unlock(&s->worker_mutex); +++ pthread_cond_broadcast(&s->worker_cond_head); // Let people know that the head has moved +++ pthread_mutex_unlock(&s->worker_mutex); ++ LOG_EXIT ++ } ++ ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index f0982cd..6523e66 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1497,7 +1497,6 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ for (i = 0; i < 8; i++) ++ FFSWAP(int16_t, coeffs[i], coeffs[16 - i - 1]); ++ } ++- ++ s->hevcdsp.transform_skip(coeffs, log2_trafo_size); ++ ++ if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag && ++-- ++2.7.4 ++ ++ ++From 1d7ad81069dec6914ec7e9983855d7a1b5e4b123 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 3 Jun 2015 15:37:19 +0100 ++Subject: [PATCH 55/68] Added code to flush buffers at start of frame ++ ++--- ++ libavcodec/hevc.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ ++ 1 file changed, 72 insertions(+) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 182a82f..e5b9f1e 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -43,6 +43,7 @@ ++ ++ #ifdef RPI ++ #include "rpi_qpu.h" +++ #include "rpi_user_vcsm.h" ++ // Move Inter prediction into separate pass ++ #define RPI_INTER ++ ++@@ -3508,6 +3509,7 @@ static void 
rpi_execute_inter_qpu(HEVCContext *s) ++ #else ++ gpu_cache_flush(&s->coeffs_buf_accelerated[job]); ++ #endif +++ ++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], s->num_coeffs[job][3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++ (uint32_t)(unif_vc+(s->mvs_base[job][0 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), ++@@ -3558,6 +3560,71 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ } ++ #endif ++ +++#ifdef RPI +++ +++static void flush_buffer(AVBufferRef *bref) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); +++ gpu_cache_flush(p); +++} +++ +++static void flush_frame(HEVCContext *s,AVFrame *frame) +++{ +++#if 1 +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ int n = s->ps.sps->height; +++ int curr_y = 0; +++ int curr_uv = 0; +++ int n_uv = n >> s->ps.sps->vshift[1]; +++ int sz,base; +++ sz = s->frame->linesize[1] * (n_uv-curr_uv); +++ base = s->frame->linesize[1] * curr_uv; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = p->arm + base; +++ iocache.s[0].size = sz; +++ p = av_buffer_pool_opaque(frame->buf[2]); +++ iocache.s[1].handle = p->vcsm_handle; +++ iocache.s[1].cmd = 3; // clean+invalidate +++ iocache.s[1].addr = p->arm + base; +++ iocache.s[1].size = sz; +++ p = av_buffer_pool_opaque(frame->buf[0]); +++ sz = s->frame->linesize[0] * (n-curr_y); +++ base = s->frame->linesize[0] * curr_y; +++ iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].cmd = 3; // clean+invalidate +++ iocache.s[2].addr = p->arm + base; +++ iocache.s[2].size = sz; +++ vcsm_clean_invalid( &iocache ); +++#else +++ flush_buffer(frame->buf[0]); +++ flush_buffer(frame->buf[1]); +++ flush_buffer(frame->buf[2]); +++#endif +++} +++ +++static void flush_all(HEVCContext *s) +++{ +++#if 0 +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ GPU_MEM_PTR_T *p 
= av_buffer_pool_opaque(s->frame->buf[0]); +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 4; // Flush all +++ iocache.s[0].addr = p->arm; +++ iocache.s[0].size = 4096; +++ vcsm_clean_invalid( &iocache ); +++#else +++ int i,k; +++ for(i=0;i<2;i++) { +++ for (k = 0; k < s->sh.nb_refs[i]; k++) { +++ flush_frame(s,s->ref->refPicList[i].ref[k]->frame); +++ } +++ } +++ flush_frame(s,s->frame); +++#endif +++} +++#endif +++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ { ++ HEVCContext *s = avctxt->priv_data; ++@@ -3592,8 +3659,12 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ printf("Weighted B slice\n"); ++ } ++ +++ // Now flush all reference frames and our destination frame to get everything ready for decode +++ flush_all(s); ++ #endif ++ +++ //printf("L0=%d L1=%d\n",s->sh.nb_refs[L1],s->sh.nb_refs[L1]); +++ ++ if (!ctb_addr_ts && s->sh.dependent_slice_segment_flag) { ++ av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n"); ++ return AVERROR_INVALIDDATA; ++@@ -3664,6 +3735,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ rpi_do_all_passes(s); ++ #endif ++ } +++ ++ } ++ #endif ++ ++-- ++2.7.4 ++ ++ ++From 7a57f233dcd4048e20a0b5bc06bc20abb589d3fa Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 3 Jun 2015 16:42:24 +0100 ++Subject: [PATCH 56/68] Reduce the amount that needs to be flushed ++ ++--- ++ libavcodec/hevc.c | 35 +++++++++++------------------------ ++ 1 file changed, 11 insertions(+), 24 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index e5b9f1e..73d7f74 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3569,7 +3569,7 @@ static void flush_buffer(AVBufferRef *bref) { ++ ++ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ { ++-#if 1 +++#ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++ int n = s->ps.sps->height; ++ int curr_y = 
0; ++@@ -3603,26 +3603,6 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ #endif ++ } ++ ++-static void flush_all(HEVCContext *s) ++-{ ++-#if 0 ++- struct vcsm_user_clean_invalid_s iocache = {}; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(s->frame->buf[0]); ++- iocache.s[0].handle = p->vcsm_handle; ++- iocache.s[0].cmd = 4; // Flush all ++- iocache.s[0].addr = p->arm; ++- iocache.s[0].size = 4096; ++- vcsm_clean_invalid( &iocache ); ++-#else ++- int i,k; ++- for(i=0;i<2;i++) { ++- for (k = 0; k < s->sh.nb_refs[i]; k++) { ++- flush_frame(s,s->ref->refPicList[i].ref[k]->frame); ++- } ++- } ++- flush_frame(s,s->frame); ++-#endif ++-} ++ #endif ++ ++ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++@@ -3658,9 +3638,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ if (s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE) ++ printf("Weighted B slice\n"); ++ } ++- ++- // Now flush all reference frames and our destination frame to get everything ready for decode ++- flush_all(s); ++ #endif ++ ++ //printf("L0=%d L1=%d\n",s->sh.nb_refs[L1],s->sh.nb_refs[L1]); ++@@ -4130,6 +4107,11 @@ static int hevc_frame_start(HEVCContext *s) ++ if (!s->avctx->hwaccel) ++ ff_thread_finish_setup(s->avctx); ++ +++#ifdef RPI_INTER_QPU +++ // Invalidate the output data buffer so it is ready for the QPUs to write into it. 
+++ flush_frame(s,s->frame); +++#endif +++ ++ return 0; ++ ++ fail: ++@@ -4331,6 +4313,11 @@ fail: ++ ff_hevc_flush_buffer(s, &s->ref->tf, s->ps.sps->height); ++ #endif ++ ff_thread_report_progress(&s->ref->tf, INT_MAX, 0); +++ } else if (s->ref) { +++#ifdef RPI_INTER_QPU +++ // When running single threaded we need to flush the whole frame +++ flush_frame(s,s->frame); +++#endif ++ } ++ return ret; ++ } ++-- ++2.7.4 ++ ++ ++From 26eba8e3266cc5f2120e8284a1ce486d6a402010 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 4 Jun 2015 07:59:28 +0100 ++Subject: [PATCH 57/68] Corrected support for disabled rpi when using ++ RPI_WORKER ++ ++--- ++ libavcodec/hevc.h | 18 ++++++++++-------- ++ libavcodec/hevcpred_template.c | 2 +- ++ 2 files changed, 11 insertions(+), 9 deletions(-) ++ ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 7bd295a..3cb34bd 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -769,7 +769,17 @@ typedef struct HEVCFrame { ++ uint8_t flags; ++ } HEVCFrame; ++ +++#ifdef RPI_WORKER +++typedef struct HEVCLocalContextIntra { +++ TransformUnit tu; +++ NeighbourAvailable na; +++} HEVCLocalContextIntra; +++#endif +++ ++ typedef struct HEVCLocalContext { +++ TransformUnit tu; +++ NeighbourAvailable na; // WARNING tu and na must be the first two fields to match HEVCLocalContextIntra +++ ++ uint8_t cabac_state[HEVC_CONTEXTS]; ++ ++ uint8_t stat_coeff[4]; ++@@ -784,7 +794,6 @@ typedef struct HEVCLocalContext { ++ ++ int qPy_pred; ++ ++- TransformUnit tu; ++ ++ uint8_t ctb_left_flag; ++ uint8_t ctb_up_flag; ++@@ -801,7 +810,6 @@ typedef struct HEVCLocalContext { ++ int ct_depth; ++ CodingUnit cu; ++ PredictionUnit pu; ++- NeighbourAvailable na; ++ ++ #define BOUNDARY_LEFT_SLICE (1 << 0) ++ #define BOUNDARY_LEFT_TILE (1 << 1) ++@@ -812,12 +820,6 @@ typedef struct HEVCLocalContext { ++ int boundary_flags; ++ } HEVCLocalContext; ++ ++-#ifdef RPI_WORKER ++-typedef struct HEVCLocalContextIntra { ++- 
TransformUnit tu; ++- NeighbourAvailable na; ++-} HEVCLocalContextIntra; ++-#endif ++ ++ #ifdef RPI ++ ++diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c ++index 344e021..325b60e 100644 ++--- a/libavcodec/hevcpred_template.c +++++ b/libavcodec/hevcpred_template.c ++@@ -72,7 +72,7 @@ do { \ ++ else \ ++ a = PIXEL_SPLAT_X4(ptr[i + 3]) ++ #ifdef RPI_WORKER ++- HEVCLocalContextIntra *lc = &s->HEVClcIntra; +++ HEVCLocalContextIntra *lc = s->enable_rpi ? &s->HEVClcIntra : (HEVCLocalContextIntra *)s->HEVClc ; ++ #else ++ HEVCLocalContext *lc = s->HEVClc; ++ #endif ++-- ++2.7.4 ++ ++ ++From 5b3eee9be88a5326df7621de95095def969e05a8 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 4 Jun 2015 11:52:55 +0100 ++Subject: [PATCH 58/68] Draft support for tiles ++ ++--- ++ libavcodec/hevc.c | 140 +++++++++++++++++++++++------------------ ++ libavcodec/hevc.h | 21 +++++-- ++ libavcodec/hevc_filter.c | 2 +- ++ libavcodec/hevcpred_template.c | 2 +- ++ 4 files changed, 99 insertions(+), 66 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 73d7f74..ec67252 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -63,10 +63,10 @@ ++ ++ static void rpi_execute_dblk_cmds(HEVCContext *s); ++ static void rpi_execute_transform(HEVCContext *s); ++- static void rpi_execute_inter_qpu(HEVCContext *s); +++ static void rpi_launch_vpu_qpu(HEVCContext *s); ++ static void rpi_execute_pred_cmds(HEVCContext *s); ++ static void rpi_execute_inter_cmds(HEVCContext *s); ++- static void rpi_inter_clear(HEVCContext *s); +++ static void rpi_begin(HEVCContext *s); ++ ++ // Define INTER_PASS0 to do inter prediction in first pass ++ //#define INTER_PASS0 ++@@ -90,16 +90,18 @@ const uint8_t ff_hevc_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12 ++ ++ #ifdef RPI_INTER_QPU ++ +++// Each luma QPU processes 2*RPI_NUM_CHUNKS 64x64 blocks +++// Each chroma QPU processes 3*RPI_NUM_CHUNKS 64x64 blocks, but 
requires two commands for B blocks +++// For each block of 64*64 the smallest block size is 8x4 +++// We also need an extra command for the setup information +++ ++ #define RPI_CHROMA_COMMAND_WORDS 12 ++-#define UV_COMMANDS_PER_QPU ((1 + (256*64*2)/(4*4)) * RPI_CHROMA_COMMAND_WORDS) +++#define UV_COMMANDS_PER_QPU ((1 + 3*RPI_NUM_CHUNKS*(64*64)*2/(8*4)) * RPI_CHROMA_COMMAND_WORDS) ++ // The QPU code for UV blocks only works up to a block width of 8 ++ #define RPI_CHROMA_BLOCK_WIDTH 8 ++ ++-// Split image of 2048 into parts 64 wide ++-// So some QPUs will have 3 blocks of 64 to do, and others 2 blocks for an image 2048 wide with 32 blocks across ++-// For each block of 64*64 the smallest block size is 8x4 ++ #define RPI_LUMA_COMMAND_WORDS 9 ++-#define Y_COMMANDS_PER_QPU ((1+3*(64*64)/(8*4)) * RPI_LUMA_COMMAND_WORDS) +++#define Y_COMMANDS_PER_QPU ((1+2*RPI_NUM_CHUNKS*(64*64)/(8*4)) * RPI_LUMA_COMMAND_WORDS) ++ ++ #define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24) ++ ++@@ -216,7 +218,7 @@ static void *worker_start(void *arg) ++ LOG_ENTER ++ // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++ #ifndef LAUNCH_PASS0 ++- rpi_execute_inter_qpu(s); +++ rpi_launch_vpu_qpu(s); ++ #endif ++ #ifndef INTER_PASS0 ++ // Perform inter prediction ++@@ -322,9 +324,14 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ ++ #ifdef RPI ++ av_assert0(sps); ++- int coeffs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); ++- int coefs_per_row = sps->ctb_width * coeffs_in_ctb * 3; // Allow space for chroma +++ int coefs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); +++ int coefs_per_luma = 64*64*24*RPI_NUM_CHUNKS; +++ int coefs_per_chroma = (coefs_per_luma * 2) >> sps->vshift[1] >> sps->hshift[1]; +++ int coefs_per_row = coefs_per_luma + coefs_per_chroma; ++ int job; +++ s->max_ctu_count = 
coefs_per_luma / coefs_in_ctb; +++ s->ctu_per_y_chan = s->max_ctu_count / 12; +++ s->ctu_per_uv_chan = s->max_ctu_count / 8; ++ for(job=0;job<RPI_MAX_JOBS;job++) { ++ printf("Allocated %d\n",coefs_per_row); ++ for(job=0;job<RPI_MAX_JOBS;job++) { ++@@ -2186,10 +2193,9 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int my2_mx2_my_mx = (my_mx << 16) + my_mx; ++ int x1 = x0 + (mv->x >> 2); ++ int y1 = y0 + (mv->y >> 2); ++- int chan = x0>>6; // 64 wide blocks per QPU ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++- uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; +++ uint32_t *y = s->curr_y_mvs; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { ++ int bw = nPbW-start_x; ++@@ -2209,7 +2215,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++- s->y_mvs[s->pass0_job][chan % 12] = y; +++ s->curr_y_mvs = y; ++ } else ++ #endif ++ { ++@@ -2233,12 +2239,10 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ ++ int x1_c = x0_c + (mv->x >> (2 + hshift)); ++ int y1_c = y0_c + (mv->y >> (2 + hshift)); ++- //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width ++- int chan = x0>>8; ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++- uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; +++ uint32_t *u = s->curr_u_mvs; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2262,7 +2266,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[s->pass0_job][chan & 7] = u; +++ s->curr_u_mvs = u; ++ return; ++ } ++ #endif ++@@ -2289,10 +2293,9 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int my2_mx2_my_mx = (my_mx << 16) + my_mx; ++ int x1 = x0 + (mv->x >> 2); ++ int y1 = y0 + (mv->y >> 2); ++- int chan = x0>>6; // 64 wide blocks per QPU ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++- uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; +++ uint32_t *y = s->curr_y_mvs; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=16) { ++ int bw = nPbW-start_x; ++@@ -2312,7 +2315,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++- s->y_mvs[s->pass0_job][chan % 12] = y; +++ s->curr_y_mvs = y; ++ } else ++ #endif ++ ++@@ -2337,12 +2340,10 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ ++ int x1_c = x0_c + (mv->x >> (2 + hshift)); ++ int y1_c = y0_c + (mv->y >> (2 + hshift)); ++- //int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width ++- int chan = x0>>8; ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++ (s->sh.slice_type == B_SLICE && s->ps.pps->weighted_bipred_flag); ++ ++- uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; +++ uint32_t *u = s->curr_u_mvs; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2367,7 +2368,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[s->pass0_job][chan & 7] = u; +++ s->curr_u_mvs = u; ++ return; ++ } ++ #endif ++@@ -2400,8 +2401,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int y1 = y0 + (mv->y >> 2); ++ int x2 = x0 + (mv2->x >> 2); ++ int y2 = y0 + (mv2->y >> 2); ++- int chan = x0>>6; // 64 wide blocks per QPU ++- uint32_t *y = s->y_mvs[s->pass0_job][chan % 12]; +++ uint32_t *y = s->curr_y_mvs; ++ for(int start_y=0;start_y < nPbH;start_y+=16) { // Potentially we could change the assembly code to support taller sizes in one go ++ for(int start_x=0;start_x < nPbW;start_x+=8) { // B blocks work 8 at a time ++ int bw = nPbW-start_x; ++@@ -2417,7 +2417,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter_b; ++ } ++ } ++- s->y_mvs[s->pass0_job][chan % 12] = y; +++ s->curr_y_mvs = y; ++ } else ++ #endif ++ { ++@@ -2448,9 +2448,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int x2_c = x0_c + (mv2->x >> (2 + hshift)); ++ int y2_c = y0_c + (mv2->y >> (2 + hshift)); ++ ++- int chan = x0>>8; // Allocate commands for the first 256 luma pixels across to the first QPU. 
This is optimised for images around 1920 width ++ ++- uint32_t *u = s->u_mvs[s->pass0_job][chan & 7]; +++ uint32_t *u = s->curr_u_mvs; ++ for(int start_y=0;start_y < nPbH_c;start_y+=16) { ++ for(int start_x=0;start_x < nPbW_c;start_x+=RPI_CHROMA_BLOCK_WIDTH) { ++ int bw = nPbW_c-start_x; ++@@ -2479,7 +2478,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++- s->u_mvs[s->pass0_job][chan & 7] = u; +++ s->curr_u_mvs = u; ++ return; ++ } ++ #endif ++@@ -3114,12 +3113,8 @@ static void rpi_execute_inter_cmds(HEVCContext *s) ++ ++ static void rpi_do_all_passes(HEVCContext *s) ++ { ++-#ifdef RPI_INTER_QPU ++- // Kick off inter prediction on QPUs ++- rpi_execute_inter_qpu(s); ++-#else ++- rpi_execute_transform(s); ++-#endif +++ // Kick off QPUs and VPUs +++ rpi_launch_vpu_qpu(s); ++ // Perform luma inter prediction ++ rpi_execute_inter_cmds(s); ++ // Wait for transform completion ++@@ -3128,18 +3123,18 @@ static void rpi_do_all_passes(HEVCContext *s) ++ rpi_execute_pred_cmds(s); ++ // Perform deblocking for CTBs in this row ++ rpi_execute_dblk_cmds(s); ++-#ifdef RPI_INTER_QPU ++- rpi_inter_clear(s); ++-#endif +++ // Prepare next batch +++ rpi_begin(s); ++ } ++ ++ #endif ++ ++-#ifdef RPI_INTER_QPU ++-static void rpi_inter_clear(HEVCContext *s) +++#ifdef RPI +++static void rpi_begin(HEVCContext *s) ++ { ++ int job = s->pass0_job; ++ int i; +++#ifdef RPI_INTER_QPU ++ int pic_width = s->ps.sps->width >> s->ps.sps->hshift[1]; ++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[1]; ++ int weight_flag = (s->sh.slice_type == P_SLICE && s->ps.pps->weighted_pred_flag) || ++@@ -3165,6 +3160,8 @@ static void rpi_inter_clear(HEVCContext *s) ++ } ++ *s->u_mvs[job][i]++ = i; // Select section of VPM (avoid collisions with 3d unit) ++ } +++ s->curr_u_mvs = s->u_mvs[job][0]; +++#endif ++ ++ #ifdef RPI_LUMA_QPU ++ for(i=0;i<12;i++) { ++@@ -3187,8 
+3184,11 @@ static void rpi_inter_clear(HEVCContext *s) ++ } ++ *s->y_mvs[job][i]++ = 0; // Next kernel ++ } +++ s->curr_y_mvs = s->y_mvs[job][0]; ++ #endif +++ s->ctu_count = 0; ++ } +++#endif ++ ++ #ifdef RPI_SIMULATE_QPUS ++ ++@@ -3459,8 +3459,9 @@ static void rpi_simulate_inter_qpu(HEVCContext *s) ++ ++ #endif ++ +++#ifdef RPI_INTER_QPU ++ ++-static void rpi_execute_inter_qpu(HEVCContext *s) +++static void rpi_launch_vpu_qpu(HEVCContext *s) ++ { ++ int k; ++ #ifdef LAUNCH_PASS0 ++@@ -3558,6 +3559,15 @@ static void rpi_execute_inter_qpu(HEVCContext *s) ++ ++ ++ } +++#else +++ +++#ifdef RPI +++static void rpi_launch_vpu_qpu(HEVCContext *s) +++{ +++ rpi_execute_transform(s); +++} +++#endif +++ ++ #endif ++ ++ #ifdef RPI ++@@ -3617,29 +3627,20 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI ++ #ifdef RPI_INTER_QPU ++ s->enable_rpi = s->ps.sps->bit_depth == 8 ++- && s->ps.sps->width <= RPI_MAX_WIDTH ++ && !s->ps.pps->cross_component_prediction_enabled_flag ++- && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1 ++ && !(s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE); ++ #else ++ s->enable_rpi = s->ps.sps->bit_depth == 8 ++- && s->ps.sps->width <= RPI_MAX_WIDTH ++- && !s->ps.pps->cross_component_prediction_enabled_flag ++- && s->ps.pps->num_tile_rows <= 1 && s->ps.pps->num_tile_columns <= 1; +++ && !s->ps.pps->cross_component_prediction_enabled_flag; ++ #endif ++ ++ if (!s->enable_rpi) { ++ if (s->ps.pps->cross_component_prediction_enabled_flag) ++ printf("Cross component\n"); ++- if (s->ps.pps->num_tile_rows > 1 || s->ps.pps->num_tile_columns > 1) ++- printf("Tiles\n"); ++- if (s->ps.pps->weighted_pred_flag && s->sh.slice_type == P_SLICE) ++- printf("Weighted P slice\n"); ++ if (s->ps.pps->weighted_bipred_flag && s->sh.slice_type == B_SLICE) ++ printf("Weighted B slice\n"); ++ } ++ #endif ++- ++ //printf("L0=%d L1=%d\n",s->sh.nb_refs[L1],s->sh.nb_refs[L1]); ++ ++ if (!ctb_addr_ts && 
s->sh.dependent_slice_segment_flag) { ++@@ -3660,8 +3661,8 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->pass1_job = 0; ++ s->pass2_job = 0; ++ #endif ++-#ifdef RPI_INTER_QPU ++- rpi_inter_clear(s); +++#ifdef RPI +++ rpi_begin(s); ++ #endif ++ ++ while (more_data && ctb_addr_ts < s->ps.sps->ctb_size) { ++@@ -3679,13 +3680,34 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->deblock[ctb_addr_rs].tc_offset = s->sh.tc_offset; ++ s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag; ++ +++#ifdef RPI_INTER_QPU +++ s->curr_u_mvs = s->u_mvs[s->pass0_job][s->ctu_count / s->ctu_per_uv_chan]; +++#endif +++#ifdef RPI_LUMA_QPU +++ s->curr_y_mvs = s->y_mvs[s->pass0_job][s->ctu_count / s->ctu_per_y_chan]; +++#endif +++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ +++#ifdef RPI_INTER_QPU +++ s->u_mvs[s->pass0_job][s->ctu_count / s->ctu_per_uv_chan] = s->curr_u_mvs; +++#endif +++#ifdef RPI_LUMA_QPU +++ s->y_mvs[s->pass0_job][s->ctu_count / s->ctu_per_y_chan] = s->curr_y_mvs; +++#endif +++ ++ #ifdef RPI ++ if (s->enable_rpi) { +++ //av_assert0(s->num_dblk_cmds[s->pass0_job]>=0); +++ //av_assert0(s->num_dblk_cmds[s->pass0_job]<RPI_MAX_DEBLOCK_CMDS); +++ //av_assert0(s->pass0_job<RPI_MAX_JOBS); +++ //av_assert0(s->pass0_job>=0); ++ s->dblk_cmds[s->pass0_job][s->num_dblk_cmds[s->pass0_job]][0] = x_ctb; ++ s->dblk_cmds[s->pass0_job][s->num_dblk_cmds[s->pass0_job]++][1] = y_ctb; ++- if ( (((y_ctb + ctb_size)&63) == 0) && x_ctb + ctb_size >= s->ps.sps->width) { +++ s->ctu_count++; +++ //printf("%d %d/%d job=%d\n",s->ctu_count,s->num_dblk_cmds[s->pass0_job],RPI_MAX_DEBLOCK_CMDS,s->pass0_job); +++ +++ if ( s->ctu_count >= s->max_ctu_count ) { ++ #ifdef RPI_WORKER ++ if (s->used_for_ref) { ++ // Split work load onto separate threads so we make as rapid progress as possible with this frame ++@@ -3693,7 +3715,7 @@ static int 
hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ rpi_execute_inter_cmds(s); ++ #endif ++ #ifdef LAUNCH_PASS0 ++- rpi_execute_inter_qpu(s); +++ rpi_launch_vpu_qpu(s); ++ #endif ++ // Pass on this job to worker thread ++ worker_submit_job(s); ++@@ -3701,9 +3723,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ worker_pass0_ready(s); ++ ++ // Prepare the next batch of commands ++-#ifdef RPI_INTER_QPU ++- rpi_inter_clear(s); ++-#endif +++ rpi_begin(s); ++ } else { ++ // Non-ref frame so do it all on this thread ++ rpi_do_all_passes(s); ++@@ -3744,7 +3764,7 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #endif ++ ++ // Finish off any half-completed rows ++- if (s->enable_rpi && s->num_dblk_cmds[s->pass0_job]) { +++ if (s->enable_rpi && s->ctu_count) { ++ rpi_do_all_passes(s); ++ } ++ ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 3cb34bd..a141316 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -823,8 +823,15 @@ typedef struct HEVCLocalContext { ++ ++ #ifdef RPI ++ +++// The processing is done in chunks +++// Each chunk corresponds to 24 64x64 luma blocks (24 so it is divisible by 8 for chroma and 12 for luma) +++// This is a distance of 1536 pixels across the screen +++// Increasing RPI_NUM_CHUNKS will reduce time spent activating QPUs and cache flushing, +++// but allocate more memory and increase the latency before data in the next frame can be processed +++#define RPI_NUM_CHUNKS 1 +++ ++ // RPI_MAX_WIDTH is maximum width in pixels supported by the accelerated code ++-#define RPI_MAX_WIDTH 2048 +++#define RPI_MAX_WIDTH (RPI_NUM_CHUNKS*64*24) ++ ++ // Worst case is for 4:4:4 4x4 blocks with 64 high coding tree blocks, so 16 MV cmds per 4 pixels across for each colour plane, * 2 for bi ++ #define RPI_MAX_MV_CMDS (2*16*3*(RPI_MAX_WIDTH/4)) ++@@ -888,9 +895,6 @@ typedef struct HEVCPredCmd { ++ #endif ++ ++ typedef struct HEVCContext { ++-#ifdef RPI ++- int 
dblk_cmds[RPI_MAX_JOBS][RPI_MAX_DEBLOCK_CMDS][2]; ++-#endif ++ const AVClass *c; // needed by private avoptions ++ AVCodecContext *avctx; ++ ++@@ -928,6 +932,10 @@ typedef struct HEVCContext { ++ int pass0_job; // Pass0 does coefficient decode ++ int pass1_job; // Pass1 does pixel processing ++ int pass2_job; // Pass2 does reconstruction and deblocking +++ int ctu_count; // Number of CTUs done in pass0 so far +++ int max_ctu_count; // Number of CTUs when we trigger a round of processing +++ int ctu_per_y_chan; // Number of CTUs per luma QPU +++ int ctu_per_uv_chan; // Number of CTUs per chroma QPU ++ #ifdef RPI_INTER_QPU ++ GPU_MEM_PTR_T unif_mvs_ptr[RPI_MAX_JOBS]; ++ uint32_t *unif_mvs[RPI_MAX_JOBS]; // Base of memory for motion vector commands ++@@ -936,6 +944,7 @@ typedef struct HEVCContext { ++ uint32_t *mvs_base[RPI_MAX_JOBS][8]; ++ // these pointers are to the next free space ++ uint32_t *u_mvs[RPI_MAX_JOBS][8]; +++ uint32_t *curr_u_mvs; // Current uniform stream to use for chroma ++ // Function pointers ++ uint32_t mc_filter_uv; ++ uint32_t mc_filter_uv_b0; ++@@ -946,6 +955,7 @@ typedef struct HEVCContext { ++ uint32_t *y_unif_mvs[RPI_MAX_JOBS]; // Base of memory for motion vector commands ++ uint32_t *y_mvs_base[RPI_MAX_JOBS][12]; ++ uint32_t *y_mvs[RPI_MAX_JOBS][12]; +++ uint32_t *curr_y_mvs; // Current uniform stream for luma ++ // Function pointers ++ uint32_t mc_filter; ++ uint32_t mc_filter_b; ++@@ -1084,6 +1094,9 @@ typedef struct HEVCContext { ++ uint32_t max_mastering_luminance; ++ uint32_t min_mastering_luminance; ++ +++#ifdef RPI +++ int dblk_cmds[RPI_MAX_JOBS][RPI_MAX_DEBLOCK_CMDS][2]; +++#endif ++ } HEVCContext; ++ ++ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx, ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index b286bbf..1f04790 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -891,7 +891,7 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ 
int n_uv = n >> s->ps.sps->vshift[1]; ++ int sz,base; ++ if (curr_uv < 0) curr_uv = 0; ++- if (n_uv<=curr_uv) { assert(0); return; } // Should not happen +++ if (n_uv<=curr_uv) { return; } ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(s->frame->buf[1]); ++diff --git a/libavcodec/hevcpred_template.c b/libavcodec/hevcpred_template.c ++index 325b60e..28d2653 100644 ++--- a/libavcodec/hevcpred_template.c +++++ b/libavcodec/hevcpred_template.c ++@@ -72,7 +72,7 @@ do { \ ++ else \ ++ a = PIXEL_SPLAT_X4(ptr[i + 3]) ++ #ifdef RPI_WORKER ++- HEVCLocalContextIntra *lc = s->enable_rpi ? &s->HEVClcIntra : (HEVCLocalContextIntra *)s->HEVClc ; +++ HEVCLocalContextIntra *lc = (s->enable_rpi) ? &s->HEVClcIntra : (HEVCLocalContextIntra *)s->HEVClc ; ++ #else ++ HEVCLocalContext *lc = s->HEVClc; ++ #endif ++-- ++2.7.4 ++ ++ ++From 1674a80d147e5342ef6ea9a4fb4ddfc640c15a05 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Thu, 4 Jun 2015 15:48:10 +0100 ++Subject: [PATCH 59/68] Move deblocker into second pass ++ ++--- ++ libavcodec/hevc.c | 79 +++++++++++++++++++++++++++++++++++++++++++++---------- ++ 1 file changed, 65 insertions(+), 14 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index ec67252..6cecbdd 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -67,6 +67,8 @@ ++ static void rpi_execute_pred_cmds(HEVCContext *s); ++ static void rpi_execute_inter_cmds(HEVCContext *s); ++ static void rpi_begin(HEVCContext *s); +++ static void flush_frame(HEVCContext *s,AVFrame *frame); +++ static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2); ++ ++ // Define INTER_PASS0 to do inter prediction in first pass ++ //#define INTER_PASS0 ++@@ -227,6 +229,11 @@ static void *worker_start(void *arg) ++ // Wait for transform completion ++ vpu_wait(s->vpu_id); ++ +++ // Perform intra prediction and 
residual reconstruction +++ rpi_execute_pred_cmds(s); +++ // Perform deblocking for CTBs in this row +++ rpi_execute_dblk_cmds(s); +++ ++ worker_complete_middle_job(s); ++ LOG_EXIT ++ } ++@@ -248,10 +255,6 @@ static void *worker_deblock_start(void *arg) ++ break; ++ } ++ LOG_ENTER ++- // Perform intra prediction and residual reconstruction ++- rpi_execute_pred_cmds(s); ++- // Perform deblocking for CTBs in this row ++- rpi_execute_dblk_cmds(s); ++ ++ worker_complete_job(s); ++ LOG_EXIT ++@@ -2983,7 +2986,7 @@ static void hls_decode_neighbour(HEVCContext *s, int x_ctb, int y_ctb, ++ static void rpi_execute_dblk_cmds(HEVCContext *s) ++ { ++ int n; ++- int job = s->pass2_job; +++ int job = s->pass1_job; ++ int ctb_size = 1 << s->ps.sps->log2_ctb_size; ++ int (*p)[2] = s->dblk_cmds[job]; ++ for(n = s->num_dblk_cmds[job]; n>0 ;n--,p++) { ++@@ -3021,7 +3024,7 @@ static void rpi_execute_transform(HEVCContext *s) ++ static void rpi_execute_pred_cmds(HEVCContext *s) ++ { ++ int i; ++- int job = s->pass2_job; +++ int job = s->pass1_job; ++ HEVCPredCmd *cmd = s->univ_pred_cmds[job]; ++ #ifdef RPI_WORKER ++ HEVCLocalContextIntra *lc = &s->HEVClcIntra; ++@@ -3506,11 +3509,10 @@ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ ++ #ifdef RPI_MULTI_MAILBOX ++ #ifdef RPI_CACHE_UNIF_MVS ++- gpu_cache_flush3(&s->coeffs_buf_accelerated[job],&s->y_unif_mvs_ptr[job], &s->unif_mvs_ptr[job]); +++ flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],&s->y_unif_mvs_ptr[job], &s->unif_mvs_ptr[job]); ++ #else ++- gpu_cache_flush(&s->coeffs_buf_accelerated[job]); +++ flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],NULL,NULL); ++ #endif ++- ++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], s->num_coeffs[job][3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++ (uint32_t)(unif_vc+(s->mvs_base[job][0 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), ++@@ -3613,6 +3615,60 @@ static void 
flush_frame(HEVCContext *s,AVFrame *frame) ++ #endif ++ } ++ +++static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2) +++{ +++#ifdef RPI_FAST_CACHEFLUSH +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ int n = s->ps.sps->height; +++ int curr_y = 0; +++ int curr_uv = 0; +++ int n_uv = n >> s->ps.sps->vshift[1]; +++ int sz,base; +++ sz = s->frame->linesize[1] * (n_uv-curr_uv); +++ base = s->frame->linesize[1] * curr_uv; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = p->arm + base; +++ iocache.s[0].size = sz; +++ p = av_buffer_pool_opaque(frame->buf[2]); +++ iocache.s[1].handle = p->vcsm_handle; +++ iocache.s[1].cmd = 3; // clean+invalidate +++ iocache.s[1].addr = p->arm + base; +++ iocache.s[1].size = sz; +++ p = av_buffer_pool_opaque(frame->buf[0]); +++ sz = s->frame->linesize[0] * (n-curr_y); +++ base = s->frame->linesize[0] * curr_y; +++ iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].cmd = 3; // clean+invalidate +++ iocache.s[2].addr = p->arm + base; +++ iocache.s[2].size = sz; +++ +++ iocache.s[3].handle = p0->vcsm_handle; +++ iocache.s[3].cmd = 3; // clean+invalidate +++ iocache.s[3].addr = (int) p0->arm; +++ iocache.s[3].size = p0->numbytes; +++ if (p1) { +++ iocache.s[4].handle = p1->vcsm_handle; +++ iocache.s[4].cmd = 3; // clean+invalidate +++ iocache.s[4].addr = (int) p1->arm; +++ iocache.s[4].size = p1->numbytes; +++ } +++ if (p2) { +++ iocache.s[5].handle = p2->vcsm_handle; +++ iocache.s[5].cmd = 3; // clean+invalidate +++ iocache.s[5].addr = (int) p2->arm; +++ iocache.s[5].size = p2->numbytes; +++ } +++ vcsm_clean_invalid( &iocache ); +++#else +++ flush_buffer(frame->buf[0]); +++ flush_buffer(frame->buf[1]); +++ flush_buffer(frame->buf[2]); +++ gpu_cache_flush3(p0, p1, p2); +++#endif +++} +++ ++ #endif ++ ++ static int hls_decode_entry(AVCodecContext *avctxt, 
void *isFilterThread) ++@@ -4127,11 +4183,6 @@ static int hevc_frame_start(HEVCContext *s) ++ if (!s->avctx->hwaccel) ++ ff_thread_finish_setup(s->avctx); ++ ++-#ifdef RPI_INTER_QPU ++- // Invalidate the output data buffer so it is ready for the QPUs to write into it. ++- flush_frame(s,s->frame); ++-#endif ++- ++ return 0; ++ ++ fail: ++-- ++2.7.4 ++ ++ ++From a453fe438c4ab311d6476955d0a40a5d2ed8a1c6 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Thu, 4 Jun 2015 16:10:23 +0100 ++Subject: [PATCH 60/68] Change order of ctu accesses to improve qpu performance ++ ++--- ++ libavcodec/hevc.c | 8 ++++---- ++ 1 file changed, 4 insertions(+), 4 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 6cecbdd..ec17e64 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -3737,19 +3737,19 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ s->filter_slice_edges[ctb_addr_rs] = s->sh.slice_loop_filter_across_slices_enabled_flag; ++ ++ #ifdef RPI_INTER_QPU ++- s->curr_u_mvs = s->u_mvs[s->pass0_job][s->ctu_count / s->ctu_per_uv_chan]; +++ s->curr_u_mvs = s->u_mvs[s->pass0_job][s->ctu_count % 8]; ++ #endif ++ #ifdef RPI_LUMA_QPU ++- s->curr_y_mvs = s->y_mvs[s->pass0_job][s->ctu_count / s->ctu_per_y_chan]; +++ s->curr_y_mvs = s->y_mvs[s->pass0_job][s->ctu_count % 12]; ++ #endif ++ ++ more_data = hls_coding_quadtree(s, x_ctb, y_ctb, s->ps.sps->log2_ctb_size, 0); ++ ++ #ifdef RPI_INTER_QPU ++- s->u_mvs[s->pass0_job][s->ctu_count / s->ctu_per_uv_chan] = s->curr_u_mvs; +++ s->u_mvs[s->pass0_job][s->ctu_count % 8]= s->curr_u_mvs; ++ #endif ++ #ifdef RPI_LUMA_QPU ++- s->y_mvs[s->pass0_job][s->ctu_count / s->ctu_per_y_chan] = s->curr_y_mvs; +++ s->y_mvs[s->pass0_job][s->ctu_count % 12] = s->curr_y_mvs; ++ #endif ++ ++ #ifdef RPI ++-- ++2.7.4 ++ ++ ++From 504de0435e8f660c1b7b2d6ec053dc922a2d2896 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Mon, 8 Jun 2015 09:36:59 
+0100 ++Subject: [PATCH 61/68] Removed deblocker thread ++ ++--- ++ libavcodec/hevc.c | 77 +++---------------------------------------------------- ++ libavcodec/hevc.h | 4 --- ++ 2 files changed, 4 insertions(+), 77 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index ec17e64..1868532 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -70,11 +70,6 @@ ++ static void flush_frame(HEVCContext *s,AVFrame *frame); ++ static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2); ++ ++- // Define INTER_PASS0 to do inter prediction in first pass ++- //#define INTER_PASS0 ++- // Define LAUNCH_PASS0 to launch QPU/VPU from pass0 ++- //#define LAUNCH_PASS0 ++- ++ #endif ++ ++ // #define DISABLE_MC ++@@ -147,24 +142,12 @@ static void worker_submit_job(HEVCContext *s) ++ } ++ ++ // Call this to say we have completed pass1 ++-static void worker_complete_middle_job(HEVCContext *s) ++-{ ++- LOG_ENTER ++- pthread_mutex_lock(&s->worker_mutex); ++- s->worker_middle++; ++- s->pass1_job = (s->pass1_job + 1) % RPI_MAX_JOBS; // Move onto the next slot ++- pthread_cond_broadcast(&s->worker_cond_middle); // Let people know that the middle has moved ++- pthread_mutex_unlock(&s->worker_mutex); ++- LOG_EXIT ++-} ++- ++-// Call this to say we have completed pass2 ++ static void worker_complete_job(HEVCContext *s) ++ { ++ LOG_ENTER ++ pthread_mutex_lock(&s->worker_mutex); ++ s->worker_head++; ++- s->pass2_job = (s->pass2_job + 1) % RPI_MAX_JOBS; // Move onto the next slot +++ s->pass1_job = (s->pass1_job + 1) % RPI_MAX_JOBS; // Move onto the next slot ++ pthread_cond_broadcast(&s->worker_cond_head); // Let people know that the head has moved ++ pthread_mutex_unlock(&s->worker_mutex); ++ LOG_EXIT ++@@ -208,7 +191,7 @@ static void *worker_start(void *arg) ++ while(1) { ++ pthread_mutex_lock(&s->worker_mutex); ++ ++- while( !s->kill_worker && s->worker_tail - s->worker_middle <= 0) +++ while( !s->kill_worker && 
s->worker_tail - s->worker_head <= 0) ++ { ++ pthread_cond_wait(&s->worker_cond_tail, &s->worker_mutex); ++ } ++@@ -219,13 +202,9 @@ static void *worker_start(void *arg) ++ } ++ LOG_ENTER ++ // printf("%d %d %d : %d %d %d %d\n",s->poc, x_ctb, y_ctb, s->num_pred_cmds,s->num_mv_cmds,s->num_coeffs[2] >> 8,s->num_coeffs[3] >> 10); ++-#ifndef LAUNCH_PASS0 ++ rpi_launch_vpu_qpu(s); ++-#endif ++-#ifndef INTER_PASS0 ++ // Perform inter prediction ++ rpi_execute_inter_cmds(s); ++-#endif ++ // Wait for transform completion ++ vpu_wait(s->vpu_id); ++ ++@@ -234,28 +213,6 @@ static void *worker_start(void *arg) ++ // Perform deblocking for CTBs in this row ++ rpi_execute_dblk_cmds(s); ++ ++- worker_complete_middle_job(s); ++- LOG_EXIT ++- } ++- return NULL; ++-} ++- ++-static void *worker_deblock_start(void *arg) ++-{ ++- HEVCContext *s = (HEVCContext *)arg; ++- while(1) { ++- pthread_mutex_lock(&s->worker_mutex); ++- while( !s->kill_worker && s->worker_middle - s->worker_head <= 0) ++- { ++- pthread_cond_wait(&s->worker_cond_middle, &s->worker_mutex); ++- } ++- pthread_mutex_unlock(&s->worker_mutex); ++- ++- if (s->kill_worker) { ++- break; ++- } ++- LOG_ENTER ++- ++ worker_complete_job(s); ++ LOG_EXIT ++ } ++@@ -2998,11 +2955,7 @@ static void rpi_execute_dblk_cmds(HEVCContext *s) ++ static void rpi_execute_transform(HEVCContext *s) ++ { ++ int i=2; ++-#ifdef LAUNCH_PASS0 ++- int job = s->pass0_job; ++-#else ++ int job = s->pass1_job; ++-#endif ++ //int j; ++ //int16_t *coeffs = s->coeffs_buf_arm[i]; ++ //for(j=s->num_coeffs[i]; j > 0; j-= 16*16, coeffs+=16*16) { ++@@ -3057,11 +3010,7 @@ static void rpi_execute_pred_cmds(HEVCContext *s) ++ ++ static void rpi_execute_inter_cmds(HEVCContext *s) ++ { ++-#ifdef INTER_PASS0 ++- int job = s->pass0_job; ++-#else ++ int job = s->pass1_job; ++-#endif ++ HEVCMvCmd *cmd = s->unif_mv_cmds[job]; ++ int n,cidx; ++ AVFrame myref; ++@@ -3467,11 +3416,7 @@ static void rpi_simulate_inter_qpu(HEVCContext *s) ++ static void 
rpi_launch_vpu_qpu(HEVCContext *s) ++ { ++ int k; ++-#ifdef LAUNCH_PASS0 ++- int job = s->pass0_job; ++-#else ++ int job = s->pass1_job; ++-#endif ++ int i; ++ uint32_t *unif_vc = (uint32_t *)s->unif_mvs_ptr[job].vc; ++ #ifdef RPI_LUMA_QPU ++@@ -3574,10 +3519,12 @@ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ ++ #ifdef RPI ++ +++#ifndef RPI_FAST_CACHEFLUSH ++ static void flush_buffer(AVBufferRef *bref) { ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++ gpu_cache_flush(p); ++ } +++#endif ++ ++ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ { ++@@ -3715,7 +3662,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI_WORKER ++ s->pass0_job = 0; ++ s->pass1_job = 0; ++- s->pass2_job = 0; ++ #endif ++ #ifdef RPI ++ rpi_begin(s); ++@@ -3767,12 +3713,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ #ifdef RPI_WORKER ++ if (s->used_for_ref) { ++ // Split work load onto separate threads so we make as rapid progress as possible with this frame ++- #ifdef INTER_PASS0 ++- rpi_execute_inter_cmds(s); ++- #endif ++- #ifdef LAUNCH_PASS0 ++- rpi_launch_vpu_qpu(s); ++- #endif ++ // Pass on this job to worker thread ++ worker_submit_job(s); ++ // Make sure we have space to prepare the next job ++@@ -3814,8 +3754,6 @@ static int hls_decode_entry(AVCodecContext *avctxt, void *isFilterThread) ++ // Wait for the worker to finish all its jobs ++ if (s->enable_rpi) { ++ worker_wait(s); ++- av_assert0(s->pass0_job==s->pass1_job); ++- av_assert0(s->pass1_job==s->pass2_job); ++ } ++ #endif ++ ++@@ -4565,16 +4503,13 @@ static av_cold void hevc_init_worker(HEVCContext *s) ++ { ++ int err; ++ pthread_cond_init(&s->worker_cond_head, NULL); ++- pthread_cond_init(&s->worker_cond_middle, NULL); ++ pthread_cond_init(&s->worker_cond_tail, NULL); ++ pthread_mutex_init(&s->worker_mutex, NULL); ++ ++ s->worker_tail=0; ++- s->worker_middle=0; ++ s->worker_head=0; ++ s->kill_worker=0; ++ err = 
pthread_create(&s->worker_thread, NULL, worker_start, s); ++- err = pthread_create(&s->worker_deblock_thread, NULL, worker_deblock_start, s); ++ if (err) { ++ printf("Failed to create worker thread\n"); ++ exit(-1); ++@@ -4586,17 +4521,13 @@ static av_cold void hevc_exit_worker(HEVCContext *s) ++ void *res; ++ s->kill_worker=1; ++ pthread_cond_broadcast(&s->worker_cond_tail); ++- pthread_cond_broadcast(&s->worker_cond_middle); ++ pthread_join(s->worker_thread, &res); ++- pthread_join(s->worker_deblock_thread, &res); ++ ++ pthread_cond_destroy(&s->worker_cond_head); ++- pthread_cond_destroy(&s->worker_cond_middle); ++ pthread_cond_destroy(&s->worker_cond_tail); ++ pthread_mutex_destroy(&s->worker_mutex); ++ ++ s->worker_tail=0; ++- s->worker_middle=0; ++ s->worker_head=0; ++ s->kill_worker=0; ++ } ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index a141316..ef5bfb1 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -931,7 +931,6 @@ typedef struct HEVCContext { ++ //GPU_MEM_PTR_T dummy; ++ int pass0_job; // Pass0 does coefficient decode ++ int pass1_job; // Pass1 does pixel processing ++- int pass2_job; // Pass2 does reconstruction and deblocking ++ int ctu_count; // Number of CTUs done in pass0 so far ++ int max_ctu_count; // Number of CTUs when we trigger a round of processing ++ int ctu_per_y_chan; // Number of CTUs per luma QPU ++@@ -963,15 +962,12 @@ typedef struct HEVCContext { ++ ++ #ifdef RPI_WORKER ++ pthread_t worker_thread; ++- pthread_t worker_deblock_thread; ++ pthread_cond_t worker_cond_head; ++ pthread_cond_t worker_cond_tail; ++- pthread_cond_t worker_cond_middle; ++ pthread_mutex_t worker_mutex; ++ ++ int worker_tail; // Contains the number of posted jobs ++ int worker_head; // Contains the number of completed jobs ++- int worker_middle; // Contains the number of completed jobs ++ int kill_worker; // set to 1 to terminate the worker ++ #endif ++ ++-- ++2.7.4 ++ ++ ++From 74892301cdb0829de959b798debac6ffe1c71603 Mon Sep 17 
00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Mon, 8 Jun 2015 11:04:43 +0100 ++Subject: [PATCH 62/68] Reduced amount of output frame that is invalidated ++ ++--- ++ libavcodec/hevc.c | 45 +++++++++++++++++++++++++++++---------------- ++ 1 file changed, 29 insertions(+), 16 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 1868532..cbb4f46 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -68,7 +68,7 @@ ++ static void rpi_execute_inter_cmds(HEVCContext *s); ++ static void rpi_begin(HEVCContext *s); ++ static void flush_frame(HEVCContext *s,AVFrame *frame); ++- static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2); +++ static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2, int job); ++ ++ #endif ++ ++@@ -3454,9 +3454,9 @@ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ ++ #ifdef RPI_MULTI_MAILBOX ++ #ifdef RPI_CACHE_UNIF_MVS ++- flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],&s->y_unif_mvs_ptr[job], &s->unif_mvs_ptr[job]); +++ flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],&s->y_unif_mvs_ptr[job], &s->unif_mvs_ptr[job], job); ++ #else ++- flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],NULL,NULL); +++ flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],NULL,NULL, job); ++ #endif ++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], s->num_coeffs[job][3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++@@ -3530,6 +3530,7 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ { ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); ++ int n = s->ps.sps->height; ++ int curr_y = 0; ++ int curr_uv = 0; ++@@ -3537,22 +3538,21 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ 
int sz,base; ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); ++ iocache.s[0].handle = p->vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = p->arm + base; +++ iocache.s[0].addr = (int)(p->arm) + base; ++ iocache.s[0].size = sz; ++ p = av_buffer_pool_opaque(frame->buf[2]); ++ iocache.s[1].handle = p->vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = p->arm + base; +++ iocache.s[1].addr = (int)(p->arm) + base; ++ iocache.s[1].size = sz; ++ p = av_buffer_pool_opaque(frame->buf[0]); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++ iocache.s[2].handle = p->vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = p->arm + base; +++ iocache.s[2].addr = (int)(p->arm) + base; ++ iocache.s[2].size = sz; ++ vcsm_clean_invalid( &iocache ); ++ #else ++@@ -3562,33 +3562,46 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ #endif ++ } ++ ++-static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2) +++static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2, int job) ++ { ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++- int n = s->ps.sps->height; ++- int curr_y = 0; ++- int curr_uv = 0; ++- int n_uv = n >> s->ps.sps->vshift[1]; +++ int n; +++ int curr_y; +++ int curr_uv; +++ int n_uv; +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); ++ int sz,base; +++ int (*d)[2] = s->dblk_cmds[job]; +++ int low=(*d)[1]; +++ int high=(*d)[1]; +++ for(n = s->num_dblk_cmds[job]; n>0 ;n--,d++) { +++ int y = (*d)[1]; +++ low=FFMIN(low,y); +++ high=FFMAX(high,y); +++ } +++ curr_y = low; +++ n = high+(1 << s->ps.sps->log2_ctb_size); +++ curr_uv = curr_y >> s->ps.sps->vshift[1]; +++ n_uv = n >> s->ps.sps->vshift[1]; 
+++ ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); ++ iocache.s[0].handle = p->vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = p->arm + base; +++ iocache.s[0].addr = (int)(p->arm) + base; ++ iocache.s[0].size = sz; ++ p = av_buffer_pool_opaque(frame->buf[2]); ++ iocache.s[1].handle = p->vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = p->arm + base; +++ iocache.s[1].addr = (int)(p->arm) + base; ++ iocache.s[1].size = sz; ++ p = av_buffer_pool_opaque(frame->buf[0]); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++ iocache.s[2].handle = p->vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = p->arm + base; +++ iocache.s[2].addr = (int)(p->arm) + base; ++ iocache.s[2].size = sz; ++ ++ iocache.s[3].handle = p0->vcsm_handle; ++-- ++2.7.4 ++ ++ ++From 090b6be5b501bd3c547700926e540397f0b39e69 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Mon, 8 Jun 2015 11:55:29 +0100 ++Subject: [PATCH 63/68] Packed 16x16 and 32x32 into the same buffer ++ ++--- ++ libavcodec/hevc.c | 24 +++++++++++++++--------- ++ libavcodec/hevc_cabac.c | 9 ++++++++- ++ libavcodec/rpi_qpu.c | 2 +- ++ 3 files changed, 24 insertions(+), 11 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index cbb4f46..a596534 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -299,12 +299,12 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ s->coeffs_buf_arm[job][0] = (int16_t*) s->coeffs_buf_default[job].arm; ++ if (!s->coeffs_buf_arm[job][0]) ++ goto fail; ++- gpu_malloc_cached(sizeof(int16_t) * coefs_per_row * 2, &s->coeffs_buf_accelerated[job]); +++ gpu_malloc_cached(sizeof(int16_t) * (coefs_per_row + 32*32), &s->coeffs_buf_accelerated[job]); // We prefetch past the end so provide an extra 
blocks worth of data ++ s->coeffs_buf_arm[job][2] = (int16_t*) s->coeffs_buf_accelerated[job].arm; ++ s->coeffs_buf_vc[job][2] = s->coeffs_buf_accelerated[job].vc; ++ if (!s->coeffs_buf_arm[job][2]) ++ goto fail; ++- s->coeffs_buf_arm[job][3] = coefs_per_row + s->coeffs_buf_arm[job][2]; +++ s->coeffs_buf_arm[job][3] = coefs_per_row + s->coeffs_buf_arm[job][2]; // This points to just beyond the end of the buffer. Coefficients fill in backwards. ++ s->coeffs_buf_vc[job][3] = sizeof(int16_t) * coefs_per_row + s->coeffs_buf_vc[job][2]; ++ } ++ } ++@@ -2956,15 +2956,20 @@ static void rpi_execute_transform(HEVCContext *s) ++ { ++ int i=2; ++ int job = s->pass1_job; ++- //int j; ++- //int16_t *coeffs = s->coeffs_buf_arm[i]; ++- //for(j=s->num_coeffs[i]; j > 0; j-= 16*16, coeffs+=16*16) { ++- // s->hevcdsp.idct[4-2](coeffs, 16); ++- //} +++ /*int j; +++ int16_t *coeffs = s->coeffs_buf_arm[job][i]; +++ for(j=s->num_coeffs[job][i]; j > 0; j-= 16*16, coeffs+=16*16) { +++ s->hevcdsp.idct[4-2](coeffs, 16); +++ } +++ i=3; +++ coeffs = s->coeffs_buf_arm[job][i] - s->num_coeffs[job][i]; +++ for(j=s->num_coeffs[job][i]; j > 0; j-= 32*32, coeffs+=32*32) { +++ s->hevcdsp.idct[5-2](coeffs, 32); +++ }*/ ++ ++ gpu_cache_flush(&s->coeffs_buf_accelerated[job]); ++ s->vpu_id = vpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], ++- s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], +++ s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3] - sizeof(int16_t) * s->num_coeffs[job][3], ++ s->num_coeffs[job][3] >> 10, 0, &s->coeffs_buf_accelerated[job]); ++ //vpu_execute_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[2], s->num_coeffs[2] >> 8, s->coeffs_buf_vc[3], s->num_coeffs[3] >> 10, 0); ++ //gpu_cache_flush(&s->coeffs_buf_accelerated); ++@@ -3458,7 +3463,8 @@ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ #else ++ flush_frame3(s, s->frame,&s->coeffs_buf_accelerated[job],NULL,NULL, job); ++ #endif ++- s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), 
vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, s->coeffs_buf_vc[job][3], s->num_coeffs[job][3] >> 10, 0, +++ s->vpu_id = vpu_qpu_post_code( vpu_get_fn(), vpu_get_constants(), s->coeffs_buf_vc[job][2], s->num_coeffs[job][2] >> 8, +++ s->coeffs_buf_vc[job][3] - sizeof(int16_t) * s->num_coeffs[job][3], s->num_coeffs[job][3] >> 10, 0, ++ qpu_get_fn(QPU_MC_SETUP_UV), ++ (uint32_t)(unif_vc+(s->mvs_base[job][0 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), ++ (uint32_t)(unif_vc+(s->mvs_base[job][1 ] - (uint32_t*)s->unif_mvs_ptr[job].arm)), ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 6523e66..8656917 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -1051,7 +1051,14 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ if (s->enable_rpi) { ++ int n = trafo_size * trafo_size; ++ if (use_vpu) { ++- coeffs = s->coeffs_buf_arm[s->pass0_job][log2_trafo_size - 2] + s->num_coeffs[s->pass0_job][log2_trafo_size - 2]; +++ // We support size 4 and size 5. 
+++ // Size 4 grows from the front (Coeffs_buf_arm[2] points to start of buf) +++ // Size 5 grows from the back (Coeffs_buf_arm[3] points to end of buf) +++ // num_coeffs is indexed by log2_trafo_size-2 +++ if (log2_trafo_size == 4) +++ coeffs = s->coeffs_buf_arm[s->pass0_job][log2_trafo_size - 2] + s->num_coeffs[s->pass0_job][log2_trafo_size - 2]; +++ else +++ coeffs = s->coeffs_buf_arm[s->pass0_job][log2_trafo_size - 2] - s->num_coeffs[s->pass0_job][log2_trafo_size - 2] - n; ++ s->num_coeffs[s->pass0_job][log2_trafo_size - 2] += n; ++ } else { ++ coeffs = s->coeffs_buf_arm[s->pass0_job][0] + s->num_coeffs[s->pass0_job][0]; ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 4480f72..0121fca 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -5,7 +5,7 @@ ++ // define RPI_TIME_TOTAL_VPU to print out how much time is spent in the VPI code ++ //#define RPI_TIME_TOTAL_VPU ++ // define RPI_TIME_TOTAL_POSTED to print out how much time is spent in the multi execute QPU/VPU combined ++-//#define RPI_TIME_TOTAL_POSTED +++#define RPI_TIME_TOTAL_POSTED ++ // define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion ++ #define RPI_ASYNC ++ ++-- ++2.7.4 ++ ++ ++From ed359bbce56817bf9db0e54701103bd0505c353b Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Thu, 25 Jun 2015 09:02:47 +0100 ++Subject: [PATCH 64/68] Moved luma deblock to VPU ++ ++--- ++ libavcodec/hevc.c | 18 +- ++ libavcodec/hevc.h | 11 + ++ libavcodec/hevc_filter.c | 120 ++- ++ libavcodec/rpi_hevc_transform.h | 1802 ++++++++++++++++++++++++++++++++++++++- ++ libavcodec/rpi_hevc_transform.s | 426 +++++++++ ++ libavcodec/rpi_qpu.c | 12 +- ++ libavcodec/rpi_shader.c | 2 +- ++ 7 files changed, 2378 insertions(+), 13 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index a596534..4ce94a7 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -246,6 +246,12 @@ static void 
pic_arrays_free(HEVCContext *s) ++ } ++ } ++ #endif +++#ifdef RPI_DEBLOCK_VPU +++ if (s->y_setup_arm) { +++ gpu_free(&s->y_setup_ptr); +++ s->y_setup_arm = 0; +++ } +++#endif ++ av_freep(&s->sao); ++ av_freep(&s->deblock); ++ ++@@ -283,12 +289,12 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ int min_pu_size = sps->min_pu_width * sps->min_pu_height; ++ ++ #ifdef RPI ++- av_assert0(sps); ++ int coefs_in_ctb = (1 << sps->log2_ctb_size) * (1 << sps->log2_ctb_size); ++ int coefs_per_luma = 64*64*24*RPI_NUM_CHUNKS; ++ int coefs_per_chroma = (coefs_per_luma * 2) >> sps->vshift[1] >> sps->hshift[1]; ++ int coefs_per_row = coefs_per_luma + coefs_per_chroma; ++ int job; +++ av_assert0(sps); ++ s->max_ctu_count = coefs_per_luma / coefs_in_ctb; ++ s->ctu_per_y_chan = s->max_ctu_count / 12; ++ s->ctu_per_uv_chan = s->max_ctu_count / 8; ++@@ -309,6 +315,16 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ } ++ } ++ #endif +++#ifdef RPI_DEBLOCK_VPU +++ s->enable_rpi_deblock = !sps->sao_enabled; +++ s->setup_width = (sps->width+15) / 16; +++ s->setup_height = (sps->height+15) / 16; +++ gpu_malloc_uncached(sizeof(*s->y_setup_arm) * s->setup_width * s->setup_height, &s->y_setup_ptr); // TODO make this cached +++ s->y_setup_arm = (void*)s->y_setup_ptr.arm; +++ s->y_setup_vc = (void*)s->y_setup_ptr.vc; +++ memset(s->y_setup_arm, 0, s->y_setup_ptr.numbytes); +++ printf("Setup %d by %d by %d\n",s->setup_width,s->setup_height,sizeof(*s->y_setup_arm)); +++#endif ++ ++ s->bs_width = (width >> 2) + 1; ++ s->bs_height = (height >> 2) + 1; ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index ef5bfb1..cf08489 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -57,6 +57,8 @@ ++ // Define RPI_WORKER to launch a worker thread for pixel processing tasks ++ #define RPI_WORKER ++ +++ #define RPI_DEBLOCK_VPU +++ ++ #endif ++ ++ #define MAX_DPB_SIZE 16 // A.4.1 ++@@ -971,6 +973,15 @@ typedef struct HEVCContext { ++ int kill_worker; // set 
to 1 to terminate the worker ++ #endif ++ +++#ifdef RPI_DEBLOCK_VPU +++ int enable_rpi_deblock; +++ GPU_MEM_PTR_T y_setup_ptr; +++ uint8_t (*y_setup_arm)[2][2][2][4]; +++ uint8_t (*y_setup_vc)[2][2][2][4]; +++ int setup_width; // Number of 16x16 blocks across the image +++ int setup_height; // Number of 16x16 blocks down the image +++#endif +++ ++ #endif ++ ++ uint8_t *cabac_state; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 1f04790..06371da 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -564,6 +564,19 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->frame->linesize[LUMA], ++ beta, tc, no_p, no_q); ++ } else +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock) { +++ uint8_t (*setup)[2][2][4]; +++ int num16 = (y>>4)*s->setup_width + (x>>4); +++ int a = ((y>>3) & 1) << 1; +++ int b = (x>>3) & 1; +++ setup = s->y_setup_arm[num16]; +++ setup[0][b][0][a] = beta; +++ setup[0][b][0][a + 1] = beta; +++ setup[0][b][1][a] = tc[0]; +++ setup[0][b][1][a + 1] = tc[1]; +++ } else +++#endif ++ s->hevcdsp.hevc_v_loop_filter_luma(src, ++ s->frame->linesize[LUMA], ++ beta, tc, no_p, no_q); ++@@ -596,6 +609,19 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->frame->linesize[LUMA], ++ beta, tc, no_p, no_q); ++ } else +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock) { +++ uint8_t (*setup)[2][2][4]; +++ int num16 = (y>>4)*s->setup_width + (x>>4); +++ int a = ((x>>3) & 1) << 1; +++ int b = (y>>3) & 1; +++ setup = s->y_setup_arm[num16]; +++ setup[1][b][0][a] = beta; +++ setup[1][b][0][a + 1] = beta; +++ setup[1][b][1][a] = tc[0]; +++ setup[1][b][1][a + 1] = tc[1]; +++ } else +++#endif ++ s->hevcdsp.hevc_h_loop_filter_luma(src, ++ s->frame->linesize[LUMA], ++ beta, tc, no_p, no_q); ++@@ -876,33 +902,85 @@ static void flush_buffer(AVBufferRef *bref) { ++ } ++ ++ // Return Physical address for this image ++-static int ff_hevc_buf_base(AVBufferRef *bref) { +++static 
uint32_t get_vc_address(AVBufferRef *bref) { ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++- return p->vc & 0x3fffffff; +++ return p->vc; ++ } ++ +++// ff_hevc_flush_buffer_lines +++// flushes and invalidates all pixel rows in [start,end-1] +++static void ff_hevc_flush_buffer_lines(HEVCContext *s, int start, int end, int flush_luma, int flush_chroma) +++{ +++#ifdef RPI_FAST_CACHEFLUSH +++ struct vcsm_user_clean_invalid_s iocache = {}; +++ int curr_y = start; +++ int n = end; +++ int curr_uv = curr_y >> s->ps.sps->vshift[1]; +++ int n_uv = n >> s->ps.sps->vshift[1]; +++ int sz,base; +++ GPU_MEM_PTR_T *p; +++ if (curr_uv < 0) curr_uv = 0; +++ if (n_uv<=curr_uv) { return; } +++ sz = s->frame->linesize[1] * (n_uv-curr_uv); +++ base = s->frame->linesize[1] * curr_uv; +++ if (flush_chroma) { +++ p = av_buffer_pool_opaque(s->frame->buf[1]); +++ iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].cmd = 3; // clean+invalidate +++ iocache.s[0].addr = (int)p->arm + base; +++ iocache.s[0].size = sz; +++ p = av_buffer_pool_opaque(s->frame->buf[2]); +++ iocache.s[1].handle = p->vcsm_handle; +++ iocache.s[1].cmd = 3; // clean+invalidate +++ iocache.s[1].addr = (int)p->arm + base; +++ iocache.s[1].size = sz; +++ } +++ if (flush_luma) { +++ p = av_buffer_pool_opaque(s->frame->buf[0]); +++ sz = s->frame->linesize[0] * (n-curr_y); +++ base = s->frame->linesize[0] * curr_y; +++ iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].cmd = 3; // clean+invalidate +++ iocache.s[2].addr = (int)p->arm + base; +++ iocache.s[2].size = sz; +++ } +++ vcsm_clean_invalid( &iocache ); +++#else +++ if (flush_chroma) { +++ flush_buffer(s->frame->buf[1]); +++ flush_buffer(s->frame->buf[2]); +++ } +++ if (flush_luma) { +++ flush_buffer(s->frame->buf[0]); +++ } +++#endif +++} +++ +++ ++ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ { ++ if (s->enable_rpi && s->used_for_ref) { +++ // TODO make this use ff_hevc_flush_buffer_lines ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct 
vcsm_user_clean_invalid_s iocache = {}; ++ int curr_y = ((int *)f->progress->data)[0]; ++ int curr_uv = curr_y >> s->ps.sps->vshift[1]; ++ int n_uv = n >> s->ps.sps->vshift[1]; ++ int sz,base; +++ GPU_MEM_PTR_T *p; ++ if (curr_uv < 0) curr_uv = 0; ++ if (n_uv<=curr_uv) { return; } ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(s->frame->buf[1]); +++ p = av_buffer_pool_opaque(s->frame->buf[1]); ++ iocache.s[0].handle = p->vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = p->arm + base; +++ iocache.s[0].addr = (int)p->arm + base; ++ iocache.s[0].size = sz; ++ p = av_buffer_pool_opaque(s->frame->buf[2]); ++ iocache.s[1].handle = p->vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = p->arm + base; +++ iocache.s[1].addr = (int)p->arm + base; ++ iocache.s[1].size = sz; ++ ++ #ifdef RPI_LUMA_QPU ++@@ -911,7 +989,7 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ base = s->frame->linesize[0] * curr_y; ++ iocache.s[2].handle = p->vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = p->arm + base; +++ iocache.s[2].addr = (int)p->arm + base; ++ iocache.s[2].size = sz; ++ #endif ++ vcsm_clean_invalid( &iocache ); ++@@ -930,11 +1008,40 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ } ++ #endif ++ +++#ifdef RPI_DEBLOCK_VPU +++/* rpi_deblock deblocks an entire row of ctbs using the VPU */ +++static void rpi_deblock(HEVCContext *s, int y, int ctb_size) +++{ +++ // Flush image, 4 lines above to bottom of ctb stripe +++ ff_hevc_flush_buffer_lines(s, FFMAX(y-4,0), y+ctb_size, 1, 0); +++ // TODO flush buffer of beta/tc setup when it becomes cached +++ // Call VPU +++ // TODO add this to a separate pipeline of VPU jobs that can be run in parallel and wait for completion +++ vpu_wait(vpu_post_code( vpu_get_fn(), get_vc_address(s->frame->buf[0]) + 
s->frame->linesize[0] * y, s->frame->linesize[0], +++ s->setup_width, (int) ( s->y_setup_vc + s->setup_width * (y>>4) ), +++ ctb_size>>4, 2, 0)); // 2 means to do the deblocking code +++} +++ +++static void rpi_deblock2(HEVCContext *s, int y, int ctb_size) +++{ +++ int y2; +++ for(y2=y;y2<y+ctb_size;y2+=16) { +++ rpi_deblock(s,y2,16); +++ } +++} +++#endif +++ ++ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ { ++ int x_end = x >= s->ps.sps->width - ctb_size; ++ if (s->avctx->skip_loop_filter < AVDISCARD_ALL) ++ deblocking_filter_CTB(s, x, y); +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock && x_end) +++ { +++ rpi_deblock(s, y, ctb_size); +++ } +++#endif ++ if (s->ps.sps->sao_enabled) { ++ int y_end = y >= s->ps.sps->height - ctb_size; ++ if (y && x) ++@@ -965,6 +1072,7 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ //if (((y + ctb_size)&63)==0) ++ #ifdef RPI_INTER_QPU ++ ff_hevc_flush_buffer(s, &s->ref->tf, y + ctb_size - 4); +++ // TODO we no longer need to flush the luma buffer as it is in GPU memory when using deblocking on the rpi ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); ++ } ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++index 4f13622..b3f155f 100644 ++--- a/libavcodec/rpi_hevc_transform.h +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -3,7 +3,13 @@ unsigned char rpi_hevc_transform [] = { ++ 106, ++ 0, ++ 144, ++-35, +++38, +++1, +++37, +++106, +++0, +++144, +++57, ++ 1, ++ 169, ++ 3, ++@@ -627,4 +633,1798 @@ unsigned char rpi_hevc_transform [] = { ++ 30, ++ 90, ++ 0, +++169, +++3, +++73, +++64, +++52, +++64, +++45, +++64, +++2, +++64, +++10, +++64, +++64, +++198, +++1, +++7, +++8, +++232, +++63, +++0, +++0, +++0, +++6, +++232, +++253, +++255, +++255, +++255, +++0, +++246, +++0, +++0, +++0, +++4, +++215, +++64, +++3, +++96, +++2, +++248, +++0, +++35, +++0, +++0, +++64, +++56, +++0, +++0, +++4, +++248, +++0, +++36, +++0, +++0, +++64, 
+++56, +++8, +++0, +++0, +++240, +++64, +++0, +++132, +++3, +++128, +++240, +++0, +++0, +++132, +++3, +++128, +++144, +++137, +++0, +++131, +++98, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++129, +++0, +++131, +++102, +++0, +++158, +++67, +++0, +++2, +++248, +++0, +++35, +++0, +++0, +++64, +++56, +++0, +++0, +++4, +++248, +++0, +++36, +++0, +++0, +++64, +++56, +++8, +++0, +++0, +++240, +++64, +++0, +++132, +++3, +++128, +++240, +++0, +++0, +++132, +++3, +++128, +++144, +++108, +++0, +++131, +++98, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++100, +++0, +++131, +++102, +++0, +++248, +++64, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++248, +++0, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++144, +++161, +++0, +++188, +++64, +++67, +++232, +++0, +++2, +++0, +++0, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++150, +++0, +++195, +++232, +++0, +++2, +++0, +++0, +++12, +++128, +++7, +++192, +++130, +++248, +++0, +++0, +++112, +++192, +++224, +++16, +++195, +++31, +++132, +++248, +++1, +++0, +++112, +++0, +++224, +++16, +++203, +++31, +++3, +++99, +++131, +++71, +++68, +++232, +++32, +++0, +++0, +++0, +++0, +++99, +++2, +++99, +++23, +++102, +++7, +++106, +++127, +++156, +++182, +++255, +++0, +++248, +++64, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++248, +++0, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++144, +++112, +++0, +++188, +++64, +++67, +++232, +++0, +++2, +++0, +++0, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++101, +++0, +++195, +++232, +++0, +++2, +++0, +++0, +++12, +++128, +++7, +++192, +++130, +++248, +++0, +++0, +++112, +++192, +++224, +++16, +++195, +++31, +++132, +++248, +++1, +++0, +++112, +++0, +++224, +++16, +++203, +++31, +++25, +++102, +++9, +++106, +++2, +++30, +++41, +++3, +++26, +++87, +++162, +++64, +++64, +++198, 
+++1, +++23, +++127, +++158, +++103, +++255, +++239, +++3, +++0, +++254, +++0, +++143, +++92, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++64, +++143, +++93, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++143, +++94, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++143, +++95, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++142, +++208, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++142, +++209, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++64, +++142, +++210, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++0, +++142, +++211, +++0, +++0, +++240, +++12, +++0, +++128, +++144, +++107, +++0, +++8, +++255, +++99, +++23, +++0, +++212, +++192, +++51, +++0, +++0, +++8, +++255, +++163, +++23, +++0, +++228, +++192, +++51, +++0, +++0, +++8, +++255, +++227, +++23, +++0, +++244, +++192, +++51, +++0, +++0, +++8, +++255, +++35, +++52, +++0, +++180, +++192, +++51, +++0, +++0, +++8, +++255, +++99, +++52, +++0, +++164, +++192, +++51, +++0, +++0, +++8, +++255, +++163, +++52, +++0, +++148, +++192, +++51, +++0, +++0, +++111, +++3, +++239, +++3, +++0, +++254, +++0, +++143, +++12, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++64, +++143, +++13, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++143, +++14, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++143, +++15, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++142, +++16, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++142, +++17, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++64, +++142, +++18, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++0, +++142, +++19, +++0, +++0, +++240, +++12, +++0, +++128, +++144, +++33, +++0, +++8, +++255, +++99, +++3, +++0, +++212, +++192, +++51, +++0, +++0, +++8, +++255, +++163, +++3, +++0, +++228, +++192, +++51, +++0, +++0, +++8, +++255, +++227, +++3, +++0, +++244, +++192, +++51, +++0, +++0, +++8, +++255, +++35, +++4, +++0, +++180, +++192, +++51, +++0, +++0, +++8, +++255, +++99, +++4, +++0, +++164, +++192, 
+++51, +++0, +++0, +++8, +++255, +++163, +++4, +++0, +++148, +++192, +++51, +++0, +++0, +++111, +++3, +++32, +++246, +++192, +++11, +++1, +++16, +++32, +++246, +++2, +++137, +++47, +++240, +++40, +++246, +++2, +++140, +++47, +++240, +++128, +++245, +++99, +++140, +++5, +++4, +++0, +++247, +++99, +++140, +++1, +++20, +++88, +++246, +++99, +++140, +++1, +++20, +++0, +++247, +++35, +++136, +++62, +++226, +++32, +++247, +++35, +++136, +++32, +++210, +++0, +++247, +++34, +++136, +++63, +++2, +++208, +++246, +++34, +++136, +++0, +++4, +++0, +++247, +++99, +++136, +++58, +++162, +++32, +++247, +++99, +++136, +++33, +++146, +++0, +++247, +++98, +++136, +++59, +++18, +++208, +++246, +++98, +++136, +++0, +++20, +++0, +++247, +++162, +++136, +++33, +++2, +++88, +++246, +++98, +++137, +++2, +++68, +++88, +++246, +++162, +++137, +++3, +++68, +++208, +++254, +++227, +++136, +++60, +++242, +++192, +++243, +++188, +++11, +++208, +++254, +++227, +++136, +++56, +++178, +++192, +++243, +++188, +++10, +++32, +++255, +++226, +++136, +++38, +++58, +++192, +++243, +++60, +++0, +++208, +++254, +++227, +++136, +++59, +++242, +++192, +++243, +++60, +++128, +++32, +++255, +++226, +++136, +++49, +++58, +++192, +++243, +++60, +++128, +++0, +++255, +++226, +++136, +++34, +++34, +++192, +++243, +++60, +++128, +++32, +++255, +++226, +++136, +++37, +++58, +++192, +++243, +++60, +++128, +++0, +++254, +++192, +++136, +++1, +++4, +++0, +++240, +++0, +++160, +++0, +++255, +++194, +++8, +++0, +++52, +++195, +++243, +++0, +++128, +++0, +++255, +++202, +++40, +++0, +++52, +++195, +++243, +++0, +++128, +++0, +++254, +++0, +++240, +++35, +++10, +++0, +++240, +++60, +++0, +++0, +++254, +++192, +++136, +++1, +++4, +++0, +++240, +++0, +++160, +++0, +++255, +++226, +++140, +++34, +++34, +++195, +++243, +++60, +++0, +++32, +++255, +++227, +++140, +++36, +++58, +++192, +++243, +++60, +++0, +++0, +++254, +++192, +++136, +++0, +++4, +++0, +++240, +++0, +++160, +++16, +++246, +++226, +++136, +++35, +++50, +++16, 
+++246, +++226, +++136, +++35, +++50, +++32, +++246, +++226, +++136, +++35, +++50, +++32, +++254, +++226, +++136, +++35, +++58, +++192, +++243, +++60, +++0, +++11, +++96, +++0, +++254, +++0, +++240, +++1, +++4, +++0, +++240, +++64, +++115, +++5, +++106, +++0, +++144, +++173, +++1, +++27, +++96, +++0, +++254, +++0, +++240, +++1, +++4, +++0, +++240, +++64, +++147, +++5, +++106, +++0, +++144, +++227, +++0, +++64, +++246, +++163, +++140, +++1, +++4, +++0, +++246, +++192, +++175, +++63, +++2, +++0, +++246, +++192, +++174, +++59, +++2, +++0, +++246, +++128, +++175, +++62, +++2, +++0, +++246, +++128, +++174, +++58, +++2, +++0, +++246, +++64, +++175, +++61, +++2, +++0, +++246, +++64, +++174, +++57, +++2, +++0, +++255, +++43, +++240, +++4, +++212, +++192, +++243, +++128, +++11, +++64, +++254, +++43, +++240, +++1, +++228, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++240, +++1, +++244, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++240, +++1, +++180, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++141, +++0, +++164, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++3, +++68, +++32, +++247, +++35, +++141, +++191, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++235, +++143, +++52, +++242, +++192, +++243, +++60, +++128, +++0, +++255, +++43, +++240, +++2, +++212, +++192, +++243, +++128, +++11, +++0, +++255, +++43, +++240, +++191, +++226, +++192, +++243, +++188, +++10, +++64, +++254, +++43, +++141, +++0, +++180, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++2, +++68, +++32, +++247, +++35, +++141, +++190, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++171, +++143, +++52, +++226, +++192, +++243, +++60, +++128, +++0, +++255, +++43, +++240, +++4, +++180, +++192, +++243, +++128, +++11, +++0, +++255, +++43, +++240, +++191, +++226, +++192, +++243, +++188, +++10, +++128, +++253, +++43, +++240, +++3, +++212, +++192, +++243, +++128, +++10, +++64, +++254, +++35, +++141, +++1, +++196, 
+++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++3, +++68, +++32, +++247, +++35, +++141, +++189, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++107, +++143, +++52, +++210, +++192, +++243, +++60, +++128, +++0, +++255, +++43, +++240, +++4, +++148, +++192, +++243, +++128, +++11, +++64, +++254, +++43, +++240, +++1, +++164, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++240, +++1, +++180, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++240, +++1, +++244, +++192, +++243, +++128, +++10, +++64, +++254, +++43, +++141, +++0, +++228, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++3, +++68, +++32, +++247, +++35, +++141, +++187, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++235, +++142, +++52, +++178, +++192, +++243, +++60, +++128, +++0, +++255, +++43, +++240, +++2, +++148, +++192, +++243, +++128, +++11, +++0, +++255, +++43, +++240, +++187, +++162, +++192, +++243, +++188, +++10, +++64, +++254, +++43, +++141, +++0, +++244, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++2, +++68, +++32, +++247, +++35, +++141, +++186, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++171, +++142, +++52, +++162, +++192, +++243, +++60, +++128, +++0, +++255, +++43, +++240, +++4, +++244, +++192, +++243, +++128, +++11, +++0, +++255, +++43, +++240, +++187, +++162, +++192, +++243, +++188, +++10, +++128, +++253, +++43, +++240, +++3, +++148, +++192, +++243, +++128, +++10, +++64, +++254, +++35, +++141, +++1, +++132, +++192, +++243, +++128, +++10, +++88, +++246, +++35, +++141, +++3, +++68, +++32, +++247, +++35, +++141, +++185, +++66, +++240, +++246, +++35, +++141, +++50, +++66, +++0, +++255, +++107, +++142, +++52, +++146, +++192, +++243, +++60, +++128, +++64, +++255, +++98, +++141, +++0, +++52, +++192, +++243, +++0, +++0, +++0, +++254, +++0, +++240, +++53, +++10, +++0, +++240, +++60, +++0, +++0, +++254, +++0, +++240, +++1, +++4, +++0, +++240, +++64, +++147, +++5, +++106, +++0, 
+++144, +++177, +++0, +++88, +++246, +++163, +++140, +++1, +++4, +++128, +++245, +++99, +++141, +++10, +++4, +++88, +++246, +++162, +++138, +++1, +++68, +++0, +++247, +++162, +++138, +++36, +++162, +++88, +++254, +++162, +++138, +++3, +++164, +++192, +++243, +++128, +++11, +++0, +++255, +++226, +++137, +++32, +++2, +++195, +++243, +++60, +++0, +++32, +++247, +++226, +++137, +++42, +++114, +++0, +++255, +++34, +++138, +++33, +++18, +++195, +++243, +++60, +++0, +++32, +++247, +++34, +++138, +++42, +++130, +++16, +++246, +++98, +++138, +++40, +++114, +++16, +++246, +++98, +++138, +++41, +++146, +++32, +++246, +++98, +++138, +++41, +++146, +++32, +++246, +++226, +++137, +++41, +++146, +++40, +++246, +++34, +++138, +++41, +++146, +++32, +++247, +++163, +++141, +++63, +++178, +++32, +++247, +++227, +++141, +++62, +++162, +++0, +++254, +++0, +++240, +++8, +++4, +++0, +++240, +++128, +++11, +++128, +++253, +++35, +++240, +++9, +++100, +++192, +++243, +++128, +++10, +++128, +++253, +++163, +++141, +++128, +++115, +++192, +++243, +++152, +++10, +++88, +++246, +++163, +++141, +++4, +++100, +++208, +++246, +++35, +++139, +++0, +++100, +++32, +++255, +++34, +++139, +++53, +++202, +++192, +++243, +++60, +++128, +++0, +++254, +++0, +++139, +++0, +++4, +++0, +++240, +++0, +++160, +++240, +++246, +++163, +++141, +++48, +++98, +++0, +++247, +++99, +++139, +++63, +++210, +++0, +++247, +++98, +++139, +++1, +++212, +++88, +++254, +++98, +++139, +++1, +++212, +++192, +++243, +++128, +++11, +++32, +++255, +++99, +++139, +++62, +++98, +++192, +++243, +++188, +++10, +++88, +++246, +++98, +++139, +++1, +++212, +++240, +++246, +++98, +++139, +++50, +++210, +++0, +++247, +++163, +++128, +++59, +++146, +++0, +++247, +++160, +++128, +++1, +++36, +++88, +++254, +++160, +++128, +++1, +++36, +++192, +++243, +++128, +++11, +++0, +++247, +++163, +++128, +++58, +++98, +++64, +++255, +++35, +++240, +++0, +++100, +++192, +++243, +++128, +++10, +++64, +++255, +++163, +++128, +++0, +++164, +++192, 
+++243, +++128, +++10, +++88, +++246, +++160, +++128, +++1, +++36, +++240, +++246, +++160, +++128, +++50, +++34, +++8, +++255, +++227, +++143, +++54, +++242, +++192, +++243, +++60, +++128, +++40, +++255, +++227, +++142, +++54, +++178, +++192, +++243, +++60, +++128, +++0, +++254, +++0, +++240, +++39, +++10, +++0, +++240, +++60, +++128, +++8, +++255, +++163, +++143, +++45, +++226, +++192, +++243, +++60, +++128, +++0, +++254, +++0, +++240, +++44, +++10, +++0, +++240, +++60, +++0, +++0, +++254, +++0, +++240, +++40, +++10, +++0, +++240, +++60, +++128, +++8, +++255, +++163, +++142, +++2, +++162, +++192, +++243, +++60, +++128, +++90, +++0, ++ }; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++index fd159bc..b055208 100644 ++--- a/libavcodec/rpi_hevc_transform.s +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -83,6 +83,8 @@ ++ hevc_trans_16x16: ++ cmp r5,1 ++ beq memclear16 +++ cmp r5,2 +++ beq hevc_deblock_16x16 ++ push r6-r15, lr # TODO cut down number of used registers ++ mov r14,r3 # coeffs32 ++ mov r15,r4 # num32 ++@@ -282,3 +284,427 @@ loop: ++ cmp r1,0 ++ bgt loop ++ b lr +++ +++ +++################################################################################ +++# HEVC VPU Deblock +++# +++# Vertical edges before horizontal +++# Decision can change every 4 pixels, but only 8 pixel boundaries are deblocked +++# +++# ARM is responsible for storing beta and tc for each 4 pixels horiz and vert edge. +++# The VPU code works in units of 16x16 blocks. +++# We do vertical filtering for the current block followed by horizontal filtering for the previous (except for the first time). +++# One final horizontal filter is required at the end. +++# PCM is not allowed in this code. +++# +++# +++# H(16-4:16+15,0) contains previous block (note that we need 4 lines above of context that may get altered during filtering) +++# H(16:31,16) contains current block (note that we do not need the upper lines until the horizontal filtering. 
+++ +++.set P0,63 +++.set P1,62 +++.set P2,61 +++.set P3,60 +++.set Q0,59 +++.set Q1,58 +++.set Q2,57 +++.set Q3,56 +++ +++.set dp,32 +++.set dq,33 +++.set d,34 +++.set decision,35 +++.set beta,36 +++.set beta2,37 +++.set beta3,38 +++.set ptest,39 +++.set qtest,40 +++.set pqtest,41 +++.set thresh,42 +++.set deltatest, 44 +++.set deltap1, 45 +++.set tc25, 46 +++.set setup,47 +++.set tc,48 +++.set tc25,49 +++.set tc2, 50 +++.set do_filter, 51 +++.set delta, 52 +++.set tc10, 53 +++.set delta0, 54 +++.set delta1, 55 +++.set zeros, 0 +++.set setup_input, 1 +++.set deltaq1, 2 +++ +++ +++ +++# hevc_deblock_16x16 deblocks an entire row that is 16 pixels high by the full width of the image. +++# Row has num16 16x16 blocks across +++# Beta goes from 0 to 64 +++# tc goes from 0 to 24 +++# setup[block_idx][0=vert,1=horz][0=first edge, 1=second edge][0=beta,1=tc][0..3=edge number] +++# has 8 bytes per edge +++# has 16 bytes per direction +++# has 32 bytes per 16x16 block +++# hevc_deblock_16x16(uint8_t *img (r0), int stride (r1), int num16w (r2), uint8_t setup[num16][2][2][2][4](r3),int num16h(r4)) +++hevc_deblock_16x16: +++ push r6-r15, lr +++ mov r9,r4 +++ mov r4,r3 +++ mov r13,r2 +++ mov r2,r0 +++ mov r10,r0 +++ subscale4 r0,r1 +++ mov r8,63 +++ mov r6,-3 +++ vmov H(zeros,0),0 +++# r7 is number of blocks still to load +++# r0 is location of current block - 4 * stride +++# r1 is stride +++# r2 is location of current block +++# r3 is offset of start of block (actual edges start at H(16,16)+r3 for horizontal and H(16,0)+r3 for vertical +++# r4 is setup +++# r5 is for temporary calculations +++# r8 holds 63 +++# r6 holds -3 +++# r9 holds the number of 16 high rows to process +++# r10 holds the original img base +++# r11 returns 0 if no filtering was done on the edge +++# r12 saves a copy of this +++# r13 is copy of width +++ +++process_row: +++ # First iteration does not do horizontal filtering on previous +++ mov r7, r13 +++ mov r3,0 +++ vldb H(12++,16)+r3,(r0 += r1) REP 4 # 
Load the current block +++ vldb H(16++,16)+r3,(r2 += r1) REP 16 +++ vldb H(setup_input,0), (r4) # We may wish to prefetch these +++ vstb H(zeros,0),(r4) +++ bl vert_filter +++ add r3,8 +++ vadd H(setup_input,0),H(setup_input,8),0 # Rotate to second set of 8 +++ bl vert_filter +++ sub r3,8 +++ b start_deblock_loop +++deblock_loop: +++ # Middle iterations do vertical on current block and horizontal on preceding +++ vldb H(12++,16)+r3,(r0 += r1) REP 4 # load the current block +++ vldb H(16++,16)+r3,(r2 += r1) REP 16 +++ vldb H(setup_input,0), (r4) +++ vstb H(zeros,0),(r4) +++ bl vert_filter +++ add r3,8 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl vert_filter +++ sub r3,8 +++ vldb H(setup_input,0), -16(r4) +++ vstb H(zeros,0),-16(r4) +++ bl horz_filter +++ mov r12,r11 +++ add r3,8*64 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl horz_filter +++ sub r3,8*64 +++ addcmpbeq r12,0,0,skip_save_top +++ vstb H(12++,0)+r3,-16(r0 += r1) REP 4 # Save the deblocked pixels for the previous block +++skip_save_top: +++ vstb H(16++,0)+r3,-16(r2 += r1) REP 16 +++start_deblock_loop: +++ # move onto next 16x16 (could do this with circular buffer support instead) +++ add r3,16 +++ and r3,r8 +++ add r4,32 +++ # Perform loop counter operations (may work with an addcmpbgt as well?) 
+++ add r0,16 +++ add r2,16 +++ sub r7,1 +++ cmp r7,0 # Are there still more blocks to load +++ bgt deblock_loop +++ +++ # Final iteration needs to just do horizontal filtering +++ vldb H(setup_input,0), -16(r4) +++ vstb H(zeros,0),-16(r4) +++ bl horz_filter +++ mov r12,r11 +++ add r3,8*64 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl horz_filter +++ sub r3,64*8 +++ addcmpbeq r12,0,0,skip_save_top2 +++ vstb H(12++,0)+r3,-16(r0 += r1) REP 4 # Save the deblocked pixels for the previous block +++skip_save_top2: +++ vstb H(16++,0)+r3,-16(r2 += r1) REP 16 +++ +++# Now look to see if we should do another row +++ sub r9,1 +++ cmp r9,0 +++ bgt start_again +++ pop r6-r15, pc +++start_again: +++ # Need to sort out r0,r2 to point to next row down +++ addscale16 r10,r1 +++ mov r2,r10 +++ subscale4 r0,r2,r1 +++ b process_row +++ +++ +++# At this stage H(16,16)+r3 points to the first pixel of the 16 high edge to be filtered +++# So we can reuse the code we move the parts to be filtered into HX(P0/P1/P2/P3/Q0/Q1/Q2/Q3,0) - we will perform a final saturation step on placing them back into the correct locations +++ +++vert_filter: +++ push lr +++ +++ vmov HX(P3,0), V(16,12)+r3 +++ vmov HX(P2,0), V(16,13)+r3 +++ vmov HX(P1,0), V(16,14)+r3 +++ vmov HX(P0,0), V(16,15)+r3 +++ vmov HX(Q0,0), V(16,16)+r3 +++ vmov HX(Q1,0), V(16,17)+r3 +++ vmov HX(Q2,0), V(16,18)+r3 +++ vmov HX(Q3,0), V(16,19)+r3 +++ +++ bl do_luma_filter +++ +++ vadds V(16,13)+r3, HX(P2,0), 0 +++ vadds V(16,14)+r3, HX(P1,0), 0 +++ vadds V(16,15)+r3, HX(P0,0), 0 +++ # P3 and Q3 never change so don't bother saving back +++ vadds V(16,16)+r3, HX(Q0,0), 0 +++ vadds V(16,17)+r3, HX(Q1,0), 0 +++ vadds V(16,18)+r3, HX(Q2,0), 0 +++ +++ pop pc +++ +++# Filter edge at H(16,0)+r3 +++horz_filter: +++ push lr +++ +++ vmov HX(P3,0), H(12,0)+r3 +++ vmov HX(P2,0), H(13,0)+r3 +++ vmov HX(P1,0), H(14,0)+r3 +++ vmov HX(P0,0), H(15,0)+r3 +++ vmov HX(Q0,0), H(16,0)+r3 +++ vmov HX(Q1,0), H(17,0)+r3 +++ vmov HX(Q2,0), H(18,0)+r3 +++ vmov 
HX(Q3,0), H(19,0)+r3 +++ +++ bl do_luma_filter +++ +++ vadds H(13,0)+r3, HX(P2,0), 0 +++ vadds H(14,0)+r3, HX(P1,0), 0 +++ vadds H(15,0)+r3, HX(P0,0), 0 +++ # P3 and Q3 never change so don't bother saving back +++ vadds H(16,0)+r3, HX(Q0,0), 0 +++ vadds H(17,0)+r3, HX(Q1,0), 0 +++ vadds H(18,0)+r3, HX(Q2,0), 0 +++ +++ pop pc +++ +++# r4 points to array of beta/tc for each 4 length edge +++do_luma_filter: +++ valtl H(setup,0),H(setup_input,0),H(setup_input,0) # b*8tc*8 +++ valtl HX(beta,0),H(setup,0),H(setup,0) +++ valtu HX(tc,0),H(setup,0),H(setup,0) +++ vmul HX(tc25,0), HX(tc,0), 5 +++ vadd HX(tc25,0),HX(tc25,0), 1 +++ vasr HX(tc25,0), HX(tc25,0), 1 +++ +++ # Compute decision +++ vadd HX(dp,0),HX(P1,0),HX(P1,0) # 2*P1 +++ vsub HX(dp,0),HX(P2,0),HX(dp,0) # P2-2*P1 +++ vadd HX(dp,0),HX(dp,0),HX(P0,0) # P2-2*P1+P0 +++ vdist HX(dp,0),HX(dp,0),0 # abs(P2-2*P1+P0) # dp0 +++ +++ vadd HX(dq,0),HX(Q1,0),HX(Q1,0) # 2*Q1 +++ vsub HX(dq,0),HX(Q2,0),HX(dq,0) # Q2-2*Q1 +++ vadd HX(dq,0),HX(dq,0),HX(Q0,0) # Q2-2*Q1+Q0 +++ vdist HX(dq,0),HX(dq,0),0 # abs(Q2-2*Q1+Q0) # dq0 +++ +++ vadd HX(d,0), HX(dp,0), HX(dq,0) +++ vasr HX(beta2,0),HX(beta,0),2 +++ vasr HX(beta3,0),HX(beta,0),3 +++ +++ # Compute flags that are negative if all conditions pass +++ vdist HX(decision,0), HX(P0,0), HX(P3,0) CLRA SACC +++ vdist HX(decision,0), HX(Q0,0), HX(Q3,0) SACC +++ vsub HX(decision,0), HX(decision,0), HX(beta3,0) SETF +++ +++ vdist HX(decision,0), HX(P0,0), HX(Q0,0) IFN +++ vsub HX(decision,0), HX(decision,0), HX(tc25,0) IFN SETF +++ vadd HX(decision,0), HX(d,0), HX(d,0) IFN +++ vsub HX(decision,0), HX(decision,0), HX(beta2,0) IFN SETF +++ vmov HX(decision,0), 1 IFNN +++ vadd H(decision,0),H(decision,3),0 IFN +++ vadd H(decision,16),H(decision,19),0 IFN +++ vmov -,HX(decision,0) SETF # N marks strong filter +++ vmov HX(decision,0), 1 IFNN # NN marks normal filter +++ +++ vadd HX(do_filter,0), HX(d,3), HX(d,0) +++ vsub HX(do_filter,0), HX(do_filter,0), HX(beta,0) SETF # IFNN means no filter +++ 
vmov HX(decision,0),0 IFNN # Z marks no filter +++ +++ # Expand out decision (currently valid one every 4 pixels) 0...1...2...3 +++ # First extract out even terms +++ vodd HX(decision,0),HX(decision,0),HX(decision,0) # 0.1.2.3 +++ vodd HX(decision,0),HX(decision,0),HX(decision,0) # 0123 +++ # Now expand back +++ valtl HX(decision,0),HX(decision,0),HX(decision,0) # 00112233 +++ valtl HX(decision,0),HX(decision,0),HX(decision,0) SETF # 0000111122223333 +++ +++ # HX(decision,0) is negative if want strong filtering, 1 if want normal filtering, 0 if want no filtering +++ +++ # Do a quick check to see if there is anything to do +++ mov r11, 0 # Signal no filtering +++ vmov -,1 IFNZ SUMS r5 +++ cmp r5,0 +++ beq filtering_done +++ mov r11, 1 # Signal some filtering +++ # And whether there is any strong filtering +++ vmov -,1 IFN SUMS r5 +++ cmp r5,0 +++ beq normal_filtering +++ +++ ############################################################################## +++ # Strong filtering - could maybe fast case if all have same sign? (especially if all disabled!) 
+++ vshl HX(tc2,0), HX(tc,0), 1 # Note that in normal filtering tx2 is tc/2, while here it is tc*2 +++ +++ # Take a copy of the original pixels for use in decision calculation +++ vmov HX(P0,32),HX(P0,0) +++ vmov HX(Q0,32),HX(Q0,0) +++ vmov HX(P1,32),HX(P1,0) +++ vmov HX(Q1,32),HX(Q1,0) +++ vmov HX(P2,32),HX(P2,0) +++ vmov HX(Q2,32),HX(Q2,0) +++ +++ vadd -,HX(P2,32),4 CLRA SACC +++ vshl -,HX(P1,32),1 SACC +++ vshl -,HX(P0,32),1 SACC +++ vshl -,HX(Q0,32),1 SACC +++ vshl HX(delta,0),HX(Q1,32),0 SACC +++ vasr HX(delta,0),HX(delta,0), 3 +++ vsub HX(delta,0),HX(delta,0),HX(P0,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(P0,0),HX(P0,32),HX(delta,0) IFN +++ +++ vadd -,HX(P2,32),2 CLRA SACC +++ vadd -,HX(P1,32),HX(P0,32) SACC +++ vshl HX(delta,0),HX(Q0,32),0 SACC +++ vasr HX(delta,0),HX(delta,0), 2 +++ vsub HX(delta,0),HX(delta,0),HX(P1,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(P1,0),HX(P1,32),HX(delta,0) IFN +++ +++ vadd -,HX(Q0,32),4 CLRA SACC +++ vadd -,HX(P1,32),HX(P0,32) SACC +++ vmul -,HX(P2,32),3 SACC +++ vshl HX(delta,0),HX(P3,0),1 SACC # Note that we have not made a copy of P3, so using P3,0 is correct +++ vasr HX(delta,0),HX(delta,0), 3 +++ vsub HX(delta,0),HX(delta,0),HX(P2,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(P2,0),HX(P2,32),HX(delta,0) IFN +++ #vmov HX(P2,0),3 IFN +++ +++ # Now reverse all P/Qs +++ +++ vadd -,HX(Q2,32),4 CLRA SACC +++ vshl -,HX(Q1,32),1 SACC +++ vshl -,HX(Q0,32),1 SACC +++ vshl -,HX(P0,32),1 SACC +++ vshl HX(delta,0),HX(P1,32),0 SACC +++ vasr HX(delta,0),HX(delta,0), 3 +++ vsub HX(delta,0),HX(delta,0),HX(Q0,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(Q0,0),HX(Q0,32),HX(delta,0) IFN +++ +++ vadd -,HX(Q2,32),2 CLRA SACC +++ vadd -,HX(Q1,32),HX(Q0,32) SACC +++ vshl HX(delta,0),HX(P0,32),0 SACC +++ vasr HX(delta,0),HX(delta,0), 2 +++ vsub HX(delta,0),HX(delta,0),HX(Q1,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(Q1,0),HX(Q1,32),HX(delta,0) 
IFN +++ +++ vadd -,HX(P0,32),4 CLRA SACC +++ vadd -,HX(Q1,32),HX(Q0,32) SACC +++ vmul -,HX(Q2,32),3 SACC +++ vshl HX(delta,0),HX(Q3,0),1 SACC # Note that we have not made a copy of Q3, so using Q3,0 is correct +++ vasr HX(delta,0),HX(delta,0), 3 +++ vsub HX(delta,0),HX(delta,0),HX(Q2,32) +++ vclamps HX(delta,0), HX(delta,0), HX(tc2,0) +++ vadd HX(Q2,0),HX(Q2,32),HX(delta,0) IFN +++ +++ ############################################################################## +++ # Normal filtering +++normal_filtering: +++ # Invert the decision flags +++ # make instruction more complicated as assembler has error and loses SETF +++ vrsub HX(tc10,0), HX(decision,0), 0 SETF # IFN means normal filtering +++ vmov -, HX(tc10,0) SETF # IFN means normal filtering +++ +++ vmov -,1 IFN SUMS r5 +++ cmp r5,0 +++ beq filtering_done +++ +++ vasr HX(tc2,0), HX(tc,0), 1 +++ vmul HX(tc10,0), HX(tc,0), 10 +++ +++ vasr HX(thresh,0), HX(beta,0), 1 +++ vadd HX(thresh,0), HX(thresh,0), HX(beta,0) +++ vasr HX(thresh,0), HX(thresh,0), 3 CLRA SACC +++ +++ vadd HX(ptest,0),HX(dp,3),HX(dp,0) +++ vsub HX(ptest,0),HX(ptest,0),HX(thresh,0) # ptest is negative if we need to do the P2 pixel +++ vadd HX(qtest,0),HX(dq,3),HX(dq,0) +++ vsub HX(qtest,0),HX(qtest,0),HX(thresh,0) # qtest is negative if we need to do the Q2 pixel +++ # Expand ptest and qtest together +++ vodd HX(pqtest,0),HX(ptest,0),HX(qtest,0) # p.p.p.p.q.q.q.q +++ vodd HX(pqtest,0),HX(pqtest,0),HX(pqtest,0) # ppppqqqq........ 
+++ valtl HX(pqtest,0),HX(pqtest,0),HX(pqtest,0) # ppppppppqqqqqqqq +++ valtl HX(ptest,0),HX(pqtest,0),HX(pqtest,0) +++ valtu HX(qtest,0),HX(pqtest,0),HX(pqtest,0) +++ +++ vsub HX(delta0,0), HX(Q0,0), HX(P0,0) +++ vsub HX(delta1,0), HX(Q1,0), HX(P1,0) +++ vmov -,8 CLRA SACC +++ vmul -,HX(delta0,0), 9 SACC +++ vmul HX(delta0,0),HX(delta1,0), r6 SACC +++ vasr HX(delta0,0), HX(delta0,0), 4 +++ vdist HX(deltatest,0), HX(delta0,0), 0 +++ vsub HX(deltatest,0), HX(deltatest,0), HX(tc10,0) IFN SETF # negative if still need to do something +++ vmov HX(deltatest,0), 0 IFNN # clear if no need to do anything so we can reload flags later +++ +++ vclamps HX(delta0,0), HX(delta0,0), HX(tc,0) +++ +++ vadd HX(deltap1,0), HX(P2,0), HX(P0,0) +++ vadd HX(deltap1,0), HX(deltap1,0), 1 +++ vasr HX(deltap1,0), HX(deltap1,0), 1 CLRA SACC +++ vsub HX(deltap1,0), HX(delta0,0), HX(P1,0) SACC +++ vasr HX(deltap1,0), HX(deltap1,0), 1 +++ vclamps HX(deltap1,0), HX(deltap1,0), HX(tc2,0) +++ +++ vadd HX(deltaq1,0), HX(Q2,0), HX(Q0,0) +++ vadd HX(deltaq1,0), HX(deltaq1,0), 1 +++ vasr HX(deltaq1,0), HX(deltaq1,0), 1 CLRA SACC +++ vadd HX(deltaq1,0), HX(delta0,0), HX(Q1,0) +++ vrsub -, HX(delta0,0), 0 SACC +++ vrsub HX(deltaq1,0), HX(Q1,0), 0 SACC +++ vasr HX(deltaq1,0), HX(deltaq1,0), 1 +++ vclamps HX(deltaq1,0), HX(deltaq1,0), HX(tc2,0) +++ +++ vadds HX(P0,0), HX(P0,0), HX(delta0,0) IFN +++ vsubs HX(Q0,0), HX(Q0,0), HX(delta0,0) IFN +++ +++ vmov -,HX(ptest,0) IFN SETF # Negative if need to do p1 +++ vadds HX(P1,0), HX(P1,0), HX(deltap1,0) IFN +++ +++ vmov -,HX(deltatest,0) SETF +++ vmov -,HX(qtest,0) IFN SETF # Negative if need to do q1 +++ vadds HX(Q1,0), HX(Q1,0), HX(deltaq1,0) IFN +++ +++ #vmov HX(P2,0),1 IFN +++ +++filtering_done: +++ b lr ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 0121fca..05b2169 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -147,7 +147,7 @@ static int gpu_init(volatile struct GPU **gpu) { ++ vcsm_init(); ++ 
gpu_malloc_uncached_internal(sizeof(struct GPU), &gpu_mem_ptr, mb); ++ ptr = (volatile struct GPU*)gpu_mem_ptr.arm; ++- memset(ptr, 0, sizeof *ptr); +++ memset((void*)ptr, 0, sizeof *ptr); ++ vc = gpu_mem_ptr.vc; ++ ++ ptr->mb = mb; ++@@ -254,7 +254,7 @@ void gpu_cache_flush(GPU_MEM_PTR_T *p) ++ struct vcsm_user_clean_invalid_s iocache = {}; ++ iocache.s[0].handle = p->vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = p->arm; +++ iocache.s[0].addr = (int) p->arm; ++ iocache.s[0].size = p->numbytes; ++ vcsm_clean_invalid( &iocache ); ++ #else ++@@ -390,6 +390,7 @@ static void *vpu_start(void *arg) { ++ #ifdef RPI_TIME_TOTAL_POSTED ++ int last_time=0; ++ long long on_time=0; +++ long long on_time_deblock=0; ++ long long off_time=0; ++ int start_time; ++ int end_time; ++@@ -451,10 +452,13 @@ static void *vpu_start(void *arg) { ++ #ifdef RPI_TIME_TOTAL_POSTED ++ end_time = Microseconds(); ++ last_time = end_time; ++- on_time += end_time - start_time; +++ if (p[6]==2) +++ on_time_deblock += end_time - start_time; +++ else +++ on_time += end_time - start_time; ++ count++; ++ if ((count&0x7f)==0) ++- printf("Posted %d On=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(off_time/1000)); +++ printf("Posted %d On=%dms, On_deblock=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(on_time_deblock/1000),(int)(off_time/1000)); ++ #endif ++ pthread_mutex_lock(&post_mutex); ++ vpu_async_head++; ++diff --git a/libavcodec/rpi_shader.c b/libavcodec/rpi_shader.c ++index e86eb30..c5d8b29 100644 ++--- a/libavcodec/rpi_shader.c +++++ b/libavcodec/rpi_shader.c ++@@ -61,7 +61,7 @@ unsigned int rpi_shader[] = { ++ /* [0x00000120] */ 0x8c9e7052, 0x10025e18, // add t0s, r0, r1 ; mov ra_frame_base, r2 ++ /* [0x00000128] */ 0x0c9e7440, 0x10020f27, // add t1s, r2, r1 ++ /* [0x00000130] */ 0x00000008, 0xe00208a7, // mov r2,8 ++-/* [0x00000138] */ 0x11827c80, 0x10021327, // shl rb12,unif, r2 +++/* [0x00000138] */ 0x11827c80, 0x10021327, // shl rb12,unif,r2 ++ /* 
[0x00000140] */ 0x0c827c80, 0x10021367, // add rb13,unif,r2 ++ /* [0x00000148] */ 0x15827d80, 0x100208a7, // mov r2, unif ++ /* [0x00000150] */ 0x119c15c0, 0xd00208a7, // shl r2, r2, 1 ++-- ++2.7.4 ++ ++ ++From e9c59f0d7b42dfb10d85ab2477f95b44484a8d70 Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 1 Jul 2015 09:21:17 +0100 ++Subject: [PATCH 65/68] Added ability to combine jobs ++ ++--- ++ libavcodec/rpi_qpu.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++- ++ 1 file changed, 80 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 05b2169..91777be 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -8,6 +8,8 @@ ++ #define RPI_TIME_TOTAL_POSTED ++ // define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion ++ #define RPI_ASYNC +++// Define RPI_COMBINE_JOBS to find jobs that can be executed in parallel +++#define RPI_COMBINE_JOBS ++ ++ #include <stdio.h> ++ #include <stdlib.h> ++@@ -398,9 +400,15 @@ static void *vpu_start(void *arg) { ++ #endif ++ while(1) { ++ int i; ++- int *p; +++ int *p; // Pointer for a QPU/VPU job +++#ifdef RPI_COMBINE_JOBS +++ int *q = NULL; // Pointer for a VPU only job +++ int have_qpu = 0; +++ int have_vpu = 0; +++#endif ++ int qpu_code; ++ int qpu_codeb; +++ int num_jobs; // Number of jobs available ++ pthread_mutex_lock(&post_mutex); ++ while( vpu_async_tail - vpu_async_head <= 0) ++ { ++@@ -408,13 +416,38 @@ static void *vpu_start(void *arg) { ++ pthread_cond_wait(&post_cond_tail, &post_mutex); ++ } ++ p = vpu_cmds[vpu_async_head%MAXCMDS]; +++ num_jobs = vpu_async_tail - vpu_async_head; ++ pthread_mutex_unlock(&post_mutex); ++ ++ if (p[6] == -1) { ++ break; // Last job ++ } +++ if (p[7] == 0 && p[0] == 0 && p[16]==0) +++ goto job_done_early; +++ +++#ifdef RPI_COMBINE_JOBS +++ // First scan for a qpu job +++ for (int x=0;x<num_jobs;x++) { +++ p = 
vpu_cmds[(vpu_async_head+x)%MAXCMDS]; +++ if (p[7]) { +++ have_qpu = 1; +++ break; +++ } +++ } +++ // Now scan for a non-qpu job +++ for (int x=0;x<num_jobs;x++) { +++ q = vpu_cmds[(vpu_async_head+x)%MAXCMDS]; +++ if (!q[7]) { +++ have_vpu = 1; +++ break; +++ } +++ } +++ printf("Have_qpu = %d, have_vpu=%d\n",have_qpu,have_vpu); +++#endif ++ qpu_code = p[7]; ++ qpu_codeb = p[16]; +++ +++ ++ //if (p[7]) { ++ //GPU_MEM_PTR_T *buf = (GPU_MEM_PTR_T *)p[7]; ++ //gpu_cache_flush(buf); ++@@ -427,6 +460,40 @@ static void *vpu_start(void *arg) { ++ off_time += start_time-last_time; ++ #endif ++ +++#ifdef RPI_COMBINE_JOBS +++ if (have_qpu) { +++ for(i=0;i<8;i++) { +++ gpu->mail[i*2] = p[8+i]; +++ gpu->mail[i*2 + 1] = qpu_code; +++ } +++ for(i=0;i<12;i++) { +++ gpu->mail2[i*2] = p[17+i]; +++ gpu->mail2[i*2 + 1] = qpu_codeb; +++ } +++ if (have_vpu) { +++ execute_multi(gpu->mb, +++ 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, +++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 +++ q[0], q[1], q[2], q[3], q[4], q[5], q[6]); // VPU1 +++ q[0] = 0; +++ } else { +++ execute_multi(gpu->mb, +++ 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, +++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 +++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 +++ } +++ p[0] = 0; +++ p[7] = 0; +++ p[16] = 0; +++ } else { +++ av_assert0(have_vpu); +++ vpu_execute_code(q[0], q[1], q[2], q[3], q[4], q[5], q[6]); +++ q[0] = 0; +++ } +++#else +++ ++ if (!qpu_code) { ++ vpu_execute_code(p[0], p[1], p[2], p[3], p[4], p[5], p[6]); ++ } else { ++@@ -449,17 +516,29 @@ static void *vpu_start(void *arg) { ++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 ++ #endif ++ } +++#endif +++ ++ #ifdef RPI_TIME_TOTAL_POSTED ++ end_time = Microseconds(); ++ last_time = end_time; +++#ifdef RPI_COMBINE_JOBS +++ // There are three cases we may wish to distinguish of 
VPU/QPU activity +++ on_time += end_time - start_time; +++#else ++ if (p[6]==2) ++ on_time_deblock += end_time - start_time; ++ else ++ on_time += end_time - start_time; +++#endif ++ count++; ++ if ((count&0x7f)==0) +++#ifdef RPI_COMBINE_JOBS ++ printf("Posted %d On=%dms, On_deblock=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(on_time_deblock/1000),(int)(off_time/1000)); +++#else +++ printf("Posted %d On=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(off_time/1000)); +++#endif ++ #endif +++job_done_early: ++ pthread_mutex_lock(&post_mutex); ++ vpu_async_head++; ++ pthread_cond_broadcast(&post_cond_head); ++-- ++2.7.4 ++ ++ ++From 0d54661f303b2a8903e806648ed54a34dcf315dc Mon Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 1 Jul 2015 12:53:10 +0100 ++Subject: [PATCH 66/68] Added chroma deblocking ++ ++--- ++ libavcodec/hevc.c | 20 ++ ++ libavcodec/hevc.h | 12 +- ++ libavcodec/hevc_filter.c | 92 +++++- ++ libavcodec/rpi_hevc_transform.h | 644 +++++++++++++++++++++++++++++++++++++++- ++ libavcodec/rpi_hevc_transform.s | 207 +++++++++++++ ++ libavcodec/rpi_qpu.c | 27 +- ++ libavcodec/rpi_shader.qasm | 11 + ++ 7 files changed, 988 insertions(+), 25 deletions(-) ++ ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 4ce94a7..8437e10 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -251,6 +251,14 @@ static void pic_arrays_free(HEVCContext *s) ++ gpu_free(&s->y_setup_ptr); ++ s->y_setup_arm = 0; ++ } +++ if (s->uv_setup_arm) { +++ gpu_free(&s->uv_setup_ptr); +++ s->uv_setup_arm = 0; +++ } +++ if (s->vpu_cmds_arm) { +++ gpu_free(&s->vpu_cmds_ptr); +++ s->vpu_cmds_arm = 0; +++ } ++ #endif ++ av_freep(&s->sao); ++ av_freep(&s->deblock); ++@@ -324,6 +332,18 @@ static int pic_arrays_init(HEVCContext *s, const HEVCSPS *sps) ++ s->y_setup_vc = (void*)s->y_setup_ptr.vc; ++ memset(s->y_setup_arm, 0, s->y_setup_ptr.numbytes); ++ printf("Setup %d by %d by %d\n",s->setup_width,s->setup_height,sizeof(*s->y_setup_arm)); 
+++ +++ s->uv_setup_width = ( (sps->width >> sps->hshift[1]) + 15) / 16; +++ s->uv_setup_height = ( (sps->height >> sps->vshift[1]) + 15) / 16; +++ gpu_malloc_uncached(sizeof(*s->uv_setup_arm) * s->uv_setup_width * s->uv_setup_height, &s->uv_setup_ptr); // TODO make this cached +++ s->uv_setup_arm = (void*)s->uv_setup_ptr.arm; +++ s->uv_setup_vc = (void*)s->uv_setup_ptr.vc; +++ memset(s->uv_setup_arm, 0, s->uv_setup_ptr.numbytes); +++ printf("Setup uv %d by %d by %d\n",s->uv_setup_width,s->uv_setup_height,sizeof(*s->uv_setup_arm)); +++ +++ gpu_malloc_uncached(sizeof(*s->vpu_cmds_arm) * 3,&s->vpu_cmds_ptr); +++ s->vpu_cmds_arm = (void*) s->vpu_cmds_ptr.arm; +++ s->vpu_cmds_vc = s->vpu_cmds_ptr.vc; ++ #endif ++ ++ s->bs_width = (width >> 2) + 1; ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index cf08489..7eb37e6 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -56,7 +56,7 @@ ++ #define RPI_MAX_JOBS 2 ++ // Define RPI_WORKER to launch a worker thread for pixel processing tasks ++ #define RPI_WORKER ++- +++ // Define RPI_DEBLOCK_VPU to perform deblocking on the VPUs ++ #define RPI_DEBLOCK_VPU ++ ++ #endif ++@@ -980,6 +980,16 @@ typedef struct HEVCContext { ++ uint8_t (*y_setup_vc)[2][2][2][4]; ++ int setup_width; // Number of 16x16 blocks across the image ++ int setup_height; // Number of 16x16 blocks down the image +++ +++ GPU_MEM_PTR_T uv_setup_ptr; +++ uint8_t (*uv_setup_arm)[2][2][2][4]; // Half of this is unused [][][1][], but easier for the VPU as it allows us to store with zeros and addresses are aligned +++ uint8_t (*uv_setup_vc)[2][2][2][4]; +++ int uv_setup_width; +++ int uv_setup_height; +++ +++ GPU_MEM_PTR_T vpu_cmds_ptr; +++ int (*vpu_cmds_arm)[6]; // r0-r5 for each command +++ int vpu_cmds_vc; ++ #endif ++ ++ #endif ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 06371da..6367068 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -656,9 +656,23 @@ static void 
deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->frame->linesize[chroma], ++ c_tc, no_p, no_q); ++ } else +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock) { +++ uint8_t (*setup)[2][2][4]; +++ int xc = x>>s->ps.sps->hshift[chroma]; +++ int yc = y>>s->ps.sps->vshift[chroma]; +++ int num16 = (yc>>4)*s->uv_setup_width + (xc>>4); +++ int a = ((yc>>3) & 1) << 1; +++ int b = (xc>>3) & 1; +++ setup = s->uv_setup_arm[num16]; +++ setup[0][b][0][a] = c_tc[0]; +++ setup[0][b][0][a + 1] = c_tc[1]; +++ } else +++#endif ++ s->hevcdsp.hevc_v_loop_filter_chroma(src, ++ s->frame->linesize[chroma], ++ c_tc, no_p, no_q); +++ ++ } ++ } ++ ++@@ -689,6 +703,19 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ s->frame->linesize[chroma], ++ c_tc, no_p, no_q); ++ } else +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock) { +++ uint8_t (*setup)[2][2][4]; +++ int xc = x>>s->ps.sps->hshift[chroma]; +++ int yc = y>>s->ps.sps->vshift[chroma]; +++ int num16 = (yc>>4)*s->uv_setup_width + (xc>>4); +++ int a = ((xc>>3) & 1) << 1; +++ int b = (yc>>3) & 1; +++ setup = s->uv_setup_arm[num16]; +++ setup[1][b][0][a] = c_tc[0]; +++ setup[1][b][0][a + 1] = c_tc[1]; +++ } else +++#endif ++ s->hevcdsp.hevc_h_loop_filter_chroma(src, ++ s->frame->linesize[chroma], ++ c_tc, no_p, no_q); ++@@ -1013,33 +1040,56 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ static void rpi_deblock(HEVCContext *s, int y, int ctb_size) ++ { ++ // Flush image, 4 lines above to bottom of ctb stripe ++- ff_hevc_flush_buffer_lines(s, FFMAX(y-4,0), y+ctb_size, 1, 0); +++ ff_hevc_flush_buffer_lines(s, FFMAX(y-4,0), y+ctb_size, 1, 1); ++ // TODO flush buffer of beta/tc setup when it becomes cached +++ +++ // Prepare three commands at once to avoid calling overhead +++ s->vpu_cmds_arm[0][0] = get_vc_address(s->frame->buf[0]) + s->frame->linesize[0] * y; +++ s->vpu_cmds_arm[0][1] = s->frame->linesize[0]; +++ s->vpu_cmds_arm[0][2] = s->setup_width; +++ 
s->vpu_cmds_arm[0][3] = (int) ( s->y_setup_vc + s->setup_width * (y>>4) ); +++ s->vpu_cmds_arm[0][4] = ctb_size>>4; +++ s->vpu_cmds_arm[0][5] = 2; +++ +++ s->vpu_cmds_arm[1][0] = get_vc_address(s->frame->buf[1]) + s->frame->linesize[1] * (y>> s->ps.sps->vshift[1]); +++ s->vpu_cmds_arm[1][1] = s->frame->linesize[1]; +++ s->vpu_cmds_arm[1][2] = s->uv_setup_width; +++ s->vpu_cmds_arm[1][3] = (int) ( s->uv_setup_vc + s->uv_setup_width * ((y>>4)>> s->ps.sps->vshift[1]) ); +++ s->vpu_cmds_arm[1][4] = (ctb_size>>4)>> s->ps.sps->vshift[1]; +++ s->vpu_cmds_arm[1][5] = 3; +++ +++ s->vpu_cmds_arm[2][0] = get_vc_address(s->frame->buf[2]) + s->frame->linesize[2] * (y>> s->ps.sps->vshift[2]); +++ s->vpu_cmds_arm[2][1] = s->frame->linesize[2]; +++ s->vpu_cmds_arm[2][2] = s->uv_setup_width; +++ s->vpu_cmds_arm[2][3] = (int) ( s->uv_setup_vc + s->uv_setup_width * ((y>>4)>> s->ps.sps->vshift[1]) ); +++ s->vpu_cmds_arm[2][4] = (ctb_size>>4)>> s->ps.sps->vshift[1]; +++ s->vpu_cmds_arm[2][5] = 4; +++ ++ // Call VPU ++- // TODO add this to a separate pipeline of VPU jobs that can be run in parallel and wait for completion ++- vpu_wait(vpu_post_code( vpu_get_fn(), get_vc_address(s->frame->buf[0]) + s->frame->linesize[0] * y, s->frame->linesize[0], ++- s->setup_width, (int) ( s->y_setup_vc + s->setup_width * (y>>4) ), ++- ctb_size>>4, 2, 0)); // 2 means to do the deblocking code +++ vpu_wait(vpu_post_code( vpu_get_fn(), s->vpu_cmds_vc, 3, 0, 0, 0, 5, 0)); // 5 means to do all the commands ++ } ++ ++-static void rpi_deblock2(HEVCContext *s, int y, int ctb_size) ++-{ ++- int y2; ++- for(y2=y;y2<y+ctb_size;y2+=16) { ++- rpi_deblock(s,y2,16); ++- } ++-} ++ #endif ++ ++ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ { ++ int x_end = x >= s->ps.sps->width - ctb_size; +++#ifdef RPI_DEBLOCK_VPU +++ int done_deblock = 0; +++#endif ++ if (s->avctx->skip_loop_filter < AVDISCARD_ALL) ++ deblocking_filter_CTB(s, x, y); ++ #ifdef RPI_DEBLOCK_VPU ++ if (s->enable_rpi_deblock && 
x_end) ++ { ++- rpi_deblock(s, y, ctb_size); +++ int y_at_end = y >= s->ps.sps->height - ctb_size; +++ int height = 64; // Deblock in units 64 high to avoid too many VPU calls +++ int y_start = y&~63; +++ if (y_at_end) height = s->ps.sps->height - y_start; +++ if ((((y+ctb_size)&63)==0) || y_at_end) { +++ done_deblock = 1; +++ rpi_deblock(s, y_start, height); +++ } ++ } ++ #endif ++ if (s->ps.sps->sao_enabled) { ++@@ -1070,11 +1120,25 @@ void ff_hevc_hls_filter(HEVCContext *s, int x, int y, int ctb_size) ++ //int newh = y + ctb_size - 4; ++ //int currh = s->ref->tf.progress->data[0]; ++ //if (((y + ctb_size)&63)==0) +++#ifdef RPI_DEBLOCK_VPU +++ if (s->enable_rpi_deblock) { +++ // we no longer need to flush the luma buffer as it is in GPU memory when using deblocking on the rpi +++ if (done_deblock) { +++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); +++ } +++ } else { +++#ifdef RPI_INTER_QPU +++ ff_hevc_flush_buffer(s, &s->ref->tf, y + ctb_size - 4); +++#endif +++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); +++ } +++#else ++ #ifdef RPI_INTER_QPU ++ ff_hevc_flush_buffer(s, &s->ref->tf, y + ctb_size - 4); ++- // TODO we no longer need to flush the luma buffer as it is in GPU memory when using deblocking on the rpi +++ // we no longer need to flush the luma buffer as it is in GPU memory when using deblocking on the rpi ++ #endif ++ ff_thread_report_progress(&s->ref->tf, y + ctb_size - 4, 0); +++#endif ++ } ++ } ++ ++diff --git a/libavcodec/rpi_hevc_transform.h b/libavcodec/rpi_hevc_transform.h ++index b3f155f..4309f1c 100644 ++--- a/libavcodec/rpi_hevc_transform.h +++++ b/libavcodec/rpi_hevc_transform.h ++@@ -3,14 +3,32 @@ unsigned char rpi_hevc_transform [] = { ++ 106, ++ 0, ++ 144, ++-38, +++47, ++ 1, ++ 37, ++ 106, ++ 0, ++ 144, ++-57, +++66, ++ 1, +++53, +++106, +++0, +++144, +++192, +++4, +++69, +++106, +++0, +++144, +++192, +++4, +++85, +++106, +++0, +++144, +++220, +++5, ++ 169, ++ 3, ++ 62, ++@@ -2427,4 +2445,626 @@ unsigned 
char rpi_hevc_transform [] = { ++ 128, ++ 90, ++ 0, +++169, +++3, +++14, +++96, +++4, +++31, +++169, +++3, +++30, +++96, +++1, +++31, +++73, +++64, +++52, +++64, +++45, +++64, +++2, +++64, +++10, +++64, +++64, +++198, +++1, +++7, +++8, +++232, +++63, +++0, +++0, +++0, +++6, +++232, +++253, +++255, +++255, +++255, +++0, +++246, +++0, +++0, +++0, +++4, +++215, +++64, +++3, +++96, +++2, +++248, +++0, +++35, +++0, +++0, +++64, +++56, +++0, +++0, +++4, +++248, +++0, +++36, +++0, +++0, +++64, +++56, +++8, +++0, +++0, +++240, +++64, +++0, +++132, +++3, +++30, +++106, +++132, +++24, +++128, +++240, +++0, +++0, +++132, +++3, +++128, +++144, +++143, +++0, +++131, +++98, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++135, +++0, +++131, +++102, +++0, +++158, +++71, +++0, +++2, +++248, +++0, +++35, +++0, +++0, +++64, +++56, +++0, +++0, +++4, +++248, +++0, +++36, +++0, +++0, +++64, +++56, +++8, +++0, +++0, +++240, +++64, +++0, +++132, +++3, +++30, +++106, +++132, +++24, +++128, +++240, +++0, +++0, +++132, +++3, +++128, +++144, +++112, +++0, +++131, +++98, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++104, +++0, +++131, +++102, +++0, +++248, +++64, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++30, +++106, +++134, +++24, +++128, +++248, +++0, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++144, +++123, +++0, +++188, +++64, +++67, +++232, +++0, +++2, +++0, +++0, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++112, +++0, +++195, +++232, +++0, +++2, +++0, +++0, +++12, +++128, +++7, +++192, +++130, +++248, +++0, +++0, +++112, +++192, +++224, +++16, +++195, +++31, +++132, +++248, +++1, +++0, +++112, +++0, +++224, +++16, +++203, +++31, +++3, +++99, +++131, +++71, +++68, +++232, +++32, +++0, +++0, +++0, +++0, +++99, +++2, +++99, +++23, +++102, +++7, +++106, +++127, +++156, +++178, +++255, +++0, +++248, +++64, +++0, +++112, +++0, +++192, +++243, 
+++211, +++31, +++30, +++106, +++134, +++24, +++128, +++248, +++0, +++0, +++112, +++0, +++192, +++243, +++211, +++31, +++128, +++144, +++72, +++0, +++188, +++64, +++67, +++232, +++0, +++2, +++0, +++0, +++0, +++255, +++64, +++0, +++0, +++20, +++200, +++243, +++0, +++0, +++128, +++144, +++61, +++0, +++195, +++232, +++0, +++2, +++0, +++0, +++12, +++128, +++7, +++192, +++130, +++248, +++0, +++0, +++112, +++192, +++224, +++16, +++195, +++31, +++132, +++248, +++1, +++0, +++112, +++0, +++224, +++16, +++203, +++31, +++25, +++102, +++9, +++106, +++2, +++30, +++41, +++3, +++26, +++87, +++162, +++64, +++64, +++198, +++1, +++23, +++127, +++158, +++95, +++255, +++239, +++3, +++0, +++254, +++128, +++143, +++94, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++143, +++95, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++142, +++208, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++142, +++209, +++0, +++0, +++240, +++12, +++0, +++128, +++144, +++47, +++0, +++8, +++255, +++227, +++23, +++0, +++244, +++192, +++51, +++0, +++0, +++8, +++255, +++35, +++52, +++0, +++180, +++192, +++51, +++0, +++0, +++111, +++3, +++239, +++3, +++0, +++254, +++128, +++143, +++14, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++143, +++15, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++192, +++142, +++16, +++0, +++0, +++240, +++12, +++0, +++0, +++254, +++128, +++142, +++17, +++0, +++0, +++240, +++12, +++0, +++128, +++144, +++13, +++0, +++8, +++255, +++227, +++3, +++0, +++244, +++192, +++51, +++0, +++0, +++8, +++255, +++35, +++4, +++0, +++180, +++192, +++51, +++0, +++0, +++111, +++3, +++32, +++246, +++192, +++11, +++1, +++16, +++32, +++246, +++2, +++140, +++47, +++240, +++32, +++247, +++35, +++141, +++63, +++178, +++64, +++254, +++35, +++141, +++2, +++68, +++192, +++243, +++128, +++11, +++32, +++255, +++35, +++240, +++58, +++226, +++192, +++243, +++188, +++10, +++0, +++254, +++0, +++141, +++4, +++4, +++0, +++240, +++128, +++10, +++88, +++246, +++35, +++141, +++3, 
+++68, +++240, +++246, +++35, +++141, +++48, +++66, +++0, +++247, +++227, +++143, +++52, +++242, +++32, +++247, +++227, +++142, +++52, +++178, +++90, +++0, +++161, +++3, +++6, +++64, +++23, +++64, +++96, +++8, +++70, +++98, +++97, +++8, +++70, +++98, +++98, +++8, +++70, +++98, +++99, +++8, +++70, +++98, +++100, +++8, +++70, +++98, +++101, +++8, +++70, +++98, +++255, +++159, +++8, +++250, +++23, +++102, +++7, +++106, +++112, +++30, +++33, +++3, ++ }; ++diff --git a/libavcodec/rpi_hevc_transform.s b/libavcodec/rpi_hevc_transform.s ++index b055208..5543093 100644 ++--- a/libavcodec/rpi_hevc_transform.s +++++ b/libavcodec/rpi_hevc_transform.s ++@@ -85,6 +85,13 @@ hevc_trans_16x16: ++ beq memclear16 ++ cmp r5,2 ++ beq hevc_deblock_16x16 +++ cmp r5,3 +++ beq hevc_uv_deblock_16x16 +++ cmp r5,4 +++ beq hevc_uv_deblock_16x16_with_clear +++ cmp r5,5 +++ beq hevc_run_command_list +++ ++ push r6-r15, lr # TODO cut down number of used registers ++ mov r14,r3 # coeffs32 ++ mov r15,r4 # num32 ++@@ -708,3 +715,203 @@ normal_filtering: ++ ++ filtering_done: ++ b lr +++ +++ +++hevc_uv_deblock_16x16: +++ push r6-r15, lr +++ mov r14,0 +++ b hevc_uv_start +++hevc_uv_deblock_16x16_with_clear: +++ push r6-r15, lr +++ mov r14,1 +++ b hevc_uv_start +++ +++hevc_uv_start: +++ mov r9,r4 +++ mov r4,r3 +++ mov r13,r2 +++ mov r2,r0 +++ mov r10,r0 +++ subscale4 r0,r1 +++ mov r8,63 +++ mov r6,-3 +++ vmov H(zeros,0),0 +++# r7 is number of blocks still to load +++# r0 is location of current block - 4 * stride +++# r1 is stride +++# r2 is location of current block +++# r3 is offset of start of block (actual edges start at H(16,16)+r3 for horizontal and H(16,0)+r3 for vertical +++# r4 is setup +++# r5 is for temporary calculations +++# r8 holds 63 +++# r6 holds -3 +++# r9 holds the number of 16 high rows to process +++# r10 holds the original img base +++# r11 returns 0 if no filtering was done on the edge +++# r12 saves a copy of this +++# r13 is copy of width +++# r14 is 1 if we should clear the old 
contents, or 0 if not +++ +++uv_process_row: +++ # First iteration does not do horizontal filtering on previous +++ mov r7, r13 +++ mov r3,0 +++ vldb H(12++,16)+r3,(r0 += r1) REP 4 # Load the current block +++ vldb H(16++,16)+r3,(r2 += r1) REP 16 +++ vldb H(setup_input,0), (r4) # We may wish to prefetch these +++ cmp r14,1 +++ bne uv_skip0 +++ vstb H(zeros,0),(r4) +++uv_skip0: +++ bl uv_vert_filter +++ add r3,8 +++ vadd H(setup_input,0),H(setup_input,8),0 # Rotate to second set of 8 +++ bl uv_vert_filter +++ sub r3,8 +++ b uv_start_deblock_loop +++uv_deblock_loop: +++ # Middle iterations do vertical on current block and horizontal on preceding +++ vldb H(12++,16)+r3,(r0 += r1) REP 4 # load the current block +++ vldb H(16++,16)+r3,(r2 += r1) REP 16 +++ vldb H(setup_input,0), (r4) +++ cmp r14,1 +++ bne uv_skip1 +++ vstb H(zeros,0),(r4) +++uv_skip1: +++ bl uv_vert_filter +++ add r3,8 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl uv_vert_filter +++ sub r3,8 +++ vldb H(setup_input,0), -16(r4) +++ cmp r14,1 +++ bne uv_skip3 +++ vstb H(zeros,0),-16(r4) +++uv_skip3: +++ bl uv_horz_filter +++ mov r12,r11 +++ add r3,8*64 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl uv_horz_filter +++ sub r3,8*64 +++ addcmpbeq r12,0,0,uv_skip_save_top +++ vstb H(12++,0)+r3,-16(r0 += r1) REP 4 # Save the deblocked pixels for the previous block +++uv_skip_save_top: +++ vstb H(16++,0)+r3,-16(r2 += r1) REP 16 +++uv_start_deblock_loop: +++ # move onto next 16x16 (could do this with circular buffer support instead) +++ add r3,16 +++ and r3,r8 +++ add r4,32 +++ # Perform loop counter operations (may work with an addcmpbgt as well?) 
+++ add r0,16 +++ add r2,16 +++ sub r7,1 +++ cmp r7,0 # Are there still more blocks to load +++ bgt uv_deblock_loop +++ +++ # Final iteration needs to just do horizontal filtering +++ vldb H(setup_input,0), -16(r4) +++ cmp r14,1 +++ bne uv_skip2 +++ vstb H(zeros,0),-16(r4) +++uv_skip2: +++ bl uv_horz_filter +++ mov r12,r11 +++ add r3,8*64 +++ vadd H(setup_input,0),H(setup_input,8),0 +++ bl uv_horz_filter +++ sub r3,64*8 +++ addcmpbeq r12,0,0,uv_skip_save_top2 +++ vstb H(12++,0)+r3,-16(r0 += r1) REP 4 # Save the deblocked pixels for the previous block +++uv_skip_save_top2: +++ vstb H(16++,0)+r3,-16(r2 += r1) REP 16 +++ +++# Now look to see if we should do another row +++ sub r9,1 +++ cmp r9,0 +++ bgt uv_start_again +++ pop r6-r15, pc +++uv_start_again: +++ # Need to sort out r0,r2 to point to next row down +++ addscale16 r10,r1 +++ mov r2,r10 +++ subscale4 r0,r2,r1 +++ b uv_process_row +++ +++ +++# At this stage H(16,16)+r3 points to the first pixel of the 16 high edge to be filtered +++# So we can reuse the code we move the parts to be filtered into HX(P0/P1/P2/P3/Q0/Q1/Q2/Q3,0) - we will perform a final saturation step on placing them back into the correct locations +++ +++uv_vert_filter: +++ push lr +++ +++ vmov HX(P1,0), V(16,14)+r3 +++ vmov HX(P0,0), V(16,15)+r3 +++ vmov HX(Q0,0), V(16,16)+r3 +++ vmov HX(Q1,0), V(16,17)+r3 +++ +++ bl do_chroma_filter +++ +++ vadds V(16,15)+r3, HX(P0,0), 0 +++ vadds V(16,16)+r3, HX(Q0,0), 0 +++ +++ pop pc +++ +++# Filter edge at H(16,0)+r3 +++uv_horz_filter: +++ push lr +++ +++ vmov HX(P1,0), H(14,0)+r3 +++ vmov HX(P0,0), H(15,0)+r3 +++ vmov HX(Q0,0), H(16,0)+r3 +++ vmov HX(Q1,0), H(17,0)+r3 +++ +++ bl do_chroma_filter +++ +++ vadds H(15,0)+r3, HX(P0,0), 0 +++ # P3 and Q3 never change so don't bother saving back +++ vadds H(16,0)+r3, HX(Q0,0), 0 +++ +++ pop pc +++ +++# r4 points to array of beta/tc for each 4 length edge +++do_chroma_filter: +++ valtl H(setup,0),H(setup_input,0),H(setup_input,0) # tc*8 +++ valtl 
HX(tc,0),H(setup,0),H(setup,0) +++ +++ vsub HX(delta,0),HX(Q0,0),HX(P0,0) +++ vshl HX(delta,0),HX(delta,0),2 CLRA SACC +++ vsub -,HX(P1,0),HX(Q1,0) SACC +++ vmov HX(delta,0),4 SACC +++ vasr HX(delta,0),HX(delta,0),3 +++ vclamps HX(delta,0), HX(delta,0), HX(tc,0) +++ vadd HX(P0,0),HX(P0,0),HX(delta,0) +++ vsub HX(Q0,0),HX(Q0,0),HX(delta,0) +++ b lr +++ +++# r0 = list +++# r1 = number +++hevc_run_command_list: +++ push r6-r7, lr +++ mov r6, r0 +++ mov r7, r1 +++loop_cmds: +++ ld r0,(r6) # How to encode r6++? +++ add r6,4 +++ ld r1,(r6) +++ add r6,4 +++ ld r2,(r6) +++ add r6,4 +++ ld r3,(r6) +++ add r6,4 +++ ld r4,(r6) +++ add r6,4 +++ ld r5,(r6) +++ add r6,4 +++ bl hevc_trans_16x16 +++ sub r7,1 +++ cmp r7,0 +++ bgt loop_cmds +++ +++ pop r6-r7, pc ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 91777be..5aa0432 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -397,6 +397,8 @@ static void *vpu_start(void *arg) { ++ int start_time; ++ int end_time; ++ int count=0; +++ int count_deblock=0; +++ int count_qpu=0; ++ #endif ++ while(1) { ++ int i; ++@@ -442,7 +444,7 @@ static void *vpu_start(void *arg) { ++ break; ++ } ++ } ++- printf("Have_qpu = %d, have_vpu=%d\n",have_qpu,have_vpu); +++ //printf("Have_qpu = %d, have_vpu=%d\n",have_qpu,have_vpu); ++ #endif ++ qpu_code = p[7]; ++ qpu_codeb = p[16]; ++@@ -460,6 +462,12 @@ static void *vpu_start(void *arg) { ++ off_time += start_time-last_time; ++ #endif ++ +++#define NO_FLUSH 1 +++#define CLEAR_PROFILE 2 +++#define OUTPUT_COUNTS 4 +++ +++#define FLAGS_FOR_PROFILING (NO_FLUSH) +++ ++ #ifdef RPI_COMBINE_JOBS ++ if (have_qpu) { ++ for(i=0;i<8;i++) { ++@@ -472,14 +480,14 @@ static void *vpu_start(void *arg) { ++ } ++ if (have_vpu) { ++ execute_multi(gpu->mb, ++- 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 12,gpu->vc + offsetof(struct GPU, mail2), FLAGS_FOR_PROFILING, 5000, ++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, ++ p[0], p[1], p[2], 
p[3], p[4], p[5], p[6], // VPU0 ++ q[0], q[1], q[2], q[3], q[4], q[5], q[6]); // VPU1 ++ q[0] = 0; ++ } else { ++ execute_multi(gpu->mb, ++- 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 12,gpu->vc + offsetof(struct GPU, mail2), FLAGS_FOR_PROFILING, 5000, ++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, ++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 ++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 ++@@ -510,7 +518,7 @@ static void *vpu_start(void *arg) { ++ execute_qpu(gpu->mb,8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */); ++ #else ++ execute_multi(gpu->mb, ++- 12,gpu->vc + offsetof(struct GPU, mail2), 1, 5000, +++ 12,gpu->vc + offsetof(struct GPU, mail2), FLAGS_FOR_PROFILING , 5000, ++ 8,gpu->vc + offsetof(struct GPU, mail), 1 /* no flush */, 5000 /* timeout ms */, ++ p[0], p[1], p[2], p[3], p[4], p[5], p[6], // VPU0 ++ 0, 0 , 0 , 0 , 0 , 0 , 0); // VPU1 ++@@ -525,17 +533,20 @@ static void *vpu_start(void *arg) { ++ // There are three cases we may wish to distinguish of VPU/QPU activity ++ on_time += end_time - start_time; ++ #else ++- if (p[6]==2) +++ if (p[6]>1) { +++ count_deblock++; ++ on_time_deblock += end_time - start_time; ++- else +++ } else { ++ on_time += end_time - start_time; +++ count_qpu++; +++ } ++ #endif ++ count++; ++ if ((count&0x7f)==0) ++ #ifdef RPI_COMBINE_JOBS ++- printf("Posted %d On=%dms, On_deblock=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(on_time_deblock/1000),(int)(off_time/1000)); ++-#else ++ printf("Posted %d On=%dms, Off=%dms\n",count,(int)(on_time/1000),(int)(off_time/1000)); +++#else +++ printf("Posted %d On=%dms (%d calls), On_deblock=%dms (%d calls), Off=%dms\n",count,(int)(on_time/1000),count_qpu,(int)(on_time_deblock/1000),count_deblock,(int)(off_time/1000)); ++ #endif ++ #endif ++ job_done_early: ++diff --git a/libavcodec/rpi_shader.qasm b/libavcodec/rpi_shader.qasm ++index 0686249..64bf5b0 100644 ++--- a/libavcodec/rpi_shader.qasm +++++ 
b/libavcodec/rpi_shader.qasm ++@@ -1077,6 +1077,17 @@ nop ; nop # delay slot 2 ++ ::mc_interrupt_exit12 ++ mov -, vw_wait # wait on the VDW ++ +++ # Dummy wait to test instructions +++# mov r3,1000000 +++#:dummy_loop +++# sub.setf r3, r3, 1 +++# nop +++# nop +++# brr.anynn -, r:dummy_loop +++# nop +++# nop +++# nop +++ ++ ldtmu0 ++ ldtmu0 ++ ldtmu1 ++-- ++2.7.4 ++ ++ ++From 12a194bddd049ab97154e9fbdd46b63b558a3bee Mon Sep 17 00:00:00 2001 ++From: Ben Avison <bavison@riscosopen.org> ++Date: Tue, 23 Jun 2015 23:42:03 +0100 ++Subject: [PATCH 67/68] armv7/hevc: Optimise deblocking boundary strength ++ calculation ++ ++--- ++ libavcodec/arm/hevcdsp_deblock_neon.S | 115 +++++++++++++++++ ++ libavcodec/arm/hevcdsp_init_neon.c | 9 ++ ++ libavcodec/hevc.h | 11 -- ++ libavcodec/hevc_filter.c | 224 ++++++++++++++-------------------- ++ libavcodec/hevcdsp.c | 116 ++++++++++++++++++ ++ libavcodec/hevcdsp.h | 14 +++ ++ 6 files changed, 344 insertions(+), 145 deletions(-) ++ ++diff --git a/libavcodec/arm/hevcdsp_deblock_neon.S b/libavcodec/arm/hevcdsp_deblock_neon.S ++index 166bddb..bad4589 100644 ++--- a/libavcodec/arm/hevcdsp_deblock_neon.S +++++ b/libavcodec/arm/hevcdsp_deblock_neon.S ++@@ -383,3 +383,118 @@ function ff_hevc_h_loop_filter_chroma_neon, export=1 ++ vst1.8 {d4}, [r0] ++ bx lr ++ endfunc +++ +++/* ff_hevc_deblocking_boundary_strengths_neon(int pus, int dup, int in_inc, int out_inc, +++ * int *curr_rpl0, int *curr_rpl1, int *neigh_rpl0, int *neigh_rpl1, +++ * MvField *curr, MvField *neigh, uint8_t *bs) +++ */ +++function ff_hevc_deblocking_boundary_strengths_neon, export=1 +++ add ip, sp, #4*4 +++ push {a2-a4,v1-v8,lr} +++ ldmia ip, {v5-v7} +++1: ldmdb ip, {v1-v4} +++ ldrsb a3, [v5, #8] @ curr->ref_idx +++ ldrsb v8, [v5, #9] +++ ldrsb ip, [v6, #8] @ neigh->ref_idx +++ ldrsb lr, [v6, #9] +++ ldr v1, [v1, a3, lsl #2] +++ ldrb a3, [v5, #10] @ curr->pred_flag +++ ldr v2, [v2, v8, lsl #2] +++ ldrb v8, [v6, #10] @ neigh->pred_flag +++ ldr v3, [v3, ip, lsl #2] +++ ldr v4, 
[v4, lr, lsl #2] +++ teq a3, #3 +++ beq 20f +++ teq v8, #3 +++ beq 90f +++ +++ tst a3, #1 +++ ldrne a3, [v5, #0] @ curr->mv[0] +++ ldreq a3, [v5, #4] @ curr->mv[1] +++ moveq v1, v2 +++ tst v8, #1 +++ ldrne v8, [v6, #0] @ neigh->mv[0] +++ ldreq v8, [v6, #4] @ neigh->mv[1] +++ moveq v3, v4 +++ teq v1, v3 +++ bne 10f +++ ldr lr, =0xFFFCFFFC +++ ssub16 ip, v8, a3 +++ ssub16 a3, a3, v8 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ @ drop through +++10: movne a3, #1 +++11: subs a2, a2, #1 +++12: strbhs a3, [v7], a4 +++ subs a2, a2, #1 +++ bhs 12b +++ +++ ldm sp, {a2, a3} +++ add ip, sp, #16*4 +++ subs a1, a1, #1 +++ add v5, v5, a3 +++ add v6, v6, a3 +++ bhi 1b +++ pop {a2-a4,v1-v8,pc} +++ +++20: teq v8, #3 +++ bne 10b +++ +++ teq v1, v3 +++ teqeq v2, v4 +++ bne 40f +++ teq v1, v2 +++ bne 30f +++ +++ ldrd v1, v2, [v5] @ curr->mv +++ ldrd v3, v4, [v6] @ neigh->mv +++ ldr lr, =0xFFFCFFFC +++ ssub16 ip, v3, v1 +++ ssub16 a3, v1, v3 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ bne 25f +++ ssub16 ip, v4, v2 +++ ssub16 a3, v2, v4 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ beq 11b +++ @ drop through +++25: ssub16 ip, v4, v1 +++ ssub16 a3, v1, v4 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ bne 10b +++ ssub16 ip, v3, v2 +++ ssub16 a3, v2, v3 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ b 10b +++ +++30: ldrd v1, v2, [v5] @ curr->mv +++ ldrd v3, v4, [v6] @ neigh->mv +++ ldr lr, =0xFFFCFFFC +++ ssub16 ip, v3, v1 +++ ssub16 a3, v1, v3 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ bne 10b +++ ssub16 ip, v4, v2 +++ ssub16 a3, v2, v4 +++ sel a3, a3, ip +++ ands a3, a3, lr +++ b 10b +++ +++40: teq v1, v4 +++ teqeq v2, v3 +++ bne 10b +++ +++ ldrd v1, v2, [v5] @ curr->mv +++ ldrd v3, v4, [v6] @ neigh->mv +++ ldr lr, =0xFFFCFFFC +++ b 25b +++ +++90: mov a3, #1 +++ b 11b +++endfunc ++diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c ++index e5da7e9..49c70dd 100644 ++--- a/libavcodec/arm/hevcdsp_init_neon.c +++++ b/libavcodec/arm/hevcdsp_init_neon.c ++@@ -290,6 +290,10 
@@ static void ff_hevc_sao_edge_neon_wrapper(uint8_t *_dst /* align 16 */, uint8_t ++ } ++ #undef CMP ++ +++void ff_hevc_deblocking_boundary_strengths_neon(int pus, int dup, int in_inc, int out_inc, +++ int *curr_rpl0, int *curr_rpl1, int *neigh_rpl0, int *neigh_rpl1, +++ MvField *curr, MvField *neigh, uint8_t *bs); +++ ++ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ { ++ if (bit_depth == 8) { ++@@ -387,4 +391,9 @@ av_cold void ff_hevcdsp_init_neon(HEVCDSPContext *c, const int bit_depth) ++ c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_qpel_uw_pixels_w48_neon_8; ++ c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_qpel_uw_pixels_w64_neon_8; ++ } +++ +++ assert(offsetof(MvField, mv) == 0); +++ assert(offsetof(MvField, ref_idx) == 8); +++ assert(offsetof(MvField, pred_flag) == 10); +++ c->hevc_deblocking_boundary_strengths = ff_hevc_deblocking_boundary_strengths_neon; ++ } ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 7eb37e6..496c0e1 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -684,17 +684,6 @@ typedef struct CodingUnit { ++ uint8_t cu_transquant_bypass_flag; ++ } CodingUnit; ++ ++-typedef struct Mv { ++- int16_t x; ///< horizontal component of motion vector ++- int16_t y; ///< vertical component of motion vector ++-} Mv; ++- ++-typedef struct MvField { ++- DECLARE_ALIGNED(4, Mv, mv)[2]; ++- int8_t ref_idx[2]; ++- int8_t pred_flag; ++-} MvField; ++- ++ typedef struct NeighbourAvailable { ++ int cand_bottom_left; ++ int cand_left; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 6367068..826a82f 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -726,69 +726,6 @@ static void deblocking_filter_CTB(HEVCContext *s, int x0, int y0) ++ } ++ } ++ ++-static int boundary_strength(HEVCContext *s, MvField *curr, MvField *neigh, ++- RefPicList *neigh_refPicList) ++-{ ++- if (curr->pred_flag == PF_BI && neigh->pred_flag == PF_BI) { ++- // same L0 and L1 ++- if 
(s->ref->refPicList[0].list[curr->ref_idx[0]] == neigh_refPicList[0].list[neigh->ref_idx[0]] && ++- s->ref->refPicList[0].list[curr->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]] && ++- neigh_refPicList[0].list[neigh->ref_idx[0]] == neigh_refPicList[1].list[neigh->ref_idx[1]]) { ++- if ((FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 || ++- FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) && ++- (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 || ++- FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4)) ++- return 1; ++- else ++- return 0; ++- } else if (neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[0].list[curr->ref_idx[0]] && ++- neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) { ++- if (FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 || ++- FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) ++- return 1; ++- else ++- return 0; ++- } else if (neigh_refPicList[1].list[neigh->ref_idx[1]] == s->ref->refPicList[0].list[curr->ref_idx[0]] && ++- neigh_refPicList[0].list[neigh->ref_idx[0]] == s->ref->refPicList[1].list[curr->ref_idx[1]]) { ++- if (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 || ++- FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4) ++- return 1; ++- else ++- return 0; ++- } else { ++- return 1; ++- } ++- } else if ((curr->pred_flag != PF_BI) && (neigh->pred_flag != PF_BI)){ // 1 MV ++- Mv A, B; ++- int ref_A, ref_B; ++- ++- if (curr->pred_flag & 1) { ++- A = curr->mv[0]; ++- ref_A = s->ref->refPicList[0].list[curr->ref_idx[0]]; ++- } else { ++- A = curr->mv[1]; ++- ref_A = s->ref->refPicList[1].list[curr->ref_idx[1]]; ++- } ++- ++- if (neigh->pred_flag & 1) { 
++- B = neigh->mv[0]; ++- ref_B = neigh_refPicList[0].list[neigh->ref_idx[0]]; ++- } else { ++- B = neigh->mv[1]; ++- ref_B = neigh_refPicList[1].list[neigh->ref_idx[1]]; ++- } ++- ++- if (ref_A == ref_B) { ++- if (FFABS(A.x - B.x) >= 4 || FFABS(A.y - B.y) >= 4) ++- return 1; ++- else ++- return 0; ++- } else ++- return 1; ++- } ++- ++- return 1; ++-} ++ ++ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ int log2_trafo_size) ++@@ -799,10 +736,17 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ int log2_min_tu_size = s->ps.sps->log2_min_tb_size; ++ int min_pu_width = s->ps.sps->min_pu_width; ++ int min_tu_width = s->ps.sps->min_tb_width; ++- int is_intra = tab_mvf[(y0 >> log2_min_pu_size) * min_pu_width + ++- (x0 >> log2_min_pu_size)].pred_flag == PF_INTRA; ++ int boundary_upper, boundary_left; ++- int i, j, bs; +++ int i, j; +++ RefPicList *rpl = s->ref->refPicList; +++ int min_pu_in_4pix = (1 << log2_min_pu_size) >> 2; +++ int trafo_in_min_pus = (1 << log2_trafo_size) >> log2_min_pu_size; +++ int y_pu = y0 >> log2_min_pu_size; +++ int x_pu = x0 >> log2_min_pu_size; +++ MvField *curr = &tab_mvf[y_pu * min_pu_width + x_pu]; +++ int is_intra = curr->pred_flag == PF_INTRA; +++ int inc = log2_min_pu_size == 2 ? 2 : 1; +++ uint8_t *bs; ++ ++ #ifdef DISABLE_STRENGTHS ++ return; ++@@ -818,34 +762,56 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ (y0 % (1 << s->ps.sps->log2_ctb_size)) == 0))) ++ boundary_upper = 0; ++ +++ bs = &s->horizontal_bs[(x0 + y0 * s->bs_width) >> 2]; +++ ++ if (boundary_upper) { ++ RefPicList *rpl_top = (lc->boundary_flags & BOUNDARY_UPPER_SLICE) ? 
++ ff_hevc_get_ref_list(s, s->ref, x0, y0 - 1) : ++- s->ref->refPicList; ++- int yp_pu = (y0 - 1) >> log2_min_pu_size; ++- int yq_pu = y0 >> log2_min_pu_size; ++- int yp_tu = (y0 - 1) >> log2_min_tu_size; ++- int yq_tu = y0 >> log2_min_tu_size; +++ rpl; +++ MvField *top = curr - min_pu_width; +++ +++ if (is_intra) { +++ for (i = 0; i < (1 << log2_trafo_size); i += 4) +++ bs[i >> 2] = 2; +++ +++ } else { +++ int y_tu = y0 >> log2_min_tu_size; +++ int x_tu = x0 >> log2_min_tu_size; +++ uint8_t *curr_cbf_luma = &s->cbf_luma[y_tu * min_tu_width + x_tu]; +++ uint8_t *top_cbf_luma = curr_cbf_luma - min_tu_width; +++ +++ s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus, +++ min_pu_in_4pix, sizeof (MvField), 4 >> 2, +++ rpl[0].list, rpl[1].list, rpl_top[0].list, rpl_top[1].list, +++ curr, top, bs); ++ ++ for (i = 0; i < (1 << log2_trafo_size); i += 4) { ++- int x_pu = (x0 + i) >> log2_min_pu_size; ++- int x_tu = (x0 + i) >> log2_min_tu_size; ++- MvField *top = &tab_mvf[yp_pu * min_pu_width + x_pu]; ++- MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu]; ++- uint8_t top_cbf_luma = s->cbf_luma[yp_tu * min_tu_width + x_tu]; ++- uint8_t curr_cbf_luma = s->cbf_luma[yq_tu * min_tu_width + x_tu]; ++- ++- if (curr->pred_flag == PF_INTRA || top->pred_flag == PF_INTRA) ++- bs = 2; ++- else if (curr_cbf_luma || top_cbf_luma) ++- bs = 1; ++- else ++- bs = boundary_strength(s, curr, top, rpl_top); ++- s->horizontal_bs[((x0 + i) + y0 * s->bs_width) >> 2] = bs; +++ int i_pu = i >> log2_min_pu_size; +++ int i_tu = i >> log2_min_tu_size; +++ +++ if (top[i_pu].pred_flag == PF_INTRA) +++ bs[i >> 2] = 2; +++ else if (curr_cbf_luma[i_tu] || top_cbf_luma[i_tu]) +++ bs[i >> 2] = 1; ++ } +++ } +++ } +++ +++ if (!is_intra) { +++ for (j = inc; j < trafo_in_min_pus; j += inc) { +++ MvField *top; +++ +++ curr += min_pu_width * inc; +++ top = curr - min_pu_width; +++ bs += s->bs_width * inc << log2_min_pu_size >> 2; +++ +++ 
s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus, +++ min_pu_in_4pix, sizeof (MvField), 4 >> 2, +++ rpl[0].list, rpl[1].list, rpl[0].list, rpl[1].list, +++ curr, top, bs); +++ } ++ } ++ ++- // bs for vertical TU boundaries ++ boundary_left = x0 > 0 && !(x0 & 7); ++ if (boundary_left && ++ ((!s->sh.slice_loop_filter_across_slices_enabled_flag && ++@@ -856,64 +822,54 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ (x0 % (1 << s->ps.sps->log2_ctb_size)) == 0))) ++ boundary_left = 0; ++ +++ curr = &tab_mvf[y_pu * min_pu_width + x_pu]; +++ bs = &s->vertical_bs[(x0 + y0 * s->bs_width) >> 2]; +++ ++ if (boundary_left) { ++ RefPicList *rpl_left = (lc->boundary_flags & BOUNDARY_LEFT_SLICE) ? ++ ff_hevc_get_ref_list(s, s->ref, x0 - 1, y0) : ++- s->ref->refPicList; ++- int xp_pu = (x0 - 1) >> log2_min_pu_size; ++- int xq_pu = x0 >> log2_min_pu_size; ++- int xp_tu = (x0 - 1) >> log2_min_tu_size; ++- int xq_tu = x0 >> log2_min_tu_size; ++- ++- for (i = 0; i < (1 << log2_trafo_size); i += 4) { ++- int y_pu = (y0 + i) >> log2_min_pu_size; ++- int y_tu = (y0 + i) >> log2_min_tu_size; ++- MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu]; ++- MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu]; ++- uint8_t left_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xp_tu]; ++- uint8_t curr_cbf_luma = s->cbf_luma[y_tu * min_tu_width + xq_tu]; ++- ++- if (curr->pred_flag == PF_INTRA || left->pred_flag == PF_INTRA) ++- bs = 2; ++- else if (curr_cbf_luma || left_cbf_luma) ++- bs = 1; ++- else ++- bs = boundary_strength(s, curr, left, rpl_left); ++- s->vertical_bs[(x0 + (y0 + i) * s->bs_width) >> 2] = bs; ++- } ++- } +++ rpl; +++ MvField *left = curr - 1; ++ ++- if (log2_trafo_size > log2_min_pu_size && !is_intra) { ++- RefPicList *rpl = s->ref->refPicList; +++ if (is_intra) { +++ for (j = 0; j < (1 << log2_trafo_size); j += 4) +++ bs[j * s->bs_width >> 2] = 2; ++ ++- // bs for TU internal horizontal PU boundaries ++- for (j = 8; j < (1 << 
log2_trafo_size); j += 8) { ++- int yp_pu = (y0 + j - 1) >> log2_min_pu_size; ++- int yq_pu = (y0 + j) >> log2_min_pu_size; ++- ++- for (i = 0; i < (1 << log2_trafo_size); i += 4) { ++- int x_pu = (x0 + i) >> log2_min_pu_size; ++- MvField *top = &tab_mvf[yp_pu * min_pu_width + x_pu]; ++- MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu]; ++- ++- bs = boundary_strength(s, curr, top, rpl); ++- s->horizontal_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs; +++ } else { +++ int y_tu = y0 >> log2_min_tu_size; +++ int x_tu = x0 >> log2_min_tu_size; +++ uint8_t *curr_cbf_luma = &s->cbf_luma[y_tu * min_tu_width + x_tu]; +++ uint8_t *left_cbf_luma = curr_cbf_luma - 1; +++ +++ s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus, +++ min_pu_in_4pix, min_pu_width * sizeof (MvField), 4 * s->bs_width >> 2, +++ rpl[0].list, rpl[1].list, rpl_left[0].list, rpl_left[1].list, +++ curr, left, bs); +++ +++ for (j = 0; j < (1 << log2_trafo_size); j += 4) { +++ int j_pu = j >> log2_min_pu_size; +++ int j_tu = j >> log2_min_tu_size; +++ +++ if (left[j_pu * min_pu_width].pred_flag == PF_INTRA) +++ bs[j * s->bs_width >> 2] = 2; +++ else if (curr_cbf_luma[j_tu * min_tu_width] || left_cbf_luma[j_tu * min_tu_width]) +++ bs[j * s->bs_width >> 2] = 1; ++ } ++ } +++ } ++ ++- // bs for TU internal vertical PU boundaries ++- for (j = 0; j < (1 << log2_trafo_size); j += 4) { ++- int y_pu = (y0 + j) >> log2_min_pu_size; +++ if (!is_intra) { +++ for (i = inc; i < trafo_in_min_pus; i += inc) { +++ MvField *left; ++ ++- for (i = 8; i < (1 << log2_trafo_size); i += 8) { ++- int xp_pu = (x0 + i - 1) >> log2_min_pu_size; ++- int xq_pu = (x0 + i) >> log2_min_pu_size; ++- MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu]; ++- MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu]; +++ curr += inc; +++ left = curr - 1; +++ bs += inc << log2_min_pu_size >> 2; ++ ++- bs = boundary_strength(s, curr, left, rpl); ++- s->vertical_bs[((x0 + i) + (y0 + j) * s->bs_width) >> 2] = bs; ++- } +++ 
s->hevcdsp.hevc_deblocking_boundary_strengths(trafo_in_min_pus, +++ min_pu_in_4pix, min_pu_width * sizeof (MvField), 4 * s->bs_width >> 2, +++ rpl[0].list, rpl[1].list, rpl[0].list, rpl[1].list, +++ curr, left, bs); ++ } ++ } ++ } ++diff --git a/libavcodec/hevcdsp.c b/libavcodec/hevcdsp.c ++index 9d773d9..a6534a9 100644 ++--- a/libavcodec/hevcdsp.c +++++ b/libavcodec/hevcdsp.c ++@@ -123,6 +123,120 @@ DECLARE_ALIGNED(16, const int8_t, ff_hevc_qpel_filters[3][16]) = { ++ #include "hevcdsp_template.c" ++ #undef BIT_DEPTH ++ +++static void hevc_deblocking_boundary_strengths(int pus, int dup, int in_inc, int out_inc, +++ int *curr_rpl0, int *curr_rpl1, int *neigh_rpl0, int *neigh_rpl1, +++ MvField *curr, MvField *neigh, uint8_t *bs) +++{ +++ for (; pus > 0; pus--) { +++ int strength, out; +++ int curr_refL0 = curr_rpl0[curr->ref_idx[0]]; +++ int curr_refL1 = curr_rpl1[curr->ref_idx[1]]; +++ int neigh_refL0 = neigh_rpl0[neigh->ref_idx[0]]; +++ int neigh_refL1 = neigh_rpl1[neigh->ref_idx[1]]; +++ +++#if 1 // This more directly matches the original implementation +++ if (curr->pred_flag == PF_BI && neigh->pred_flag == PF_BI) { +++ // same L0 and L1 +++ if (curr_refL0 == neigh_refL0 && +++ curr_refL0 == curr_refL1 && +++ neigh_refL0 == neigh_refL1) { +++ if ((FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 || +++ FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) && +++ (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 || +++ FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4)) +++ strength = 1; +++ else +++ strength = 0; +++ } else if (neigh_refL0 == curr_refL0 && +++ neigh_refL1 == curr_refL1) { +++ if (FFABS(neigh->mv[0].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[0].y) >= 4 || +++ FFABS(neigh->mv[1].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[1].y) >= 4) +++ strength = 1; +++ else +++ 
strength = 0; +++ } else if (neigh_refL1 == curr_refL0 && +++ neigh_refL0 == curr_refL1) { +++ if (FFABS(neigh->mv[1].x - curr->mv[0].x) >= 4 || FFABS(neigh->mv[1].y - curr->mv[0].y) >= 4 || +++ FFABS(neigh->mv[0].x - curr->mv[1].x) >= 4 || FFABS(neigh->mv[0].y - curr->mv[1].y) >= 4) +++ strength = 1; +++ else +++ strength = 0; +++ } else { +++ strength = 1; +++ } +++ } else if ((curr->pred_flag != PF_BI) && (neigh->pred_flag != PF_BI)){ // 1 MV +++ Mv curr_mv0, neigh_mv0; +++ +++ if (curr->pred_flag & 1) { +++ curr_mv0 = curr->mv[0]; +++ } else { +++ curr_mv0 = curr->mv[1]; +++ curr_refL0 = curr_refL1; +++ } +++ +++ if (neigh->pred_flag & 1) { +++ neigh_mv0 = neigh->mv[0]; +++ } else { +++ neigh_mv0 = neigh->mv[1]; +++ neigh_refL0 = neigh_refL1; +++ } +++ +++ if (curr_refL0 == neigh_refL0) { +++ if (FFABS(curr_mv0.x - neigh_mv0.x) >= 4 || FFABS(curr_mv0.y - neigh_mv0.y) >= 4) +++ strength = 1; +++ else +++ strength = 0; +++ } else +++ strength = 1; +++ } else +++ strength = 1; +++#else // This has exactly the same effect, but is more suitable for vectorisation +++ Mv curr_mv[2]; +++ Mv neigh_mv[2]; +++ memcpy(curr_mv, curr->mv, sizeof curr_mv); +++ memcpy(neigh_mv, neigh->mv, sizeof neigh_mv); +++ +++ if (!(curr->pred_flag & 2)) { +++ curr_mv[1] = curr_mv[0]; +++ curr_refL1 = curr_refL0; +++ } +++ if (!(neigh->pred_flag & 2)) { +++ neigh_mv[1] = neigh_mv[0]; +++ neigh_refL1 = neigh_refL0; +++ } +++ if (!(curr->pred_flag & 1)) { +++ curr_mv[0] = curr_mv[1]; +++ curr_refL0 = curr_refL1; +++ } +++ if (!(neigh->pred_flag & 1)) { +++ neigh_mv[0] = neigh_mv[1]; +++ neigh_refL0 = neigh_refL1; +++ } +++ +++ strength = 1; +++ +++ strength &= (neigh_refL0 != curr_refL0) | (neigh_refL1 != curr_refL1) | +++ (FFABS(neigh_mv[0].x - curr_mv[0].x) >= 4) | (FFABS(neigh_mv[0].y - curr_mv[0].y) >= 4) | +++ (FFABS(neigh_mv[1].x - curr_mv[1].x) >= 4) | (FFABS(neigh_mv[1].y - curr_mv[1].y) >= 4); +++ +++ strength &= (neigh_refL1 != curr_refL0) | (neigh_refL0 != curr_refL1) | +++ 
(FFABS(neigh_mv[1].x - curr_mv[0].x) >= 4) | (FFABS(neigh_mv[1].y - curr_mv[0].y) >= 4) | +++ (FFABS(neigh_mv[0].x - curr_mv[1].x) >= 4) | (FFABS(neigh_mv[0].y - curr_mv[1].y) >= 4); +++ +++ strength |= (((curr->pred_flag + 1) ^ (neigh->pred_flag + 1)) >> 2); +++#endif +++ +++ curr += in_inc / sizeof (MvField); +++ neigh += in_inc / sizeof (MvField); +++ +++ for (out = dup; out > 0; out--) +++ { +++ *bs = strength; +++ bs += out_inc; +++ } +++ } +++} +++ ++ void ff_hevc_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth) ++ { ++ #undef FUNC ++@@ -257,6 +371,8 @@ int i = 0; ++ break; ++ } ++ +++ hevcdsp->hevc_deblocking_boundary_strengths = hevc_deblocking_boundary_strengths; +++ ++ if (ARCH_X86) ++ ff_hevc_dsp_init_x86(hevcdsp, bit_depth); ++ if (ARCH_ARM) ++diff --git a/libavcodec/hevcdsp.h b/libavcodec/hevcdsp.h ++index 9f1f6dd..e221e54 100644 ++--- a/libavcodec/hevcdsp.h +++++ b/libavcodec/hevcdsp.h ++@@ -42,6 +42,17 @@ typedef struct SAOParams { ++ uint8_t type_idx[3]; ///< sao_type_idx ++ } SAOParams; ++ +++typedef struct Mv { +++ int16_t x; ///< horizontal component of motion vector +++ int16_t y; ///< vertical component of motion vector +++} Mv; +++ +++typedef struct MvField { +++ DECLARE_ALIGNED(4, Mv, mv)[2]; +++ int8_t ref_idx[2]; +++ int8_t pred_flag; +++} MvField; +++ ++ typedef struct HEVCDSPContext { ++ void (*put_pcm)(uint8_t *_dst, ptrdiff_t _stride, int width, int height, ++ struct GetBitContext *gb, int pcm_bit_depth); ++@@ -120,6 +131,9 @@ typedef struct HEVCDSPContext { ++ void (*hevc_v_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride, ++ int32_t *tc, uint8_t *no_p, ++ uint8_t *no_q); +++ void (*hevc_deblocking_boundary_strengths)(int pus, int dup, int in_inc, int out_inc, +++ int *curr_rpl0, int *curr_rpl1, int *neigh_rpl0, int *neigh_rpl1, +++ MvField *curr, MvField *neigh, uint8_t *bs); ++ } HEVCDSPContext; ++ ++ void ff_hevc_dsp_init(HEVCDSPContext *hpc, int bit_depth); ++-- ++2.7.4 ++ ++ ++From 619366d6acfd5f040a3116fda97b1146c8e40250 Mon 
Sep 17 00:00:00 2001 ++From: Peter de Rivaz <peter.derivaz@gmail.com> ++Date: Wed, 15 Jul 2015 09:09:11 +0100 ++Subject: [PATCH 68/68] Only enable qpu when needed ++ ++--- ++ libavcodec/hevc.h | 2 +- ++ libavcodec/rpi_qpu.c | 21 ++++++++++++++++----- ++ 2 files changed, 17 insertions(+), 6 deletions(-) ++ ++diff --git a/libavcodec/hevc.h b/libavcodec/hevc.h ++index 496c0e1..ce14975 100644 ++--- a/libavcodec/hevc.h +++++ b/libavcodec/hevc.h ++@@ -57,7 +57,7 @@ ++ // Define RPI_WORKER to launch a worker thread for pixel processing tasks ++ #define RPI_WORKER ++ // Define RPI_DEBLOCK_VPU to perform deblocking on the VPUs ++- #define RPI_DEBLOCK_VPU +++ //#define RPI_DEBLOCK_VPU ++ ++ #endif ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index 5aa0432..ffd13ca 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -9,7 +9,7 @@ ++ // define RPI_ASYNC to run the VPU in a separate thread, need to make a separate call to check for completion ++ #define RPI_ASYNC ++ // Define RPI_COMBINE_JOBS to find jobs that can be executed in parallel ++-#define RPI_COMBINE_JOBS +++//#define RPI_COMBINE_JOBS ++ ++ #include <stdio.h> ++ #include <stdlib.h> ++@@ -143,9 +143,9 @@ static int gpu_init(volatile struct GPU **gpu) { ++ volatile struct GPU* ptr; ++ if (mb < 0) ++ return -1; ++- +++#ifndef RPI_ASYNC ++ if (qpu_enable(mb, 1)) return -2; ++- +++#endif ++ vcsm_init(); ++ gpu_malloc_uncached_internal(sizeof(struct GPU), &gpu_mem_ptr, mb); ++ ptr = (volatile struct GPU*)gpu_mem_ptr.arm; ++@@ -336,9 +336,9 @@ static void gpu_term(void) ++ vpu_post_code(0, 0, 0, 0, 0, 0, -1, NULL); ++ pthread_join(vpu_thread, &res); ++ } ++-#endif ++- +++#else ++ qpu_enable(mb, 0); +++#endif ++ gpu_free_internal(&gpu_mem_ptr); ++ ++ vcsm_exit(); ++@@ -400,6 +400,7 @@ static void *vpu_start(void *arg) { ++ int count_deblock=0; ++ int count_qpu=0; ++ #endif +++ int qpu_started = 0; ++ while(1) { ++ int i; ++ int *p; // Pointer for a QPU/VPU job ++@@ -427,6 +428,12 @@ static 
void *vpu_start(void *arg) { ++ if (p[7] == 0 && p[0] == 0 && p[16]==0) ++ goto job_done_early; ++ +++ if (!qpu_started) { +++ int result = qpu_enable(gpu->mb, 1); +++ av_assert0(result==0); +++ qpu_started = 1; +++ } +++ ++ #ifdef RPI_COMBINE_JOBS ++ // First scan for a qpu job ++ for (int x=0;x<num_jobs;x++) { ++@@ -556,6 +563,10 @@ job_done_early: ++ pthread_mutex_unlock(&post_mutex); ++ } ++ +++ if (qpu_started) { +++ qpu_enable(gpu->mb, 0); +++ } +++ ++ return NULL; ++ } ++ ++-- ++2.7.4 ++ ++From a0d0946951b53e64ce103dd61b455f8d1f72caf9 Mon Sep 17 00:00:00 2001 ++From: John Cox <jc@kynesim.co.uk> ++Date: Tue, 9 Feb 2016 11:57:40 +0000 ++Subject: [PATCH 1/2] Zero copy code v6 ++ ++This version has GPU buffer pooling code ++--- ++ ffmpeg.c | 123 +++++++++----- ++ libavcodec/Makefile | 2 + ++ libavcodec/avcodec.h | 6 + ++ libavcodec/hevc.c | 92 ++++++----- ++ libavcodec/hevc_filter.c | 83 +++++----- ++ libavcodec/rpi_qpu.c | 2 +- ++ libavcodec/rpi_qpu.h | 109 ++++++++++++- ++ libavcodec/rpi_zc.c | 406 +++++++++++++++++++++++++++++++++++++++++++++++ ++ libavcodec/rpi_zc.h | 83 ++++++++++ ++ 9 files changed, 779 insertions(+), 127 deletions(-) ++ create mode 100644 libavcodec/rpi_zc.c ++ create mode 100644 libavcodec/rpi_zc.h ++ ++diff --git a/ffmpeg.c b/ffmpeg.c ++index 50c6e86..953e5b8 100644 ++--- a/ffmpeg.c +++++ b/ffmpeg.c ++@@ -25,7 +25,7 @@ ++ ++ #ifdef RPI ++ #define RPI_DISPLAY ++-//#define RPI_ZERO_COPY +++#define RPI_ZERO_COPY ++ #endif ++ ++ #include "config.h" ++@@ -80,9 +80,7 @@ ++ #include <interface/mmal/util/mmal_default_components.h> ++ #include <interface/mmal/util/mmal_connection.h> ++ #include <interface/mmal/util/mmal_util_params.h> ++-#ifdef RPI_ZERO_COPY ++-#include "libavcodec/rpi_qpu.h" ++-#endif +++#include "libavcodec/rpi_zc.h" ++ #endif ++ ++ #if HAVE_SYS_RESOURCE_H ++@@ -183,13 +181,7 @@ static void free_input_threads(void); ++ ++ static MMAL_COMPONENT_T* rpi_display = NULL; ++ static MMAL_POOL_T *rpi_pool = NULL; ++- ++-#ifdef 
RPI_ZERO_COPY ++-static uint8_t *get_vc_handle(AVBufferRef *bref) { ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++- return (uint8_t *)p->vc_handle; ++-} ++-#endif +++static volatile int rpi_display_count = 0; ++ ++ static MMAL_POOL_T* display_alloc_pool(MMAL_PORT_T* port, size_t w, size_t h) ++ { ++@@ -206,7 +198,7 @@ static MMAL_POOL_T* display_alloc_pool(MMAL_PORT_T* port, size_t w, size_t h) ++ for (i = 0; i < NUM_BUFFERS; ++i) ++ { ++ MMAL_BUFFER_HEADER_T* buffer = pool->header[i]; ++- void* bufPtr = buffer->data; +++ char * bufPtr = buffer->data; ++ memset(bufPtr, i*30, w*h); ++ memset(bufPtr+w*h, 128, (w*h)/2); ++ } ++@@ -215,23 +207,31 @@ static MMAL_POOL_T* display_alloc_pool(MMAL_PORT_T* port, size_t w, size_t h) ++ return pool; ++ } ++ ++-static void display_cb_input(MMAL_PORT_T *port,MMAL_BUFFER_HEADER_T *buffer) { +++static void display_cb_input(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer) { +++#ifdef RPI_ZERO_COPY +++ av_rpi_zc_unref(buffer->user_data); +++ --rpi_display_count; +++#endif +++ mmal_buffer_header_release(buffer); +++} +++ +++static void display_cb_control(MMAL_PORT_T *port,MMAL_BUFFER_HEADER_T *buffer) { ++ mmal_buffer_header_release(buffer); ++ } ++ ++ static MMAL_COMPONENT_T* display_init(size_t x, size_t y, size_t w, size_t h) ++ { ++ MMAL_COMPONENT_T* display; ++- int w2 = (w+31)&~31; ++- int h2 = (h+15)&~15; ++ MMAL_DISPLAYREGION_T region = ++ { ++- {MMAL_PARAMETER_DISPLAYREGION, sizeof(region)}, +++ .hdr = {MMAL_PARAMETER_DISPLAYREGION, sizeof(region)}, ++ .set = MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_FULLSCREEN | MMAL_DISPLAY_SET_DEST_RECT, ++ .layer = 2, ++ .fullscreen = 0, ++ .dest_rect = {x, y, w, h} ++ }; +++ const AVRpiZcFrameGeometry geo = av_rpi_zc_frame_geometry(w, h); +++ ++ bcm_host_init(); // TODO is this needed? 
++ mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &display); ++ assert(display); ++@@ -240,8 +240,8 @@ static MMAL_COMPONENT_T* display_init(size_t x, size_t y, size_t w, size_t h) ++ ++ MMAL_ES_FORMAT_T* format = display->input[0]->format; ++ format->encoding = MMAL_ENCODING_I420; ++- format->es->video.width = w2; ++- format->es->video.height = h2; +++ format->es->video.width = geo.stride_y; +++ format->es->video.height = geo.height_y; ++ format->es->video.crop.x = 0; ++ format->es->video.crop.y = 0; ++ format->es->video.crop.width = w; ++@@ -250,46 +250,75 @@ static MMAL_COMPONENT_T* display_init(size_t x, size_t y, size_t w, size_t h) ++ ++ mmal_component_enable(display); ++ ++- rpi_pool = display_alloc_pool(display->input[0], w2, h2); +++ rpi_pool = display_alloc_pool(display->input[0], geo.stride_y, geo.height_y); ++ ++ mmal_port_enable(display->input[0],display_cb_input); ++- mmal_port_enable(display->control,display_cb_input); +++ mmal_port_enable(display->control,display_cb_control); ++ ++- printf("Allocated display %d %d\n",w,h); +++ printf("Allocated display %dx%d in %dx%d\n", w, h, geo.stride_y, geo.height_y); ++ ++ return display; ++ } ++ ++-static void display_frame(MMAL_COMPONENT_T* display,AVFrame* fr) +++static void display_frame(struct AVCodecContext * const s, MMAL_COMPONENT_T* const display, const AVFrame* const fr) ++ { ++- int w = fr->width; ++- int h = fr->height; ++- int w2 = (w+31)&~31; ++- int h2 = (h+15)&~15; ++ if (!display || !rpi_pool) ++ return; +++ +++ if (rpi_display_count >= 3) { +++ av_log(s, AV_LOG_VERBOSE, "Frame dropped\n"); +++ return; +++ } +++ ++ MMAL_BUFFER_HEADER_T* buf = mmal_queue_get(rpi_pool->queue); ++ if (!buf) { ++- // Running too fast so drop the frame ++- return; +++ // Running too fast so drop the frame +++ printf("Q alloc failure\n"); +++ return; ++ } ++ assert(buf); ++ buf->cmd = 0; ++- buf->length = (w2 * h2 * 3)/2; ++ buf->offset = 0; // Offset to valid data ++ buf->flags = 0; ++ #ifdef 
RPI_ZERO_COPY ++- buf->data = get_vc_handle(fr->buf[0]); ++- buf->alloc_size = (w2*h2*3)/2; +++{ +++ const AVRpiZcRefPtr fr_buf = av_rpi_zc_ref(s, fr, 1); +++ +++ buf->user_data = fr_buf; +++ buf->data = av_rpi_zc_vc_handle(fr_buf); +++ buf->alloc_size = +++ buf->length = av_rpi_zc_numbytes(fr_buf); +++ +++ ++rpi_display_count; +++} ++ #else +++{ +++#error YYY +++ int w = fr->width; +++ int h = fr->height; +++ int w2 = (w+31)&~31; +++ int h2 = (h+15)&~15; +++ +++ buf->length = (w2 * h2 * 3)/2; +++ buf->user_data = NULL; +++ ++ //mmal_buffer_header_mem_lock(buf); ++ memcpy(buf->data, fr->data[0], w2 * h); ++ memcpy(buf->data+w2*h2, fr->data[1], w2 * h / 4); ++ memcpy(buf->data+w2*h2*5/4, fr->data[2], w2 * h / 4); ++ //mmal_buffer_header_mem_unlock(buf); +++} ++ #endif ++ ++- mmal_port_send_buffer(display->input[0], buf); // I assume this will automatically get released +++ while (rpi_display_count >= 3) { +++ usleep(5000); +++ } +++ +++ if (mmal_port_send_buffer(display->input[0], buf) != MMAL_SUCCESS) +++ { +++ printf("** send failed: depth=%d\n", rpi_display_count); +++ display_cb_input(NULL, buf); +++ } ++ } ++ ++ static void display_exit(MMAL_COMPONENT_T* display) ++@@ -687,6 +716,11 @@ static void ffmpeg_cleanup(int ret) ++ avformat_close_input(&input_files[i]->ctx); ++ av_freep(&input_files[i]); ++ } +++ +++#ifdef RPI_DISPLAY +++ display_exit(rpi_display); +++#endif +++ ++ for (i = 0; i < nb_input_streams; i++) { ++ InputStream *ist = input_streams[i]; ++ ++@@ -698,6 +732,9 @@ static void ffmpeg_cleanup(int ret) ++ av_freep(&ist->filters); ++ av_freep(&ist->hwaccel_device); ++ +++#ifdef RPI_ZERO_COPY +++ av_rpi_zc_uninit(ist->dec_ctx); +++#endif ++ avcodec_free_context(&ist->dec_ctx); ++ ++ av_freep(&input_streams[i]); ++@@ -729,9 +766,6 @@ static void ffmpeg_cleanup(int ret) ++ term_exit(); ++ ffmpeg_exited = 1; ++ ++-#ifdef RPI_DISPLAY ++- display_exit(rpi_display); ++-#endif ++ } ++ ++ void remove_avoptions(AVDictionary **a, AVDictionary *b) ++@@ -1091,18 
+1125,19 @@ static void do_video_out(AVFormatContext *s, ++ int frame_size = 0; ++ InputStream *ist = NULL; ++ AVFilterContext *filter = ost->filter->filter; +++ +++ if (ost->source_index >= 0) +++ ist = input_streams[ost->source_index]; +++ ++ #ifdef RPI_DISPLAY ++- if (next_picture) +++ if (next_picture && ist != NULL) ++ { ++- if (!rpi_display) +++ if (!rpi_display) ++ rpi_display = display_init(0,0,next_picture->width,next_picture->height); ++- display_frame(rpi_display,next_picture); +++ display_frame(ist->dec_ctx, rpi_display, next_picture); ++ } ++ #endif ++ ++- if (ost->source_index >= 0) ++- ist = input_streams[ost->source_index]; ++- ++ if (filter->inputs[0]->frame_rate.num > 0 && ++ filter->inputs[0]->frame_rate.den > 0) ++ duration = 1/(av_q2d(filter->inputs[0]->frame_rate) * av_q2d(enc->time_base)); ++@@ -2708,6 +2743,12 @@ static int init_input_stream(int ist_index, char *error, int error_len) ++ ist->dec_ctx->opaque = ist; ++ ist->dec_ctx->get_format = get_format; ++ ist->dec_ctx->get_buffer2 = get_buffer; +++ +++#ifdef RPI_ZERO_COPY +++ // Overrides the above get_buffer2 +++ av_rpi_zc_init(ist->dec_ctx); +++#endif +++ ++ ist->dec_ctx->thread_safe_callbacks = 1; ++ ++ av_opt_set_int(ist->dec_ctx, "refcounted_frames", 1, 0); ++diff --git a/libavcodec/Makefile b/libavcodec/Makefile ++index 03065cd..21e4514 100644 ++--- a/libavcodec/Makefile +++++ b/libavcodec/Makefile ++@@ -9,6 +9,7 @@ HEADERS = avcodec.h \ ++ rpi_shader.h \ ++ rpi_mailbox.h \ ++ rpi_hevc_transform.h \ +++ rpi_zc.h \ ++ d3d11va.h \ ++ dirac.h \ ++ dv_profile.h \ ++@@ -50,6 +51,7 @@ OBJS = allcodecs.o \ ++ rpi_qpu.o \ ++ rpi_shader.o \ ++ rpi_mailbox.o \ +++ rpi_zc.o \ ++ vorbis_parser.o \ ++ xiph.o \ ++ ++diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h ++index 39713ed..a1ba217 100644 ++--- a/libavcodec/avcodec.h +++++ b/libavcodec/avcodec.h ++@@ -3505,6 +3505,12 @@ typedef struct AVCodecContext { ++ #define FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS 1 ++ #endif ++ +++ /** +++ * Opaque 
pointer for use by replacement get_buffer2 code +++ * +++ * @author jc (08/02/2016) +++ */ +++ void * get_buffer_context; ++ } AVCodecContext; ++ ++ AVRational av_codec_get_pkt_timebase (const AVCodecContext *avctx); ++diff --git a/libavcodec/hevc.c b/libavcodec/hevc.c ++index 8437e10..51736c7 100644 ++--- a/libavcodec/hevc.c +++++ b/libavcodec/hevc.c ++@@ -114,10 +114,6 @@ static uint32_t rpi_filter_coefs[8][1] = { ++ { ENCODE_COEFFS( -2, 10, 58, -2) } ++ }; ++ ++-static uint32_t get_vc_address(AVBufferRef *bref) { ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++- return p->vc; ++-} ++ #endif ++ ++ ++@@ -2197,9 +2193,9 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int bw = nPbW-start_x; ++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref0->frame); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref0->frame); ++ *y++ = ( (bw<16 ? bw : 16) << 16 ) + (bh<16 ? 
bh : 16); ++ *y++ = my2_mx2_my_mx; ++ if (weight_flag) { ++@@ -2207,7 +2203,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ } else { ++ *y++ = 1; // Weight of 1 and offset of 0 ++ } ++- *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ *y++ = (get_vc_address_y(s->frame) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++@@ -2246,8 +2242,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_u(ref0->frame); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_v(ref0->frame); ++ *u++ = ( (bw<RPI_CHROMA_BLOCK_WIDTH ? bw : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (bh<16 ? 
bh : 16); ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; ++@@ -2258,8 +2254,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = 1; // Weight of 1 and offset of 0 ++ *u++ = 1; ++ } ++- *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++- *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ *u++ = (get_vc_address_u(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address_v(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++ s->curr_u_mvs = u; ++@@ -2297,9 +2293,9 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int bw = nPbW-start_x; ++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref1->frame); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + 8 + start_x) & 0xffff); ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref1->frame); ++ *y++ = ( (bw<16 ? bw : 16) << 16 ) + (bh<16 ? 
bh : 16); ++ *y++ = my2_mx2_my_mx; ++ if (weight_flag) { ++@@ -2307,7 +2303,7 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ } else { ++ *y++ = 1; // Weight of 1 and offset of 0 ++ } ++- *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ *y++ = (get_vc_address_y(s->frame) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter; ++ } ++ } ++@@ -2347,8 +2343,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_u(ref1->frame); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_v(ref1->frame); ++ *u++ = ( (bw<RPI_CHROMA_BLOCK_WIDTH ? bw : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (bh<16 ? bh : 16); ++ // TODO chroma weight and offset... 
s->sh.chroma_weight_l0[current_mv.ref_idx[0]][0], s->sh.chroma_offset_l0[current_mv.ref_idx[0]][0] ++ *u++ = rpi_filter_coefs[_mx][0]; ++@@ -2360,8 +2356,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ *u++ = 1; // Weight of 1 and offset of 0 ++ *u++ = 1; ++ } ++- *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++- *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ *u++ = (get_vc_address_u(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address_v(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++ s->curr_u_mvs = u; ++@@ -2403,13 +2399,13 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ int bw = nPbW-start_x; ++ int bh = nPbH-start_y; ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y1 - 3 + start_y) << 16) + ( (x1 - 3 + start_x) & 0xffff); ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref0->frame); ++ y++[-RPI_LUMA_COMMAND_WORDS] = ((y2 - 3 + start_y) << 16) + ( (x2 - 3 + start_x) & 0xffff); // Second fetch is for ref1 ++- y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[0]); +++ y++[-RPI_LUMA_COMMAND_WORDS] = get_vc_address_y(ref1->frame); ++ *y++ = ( (bw<8 ? bw : 8) << 16 ) + (bh<16 ? 
bh : 16); ++ *y++ = my2_mx2_my_mx; ++ *y++ = 1; // B frame weighted prediction not supported ++- *y++ = (get_vc_address(s->frame->buf[0]) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); +++ *y++ = (get_vc_address_y(s->frame) + x0 + start_x + (start_y + y0) * s->frame->linesize[0]); ++ y++[-RPI_LUMA_COMMAND_WORDS] = s->mc_filter_b; ++ } ++ } ++@@ -2453,8 +2449,8 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b0; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x1_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y1_c - 1 + start_y; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[1]); ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref0->frame->buf[2]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_u(ref0->frame); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_v(ref0->frame); ++ *u++ = ( (bw<RPI_CHROMA_BLOCK_WIDTH ? bw : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (bh<16 ? bh : 16); ++ *u++ = rpi_filter_coefs[_mx][0]; ++ *u++ = rpi_filter_coefs[_my][0]; ++@@ -2464,14 +2460,14 @@ static void hls_prediction_unit(HEVCContext *s, int x0, int y0, ++ u++[-RPI_CHROMA_COMMAND_WORDS] = s->mc_filter_uv_b; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = x2_c - 1 + start_x; ++ u++[-RPI_CHROMA_COMMAND_WORDS] = y2_c - 1 + start_y; ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[1]); ++- u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address(ref1->frame->buf[2]); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_u(ref1->frame); +++ u++[-RPI_CHROMA_COMMAND_WORDS] = get_vc_address_v(ref1->frame); ++ *u++ = ( (bw<RPI_CHROMA_BLOCK_WIDTH ? bw : RPI_CHROMA_BLOCK_WIDTH) << 16 ) + (bh<16 ? 
bh : 16); ++ *u++ = rpi_filter_coefs[_mx2][0]; ++ *u++ = rpi_filter_coefs[_my2][0]; ++ u+=2; // Weights not supported in B slices ++- *u++ = (get_vc_address(s->frame->buf[1]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); ++- *u++ = (get_vc_address(s->frame->buf[2]) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); +++ *u++ = (get_vc_address_u(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[1]); +++ *u++ = (get_vc_address_v(s->frame) + x0_c + start_x + (start_y + y0_c) * s->frame->linesize[2]); ++ } ++ } ++ s->curr_u_mvs = u; ++@@ -3270,12 +3266,13 @@ static int32_t filter8_luma(uint8_t *data, int x0, int y0, int pitch, int my_mx, ++ return vsum; ++ } ++ ++-static uint8_t *test_frame(HEVCContext *s,uint32_t p, AVFrame *frame, int cIdx) +++static uint8_t *test_frame(HEVCContext *s,uint32_t p, AVFrame *frame, const int cIdx) ++ { ++ //int pic_width = s->ps.sps->width >> s->ps.sps->hshift[cIdx]; ++ int pic_height = s->ps.sps->height >> s->ps.sps->vshift[cIdx]; ++ int pitch = frame->linesize[cIdx]; ++- uint32_t base = get_vc_address(frame->buf[cIdx]); +++ uint32_t base = cIdx == 0 ? get_vc_address_y(frame) : // VC address of plane cIdx +++ cIdx == 1 ? 
get_vc_address_u(frame) : get_vc_address_v(frame); ++ if (p>=base && p<base+pitch*pic_height) { ++ return frame->data[cIdx] + (p-base); ++ } ++@@ -3562,6 +3559,7 @@ static void rpi_launch_vpu_qpu(HEVCContext *s) ++ #ifdef RPI ++ ++ #ifndef RPI_FAST_CACHEFLUSH +++#error RPI_FAST_CACHEFLUSH is broken ++ static void flush_buffer(AVBufferRef *bref) { ++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++ gpu_cache_flush(p); ++@@ -3572,7 +3570,7 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ { ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ GPU_MEM_PTR_T p = get_gpu_mem_ptr_u(s->frame); ++ int n = s->ps.sps->height; ++ int curr_y = 0; ++ int curr_uv = 0; ++@@ -3580,21 +3578,21 @@ static void flush_frame(HEVCContext *s,AVFrame *frame) ++ int sz,base; ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].handle = p.vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = (int)(p->arm) + base; +++ iocache.s[0].addr = (int)(p.arm) + base; ++ iocache.s[0].size = sz; ++- p = av_buffer_pool_opaque(frame->buf[2]); ++- iocache.s[1].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_v(s->frame); +++ iocache.s[1].handle = p.vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = (int)(p->arm) + base; +++ iocache.s[1].addr = (int)(p.arm) + base; ++ iocache.s[1].size = sz; ++- p = av_buffer_pool_opaque(frame->buf[0]); +++ p = get_gpu_mem_ptr_y(s->frame); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++- iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].handle = p.vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = (int)(p->arm) + base; +++ iocache.s[2].addr = (int)(p.arm) + base; ++ iocache.s[2].size = sz; ++ vcsm_clean_invalid( &iocache ); ++ #else ++@@ 
-3612,7 +3610,7 @@ static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM ++ int curr_y; ++ int curr_uv; ++ int n_uv; ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ GPU_MEM_PTR_T p = get_gpu_mem_ptr_u(s->frame); ++ int sz,base; ++ int (*d)[2] = s->dblk_cmds[job]; ++ int low=(*d)[1]; ++@@ -3629,21 +3627,21 @@ static void flush_frame3(HEVCContext *s,AVFrame *frame,GPU_MEM_PTR_T *p0,GPU_MEM ++ ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- iocache.s[0].handle = p->vcsm_handle; +++ iocache.s[0].handle = p.vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = (int)(p->arm) + base; +++ iocache.s[0].addr = (int)(p.arm) + base; ++ iocache.s[0].size = sz; ++- p = av_buffer_pool_opaque(frame->buf[2]); ++- iocache.s[1].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_v(s->frame); +++ iocache.s[1].handle = p.vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = (int)(p->arm) + base; +++ iocache.s[1].addr = (int)(p.arm) + base; ++ iocache.s[1].size = sz; ++- p = av_buffer_pool_opaque(frame->buf[0]); +++ p = get_gpu_mem_ptr_y(s->frame); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++- iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].handle = p.vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = (int)(p->arm) + base; +++ iocache.s[2].addr = (int)(p.arm) + base; ++ iocache.s[2].size = sz; ++ ++ iocache.s[3].handle = p0->vcsm_handle; ++diff --git a/libavcodec/hevc_filter.c b/libavcodec/hevc_filter.c ++index 826a82f..c4fa305 100644 ++--- a/libavcodec/hevc_filter.c +++++ b/libavcodec/hevc_filter.c ++@@ -879,17 +879,25 @@ void ff_hevc_deblocking_boundary_strengths(HEVCContext *s, int x0, int y0, ++ #undef CR ++ ++ #ifdef RPI_INTER_QPU ++-static void flush_buffer(AVBufferRef *bref) { ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++- gpu_cache_flush(p); 
+++static void flush_buffer_y(const AVFrame * const frame) { +++ GPU_MEM_PTR_T p = get_gpu_mem_ptr_y(frame); +++ gpu_cache_flush(&p); ++ } ++ ++-// Return Physical address for this image ++-static uint32_t get_vc_address(AVBufferRef *bref) { ++- GPU_MEM_PTR_T *p = av_buffer_pool_opaque(bref); ++- return p->vc; +++static void flush_buffer_u(const AVFrame * const frame) { +++ GPU_MEM_PTR_T p = get_gpu_mem_ptr_u(frame); +++ gpu_cache_flush(&p); ++ } ++ +++static void flush_buffer_v(const AVFrame * const frame) { +++ GPU_MEM_PTR_T p = get_gpu_mem_ptr_v(frame); +++ gpu_cache_flush(&p); +++} +++ +++ +++#ifdef RPI_DEBLOCK_VPU +++#error Not fixed yet +++ ++ // ff_hevc_flush_buffer_lines ++ // flushes and invalidates all pixel rows in [start,end-1] ++ static void ff_hevc_flush_buffer_lines(HEVCContext *s, int start, int end, int flush_luma, int flush_chroma) ++@@ -901,44 +909,44 @@ static void ff_hevc_flush_buffer_lines(HEVCContext *s, int start, int end, int f ++ int curr_uv = curr_y >> s->ps.sps->vshift[1]; ++ int n_uv = n >> s->ps.sps->vshift[1]; ++ int sz,base; ++- GPU_MEM_PTR_T *p; +++ GPU_MEM_PTR_T p; ++ if (curr_uv < 0) curr_uv = 0; ++ if (n_uv<=curr_uv) { return; } ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++ if (flush_chroma) { ++- p = av_buffer_pool_opaque(s->frame->buf[1]); ++- iocache.s[0].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_u(s->frame); +++ iocache.s[0].handle = p.vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = (int)p->arm + base; +++ iocache.s[0].addr = (int)p.arm + base; ++ iocache.s[0].size = sz; ++- p = av_buffer_pool_opaque(s->frame->buf[2]); ++- iocache.s[1].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_v(s->frame); +++ iocache.s[1].handle = p.vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = (int)p->arm + base; +++ iocache.s[1].addr = (int)p.arm + base; ++ iocache.s[1].size = sz; ++ } ++ if (flush_luma) { ++- p = 
av_buffer_pool_opaque(s->frame->buf[0]); +++ p = get_gpu_mem_ptr_y(s->frame); ++ sz = s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++- iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].handle = p.vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = (int)p->arm + base; +++ iocache.s[2].addr = (int)p.arm + base; ++ iocache.s[2].size = sz; ++ } ++ vcsm_clean_invalid( &iocache ); ++ #else ++ if (flush_chroma) { ++- flush_buffer(s->frame->buf[1]); ++- flush_buffer(s->frame->buf[2]); +++ flush_buffer_u(s->frame); +++ flush_buffer_v(s->frame); ++ } ++ if (flush_luma) { ++- flush_buffer(s->frame->buf[0]); +++ flush_buffer_y(s->frame); ++ } ++ #endif ++ } ++- +++#endif ++ ++ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ { ++@@ -950,37 +958,37 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ int curr_uv = curr_y >> s->ps.sps->vshift[1]; ++ int n_uv = n >> s->ps.sps->vshift[1]; ++ int sz,base; ++- GPU_MEM_PTR_T *p; +++ GPU_MEM_PTR_T p; ++ if (curr_uv < 0) curr_uv = 0; ++ if (n_uv<=curr_uv) { return; } ++ sz = s->frame->linesize[1] * (n_uv-curr_uv); ++ base = s->frame->linesize[1] * curr_uv; ++- p = av_buffer_pool_opaque(s->frame->buf[1]); ++- iocache.s[0].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_u(s->frame); +++ iocache.s[0].handle = p.vcsm_handle; ++ iocache.s[0].cmd = 3; // clean+invalidate ++- iocache.s[0].addr = (int)p->arm + base; +++ iocache.s[0].addr = (int)p.arm + base; ++ iocache.s[0].size = sz; ++- p = av_buffer_pool_opaque(s->frame->buf[2]); ++- iocache.s[1].handle = p->vcsm_handle; +++ p = get_gpu_mem_ptr_v(s->frame); +++ iocache.s[1].handle = p.vcsm_handle; ++ iocache.s[1].cmd = 3; // clean+invalidate ++- iocache.s[1].addr = (int)p->arm + base; +++ iocache.s[1].addr = (int)p.arm + base; ++ iocache.s[1].size = sz; ++ ++ #ifdef RPI_LUMA_QPU ++- p = av_buffer_pool_opaque(s->frame->buf[0]); +++ p = get_gpu_mem_ptr_y(s->frame); ++ sz = 
s->frame->linesize[0] * (n-curr_y); ++ base = s->frame->linesize[0] * curr_y; ++- iocache.s[2].handle = p->vcsm_handle; +++ iocache.s[2].handle = p.vcsm_handle; ++ iocache.s[2].cmd = 3; // clean+invalidate ++- iocache.s[2].addr = (int)p->arm + base; +++ iocache.s[2].addr = (int)p.arm + base; ++ iocache.s[2].size = sz; ++ #endif ++ vcsm_clean_invalid( &iocache ); ++ #else ++- flush_buffer(s->frame->buf[1]); ++- flush_buffer(s->frame->buf[2]); +++ flush_buffer_u(s->frame); +++ flush_buffer_v(s->frame); ++ #ifdef RPI_LUMA_QPU ++- flush_buffer(s->frame->buf[0]); +++ flush_buffer_y(s->frame); ++ #endif ++ ++ #endif ++@@ -992,6 +1000,7 @@ void ff_hevc_flush_buffer(HEVCContext *s, ThreadFrame *f, int n) ++ #endif ++ ++ #ifdef RPI_DEBLOCK_VPU +++#error XXX ++ /* rpi_deblock deblocks an entire row of ctbs using the VPU */ ++ static void rpi_deblock(HEVCContext *s, int y, int ctb_size) ++ { ++@@ -1000,21 +1009,21 @@ static void rpi_deblock(HEVCContext *s, int y, int ctb_size) ++ // TODO flush buffer of beta/tc setup when it becomes cached ++ ++ // Prepare three commands at once to avoid calling overhead ++- s->vpu_cmds_arm[0][0] = get_vc_address(s->frame->buf[0]) + s->frame->linesize[0] * y; +++ s->vpu_cmds_arm[0][0] = get_vc_address_y(s->frame) + s->frame->linesize[0] * y; ++ s->vpu_cmds_arm[0][1] = s->frame->linesize[0]; ++ s->vpu_cmds_arm[0][2] = s->setup_width; ++ s->vpu_cmds_arm[0][3] = (int) ( s->y_setup_vc + s->setup_width * (y>>4) ); ++ s->vpu_cmds_arm[0][4] = ctb_size>>4; ++ s->vpu_cmds_arm[0][5] = 2; ++ ++- s->vpu_cmds_arm[1][0] = get_vc_address(s->frame->buf[1]) + s->frame->linesize[1] * (y>> s->ps.sps->vshift[1]); +++ s->vpu_cmds_arm[1][0] = get_vc_address_u(s->frame) + s->frame->linesize[1] * (y>> s->ps.sps->vshift[1]); ++ s->vpu_cmds_arm[1][1] = s->frame->linesize[1]; ++ s->vpu_cmds_arm[1][2] = s->uv_setup_width; ++ s->vpu_cmds_arm[1][3] = (int) ( s->uv_setup_vc + s->uv_setup_width * ((y>>4)>> s->ps.sps->vshift[1]) ); ++ s->vpu_cmds_arm[1][4] = (ctb_size>>4)>> 
s->ps.sps->vshift[1]; ++ s->vpu_cmds_arm[1][5] = 3; ++ ++- s->vpu_cmds_arm[2][0] = get_vc_address(s->frame->buf[2]) + s->frame->linesize[2] * (y>> s->ps.sps->vshift[2]); +++ s->vpu_cmds_arm[2][0] = get_vc_address_v(s->frame) + s->frame->linesize[2] * (y>> s->ps.sps->vshift[2]); ++ s->vpu_cmds_arm[2][1] = s->frame->linesize[2]; ++ s->vpu_cmds_arm[2][2] = s->uv_setup_width; ++ s->vpu_cmds_arm[2][3] = (int) ( s->uv_setup_vc + s->uv_setup_width * ((y>>4)>> s->ps.sps->vshift[1]) ); ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index ffd13ca..b0c9bc5 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -250,7 +250,7 @@ int gpu_get_mailbox(void) ++ } ++ ++ // Call this to clean and invalidate a region of memory ++-void gpu_cache_flush(GPU_MEM_PTR_T *p) +++void gpu_cache_flush(const GPU_MEM_PTR_T * const p) ++ { ++ #ifdef RPI_FAST_CACHEFLUSH ++ struct vcsm_user_clean_invalid_s iocache = {}; ++diff --git a/libavcodec/rpi_qpu.h b/libavcodec/rpi_qpu.h ++index 81c2bb1..b913f79 100644 ++--- a/libavcodec/rpi_qpu.h +++++ b/libavcodec/rpi_qpu.h ++@@ -2,8 +2,11 @@ ++ #define RPI_QPU_H ++ ++ // Define RPI_FAST_CACHEFLUSH to use the VCSM cache flush code +++// *** N.B. 
Code has rotted & crashes if this is unset (before this set of changes) ++ #define RPI_FAST_CACHEFLUSH ++ +++#define RPI_ONE_BUF 1 +++ ++ typedef struct gpu_mem_ptr_s { ++ unsigned char *arm; // Pointer to memory mapped on ARM side ++ int vc_handle; // Videocore handle of relocatable memory ++@@ -16,9 +19,113 @@ typedef struct gpu_mem_ptr_s { ++ extern int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p); ++ extern int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p); ++ extern void gpu_free(GPU_MEM_PTR_T *p); ++-extern void gpu_cache_flush(GPU_MEM_PTR_T *p); +++extern void gpu_cache_flush(const GPU_MEM_PTR_T * const p); ++ extern void gpu_cache_flush3(GPU_MEM_PTR_T *p0,GPU_MEM_PTR_T *p1,GPU_MEM_PTR_T *p2); ++ +++#include "libavutil/frame.h" +++#if !RPI_ONE_BUF +++static inline uint32_t get_vc_address_y(const AVFrame * const frame) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[0]); +++ return p->vc; +++} +++ +++static inline uint32_t get_vc_address_u(const AVFrame * const frame) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[1]); +++ return p->vc; +++} +++ +++static inline uint32_t get_vc_address_v(const AVFrame * const frame) { +++ GPU_MEM_PTR_T *p = av_buffer_pool_opaque(frame->buf[2]); +++ return p->vc; +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_y(const AVFrame * const frame) { +++ return *(GPU_MEM_PTR_T *)av_buffer_pool_opaque(frame->buf[0]); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_u(const AVFrame * const frame) { +++ return *(GPU_MEM_PTR_T *)av_buffer_pool_opaque(frame->buf[1]); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_v(const AVFrame * const frame) { +++ return *(GPU_MEM_PTR_T *)av_buffer_pool_opaque(frame->buf[2]); +++} +++ +++#else +++ +++static inline int gpu_is_buf1(const AVFrame * const frame) +++{ +++ return frame->buf[1] == NULL; +++} +++ +++static inline GPU_MEM_PTR_T * gpu_buf1_gmem(const AVFrame * const frame) +++{ +++ return av_buffer_get_opaque(frame->buf[0]); +++} +++ 
+++static inline GPU_MEM_PTR_T * gpu_buf3_gmem(const AVFrame * const frame, const int n) +++{ +++ return av_buffer_pool_opaque(frame->buf[n]); +++} +++ +++ +++static inline uint32_t get_vc_address_y(const AVFrame * const frame) { +++ return gpu_is_buf1(frame) ? gpu_buf1_gmem(frame)->vc : gpu_buf3_gmem(frame, 0)->vc; +++} +++ +++static inline uint32_t get_vc_address_u(const AVFrame * const frame) { +++ return gpu_is_buf1(frame) ? +++ gpu_buf1_gmem(frame)->vc + frame->data[1] - frame->data[0] : +++ gpu_buf3_gmem(frame, 1)->vc; +++} +++ +++static inline uint32_t get_vc_address_v(const AVFrame * const frame) { +++ return gpu_is_buf1(frame) ? +++ gpu_buf1_gmem(frame)->vc + frame->data[2] - frame->data[0] : +++ gpu_buf3_gmem(frame, 2)->vc; +++} +++ +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_y(const AVFrame * const frame) { +++ if (gpu_is_buf1(frame)) +++ { +++ GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame); +++ g.numbytes = frame->data[1] - frame->data[0]; +++ return g; +++ } +++ else +++ return *gpu_buf3_gmem(frame, 0); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_u(const AVFrame * const frame) { +++ if (gpu_is_buf1(frame)) +++ { +++ GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame); +++ g.arm += frame->data[1] - frame->data[0]; +++ g.vc += frame->data[1] - frame->data[0]; +++ g.numbytes = frame->data[2] - frame->data[1]; // chroma size +++ return g; +++ } +++ else +++ return *gpu_buf3_gmem(frame, 1); +++} +++ +++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_v(const AVFrame * const frame) { +++ if (gpu_is_buf1(frame)) +++ { +++ GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame); +++ g.arm += frame->data[2] - frame->data[0]; +++ g.vc += frame->data[2] - frame->data[0]; +++ g.numbytes = frame->data[2] - frame->data[1]; // chroma size +++ return g; +++ } +++ else +++ return *gpu_buf3_gmem(frame, 2); +++} +++ +++#endif +++ +++ ++ // QPU specific functions ++ extern void qpu_run_shader8(int code, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int 
unifs8); ++ extern void qpu_run_shader12(int code, int num, int code2, int num2, int unifs1, int unifs2, int unifs3, int unifs4, int unifs5, int unifs6, int unifs7, int unifs8, int unifs9, int unifs10, int unifs11, int unifs12); ++diff --git a/libavcodec/rpi_zc.c b/libavcodec/rpi_zc.c ++new file mode 100644 ++index 0000000..9580165 ++--- /dev/null +++++ b/libavcodec/rpi_zc.c ++@@ -0,0 +1,406 @@ +++#include "config.h" +++#ifdef RPI +++#include "rpi_qpu.h" +++#include "rpi_zc.h" +++ +++#include "libavutil/buffer_internal.h" +++ +++struct ZcPoolEnt; +++ +++typedef struct ZcPool +++{ +++ int numbytes; +++ struct ZcPoolEnt * head; +++ pthread_mutex_t lock; +++} ZcPool; +++ +++typedef struct ZcPoolEnt +++{ +++ // It is important that we start with gmem as other bits of code will expect to see that +++ GPU_MEM_PTR_T gmem; +++ struct ZcPoolEnt * next; +++ struct ZcPool * pool; +++} ZcPoolEnt; +++ +++static ZcPoolEnt * zc_pool_ent_alloc(ZcPool * const pool, const int size) +++{ +++ ZcPoolEnt * const zp = av_malloc(sizeof(ZcPoolEnt)); +++ +++ if (zp == NULL) { +++ av_log(NULL, AV_LOG_ERROR, "av_malloc(ZcPoolEnt) failed\n"); +++ goto fail0; +++ } +++ +++ if (gpu_malloc_cached(size, &zp->gmem) != 0) +++ { +++ av_log(NULL, AV_LOG_ERROR, "av_gpu_malloc_cached(%d) failed\n", size); +++ goto fail1; +++ } +++ +++ zp->next = NULL; +++ zp->pool = pool; +++ return zp; +++ +++fail1: +++ av_free(zp); +++fail0: +++ return NULL; +++} +++ +++static void zc_pool_ent_free(ZcPoolEnt * const zp) +++{ +++ gpu_free(&zp->gmem); +++ av_free(zp); +++} +++ +++static void zc_pool_flush(ZcPool * const pool) +++{ +++ ZcPoolEnt * p = pool->head; +++ pool->head = NULL; +++ while (p != NULL) +++ { +++ ZcPoolEnt * const zp = p; +++ p = p->next; +++ zc_pool_ent_free(zp); +++ } +++} +++ +++static ZcPoolEnt * zc_pool_alloc(ZcPool * const pool, const int numbytes) +++{ +++ ZcPoolEnt * zp; +++ pthread_mutex_lock(&pool->lock); +++ +++ if (numbytes != pool->numbytes) +++ { +++ zc_pool_flush(pool); +++ 
pool->numbytes = numbytes; +++ } +++ +++ if (pool->head != NULL) +++ { +++ zp = pool->head; +++ pool->head = zp->next; +++ } +++ else +++ { +++ zp = zc_pool_ent_alloc(pool, numbytes); +++ } +++ +++ pthread_mutex_unlock(&pool->lock); +++ return zp; +++} +++ +++static void zc_pool_free(ZcPoolEnt * const zp) +++{ +++ ZcPool * const pool = zp == NULL ? NULL : zp->pool; +++ if (zp != NULL) +++ { +++ pthread_mutex_lock(&pool->lock); +++ if (pool->numbytes == zp->gmem.numbytes) +++ { +++ zp->next = pool->head; +++ pool->head = zp; +++ pthread_mutex_unlock(&pool->lock); +++ } +++ else +++ { +++ pthread_mutex_unlock(&pool->lock); +++ zc_pool_ent_free(zp); +++ } +++ } +++} +++ +++static void +++zc_pool_init(ZcPool * const pool) +++{ +++ pool->numbytes = -1; +++ pool->head = NULL; +++ pthread_mutex_init(&pool->lock, NULL); +++} +++ +++static void +++zc_pool_destroy(ZcPool * const pool) +++{ +++ pool->numbytes = -1; +++ zc_pool_flush(pool); +++ pthread_mutex_destroy(&pool->lock); +++} +++ +++ +++typedef struct AVZcEnv +++{ +++ ZcPool pool; +++} ZcEnv; +++ +++// Callback when buffer unrefed to zero +++static void rpi_free_display_buffer(void *opaque, uint8_t *data) +++{ +++ ZcPoolEnt *const zp = opaque; +++// printf("%s: data=%p\n", __func__, data); +++ zc_pool_free(zp); +++} +++ +++static inline GPU_MEM_PTR_T * pic_gm_ptr(AVBufferRef * const buf) +++{ +++ // Kludge where we check the free fn to check this is really +++ // one of our buffers - can't think of a better way +++ return buf == NULL || buf->buffer->free != rpi_free_display_buffer ? 
NULL : +++ av_buffer_get_opaque(buf); +++} +++ +++AVRpiZcFrameGeometry av_rpi_zc_frame_geometry( +++ const unsigned int video_width, const unsigned int video_height) +++{ +++ AVRpiZcFrameGeometry geo; +++ geo.stride_y = (video_width + 32 + 31) & ~31; +++ geo.stride_c = geo.stride_y / 2; +++// geo.height_y = (video_height + 15) & ~15; +++ geo.height_y = (video_height + 32 + 31) & ~31; +++ geo.height_c = geo.height_y / 2; +++ return geo; +++} +++ +++static AVBufferRef * rpi_buf_pool_alloc(ZcPool * const pool, int size) +++{ +++ ZcPoolEnt *const zp = zc_pool_alloc(pool, size); +++ AVBufferRef * buf; +++ +++ if (zp == NULL) { +++ av_log(NULL, AV_LOG_ERROR, "zc_pool_alloc(%d) failed\n", size); +++ goto fail0; +++ } +++ +++ if ((buf = av_buffer_create(zp->gmem.arm, size, rpi_free_display_buffer, zp, AV_BUFFER_FLAG_READONLY)) == NULL) +++ { +++ av_log(NULL, AV_LOG_ERROR, "av_buffer_create() failed\n"); +++ goto fail2; +++ } +++ +++ return buf; +++ +++fail2: +++ zc_pool_free(zp); +++fail0: +++ return NULL; +++} +++ +++static int rpi_get_display_buffer(struct AVCodecContext * const s, AVFrame * const frame) +++{ +++ ZcEnv *const zc = s->get_buffer_context; +++ const AVRpiZcFrameGeometry geo = av_rpi_zc_frame_geometry(frame->width, frame->height); +++ const unsigned int size_y = geo.stride_y * geo.height_y; +++ const unsigned int size_c = geo.stride_c * geo.height_c; +++ const unsigned int size_pic = size_y + size_c * 2; +++ AVBufferRef * buf; +++ unsigned int i; +++ +++// printf("Do local alloc: format=%#x, %dx%d: %u\n", frame->format, frame->width, frame->height, size_pic); +++ +++ if ((buf = rpi_buf_pool_alloc(&zc->pool, size_pic)) == NULL) +++ { +++ av_log(s, AV_LOG_ERROR, "rpi_get_display_buffer: Failed to get buffer from pool\n"); +++ return AVERROR(ENOMEM); +++ } +++ +++ for (i = 0; i < AV_NUM_DATA_POINTERS; i++) { +++ frame->buf[i] = NULL; +++ frame->data[i] = NULL; +++ frame->linesize[i] = 0; +++ } +++ +++ frame->buf[0] = buf; +++ frame->linesize[0] = geo.stride_y; 
+++ frame->linesize[1] = geo.stride_c; +++ frame->linesize[2] = geo.stride_c; +++ frame->data[0] = buf->data; +++ frame->data[1] = frame->data[0] + size_y; +++ frame->data[2] = frame->data[1] + size_c; +++ frame->extended_data = frame->data; +++ // Leave extended buf alone +++ +++ return 0; +++} +++ +++ +++#define RPI_GET_BUFFER2 1 +++ +++int av_rpi_zc_get_buffer2(struct AVCodecContext *s, AVFrame *frame, int flags) +++{ +++#if !RPI_GET_BUFFER2 +++ return avcodec_default_get_buffer2(s, frame, flags); +++#else +++ int rv; +++ +++ if ((s->codec->capabilities & AV_CODEC_CAP_DR1) == 0 || +++ frame->format != AV_PIX_FMT_YUV420P) +++ { +++// printf("Do default alloc: format=%#x\n", frame->format); +++ rv = avcodec_default_get_buffer2(s, frame, flags); +++ } +++ else +++ { +++ rv = rpi_get_display_buffer(s, frame); +++ } +++ +++#if 0 +++ printf("%s: %dx%d lsize=%d/%d/%d data=%p/%p/%p bref=%p/%p/%p opaque[0]=%p\n", __func__, +++ frame->width, frame->height, +++ frame->linesize[0], frame->linesize[1], frame->linesize[2], +++ frame->data[0], frame->data[1], frame->data[2], +++ frame->buf[0], frame->buf[1], frame->buf[2], +++ av_buffer_get_opaque(frame->buf[0])); +++#endif +++ return rv; +++#endif +++} +++ +++ +++static AVBufferRef * zc_copy(struct AVCodecContext * const s, +++ const AVFrame * const src) +++{ +++ AVFrame dest_frame; +++ AVFrame * const dest = &dest_frame; +++ unsigned int i; +++ uint8_t * psrc, * pdest; +++ +++ dest->width = src->width; +++ dest->height = src->height; +++ +++ if (rpi_get_display_buffer(s, dest) != 0) +++ { +++ return NULL; +++ } +++ +++ for (i = 0, psrc = src->data[0], pdest = dest->data[0]; +++ i != dest->height; +++ ++i, psrc += src->linesize[0], pdest += dest->linesize[0]) +++ { +++ memcpy(pdest, psrc, dest->width); +++ } +++ for (i = 0, psrc = src->data[1], pdest = dest->data[1]; +++ i != dest->height / 2; +++ ++i, psrc += src->linesize[1], pdest += dest->linesize[1]) +++ { +++ memcpy(pdest, psrc, dest->width / 2); +++ } +++ for (i = 0, 
psrc = src->data[2], pdest = dest->data[2]; +++ i != dest->height / 2; +++ ++i, psrc += src->linesize[2], pdest += dest->linesize[2]) +++ { +++ memcpy(pdest, psrc, dest->width / 2); +++ } +++ +++ return dest->buf[0]; +++} +++ +++ +++AVRpiZcRefPtr av_rpi_zc_ref(struct AVCodecContext * const s, +++ const AVFrame * const frame, const int maycopy) +++{ +++ assert(s != NULL); +++ +++ if (frame->format != AV_PIX_FMT_YUV420P) +++ { +++ av_log(s, AV_LOG_WARNING, "%s: *** Format not YUV420P: %d\n", __func__, frame->format); +++ return NULL; +++ } +++ +++ if (frame->buf[1] != NULL) +++ { +++ if (maycopy) +++ { +++ av_log(s, AV_LOG_INFO, "%s: *** Not a single buf frame: copying\n", __func__); +++ return zc_copy(s, frame); +++ } +++ else +++ { +++ av_log(s, AV_LOG_WARNING, "%s: *** Not a single buf frame: NULL\n", __func__); +++ return NULL; +++ } +++ } +++ +++ if (pic_gm_ptr(frame->buf[0]) == NULL) +++ { +++ if (maycopy) +++ { +++ av_log(s, AV_LOG_INFO, "%s: *** Not one of our buffers: copying\n", __func__); +++ return zc_copy(s, frame); +++ } +++ else +++ { +++ av_log(s, AV_LOG_WARNING, "%s: *** Not one of our buffers: NULL\n", __func__); +++ return NULL; +++ } +++ } +++ +++ return av_buffer_ref(frame->buf[0]); +++} +++ +++int av_rpi_zc_vc_handle(const AVRpiZcRefPtr fr_ref) +++{ +++ const GPU_MEM_PTR_T * const p = pic_gm_ptr(fr_ref); +++ return p == NULL ? -1 : p->vc_handle; +++} +++ +++int av_rpi_zc_numbytes(const AVRpiZcRefPtr fr_ref) +++{ +++ const GPU_MEM_PTR_T * const p = pic_gm_ptr(fr_ref); +++ return p == NULL ? 
0 : p->numbytes; +++} +++ +++void av_rpi_zc_unref(AVRpiZcRefPtr fr_ref) +++{ +++ if (fr_ref != NULL) +++ { +++ av_buffer_unref(&fr_ref); +++ } +++} +++ +++AVZcEnvPtr av_rpi_zc_env_alloc(void) +++{ +++ ZcEnv * const zc = av_mallocz(sizeof(ZcEnv)); +++ if (zc == NULL) +++ { +++ av_log(NULL, AV_LOG_ERROR, "av_rpi_zc_env_alloc: Context allocation failed\n"); +++ return NULL; +++ } +++ +++ zc_pool_init(&zc->pool); +++ return zc; +++} +++ +++void av_rpi_zc_env_free(AVZcEnvPtr zc) +++{ +++ if (zc != NULL) +++ { +++ zc_pool_destroy(&zc->pool); ; +++ av_free(zc); +++ } +++} +++ +++int av_rpi_zc_init(struct AVCodecContext * const s) +++{ +++ ZcEnv * const zc = av_rpi_zc_env_alloc(); +++ if (zc == NULL) +++ { +++ return AVERROR(ENOMEM); +++ } +++ +++ s->get_buffer_context = zc; +++ s->get_buffer2 = av_rpi_zc_get_buffer2; +++ return 0; +++} +++ +++void av_rpi_zc_uninit(struct AVCodecContext * const s) +++{ +++ if (s->get_buffer2 == av_rpi_zc_get_buffer2) +++ { +++ ZcEnv * const zc = s->get_buffer_context; +++ s->get_buffer2 = avcodec_default_get_buffer2; +++ s->get_buffer_context = NULL; +++ av_rpi_zc_env_free(zc); +++ } +++} +++ +++#endif // RPI +++ ++diff --git a/libavcodec/rpi_zc.h b/libavcodec/rpi_zc.h ++new file mode 100644 ++index 0000000..f0109f4 ++--- /dev/null +++++ b/libavcodec/rpi_zc.h ++@@ -0,0 +1,83 @@ +++#ifndef LIBAVCODEC_RPI_ZC_H +++#define LIBAVCODEC_RPI_ZC_H +++ +++// Zero-Copy frame code for RPi +++// RPi needs Y/U/V planes to be contiguous for display. By default +++// ffmpeg will allocate separated planes so a memcpy is needed before +++// display. This code provides a method of making ffmpeg allocate a single +++// bit of memory for the frame which can then be reference counted until +++// display has finished with it. 
+++ +++#include "libavutil/frame.h" +++#include "libavcodec/avcodec.h" +++ +++// "Opaque" pointer to whatever we are using as a buffer reference +++typedef AVBufferRef * AVRpiZcRefPtr; +++ +++struct AVZcEnv; +++typedef struct AVZcEnv * AVZcEnvPtr; +++ +++typedef struct AVRpiZcFrameGeometry +++{ +++ unsigned int stride_y; +++ unsigned int height_y; +++ unsigned int stride_c; +++ unsigned int height_c; +++} AVRpiZcFrameGeometry; +++ +++ +++AVRpiZcFrameGeometry av_rpi_zc_frame_geometry( +++ const unsigned int video_width, const unsigned int video_height); +++ +++// Replacement fn for avctx->get_buffer2 +++// Should be set before calling avcodec_open2 +++// +++// N.B. in addition to setting avctx->get_buffer2, avctx->refcounted_frames +++// must be set to 1 as otherwise the buffer info is killed before being returned +++// by avcodec_decode_video2. Note also that this means that the AVFrame that is +++// returned must be manually unreferenced with av_frame_unref. This should be done +++// after av_rpi_zc_ref has been called. 
+++int av_rpi_zc_get_buffer2(struct AVCodecContext *s, AVFrame *frame, int flags); +++ +++// Generate a ZC reference to the buffer(s) in this frame +++// If the buffer doesn't appear to be one allocated by _get_buffer2 +++// then the behaviour depends on maycopy: +++// If maycopy=0 then return NULL +++// If maycopy=1 && the src frame is in a form where we can easily copy +++// the data, then allocate a new buffer and copy the data into it +++// Otherwise return NULL +++AVRpiZcRefPtr av_rpi_zc_ref(struct AVCodecContext * const s, +++ const AVFrame * const frame, const int maycopy); +++ +++// Get the vc_handle from the frame ref +++// Returns -1 if ref doesn't look valid +++int av_rpi_zc_vc_handle(const AVRpiZcRefPtr fr_ref); +++// Get the number of bytes allocated from the frame ref +++// Returns 0 if ref doesn't look valid +++int av_rpi_zc_numbytes(const AVRpiZcRefPtr fr_ref); +++ +++// Unreference the buffer refed/allocated by _zc_ref +++// If fr_ref is NULL then this will NOP +++void av_rpi_zc_unref(AVRpiZcRefPtr fr_ref); +++ +++// Allocate an environment for the buffer pool used by the ZC code +++// This should be put in avctx->get_buffer_context so it can be found by +++// av_rpi_zc_get_buffer2 when it is called from ffmpeg +++AVZcEnvPtr av_rpi_zc_env_alloc(void); +++ +++// Free the environment used by the ZC code +++void av_rpi_zc_env_free(AVZcEnvPtr); +++ +++ +++// Init ZC into a context +++// There is nothing magic in this fn - it just packages setting +++// get_buffer2 & get_buffer_context +++int av_rpi_zc_init(struct AVCodecContext * const s); +++ +++// Free ZC from a context +++// There is nothing magic in this fn - it just packages unsetting +++// get_buffer2 & get_buffer_context +++void av_rpi_zc_uninit(struct AVCodecContext * const s); +++ +++#endif +++ ++-- ++2.7.4 ++ ++ ++From a6da64e1ca42f0394ccfa55dca782a456841da94 Mon Sep 17 00:00:00 2001 ++From: John Cox <jc@kynesim.co.uk> ++Date: Tue, 1 Mar 2016 14:21:25 +0000 ++Subject: [PATCH 2/2] Set VPU 
scheduling thread to high priority after creation ++ ++--- ++ libavcodec/rpi_qpu.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- ++ 1 file changed, 47 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/rpi_qpu.c b/libavcodec/rpi_qpu.c ++index b0c9bc5..ee19231 100644 ++--- a/libavcodec/rpi_qpu.c +++++ b/libavcodec/rpi_qpu.c ++@@ -182,9 +182,55 @@ static int gpu_init(volatile struct GPU **gpu) { ++ err = pthread_create(&vpu_thread, NULL, vpu_start, NULL); ++ //printf("Created thread\n"); ++ if (err) { ++- printf("Failed to create vpu thread\n"); +++ av_log(NULL, AV_LOG_FATAL, "Failed to create vpu thread\n"); ++ return -4; ++ } +++ +++ { +++ struct sched_param param = {0}; +++ int policy = 0; +++ +++ if (pthread_getschedparam(vpu_thread, &policy, &param) != 0) +++ { +++ av_log(NULL, AV_LOG_ERROR, "Unable to get VPU thread scheduling parameters\n"); +++ } +++ else +++ { +++ av_log(NULL, AV_LOG_INFO, "VPU thread: policy=%d (%s), pri=%d\n", +++ policy, +++ policy == SCHED_RR ? "RR" : policy == SCHED_FIFO ? "FIFO" : "???" , +++ param.sched_priority); +++ +++ policy = SCHED_FIFO; +++ param.sched_priority = sched_get_priority_max(SCHED_FIFO); +++ +++ av_log(NULL, AV_LOG_INFO, "Attempt to set: policy=%d (%s), pri=%d\n", +++ policy, +++ policy == SCHED_RR ? "RR" : policy == SCHED_FIFO ? "FIFO" : "???" , +++ param.sched_priority); +++ +++ if (pthread_setschedparam(vpu_thread, policy, &param) != 0) +++ { +++ av_log(NULL, AV_LOG_ERROR, "Unable to set VPU thread scheduling parameters\n"); +++ } +++ else +++ { +++ if (pthread_getschedparam(vpu_thread, &policy, &param) != 0) +++ { +++ av_log(NULL, AV_LOG_ERROR, "Unable to get VPU thread scheduling parameters\n"); +++ } +++ else +++ { +++ av_log(NULL, AV_LOG_INFO, "VPU thread (after): policy=%d (%s), pri=%d\n", +++ policy, +++ policy == SCHED_RR ? "RR" : policy == SCHED_FIFO ? "FIFO" : "???" 
, +++ param.sched_priority); +++ } +++ } +++ } +++ +++ } +++ ++ } ++ #endif ++ ++-- ++2.7.4 ++ + +From 4dcf6adc09c509c7e448a4fcfe48bc7da6f907a8 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 22 Aug 2015 23:06:56 +0100 +Subject: [PATCH 29/67] [dvdmessage] Increase timeout on + CDVDMsgGeneralSynchronize + +--- + xbmc/cores/VideoPlayer/DVDMessage.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDMessage.cpp b/xbmc/cores/VideoPlayer/DVDMessage.cpp +index 5aed6918d217df884107fe6366b3668efa96af20..2442fc808ae89c5550b8db34b2605f5037f2ef29 100644 +--- a/xbmc/cores/VideoPlayer/DVDMessage.cpp ++++ b/xbmc/cores/VideoPlayer/DVDMessage.cpp +@@ -90,7 +90,7 @@ bool CDVDMsgGeneralSynchronize::Wait(unsigned int milliseconds, unsigned int sou + + void CDVDMsgGeneralSynchronize::Wait(volatile bool *abort, unsigned int source) + { +- while(!Wait(100, source)) ++ while(!Wait(200, source)) + { + if(abort && *abort) + return; + +From 01bce0b478e428f0e8805868222928e8274bb809 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 16 Sep 2015 19:05:12 +0100 +Subject: [PATCH 30/67] [3d] Make MVC a valid 3D filename tag + +--- + xbmc/guilib/StereoscopicsManager.cpp | 9 +++++++++ + xbmc/settings/AdvancedSettings.cpp | 2 ++ + xbmc/settings/AdvancedSettings.h | 1 + + 3 files changed, 12 insertions(+) + +diff --git a/xbmc/guilib/StereoscopicsManager.cpp b/xbmc/guilib/StereoscopicsManager.cpp +index b34873cba6534086ae243326550385867a03256a..1443acaf0f25df458ae49766e13dd0323454f2eb 100644 +--- a/xbmc/guilib/StereoscopicsManager.cpp ++++ b/xbmc/guilib/StereoscopicsManager.cpp +@@ -197,6 +197,15 @@ std::string CStereoscopicsManager::DetectStereoModeByString(const std::string &n + if (re.RegFind(searchString) > -1) + stereoMode = "top_bottom"; + ++ if (!re.RegComp(g_advancedSettings.m_stereoscopicregex_mvc.c_str())) ++ { ++ CLog::Log(LOGERROR, "%s: Invalid RegExp for matching 3d MVC content:'%s'", 
__FUNCTION__, g_advancedSettings.m_stereoscopicregex_mvc.c_str()); ++ return stereoMode; ++ } ++ ++ if (re.RegFind(searchString) > -1) ++ stereoMode = "left_right"; ++ + return stereoMode; + } + +diff --git a/xbmc/settings/AdvancedSettings.cpp b/xbmc/settings/AdvancedSettings.cpp +index 446293308010f3b8cd8d325fa6d0285fcc9f892d..ae21da29314ae8faa35129a79e62e82b55fbc306 100644 +--- a/xbmc/settings/AdvancedSettings.cpp ++++ b/xbmc/settings/AdvancedSettings.cpp +@@ -403,6 +403,7 @@ void CAdvancedSettings::Initialize() + m_stereoscopicregex_3d = "[-. _]3d[-. _]"; + m_stereoscopicregex_sbs = "[-. _]h?sbs[-. _]"; + m_stereoscopicregex_tab = "[-. _]h?tab[-. _]"; ++ m_stereoscopicregex_mvc = "[-. _]h?mvc[-. _]"; + + m_useDisplayControlHWStereo = false; + +@@ -517,6 +518,7 @@ void CAdvancedSettings::ParseSettingsFile(const std::string &file) + XMLUtils::GetString(pElement, "stereoscopicregex3d", m_stereoscopicregex_3d); + XMLUtils::GetString(pElement, "stereoscopicregexsbs", m_stereoscopicregex_sbs); + XMLUtils::GetString(pElement, "stereoscopicregextab", m_stereoscopicregex_tab); ++ XMLUtils::GetString(pElement, "stereoscopicregexmvc", m_stereoscopicregex_mvc); + XMLUtils::GetFloat(pElement, "subsdelayrange", m_videoSubsDelayRange, 10, 600); + XMLUtils::GetFloat(pElement, "audiodelayrange", m_videoAudioDelayRange, 10, 600); + XMLUtils::GetString(pElement, "defaultplayer", m_videoDefaultPlayer); +diff --git a/xbmc/settings/AdvancedSettings.h b/xbmc/settings/AdvancedSettings.h +index bcbd5d1c68b576034a418dd2dce0b47071229e0b..d4a30863806eb1c86042e0991793aedf20bf8344 100644 +--- a/xbmc/settings/AdvancedSettings.h ++++ b/xbmc/settings/AdvancedSettings.h +@@ -372,6 +372,7 @@ class CAdvancedSettings : public ISettingCallback, public ISettingsHandler + std::string m_stereoscopicregex_3d; + std::string m_stereoscopicregex_sbs; + std::string m_stereoscopicregex_tab; ++ std::string m_stereoscopicregex_mvc; + + bool m_useDisplayControlHWStereo; + + +From 
6268ac7405bc4f407e486644d11383f30e48c952 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Mon, 5 Oct 2015 14:58:05 +0100 +Subject: [PATCH 31/67] [3d] Swap top/bottom sides of GUI + +--- + xbmc/guilib/GraphicContext.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xbmc/guilib/GraphicContext.cpp b/xbmc/guilib/GraphicContext.cpp +index 9caa43113f63139d277bd71242a858a581736845..3ace73527a7c359ac21c87bf38b5d648a0f4d9c2 100644 +--- a/xbmc/guilib/GraphicContext.cpp ++++ b/xbmc/guilib/GraphicContext.cpp +@@ -265,7 +265,7 @@ CPoint CGraphicContext::StereoCorrection(const CPoint &point) const + { + const RESOLUTION_INFO info = GetResInfo(); + +- if(m_stereoView == RENDER_STEREO_VIEW_RIGHT) ++ if(m_stereoView == RENDER_STEREO_VIEW_LEFT) + res.y += info.iHeight + info.iBlanking; + } + if(m_stereoMode == RENDER_STEREO_MODE_SPLIT_VERTICAL) + +From 97e700d5324b40fc895f1cbcf656ad4291ecfbee Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sun, 11 Oct 2015 20:51:37 +0100 +Subject: [PATCH 32/67] Revert "Revert "Disable extra logging by default"" + +This reverts commit a880554325be187b877cd8f0e2b338e7267da636. 
+--- + system/settings/settings.xml | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/system/settings/settings.xml b/system/settings/settings.xml +index ca7e8892606782e54d4883c5b2f0e6686b1ae280..b67d1113477541f5ce3533495a9960b8646b83ed 100644 +--- a/system/settings/settings.xml ++++ b/system/settings/settings.xml +@@ -2649,12 +2649,12 @@ + </setting> + <setting id="debug.extralogging" type="boolean" label="666" help="36394"> + <level>1</level> +- <default>true</default> ++ <default>false</default> + <control type="toggle" /> + </setting> + <setting id="debug.setextraloglevel" type="list[integer]" parent="debug.extralogging" label="668" help="36534"> + <level>1</level> +- <default>32768</default> ++ <default></default> + <constraints> + <options>loggingcomponents</options> + <delimiter>,</delimiter> + +From 52605aac1a6ca5d9a77513d6467935e7795c540a Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Thu, 26 Nov 2015 17:14:49 +0000 +Subject: [PATCH 33/67] [ae] Add debug logging showing resamplerate + +--- + xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp +index 5bb87b2764fdf1606f438fb3a008b322f8adf271..f9e8a9beaa9b3b4590c698a4d64351cb14c2339d 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp +@@ -2471,6 +2471,7 @@ CSampleBuffer* CActiveAE::SyncStream(CActiveAEStream *stream) + if (stream->m_resampleBuffers) + { + stream->m_resampleBuffers->m_resampleRatio = stream->CalcResampleRatio(error); ++ CLog::Log(LOGDEBUG, "CDVDPlayerAudio::%s rr:%.5f error:%.6f", __FUNCTION__, stream->m_resampleBuffers->m_resampleRatio, error); + } + } + else if (stream->m_resampleBuffers) + +From 17b01a2c74a918d1d6fddc35d7b3a7da986d7225 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> 
+Date: Mon, 21 Dec 2015 22:17:25 +0000 +Subject: [PATCH 34/67] [omximage] Fall back to arm jpeg encode/decode when gpu + is busy + +--- + xbmc/cores/omxplayer/OMXImage.cpp | 50 ++++++++++++++++++++++++++++++++------- + xbmc/cores/omxplayer/OMXImage.h | 7 ++++++ + 2 files changed, 48 insertions(+), 9 deletions(-) + +diff --git a/xbmc/cores/omxplayer/OMXImage.cpp b/xbmc/cores/omxplayer/OMXImage.cpp +index d2560aa78980e44d5f2d1483bce976cb83353502..e16dbf00b8d8192df4c6e946a48d8f20a72d762d 100644 +--- a/xbmc/cores/omxplayer/OMXImage.cpp ++++ b/xbmc/cores/omxplayer/OMXImage.cpp +@@ -57,12 +57,17 @@ static XbmcThreads::ConditionVariable g_count_cond; + static CCriticalSection g_count_lock; + static int g_count_val; + +-static void limit_calls_enter() ++static bool limit_calls_enter() + { + CSingleLock lock(g_count_lock); ++ // on Pi2 fall back to arm decode if the queue is getting big ++ if (g_RBP.RasberryPiVersion() > 1 && g_count_val >= 2) ++ return false; ++ + while (g_count_val >= 3) + g_count_cond.wait(lock); + g_count_val++; ++ return true; + } + + static void limit_calls_leave() +@@ -112,6 +117,9 @@ bool COMXImage::CreateThumbnailFromSurface(unsigned char* buffer, unsigned int w + unsigned int format, unsigned int pitch, const std::string& destFile) + { + COMXImageEnc omxImageEnc; ++ if (!omxImageEnc.Gpu()) ++ return false; ++ + bool ret = omxImageEnc.CreateThumbnailFromSurface(buffer, width, height, format, pitch, destFile); + if (!ret) + CLog::Log(LOGNOTICE, "%s: unable to create thumbnail %s %dx%d", __func__, destFile.c_str(), width, height); +@@ -205,6 +213,8 @@ bool COMXImage::CreateThumb(const std::string& srcFile, unsigned int maxHeight, + bool okay = false; + COMXImageFile file; + COMXImageReEnc reenc; ++ if (!reenc.Gpu()) ++ return false; + void *pDestBuffer; + unsigned int nDestSize; + int orientation = additional_info == "flipped" ? 
1:0; +@@ -310,6 +320,9 @@ bool COMXImage::DecodeJpegToTexture(COMXImageFile *file, unsigned int width, uns + bool ret = false; + COMXTexture omx_image; + ++ if (!omx_image.Gpu()) ++ return false; ++ + struct textureinfo *tex = new struct textureinfo; + if (!tex) + return NULL; +@@ -924,7 +937,7 @@ bool COMXImageFile::ReadFile(const std::string& inputFile, int orientation) + + COMXImageDec::COMXImageDec() + { +- limit_calls_enter(); ++ m_gpu = limit_calls_enter(); + m_decoded_buffer = NULL; + OMX_INIT_STRUCTURE(m_decoded_format); + m_success = false; +@@ -936,7 +949,8 @@ COMXImageDec::~COMXImageDec() + + OMX_INIT_STRUCTURE(m_decoded_format); + m_decoded_buffer = NULL; +- limit_calls_leave(); ++ if (m_gpu) ++ limit_calls_leave(); + } + + void COMXImageDec::Close() +@@ -1086,6 +1100,9 @@ bool COMXImageDec::HandlePortSettingChange(unsigned int resize_width, unsigned i + + bool COMXImageDec::Decode(const uint8_t *demuxer_content, unsigned demuxer_bytes, unsigned width, unsigned height, unsigned stride, void *pixels) + { ++ if (!m_gpu) ++ return false; ++ + CSingleLock lock(m_OMXSection); + OMX_ERRORTYPE omx_err = OMX_ErrorNone; + OMX_BUFFERHEADERTYPE *omx_buffer = NULL; +@@ -1223,7 +1240,7 @@ bool COMXImageDec::Decode(const uint8_t *demuxer_content, unsigned demuxer_bytes + + COMXImageEnc::COMXImageEnc() + { +- limit_calls_enter(); ++ m_gpu = limit_calls_enter(); + CSingleLock lock(m_OMXSection); + OMX_INIT_STRUCTURE(m_encoded_format); + m_encoded_buffer = NULL; +@@ -1247,11 +1264,15 @@ COMXImageEnc::~COMXImageEnc() + m_omx_encoder.Deinitialize(); + } + } +- limit_calls_leave(); ++ if (m_gpu) ++ limit_calls_leave(); + } + + bool COMXImageEnc::Encode(unsigned char *buffer, int size, unsigned width, unsigned height, unsigned int pitch) + { ++ if (!m_gpu) ++ return false; ++ + CSingleLock lock(m_OMXSection); + + unsigned int demuxer_bytes = 0; +@@ -1432,6 +1453,9 @@ bool COMXImageEnc::Encode(unsigned char *buffer, int size, unsigned width, unsig + bool 
COMXImageEnc::CreateThumbnailFromSurface(unsigned char* buffer, unsigned int width, unsigned int height, + unsigned int format, unsigned int pitch, const std::string& destFile) + { ++ if (!m_gpu) ++ return false; ++ + if(format != XB_FMT_A8R8G8B8 || !buffer) + { + CLog::Log(LOGDEBUG, "%s::%s : %s failed format=0x%x\n", CLASSNAME, __func__, destFile.c_str(), format); +@@ -1465,7 +1489,7 @@ bool COMXImageEnc::CreateThumbnailFromSurface(unsigned char* buffer, unsigned in + + COMXImageReEnc::COMXImageReEnc() + { +- limit_calls_enter(); ++ m_gpu = limit_calls_enter(); + m_encoded_buffer = NULL; + m_pDestBuffer = NULL; + m_nDestAllocSize = 0; +@@ -1479,7 +1503,8 @@ COMXImageReEnc::~COMXImageReEnc() + free (m_pDestBuffer); + m_pDestBuffer = NULL; + m_nDestAllocSize = 0; +- limit_calls_leave(); ++ if (m_gpu) ++ limit_calls_leave(); + } + + void COMXImageReEnc::Close() +@@ -1771,6 +1796,9 @@ bool COMXImageReEnc::HandlePortSettingChange(unsigned int resize_width, unsigned + + bool COMXImageReEnc::ReEncode(COMXImageFile &srcFile, unsigned int maxWidth, unsigned int maxHeight, void * &pDestBuffer, unsigned int &nDestSize) + { ++ if (!m_gpu) ++ return false; ++ + CSingleLock lock(m_OMXSection); + OMX_ERRORTYPE omx_err = OMX_ErrorNone; + +@@ -1943,14 +1971,15 @@ bool COMXImageReEnc::ReEncode(COMXImageFile &srcFile, unsigned int maxWidth, uns + + COMXTexture::COMXTexture() + { +- limit_calls_enter(); ++ m_gpu = limit_calls_enter(); + m_success = false; + } + + COMXTexture::~COMXTexture() + { + Close(); +- limit_calls_leave(); ++ if (m_gpu) ++ limit_calls_leave(); + } + + void COMXTexture::Close() +@@ -2134,6 +2163,9 @@ bool COMXTexture::HandlePortSettingChange(unsigned int resize_width, unsigned in + + bool COMXTexture::Decode(const uint8_t *demuxer_content, unsigned demuxer_bytes, unsigned int width, unsigned int height, void *egl_image) + { ++ if (!m_gpu) ++ return false; ++ + CSingleLock lock(m_OMXSection); + OMX_ERRORTYPE omx_err = OMX_ErrorNone; + +diff --git 
a/xbmc/cores/omxplayer/OMXImage.h b/xbmc/cores/omxplayer/OMXImage.h +index a93aa82663903fb1bf712058c2e259290ee742e6..6f38dbc7e5cc721c59a3633935f08218eb1dd169 100644 +--- a/xbmc/cores/omxplayer/OMXImage.h ++++ b/xbmc/cores/omxplayer/OMXImage.h +@@ -133,6 +133,7 @@ protected: + OMX_PARAM_PORTDEFINITIONTYPE m_decoded_format; + CCriticalSection m_OMXSection; + bool m_success; ++ bool m_gpu; + }; + + class COMXImageEnc +@@ -144,6 +145,7 @@ public: + // Required overrides + bool CreateThumbnailFromSurface(unsigned char* buffer, unsigned int width, unsigned int height, + unsigned int format, unsigned int pitch, const std::string& destFile); ++ bool Gpu() { return m_gpu; } + protected: + bool Encode(unsigned char *buffer, int size, unsigned int width, unsigned int height, unsigned int pitch); + // Components +@@ -152,6 +154,7 @@ protected: + OMX_PARAM_PORTDEFINITIONTYPE m_encoded_format; + CCriticalSection m_OMXSection; + bool m_success; ++ bool m_gpu; + }; + + class COMXImageReEnc +@@ -163,6 +166,7 @@ public: + // Required overrides + void Close(); + bool ReEncode(COMXImageFile &srcFile, unsigned int width, unsigned int height, void * &pDestBuffer, unsigned int &nDestSize); ++ bool Gpu() { return m_gpu; } + protected: + bool HandlePortSettingChange(unsigned int resize_width, unsigned int resize_height, int orientation, bool port_settings_changed); + // Components +@@ -176,6 +180,7 @@ protected: + void *m_pDestBuffer; + unsigned int m_nDestAllocSize; + bool m_success; ++ bool m_gpu; + }; + + class COMXTexture +@@ -187,6 +192,7 @@ public: + // Required overrides + void Close(void); + bool Decode(const uint8_t *data, unsigned size, unsigned int width, unsigned int height, void *egl_image); ++ bool Gpu() { return m_gpu; } + protected: + bool HandlePortSettingChange(unsigned int resize_width, unsigned int resize_height, void *egl_image, bool port_settings_changed); + +@@ -201,6 +207,7 @@ protected: + OMX_BUFFERHEADERTYPE *m_egl_buffer; + CCriticalSection m_OMXSection; + bool 
m_success; ++ bool m_gpu; + }; + + extern COMXImage g_OMXImage; + +From 95d0673204e2559173405d03df038ac152a5501b Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 9 Dec 2015 13:31:14 +0000 +Subject: [PATCH 35/67] [mmalcodec] Fail to open when width is invalid. Can + happen with mpegts files + +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index c0e553ca060749edff28bcbb880ed3e149b9f751..8691b086a46fcdd03eee809a53ea9b20f74dcc05 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -541,6 +541,9 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) + CLog::Log(LOGDEBUG, "%s::%s usemmal:%d software:%d %dx%d renderer:%p", CLASSNAME, __func__, CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEMMAL), hints.software, hints.width, hints.height, options.m_opaque_pointer); + ++ // This occurs at start of m2ts files before streams have been fully identified - just ignore ++ if (!hints.width) ++ return false; + // we always qualify even if DVDFactoryCodec does this too. 
+ if (!CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEMMAL) || hints.software) + return false; + +From 9f2c6309ca9bcc281124fa0a5bdb665d9fb50f35 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 12 Jan 2016 16:29:57 +0000 +Subject: [PATCH 36/67] ffmpeg: Add cabac opimisations for hevc + +--- + .../0001-Squashed-commit-of-the-following.patch | 2179 ++++++++++++++++++++ + tools/depends/target/ffmpeg/Makefile | 5 +- + tools/depends/target/ffmpeg/autobuild.sh | 1 + + 3 files changed, 2184 insertions(+), 1 deletion(-) + create mode 100644 tools/depends/target/ffmpeg/0001-Squashed-commit-of-the-following.patch + +diff --git a/tools/depends/target/ffmpeg/0001-Squashed-commit-of-the-following.patch b/tools/depends/target/ffmpeg/0001-Squashed-commit-of-the-following.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..38554af0af30a85b7d88d31b7d21775cf294b0e3 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/0001-Squashed-commit-of-the-following.patch +@@ -0,0 +1,2179 @@ ++From d08594462136274636c1f2f476a6410ff92a9e16 Mon Sep 17 00:00:00 2001 ++From: John Cox <jc@kynesim.co.uk> ++Date: Wed, 13 Jan 2016 16:13:33 +0000 ++Subject: [PATCH] H.265 residual decode rework (v2) ++ ++Rework the cabac decode functions ++Simplify the code flow and variable usage where possible ++ ++(Remove profiling and other spurious deltas that were in v1) ++--- ++ libavcodec/arm/cabac.h | 155 ++++- ++ libavcodec/arm/hevc_cabac.h | 491 +++++++++++++++ ++ libavcodec/arm/hevcdsp_deblock_neon.S | 13 +- ++ libavcodec/arm/hevcdsp_epel_neon.S | 9 +- ++ libavcodec/cabac.h | 9 +- ++ libavcodec/hevc_cabac.c | 1096 +++++++++++++++++++++++++-------- ++ 6 files changed, 1509 insertions(+), 264 deletions(-) ++ create mode 100644 libavcodec/arm/hevc_cabac.h ++ ++diff --git a/libavcodec/arm/cabac.h b/libavcodec/arm/cabac.h ++index fdbf86b..0a3980a 100644 ++--- a/libavcodec/arm/cabac.h +++++ b/libavcodec/arm/cabac.h ++@@ -26,13 +26,34 @@ ++ 
#include "libavutil/internal.h" ++ #include "libavcodec/cabac.h" ++ +++ +++#if UNCHECKED_BITSTREAM_READER +++#define LOAD_16BITS_BEHI\ +++ "ldrh %[tmp] , [%[ptr]] , #2 \n\t"\ +++ "rev %[tmp] , %[tmp] \n\t" +++#elif CONFIG_THUMB +++#define LOAD_16BITS_BEHI\ +++ "ldr %[tmp] , [%[c], %[end]] \n\t"\ +++ "cmp %[tmp] , %[ptr] \n\t"\ +++ "it cs \n\t"\ +++ "ldrhcs %[tmp] , [%[ptr]] , #2 \n\t"\ +++ "rev %[tmp] , %[tmp] \n\t" +++#else +++#define LOAD_16BITS_BEHI\ +++ "ldr %[tmp] , [%[c], %[end]] \n\t"\ +++ "cmp %[tmp] , %[ptr] \n\t"\ +++ "ldrcsh %[tmp] , [%[ptr]] , #2 \n\t"\ +++ "rev %[tmp] , %[tmp] \n\t" +++#endif +++ +++ ++ #define get_cabac_inline get_cabac_inline_arm ++ static av_always_inline int get_cabac_inline_arm(CABACContext *c, ++ uint8_t *const state) ++ { ++ int bit; +++#if 0 ++ void *reg_b, *reg_c, *tmp; ++- ++ __asm__ volatile( ++ "ldrb %[bit] , [%[state]] \n\t" ++ "add %[r_b] , %[tables] , %[lps_off] \n\t" ++@@ -100,9 +121,141 @@ static av_always_inline int get_cabac_inline_arm(CABACContext *c, ++ [mlps_off]"I"(H264_MLPS_STATE_OFFSET + 128) ++ : "memory", "cc" ++ ); +++#else +++ // *** Not thumb compatible yet +++ unsigned int reg_b, tmp; +++ __asm__ ( +++ "ldrb %[bit] , [%[state]] \n\t" +++ "sub %[r_b] , %[mlps_tables], %[lps_off] \n\t" +++ "and %[tmp] , %[range] , #0xC0 \n\t" +++ "add %[r_b] , %[r_b] , %[bit] \n\t" +++ "ldrb %[tmp] , [%[r_b] , %[tmp], lsl #1] \n\t" +++// %bit = *state +++// %range = range +++// %tmp = RangeLPS +++ "sub %[range] , %[range] , %[tmp] \n\t" +++ +++ "cmp %[low] , %[range] , lsl #17 \n\t" +++ "ittt ge \n\t" +++ "subge %[low] , %[low] , %[range], lsl #17 \n\t" +++ "mvnge %[bit] , %[bit] \n\t" +++ "movge %[range] , %[tmp] \n\t" +++ +++ "clz %[tmp] , %[range] \n\t" +++ "sub %[tmp] , #23 \n\t" +++ +++ "ldrb %[r_b] , [%[mlps_tables], %[bit]] \n\t" +++ "lsl %[low] , %[low] , %[tmp] \n\t" +++ "lsl %[range] , %[range] , %[tmp] \n\t" +++ +++ "strb %[r_b] , [%[state]] \n\t" +++ "lsls %[tmp] , %[low] , #16 \n\t" +++ +++ "bne 2f \n\t" +++ 
LOAD_16BITS_BEHI +++ "lsr %[tmp] , %[tmp] , #15 \n\t" +++ "movw %[r_b] , #0xFFFF \n\t" +++ "sub %[tmp] , %[tmp] , %[r_b] \n\t" +++ +++ "rbit %[r_b] , %[low] \n\t" +++ "clz %[r_b] , %[r_b] \n\t" +++ "sub %[r_b] , %[r_b] , #16 \n\t" +++#if CONFIG_THUMB +++ "lsl %[tmp] , %[tmp] , %[r_b] \n\t" +++ "add %[low] , %[low] , %[tmp] \n\t" +++#else +++ "add %[low] , %[low] , %[tmp], lsl %[r_b] \n\t" +++#endif +++ "2: \n\t" +++ : [bit]"=&r"(bit), +++ [low]"+&r"(c->low), +++ [range]"+&r"(c->range), +++ [r_b]"=&r"(reg_b), +++ [ptr]"+&r"(c->bytestream), +++ [tmp]"=&r"(tmp) +++ : [state]"r"(state), +++ [mlps_tables]"r"(ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET + 128), +++ [byte]"M"(offsetof(CABACContext, bytestream)), +++#if !UNCHECKED_BITSTREAM_READER +++ [c]"r"(c), +++ [end]"M"(offsetof(CABACContext, bytestream_end)), +++#endif +++ [lps_off]"I"((H264_MLPS_STATE_OFFSET + 128) - H264_LPS_RANGE_OFFSET) +++ : "memory", "cc" +++ ); +++#endif ++ ++ return bit & 1; ++ } +++ +++#define get_cabac_bypass get_cabac_bypass_arm +++static inline int get_cabac_bypass_arm(CABACContext * const c) +++{ +++ int rv = 0; +++ unsigned int tmp; +++ __asm ( +++ "lsl %[low] , #1 \n\t" +++ "cmp %[low] , %[range] , lsl #17 \n\t" +++ "adc %[rv] , %[rv] , #0 \n\t" +++ "it cs \n\t" +++ "subcs %[low] , %[low] , %[range], lsl #17 \n\t" +++ "lsls %[tmp] , %[low] , #16 \n\t" +++ "bne 1f \n\t" +++ LOAD_16BITS_BEHI +++ "add %[low] , %[low] , %[tmp], lsr #15 \n\t" +++ "movw %[tmp] , #0xFFFF \n\t" +++ "sub %[low] , %[low] , %[tmp] \n\t" +++ "1: \n\t" +++ : // Outputs +++ [rv]"+&r"(rv), +++ [low]"+&r"(c->low), +++ [tmp]"=&r"(tmp), +++ [ptr]"+&r"(c->bytestream) +++ : // Inputs +++#if !UNCHECKED_BITSTREAM_READER +++ [c]"r"(c), +++ [end]"M"(offsetof(CABACContext, bytestream_end)), +++#endif +++ [range]"r"(c->range) +++ : "cc" +++ ); +++ return rv; +++} +++ +++ +++#define get_cabac_bypass_sign get_cabac_bypass_sign_arm +++static inline int get_cabac_bypass_sign_arm(CABACContext * const c, int rv) +++{ +++ unsigned 
int tmp; +++ __asm ( +++ "lsl %[low] , #1 \n\t" +++ "cmp %[low] , %[range] , lsl #17 \n\t" +++ "ite cc \n\t" +++ "rsbcc %[rv] , %[rv] , #0 \n\t" +++ "subcs %[low] , %[low] , %[range], lsl #17 \n\t" +++ "lsls %[tmp] , %[low] , #16 \n\t" +++ "bne 1f \n\t" +++ LOAD_16BITS_BEHI +++ "add %[low] , %[low] , %[tmp], lsr #15 \n\t" +++ "movw %[tmp] , #0xFFFF \n\t" +++ "sub %[low] , %[low] , %[tmp] \n\t" +++ "1: \n\t" +++ : // Outputs +++ [rv]"+&r"(rv), +++ [low]"+&r"(c->low), +++ [tmp]"=&r"(tmp), +++ [ptr]"+&r"(c->bytestream) +++ : // Inputs +++#if !UNCHECKED_BITSTREAM_READER +++ [c]"r"(c), +++ [end]"M"(offsetof(CABACContext, bytestream_end)), +++#endif +++ [range]"r"(c->range) +++ : "cc" +++ ); +++ return rv; +++} +++ ++ #endif /* HAVE_ARMV6T2_INLINE */ ++ ++ #endif /* AVCODEC_ARM_CABAC_H */ ++diff --git a/libavcodec/arm/hevc_cabac.h b/libavcodec/arm/hevc_cabac.h ++new file mode 100644 ++index 0000000..31d3c59 ++--- /dev/null +++++ b/libavcodec/arm/hevc_cabac.h ++@@ -0,0 +1,491 @@ +++/* +++ * This file is part of FFmpeg. +++ * +++ * FFmpeg is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * FFmpeg is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. 
+++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with FFmpeg; if not, write to the Free Software +++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +++ */ +++ +++#ifndef AVCODEC_ARM_HEVC_CABAC_H +++#define AVCODEC_ARM_HEVC_CABAC_H +++ +++#include "config.h" +++#if HAVE_ARMV6T2_INLINE +++ +++#define hevc_mem_bits32 hevc_mem_bits32_arm +++static inline uint32_t hevc_mem_bits32_arm(const void * p, const unsigned int bits) +++{ +++ unsigned int n; +++ __asm__ ( +++ "rev %[n], %[x] \n\t" +++ : [n]"=r"(n) +++ : [x]"r"(*(const uint32_t *)((const uint8_t *)p + (bits >> 3))) +++ : +++ ); +++ return n << (bits & 7); +++} +++ +++ +++// --------------------------------------------------------------------------- +++// +++// Helper fns - little bits of code where ARM has an instruction that the +++// compiler doesn't know about / use +++ +++#define trans_scale_sat trans_scale_sat_arm +++static inline int trans_scale_sat_arm(const int level, const unsigned int scale, const unsigned int scale_m, const unsigned int shift) +++{ +++ int rv; +++ int t = ((level * (int)(scale * scale_m)) >> shift) + 1; +++ +++ __asm__ ( +++ "ssat %[rv], #16, %[t], ASR #1 \n\t" +++ : [rv]"=r"(rv) +++ : [t]"r"(t) +++ : +++ ); +++ return rv; +++} +++ +++#define update_rice update_rice_arm +++static inline void update_rice_arm(uint8_t * const stat_coeff, +++ const unsigned int last_coeff_abs_level_remaining, +++ const unsigned int c_rice_param) +++{ +++ int t; +++ __asm__ ( +++ "lsl %[t], %[coeff], #1 \n\t" +++ "lsrs %[t], %[t], %[shift] \n\t" +++ "it eq \n\t" +++ "subeq %[stat], %[stat], #1 \n\t" +++ "cmp %[t], #6 \n\t" +++ "adc %[stat], %[stat], #0 \n\t" +++ "usat %[stat], #8, %[stat] \n\t" +++ : [stat]"+&r"(*stat_coeff), +++ [t]"=&r"(t) +++ : [coeff]"r"(last_coeff_abs_level_remaining), +++ [shift]"r"(c_rice_param) +++ : "cc" +++ ); +++} +++ +++// --------------------------------------------------------------------------- 
+++// +++// CABAC get loops +++// +++// Where the loop is simple enough we can normally do 10-30% better than the +++// compiler +++ +++// Get the residual greater than 1 bits +++ +++#define get_cabac_greater1_bits get_cabac_greater1_bits_arm +++static inline unsigned int get_cabac_greater1_bits_arm(CABACContext * const c, const unsigned int n, +++ uint8_t * const state0) +++{ +++ unsigned int i, reg_b, st, tmp, bit, rv; +++ __asm__ ( +++ "mov %[i] , #0 \n\t" +++ "mov %[rv] , #0 \n\t" +++ "1: \n\t" +++ "add %[i] , %[i] , #1 \n\t" +++ "cmp %[rv] , #0 \n\t" +++ "ite eq \n\t" +++ "usateq %[st] , #2 , %[i] \n\t" +++ "movne %[st] , #0 \n\t" +++ +++ "ldrb %[bit] , [%[state0], %[st]] \n\t" +++ "sub %[r_b] , %[mlps_tables], %[lps_off] \n\t" +++ "and %[tmp] , %[range] , #0xC0 \n\t" +++ "add %[r_b] , %[r_b] , %[bit] \n\t" +++ "ldrb %[tmp] , [%[r_b], %[tmp], lsl #1] \n\t" +++ "sub %[range] , %[range] , %[tmp] \n\t" +++ +++ "cmp %[low] , %[range], lsl #17 \n\t" +++ "ittt ge \n\t" +++ "subge %[low] , %[low] , %[range], lsl #17 \n\t" +++ "mvnge %[bit] , %[bit] \n\t" +++ "movge %[range] , %[tmp] \n\t" +++ +++ "ldrb %[r_b] , [%[mlps_tables], %[bit]] \n\t" +++ "and %[bit] , %[bit] , #1 \n\t" +++ "orr %[rv] , %[bit] , %[rv], lsl #1 \n\t" +++ +++ "clz %[tmp] , %[range] \n\t" +++ "sub %[tmp] , #23 \n\t" +++ +++ "lsl %[low] , %[low] , %[tmp] \n\t" +++ "lsl %[range] , %[range] , %[tmp] \n\t" +++ +++ "strb %[r_b] , [%[state0], %[st]] \n\t" +++// There is a small speed gain from combining both conditions, using a single +++// branch and then working out what that meant later +++ "lsls %[tmp] , %[low] , #16 \n\t" +++ "it ne \n\t" +++ "cmpne %[n] , %[i] \n\t" +++ "bne 1b \n\t" +++ +++// If reload is not required then we must have run out of flags to decode +++ "tst %[tmp] , %[tmp] \n\t" +++ "bne 2f \n\t" +++ +++// Do reload +++ "ldrh %[tmp] , [%[bptr]] , #2 \n\t" +++ "movw %[r_b] , #0xFFFF \n\t" +++ "rev %[tmp] , %[tmp] \n\t" +++ "rsb %[tmp] , %[r_b] , %[tmp], lsr #15 \n\t" +++ +++ "rbit 
%[r_b] , %[low] \n\t" +++ "clz %[r_b] , %[r_b] \n\t" +++ "sub %[r_b] , %[r_b] , #16 \n\t" +++ +++#if CONFIG_THUMB +++ "lsl %[tmp] , %[tmp] , %[r_b] \n\t" +++ "add %[low] , %[low] , %[tmp] \n\t" +++#else +++ "add %[low] , %[low] , %[tmp], lsl %[r_b] \n\t" +++#endif +++ +++ "cmp %[n] , %[i] \n\t" +++ "bne 1b \n\t" +++ "2: \n\t" +++ : [bit]"=&r"(bit), +++ [low]"+&r"(c->low), +++ [range]"+&r"(c->range), +++ [r_b]"=&r"(reg_b), +++ [bptr]"+&r"(c->bytestream), +++ [i]"=&r"(i), +++ [tmp]"=&r"(tmp), +++ [st]"=&r"(st), +++ [rv]"=&r"(rv) +++ : [state0]"r"(state0), +++ [n]"r"(n), +++ [mlps_tables]"r"(ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET + 128), +++ [byte]"M"(offsetof(CABACContext, bytestream)), +++ [lps_off]"I"((H264_MLPS_STATE_OFFSET + 128) - H264_LPS_RANGE_OFFSET) +++ : "memory", "cc" +++ ); +++ return rv; +++} +++ +++ +++// n must be > 0 on entry +++#define get_cabac_sig_coeff_flag_idxs get_cabac_sig_coeff_flag_idxs_arm +++static inline uint8_t * get_cabac_sig_coeff_flag_idxs_arm(CABACContext * const c, uint8_t * const state0, +++ unsigned int n, +++ const uint8_t const * ctx_map, +++ uint8_t * p) +++{ +++ unsigned int reg_b, tmp, st, bit; +++ __asm__ ( +++ "1: \n\t" +++// Get bin from map +++ "ldrb %[st] , [%[ctx_map], %[n]] \n\t" +++ +++// Load state & ranges +++ "sub %[r_b] , %[mlps_tables], %[lps_off] \n\t" +++ "ldrb %[bit] , [%[state0], %[st]] \n\t" +++ "and %[tmp] , %[range] , #0xC0 \n\t" +++ "add %[r_b] , %[r_b] , %[tmp], lsl #1 \n\t" +++ "ldrb %[tmp] , [%[r_b], %[bit]] \n\t" +++ "sub %[range] , %[range] , %[tmp] \n\t" +++ +++ "cmp %[low] , %[range], lsl #17 \n\t" +++ "ittt ge \n\t" +++ "subge %[low] , %[low] , %[range], lsl #17 \n\t" +++ "mvnge %[bit] , %[bit] \n\t" +++ "movge %[range] , %[tmp] \n\t" +++ +++ "ldrb %[r_b] , [%[mlps_tables], %[bit]] \n\t" +++ "tst %[bit] , #1 \n\t" +++// GCC asm seems to need strbne written differently for thumb and arm +++#if CONFIG_THUMB +++ "it ne \n\t" +++ "strbne %[n] , [%[idx]] , #1 \n\t" +++#else +++ "strneb %[n] , 
[%[idx]] , #1 \n\t" +++#endif +++ +++// Renorm +++ "clz %[tmp] , %[range] \n\t" +++ "sub %[tmp] , #23 \n\t" +++ "lsl %[low] , %[low] , %[tmp] \n\t" +++ "lsl %[range] , %[range] , %[tmp] \n\t" +++ +++ "strb %[r_b] , [%[state0], %[st]] \n\t" +++// There is a small speed gain from combining both conditions, using a single +++// branch and then working out what that meant later +++ "subs %[n] , %[n] , #1 \n\t" +++#if CONFIG_THUMB +++ "itt ne \n\t" +++ "lslsne %[tmp] , %[low] , #16 \n\t" +++ "bne 1b \n\t" +++#else +++ "lslnes %[tmp] , %[low] , #16 \n\t" +++ "bne 1b \n\t" +++#endif +++ +++// If we have bits left then n must be 0 so give up now +++ "lsls %[tmp] , %[low] , #16 \n\t" +++ "bne 2f \n\t" +++ +++// Do reload +++ "ldrh %[tmp] , [%[bptr]] , #2 \n\t" +++ "movw %[r_b] , #0xFFFF \n\t" +++ "rev %[tmp] , %[tmp] \n\t" +++ "rsb %[tmp] , %[r_b] , %[tmp], lsr #15 \n\t" +++ +++ "rbit %[r_b] , %[low] \n\t" +++ "clz %[r_b] , %[r_b] \n\t" +++ "sub %[r_b] , %[r_b] , #16 \n\t" +++ +++#if CONFIG_THUMB +++ "lsl %[tmp] , %[tmp] , %[r_b] \n\t" +++ "add %[low] , %[low] , %[tmp] \n\t" +++#else +++ "add %[low] , %[low] , %[tmp], lsl %[r_b] \n\t" +++#endif +++ +++// Check to see if we still have more to do +++ "cmp %[n] , #0 \n\t" +++ "bne 1b \n\t" +++ "2: \n\t" +++ : [bit]"=&r"(bit), +++ [low]"+&r"(c->low), +++ [range]"+&r"(c->range), +++ [r_b]"=&r"(reg_b), +++ [bptr]"+&r"(c->bytestream), +++ [idx]"+&r"(p), +++ [n]"+&r"(n), +++ [tmp]"=&r"(tmp), +++ [st]"=&r"(st) +++ : [state0]"r"(state0), +++ [ctx_map]"r"(ctx_map), +++ [mlps_tables]"r"(ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET + 128), +++ [byte]"M"(offsetof(CABACContext, bytestream)), +++ [lps_off]"I"((H264_MLPS_STATE_OFFSET + 128) - H264_LPS_RANGE_OFFSET) +++ : "memory", "cc" +++ ); +++ +++ return p; +++} +++ +++// --------------------------------------------------------------------------- +++// +++// CABAC_BY22 functions +++// +++// By and large these are (at best) no faster than their C equivalents - the +++// only one worth 
having is _peek where we do a slightly better job than the +++// compiler +++// +++// The others have been stashed here for reference in case larger scale asm +++// is attempted in which case they might be a useful base +++ +++ +++#define get_cabac_by22_peek get_cabac_by22_peek_arm +++static inline uint32_t get_cabac_by22_peek_arm(const CABACContext *const c) +++{ +++ uint32_t rv, tmp; +++ __asm__ ( +++ "bic %[rv] , %[low], #1 \n\t" +++ "cmp %[inv] , #0 \n\t" +++ "it ne \n\t" +++ "umullne %[tmp] , %[rv] , %[inv], %[rv] \n\t" +++ : // Outputs +++ [rv]"=&r"(rv), +++ [tmp]"=r"(tmp) +++ : // Inputs +++ [low]"r"(c->low), +++ [inv]"r"(c->range) +++ : // Clobbers +++ "cc" +++ ); +++ return rv << 1; +++} +++ +++#if 0 +++ +++// ***** Slower than the C :-( +++#define get_cabac_by22_flush get_cabac_by22_flush_arm +++static inline void get_cabac_by22_flush_arm(CABACContext *const c, const unsigned int n, const uint32_t val) +++{ +++ uint32_t m, tmp; +++ __asm__ ( +++ "add %[bits], %[bits], %[n] \n\t" +++ "ldr %[m], [%[ptr], %[bits], lsr #3] \n\t" +++ +++ "rsb %[tmp], %[n], #32 \n\t" +++ "lsr %[tmp], %[val], %[tmp] \n\t" +++ "mul %[tmp], %[range], %[tmp] \n\t" +++ +++ "rev %[m], %[m] \n\t" +++ +++ "lsl %[tmp], %[tmp], #23 \n\t" +++ "rsb %[low], %[tmp], %[low], lsl %[n] \n\t" +++ +++ "and %[tmp], %[bits], #7 \n\t" +++ "lsl %[m], %[m], %[tmp] \n\t" +++ +++ "orr %[low], %[low], %[m], lsr #9 \n\t" +++ : // Outputs +++ [m]"=&r"(m), +++ [tmp]"=&r"(tmp), +++ [bits]"+&r"(c->by22.bits), +++ [low]"+&r"(c->low) +++ : // Inputs +++ [n]"r"(n), +++ [val]"r"(val), +++ [inv]"r"(c->range), +++ [range]"r"(c->by22.range), +++ [ptr]"r"(c->bytestream) +++ : // Clobbers +++ ); +++} +++ +++ +++// Works but slower than C +++#define coeff_abs_level_remaining_decode_by22(c,r) coeff_abs_level_remaining_decode_by22_arm(c, r) +++static int coeff_abs_level_remaining_decode_by22_arm(CABACContext * const c, const unsigned int c_rice_param) +++{ +++ uint32_t n, val, tmp, level; +++ +++// PROFILE_START(); +++ 
+++ __asm__ ( +++ // Peek +++ "bic %[val], %[low], #1 \n\t" +++ "cmp %[inv], #0 \n\t" +++ "umullne %[tmp], %[val], %[inv], %[val] \n\t" +++ "lsl %[val], %[val], #1 \n\t" +++ +++ // Count bits (n = prefix) +++ "mvn %[n], %[val] \n\t" +++ "clz %[n], %[n] \n\t" +++ +++ "lsl %[level], %[val], %[n] \n\t" +++ "subs %[tmp], %[n], #3 \n\t" +++ "blo 2f \n\t" +++ +++ // prefix >= 3 +++ // < tmp = prefix - 3 +++ // > tmp = prefix + rice - 3 +++ "add %[tmp], %[tmp], %[rice] \n\t" +++ // > n = prefix * 2 + rice - 3 +++ "add %[n], %[tmp], %[n] \n\t" +++ "cmp %[n], #21 \n\t" +++ "bhi 3f \n\t" +++ +++ "orr %[level], %[level], #0x80000000 \n\t" +++ "rsb %[tmp], %[tmp], #31 \n\t" +++ "lsr %[level], %[level], %[tmp] \n\t" +++ +++ "mov %[tmp], #2 \n\t" +++ "add %[level], %[level], %[tmp], lsl %[rice] \n\t" +++ "b 1f \n\t" +++ +++ // > 22 bits used in total - need reload +++ "3: \n\t" +++ +++ // Stash prefix + rice - 3 in level (only spare reg) +++ "mov %[level], %[tmp] \n\t" +++ // Restore n to flush value (prefix) +++ "sub %[n], %[n], %[tmp] \n\t" +++ +++ // Flush + reload +++ +++// "rsb %[tmp], %[n], #32 \n\t" +++// "lsr %[tmp], %[val], %[tmp] \n\t" +++// "mul %[tmp], %[range], %[tmp] \n\t" +++ +++ // As it happens we know that all the bits we are flushing are 1 +++ // so we can cheat slightly +++ "rsb %[tmp], %[range], %[range], lsl %[n] \n\t" +++ "lsl %[tmp], %[tmp], #23 \n\t" +++ "rsb %[low], %[tmp], %[low], lsl %[n] \n\t" +++ +++ "add %[bits], %[bits], %[n] \n\t" +++ "ldr %[n], [%[ptr], %[bits], lsr #3] \n\t" +++ "rev %[n], %[n] \n\t" +++ "and %[tmp], %[bits], #7 \n\t" +++ "lsl %[n], %[n], %[tmp] \n\t" +++ +++ "orr %[low], %[low], %[n], lsr #9 \n\t" +++ +++ // (reload) +++ +++ "bic %[val], %[low], #1 \n\t" +++ "cmp %[inv], #0 \n\t" +++ "umullne %[tmp], %[val], %[inv], %[val] \n\t" +++ "lsl %[val], %[val], #1 \n\t" +++ +++ // Build value +++ +++ "mov %[n], %[level] \n\t" +++ +++ "orr %[tmp], %[val], #0x80000000 \n\t" +++ "rsb %[level], %[level], #31 \n\t" +++ "lsr %[level], 
%[tmp], %[level] \n\t" +++ +++ "mov %[tmp], #2 \n\t" +++ "add %[level], %[level], %[tmp], lsl %[rice] \n\t" +++ "b 1f \n\t" +++ +++ // prefix < 3 +++ "2: \n\t" +++ "rsb %[tmp], %[rice], #31 \n\t" +++ "lsr %[level], %[level], %[tmp] \n\t" +++ "orr %[level], %[level], %[n], lsl %[rice] \n\t" +++ "add %[n], %[n], %[rice] \n\t" +++ +++ "1: \n\t" +++ // Flush +++ "add %[n], %[n], #1 \n\t" +++ +++ "rsb %[tmp], %[n], #32 \n\t" +++ "lsr %[tmp], %[val], %[tmp] \n\t" +++ +++ "add %[bits], %[bits], %[n] \n\t" +++ "ldr %[val], [%[ptr], %[bits], lsr #3] \n\t" +++ +++ "mul %[tmp], %[range], %[tmp] \n\t" +++ "lsl %[tmp], %[tmp], #23 \n\t" +++ "rsb %[low], %[tmp], %[low], lsl %[n] \n\t" +++ +++ "rev %[val], %[val] \n\t" +++ "and %[tmp], %[bits], #7 \n\t" +++ "lsl %[val], %[val], %[tmp] \n\t" +++ +++ "orr %[low], %[low], %[val], lsr #9 \n\t" +++ : // Outputs +++ [level]"=&r"(level), +++ [n]"=&r"(n), +++ [val]"=&r"(val), +++ [tmp]"=&r"(tmp), +++ [bits]"+&r"(c->by22.bits), +++ [low]"+&r"(c->low) +++ : // Inputs +++ [rice]"r"(c_rice_param), +++ [inv]"r"(c->range), +++ [range]"r"(c->by22.range), +++ [ptr]"r"(c->bytestream) +++ : // Clobbers +++ "cc" +++ ); +++ +++// PROFILE_ACC(residual_abs); +++ +++ return level; +++} +++#endif +++ +++#endif /* HAVE_ARMV6T2_INLINE */ +++ +++#endif /* AVCODEC_ARM_HEVC_CABAC_H */ ++diff --git a/libavcodec/arm/hevcdsp_deblock_neon.S b/libavcodec/arm/hevcdsp_deblock_neon.S ++index bad4589..a088cc3 100644 ++--- a/libavcodec/arm/hevcdsp_deblock_neon.S +++++ b/libavcodec/arm/hevcdsp_deblock_neon.S ++@@ -409,10 +409,12 @@ function ff_hevc_deblocking_boundary_strengths_neon, export=1 ++ beq 90f ++ ++ tst a3, #1 +++ itee ne ++ ldrne a3, [v5, #0] @ curr->mv[0] ++ ldreq a3, [v5, #4] @ curr->mv[1] ++ moveq v1, v2 ++ tst v8, #1 +++ itee ne ++ ldrne v8, [v6, #0] @ neigh->mv[0] ++ ldreq v8, [v6, #4] @ neigh->mv[1] ++ moveq v3, v4 ++@@ -424,9 +426,14 @@ function ff_hevc_deblocking_boundary_strengths_neon, export=1 ++ sel a3, a3, ip ++ ands a3, a3, lr ++ @ drop through 
++-10: movne a3, #1 +++10: it ne +++ movne a3, #1 ++ 11: subs a2, a2, #1 ++-12: strbhs a3, [v7], a4 +++12: +++A strbhs a3, [v7], a4 +++T itt hs +++T strbhs a3, [v7] +++T addhs v7, v7, a4 ++ subs a2, a2, #1 ++ bhs 12b ++ ++@@ -442,6 +449,7 @@ function ff_hevc_deblocking_boundary_strengths_neon, export=1 ++ bne 10b ++ ++ teq v1, v3 +++ it eq ++ teqeq v2, v4 ++ bne 40f ++ teq v1, v2 ++@@ -487,6 +495,7 @@ function ff_hevc_deblocking_boundary_strengths_neon, export=1 ++ b 10b ++ ++ 40: teq v1, v4 +++ ite eq ++ teqeq v2, v3 ++ bne 10b ++ ++diff --git a/libavcodec/arm/hevcdsp_epel_neon.S b/libavcodec/arm/hevcdsp_epel_neon.S ++index 516ae5b..00eab9e 100644 ++--- a/libavcodec/arm/hevcdsp_epel_neon.S +++++ b/libavcodec/arm/hevcdsp_epel_neon.S ++@@ -110,7 +110,9 @@ function ff_hevc_put_epel_h_neon_8, export=1 ++ sub r7, #1 ++ lsl r7, #2 ++ vpush {d8-d15} ++- adrl r12, epel_coeffs +++@ adr reaches if we are in thumb mode but not in arm +++T adr r12, epel_coeffs +++A adrl r12, epel_coeffs ++ add r7, r12 ++ sub r1, #1 ++ lsl r4, #1 ++@@ -170,7 +172,8 @@ function ff_hevc_put_epel_v_neon_8, export=1 ++ sub r7, #1 ++ lsl r7, #2 ++ vpush {d8-d15} ++- adrl r12, epel_coeffs +++T adr r12, epel_coeffs +++A adrl r12, epel_coeffs ++ add r7, r12 ++ load_coeffs_16b r7 ++ sub r1, r2 ++@@ -246,7 +249,7 @@ function ff_hevc_put_epel_hv_neon_8, export=1 ++ sub r7, #1 ++ lsl r7, #2 ++ vpush {d8-d15} ++- adrl r12, epel_coeffs +++ adr r12, epel_coeffs ++ sub r6, #1 ++ lsl r6, #2 ++ add r6, r12 // mx epel coeff offset ++diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h ++index 1bf1c62..ccfa991 100644 ++--- a/libavcodec/cabac.h +++++ b/libavcodec/cabac.h ++@@ -43,7 +43,14 @@ extern const uint8_t ff_h264_cabac_tables[512 + 4*2*64 + 4*64 + 63]; ++ typedef struct CABACContext{ ++ int low; ++ int range; ++- int outstanding_count; +++ union +++ { +++ int outstanding_count; +++ struct { +++ uint16_t bits; +++ uint16_t range; +++ } by22; +++ }; ++ const uint8_t *bytestream_start; ++ const uint8_t 
*bytestream; ++ const uint8_t *bytestream_end; ++diff --git a/libavcodec/hevc_cabac.c b/libavcodec/hevc_cabac.c ++index 8656917..4caf720 100644 ++--- a/libavcodec/hevc_cabac.c +++++ b/libavcodec/hevc_cabac.c ++@@ -21,14 +21,72 @@ ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ +++#define UNCHECKED_BITSTREAM_READER 1 +++ ++ #include "libavutil/attributes.h" ++ #include "libavutil/common.h" ++ ++-#include "cabac_functions.h" ++ #include "hevc.h" +++#include "cabac_functions.h" +++ +++// BY22 is probably faster than simple bypass if the processor has +++// either a fast 32-bit divide or a fast 32x32->64[63:32] instruction +++// x86 has fast int divide +++// Arm doesn't have divide or general fast 64 bit, but does have the multiply +++// * Beware: ARCH_xxx isn't set if configure --disable-asm is used +++#define USE_BY22 (HAVE_FAST_64BIT || ARCH_ARM || ARCH_X86) +++// Use native divide if we have a fast one - otherwise use mpy 1/x +++// x86 has a fast integer divide - arm doesn't - unsure about other +++// architectures +++#define USE_BY22_DIV ARCH_X86 +++ +++// Special case blocks with a single significant ceoff +++// Decreases the complexity of the code for a common case but increases the +++// code size. 
+++#define USE_N_END_1 1 +++ +++#if ARCH_ARM +++#include "arm/hevc_cabac.h" +++#endif ++ ++ #define CABAC_MAX_BIN 31 ++ +++ +++#if USE_BY22 && !USE_BY22_DIV +++#define I(x) (uint32_t)((0x10000000000ULL / (uint64_t)(x)) + 1ULL) +++ +++static const uint32_t cabac_by22_inv_range[256] = { +++ 0, I(257), I(258), I(259), +++ I(260), I(261), I(262), I(263), I(264), I(265), I(266), I(267), I(268), I(269), +++ I(270), I(271), I(272), I(273), I(274), I(275), I(276), I(277), I(278), I(279), +++ I(280), I(281), I(282), I(283), I(284), I(285), I(286), I(287), I(288), I(289), +++ I(290), I(291), I(292), I(293), I(294), I(295), I(296), I(297), I(298), I(299), +++ I(300), I(301), I(302), I(303), I(304), I(305), I(306), I(307), I(308), I(309), +++ I(310), I(311), I(312), I(313), I(314), I(315), I(316), I(317), I(318), I(319), +++ I(320), I(321), I(322), I(323), I(324), I(325), I(326), I(327), I(328), I(329), +++ I(330), I(331), I(332), I(333), I(334), I(335), I(336), I(337), I(338), I(339), +++ I(340), I(341), I(342), I(343), I(344), I(345), I(346), I(347), I(348), I(349), +++ I(350), I(351), I(352), I(353), I(354), I(355), I(356), I(357), I(358), I(359), +++ I(360), I(361), I(362), I(363), I(364), I(365), I(366), I(367), I(368), I(369), +++ I(370), I(371), I(372), I(373), I(374), I(375), I(376), I(377), I(378), I(379), +++ I(380), I(381), I(382), I(383), I(384), I(385), I(386), I(387), I(388), I(389), +++ I(390), I(391), I(392), I(393), I(394), I(395), I(396), I(397), I(398), I(399), +++ I(400), I(401), I(402), I(403), I(404), I(405), I(406), I(407), I(408), I(409), +++ I(410), I(411), I(412), I(413), I(414), I(415), I(416), I(417), I(418), I(419), +++ I(420), I(421), I(422), I(423), I(424), I(425), I(426), I(427), I(428), I(429), +++ I(430), I(431), I(432), I(433), I(434), I(435), I(436), I(437), I(438), I(439), +++ I(440), I(441), I(442), I(443), I(444), I(445), I(446), I(447), I(448), I(449), +++ I(450), I(451), I(452), I(453), I(454), I(455), I(456), I(457), I(458), I(459), 
+++ I(460), I(461), I(462), I(463), I(464), I(465), I(466), I(467), I(468), I(469), +++ I(470), I(471), I(472), I(473), I(474), I(475), I(476), I(477), I(478), I(479), +++ I(480), I(481), I(482), I(483), I(484), I(485), I(486), I(487), I(488), I(489), +++ I(490), I(491), I(492), I(493), I(494), I(495), I(496), I(497), I(498), I(499), +++ I(500), I(501), I(502), I(503), I(504), I(505), I(506), I(507), I(508), I(509), +++ I(510), I(511) +++}; +++#undef I +++#endif // USE_BY22 +++ ++ /** ++ * number of bin by SyntaxElement. ++ */ ++@@ -445,6 +503,211 @@ static const uint8_t diag_scan8x8_inv[8][8] = { ++ { 28, 36, 43, 49, 54, 58, 61, 63, }, ++ }; ++ +++ +++typedef struct +++{ +++ uint16_t coeff; +++ uint16_t scale; +++} xy_off_t; +++ +++#define XYT_C(x,y,t) ((x) + ((y) << (t))) +++#define SCALE_TRAFO(t) ((t) > 3 ? 3 : (t)) +++#define SCALE_SHR(t) ((t) - SCALE_TRAFO(t)) +++#define XYT_S(x,y,t) (((x) >> SCALE_SHR(t)) + (((y) >> SCALE_SHR(t)) << SCALE_TRAFO(t))) +++ +++#define XYT(x,y,t) {XYT_C(x,y,t), XYT_S(x,y,t)} +++ +++#define OFF_DIAG(t) {\ +++ XYT(0,0,t), XYT(0,1,t), XYT(1,0,t), XYT(0,2,t),\ +++ XYT(1,1,t), XYT(2,0,t), XYT(0,3,t), XYT(1,2,t),\ +++ XYT(2,1,t), XYT(3,0,t), XYT(1,3,t), XYT(2,2,t),\ +++ XYT(3,1,t), XYT(2,3,t), XYT(3,2,t), XYT(3,3,t)\ +++} +++ +++#define OFF_HORIZ(t) {\ +++ XYT(0,0,t), XYT(1,0,t), XYT(2,0,t), XYT(3,0,t),\ +++ XYT(0,1,t), XYT(1,1,t), XYT(2,1,t), XYT(3,1,t),\ +++ XYT(0,2,t), XYT(1,2,t), XYT(2,2,t), XYT(3,2,t),\ +++ XYT(0,3,t), XYT(1,3,t), XYT(2,3,t), XYT(3,3,t)\ +++} +++ +++#define OFF_VERT(t) {\ +++ XYT(0,0,t), XYT(0,1,t), XYT(0,2,t), XYT(0,3,t),\ +++ XYT(1,0,t), XYT(1,1,t), XYT(1,2,t), XYT(1,3,t),\ +++ XYT(2,0,t), XYT(2,1,t), XYT(2,2,t), XYT(2,3,t),\ +++ XYT(3,0,t), XYT(3,1,t), XYT(3,2,t), XYT(3,3,t)\ +++} +++ +++static const xy_off_t off_xys[3][4][16] = +++{ +++ {OFF_DIAG(2), OFF_DIAG(3), OFF_DIAG(4), OFF_DIAG(5)}, +++ {OFF_HORIZ(2), OFF_HORIZ(3), OFF_HORIZ(4), OFF_HORIZ(5)}, +++ {OFF_VERT(2), OFF_VERT(3), OFF_VERT(4), OFF_VERT(5)} 
+++}; +++ +++ +++// Helper fns +++#ifndef hevc_mem_bits32 +++static av_always_inline uint32_t hevc_mem_bits32(const void * buf, const unsigned int offset) +++{ +++ return AV_RB32((const uint8_t *)buf + (offset >> 3)) << (offset & 7); +++} +++#endif +++ +++#if AV_GCC_VERSION_AT_LEAST(3,4) && !defined(hevc_clz32) +++#define hevc_clz32 hevc_clz32_builtin +++static av_always_inline unsigned int hevc_clz32_builtin(const uint32_t x) +++{ +++ // __builtin_clz says it works on ints - so adjust if int is >32 bits long +++ return __builtin_clz(x) - (sizeof(int) * 8 - 32); +++} +++#endif +++ +++// It is unlikely that we will ever need this but include for completeness +++#ifndef hevc_clz32 +++static inline unsigned int hevc_clz32(unsigned int x) +++{ +++ unsigned int n = 1; +++ if ((x & 0xffff0000) == 0) { +++ n += 16; +++ x <<= 16; +++ } +++ if ((x & 0xff000000) == 0) { +++ n += 8; +++ x <<= 8; +++ } +++ if ((x & 0xf0000000) == 0) { +++ n += 4; +++ x <<= 4; +++ } +++ if ((x & 0xc0000000) == 0) { +++ n += 2; +++ x <<= 2; +++ } +++ return n - ((x >> 31) & 1); +++} +++#endif +++ +++ +++#if !USE_BY22 +++// If no by22 then _by22 functions will revert to normal and so _peek/_flush +++// will no longer be called but the setup calls will still exist and we want +++// to null them out +++#define bypass_start(s) +++#define bypass_finish(s) +++#else +++// Use BY22 for residual bypass block +++ +++#define bypass_start(s) get_cabac_by22_start(&s->HEVClc->cc) +++#define bypass_finish(s) get_cabac_by22_finish(&s->HEVClc->cc) +++ +++// BY22 notes that bypass is simply a divide into the bitstream and so we +++// can peek out large quantities of bits at one and treat the result as if +++// it was VLC. 
In many cases this will lead to O(1) processing rather than +++// O(n) though the setup and teardown is sufficiently expensive that it is +++// only worth using if we expect to be dealing with more than a few bits +++// The definition of "a few bits" will vary from platform to platform but +++// tests on ARM show that it probably isn't worth it for a single coded +++// residual, but is for >1 - this is probaly reinforced that if there are +++// more residuals then they are likely to be bigger and this will make the +++// O(1) nature of the code more worthwhile. +++ +++ +++#if !USE_BY22_DIV +++// * 1/x @ 32 bits gets us 22 bits of accuracy +++#define CABAC_BY22_PEEK_BITS 22 +++#else +++// A real 32-bit divide gets us another bit +++// If we have a 64 bit int & a unit time divider then we should get a lot +++// of bits (55) but that is untested and it is unclear if it would give +++// us a large advantage +++#define CABAC_BY22_PEEK_BITS 23 +++#endif +++ +++// Bypass block start +++// Must be called before _by22_peek is used as it sets the CABAC environment +++// into the correct state. _by22_finish must be called to return to 'normal' +++// (i.e. 
non-bypass) cabac decoding +++static inline void get_cabac_by22_start(CABACContext * const c) +++{ +++ const unsigned int bits = __builtin_ctz(c->low); +++ const uint32_t m = hevc_mem_bits32(c->bytestream, 0); +++ uint32_t x = (c->low << (22 - CABAC_BITS)) ^ ((m ^ 0x80000000U) >> (9 + CABAC_BITS - bits)); +++#if !USE_BY22_DIV +++ const uint32_t inv = cabac_by22_inv_range[c->range & 0xff]; +++#endif +++ +++ c->bytestream -= (CABAC_BITS / 8); +++ c->by22.bits = bits; +++#if !USE_BY22_DIV +++ c->by22.range = c->range; +++ c->range = inv; +++#endif +++ c->low = x; +++} +++ +++// Bypass block finish +++// Must be called at the end of the bypass block to return to normal operation +++static inline void get_cabac_by22_finish(CABACContext * const c) +++{ +++ unsigned int used = c->by22.bits; +++ unsigned int bytes_used = (used / CABAC_BITS) * (CABAC_BITS / 8); +++ unsigned int bits_used = used & (CABAC_BITS == 16 ? 15 : 7); +++ +++ c->bytestream += bytes_used + (CABAC_BITS / 8); +++ c->low = (((uint32_t)c->low >> (22 - CABAC_BITS + bits_used)) | 1) << bits_used; +++#if !USE_BY22_DIV +++ c->range = c->by22.range; +++#endif +++} +++ +++// Peek bypass bits +++// _by22_start must be called before _by22_peek is called and _by22_flush +++// must be called afterwards to flush any used bits +++// The actual number of valid bits returned is +++// min(<coded bypass block length>, CABAC_BY22_PEEK_BITS). 
CABAC_BY22_PEEK_BITS +++// will be at least 22 which should be long enough for any prefix or suffix +++// though probably not long enough for the worst case combination +++#ifndef get_cabac_by22_peek +++static inline uint32_t get_cabac_by22_peek(const CABACContext * const c) +++{ +++#if USE_BY22_DIV +++ return ((unsigned int)c->low / (unsigned int)c->range) << 9; +++#else +++ uint32_t x = c->low & ~1U; +++ const uint32_t inv = c->range; +++ +++ if (inv != 0) +++ x = (uint32_t)(((uint64_t)x * (uint64_t)inv) >> 32); +++ +++ return x << 1; +++#endif +++} +++#endif +++ +++// Flush bypass bits peeked by _by22_peek +++// Flush n bypass bits. n must be >= 1 to guarantee correct operation +++// val is an unmodified copy of whatever _by22_peek returned +++#ifndef get_cabac_by22_flush +++static inline void get_cabac_by22_flush(CABACContext * c, const unsigned int n, const uint32_t val) +++{ +++ // Subtract the bits used & reshift up to the top of the word +++#if USE_BY22_DIV +++ const uint32_t low = (((unsigned int)c->low << n) - (((val >> (32 - n)) * (unsigned int)c->range) << 23)); +++#else +++ const uint32_t low = (((uint32_t)c->low << n) - (((val >> (32 - n)) * c->by22.range) << 23)); +++#endif +++ +++ // and refill lower bits +++ // We will probably OR over some existing bits but that doesn't matter +++ c->by22.bits += n; +++ c->low = low | (hevc_mem_bits32(c->bytestream, c->by22.bits) >> 9); +++} +++#endif +++ +++#endif // USE_BY22 +++ +++ ++ void ff_hevc_save_states(HEVCContext *s, int ctb_addr_ts) ++ { ++ if (s->ps.pps->entropy_coding_sync_enabled_flag && ++@@ -863,19 +1126,19 @@ int ff_hevc_cbf_luma_decode(HEVCContext *s, int trafo_depth) ++ return GET_CABAC(elem_offset[CBF_LUMA] + !trafo_depth); ++ } ++ ++-static int hevc_transform_skip_flag_decode(HEVCContext *s, int c_idx) +++static int hevc_transform_skip_flag_decode(HEVCContext *s, int c_idx_nz) ++ { ++- return GET_CABAC(elem_offset[TRANSFORM_SKIP_FLAG] + !!c_idx); +++ return 
GET_CABAC(elem_offset[TRANSFORM_SKIP_FLAG] + c_idx_nz); ++ } ++ ++-static int explicit_rdpcm_flag_decode(HEVCContext *s, int c_idx) +++static int explicit_rdpcm_flag_decode(HEVCContext *s, int c_idx_nz) ++ { ++- return GET_CABAC(elem_offset[EXPLICIT_RDPCM_FLAG] + !!c_idx); +++ return GET_CABAC(elem_offset[EXPLICIT_RDPCM_FLAG] + c_idx_nz); ++ } ++ ++-static int explicit_rdpcm_dir_flag_decode(HEVCContext *s, int c_idx) +++static int explicit_rdpcm_dir_flag_decode(HEVCContext *s, int c_idx_nz) ++ { ++- return GET_CABAC(elem_offset[EXPLICIT_RDPCM_DIR_FLAG] + !!c_idx); +++ return GET_CABAC(elem_offset[EXPLICIT_RDPCM_DIR_FLAG] + c_idx_nz); ++ } ++ ++ int ff_hevc_log2_res_scale_abs(HEVCContext *s, int idx) { ++@@ -891,14 +1154,14 @@ int ff_hevc_res_scale_sign_flag(HEVCContext *s, int idx) { ++ return GET_CABAC(elem_offset[RES_SCALE_SIGN_FLAG] + idx); ++ } ++ ++-static av_always_inline void last_significant_coeff_xy_prefix_decode(HEVCContext *s, int c_idx, +++static av_always_inline void last_significant_coeff_xy_prefix_decode(HEVCContext *s, int c_idx_nz, ++ int log2_size, int *last_scx_prefix, int *last_scy_prefix) ++ { ++ int i = 0; ++ int max = (log2_size << 1) - 1; ++ int ctx_offset, ctx_shift; ++ ++- if (!c_idx) { +++ if (!c_idx_nz) { ++ ctx_offset = 3 * (log2_size - 2) + ((log2_size - 1) >> 2); ++ ctx_shift = (log2_size + 1) >> 2; ++ } else { ++@@ -929,22 +1192,16 @@ static av_always_inline int last_significant_coeff_suffix_decode(HEVCContext *s, ++ return value; ++ } ++ ++-static av_always_inline int significant_coeff_group_flag_decode(HEVCContext *s, int c_idx, int ctx_cg) +++static av_always_inline int significant_coeff_group_flag_decode(HEVCContext *s, int c_idx_nz, int ctx_cg) ++ { ++ int inc; ++ ++- inc = FFMIN(ctx_cg, 1) + (c_idx>0 ? 
2 : 0); +++ inc = (ctx_cg != 0) + (c_idx_nz << 1); ++ ++ return GET_CABAC(elem_offset[SIGNIFICANT_COEFF_GROUP_FLAG] + inc); ++ } ++-static av_always_inline int significant_coeff_flag_decode(HEVCContext *s, int x_c, int y_c, ++- int offset, const uint8_t *ctx_idx_map) ++-{ ++- int inc = ctx_idx_map[(y_c << 2) + x_c] + offset; ++- return GET_CABAC(elem_offset[SIGNIFICANT_COEFF_FLAG] + inc); ++-} ++ ++-static av_always_inline int significant_coeff_flag_decode_0(HEVCContext *s, int c_idx, int offset) +++static av_always_inline int significant_coeff_flag_decode_0(HEVCContext *s, int offset) ++ { ++ return GET_CABAC(elem_offset[SIGNIFICANT_COEFF_FLAG] + offset); ++ } ++@@ -966,65 +1223,305 @@ static av_always_inline int coeff_abs_level_greater2_flag_decode(HEVCContext *s, ++ return GET_CABAC(elem_offset[COEFF_ABS_LEVEL_GREATER2_FLAG] + inc); ++ } ++ ++-static av_always_inline int coeff_abs_level_remaining_decode(HEVCContext *s, int rc_rice_param) +++ +++#if !USE_BY22 +++#define coeff_abs_level_remaining_decode_bypass(s,r) coeff_abs_level_remaining_decode(s, r) +++#endif +++ +++ +++#ifndef coeff_abs_level_remaining_decode_bypass +++static int coeff_abs_level_remaining_decode_bypass(HEVCContext * const s, const unsigned int rice_param) +++{ +++ CABACContext * const c = &s->HEVClc->cc; +++ uint32_t y; +++ unsigned int prefix; +++ unsigned int last_coeff_abs_level_remaining; +++ unsigned int n; +++ +++ y = get_cabac_by22_peek(c); +++ prefix = hevc_clz32(~y); +++ // y << prefix will always have top bit 0 +++ +++ if (prefix < 3) { +++ const unsigned int suffix = (y << prefix) >> (31 - rice_param); +++ last_coeff_abs_level_remaining = (prefix << rice_param) + suffix; +++ n = prefix + 1 + rice_param; +++ } +++ else if (prefix * 2 + rice_param <= CABAC_BY22_PEEK_BITS + 2) +++ { +++ const uint32_t suffix = ((y << prefix) | 0x80000000) >> (34 - (prefix + rice_param)); +++ +++ last_coeff_abs_level_remaining = (2 << rice_param) + suffix; +++ n = prefix * 2 + rice_param - 2; +++ } +++ 
else { +++ unsigned int suffix; +++ +++ get_cabac_by22_flush(c, prefix, y); +++ y = get_cabac_by22_peek(c); +++ +++ suffix = (y | 0x80000000) >> (34 - (prefix + rice_param)); +++ last_coeff_abs_level_remaining = (2 << rice_param) + suffix; +++ n = prefix + rice_param - 2; +++ } +++ +++ get_cabac_by22_flush(c, n, y); +++ +++ return last_coeff_abs_level_remaining; +++} +++#endif +++ +++static int coeff_abs_level_remaining_decode(HEVCContext * const s, int rc_rice_param) ++ { +++ CABACContext * const c = &s->HEVClc->cc; ++ int prefix = 0; ++ int suffix = 0; ++ int last_coeff_abs_level_remaining; ++ int i; ++ ++- while (prefix < CABAC_MAX_BIN && get_cabac_bypass(&s->HEVClc->cc)) +++ while (prefix < CABAC_MAX_BIN && get_cabac_bypass(c)) ++ prefix++; ++ if (prefix == CABAC_MAX_BIN) { ++ av_log(s->avctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", prefix); ++ return 0; ++ } +++ ++ if (prefix < 3) { ++ for (i = 0; i < rc_rice_param; i++) ++- suffix = (suffix << 1) | get_cabac_bypass(&s->HEVClc->cc); +++ suffix = (suffix << 1) | get_cabac_bypass(c); ++ last_coeff_abs_level_remaining = (prefix << rc_rice_param) + suffix; ++ } else { ++ int prefix_minus3 = prefix - 3; ++ for (i = 0; i < prefix_minus3 + rc_rice_param; i++) ++- suffix = (suffix << 1) | get_cabac_bypass(&s->HEVClc->cc); +++ suffix = (suffix << 1) | get_cabac_bypass(c); ++ last_coeff_abs_level_remaining = (((1 << prefix_minus3) + 3 - 1) ++ << rc_rice_param) + suffix; ++ } +++ ++ return last_coeff_abs_level_remaining; ++ } ++ ++-static av_always_inline int coeff_sign_flag_decode(HEVCContext *s, uint8_t nb) +++#if !USE_BY22 +++#define coeff_sign_flag_decode_bypass coeff_sign_flag_decode +++static inline uint32_t coeff_sign_flag_decode(HEVCContext * const s, const unsigned int nb) ++ { ++- int i; ++- int ret = 0; +++ CABACContext * const c = &s->HEVClc->cc; +++ unsigned int i; +++ uint32_t ret = 0; ++ ++ for (i = 0; i < nb; i++) ++- ret = (ret << 1) | get_cabac_bypass(&s->HEVClc->cc); ++- return ret; +++ ret = (ret << 1) 
| get_cabac_bypass(c); +++ +++ return ret << (32 - nb); ++ } +++#endif +++ +++#ifndef coeff_sign_flag_decode_bypass +++static inline uint32_t coeff_sign_flag_decode_bypass(HEVCContext * const s, const unsigned int nb) +++{ +++ CABACContext * const c = &s->HEVClc->cc; +++ uint32_t y; +++ y = get_cabac_by22_peek(c); +++ get_cabac_by22_flush(c, nb, y); +++ return y & ~(0xffffffffU >> nb); +++} +++#endif +++ +++ +++#ifndef get_cabac_greater1_bits +++static inline unsigned int get_cabac_greater1_bits(CABACContext * const c, const unsigned int n, +++ uint8_t * const state0) +++{ +++ unsigned int i; +++ unsigned int rv = 0; +++ for (i = 0; i != n; ++i) { +++ const unsigned int idx = rv != 0 ? 0 : i < 3 ? i + 1 : 3; +++ const unsigned int b = get_cabac(c, state0 + idx); +++ rv = (rv << 1) | b; +++ } +++ return rv; +++} +++#endif +++ +++ +++// N.B. levels returned are the values assuming coeff_abs_level_remaining +++// is uncoded, so 1 must be added if it is coded. sum_abs also reflects +++// this version of events. 
+++static inline uint32_t get_greaterx_bits(HEVCContext * const s, const unsigned int n_end, int * const levels, +++ int * const pprev_subset_coded, int * const psum, +++ const unsigned int idx0_gt1, const unsigned int idx_gt2) +++{ +++ CABACContext * const c = &s->HEVClc->cc; +++ uint8_t * const state0 = s->HEVClc->cabac_state + idx0_gt1; +++ uint8_t * const state_gt2 = s->HEVClc->cabac_state + idx_gt2; +++ unsigned int rv; +++ unsigned int i; +++ const unsigned int n = FFMIN(n_end, 8); +++ +++ // Really this is i != n but the simple unconditional loop is cheaper +++ // and faster +++ for (i = 0; i != 8; ++i) +++ levels[i] = 1; +++ +++ rv = get_cabac_greater1_bits(c, n, state0); +++ +++ *pprev_subset_coded = 0; +++ *psum = n; +++ +++ rv <<= (32 - n); +++ if (rv != 0) +++ { +++ *pprev_subset_coded = 1; +++ *psum = n + 1; +++ i = hevc_clz32(rv); +++ levels[i] = 2; +++ if (get_cabac(c, state_gt2) == 0) +++ { +++ // Unset first coded bit +++ rv &= ~(0x80000000U >> i); +++ } +++ } +++ +++ if (n_end > 8) { +++ const unsigned int g8 = n_end - 8; +++ rv |= ((1 << g8) - 1) << (24 - g8); +++ for (i = 0; i != g8; ++i) { +++ levels[i + 8] = 0; +++ } +++ } +++ +++ return rv; +++} +++ +++// extended_precision_processing_flag must be false given we are +++// putting the result into a 16-bit array +++// So trans_coeff_level must fit in 16 bits too (7.4.9.1 definition of coeff_abs_level_remaining) +++// scale_m is uint8_t +++// +++// scale is [40 - 72] << [0..12] based on qp- worst case is (45 << 12) +++// or it can be 2 (if we have transquant_bypass) +++// shift is set to one less than we really want but would normally be +++// s->ps.sps->bit_depth (max 16, min 8) + log2_trafo_size (max 5, min 2?) - 5 = max 16 min 5? 
+++// however the scale shift is substracted from shift to a min 0 so scale_m worst = 45 << 6 +++// This can still theoretically lead to overflow but the coding would have to be very odd (& inefficient) +++// to achieve it +++ +++#ifndef trans_scale_sat +++static inline int trans_scale_sat(const int level, const unsigned int scale, const unsigned int scale_m, const unsigned int shift) +++{ +++ return av_clip_int16((((level * (int)(scale * scale_m)) >> shift) + 1) >> 1); +++} +++#endif +++ +++ +++#ifndef update_rice +++static inline void update_rice(uint8_t * const stat_coeff, +++ const unsigned int last_coeff_abs_level_remaining, +++ const unsigned int c_rice_param) +++{ +++ const unsigned int x = (last_coeff_abs_level_remaining << 1) >> c_rice_param; +++ if (x >= 6) +++ (*stat_coeff)++; +++ else if (x == 0 && *stat_coeff > 0) +++ (*stat_coeff)--; +++} +++#endif +++ +++ +++// n must be > 0 on entry +++#ifndef get_cabac_sig_coeff_flag_idxs +++static inline uint8_t * get_cabac_sig_coeff_flag_idxs(CABACContext * const c, uint8_t * const state0, +++ unsigned int n, +++ const uint8_t const * ctx_map, +++ uint8_t * p) +++{ +++ do { +++ if (get_cabac(c, state0 + ctx_map[n])) +++ *p++ = n; +++ } while (--n != 0); +++ return p; +++} +++#endif +++ +++ +++static int get_sig_coeff_flag_idxs(CABACContext * const c, uint8_t * const state0, +++ unsigned int n, +++ const uint8_t const * ctx_map, +++ uint8_t * const flag_idx) +++{ +++ int rv; +++ +++ rv = get_cabac_sig_coeff_flag_idxs(c, state0, n, ctx_map, flag_idx) - flag_idx; +++ +++ return rv; +++} +++ +++#define H4x4(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) {\ +++ x0, x1, x2, x3,\ +++ x4, x5, x6, x7,\ +++ x8, x9, x10, x11,\ +++ x12, x13, x14, x15} +++ +++#define V4x4(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) {\ +++ x0, x4, x8, x12,\ +++ x1, x5, x9, x13,\ +++ x2, x6, x10, x14,\ +++ x3, x7, x11, x15} +++ +++#define D4x4(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, 
x12, x13, x14, x15) {\ +++ x0, x4, x1, x8,\ +++ x5, x2, x12, x9,\ +++ x6, x3, x13, x10,\ +++ x7, x14, x11, x15} +++ +++ +++static inline int next_subset(HEVCContext * const s, int i, const int c_idx_nz, +++ uint8_t * const significant_coeff_group_flag, +++ const uint8_t * const scan_x_cg, const uint8_t * const scan_y_cg, +++ int * const pPrev_sig) +++{ +++ while (--i >= 0) { +++ unsigned int x_cg = scan_x_cg[i]; +++ unsigned int y_cg = scan_y_cg[i]; +++ +++ // For the flag decode we only care about Z/NZ but +++ // we use the full Right + Down * 2 when calculating +++ // significant coeff flags so we obtain it here +++ //. +++ // The group flag array is one longer than it needs to +++ // be so we don't need to check for y_cg limits +++ unsigned int prev_sig = ((significant_coeff_group_flag[y_cg] >> (x_cg + 1)) & 1) | +++ (((significant_coeff_group_flag[y_cg + 1] >> x_cg) & 1) << 1); +++ +++ if (i == 0 || +++ significant_coeff_group_flag_decode(s, c_idx_nz, prev_sig)) +++ { +++ significant_coeff_group_flag[y_cg] |= (1 << x_cg); +++ *pPrev_sig = prev_sig; +++ break; +++ } +++ } +++ +++ return i; +++} +++ ++ ++ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ int log2_trafo_size, enum ScanType scan_idx, ++ int c_idx) ++ { ++-#define GET_COORD(offset, n) \ ++- do { \ ++- x_c = (x_cg << 2) + scan_x_off[n]; \ ++- y_c = (y_cg << 2) + scan_y_off[n]; \ ++- } while (0) ++- HEVCLocalContext *lc = s->HEVClc; ++- int transform_skip_flag = 0; +++ HEVCLocalContext * const lc = s->HEVClc; +++ int trans_skip_or_bypass = lc->cu.cu_transquant_bypass_flag; ++ ++ int last_significant_coeff_x, last_significant_coeff_y; ++- int last_scan_pos; ++- int n_end; ++ int num_coeff = 0; ++- int greater1_ctx = 1; +++ int prev_subset_coded = 0; ++ ++ int num_last_subset; ++ int x_cg_last_sig, y_cg_last_sig; ++ ++- const uint8_t *scan_x_cg, *scan_y_cg, *scan_x_off, *scan_y_off; +++ const uint8_t *scan_x_cg, *scan_y_cg; +++ const xy_off_t * scan_xy_off; ++ ++ ptrdiff_t stride = 
s->frame->linesize[c_idx]; ++ int hshift = s->ps.sps->hshift[c_idx]; ++@@ -1032,21 +1529,28 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ uint8_t *dst = &s->frame->data[c_idx][(y0 >> vshift) * stride + ++ ((x0 >> hshift) << s->ps.sps->pixel_shift)]; ++ #ifdef RPI ++- int use_vpu = s->enable_rpi && !lc->cu.cu_transquant_bypass_flag && !transform_skip_flag && !lc->tu.cross_pf && log2_trafo_size>=4; +++ //***** transform_skip_flag decoded later! +++ int use_vpu = s->enable_rpi && !lc->cu.cu_transquant_bypass_flag /* && !transform_skip_flag*/ && !lc->tu.cross_pf && log2_trafo_size>=4; ++ #endif ++ int16_t *coeffs = (int16_t*)(c_idx ? lc->edge_emu_buffer2 : lc->edge_emu_buffer); ++- uint8_t significant_coeff_group_flag[8][8] = {{0}}; +++ uint8_t significant_coeff_group_flag[9] = {0}; // Allow 1 final byte that is always zero ++ int explicit_rdpcm_flag = 0; ++ int explicit_rdpcm_dir_flag; ++ ++ int trafo_size = 1 << log2_trafo_size; ++ int i; ++- int qp,shift,add,scale,scale_m; +++ int qp,shift,scale; ++ static const uint8_t level_scale[] = { 40, 45, 51, 57, 64, 72 }; ++ const uint8_t *scale_matrix = NULL; ++ uint8_t dc_scale; ++ int pred_mode_intra = (c_idx == 0) ? 
lc->tu.intra_pred_mode : ++ lc->tu.intra_pred_mode_c; +++ +++ int prev_sig = 0; +++ const int c_idx_nz = (c_idx != 0); +++ +++ int may_hide_sign; +++ ++ #ifdef RPI ++ if (s->enable_rpi) { ++ int n = trafo_size * trafo_size; ++@@ -1078,7 +1582,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ ++ // Derive QP for dequant ++ if (!lc->cu.cu_transquant_bypass_flag) { ++- static const int qp_c[] = { 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37 }; +++ static const uint8_t qp_c[] = { 29, 30, 31, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37 }; ++ static const uint8_t rem6[51 + 4 * 6 + 1] = { ++ 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, ++ 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, ++@@ -1094,9 +1598,19 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ }; ++ int qp_y = lc->qp_y; ++ +++ may_hide_sign = s->ps.pps->sign_data_hiding_flag; +++ ++ if (s->ps.pps->transform_skip_enabled_flag && ++ log2_trafo_size <= s->ps.pps->log2_max_transform_skip_block_size) { ++- transform_skip_flag = hevc_transform_skip_flag_decode(s, c_idx); +++ int transform_skip_flag = hevc_transform_skip_flag_decode(s, c_idx_nz); +++ if (transform_skip_flag) { +++ trans_skip_or_bypass = 1; +++ if (lc->cu.pred_mode == MODE_INTRA && +++ s->ps.sps->implicit_rdpcm_enabled_flag && +++ (pred_mode_intra == 10 || pred_mode_intra == 26)) { +++ may_hide_sign = 0; +++ } +++ } ++ } ++ ++ if (c_idx == 0) { ++@@ -1129,39 +1643,73 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ qp += s->ps.sps->qp_bd_offset; ++ } ++ ++- shift = s->ps.sps->bit_depth + log2_trafo_size - 5; ++- add = 1 << (shift-1); ++- scale = level_scale[rem6[qp]] << (div6[qp]); ++- scale_m = 16; // default when no custom scaling lists. 
++- dc_scale = 16; +++ // Shift is set to one less than will actually occur as the scale +++ // and saturate step adds 1 and then shifts right again +++ shift = s->ps.sps->bit_depth + log2_trafo_size - 6; +++ scale = level_scale[rem6[qp]]; +++ if (div6[qp] >= shift) { +++ scale <<= (div6[qp] - shift); +++ shift = 0; +++ } else { +++ shift -= div6[qp]; +++ } ++ ++- if (s->ps.sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) { +++ if (s->ps.sps->scaling_list_enable_flag && !(trans_skip_or_bypass && log2_trafo_size > 2)) { ++ const ScalingList *sl = s->ps.pps->scaling_list_data_present_flag ? ++- &s->ps.pps->scaling_list : &s->ps.sps->scaling_list; +++ &s->ps.pps->scaling_list : &s->ps.sps->scaling_list; ++ int matrix_id = lc->cu.pred_mode != MODE_INTRA; ++ ++ matrix_id = 3 * matrix_id + c_idx; ++ ++ scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id]; +++ dc_scale = scale_matrix[0]; ++ if (log2_trafo_size >= 4) ++ dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id]; ++ } +++ else +++ { +++ static const uint8_t sixteen_scale[64] = { +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16, +++ 16, 16, 16, 16, 16, 16, 16, 16 +++ }; +++ scale_matrix = sixteen_scale; +++ dc_scale = 16; +++ } ++ } else { +++ static const uint8_t unit_scale[64] = { +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ 1, 1, 1, 1, 1, 1, 1, 1, +++ }; +++ scale_matrix = unit_scale; ++ shift = 0; ++- add = 0; ++- scale = 0; ++- dc_scale = 0; +++ scale = 2; // We will shift right to kill this +++ dc_scale = 1; +++ +++ may_hide_sign = 0; ++ } ++ ++ if (lc->cu.pred_mode == MODE_INTER && s->ps.sps->explicit_rdpcm_enabled_flag && 
++- (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) { ++- explicit_rdpcm_flag = explicit_rdpcm_flag_decode(s, c_idx); +++ trans_skip_or_bypass) { +++ explicit_rdpcm_flag = explicit_rdpcm_flag_decode(s, c_idx_nz); ++ if (explicit_rdpcm_flag) { ++- explicit_rdpcm_dir_flag = explicit_rdpcm_dir_flag_decode(s, c_idx); +++ may_hide_sign = 0; +++ explicit_rdpcm_dir_flag = explicit_rdpcm_dir_flag_decode(s, c_idx_nz); ++ } ++ } ++ ++- last_significant_coeff_xy_prefix_decode(s, c_idx, log2_trafo_size, +++ last_significant_coeff_xy_prefix_decode(s, c_idx_nz, log2_trafo_size, ++ &last_significant_coeff_x, &last_significant_coeff_y); ++ ++ if (last_significant_coeff_x > 3) { ++@@ -1189,119 +1737,113 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ int last_x_c = last_significant_coeff_x & 3; ++ int last_y_c = last_significant_coeff_y & 3; ++ ++- scan_x_off = ff_hevc_diag_scan4x4_x; ++- scan_y_off = ff_hevc_diag_scan4x4_y; ++ num_coeff = diag_scan4x4_inv[last_y_c][last_x_c]; ++- if (trafo_size == 4) { +++ +++ switch (log2_trafo_size) { +++ case 2: ++ scan_x_cg = scan_1x1; ++ scan_y_cg = scan_1x1; ++- } else if (trafo_size == 8) { +++ break; +++ case 3: ++ num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4; ++ scan_x_cg = diag_scan2x2_x; ++ scan_y_cg = diag_scan2x2_y; ++- } else if (trafo_size == 16) { +++ break; +++ case 4: ++ num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4; ++ scan_x_cg = ff_hevc_diag_scan4x4_x; ++ scan_y_cg = ff_hevc_diag_scan4x4_y; ++- } else { // trafo_size == 32 +++ break; +++ case 5: +++ default: ++ num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4; ++ scan_x_cg = ff_hevc_diag_scan8x8_x; ++ scan_y_cg = ff_hevc_diag_scan8x8_y; +++ break; ++ } ++ break; ++ } ++ case SCAN_HORIZ: ++ scan_x_cg = horiz_scan2x2_x; ++ scan_y_cg = horiz_scan2x2_y; ++- scan_x_off = horiz_scan4x4_x; ++- scan_y_off = horiz_scan4x4_y; ++ num_coeff = 
horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x]; ++ break; ++ default: //SCAN_VERT ++ scan_x_cg = horiz_scan2x2_y; ++ scan_y_cg = horiz_scan2x2_x; ++- scan_x_off = horiz_scan4x4_y; ++- scan_y_off = horiz_scan4x4_x; ++ num_coeff = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y]; ++ break; ++ } ++ num_coeff++; ++ num_last_subset = (num_coeff - 1) >> 4; ++ ++- for (i = num_last_subset; i >= 0; i--) { ++- int n, m; ++- int x_cg, y_cg, x_c, y_c, pos; ++- int implicit_non_zero_coeff = 0; ++- int64_t trans_coeff_level; ++- int prev_sig = 0; ++- int offset = i << 4; ++- int rice_init = 0; ++- ++- uint8_t significant_coeff_flag_idx[16]; ++- uint8_t nb_significant_coeff_flag = 0; +++ significant_coeff_group_flag[y_cg_last_sig] = 1 << x_cg_last_sig; // 1st subset always significant ++ ++- x_cg = scan_x_cg[i]; ++- y_cg = scan_y_cg[i]; +++ scan_xy_off = off_xys[scan_idx][log2_trafo_size - 2]; ++ ++- if ((i < num_last_subset) && (i > 0)) { ++- int ctx_cg = 0; ++- if (x_cg < (1 << (log2_trafo_size - 2)) - 1) ++- ctx_cg += significant_coeff_group_flag[x_cg + 1][y_cg]; ++- if (y_cg < (1 << (log2_trafo_size - 2)) - 1) ++- ctx_cg += significant_coeff_group_flag[x_cg][y_cg + 1]; ++- ++- significant_coeff_group_flag[x_cg][y_cg] = ++- significant_coeff_group_flag_decode(s, c_idx, ctx_cg); ++- implicit_non_zero_coeff = 1; ++- } else { ++- significant_coeff_group_flag[x_cg][y_cg] = ++- ((x_cg == x_cg_last_sig && y_cg == y_cg_last_sig) || ++- (x_cg == 0 && y_cg == 0)); ++- } +++ i = num_last_subset; +++ do { +++ int implicit_non_zero_coeff = 0; +++ int n_end; ++ ++- last_scan_pos = num_coeff - offset - 1; +++ uint8_t significant_coeff_flag_idx[16]; +++ unsigned int nb_significant_coeff_flag = 0; ++ ++ if (i == num_last_subset) { +++ // First time through +++ int last_scan_pos = num_coeff - (i << 4) - 1; ++ n_end = last_scan_pos - 1; ++ significant_coeff_flag_idx[0] = last_scan_pos; ++ nb_significant_coeff_flag = 1; ++ } else { ++ n_end = 15; +++ 
implicit_non_zero_coeff = (i != 0); ++ } ++ ++- if (x_cg < ((1 << log2_trafo_size) - 1) >> 2) ++- prev_sig = !!significant_coeff_group_flag[x_cg + 1][y_cg]; ++- if (y_cg < ((1 << log2_trafo_size) - 1) >> 2) ++- prev_sig += (!!significant_coeff_group_flag[x_cg][y_cg + 1] << 1); ++- ++- if (significant_coeff_group_flag[x_cg][y_cg] && n_end >= 0) { ++- static const uint8_t ctx_idx_map[] = { ++- 0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8, // log2_trafo_size == 2 ++- 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, // prev_sig == 0 ++- 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, // prev_sig == 1 ++- 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, // prev_sig == 2 ++- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 // default +++ if (n_end >= 0) { +++ static const uint8_t ctx_idx_maps_ts2[3][16] = { +++ D4x4(0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8), // log2_trafo_size == 2 +++ H4x4(0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8), // log2_trafo_size == 2 +++ V4x4(0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8) // log2_trafo_size == 2 +++ }; +++ static const uint8_t ctx_idx_maps[3][4][16] = { +++ { +++ D4x4(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 0 +++ D4x4(2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 1 +++ D4x4(2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0), // prev_sig == 2 +++ D4x4(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2) // prev_sig == 3, default +++ }, +++ { +++ H4x4(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 0 +++ H4x4(2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 1 +++ H4x4(2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0), // prev_sig == 2 +++ H4x4(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2) // prev_sig == 3, default +++ }, +++ { +++ V4x4(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 0 +++ V4x4(2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 1 +++ V4x4(2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0), // prev_sig == 2 +++ V4x4(2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2) // prev_sig == 3, default +++ } ++ }; ++ const uint8_t *ctx_idx_map_p; ++ int scf_offset = 0; ++- if (s->ps.sps->transform_skip_context_enabled_flag && ++- (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) { ++- ctx_idx_map_p = (uint8_t*) &ctx_idx_map[4 * 16]; ++- if (c_idx == 0) { ++- scf_offset = 40; ++- } else { ++- scf_offset = 14 + 27; ++- } +++ +++ if (s->ps.sps->transform_skip_context_enabled_flag && trans_skip_or_bypass) { +++ ctx_idx_map_p = ctx_idx_maps[0][3]; +++ scf_offset = 40 + c_idx_nz; ++ } else { ++- if (c_idx != 0) +++ if (c_idx_nz != 0) ++ scf_offset = 27; +++ ++ if (log2_trafo_size == 2) { ++- ctx_idx_map_p = (uint8_t*) &ctx_idx_map[0]; +++ ctx_idx_map_p = ctx_idx_maps_ts2[scan_idx]; ++ } else { ++- ctx_idx_map_p = (uint8_t*) &ctx_idx_map[(prev_sig + 1) << 4]; ++- if (c_idx == 0) { ++- if ((x_cg > 0 || y_cg > 0)) +++ ctx_idx_map_p = ctx_idx_maps[scan_idx][prev_sig]; +++ if (!c_idx_nz) { +++ if (i != 0) ++ scf_offset += 3; +++ ++ if (log2_trafo_size == 3) { ++ scf_offset += (scan_idx == SCAN_DIAG) ? 
9 : 15; ++ } else { ++@@ -1315,34 +1857,30 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } ++ } ++ } ++- for (n = n_end; n > 0; n--) { ++- x_c = scan_x_off[n]; ++- y_c = scan_y_off[n]; ++- if (significant_coeff_flag_decode(s, x_c, y_c, scf_offset, ctx_idx_map_p)) { ++- significant_coeff_flag_idx[nb_significant_coeff_flag] = n; ++- nb_significant_coeff_flag++; +++ +++ if (n_end > 0) { +++ int cnt = get_sig_coeff_flag_idxs(&s->HEVClc->cc, +++ s->HEVClc->cabac_state + elem_offset[SIGNIFICANT_COEFF_FLAG] + scf_offset, +++ n_end, ctx_idx_map_p, +++ significant_coeff_flag_idx + nb_significant_coeff_flag); +++ +++ nb_significant_coeff_flag += cnt; +++ if (cnt != 0) { ++ implicit_non_zero_coeff = 0; ++ } ++ } +++ ++ if (implicit_non_zero_coeff == 0) { ++- if (s->ps.sps->transform_skip_context_enabled_flag && ++- (transform_skip_flag || lc->cu.cu_transquant_bypass_flag)) { ++- if (c_idx == 0) { ++- scf_offset = 42; ++- } else { ++- scf_offset = 16 + 27; ++- } +++ if (s->ps.sps->transform_skip_context_enabled_flag && trans_skip_or_bypass) { +++ scf_offset = 42 + c_idx_nz; ++ } else { ++ if (i == 0) { ++- if (c_idx == 0) ++- scf_offset = 0; ++- else ++- scf_offset = 27; +++ scf_offset = c_idx_nz ? 27 : 0; ++ } else { ++ scf_offset = 2 + scf_offset; ++ } ++ } ++- if (significant_coeff_flag_decode_0(s, c_idx, scf_offset) == 1) { +++ if (significant_coeff_flag_decode_0(s, scf_offset) == 1) { ++ significant_coeff_flag_idx[nb_significant_coeff_flag] = 0; ++ nb_significant_coeff_flag++; ++ } ++@@ -1352,141 +1890,185 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ } ++ } ++ ++- n_end = nb_significant_coeff_flag; ++- +++ if (nb_significant_coeff_flag != 0) { +++ const unsigned int gt1_idx_delta = (c_idx_nz << 2) | +++ ((i != 0 && !c_idx_nz) ? 
2 : 0) | +++ prev_subset_coded; +++ const unsigned int idx0_gt1 = elem_offset[COEFF_ABS_LEVEL_GREATER1_FLAG] + +++ (gt1_idx_delta << 2); +++ const unsigned int idx_gt2 = elem_offset[COEFF_ABS_LEVEL_GREATER2_FLAG] + +++ gt1_idx_delta; +++ +++ const unsigned int x_cg = scan_x_cg[i]; +++ const unsigned int y_cg = scan_y_cg[i]; +++ int16_t * const blk_coeffs = coeffs + +++ ((x_cg + (y_cg << log2_trafo_size)) << 2); +++ // This calculation is 'wrong' for log2_trafo_size == 2 +++ // but that doesn't matter as in this case x_cg & y_cg +++ // are always 0 so result is correct (0) anyway +++ const uint8_t * const blk_scale = scale_matrix + +++ (((x_cg + (y_cg << 3)) << (5 - log2_trafo_size))); +++ +++ // * The following code block doesn't deal with these flags: +++ // (nor did the one it replaces) +++ // +++ // cabac_bypass_alignment_enabled_flag +++ // This should be easy but I can't find a test case +++ // extended_precision_processing_flag +++ // This can extend the required precision past 16bits +++ // so is probably tricky - also no example found yet +++ +++#if USE_N_END_1 +++ if (nb_significant_coeff_flag == 1) { +++ // There is a small gain to be had from special casing the single +++ // transform coefficient case. The reduction in complexity +++ // makes up for the code duplication. 
+++ +++ int trans_coeff_level = 1; +++ int coeff_sign_flag; +++ int coded_val = 0; +++ +++ // initialize first elem of coeff_bas_level_greater1_flag +++ prev_subset_coded = 0; +++ +++ if (get_cabac(&s->HEVClc->cc, s->HEVClc->cabac_state + idx0_gt1 + 1)) { +++ trans_coeff_level = 2; +++ prev_subset_coded = 1; +++ coded_val = get_cabac(&s->HEVClc->cc, s->HEVClc->cabac_state + idx_gt2); +++ } ++ ++- if (n_end) { ++- int first_nz_pos_in_cg; ++- int last_nz_pos_in_cg; ++- int c_rice_param = 0; ++- int first_greater1_coeff_idx = -1; ++- uint8_t coeff_abs_level_greater1_flag[8]; ++- uint16_t coeff_sign_flag; ++- int sum_abs = 0; ++- int sign_hidden; ++- int sb_type; +++ // Probably not worth the overhead of starting by22 for just one value +++ coeff_sign_flag = get_cabac_bypass(&s->HEVClc->cc); ++ +++ if (coded_val) +++ { +++ if (!s->ps.sps->persistent_rice_adaptation_enabled_flag) { +++ trans_coeff_level = 3 + coeff_abs_level_remaining_decode(s, 0); +++ } else { +++ uint8_t * const stat_coeff = +++ lc->stat_coeff + trans_skip_or_bypass + 2 - ((c_idx_nz) << 1); +++ const unsigned int c_rice_param = *stat_coeff >> 2; +++ const int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(s, c_rice_param); ++ ++- // initialize first elem of coeff_bas_level_greater1_flag ++- int ctx_set = (i > 0 && c_idx == 0) ? 2 : 0; +++ trans_coeff_level = 3 + last_coeff_abs_level_remaining; +++ update_rice(stat_coeff, last_coeff_abs_level_remaining, c_rice_param); +++ } +++ } ++ ++- if (s->ps.sps->persistent_rice_adaptation_enabled_flag) { ++- if (!transform_skip_flag && !lc->cu.cu_transquant_bypass_flag) ++- sb_type = 2 * (c_idx == 0 ? 1 : 0); ++- else ++- sb_type = 2 * (c_idx == 0 ? 
1 : 0) + 1; ++- c_rice_param = lc->stat_coeff[sb_type] / 4; ++- } +++ { +++ const xy_off_t * const xy_off = scan_xy_off + significant_coeff_flag_idx[0]; +++ const int k = (int32_t)(coeff_sign_flag << 31) >> 31; +++ const unsigned int scale_m = blk_scale[xy_off->scale]; ++ ++- if (!(i == num_last_subset) && greater1_ctx == 0) ++- ctx_set++; ++- greater1_ctx = 1; ++- last_nz_pos_in_cg = significant_coeff_flag_idx[0]; ++- ++- for (m = 0; m < (n_end > 8 ? 8 : n_end); m++) { ++- int inc = (ctx_set << 2) + greater1_ctx; ++- coeff_abs_level_greater1_flag[m] = ++- coeff_abs_level_greater1_flag_decode(s, c_idx, inc); ++- if (coeff_abs_level_greater1_flag[m]) { ++- greater1_ctx = 0; ++- if (first_greater1_coeff_idx == -1) ++- first_greater1_coeff_idx = m; ++- } else if (greater1_ctx > 0 && greater1_ctx < 3) { ++- greater1_ctx++; +++ blk_coeffs[xy_off->coeff] = trans_scale_sat( +++ (trans_coeff_level ^ k) - k, // Apply sign +++ scale, +++ i == 0 && xy_off->coeff == 0 ? dc_scale : scale_m, +++ shift); ++ } ++ } ++- first_nz_pos_in_cg = significant_coeff_flag_idx[n_end - 1]; ++- ++- if (lc->cu.cu_transquant_bypass_flag || ++- (lc->cu.pred_mode == MODE_INTRA && ++- s->ps.sps->implicit_rdpcm_enabled_flag && transform_skip_flag && ++- (pred_mode_intra == 10 || pred_mode_intra == 26 )) || ++- explicit_rdpcm_flag) ++- sign_hidden = 0; ++ else ++- sign_hidden = (last_nz_pos_in_cg - first_nz_pos_in_cg >= 4); +++#endif +++ { +++ int sign_hidden = may_hide_sign; +++ int levels[16]; // Should be able to get away with int16_t but that fails some tests +++ uint32_t coeff_sign_flags; +++ uint32_t coded_vals = 0; +++ // Sum(abs(level[])) +++ // In fact we only need the bottom bit and in some future +++ // version that may be all we calculate +++ unsigned int sum_abs; +++ +++ coded_vals = get_greaterx_bits(s, nb_significant_coeff_flag, levels, +++ &prev_subset_coded, &sum_abs, idx0_gt1, idx_gt2); +++ +++ if (significant_coeff_flag_idx[0] - significant_coeff_flag_idx[nb_significant_coeff_flag 
- 1] <= 3) +++ sign_hidden = 0; +++ +++ // -- Start bypass block +++ +++ bypass_start(s); +++ +++ coeff_sign_flags = coeff_sign_flag_decode_bypass(s, nb_significant_coeff_flag - sign_hidden); +++ +++ if (coded_vals != 0) +++ { +++ const int rice_adaptation_enabled = s->ps.sps->persistent_rice_adaptation_enabled_flag; +++ uint8_t * stat_coeff = !rice_adaptation_enabled ? NULL : +++ lc->stat_coeff + trans_skip_or_bypass + 2 - ((c_idx_nz) << 1); +++ int c_rice_param = !rice_adaptation_enabled ? 0 : *stat_coeff >> 2; +++ int * level = levels - 1; +++ +++ do { +++ { +++ const unsigned int z = hevc_clz32(coded_vals) + 1; +++ level += z; +++ coded_vals <<= z; +++ } ++ ++- if (first_greater1_coeff_idx != -1) { ++- coeff_abs_level_greater1_flag[first_greater1_coeff_idx] += coeff_abs_level_greater2_flag_decode(s, c_idx, ctx_set); ++- } ++- if (!s->ps.pps->sign_data_hiding_flag || !sign_hidden ) { ++- coeff_sign_flag = coeff_sign_flag_decode(s, nb_significant_coeff_flag) << (16 - nb_significant_coeff_flag); ++- } else { ++- coeff_sign_flag = coeff_sign_flag_decode(s, nb_significant_coeff_flag - 1) << (16 - (nb_significant_coeff_flag - 1)); ++- } +++ { +++ const int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode_bypass(s, c_rice_param); +++ const int trans_coeff_level = *level + last_coeff_abs_level_remaining + 1; +++ +++ sum_abs += last_coeff_abs_level_remaining + 1; +++ *level = trans_coeff_level; ++ ++- for (m = 0; m < n_end; m++) { ++- n = significant_coeff_flag_idx[m]; ++- GET_COORD(offset, n); ++- if (m < 8) { ++- trans_coeff_level = 1 + coeff_abs_level_greater1_flag[m]; ++- if (trans_coeff_level == ((m == first_greater1_coeff_idx) ? 3 : 2)) { ++- int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(s, c_rice_param); ++- ++- trans_coeff_level += last_coeff_abs_level_remaining; ++- if (trans_coeff_level > (3 << c_rice_param)) ++- c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled_flag ? 
c_rice_param + 1 : FFMIN(c_rice_param + 1, 4); ++- if (s->ps.sps->persistent_rice_adaptation_enabled_flag && !rice_init) { ++- int c_rice_p_init = lc->stat_coeff[sb_type] / 4; ++- if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init)) ++- lc->stat_coeff[sb_type]++; ++- else if (2 * last_coeff_abs_level_remaining < (1 << c_rice_p_init)) ++- if (lc->stat_coeff[sb_type] > 0) ++- lc->stat_coeff[sb_type]--; ++- rice_init = 1; +++ if (stat_coeff != NULL) +++ update_rice(stat_coeff, last_coeff_abs_level_remaining, c_rice_param); +++ stat_coeff = NULL; +++ +++ if (trans_coeff_level > (3 << c_rice_param) && +++ (c_rice_param < 4 || rice_adaptation_enabled)) +++ ++c_rice_param; ++ } ++- } ++- } else { ++- int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(s, c_rice_param); ++- ++- trans_coeff_level = 1 + last_coeff_abs_level_remaining; ++- if (trans_coeff_level > (3 << c_rice_param)) ++- c_rice_param = s->ps.sps->persistent_rice_adaptation_enabled_flag ? c_rice_param + 1 : FFMIN(c_rice_param + 1, 4); ++- if (s->ps.sps->persistent_rice_adaptation_enabled_flag && !rice_init) { ++- int c_rice_p_init = lc->stat_coeff[sb_type] / 4; ++- if (last_coeff_abs_level_remaining >= (3 << c_rice_p_init)) ++- lc->stat_coeff[sb_type]++; ++- else if (2 * last_coeff_abs_level_remaining < (1 << c_rice_p_init)) ++- if (lc->stat_coeff[sb_type] > 0) ++- lc->stat_coeff[sb_type]--; ++- rice_init = 1; ++- } +++ } while (coded_vals != 0); ++ } ++- if (s->ps.pps->sign_data_hiding_flag && sign_hidden) { ++- sum_abs += trans_coeff_level; ++- if (n == first_nz_pos_in_cg && (sum_abs&1)) ++- trans_coeff_level = -trans_coeff_level; +++ +++ // sign_hidden = 0 or 1 so we can combine the tests +++ if ((sign_hidden & sum_abs) != 0) { +++ levels[nb_significant_coeff_flag - 1] = -levels[nb_significant_coeff_flag - 1]; ++ } ++- if (coeff_sign_flag >> 15) ++- trans_coeff_level = -trans_coeff_level; ++- coeff_sign_flag <<= 1; ++- if(!lc->cu.cu_transquant_bypass_flag) { ++- if 
(s->ps.sps->scaling_list_enable_flag && !(transform_skip_flag && log2_trafo_size > 2)) { ++- if(y_c || x_c || log2_trafo_size < 4) { ++- switch(log2_trafo_size) { ++- case 3: pos = (y_c << 3) + x_c; break; ++- case 4: pos = ((y_c >> 1) << 3) + (x_c >> 1); break; ++- case 5: pos = ((y_c >> 2) << 3) + (x_c >> 2); break; ++- default: pos = (y_c << 2) + x_c; break; ++- } ++- scale_m = scale_matrix[pos]; ++- } else { ++- scale_m = dc_scale; ++- } +++ +++ bypass_finish(s); +++ +++ // -- Finish bypass block +++ +++ // Scale loop +++ { +++ int m = nb_significant_coeff_flag - 1; +++ +++ // Deal with DC component (if any) first +++ if (i == 0 && significant_coeff_flag_idx[m] == 0) +++ { +++ const int k = (int32_t)(coeff_sign_flags << m) >> 31; +++ blk_coeffs[0] = trans_scale_sat( +++ (levels[m] ^ k) - k, scale, dc_scale, shift); +++ --m; ++ } ++- trans_coeff_level = (trans_coeff_level * (int64_t)scale * (int64_t)scale_m + add) >> shift; ++- if(trans_coeff_level < 0) { ++- if((~trans_coeff_level) & 0xFffffffffff8000) ++- trans_coeff_level = -32768; ++- } else { ++- if(trans_coeff_level & 0xffffffffffff8000) ++- trans_coeff_level = 32767; +++ +++#if !USE_N_END_1 +++ // If N_END_1 
set then m was at least 1 initially +++ if (m >= 0) +++#endif +++ { +++ do { +++ const xy_off_t * const xy_off = scan_xy_off + +++ significant_coeff_flag_idx[m]; +++ const int k = (int32_t)(coeff_sign_flags << m) >> 31; +++ +++ blk_coeffs[xy_off->coeff] = trans_scale_sat( +++ (levels[m] ^ k) - k, +++ scale, +++ blk_scale[xy_off->scale], +++ shift); +++ } while (--m >= 0); ++ } ++ } ++- coeffs[y_c * trafo_size + x_c] = trans_coeff_level; +++ ++ } ++ } ++- } +++ } while ((i = next_subset(s, i, c_idx_nz, +++ significant_coeff_group_flag, scan_x_cg, scan_y_cg, &prev_sig)) >= 0); ++ ++ if (lc->cu.cu_transquant_bypass_flag) { ++ if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag && ++@@ -1496,7 +2078,7 @@ void ff_hevc_hls_residual_coding(HEVCContext *s, int x0, int y0, ++ s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode); ++ } ++ } else { ++- if (transform_skip_flag) { +++ if (trans_skip_or_bypass) { // Must be trans_skip as we've already dealt with bypass ++ int rot = s->ps.sps->transform_skip_rotation_enabled_flag && ++ log2_trafo_size == 2 && ++ lc->cu.pred_mode == MODE_INTRA; ++-- ++2.7.4 ++ +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index 2dc4addea504d142eb74385653584bf39b253156..d1d76cb2ce04d5fd056796cc133fceb3f3c246c9 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -3,7 +3,8 @@ include FFMPEG-VERSION + DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ + 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch \ + hevcdsp_ARM_NEON_optimized_epel_functions.patch added_ARM_NEON_optimized_SAO_patches.patch \ +- pfcd_hevc_optimisations.patch ++ pfcd_hevc_optimisations.patch \ ++ 0001-Squashed-commit-of-the-following.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -67,6 +68,7 @@ ifeq ($(Configuration), Release) + ffmpg_config += --disable-debug + endif + ++ffmpg_config 
+= --extra-cflags="-DRPI=1" + + CLEAN_FILES=$(ARCHIVE) $(PLATFORM) + +@@ -83,6 +85,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); patch -p1 < ../hevcdsp_ARM_NEON_optimized_epel_functions.patch + cd $(PLATFORM); patch -p1 < ../added_ARM_NEON_optimized_SAO_patches.patch + cd $(PLATFORM); patch -p1 < ../pfcd_hevc_optimisations.patch ++ cd $(PLATFORM); patch -p1 < ../0001-Squashed-commit-of-the-following.patch + + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ +diff --git a/tools/depends/target/ffmpeg/autobuild.sh b/tools/depends/target/ffmpeg/autobuild.sh +index b6bd57731bca6dfe5f814a4043b3e08d1bb08318..65800dfccc7cbf17124a96d81378b1c3ddf92342 100755 +--- a/tools/depends/target/ffmpeg/autobuild.sh ++++ b/tools/depends/target/ffmpeg/autobuild.sh +@@ -132,6 +132,7 @@ patch -p1 < ../../0001-Discard-data-before-VO-VOL-in-mpeg-4-over-mpegts.patch + patch -p1 < ../../hevcdsp_ARM_NEON_optimized_epel_functions.patch + patch -p1 < ../../added_ARM_NEON_optimized_SAO_patches.patch + patch -p1 < ../../pfcd_hevc_optimisations.patch ++patch -p1 < ../../0001-Squashed-commit-of-the-following.patch + + CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" \ + ./configure --prefix=$FFMPEG_PREFIX \ + +From 3dac5d0c77bfd3b88d90944154c058d1e6429bb8 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 19 Sep 2014 11:54:49 +0100 +Subject: [PATCH 37/67] [videoplayer/rbp] Add pi specific option to maintain + vsync with pll adjustment + +New A/V sync option in settings/video/playback to do "Adjust PLL". +This uses video clock (so perfect video syncing) but avoids having to resample +or drop/dupe audio packets which is normally required. 
+--- + .../resource.language.en_gb/resources/strings.po | 32 ++++++++++++++++++++++ + system/settings/rbp.xml | 14 ++++++++++ + .../AudioEngine/Engines/ActiveAE/ActiveAE.cpp | 31 +++++++++++++++------ + xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.h | 9 +++++- + .../Engines/ActiveAE/ActiveAEStream.cpp | 7 +++-- + .../AudioEngine/Engines/ActiveAE/ActiveAEStream.h | 4 ++- + xbmc/cores/AudioEngine/Interfaces/AEStream.h | 10 ++++++- + xbmc/cores/VideoPlayer/DVDAudio.cpp | 4 +-- + xbmc/cores/VideoPlayer/DVDAudio.h | 2 +- + xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp | 12 ++++++-- + xbmc/linux/RBP.cpp | 13 +++++++++ + xbmc/linux/RBP.h | 3 ++ + 12 files changed, 122 insertions(+), 19 deletions(-) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index 085e2a195d2e52ce6bea3ed791bf817f5be23b15..8cb9f8503c29c54cd0cb55018f867a45248c649f 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -19417,3 +19417,35 @@ msgstr "" + msgctxt "#38190" + msgid "Extract thumbnails from video files" + msgstr "" ++ ++#. Description of setting "System -> Audio Output -> A/V sync method" with label #38200 ++#: system/settings/settings.xml ++msgctxt "#38200" ++msgid "PLL adjustment to maintain audio/video sync" ++msgstr "" ++ ++#. Description of setting "Videos -> Playback -> A/V sync method" with label #38201 ++#: system/settings/settings.xml ++msgctxt "#38201" ++msgid "Allows sync adjustment without resampling. Lower the settings if you get audio/video dropouts." 
++msgstr "" ++ ++msgctxt "#38202" ++msgid "Off" ++msgstr "" ++ ++msgctxt "#38203" ++msgid "Low" ++msgstr "" ++ ++msgctxt "#38204" ++msgid "Medium" ++msgstr "" ++ ++msgctxt "#38205" ++msgid "High" ++msgstr "" ++ ++msgctxt "#38206" ++msgid "Max" ++msgstr "" +diff --git a/system/settings/rbp.xml b/system/settings/rbp.xml +index 737ec4e0c7f0feb98a6dd008b53e238c41dde8af..2e6c903df5e4d2cd064466db0ef55deada5cdc80 100644 +--- a/system/settings/rbp.xml ++++ b/system/settings/rbp.xml +@@ -98,6 +98,20 @@ + <setting id="audiooutput.processquality"> + <default>101</default> <!-- AE_QUALITY_GPU --> + </setting> ++ <setting id="audiooutput.plladjust" type="integer" label="38200" help="38201"> ++ <level>3</level> ++ <default>0</default> ++ <constraints> ++ <options> ++ <option label="38202">0</option> <!-- off --> ++ <option label="38203">1</option> <!-- low --> ++ <option label="38204">2</option> <!-- medium --> ++ <option label="38205">3</option> <!-- high --> ++ <option label="38206">4</option> <!-- max --> ++ </options> ++ </constraints> ++ <control type="spinner" format="string" /> ++ </setting> + </group> + <group id="3"> + <setting id="audiooutput.ac3transcode" help="37024"> +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp +index f9e8a9beaa9b3b4590c698a4d64351cb14c2339d..6a22f8145ce9dfb46f0ddae27eb0753413b066d3 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.cpp +@@ -34,6 +34,10 @@ using namespace ActiveAE; + #include "windowing/WindowingFactory.h" + #include "utils/log.h" + ++#if defined(TARGET_RASPBERRY_PI) ++#include "linux/RBP.h" ++#endif ++ + #define MAX_CACHE_LEVEL 0.4 // total cache time of stream in seconds + #define MAX_WATER_LEVEL 0.2 // buffered time after stream stages in seconds + #define MAX_BUFFER_TIME 0.1 // max time of a buffer in seconds +@@ -365,11 +369,12 @@ void CActiveAE::StateMachine(int signal, Protocol 
*port, Message *msg) + m_sink.m_controlPort.SendOutMessage(CSinkControlProtocol::APPFOCUSED, msg->data, sizeof(bool)); + return; + case CActiveAEControlProtocol::STREAMRESAMPLEMODE: +- MsgStreamParameter *par; +- par = (MsgStreamParameter*)msg->data; ++ MsgStreamResample *par; ++ par = (MsgStreamResample*)msg->data; + if (par->stream) + { +- par->stream->m_resampleMode = par->parameter.int_par; ++ par->stream->m_resampleMode = par->mode; ++ par->stream->m_pllAdjust = par->plladjust; + par->stream->m_resampleIntegral = 0.0; + } + return; +@@ -2466,7 +2471,16 @@ CSampleBuffer* CActiveAE::SyncStream(CActiveAEStream *stream) + if (!newerror || stream->m_syncState != CAESyncInfo::AESyncState::SYNC_INSYNC) + return ret; + +- if (stream->m_resampleMode) ++ if (stream->m_pllAdjust > 0) // pll adjust ++ { ++#if defined(TARGET_RASPBERRY_PI) ++ double e = std::max(std::min(error / 50.0, 1.0), -1.0); ++ double m_plladjust = 1.0 + e * stream->m_pllAdjust; ++ double m_last_plladjust = g_RBP.AdjustHDMIClock(m_plladjust); ++ CLog::Log(LOGDEBUG, "CDVDPlayerAudio::%s pll:%.5f (%.5f) error:%.6f e:%.6f a:%f", __FUNCTION__, m_plladjust, m_last_plladjust, error, e * stream->m_pllAdjust, stream->m_pllAdjust ); ++#endif ++ } ++ else if (stream->m_resampleMode) + { + if (stream->m_resampleBuffers) + { +@@ -3322,13 +3336,14 @@ void CActiveAE::SetStreamResampleRatio(CActiveAEStream *stream, double ratio) + &msg, sizeof(MsgStreamParameter)); + } + +-void CActiveAE::SetStreamResampleMode(CActiveAEStream *stream, int mode) ++void CActiveAE::SetStreamResampleMode(CActiveAEStream *stream, int mode, float plladjust) + { +- MsgStreamParameter msg; ++ MsgStreamResample msg; + msg.stream = stream; +- msg.parameter.int_par = mode; ++ msg.mode = mode; ++ msg.plladjust = plladjust; + m_controlPort.SendOutMessage(CActiveAEControlProtocol::STREAMRESAMPLEMODE, +- &msg, sizeof(MsgStreamParameter)); ++ &msg, sizeof(MsgStreamResample)); + } + + void CActiveAE::SetStreamFFmpegInfo(CActiveAEStream *stream, int 
profile, enum AVMatrixEncoding matrix_encoding, enum AVAudioServiceType audio_service_type) +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.h b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.h +index 2a31a6e3c09fa61907ef9e518158773ba7d3b03e..3efc7afc255c542ea2aedbf83d6962beeae286a2 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.h ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAE.h +@@ -174,6 +174,13 @@ struct MsgStreamFFmpegInfo + enum AVAudioServiceType audio_service_type; + }; + ++struct MsgStreamResample ++{ ++ CActiveAEStream *stream; ++ int mode; ++ float plladjust; ++}; ++ + class CEngineStats + { + public: +@@ -290,7 +297,7 @@ protected: + void SetStreamReplaygain(CActiveAEStream *stream, float rgain); + void SetStreamVolume(CActiveAEStream *stream, float volume); + void SetStreamResampleRatio(CActiveAEStream *stream, double ratio); +- void SetStreamResampleMode(CActiveAEStream *stream, int mode); ++ void SetStreamResampleMode(CActiveAEStream *stream, int mode, float plladjust); + void SetStreamFFmpegInfo(CActiveAEStream *stream, int profile, enum AVMatrixEncoding matrix_encoding, enum AVAudioServiceType audio_service_type); + void SetStreamFade(CActiveAEStream *stream, float from, float target, unsigned int millis); + +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.cpp b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.cpp +index 1d58691db79e53a4a4cfb32c45f209a115853722..d1e8863cb9600bf1a026520f77501bb98e51918a 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.cpp ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.cpp +@@ -503,11 +503,12 @@ void CActiveAEStream::SetResampleRatio(double ratio) + m_streamResampleRatio = ratio; + } + +-void CActiveAEStream::SetResampleMode(int mode) ++void CActiveAEStream::SetResampleMode(int mode, float plladjust) + { +- if (mode != m_streamResampleMode) +- AE.SetStreamResampleMode(this, mode); ++ if (mode != m_streamResampleMode 
|| plladjust != m_streamPllAdjust) ++ AE.SetStreamResampleMode(this, mode, plladjust); + m_streamResampleMode = mode; ++ m_streamPllAdjust = plladjust; + } + + void CActiveAEStream::SetFFmpegInfo(int profile, enum AVMatrixEncoding matrix_encoding, enum AVAudioServiceType audio_service_type) +diff --git a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.h b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.h +index 0fd959b8071e5a03d7749689e2e0042907d4d4bf..8b25159f198279f2515fe4f84fc9403dcb46c401 100644 +--- a/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.h ++++ b/xbmc/cores/AudioEngine/Engines/ActiveAE/ActiveAEStream.h +@@ -137,7 +137,7 @@ public: + + virtual double GetResampleRatio(); + virtual void SetResampleRatio(double ratio); +- virtual void SetResampleMode(int mode); ++ virtual void SetResampleMode(int mode, float plladjust); + virtual void RegisterAudioCallback(IAudioCallback* pCallback); + virtual void UnRegisterAudioCallback(); + virtual void FadeVolume(float from, float to, unsigned int time); +@@ -154,6 +154,7 @@ protected: + float m_streamAmplify; + double m_streamResampleRatio; + int m_streamResampleMode; ++ float m_streamPllAdjust; + unsigned int m_streamSpace; + bool m_streamDraining; + bool m_streamDrained; +@@ -194,6 +195,7 @@ protected: + int m_fadingTime; + int m_profile; + int m_resampleMode; ++ float m_pllAdjust; + double m_resampleIntegral; + enum AVMatrixEncoding m_matrixEncoding; + enum AVAudioServiceType m_audioServiceType; +diff --git a/xbmc/cores/AudioEngine/Interfaces/AEStream.h b/xbmc/cores/AudioEngine/Interfaces/AEStream.h +index 7416685ef766492b13bbbde9001f868f28907d34..e3dbc5f2ddd6269f5e80086d2fd04e1ae68ac828 100644 +--- a/xbmc/cores/AudioEngine/Interfaces/AEStream.h ++++ b/xbmc/cores/AudioEngine/Interfaces/AEStream.h +@@ -41,6 +41,14 @@ public: + class CAESyncInfo + { + public: ++ CAESyncInfo() ++ { ++ delay = 0.0; ++ error = 0.0; ++ rr = 1.0; ++ errortime = 0; ++ state = SYNC_OFF; ++ } + double delay; + double 
error; + double rr; +@@ -231,7 +239,7 @@ public: + /** + * Sets the resamplling on/ff + */ +- virtual void SetResampleMode(int mode) = 0; ++ virtual void SetResampleMode(int mode, float plladjust) = 0; + + /** + * Registers the audio callback to call with each block of data, this is used by Audio Visualizations +diff --git a/xbmc/cores/VideoPlayer/DVDAudio.cpp b/xbmc/cores/VideoPlayer/DVDAudio.cpp +index 2674fb381aa42a3575ae85ad54be5f9891cafe2a..48894c6cea7ba54e37855963cf2caf76e1d1ab36 100644 +--- a/xbmc/cores/VideoPlayer/DVDAudio.cpp ++++ b/xbmc/cores/VideoPlayer/DVDAudio.cpp +@@ -321,12 +321,12 @@ double CDVDAudio::GetResampleRatio() + return m_resampleRatio; + } + +-void CDVDAudio::SetResampleMode(int mode) ++void CDVDAudio::SetResampleMode(int mode, float plladjust) + { + CSingleLock lock (m_critSection); + if(m_pAudioStream) + { +- m_pAudioStream->SetResampleMode(mode); ++ m_pAudioStream->SetResampleMode(mode, plladjust); + } + } + +diff --git a/xbmc/cores/VideoPlayer/DVDAudio.h b/xbmc/cores/VideoPlayer/DVDAudio.h +index 48b5c42d2998a25901c31a9ad762d81f89eb430b..70559f9570041a11693d21d2de890f81b07fe2ab 100644 +--- a/xbmc/cores/VideoPlayer/DVDAudio.h ++++ b/xbmc/cores/VideoPlayer/DVDAudio.h +@@ -61,7 +61,7 @@ public: + double GetSyncError(); + void SetSyncErrorCorrection(double correction); + double GetResampleRatio(); +- void SetResampleMode(int mode); ++ void SetResampleMode(int mode, float plladjust); + void Flush(); + void Drain(); + void AbortAddPackets(); +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +index 188b85b12b86f887324cdcfda3c3aa4cd90d3a11..b05c4e4c6a2361455ab553133965aa2018e9d684 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerAudio.cpp +@@ -96,6 +96,7 @@ bool CVideoPlayerAudio::OpenStream(CDVDStreamInfo &hints) + bool allowpassthrough = !CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEDISPLAYASCLOCK); + if 
(hints.realtime) + allowpassthrough = false; ++ allowpassthrough |= CSettings::GetInstance().GetInt("audiooutput.plladjust") > 0; + CDVDAudioCodec* codec = CDVDFactoryCodec::CreateAudioCodec(hints, m_processInfo, allowpassthrough, m_processInfo.AllowDTSHDDecode()); + if(!codec) + { +@@ -217,8 +218,12 @@ void CVideoPlayerAudio::UpdatePlayerInfo() + + //print the inverse of the resample ratio, since that makes more sense + //if the resample ratio is 0.5, then we're playing twice as fast ++#ifdef TARGET_RASPBERRY_PI ++ s << ", rr:" << std::fixed << std::setprecision(5) << 1.0 / m_dvdAudio.GetResampleRatio() << ", pll:" << std::fixed << std::setprecision(5) << g_RBP.GetAdjustHDMIClock() << ", err:" << std::fixed << std::setprecision(1) << m_dvdAudio.GetSyncError() * 1e-3 << "ms"; ++#else + if (m_synctype == SYNC_RESAMPLE) + s << ", rr:" << std::fixed << std::setprecision(5) << 1.0 / m_dvdAudio.GetResampleRatio(); ++#endif + + s << ", att:" << std::fixed << std::setprecision(1) << log(GetCurrentAttenuation()) * 20.0f << " dB"; + +@@ -545,10 +550,12 @@ void CVideoPlayerAudio::SetSyncType(bool passthrough) + int synctype = (m_synctype >= 0 && m_synctype <= 1) ? 
m_synctype : 2; + CLog::Log(LOGDEBUG, "CVideoPlayerAudio:: synctype set to %i: %s", m_synctype, synctypes[synctype]); + m_prevsynctype = m_synctype; ++ const float plladjusts[] = { 0.0f, 0.00001f, 0.0001f, 0.001f, 0.01f }; ++ float plladjust = plladjusts[CSettings::GetInstance().GetInt("audiooutput.plladjust")]; + if (m_synctype == SYNC_RESAMPLE) +- m_dvdAudio.SetResampleMode(1); ++ m_dvdAudio.SetResampleMode(1, plladjust); + else +- m_dvdAudio.SetResampleMode(0); ++ m_dvdAudio.SetResampleMode(0, plladjust); + } + } + +@@ -606,6 +613,7 @@ bool CVideoPlayerAudio::SwitchCodecIfNeeded() + bool allowpassthrough = !CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_USEDISPLAYASCLOCK); + if (m_streaminfo.realtime) + allowpassthrough = false; ++ allowpassthrough |= CSettings::GetInstance().GetInt("audiooutput.plladjust") > 0; + CDVDAudioCodec *codec = CDVDFactoryCodec::CreateAudioCodec(m_streaminfo, m_processInfo, allowpassthrough, m_processInfo.AllowDTSHDDecode()); + if (!codec || codec->NeedPassthrough() == m_pAudioCodec->NeedPassthrough()) { + // passthrough state has not changed +diff --git a/xbmc/linux/RBP.cpp b/xbmc/linux/RBP.cpp +index fbffa3a952d920cb41412f00f59d5c1c91f98740..d6591cc4e1938b231cd3ce9035ca9334dcffdde9 100644 +--- a/xbmc/linux/RBP.cpp ++++ b/xbmc/linux/RBP.cpp +@@ -49,6 +49,7 @@ CRBP::CRBP() + m_DllBcmHost = new DllBcmHost(); + m_OMX = new COMXCore(); + m_display = DISPMANX_NO_HANDLE; ++ m_last_pll_adjust = 1.0; + m_p = NULL; + m_x = 0; + m_y = 0; +@@ -162,6 +163,7 @@ void CRBP::CloseDisplay(DISPMANX_DISPLAY_HANDLE_T display) + assert(s == 0); + vc_dispmanx_display_close(m_display); + m_display = DISPMANX_NO_HANDLE; ++ m_last_pll_adjust = 1.0; + } + + void CRBP::GetDisplaySize(int &width, int &height) +@@ -504,4 +506,15 @@ void CRBP::uninit_cursor() + mailbox_set_cursor_position(m_mb, 0, 0, 0); + } + ++double CRBP::AdjustHDMIClock(double adjust) ++{ ++ char response[80] = ""; /* stays an empty string if vc_gencmd fails */ ++ vc_gencmd(response, sizeof response, "hdmi_adjust_clock %f", 
adjust); ++ char *p = strchr(response, '='); ++ if (p) ++ m_last_pll_adjust = atof(p+1); ++ CLog::Log(LOGDEBUG, "CRBP::%s(%.4f) = %.4f", __func__, adjust, m_last_pll_adjust); ++ return m_last_pll_adjust; ++} ++ + #endif +diff --git a/xbmc/linux/RBP.h b/xbmc/linux/RBP.h +index 90b04db5405058be2ff20aeaa6af2d2ac651586f..084fba87f49f4c3b33a8dd4a20a626a370a1f371 100644 +--- a/xbmc/linux/RBP.h ++++ b/xbmc/linux/RBP.h +@@ -82,6 +82,8 @@ public: + uint32_t WaitVsync(uint32_t target = ~0U); + void VSyncCallback(); + int GetMBox() { return m_mb; } ++ double AdjustHDMIClock(double adjust); ++ double GetAdjustHDMIClock() { return m_last_pll_adjust; } + + private: + DllBcmHost *m_DllBcmHost; +@@ -107,6 +109,7 @@ private: + int m_x; + int m_y; + bool m_enabled; ++ double m_last_pll_adjust; + public: + void init_cursor(); + void set_cursor(const void *pixels, int width, int height, int hotspot_x, int hotspot_y); + +From d7f88d01cde2cd4b0894463321e1ff0c413d9446 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Thu, 7 May 2015 15:35:43 +0100 +Subject: [PATCH 38/67] rbp: Support zero copy interface with hevc acceleration + +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp | 9 +++++++++ + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp | 5 +++-- + 2 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp +index 967d5181a42f7cad0fe7b559a8eb958073a8144d..ec2d47d7443ab75af5ad119b8ae04fb072eca677 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/DVDVideoCodecFFmpeg.cpp +@@ -306,6 +306,15 @@ bool CDVDVideoCodecFFmpeg::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options + if (tryhw && m_decoderState == STATE_NONE) + { + m_decoderState = STATE_HW_SINGLE; ++#ifdef TARGET_RASPBERRY_PI ++ int num_threads = g_cpuInfo.getCPUCount() * 3 / 2; 
++ num_threads = std::max(1, std::min(num_threads, 16)); ++ if (pCodec->id == AV_CODEC_ID_HEVC) ++ num_threads = 8; ++ m_pCodecContext->thread_count = num_threads; ++ m_pCodecContext->thread_safe_callbacks = 0; ++ CLog::Log(LOGDEBUG, "CDVDVideoCodecFFmpeg - open frame threaded with %d threads", num_threads); ++#endif + } + else + { +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp +index 3825e4cca4df7e1a791410b741aecc64823a3c69..e1bb3ab37f68b69e39fb00ab6e4785a430250173 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALFFmpeg.cpp +@@ -355,8 +355,9 @@ bool CDecoder::GetPicture(AVCodecContext* avctx, AVFrame* frame, DVDVideoPicture + assert(!picture->MMALBuffer->mmal_buffer); + picture->MMALBuffer->mmal_buffer = mmal_buffer; + +- // need to flush ARM cache so GPU can see it +- gmem->Flush(); ++ // need to flush ARM cache so GPU can see it (HEVC will have already done this) ++ if (avctx->codec_id != AV_CODEC_ID_HEVC) ++ gmem->Flush(); + + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) + CLog::Log(LOGDEBUG, "%s::%s - mmal:%p dts:%.3f pts:%.3f buf:%p gpu:%p", CLASSNAME, __FUNCTION__, picture->MMALBuffer->mmal_buffer, 1e-6*picture->dts, 1e-6*picture->pts, picture->MMALBuffer, gmem); + +From 2b6121f39768cf5d22ffc73a475484519ac2881e Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 16 May 2015 18:26:04 +0100 +Subject: [PATCH 39/67] ffmpeg: use upstream mvc patches + +--- + ...vcodec-add-h264_mvc-codec-id-and-profiles.patch | 68 ++++++++++++ + ...er-add-support-for-parsing-h264-mvc-NALUs.patch | 116 +++++++++++++++++++++ + tools/depends/target/ffmpeg/Makefile | 7 +- + tools/depends/target/ffmpeg/autobuild.sh | 3 + + ...arsing_of_mvc_slices_in_some_corner_cases.patch | 55 ++++++++++ + 5 files changed, 248 insertions(+), 1 deletion(-) + create mode 100644 
tools/depends/target/ffmpeg/0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch + create mode 100644 tools/depends/target/ffmpeg/0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch + create mode 100644 tools/depends/target/ffmpeg/h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch + +diff --git a/tools/depends/target/ffmpeg/0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch b/tools/depends/target/ffmpeg/0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..2e7381fe6538089759ebc7288c0a5d908cd0973c +--- /dev/null ++++ b/tools/depends/target/ffmpeg/0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch +@@ -0,0 +1,68 @@ ++From 4060f15e2d29e268110032d4366382e370e088d0 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Sun, 26 Jun 2016 20:09:18 +0100 ++Subject: [PATCH] avcodec: add h264_mvc codec id and profiles ++ ++--- ++ libavcodec/avcodec.h | 5 +++++ ++ libavcodec/codec_desc.c | 7 +++++++ ++ libavformat/mpegts.c | 2 +- ++ 3 files changed, 13 insertions(+), 1 deletion(-) ++ ++diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h ++index a1ba217..abd2e91 100644 ++--- a/libavcodec/avcodec.h +++++ b/libavcodec/avcodec.h ++@@ -410,6 +410,8 @@ enum AVCodecID { ++ AV_CODEC_ID_SHEERVIDEO, ++ AV_CODEC_ID_YLC, ++ +++ AV_CODEC_ID_H264_MVC, +++ ++ /* various PCM "codecs" */ ++ AV_CODEC_ID_FIRST_AUDIO = 0x10000, ///< A dummy id pointing at the start of audio codecs ++ AV_CODEC_ID_PCM_S16LE = 0x10000, ++@@ -3195,6 +3197,9 @@ typedef struct AVCodecContext { ++ #define FF_PROFILE_H264_HIGH_444_PREDICTIVE 244 ++ #define FF_PROFILE_H264_HIGH_444_INTRA (244|FF_PROFILE_H264_INTRA) ++ #define FF_PROFILE_H264_CAVLC_444 44 +++#define FF_PROFILE_H264_MULTIVIEW_HIGH 118 +++#define FF_PROFILE_H264_STEREO_HIGH 128 +++#define FF_PROFILE_H264_MULTIVIEW_HIGH_DEPTH 138 ++ ++ #define FF_PROFILE_VC1_SIMPLE 0 ++ #define FF_PROFILE_VC1_MAIN 1 ++diff --git 
a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c ++index 9d94b72..535ebf0 100644 ++--- a/libavcodec/codec_desc.c +++++ b/libavcodec/codec_desc.c ++@@ -1563,6 +1563,13 @@ static const AVCodecDescriptor codec_descriptors[] = { ++ .long_name = NULL_IF_CONFIG_SMALL("YUY2 Lossless Codec"), ++ .props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS, ++ }, +++ { +++ .id = AV_CODEC_ID_H264_MVC, +++ .type = AVMEDIA_TYPE_VIDEO, +++ .name = "h264_mvc", +++ .long_name = NULL_IF_CONFIG_SMALL("H264 MVC"), +++ .props = AV_CODEC_PROP_LOSSY, +++ }, ++ ++ /* various PCM "codecs" */ ++ { ++diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c ++index b31d233..2767306 100644 ++--- a/libavformat/mpegts.c +++++ b/libavformat/mpegts.c ++@@ -701,7 +701,7 @@ static const StreamType ISO_types[] = { ++ #endif ++ { 0x1b, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_H264 }, ++ { 0x1c, AVMEDIA_TYPE_AUDIO, AV_CODEC_ID_AAC }, ++- { 0x20, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_H264 }, +++ { 0x20, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_H264_MVC }, ++ { 0x21, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_JPEG2000 }, ++ { 0x24, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_HEVC }, ++ { 0x42, AVMEDIA_TYPE_VIDEO, AV_CODEC_ID_CAVS }, ++-- ++2.7.4 ++ +diff --git a/tools/depends/target/ffmpeg/0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch b/tools/depends/target/ffmpeg/0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..399e8a95984771e4388bfe4785423ff3f664f89b +--- /dev/null ++++ b/tools/depends/target/ffmpeg/0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch +@@ -0,0 +1,116 @@ ++From 23dd20678a05e1764e5d8d30481cb354a51b6c8b Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Sun, 26 Jun 2016 20:16:03 +0100 ++Subject: [PATCH] h264_parser: add support for parsing h264 mvc NALUs ++ ++--- ++ libavcodec/allcodecs.c | 1 + ++ libavcodec/h264.h | 2 ++ ++ libavcodec/h264_parser.c | 34 ++++++++++++++++++++++++++++++---- ++ 3 files 
changed, 33 insertions(+), 4 deletions(-) ++ ++diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c ++index 54efaad..02a89c3 100644 ++--- a/libavcodec/allcodecs.c +++++ b/libavcodec/allcodecs.c ++@@ -667,6 +667,7 @@ void avcodec_register_all(void) ++ REGISTER_PARSER(H261, h261); ++ REGISTER_PARSER(H263, h263); ++ REGISTER_PARSER(H264, h264); +++ REGISTER_PARSER(H264_MVC, h264_mvc); ++ REGISTER_PARSER(HEVC, hevc); ++ REGISTER_PARSER(MJPEG, mjpeg); ++ REGISTER_PARSER(MLP, mlp); ++diff --git a/libavcodec/h264.h b/libavcodec/h264.h ++index efe3555..16358aa 100644 ++--- a/libavcodec/h264.h +++++ b/libavcodec/h264.h ++@@ -126,7 +126,9 @@ enum { ++ NAL_END_STREAM = 11, ++ NAL_FILLER_DATA = 12, ++ NAL_SPS_EXT = 13, +++ NAL_SPS_SUBSET = 15, ++ NAL_AUXILIARY_SLICE = 19, +++ NAL_SLICE_EXT = 20, ++ NAL_FF_IGNORE = 0xff0f001, ++ }; ++ ++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c ++index ce4bab2..082ac17 100644 ++--- a/libavcodec/h264_parser.c +++++ b/libavcodec/h264_parser.c ++@@ -58,6 +58,7 @@ typedef struct H264ParseContext { ++ uint8_t parse_history[6]; ++ int parse_history_count; ++ int parse_last_mb; +++ int is_mvc; ++ } H264ParseContext; ++ ++ ++@@ -105,14 +106,18 @@ static int h264_find_frame_end(H264ParseContext *p, const uint8_t *buf, ++ } else if (state <= 5) { ++ int nalu_type = buf[i] & 0x1F; ++ if (nalu_type == NAL_SEI || nalu_type == NAL_SPS || ++- nalu_type == NAL_PPS || nalu_type == NAL_AUD) { +++ nalu_type == NAL_PPS || nalu_type == NAL_AUD || +++ nalu_type == NAL_SPS_SUBSET) { ++ if (pc->frame_start_found) { ++ i++; ++ goto found; ++ } ++ } else if (nalu_type == NAL_SLICE || nalu_type == NAL_DPA || ++- nalu_type == NAL_IDR_SLICE) { +++ nalu_type == NAL_IDR_SLICE || (p->is_mvc && nalu_type == NAL_SLICE_EXT)) { ++ state += 8; +++ +++ if (nalu_type == NAL_SLICE_EXT) +++ i += 3; // skip mvc extension ++ continue; ++ } ++ state = 7; ++@@ -585,7 +590,8 @@ static int h264_parse(AVCodecParserContext *s, ++ } ++ } ++ ++- parse_nal_units(s, 
avctx, buf, buf_size); +++ if (!p->is_mvc) +++ parse_nal_units(s, avctx, buf, buf_size); ++ ++ if (avctx->framerate.num) ++ avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1})); ++@@ -622,7 +628,7 @@ static int h264_split(AVCodecContext *avctx, ++ if ((state & 0xFFFFFF00) != 0x100) ++ break; ++ nalu_type = state & 0x1F; ++- if (nalu_type == NAL_SPS) { +++ if (nalu_type == NAL_SPS || nalu_type == NAL_SPS_SUBSET) { ++ has_sps = 1; ++ } else if (nalu_type == NAL_PPS) ++ has_pps = 1; ++@@ -672,3 +678,23 @@ AVCodecParser ff_h264_parser = { ++ .parser_close = h264_close, ++ .split = h264_split, ++ }; +++ +++static av_cold int init_mvc(AVCodecParserContext *s) +++{ +++ H264ParseContext *p = s->priv_data; +++ int ret = init(s); +++ if (ret < 0) +++ return ret; +++ +++ p->is_mvc = 1; +++ return 0; +++} +++ +++AVCodecParser ff_h264_mvc_parser = { +++ .codec_ids = { AV_CODEC_ID_H264_MVC }, +++ .priv_data_size = sizeof(H264ParseContext), +++ .parser_init = init_mvc, +++ .parser_parse = h264_parse, +++ .parser_close = h264_close, +++ .split = h264_split, +++}; ++-- ++2.7.4 ++ +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index d1d76cb2ce04d5fd056796cc133fceb3f3c246c9..92d9437b36eaa4e655990f7e68634e0bbf4d9605 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -4,7 +4,9 @@ DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ + 0001-mpeg4video-Signal-unsupported-GMC-with-more-than-one.patch \ + hevcdsp_ARM_NEON_optimized_epel_functions.patch added_ARM_NEON_optimized_SAO_patches.patch \ + pfcd_hevc_optimisations.patch \ +- 0001-Squashed-commit-of-the-following.patch ++ 0001-Squashed-commit-of-the-following.patch \ ++ 0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch 0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch \ ++ h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch + + # set to "yes" to enable patching + # we 
don't apply patches until we move to a vanilla ffmpeg tarball +@@ -86,6 +88,9 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); patch -p1 < ../added_ARM_NEON_optimized_SAO_patches.patch + cd $(PLATFORM); patch -p1 < ../pfcd_hevc_optimisations.patch + cd $(PLATFORM); patch -p1 < ../0001-Squashed-commit-of-the-following.patch ++ cd $(PLATFORM); patch -p1 < ../0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch ++ cd $(PLATFORM); patch -p1 < ../0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch ++ cd $(PLATFORM); patch -p1 < ../h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch + + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ +diff --git a/tools/depends/target/ffmpeg/autobuild.sh b/tools/depends/target/ffmpeg/autobuild.sh +index 65800dfccc7cbf17124a96d81378b1c3ddf92342..4217ea350aa93e4a7acbe9dd15c9f8699db383b8 100755 +--- a/tools/depends/target/ffmpeg/autobuild.sh ++++ b/tools/depends/target/ffmpeg/autobuild.sh +@@ -133,6 +133,9 @@ patch -p1 < ../../hevcdsp_ARM_NEON_optimized_epel_functions.patch + patch -p1 < ../../added_ARM_NEON_optimized_SAO_patches.patch + patch -p1 < ../../pfcd_hevc_optimisations.patch + patch -p1 < ../../0001-Squashed-commit-of-the-following.patch ++patch -p1 < ../../0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch ++patch -p1 < ../../0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch ++patch -p1 < ../../h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch + + CFLAGS="$CFLAGS" CXXFLAGS="$CXXFLAGS" LDFLAGS="$LDFLAGS" \ + ./configure --prefix=$FFMPEG_PREFIX \ +diff --git a/tools/depends/target/ffmpeg/h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch b/tools/depends/target/ffmpeg/h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..b39480ad098b9cd0882fcf75b96afb1b98686bcc +--- /dev/null ++++ 
b/tools/depends/target/ffmpeg/h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch +@@ -0,0 +1,57 @@ ++From 12d99a92469e5916de3bc787dce4c13abfdd5e09 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Sun, 26 Jun 2016 20:20:04 +0100 ++Subject: [PATCH] h264_parser: fix parsing of mvc slices in some corner cases ++ ++--- ++ libavcodec/h264_parser.c | 12 ++++++------ ++ 1 file changed, 6 insertions(+), 6 deletions(-) ++ ++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c ++index 082ac17..b9b0c78 100644 ++--- a/libavcodec/h264_parser.c +++++ b/libavcodec/h264_parser.c ++@@ -58,7 +58,8 @@ typedef struct H264ParseContext { ++- uint8_t parse_history[6]; +++ uint8_t parse_history[9]; ++ int parse_history_count; ++ int parse_last_mb; ++ int is_mvc; +++ int slice_ext; ++ } H264ParseContext; ++ ++ ++@@ -116,18 +117,17 @@ static int h264_find_frame_end(H264ParseContext *p, const uint8_t *buf, ++ nalu_type == NAL_IDR_SLICE || (p->is_mvc && nalu_type == NAL_SLICE_EXT)) { ++ state += 8; ++ ++- if (nalu_type == NAL_SLICE_EXT) ++- i += 3; // skip mvc extension +++ p->slice_ext = (nalu_type == NAL_SLICE_EXT); ++ continue; ++ } ++ state = 7; ++ } else { ++ p->parse_history[p->parse_history_count++] = buf[i]; ++- if (p->parse_history_count > 5) { +++ if (p->parse_history_count > 8) { ++ unsigned int mb, last_mb = p->parse_last_mb; ++ GetBitContext gb; ++ ++- init_get_bits(&gb, p->parse_history, 8*p->parse_history_count); +++ init_get_bits8(&gb, p->parse_history + 3*p->slice_ext, p->parse_history_count - 3*p->slice_ext); ++ p->parse_history_count = 0; ++ mb= get_ue_golomb_long(&gb); ++ p->parse_last_mb = mb; ++@@ -150,7 +150,7 @@ found: ++ pc->frame_start_found = 0; ++ if (p->is_avc) ++ return next_avc; ++- return i - (state & 5) - 5 * (state > 7); +++ return i - (state & 5) - 8 * (state > 7); ++ } ++ ++ static int scan_mmco_reset(AVCodecParserContext *s, GetBitContext *gb, ++-- ++2.7.4 ++ + +From c2b0929d428aa4eb33d771121448a59e883c9842 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin 
<afedchin@ruswizards.com> +Date: Wed, 20 Jan 2016 17:02:16 +0300 +Subject: [PATCH 40/67] [VideoPlayer] DemuxFFmpeg: Properly demuxing h264_mvc + streams. + +--- + .../VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 23 +++++++++++++++++++++- + 1 file changed, 22 insertions(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 84310bbda6440dd10f9aa0711859f4dc0bb1fd1a..16e8e270b5a060bd174f794480a8b178a620d490 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -25,6 +25,7 @@ + + #include "commons/Exception.h" + #include "cores/FFmpeg.h" ++#include "DVDCodecs/DVDCodecUtils.h" + #include "DVDClock.h" // for DVD_TIME_BASE + #include "DVDDemuxUtils.h" + #include "DVDInputStreams/DVDInputStream.h" +@@ -1249,6 +1250,15 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + } + case AVMEDIA_TYPE_VIDEO: + { ++ if (pStream->codec->codec_id == AV_CODEC_ID_H264_MVC) ++ { ++ // ignore MVC extension streams, they are handled specially ++ stream = new CDemuxStream(); ++ stream->type = STREAM_DATA; ++ stream->disabled = true; ++ pStream->need_parsing = AVSTREAM_PARSE_NONE; ++ break; ++ } + CDemuxStreamVideoFFmpeg* st = new CDemuxStreamVideoFFmpeg(this, pStream); + stream = st; + if(strcmp(m_pFormatContext->iformat->name, "flv") == 0) +@@ -1257,7 +1267,7 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + st->bVFR = false; + + // never trust pts in avi files with h264. 
+- if (m_bAVI && pStream->codec->codec_id == AV_CODEC_ID_H264) ++ if (m_bAVI && (pStream->codec->codec_id == AV_CODEC_ID_H264 || pStream->codec->codec_id == AV_CODEC_ID_H264_MVC)) + st->bPTSInvalid = true; + + #if defined(AVFORMAT_HAS_STREAM_GET_R_FRAME_RATE) +@@ -1328,6 +1338,17 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + if (av_dict_get(pStream->metadata, "title", NULL, 0)) + st->m_description = av_dict_get(pStream->metadata, "title", NULL, 0)->value; + ++ if (pStream->codec->codec_id == AV_CODEC_ID_H264) ++ { ++ if (CDVDCodecUtils::IsH264AnnexB(m_pFormatContext->iformat->name, pStream)) ++ { ++ // TODO ++ } ++ else if (CDVDCodecUtils::ProcessH264MVCExtradata(pStream->codec->extradata, pStream->codec->extradata_size)) ++ { ++ pStream->codec->codec_tag = MKTAG('M', 'V', 'C', '1'); ++ } ++ } + break; + } + case AVMEDIA_TYPE_DATA: + +From 052ba44b0a0bd4736bc330c2f86e34cb8424ba60 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin <anightik@gmail.com> +Date: Thu, 25 Feb 2016 11:21:25 +0300 +Subject: [PATCH 41/67] [Stereo3D] Added mvc modes. 
+ +--- + xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp | 4 ++++ + xbmc/guilib/StereoscopicsManager.cpp | 6 +++++- + 2 files changed, 9 insertions(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp +index 809766a64b0289ca0a7f69cf68dd7651c249d161..04ceed1504c2d81aaa165d232e128c410b9fdc2c 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp +@@ -104,6 +104,8 @@ namespace RenderManager { + convert["anaglyph_yellow_blue"] = 0u; + convert["block_lr"] = 0u; + convert["block_rl"] = 0u; ++ convert["mvc_lr"] = 0u; ++ convert["mvc_rl"] = 0u; + } + return convert[mode]; + } +@@ -125,6 +127,8 @@ namespace RenderManager { + convert["col_interleaved_lr"] = "col_interleaved_rl"; + convert["block_lr"] = "block_lr"; + convert["block_rl"] = "block_rl"; ++ convert["mvc_lr"] = "mvc_rl"; ++ convert["mvc_rl"] = "mvc_lr"; + } + std::string res = convert[mode]; + if(res.empty()) +diff --git a/xbmc/guilib/StereoscopicsManager.cpp b/xbmc/guilib/StereoscopicsManager.cpp +index 1443acaf0f25df458ae49766e13dd0323454f2eb..6eb0752994bc5f8c47efbbf211120af0a0720d0c 100644 +--- a/xbmc/guilib/StereoscopicsManager.cpp ++++ b/xbmc/guilib/StereoscopicsManager.cpp +@@ -72,6 +72,10 @@ static const struct StereoModeMap VideoModeToGuiModeMap[] = + { "anaglyph_yellow_blue", RENDER_STEREO_MODE_ANAGLYPH_YELLOW_BLUE }, + { "block_lr", RENDER_STEREO_MODE_OFF }, // unsupported + { "block_rl", RENDER_STEREO_MODE_OFF }, // unsupported ++ { "mvc_lr", RENDER_STEREO_MODE_HARDWAREBASED }, ++ { "mvc_rl", RENDER_STEREO_MODE_HARDWAREBASED }, ++ { "mvc_lr", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback ++ { "mvc_rl", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback + {} + }; + +@@ -310,7 +314,7 @@ int CStereoscopicsManager::ConvertVideoToGuiStereoMode(const std::string &mode) + size_t i = 0; + while (VideoModeToGuiModeMap[i].name) + { +- if 
(mode == VideoModeToGuiModeMap[i].name) ++ if (mode == VideoModeToGuiModeMap[i].name && g_Windowing.SupportsStereo(VideoModeToGuiModeMap[i].mode)) + return VideoModeToGuiModeMap[i].mode; + i++; + } + +From 0bcb7f56f0fa79c4d7af4c64e0b931a997045d72 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin <afedchin@ruswizards.com> +Date: Sat, 23 Jan 2016 10:21:32 +0300 +Subject: [PATCH 42/67] [VideoPlayer] Fix possible wrong aspect. + +--- + xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +index f6d1b8572c6a4a8b4a193ebfc9d36d85ccd2d819..6b97183835ce7d614e8814cb065ac168947f5ce1 100644 +--- a/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp ++++ b/xbmc/cores/VideoPlayer/VideoPlayerVideo.cpp +@@ -182,7 +182,7 @@ void CVideoPlayerVideo::OpenStream(CDVDStreamInfo &hint, CDVDVideoCodec* codec) + } + + // use aspect in stream if available +- if(hint.forced_aspect) ++ if (hint.forced_aspect && !std::isnan(hint.aspect)) + m_fForcedAspectRatio = hint.aspect; + else + m_fForcedAspectRatio = 0.0; + +From b409948c86ffdb3b000a82333be9c4ddeb45ddd7 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin <afedchin@ruswizards.com> +Date: Fri, 22 Jan 2016 18:18:33 +0300 +Subject: [PATCH 43/67] [VideoPlayer] DemuxFFmpeg: ssif remux + +--- + project/VS2010Express/XBMC.vcxproj | 2 + + project/VS2010Express/XBMC.vcxproj.filters | 8 +- + xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt | 2 + + .../VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 47 ++++++- + .../cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.h | 2 + + .../VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp | 156 +++++++++++++++++++++ + .../VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h | 49 +++++++ + xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in | 1 + + xbmc/settings/AdvancedSettings.cpp | 2 +- + 9 files changed, 260 insertions(+), 9 deletions(-) + create mode 100644 
xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp + create mode 100644 xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h + +diff --git a/project/VS2010Express/XBMC.vcxproj b/project/VS2010Express/XBMC.vcxproj +index 164b608f77e4848bda558daf44dede1fc18a8fb4..601c5848ab9bda32e90fced986cf61dad38800bf 100644 +--- a/project/VS2010Express/XBMC.vcxproj ++++ b/project/VS2010Express/XBMC.vcxproj +@@ -295,6 +295,7 @@ copy "..\Win32BuildSetup\dependencies\python27.dll" "$(TargetDir)"</Command> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxCDDA.cpp" /> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxClient.cpp" /> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DemuxMultiSource.cpp" /> ++ <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxStreamSSIF.cpp" /> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDInputStreams\DVDInputStreamBluray.cpp" /> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDInputStreams\InputStreamMultiSource.cpp" /> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDInputStreams\DVDInputStreamPVRManager.cpp" /> +@@ -1069,6 +1070,7 @@ copy "..\Win32BuildSetup\dependencies\python27.dll" "$(TargetDir)"</Command> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DemuxMultiSource.h" /> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxPacket.h" /> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\Process\ProcessInfo.h" /> ++ <ClInclude Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxStreamSSIF.h" /> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\VideoRenderers\BaseRenderer.h" /> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\VideoRenderers\DebugRenderer.h" /> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\VideoRenderers\HwDecRender\DXVAHD.h" /> +diff --git a/project/VS2010Express/XBMC.vcxproj.filters b/project/VS2010Express/XBMC.vcxproj.filters +index 
b3c53788819764a400ea53e12440ba229735819c..b2d5230fdcd32f6db50e580f55cd7a63d4d19247 100644 +--- a/project/VS2010Express/XBMC.vcxproj.filters ++++ b/project/VS2010Express/XBMC.vcxproj.filters +@@ -3452,6 +3452,9 @@ + <ClCompile Include="..\..\xbmc\dialogs\GUIDialogKeyboardTouch.cpp"> + <Filter>dialogs</Filter> + </ClCompile> ++ <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxStreamSSIF.cpp"> ++ <Filter>cores\VideoPlayer\DVDDemuxers</Filter> ++ </ClCompile> + </ItemGroup> + <ItemGroup> + <ClInclude Include="..\..\xbmc\win32\pch.h"> +@@ -6704,6 +6707,9 @@ + <ClInclude Include="..\..\xbmc\cores\AudioEngine\Engines\ActiveAE\AudioDSPAddons\ActiveAEDSP.h"> + <Filter>cores\AudioEngine\Engines\ActiveAE\AudioDSPAddons</Filter> + </ClInclude> ++ <ClInclude Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxStreamSSIF.h"> ++ <Filter>cores\VideoPlayer\DVDDemuxers</Filter> ++ </ClInclude> + </ItemGroup> + <ItemGroup> + <ResourceCompile Include="..\..\xbmc\win32\XBMC_PC.rc"> +@@ -6790,4 +6796,4 @@ + <Filter>shaders</Filter> + </FxCompile> + </ItemGroup> +-</Project> +\ No newline at end of file ++</Project> +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt b/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt +index 7d254f7650377485b909f26189d126455d49569a..65b369054c4ea329649a51f20f448394c70b110d 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt +@@ -5,6 +5,7 @@ set(SOURCES DemuxMultiSource.cpp + DVDDemuxCDDA.cpp + DVDDemuxClient.cpp + DVDDemuxFFmpeg.cpp ++ DVDDemuxStreamSSIF.cpp + DVDDemuxUtils.cpp + DVDDemuxVobsub.cpp + DVDFactoryDemuxer.cpp) +@@ -16,6 +17,7 @@ set(HEADERS DemuxMultiSource.h + DVDDemuxCDDA.h + DVDDemuxClient.h + DVDDemuxFFmpeg.h ++ DVDDemuxStreamSSIF.h + DVDDemuxPacket.h + DVDDemuxUtils.h + DVDDemuxVobsub.h +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 
16e8e270b5a060bd174f794480a8b178a620d490..4490b16318e1c54822cdbbf5fa6344d66c2fdbdd 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -164,6 +164,7 @@ CDVDDemuxFFmpeg::CDVDDemuxFFmpeg() : CDVDDemux() + m_currentPts = DVD_NOPTS_VALUE; + m_bMatroska = false; + m_bAVI = false; ++ m_pSSIF = nullptr; + m_speed = DVD_PLAYSPEED_NORMAL; + m_program = UINT_MAX; + m_pkt.result = -1; +@@ -537,6 +538,8 @@ void CDVDDemuxFFmpeg::Dispose() + m_pkt.result = -1; + av_packet_unref(&m_pkt.pkt); + ++ SAFE_DELETE(m_pSSIF); ++ + if (m_pFormatContext) + { + for (unsigned int i = 0; i < m_pFormatContext->nb_streams; i++) +@@ -587,6 +590,9 @@ void CDVDDemuxFFmpeg::Flush() + + m_displayTime = 0; + m_dtsAtDisplayTime = DVD_NOPTS_VALUE; ++ ++ if (m_pSSIF) ++ m_pSSIF->Flush(); + } + + void CDVDDemuxFFmpeg::Abort() +@@ -808,7 +814,9 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + { + Flush(); + } +- else if (IsProgramChange()) ++ // libavformat is confused by the interleaved SSIF. ++ // Disable program management for those ++ else if (!m_pSSIF && IsProgramChange()) + { + // update streams + CreateStreams(m_program); +@@ -836,6 +844,9 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + + m_pkt.result = -1; + av_packet_unref(&m_pkt.pkt); ++ ++ if (m_pSSIF) ++ m_pSSIF->Flush(); + } + else + { +@@ -845,7 +856,9 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + + if (IsVideoReady()) + { +- if (m_program != UINT_MAX) ++ // libavformat is confused by the interleaved SSIF. 
++ // Disable program management for those ++ if (!m_pSSIF && m_program != UINT_MAX ) + { + /* check so packet belongs to selected program */ + for (unsigned int i = 0; i < m_pFormatContext->programs[m_program]->nb_stream_indexes; i++) +@@ -994,6 +1007,15 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + stream = AddStream(pPacket->iStreamId); + } + } ++ if (stream && m_pSSIF) ++ { ++ if (stream->type == STREAM_VIDEO || ++ stream->type == STREAM_DATA) ++ pPacket = m_pSSIF->AddPacket(pPacket); ++ ++ if (stream->type == STREAM_DATA && stream->codec == AV_CODEC_ID_H264_MVC && pPacket->iSize) ++ stream = GetStream(pPacket->iStreamId); ++ } + if (!stream) + { + CLog::Log(LOGERROR, "CDVDDemuxFFmpeg::AddStream - internal error, stream is null"); +@@ -1018,6 +1040,9 @@ bool CDVDDemuxFFmpeg::SeekTime(int time, bool backwords, double *startpts) + m_pkt.result = -1; + av_packet_unref(&m_pkt.pkt); + ++ if (m_pSSIF) ++ m_pSSIF->Flush(); ++ + CDVDInputStream::IPosTime* ist = m_pInput->GetIPosTime(); + if (ist) + { +@@ -1085,6 +1110,9 @@ bool CDVDDemuxFFmpeg::SeekByte(int64_t pos) + m_pkt.result = -1; + av_packet_unref(&m_pkt.pkt); + ++ if (m_pSSIF) ++ m_pSSIF->Flush(); ++ + return (ret >= 0); + } + +@@ -1252,11 +1280,12 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + { + if (pStream->codec->codec_id == AV_CODEC_ID_H264_MVC) + { +- // ignore MVC extension streams, they are handled specially ++ m_pSSIF = new CDVDDemuxStreamSSIF(); ++ m_pSSIF->SetMVCStreamId(streamIdx); ++ + stream = new CDemuxStream(); + stream->type = STREAM_DATA; +- stream->disabled = true; +- pStream->need_parsing = AVSTREAM_PARSE_NONE; ++ pStream->codec->codec_type = AVMEDIA_TYPE_DATA; + break; + } + CDemuxStreamVideoFFmpeg* st = new CDemuxStreamVideoFFmpeg(this, pStream); +@@ -1342,7 +1371,11 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + { + if (CDVDCodecUtils::IsH264AnnexB(m_pFormatContext->iformat->name, pStream)) + { +- // TODO ++ if (m_pSSIF) ++ { ++ 
m_pSSIF->SetH264StreamId(streamIdx); ++ pStream->codec->codec_tag = MKTAG('A', 'M', 'V', 'C'); ++ } + } + else if (CDVDCodecUtils::ProcessH264MVCExtradata(pStream->codec->extradata, pStream->codec->extradata_size)) + { +@@ -1435,7 +1468,7 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + if (langTag) + strncpy(stream->language, langTag->value, 3); + +- if( stream->type != STREAM_NONE && pStream->codec->extradata && pStream->codec->extradata_size > 0 ) ++ if (stream->type != STREAM_NONE && pStream->codec->extradata && pStream->codec->extradata_size > 0) + { + stream->ExtraSize = pStream->codec->extradata_size; + stream->ExtraData = new uint8_t[pStream->codec->extradata_size]; +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.h +index 60cfc6fe48df93210d6bb5a12f85af571dfa1f72..dae871cff339e085cf2aa6d8d921d20b0db03132 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.h ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.h +@@ -21,6 +21,7 @@ + */ + + #include "DVDDemux.h" ++#include "DVDDemuxStreamSSIF.h" + #include "threads/CriticalSection.h" + #include "threads/SystemClock.h" + #include <map> +@@ -152,6 +153,7 @@ protected: + double m_currentPts; // used for stream length estimation + bool m_bMatroska; + bool m_bAVI; ++ CDVDDemuxStreamSSIF* m_pSSIF; + int m_speed; + unsigned m_program; + XbmcThreads::EndTime m_timeout; +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp +new file mode 100644 +index 0000000000000000000000000000000000000000..e99352a90f348a95673ef3442d3f6cb020cd57d4 +--- /dev/null ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp +@@ -0,0 +1,156 @@ ++/* ++* Copyright (C) 2005-2013 Team XBMC ++* http://xbmc.org ++* ++* This Program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License as published by ++* the 
Free Software Foundation; either version 2, or (at your option) ++* any later version. ++* ++* This Program is distributed in the hope that it will be useful, ++* but WITHOUT ANY WARRANTY; without even the implied warranty of ++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++* GNU General Public License for more details. ++* ++* You should have received a copy of the GNU General Public License ++* along with XBMC; see the file COPYING. If not, see ++* <http://www.gnu.org/licenses/>. ++* ++*/ ++ ++#include "DVDDemuxStreamSSIF.h" ++#include "DVDClock.h" ++#include "DVDDemuxUtils.h" ++#include "utils/log.h" ++ ++//#define DEBUG_VERBOSE ++ ++DemuxPacket* CDVDDemuxStreamSSIF::AddPacket(DemuxPacket* &srcPkt) ++{ ++ if (srcPkt->iStreamId != m_h264StreamId && ++ srcPkt->iStreamId != m_mvcStreamId) ++ return srcPkt; ++ ++ if (srcPkt->iStreamId == m_h264StreamId) ++ { ++ m_H264queue.push(srcPkt); ++ } ++ else if (srcPkt->iStreamId == m_mvcStreamId) ++ { ++ m_MVCqueue.push(srcPkt); ++ } ++ ++ return GetMVCPacket(); ++} ++ ++void CDVDDemuxStreamSSIF::Flush() ++{ ++ while (!m_H264queue.empty()) ++ { ++ CDVDDemuxUtils::FreeDemuxPacket(m_H264queue.front()); ++ m_H264queue.pop(); ++ } ++ while (!m_MVCqueue.empty()) ++ { ++ CDVDDemuxUtils::FreeDemuxPacket(m_MVCqueue.front()); ++ m_MVCqueue.pop(); ++ } ++} ++ ++DemuxPacket* CDVDDemuxStreamSSIF::MergePacket(DemuxPacket* &srcPkt, DemuxPacket* &appendPkt) ++{ ++ DemuxPacket* newpkt = NULL; ++ newpkt = CDVDDemuxUtils::AllocateDemuxPacket(srcPkt->iSize + appendPkt->iSize); ++ newpkt->iSize = srcPkt->iSize + appendPkt->iSize; ++ ++ newpkt->pts = srcPkt->pts; ++ newpkt->dts = srcPkt->dts; ++ newpkt->duration = srcPkt->duration; ++ newpkt->iGroupId = srcPkt->iGroupId; ++ newpkt->iStreamId = srcPkt->iStreamId; ++ memcpy(newpkt->pData, srcPkt->pData, srcPkt->iSize); ++ memcpy(newpkt->pData + srcPkt->iSize, appendPkt->pData, appendPkt->iSize); ++ ++ CDVDDemuxUtils::FreeDemuxPacket(srcPkt); ++ srcPkt = NULL; ++ 
CDVDDemuxUtils::FreeDemuxPacket(appendPkt); ++ appendPkt = NULL; ++ ++ return newpkt; ++} ++ ++DemuxPacket* CDVDDemuxStreamSSIF::GetMVCPacket() ++{ ++ // Here, we recreate a h264 MVC packet from the base one + buffered MVC NALU's ++ while (!m_H264queue.empty() && !m_MVCqueue.empty()) ++ { ++ DemuxPacket* h264pkt = m_H264queue.front(); ++ double tsH264 = (h264pkt->dts != DVD_NOPTS_VALUE ? h264pkt->dts : h264pkt->pts); ++ DemuxPacket* mvcpkt = m_MVCqueue.front(); ++ double tsMVC = (mvcpkt->dts != DVD_NOPTS_VALUE ? mvcpkt->dts : mvcpkt->pts); ++ ++ if (tsH264 == tsMVC) ++ { ++ m_H264queue.pop(); ++ m_MVCqueue.pop(); ++ ++ while (!m_H264queue.empty()) ++ { ++ DemuxPacket* pkt = m_H264queue.front(); ++ double ts = (pkt->dts != DVD_NOPTS_VALUE ? pkt->dts : pkt->pts); ++ if (ts == DVD_NOPTS_VALUE) ++ { ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC merge h264 fragment: %6d+%6d, pts(%.3f/%.3f) dts(%.3f/%.3f)", h264pkt->iSize, pkt->iSize, h264pkt->pts*1e-6, pkt->pts*1e-6, h264pkt->dts*1e-6, pkt->dts*1e-6); ++#endif ++ h264pkt = MergePacket(h264pkt, pkt); ++ m_H264queue.pop(); ++ } ++ else ++ break; ++ } ++ while (!m_MVCqueue.empty()) ++ { ++ DemuxPacket* pkt = m_MVCqueue.front(); ++ double ts = (pkt->dts != DVD_NOPTS_VALUE ? 
pkt->dts : pkt->pts); ++ if (ts == DVD_NOPTS_VALUE) ++ { ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC merge mvc fragment: %6d+%6d, pts(%.3f/%.3f) dts(%.3f/%.3f)", mvcpkt->iSize, pkt->iSize, mvcpkt->pts*1e-6, pkt->pts*1e-6, mvcpkt->dts*1e-6, pkt->dts*1e-6); ++#endif ++ mvcpkt = MergePacket(mvcpkt, pkt); ++ m_MVCqueue.pop(); ++ } ++ else ++ break; ++ } ++ ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC merge packet: %6d+%6d, pts(%.3f/%.3f) dts(%.3f/%.3f)", h264pkt->iSize, mvcpkt->iSize, h264pkt->pts*1e-6, mvcpkt->pts*1e-6, h264pkt->dts*1e-6, mvcpkt->dts*1e-6); ++#endif ++ return MergePacket(h264pkt, mvcpkt); ++ } ++ else if (tsH264 > tsMVC) ++ { ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC discard mvc: %6d, pts(%.3f) dts(%.3f)", mvcpkt->iSize, mvcpkt->pts*1e-6, mvcpkt->dts*1e-6); ++#endif ++ CDVDDemuxUtils::FreeDemuxPacket(mvcpkt); ++ m_MVCqueue.pop(); ++ } ++ else ++ { ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC discard h264: %6d, pts(%.3f) dts(%.3f)", h264pkt->iSize, h264pkt->pts*1e-6, h264pkt->dts*1e-6); ++#endif ++ CDVDDemuxUtils::FreeDemuxPacket(h264pkt); ++ m_H264queue.pop(); ++ } ++ } ++ ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC waiting. 
MVC(%d) H264(%d)", m_MVCqueue.size(), m_H264queue.size()); ++#endif ++ ++ return CDVDDemuxUtils::AllocateDemuxPacket(0); ++} +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h +new file mode 100644 +index 0000000000000000000000000000000000000000..8412627a3ea13f59bd2c96c23bd386e4b5b2658e +--- /dev/null ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h +@@ -0,0 +1,49 @@ ++#pragma once ++ ++/* ++* Copyright (C) 2005-2013 Team XBMC ++* http://xbmc.org ++* ++* This Program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License as published by ++* the Free Software Foundation; either version 2, or (at your option) ++* any later version. ++* ++* This Program is distributed in the hope that it will be useful, ++* but WITHOUT ANY WARRANTY; without even the implied warranty of ++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++* GNU General Public License for more details. ++* ++* You should have received a copy of the GNU General Public License ++* along with XBMC; see the file COPYING. If not, see ++* <http://www.gnu.org/licenses/>. 
++* ++*/ ++ ++#include "DVDDemuxPacket.h" ++#include <queue> ++ ++extern "C" { ++#include "libavformat/avformat.h" ++} ++ ++class CDVDDemuxStreamSSIF ++{ ++public: ++ CDVDDemuxStreamSSIF() {}; ++ ~CDVDDemuxStreamSSIF() { Flush(); } ++ ++ DemuxPacket* AddPacket(DemuxPacket* &scrPkt); ++ void Flush(); ++ void SetH264StreamId(int id) { m_h264StreamId = id; }; ++ void SetMVCStreamId(int id) { m_mvcStreamId = id; }; ++ ++private: ++ DemuxPacket* GetMVCPacket(); ++ DemuxPacket* MergePacket(DemuxPacket* &srcPkt, DemuxPacket* &appendPkt); ++ ++ std::queue<DemuxPacket*> m_H264queue; ++ std::queue<DemuxPacket*> m_MVCqueue; ++ int m_h264StreamId = 0; ++ int m_mvcStreamId = 0; ++}; +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in b/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in +index e4f8aed0af96fe0dceec4d8517087742f2c7df81..f3b717ddabb4729fe0db5ebab5a7913b8fe8297c 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in +@@ -10,6 +10,7 @@ SRCS += DVDDemuxUtils.cpp + SRCS += DVDDemuxVobsub.cpp + SRCS += DVDDemuxCC.cpp + SRCS += DVDFactoryDemuxer.cpp ++SRCS += DVDDemuxStreamSSIF.cpp + + LIB = DVDDemuxers.a + +diff --git a/xbmc/settings/AdvancedSettings.cpp b/xbmc/settings/AdvancedSettings.cpp +index ae21da29314ae8faa35129a79e62e82b55fbc306..8426b6c3f8f6af274e2990c8da323e4064db9b65 100644 +--- a/xbmc/settings/AdvancedSettings.cpp ++++ b/xbmc/settings/AdvancedSettings.cpp +@@ -392,7 +392,7 @@ void CAdvancedSettings::Initialize() + + m_pictureExtensions = ".png|.jpg|.jpeg|.bmp|.gif|.ico|.tif|.tiff|.tga|.pcx|.cbz|.zip|.cbr|.rar|.rss|.webp|.jp2|.apng"; + m_musicExtensions = 
".nsv|.m4a|.flac|.aac|.strm|.pls|.rm|.rma|.mpa|.wav|.wma|.ogg|.mp3|.mp2|.m3u|.gdm|.imf|.m15|.sfx|.uni|.ac3|.dts|.cue|.aif|.aiff|.wpl|.ape|.mac|.mpc|.mp+|.mpp|.shn|.zip|.rar|.wv|.dsp|.xsp|.xwav|.waa|.wvs|.wam|.gcm|.idsp|.mpdsp|.mss|.spt|.rsd|.sap|.cmc|.cmr|.dmc|.mpt|.mpd|.rmt|.tmc|.tm8|.tm2|.oga|.url|.pxml|.tta|.rss|.wtv|.mka|.tak|.opus|.dff|.dsf"; +- m_videoExtensions = ".m4v|.3g2|.3gp|.nsv|.tp|.ts|.ty|.strm|.pls|.rm|.rmvb|.mpd|.m3u|.m3u8|.ifo|.mov|.qt|.divx|.xvid|.bivx|.vob|.nrg|.img|.iso|.pva|.wmv|.asf|.asx|.ogm|.m2v|.avi|.bin|.dat|.mpg|.mpeg|.mp4|.mkv|.mk3d|.avc|.vp3|.svq3|.nuv|.viv|.dv|.fli|.flv|.rar|.001|.wpl|.zip|.vdr|.dvr-ms|.xsp|.mts|.m2t|.m2ts|.evo|.ogv|.sdp|.avs|.rec|.url|.pxml|.vc1|.h264|.rcv|.rss|.mpls|.webm|.bdmv|.wtv"; ++ m_videoExtensions = ".m4v|.3g2|.3gp|.nsv|.tp|.ts|.ty|.strm|.pls|.rm|.rmvb|.mpd|.m3u|.m3u8|.ifo|.mov|.qt|.divx|.xvid|.bivx|.vob|.nrg|.img|.iso|.pva|.wmv|.asf|.asx|.ogm|.m2v|.avi|.bin|.dat|.mpg|.mpeg|.mp4|.mkv|.mk3d|.avc|.vp3|.svq3|.nuv|.viv|.dv|.fli|.flv|.rar|.001|.wpl|.zip|.vdr|.dvr-ms|.xsp|.mts|.m2t|.m2ts|.evo|.ogv|.sdp|.avs|.rec|.url|.pxml|.vc1|.h264|.rcv|.rss|.mpls|.webm|.bdmv|.wtv|.ssif"; + m_subtitlesExtensions = ".utf|.utf8|.utf-8|.sub|.srt|.smi|.rt|.txt|.ssa|.text|.ssa|.aqt|.jss|.ass|.idx|.ifo|.rar|.zip"; + m_discStubExtensions = ".disc"; + // internal music extensions + +From ac2167deb4a7e8408903ca2aab446b3d0d954fa7 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin <afedchin@ruswizards.com> +Date: Tue, 23 Feb 2016 16:01:08 +0300 +Subject: [PATCH 44/67] [libbluray] bump libbluray to 0.9.2-mvc. 
+ +--- + project/BuildDependencies/scripts/0_package.list | 2 +- + xbmc/DllPaths_win32.h | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/project/BuildDependencies/scripts/0_package.list b/project/BuildDependencies/scripts/0_package.list +index 71024bfb7da48ddb033b159f83037319176229b4..2565d1d08f6591955266fcca3f1a8031db4379e4 100644 +--- a/project/BuildDependencies/scripts/0_package.list ++++ b/project/BuildDependencies/scripts/0_package.list +@@ -16,7 +16,7 @@ freetype-2.4.6-win32-3.7z + giflib-5.1.4-win32-vc140.7z + jsonschemabuilder-1.0.0-win32-3.7z + libass-0.12.1-win32.7z +-libbluray-0.8.1-win32-vc120.7z ++libbluray-0.9.2-mvc-win32-vc120.7z + libcdio-0.83-win32-2.7z + libcec-3.0.0-win32-2.7z + libexpat_2.0.1-win32.7z +diff --git a/xbmc/DllPaths_win32.h b/xbmc/DllPaths_win32.h +index 3748589f39b1f83f1e23e9eb4f64eddcf61cb030..ff34ff541049ad7d2fa5472c49e6412e0d68056b 100644 +--- a/xbmc/DllPaths_win32.h ++++ b/xbmc/DllPaths_win32.h +@@ -35,7 +35,7 @@ + #define DLL_PATH_LIBDVDNAV "special://xbmcbin/system/players/VideoPlayer/libdvdnav.dll" + + /* libbluray */ +-#define DLL_PATH_LIBBLURAY "special://xbmcbin/system/players/dvdplayer/libbluray.dll" ++#define DLL_PATH_LIBBLURAY "special://xbmcbin/system/players/VideoPlayer/libbluray.dll" + + #endif + + +From 6bb5fcf3d003296bbe290c171577bb65ba6ea04d Mon Sep 17 00:00:00 2001 +From: Anton Fedchin <afedchin@ruswizards.com> +Date: Tue, 23 Feb 2016 16:02:46 +0300 +Subject: [PATCH 45/67] [3DBD] Added support of 3D-BluRay playback. 
+ +--- + lib/DllLibbluray.h | 8 + + project/VS2010Express/XBMC.vcxproj | 2 + + project/VS2010Express/XBMC.vcxproj.filters | 6 + + xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt | 2 + + .../VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 57 ++++- + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.cpp | 262 +++++++++++++++++++++ + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.h | 57 +++++ + .../VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp | 40 +++- + .../VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h | 12 +- + xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in | 1 + + .../DVDInputStreams/DVDInputStreamBluray.cpp | 159 +++++++++++-- + .../DVDInputStreams/DVDInputStreamBluray.h | 20 ++ + 12 files changed, 591 insertions(+), 35 deletions(-) + create mode 100644 xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.cpp + create mode 100644 xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.h + +diff --git a/lib/DllLibbluray.h b/lib/DllLibbluray.h +index f5a337fe19beff472557c97ff7a203ad30a912b2..03f93391265e164837c2a17a8fe6d7da41c2f13e 100644 +--- a/lib/DllLibbluray.h ++++ b/lib/DllLibbluray.h +@@ -31,6 +31,8 @@ extern "C" + #include <libbluray/log_control.h> + #include <libbluray/keys.h> + #include <libbluray/overlay.h> ++#include <libbluray/clpi_parse.h> ++#include <libbluray/mpls_parse.h> + } + + class DllLibblurayInterface +@@ -80,6 +82,8 @@ public: + #endif + virtual int bd_menu_call (BLURAY *bd, int64_t pts)=0; + virtual int bd_mouse_select (BLURAY *bd, int64_t pts, uint16_t x, uint16_t y)=0; ++ virtual MPLS_PL* bd_get_title_mpls (BLURAY *bd) = 0; ++ virtual int bd_get_clip_infos (BLURAY *bd, unsigned clip, uint64_t *clip_start_time, uint64_t *stream_start_time, uint64_t *pos, uint64_t *duration) = 0; + }; + + class DllLibbluray : public DllDynamic, DllLibblurayInterface +@@ -128,6 +132,8 @@ class DllLibbluray : public DllDynamic, DllLibblurayInterface + #endif + DEFINE_METHOD2(int, bd_menu_call, (BLURAY *p1, int64_t p2)) + DEFINE_METHOD4(int, bd_mouse_select, (BLURAY *p1, int64_t 
p2, uint16_t p3, uint16_t p4)) ++ DEFINE_METHOD1(MPLS_PL*, bd_get_title_mpls, (BLURAY *p1)) ++ DEFINE_METHOD6(int, bd_get_clip_infos, (BLURAY *p1, unsigned p2, uint64_t *p3, uint64_t *p4, uint64_t *p5, uint64_t *p6)) + + BEGIN_METHOD_RESOLVE() + RESOLVE_METHOD(bd_get_titles) +@@ -172,6 +178,8 @@ class DllLibbluray : public DllDynamic, DllLibblurayInterface + #endif + RESOLVE_METHOD(bd_menu_call) + RESOLVE_METHOD(bd_mouse_select) ++ RESOLVE_METHOD(bd_get_title_mpls) ++ RESOLVE_METHOD(bd_get_clip_infos) + END_METHOD_RESOLVE() + + public: +diff --git a/project/VS2010Express/XBMC.vcxproj b/project/VS2010Express/XBMC.vcxproj +index 601c5848ab9bda32e90fced986cf61dad38800bf..189b698f57d1e2bbb50dd7541136309c59a1fb84 100644 +--- a/project/VS2010Express/XBMC.vcxproj ++++ b/project/VS2010Express/XBMC.vcxproj +@@ -295,6 +295,7 @@ copy "..\Win32BuildSetup\dependencies\python27.dll" "$(TargetDir)"</Command> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxCDDA.cpp" /> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxClient.cpp" /> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DemuxMultiSource.cpp" /> ++ <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxMVC.cpp" /> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxStreamSSIF.cpp" /> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDInputStreams\DVDInputStreamBluray.cpp" /> + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDInputStreams\InputStreamMultiSource.cpp" /> +@@ -1070,6 +1071,7 @@ copy "..\Win32BuildSetup\dependencies\python27.dll" "$(TargetDir)"</Command> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DemuxMultiSource.h" /> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxPacket.h" /> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\Process\ProcessInfo.h" /> ++ <ClInclude Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxMVC.h" /> + <ClInclude 
Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxStreamSSIF.h" /> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\VideoRenderers\BaseRenderer.h" /> + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\VideoRenderers\DebugRenderer.h" /> +diff --git a/project/VS2010Express/XBMC.vcxproj.filters b/project/VS2010Express/XBMC.vcxproj.filters +index b2d5230fdcd32f6db50e580f55cd7a63d4d19247..65d8e075716c05669c1e5665de9e3ba0ac1188ea 100644 +--- a/project/VS2010Express/XBMC.vcxproj.filters ++++ b/project/VS2010Express/XBMC.vcxproj.filters +@@ -3455,6 +3455,9 @@ + <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxStreamSSIF.cpp"> + <Filter>cores\VideoPlayer\DVDDemuxers</Filter> + </ClCompile> ++ <ClCompile Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxMVC.cpp"> ++ <Filter>cores\VideoPlayer\DVDDemuxers</Filter> ++ </ClCompile> + </ItemGroup> + <ItemGroup> + <ClInclude Include="..\..\xbmc\win32\pch.h"> +@@ -6710,6 +6713,9 @@ + <ClInclude Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxStreamSSIF.h"> + <Filter>cores\VideoPlayer\DVDDemuxers</Filter> + </ClInclude> ++ <ClInclude Include="..\..\xbmc\cores\VideoPlayer\DVDDemuxers\DVDDemuxMVC.h"> ++ <Filter>cores\VideoPlayer\DVDDemuxers</Filter> ++ </ClInclude> + </ItemGroup> + <ItemGroup> + <ResourceCompile Include="..\..\xbmc\win32\XBMC_PC.rc"> +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt b/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt +index 65b369054c4ea329649a51f20f448394c70b110d..2706bcadc177a4f8f9c12c3be7976f7a0f81fc8f 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/CMakeLists.txt +@@ -5,6 +5,7 @@ set(SOURCES DemuxMultiSource.cpp + DVDDemuxCDDA.cpp + DVDDemuxClient.cpp + DVDDemuxFFmpeg.cpp ++ DVDDemuxMVC.cpp + DVDDemuxStreamSSIF.cpp + DVDDemuxUtils.cpp + DVDDemuxVobsub.cpp +@@ -17,6 +18,7 @@ set(HEADERS DemuxMultiSource.h + DVDDemuxCDDA.h + DVDDemuxClient.h + DVDDemuxFFmpeg.h ++ DVDDemuxMVC.h + 
DVDDemuxStreamSSIF.h + DVDDemuxPacket.h + DVDDemuxUtils.h +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 4490b16318e1c54822cdbbf5fa6344d66c2fdbdd..54e4d0b66680a08c1e4c1be343fabe4371aec6af 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -27,6 +27,7 @@ + #include "cores/FFmpeg.h" + #include "DVDCodecs/DVDCodecUtils.h" + #include "DVDClock.h" // for DVD_TIME_BASE ++#include "DVDDemuxMVC.h" + #include "DVDDemuxUtils.h" + #include "DVDInputStreams/DVDInputStream.h" + #include "DVDInputStreams/DVDInputStreamFFmpeg.h" +@@ -497,6 +498,16 @@ bool CDVDDemuxFFmpeg::Open(CDVDInputStream* pInput, bool streaminfo, bool filein + + UpdateCurrentPTS(); + ++ if (!fileinfo && m_pInput->IsStreamType(DVDSTREAM_TYPE_BLURAY)) ++ { ++ CDVDInputStreamBluray *bluRay = static_cast<CDVDInputStreamBluray*>(m_pInput); ++ if (bluRay->HasMVC()) ++ { ++ SAFE_DELETE(m_pSSIF); ++ m_pSSIF = new CDVDDemuxStreamSSIF(); ++ m_pSSIF->SetBluRay(bluRay); ++ } ++ } + // in case of mpegts and we have not seen pat/pmt, defer creation of streams + if (!skipCreateStreams || m_pFormatContext->nb_programs > 0) + { +@@ -814,9 +825,7 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + { + Flush(); + } +- // libavformat is confused by the interleaved SSIF. +- // Disable program management for those +- else if (!m_pSSIF && IsProgramChange()) ++ else if (IsProgramChange()) + { + // update streams + CreateStreams(m_program); +@@ -857,8 +866,7 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + if (IsVideoReady()) + { + // libavformat is confused by the interleaved SSIF. 
+- // Disable program management for those +- if (!m_pSSIF && m_program != UINT_MAX ) ++ if ((!m_pSSIF || m_pSSIF->IsBluRay()) && m_program != UINT_MAX) + { + /* check so packet belongs to selected program */ + for (unsigned int i = 0; i < m_pFormatContext->programs[m_program]->nb_stream_indexes; i++) +@@ -1009,10 +1017,7 @@ DemuxPacket* CDVDDemuxFFmpeg::Read() + } + if (stream && m_pSSIF) + { +- if (stream->type == STREAM_VIDEO || +- stream->type == STREAM_DATA) +- pPacket = m_pSSIF->AddPacket(pPacket); +- ++ pPacket = m_pSSIF->AddPacket(pPacket); + if (stream->type == STREAM_DATA && stream->codec == AV_CODEC_ID_H264_MVC && pPacket->iSize) + stream = GetStream(pPacket->iStreamId); + } +@@ -1375,6 +1380,29 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + { + m_pSSIF->SetH264StreamId(streamIdx); + pStream->codec->codec_tag = MKTAG('A', 'M', 'V', 'C'); ++ ++ AVStream* mvcStream = nullptr; ++ if (m_pInput->IsStreamType(DVDSTREAM_TYPE_BLURAY)) ++ { ++ CDVDInputStreamBluray *bluRay = static_cast<CDVDInputStreamBluray*>(m_pInput); ++ if (bluRay->HasMVC()) ++ { ++ st->stereo_mode = bluRay->AreEyesFlipped() ? 
"mvc_rl" : "mvc_lr"; ++ mvcStream = static_cast<CDVDDemuxMVC*>(bluRay->GetDemuxMVC())->GetAVStream(); ++ } ++ } ++ else ++ mvcStream = m_pFormatContext->streams[m_pSSIF->GetMVCStreamId()]; ++ ++ if (mvcStream && pStream->codec->extradata_size > 0 && mvcStream->codec->extradata_size > 0) ++ { ++ uint8_t* extr = pStream->codec->extradata; ++ pStream->codec->extradata = (uint8_t*)av_mallocz(pStream->codec->extradata_size + mvcStream->codec->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE); ++ memcpy(pStream->codec->extradata, extr, pStream->codec->extradata_size); ++ memcpy(pStream->codec->extradata + pStream->codec->extradata_size, mvcStream->codec->extradata, mvcStream->codec->extradata_size); ++ pStream->codec->extradata_size += mvcStream->codec->extradata_size; ++ av_free(extr); ++ } + } + } + else if (CDVDCodecUtils::ProcessH264MVCExtradata(pStream->codec->extradata, pStream->codec->extradata_size)) +@@ -1635,6 +1663,12 @@ bool CDVDDemuxFFmpeg::SeekChapter(int chapter, double* startpts) + } + + Flush(); ++ if (m_pInput->IsStreamType(DVDSTREAM_TYPE_BLURAY) ++ && static_cast<CDVDInputStreamBluray*>(m_pInput)->HasMVC()) ++ { ++ // also empty the internal ffmpeg buffer otherwise it may cause MVC buffers hang ++ m_ioContext->buf_ptr = m_ioContext->buf_end; ++ } + return true; + } + +@@ -1704,6 +1738,11 @@ std::string CDVDDemuxFFmpeg::GetStreamCodecName(int iStreamId) + + bool CDVDDemuxFFmpeg::IsProgramChange() + { ++ // libavformat is confused by the interleaved SSIF. 
++ // disable program management for those ++ if (m_pSSIF && !m_pSSIF->IsBluRay()) ++ return false; ++ + if (m_program == UINT_MAX) + return false; + +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.cpp +new file mode 100644 +index 0000000000000000000000000000000000000000..4ed7c439e7c36de211f2136c9b6b9a95549fe634 +--- /dev/null ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.cpp +@@ -0,0 +1,262 @@ ++/* ++* Copyright (C) 2005-2013 Team XBMC ++* http://xbmc.org ++* ++* This Program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License as published by ++* the Free Software Foundation; either version 2, or (at your option) ++* any later version. ++* ++* This Program is distributed in the hope that it will be useful, ++* but WITHOUT ANY WARRANTY; without even the implied warranty of ++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++* GNU General Public License for more details. ++* ++* You should have received a copy of the GNU General Public License ++* along with XBMC; see the file COPYING. If not, see ++* <http://www.gnu.org/licenses/>. 
++* ++*/ ++ ++#include "DVDDemuxMVC.h" ++#include "DVDDemuxUtils.h" ++#include "DVDInputStreams/DVDInputStream.h" ++#include "DVDClock.h" ++#include "cores/FFmpeg.h" ++#include "utils/log.h" ++ ++extern "C" { ++#include "libavutil/opt.h" ++}; ++ ++#define MVC_SEEK_TIME_WINDOW 75000 // experimental value depends on seeking accurate ++ ++static int mvc_file_read(void *h, uint8_t* buf, int size) ++{ ++ CDVDInputStream* pInputStream = static_cast<CDVDDemuxMVC*>(h)->m_pInput; ++ return pInputStream->Read(buf, size); ++} ++ ++static int64_t mvc_file_seek(void *h, int64_t pos, int whence) ++{ ++ CDVDInputStream* pInputStream = static_cast<CDVDDemuxMVC*>(h)->m_pInput; ++ if (whence == AVSEEK_SIZE) ++ return pInputStream->GetLength(); ++ else ++ return pInputStream->Seek(pos, whence & ~AVSEEK_FORCE); ++} ++ ++CDVDDemuxMVC::CDVDDemuxMVC() ++{ ++ m_ioContext = nullptr; ++ m_pFormatContext = nullptr; ++ m_pInput = nullptr; ++ m_nStreamIndex = -1; ++} ++ ++CDVDDemuxMVC::~CDVDDemuxMVC() ++{ ++ Dispose(); ++} ++ ++bool CDVDDemuxMVC::Open(CDVDInputStream* pInput) ++{ ++ int ret; ++ ++ if (!pInput) ++ return false; ++ m_pInput = pInput; ++ ++ unsigned char* buffer = (unsigned char*)av_malloc(FFMPEG_FILE_BUFFER_SIZE); ++ m_ioContext = avio_alloc_context(buffer, FFMPEG_FILE_BUFFER_SIZE, 0, this, mvc_file_read, NULL, mvc_file_seek); ++ m_ioContext->max_packet_size = m_pInput->GetBlockSize(); ++ if (m_ioContext->max_packet_size) ++ m_ioContext->max_packet_size *= FFMPEG_FILE_BUFFER_SIZE / m_ioContext->max_packet_size; ++ ++ m_pFormatContext = avformat_alloc_context(); ++ m_pFormatContext->pb = m_ioContext; ++ ++ AVInputFormat *format = av_find_input_format("mpegts"); ++ ret = avformat_open_input(&m_pFormatContext, m_pInput->GetFileName().c_str(), format, nullptr); ++ if (ret < 0) ++ { ++ CLog::Log(LOGDEBUG, "%s: Opening MVC demuxing context failed (%d)", __FUNCTION__, ret); ++ Dispose(); ++ return false; ++ } ++ ++ av_opt_set_int(m_pFormatContext, "analyzeduration", 500000, 0); ++ 
av_opt_set_int(m_pFormatContext, "correct_ts_overflow", 0, 0); ++ m_pFormatContext->flags |= AVFMT_FLAG_KEEP_SIDE_DATA; ++ ++ // Find the streams ++ ret = avformat_find_stream_info(m_pFormatContext, nullptr); ++ //it always returns -1 so just ignore it ++ //if (ret < 0) ++ //{ ++ // CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::OpenMVCDemuxer(): avformat_find_stream_info failed (%d)", ret); ++ // Dispose(); ++ // return false; ++ //} ++ ++ // print some extra information ++ av_dump_format(m_pFormatContext, 0, m_pInput->GetFileName().c_str(), 0); ++ ++ // Find and select our MVC stream ++ CLog::Log(LOGDEBUG, "%s: MVC m2ts has %d streams", __FUNCTION__, m_pFormatContext->nb_streams); ++ for (unsigned i = 0; i < m_pFormatContext->nb_streams; i++) ++ { ++ if (m_pFormatContext->streams[i]->codec->codec_id == AV_CODEC_ID_H264_MVC ++ && m_pFormatContext->streams[i]->codec->extradata_size > 0) ++ { ++ m_nStreamIndex = i; ++ break; ++ } ++ else ++ m_pFormatContext->streams[i]->discard = AVDISCARD_ALL; ++ } ++ ++ if (m_nStreamIndex < 0) ++ { ++ CLog::Log(LOGDEBUG, "%s: MVC Stream not found", __FUNCTION__); ++ Dispose(); ++ return false; ++ } ++ ++ return true; ++} ++ ++void CDVDDemuxMVC::Reset() ++{ ++ CDVDInputStream* pInput = m_pInput; ++ Dispose(); ++ Open(pInput); ++} ++ ++void CDVDDemuxMVC::Abort() ++{ ++} ++ ++void CDVDDemuxMVC::Flush() ++{ ++ if (m_pFormatContext) ++ avformat_flush(m_pFormatContext); ++} ++ ++DemuxPacket* CDVDDemuxMVC::Read() ++{ ++ int ret; ++ AVPacket mvcPacket = { 0 }; ++ av_init_packet(&mvcPacket); ++ ++ while (m_pFormatContext) // context is null after Dispose() or a failed Open()/Reset(); fall through and return nullptr ++ { ++ ret = av_read_frame(m_pFormatContext, &mvcPacket); ++ ++ if (ret == AVERROR(EINTR) || ret == AVERROR(EAGAIN)) ++ continue; ++ else if (ret < 0) // EOF or any other read error: stop here instead of retrying forever on a blank packet ++ break; ++ else if (mvcPacket.size <= 0 || mvcPacket.stream_index != m_nStreamIndex) ++ { ++ av_packet_unref(&mvcPacket); ++ continue; ++ } ++ else ++ { ++ AVStream *stream = m_pFormatContext->streams[mvcPacket.stream_index]; ++ double dts = ConvertTimestamp(mvcPacket.dts, 
stream->time_base.den, stream->time_base.num); ++ double pts = ConvertTimestamp(mvcPacket.pts, stream->time_base.den, stream->time_base.num); ++ ++ DemuxPacket* newPkt = CDVDDemuxUtils::AllocateDemuxPacket(mvcPacket.size); ++ if (mvcPacket.data) ++ memcpy(newPkt->pData, mvcPacket.data, mvcPacket.size); ++ newPkt->iSize = mvcPacket.size; ++ newPkt->dts = dts; ++ newPkt->pts = pts; ++ newPkt->iStreamId = stream->id; ++ ++ av_packet_unref(&mvcPacket); ++ return newPkt; ++ } ++ } ++ ++ return nullptr; ++} ++ ++bool CDVDDemuxMVC::SeekTime(int time, bool backwords, double* startpts) ++{ ++ if (!m_pInput) ++ return false; ++ ++ AVRational time_base = m_pFormatContext->streams[m_nStreamIndex]->time_base; ++ int64_t seek_pts = av_rescale(DVD_MSEC_TO_TIME(time), time_base.den, (int64_t)time_base.num * AV_TIME_BASE); ++ int64_t starttime = 0; ++ ++ if (m_pFormatContext->start_time != (int64_t)AV_NOPTS_VALUE) ++ starttime = av_rescale(m_pFormatContext->start_time, time_base.den, (int64_t)time_base.num * AV_TIME_BASE); ++ if (starttime != 0) ++ seek_pts += starttime; ++ if (seek_pts < MVC_SEEK_TIME_WINDOW) ++ seek_pts = 0; ++ else ++ seek_pts -= MVC_SEEK_TIME_WINDOW; ++ ++ av_seek_frame(m_pFormatContext, m_nStreamIndex, seek_pts, AVSEEK_FLAG_BACKWARD); ++ return true; ++} ++ ++std::string CDVDDemuxMVC::GetFileName() ++{ ++ return m_pInput->GetFileName(); ++} ++ ++AVStream* CDVDDemuxMVC::GetAVStream() ++{ ++ return m_pFormatContext ? 
m_pFormatContext->streams[m_nStreamIndex] : nullptr; ++} ++ ++void CDVDDemuxMVC::Dispose() ++{ ++ if (m_pFormatContext) ++ avformat_close_input(&m_pFormatContext); ++ ++ if (m_ioContext) ++ { ++ av_free(m_ioContext->buffer); ++ av_free(m_ioContext); ++ } ++ ++ m_ioContext = nullptr; ++ m_pFormatContext = nullptr; ++ m_pInput = nullptr; ++ m_nStreamIndex = -1; ++} ++ ++double CDVDDemuxMVC::ConvertTimestamp(int64_t pts, int den, int num) ++{ ++ if (pts == (int64_t)AV_NOPTS_VALUE) ++ return DVD_NOPTS_VALUE; ++ ++ // do calculations in floats as they can easily overflow otherwise ++ // we don't care for having a completely exact timestamp anyway ++ double timestamp = (double)pts * num / den; ++ double starttime = 0.0f; ++ ++ /*if (m_MVCFormatContext->start_time != (int64_t)AV_NOPTS_VALUE) ++ starttime = (double)m_MVCFormatContext->start_time / AV_TIME_BASE;*/ ++ ++ if (timestamp > starttime) ++ timestamp -= starttime; ++ // allow for largest possible difference in pts and dts for a single packet ++ else if (timestamp + 0.5f > starttime) ++ timestamp = 0; ++ ++ return timestamp * DVD_TIME_BASE; ++} ++ ++std::vector<CDemuxStream*> CDVDDemuxMVC::GetStreams() const ++{ ++ std::vector<CDemuxStream*> streams; ++ return streams; ++} +\ No newline at end of file +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.h +new file mode 100644 +index 0000000000000000000000000000000000000000..284358f282ed3d708be5929e6b04d6f49782079d +--- /dev/null ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxMVC.h +@@ -0,0 +1,57 @@ ++#pragma once ++ ++/* ++* Copyright (C) 2005-2013 Team XBMC ++* http://xbmc.org ++* ++* This Program is free software; you can redistribute it and/or modify ++* it under the terms of the GNU General Public License as published by ++* the Free Software Foundation; either version 2, or (at your option) ++* any later version. 
++* ++* This Program is distributed in the hope that it will be useful, ++* but WITHOUT ANY WARRANTY; without even the implied warranty of ++* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++* GNU General Public License for more details. ++* ++* You should have received a copy of the GNU General Public License ++* along with XBMC; see the file COPYING. If not, see ++* <http://www.gnu.org/licenses/>. ++* ++*/ ++ ++#include "DVDDemux.h" ++ ++extern "C" { ++#include "libavformat/avformat.h" ++} ++ ++class CDVDDemuxMVC : public CDVDDemux ++{ ++public: ++ CDVDDemuxMVC(); ++ virtual ~CDVDDemuxMVC(); ++ bool Open(CDVDInputStream* pInput); ++ virtual void Reset(); ++ virtual void Abort(); ++ virtual void Flush(); ++ virtual DemuxPacket* Read(); ++ virtual bool SeekTime(int time, bool backwords = false, double* startpts = nullptr); ++ virtual void SetSpeed(int iSpeed) { }; ++ virtual int GetStreamLength() { return 0; }; ++ virtual CDemuxStream* GetStream(int iStreamId) const override { return nullptr; }; ++ virtual std::vector<CDemuxStream*> GetStreams() const override; ++ virtual int GetNrOfStreams() const override { return 1; }; ++ virtual std::string GetFileName(); ++ ++ AVStream* GetAVStream(); ++ CDVDInputStream* m_pInput; ++ ++private: ++ void Dispose(); ++ double ConvertTimestamp(int64_t pts, int den, int num); ++ ++ AVIOContext *m_ioContext = nullptr; ++ AVFormatContext *m_pFormatContext = nullptr; ++ int m_nStreamIndex = -1; ++}; +\ No newline at end of file +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp +index e99352a90f348a95673ef3442d3f6cb020cd57d4..e390529bb8602af04c3853337821123546fb098d 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.cpp +@@ -19,11 +19,13 @@ + */ + + #include "DVDDemuxStreamSSIF.h" ++#include "DVDDemux.h" + #include "DVDClock.h" + #include "DVDDemuxUtils.h" + 
#include "utils/log.h" + + //#define DEBUG_VERBOSE ++#define MVC_QUEUE_SIZE 100 + + DemuxPacket* CDVDDemuxStreamSSIF::AddPacket(DemuxPacket* &srcPkt) + { +@@ -37,7 +39,7 @@ DemuxPacket* CDVDDemuxStreamSSIF::AddPacket(DemuxPacket* &srcPkt) + } + else if (srcPkt->iStreamId == m_mvcStreamId) + { +- m_MVCqueue.push(srcPkt); ++ AddMVCExtPacket(srcPkt); + } + + return GetMVCPacket(); +@@ -81,6 +83,10 @@ DemuxPacket* CDVDDemuxStreamSSIF::MergePacket(DemuxPacket* &srcPkt, DemuxPacket* + + DemuxPacket* CDVDDemuxStreamSSIF::GetMVCPacket() + { ++ // if input is a bluray fill mvc queue before processing ++ if (m_bluRay && m_MVCqueue.empty() && !m_H264queue.empty()) ++ FillMVCQueue(m_H264queue.front()->dts); ++ + // Here, we recreate a h264 MVC packet from the base one + buffered MVC NALU's + while (!m_H264queue.empty() && !m_MVCqueue.empty()) + { +@@ -151,6 +157,36 @@ DemuxPacket* CDVDDemuxStreamSSIF::GetMVCPacket() + #if defined(DEBUG_VERBOSE) + CLog::Log(LOGDEBUG, ">>> MVC waiting. MVC(%d) H264(%d)", m_MVCqueue.size(), m_H264queue.size()); + #endif +- + return CDVDDemuxUtils::AllocateDemuxPacket(0); + } ++ ++void CDVDDemuxStreamSSIF::AddMVCExtPacket(DemuxPacket* &mvcExtPkt) ++{ ++ m_MVCqueue.push(mvcExtPkt); ++} ++ ++bool CDVDDemuxStreamSSIF::FillMVCQueue(double dtsBase) ++{ ++ if (!m_bluRay) ++ return false; ++ ++ CDVDDemux* demux = m_bluRay->GetDemuxMVC(); ++ DemuxPacket* mvc; ++ while (demux && (m_MVCqueue.size() < MVC_QUEUE_SIZE) && (mvc = demux->Read())) // GetDemuxMVC() returns nullptr while no MVC clip is open ++ { ++ if (dtsBase == DVD_NOPTS_VALUE || mvc->dts == DVD_NOPTS_VALUE) ++ { ++ // do nothing, can't compare timestamps when they are not set ++ } ++ else if (mvc->dts < dtsBase) ++ { ++#if defined(DEBUG_VERBOSE) ++ CLog::Log(LOGDEBUG, ">>> MVC discard mvc: %6d, pts(%.3f) dts(%.3f)", mvc->iSize, mvc->pts*1e-6, mvc->dts*1e-6); ++#endif ++ CDVDDemuxUtils::FreeDemuxPacket(mvc); ++ continue; ++ } ++ AddMVCExtPacket(mvc); ++ }; ++ return m_MVCqueue.size() == MVC_QUEUE_SIZE; ++} +diff --git 
a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h +index 8412627a3ea13f59bd2c96c23bd386e4b5b2658e..579c382dca988b0c37e5da7396803f70d4cf15ff 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxStreamSSIF.h +@@ -21,6 +21,7 @@ + */ + + #include "DVDDemuxPacket.h" ++#include "DVDInputStreams/DVDInputStreamBluray.h" + #include <queue> + + extern "C" { +@@ -37,13 +38,20 @@ public: + void Flush(); + void SetH264StreamId(int id) { m_h264StreamId = id; }; + void SetMVCStreamId(int id) { m_mvcStreamId = id; }; ++ int GetH264StreamId() { return m_h264StreamId; }; ++ int GetMVCStreamId() { return m_mvcStreamId; }; ++ void AddMVCExtPacket(DemuxPacket* &scrPkt); ++ void SetBluRay(CDVDInputStreamBluray* &bluRay) { m_bluRay = bluRay; }; ++ bool IsBluRay() { return m_bluRay != nullptr; }; + + private: + DemuxPacket* GetMVCPacket(); + DemuxPacket* MergePacket(DemuxPacket* &srcPkt, DemuxPacket* &appendPkt); ++ bool FillMVCQueue(double dtsBase); + ++ CDVDInputStreamBluray* m_bluRay = nullptr; + std::queue<DemuxPacket*> m_H264queue; + std::queue<DemuxPacket*> m_MVCqueue; +- int m_h264StreamId = 0; +- int m_mvcStreamId = 0; ++ int m_h264StreamId = -1; ++ int m_mvcStreamId = -1; + }; +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in b/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in +index f3b717ddabb4729fe0db5ebab5a7913b8fe8297c..80ceeeaea6f061ec0b82f95d1edf7c572960397c 100644 +--- a/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/Makefile.in +@@ -11,6 +11,7 @@ SRCS += DVDDemuxVobsub.cpp + SRCS += DVDDemuxCC.cpp + SRCS += DVDFactoryDemuxer.cpp + SRCS += DVDDemuxStreamSSIF.cpp ++SRCS += DVDDemuxMVC.cpp + + LIB = DVDDemuxers.a + +diff --git a/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.cpp b/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.cpp +index 
6ccd9a8c00fdc5175df3ecbb3a5d30dc93e319ab..5e85db23b09c920c4b19f3e7cae6e3f8ccae2db9 100644 +--- a/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.cpp ++++ b/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.cpp +@@ -26,6 +26,8 @@ + #include "IVideoPlayer.h" + #include "DVDCodecs/Overlay/DVDOverlay.h" + #include "DVDCodecs/Overlay/DVDOverlayImage.h" ++#include "DVDInputStreamFile.h" ++#include "DVDDemuxers/DVDDemuxMVC.h" + #include "settings/Settings.h" + #include "LangInfo.h" + #include "utils/log.h" +@@ -231,10 +233,8 @@ bool CDVDInputStreamBluray::IsEOF() + + BLURAY_TITLE_INFO* CDVDInputStreamBluray::GetTitleLongest() + { +- int titles = m_dll->bd_get_titles(m_bd, TITLES_RELEVANT, 0); +- + BLURAY_TITLE_INFO *s = NULL; +- for(int i=0; i < titles; i++) ++ for(int i=0; i < m_nTitles; i++) + { + BLURAY_TITLE_INFO *t = m_dll->bd_get_title_info(m_bd, i, 0); + if(!t) +@@ -326,6 +326,7 @@ bool CDVDInputStreamBluray::Open() + return false; + } + ++ m_root = root; + const BLURAY_DISC_INFO *disc_info; + + disc_info = m_dll->bd_get_disc_info(m_bd); +@@ -349,6 +350,7 @@ bool CDVDInputStreamBluray::Open() + CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::Open - BD+ detected : %d", disc_info->bdplus_detected); + CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::Open - libbdplus detected : %d", disc_info->libbdplus_detected); + CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::Open - BD+ handled : %d", disc_info->bdplus_handled); ++ CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::Open - 3D content exist : %d", disc_info->content_exist_3D); + } + else + CLog::Log(LOGERROR, "CDVDInputStreamBluray::Open - BluRay not detected"); +@@ -365,6 +367,7 @@ bool CDVDInputStreamBluray::Open() + return false; + } + ++ m_nTitles = m_dll->bd_get_titles(m_bd, TITLES_RELEVANT, 0); + int mode = CSettings::GetInstance().GetInt(CSettings::SETTING_DISC_PLAYBACK); + + if (URIUtils::HasExtension(filename, ".mpls")) +@@ -393,18 +396,17 @@ bool CDVDInputStreamBluray::Open() + m_title = 
GetTitleLongest(); + } + +- if(m_navmode) ++ SetupPlayerSettings(); ++ m_dll->bd_get_event(m_bd, NULL); ++ ++ if (m_navmode) + { +- SetupPlayerSettings(); + + m_dll->bd_register_overlay_proc (m_bd, this, bluray_overlay_cb); + #ifdef HAVE_LIBBLURAY_BDJ + m_dll->bd_register_argb_overlay_proc (m_bd, this, bluray_overlay_argb_cb, NULL); + #endif + +- m_dll->bd_get_event(m_bd, NULL); +- +- + if(m_dll->bd_play(m_bd) <= 0) + { + CLog::Log(LOGERROR, "CDVDInputStreamBluray::Open - failed play disk %s", strPath.c_str()); +@@ -419,21 +421,25 @@ bool CDVDInputStreamBluray::Open() + CLog::Log(LOGERROR, "CDVDInputStreamBluray::Open - failed to get title info"); + return false; + } +- +- if(m_dll->bd_select_playlist(m_bd, m_title->playlist) == 0 ) ++ if (m_dll->bd_select_playlist(m_bd, m_title->playlist) == 0) + { + CLog::Log(LOGERROR, "CDVDInputStreamBluray::Open - failed to select title %d", m_title->idx); + return false; + } +- m_clip = 0; + } + ++ // Process any events that occurred during opening ++ while (m_dll->bd_get_event(m_bd, &m_event)) ++ ProcessEvent(); ++ + return true; + } + + // close file and reset everyting + void CDVDInputStreamBluray::Close() + { ++ CloseMVCDemux(); ++ + if (!m_dll) + return; + if(m_title) +@@ -449,7 +455,7 @@ void CDVDInputStreamBluray::Close() + + void CDVDInputStreamBluray::ProcessEvent() { + +- int pid = -1; ++ int pid = -1, ret; + switch (m_event.event) { + + case BD_EVENT_ERROR: +@@ -514,15 +520,17 @@ void CDVDInputStreamBluray::ProcessEvent() { + CLog::Log(LOGDEBUG, "CDVDInputStreamBluray - BD_EVENT_PLAYLIST %d", + m_event.param); + m_playlist = m_event.param; +- if(m_title) +- m_dll->bd_free_title_info(m_title); +- m_title = m_dll->bd_get_playlist_info(m_bd, m_playlist, m_angle); ++ ProcessItem(m_playlist); + break; + + case BD_EVENT_PLAYITEM: + CLog::Log(LOGDEBUG, "CDVDInputStreamBluray - BD_EVENT_PLAYITEM %d", + m_event.param); + m_clip = m_event.param; ++ uint64_t clip_start, clip_in, bytepos; ++ ret = m_dll->bd_get_clip_infos(m_bd, 
m_clip, &clip_start, &clip_in, &bytepos, nullptr); ++ if (ret) ++ m_clipStartTime = clip_start / 90; + break; + + case BD_EVENT_CHAPTER: +@@ -601,14 +609,20 @@ void CDVDInputStreamBluray::ProcessEvent() { + + /* event has been consumed */ + m_event.event = BD_EVENT_NONE; ++ ++ if (m_bMVCPlayback && m_clip >= 0 && m_title && m_clip < m_title->clip_count && m_nMVCClip != m_clip) ++ { ++ CloseMVCDemux(); ++ OpenMVCDemux(m_clip); ++ } + } + + int CDVDInputStreamBluray::Read(uint8_t* buf, int buf_size) + { ++ int result = 0; + m_dispTimeBeforeRead = (int)(m_dll->bd_tell_time(m_bd) / 90); + if(m_navmode) + { +- int result = 0; + do { + + if(m_hold == HOLD_HELD) +@@ -658,10 +672,14 @@ int CDVDInputStreamBluray::Read(uint8_t* buf, int buf_size) + + } while(result == 0); + +- return result; + } + else +- return m_dll->bd_read(m_bd, buf, buf_size); ++ { ++ result = m_dll->bd_read(m_bd, buf, buf_size); ++ while (m_dll->bd_get_event(m_bd, &m_event)) ++ ProcessEvent(); ++ } ++ return result; + } + + static uint8_t clamp(double v) +@@ -909,8 +927,12 @@ bool CDVDInputStreamBluray::PosTime(int ms) + { + if(m_dll->bd_seek_time(m_bd, ms * 90) < 0) + return false; +- else +- return true; ++ ++ while (m_dll->bd_get_event(m_bd, &m_event)) ++ ProcessEvent(); ++ ++ SeekMVCDemux(ms - m_clipStartTime); ++ return true; + } + + int CDVDInputStreamBluray::GetChapterCount() +@@ -933,8 +955,12 @@ bool CDVDInputStreamBluray::SeekChapter(int ch) + { + if(m_title && m_dll->bd_seek_chapter(m_bd, ch-1) < 0) + return false; +- else +- return true; ++ ++ while (m_dll->bd_get_event(m_bd, &m_event)) ++ ProcessEvent(); ++ ++ SeekMVCDemux(GetChapterPos(ch) * 1000 - m_clipStartTime); ++ return true; + } + + int64_t CDVDInputStreamBluray::GetChapterPos(int ch) +@@ -1132,6 +1158,95 @@ bool CDVDInputStreamBluray::HasMenu() + return m_navmode; + } + ++bool CDVDInputStreamBluray::ProcessItem(int playitem) ++{ ++ if (m_title) ++ m_dll->bd_free_title_info(m_title); ++ ++ m_title = 
m_dll->bd_get_playlist_info(m_bd, playitem, m_angle); ++ ++ if (m_title && CSettings::GetInstance().GetBool("videoplayer.supportmvc")) // m_title is NULL when bd_get_playlist_info() fails; it is dereferenced below ++ { ++ MPLS_PL * mpls = m_dll->bd_get_title_mpls(m_bd); ++ if (mpls) ++ { ++ for (int i = 0; i < mpls->ext_sub_count; i++) ++ { ++ if (mpls->ext_sub_path[i].type == 8 ++ && mpls->ext_sub_path[i].sub_playitem_count == mpls->list_count) ++ { ++ CLog::Log(LOGDEBUG, "CDVDInputStreamBluray - Enabling BD3D MVC demuxing"); ++ CLog::Log(LOGDEBUG, "CDVDInputStreamBluray - MVC_Base_view_R_flag: %d", m_title->mvc_base_view_r_flag); ++ m_bMVCPlayback = true; ++ m_nMVCSubPathIndex = i; ++ m_bFlipEyes = m_title->mvc_base_view_r_flag != 0; ++ break; ++ } ++ } ++ } ++ } ++ CloseMVCDemux(); ++ return true; ++} ++ ++bool CDVDInputStreamBluray::OpenMVCDemux(int playItem) ++{ ++ MPLS_PL *pl = m_dll->bd_get_title_mpls(m_bd); ++ if (!pl) ++ return false; ++ ++ std::string strFileName; ++ strFileName.append(m_root); ++ strFileName.append("/BDMV/STREAM/"); ++ strFileName.append(pl->ext_sub_path[m_nMVCSubPathIndex].sub_play_item[playItem].clip->clip_id); ++ strFileName.append(".m2ts"); ++ ++ CLog::Log(LOGDEBUG, "CDVDInputStreamBluray::OpenMVCDemuxer(): Opening MVC extension stream at %s", strFileName.c_str()); ++ ++ CFileItem fileitem(CURL(strFileName), false); ++ m_pMVCInput = new CDVDInputStreamFile(fileitem); ++ ++ // Try to open the MVC stream ++ if (!m_pMVCInput->Open()) ++ { ++ CloseMVCDemux(); ++ m_bMVCPlayback = false; ++ return false; ++ } ++ ++ if (m_pMVCDemux) ++ SAFE_DELETE(m_pMVCDemux); ++ ++ CDVDDemuxMVC* pMVCDemux = new CDVDDemuxMVC; ++ m_pMVCDemux = pMVCDemux; ++ ++ if (!pMVCDemux->Open(m_pMVCInput)) ++ { ++ CloseMVCDemux(); ++ m_bMVCPlayback = false; ++ return false; ++ } ++ ++ m_nMVCClip = playItem; ++ return true; ++} ++ ++bool CDVDInputStreamBluray::CloseMVCDemux() ++{ ++ if (m_pMVCDemux) ++ SAFE_DELETE(m_pMVCDemux); ++ ++ SAFE_DELETE(m_pMVCInput); ++ m_nMVCClip = -1; ++ return true; ++} ++ ++void CDVDInputStreamBluray::SeekMVCDemux(int64_t time) 
++{ ++ if (m_bMVCPlayback && m_pMVCDemux) ++ m_pMVCDemux->SeekTime(time); ++} ++ + void CDVDInputStreamBluray::SetupPlayerSettings() + { + int region = CSettings::GetInstance().GetInt(CSettings::SETTING_BLURAY_PLAYERREGION); +diff --git a/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.h b/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.h +index b967a85e6557e42a7f1235cdd804d5a0263b866f..561fb5cd4f971bc9ee4f41218a60bb3d5bc5625f 100644 +--- a/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.h ++++ b/xbmc/cores/VideoPlayer/DVDInputStreams/DVDInputStreamBluray.h +@@ -38,6 +38,7 @@ extern "C" + class CDVDOverlayImage; + class DllLibbluray; + class IVideoPlayer; ++class CDVDDemux; + + class CDVDInputStreamBluray + : public CDVDInputStream +@@ -119,6 +120,9 @@ public: + BLURAY_TITLE_INFO* GetTitleFile(const std::string& name); + + void ProcessEvent(); ++ CDVDDemux* GetDemuxMVC() { return m_pMVCDemux; }; ++ bool HasMVC() { return m_bMVCPlayback; } ++ bool AreEyesFlipped() { return m_bFlipEyes; } + + protected: + struct SPlane; +@@ -127,6 +131,11 @@ protected: + void OverlayClose(); + static void OverlayClear(SPlane& plane, int x, int y, int w, int h); + static void OverlayInit (SPlane& plane, int w, int h); ++ bool ProcessItem(int playitem); ++ ++ bool OpenMVCDemux(int playItem); ++ bool CloseMVCDemux(); ++ void SeekMVCDemux(int64_t time); + + IVideoPlayer* m_player; + DllLibbluray* m_dll; +@@ -138,6 +147,17 @@ protected: + bool m_menu; + bool m_navmode; + int m_dispTimeBeforeRead; ++ int m_nTitles = -1; ++ std::string m_root; ++ ++ // MVC related members ++ CDVDDemux* m_pMVCDemux = nullptr; ++ CDVDInputStream *m_pMVCInput = nullptr; ++ bool m_bMVCPlayback = false; ++ int m_nMVCSubPathIndex = 0; ++ int m_nMVCClip = -1; ++ bool m_bFlipEyes = false; ++ uint64_t m_clipStartTime = 0; + + typedef std::shared_ptr<CDVDOverlayImage> SOverlay; + typedef std::list<SOverlay> SOverlays; + +From 9209fd1862041094e9f01e17c377c6d50c37ebb0 Mon Sep 17 
00:00:00 2001 +From: Anton Fedchin <afedchin@ruswizards.com> +Date: Wed, 2 Mar 2016 23:31:50 +0300 +Subject: [PATCH 46/67] [BaseRenderer] Fix aspect for TAB/SBS (need more + testing) + +--- + xbmc/cores/VideoPlayer/VideoRenderers/BaseRenderer.cpp | 18 ++++++++++++++++++ + 1 file changed, 18 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/BaseRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/BaseRenderer.cpp +index f18c671d90c85eed1ca4bd52028d7e5074a1312a..5c6f7453c2b3fd1155c18af8d37cb3d4fa9de1c6 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/BaseRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/BaseRenderer.cpp +@@ -35,6 +35,9 @@ + #include "settings/AdvancedSettings.h" + #include "cores/VideoPlayer/VideoRenderers/RenderFlags.h" + ++extern "C" { ++#include "libavformat/version.h" ++} + + CBaseRenderer::CBaseRenderer() + { +@@ -369,6 +372,21 @@ void CBaseRenderer::CalculateFrameAspectRatio(unsigned int desired_width, unsign + if (m_sourceHeight == 576) // PAL + m_sourceFrameRatio = imageFrameRatio * PALPixelRatio * Non4by3Correction; + } ++#if (LIBAVFORMAT_VERSION_MAJOR >= 57) ++ bool isAnamorph = m_sourceWidth <= 1920 && m_sourceHeight <= 1080; ++ float factor = isAnamorph ? 
2.0f : 4.0f; ++ switch (CONF_FLAGS_STEREO_MODE_MASK(m_iFlags)) ++ { ++ case CONF_FLAGS_STEREO_MODE_TAB: ++ m_sourceFrameRatio *= factor; ++ break; ++ case CONF_FLAGS_STEREO_MODE_SBS: ++ m_sourceFrameRatio /= factor; ++ break; ++ default: ++ break; ++ } ++#endif + } + + void CBaseRenderer::ManageRenderArea() + +From 7aa4746fe6adef77e5ff99b60d242a575fff583c Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Mon, 29 Feb 2016 17:00:50 +0000 +Subject: [PATCH 47/67] libbluray: Bump to Nevcairie's v0.9.2 + +This includes 3D support +--- + tools/depends/target/libbluray/Makefile | 1 + + .../libbluray/bump_to_Nevcairie_v0.9.2.patch | 24397 +++++++++++++++++++ + 2 files changed, 24398 insertions(+) + create mode 100644 tools/depends/target/libbluray/bump_to_Nevcairie_v0.9.2.patch + +diff --git a/tools/depends/target/libbluray/Makefile b/tools/depends/target/libbluray/Makefile +index 3c85b96ca38409fec6de87cb30162b725ce170db..d8fa16ed83ea997c8b3cf34ee83383e830986197 100644 +--- a/tools/depends/target/libbluray/Makefile ++++ b/tools/depends/target/libbluray/Makefile +@@ -27,6 +27,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + ifeq ($(OS),android) + cd $(PLATFORM); patch -p1 < ../android.patch + endif ++ cd $(PLATFORM); patch -p1 < ../bump_to_Nevcairie_v0.9.2.patch + cd $(PLATFORM); ./bootstrap + cd $(PLATFORM); $(CONFIGURE) + +diff --git a/tools/depends/target/libbluray/bump_to_Nevcairie_v0.9.2.patch b/tools/depends/target/libbluray/bump_to_Nevcairie_v0.9.2.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..5884d91590f80927cc5138fdd0ed41072c65354a +--- /dev/null ++++ b/tools/depends/target/libbluray/bump_to_Nevcairie_v0.9.2.patch +@@ -0,0 +1,24397 @@ ++diff --git a/ChangeLog b/ChangeLog ++index ffc7788..545fb3f 100644 ++--- a/ChangeLog +++++ b/ChangeLog ++@@ -1,3 +1,29 @@ +++2015-12-01: Version 0.9.2 +++- Add primary audio stream to bd_select_stream(). +++- Improve error resilience. +++- Fix Java 8 compability issues. 
+++- Fix Android build. +++- Fix SecurityException in AWTAutoShutdown. +++- Fix BD-J check when install path in Windows contains non-ASCII chars. +++- Fix jvm.dll loading in Windows ($JAVA_HOME/bin should be in dll load path). +++- Fix class translating in recent Java 8 versions. +++ +++2015-11-03: Version 0.9.1 +++- Improved BD-J security. +++- Improved error resilience. +++- Improved seeking (avoid skipping PAT/PMT/PCR). +++- Fix UO mask check when bd_play_title() is used for Top Menu. +++- Fix re-starting of title bound Xlets when title changes. +++- Fix loading classes with invalid debug info. +++ +++2015-10-02: Version 0.9.0 +++- Add functions to read files from VFS. +++- Improved error resilience. +++- Improved BD-J compability. +++- Fix Xlet-initiated font caching. +++- Fix return value when setting BLURAY_PLAYER_SETTING_DECODE_PG. +++- Fix build with C++ compiler +++ ++ 2015-05-15: Version 0.8.1 ++ - Notify application when UO mask changes. ++ - Improved error resilience. ++diff --git a/Makefile.am b/Makefile.am ++index e03e926..87093c4 100644 ++--- a/Makefile.am +++++ b/Makefile.am ++@@ -26,7 +26,8 @@ EXTRA_DIST = \ ++ src/libbluray/bdj/build.xml \ ++ src/libbluray/bdj/java \ ++ src/libbluray/bdj/java-j2me \ ++- src/libbluray/bdj/java-j2se +++ src/libbluray/bdj/java-j2se \ +++ contrib/asm ++ ++ lib_LTLIBRARIES=libbluray.la ++ libbluray_la_SOURCES = \ ++@@ -149,7 +150,7 @@ libbluray_la_SOURCES+= \ ++ endif ++ endif ++ ++-libbluray_la_LDFLAGS= -version-info $(LT_VERSION_INFO) -export-symbols-regex "^bd_" +++libbluray_la_LDFLAGS= -no-undefined -version-info $(LT_VERSION_INFO) -export-symbols-regex "^bd_" ++ libbluray_la_LIBADD= $(LIBXML2_LIBS) $(FT2_LIBS) $(FONTCONFIG_LIBS) ++ ++ noinst_HEADERS = \ ++@@ -158,6 +159,15 @@ noinst_HEADERS = \ ++ jni/win32/jni_md.h \ ++ jni/darwin/jni_md.h ++ +++ +++bdnavdir=$(pkgincludedir)/bdnav +++bdnav_HEADERS = \ +++ src/libbluray/bdnav/clpi_data.h +++ +++utildir=$(pkgincludedir)/../util +++util_HEADERS = \ +++ 
src/util/attributes.h +++ ++ pkginclude_HEADERS = \ ++ src/file/filesystem.h \ ++ src/libbluray/bluray.h \ ++@@ -165,6 +175,9 @@ pkginclude_HEADERS = \ ++ src/libbluray/keys.h \ ++ src/libbluray/player_settings.h \ ++ src/libbluray/bdnav/clpi_data.h \ +++ src/libbluray/bdnav/clpi_parse.h \ +++ src/libbluray/bdnav/mpls_parse.h \ +++ src/libbluray/bdnav/uo_mask_table.h \ ++ src/libbluray/bdnav/meta_data.h \ ++ src/libbluray/decoders/overlay.h \ ++ src/util/log_control.h ++@@ -185,10 +198,12 @@ endif ++ ++ ++ if USING_BDJAVA +++if USING_BDJAVA_BUILD_JAR ++ jardir=$(datadir)/java/ ++ jar_DATA=$(top_builddir)/.libs/libbluray-$(BDJ_TYPE)-$(VERSION).jar ++ ++ $(top_builddir)/.libs/libbluray-$(BDJ_TYPE)-$(VERSION).jar: all-local +++endif ++ ++ libbluray_la_SOURCES += \ ++ src/libbluray/bdj/bdj.h \ ++@@ -213,6 +228,7 @@ libbluray_la_SOURCES += \ ++ ++ AM_CFLAGS += $(BDJAVA_CFLAGS) ++ +++if USING_BDJAVA_BUILD_JAR ++ all-local: ++ ant -f $(top_srcdir)/src/libbluray/bdj/build.xml \ ++ -Dbuild='$(abs_builddir)/src/libbluray/bdj/build' \ ++@@ -228,6 +244,7 @@ clean-local: ++ -Dversion='$(BDJ_TYPE)-$(VERSION)' \ ++ clean ++ endif +++endif ++ ++ pkgconfigdir = $(libdir)/pkgconfig ++ pkgconfig_DATA = src/libbluray.pc ++@@ -265,20 +282,20 @@ bd_info_LDADD = libbluray.la ++ bdsplice_SOURCES = src/examples/bdsplice.c ++ bdsplice_LDADD = libbluray.la ++ ++-bdj_test_SOURCES = src/examples/bdj_test.c +++bdj_test_SOURCES = src/devtools/bdj_test.c ++ bdj_test_LDADD = libbluray.la ++ ++-bdjo_dump_SOURCES = src/examples/bdjo_dump.c +++bdjo_dump_SOURCES = src/devtools/bdjo_dump.c ++ bdjo_dump_LDADD = libbluray.la ++ ++ clpi_dump_CFLAGS = $(AM_CFLAGS) ++ clpi_dump_SOURCES = \ ++- src/examples/clpi_dump.c \ ++- src/examples/util.c \ ++- src/examples/util.h +++ src/devtools/clpi_dump.c \ +++ src/devtools/util.c \ +++ src/devtools/util.h ++ clpi_dump_LDADD = libbluray.la ++ ++-hdmv_test_SOURCES = src/examples/hdmv_test.c +++hdmv_test_SOURCES = src/devtools/hdmv_test.c ++ hdmv_test_LDADD = 
libbluray.la ++ ++ index_dump_SOURCES = src/examples/index_dump.c ++@@ -291,15 +308,15 @@ list_titles_SOURCES = src/examples/list_titles.c ++ list_titles_LDADD = libbluray.la ++ ++ mobj_dump_CFLAGS = $(AM_CFLAGS) ++-mobj_dump_SOURCES = src/examples/mobj_dump.c \ +++mobj_dump_SOURCES = src/devtools/mobj_dump.c \ ++ src/libbluray/hdmv/mobj_print.c ++ mobj_dump_LDADD = libbluray.la ++ ++ mpls_dump_CFLAGS = $(AM_CFLAGS) ++ mpls_dump_SOURCES = \ ++- src/examples/mpls_dump.c \ ++- src/examples/util.c \ ++- src/examples/util.h +++ src/devtools/mpls_dump.c \ +++ src/devtools/util.c \ +++ src/devtools/util.h ++ mpls_dump_LDADD = libbluray.la ++ ++ sound_dump_SOURCES = src/examples/sound_dump.c ++diff --git a/bootstrap b/bootstrap ++index 872167c..bde67cb 100755 ++--- a/bootstrap +++++ b/bootstrap ++@@ -1,3 +1,7 @@ ++ #!/bin/sh ++ +++set -e +++ +++cd "$(dirname "$0")" +++ ++ autoreconf -vif ++diff --git a/config.h b/config.h ++new file mode 100644 ++index 0000000..6764704 ++--- /dev/null +++++ b/config.h ++@@ -0,0 +1,157 @@ +++/* config.h. Generated from config.h.in by configure. */ +++/* config.h.in. Generated from configure.ac by autoheader. */ +++ +++/* Define to 1 if libudfread is to be used for disc image access */ +++/* #undef ENABLE_UDF */ +++ +++/* Define to 1 if using libbluray J2ME stack */ +++/* #undef HAVE_BDJ_J2ME */ +++ +++/* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'. +++ */ +++/* #undef HAVE_DIRENT_H */ +++ +++/* Define to 1 if you have the <dlfcn.h> header file. */ +++/* #undef HAVE_DLFCN_H */ +++ +++/* Define to 1 if you have the <errno.h> header file. */ +++#define HAVE_ERRNO_H 1 +++ +++/* Define to 1 if you have the <fcntl.h> header file. */ +++/* #undef HAVE_FCNTL_H */ +++ +++/* Define this if you have fontconfig library */ +++/* #undef HAVE_FONTCONFIG */ +++ +++/* Define this if you have FreeType2 library */ +++/* #undef HAVE_FT2 */ +++ +++/* Define to 1 if you have the <inttypes.h> header file. 
*/ +++#define HAVE_INTTYPES_H 1 +++ +++/* Define to 1 if you have the <jni.h> header file. */ +++/* #undef HAVE_JNI_H */ +++ +++/* Define to 1 if you have the <libgen.h> header file. */ +++#define HAVE_LIBGEN_H 1 +++ +++/* Define to 1 if libxml2 is to be used for metadata parsing */ +++/* #undef HAVE_LIBXML2 */ +++ +++/* Define to 1 if you have the <linux/cdrom.h> header file. */ +++/* #undef HAVE_LINUX_CDROM_H */ +++ +++/* Define to 1 if you have the <malloc.h> header file. */ +++#define HAVE_MALLOC_H 1 +++ +++/* Define to 1 if you have the <memory.h> header file. */ +++#define HAVE_MEMORY_H 1 +++ +++/* Define to 1 if you have the <mntent.h> header file. */ +++/* #undef HAVE_MNTENT_H */ +++ +++/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */ +++/* #undef HAVE_NDIR_H */ +++ +++/* Define to 1 if you have the <pthread.h> header file. */ +++/* #undef HAVE_PTHREAD_H */ +++ +++/* Define to 1 if you have the <stdarg.h> header file. */ +++#define HAVE_STDARG_H 1 +++ +++/* Define to 1 if you have the <stdint.h> header file. */ +++#define HAVE_STDINT_H 1 +++ +++/* Define to 1 if you have the <stdlib.h> header file. */ +++#define HAVE_STDLIB_H 1 +++ +++/* Define to 1 if you have the <strings.h> header file. */ +++/* #undef HAVE_STRINGS_H */ +++ +++/* Define to 1 if you have the <string.h> header file. */ +++#define HAVE_STRING_H 1 +++ +++/* Define to 1 if `d_type' is a member of `struct dirent'. */ +++/* #undef HAVE_STRUCT_DIRENT_D_TYPE */ +++ +++/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'. +++ */ +++/* #undef HAVE_SYS_DIR_H */ +++ +++/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'. +++ */ +++/* #undef HAVE_SYS_NDIR_H */ +++ +++/* Define to 1 if you have the <sys/stat.h> header file. */ +++#define HAVE_SYS_STAT_H 1 +++ +++/* Define to 1 if you have the <sys/time.h> header file. */ +++#define HAVE_SYS_TIME_H 1 +++ +++/* Define to 1 if you have the <sys/types.h> header file. 
*/ +++#define HAVE_SYS_TYPES_H 1 +++ +++/* Define to 1 if you have the <time.h> header file. */ +++#define HAVE_TIME_H 1 +++ +++/* Define to 1 if you have the <unistd.h> header file. */ +++/* #undef HAVE_UNISTD_H */ +++ +++/* "Defines the architecture of the java vm." */ +++/* #undef JAVA_ARCH */ +++ +++/* "" */ +++/* #undef JDK_HOME */ +++ +++/* Define to the sub-directory where libtool stores uninstalled libraries. */ +++#define LT_OBJDIR ".libs/" +++ +++/* Name of package */ +++#define PACKAGE "libbluray" +++ +++/* Define to the address where bug reports for this package should be sent. */ +++#define PACKAGE_BUGREPORT "http://www.videolan.org/developers/libbluray.html" +++ +++/* Define to the full name of this package. */ +++#define PACKAGE_NAME "libbluray" +++ +++/* Define to the full name and version of this package. */ +++#define PACKAGE_STRING "libbluray 0.9.2" +++ +++/* Define to the one symbol short name of this package. */ +++#define PACKAGE_TARNAME "libbluray" +++ +++/* Define to the home page for this package. */ +++#define PACKAGE_URL "" +++ +++/* Define to the version of this package. */ +++#define PACKAGE_VERSION "0.9.2" +++ +++/* Define as the return type of signal handlers (`int' or `void'). */ +++#define RETSIGTYPE void +++ +++/* Define to 1 if you have the ANSI C header files. */ +++#define STDC_HEADERS 1 +++ +++/* "Define to 1 if using BD-Java" */ +++/* #undef USING_BDJAVA */ +++ +++/* Version number of package */ +++#define VERSION "0.9.2" +++ +++/* Enable large inode numbers on Mac OS X 10.5. */ +++#ifndef _DARWIN_USE_64_BIT_INODE +++# define _DARWIN_USE_64_BIT_INODE 1 +++#endif +++ +++/* Number of bits in a file offset, on hosts where this is settable. */ +++#define _FILE_OFFSET_BITS 64 +++ +++/* Define for large files, on AIX-style hosts. */ +++/* #undef _LARGE_FILES */ +++ +++/* Define to '0x0501' for IE 5.01. */ +++#define _WIN32_IE 0x0501 +++ +++/* Define to '0x0502' for Windows XP SP2 APIs. 
*/ +++#define _WIN32_WINNT 0x0502 ++diff --git a/configure.ac b/configure.ac ++index 5d5fe2c..ed08c96 100644 ++--- a/configure.ac +++++ b/configure.ac ++@@ -1,7 +1,7 @@ ++ dnl library version number ++ m4_define([bluray_major], 0) ++-m4_define([bluray_minor], 8) ++-m4_define([bluray_micro], 1) +++m4_define([bluray_minor], 9) +++m4_define([bluray_micro], 2) ++ m4_define([bluray_version],[bluray_major.bluray_minor.bluray_micro]) ++ ++ dnl shared library version (.so version) ++@@ -12,9 +12,9 @@ dnl - If interfaces have been changed or removed, increase current and set age a ++ dnl ++ dnl Library file name will be libbluray.so.(current-age).age.revision ++ dnl ++-m4_define([lt_current], 9) ++-m4_define([lt_revision], 1) ++-m4_define([lt_age], 8) +++m4_define([lt_current], 10) +++m4_define([lt_revision], 2) +++m4_define([lt_age], 9) ++ ++ dnl initilization ++ AC_INIT([libbluray], bluray_version, [http://www.videolan.org/developers/libbluray.html]) ++@@ -87,7 +87,15 @@ AC_ARG_ENABLE([bdjava], ++ [use_bdjava=yes]) ++ ++ AC_ARG_ENABLE([udf], ++- [AS_HELP_STRING([--enable-udf], [enable UDF support @<:@default=disabled@:>@])]) +++ [AS_HELP_STRING([--disable-udf], [disable UDF support @<:@default=enabled@:>@])], +++ [enable_udf=$enableval], +++ [enable_udf=yes]) +++ +++AC_ARG_ENABLE([bdjava-jar], +++ [AS_HELP_STRING([--disable-bdjava-jar], +++ [disable building of BD-Java JAR file @<:@default=enabled@:>@])], +++ [use_bdjava_jar=$enableval], +++ [use_bdjava_jar=yes]) ++ ++ AC_ARG_WITH([libxml2], ++ [AS_HELP_STRING([--without-libxml2], [build without libxml2 support @<:@default=with@:>@])]) ++@@ -224,7 +232,7 @@ if [[ $use_bdjava = "yes" ]]; then ++ ]) ++ ++ AC_CHECK_PROG(HAVE_ANT, [ant], yes, no) ++- if test "x$HAVE_ANT" = "xno"; then +++ if test "x$use_bdjava_jar" = "xyes" && test "x$HAVE_ANT" = "xno"; then ++ AC_MSG_ERROR([BD-J requires ANT, but ant was not found. 
Please install it.]) ++ fi ++ ++@@ -233,6 +241,7 @@ if [[ $use_bdjava = "yes" ]]; then ++ AC_DEFINE_UNQUOTED([JDK_HOME], ["$JDK_HOME"], [""]) ++ fi ++ AM_CONDITIONAL([USING_BDJAVA], [ test $use_bdjava = "yes" ]) +++AM_CONDITIONAL([USING_BDJAVA_BUILD_JAR], [ test $use_bdjava_jar = "yes" ]) ++ ++ dnl BD-J type ++ if test "$BDJ_TYPE" = "j2me"; then ++@@ -292,6 +301,7 @@ echo " --------" ++ echo " BD-J support: $use_bdjava" ++ if [[ $use_bdjava = "yes" ]]; then ++ echo " BD-J type: $BDJ_TYPE" +++echo " build JAR: $use_bdjava_jar" ++ if test x"$BDJ_BOOTCLASSPATH" != x""; then ++ echo " BD-J bootclasspath: $BDJ_BOOTCLASSPATH" ++ fi ++diff --git a/contrib/asm/LICENSE.txt b/contrib/asm/LICENSE.txt ++new file mode 100644 ++index 0000000..4d19185 ++--- /dev/null +++++ b/contrib/asm/LICENSE.txt ++@@ -0,0 +1,28 @@ +++ +++ ASM: a very small and fast Java bytecode manipulation framework +++ Copyright (c) 2000-2011 INRIA, France Telecom +++ All rights reserved. +++ +++ Redistribution and use in source and binary forms, with or without +++ modification, are permitted provided that the following conditions +++ are met: +++ 1. Redistributions of source code must retain the above copyright +++ notice, this list of conditions and the following disclaimer. +++ 2. Redistributions in binary form must reproduce the above copyright +++ notice, this list of conditions and the following disclaimer in the +++ documentation and/or other materials provided with the distribution. +++ 3. Neither the name of the copyright holders nor the names of its +++ contributors may be used to endorse or promote products derived from +++ this software without specific prior written permission. +++ +++ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ THE POSSIBILITY OF SUCH DAMAGE. ++diff --git a/contrib/asm/SOURCE b/contrib/asm/SOURCE ++new file mode 100644 ++index 0000000..804aede ++--- /dev/null +++++ b/contrib/asm/SOURCE ++@@ -0,0 +1,9 @@ +++Core functionality from asm 5.0.4 +++ +++http://asm.ow2.org/ +++ +++ASM is an all purpose Java bytecode manipulation and analysis framework. It can be used to modify +++existing classes or dynamically generate classes, directly in binary form. Provided common +++transformations and analysis algorithms allow to easily assemble custom complex transformations +++and code analysis tools. +++ ++diff --git a/contrib/asm/src/org/objectweb/asm/AnnotationVisitor.java b/contrib/asm/src/org/objectweb/asm/AnnotationVisitor.java ++new file mode 100644 ++index 0000000..b644083 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/AnnotationVisitor.java ++@@ -0,0 +1,169 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. 
Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A visitor to visit a Java annotation. The methods of this class must be +++ * called in the following order: ( <tt>visit</tt> | <tt>visitEnum</tt> | +++ * <tt>visitAnnotation</tt> | <tt>visitArray</tt> )* <tt>visitEnd</tt>. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++public abstract class AnnotationVisitor { +++ +++ /** +++ * The ASM API version implemented by this visitor. The value of this field +++ * must be one of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ protected final int api; +++ +++ /** +++ * The annotation visitor to which this visitor must delegate method calls. +++ * May be null. 
+++ */ +++ protected AnnotationVisitor av; +++ +++ /** +++ * Constructs a new {@link AnnotationVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ public AnnotationVisitor(final int api) { +++ this(api, null); +++ } +++ +++ /** +++ * Constructs a new {@link AnnotationVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ * @param av +++ * the annotation visitor to which this visitor must delegate +++ * method calls. May be null. +++ */ +++ public AnnotationVisitor(final int api, final AnnotationVisitor av) { +++ if (api != Opcodes.ASM4 && api != Opcodes.ASM5) { +++ throw new IllegalArgumentException(); +++ } +++ this.api = api; +++ this.av = av; +++ } +++ +++ /** +++ * Visits a primitive value of the annotation. +++ * +++ * @param name +++ * the value name. +++ * @param value +++ * the actual value, whose type must be {@link Byte}, +++ * {@link Boolean}, {@link Character}, {@link Short}, +++ * {@link Integer} , {@link Long}, {@link Float}, {@link Double}, +++ * {@link String} or {@link Type} or OBJECT or ARRAY sort. This +++ * value can also be an array of byte, boolean, short, char, int, +++ * long, float or double values (this is equivalent to using +++ * {@link #visitArray visitArray} and visiting each array element +++ * in turn, but is more convenient). +++ */ +++ public void visit(String name, Object value) { +++ if (av != null) { +++ av.visit(name, value); +++ } +++ } +++ +++ /** +++ * Visits an enumeration value of the annotation. +++ * +++ * @param name +++ * the value name. +++ * @param desc +++ * the class descriptor of the enumeration class. +++ * @param value +++ * the actual enumeration value. 
+++ */ +++ public void visitEnum(String name, String desc, String value) { +++ if (av != null) { +++ av.visitEnum(name, desc, value); +++ } +++ } +++ +++ /** +++ * Visits a nested annotation value of the annotation. +++ * +++ * @param name +++ * the value name. +++ * @param desc +++ * the class descriptor of the nested annotation class. +++ * @return a visitor to visit the actual nested annotation value, or +++ * <tt>null</tt> if this visitor is not interested in visiting this +++ * nested annotation. <i>The nested annotation value must be fully +++ * visited before calling other methods on this annotation +++ * visitor</i>. +++ */ +++ public AnnotationVisitor visitAnnotation(String name, String desc) { +++ if (av != null) { +++ return av.visitAnnotation(name, desc); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an array value of the annotation. Note that arrays of primitive +++ * types (such as byte, boolean, short, char, int, long, float or double) +++ * can be passed as value to {@link #visit visit}. This is what +++ * {@link ClassReader} does. +++ * +++ * @param name +++ * the value name. +++ * @return a visitor to visit the actual array value elements, or +++ * <tt>null</tt> if this visitor is not interested in visiting these +++ * values. The 'name' parameters passed to the methods of this +++ * visitor are ignored. <i>All the array values must be visited +++ * before calling other methods on this annotation visitor</i>. +++ */ +++ public AnnotationVisitor visitArray(String name) { +++ if (av != null) { +++ return av.visitArray(name); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits the end of the annotation. 
+++ */ +++ public void visitEnd() { +++ if (av != null) { +++ av.visitEnd(); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/AnnotationWriter.java b/contrib/asm/src/org/objectweb/asm/AnnotationWriter.java ++new file mode 100644 ++index 0000000..6b95608 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/AnnotationWriter.java ++@@ -0,0 +1,371 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * An {@link AnnotationVisitor} that generates annotations in bytecode form. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++final class AnnotationWriter extends AnnotationVisitor { +++ +++ /** +++ * The class writer to which this annotation must be added. +++ */ +++ private final ClassWriter cw; +++ +++ /** +++ * The number of values in this annotation. +++ */ +++ private int size; +++ +++ /** +++ * <tt>true<tt> if values are named, <tt>false</tt> otherwise. Annotation +++ * writers used for annotation default and annotation arrays use unnamed +++ * values. +++ */ +++ private final boolean named; +++ +++ /** +++ * The annotation values in bytecode form. This byte vector only contains +++ * the values themselves, i.e. the number of values must be stored as a +++ * unsigned short just before these bytes. +++ */ +++ private final ByteVector bv; +++ +++ /** +++ * The byte vector to be used to store the number of values of this +++ * annotation. See {@link #bv}. +++ */ +++ private final ByteVector parent; +++ +++ /** +++ * Where the number of values of this annotation must be stored in +++ * {@link #parent}. +++ */ +++ private final int offset; +++ +++ /** +++ * Next annotation writer. This field is used to store annotation lists. +++ */ +++ AnnotationWriter next; +++ +++ /** +++ * Previous annotation writer. 
This field is used to store annotation lists. +++ */ +++ AnnotationWriter prev; +++ +++ // ------------------------------------------------------------------------ +++ // Constructor +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new {@link AnnotationWriter}. +++ * +++ * @param cw +++ * the class writer to which this annotation must be added. +++ * @param named +++ * <tt>true<tt> if values are named, <tt>false</tt> otherwise. +++ * @param bv +++ * where the annotation values must be stored. +++ * @param parent +++ * where the number of annotation values must be stored. +++ * @param offset +++ * where in <tt>parent</tt> the number of annotation values must +++ * be stored. +++ */ +++ AnnotationWriter(final ClassWriter cw, final boolean named, +++ final ByteVector bv, final ByteVector parent, final int offset) { +++ super(Opcodes.ASM5); +++ this.cw = cw; +++ this.named = named; +++ this.bv = bv; +++ this.parent = parent; +++ this.offset = offset; +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Implementation of the AnnotationVisitor abstract class +++ // ------------------------------------------------------------------------ +++ +++ @Override +++ public void visit(final String name, final Object value) { +++ ++size; +++ if (named) { +++ bv.putShort(cw.newUTF8(name)); +++ } +++ if (value instanceof String) { +++ bv.put12('s', cw.newUTF8((String) value)); +++ } else if (value instanceof Byte) { +++ bv.put12('B', cw.newInteger(((Byte) value).byteValue()).index); +++ } else if (value instanceof Boolean) { +++ int v = ((Boolean) value).booleanValue() ? 
1 : 0; +++ bv.put12('Z', cw.newInteger(v).index); +++ } else if (value instanceof Character) { +++ bv.put12('C', cw.newInteger(((Character) value).charValue()).index); +++ } else if (value instanceof Short) { +++ bv.put12('S', cw.newInteger(((Short) value).shortValue()).index); +++ } else if (value instanceof Type) { +++ bv.put12('c', cw.newUTF8(((Type) value).getDescriptor())); +++ } else if (value instanceof byte[]) { +++ byte[] v = (byte[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('B', cw.newInteger(v[i]).index); +++ } +++ } else if (value instanceof boolean[]) { +++ boolean[] v = (boolean[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('Z', cw.newInteger(v[i] ? 1 : 0).index); +++ } +++ } else if (value instanceof short[]) { +++ short[] v = (short[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('S', cw.newInteger(v[i]).index); +++ } +++ } else if (value instanceof char[]) { +++ char[] v = (char[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('C', cw.newInteger(v[i]).index); +++ } +++ } else if (value instanceof int[]) { +++ int[] v = (int[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('I', cw.newInteger(v[i]).index); +++ } +++ } else if (value instanceof long[]) { +++ long[] v = (long[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('J', cw.newLong(v[i]).index); +++ } +++ } else if (value instanceof float[]) { +++ float[] v = (float[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('F', cw.newFloat(v[i]).index); +++ } +++ } else if (value instanceof double[]) { +++ double[] v = (double[]) value; +++ bv.put12('[', v.length); +++ for (int i = 0; i < v.length; i++) { +++ bv.put12('D', cw.newDouble(v[i]).index); +++ } +++ } else { +++ Item i = 
cw.newConstItem(value); +++ bv.put12(".s.IFJDCS".charAt(i.type), i.index); +++ } +++ } +++ +++ @Override +++ public void visitEnum(final String name, final String desc, +++ final String value) { +++ ++size; +++ if (named) { +++ bv.putShort(cw.newUTF8(name)); +++ } +++ bv.put12('e', cw.newUTF8(desc)).putShort(cw.newUTF8(value)); +++ } +++ +++ @Override +++ public AnnotationVisitor visitAnnotation(final String name, +++ final String desc) { +++ ++size; +++ if (named) { +++ bv.putShort(cw.newUTF8(name)); +++ } +++ // write tag and type, and reserve space for values count +++ bv.put12('@', cw.newUTF8(desc)).putShort(0); +++ return new AnnotationWriter(cw, true, bv, bv, bv.length - 2); +++ } +++ +++ @Override +++ public AnnotationVisitor visitArray(final String name) { +++ ++size; +++ if (named) { +++ bv.putShort(cw.newUTF8(name)); +++ } +++ // write tag, and reserve space for array size +++ bv.put12('[', 0); +++ return new AnnotationWriter(cw, false, bv, bv, bv.length - 2); +++ } +++ +++ @Override +++ public void visitEnd() { +++ if (parent != null) { +++ byte[] data = parent.data; +++ data[offset] = (byte) (size >>> 8); +++ data[offset + 1] = (byte) size; +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the size of this annotation writer list. +++ * +++ * @return the size of this annotation writer list. +++ */ +++ int getSize() { +++ int size = 0; +++ AnnotationWriter aw = this; +++ while (aw != null) { +++ size += aw.bv.length; +++ aw = aw.next; +++ } +++ return size; +++ } +++ +++ /** +++ * Puts the annotations of this annotation writer list into the given byte +++ * vector. +++ * +++ * @param out +++ * where the annotations must be put. 
+++ */ +++ void put(final ByteVector out) { +++ int n = 0; +++ int size = 2; +++ AnnotationWriter aw = this; +++ AnnotationWriter last = null; +++ while (aw != null) { +++ ++n; +++ size += aw.bv.length; +++ aw.visitEnd(); // in case user forgot to call visitEnd +++ aw.prev = last; +++ last = aw; +++ aw = aw.next; +++ } +++ out.putInt(size); +++ out.putShort(n); +++ aw = last; +++ while (aw != null) { +++ out.putByteArray(aw.bv.data, 0, aw.bv.length); +++ aw = aw.prev; +++ } +++ } +++ +++ /** +++ * Puts the given annotation lists into the given byte vector. +++ * +++ * @param panns +++ * an array of annotation writer lists. +++ * @param off +++ * index of the first annotation to be written. +++ * @param out +++ * where the annotations must be put. +++ */ +++ static void put(final AnnotationWriter[] panns, final int off, +++ final ByteVector out) { +++ int size = 1 + 2 * (panns.length - off); +++ for (int i = off; i < panns.length; ++i) { +++ size += panns[i] == null ? 0 : panns[i].getSize(); +++ } +++ out.putInt(size).putByte(panns.length - off); +++ for (int i = off; i < panns.length; ++i) { +++ AnnotationWriter aw = panns[i]; +++ AnnotationWriter last = null; +++ int n = 0; +++ while (aw != null) { +++ ++n; +++ aw.visitEnd(); // in case user forgot to call visitEnd +++ aw.prev = last; +++ last = aw; +++ aw = aw.next; +++ } +++ out.putShort(n); +++ aw = last; +++ while (aw != null) { +++ out.putByteArray(aw.bv.data, 0, aw.bv.length); +++ aw = aw.prev; +++ } +++ } +++ } +++ +++ /** +++ * Puts the given type reference and type path into the given bytevector. +++ * LOCAL_VARIABLE and RESOURCE_VARIABLE target types are not supported. +++ * +++ * @param typeRef +++ * a reference to the annotated type. See {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * <tt>null</tt> if the annotation targets 'typeRef' as a whole. 
+++ * @param out +++ * where the type reference and type path must be put. +++ */ +++ static void putTarget(int typeRef, TypePath typePath, ByteVector out) { +++ switch (typeRef >>> 24) { +++ case 0x00: // CLASS_TYPE_PARAMETER +++ case 0x01: // METHOD_TYPE_PARAMETER +++ case 0x16: // METHOD_FORMAL_PARAMETER +++ out.putShort(typeRef >>> 16); +++ break; +++ case 0x13: // FIELD +++ case 0x14: // METHOD_RETURN +++ case 0x15: // METHOD_RECEIVER +++ out.putByte(typeRef >>> 24); +++ break; +++ case 0x47: // CAST +++ case 0x48: // CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ case 0x49: // METHOD_INVOCATION_TYPE_ARGUMENT +++ case 0x4A: // CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ case 0x4B: // METHOD_REFERENCE_TYPE_ARGUMENT +++ out.putInt(typeRef); +++ break; +++ // case 0x10: // CLASS_EXTENDS +++ // case 0x11: // CLASS_TYPE_PARAMETER_BOUND +++ // case 0x12: // METHOD_TYPE_PARAMETER_BOUND +++ // case 0x17: // THROWS +++ // case 0x42: // EXCEPTION_PARAMETER +++ // case 0x43: // INSTANCEOF +++ // case 0x44: // NEW +++ // case 0x45: // CONSTRUCTOR_REFERENCE +++ // case 0x46: // METHOD_REFERENCE +++ default: +++ out.put12(typeRef >>> 24, (typeRef & 0xFFFF00) >> 8); +++ break; +++ } +++ if (typePath == null) { +++ out.putByte(0); +++ } else { +++ int length = typePath.b[typePath.offset] * 2 + 1; +++ out.putByteArray(typePath.b, typePath.offset, length); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Attribute.java b/contrib/asm/src/org/objectweb/asm/Attribute.java ++new file mode 100644 ++index 0000000..8a2a882 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Attribute.java ++@@ -0,0 +1,255 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. 
Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A non standard class, field, method or code attribute. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++public class Attribute { +++ +++ /** +++ * The type of this attribute. +++ */ +++ public final String type; +++ +++ /** +++ * The raw value of this attribute, used only for unknown attributes. +++ */ +++ byte[] value; +++ +++ /** +++ * The next attribute in this attribute list. May be <tt>null</tt>. +++ */ +++ Attribute next; +++ +++ /** +++ * Constructs a new empty attribute. 
+++ * +++ * @param type +++ * the type of the attribute. +++ */ +++ protected Attribute(final String type) { +++ this.type = type; +++ } +++ +++ /** +++ * Returns <tt>true</tt> if this type of attribute is unknown. The default +++ * implementation of this method always returns <tt>true</tt>. +++ * +++ * @return <tt>true</tt> if this type of attribute is unknown. +++ */ +++ public boolean isUnknown() { +++ return true; +++ } +++ +++ /** +++ * Returns <tt>true</tt> if this type of attribute is a code attribute. +++ * +++ * @return <tt>true</tt> if this type of attribute is a code attribute. +++ */ +++ public boolean isCodeAttribute() { +++ return false; +++ } +++ +++ /** +++ * Returns the labels corresponding to this attribute. +++ * +++ * @return the labels corresponding to this attribute, or <tt>null</tt> if +++ * this attribute is not a code attribute that contains labels. +++ */ +++ protected Label[] getLabels() { +++ return null; +++ } +++ +++ /** +++ * Reads a {@link #type type} attribute. This method must return a +++ * <i>new</i> {@link Attribute} object, of type {@link #type type}, +++ * corresponding to the <tt>len</tt> bytes starting at the given offset, in +++ * the given class reader. +++ * +++ * @param cr +++ * the class that contains the attribute to be read. +++ * @param off +++ * index of the first byte of the attribute's content in +++ * {@link ClassReader#b cr.b}. The 6 attribute header bytes, +++ * containing the type and the length of the attribute, are not +++ * taken into account here. +++ * @param len +++ * the length of the attribute's content. +++ * @param buf +++ * buffer to be used to call {@link ClassReader#readUTF8 +++ * readUTF8}, {@link ClassReader#readClass(int,char[]) readClass} +++ * or {@link ClassReader#readConst readConst}. +++ * @param codeOff +++ * index of the first byte of code's attribute content in +++ * {@link ClassReader#b cr.b}, or -1 if the attribute to be read +++ * is not a code attribute. 
The 6 attribute header bytes, +++ * containing the type and the length of the attribute, are not +++ * taken into account here. +++ * @param labels +++ * the labels of the method's code, or <tt>null</tt> if the +++ * attribute to be read is not a code attribute. +++ * @return a <i>new</i> {@link Attribute} object corresponding to the given +++ * bytes. +++ */ +++ protected Attribute read(final ClassReader cr, final int off, +++ final int len, final char[] buf, final int codeOff, +++ final Label[] labels) { +++ Attribute attr = new Attribute(type); +++ attr.value = new byte[len]; +++ System.arraycopy(cr.b, off, attr.value, 0, len); +++ return attr; +++ } +++ +++ /** +++ * Returns the byte array form of this attribute. +++ * +++ * @param cw +++ * the class to which this attribute must be added. This +++ * parameter can be used to add to the constant pool of this +++ * class the items that corresponds to this attribute. +++ * @param code +++ * the bytecode of the method corresponding to this code +++ * attribute, or <tt>null</tt> if this attribute is not a code +++ * attributes. +++ * @param len +++ * the length of the bytecode of the method corresponding to this +++ * code attribute, or <tt>null</tt> if this attribute is not a +++ * code attribute. +++ * @param maxStack +++ * the maximum stack size of the method corresponding to this +++ * code attribute, or -1 if this attribute is not a code +++ * attribute. +++ * @param maxLocals +++ * the maximum number of local variables of the method +++ * corresponding to this code attribute, or -1 if this attribute +++ * is not a code attribute. +++ * @return the byte array form of this attribute. 
+++ */ +++ protected ByteVector write(final ClassWriter cw, final byte[] code, +++ final int len, final int maxStack, final int maxLocals) { +++ ByteVector v = new ByteVector(); +++ v.data = value; +++ v.length = value.length; +++ return v; +++ } +++ +++ /** +++ * Returns the length of the attribute list that begins with this attribute. +++ * +++ * @return the length of the attribute list that begins with this attribute. +++ */ +++ final int getCount() { +++ int count = 0; +++ Attribute attr = this; +++ while (attr != null) { +++ count += 1; +++ attr = attr.next; +++ } +++ return count; +++ } +++ +++ /** +++ * Returns the size of all the attributes in this attribute list. +++ * +++ * @param cw +++ * the class writer to be used to convert the attributes into +++ * byte arrays, with the {@link #write write} method. +++ * @param code +++ * the bytecode of the method corresponding to these code +++ * attributes, or <tt>null</tt> if these attributes are not code +++ * attributes. +++ * @param len +++ * the length of the bytecode of the method corresponding to +++ * these code attributes, or <tt>null</tt> if these attributes +++ * are not code attributes. +++ * @param maxStack +++ * the maximum stack size of the method corresponding to these +++ * code attributes, or -1 if these attributes are not code +++ * attributes. +++ * @param maxLocals +++ * the maximum number of local variables of the method +++ * corresponding to these code attributes, or -1 if these +++ * attributes are not code attributes. +++ * @return the size of all the attributes in this attribute list. This size +++ * includes the size of the attribute headers. 
+++ */ +++ final int getSize(final ClassWriter cw, final byte[] code, final int len, +++ final int maxStack, final int maxLocals) { +++ Attribute attr = this; +++ int size = 0; +++ while (attr != null) { +++ cw.newUTF8(attr.type); +++ size += attr.write(cw, code, len, maxStack, maxLocals).length + 6; +++ attr = attr.next; +++ } +++ return size; +++ } +++ +++ /** +++ * Writes all the attributes of this attribute list in the given byte +++ * vector. +++ * +++ * @param cw +++ * the class writer to be used to convert the attributes into +++ * byte arrays, with the {@link #write write} method. +++ * @param code +++ * the bytecode of the method corresponding to these code +++ * attributes, or <tt>null</tt> if these attributes are not code +++ * attributes. +++ * @param len +++ * the length of the bytecode of the method corresponding to +++ * these code attributes, or <tt>null</tt> if these attributes +++ * are not code attributes. +++ * @param maxStack +++ * the maximum stack size of the method corresponding to these +++ * code attributes, or -1 if these attributes are not code +++ * attributes. +++ * @param maxLocals +++ * the maximum number of local variables of the method +++ * corresponding to these code attributes, or -1 if these +++ * attributes are not code attributes. +++ * @param out +++ * where the attributes must be written. 
+++ */ +++ final void put(final ClassWriter cw, final byte[] code, final int len, +++ final int maxStack, final int maxLocals, final ByteVector out) { +++ Attribute attr = this; +++ while (attr != null) { +++ ByteVector b = attr.write(cw, code, len, maxStack, maxLocals); +++ out.putShort(cw.newUTF8(attr.type)).putInt(b.length); +++ out.putByteArray(b.data, 0, b.length); +++ attr = attr.next; +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/ByteVector.java b/contrib/asm/src/org/objectweb/asm/ByteVector.java ++new file mode 100644 ++index 0000000..9c532be ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/ByteVector.java ++@@ -0,0 +1,339 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A dynamically extensible vector of bytes. This class is roughly equivalent to +++ * a DataOutputStream on top of a ByteArrayOutputStream, but is more efficient. +++ * +++ * @author Eric Bruneton +++ */ +++public class ByteVector { +++ +++ /** +++ * The content of this vector. +++ */ +++ byte[] data; +++ +++ /** +++ * Actual number of bytes in this vector. +++ */ +++ int length; +++ +++ /** +++ * Constructs a new {@link ByteVector ByteVector} with a default initial +++ * size. +++ */ +++ public ByteVector() { +++ data = new byte[64]; +++ } +++ +++ /** +++ * Constructs a new {@link ByteVector ByteVector} with the given initial +++ * size. +++ * +++ * @param initialSize +++ * the initial size of the byte vector to be constructed. +++ */ +++ public ByteVector(final int initialSize) { +++ data = new byte[initialSize]; +++ } +++ +++ /** +++ * Puts a byte into this byte vector. The byte vector is automatically +++ * enlarged if necessary. +++ * +++ * @param b +++ * a byte. +++ * @return this byte vector. +++ */ +++ public ByteVector putByte(final int b) { +++ int length = this.length; +++ if (length + 1 > data.length) { +++ enlarge(1); +++ } +++ data[length++] = (byte) b; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts two bytes into this byte vector. The byte vector is automatically +++ * enlarged if necessary. +++ * +++ * @param b1 +++ * a byte. 
+++ * @param b2 +++ * another byte. +++ * @return this byte vector. +++ */ +++ ByteVector put11(final int b1, final int b2) { +++ int length = this.length; +++ if (length + 2 > data.length) { +++ enlarge(2); +++ } +++ byte[] data = this.data; +++ data[length++] = (byte) b1; +++ data[length++] = (byte) b2; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts a short into this byte vector. The byte vector is automatically +++ * enlarged if necessary. +++ * +++ * @param s +++ * a short. +++ * @return this byte vector. +++ */ +++ public ByteVector putShort(final int s) { +++ int length = this.length; +++ if (length + 2 > data.length) { +++ enlarge(2); +++ } +++ byte[] data = this.data; +++ data[length++] = (byte) (s >>> 8); +++ data[length++] = (byte) s; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts a byte and a short into this byte vector. The byte vector is +++ * automatically enlarged if necessary. +++ * +++ * @param b +++ * a byte. +++ * @param s +++ * a short. +++ * @return this byte vector. +++ */ +++ ByteVector put12(final int b, final int s) { +++ int length = this.length; +++ if (length + 3 > data.length) { +++ enlarge(3); +++ } +++ byte[] data = this.data; +++ data[length++] = (byte) b; +++ data[length++] = (byte) (s >>> 8); +++ data[length++] = (byte) s; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts an int into this byte vector. The byte vector is automatically +++ * enlarged if necessary. +++ * +++ * @param i +++ * an int. +++ * @return this byte vector. +++ */ +++ public ByteVector putInt(final int i) { +++ int length = this.length; +++ if (length + 4 > data.length) { +++ enlarge(4); +++ } +++ byte[] data = this.data; +++ data[length++] = (byte) (i >>> 24); +++ data[length++] = (byte) (i >>> 16); +++ data[length++] = (byte) (i >>> 8); +++ data[length++] = (byte) i; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts a long into this byte vector. 
The byte vector is automatically +++ * enlarged if necessary. +++ * +++ * @param l +++ * a long. +++ * @return this byte vector. +++ */ +++ public ByteVector putLong(final long l) { +++ int length = this.length; +++ if (length + 8 > data.length) { +++ enlarge(8); +++ } +++ byte[] data = this.data; +++ int i = (int) (l >>> 32); +++ data[length++] = (byte) (i >>> 24); +++ data[length++] = (byte) (i >>> 16); +++ data[length++] = (byte) (i >>> 8); +++ data[length++] = (byte) i; +++ i = (int) l; +++ data[length++] = (byte) (i >>> 24); +++ data[length++] = (byte) (i >>> 16); +++ data[length++] = (byte) (i >>> 8); +++ data[length++] = (byte) i; +++ this.length = length; +++ return this; +++ } +++ +++ /** +++ * Puts a UTF8 string into this byte vector. The byte vector is +++ * automatically enlarged if necessary. +++ * +++ * @param s +++ * a String whose UTF8 encoded length must be less than 65536. +++ * @return this byte vector. +++ */ +++ public ByteVector putUTF8(final String s) { +++ int charLength = s.length(); +++ if (charLength > 65535) { +++ throw new IllegalArgumentException(); +++ } +++ int len = length; +++ if (len + 2 + charLength > data.length) { +++ enlarge(2 + charLength); +++ } +++ byte[] data = this.data; +++ // optimistic algorithm: instead of computing the byte length and then +++ // serializing the string (which requires two loops), we assume the byte +++ // length is equal to char length (which is the most frequent case), and +++ // we start serializing the string right away. During the serialization, +++ // if we find that this assumption is wrong, we continue with the +++ // general method.
+++ data[len++] = (byte) (charLength >>> 8); +++ data[len++] = (byte) charLength; +++ for (int i = 0; i < charLength; ++i) { +++ char c = s.charAt(i); +++ if (c >= '\001' && c <= '\177') { +++ data[len++] = (byte) c; +++ } else { +++ length = len; +++ return encodeUTF8(s, i, 65535); +++ } +++ } +++ length = len; +++ return this; +++ } +++ +++ /** +++ * Puts a UTF8 string into this byte vector. The byte vector is +++ * automatically enlarged if necessary. The string length is encoded in two +++ * bytes before the encoded characters, if there is space for that (i.e. if +++ * this.length - i - 2 >= 0). +++ * +++ * @param s +++ * the String to encode. +++ * @param i +++ * the index of the first character to encode. The previous +++ * characters are supposed to have already been encoded, using +++ * only one byte per character. +++ * @param maxByteLength +++ * the maximum byte length of the encoded string, including the +++ * already encoded characters. +++ * @return this byte vector. +++ */ +++ ByteVector encodeUTF8(final String s, int i, int maxByteLength) { +++ int charLength = s.length(); +++ int byteLength = i; +++ char c; +++ for (int j = i; j < charLength; ++j) { +++ c = s.charAt(j); +++ if (c >= '\001' && c <= '\177') { +++ byteLength++; +++ } else if (c > '\u07FF') { +++ byteLength += 3; +++ } else { +++ byteLength += 2; +++ } +++ } +++ if (byteLength > maxByteLength) { +++ throw new IllegalArgumentException(); +++ } +++ int start = length - i - 2; +++ if (start >= 0) { +++ data[start] = (byte) (byteLength >>> 8); +++ data[start + 1] = (byte) byteLength; +++ } +++ if (length + byteLength - i > data.length) { +++ enlarge(byteLength - i); +++ } +++ int len = length; +++ for (int j = i; j < charLength; ++j) { +++ c = s.charAt(j); +++ if (c >= '\001' && c <= '\177') { +++ data[len++] = (byte) c; +++ } else if (c > '\u07FF') { +++ data[len++] = (byte) (0xE0 | c >> 12 & 0xF); +++ data[len++] = (byte) (0x80 | c >> 6 & 0x3F); +++ data[len++] = (byte) (0x80 | c &
0x3F); +++ } else { +++ data[len++] = (byte) (0xC0 | c >> 6 & 0x1F); +++ data[len++] = (byte) (0x80 | c & 0x3F); +++ } +++ } +++ length = len; +++ return this; +++ } +++ +++ /** +++ * Puts an array of bytes into this byte vector. The byte vector is +++ * automatically enlarged if necessary. +++ * +++ * @param b +++ * an array of bytes. May be <tt>null</tt> to put <tt>len</tt> +++ * null bytes into this byte vector. +++ * @param off +++ * index of the first byte of b that must be copied. +++ * @param len +++ * number of bytes of b that must be copied. +++ * @return this byte vector. +++ */ +++ public ByteVector putByteArray(final byte[] b, final int off, final int len) { +++ if (length + len > data.length) { +++ enlarge(len); +++ } +++ if (b != null) { +++ System.arraycopy(b, off, data, length, len); +++ } +++ length += len; +++ return this; +++ } +++ +++ /** +++ * Enlarge this byte vector so that it can receive n more bytes. +++ * +++ * @param size +++ * number of additional bytes that this byte vector should be +++ * able to receive. +++ */ +++ private void enlarge(final int size) { +++ int length1 = 2 * data.length; +++ int length2 = length + size; +++ byte[] newData = new byte[length1 > length2 ? length1 : length2]; +++ System.arraycopy(data, 0, newData, 0, length); +++ data = newData; +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/ClassReader.java b/contrib/asm/src/org/objectweb/asm/ClassReader.java ++new file mode 100644 ++index 0000000..e23fd60 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/ClassReader.java ++@@ -0,0 +1,2506 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1.
Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++import java.io.IOException; +++import java.io.InputStream; +++ +++/** +++ * A Java class parser to make a {@link ClassVisitor} visit an existing class. +++ * This class parses a byte array conforming to the Java class file format and +++ * calls the appropriate visit methods of a given class visitor for each field, +++ * method and bytecode instruction encountered. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++public class ClassReader { +++ +++ /** +++ * True to enable signatures support. 
+++ */ +++ static final boolean SIGNATURES = true; +++ +++ /** +++ * True to enable annotations support. +++ */ +++ static final boolean ANNOTATIONS = true; +++ +++ /** +++ * True to enable stack map frames support. +++ */ +++ static final boolean FRAMES = true; +++ +++ /** +++ * True to enable bytecode writing support. +++ */ +++ static final boolean WRITER = true; +++ +++ /** +++ * True to enable JSR_W and GOTO_W support. +++ */ +++ static final boolean RESIZE = true; +++ +++ /** +++ * Flag to skip method code. If this flag is set, the <code>CODE</code> +++ * attribute won't be visited. This can be used, for example, to retrieve +++ * annotations for methods and method parameters. +++ */ +++ public static final int SKIP_CODE = 1; +++ +++ /** +++ * Flag to skip the debug information in the class. If this flag is set the +++ * debug information of the class is not visited, i.e. the +++ * {@link MethodVisitor#visitLocalVariable visitLocalVariable} and +++ * {@link MethodVisitor#visitLineNumber visitLineNumber} methods will not be +++ * called. +++ */ +++ public static final int SKIP_DEBUG = 2; +++ +++ /** +++ * Flag to skip the stack map frames in the class. If this flag is set the +++ * stack map frames of the class are not visited, i.e. the +++ * {@link MethodVisitor#visitFrame visitFrame} method will not be called. +++ * This flag is useful when the {@link ClassWriter#COMPUTE_FRAMES} option is +++ * used: it avoids visiting frames that will be ignored and recomputed from +++ * scratch in the class writer. +++ */ +++ public static final int SKIP_FRAMES = 4; +++ +++ /** +++ * Flag to expand the stack map frames. By default stack map frames are +++ * visited in their original format (i.e. "expanded" for classes whose +++ * version is less than V1_6, and "compressed" for the other classes).
If +++ * this flag is set, stack map frames are always visited in expanded format +++ * (this option adds a decompression/recompression step in ClassReader and +++ * ClassWriter which degrades performance quite a lot). +++ */ +++ public static final int EXPAND_FRAMES = 8; +++ +++ /** +++ * The class to be parsed. <i>The content of this array must not be +++ * modified. This field is intended for {@link Attribute} subclasses, and +++ * is normally not needed by class generators or adapters.</i> +++ */ +++ public final byte[] b; +++ +++ /** +++ * The start index of each constant pool item in {@link #b b}, plus one. The +++ * one byte offset skips the constant pool item tag that indicates its type. +++ */ +++ private final int[] items; +++ +++ /** +++ * The String objects corresponding to the CONSTANT_Utf8 items. This cache +++ * avoids multiple parsing of a given CONSTANT_Utf8 constant pool item, +++ * which GREATLY improves performance (by a factor 2 to 3). This caching +++ * strategy could be extended to all constant pool items, but its benefit +++ * would not be so great for these items (because they are much less +++ * expensive to parse than CONSTANT_Utf8 items). +++ */ +++ private final String[] strings; +++ +++ /** +++ * Maximum length of the strings contained in the constant pool of the +++ * class. +++ */ +++ private final int maxStringLength; +++ +++ /** +++ * Start index of the class header information (access, name...) in +++ * {@link #b b}. +++ */ +++ public final int header; +++ +++ // ------------------------------------------------------------------------ +++ // Constructors +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new {@link ClassReader} object. +++ * +++ * @param b +++ * the bytecode of the class to be read. +++ */ +++ public ClassReader(final byte[] b) { +++ this(b, 0, b.length); +++ } +++ +++ /** +++ * Constructs a new {@link ClassReader} object.
+++ * +++ * @param b +++ * the bytecode of the class to be read. +++ * @param off +++ * the start offset of the class data. +++ * @param len +++ * the length of the class data. +++ */ +++ public ClassReader(final byte[] b, final int off, final int len) { +++ this.b = b; +++ // checks the class version +++ if (readShort(off + 6) > Opcodes.V1_8) { +++ throw new IllegalArgumentException(); +++ } +++ // parses the constant pool +++ items = new int[readUnsignedShort(off + 8)]; +++ int n = items.length; +++ strings = new String[n]; +++ int max = 0; +++ int index = off + 10; +++ for (int i = 1; i < n; ++i) { +++ items[i] = index + 1; +++ int size; +++ switch (b[index]) { +++ case ClassWriter.FIELD: +++ case ClassWriter.METH: +++ case ClassWriter.IMETH: +++ case ClassWriter.INT: +++ case ClassWriter.FLOAT: +++ case ClassWriter.NAME_TYPE: +++ case ClassWriter.INDY: +++ size = 5; +++ break; +++ case ClassWriter.LONG: +++ case ClassWriter.DOUBLE: +++ size = 9; +++ ++i; +++ break; +++ case ClassWriter.UTF8: +++ size = 3 + readUnsignedShort(index + 1); +++ if (size > max) { +++ max = size; +++ } +++ break; +++ case ClassWriter.HANDLE: +++ size = 4; +++ break; +++ // case ClassWriter.CLASS: +++ // case ClassWriter.STR: +++ // case ClassWriter.MTYPE +++ default: +++ size = 3; +++ break; +++ } +++ index += size; +++ } +++ maxStringLength = max; +++ // the class header information starts just after the constant pool +++ header = index; +++ } +++ +++ /** +++ * Returns the class's access flags (see {@link Opcodes}). This value may +++ * not reflect Deprecated and Synthetic flags when bytecode is before 1.5 +++ * and those flags are represented by attributes. +++ * +++ * @return the class access flags +++ * +++ * @see ClassVisitor#visit(int, int, String, String, String, String[]) +++ */ +++ public int getAccess() { +++ return readUnsignedShort(header); +++ } +++ +++ /** +++ * Returns the internal name of the class (see +++ * {@link Type#getInternalName() getInternalName}). 
+++ * +++ * @return the internal class name +++ * +++ * @see ClassVisitor#visit(int, int, String, String, String, String[]) +++ */ +++ public String getClassName() { +++ return readClass(header + 2, new char[maxStringLength]); +++ } +++ +++ /** +++ * Returns the internal name of the super class (see +++ * {@link Type#getInternalName() getInternalName}). For interfaces, the +++ * super class is {@link Object}. +++ * +++ * @return the internal name of super class, or <tt>null</tt> for +++ * {@link Object} class. +++ * +++ * @see ClassVisitor#visit(int, int, String, String, String, String[]) +++ */ +++ public String getSuperName() { +++ return readClass(header + 4, new char[maxStringLength]); +++ } +++ +++ /** +++ * Returns the internal names of the class's interfaces (see +++ * {@link Type#getInternalName() getInternalName}). +++ * +++ * @return the array of internal names for all implemented interfaces +++ * (an empty array if there are none; never <tt>null</tt>). +++ * +++ * @see ClassVisitor#visit(int, int, String, String, String, String[]) +++ */ +++ public String[] getInterfaces() { +++ int index = header + 6; +++ int n = readUnsignedShort(index); +++ String[] interfaces = new String[n]; +++ if (n > 0) { +++ char[] buf = new char[maxStringLength]; +++ for (int i = 0; i < n; ++i) { +++ index += 2; +++ interfaces[i] = readClass(index, buf); +++ } +++ } +++ return interfaces; +++ } +++ +++ /** +++ * Copies the constant pool data into the given {@link ClassWriter}. Should +++ * be called before the {@link #accept(ClassVisitor,int)} method. +++ * +++ * @param classWriter +++ * the {@link ClassWriter} to copy constant pool into.
+++ */ +++ void copyPool(final ClassWriter classWriter) { +++ char[] buf = new char[maxStringLength]; +++ int ll = items.length; +++ Item[] items2 = new Item[ll]; +++ for (int i = 1; i < ll; i++) { +++ int index = items[i]; +++ int tag = b[index - 1]; +++ Item item = new Item(i); +++ int nameType; +++ switch (tag) { +++ case ClassWriter.FIELD: +++ case ClassWriter.METH: +++ case ClassWriter.IMETH: +++ nameType = items[readUnsignedShort(index + 2)]; +++ item.set(tag, readClass(index, buf), readUTF8(nameType, buf), +++ readUTF8(nameType + 2, buf)); +++ break; +++ case ClassWriter.INT: +++ item.set(readInt(index)); +++ break; +++ case ClassWriter.FLOAT: +++ item.set(Float.intBitsToFloat(readInt(index))); +++ break; +++ case ClassWriter.NAME_TYPE: +++ item.set(tag, readUTF8(index, buf), readUTF8(index + 2, buf), +++ null); +++ break; +++ case ClassWriter.LONG: +++ item.set(readLong(index)); +++ ++i; +++ break; +++ case ClassWriter.DOUBLE: +++ item.set(Double.longBitsToDouble(readLong(index))); +++ ++i; +++ break; +++ case ClassWriter.UTF8: { +++ String s = strings[i]; +++ if (s == null) { +++ index = items[i]; +++ s = strings[i] = readUTF(index + 2, +++ readUnsignedShort(index), buf); +++ } +++ item.set(tag, s, null, null); +++ break; +++ } +++ case ClassWriter.HANDLE: { +++ int fieldOrMethodRef = items[readUnsignedShort(index + 1)]; +++ nameType = items[readUnsignedShort(fieldOrMethodRef + 2)]; +++ item.set(ClassWriter.HANDLE_BASE + readByte(index), +++ readClass(fieldOrMethodRef, buf), +++ readUTF8(nameType, buf), readUTF8(nameType + 2, buf)); +++ break; +++ } +++ case ClassWriter.INDY: +++ if (classWriter.bootstrapMethods == null) { +++ copyBootstrapMethods(classWriter, items2, buf); +++ } +++ nameType = items[readUnsignedShort(index + 2)]; +++ item.set(readUTF8(nameType, buf), readUTF8(nameType + 2, buf), +++ readUnsignedShort(index)); +++ break; +++ // case ClassWriter.STR: +++ // case ClassWriter.CLASS: +++ // case ClassWriter.MTYPE +++ default: +++ item.set(tag, 
readUTF8(index, buf), null, null); +++ break; +++ } +++ +++ int index2 = item.hashCode % items2.length; +++ item.next = items2[index2]; +++ items2[index2] = item; +++ } +++ +++ int off = items[1] - 1; +++ classWriter.pool.putByteArray(b, off, header - off); +++ classWriter.items = items2; +++ classWriter.threshold = (int) (0.75d * ll); +++ classWriter.index = ll; +++ } +++ +++ /** +++ * Copies the bootstrap method data into the given {@link ClassWriter}. +++ * Should be called before the {@link #accept(ClassVisitor,int)} method. +++ * +++ * @param classWriter +++ * the {@link ClassWriter} to copy bootstrap methods into. +++ */ +++ private void copyBootstrapMethods(final ClassWriter classWriter, +++ final Item[] items, final char[] c) { +++ // finds the "BootstrapMethods" attribute +++ int u = getAttributes(); +++ boolean found = false; +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ String attrName = readUTF8(u + 2, c); +++ if ("BootstrapMethods".equals(attrName)) { +++ found = true; +++ break; +++ } +++ u += 6 + readInt(u + 4); +++ } +++ if (!found) { +++ return; +++ } +++ // copies the bootstrap methods in the class writer +++ int boostrapMethodCount = readUnsignedShort(u + 8); +++ for (int j = 0, v = u + 10; j < boostrapMethodCount; j++) { +++ int position = v - u - 10; +++ int hashCode = readConst(readUnsignedShort(v), c).hashCode(); +++ for (int k = readUnsignedShort(v + 2); k > 0; --k) { +++ hashCode ^= readConst(readUnsignedShort(v + 4), c).hashCode(); +++ v += 2; +++ } +++ v += 4; +++ Item item = new Item(j); +++ item.set(position, hashCode & 0x7FFFFFFF); +++ int index = item.hashCode % items.length; +++ item.next = items[index]; +++ items[index] = item; +++ } +++ int attrSize = readInt(u + 4); +++ ByteVector bootstrapMethods = new ByteVector(attrSize + 62); +++ bootstrapMethods.putByteArray(b, u + 10, attrSize - 2); +++ classWriter.bootstrapMethodsCount = boostrapMethodCount; +++ classWriter.bootstrapMethods = bootstrapMethods; +++ } +++ +++ /** 
+++ * Constructs a new {@link ClassReader} object. +++ * +++ * @param is +++ * an input stream from which to read the class. +++ * @throws IOException +++ * if a problem occurs during reading. +++ */ +++ public ClassReader(final InputStream is) throws IOException { +++ this(readClass(is, false)); +++ } +++ +++ /** +++ * Constructs a new {@link ClassReader} object. +++ * +++ * @param name +++ * the binary qualified name of the class to be read. +++ * @throws IOException +++ * if an exception occurs during reading. +++ */ +++ public ClassReader(final String name) throws IOException { +++ this(readClass( +++ ClassLoader.getSystemResourceAsStream(name.replace('.', '/') +++ + ".class"), true)); +++ } +++ +++ /** +++ * Reads the bytecode of a class. +++ * +++ * @param is +++ * an input stream from which to read the class. +++ * @param close +++ * true to close the input stream after reading. +++ * @return the bytecode read from the given input stream. +++ * @throws IOException +++ * if a problem occurs during reading. 
+++ */ +++ private static byte[] readClass(final InputStream is, boolean close) +++ throws IOException { +++ if (is == null) { +++ throw new IOException("Class not found"); +++ } +++ try { +++ byte[] b = new byte[is.available()]; +++ int len = 0; +++ while (true) { +++ int n = is.read(b, len, b.length - len); +++ if (n == -1) { +++ if (len < b.length) { +++ byte[] c = new byte[len]; +++ System.arraycopy(b, 0, c, 0, len); +++ b = c; +++ } +++ return b; +++ } +++ len += n; +++ if (len == b.length) { +++ int last = is.read(); +++ if (last < 0) { +++ return b; +++ } +++ byte[] c = new byte[b.length + 1000]; +++ System.arraycopy(b, 0, c, 0, len); +++ c[len++] = (byte) last; +++ b = c; +++ } +++ } +++ } finally { +++ if (close) { +++ is.close(); +++ } +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Public methods +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Makes the given visitor visit the Java class of this {@link ClassReader} +++ * . This class is the one specified in the constructor (see +++ * {@link #ClassReader(byte[]) ClassReader}). +++ * +++ * @param classVisitor +++ * the visitor that must visit this class. +++ * @param flags +++ * option flags that can be used to modify the default behavior +++ * of this class. See {@link #SKIP_DEBUG}, {@link #EXPAND_FRAMES} +++ * , {@link #SKIP_FRAMES}, {@link #SKIP_CODE}. +++ */ +++ public void accept(final ClassVisitor classVisitor, final int flags) { +++ accept(classVisitor, new Attribute[0], flags); +++ } +++ +++ /** +++ * Makes the given visitor visit the Java class of this {@link ClassReader}. +++ * This class is the one specified in the constructor (see +++ * {@link #ClassReader(byte[]) ClassReader}). +++ * +++ * @param classVisitor +++ * the visitor that must visit this class. +++ * @param attrs +++ * prototypes of the attributes that must be parsed during the +++ * visit of the class. 
Any attribute whose type is not equal to +++ * the type of one the prototypes will not be parsed: its byte +++ * array value will be passed unchanged to the ClassWriter. +++ * <i>This may corrupt it if this value contains references to +++ * the constant pool, or has syntactic or semantic links with a +++ * class element that has been transformed by a class adapter +++ * between the reader and the writer</i>. +++ * @param flags +++ * option flags that can be used to modify the default behavior +++ * of this class. See {@link #SKIP_DEBUG}, {@link #EXPAND_FRAMES} +++ * , {@link #SKIP_FRAMES}, {@link #SKIP_CODE}. +++ */ +++ public void accept(final ClassVisitor classVisitor, +++ final Attribute[] attrs, final int flags) { +++ int u = header; // current offset in the class file +++ char[] c = new char[maxStringLength]; // buffer used to read strings +++ +++ Context context = new Context(); +++ context.attrs = attrs; +++ context.flags = flags; +++ context.buffer = c; +++ +++ // reads the class declaration +++ int access = readUnsignedShort(u); +++ String name = readClass(u + 2, c); +++ String superClass = readClass(u + 4, c); +++ String[] interfaces = new String[readUnsignedShort(u + 6)]; +++ u += 8; +++ for (int i = 0; i < interfaces.length; ++i) { +++ interfaces[i] = readClass(u, c); +++ u += 2; +++ } +++ +++ // reads the class attributes +++ String signature = null; +++ String sourceFile = null; +++ String sourceDebug = null; +++ String enclosingOwner = null; +++ String enclosingName = null; +++ String enclosingDesc = null; +++ int anns = 0; +++ int ianns = 0; +++ int tanns = 0; +++ int itanns = 0; +++ int innerClasses = 0; +++ Attribute attributes = null; +++ +++ u = getAttributes(); +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ String attrName = readUTF8(u + 2, c); +++ // tests are sorted in decreasing frequency order +++ // (based on frequencies observed on typical classes) +++ if ("SourceFile".equals(attrName)) { +++ sourceFile = readUTF8(u + 8, c); 
+++ } else if ("InnerClasses".equals(attrName)) { +++ innerClasses = u + 8; +++ } else if ("EnclosingMethod".equals(attrName)) { +++ enclosingOwner = readClass(u + 8, c); +++ int item = readUnsignedShort(u + 10); +++ if (item != 0) { +++ enclosingName = readUTF8(items[item], c); +++ enclosingDesc = readUTF8(items[item] + 2, c); +++ } +++ } else if (SIGNATURES && "Signature".equals(attrName)) { +++ signature = readUTF8(u + 8, c); +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleAnnotations".equals(attrName)) { +++ anns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleTypeAnnotations".equals(attrName)) { +++ tanns = u + 8; +++ } else if ("Deprecated".equals(attrName)) { +++ access |= Opcodes.ACC_DEPRECATED; +++ } else if ("Synthetic".equals(attrName)) { +++ access |= Opcodes.ACC_SYNTHETIC +++ | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE; +++ } else if ("SourceDebugExtension".equals(attrName)) { +++ int len = readInt(u + 4); +++ sourceDebug = readUTF(u + 8, len, new char[len]); +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleAnnotations".equals(attrName)) { +++ ianns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleTypeAnnotations".equals(attrName)) { +++ itanns = u + 8; +++ } else if ("BootstrapMethods".equals(attrName)) { +++ int[] bootstrapMethods = new int[readUnsignedShort(u + 8)]; +++ for (int j = 0, v = u + 10; j < bootstrapMethods.length; j++) { +++ bootstrapMethods[j] = v; +++ v += 2 + readUnsignedShort(v + 2) << 1; +++ } +++ context.bootstrapMethods = bootstrapMethods; +++ } else { +++ Attribute attr = readAttribute(attrs, attrName, u + 8, +++ readInt(u + 4), c, -1, null); +++ if (attr != null) { +++ attr.next = attributes; +++ attributes = attr; +++ } +++ } +++ u += 6 + readInt(u + 4); +++ } +++ +++ // visits the class declaration +++ classVisitor.visit(readInt(items[1] - 7), access, name, signature, +++ superClass, interfaces); +++ +++ // visits the source and debug info +++ if ((flags & SKIP_DEBUG) == 0 +++ && (sourceFile != null || 
sourceDebug != null)) { +++ classVisitor.visitSource(sourceFile, sourceDebug); +++ } +++ +++ // visits the outer class +++ if (enclosingOwner != null) { +++ classVisitor.visitOuterClass(enclosingOwner, enclosingName, +++ enclosingDesc); +++ } +++ +++ // visits the class annotations and type annotations +++ if (ANNOTATIONS && anns != 0) { +++ for (int i = readUnsignedShort(anns), v = anns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ classVisitor.visitAnnotation(readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && ianns != 0) { +++ for (int i = readUnsignedShort(ianns), v = ianns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ classVisitor.visitAnnotation(readUTF8(v, c), false)); +++ } +++ } +++ if (ANNOTATIONS && tanns != 0) { +++ for (int i = readUnsignedShort(tanns), v = tanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ classVisitor.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && itanns != 0) { +++ for (int i = readUnsignedShort(itanns), v = itanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ classVisitor.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), false)); +++ } +++ } +++ +++ // visits the attributes +++ while (attributes != null) { +++ Attribute attr = attributes.next; +++ attributes.next = null; +++ classVisitor.visitAttribute(attributes); +++ attributes = attr; +++ } +++ +++ // visits the inner classes +++ if (innerClasses != 0) { +++ int v = innerClasses + 2; +++ for (int i = readUnsignedShort(innerClasses); i > 0; --i) { +++ classVisitor.visitInnerClass(readClass(v, c), +++ readClass(v + 2, c), readUTF8(v + 4, c), +++ readUnsignedShort(v + 6)); +++ v += 8; +++ } +++ } +++ +++ // visits the fields and methods +++ u = header + 10 + 2 * interfaces.length; +++ for (int i = 
readUnsignedShort(u - 2); i > 0; --i) { +++ u = readField(classVisitor, context, u); +++ } +++ u += 2; +++ for (int i = readUnsignedShort(u - 2); i > 0; --i) { +++ u = readMethod(classVisitor, context, u); +++ } +++ +++ // visits the end of the class +++ classVisitor.visitEnd(); +++ } +++ +++ /** +++ * Reads a field and makes the given visitor visit it. +++ * +++ * @param classVisitor +++ * the visitor that must visit the field. +++ * @param context +++ * information about the class being parsed. +++ * @param u +++ * the start offset of the field in the class file. +++ * @return the offset of the first byte following the field in the class. +++ */ +++ private int readField(final ClassVisitor classVisitor, +++ final Context context, int u) { +++ // reads the field declaration +++ char[] c = context.buffer; +++ int access = readUnsignedShort(u); +++ String name = readUTF8(u + 2, c); +++ String desc = readUTF8(u + 4, c); +++ u += 6; +++ +++ // reads the field attributes +++ String signature = null; +++ int anns = 0; +++ int ianns = 0; +++ int tanns = 0; +++ int itanns = 0; +++ Object value = null; +++ Attribute attributes = null; +++ +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ String attrName = readUTF8(u + 2, c); +++ // tests are sorted in decreasing frequency order +++ // (based on frequencies observed on typical classes) +++ if ("ConstantValue".equals(attrName)) { +++ int item = readUnsignedShort(u + 8); +++ value = item == 0 ? 
null : readConst(item, c); +++ } else if (SIGNATURES && "Signature".equals(attrName)) { +++ signature = readUTF8(u + 8, c); +++ } else if ("Deprecated".equals(attrName)) { +++ access |= Opcodes.ACC_DEPRECATED; +++ } else if ("Synthetic".equals(attrName)) { +++ access |= Opcodes.ACC_SYNTHETIC +++ | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleAnnotations".equals(attrName)) { +++ anns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleTypeAnnotations".equals(attrName)) { +++ tanns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleAnnotations".equals(attrName)) { +++ ianns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleTypeAnnotations".equals(attrName)) { +++ itanns = u + 8; +++ } else { +++ Attribute attr = readAttribute(context.attrs, attrName, u + 8, +++ readInt(u + 4), c, -1, null); +++ if (attr != null) { +++ attr.next = attributes; +++ attributes = attr; +++ } +++ } +++ u += 6 + readInt(u + 4); +++ } +++ u += 2; +++ +++ // visits the field declaration +++ FieldVisitor fv = classVisitor.visitField(access, name, desc, +++ signature, value); +++ if (fv == null) { +++ return u; +++ } +++ +++ // visits the field annotations and type annotations +++ if (ANNOTATIONS && anns != 0) { +++ for (int i = readUnsignedShort(anns), v = anns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ fv.visitAnnotation(readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && ianns != 0) { +++ for (int i = readUnsignedShort(ianns), v = ianns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ fv.visitAnnotation(readUTF8(v, c), false)); +++ } +++ } +++ if (ANNOTATIONS && tanns != 0) { +++ for (int i = readUnsignedShort(tanns), v = tanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ fv.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && itanns != 0) { +++ for 
(int i = readUnsignedShort(itanns), v = itanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ fv.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), false)); +++ } +++ } +++ +++ // visits the field attributes +++ while (attributes != null) { +++ Attribute attr = attributes.next; +++ attributes.next = null; +++ fv.visitAttribute(attributes); +++ attributes = attr; +++ } +++ +++ // visits the end of the field +++ fv.visitEnd(); +++ +++ return u; +++ } +++ +++ /** +++ * Reads a method and makes the given visitor visit it. +++ * +++ * @param classVisitor +++ * the visitor that must visit the method. +++ * @param context +++ * information about the class being parsed. +++ * @param u +++ * the start offset of the method in the class file. +++ * @return the offset of the first byte following the method in the class. +++ */ +++ private int readMethod(final ClassVisitor classVisitor, +++ final Context context, int u) { +++ // reads the method declaration +++ char[] c = context.buffer; +++ context.access = readUnsignedShort(u); +++ context.name = readUTF8(u + 2, c); +++ context.desc = readUTF8(u + 4, c); +++ u += 6; +++ +++ // reads the method attributes +++ int code = 0; +++ int exception = 0; +++ String[] exceptions = null; +++ String signature = null; +++ int methodParameters = 0; +++ int anns = 0; +++ int ianns = 0; +++ int tanns = 0; +++ int itanns = 0; +++ int dann = 0; +++ int mpanns = 0; +++ int impanns = 0; +++ int firstAttribute = u; +++ Attribute attributes = null; +++ +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ String attrName = readUTF8(u + 2, c); +++ // tests are sorted in decreasing frequency order +++ // (based on frequencies observed on typical classes) +++ if ("Code".equals(attrName)) { +++ if ((context.flags & SKIP_CODE) == 0) { +++ code = u + 8; +++ } +++ } else if ("Exceptions".equals(attrName)) { +++ exceptions = new String[readUnsignedShort(u + 8)]; +++ 
exception = u + 10; +++ for (int j = 0; j < exceptions.length; ++j) { +++ exceptions[j] = readClass(exception, c); +++ exception += 2; +++ } +++ } else if (SIGNATURES && "Signature".equals(attrName)) { +++ signature = readUTF8(u + 8, c); +++ } else if ("Deprecated".equals(attrName)) { +++ context.access |= Opcodes.ACC_DEPRECATED; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleAnnotations".equals(attrName)) { +++ anns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleTypeAnnotations".equals(attrName)) { +++ tanns = u + 8; +++ } else if (ANNOTATIONS && "AnnotationDefault".equals(attrName)) { +++ dann = u + 8; +++ } else if ("Synthetic".equals(attrName)) { +++ context.access |= Opcodes.ACC_SYNTHETIC +++ | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleAnnotations".equals(attrName)) { +++ ianns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleTypeAnnotations".equals(attrName)) { +++ itanns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleParameterAnnotations".equals(attrName)) { +++ mpanns = u + 8; +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleParameterAnnotations".equals(attrName)) { +++ impanns = u + 8; +++ } else if ("MethodParameters".equals(attrName)) { +++ methodParameters = u + 8; +++ } else { +++ Attribute attr = readAttribute(context.attrs, attrName, u + 8, +++ readInt(u + 4), c, -1, null); +++ if (attr != null) { +++ attr.next = attributes; +++ attributes = attr; +++ } +++ } +++ u += 6 + readInt(u + 4); +++ } +++ u += 2; +++ +++ // visits the method declaration +++ MethodVisitor mv = classVisitor.visitMethod(context.access, +++ context.name, context.desc, signature, exceptions); +++ if (mv == null) { +++ return u; +++ } +++ +++ /* +++ * if the returned MethodVisitor is in fact a MethodWriter, it means +++ * there is no method adapter between the reader and the writer. 
If, in +++ * addition, the writer's constant pool was copied from this reader +++ * (mw.cw.cr == this), and the signature and exceptions of the method +++ * have not been changed, then it is possible to skip all visit events +++ * and just copy the original code of the method to the writer (the +++ * access, name and descriptor can have been changed, this is not +++ * important since they are not copied as is from the reader). +++ */ +++ if (WRITER && mv instanceof MethodWriter) { +++ MethodWriter mw = (MethodWriter) mv; +++ if (mw.cw.cr == this && signature == mw.signature) { +++ boolean sameExceptions = false; +++ if (exceptions == null) { +++ sameExceptions = mw.exceptionCount == 0; +++ } else if (exceptions.length == mw.exceptionCount) { +++ sameExceptions = true; +++ for (int j = exceptions.length - 1; j >= 0; --j) { +++ exception -= 2; +++ if (mw.exceptions[j] != readUnsignedShort(exception)) { +++ sameExceptions = false; +++ break; +++ } +++ } +++ } +++ if (sameExceptions) { +++ /* +++ * we do not copy directly the code into MethodWriter to +++ * save a byte array copy operation. The real copy will be +++ * done in ClassWriter.toByteArray(). 
+++ */ +++ mw.classReaderOffset = firstAttribute; +++ mw.classReaderLength = u - firstAttribute; +++ return u; +++ } +++ } +++ } +++ +++ // visit the method parameters +++ if (methodParameters != 0) { +++ for (int i = b[methodParameters] & 0xFF, v = methodParameters + 1; i > 0; --i, v = v + 4) { +++ mv.visitParameter(readUTF8(v, c), readUnsignedShort(v + 2)); +++ } +++ } +++ +++ // visits the method annotations +++ if (ANNOTATIONS && dann != 0) { +++ AnnotationVisitor dv = mv.visitAnnotationDefault(); +++ readAnnotationValue(dann, c, null, dv); +++ if (dv != null) { +++ dv.visitEnd(); +++ } +++ } +++ if (ANNOTATIONS && anns != 0) { +++ for (int i = readUnsignedShort(anns), v = anns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitAnnotation(readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && ianns != 0) { +++ for (int i = readUnsignedShort(ianns), v = ianns + 2; i > 0; --i) { +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitAnnotation(readUTF8(v, c), false)); +++ } +++ } +++ if (ANNOTATIONS && tanns != 0) { +++ for (int i = readUnsignedShort(tanns), v = tanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), true)); +++ } +++ } +++ if (ANNOTATIONS && itanns != 0) { +++ for (int i = readUnsignedShort(itanns), v = itanns + 2; i > 0; --i) { +++ v = readAnnotationTarget(context, v); +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitTypeAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), false)); +++ } +++ } +++ if (ANNOTATIONS && mpanns != 0) { +++ readParameterAnnotations(mv, context, mpanns, true); +++ } +++ if (ANNOTATIONS && impanns != 0) { +++ readParameterAnnotations(mv, context, impanns, false); +++ } +++ +++ // visits the method attributes +++ while (attributes != null) { +++ Attribute attr = attributes.next; +++ attributes.next = null; +++ 
mv.visitAttribute(attributes); +++ attributes = attr; +++ } +++ +++ // visits the method code +++ if (code != 0) { +++ mv.visitCode(); +++ readCode(mv, context, code); +++ } +++ +++ // visits the end of the method +++ mv.visitEnd(); +++ +++ return u; +++ } +++ +++ /** +++ * Reads the bytecode of a method and makes the given visitor visit it. +++ * +++ * @param mv +++ * the visitor that must visit the method's code. +++ * @param context +++ * information about the class being parsed. +++ * @param u +++ * the start offset of the code attribute in the class file. +++ */ +++ private void readCode(final MethodVisitor mv, final Context context, int u) { +++ // reads the header +++ byte[] b = this.b; +++ char[] c = context.buffer; +++ int maxStack = readUnsignedShort(u); +++ int maxLocals = readUnsignedShort(u + 2); +++ int codeLength = readInt(u + 4); +++ u += 8; +++ +++ // reads the bytecode to find the labels +++ int codeStart = u; +++ int codeEnd = u + codeLength; +++ Label[] labels = context.labels = new Label[codeLength + 2]; +++ readLabel(codeLength + 1, labels); +++ while (u < codeEnd) { +++ int offset = u - codeStart; +++ int opcode = b[u] & 0xFF; +++ switch (ClassWriter.TYPE[opcode]) { +++ case ClassWriter.NOARG_INSN: +++ case ClassWriter.IMPLVAR_INSN: +++ u += 1; +++ break; +++ case ClassWriter.LABEL_INSN: +++ readLabel(offset + readShort(u + 1), labels); +++ u += 3; +++ break; +++ case ClassWriter.LABELW_INSN: +++ readLabel(offset + readInt(u + 1), labels); +++ u += 5; +++ break; +++ case ClassWriter.WIDE_INSN: +++ opcode = b[u + 1] & 0xFF; +++ if (opcode == Opcodes.IINC) { +++ u += 6; +++ } else { +++ u += 4; +++ } +++ break; +++ case ClassWriter.TABL_INSN: +++ // skips 0 to 3 padding bytes +++ u = u + 4 - (offset & 3); +++ // reads instruction +++ readLabel(offset + readInt(u), labels); +++ for (int i = readInt(u + 8) - readInt(u + 4) + 1; i > 0; --i) { +++ readLabel(offset + readInt(u + 12), labels); +++ u += 4; +++ } +++ u += 12; +++ break; +++ case 
ClassWriter.LOOK_INSN: +++ // skips 0 to 3 padding bytes +++ u = u + 4 - (offset & 3); +++ // reads instruction +++ readLabel(offset + readInt(u), labels); +++ for (int i = readInt(u + 4); i > 0; --i) { +++ readLabel(offset + readInt(u + 12), labels); +++ u += 8; +++ } +++ u += 8; +++ break; +++ case ClassWriter.VAR_INSN: +++ case ClassWriter.SBYTE_INSN: +++ case ClassWriter.LDC_INSN: +++ u += 2; +++ break; +++ case ClassWriter.SHORT_INSN: +++ case ClassWriter.LDCW_INSN: +++ case ClassWriter.FIELDORMETH_INSN: +++ case ClassWriter.TYPE_INSN: +++ case ClassWriter.IINC_INSN: +++ u += 3; +++ break; +++ case ClassWriter.ITFMETH_INSN: +++ case ClassWriter.INDYMETH_INSN: +++ u += 5; +++ break; +++ // case MANA_INSN: +++ default: +++ u += 4; +++ break; +++ } +++ } +++ +++ // reads the try catch entries to find the labels, and also visits them +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ Label start = readLabel(readUnsignedShort(u + 2), labels); +++ Label end = readLabel(readUnsignedShort(u + 4), labels); +++ Label handler = readLabel(readUnsignedShort(u + 6), labels); +++ String type = readUTF8(items[readUnsignedShort(u + 8)], c); +++ mv.visitTryCatchBlock(start, end, handler, type); +++ u += 8; +++ } +++ u += 2; +++ +++ // reads the code attributes +++ int[] tanns = null; // start index of each visible type annotation +++ int[] itanns = null; // start index of each invisible type annotation +++ int tann = 0; // current index in tanns array +++ int itann = 0; // current index in itanns array +++ int ntoff = -1; // next visible type annotation code offset +++ int nitoff = -1; // next invisible type annotation code offset +++ int varTable = 0; +++ int varTypeTable = 0; +++ boolean zip = true; +++ boolean unzip = (context.flags & EXPAND_FRAMES) != 0; +++ int stackMap = 0; +++ int stackMapSize = 0; +++ int frameCount = 0; +++ Context frame = null; +++ Attribute attributes = null; +++ +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ String attrName = 
readUTF8(u + 2, c); +++ if ("LocalVariableTable".equals(attrName)) { +++ if ((context.flags & SKIP_DEBUG) == 0) { +++ varTable = u + 8; +++ for (int j = readUnsignedShort(u + 8), v = u; j > 0; --j) { +++ int label = readUnsignedShort(v + 10); +++ if (labels[label] == null) { +++ readLabel(label, labels).status |= Label.DEBUG; +++ } +++ label += readUnsignedShort(v + 12); +++ if (labels[label] == null) { +++ readLabel(label, labels).status |= Label.DEBUG; +++ } +++ v += 10; +++ } +++ } +++ } else if ("LocalVariableTypeTable".equals(attrName)) { +++ varTypeTable = u + 8; +++ } else if ("LineNumberTable".equals(attrName)) { +++ if ((context.flags & SKIP_DEBUG) == 0) { +++ for (int j = readUnsignedShort(u + 8), v = u; j > 0; --j) { +++ int label = readUnsignedShort(v + 10); +++ if (labels[label] == null) { +++ readLabel(label, labels).status |= Label.DEBUG; +++ } +++ Label l = labels[label]; +++ while (l.line > 0) { +++ if (l.next == null) { +++ l.next = new Label(); +++ } +++ l = l.next; +++ } +++ l.line = readUnsignedShort(v + 12); +++ v += 4; +++ } +++ } +++ } else if (ANNOTATIONS +++ && "RuntimeVisibleTypeAnnotations".equals(attrName)) { +++ tanns = readTypeAnnotations(mv, context, u + 8, true); +++ ntoff = tanns.length == 0 || readByte(tanns[0]) < 0x43 ? -1 +++ : readUnsignedShort(tanns[0] + 1); +++ } else if (ANNOTATIONS +++ && "RuntimeInvisibleTypeAnnotations".equals(attrName)) { +++ itanns = readTypeAnnotations(mv, context, u + 8, false); +++ nitoff = itanns.length == 0 || readByte(itanns[0]) < 0x43 ? -1 +++ : readUnsignedShort(itanns[0] + 1); +++ } else if (FRAMES && "StackMapTable".equals(attrName)) { +++ if ((context.flags & SKIP_FRAMES) == 0) { +++ stackMap = u + 10; +++ stackMapSize = readInt(u + 4); +++ frameCount = readUnsignedShort(u + 8); +++ } +++ /* +++ * here we do not extract the labels corresponding to the +++ * attribute content. 
This would require a full parsing of the +++ * attribute, which would need to be repeated in the second +++ * phase (see below). Instead the content of the attribute is +++ * read one frame at a time (i.e. after a frame has been +++ * visited, the next frame is read), and the labels it contains +++ * are also extracted one frame at a time. Thanks to the +++ * ordering of frames, having only a "one frame lookahead" is +++ * not a problem, i.e. it is not possible to see an offset +++ * smaller than the offset of the current insn and for which no +++ * Label exist. +++ */ +++ /* +++ * This is not true for UNINITIALIZED type offsets. We solve +++ * this by parsing the stack map table without a full decoding +++ * (see below). +++ */ +++ } else if (FRAMES && "StackMap".equals(attrName)) { +++ if ((context.flags & SKIP_FRAMES) == 0) { +++ zip = false; +++ stackMap = u + 10; +++ stackMapSize = readInt(u + 4); +++ frameCount = readUnsignedShort(u + 8); +++ } +++ /* +++ * IMPORTANT! here we assume that the frames are ordered, as in +++ * the StackMapTable attribute, although this is not guaranteed +++ * by the attribute format. 
+++ */ +++ } else { +++ for (int j = 0; j < context.attrs.length; ++j) { +++ if (context.attrs[j].type.equals(attrName)) { +++ Attribute attr = context.attrs[j].read(this, u + 8, +++ readInt(u + 4), c, codeStart - 8, labels); +++ if (attr != null) { +++ attr.next = attributes; +++ attributes = attr; +++ } +++ } +++ } +++ } +++ u += 6 + readInt(u + 4); +++ } +++ u += 2; +++ +++ // generates the first (implicit) stack map frame +++ if (FRAMES && stackMap != 0) { +++ /* +++ * for the first explicit frame the offset is not offset_delta + 1 +++ * but only offset_delta; setting the implicit frame offset to -1 +++ * allow the use of the "offset_delta + 1" rule in all cases +++ */ +++ frame = context; +++ frame.offset = -1; +++ frame.mode = 0; +++ frame.localCount = 0; +++ frame.localDiff = 0; +++ frame.stackCount = 0; +++ frame.local = new Object[maxLocals]; +++ frame.stack = new Object[maxStack]; +++ if (unzip) { +++ getImplicitFrame(context); +++ } +++ /* +++ * Finds labels for UNINITIALIZED frame types. Instead of decoding +++ * each element of the stack map table, we look for 3 consecutive +++ * bytes that "look like" an UNINITIALIZED type (tag 8, offset +++ * within code bounds, NEW instruction at this offset). We may find +++ * false positives (i.e. not real UNINITIALIZED types), but this +++ * should be rare, and the only consequence will be the creation of +++ * an unneeded label. This is better than creating a label for each +++ * NEW instruction, and faster than fully decoding the whole stack +++ * map table. 
+++ */ +++ for (int i = stackMap; i < stackMap + stackMapSize - 2; ++i) { +++ if (b[i] == 8) { // UNINITIALIZED FRAME TYPE +++ int v = readUnsignedShort(i + 1); +++ if (v >= 0 && v < codeLength) { +++ if ((b[codeStart + v] & 0xFF) == Opcodes.NEW) { +++ readLabel(v, labels); +++ } +++ } +++ } +++ } +++ } +++ +++ // visits the instructions +++ u = codeStart; +++ while (u < codeEnd) { +++ int offset = u - codeStart; +++ +++ // visits the label and line number for this offset, if any +++ Label l = labels[offset]; +++ if (l != null) { +++ Label next = l.next; +++ l.next = null; +++ mv.visitLabel(l); +++ if ((context.flags & SKIP_DEBUG) == 0 && l.line > 0) { +++ mv.visitLineNumber(l.line, l); +++ while (next != null) { +++ mv.visitLineNumber(next.line, l); +++ next = next.next; +++ } +++ } +++ } +++ +++ // visits the frame for this offset, if any +++ while (FRAMES && frame != null +++ && (frame.offset == offset || frame.offset == -1)) { +++ // if there is a frame for this offset, makes the visitor visit +++ // it, and reads the next frame if there is one. 
+++ if (frame.offset != -1) { +++ if (!zip || unzip) { +++ mv.visitFrame(Opcodes.F_NEW, frame.localCount, +++ frame.local, frame.stackCount, frame.stack); +++ } else { +++ mv.visitFrame(frame.mode, frame.localDiff, frame.local, +++ frame.stackCount, frame.stack); +++ } +++ } +++ if (frameCount > 0) { +++ stackMap = readFrame(stackMap, zip, unzip, frame); +++ --frameCount; +++ } else { +++ frame = null; +++ } +++ } +++ +++ // visits the instruction at this offset +++ int opcode = b[u] & 0xFF; +++ switch (ClassWriter.TYPE[opcode]) { +++ case ClassWriter.NOARG_INSN: +++ mv.visitInsn(opcode); +++ u += 1; +++ break; +++ case ClassWriter.IMPLVAR_INSN: +++ if (opcode > Opcodes.ISTORE) { +++ opcode -= 59; // ISTORE_0 +++ mv.visitVarInsn(Opcodes.ISTORE + (opcode >> 2), +++ opcode & 0x3); +++ } else { +++ opcode -= 26; // ILOAD_0 +++ mv.visitVarInsn(Opcodes.ILOAD + (opcode >> 2), opcode & 0x3); +++ } +++ u += 1; +++ break; +++ case ClassWriter.LABEL_INSN: +++ mv.visitJumpInsn(opcode, labels[offset + readShort(u + 1)]); +++ u += 3; +++ break; +++ case ClassWriter.LABELW_INSN: +++ mv.visitJumpInsn(opcode - 33, labels[offset + readInt(u + 1)]); +++ u += 5; +++ break; +++ case ClassWriter.WIDE_INSN: +++ opcode = b[u + 1] & 0xFF; +++ if (opcode == Opcodes.IINC) { +++ mv.visitIincInsn(readUnsignedShort(u + 2), readShort(u + 4)); +++ u += 6; +++ } else { +++ mv.visitVarInsn(opcode, readUnsignedShort(u + 2)); +++ u += 4; +++ } +++ break; +++ case ClassWriter.TABL_INSN: { +++ // skips 0 to 3 padding bytes +++ u = u + 4 - (offset & 3); +++ // reads instruction +++ int label = offset + readInt(u); +++ int min = readInt(u + 4); +++ int max = readInt(u + 8); +++ Label[] table = new Label[max - min + 1]; +++ u += 12; +++ for (int i = 0; i < table.length; ++i) { +++ table[i] = labels[offset + readInt(u)]; +++ u += 4; +++ } +++ mv.visitTableSwitchInsn(min, max, labels[label], table); +++ break; +++ } +++ case ClassWriter.LOOK_INSN: { +++ // skips 0 to 3 padding bytes +++ u = u + 4 - (offset 
& 3); +++ // reads instruction +++ int label = offset + readInt(u); +++ int len = readInt(u + 4); +++ int[] keys = new int[len]; +++ Label[] values = new Label[len]; +++ u += 8; +++ for (int i = 0; i < len; ++i) { +++ keys[i] = readInt(u); +++ values[i] = labels[offset + readInt(u + 4)]; +++ u += 8; +++ } +++ mv.visitLookupSwitchInsn(labels[label], keys, values); +++ break; +++ } +++ case ClassWriter.VAR_INSN: +++ mv.visitVarInsn(opcode, b[u + 1] & 0xFF); +++ u += 2; +++ break; +++ case ClassWriter.SBYTE_INSN: +++ mv.visitIntInsn(opcode, b[u + 1]); +++ u += 2; +++ break; +++ case ClassWriter.SHORT_INSN: +++ mv.visitIntInsn(opcode, readShort(u + 1)); +++ u += 3; +++ break; +++ case ClassWriter.LDC_INSN: +++ mv.visitLdcInsn(readConst(b[u + 1] & 0xFF, c)); +++ u += 2; +++ break; +++ case ClassWriter.LDCW_INSN: +++ mv.visitLdcInsn(readConst(readUnsignedShort(u + 1), c)); +++ u += 3; +++ break; +++ case ClassWriter.FIELDORMETH_INSN: +++ case ClassWriter.ITFMETH_INSN: { +++ int cpIndex = items[readUnsignedShort(u + 1)]; +++ boolean itf = b[cpIndex - 1] == ClassWriter.IMETH; +++ String iowner = readClass(cpIndex, c); +++ cpIndex = items[readUnsignedShort(cpIndex + 2)]; +++ String iname = readUTF8(cpIndex, c); +++ String idesc = readUTF8(cpIndex + 2, c); +++ if (opcode < Opcodes.INVOKEVIRTUAL) { +++ mv.visitFieldInsn(opcode, iowner, iname, idesc); +++ } else { +++ mv.visitMethodInsn(opcode, iowner, iname, idesc, itf); +++ } +++ if (opcode == Opcodes.INVOKEINTERFACE) { +++ u += 5; +++ } else { +++ u += 3; +++ } +++ break; +++ } +++ case ClassWriter.INDYMETH_INSN: { +++ int cpIndex = items[readUnsignedShort(u + 1)]; +++ int bsmIndex = context.bootstrapMethods[readUnsignedShort(cpIndex)]; +++ Handle bsm = (Handle) readConst(readUnsignedShort(bsmIndex), c); +++ int bsmArgCount = readUnsignedShort(bsmIndex + 2); +++ Object[] bsmArgs = new Object[bsmArgCount]; +++ bsmIndex += 4; +++ for (int i = 0; i < bsmArgCount; i++) { +++ bsmArgs[i] = readConst(readUnsignedShort(bsmIndex), 
c); +++ bsmIndex += 2; +++ } +++ cpIndex = items[readUnsignedShort(cpIndex + 2)]; +++ String iname = readUTF8(cpIndex, c); +++ String idesc = readUTF8(cpIndex + 2, c); +++ mv.visitInvokeDynamicInsn(iname, idesc, bsm, bsmArgs); +++ u += 5; +++ break; +++ } +++ case ClassWriter.TYPE_INSN: +++ mv.visitTypeInsn(opcode, readClass(u + 1, c)); +++ u += 3; +++ break; +++ case ClassWriter.IINC_INSN: +++ mv.visitIincInsn(b[u + 1] & 0xFF, b[u + 2]); +++ u += 3; +++ break; +++ // case MANA_INSN: +++ default: +++ mv.visitMultiANewArrayInsn(readClass(u + 1, c), b[u + 3] & 0xFF); +++ u += 4; +++ break; +++ } +++ +++ // visit the instruction annotations, if any +++ while (tanns != null && tann < tanns.length && ntoff <= offset) { +++ if (ntoff == offset) { +++ int v = readAnnotationTarget(context, tanns[tann]); +++ readAnnotationValues(v + 2, c, true, +++ mv.visitInsnAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), true)); +++ } +++ ntoff = ++tann >= tanns.length || readByte(tanns[tann]) < 0x43 ? -1 +++ : readUnsignedShort(tanns[tann] + 1); +++ } +++ while (itanns != null && itann < itanns.length && nitoff <= offset) { +++ if (nitoff == offset) { +++ int v = readAnnotationTarget(context, itanns[itann]); +++ readAnnotationValues(v + 2, c, true, +++ mv.visitInsnAnnotation(context.typeRef, +++ context.typePath, readUTF8(v, c), false)); +++ } +++ nitoff = ++itann >= itanns.length +++ || readByte(itanns[itann]) < 0x43 ? 
-1 +++ : readUnsignedShort(itanns[itann] + 1); +++ } +++ } +++ if (labels[codeLength] != null) { +++ mv.visitLabel(labels[codeLength]); +++ } +++ +++ // visits the local variable tables +++ if ((context.flags & SKIP_DEBUG) == 0 && varTable != 0) { +++ int[] typeTable = null; +++ if (varTypeTable != 0) { +++ u = varTypeTable + 2; +++ typeTable = new int[readUnsignedShort(varTypeTable) * 3]; +++ for (int i = typeTable.length; i > 0;) { +++ typeTable[--i] = u + 6; // signature +++ typeTable[--i] = readUnsignedShort(u + 8); // index +++ typeTable[--i] = readUnsignedShort(u); // start +++ u += 10; +++ } +++ } +++ u = varTable + 2; +++ for (int i = readUnsignedShort(varTable); i > 0; --i) { +++ int start = readUnsignedShort(u); +++ int length = readUnsignedShort(u + 2); +++ int index = readUnsignedShort(u + 8); +++ String vsignature = null; +++ if (typeTable != null) { +++ for (int j = 0; j < typeTable.length; j += 3) { +++ if (typeTable[j] == start && typeTable[j + 1] == index) { +++ vsignature = readUTF8(typeTable[j + 2], c); +++ break; +++ } +++ } +++ } +++ mv.visitLocalVariable(readUTF8(u + 4, c), readUTF8(u + 6, c), +++ vsignature, labels[start], labels[start + length], +++ index); +++ u += 10; +++ } +++ } +++ +++ // visits the local variables type annotations +++ if (tanns != null) { +++ for (int i = 0; i < tanns.length; ++i) { +++ if ((readByte(tanns[i]) >> 1) == (0x40 >> 1)) { +++ int v = readAnnotationTarget(context, tanns[i]); +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitLocalVariableAnnotation(context.typeRef, +++ context.typePath, context.start, +++ context.end, context.index, readUTF8(v, c), +++ true)); +++ } +++ } +++ } +++ if (itanns != null) { +++ for (int i = 0; i < itanns.length; ++i) { +++ if ((readByte(itanns[i]) >> 1) == (0x40 >> 1)) { +++ int v = readAnnotationTarget(context, itanns[i]); +++ v = readAnnotationValues(v + 2, c, true, +++ mv.visitLocalVariableAnnotation(context.typeRef, +++ context.typePath, context.start, +++ context.end, 
context.index, readUTF8(v, c), +++ false)); +++ } +++ } +++ } +++ +++ // visits the code attributes +++ while (attributes != null) { +++ Attribute attr = attributes.next; +++ attributes.next = null; +++ mv.visitAttribute(attributes); +++ attributes = attr; +++ } +++ +++ // visits the max stack and max locals values +++ mv.visitMaxs(maxStack, maxLocals); +++ } +++ +++ /** +++ * Parses a type annotation table to find the labels, and to visit the try +++ * catch block annotations. +++ * +++ * @param u +++ * the start offset of a type annotation table. +++ * @param mv +++ * the method visitor to be used to visit the try catch block +++ * annotations. +++ * @param context +++ * information about the class being parsed. +++ * @param visible +++ * if the type annotation table to parse contains runtime visible +++ * annotations. +++ * @return the start offset of each type annotation in the parsed table. +++ */ +++ private int[] readTypeAnnotations(final MethodVisitor mv, +++ final Context context, int u, boolean visible) { +++ char[] c = context.buffer; +++ int[] offsets = new int[readUnsignedShort(u)]; +++ u += 2; +++ for (int i = 0; i < offsets.length; ++i) { +++ offsets[i] = u; +++ int target = readInt(u); +++ switch (target >>> 24) { +++ case 0x00: // CLASS_TYPE_PARAMETER +++ case 0x01: // METHOD_TYPE_PARAMETER +++ case 0x16: // METHOD_FORMAL_PARAMETER +++ u += 2; +++ break; +++ case 0x13: // FIELD +++ case 0x14: // METHOD_RETURN +++ case 0x15: // METHOD_RECEIVER +++ u += 1; +++ break; +++ case 0x40: // LOCAL_VARIABLE +++ case 0x41: // RESOURCE_VARIABLE +++ for (int j = readUnsignedShort(u + 1); j > 0; --j) { +++ int start = readUnsignedShort(u + 3); +++ int length = readUnsignedShort(u + 5); +++ readLabel(start, context.labels); +++ readLabel(start + length, context.labels); +++ u += 6; +++ } +++ u += 3; +++ break; +++ case 0x47: // CAST +++ case 0x48: // CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ case 0x49: // METHOD_INVOCATION_TYPE_ARGUMENT +++ case 0x4A: // 
CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ case 0x4B: // METHOD_REFERENCE_TYPE_ARGUMENT +++ u += 4; +++ break; +++ // case 0x10: // CLASS_EXTENDS +++ // case 0x11: // CLASS_TYPE_PARAMETER_BOUND +++ // case 0x12: // METHOD_TYPE_PARAMETER_BOUND +++ // case 0x17: // THROWS +++ // case 0x42: // EXCEPTION_PARAMETER +++ // case 0x43: // INSTANCEOF +++ // case 0x44: // NEW +++ // case 0x45: // CONSTRUCTOR_REFERENCE +++ // case 0x46: // METHOD_REFERENCE +++ default: +++ u += 3; +++ break; +++ } +++ int pathLength = readByte(u); +++ if ((target >>> 24) == 0x42) { +++ TypePath path = pathLength == 0 ? null : new TypePath(b, u); +++ u += 1 + 2 * pathLength; +++ u = readAnnotationValues(u + 2, c, true, +++ mv.visitTryCatchAnnotation(target, path, +++ readUTF8(u, c), visible)); +++ } else { +++ u = readAnnotationValues(u + 3 + 2 * pathLength, c, true, null); +++ } +++ } +++ return offsets; +++ } +++ +++ /** +++ * Parses the header of a type annotation to extract its target_type and +++ * target_path (the result is stored in the given context), and returns the +++ * start offset of the rest of the type_annotation structure (i.e. the +++ * offset to the type_index field, which is followed by +++ * num_element_value_pairs and then the name,value pairs). +++ * +++ * @param context +++ * information about the class being parsed. This is where the +++ * extracted target_type and target_path must be stored. +++ * @param u +++ * the start offset of a type_annotation structure. +++ * @return the start offset of the rest of the type_annotation structure. 
+++ */ +++ private int readAnnotationTarget(final Context context, int u) { +++ int target = readInt(u); +++ switch (target >>> 24) { +++ case 0x00: // CLASS_TYPE_PARAMETER +++ case 0x01: // METHOD_TYPE_PARAMETER +++ case 0x16: // METHOD_FORMAL_PARAMETER +++ target &= 0xFFFF0000; +++ u += 2; +++ break; +++ case 0x13: // FIELD +++ case 0x14: // METHOD_RETURN +++ case 0x15: // METHOD_RECEIVER +++ target &= 0xFF000000; +++ u += 1; +++ break; +++ case 0x40: // LOCAL_VARIABLE +++ case 0x41: { // RESOURCE_VARIABLE +++ target &= 0xFF000000; +++ int n = readUnsignedShort(u + 1); +++ context.start = new Label[n]; +++ context.end = new Label[n]; +++ context.index = new int[n]; +++ u += 3; +++ for (int i = 0; i < n; ++i) { +++ int start = readUnsignedShort(u); +++ int length = readUnsignedShort(u + 2); +++ context.start[i] = readLabel(start, context.labels); +++ context.end[i] = readLabel(start + length, context.labels); +++ context.index[i] = readUnsignedShort(u + 4); +++ u += 6; +++ } +++ break; +++ } +++ case 0x47: // CAST +++ case 0x48: // CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ case 0x49: // METHOD_INVOCATION_TYPE_ARGUMENT +++ case 0x4A: // CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ case 0x4B: // METHOD_REFERENCE_TYPE_ARGUMENT +++ target &= 0xFF0000FF; +++ u += 4; +++ break; +++ // case 0x10: // CLASS_EXTENDS +++ // case 0x11: // CLASS_TYPE_PARAMETER_BOUND +++ // case 0x12: // METHOD_TYPE_PARAMETER_BOUND +++ // case 0x17: // THROWS +++ // case 0x42: // EXCEPTION_PARAMETER +++ // case 0x43: // INSTANCEOF +++ // case 0x44: // NEW +++ // case 0x45: // CONSTRUCTOR_REFERENCE +++ // case 0x46: // METHOD_REFERENCE +++ default: +++ target &= (target >>> 24) < 0x43 ? 0xFFFFFF00 : 0xFF000000; +++ u += 3; +++ break; +++ } +++ int pathLength = readByte(u); +++ context.typeRef = target; +++ context.typePath = pathLength == 0 ? null : new TypePath(b, u); +++ return u + 1 + 2 * pathLength; +++ } +++ +++ /** +++ * Reads parameter annotations and makes the given visitor visit them. 
+++ * +++ * @param mv +++ * the visitor that must visit the annotations. +++ * @param context +++ * information about the class being parsed. +++ * @param v +++ * start offset in {@link #b b} of the annotations to be read. +++ * @param visible +++ * <tt>true</tt> if the annotations to be read are visible at +++ * runtime. +++ */ +++ private void readParameterAnnotations(final MethodVisitor mv, +++ final Context context, int v, final boolean visible) { +++ int i; +++ int n = b[v++] & 0xFF; +++ // workaround for a bug in javac (javac compiler generates a parameter +++ // annotation array whose size is equal to the number of parameters in +++ // the Java source file, while it should generate an array whose size is +++ // equal to the number of parameters in the method descriptor - which +++ // includes the synthetic parameters added by the compiler). This work- +++ // around supposes that the synthetic parameters are the first ones. +++ int synthetics = Type.getArgumentTypes(context.desc).length - n; +++ AnnotationVisitor av; +++ for (i = 0; i < synthetics; ++i) { +++ // virtual annotation to detect synthetic parameters in MethodWriter +++ av = mv.visitParameterAnnotation(i, "Ljava/lang/Synthetic;", false); +++ if (av != null) { +++ av.visitEnd(); +++ } +++ } +++ char[] c = context.buffer; +++ for (; i < n + synthetics; ++i) { +++ int j = readUnsignedShort(v); +++ v += 2; +++ for (; j > 0; --j) { +++ av = mv.visitParameterAnnotation(i, readUTF8(v, c), visible); +++ v = readAnnotationValues(v + 2, c, true, av); +++ } +++ } +++ } +++ +++ /** +++ * Reads the values of an annotation and makes the given visitor visit them. +++ * +++ * @param v +++ * the start offset in {@link #b b} of the values to be read +++ * (including the unsigned short that gives the number of +++ * values). +++ * @param buf +++ * buffer to be used to call {@link #readUTF8 readUTF8}, +++ * {@link #readClass(int,char[]) readClass} or {@link #readConst +++ * readConst}. 
+++ * @param named +++ * if the annotation values are named or not. +++ * @param av +++ * the visitor that must visit the values. +++ * @return the end offset of the annotation values. +++ */ +++ private int readAnnotationValues(int v, final char[] buf, +++ final boolean named, final AnnotationVisitor av) { +++ int i = readUnsignedShort(v); +++ v += 2; +++ if (named) { +++ for (; i > 0; --i) { +++ v = readAnnotationValue(v + 2, buf, readUTF8(v, buf), av); +++ } +++ } else { +++ for (; i > 0; --i) { +++ v = readAnnotationValue(v, buf, null, av); +++ } +++ } +++ if (av != null) { +++ av.visitEnd(); +++ } +++ return v; +++ } +++ +++ /** +++ * Reads a value of an annotation and makes the given visitor visit it. +++ * +++ * @param v +++ * the start offset in {@link #b b} of the value to be read +++ * (<i>not including the value name constant pool index</i>). +++ * @param buf +++ * buffer to be used to call {@link #readUTF8 readUTF8}, +++ * {@link #readClass(int,char[]) readClass} or {@link #readConst +++ * readConst}. +++ * @param name +++ * the name of the value to be read. +++ * @param av +++ * the visitor that must visit the value. +++ * @return the end offset of the annotation value. 
+++ */ +++ private int readAnnotationValue(int v, final char[] buf, final String name, +++ final AnnotationVisitor av) { +++ int i; +++ if (av == null) { +++ switch (b[v] & 0xFF) { +++ case 'e': // enum_const_value +++ return v + 5; +++ case '@': // annotation_value +++ return readAnnotationValues(v + 3, buf, true, null); +++ case '[': // array_value +++ return readAnnotationValues(v + 1, buf, false, null); +++ default: +++ return v + 3; +++ } +++ } +++ switch (b[v++] & 0xFF) { +++ case 'I': // pointer to CONSTANT_Integer +++ case 'J': // pointer to CONSTANT_Long +++ case 'F': // pointer to CONSTANT_Float +++ case 'D': // pointer to CONSTANT_Double +++ av.visit(name, readConst(readUnsignedShort(v), buf)); +++ v += 2; +++ break; +++ case 'B': // pointer to CONSTANT_Byte +++ av.visit(name, (byte) readInt(items[readUnsignedShort(v)])); +++ v += 2; +++ break; +++ case 'Z': // pointer to CONSTANT_Boolean +++ av.visit(name, +++ readInt(items[readUnsignedShort(v)]) == 0 ? Boolean.FALSE +++ : Boolean.TRUE); +++ v += 2; +++ break; +++ case 'S': // pointer to CONSTANT_Short +++ av.visit(name, (short) readInt(items[readUnsignedShort(v)])); +++ v += 2; +++ break; +++ case 'C': // pointer to CONSTANT_Char +++ av.visit(name, (char) readInt(items[readUnsignedShort(v)])); +++ v += 2; +++ break; +++ case 's': // pointer to CONSTANT_Utf8 +++ av.visit(name, readUTF8(v, buf)); +++ v += 2; +++ break; +++ case 'e': // enum_const_value +++ av.visitEnum(name, readUTF8(v, buf), readUTF8(v + 2, buf)); +++ v += 4; +++ break; +++ case 'c': // class_info +++ av.visit(name, Type.getType(readUTF8(v, buf))); +++ v += 2; +++ break; +++ case '@': // annotation_value +++ v = readAnnotationValues(v + 2, buf, true, +++ av.visitAnnotation(name, readUTF8(v, buf))); +++ break; +++ case '[': // array_value +++ int size = readUnsignedShort(v); +++ v += 2; +++ if (size == 0) { +++ return readAnnotationValues(v - 2, buf, false, +++ av.visitArray(name)); +++ } +++ switch (this.b[v++] & 0xFF) { +++ case 'B': 
+++ byte[] bv = new byte[size]; +++ for (i = 0; i < size; i++) { +++ bv[i] = (byte) readInt(items[readUnsignedShort(v)]); +++ v += 3; +++ } +++ av.visit(name, bv); +++ --v; +++ break; +++ case 'Z': +++ boolean[] zv = new boolean[size]; +++ for (i = 0; i < size; i++) { +++ zv[i] = readInt(items[readUnsignedShort(v)]) != 0; +++ v += 3; +++ } +++ av.visit(name, zv); +++ --v; +++ break; +++ case 'S': +++ short[] sv = new short[size]; +++ for (i = 0; i < size; i++) { +++ sv[i] = (short) readInt(items[readUnsignedShort(v)]); +++ v += 3; +++ } +++ av.visit(name, sv); +++ --v; +++ break; +++ case 'C': +++ char[] cv = new char[size]; +++ for (i = 0; i < size; i++) { +++ cv[i] = (char) readInt(items[readUnsignedShort(v)]); +++ v += 3; +++ } +++ av.visit(name, cv); +++ --v; +++ break; +++ case 'I': +++ int[] iv = new int[size]; +++ for (i = 0; i < size; i++) { +++ iv[i] = readInt(items[readUnsignedShort(v)]); +++ v += 3; +++ } +++ av.visit(name, iv); +++ --v; +++ break; +++ case 'J': +++ long[] lv = new long[size]; +++ for (i = 0; i < size; i++) { +++ lv[i] = readLong(items[readUnsignedShort(v)]); +++ v += 3; +++ } +++ av.visit(name, lv); +++ --v; +++ break; +++ case 'F': +++ float[] fv = new float[size]; +++ for (i = 0; i < size; i++) { +++ fv[i] = Float +++ .intBitsToFloat(readInt(items[readUnsignedShort(v)])); +++ v += 3; +++ } +++ av.visit(name, fv); +++ --v; +++ break; +++ case 'D': +++ double[] dv = new double[size]; +++ for (i = 0; i < size; i++) { +++ dv[i] = Double +++ .longBitsToDouble(readLong(items[readUnsignedShort(v)])); +++ v += 3; +++ } +++ av.visit(name, dv); +++ --v; +++ break; +++ default: +++ v = readAnnotationValues(v - 3, buf, false, av.visitArray(name)); +++ } +++ } +++ return v; +++ } +++ +++ /** +++ * Computes the implicit frame of the method currently being parsed (as +++ * defined in the given {@link Context}) and stores it in the given context. +++ * +++ * @param frame +++ * information about the class being parsed. 
+++ */ +++ private void getImplicitFrame(final Context frame) { +++ String desc = frame.desc; +++ Object[] locals = frame.local; +++ int local = 0; +++ if ((frame.access & Opcodes.ACC_STATIC) == 0) { +++ if ("<init>".equals(frame.name)) { +++ locals[local++] = Opcodes.UNINITIALIZED_THIS; +++ } else { +++ locals[local++] = readClass(header + 2, frame.buffer); +++ } +++ } +++ int i = 1; +++ loop: while (true) { +++ int j = i; +++ switch (desc.charAt(i++)) { +++ case 'Z': +++ case 'C': +++ case 'B': +++ case 'S': +++ case 'I': +++ locals[local++] = Opcodes.INTEGER; +++ break; +++ case 'F': +++ locals[local++] = Opcodes.FLOAT; +++ break; +++ case 'J': +++ locals[local++] = Opcodes.LONG; +++ break; +++ case 'D': +++ locals[local++] = Opcodes.DOUBLE; +++ break; +++ case '[': +++ while (desc.charAt(i) == '[') { +++ ++i; +++ } +++ if (desc.charAt(i) == 'L') { +++ ++i; +++ while (desc.charAt(i) != ';') { +++ ++i; +++ } +++ } +++ locals[local++] = desc.substring(j, ++i); +++ break; +++ case 'L': +++ while (desc.charAt(i) != ';') { +++ ++i; +++ } +++ locals[local++] = desc.substring(j + 1, i++); +++ break; +++ default: +++ break loop; +++ } +++ } +++ frame.localCount = local; +++ } +++ +++ /** +++ * Reads a stack map frame and stores the result in the given +++ * {@link Context} object. +++ * +++ * @param stackMap +++ * the start offset of a stack map frame in the class file. +++ * @param zip +++ * if the stack map frame at stackMap is compressed or not. +++ * @param unzip +++ * if the stack map frame must be uncompressed. +++ * @param frame +++ * where the parsed stack map frame must be stored. +++ * @return the offset of the first byte following the parsed frame. 
+++ */ +++ private int readFrame(int stackMap, boolean zip, boolean unzip, +++ Context frame) { +++ char[] c = frame.buffer; +++ Label[] labels = frame.labels; +++ int tag; +++ int delta; +++ if (zip) { +++ tag = b[stackMap++] & 0xFF; +++ } else { +++ tag = MethodWriter.FULL_FRAME; +++ frame.offset = -1; +++ } +++ frame.localDiff = 0; +++ if (tag < MethodWriter.SAME_LOCALS_1_STACK_ITEM_FRAME) { +++ delta = tag; +++ frame.mode = Opcodes.F_SAME; +++ frame.stackCount = 0; +++ } else if (tag < MethodWriter.RESERVED) { +++ delta = tag - MethodWriter.SAME_LOCALS_1_STACK_ITEM_FRAME; +++ stackMap = readFrameType(frame.stack, 0, stackMap, c, labels); +++ frame.mode = Opcodes.F_SAME1; +++ frame.stackCount = 1; +++ } else { +++ delta = readUnsignedShort(stackMap); +++ stackMap += 2; +++ if (tag == MethodWriter.SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED) { +++ stackMap = readFrameType(frame.stack, 0, stackMap, c, labels); +++ frame.mode = Opcodes.F_SAME1; +++ frame.stackCount = 1; +++ } else if (tag >= MethodWriter.CHOP_FRAME +++ && tag < MethodWriter.SAME_FRAME_EXTENDED) { +++ frame.mode = Opcodes.F_CHOP; +++ frame.localDiff = MethodWriter.SAME_FRAME_EXTENDED - tag; +++ frame.localCount -= frame.localDiff; +++ frame.stackCount = 0; +++ } else if (tag == MethodWriter.SAME_FRAME_EXTENDED) { +++ frame.mode = Opcodes.F_SAME; +++ frame.stackCount = 0; +++ } else if (tag < MethodWriter.FULL_FRAME) { +++ int local = unzip ? 
frame.localCount : 0; +++ for (int i = tag - MethodWriter.SAME_FRAME_EXTENDED; i > 0; i--) { +++ stackMap = readFrameType(frame.local, local++, stackMap, c, +++ labels); +++ } +++ frame.mode = Opcodes.F_APPEND; +++ frame.localDiff = tag - MethodWriter.SAME_FRAME_EXTENDED; +++ frame.localCount += frame.localDiff; +++ frame.stackCount = 0; +++ } else { // if (tag == FULL_FRAME) { +++ frame.mode = Opcodes.F_FULL; +++ int n = readUnsignedShort(stackMap); +++ stackMap += 2; +++ frame.localDiff = n; +++ frame.localCount = n; +++ for (int local = 0; n > 0; n--) { +++ stackMap = readFrameType(frame.local, local++, stackMap, c, +++ labels); +++ } +++ n = readUnsignedShort(stackMap); +++ stackMap += 2; +++ frame.stackCount = n; +++ for (int stack = 0; n > 0; n--) { +++ stackMap = readFrameType(frame.stack, stack++, stackMap, c, +++ labels); +++ } +++ } +++ } +++ frame.offset += delta + 1; +++ readLabel(frame.offset, labels); +++ return stackMap; +++ } +++ +++ /** +++ * Reads a stack map frame type and stores it at the given index in the +++ * given array. +++ * +++ * @param frame +++ * the array where the parsed type must be stored. +++ * @param index +++ * the index in 'frame' where the parsed type must be stored. +++ * @param v +++ * the start offset of the stack map frame type to read. +++ * @param buf +++ * a buffer to read strings. +++ * @param labels +++ * the labels of the method currently being parsed, indexed by +++ * their offset. If the parsed type is an Uninitialized type, a +++ * new label for the corresponding NEW instruction is stored in +++ * this array if it does not already exist. +++ * @return the offset of the first byte after the parsed type. 
+++ */ +++ private int readFrameType(final Object[] frame, final int index, int v, +++ final char[] buf, final Label[] labels) { +++ int type = b[v++] & 0xFF; +++ switch (type) { +++ case 0: +++ frame[index] = Opcodes.TOP; +++ break; +++ case 1: +++ frame[index] = Opcodes.INTEGER; +++ break; +++ case 2: +++ frame[index] = Opcodes.FLOAT; +++ break; +++ case 3: +++ frame[index] = Opcodes.DOUBLE; +++ break; +++ case 4: +++ frame[index] = Opcodes.LONG; +++ break; +++ case 5: +++ frame[index] = Opcodes.NULL; +++ break; +++ case 6: +++ frame[index] = Opcodes.UNINITIALIZED_THIS; +++ break; +++ case 7: // Object +++ frame[index] = readClass(v, buf); +++ v += 2; +++ break; +++ default: // Uninitialized +++ frame[index] = readLabel(readUnsignedShort(v), labels); +++ v += 2; +++ } +++ return v; +++ } +++ +++ /** +++ * Returns the label corresponding to the given offset. The default +++ * implementation of this method creates a label for the given offset if it +++ * has not been already created. +++ * +++ * @param offset +++ * a bytecode offset in a method. +++ * @param labels +++ * the already created labels, indexed by their offset. If a +++ * label already exists for offset this method must not create a +++ * new one. Otherwise it must store the new label in this array. +++ * @return a non null Label, which must be equal to labels[offset]. +++ */ +++ protected Label readLabel(int offset, Label[] labels) { +++ if (labels[offset] == null) { +++ labels[offset] = new Label(); +++ } +++ return labels[offset]; +++ } +++ +++ /** +++ * Returns the start index of the attribute_info structure of this class. +++ * +++ * @return the start index of the attribute_info structure of this class. 
+++ */ +++ private int getAttributes() { +++ // skips the header +++ int u = header + 8 + readUnsignedShort(header + 6) * 2; +++ // skips fields and methods +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ for (int j = readUnsignedShort(u + 8); j > 0; --j) { +++ u += 6 + readInt(u + 12); +++ } +++ u += 8; +++ } +++ u += 2; +++ for (int i = readUnsignedShort(u); i > 0; --i) { +++ for (int j = readUnsignedShort(u + 8); j > 0; --j) { +++ u += 6 + readInt(u + 12); +++ } +++ u += 8; +++ } +++ // the attribute_info structure starts just after the methods +++ return u + 2; +++ } +++ +++ /** +++ * Reads an attribute in {@link #b b}. +++ * +++ * @param attrs +++ * prototypes of the attributes that must be parsed during the +++ * visit of the class. Any attribute whose type is not equal to +++ * the type of one the prototypes is ignored (i.e. an empty +++ * {@link Attribute} instance is returned). +++ * @param type +++ * the type of the attribute. +++ * @param off +++ * index of the first byte of the attribute's content in +++ * {@link #b b}. The 6 attribute header bytes, containing the +++ * type and the length of the attribute, are not taken into +++ * account here (they have already been read). +++ * @param len +++ * the length of the attribute's content. +++ * @param buf +++ * buffer to be used to call {@link #readUTF8 readUTF8}, +++ * {@link #readClass(int,char[]) readClass} or {@link #readConst +++ * readConst}. +++ * @param codeOff +++ * index of the first byte of code's attribute content in +++ * {@link #b b}, or -1 if the attribute to be read is not a code +++ * attribute. The 6 attribute header bytes, containing the type +++ * and the length of the attribute, are not taken into account +++ * here. +++ * @param labels +++ * the labels of the method's code, or <tt>null</tt> if the +++ * attribute to be read is not a code attribute. +++ * @return the attribute that has been read, or <tt>null</tt> to skip this +++ * attribute. 
+++ */ +++ private Attribute readAttribute(final Attribute[] attrs, final String type, +++ final int off, final int len, final char[] buf, final int codeOff, +++ final Label[] labels) { +++ for (int i = 0; i < attrs.length; ++i) { +++ if (attrs[i].type.equals(type)) { +++ return attrs[i].read(this, off, len, buf, codeOff, labels); +++ } +++ } +++ return new Attribute(type).read(this, off, len, null, -1, null); +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: low level parsing +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the number of constant pool items in {@link #b b}. +++ * +++ * @return the number of constant pool items in {@link #b b}. +++ */ +++ public int getItemCount() { +++ return items.length; +++ } +++ +++ /** +++ * Returns the start index of the constant pool item in {@link #b b}, plus +++ * one. <i>This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.</i> +++ * +++ * @param item +++ * the index a constant pool item. +++ * @return the start index of the constant pool item in {@link #b b}, plus +++ * one. +++ */ +++ public int getItem(final int item) { +++ return items[item]; +++ } +++ +++ /** +++ * Returns the maximum length of the strings contained in the constant pool +++ * of the class. +++ * +++ * @return the maximum length of the strings contained in the constant pool +++ * of the class. +++ */ +++ public int getMaxStringLength() { +++ return maxStringLength; +++ } +++ +++ /** +++ * Reads a byte value in {@link #b b}. <i>This method is intended for +++ * {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters.</i> +++ * +++ * @param index +++ * the start index of the value to be read in {@link #b b}. +++ * @return the read value. 
+++ */ +++ public int readByte(final int index) { +++ return b[index] & 0xFF; +++ } +++ +++ /** +++ * Reads an unsigned short value in {@link #b b}. <i>This method is intended +++ * for {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters.</i> +++ * +++ * @param index +++ * the start index of the value to be read in {@link #b b}. +++ * @return the read value. +++ */ +++ public int readUnsignedShort(final int index) { +++ byte[] b = this.b; +++ return ((b[index] & 0xFF) << 8) | (b[index + 1] & 0xFF); +++ } +++ +++ /** +++ * Reads a signed short value in {@link #b b}. <i>This method is intended +++ * for {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters.</i> +++ * +++ * @param index +++ * the start index of the value to be read in {@link #b b}. +++ * @return the read value. +++ */ +++ public short readShort(final int index) { +++ byte[] b = this.b; +++ return (short) (((b[index] & 0xFF) << 8) | (b[index + 1] & 0xFF)); +++ } +++ +++ /** +++ * Reads a signed int value in {@link #b b}. <i>This method is intended for +++ * {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters.</i> +++ * +++ * @param index +++ * the start index of the value to be read in {@link #b b}. +++ * @return the read value. +++ */ +++ public int readInt(final int index) { +++ byte[] b = this.b; +++ return ((b[index] & 0xFF) << 24) | ((b[index + 1] & 0xFF) << 16) +++ | ((b[index + 2] & 0xFF) << 8) | (b[index + 3] & 0xFF); +++ } +++ +++ /** +++ * Reads a signed long value in {@link #b b}. <i>This method is intended for +++ * {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters.</i> +++ * +++ * @param index +++ * the start index of the value to be read in {@link #b b}. +++ * @return the read value. 
+++ */ +++ public long readLong(final int index) { +++ long l1 = readInt(index); +++ long l0 = readInt(index + 4) & 0xFFFFFFFFL; +++ return (l1 << 32) | l0; +++ } +++ +++ /** +++ * Reads an UTF8 string constant pool item in {@link #b b}. <i>This method +++ * is intended for {@link Attribute} sub classes, and is normally not needed +++ * by class generators or adapters.</i> +++ * +++ * @param index +++ * the start index of an unsigned short value in {@link #b b}, +++ * whose value is the index of an UTF8 constant pool item. +++ * @param buf +++ * buffer to be used to read the item. This buffer must be +++ * sufficiently large. It is not automatically resized. +++ * @return the String corresponding to the specified UTF8 item. +++ */ +++ public String readUTF8(int index, final char[] buf) { +++ int item = readUnsignedShort(index); +++ if (index == 0 || item == 0) { +++ return null; +++ } +++ String s = strings[item]; +++ if (s != null) { +++ return s; +++ } +++ index = items[item]; +++ return strings[item] = readUTF(index + 2, readUnsignedShort(index), buf); +++ } +++ +++ /** +++ * Reads UTF8 string in {@link #b b}. +++ * +++ * @param index +++ * start offset of the UTF8 string to be read. +++ * @param utfLen +++ * length of the UTF8 string to be read. +++ * @param buf +++ * buffer to be used to read the string. This buffer must be +++ * sufficiently large. It is not automatically resized. +++ * @return the String corresponding to the specified UTF8 string. 
+++ */ +++ private String readUTF(int index, final int utfLen, final char[] buf) { +++ int endIndex = index + utfLen; +++ byte[] b = this.b; +++ int strLen = 0; +++ int c; +++ int st = 0; +++ char cc = 0; +++ while (index < endIndex) { +++ c = b[index++]; +++ switch (st) { +++ case 0: +++ c = c & 0xFF; +++ if (c < 0x80) { // 0xxxxxxx +++ buf[strLen++] = (char) c; +++ } else if (c < 0xE0 && c > 0xBF) { // 110x xxxx 10xx xxxx +++ cc = (char) (c & 0x1F); +++ st = 1; +++ } else { // 1110 xxxx 10xx xxxx 10xx xxxx +++ cc = (char) (c & 0x0F); +++ st = 2; +++ } +++ break; +++ +++ case 1: // byte 2 of 2-byte char or byte 3 of 3-byte char +++ buf[strLen++] = (char) ((cc << 6) | (c & 0x3F)); +++ st = 0; +++ break; +++ +++ case 2: // byte 2 of 3-byte char +++ cc = (char) ((cc << 6) | (c & 0x3F)); +++ st = 1; +++ break; +++ } +++ } +++ return new String(buf, 0, strLen); +++ } +++ +++ /** +++ * Reads a class constant pool item in {@link #b b}. <i>This method is +++ * intended for {@link Attribute} sub classes, and is normally not needed by +++ * class generators or adapters.</i> +++ * +++ * @param index +++ * the start index of an unsigned short value in {@link #b b}, +++ * whose value is the index of a class constant pool item. +++ * @param buf +++ * buffer to be used to read the item. This buffer must be +++ * sufficiently large. It is not automatically resized. +++ * @return the String corresponding to the specified class item. +++ */ +++ public String readClass(final int index, final char[] buf) { +++ // computes the start index of the CONSTANT_Class item in b +++ // and reads the CONSTANT_Utf8 item designated by +++ // the first two bytes of this CONSTANT_Class item +++ return readUTF8(items[readUnsignedShort(index)], buf); +++ } +++ +++ /** +++ * Reads a numeric or string constant pool item in {@link #b b}. 
<i>This +++ * method is intended for {@link Attribute} sub classes, and is normally not +++ * needed by class generators or adapters.</i> +++ * +++ * @param item +++ * the index of a constant pool item. +++ * @param buf +++ * buffer to be used to read the item. This buffer must be +++ * sufficiently large. It is not automatically resized. +++ * @return the {@link Integer}, {@link Float}, {@link Long}, {@link Double}, +++ * {@link String}, {@link Type} or {@link Handle} corresponding to +++ * the given constant pool item. +++ */ +++ public Object readConst(final int item, final char[] buf) { +++ int index = items[item]; +++ switch (b[index - 1]) { +++ case ClassWriter.INT: +++ return readInt(index); +++ case ClassWriter.FLOAT: +++ return Float.intBitsToFloat(readInt(index)); +++ case ClassWriter.LONG: +++ return readLong(index); +++ case ClassWriter.DOUBLE: +++ return Double.longBitsToDouble(readLong(index)); +++ case ClassWriter.CLASS: +++ return Type.getObjectType(readUTF8(index, buf)); +++ case ClassWriter.STR: +++ return readUTF8(index, buf); +++ case ClassWriter.MTYPE: +++ return Type.getMethodType(readUTF8(index, buf)); +++ default: // case ClassWriter.HANDLE_BASE + [1..9]: +++ int tag = readByte(index); +++ int[] items = this.items; +++ int cpIndex = items[readUnsignedShort(index + 1)]; +++ String owner = readClass(cpIndex, buf); +++ cpIndex = items[readUnsignedShort(cpIndex + 2)]; +++ String name = readUTF8(cpIndex, buf); +++ String desc = readUTF8(cpIndex + 2, buf); +++ return new Handle(tag, owner, name, desc); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/ClassVisitor.java b/contrib/asm/src/org/objectweb/asm/ClassVisitor.java ++new file mode 100644 ++index 0000000..107ada0 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/ClassVisitor.java ++@@ -0,0 +1,320 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. 
+++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A visitor to visit a Java class. The methods of this class must be called in +++ * the following order: <tt>visit</tt> [ <tt>visitSource</tt> ] [ +++ * <tt>visitOuterClass</tt> ] ( <tt>visitAnnotation</tt> | +++ * <tt>visitTypeAnnotation</tt> | <tt>visitAttribute</tt> )* ( +++ * <tt>visitInnerClass</tt> | <tt>visitField</tt> | <tt>visitMethod</tt> )* +++ * <tt>visitEnd</tt>. 
+++ * +++ * @author Eric Bruneton +++ */ +++public abstract class ClassVisitor { +++ +++ /** +++ * The ASM API version implemented by this visitor. The value of this field +++ * must be one of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ protected final int api; +++ +++ /** +++ * The class visitor to which this visitor must delegate method calls. May +++ * be null. +++ */ +++ protected ClassVisitor cv; +++ +++ /** +++ * Constructs a new {@link ClassVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ public ClassVisitor(final int api) { +++ this(api, null); +++ } +++ +++ /** +++ * Constructs a new {@link ClassVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ * @param cv +++ * the class visitor to which this visitor must delegate method +++ * calls. May be null. +++ */ +++ public ClassVisitor(final int api, final ClassVisitor cv) { +++ if (api != Opcodes.ASM4 && api != Opcodes.ASM5) { +++ throw new IllegalArgumentException(); +++ } +++ this.api = api; +++ this.cv = cv; +++ } +++ +++ /** +++ * Visits the header of the class. +++ * +++ * @param version +++ * the class version. +++ * @param access +++ * the class's access flags (see {@link Opcodes}). This parameter +++ * also indicates if the class is deprecated. +++ * @param name +++ * the internal name of the class (see +++ * {@link Type#getInternalName() getInternalName}). +++ * @param signature +++ * the signature of this class. May be <tt>null</tt> if the class +++ * is not a generic one, and does not extend or implement generic +++ * classes or interfaces. +++ * @param superName +++ * the internal of name of the super class (see +++ * {@link Type#getInternalName() getInternalName}). For +++ * interfaces, the super class is {@link Object}. 
May be +++ * <tt>null</tt>, but only for the {@link Object} class. +++ * @param interfaces +++ * the internal names of the class's interfaces (see +++ * {@link Type#getInternalName() getInternalName}). May be +++ * <tt>null</tt>. +++ */ +++ public void visit(int version, int access, String name, String signature, +++ String superName, String[] interfaces) { +++ if (cv != null) { +++ cv.visit(version, access, name, signature, superName, interfaces); +++ } +++ } +++ +++ /** +++ * Visits the source of the class. +++ * +++ * @param source +++ * the name of the source file from which the class was compiled. +++ * May be <tt>null</tt>. +++ * @param debug +++ * additional debug information to compute the correspondance +++ * between source and compiled elements of the class. May be +++ * <tt>null</tt>. +++ */ +++ public void visitSource(String source, String debug) { +++ if (cv != null) { +++ cv.visitSource(source, debug); +++ } +++ } +++ +++ /** +++ * Visits the enclosing class of the class. This method must be called only +++ * if the class has an enclosing class. +++ * +++ * @param owner +++ * internal name of the enclosing class of the class. +++ * @param name +++ * the name of the method that contains the class, or +++ * <tt>null</tt> if the class is not enclosed in a method of its +++ * enclosing class. +++ * @param desc +++ * the descriptor of the method that contains the class, or +++ * <tt>null</tt> if the class is not enclosed in a method of its +++ * enclosing class. +++ */ +++ public void visitOuterClass(String owner, String name, String desc) { +++ if (cv != null) { +++ cv.visitOuterClass(owner, name, desc); +++ } +++ } +++ +++ /** +++ * Visits an annotation of the class. +++ * +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * <tt>true</tt> if the annotation is visible at runtime. 
+++ * @return a visitor to visit the annotation values, or <tt>null</tt> if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitAnnotation(String desc, boolean visible) { +++ if (cv != null) { +++ return cv.visitAnnotation(desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an annotation on a type in the class signature. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#CLASS_TYPE_PARAMETER +++ * CLASS_TYPE_PARAMETER}, +++ * {@link TypeReference#CLASS_TYPE_PARAMETER_BOUND +++ * CLASS_TYPE_PARAMETER_BOUND} or +++ * {@link TypeReference#CLASS_EXTENDS CLASS_EXTENDS}. See +++ * {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * <tt>null</tt> if the annotation targets 'typeRef' as a whole. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * <tt>true</tt> if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or <tt>null</tt> if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitTypeAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (cv != null) { +++ return cv.visitTypeAnnotation(typeRef, typePath, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a non standard attribute of the class. +++ * +++ * @param attr +++ * an attribute. +++ */ +++ public void visitAttribute(Attribute attr) { +++ if (cv != null) { +++ cv.visitAttribute(attr); +++ } +++ } +++ +++ /** +++ * Visits information about an inner class. This inner class is not +++ * necessarily a member of the class being visited. 
+++ * +++ * @param name +++ * the internal name of an inner class (see +++ * {@link Type#getInternalName() getInternalName}). +++ * @param outerName +++ * the internal name of the class to which the inner class +++ * belongs (see {@link Type#getInternalName() getInternalName}). +++ * May be <tt>null</tt> for not member classes. +++ * @param innerName +++ * the (simple) name of the inner class inside its enclosing +++ * class. May be <tt>null</tt> for anonymous inner classes. +++ * @param access +++ * the access flags of the inner class as originally declared in +++ * the enclosing class. +++ */ +++ public void visitInnerClass(String name, String outerName, +++ String innerName, int access) { +++ if (cv != null) { +++ cv.visitInnerClass(name, outerName, innerName, access); +++ } +++ } +++ +++ /** +++ * Visits a field of the class. +++ * +++ * @param access +++ * the field's access flags (see {@link Opcodes}). This parameter +++ * also indicates if the field is synthetic and/or deprecated. +++ * @param name +++ * the field's name. +++ * @param desc +++ * the field's descriptor (see {@link Type Type}). +++ * @param signature +++ * the field's signature. May be <tt>null</tt> if the field's +++ * type does not use generic types. +++ * @param value +++ * the field's initial value. This parameter, which may be +++ * <tt>null</tt> if the field does not have an initial value, +++ * must be an {@link Integer}, a {@link Float}, a {@link Long}, a +++ * {@link Double} or a {@link String} (for <tt>int</tt>, +++ * <tt>float</tt>, <tt>long</tt> or <tt>String</tt> fields +++ * respectively). <i>This parameter is only used for static +++ * fields</i>. Its value is ignored for non static fields, which +++ * must be initialized through bytecode instructions in +++ * constructors or methods. +++ * @return a visitor to visit field annotations and attributes, or +++ * <tt>null</tt> if this class visitor is not interested in visiting +++ * these annotations and attributes. 
+++ */ +++ public FieldVisitor visitField(int access, String name, String desc, +++ String signature, Object value) { +++ if (cv != null) { +++ return cv.visitField(access, name, desc, signature, value); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a method of the class. This method <i>must</i> return a new +++ * {@link MethodVisitor} instance (or <tt>null</tt>) each time it is called, +++ * i.e., it should not return a previously returned visitor. +++ * +++ * @param access +++ * the method's access flags (see {@link Opcodes}). This +++ * parameter also indicates if the method is synthetic and/or +++ * deprecated. +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor (see {@link Type Type}). +++ * @param signature +++ * the method's signature. May be <tt>null</tt> if the method +++ * parameters, return type and exceptions do not use generic +++ * types. +++ * @param exceptions +++ * the internal names of the method's exception classes (see +++ * {@link Type#getInternalName() getInternalName}). May be +++ * <tt>null</tt>. +++ * @return an object to visit the byte code of the method, or <tt>null</tt> +++ * if this class visitor is not interested in visiting the code of +++ * this method. +++ */ +++ public MethodVisitor visitMethod(int access, String name, String desc, +++ String signature, String[] exceptions) { +++ if (cv != null) { +++ return cv.visitMethod(access, name, desc, signature, exceptions); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits the end of the class. This method, which is the last one to be +++ * called, is used to inform the visitor that all the fields and methods of +++ * the class have been visited. 
+++ */ +++ public void visitEnd() { +++ if (cv != null) { +++ cv.visitEnd(); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/ClassWriter.java b/contrib/asm/src/org/objectweb/asm/ClassWriter.java ++new file mode 100644 ++index 0000000..63e1d7e ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/ClassWriter.java ++@@ -0,0 +1,1776 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A {@link ClassVisitor} that generates classes in bytecode form. More +++ * precisely this visitor generates a byte array conforming to the Java class +++ * file format. It can be used alone, to generate a Java class "from scratch", +++ * or with one or more {@link ClassReader ClassReader} and adapter class visitor +++ * to generate a modified class from one or more existing Java classes. +++ * +++ * @author Eric Bruneton +++ */ +++public class ClassWriter extends ClassVisitor { +++ +++ /** +++ * Flag to automatically compute the maximum stack size and the maximum +++ * number of local variables of methods. If this flag is set, then the +++ * arguments of the {@link MethodVisitor#visitMaxs visitMaxs} method of the +++ * {@link MethodVisitor} returned by the {@link #visitMethod visitMethod} +++ * method will be ignored, and computed automatically from the signature and +++ * the bytecode of each method. +++ * +++ * @see #ClassWriter(int) +++ */ +++ public static final int COMPUTE_MAXS = 1; +++ +++ /** +++ * Flag to automatically compute the stack map frames of methods from +++ * scratch. If this flag is set, then the calls to the +++ * {@link MethodVisitor#visitFrame} method are ignored, and the stack map +++ * frames are recomputed from the methods bytecode. 
The arguments of the +++ * {@link MethodVisitor#visitMaxs visitMaxs} method are also ignored and +++ * recomputed from the bytecode. In other words, computeFrames implies +++ * computeMaxs. +++ * +++ * @see #ClassWriter(int) +++ */ +++ public static final int COMPUTE_FRAMES = 2; +++ +++ /** +++ * Pseudo access flag to distinguish between the synthetic attribute and the +++ * synthetic access flag. +++ */ +++ static final int ACC_SYNTHETIC_ATTRIBUTE = 0x40000; +++ +++ /** +++ * Factor to convert from ACC_SYNTHETIC_ATTRIBUTE to Opcode.ACC_SYNTHETIC. +++ */ +++ static final int TO_ACC_SYNTHETIC = ACC_SYNTHETIC_ATTRIBUTE +++ / Opcodes.ACC_SYNTHETIC; +++ +++ /** +++ * The type of instructions without any argument. +++ */ +++ static final int NOARG_INSN = 0; +++ +++ /** +++ * The type of instructions with an signed byte argument. +++ */ +++ static final int SBYTE_INSN = 1; +++ +++ /** +++ * The type of instructions with an signed short argument. +++ */ +++ static final int SHORT_INSN = 2; +++ +++ /** +++ * The type of instructions with a local variable index argument. +++ */ +++ static final int VAR_INSN = 3; +++ +++ /** +++ * The type of instructions with an implicit local variable index argument. +++ */ +++ static final int IMPLVAR_INSN = 4; +++ +++ /** +++ * The type of instructions with a type descriptor argument. +++ */ +++ static final int TYPE_INSN = 5; +++ +++ /** +++ * The type of field and method invocations instructions. +++ */ +++ static final int FIELDORMETH_INSN = 6; +++ +++ /** +++ * The type of the INVOKEINTERFACE/INVOKEDYNAMIC instruction. +++ */ +++ static final int ITFMETH_INSN = 7; +++ +++ /** +++ * The type of the INVOKEDYNAMIC instruction. +++ */ +++ static final int INDYMETH_INSN = 8; +++ +++ /** +++ * The type of instructions with a 2 bytes bytecode offset label. +++ */ +++ static final int LABEL_INSN = 9; +++ +++ /** +++ * The type of instructions with a 4 bytes bytecode offset label. 
+++ */ +++ static final int LABELW_INSN = 10; +++ +++ /** +++ * The type of the LDC instruction. +++ */ +++ static final int LDC_INSN = 11; +++ +++ /** +++ * The type of the LDC_W and LDC2_W instructions. +++ */ +++ static final int LDCW_INSN = 12; +++ +++ /** +++ * The type of the IINC instruction. +++ */ +++ static final int IINC_INSN = 13; +++ +++ /** +++ * The type of the TABLESWITCH instruction. +++ */ +++ static final int TABL_INSN = 14; +++ +++ /** +++ * The type of the LOOKUPSWITCH instruction. +++ */ +++ static final int LOOK_INSN = 15; +++ +++ /** +++ * The type of the MULTIANEWARRAY instruction. +++ */ +++ static final int MANA_INSN = 16; +++ +++ /** +++ * The type of the WIDE instruction. +++ */ +++ static final int WIDE_INSN = 17; +++ +++ /** +++ * The instruction types of all JVM opcodes. +++ */ +++ static final byte[] TYPE; +++ +++ /** +++ * The type of CONSTANT_Class constant pool items. +++ */ +++ static final int CLASS = 7; +++ +++ /** +++ * The type of CONSTANT_Fieldref constant pool items. +++ */ +++ static final int FIELD = 9; +++ +++ /** +++ * The type of CONSTANT_Methodref constant pool items. +++ */ +++ static final int METH = 10; +++ +++ /** +++ * The type of CONSTANT_InterfaceMethodref constant pool items. +++ */ +++ static final int IMETH = 11; +++ +++ /** +++ * The type of CONSTANT_String constant pool items. +++ */ +++ static final int STR = 8; +++ +++ /** +++ * The type of CONSTANT_Integer constant pool items. +++ */ +++ static final int INT = 3; +++ +++ /** +++ * The type of CONSTANT_Float constant pool items. +++ */ +++ static final int FLOAT = 4; +++ +++ /** +++ * The type of CONSTANT_Long constant pool items. +++ */ +++ static final int LONG = 5; +++ +++ /** +++ * The type of CONSTANT_Double constant pool items. +++ */ +++ static final int DOUBLE = 6; +++ +++ /** +++ * The type of CONSTANT_NameAndType constant pool items. +++ */ +++ static final int NAME_TYPE = 12; +++ +++ /** +++ * The type of CONSTANT_Utf8 constant pool items. 
+++ */ +++ static final int UTF8 = 1; +++ +++ /** +++ * The type of CONSTANT_MethodType constant pool items. +++ */ +++ static final int MTYPE = 16; +++ +++ /** +++ * The type of CONSTANT_MethodHandle constant pool items. +++ */ +++ static final int HANDLE = 15; +++ +++ /** +++ * The type of CONSTANT_InvokeDynamic constant pool items. +++ */ +++ static final int INDY = 18; +++ +++ /** +++ * The base value for all CONSTANT_MethodHandle constant pool items. +++ * Internally, ASM store the 9 variations of CONSTANT_MethodHandle into 9 +++ * different items. +++ */ +++ static final int HANDLE_BASE = 20; +++ +++ /** +++ * Normal type Item stored in the ClassWriter {@link ClassWriter#typeTable}, +++ * instead of the constant pool, in order to avoid clashes with normal +++ * constant pool items in the ClassWriter constant pool's hash table. +++ */ +++ static final int TYPE_NORMAL = 30; +++ +++ /** +++ * Uninitialized type Item stored in the ClassWriter +++ * {@link ClassWriter#typeTable}, instead of the constant pool, in order to +++ * avoid clashes with normal constant pool items in the ClassWriter constant +++ * pool's hash table. +++ */ +++ static final int TYPE_UNINIT = 31; +++ +++ /** +++ * Merged type Item stored in the ClassWriter {@link ClassWriter#typeTable}, +++ * instead of the constant pool, in order to avoid clashes with normal +++ * constant pool items in the ClassWriter constant pool's hash table. +++ */ +++ static final int TYPE_MERGED = 32; +++ +++ /** +++ * The type of BootstrapMethods items. These items are stored in a special +++ * class attribute named BootstrapMethods and not in the constant pool. +++ */ +++ static final int BSM = 33; +++ +++ /** +++ * The class reader from which this class writer was constructed, if any. +++ */ +++ ClassReader cr; +++ +++ /** +++ * Minor and major version numbers of the class to be generated. +++ */ +++ int version; +++ +++ /** +++ * Index of the next item to be added in the constant pool. 
+++ */ +++ int index; +++ +++ /** +++ * The constant pool of this class. +++ */ +++ final ByteVector pool; +++ +++ /** +++ * The constant pool's hash table data. +++ */ +++ Item[] items; +++ +++ /** +++ * The threshold of the constant pool's hash table. +++ */ +++ int threshold; +++ +++ /** +++ * A reusable key used to look for items in the {@link #items} hash table. +++ */ +++ final Item key; +++ +++ /** +++ * A reusable key used to look for items in the {@link #items} hash table. +++ */ +++ final Item key2; +++ +++ /** +++ * A reusable key used to look for items in the {@link #items} hash table. +++ */ +++ final Item key3; +++ +++ /** +++ * A reusable key used to look for items in the {@link #items} hash table. +++ */ +++ final Item key4; +++ +++ /** +++ * A type table used to temporarily store internal names that will not +++ * necessarily be stored in the constant pool. This type table is used by +++ * the control flow and data flow analysis algorithm used to compute stack +++ * map frames from scratch. This array associates to each index <tt>i</tt> +++ * the Item whose index is <tt>i</tt>. All Item objects stored in this array +++ * are also stored in the {@link #items} hash table. These two arrays allow +++ * to retrieve an Item from its index or, conversely, to get the index of an +++ * Item from its value. Each Item stores an internal name in its +++ * {@link Item#strVal1} field. +++ */ +++ Item[] typeTable; +++ +++ /** +++ * Number of elements in the {@link #typeTable} array. +++ */ +++ private short typeCount; +++ +++ /** +++ * The access flags of this class. +++ */ +++ private int access; +++ +++ /** +++ * The constant pool item that contains the internal name of this class. +++ */ +++ private int name; +++ +++ /** +++ * The internal name of this class. +++ */ +++ String thisName; +++ +++ /** +++ * The constant pool item that contains the signature of this class. 
+++ */ +++ private int signature; +++ +++ /** +++ * The constant pool item that contains the internal name of the super class +++ * of this class. +++ */ +++ private int superName; +++ +++ /** +++ * Number of interfaces implemented or extended by this class or interface. +++ */ +++ private int interfaceCount; +++ +++ /** +++ * The interfaces implemented or extended by this class or interface. More +++ * precisely, this array contains the indexes of the constant pool items +++ * that contain the internal names of these interfaces. +++ */ +++ private int[] interfaces; +++ +++ /** +++ * The index of the constant pool item that contains the name of the source +++ * file from which this class was compiled. +++ */ +++ private int sourceFile; +++ +++ /** +++ * The SourceDebug attribute of this class. +++ */ +++ private ByteVector sourceDebug; +++ +++ /** +++ * The constant pool item that contains the name of the enclosing class of +++ * this class. +++ */ +++ private int enclosingMethodOwner; +++ +++ /** +++ * The constant pool item that contains the name and descriptor of the +++ * enclosing method of this class. +++ */ +++ private int enclosingMethod; +++ +++ /** +++ * The runtime visible annotations of this class. +++ */ +++ private AnnotationWriter anns; +++ +++ /** +++ * The runtime invisible annotations of this class. +++ */ +++ private AnnotationWriter ianns; +++ +++ /** +++ * The runtime visible type annotations of this class. +++ */ +++ private AnnotationWriter tanns; +++ +++ /** +++ * The runtime invisible type annotations of this class. +++ */ +++ private AnnotationWriter itanns; +++ +++ /** +++ * The non standard attributes of this class. +++ */ +++ private Attribute attrs; +++ +++ /** +++ * The number of entries in the InnerClasses attribute. +++ */ +++ private int innerClassesCount; +++ +++ /** +++ * The InnerClasses attribute. +++ */ +++ private ByteVector innerClasses; +++ +++ /** +++ * The number of entries in the BootstrapMethods attribute. 
+++ */ +++ int bootstrapMethodsCount; +++ +++ /** +++ * The BootstrapMethods attribute. +++ */ +++ ByteVector bootstrapMethods; +++ +++ /** +++ * The fields of this class. These fields are stored in a linked list of +++ * {@link FieldWriter} objects, linked to each other by their +++ * {@link FieldWriter#fv} field. This field stores the first element of this +++ * list. +++ */ +++ FieldWriter firstField; +++ +++ /** +++ * The fields of this class. These fields are stored in a linked list of +++ * {@link FieldWriter} objects, linked to each other by their +++ * {@link FieldWriter#fv} field. This field stores the last element of this +++ * list. +++ */ +++ FieldWriter lastField; +++ +++ /** +++ * The methods of this class. These methods are stored in a linked list of +++ * {@link MethodWriter} objects, linked to each other by their +++ * {@link MethodWriter#mv} field. This field stores the first element of +++ * this list. +++ */ +++ MethodWriter firstMethod; +++ +++ /** +++ * The methods of this class. These methods are stored in a linked list of +++ * {@link MethodWriter} objects, linked to each other by their +++ * {@link MethodWriter#mv} field. This field stores the last element of this +++ * list. +++ */ +++ MethodWriter lastMethod; +++ +++ /** +++ * <tt>true</tt> if the maximum stack size and number of local variables +++ * must be automatically computed. +++ */ +++ private boolean computeMaxs; +++ +++ /** +++ * <tt>true</tt> if the stack map frames must be recomputed from scratch. +++ */ +++ private boolean computeFrames; +++ +++ /** +++ * <tt>true</tt> if the stack map tables of this class are invalid. The +++ * {@link MethodWriter#resizeInstructions} method cannot transform existing +++ * stack map tables, and so produces potentially invalid classes when it is +++ * executed. In this case the class is reread and rewritten with the +++ * {@link #COMPUTE_FRAMES} option (the resizeInstructions method can resize +++ * stack map tables when this option is used). 
+++ */ +++ boolean invalidFrames; +++ +++ // ------------------------------------------------------------------------ +++ // Static initializer +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Computes the instruction types of JVM opcodes. +++ */ +++ static { +++ int i; +++ byte[] b = new byte[220]; +++ String s = "AAAAAAAAAAAAAAAABCLMMDDDDDEEEEEEEEEEEEEEEEEEEEAAAAAAAADD" +++ + "DDDEEEEEEEEEEEEEEEEEEEEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" +++ + "AAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAAAAAAAJJJJJJJJJJJJJJJJDOPAA" +++ + "AAAAGGGGGGGHIFBFAAFFAARQJJKKJJJJJJJJJJJJJJJJJJ"; +++ for (i = 0; i < b.length; ++i) { +++ b[i] = (byte) (s.charAt(i) - 'A'); +++ } +++ TYPE = b; +++ +++ // code to generate the above string +++ // +++ // // SBYTE_INSN instructions +++ // b[Constants.NEWARRAY] = SBYTE_INSN; +++ // b[Constants.BIPUSH] = SBYTE_INSN; +++ // +++ // // SHORT_INSN instructions +++ // b[Constants.SIPUSH] = SHORT_INSN; +++ // +++ // // (IMPL)VAR_INSN instructions +++ // b[Constants.RET] = VAR_INSN; +++ // for (i = Constants.ILOAD; i <= Constants.ALOAD; ++i) { +++ // b[i] = VAR_INSN; +++ // } +++ // for (i = Constants.ISTORE; i <= Constants.ASTORE; ++i) { +++ // b[i] = VAR_INSN; +++ // } +++ // for (i = 26; i <= 45; ++i) { // ILOAD_0 to ALOAD_3 +++ // b[i] = IMPLVAR_INSN; +++ // } +++ // for (i = 59; i <= 78; ++i) { // ISTORE_0 to ASTORE_3 +++ // b[i] = IMPLVAR_INSN; +++ // } +++ // +++ // // TYPE_INSN instructions +++ // b[Constants.NEW] = TYPE_INSN; +++ // b[Constants.ANEWARRAY] = TYPE_INSN; +++ // b[Constants.CHECKCAST] = TYPE_INSN; +++ // b[Constants.INSTANCEOF] = TYPE_INSN; +++ // +++ // // (Set)FIELDORMETH_INSN instructions +++ // for (i = Constants.GETSTATIC; i <= Constants.INVOKESTATIC; ++i) { +++ // b[i] = FIELDORMETH_INSN; +++ // } +++ // b[Constants.INVOKEINTERFACE] = ITFMETH_INSN; +++ // b[Constants.INVOKEDYNAMIC] = INDYMETH_INSN; +++ // +++ // // LABEL(W)_INSN instructions +++ // for (i = Constants.IFEQ; i <= 
Constants.JSR; ++i) { +++ // b[i] = LABEL_INSN; +++ // } +++ // b[Constants.IFNULL] = LABEL_INSN; +++ // b[Constants.IFNONNULL] = LABEL_INSN; +++ // b[200] = LABELW_INSN; // GOTO_W +++ // b[201] = LABELW_INSN; // JSR_W +++ // // temporary opcodes used internally by ASM - see Label and +++ // MethodWriter +++ // for (i = 202; i < 220; ++i) { +++ // b[i] = LABEL_INSN; +++ // } +++ // +++ // // LDC(_W) instructions +++ // b[Constants.LDC] = LDC_INSN; +++ // b[19] = LDCW_INSN; // LDC_W +++ // b[20] = LDCW_INSN; // LDC2_W +++ // +++ // // special instructions +++ // b[Constants.IINC] = IINC_INSN; +++ // b[Constants.TABLESWITCH] = TABL_INSN; +++ // b[Constants.LOOKUPSWITCH] = LOOK_INSN; +++ // b[Constants.MULTIANEWARRAY] = MANA_INSN; +++ // b[196] = WIDE_INSN; // WIDE +++ // +++ // for (i = 0; i < b.length; ++i) { +++ // System.err.print((char)('A' + b[i])); +++ // } +++ // System.err.println(); +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Constructor +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new {@link ClassWriter} object. +++ * +++ * @param flags +++ * option flags that can be used to modify the default behavior +++ * of this class. See {@link #COMPUTE_MAXS}, +++ * {@link #COMPUTE_FRAMES}. +++ */ +++ public ClassWriter(final int flags) { +++ super(Opcodes.ASM5); +++ index = 1; +++ pool = new ByteVector(); +++ items = new Item[256]; +++ threshold = (int) (0.75d * items.length); +++ key = new Item(); +++ key2 = new Item(); +++ key3 = new Item(); +++ key4 = new Item(); +++ this.computeMaxs = (flags & COMPUTE_MAXS) != 0; +++ this.computeFrames = (flags & COMPUTE_FRAMES) != 0; +++ } +++ +++ /** +++ * Constructs a new {@link ClassWriter} object and enables optimizations for +++ * "mostly add" bytecode transformations. 
These optimizations are the +++ * following: +++ * +++ * <ul> +++ * <li>The constant pool from the original class is copied as is in the new +++ * class, which saves time. New constant pool entries will be added at the +++ * end if necessary, but unused constant pool entries <i>won't be +++ * removed</i>.</li> +++ * <li>Methods that are not transformed are copied as is in the new class, +++ * directly from the original class bytecode (i.e. without emitting visit +++ * events for all the method instructions), which saves a <i>lot</i> of +++ * time. Untransformed methods are detected by the fact that the +++ * {@link ClassReader} receives {@link MethodVisitor} objects that come from +++ * a {@link ClassWriter} (and not from any other {@link ClassVisitor} +++ * instance).</li> +++ * </ul> +++ * +++ * @param classReader +++ * the {@link ClassReader} used to read the original class. It +++ * will be used to copy the entire constant pool from the +++ * original class and also to copy other fragments of original +++ * bytecode where applicable. +++ * @param flags +++ * option flags that can be used to modify the default behavior +++ * of this class. <i>These option flags do not affect methods +++ * that are copied as is in the new class. This means that the +++ * maximum stack size nor the stack frames will be computed for +++ * these methods</i>. See {@link #COMPUTE_MAXS}, +++ * {@link #COMPUTE_FRAMES}. 
+++ */ +++ public ClassWriter(final ClassReader classReader, final int flags) { +++ this(flags); +++ classReader.copyPool(this); +++ this.cr = classReader; +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Implementation of the ClassVisitor abstract class +++ // ------------------------------------------------------------------------ +++ +++ @Override +++ public final void visit(final int version, final int access, +++ final String name, final String signature, final String superName, +++ final String[] interfaces) { +++ this.version = version; +++ this.access = access; +++ this.name = newClass(name); +++ thisName = name; +++ if (ClassReader.SIGNATURES && signature != null) { +++ this.signature = newUTF8(signature); +++ } +++ this.superName = superName == null ? 0 : newClass(superName); +++ if (interfaces != null && interfaces.length > 0) { +++ interfaceCount = interfaces.length; +++ this.interfaces = new int[interfaceCount]; +++ for (int i = 0; i < interfaceCount; ++i) { +++ this.interfaces[i] = newClass(interfaces[i]); +++ } +++ } +++ } +++ +++ @Override +++ public final void visitSource(final String file, final String debug) { +++ if (file != null) { +++ sourceFile = newUTF8(file); +++ } +++ if (debug != null) { +++ sourceDebug = new ByteVector().encodeUTF8(debug, 0, +++ Integer.MAX_VALUE); +++ } +++ } +++ +++ @Override +++ public final void visitOuterClass(final String owner, final String name, +++ final String desc) { +++ enclosingMethodOwner = newClass(owner); +++ if (name != null && desc != null) { +++ enclosingMethod = newNameType(name, desc); +++ } +++ } +++ +++ @Override +++ public final AnnotationVisitor visitAnnotation(final String desc, +++ final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write type, and reserve space for values count +++ bv.putShort(newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new 
AnnotationWriter(this, true, bv, bv, 2); +++ if (visible) { +++ aw.next = anns; +++ anns = aw; +++ } else { +++ aw.next = ianns; +++ ianns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public final AnnotationVisitor visitTypeAnnotation(int typeRef, +++ TypePath typePath, final String desc, final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ AnnotationWriter.putTarget(typeRef, typePath, bv); +++ // write type, and reserve space for values count +++ bv.putShort(newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(this, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = tanns; +++ tanns = aw; +++ } else { +++ aw.next = itanns; +++ itanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public final void visitAttribute(final Attribute attr) { +++ attr.next = attrs; +++ attrs = attr; +++ } +++ +++ @Override +++ public final void visitInnerClass(final String name, +++ final String outerName, final String innerName, final int access) { +++ if (innerClasses == null) { +++ innerClasses = new ByteVector(); +++ } +++ // Sec. 4.7.6 of the JVMS states "Every CONSTANT_Class_info entry in the +++ // constant_pool table which represents a class or interface C that is +++ // not a package member must have exactly one corresponding entry in the +++ // classes array". To avoid duplicates we keep track in the intVal field +++ // of the Item of each CONSTANT_Class_info entry C whether an inner +++ // class entry has already been added for C (this field is unused for +++ // class entries, and changing its value does not change the hashcode +++ // and equality tests). If so we store the index of this inner class +++ // entry (plus one) in intVal. This hack allows duplicate detection in +++ // O(1) time. 
+++ Item nameItem = newClassItem(name); +++ if (nameItem.intVal == 0) { +++ ++innerClassesCount; +++ innerClasses.putShort(nameItem.index); +++ innerClasses.putShort(outerName == null ? 0 : newClass(outerName)); +++ innerClasses.putShort(innerName == null ? 0 : newUTF8(innerName)); +++ innerClasses.putShort(access); +++ nameItem.intVal = innerClassesCount; +++ } else { +++ // Compare the inner classes entry nameItem.intVal - 1 with the +++ // arguments of this method and throw an exception if there is a +++ // difference? +++ } +++ } +++ +++ @Override +++ public final FieldVisitor visitField(final int access, final String name, +++ final String desc, final String signature, final Object value) { +++ return new FieldWriter(this, access, name, desc, signature, value); +++ } +++ +++ @Override +++ public final MethodVisitor visitMethod(final int access, final String name, +++ final String desc, final String signature, final String[] exceptions) { +++ return new MethodWriter(this, access, name, desc, signature, +++ exceptions, computeMaxs, computeFrames); +++ } +++ +++ @Override +++ public final void visitEnd() { +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Other public methods +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the bytecode of the class that was build with this class writer. +++ * +++ * @return the bytecode of the class that was build with this class writer. 
+++ */ +++ public byte[] toByteArray() { +++ if (index > 0xFFFF) { +++ throw new RuntimeException("Class file too large!"); +++ } +++ // computes the real size of the bytecode of this class +++ int size = 24 + 2 * interfaceCount; +++ int nbFields = 0; +++ FieldWriter fb = firstField; +++ while (fb != null) { +++ ++nbFields; +++ size += fb.getSize(); +++ fb = (FieldWriter) fb.fv; +++ } +++ int nbMethods = 0; +++ MethodWriter mb = firstMethod; +++ while (mb != null) { +++ ++nbMethods; +++ size += mb.getSize(); +++ mb = (MethodWriter) mb.mv; +++ } +++ int attributeCount = 0; +++ if (bootstrapMethods != null) { +++ // we put it as first attribute in order to improve a bit +++ // ClassReader.copyBootstrapMethods +++ ++attributeCount; +++ size += 8 + bootstrapMethods.length; +++ newUTF8("BootstrapMethods"); +++ } +++ if (ClassReader.SIGNATURES && signature != 0) { +++ ++attributeCount; +++ size += 8; +++ newUTF8("Signature"); +++ } +++ if (sourceFile != 0) { +++ ++attributeCount; +++ size += 8; +++ newUTF8("SourceFile"); +++ } +++ if (sourceDebug != null) { +++ ++attributeCount; +++ size += sourceDebug.length + 6; +++ newUTF8("SourceDebugExtension"); +++ } +++ if (enclosingMethodOwner != 0) { +++ ++attributeCount; +++ size += 10; +++ newUTF8("EnclosingMethod"); +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ ++attributeCount; +++ size += 6; +++ newUTF8("Deprecated"); +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ ++attributeCount; +++ size += 6; +++ newUTF8("Synthetic"); +++ } +++ } +++ if (innerClasses != null) { +++ ++attributeCount; +++ size += 8 + innerClasses.length; +++ newUTF8("InnerClasses"); +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ ++attributeCount; +++ size += 8 + anns.getSize(); +++ newUTF8("RuntimeVisibleAnnotations"); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ ++attributeCount; +++ size += 8 + 
ianns.getSize(); +++ newUTF8("RuntimeInvisibleAnnotations"); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ ++attributeCount; +++ size += 8 + tanns.getSize(); +++ newUTF8("RuntimeVisibleTypeAnnotations"); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ ++attributeCount; +++ size += 8 + itanns.getSize(); +++ newUTF8("RuntimeInvisibleTypeAnnotations"); +++ } +++ if (attrs != null) { +++ attributeCount += attrs.getCount(); +++ size += attrs.getSize(this, null, 0, -1, -1); +++ } +++ size += pool.length; +++ // allocates a byte vector of this size, in order to avoid unnecessary +++ // arraycopy operations in the ByteVector.enlarge() method +++ ByteVector out = new ByteVector(size); +++ out.putInt(0xCAFEBABE).putInt(version); +++ out.putShort(index).putByteArray(pool.data, 0, pool.length); +++ int mask = Opcodes.ACC_DEPRECATED | ACC_SYNTHETIC_ATTRIBUTE +++ | ((access & ACC_SYNTHETIC_ATTRIBUTE) / TO_ACC_SYNTHETIC); +++ out.putShort(access & ~mask).putShort(name).putShort(superName); +++ out.putShort(interfaceCount); +++ for (int i = 0; i < interfaceCount; ++i) { +++ out.putShort(interfaces[i]); +++ } +++ out.putShort(nbFields); +++ fb = firstField; +++ while (fb != null) { +++ fb.put(out); +++ fb = (FieldWriter) fb.fv; +++ } +++ out.putShort(nbMethods); +++ mb = firstMethod; +++ while (mb != null) { +++ mb.put(out); +++ mb = (MethodWriter) mb.mv; +++ } +++ out.putShort(attributeCount); +++ if (bootstrapMethods != null) { +++ out.putShort(newUTF8("BootstrapMethods")); +++ out.putInt(bootstrapMethods.length + 2).putShort( +++ bootstrapMethodsCount); +++ out.putByteArray(bootstrapMethods.data, 0, bootstrapMethods.length); +++ } +++ if (ClassReader.SIGNATURES && signature != 0) { +++ out.putShort(newUTF8("Signature")).putInt(2).putShort(signature); +++ } +++ if (sourceFile != 0) { +++ out.putShort(newUTF8("SourceFile")).putInt(2).putShort(sourceFile); +++ } +++ if (sourceDebug != null) { +++ int len = sourceDebug.length; +++ 
out.putShort(newUTF8("SourceDebugExtension")).putInt(len); +++ out.putByteArray(sourceDebug.data, 0, len); +++ } +++ if (enclosingMethodOwner != 0) { +++ out.putShort(newUTF8("EnclosingMethod")).putInt(4); +++ out.putShort(enclosingMethodOwner).putShort(enclosingMethod); +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ out.putShort(newUTF8("Deprecated")).putInt(0); +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ out.putShort(newUTF8("Synthetic")).putInt(0); +++ } +++ } +++ if (innerClasses != null) { +++ out.putShort(newUTF8("InnerClasses")); +++ out.putInt(innerClasses.length + 2).putShort(innerClassesCount); +++ out.putByteArray(innerClasses.data, 0, innerClasses.length); +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ out.putShort(newUTF8("RuntimeVisibleAnnotations")); +++ anns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ out.putShort(newUTF8("RuntimeInvisibleAnnotations")); +++ ianns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ out.putShort(newUTF8("RuntimeVisibleTypeAnnotations")); +++ tanns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ out.putShort(newUTF8("RuntimeInvisibleTypeAnnotations")); +++ itanns.put(out); +++ } +++ if (attrs != null) { +++ attrs.put(this, null, 0, -1, -1, out); +++ } +++ if (invalidFrames) { +++ anns = null; +++ ianns = null; +++ attrs = null; +++ innerClassesCount = 0; +++ innerClasses = null; +++ bootstrapMethodsCount = 0; +++ bootstrapMethods = null; +++ firstField = null; +++ lastField = null; +++ firstMethod = null; +++ lastMethod = null; +++ computeMaxs = false; +++ computeFrames = true; +++ invalidFrames = false; +++ new ClassReader(out.data).accept(this, ClassReader.SKIP_FRAMES); +++ return toByteArray(); +++ } +++ return out.data; +++ } +++ +++ // ------------------------------------------------------------------------ 
+++ // Utility methods: constant pool management +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Adds a number or string constant to the constant pool of the class being +++ * build. Does nothing if the constant pool already contains a similar item. +++ * +++ * @param cst +++ * the value of the constant to be added to the constant pool. +++ * This parameter must be an {@link Integer}, {@link Byte}, {@link Short}, +++ * {@link Character}, {@link Boolean}, {@link Float}, {@link Long}, +++ * {@link Double}, {@link String}, {@link Type} or {@link Handle}. +++ * @return a new or already existing constant item with the given value. +++ */ +++ Item newConstItem(final Object cst) { +++ if (cst instanceof Integer) { +++ int val = ((Integer) cst).intValue(); +++ return newInteger(val); +++ } else if (cst instanceof Byte) { +++ int val = ((Byte) cst).intValue(); +++ return newInteger(val); +++ } else if (cst instanceof Character) { +++ int val = ((Character) cst).charValue(); +++ return newInteger(val); +++ } else if (cst instanceof Short) { +++ int val = ((Short) cst).intValue(); +++ return newInteger(val); +++ } else if (cst instanceof Boolean) { +++ int val = ((Boolean) cst).booleanValue() ?
1 : 0; +++ return newInteger(val); +++ } else if (cst instanceof Float) { +++ float val = ((Float) cst).floatValue(); +++ return newFloat(val); +++ } else if (cst instanceof Long) { +++ long val = ((Long) cst).longValue(); +++ return newLong(val); +++ } else if (cst instanceof Double) { +++ double val = ((Double) cst).doubleValue(); +++ return newDouble(val); +++ } else if (cst instanceof String) { +++ return newString((String) cst); +++ } else if (cst instanceof Type) { +++ Type t = (Type) cst; +++ int s = t.getSort(); +++ if (s == Type.OBJECT) { +++ return newClassItem(t.getInternalName()); +++ } else if (s == Type.METHOD) { +++ return newMethodTypeItem(t.getDescriptor()); +++ } else { // s == primitive type or array +++ return newClassItem(t.getDescriptor()); +++ } +++ } else if (cst instanceof Handle) { +++ Handle h = (Handle) cst; +++ return newHandleItem(h.tag, h.owner, h.name, h.desc); +++ } else { +++ throw new IllegalArgumentException("value " + cst); +++ } +++ } +++ +++ /** +++ * Adds a number or string constant to the constant pool of the class being +++ * build. Does nothing if the constant pool already contains a similar item. +++ * <i>This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.</i> +++ * +++ * @param cst +++ * the value of the constant to be added to the constant pool. +++ * This parameter must be an {@link Integer}, a {@link Float}, a +++ * {@link Long}, a {@link Double} or a {@link String}. +++ * @return the index of a new or already existing constant item with the +++ * given value. +++ */ +++ public int newConst(final Object cst) { +++ return newConstItem(cst).index; +++ } +++ +++ /** +++ * Adds an UTF8 string to the constant pool of the class being build. Does +++ * nothing if the constant pool already contains a similar item. 
<i>This +++ * method is intended for {@link Attribute} sub classes, and is normally not +++ * needed by class generators or adapters.</i> +++ * +++ * @param value +++ * the String value. +++ * @return the index of a new or already existing UTF8 item. +++ */ +++ public int newUTF8(final String value) { +++ key.set(UTF8, value, null, null); +++ Item result = get(key); +++ if (result == null) { +++ pool.putByte(UTF8).putUTF8(value); +++ result = new Item(index++, key); +++ put(result); +++ } +++ return result.index; +++ } +++ +++ /** +++ * Adds a class reference to the constant pool of the class being build. +++ * Does nothing if the constant pool already contains a similar item. +++ * <i>This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.</i> +++ * +++ * @param value +++ * the internal name of the class. +++ * @return a new or already existing class reference item. +++ */ +++ Item newClassItem(final String value) { +++ key2.set(CLASS, value, null, null); +++ Item result = get(key2); +++ if (result == null) { +++ pool.put12(CLASS, newUTF8(value)); +++ result = new Item(index++, key2); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a class reference to the constant pool of the class being build. +++ * Does nothing if the constant pool already contains a similar item. +++ * <i>This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.</i> +++ * +++ * @param value +++ * the internal name of the class. +++ * @return the index of a new or already existing class reference item. +++ */ +++ public int newClass(final String value) { +++ return newClassItem(value).index; +++ } +++ +++ /** +++ * Adds a method type reference to the constant pool of the class being +++ * build. Does nothing if the constant pool already contains a similar item. 
+++ * <i>This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.</i> +++ * +++ * @param methodDesc +++ * method descriptor of the method type. +++ * @return a new or already existing method type reference item. +++ */ +++ Item newMethodTypeItem(final String methodDesc) { +++ key2.set(MTYPE, methodDesc, null, null); +++ Item result = get(key2); +++ if (result == null) { +++ pool.put12(MTYPE, newUTF8(methodDesc)); +++ result = new Item(index++, key2); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a method type reference to the constant pool of the class being +++ * build. Does nothing if the constant pool already contains a similar item. +++ * <i>This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.</i> +++ * +++ * @param methodDesc +++ * method descriptor of the method type. +++ * @return the index of a new or already existing method type reference +++ * item. +++ */ +++ public int newMethodType(final String methodDesc) { +++ return newMethodTypeItem(methodDesc).index; +++ } +++ +++ /** +++ * Adds a handle to the constant pool of the class being build. Does nothing +++ * if the constant pool already contains a similar item. <i>This method is +++ * intended for {@link Attribute} sub classes, and is normally not needed by +++ * class generators or adapters.</i> +++ * +++ * @param tag +++ * the kind of this handle. Must be {@link Opcodes#H_GETFIELD}, +++ * {@link Opcodes#H_GETSTATIC}, {@link Opcodes#H_PUTFIELD}, +++ * {@link Opcodes#H_PUTSTATIC}, {@link Opcodes#H_INVOKEVIRTUAL}, +++ * {@link Opcodes#H_INVOKESTATIC}, +++ * {@link Opcodes#H_INVOKESPECIAL}, +++ * {@link Opcodes#H_NEWINVOKESPECIAL} or +++ * {@link Opcodes#H_INVOKEINTERFACE}. +++ * @param owner +++ * the internal name of the field or method owner class. +++ * @param name +++ * the name of the field or method. 
+++ * @param desc +++ * the descriptor of the field or method. +++ * @return a new or an already existing handle item. +++ */ +++ Item newHandleItem(final int tag, final String owner, final String name, +++ final String desc) { +++ key4.set(HANDLE_BASE + tag, owner, name, desc); +++ Item result = get(key4); +++ if (result == null) { +++ if (tag <= Opcodes.H_PUTSTATIC) { +++ put112(HANDLE, tag, newField(owner, name, desc)); +++ } else { +++ put112(HANDLE, +++ tag, +++ newMethod(owner, name, desc, +++ tag == Opcodes.H_INVOKEINTERFACE)); +++ } +++ result = new Item(index++, key4); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a handle to the constant pool of the class being build. Does nothing +++ * if the constant pool already contains a similar item. <i>This method is +++ * intended for {@link Attribute} sub classes, and is normally not needed by +++ * class generators or adapters.</i> +++ * +++ * @param tag +++ * the kind of this handle. Must be {@link Opcodes#H_GETFIELD}, +++ * {@link Opcodes#H_GETSTATIC}, {@link Opcodes#H_PUTFIELD}, +++ * {@link Opcodes#H_PUTSTATIC}, {@link Opcodes#H_INVOKEVIRTUAL}, +++ * {@link Opcodes#H_INVOKESTATIC}, +++ * {@link Opcodes#H_INVOKESPECIAL}, +++ * {@link Opcodes#H_NEWINVOKESPECIAL} or +++ * {@link Opcodes#H_INVOKEINTERFACE}. +++ * @param owner +++ * the internal name of the field or method owner class. +++ * @param name +++ * the name of the field or method. +++ * @param desc +++ * the descriptor of the field or method. +++ * @return the index of a new or already existing handle +++ * item. +++ */ +++ public int newHandle(final int tag, final String owner, final String name, +++ final String desc) { +++ return newHandleItem(tag, owner, name, desc).index; +++ } +++ +++ /** +++ * Adds an invokedynamic reference to the constant pool of the class being +++ * build. Does nothing if the constant pool already contains a similar item.
+++ * <i>This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.</i> +++ * +++ * @param name +++ * name of the invoked method. +++ * @param desc +++ * descriptor of the invoke method. +++ * @param bsm +++ * the bootstrap method. +++ * @param bsmArgs +++ * the bootstrap method constant arguments. +++ * +++ * @return a new or an already existing invokedynamic type reference item. +++ */ +++ Item newInvokeDynamicItem(final String name, final String desc, +++ final Handle bsm, final Object... bsmArgs) { +++ // cache for performance +++ ByteVector bootstrapMethods = this.bootstrapMethods; +++ if (bootstrapMethods == null) { +++ bootstrapMethods = this.bootstrapMethods = new ByteVector(); +++ } +++ +++ int position = bootstrapMethods.length; // record current position +++ +++ int hashCode = bsm.hashCode(); +++ bootstrapMethods.putShort(newHandle(bsm.tag, bsm.owner, bsm.name, +++ bsm.desc)); +++ +++ int argsLength = bsmArgs.length; +++ bootstrapMethods.putShort(argsLength); +++ +++ for (int i = 0; i < argsLength; i++) { +++ Object bsmArg = bsmArgs[i]; +++ hashCode ^= bsmArg.hashCode(); +++ bootstrapMethods.putShort(newConst(bsmArg)); +++ } +++ +++ byte[] data = bootstrapMethods.data; +++ int length = (1 + 1 + argsLength) << 1; // (bsm + argCount + arguments) +++ hashCode &= 0x7FFFFFFF; +++ Item result = items[hashCode % items.length]; +++ loop: while (result != null) { +++ if (result.type != BSM || result.hashCode != hashCode) { +++ result = result.next; +++ continue; +++ } +++ +++ // because the data encode the size of the argument +++ // we don't need to test if these size are equals +++ int resultPosition = result.intVal; +++ for (int p = 0; p < length; p++) { +++ if (data[position + p] != data[resultPosition + p]) { +++ result = result.next; +++ continue loop; +++ } +++ } +++ break; +++ } +++ +++ int bootstrapMethodIndex; +++ if (result != null) { +++ bootstrapMethodIndex = result.index; +++ 
bootstrapMethods.length = position; // revert to old position +++ } else { +++ bootstrapMethodIndex = bootstrapMethodsCount++; +++ result = new Item(bootstrapMethodIndex); +++ result.set(position, hashCode); +++ put(result); +++ } +++ +++ // now, create the InvokeDynamic constant +++ key3.set(name, desc, bootstrapMethodIndex); +++ result = get(key3); +++ if (result == null) { +++ put122(INDY, bootstrapMethodIndex, newNameType(name, desc)); +++ result = new Item(index++, key3); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds an invokedynamic reference to the constant pool of the class being +++ * build. Does nothing if the constant pool already contains a similar item. +++ * <i>This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.</i> +++ * +++ * @param name +++ * name of the invoked method. +++ * @param desc +++ * descriptor of the invoke method. +++ * @param bsm +++ * the bootstrap method. +++ * @param bsmArgs +++ * the bootstrap method constant arguments. +++ * +++ * @return the index of a new or already existing invokedynamic reference +++ * item. +++ */ +++ public int newInvokeDynamic(final String name, final String desc, +++ final Handle bsm, final Object... bsmArgs) { +++ return newInvokeDynamicItem(name, desc, bsm, bsmArgs).index; +++ } +++ +++ /** +++ * Adds a field reference to the constant pool of the class being build. +++ * Does nothing if the constant pool already contains a similar item. +++ * +++ * @param owner +++ * the internal name of the field's owner class. +++ * @param name +++ * the field's name. +++ * @param desc +++ * the field's descriptor. +++ * @return a new or already existing field reference item. 
+++ */ +++ Item newFieldItem(final String owner, final String name, final String desc) { +++ key3.set(FIELD, owner, name, desc); +++ Item result = get(key3); +++ if (result == null) { +++ put122(FIELD, newClass(owner), newNameType(name, desc)); +++ result = new Item(index++, key3); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a field reference to the constant pool of the class being build. +++ * Does nothing if the constant pool already contains a similar item. +++ * <i>This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.</i> +++ * +++ * @param owner +++ * the internal name of the field's owner class. +++ * @param name +++ * the field's name. +++ * @param desc +++ * the field's descriptor. +++ * @return the index of a new or already existing field reference item. +++ */ +++ public int newField(final String owner, final String name, final String desc) { +++ return newFieldItem(owner, name, desc).index; +++ } +++ +++ /** +++ * Adds a method reference to the constant pool of the class being build. +++ * Does nothing if the constant pool already contains a similar item. +++ * +++ * @param owner +++ * the internal name of the method's owner class. +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor. +++ * @param itf +++ * <tt>true</tt> if <tt>owner</tt> is an interface. +++ * @return a new or already existing method reference item. +++ */ +++ Item newMethodItem(final String owner, final String name, +++ final String desc, final boolean itf) { +++ int type = itf ? IMETH : METH; +++ key3.set(type, owner, name, desc); +++ Item result = get(key3); +++ if (result == null) { +++ put122(type, newClass(owner), newNameType(name, desc)); +++ result = new Item(index++, key3); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a method reference to the constant pool of the class being build. 
+++ * Does nothing if the constant pool already contains a similar item. +++ * <i>This method is intended for {@link Attribute} sub classes, and is +++ * normally not needed by class generators or adapters.</i> +++ * +++ * @param owner +++ * the internal name of the method's owner class. +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor. +++ * @param itf +++ * <tt>true</tt> if <tt>owner</tt> is an interface. +++ * @return the index of a new or already existing method reference item. +++ */ +++ public int newMethod(final String owner, final String name, +++ final String desc, final boolean itf) { +++ return newMethodItem(owner, name, desc, itf).index; +++ } +++ +++ /** +++ * Adds an integer to the constant pool of the class being build. Does +++ * nothing if the constant pool already contains a similar item. +++ * +++ * @param value +++ * the int value. +++ * @return a new or already existing int item. +++ */ +++ Item newInteger(final int value) { +++ key.set(value); +++ Item result = get(key); +++ if (result == null) { +++ pool.putByte(INT).putInt(value); +++ result = new Item(index++, key); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a float to the constant pool of the class being build. Does nothing +++ * if the constant pool already contains a similar item. +++ * +++ * @param value +++ * the float value. +++ * @return a new or already existing float item. +++ */ +++ Item newFloat(final float value) { +++ key.set(value); +++ Item result = get(key); +++ if (result == null) { +++ pool.putByte(FLOAT).putInt(key.intVal); +++ result = new Item(index++, key); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a long to the constant pool of the class being build. Does nothing +++ * if the constant pool already contains a similar item. +++ * +++ * @param value +++ * the long value. +++ * @return a new or already existing long item. 
+++ */ +++ Item newLong(final long value) { +++ key.set(value); +++ Item result = get(key); +++ if (result == null) { +++ pool.putByte(LONG).putLong(value); +++ result = new Item(index, key); +++ index += 2; +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a double to the constant pool of the class being build. Does nothing +++ * if the constant pool already contains a similar item. +++ * +++ * @param value +++ * the double value. +++ * @return a new or already existing double item. +++ */ +++ Item newDouble(final double value) { +++ key.set(value); +++ Item result = get(key); +++ if (result == null) { +++ pool.putByte(DOUBLE).putLong(key.longVal); +++ result = new Item(index, key); +++ index += 2; +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a string to the constant pool of the class being build. Does nothing +++ * if the constant pool already contains a similar item. +++ * +++ * @param value +++ * the String value. +++ * @return a new or already existing string item. +++ */ +++ private Item newString(final String value) { +++ key2.set(STR, value, null, null); +++ Item result = get(key2); +++ if (result == null) { +++ pool.put12(STR, newUTF8(value)); +++ result = new Item(index++, key2); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds a name and type to the constant pool of the class being build. Does +++ * nothing if the constant pool already contains a similar item. <i>This +++ * method is intended for {@link Attribute} sub classes, and is normally not +++ * needed by class generators or adapters.</i> +++ * +++ * @param name +++ * a name. +++ * @param desc +++ * a type descriptor. +++ * @return the index of a new or already existing name and type item. +++ */ +++ public int newNameType(final String name, final String desc) { +++ return newNameTypeItem(name, desc).index; +++ } +++ +++ /** +++ * Adds a name and type to the constant pool of the class being build. 
Does +++ * nothing if the constant pool already contains a similar item. +++ * +++ * @param name +++ * a name. +++ * @param desc +++ * a type descriptor. +++ * @return a new or already existing name and type item. +++ */ +++ Item newNameTypeItem(final String name, final String desc) { +++ key2.set(NAME_TYPE, name, desc, null); +++ Item result = get(key2); +++ if (result == null) { +++ put122(NAME_TYPE, newUTF8(name), newUTF8(desc)); +++ result = new Item(index++, key2); +++ put(result); +++ } +++ return result; +++ } +++ +++ /** +++ * Adds the given internal name to {@link #typeTable} and returns its index. +++ * Does nothing if the type table already contains this internal name. +++ * +++ * @param type +++ * the internal name to be added to the type table. +++ * @return the index of this internal name in the type table. +++ */ +++ int addType(final String type) { +++ key.set(TYPE_NORMAL, type, null, null); +++ Item result = get(key); +++ if (result == null) { +++ result = addType(key); +++ } +++ return result.index; +++ } +++ +++ /** +++ * Adds the given "uninitialized" type to {@link #typeTable} and returns its +++ * index. This method is used for UNINITIALIZED types, made of an internal +++ * name and a bytecode offset. +++ * +++ * @param type +++ * the internal name to be added to the type table. +++ * @param offset +++ * the bytecode offset of the NEW instruction that created this +++ * UNINITIALIZED type value. +++ * @return the index of this internal name in the type table. +++ */ +++ int addUninitializedType(final String type, final int offset) { +++ key.type = TYPE_UNINIT; +++ key.intVal = offset; +++ key.strVal1 = type; +++ key.hashCode = 0x7FFFFFFF & (TYPE_UNINIT + type.hashCode() + offset); +++ Item result = get(key); +++ if (result == null) { +++ result = addType(key); +++ } +++ return result.index; +++ } +++ +++ /** +++ * Adds the given Item to {@link #typeTable}. +++ * +++ * @param item +++ * the value to be added to the type table. 
+++ * @return the added Item, which is a new Item instance with the same value as +++ * the given Item. +++ */ +++ private Item addType(final Item item) { +++ ++typeCount; +++ Item result = new Item(typeCount, key); +++ put(result); +++ if (typeTable == null) { +++ typeTable = new Item[16]; +++ } +++ if (typeCount == typeTable.length) { +++ Item[] newTable = new Item[2 * typeTable.length]; +++ System.arraycopy(typeTable, 0, newTable, 0, typeTable.length); +++ typeTable = newTable; +++ } +++ typeTable[typeCount] = result; +++ return result; +++ } +++ +++ /** +++ * Returns the index of the common super type of the two given types. This +++ * method calls {@link #getCommonSuperClass} and caches the result in the +++ * {@link #items} hash table to speed up future calls with the same +++ * parameters. +++ * +++ * @param type1 +++ * index of an internal name in {@link #typeTable}. +++ * @param type2 +++ * index of an internal name in {@link #typeTable}. +++ * @return the index of the common super type of the two given types. +++ */ +++ int getMergedType(final int type1, final int type2) { +++ key2.type = TYPE_MERGED; +++ key2.longVal = type1 | (((long) type2) << 32); +++ key2.hashCode = 0x7FFFFFFF & (TYPE_MERGED + type1 + type2); +++ Item result = get(key2); +++ if (result == null) { +++ String t = typeTable[type1].strVal1; +++ String u = typeTable[type2].strVal1; +++ key2.intVal = addType(getCommonSuperClass(t, u)); +++ result = new Item((short) 0, key2); +++ put(result); +++ } +++ return result.intVal; +++ } +++ +++ /** +++ * Returns the common super type of the two given types. The default +++ * implementation of this method <i>loads</i> the two given classes and uses +++ * the java.lang.Class methods to find the common super class.
It can be +++ * overridden to compute this common super type in other ways, in particular +++ * without actually loading any class, or to take into account the class +++ * that is currently being generated by this ClassWriter, which can of +++ * course not be loaded since it is under construction. +++ * +++ * @param type1 +++ * the internal name of a class. +++ * @param type2 +++ * the internal name of another class. +++ * @return the internal name of the common super class of the two given +++ * classes. +++ */ +++ protected String getCommonSuperClass(final String type1, final String type2) { +++ Class<?> c, d; +++ ClassLoader classLoader = getClass().getClassLoader(); +++ try { +++ c = Class.forName(type1.replace('/', '.'), false, classLoader); +++ d = Class.forName(type2.replace('/', '.'), false, classLoader); +++ } catch (Exception e) { +++ throw new RuntimeException(e.toString()); +++ } +++ if (c.isAssignableFrom(d)) { +++ return type1; +++ } +++ if (d.isAssignableFrom(c)) { +++ return type2; +++ } +++ if (c.isInterface() || d.isInterface()) { +++ return "java/lang/Object"; +++ } else { +++ do { +++ c = c.getSuperclass(); +++ } while (!c.isAssignableFrom(d)); +++ return c.getName().replace('.', '/'); +++ } +++ } +++ +++ /** +++ * Returns the constant pool's hash table item which is equal to the given +++ * item. +++ * +++ * @param key +++ * a constant pool item. +++ * @return the constant pool's hash table item which is equal to the given +++ * item, or <tt>null</tt> if there is no such item. +++ */ +++ private Item get(final Item key) { +++ Item i = items[key.hashCode % items.length]; +++ while (i != null && (i.type != key.type || !key.isEqualTo(i))) { +++ i = i.next; +++ } +++ return i; +++ } +++ +++ /** +++ * Puts the given item in the constant pool's hash table. The hash table +++ * <i>must</i> not already contain this item. +++ * +++ * @param i +++ * the item to be added to the constant pool's hash table.
+++ */ +++ private void put(final Item i) { +++ if (index + typeCount > threshold) { +++ int ll = items.length; +++ int nl = ll * 2 + 1; +++ Item[] newItems = new Item[nl]; +++ for (int l = ll - 1; l >= 0; --l) { +++ Item j = items[l]; +++ while (j != null) { +++ int index = j.hashCode % newItems.length; +++ Item k = j.next; +++ j.next = newItems[index]; +++ newItems[index] = j; +++ j = k; +++ } +++ } +++ items = newItems; +++ threshold = (int) (nl * 0.75); +++ } +++ int index = i.hashCode % items.length; +++ i.next = items[index]; +++ items[index] = i; +++ } +++ +++ /** +++ * Puts one byte and two shorts into the constant pool. +++ * +++ * @param b +++ * a byte. +++ * @param s1 +++ * a short. +++ * @param s2 +++ * another short. +++ */ +++ private void put122(final int b, final int s1, final int s2) { +++ pool.put12(b, s1).putShort(s2); +++ } +++ +++ /** +++ * Puts two bytes and one short into the constant pool. +++ * +++ * @param b1 +++ * a byte. +++ * @param b2 +++ * another byte. +++ * @param s +++ * a short. +++ */ +++ private void put112(final int b1, final int b2, final int s) { +++ pool.put11(b1, b2).putShort(s); +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Context.java b/contrib/asm/src/org/objectweb/asm/Context.java ++new file mode 100644 ++index 0000000..363b34c ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Context.java ++@@ -0,0 +1,145 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. 
Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++ +++package org.objectweb.asm; +++ +++/** +++ * Information about a class being parsed in a {@link ClassReader}. +++ * +++ * @author Eric Bruneton +++ */ +++class Context { +++ +++ /** +++ * Prototypes of the attributes that must be parsed for this class. +++ */ +++ Attribute[] attrs; +++ +++ /** +++ * The {@link ClassReader} option flags for the parsing of this class. +++ */ +++ int flags; +++ +++ /** +++ * The buffer used to read strings. +++ */ +++ char[] buffer; +++ +++ /** +++ * The start index of each bootstrap method. +++ */ +++ int[] bootstrapMethods; +++ +++ /** +++ * The access flags of the method currently being parsed. +++ */ +++ int access; +++ +++ /** +++ * The name of the method currently being parsed. 
+++ */ +++ String name; +++ +++ /** +++ * The descriptor of the method currently being parsed. +++ */ +++ String desc; +++ +++ /** +++ * The label objects, indexed by bytecode offset, of the method currently +++ * being parsed (only bytecode offsets for which a label is needed have a +++ * non null associated Label object). +++ */ +++ Label[] labels; +++ +++ /** +++ * The target of the type annotation currently being parsed. +++ */ +++ int typeRef; +++ +++ /** +++ * The path of the type annotation currently being parsed. +++ */ +++ TypePath typePath; +++ +++ /** +++ * The offset of the latest stack map frame that has been parsed. +++ */ +++ int offset; +++ +++ /** +++ * The labels corresponding to the start of the local variable ranges in the +++ * local variable type annotation currently being parsed. +++ */ +++ Label[] start; +++ +++ /** +++ * The labels corresponding to the end of the local variable ranges in the +++ * local variable type annotation currently being parsed. +++ */ +++ Label[] end; +++ +++ /** +++ * The local variable indices for each local variable range in the local +++ * variable type annotation currently being parsed. +++ */ +++ int[] index; +++ +++ /** +++ * The encoding of the latest stack map frame that has been parsed. +++ */ +++ int mode; +++ +++ /** +++ * The number of locals in the latest stack map frame that has been parsed. +++ */ +++ int localCount; +++ +++ /** +++ * The number locals in the latest stack map frame that has been parsed, +++ * minus the number of locals in the previous frame. +++ */ +++ int localDiff; +++ +++ /** +++ * The local values of the latest stack map frame that has been parsed. +++ */ +++ Object[] local; +++ +++ /** +++ * The stack size of the latest stack map frame that has been parsed. +++ */ +++ int stackCount; +++ +++ /** +++ * The stack values of the latest stack map frame that has been parsed. 
+++ */ +++ Object[] stack; +++} ++\ No newline at end of file ++diff --git a/contrib/asm/src/org/objectweb/asm/Edge.java b/contrib/asm/src/org/objectweb/asm/Edge.java ++new file mode 100644 ++index 0000000..4e87cba ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Edge.java ++@@ -0,0 +1,75 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * An edge in the control flow graph of a method body. See {@link Label Label}. +++ * +++ * @author Eric Bruneton +++ */ +++class Edge { +++ +++ /** +++ * Denotes a normal control flow graph edge. +++ */ +++ static final int NORMAL = 0; +++ +++ /** +++ * Denotes a control flow graph edge corresponding to an exception handler. +++ * More precisely any {@link Edge} whose {@link #info} is strictly positive +++ * corresponds to an exception handler. The actual value of {@link #info} is +++ * the index, in the {@link ClassWriter} type table, of the exception that +++ * is catched. +++ */ +++ static final int EXCEPTION = 0x7FFFFFFF; +++ +++ /** +++ * Information about this control flow graph edge. If +++ * {@link ClassWriter#COMPUTE_MAXS} is used this field is the (relative) +++ * stack size in the basic block from which this edge originates. This size +++ * is equal to the stack size at the "jump" instruction to which this edge +++ * corresponds, relatively to the stack size at the beginning of the +++ * originating basic block. If {@link ClassWriter#COMPUTE_FRAMES} is used, +++ * this field is the kind of this control flow graph edge (i.e. NORMAL or +++ * EXCEPTION). +++ */ +++ int info; +++ +++ /** +++ * The successor block of the basic block from which this edge originates. 
+++ */ +++ Label successor; +++ +++ /** +++ * The next edge in the list of successors of the originating basic block. +++ * See {@link Label#successors successors}. +++ */ +++ Edge next; +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/FieldVisitor.java b/contrib/asm/src/org/objectweb/asm/FieldVisitor.java ++new file mode 100644 ++index 0000000..2372e4c ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/FieldVisitor.java ++@@ -0,0 +1,150 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A visitor to visit a Java field. The methods of this class must be called in +++ * the following order: ( <tt>visitAnnotation</tt> | +++ * <tt>visitTypeAnnotation</tt> | <tt>visitAttribute</tt> )* <tt>visitEnd</tt>. +++ * +++ * @author Eric Bruneton +++ */ +++public abstract class FieldVisitor { +++ +++ /** +++ * The ASM API version implemented by this visitor. The value of this field +++ * must be one of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ protected final int api; +++ +++ /** +++ * The field visitor to which this visitor must delegate method calls. May +++ * be null. +++ */ +++ protected FieldVisitor fv; +++ +++ /** +++ * Constructs a new {@link FieldVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ public FieldVisitor(final int api) { +++ this(api, null); +++ } +++ +++ /** +++ * Constructs a new {@link FieldVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ * @param fv +++ * the field visitor to which this visitor must delegate method +++ * calls. May be null. 
+++ */ +++ public FieldVisitor(final int api, final FieldVisitor fv) { +++ if (api != Opcodes.ASM4 && api != Opcodes.ASM5) { +++ throw new IllegalArgumentException(); +++ } +++ this.api = api; +++ this.fv = fv; +++ } +++ +++ /** +++ * Visits an annotation of the field. +++ * +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * <tt>true</tt> if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or <tt>null</tt> if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitAnnotation(String desc, boolean visible) { +++ if (fv != null) { +++ return fv.visitAnnotation(desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an annotation on the type of the field. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#FIELD FIELD}. See +++ * {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * <tt>null</tt> if the annotation targets 'typeRef' as a whole. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * <tt>true</tt> if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or <tt>null</tt> if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitTypeAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (fv != null) { +++ return fv.visitTypeAnnotation(typeRef, typePath, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a non standard attribute of the field. +++ * +++ * @param attr +++ * an attribute. 
+++ */ +++ public void visitAttribute(Attribute attr) { +++ if (fv != null) { +++ fv.visitAttribute(attr); +++ } +++ } +++ +++ /** +++ * Visits the end of the field. This method, which is the last one to be +++ * called, is used to inform the visitor that all the annotations and +++ * attributes of the field have been visited. +++ */ +++ public void visitEnd() { +++ if (fv != null) { +++ fv.visitEnd(); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/FieldWriter.java b/contrib/asm/src/org/objectweb/asm/FieldWriter.java ++new file mode 100644 ++index 0000000..84d92aa ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/FieldWriter.java ++@@ -0,0 +1,329 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * An {@link FieldVisitor} that generates Java fields in bytecode form. +++ * +++ * @author Eric Bruneton +++ */ +++final class FieldWriter extends FieldVisitor { +++ +++ /** +++ * The class writer to which this field must be added. +++ */ +++ private final ClassWriter cw; +++ +++ /** +++ * Access flags of this field. +++ */ +++ private final int access; +++ +++ /** +++ * The index of the constant pool item that contains the name of this +++ * method. +++ */ +++ private final int name; +++ +++ /** +++ * The index of the constant pool item that contains the descriptor of this +++ * field. +++ */ +++ private final int desc; +++ +++ /** +++ * The index of the constant pool item that contains the signature of this +++ * field. +++ */ +++ private int signature; +++ +++ /** +++ * The index of the constant pool item that contains the constant value of +++ * this field. +++ */ +++ private int value; +++ +++ /** +++ * The runtime visible annotations of this field. May be <tt>null</tt>. +++ */ +++ private AnnotationWriter anns; +++ +++ /** +++ * The runtime invisible annotations of this field. May be <tt>null</tt>. +++ */ +++ private AnnotationWriter ianns; +++ +++ /** +++ * The runtime visible type annotations of this field. May be <tt>null</tt>. +++ */ +++ private AnnotationWriter tanns; +++ +++ /** +++ * The runtime invisible type annotations of this field. 
May be +++ * <tt>null</tt>. +++ */ +++ private AnnotationWriter itanns; +++ +++ /** +++ * The non standard attributes of this field. May be <tt>null</tt>. +++ */ +++ private Attribute attrs; +++ +++ // ------------------------------------------------------------------------ +++ // Constructor +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new {@link FieldWriter}. +++ * +++ * @param cw +++ * the class writer to which this field must be added. +++ * @param access +++ * the field's access flags (see {@link Opcodes}). +++ * @param name +++ * the field's name. +++ * @param desc +++ * the field's descriptor (see {@link Type}). +++ * @param signature +++ * the field's signature. May be <tt>null</tt>. +++ * @param value +++ * the field's constant value. May be <tt>null</tt>. +++ */ +++ FieldWriter(final ClassWriter cw, final int access, final String name, +++ final String desc, final String signature, final Object value) { +++ super(Opcodes.ASM5); +++ if (cw.firstField == null) { +++ cw.firstField = this; +++ } else { +++ cw.lastField.fv = this; +++ } +++ cw.lastField = this; +++ this.cw = cw; +++ this.access = access; +++ this.name = cw.newUTF8(name); +++ this.desc = cw.newUTF8(desc); +++ if (ClassReader.SIGNATURES && signature != null) { +++ this.signature = cw.newUTF8(signature); +++ } +++ if (value != null) { +++ this.value = cw.newConstItem(value).index; +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Implementation of the FieldVisitor abstract class +++ // ------------------------------------------------------------------------ +++ +++ @Override +++ public AnnotationVisitor visitAnnotation(final String desc, +++ final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ 
AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, 2); +++ if (visible) { +++ aw.next = anns; +++ anns = aw; +++ } else { +++ aw.next = ianns; +++ ianns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public AnnotationVisitor visitTypeAnnotation(final int typeRef, +++ final TypePath typePath, final String desc, final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ AnnotationWriter.putTarget(typeRef, typePath, bv); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = tanns; +++ tanns = aw; +++ } else { +++ aw.next = itanns; +++ itanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public void visitAttribute(final Attribute attr) { +++ attr.next = attrs; +++ attrs = attr; +++ } +++ +++ @Override +++ public void visitEnd() { +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the size of this field. +++ * +++ * @return the size of this field. 
+++ */ +++ int getSize() { +++ int size = 8; +++ if (value != 0) { +++ cw.newUTF8("ConstantValue"); +++ size += 8; +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ cw.newUTF8("Synthetic"); +++ size += 6; +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ cw.newUTF8("Deprecated"); +++ size += 6; +++ } +++ if (ClassReader.SIGNATURES && signature != 0) { +++ cw.newUTF8("Signature"); +++ size += 8; +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ cw.newUTF8("RuntimeVisibleAnnotations"); +++ size += 8 + anns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ cw.newUTF8("RuntimeInvisibleAnnotations"); +++ size += 8 + ianns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ cw.newUTF8("RuntimeVisibleTypeAnnotations"); +++ size += 8 + tanns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ cw.newUTF8("RuntimeInvisibleTypeAnnotations"); +++ size += 8 + itanns.getSize(); +++ } +++ if (attrs != null) { +++ size += attrs.getSize(cw, null, 0, -1, -1); +++ } +++ return size; +++ } +++ +++ /** +++ * Puts the content of this field into the given byte vector. +++ * +++ * @param out +++ * where the content of this field must be put. 
+++ */ +++ void put(final ByteVector out) { +++ final int FACTOR = ClassWriter.TO_ACC_SYNTHETIC; +++ int mask = Opcodes.ACC_DEPRECATED | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE +++ | ((access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) / FACTOR); +++ out.putShort(access & ~mask).putShort(name).putShort(desc); +++ int attributeCount = 0; +++ if (value != 0) { +++ ++attributeCount; +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ ++attributeCount; +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ ++attributeCount; +++ } +++ if (ClassReader.SIGNATURES && signature != 0) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ ++attributeCount; +++ } +++ if (attrs != null) { +++ attributeCount += attrs.getCount(); +++ } +++ out.putShort(attributeCount); +++ if (value != 0) { +++ out.putShort(cw.newUTF8("ConstantValue")); +++ out.putInt(2).putShort(value); +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ out.putShort(cw.newUTF8("Synthetic")).putInt(0); +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ out.putShort(cw.newUTF8("Deprecated")).putInt(0); +++ } +++ if (ClassReader.SIGNATURES && signature != 0) { +++ out.putShort(cw.newUTF8("Signature")); +++ out.putInt(2).putShort(signature); +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleAnnotations")); +++ anns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleAnnotations")); +++ 
ianns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleTypeAnnotations")); +++ tanns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleTypeAnnotations")); +++ itanns.put(out); +++ } +++ if (attrs != null) { +++ attrs.put(cw, null, 0, -1, -1, out); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Frame.java b/contrib/asm/src/org/objectweb/asm/Frame.java ++new file mode 100644 ++index 0000000..1f6106f ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Frame.java ++@@ -0,0 +1,1462 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * Information about the input and output stack map frames of a basic block. +++ * +++ * @author Eric Bruneton +++ */ +++final class Frame { +++ +++ /* +++ * Frames are computed in a two steps process: during the visit of each +++ * instruction, the state of the frame at the end of current basic block is +++ * updated by simulating the action of the instruction on the previous state +++ * of this so called "output frame". In visitMaxs, a fix point algorithm is +++ * used to compute the "input frame" of each basic block, i.e. the stack map +++ * frame at the beginning of the basic block, starting from the input frame +++ * of the first basic block (which is computed from the method descriptor), +++ * and by using the previously computed output frames to compute the input +++ * state of the other blocks. +++ * +++ * All output and input frames are stored as arrays of integers. Reference +++ * and array types are represented by an index into a type table (which is +++ * not the same as the constant pool of the class, in order to avoid adding +++ * unnecessary constants in the pool - not all computed frames will end up +++ * being stored in the stack map table). This allows very fast type +++ * comparisons. +++ * +++ * Output stack map frames are computed relatively to the input frame of the +++ * basic block, which is not yet known when output frames are computed. 
It +++ * is therefore necessary to be able to represent abstract types such as +++ * "the type at position x in the input frame locals" or "the type at +++ * position x from the top of the input frame stack" or even "the type at +++ * position x in the input frame, with y more (or less) array dimensions". +++ * This explains the rather complicated type format used in output frames. +++ * +++ * This format is the following: DIM KIND VALUE (4, 4 and 24 bits). DIM is a +++ * signed number of array dimensions (from -8 to 7). KIND is either BASE, +++ * LOCAL or STACK. BASE is used for types that are not relative to the input +++ * frame. LOCAL is used for types that are relative to the input local +++ * variable types. STACK is used for types that are relative to the input +++ * stack types. VALUE depends on KIND. For LOCAL types, it is an index in +++ * the input local variable types. For STACK types, it is a position +++ * relatively to the top of input frame stack. For BASE types, it is either +++ * one of the constants defined below, or for OBJECT and UNINITIALIZED +++ * types, a tag and an index in the type table. +++ * +++ * Output frames can contain types of any kind and with a positive or +++ * negative dimension (and even unassigned types, represented by 0 - which +++ * does not correspond to any valid type value). Input frames can only +++ * contain BASE types of positive or null dimension. In all cases the type +++ * table contains only internal type names (array type descriptors are +++ * forbidden - dimensions must be represented through the DIM field). +++ * +++ * The LONG and DOUBLE types are always represented by using two slots (LONG +++ * + TOP or DOUBLE + TOP), for local variable types as well as in the +++ * operand stack. 
This is necessary to be able to simulate DUPx_y +++ * instructions, whose effect would be dependent on the actual type values +++ * if types were always represented by a single slot in the stack (and this +++ * is not possible, since actual type values are not always known - cf LOCAL +++ * and STACK type kinds). +++ */ +++ +++ /** +++ * Mask to get the dimension of a frame type. This dimension is a signed +++ * integer between -8 and 7. +++ */ +++ static final int DIM = 0xF0000000; +++ +++ /** +++ * Constant to be added to a type to get a type with one more dimension. +++ */ +++ static final int ARRAY_OF = 0x10000000; +++ +++ /** +++ * Constant to be added to a type to get a type with one less dimension. +++ */ +++ static final int ELEMENT_OF = 0xF0000000; +++ +++ /** +++ * Mask to get the kind of a frame type. +++ * +++ * @see #BASE +++ * @see #LOCAL +++ * @see #STACK +++ */ +++ static final int KIND = 0xF000000; +++ +++ /** +++ * Flag used for LOCAL and STACK types. Indicates that if this type happens +++ * to be a long or double type (during the computations of input frames), +++ * then it must be set to TOP because the second word of this value has been +++ * reused to store other data in the basic block. Hence the first word no +++ * longer stores a valid long or double value. +++ */ +++ static final int TOP_IF_LONG_OR_DOUBLE = 0x800000; +++ +++ /** +++ * Mask to get the value of a frame type. +++ */ +++ static final int VALUE = 0x7FFFFF; +++ +++ /** +++ * Mask to get the kind of base types. +++ */ +++ static final int BASE_KIND = 0xFF00000; +++ +++ /** +++ * Mask to get the value of base types. +++ */ +++ static final int BASE_VALUE = 0xFFFFF; +++ +++ /** +++ * Kind of the types that are not relative to an input stack map frame. +++ */ +++ static final int BASE = 0x1000000; +++ +++ /** +++ * Base kind of the base reference types. The BASE_VALUE of such types is an +++ * index into the type table. 
+++ */ +++ static final int OBJECT = BASE | 0x700000; +++ +++ /** +++ * Base kind of the uninitialized base types. The BASE_VALUE of such types +++ * in an index into the type table (the Item at that index contains both an +++ * instruction offset and an internal class name). +++ */ +++ static final int UNINITIALIZED = BASE | 0x800000; +++ +++ /** +++ * Kind of the types that are relative to the local variable types of an +++ * input stack map frame. The value of such types is a local variable index. +++ */ +++ private static final int LOCAL = 0x2000000; +++ +++ /** +++ * Kind of the the types that are relative to the stack of an input stack +++ * map frame. The value of such types is a position relatively to the top of +++ * this stack. +++ */ +++ private static final int STACK = 0x3000000; +++ +++ /** +++ * The TOP type. This is a BASE type. +++ */ +++ static final int TOP = BASE | 0; +++ +++ /** +++ * The BOOLEAN type. This is a BASE type mainly used for array types. +++ */ +++ static final int BOOLEAN = BASE | 9; +++ +++ /** +++ * The BYTE type. This is a BASE type mainly used for array types. +++ */ +++ static final int BYTE = BASE | 10; +++ +++ /** +++ * The CHAR type. This is a BASE type mainly used for array types. +++ */ +++ static final int CHAR = BASE | 11; +++ +++ /** +++ * The SHORT type. This is a BASE type mainly used for array types. +++ */ +++ static final int SHORT = BASE | 12; +++ +++ /** +++ * The INTEGER type. This is a BASE type. +++ */ +++ static final int INTEGER = BASE | 1; +++ +++ /** +++ * The FLOAT type. This is a BASE type. +++ */ +++ static final int FLOAT = BASE | 2; +++ +++ /** +++ * The DOUBLE type. This is a BASE type. +++ */ +++ static final int DOUBLE = BASE | 3; +++ +++ /** +++ * The LONG type. This is a BASE type. +++ */ +++ static final int LONG = BASE | 4; +++ +++ /** +++ * The NULL type. This is a BASE type. +++ */ +++ static final int NULL = BASE | 5; +++ +++ /** +++ * The UNINITIALIZED_THIS type. This is a BASE type. 
+++ */ +++ static final int UNINITIALIZED_THIS = BASE | 6; +++ +++ /** +++ * The stack size variation corresponding to each JVM instruction. This +++ * stack variation is equal to the size of the values produced by an +++ * instruction, minus the size of the values consumed by this instruction. +++ */ +++ static final int[] SIZE; +++ +++ /** +++ * Computes the stack size variation corresponding to each JVM instruction. Each entry is decoded from one character of a compact string as (character - 'E'), so 'E' stands for 0. +++ */ +++ static { +++ int i; +++ int[] b = new int[202]; +++ String s = "EFFFFFFFFGGFFFGGFFFEEFGFGFEEEEEEEEEEEEEEEEEEEEDEDEDDDDD" +++ + "CDCDEEEEEEEEEEEEEEEEEEEEBABABBBBDCFFFGGGEDCDCDCDCDCDCDCDCD" +++ + "CDCEEEEDDDDDDDCDCDCEFEFDDEEFFDEDEEEBDDBBDDDDDDCCCCCCCCEFED" +++ + "DDCDCDEEEEEEEEEEFEEEEEEDDEEDDEE"; +++ for (i = 0; i < b.length; ++i) { +++ b[i] = s.charAt(i) - 'E'; +++ } +++ SIZE = b; +++ +++ // code to generate the above string +++ // +++ // int NA = 0; // not applicable (unused opcode or variable size opcode) +++ // +++ // b = new int[] { +++ // 0, //NOP, // visitInsn +++ // 1, //ACONST_NULL, // - +++ // 1, //ICONST_M1, // - +++ // 1, //ICONST_0, // - +++ // 1, //ICONST_1, // - +++ // 1, //ICONST_2, // - +++ // 1, //ICONST_3, // - +++ // 1, //ICONST_4, // - +++ // 1, //ICONST_5, // - +++ // 2, //LCONST_0, // - +++ // 2, //LCONST_1, // - +++ // 1, //FCONST_0, // - +++ // 1, //FCONST_1, // - +++ // 1, //FCONST_2, // - +++ // 2, //DCONST_0, // - +++ // 2, //DCONST_1, // - +++ // 1, //BIPUSH, // visitIntInsn +++ // 1, //SIPUSH, // - +++ // 1, //LDC, // visitLdcInsn +++ // NA, //LDC_W, // - +++ // NA, //LDC2_W, // - +++ // 1, //ILOAD, // visitVarInsn +++ // 2, //LLOAD, // - +++ // 1, //FLOAD, // - +++ // 2, //DLOAD, // - +++ // 1, //ALOAD, // - +++ // NA, //ILOAD_0, // - +++ // NA, //ILOAD_1, // - +++ // NA, //ILOAD_2, // - +++ // NA, //ILOAD_3, // - +++ // NA, //LLOAD_0, // - +++ // NA, //LLOAD_1, // - +++ // NA, //LLOAD_2, // - +++ // NA, //LLOAD_3, // - +++ // NA, //FLOAD_0, // - +++ // NA, //FLOAD_1, // - +++ // NA, //FLOAD_2, // - +++ // NA,
//FLOAD_3, // - +++ // NA, //DLOAD_0, // - +++ // NA, //DLOAD_1, // - +++ // NA, //DLOAD_2, // - +++ // NA, //DLOAD_3, // - +++ // NA, //ALOAD_0, // - +++ // NA, //ALOAD_1, // - +++ // NA, //ALOAD_2, // - +++ // NA, //ALOAD_3, // - +++ // -1, //IALOAD, // visitInsn +++ // 0, //LALOAD, // - +++ // -1, //FALOAD, // - +++ // 0, //DALOAD, // - +++ // -1, //AALOAD, // - +++ // -1, //BALOAD, // - +++ // -1, //CALOAD, // - +++ // -1, //SALOAD, // - +++ // -1, //ISTORE, // visitVarInsn +++ // -2, //LSTORE, // - +++ // -1, //FSTORE, // - +++ // -2, //DSTORE, // - +++ // -1, //ASTORE, // - +++ // NA, //ISTORE_0, // - +++ // NA, //ISTORE_1, // - +++ // NA, //ISTORE_2, // - +++ // NA, //ISTORE_3, // - +++ // NA, //LSTORE_0, // - +++ // NA, //LSTORE_1, // - +++ // NA, //LSTORE_2, // - +++ // NA, //LSTORE_3, // - +++ // NA, //FSTORE_0, // - +++ // NA, //FSTORE_1, // - +++ // NA, //FSTORE_2, // - +++ // NA, //FSTORE_3, // - +++ // NA, //DSTORE_0, // - +++ // NA, //DSTORE_1, // - +++ // NA, //DSTORE_2, // - +++ // NA, //DSTORE_3, // - +++ // NA, //ASTORE_0, // - +++ // NA, //ASTORE_1, // - +++ // NA, //ASTORE_2, // - +++ // NA, //ASTORE_3, // - +++ // -3, //IASTORE, // visitInsn +++ // -4, //LASTORE, // - +++ // -3, //FASTORE, // - +++ // -4, //DASTORE, // - +++ // -3, //AASTORE, // - +++ // -3, //BASTORE, // - +++ // -3, //CASTORE, // - +++ // -3, //SASTORE, // - +++ // -1, //POP, // - +++ // -2, //POP2, // - +++ // 1, //DUP, // - +++ // 1, //DUP_X1, // - +++ // 1, //DUP_X2, // - +++ // 2, //DUP2, // - +++ // 2, //DUP2_X1, // - +++ // 2, //DUP2_X2, // - +++ // 0, //SWAP, // - +++ // -1, //IADD, // - +++ // -2, //LADD, // - +++ // -1, //FADD, // - +++ // -2, //DADD, // - +++ // -1, //ISUB, // - +++ // -2, //LSUB, // - +++ // -1, //FSUB, // - +++ // -2, //DSUB, // - +++ // -1, //IMUL, // - +++ // -2, //LMUL, // - +++ // -1, //FMUL, // - +++ // -2, //DMUL, // - +++ // -1, //IDIV, // - +++ // -2, //LDIV, // - +++ // -1, //FDIV, // - +++ // -2, //DDIV, // - +++ // -1, //IREM, // - +++
// -2, //LREM, // - +++ // -1, //FREM, // - +++ // -2, //DREM, // - +++ // 0, //INEG, // - +++ // 0, //LNEG, // - +++ // 0, //FNEG, // - +++ // 0, //DNEG, // - +++ // -1, //ISHL, // - +++ // -1, //LSHL, // - +++ // -1, //ISHR, // - +++ // -1, //LSHR, // - +++ // -1, //IUSHR, // - +++ // -1, //LUSHR, // - +++ // -1, //IAND, // - +++ // -2, //LAND, // - +++ // -1, //IOR, // - +++ // -2, //LOR, // - +++ // -1, //IXOR, // - +++ // -2, //LXOR, // - +++ // 0, //IINC, // visitIincInsn +++ // 1, //I2L, // visitInsn +++ // 0, //I2F, // - +++ // 1, //I2D, // - +++ // -1, //L2I, // - +++ // -1, //L2F, // - +++ // 0, //L2D, // - +++ // 0, //F2I, // - +++ // 1, //F2L, // - +++ // 1, //F2D, // - +++ // -1, //D2I, // - +++ // 0, //D2L, // - +++ // -1, //D2F, // - +++ // 0, //I2B, // - +++ // 0, //I2C, // - +++ // 0, //I2S, // - +++ // -3, //LCMP, // - +++ // -1, //FCMPL, // - +++ // -1, //FCMPG, // - +++ // -3, //DCMPL, // - +++ // -3, //DCMPG, // - +++ // -1, //IFEQ, // visitJumpInsn +++ // -1, //IFNE, // - +++ // -1, //IFLT, // - +++ // -1, //IFGE, // - +++ // -1, //IFGT, // - +++ // -1, //IFLE, // - +++ // -2, //IF_ICMPEQ, // - +++ // -2, //IF_ICMPNE, // - +++ // -2, //IF_ICMPLT, // - +++ // -2, //IF_ICMPGE, // - +++ // -2, //IF_ICMPGT, // - +++ // -2, //IF_ICMPLE, // - +++ // -2, //IF_ACMPEQ, // - +++ // -2, //IF_ACMPNE, // - +++ // 0, //GOTO, // - +++ // 1, //JSR, // - +++ // 0, //RET, // visitVarInsn +++ // -1, //TABLESWITCH, // visitTableSwitchInsn +++ // -1, //LOOKUPSWITCH, // visitLookupSwitchInsn +++ // -1, //IRETURN, // visitInsn +++ // -2, //LRETURN, // - +++ // -1, //FRETURN, // - +++ // -2, //DRETURN, // - +++ // -1, //ARETURN, // - +++ // 0, //RETURN, // - +++ // NA, //GETSTATIC, // visitFieldInsn +++ // NA, //PUTSTATIC, // - +++ // NA, //GETFIELD, // - +++ // NA, //PUTFIELD, // - +++ // NA, //INVOKEVIRTUAL, // visitMethodInsn +++ // NA, //INVOKESPECIAL, // - +++ // NA, //INVOKESTATIC, // - +++ // NA, //INVOKEINTERFACE, // - +++ // NA, //INVOKEDYNAMIC, //
visitInvokeDynamicInsn +++ // 1, //NEW, // visitTypeInsn +++ // 0, //NEWARRAY, // visitIntInsn +++ // 0, //ANEWARRAY, // visitTypeInsn +++ // 0, //ARRAYLENGTH, // visitInsn +++ // NA, //ATHROW, // - +++ // 0, //CHECKCAST, // visitTypeInsn +++ // 0, //INSTANCEOF, // - +++ // -1, //MONITORENTER, // visitInsn +++ // -1, //MONITOREXIT, // - +++ // NA, //WIDE, // NOT VISITED +++ // NA, //MULTIANEWARRAY, // visitMultiANewArrayInsn +++ // -1, //IFNULL, // visitJumpInsn +++ // -1, //IFNONNULL, // - +++ // NA, //GOTO_W, // - +++ // NA, //JSR_W, // - +++ // }; +++ // for (i = 0; i < b.length; ++i) { +++ // System.err.print((char)('E' + b[i])); +++ // } +++ // System.err.println(); +++ } +++ +++ /** +++ * The label (i.e. basic block) to which these input and output stack map +++ * frames correspond. +++ */ +++ Label owner; +++ +++ /** +++ * The input stack map frame locals. +++ */ +++ int[] inputLocals; +++ +++ /** +++ * The input stack map frame stack. +++ */ +++ int[] inputStack; +++ +++ /** +++ * The output stack map frame locals. +++ */ +++ private int[] outputLocals; +++ +++ /** +++ * The output stack map frame stack. +++ */ +++ private int[] outputStack; +++ +++ /** +++ * Relative size of the output stack. The exact semantics of this field +++ * depends on the algorithm that is used. +++ * +++ * When only the maximum stack size is computed, this field is the size of +++ * the output stack relatively to the top of the input stack. +++ * +++ * When the stack map frames are completely computed, this field is the +++ * actual number of types in {@link #outputStack}. +++ */ +++ private int outputStackTop; +++ +++ /** +++ * Number of types that are initialized in the basic block. +++ * +++ * @see #initializations +++ */ +++ private int initializationCount; +++ +++ /** +++ * The types that are initialized in the basic block.
A constructor +++ * invocation on an UNINITIALIZED or UNINITIALIZED_THIS type must replace +++ * <i>every occurrence</i> of this type in the local variables and in the +++ * operand stack. This cannot be done during the first phase of the +++ * algorithm since, during this phase, the local variables and the operand +++ * stack are not completely computed. It is therefore necessary to store the +++ * types on which constructors are invoked in the basic block, in order to +++ * do this replacement during the second phase of the algorithm, where the +++ * frames are fully computed. Note that this array can contain types that +++ * are relative to input locals or to the input stack (see below for the +++ * description of the algorithm). +++ */ +++ private int[] initializations; +++ +++ /** +++ * Returns the output frame local variable type at the given index. +++ * +++ * @param local +++ * the index of the local that must be returned. +++ * @return the output frame local variable type at the given index, or LOCAL | local if it has not been assigned in this basic block. +++ */ +++ private int get(final int local) { +++ if (outputLocals == null || local >= outputLocals.length) { +++ // this local has never been assigned in this basic block, +++ // so it is still equal to its value in the input frame +++ return LOCAL | local; +++ } else { +++ int type = outputLocals[local]; +++ if (type == 0) { +++ // this local has never been assigned in this basic block, +++ // so it is still equal to its value in the input frame +++ type = outputLocals[local] = LOCAL | local; +++ } +++ return type; +++ } +++ } +++ +++ /** +++ * Sets the output frame local variable type at the given index. +++ * +++ * @param local +++ * the index of the local that must be set. +++ * @param type +++ * the value of the local that must be set.
+++ */ +++ private void set(final int local, final int type) { +++ // creates and/or resizes the output local variables array if necessary +++ if (outputLocals == null) { +++ outputLocals = new int[10]; +++ } +++ int n = outputLocals.length; +++ if (local >= n) { +++ int[] t = new int[Math.max(local + 1, 2 * n)]; +++ System.arraycopy(outputLocals, 0, t, 0, n); +++ outputLocals = t; +++ } +++ // sets the local variable +++ outputLocals[local] = type; +++ } +++ +++ /** +++ * Pushes a new type onto the output frame stack. +++ * +++ * @param type +++ * the type that must be pushed. +++ */ +++ private void push(final int type) { +++ // creates and/or resizes the output stack array if necessary +++ if (outputStack == null) { +++ outputStack = new int[10]; +++ } +++ int n = outputStack.length; +++ if (outputStackTop >= n) { +++ int[] t = new int[Math.max(outputStackTop + 1, 2 * n)]; +++ System.arraycopy(outputStack, 0, t, 0, n); +++ outputStack = t; +++ } +++ // pushes the type on the output stack +++ outputStack[outputStackTop++] = type; +++ // updates the maximum height reached by the output stack, if needed +++ int top = owner.inputStackTop + outputStackTop; +++ if (top > owner.outputStackMax) { +++ owner.outputStackMax = top; +++ } +++ } +++ +++ /** +++ * Pushes a new type onto the output frame stack. Nothing is pushed when the encoded type is 0 (void), and a LONG or DOUBLE is followed by a TOP entry. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs. +++ * @param desc +++ * the descriptor of the type to be pushed. Can also be a method +++ * descriptor (in this case this method pushes its return type +++ * onto the output frame stack). +++ */ +++ private void push(final ClassWriter cw, final String desc) { +++ int type = type(cw, desc); +++ if (type != 0) { +++ push(type); +++ if (type == LONG || type == DOUBLE) { +++ push(TOP); +++ } +++ } +++ } +++ +++ /** +++ * Returns the int encoding of the given type. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs. +++ * @param desc +++ * a type descriptor, or a method descriptor (its return type is then encoded).
+++ * @return the int encoding of the given type, or 0 for the void type 'V'. For a descriptor starting with '(' the part after ')' is encoded. +++ */ +++ private static int type(final ClassWriter cw, final String desc) { +++ String t; +++ int index = desc.charAt(0) == '(' ? desc.indexOf(')') + 1 : 0; +++ switch (desc.charAt(index)) { +++ case 'V': +++ return 0; +++ case 'Z': +++ case 'C': +++ case 'B': +++ case 'S': +++ case 'I': +++ return INTEGER; +++ case 'F': +++ return FLOAT; +++ case 'J': +++ return LONG; +++ case 'D': +++ return DOUBLE; +++ case 'L': +++ // stores the internal name, not the descriptor! +++ t = desc.substring(index + 1, desc.length() - 1); +++ return OBJECT | cw.addType(t); +++ // case '[': +++ default: +++ // extracts the dimensions and the element type +++ int data; +++ int dims = index + 1; +++ while (desc.charAt(dims) == '[') { +++ ++dims; +++ } +++ switch (desc.charAt(dims)) { +++ case 'Z': +++ data = BOOLEAN; +++ break; +++ case 'C': +++ data = CHAR; +++ break; +++ case 'B': +++ data = BYTE; +++ break; +++ case 'S': +++ data = SHORT; +++ break; +++ case 'I': +++ data = INTEGER; +++ break; +++ case 'F': +++ data = FLOAT; +++ break; +++ case 'J': +++ data = LONG; +++ break; +++ case 'D': +++ data = DOUBLE; +++ break; +++ // case 'L': +++ default: +++ // stores the internal name, not the descriptor +++ t = desc.substring(dims + 1, desc.length() - 1); +++ data = OBJECT | cw.addType(t); +++ } +++ return (dims - index) << 28 | data; +++ } +++ } +++ +++ /** +++ * Pops a type from the output frame stack and returns its value. +++ * +++ * @return the type that has been popped from the output frame stack. +++ */ +++ private int pop() { +++ if (outputStackTop > 0) { +++ return outputStack[--outputStackTop]; +++ } else { +++ // if the output frame stack is empty, pops from the input stack +++ return STACK | -(--owner.inputStackTop); +++ } +++ } +++ +++ /** +++ * Pops the given number of types from the output frame stack. +++ * +++ * @param elements +++ * the number of types that must be popped.
+++ */ +++ private void pop(final int elements) { +++ if (outputStackTop >= elements) { +++ outputStackTop -= elements; +++ } else { +++ // if the number of elements to be popped is greater than the number +++ // of elements in the output stack, clears it and pops the remaining +++ // elements from the input stack. +++ owner.inputStackTop -= elements - outputStackTop; +++ outputStackTop = 0; +++ } +++ } +++ +++ /** +++ * Pops a type from the output frame stack. +++ * +++ * @param desc +++ * the descriptor of the type to be popped. Can also be a method +++ * descriptor (in this case this method pops the types +++ * corresponding to the method arguments). +++ */ +++ private void pop(final String desc) { +++ char c = desc.charAt(0); +++ if (c == '(') { +++ pop((Type.getArgumentsAndReturnSizes(desc) >> 2) - 1); +++ } else if (c == 'J' || c == 'D') { +++ pop(2); +++ } else { +++ pop(1); +++ } +++ } +++ +++ /** +++ * Adds a new type to the list of types on which a constructor is invoked in +++ * the basic block. +++ * +++ * @param var +++ * a type on which a constructor is invoked. +++ */ +++ private void init(final int var) { +++ // creates and/or resizes the initializations array if necessary +++ if (initializations == null) { +++ initializations = new int[2]; +++ } +++ int n = initializations.length; +++ if (initializationCount >= n) { +++ int[] t = new int[Math.max(initializationCount + 1, 2 * n)]; +++ System.arraycopy(initializations, 0, t, 0, n); +++ initializations = t; +++ } +++ // stores the type to be initialized +++ initializations[initializationCount++] = var; +++ } +++ +++ /** +++ * Replaces the given type with the appropriate type if it is one of the +++ * types on which a constructor is invoked in the basic block. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs.
+++ * @param t +++ * a type (only UNINITIALIZED and UNINITIALIZED_THIS types can be replaced; any other type is returned unchanged). +++ * @return t or, if t is one of the types on which a constructor is invoked +++ * in the basic block, the type corresponding to this constructor. +++ */ +++ private int init(final ClassWriter cw, final int t) { +++ int s; +++ if (t == UNINITIALIZED_THIS) { +++ s = OBJECT | cw.addType(cw.thisName); +++ } else if ((t & (DIM | BASE_KIND)) == UNINITIALIZED) { +++ String type = cw.typeTable[t & BASE_VALUE].strVal1; +++ s = OBJECT | cw.addType(type); +++ } else { +++ return t; +++ } +++ for (int j = 0; j < initializationCount; ++j) { +++ int u = initializations[j]; +++ int dim = u & DIM; +++ int kind = u & KIND; +++ if (kind == LOCAL) { +++ u = dim + inputLocals[u & VALUE]; +++ } else if (kind == STACK) { +++ u = dim + inputStack[inputStack.length - (u & VALUE)]; +++ } +++ if (t == u) { +++ return s; +++ } +++ } +++ return t; +++ } +++ +++ /** +++ * Initializes the input frame of the first basic block from the method +++ * descriptor. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs. +++ * @param access +++ * the access flags of the method to which this label belongs. +++ * @param args +++ * the formal parameter types of this method. +++ * @param maxLocals +++ * the maximum number of local variables of this method.
+++ */ +++ void initInputFrame(final ClassWriter cw, final int access, +++ final Type[] args, final int maxLocals) { +++ inputLocals = new int[maxLocals]; +++ inputStack = new int[0]; +++ int i = 0; +++ if ((access & Opcodes.ACC_STATIC) == 0) { /* non-static method: local 0 holds 'this' (UNINITIALIZED_THIS inside a constructor) */ +++ if ((access & MethodWriter.ACC_CONSTRUCTOR) == 0) { +++ inputLocals[i++] = OBJECT | cw.addType(cw.thisName); +++ } else { +++ inputLocals[i++] = UNINITIALIZED_THIS; +++ } +++ } +++ for (int j = 0; j < args.length; ++j) { +++ int t = type(cw, args[j].getDescriptor()); +++ inputLocals[i++] = t; +++ if (t == LONG || t == DOUBLE) { +++ inputLocals[i++] = TOP; +++ } +++ } +++ while (i < maxLocals) { +++ inputLocals[i++] = TOP; +++ } +++ } +++ +++ /** +++ * Simulates the action of the given instruction on the output stack frame. +++ * +++ * @param opcode +++ * the opcode of the instruction. +++ * @param arg +++ * the operand of the instruction, if any. +++ * @param cw +++ * the class writer to which this label belongs. +++ * @param item +++ * the {@link Item} operand of the instruction, if any.
+++ */ +++ void execute(final int opcode, final int arg, final ClassWriter cw, +++ final Item item) { +++ int t1, t2, t3, t4; +++ switch (opcode) { +++ case Opcodes.NOP: +++ case Opcodes.INEG: +++ case Opcodes.LNEG: +++ case Opcodes.FNEG: +++ case Opcodes.DNEG: +++ case Opcodes.I2B: +++ case Opcodes.I2C: +++ case Opcodes.I2S: +++ case Opcodes.GOTO: +++ case Opcodes.RETURN: +++ break; +++ case Opcodes.ACONST_NULL: +++ push(NULL); +++ break; +++ case Opcodes.ICONST_M1: +++ case Opcodes.ICONST_0: +++ case Opcodes.ICONST_1: +++ case Opcodes.ICONST_2: +++ case Opcodes.ICONST_3: +++ case Opcodes.ICONST_4: +++ case Opcodes.ICONST_5: +++ case Opcodes.BIPUSH: +++ case Opcodes.SIPUSH: +++ case Opcodes.ILOAD: +++ push(INTEGER); +++ break; +++ case Opcodes.LCONST_0: +++ case Opcodes.LCONST_1: +++ case Opcodes.LLOAD: +++ push(LONG); +++ push(TOP); +++ break; +++ case Opcodes.FCONST_0: +++ case Opcodes.FCONST_1: +++ case Opcodes.FCONST_2: +++ case Opcodes.FLOAD: +++ push(FLOAT); +++ break; +++ case Opcodes.DCONST_0: +++ case Opcodes.DCONST_1: +++ case Opcodes.DLOAD: +++ push(DOUBLE); +++ push(TOP); +++ break; +++ case Opcodes.LDC: +++ switch (item.type) { +++ case ClassWriter.INT: +++ push(INTEGER); +++ break; +++ case ClassWriter.LONG: +++ push(LONG); +++ push(TOP); +++ break; +++ case ClassWriter.FLOAT: +++ push(FLOAT); +++ break; +++ case ClassWriter.DOUBLE: +++ push(DOUBLE); +++ push(TOP); +++ break; +++ case ClassWriter.CLASS: +++ push(OBJECT | cw.addType("java/lang/Class")); +++ break; +++ case ClassWriter.STR: +++ push(OBJECT | cw.addType("java/lang/String")); +++ break; +++ case ClassWriter.MTYPE: +++ push(OBJECT | cw.addType("java/lang/invoke/MethodType")); +++ break; +++ // case ClassWriter.HANDLE_BASE + [1..9]: +++ default: +++ push(OBJECT | cw.addType("java/lang/invoke/MethodHandle")); +++ } +++ break; +++ case Opcodes.ALOAD: +++ push(get(arg)); +++ break; +++ case Opcodes.IALOAD: +++ case Opcodes.BALOAD: +++ case Opcodes.CALOAD: +++ case Opcodes.SALOAD: +++ pop(2);
+++ push(INTEGER); +++ break; +++ case Opcodes.LALOAD: +++ case Opcodes.D2L: +++ pop(2); +++ push(LONG); +++ push(TOP); +++ break; +++ case Opcodes.FALOAD: +++ pop(2); +++ push(FLOAT); +++ break; +++ case Opcodes.DALOAD: +++ case Opcodes.L2D: +++ pop(2); +++ push(DOUBLE); +++ push(TOP); +++ break; +++ case Opcodes.AALOAD: +++ pop(1); +++ t1 = pop(); +++ push(ELEMENT_OF + t1); +++ break; +++ case Opcodes.ISTORE: +++ case Opcodes.FSTORE: +++ case Opcodes.ASTORE: +++ t1 = pop(); +++ set(arg, t1); +++ if (arg > 0) { +++ t2 = get(arg - 1); +++ // if t2 is of kind STACK or LOCAL we cannot know its size! +++ if (t2 == LONG || t2 == DOUBLE) { +++ set(arg - 1, TOP); +++ } else if ((t2 & KIND) != BASE) { +++ set(arg - 1, t2 | TOP_IF_LONG_OR_DOUBLE); +++ } +++ } +++ break; +++ case Opcodes.LSTORE: +++ case Opcodes.DSTORE: +++ pop(1); +++ t1 = pop(); +++ set(arg, t1); +++ set(arg + 1, TOP); +++ if (arg > 0) { +++ t2 = get(arg - 1); +++ // if t2 is of kind STACK or LOCAL we cannot know its size! +++ if (t2 == LONG || t2 == DOUBLE) { +++ set(arg - 1, TOP); +++ } else if ((t2 & KIND) != BASE) { +++ set(arg - 1, t2 | TOP_IF_LONG_OR_DOUBLE); +++ } +++ } +++ break; +++ case Opcodes.IASTORE: +++ case Opcodes.BASTORE: +++ case Opcodes.CASTORE: +++ case Opcodes.SASTORE: +++ case Opcodes.FASTORE: +++ case Opcodes.AASTORE: +++ pop(3); +++ break; +++ case Opcodes.LASTORE: +++ case Opcodes.DASTORE: +++ pop(4); +++ break; +++ case Opcodes.POP: +++ case Opcodes.IFEQ: +++ case Opcodes.IFNE: +++ case Opcodes.IFLT: +++ case Opcodes.IFGE: +++ case Opcodes.IFGT: +++ case Opcodes.IFLE: +++ case Opcodes.IRETURN: +++ case Opcodes.FRETURN: +++ case Opcodes.ARETURN: +++ case Opcodes.TABLESWITCH: +++ case Opcodes.LOOKUPSWITCH: +++ case Opcodes.ATHROW: +++ case Opcodes.MONITORENTER: +++ case Opcodes.MONITOREXIT: +++ case Opcodes.IFNULL: +++ case Opcodes.IFNONNULL: +++ pop(1); +++ break; +++ case Opcodes.POP2: +++ case Opcodes.IF_ICMPEQ: +++ case Opcodes.IF_ICMPNE: +++ case Opcodes.IF_ICMPLT: +++ case
Opcodes.IF_ICMPGE: +++ case Opcodes.IF_ICMPGT: +++ case Opcodes.IF_ICMPLE: +++ case Opcodes.IF_ACMPEQ: +++ case Opcodes.IF_ACMPNE: +++ case Opcodes.LRETURN: +++ case Opcodes.DRETURN: +++ pop(2); +++ break; +++ case Opcodes.DUP: +++ t1 = pop(); +++ push(t1); +++ push(t1); +++ break; +++ case Opcodes.DUP_X1: +++ t1 = pop(); +++ t2 = pop(); +++ push(t1); +++ push(t2); +++ push(t1); +++ break; +++ case Opcodes.DUP_X2: +++ t1 = pop(); +++ t2 = pop(); +++ t3 = pop(); +++ push(t1); +++ push(t3); +++ push(t2); +++ push(t1); +++ break; +++ case Opcodes.DUP2: +++ t1 = pop(); +++ t2 = pop(); +++ push(t2); +++ push(t1); +++ push(t2); +++ push(t1); +++ break; +++ case Opcodes.DUP2_X1: +++ t1 = pop(); +++ t2 = pop(); +++ t3 = pop(); +++ push(t2); +++ push(t1); +++ push(t3); +++ push(t2); +++ push(t1); +++ break; +++ case Opcodes.DUP2_X2: +++ t1 = pop(); +++ t2 = pop(); +++ t3 = pop(); +++ t4 = pop(); +++ push(t2); +++ push(t1); +++ push(t4); +++ push(t3); +++ push(t2); +++ push(t1); +++ break; +++ case Opcodes.SWAP: +++ t1 = pop(); +++ t2 = pop(); +++ push(t1); +++ push(t2); +++ break; +++ case Opcodes.IADD: +++ case Opcodes.ISUB: +++ case Opcodes.IMUL: +++ case Opcodes.IDIV: +++ case Opcodes.IREM: +++ case Opcodes.IAND: +++ case Opcodes.IOR: +++ case Opcodes.IXOR: +++ case Opcodes.ISHL: +++ case Opcodes.ISHR: +++ case Opcodes.IUSHR: +++ case Opcodes.L2I: +++ case Opcodes.D2I: +++ case Opcodes.FCMPL: +++ case Opcodes.FCMPG: +++ pop(2); +++ push(INTEGER); +++ break; +++ case Opcodes.LADD: +++ case Opcodes.LSUB: +++ case Opcodes.LMUL: +++ case Opcodes.LDIV: +++ case Opcodes.LREM: +++ case Opcodes.LAND: +++ case Opcodes.LOR: +++ case Opcodes.LXOR: +++ pop(4); +++ push(LONG); +++ push(TOP); +++ break; +++ case Opcodes.FADD: +++ case Opcodes.FSUB: +++ case Opcodes.FMUL: +++ case Opcodes.FDIV: +++ case Opcodes.FREM: +++ case Opcodes.L2F: +++ case Opcodes.D2F: +++ pop(2); +++ push(FLOAT); +++ break; +++ case Opcodes.DADD: +++ case Opcodes.DSUB: +++ case Opcodes.DMUL: +++ case
Opcodes.DDIV: +++ case Opcodes.DREM: +++ pop(4); +++ push(DOUBLE); +++ push(TOP); +++ break; +++ case Opcodes.LSHL: +++ case Opcodes.LSHR: +++ case Opcodes.LUSHR: +++ pop(3); +++ push(LONG); +++ push(TOP); +++ break; +++ case Opcodes.IINC: +++ set(arg, INTEGER); +++ break; +++ case Opcodes.I2L: +++ case Opcodes.F2L: +++ pop(1); +++ push(LONG); +++ push(TOP); +++ break; +++ case Opcodes.I2F: +++ pop(1); +++ push(FLOAT); +++ break; +++ case Opcodes.I2D: +++ case Opcodes.F2D: +++ pop(1); +++ push(DOUBLE); +++ push(TOP); +++ break; +++ case Opcodes.F2I: +++ case Opcodes.ARRAYLENGTH: +++ case Opcodes.INSTANCEOF: +++ pop(1); +++ push(INTEGER); +++ break; +++ case Opcodes.LCMP: +++ case Opcodes.DCMPL: +++ case Opcodes.DCMPG: +++ pop(4); +++ push(INTEGER); +++ break; +++ case Opcodes.JSR: +++ case Opcodes.RET: +++ throw new RuntimeException( +++ "JSR/RET are not supported with computeFrames option"); +++ case Opcodes.GETSTATIC: +++ push(cw, item.strVal3); +++ break; +++ case Opcodes.PUTSTATIC: +++ pop(item.strVal3); +++ break; +++ case Opcodes.GETFIELD: +++ pop(1); +++ push(cw, item.strVal3); +++ break; +++ case Opcodes.PUTFIELD: +++ pop(item.strVal3); +++ pop(); +++ break; +++ case Opcodes.INVOKEVIRTUAL: +++ case Opcodes.INVOKESPECIAL: +++ case Opcodes.INVOKESTATIC: +++ case Opcodes.INVOKEINTERFACE: +++ pop(item.strVal3); +++ if (opcode != Opcodes.INVOKESTATIC) { +++ t1 = pop(); +++ if (opcode == Opcodes.INVOKESPECIAL +++ && item.strVal2.charAt(0) == '<') { +++ init(t1); +++ } +++ } +++ push(cw, item.strVal3); +++ break; +++ case Opcodes.INVOKEDYNAMIC: +++ pop(item.strVal2); +++ push(cw, item.strVal2); +++ break; +++ case Opcodes.NEW: +++ push(UNINITIALIZED | cw.addUninitializedType(item.strVal1, arg)); +++ break; +++ case Opcodes.NEWARRAY: +++ pop(); +++ switch (arg) { +++ case Opcodes.T_BOOLEAN: +++ push(ARRAY_OF | BOOLEAN); +++ break; +++ case Opcodes.T_CHAR: +++ push(ARRAY_OF | CHAR); +++ break; +++ case Opcodes.T_BYTE: +++ push(ARRAY_OF | BYTE); +++ break; +++ case
Opcodes.T_SHORT: +++ push(ARRAY_OF | SHORT); +++ break; +++ case Opcodes.T_INT: +++ push(ARRAY_OF | INTEGER); +++ break; +++ case Opcodes.T_FLOAT: +++ push(ARRAY_OF | FLOAT); +++ break; +++ case Opcodes.T_DOUBLE: +++ push(ARRAY_OF | DOUBLE); +++ break; +++ // case Opcodes.T_LONG: +++ default: +++ push(ARRAY_OF | LONG); +++ break; +++ } +++ break; +++ case Opcodes.ANEWARRAY: +++ String s = item.strVal1; +++ pop(); +++ if (s.charAt(0) == '[') { +++ push(cw, '[' + s); +++ } else { +++ push(ARRAY_OF | OBJECT | cw.addType(s)); +++ } +++ break; +++ case Opcodes.CHECKCAST: +++ s = item.strVal1; +++ pop(); +++ if (s.charAt(0) == '[') { +++ push(cw, s); +++ } else { +++ push(OBJECT | cw.addType(s)); +++ } +++ break; +++ // case Opcodes.MULTIANEWARRAY: +++ default: +++ pop(arg); +++ push(cw, item.strVal1); +++ break; +++ } +++ } +++ +++ /** +++ * Merges the input frame of the given basic block with the input and output +++ * frames of this basic block. Returns <tt>true</tt> if the input frame of +++ * the given label has been changed by this operation. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs. +++ * @param frame +++ * the basic block whose input frame must be updated. +++ * @param edge +++ * the kind of the {@link Edge} between this label and 'frame'. +++ * See {@link Edge#info}. +++ * @return <tt>true</tt> if the input frame of the given label has been +++ * changed by this operation.
+++ */ +++ boolean merge(final ClassWriter cw, final Frame frame, final int edge) { +++ boolean changed = false; +++ int i, s, dim, kind, t; +++ +++ int nLocal = inputLocals.length; +++ int nStack = inputStack.length; +++ if (frame.inputLocals == null) { +++ frame.inputLocals = new int[nLocal]; +++ changed = true; +++ } +++ +++ for (i = 0; i < nLocal; ++i) { +++ if (outputLocals != null && i < outputLocals.length) { +++ s = outputLocals[i]; +++ if (s == 0) { +++ t = inputLocals[i]; +++ } else { +++ dim = s & DIM; +++ kind = s & KIND; +++ if (kind == BASE) { +++ t = s; +++ } else { +++ if (kind == LOCAL) { +++ t = dim + inputLocals[s & VALUE]; +++ } else { +++ t = dim + inputStack[nStack - (s & VALUE)]; +++ } +++ if ((s & TOP_IF_LONG_OR_DOUBLE) != 0 +++ && (t == LONG || t == DOUBLE)) { +++ t = TOP; +++ } +++ } +++ } +++ } else { +++ t = inputLocals[i]; +++ } +++ if (initializations != null) { +++ t = init(cw, t); +++ } +++ changed |= merge(cw, t, frame.inputLocals, i); +++ } +++ +++ if (edge > 0) { /* edge > 0: this frame's input locals are also merged into 'frame' and its input stack becomes the single type 'edge' (presumably an exception handler edge -- confirm against Edge#info) */ +++ for (i = 0; i < nLocal; ++i) { +++ t = inputLocals[i]; +++ changed |= merge(cw, t, frame.inputLocals, i); +++ } +++ if (frame.inputStack == null) { +++ frame.inputStack = new int[1]; +++ changed = true; +++ } +++ changed |= merge(cw, edge, frame.inputStack, 0); +++ return changed; +++ } +++ +++ int nInputStack = inputStack.length + owner.inputStackTop; +++ if (frame.inputStack == null) { +++ frame.inputStack = new int[nInputStack + outputStackTop]; +++ changed = true; +++ } +++ +++ for (i = 0; i < nInputStack; ++i) { +++ t = inputStack[i]; +++ if (initializations != null) { +++ t = init(cw, t); +++ } +++ changed |= merge(cw, t, frame.inputStack, i); +++ } +++ for (i = 0; i < outputStackTop; ++i) { +++ s = outputStack[i]; +++ dim = s & DIM; +++ kind = s & KIND; +++ if (kind == BASE) { +++ t = s; +++ } else { +++ if (kind == LOCAL) { +++ t = dim + inputLocals[s & VALUE]; +++ } else { +++ t = dim + inputStack[nStack - (s & VALUE)]; +++ } +++ if ((s &
TOP_IF_LONG_OR_DOUBLE) != 0 +++ && (t == LONG || t == DOUBLE)) { +++ t = TOP; +++ } +++ } +++ if (initializations != null) { +++ t = init(cw, t); +++ } +++ changed |= merge(cw, t, frame.inputStack, nInputStack + i); +++ } +++ return changed; +++ } +++ +++ /** +++ * Merges the type at the given index in the given type array with the given +++ * type. Returns <tt>true</tt> if the type array has been modified by this +++ * operation. +++ * +++ * @param cw +++ * the ClassWriter to which this label belongs. +++ * @param t +++ * the type with which the type array element must be merged. +++ * @param types +++ * an array of types. +++ * @param index +++ * the index of the type that must be merged in 'types'. +++ * @return <tt>true</tt> if the type array has been modified by this +++ * operation. +++ */ +++ private static boolean merge(final ClassWriter cw, int t, +++ final int[] types, final int index) { +++ int u = types[index]; +++ if (u == t) { +++ // if the types are equal, merge(u,t)=u, so there is no change +++ return false; +++ } +++ if ((t & ~DIM) == NULL) { +++ if (u == NULL) { +++ return false; +++ } +++ t = NULL; +++ } +++ if (u == 0) { +++ // if types[index] has never been assigned, merge(u,t)=t +++ types[index] = t; +++ return true; +++ } +++ int v; +++ if ((u & BASE_KIND) == OBJECT || (u & DIM) != 0) { +++ // if u is a reference type of any dimension +++ if (t == NULL) { +++ // if t is the NULL type, merge(u,t)=u, so there is no change +++ return false; +++ } else if ((t & (DIM | BASE_KIND)) == (u & (DIM | BASE_KIND))) { +++ // if t and u have the same dimension and same base kind +++ if ((u & BASE_KIND) == OBJECT) { +++ // if t is also a reference type, and if u and t have the +++ // same dimension merge(u,t) = dim(t) | common parent of the +++ // element types of u and t +++ v = (t & DIM) | OBJECT +++ | cw.getMergedType(t & BASE_VALUE, u & BASE_VALUE); +++ } else { +++ // if u and t are array types, but not with the same element +++ // type, merge(u,t) =
dim(u) - 1 | java/lang/Object +++ int vdim = ELEMENT_OF + (u & DIM); +++ v = vdim | OBJECT | cw.addType("java/lang/Object"); +++ } +++ } else if ((t & BASE_KIND) == OBJECT || (t & DIM) != 0) { +++ // if t is any other reference or array type, the merged type +++ // is min(udim, tdim) | java/lang/Object, where udim is the +++ // array dimension of u, minus 1 if u is an array type with a +++ // primitive element type (and similarly for tdim). +++ int tdim = (((t & DIM) == 0 || (t & BASE_KIND) == OBJECT) ? 0 +++ : ELEMENT_OF) + (t & DIM); +++ int udim = (((u & DIM) == 0 || (u & BASE_KIND) == OBJECT) ? 0 +++ : ELEMENT_OF) + (u & DIM); +++ v = Math.min(tdim, udim) | OBJECT +++ | cw.addType("java/lang/Object"); +++ } else { +++ // if t is any other type, merge(u,t)=TOP +++ v = TOP; +++ } +++ } else if (u == NULL) { +++ // if u is the NULL type, merge(u,t)=t, +++ // or TOP if t is not a reference type +++ v = (t & BASE_KIND) == OBJECT || (t & DIM) != 0 ? t : TOP; +++ } else { +++ // if u is any other type, merge(u,t)=TOP whatever t +++ v = TOP; +++ } +++ if (u != v) { +++ types[index] = v; +++ return true; +++ } +++ return false; +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Handle.java b/contrib/asm/src/org/objectweb/asm/Handle.java ++new file mode 100644 ++index 0000000..a627911 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Handle.java ++@@ -0,0 +1,170 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2.
Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++ +++package org.objectweb.asm; +++ +++/** +++ * A reference to a field or a method. +++ * +++ * @author Remi Forax +++ * @author Eric Bruneton +++ */ +++public final class Handle { +++ +++ /** +++ * The kind of field or method designated by this Handle. Should be +++ * {@link Opcodes#H_GETFIELD}, {@link Opcodes#H_GETSTATIC}, +++ * {@link Opcodes#H_PUTFIELD}, {@link Opcodes#H_PUTSTATIC}, +++ * {@link Opcodes#H_INVOKEVIRTUAL}, {@link Opcodes#H_INVOKESTATIC}, +++ * {@link Opcodes#H_INVOKESPECIAL}, {@link Opcodes#H_NEWINVOKESPECIAL} or +++ * {@link Opcodes#H_INVOKEINTERFACE}. +++ */ +++ final int tag; +++ +++ /** +++ * The internal name of the class that owns the field or method designated +++ * by this handle.
+++ */ +++ final String owner; +++ +++ /** +++ * The name of the field or method designated by this handle. +++ */ +++ final String name; +++ +++ /** +++ * The descriptor of the field or method designated by this handle. +++ */ +++ final String desc; +++ +++ /** +++ * Constructs a new field or method handle. +++ * +++ * @param tag +++ * the kind of field or method designated by this Handle. Must be +++ * {@link Opcodes#H_GETFIELD}, {@link Opcodes#H_GETSTATIC}, +++ * {@link Opcodes#H_PUTFIELD}, {@link Opcodes#H_PUTSTATIC}, +++ * {@link Opcodes#H_INVOKEVIRTUAL}, +++ * {@link Opcodes#H_INVOKESTATIC}, +++ * {@link Opcodes#H_INVOKESPECIAL}, +++ * {@link Opcodes#H_NEWINVOKESPECIAL} or +++ * {@link Opcodes#H_INVOKEINTERFACE}. +++ * @param owner +++ * the internal name of the class that owns the field or method +++ * designated by this handle. +++ * @param name +++ * the name of the field or method designated by this handle. +++ * @param desc +++ * the descriptor of the field or method designated by this +++ * handle. +++ */ +++ public Handle(int tag, String owner, String name, String desc) { +++ this.tag = tag; +++ this.owner = owner; +++ this.name = name; +++ this.desc = desc; +++ } +++ +++ /** +++ * Returns the kind of field or method designated by this handle. +++ * +++ * @return {@link Opcodes#H_GETFIELD}, {@link Opcodes#H_GETSTATIC}, +++ * {@link Opcodes#H_PUTFIELD}, {@link Opcodes#H_PUTSTATIC}, +++ * {@link Opcodes#H_INVOKEVIRTUAL}, {@link Opcodes#H_INVOKESTATIC}, +++ * {@link Opcodes#H_INVOKESPECIAL}, +++ * {@link Opcodes#H_NEWINVOKESPECIAL} or +++ * {@link Opcodes#H_INVOKEINTERFACE}. +++ */ +++ public int getTag() { +++ return tag; +++ } +++ +++ /** +++ * Returns the internal name of the class that owns the field or method +++ * designated by this handle. +++ * +++ * @return the internal name of the class that owns the field or method +++ * designated by this handle.
+++ */ +++ public String getOwner() { +++ return owner; +++ } +++ +++ /** +++ * Returns the name of the field or method designated by this handle. +++ * +++ * @return the name of the field or method designated by this handle. +++ */ +++ public String getName() { +++ return name; +++ } +++ +++ /** +++ * Returns the descriptor of the field or method designated by this handle. +++ * +++ * @return the descriptor of the field or method designated by this handle. +++ */ +++ public String getDesc() { +++ return desc; +++ } +++ +++ @Override +++ public boolean equals(Object obj) { /* equal when obj is a Handle whose tag, owner, name and desc all match */ +++ if (obj == this) { +++ return true; +++ } +++ if (!(obj instanceof Handle)) { +++ return false; +++ } +++ Handle h = (Handle) obj; +++ return tag == h.tag && owner.equals(h.owner) && name.equals(h.name) +++ && desc.equals(h.desc); +++ } +++ +++ @Override +++ public int hashCode() { /* combines the same four fields that equals() compares */ +++ return tag + owner.hashCode() * name.hashCode() * desc.hashCode(); +++ } +++ +++ /** +++ * Returns the textual representation of this handle. The textual +++ * representation is: +++ * +++ * <pre> +++ * owner '.' name desc ' ' '(' tag ')' +++ * </pre> +++ * +++ * As this format is unambiguous, it can be parsed if necessary. +++ */ +++ @Override +++ public String toString() { +++ return owner + '.' + name + desc + " (" + tag + ')'; +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Handler.java b/contrib/asm/src/org/objectweb/asm/Handler.java ++new file mode 100644 ++index 0000000..b24591d ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Handler.java ++@@ -0,0 +1,121 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1.
Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * Information about an exception handler block. +++ * +++ * @author Eric Bruneton +++ */ +++class Handler { +++ +++ /** +++ * Beginning of the exception handler's scope (inclusive). +++ */ +++ Label start; +++ +++ /** +++ * End of the exception handler's scope (exclusive). +++ */ +++ Label end; +++ +++ /** +++ * Beginning of the exception handler's code. +++ */ +++ Label handler; +++ +++ /** +++ * Internal name of the type of exceptions handled by this handler, or +++ * <tt>null</tt> to catch any exceptions. 
+++ */ +++ String desc; +++ +++ /** +++ * Constant pool index of the internal name of the type of exceptions +++ * handled by this handler, or 0 to catch any exceptions. +++ */ +++ int type; +++ +++ /** +++ * Next exception handler block info. +++ */ +++ Handler next; +++ +++ /** +++ * Removes the range between start and end from the given exception +++ * handlers. +++ * +++ * @param h +++ * an exception handler list. +++ * @param start +++ * the start of the range to be removed. +++ * @param end +++ * the end of the range to be removed. Maybe null. +++ * @return the exception handler list with the start-end range removed. +++ */ +++ static Handler remove(Handler h, Label start, Label end) { +++ if (h == null) { +++ return null; +++ } else { +++ h.next = remove(h.next, start, end); +++ } +++ int hstart = h.start.position; +++ int hend = h.end.position; +++ int s = start.position; +++ int e = end == null ? Integer.MAX_VALUE : end.position; +++ // if [hstart,hend[ and [s,e[ intervals intersect... 
+++ if (s < hend && e > hstart) { +++ if (s <= hstart) { +++ if (e >= hend) { +++ // [hstart,hend[ fully included in [s,e[, h removed +++ h = h.next; +++ } else { +++ // [hstart,hend[ minus [s,e[ = [e,hend[ +++ h.start = end; +++ } +++ } else if (e >= hend) { +++ // [hstart,hend[ minus [s,e[ = [hstart,s[ +++ h.end = start; +++ } else { +++ // [hstart,hend[ minus [s,e[ = [hstart,s[ + [e,hend[ +++ Handler g = new Handler(); +++ g.start = end; +++ g.end = h.end; +++ g.handler = h.handler; +++ g.desc = h.desc; +++ g.type = h.type; +++ g.next = h.next; +++ h.end = start; +++ h.next = g; +++ } +++ } +++ return h; +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Item.java b/contrib/asm/src/org/objectweb/asm/Item.java ++new file mode 100644 ++index 0000000..917524d ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Item.java ++@@ -0,0 +1,313 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. 
+++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A constant pool item. Constant pool items can be created with the 'newXXX' +++ * methods in the {@link ClassWriter} class. +++ * +++ * @author Eric Bruneton +++ */ +++final class Item { +++ +++ /** +++ * Index of this item in the constant pool. +++ */ +++ int index; +++ +++ /** +++ * Type of this constant pool item. A single class is used to represent all +++ * constant pool item types, in order to minimize the bytecode size of this +++ * package. The value of this field is one of {@link ClassWriter#INT}, +++ * {@link ClassWriter#LONG}, {@link ClassWriter#FLOAT}, +++ * {@link ClassWriter#DOUBLE}, {@link ClassWriter#UTF8}, +++ * {@link ClassWriter#STR}, {@link ClassWriter#CLASS}, +++ * {@link ClassWriter#NAME_TYPE}, {@link ClassWriter#FIELD}, +++ * {@link ClassWriter#METH}, {@link ClassWriter#IMETH}, +++ * {@link ClassWriter#MTYPE}, {@link ClassWriter#INDY}. +++ * +++ * MethodHandle constant 9 variations are stored using a range of 9 values +++ * from {@link ClassWriter#HANDLE_BASE} + 1 to +++ * {@link ClassWriter#HANDLE_BASE} + 9. 
+++ * +++ * Special Item types are used for Items that are stored in the ClassWriter +++ * {@link ClassWriter#typeTable}, instead of the constant pool, in order to +++ * avoid clashes with normal constant pool items in the ClassWriter constant +++ * pool's hash table. These special item types are +++ * {@link ClassWriter#TYPE_NORMAL}, {@link ClassWriter#TYPE_UNINIT} and +++ * {@link ClassWriter#TYPE_MERGED}. +++ */ +++ int type; +++ +++ /** +++ * Value of this item, for an integer item. +++ */ +++ int intVal; +++ +++ /** +++ * Value of this item, for a long item. +++ */ +++ long longVal; +++ +++ /** +++ * First part of the value of this item, for items that do not hold a +++ * primitive value. +++ */ +++ String strVal1; +++ +++ /** +++ * Second part of the value of this item, for items that do not hold a +++ * primitive value. +++ */ +++ String strVal2; +++ +++ /** +++ * Third part of the value of this item, for items that do not hold a +++ * primitive value. +++ */ +++ String strVal3; +++ +++ /** +++ * The hash code value of this constant pool item. +++ */ +++ int hashCode; +++ +++ /** +++ * Link to another constant pool item, used for collision lists in the +++ * constant pool's hash table. +++ */ +++ Item next; +++ +++ /** +++ * Constructs an uninitialized {@link Item}. +++ */ +++ Item() { +++ } +++ +++ /** +++ * Constructs an uninitialized {@link Item} for constant pool element at +++ * given position. +++ * +++ * @param index +++ * index of the item to be constructed. +++ */ +++ Item(final int index) { +++ this.index = index; +++ } +++ +++ /** +++ * Constructs a copy of the given item. +++ * +++ * @param index +++ * index of the item to be constructed. +++ * @param i +++ * the item that must be copied into the item to be constructed. 
+++ */ +++ Item(final int index, final Item i) { +++ this.index = index; +++ type = i.type; +++ intVal = i.intVal; +++ longVal = i.longVal; +++ strVal1 = i.strVal1; +++ strVal2 = i.strVal2; +++ strVal3 = i.strVal3; +++ hashCode = i.hashCode; +++ } +++ +++ /** +++ * Sets this item to an integer item. +++ * +++ * @param intVal +++ * the value of this item. +++ */ +++ void set(final int intVal) { +++ this.type = ClassWriter.INT; +++ this.intVal = intVal; +++ this.hashCode = 0x7FFFFFFF & (type + intVal); +++ } +++ +++ /** +++ * Sets this item to a long item. +++ * +++ * @param longVal +++ * the value of this item. +++ */ +++ void set(final long longVal) { +++ this.type = ClassWriter.LONG; +++ this.longVal = longVal; +++ this.hashCode = 0x7FFFFFFF & (type + (int) longVal); +++ } +++ +++ /** +++ * Sets this item to a float item. +++ * +++ * @param floatVal +++ * the value of this item. +++ */ +++ void set(final float floatVal) { +++ this.type = ClassWriter.FLOAT; +++ this.intVal = Float.floatToRawIntBits(floatVal); +++ this.hashCode = 0x7FFFFFFF & (type + (int) floatVal); +++ } +++ +++ /** +++ * Sets this item to a double item. +++ * +++ * @param doubleVal +++ * the value of this item. +++ */ +++ void set(final double doubleVal) { +++ this.type = ClassWriter.DOUBLE; +++ this.longVal = Double.doubleToRawLongBits(doubleVal); +++ this.hashCode = 0x7FFFFFFF & (type + (int) doubleVal); +++ } +++ +++ /** +++ * Sets this item to an item that do not hold a primitive value. +++ * +++ * @param type +++ * the type of this item. +++ * @param strVal1 +++ * first part of the value of this item. +++ * @param strVal2 +++ * second part of the value of this item. +++ * @param strVal3 +++ * third part of the value of this item. 
+++ */ +++ @SuppressWarnings("fallthrough") +++ void set(final int type, final String strVal1, final String strVal2, +++ final String strVal3) { +++ this.type = type; +++ this.strVal1 = strVal1; +++ this.strVal2 = strVal2; +++ this.strVal3 = strVal3; +++ switch (type) { +++ case ClassWriter.CLASS: +++ this.intVal = 0; // intVal of a class must be zero, see visitInnerClass +++ case ClassWriter.UTF8: +++ case ClassWriter.STR: +++ case ClassWriter.MTYPE: +++ case ClassWriter.TYPE_NORMAL: +++ hashCode = 0x7FFFFFFF & (type + strVal1.hashCode()); +++ return; +++ case ClassWriter.NAME_TYPE: { +++ hashCode = 0x7FFFFFFF & (type + strVal1.hashCode() +++ * strVal2.hashCode()); +++ return; +++ } +++ // ClassWriter.FIELD: +++ // ClassWriter.METH: +++ // ClassWriter.IMETH: +++ // ClassWriter.HANDLE_BASE + 1..9 +++ default: +++ hashCode = 0x7FFFFFFF & (type + strVal1.hashCode() +++ * strVal2.hashCode() * strVal3.hashCode()); +++ } +++ } +++ +++ /** +++ * Sets the item to an InvokeDynamic item. +++ * +++ * @param name +++ * invokedynamic's name. +++ * @param desc +++ * invokedynamic's desc. +++ * @param bsmIndex +++ * zero based index into the class attribute BootrapMethods. +++ */ +++ void set(String name, String desc, int bsmIndex) { +++ this.type = ClassWriter.INDY; +++ this.longVal = bsmIndex; +++ this.strVal1 = name; +++ this.strVal2 = desc; +++ this.hashCode = 0x7FFFFFFF & (ClassWriter.INDY + bsmIndex +++ * strVal1.hashCode() * strVal2.hashCode()); +++ } +++ +++ /** +++ * Sets the item to a BootstrapMethod item. +++ * +++ * @param position +++ * position in byte in the class attribute BootrapMethods. +++ * @param hashCode +++ * hashcode of the item. This hashcode is processed from the +++ * hashcode of the bootstrap method and the hashcode of all +++ * bootstrap arguments. 
+++ */ +++ void set(int position, int hashCode) { +++ this.type = ClassWriter.BSM; +++ this.intVal = position; +++ this.hashCode = hashCode; +++ } +++ +++ /** +++ * Indicates if the given item is equal to this one. <i>This method assumes +++ * that the two items have the same {@link #type}</i>. +++ * +++ * @param i +++ * the item to be compared to this one. Both items must have the +++ * same {@link #type}. +++ * @return <tt>true</tt> if the given item if equal to this one, +++ * <tt>false</tt> otherwise. +++ */ +++ boolean isEqualTo(final Item i) { +++ switch (type) { +++ case ClassWriter.UTF8: +++ case ClassWriter.STR: +++ case ClassWriter.CLASS: +++ case ClassWriter.MTYPE: +++ case ClassWriter.TYPE_NORMAL: +++ return i.strVal1.equals(strVal1); +++ case ClassWriter.TYPE_MERGED: +++ case ClassWriter.LONG: +++ case ClassWriter.DOUBLE: +++ return i.longVal == longVal; +++ case ClassWriter.INT: +++ case ClassWriter.FLOAT: +++ return i.intVal == intVal; +++ case ClassWriter.TYPE_UNINIT: +++ return i.intVal == intVal && i.strVal1.equals(strVal1); +++ case ClassWriter.NAME_TYPE: +++ return i.strVal1.equals(strVal1) && i.strVal2.equals(strVal2); +++ case ClassWriter.INDY: { +++ return i.longVal == longVal && i.strVal1.equals(strVal1) +++ && i.strVal2.equals(strVal2); +++ } +++ // case ClassWriter.FIELD: +++ // case ClassWriter.METH: +++ // case ClassWriter.IMETH: +++ // case ClassWriter.HANDLE_BASE + 1..9 +++ default: +++ return i.strVal1.equals(strVal1) && i.strVal2.equals(strVal2) +++ && i.strVal3.equals(strVal3); +++ } +++ } +++ +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Label.java b/contrib/asm/src/org/objectweb/asm/Label.java ++new file mode 100644 ++index 0000000..6bca6fb ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Label.java ++@@ -0,0 +1,565 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. 
+++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A label represents a position in the bytecode of a method. Labels are used +++ * for jump, goto, and switch instructions, and for try catch blocks. A label +++ * designates the <i>instruction</i> that is just after. 
Note however that there +++ * can be other elements between a label and the instruction it designates (such +++ * as other labels, stack map frames, line numbers, etc.). +++ * +++ * @author Eric Bruneton +++ */ +++public class Label { +++ +++ /** +++ * Indicates if this label is only used for debug attributes. Such a label +++ * is not the start of a basic block, the target of a jump instruction, or +++ * an exception handler. It can be safely ignored in control flow graph +++ * analysis algorithms (for optimization purposes). +++ */ +++ static final int DEBUG = 1; +++ +++ /** +++ * Indicates if the position of this label is known. +++ */ +++ static final int RESOLVED = 2; +++ +++ /** +++ * Indicates if this label has been updated, after instruction resizing. +++ */ +++ static final int RESIZED = 4; +++ +++ /** +++ * Indicates if this basic block has been pushed in the basic block stack. +++ * See {@link MethodWriter#visitMaxs visitMaxs}. +++ */ +++ static final int PUSHED = 8; +++ +++ /** +++ * Indicates if this label is the target of a jump instruction, or the start +++ * of an exception handler. +++ */ +++ static final int TARGET = 16; +++ +++ /** +++ * Indicates if a stack map frame must be stored for this label. +++ */ +++ static final int STORE = 32; +++ +++ /** +++ * Indicates if this label corresponds to a reachable basic block. +++ */ +++ static final int REACHABLE = 64; +++ +++ /** +++ * Indicates if this basic block ends with a JSR instruction. +++ */ +++ static final int JSR = 128; +++ +++ /** +++ * Indicates if this basic block ends with a RET instruction. +++ */ +++ static final int RET = 256; +++ +++ /** +++ * Indicates if this basic block is the start of a subroutine. +++ */ +++ static final int SUBROUTINE = 512; +++ +++ /** +++ * Indicates if this subroutine basic block has been visited by a +++ * visitSubroutine(null, ...) call. 
+++ */ +++ static final int VISITED = 1024; +++ +++ /** +++ * Indicates if this subroutine basic block has been visited by a +++ * visitSubroutine(!null, ...) call. +++ */ +++ static final int VISITED2 = 2048; +++ +++ /** +++ * Field used to associate user information to a label. Warning: this field +++ * is used by the ASM tree package. In order to use it with the ASM tree +++ * package you must override the +++ * {@link org.objectweb.asm.tree.MethodNode#getLabelNode} method. +++ */ +++ public Object info; +++ +++ /** +++ * Flags that indicate the status of this label. +++ * +++ * @see #DEBUG +++ * @see #RESOLVED +++ * @see #RESIZED +++ * @see #PUSHED +++ * @see #TARGET +++ * @see #STORE +++ * @see #REACHABLE +++ * @see #JSR +++ * @see #RET +++ */ +++ int status; +++ +++ /** +++ * The line number corresponding to this label, if known. If there are +++ * several lines, each line is stored in a separate label, all linked via +++ * their next field (these links are created in ClassReader and removed just +++ * before visitLabel is called, so that this does not impact the rest of the +++ * code). +++ */ +++ int line; +++ +++ /** +++ * The position of this label in the code, if known. +++ */ +++ int position; +++ +++ /** +++ * Number of forward references to this label, times two. +++ */ +++ private int referenceCount; +++ +++ /** +++ * Informations about forward references. Each forward reference is +++ * described by two consecutive integers in this array: the first one is the +++ * position of the first byte of the bytecode instruction that contains the +++ * forward reference, while the second is the position of the first byte of +++ * the forward reference itself. In fact the sign of the first integer +++ * indicates if this reference uses 2 or 4 bytes, and its absolute value +++ * gives the position of the bytecode instruction. This array is also used +++ * as a bitset to store the subroutines to which a basic block belongs. 
This +++ * information is needed in {@linked MethodWriter#visitMaxs}, after all +++ * forward references have been resolved. Hence the same array can be used +++ * for both purposes without problems. +++ */ +++ private int[] srcAndRefPositions; +++ +++ // ------------------------------------------------------------------------ +++ +++ /* +++ * Fields for the control flow and data flow graph analysis algorithms (used +++ * to compute the maximum stack size or the stack map frames). A control +++ * flow graph contains one node per "basic block", and one edge per "jump" +++ * from one basic block to another. Each node (i.e., each basic block) is +++ * represented by the Label object that corresponds to the first instruction +++ * of this basic block. Each node also stores the list of its successors in +++ * the graph, as a linked list of Edge objects. +++ * +++ * The control flow analysis algorithms used to compute the maximum stack +++ * size or the stack map frames are similar and use two steps. The first +++ * step, during the visit of each instruction, builds information about the +++ * state of the local variables and the operand stack at the end of each +++ * basic block, called the "output frame", <i>relatively</i> to the frame +++ * state at the beginning of the basic block, which is called the "input +++ * frame", and which is <i>unknown</i> during this step. The second step, in +++ * {@link MethodWriter#visitMaxs}, is a fix point algorithm that computes +++ * information about the input frame of each basic block, from the input +++ * state of the first basic block (known from the method signature), and by +++ * the using the previously computed relative output frames. +++ * +++ * The algorithm used to compute the maximum stack size only computes the +++ * relative output and absolute input stack heights, while the algorithm +++ * used to compute stack map frames computes relative output frames and +++ * absolute input frames. 
+++ */ +++ +++ /** +++ * Start of the output stack relatively to the input stack. The exact +++ * semantics of this field depends on the algorithm that is used. +++ * +++ * When only the maximum stack size is computed, this field is the number of +++ * elements in the input stack. +++ * +++ * When the stack map frames are completely computed, this field is the +++ * offset of the first output stack element relatively to the top of the +++ * input stack. This offset is always negative or null. A null offset means +++ * that the output stack must be appended to the input stack. A -n offset +++ * means that the first n output stack elements must replace the top n input +++ * stack elements, and that the other elements must be appended to the input +++ * stack. +++ */ +++ int inputStackTop; +++ +++ /** +++ * Maximum height reached by the output stack, relatively to the top of the +++ * input stack. This maximum is always positive or null. +++ */ +++ int outputStackMax; +++ +++ /** +++ * Information about the input and output stack map frames of this basic +++ * block. This field is only used when {@link ClassWriter#COMPUTE_FRAMES} +++ * option is used. +++ */ +++ Frame frame; +++ +++ /** +++ * The successor of this label, in the order they are visited. This linked +++ * list does not include labels used for debug info only. If +++ * {@link ClassWriter#COMPUTE_FRAMES} option is used then, in addition, it +++ * does not contain successive labels that denote the same bytecode position +++ * (in this case only the first label appears in this list). +++ */ +++ Label successor; +++ +++ /** +++ * The successors of this node in the control flow graph. These successors +++ * are stored in a linked list of {@link Edge Edge} objects, linked to each +++ * other by their {@link Edge#next} field. +++ */ +++ Edge successors; +++ +++ /** +++ * The next basic block in the basic block stack. 
This stack is used in the +++ * main loop of the fix point algorithm used in the second step of the +++ * control flow analysis algorithms. It is also used in +++ * {@link #visitSubroutine} to avoid using a recursive method, and in +++ * ClassReader to temporarily store multiple source lines for a label. +++ * +++ * @see MethodWriter#visitMaxs +++ */ +++ Label next; +++ +++ // ------------------------------------------------------------------------ +++ // Constructor +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new label. +++ */ +++ public Label() { +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Methods to compute offsets and to manage forward references +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the offset corresponding to this label. This offset is computed +++ * from the start of the method's bytecode. <i>This method is intended for +++ * {@link Attribute} sub classes, and is normally not needed by class +++ * generators or adapters.</i> +++ * +++ * @return the offset corresponding to this label. +++ * @throws IllegalStateException +++ * if this label is not resolved yet. +++ */ +++ public int getOffset() { +++ if ((status & RESOLVED) == 0) { +++ throw new IllegalStateException( +++ "Label offset position has not been resolved yet"); +++ } +++ return position; +++ } +++ +++ /** +++ * Puts a reference to this label in the bytecode of a method. If the +++ * position of the label is known, the offset is computed and written +++ * directly. Otherwise, a null offset is written and a new forward reference +++ * is declared for this label. +++ * +++ * @param owner +++ * the code writer that calls this method. +++ * @param out +++ * the bytecode of the method. +++ * @param source +++ * the position of first byte of the bytecode instruction that +++ * contains this label. 
+++ * @param wideOffset +++ * <tt>true</tt> if the reference must be stored in 4 bytes, or +++ * <tt>false</tt> if it must be stored with 2 bytes. +++ * @throws IllegalArgumentException +++ * if this label has not been created by the given code writer. +++ */ +++ void put(final MethodWriter owner, final ByteVector out, final int source, +++ final boolean wideOffset) { +++ if ((status & RESOLVED) == 0) { +++ if (wideOffset) { +++ addReference(-1 - source, out.length); +++ out.putInt(-1); +++ } else { +++ addReference(source, out.length); +++ out.putShort(-1); +++ } +++ } else { +++ if (wideOffset) { +++ out.putInt(position - source); +++ } else { +++ out.putShort(position - source); +++ } +++ } +++ } +++ +++ /** +++ * Adds a forward reference to this label. This method must be called only +++ * for a true forward reference, i.e. only if this label is not resolved +++ * yet. For backward references, the offset of the reference can be, and +++ * must be, computed and stored directly. +++ * +++ * @param sourcePosition +++ * the position of the referencing instruction. This position +++ * will be used to compute the offset of this forward reference. +++ * @param referencePosition +++ * the position where the offset for this forward reference must +++ * be stored. +++ */ +++ private void addReference(final int sourcePosition, +++ final int referencePosition) { +++ if (srcAndRefPositions == null) { +++ srcAndRefPositions = new int[6]; +++ } +++ if (referenceCount >= srcAndRefPositions.length) { +++ int[] a = new int[srcAndRefPositions.length + 6]; +++ System.arraycopy(srcAndRefPositions, 0, a, 0, +++ srcAndRefPositions.length); +++ srcAndRefPositions = a; +++ } +++ srcAndRefPositions[referenceCount++] = sourcePosition; +++ srcAndRefPositions[referenceCount++] = referencePosition; +++ } +++ +++ /** +++ * Resolves all forward references to this label. This method must be called +++ * when this label is added to the bytecode of the method, i.e. 
when its +++ * position becomes known. This method fills in the blanks that where left +++ * in the bytecode by each forward reference previously added to this label. +++ * +++ * @param owner +++ * the code writer that calls this method. +++ * @param position +++ * the position of this label in the bytecode. +++ * @param data +++ * the bytecode of the method. +++ * @return <tt>true</tt> if a blank that was left for this label was to +++ * small to store the offset. In such a case the corresponding jump +++ * instruction is replaced with a pseudo instruction (using unused +++ * opcodes) using an unsigned two bytes offset. These pseudo +++ * instructions will need to be replaced with true instructions with +++ * wider offsets (4 bytes instead of 2). This is done in +++ * {@link MethodWriter#resizeInstructions}. +++ * @throws IllegalArgumentException +++ * if this label has already been resolved, or if it has not +++ * been created by the given code writer. +++ */ +++ boolean resolve(final MethodWriter owner, final int position, +++ final byte[] data) { +++ boolean needUpdate = false; +++ this.status |= RESOLVED; +++ this.position = position; +++ int i = 0; +++ while (i < referenceCount) { +++ int source = srcAndRefPositions[i++]; +++ int reference = srcAndRefPositions[i++]; +++ int offset; +++ if (source >= 0) { +++ offset = position - source; +++ if (offset < Short.MIN_VALUE || offset > Short.MAX_VALUE) { +++ /* +++ * changes the opcode of the jump instruction, in order to +++ * be able to find it later (see resizeInstructions in +++ * MethodWriter). These temporary opcodes are similar to +++ * jump instruction opcodes, except that the 2 bytes offset +++ * is unsigned (and can therefore represent values from 0 to +++ * 65535, which is sufficient since the size of a method is +++ * limited to 65535 bytes). +++ */ +++ int opcode = data[reference - 1] & 0xFF; +++ if (opcode <= Opcodes.JSR) { +++ // changes IFEQ ... 
JSR to opcodes 202 to 217 +++ data[reference - 1] = (byte) (opcode + 49); +++ } else { +++ // changes IFNULL and IFNONNULL to opcodes 218 and 219 +++ data[reference - 1] = (byte) (opcode + 20); +++ } +++ needUpdate = true; +++ } +++ data[reference++] = (byte) (offset >>> 8); +++ data[reference] = (byte) offset; +++ } else { +++ offset = position + source + 1; +++ data[reference++] = (byte) (offset >>> 24); +++ data[reference++] = (byte) (offset >>> 16); +++ data[reference++] = (byte) (offset >>> 8); +++ data[reference] = (byte) offset; +++ } +++ } +++ return needUpdate; +++ } +++ +++ /** +++ * Returns the first label of the series to which this label belongs. For an +++ * isolated label or for the first label in a series of successive labels, +++ * this method returns the label itself. For other labels it returns the +++ * first label of the series. +++ * +++ * @return the first label of the series to which this label belongs. +++ */ +++ Label getFirst() { +++ return !ClassReader.FRAMES || frame == null ? this : frame.owner; +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Methods related to subroutines +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns true if this basic block belongs to the given subroutine. +++ * +++ * @param id +++ * a subroutine id. +++ * @return true if this basic block belongs to the given subroutine. +++ */ +++ boolean inSubroutine(final long id) { +++ if ((status & Label.VISITED) != 0) { +++ return (srcAndRefPositions[(int) (id >>> 32)] & (int) id) != 0; +++ } +++ return false; +++ } +++ +++ /** +++ * Returns true if this basic block and the given one belong to a common +++ * subroutine. +++ * +++ * @param block +++ * another basic block. +++ * @return true if this basic block and the given one belong to a common +++ * subroutine.
+++ */ +++ boolean inSameSubroutine(final Label block) { +++ if ((status & VISITED) == 0 || (block.status & VISITED) == 0) { +++ return false; +++ } +++ for (int i = 0; i < srcAndRefPositions.length; ++i) { +++ if ((srcAndRefPositions[i] & block.srcAndRefPositions[i]) != 0) { +++ return true; +++ } +++ } +++ return false; +++ } +++ +++ /** +++ * Marks this basic block as belonging to the given subroutine. +++ * +++ * @param id +++ * a subroutine id. +++ * @param nbSubroutines +++ * the total number of subroutines in the method. +++ */ +++ void addToSubroutine(final long id, final int nbSubroutines) { +++ if ((status & VISITED) == 0) { +++ status |= VISITED; +++ srcAndRefPositions = new int[nbSubroutines / 32 + 1]; +++ } +++ srcAndRefPositions[(int) (id >>> 32)] |= (int) id; +++ } +++ +++ /** +++ * Finds the basic blocks that belong to a given subroutine, and marks these +++ * blocks as belonging to this subroutine. This method follows the control +++ * flow graph to find all the blocks that are reachable from the current +++ * block WITHOUT following any JSR target. +++ * +++ * @param JSR +++ * a JSR block that jumps to this subroutine. If this JSR is not +++ * null it is added to the successor of the RET blocks found in +++ * the subroutine. +++ * @param id +++ * the id of this subroutine. +++ * @param nbSubroutines +++ * the total number of subroutines in the method. 
+++ */ +++ void visitSubroutine(final Label JSR, final long id, final int nbSubroutines) { +++ // user managed stack of labels, to avoid using a recursive method +++ // (recursivity can lead to stack overflow with very large methods) +++ Label stack = this; +++ while (stack != null) { +++ // removes a label l from the stack +++ Label l = stack; +++ stack = l.next; +++ l.next = null; +++ +++ if (JSR != null) { +++ if ((l.status & VISITED2) != 0) { +++ continue; +++ } +++ l.status |= VISITED2; +++ // adds JSR to the successors of l, if it is a RET block +++ if ((l.status & RET) != 0) { +++ if (!l.inSameSubroutine(JSR)) { +++ Edge e = new Edge(); +++ e.info = l.inputStackTop; +++ e.successor = JSR.successors.successor; +++ e.next = l.successors; +++ l.successors = e; +++ } +++ } +++ } else { +++ // if the l block already belongs to subroutine 'id', continue +++ if (l.inSubroutine(id)) { +++ continue; +++ } +++ // marks the l block as belonging to subroutine 'id' +++ l.addToSubroutine(id, nbSubroutines); +++ } +++ // pushes each successor of l on the stack, except JSR targets +++ Edge e = l.successors; +++ while (e != null) { +++ // if the l block is a JSR block, then 'l.successors.next' leads +++ // to the JSR target (see {@link #visitJumpInsn}) and must +++ // therefore not be followed +++ if ((l.status & Label.JSR) == 0 || e != l.successors.next) { +++ // pushes e.successor on the stack if it not already added +++ if (e.successor.next == null) { +++ e.successor.next = stack; +++ stack = e.successor; +++ } +++ } +++ e = e.next; +++ } +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Overriden Object methods +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns a string representation of this label. +++ * +++ * @return a string representation of this label. 
+++ */ +++ @Override +++ public String toString() { +++ return "L" + System.identityHashCode(this); +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/MethodVisitor.java b/contrib/asm/src/org/objectweb/asm/MethodVisitor.java ++new file mode 100644 ++index 0000000..f0927e8 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/MethodVisitor.java ++@@ -0,0 +1,881 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A visitor to visit a Java method. The methods of this class must be called in +++ * the following order: ( <tt>visitParameter</tt> )* [ +++ * <tt>visitAnnotationDefault</tt> ] ( <tt>visitAnnotation</tt> | +++ * <tt>visitParameterAnnotation</tt> <tt>visitTypeAnnotation</tt> | +++ * <tt>visitAttribute</tt> )* [ <tt>visitCode</tt> ( <tt>visitFrame</tt> | +++ * <tt>visit<i>X</i>Insn</tt> | <tt>visitLabel</tt> | +++ * <tt>visitInsnAnnotation</tt> | <tt>visitTryCatchBlock</tt> | +++ * <tt>visitTryCatchAnnotation</tt> | <tt>visitLocalVariable</tt> | +++ * <tt>visitLocalVariableAnnotation</tt> | <tt>visitLineNumber</tt> )* +++ * <tt>visitMaxs</tt> ] <tt>visitEnd</tt>. 
In addition, the +++ * <tt>visit<i>X</i>Insn</tt> and <tt>visitLabel</tt> methods must be called in +++ * the sequential order of the bytecode instructions of the visited code, +++ * <tt>visitInsnAnnotation</tt> must be called <i>after</i> the annotated +++ * instruction, <tt>visitTryCatchBlock</tt> must be called <i>before</i> the +++ * labels passed as arguments have been visited, +++ * <tt>visitTryCatchAnnotation</tt> must be called <i>after</i> the +++ * corresponding try catch block has been visited, and the +++ * <tt>visitLocalVariable</tt>, <tt>visitLocalVariableAnnotation</tt> and +++ * <tt>visitLineNumber</tt> methods must be called <i>after</i> the labels +++ * passed as arguments have been visited. +++ * +++ * @author Eric Bruneton +++ */ +++public abstract class MethodVisitor { +++ +++ /** +++ * The ASM API version implemented by this visitor. The value of this field +++ * must be one of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ protected final int api; +++ +++ /** +++ * The method visitor to which this visitor must delegate method calls. May +++ * be null. +++ */ +++ protected MethodVisitor mv; +++ +++ /** +++ * Constructs a new {@link MethodVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ */ +++ public MethodVisitor(final int api) { +++ this(api, null); +++ } +++ +++ /** +++ * Constructs a new {@link MethodVisitor}. +++ * +++ * @param api +++ * the ASM API version implemented by this visitor. Must be one +++ * of {@link Opcodes#ASM4} or {@link Opcodes#ASM5}. +++ * @param mv +++ * the method visitor to which this visitor must delegate method +++ * calls. May be null.
+++ */ +++ public MethodVisitor(final int api, final MethodVisitor mv) { +++ if (api != Opcodes.ASM4 && api != Opcodes.ASM5) { +++ throw new IllegalArgumentException(); +++ } +++ this.api = api; +++ this.mv = mv; +++ } +++ +++ // ------------------------------------------------------------------------- +++ // Parameters, annotations and non standard attributes +++ // ------------------------------------------------------------------------- +++ +++ /** +++ * Visits a parameter of this method. +++ * +++ * @param name +++ * parameter name or null if none is provided. +++ * @param access +++ * the parameter's access flags, only <tt>ACC_FINAL</tt>, +++ * <tt>ACC_SYNTHETIC</tt> or/and <tt>ACC_MANDATED</tt> are +++ * allowed (see {@link Opcodes}). +++ */ +++ public void visitParameter(String name, int access) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (mv != null) { +++ mv.visitParameter(name, access); +++ } +++ } +++ +++ /** +++ * Visits the default value of this annotation interface method. +++ * +++ * @return a visitor to visit the actual default value of this +++ * annotation interface method, or <tt>null</tt> if this visitor is +++ * not interested in visiting this default value. The 'name' +++ * parameters passed to the methods of this annotation visitor are +++ * ignored. Moreover, exactly one visit method must be called on this +++ * annotation visitor, followed by visitEnd. +++ */ +++ public AnnotationVisitor visitAnnotationDefault() { +++ if (mv != null) { +++ return mv.visitAnnotationDefault(); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an annotation of this method. +++ * +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * <tt>true</tt> if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or <tt>null</tt> if +++ * this visitor is not interested in visiting this annotation.
+++ */ +++ public AnnotationVisitor visitAnnotation(String desc, boolean visible) { +++ if (mv != null) { +++ return mv.visitAnnotation(desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an annotation on a type in the method signature. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#METHOD_TYPE_PARAMETER +++ * METHOD_TYPE_PARAMETER}, +++ * {@link TypeReference#METHOD_TYPE_PARAMETER_BOUND +++ * METHOD_TYPE_PARAMETER_BOUND}, +++ * {@link TypeReference#METHOD_RETURN METHOD_RETURN}, +++ * {@link TypeReference#METHOD_RECEIVER METHOD_RECEIVER}, +++ * {@link TypeReference#METHOD_FORMAL_PARAMETER +++ * METHOD_FORMAL_PARAMETER} or {@link TypeReference#THROWS +++ * THROWS}. See {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * <tt>null</tt> if the annotation targets 'typeRef' as a whole. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * <tt>true</tt> if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or <tt>null</tt> if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitTypeAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (mv != null) { +++ return mv.visitTypeAnnotation(typeRef, typePath, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits an annotation of a parameter this method. +++ * +++ * @param parameter +++ * the parameter index. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * <tt>true</tt> if the annotation is visible at runtime. 
+++ * @return a visitor to visit the annotation values, or <tt>null</tt> if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitParameterAnnotation(int parameter, +++ String desc, boolean visible) { +++ if (mv != null) { +++ return mv.visitParameterAnnotation(parameter, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a non standard attribute of this method. +++ * +++ * @param attr +++ * an attribute. +++ */ +++ public void visitAttribute(Attribute attr) { +++ if (mv != null) { +++ mv.visitAttribute(attr); +++ } +++ } +++ +++ /** +++ * Starts the visit of the method's code, if any (i.e. non abstract method). +++ */ +++ public void visitCode() { +++ if (mv != null) { +++ mv.visitCode(); +++ } +++ } +++ +++ /** +++ * Visits the current state of the local variables and operand stack +++ * elements. This method must(*) be called <i>just before</i> any +++ * instruction <b>i</b> that follows an unconditional branch instruction +++ * such as GOTO or THROW, that is the target of a jump instruction, or that +++ * starts an exception handler block. The visited types must describe the +++ * values of the local variables and of the operand stack elements <i>just +++ * before</i> <b>i</b> is executed.<br> +++ * <br> +++ * (*) this is mandatory only for classes whose version is greater than or +++ * equal to {@link Opcodes#V1_6 V1_6}. <br> +++ * <br> +++ * The frames of a method must be given either in expanded form, or in +++ * compressed form (all frames must use the same format, i.e. 
you must not +++ * mix expanded and compressed frames within a single method): +++ * <ul> +++ * <li>In expanded form, all frames must have the F_NEW type.</li> +++ * <li>In compressed form, frames are basically "deltas" from the state of +++ * the previous frame: +++ * <ul> +++ * <li>{@link Opcodes#F_SAME} representing frame with exactly the same +++ * locals as the previous frame and with the empty stack.</li> +++ * <li>{@link Opcodes#F_SAME1} representing frame with exactly the same +++ * locals as the previous frame and with single value on the stack ( +++ * <code>nStack</code> is 1 and <code>stack[0]</code> contains value for the +++ * type of the stack item).</li> +++ * <li>{@link Opcodes#F_APPEND} representing frame with current locals are +++ * the same as the locals in the previous frame, except that additional +++ * locals are defined (<code>nLocal</code> is 1, 2 or 3 and +++ * <code>local</code> elements contains values representing added types).</li> +++ * <li>{@link Opcodes#F_CHOP} representing frame with current locals are the +++ * same as the locals in the previous frame, except that the last 1-3 locals +++ * are absent and with the empty stack (<code>nLocals</code> is 1, 2 or 3).</li> +++ * <li>{@link Opcodes#F_FULL} representing complete frame data.</li> +++ * </ul> +++ * </li> +++ * </ul> +++ * <br> +++ * In both cases the first frame, corresponding to the method's parameters +++ * and access flags, is implicit and must not be visited. Also, it is +++ * illegal to visit two or more frames for the same code location (i.e., at +++ * least one instruction must be visited between two calls to visitFrame). +++ * +++ * @param type +++ * the type of this stack map frame. Must be +++ * {@link Opcodes#F_NEW} for expanded frames, or +++ * {@link Opcodes#F_FULL}, {@link Opcodes#F_APPEND}, +++ * {@link Opcodes#F_CHOP}, {@link Opcodes#F_SAME} or +++ * {@link Opcodes#F_APPEND}, {@link Opcodes#F_SAME1} for +++ * compressed frames. 
+++ * @param nLocal +++ * the number of local variables in the visited frame. +++ * @param local +++ * the local variable types in this frame. This array must not be +++ * modified. Primitive types are represented by +++ * {@link Opcodes#TOP}, {@link Opcodes#INTEGER}, +++ * {@link Opcodes#FLOAT}, {@link Opcodes#LONG}, +++ * {@link Opcodes#DOUBLE},{@link Opcodes#NULL} or +++ * {@link Opcodes#UNINITIALIZED_THIS} (long and double are +++ * represented by a single element). Reference types are +++ * represented by String objects (representing internal names), +++ * and uninitialized types by Label objects (this label +++ * designates the NEW instruction that created this uninitialized +++ * value). +++ * @param nStack +++ * the number of operand stack elements in the visited frame. +++ * @param stack +++ * the operand stack types in this frame. This array must not be +++ * modified. Its content has the same format as the "local" +++ * array. +++ * @throws IllegalStateException +++ * if a frame is visited just after another one, without any +++ * instruction between the two (unless this frame is a +++ * Opcodes#F_SAME frame, in which case it is silently ignored). +++ */ +++ public void visitFrame(int type, int nLocal, Object[] local, int nStack, +++ Object[] stack) { +++ if (mv != null) { +++ mv.visitFrame(type, nLocal, local, nStack, stack); +++ } +++ } +++ +++ // ------------------------------------------------------------------------- +++ // Normal instructions +++ // ------------------------------------------------------------------------- +++ +++ /** +++ * Visits a zero operand instruction. +++ * +++ * @param opcode +++ * the opcode of the instruction to be visited. 
This opcode is +++ * either NOP, ACONST_NULL, ICONST_M1, ICONST_0, ICONST_1, +++ * ICONST_2, ICONST_3, ICONST_4, ICONST_5, LCONST_0, LCONST_1, +++ * FCONST_0, FCONST_1, FCONST_2, DCONST_0, DCONST_1, IALOAD, +++ * LALOAD, FALOAD, DALOAD, AALOAD, BALOAD, CALOAD, SALOAD, +++ * IASTORE, LASTORE, FASTORE, DASTORE, AASTORE, BASTORE, CASTORE, +++ * SASTORE, POP, POP2, DUP, DUP_X1, DUP_X2, DUP2, DUP2_X1, +++ * DUP2_X2, SWAP, IADD, LADD, FADD, DADD, ISUB, LSUB, FSUB, DSUB, +++ * IMUL, LMUL, FMUL, DMUL, IDIV, LDIV, FDIV, DDIV, IREM, LREM, +++ * FREM, DREM, INEG, LNEG, FNEG, DNEG, ISHL, LSHL, ISHR, LSHR, +++ * IUSHR, LUSHR, IAND, LAND, IOR, LOR, IXOR, LXOR, I2L, I2F, I2D, +++ * L2I, L2F, L2D, F2I, F2L, F2D, D2I, D2L, D2F, I2B, I2C, I2S, +++ * LCMP, FCMPL, FCMPG, DCMPL, DCMPG, IRETURN, LRETURN, FRETURN, +++ * DRETURN, ARETURN, RETURN, ARRAYLENGTH, ATHROW, MONITORENTER, +++ * or MONITOREXIT. +++ */ +++ public void visitInsn(int opcode) { +++ if (mv != null) { +++ mv.visitInsn(opcode); +++ } +++ } +++ +++ /** +++ * Visits an instruction with a single int operand. +++ * +++ * @param opcode +++ * the opcode of the instruction to be visited. This opcode is +++ * either BIPUSH, SIPUSH or NEWARRAY. +++ * @param operand +++ * the operand of the instruction to be visited.<br> +++ * When opcode is BIPUSH, operand value should be between +++ * Byte.MIN_VALUE and Byte.MAX_VALUE.<br> +++ * When opcode is SIPUSH, operand value should be between +++ * Short.MIN_VALUE and Short.MAX_VALUE.<br> +++ * When opcode is NEWARRAY, operand value should be one of +++ * {@link Opcodes#T_BOOLEAN}, {@link Opcodes#T_CHAR}, +++ * {@link Opcodes#T_FLOAT}, {@link Opcodes#T_DOUBLE}, +++ * {@link Opcodes#T_BYTE}, {@link Opcodes#T_SHORT}, +++ * {@link Opcodes#T_INT} or {@link Opcodes#T_LONG}. +++ */ +++ public void visitIntInsn(int opcode, int operand) { +++ if (mv != null) { +++ mv.visitIntInsn(opcode, operand); +++ } +++ } +++ +++ /** +++ * Visits a local variable instruction. 
A local variable instruction is an +++ * instruction that loads or stores the value of a local variable. +++ * +++ * @param opcode +++ * the opcode of the local variable instruction to be visited. +++ * This opcode is either ILOAD, LLOAD, FLOAD, DLOAD, ALOAD, +++ * ISTORE, LSTORE, FSTORE, DSTORE, ASTORE or RET. +++ * @param var +++ * the operand of the instruction to be visited. This operand is +++ * the index of a local variable. +++ */ +++ public void visitVarInsn(int opcode, int var) { +++ if (mv != null) { +++ mv.visitVarInsn(opcode, var); +++ } +++ } +++ +++ /** +++ * Visits a type instruction. A type instruction is an instruction that +++ * takes the internal name of a class as parameter. +++ * +++ * @param opcode +++ * the opcode of the type instruction to be visited. This opcode +++ * is either NEW, ANEWARRAY, CHECKCAST or INSTANCEOF. +++ * @param type +++ * the operand of the instruction to be visited. This operand +++ * must be the internal name of an object or array class (see +++ * {@link Type#getInternalName() getInternalName}). +++ */ +++ public void visitTypeInsn(int opcode, String type) { +++ if (mv != null) { +++ mv.visitTypeInsn(opcode, type); +++ } +++ } +++ +++ /** +++ * Visits a field instruction. A field instruction is an instruction that +++ * loads or stores the value of a field of an object. +++ * +++ * @param opcode +++ * the opcode of the type instruction to be visited. This opcode +++ * is either GETSTATIC, PUTSTATIC, GETFIELD or PUTFIELD. +++ * @param owner +++ * the internal name of the field's owner class (see +++ * {@link Type#getInternalName() getInternalName}). +++ * @param name +++ * the field's name. +++ * @param desc +++ * the field's descriptor (see {@link Type Type}). +++ */ +++ public void visitFieldInsn(int opcode, String owner, String name, +++ String desc) { +++ if (mv != null) { +++ mv.visitFieldInsn(opcode, owner, name, desc); +++ } +++ } +++ +++ /** +++ * Visits a method instruction. 
A method instruction is an instruction that +++ * invokes a method. +++ * +++ * @param opcode +++ * the opcode of the type instruction to be visited. This opcode +++ * is either INVOKEVIRTUAL, INVOKESPECIAL, INVOKESTATIC or +++ * INVOKEINTERFACE. +++ * @param owner +++ * the internal name of the method's owner class (see +++ * {@link Type#getInternalName() getInternalName}). +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor (see {@link Type Type}). +++ */ +++ @Deprecated +++ public void visitMethodInsn(int opcode, String owner, String name, +++ String desc) { +++ if (api >= Opcodes.ASM5) { +++ boolean itf = opcode == Opcodes.INVOKEINTERFACE; +++ visitMethodInsn(opcode, owner, name, desc, itf); +++ return; +++ } +++ if (mv != null) { +++ mv.visitMethodInsn(opcode, owner, name, desc); +++ } +++ } +++ +++ /** +++ * Visits a method instruction. A method instruction is an instruction that +++ * invokes a method. +++ * +++ * @param opcode +++ * the opcode of the type instruction to be visited. This opcode +++ * is either INVOKEVIRTUAL, INVOKESPECIAL, INVOKESTATIC or +++ * INVOKEINTERFACE. +++ * @param owner +++ * the internal name of the method's owner class (see +++ * {@link Type#getInternalName() getInternalName}). +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor (see {@link Type Type}). +++ * @param itf +++ * if the method's owner class is an interface. +++ */ +++ public void visitMethodInsn(int opcode, String owner, String name, +++ String desc, boolean itf) { +++ if (api < Opcodes.ASM5) { +++ if (itf != (opcode == Opcodes.INVOKEINTERFACE)) { +++ throw new IllegalArgumentException( +++ "INVOKESPECIAL/STATIC on interfaces require ASM 5"); +++ } +++ visitMethodInsn(opcode, owner, name, desc); +++ return; +++ } +++ if (mv != null) { +++ mv.visitMethodInsn(opcode, owner, name, desc, itf); +++ } +++ } +++ +++ /** +++ * Visits an invokedynamic instruction. 
+++ * +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor (see {@link Type Type}). +++ * @param bsm +++ * the bootstrap method. +++ * @param bsmArgs +++ * the bootstrap method constant arguments. Each argument must be +++ * an {@link Integer}, {@link Float}, {@link Long}, +++ * {@link Double}, {@link String}, {@link Type} or {@link Handle} +++ * value. This method is allowed to modify the content of the +++ * array so a caller should expect that this array may change. +++ */ +++ public void visitInvokeDynamicInsn(String name, String desc, Handle bsm, +++ Object... bsmArgs) { +++ if (mv != null) { +++ mv.visitInvokeDynamicInsn(name, desc, bsm, bsmArgs); +++ } +++ } +++ +++ /** +++ * Visits a jump instruction. A jump instruction is an instruction that may +++ * jump to another instruction. +++ * +++ * @param opcode +++ * the opcode of the type instruction to be visited. This opcode +++ * is either IFEQ, IFNE, IFLT, IFGE, IFGT, IFLE, IF_ICMPEQ, +++ * IF_ICMPNE, IF_ICMPLT, IF_ICMPGE, IF_ICMPGT, IF_ICMPLE, +++ * IF_ACMPEQ, IF_ACMPNE, GOTO, JSR, IFNULL or IFNONNULL. +++ * @param label +++ * the operand of the instruction to be visited. This operand is +++ * a label that designates the instruction to which the jump +++ * instruction may jump. +++ */ +++ public void visitJumpInsn(int opcode, Label label) { +++ if (mv != null) { +++ mv.visitJumpInsn(opcode, label); +++ } +++ } +++ +++ /** +++ * Visits a label. A label designates the instruction that will be visited +++ * just after it. +++ * +++ * @param label +++ * a {@link Label Label} object. +++ */ +++ public void visitLabel(Label label) { +++ if (mv != null) { +++ mv.visitLabel(label); +++ } +++ } +++ +++ // ------------------------------------------------------------------------- +++ // Special instructions +++ // ------------------------------------------------------------------------- +++ +++ /** +++ * Visits a LDC instruction. 
Note that new constant types may be added in +++ * future versions of the Java Virtual Machine. To easily detect new +++ * constant types, implementations of this method should check for +++ * unexpected constant types, like this: +++ * +++ * <pre> +++ * if (cst instanceof Integer) { +++ * // ... +++ * } else if (cst instanceof Float) { +++ * // ... +++ * } else if (cst instanceof Long) { +++ * // ... +++ * } else if (cst instanceof Double) { +++ * // ... +++ * } else if (cst instanceof String) { +++ * // ... +++ * } else if (cst instanceof Type) { +++ * int sort = ((Type) cst).getSort(); +++ * if (sort == Type.OBJECT) { +++ * // ... +++ * } else if (sort == Type.ARRAY) { +++ * // ... +++ * } else if (sort == Type.METHOD) { +++ * // ... +++ * } else { +++ * // throw an exception +++ * } +++ * } else if (cst instanceof Handle) { +++ * // ... +++ * } else { +++ * // throw an exception +++ * } +++ * </pre> +++ * +++ * @param cst +++ * the constant to be loaded on the stack. This parameter must be +++ * a non null {@link Integer}, a {@link Float}, a {@link Long}, a +++ * {@link Double}, a {@link String}, a {@link Type} of OBJECT or +++ * ARRAY sort for <tt>.class</tt> constants, for classes whose +++ * version is 49.0, a {@link Type} of METHOD sort or a +++ * {@link Handle} for MethodType and MethodHandle constants, for +++ * classes whose version is 51.0. +++ */ +++ public void visitLdcInsn(Object cst) { +++ if (mv != null) { +++ mv.visitLdcInsn(cst); +++ } +++ } +++ +++ /** +++ * Visits an IINC instruction. +++ * +++ * @param var +++ * index of the local variable to be incremented. +++ * @param increment +++ * amount to increment the local variable by. +++ */ +++ public void visitIincInsn(int var, int increment) { +++ if (mv != null) { +++ mv.visitIincInsn(var, increment); +++ } +++ } +++ +++ /** +++ * Visits a TABLESWITCH instruction. +++ * +++ * @param min +++ * the minimum key value. +++ * @param max +++ * the maximum key value. 
+++ * @param dflt +++ * beginning of the default handler block. +++ * @param labels +++ * beginnings of the handler blocks. <tt>labels[i]</tt> is the +++ * beginning of the handler block for the <tt>min + i</tt> key. +++ */ +++ public void visitTableSwitchInsn(int min, int max, Label dflt, +++ Label... labels) { +++ if (mv != null) { +++ mv.visitTableSwitchInsn(min, max, dflt, labels); +++ } +++ } +++ +++ /** +++ * Visits a LOOKUPSWITCH instruction. +++ * +++ * @param dflt +++ * beginning of the default handler block. +++ * @param keys +++ * the values of the keys. +++ * @param labels +++ * beginnings of the handler blocks. <tt>labels[i]</tt> is the +++ * beginning of the handler block for the <tt>keys[i]</tt> key. +++ */ +++ public void visitLookupSwitchInsn(Label dflt, int[] keys, Label[] labels) { +++ if (mv != null) { +++ mv.visitLookupSwitchInsn(dflt, keys, labels); +++ } +++ } +++ +++ /** +++ * Visits a MULTIANEWARRAY instruction. +++ * +++ * @param desc +++ * an array type descriptor (see {@link Type Type}). +++ * @param dims +++ * number of dimensions of the array to allocate. +++ */ +++ public void visitMultiANewArrayInsn(String desc, int dims) { +++ if (mv != null) { +++ mv.visitMultiANewArrayInsn(desc, dims); +++ } +++ } +++ +++ /** +++ * Visits an annotation on an instruction. This method must be called just +++ * <i>after</i> the annotated instruction. It can be called several times +++ * for the same instruction. +++ * +++ * @param typeRef +++ * a reference to the annotated type. 
The sort of this type +++ * reference must be {@link TypeReference#INSTANCEOF INSTANCEOF}, +++ * {@link TypeReference#NEW NEW}, +++ * {@link TypeReference#CONSTRUCTOR_REFERENCE +++ * CONSTRUCTOR_REFERENCE}, {@link TypeReference#METHOD_REFERENCE +++ * METHOD_REFERENCE}, {@link TypeReference#CAST CAST}, +++ * {@link TypeReference#CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ * CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT}, +++ * {@link TypeReference#METHOD_INVOCATION_TYPE_ARGUMENT +++ * METHOD_INVOCATION_TYPE_ARGUMENT}, +++ * {@link TypeReference#CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ * CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT}, or +++ * {@link TypeReference#METHOD_REFERENCE_TYPE_ARGUMENT +++ * METHOD_REFERENCE_TYPE_ARGUMENT}. See {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * <tt>null</tt> if the annotation targets 'typeRef' as a whole. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * <tt>true</tt> if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or <tt>null</tt> if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitInsnAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (mv != null) { +++ return mv.visitInsnAnnotation(typeRef, typePath, desc, visible); +++ } +++ return null; +++ } +++ +++ // ------------------------------------------------------------------------- +++ // Exceptions table entries, debug information, max stack and max locals +++ // ------------------------------------------------------------------------- +++ +++ /** +++ * Visits a try catch block. +++ * +++ * @param start +++ * beginning of the exception handler's scope (inclusive). 
+++ * @param end +++ * end of the exception handler's scope (exclusive). +++ * @param handler +++ * beginning of the exception handler's code. +++ * @param type +++ * internal name of the type of exceptions handled by the +++ * handler, or <tt>null</tt> to catch any exceptions (for +++ * "finally" blocks). +++ * @throws IllegalArgumentException +++ * if one of the labels has already been visited by this visitor +++ * (by the {@link #visitLabel visitLabel} method). +++ */ +++ public void visitTryCatchBlock(Label start, Label end, Label handler, +++ String type) { +++ if (mv != null) { +++ mv.visitTryCatchBlock(start, end, handler, type); +++ } +++ } +++ +++ /** +++ * Visits an annotation on an exception handler type. This method must be +++ * called <i>after</i> the {@link #visitTryCatchBlock} for the annotated +++ * exception handler. It can be called several times for the same exception +++ * handler. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#EXCEPTION_PARAMETER +++ * EXCEPTION_PARAMETER}. See {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * <tt>null</tt> if the annotation targets 'typeRef' as a whole. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * <tt>true</tt> if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or <tt>null</tt> if +++ * this visitor is not interested in visiting this annotation. 
+++ */ +++ public AnnotationVisitor visitTryCatchAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (mv != null) { +++ return mv.visitTryCatchAnnotation(typeRef, typePath, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a local variable declaration. +++ * +++ * @param name +++ * the name of a local variable. +++ * @param desc +++ * the type descriptor of this local variable. +++ * @param signature +++ * the type signature of this local variable. May be +++ * <tt>null</tt> if the local variable type does not use generic +++ * types. +++ * @param start +++ * the first instruction corresponding to the scope of this local +++ * variable (inclusive). +++ * @param end +++ * the last instruction corresponding to the scope of this local +++ * variable (exclusive). +++ * @param index +++ * the local variable's index. +++ * @throws IllegalArgumentException +++ * if one of the labels has not already been visited by this +++ * visitor (by the {@link #visitLabel visitLabel} method). +++ */ +++ public void visitLocalVariable(String name, String desc, String signature, +++ Label start, Label end, int index) { +++ if (mv != null) { +++ mv.visitLocalVariable(name, desc, signature, start, end, index); +++ } +++ } +++ +++ /** +++ * Visits an annotation on a local variable type. +++ * +++ * @param typeRef +++ * a reference to the annotated type. The sort of this type +++ * reference must be {@link TypeReference#LOCAL_VARIABLE +++ * LOCAL_VARIABLE} or {@link TypeReference#RESOURCE_VARIABLE +++ * RESOURCE_VARIABLE}. See {@link TypeReference}. +++ * @param typePath +++ * the path to the annotated type argument, wildcard bound, array +++ * element type, or static inner type within 'typeRef'. May be +++ * <tt>null</tt> if the annotation targets 'typeRef' as a whole. 
+++ * @param start +++ * the first instructions corresponding to the continuous ranges +++ * that make the scope of this local variable (inclusive). +++ * @param end +++ * the last instructions corresponding to the continuous ranges +++ * that make the scope of this local variable (exclusive). This +++ * array must have the same size as the 'start' array. +++ * @param index +++ * the local variable's index in each range. This array must have +++ * the same size as the 'start' array. +++ * @param desc +++ * the class descriptor of the annotation class. +++ * @param visible +++ * <tt>true</tt> if the annotation is visible at runtime. +++ * @return a visitor to visit the annotation values, or <tt>null</tt> if +++ * this visitor is not interested in visiting this annotation. +++ */ +++ public AnnotationVisitor visitLocalVariableAnnotation(int typeRef, +++ TypePath typePath, Label[] start, Label[] end, int[] index, +++ String desc, boolean visible) { +++ if (api < Opcodes.ASM5) { +++ throw new RuntimeException(); +++ } +++ if (mv != null) { +++ return mv.visitLocalVariableAnnotation(typeRef, typePath, start, +++ end, index, desc, visible); +++ } +++ return null; +++ } +++ +++ /** +++ * Visits a line number declaration. +++ * +++ * @param line +++ * a line number. This number refers to the source file from +++ * which the class was compiled. +++ * @param start +++ * the first instruction corresponding to this line number. +++ * @throws IllegalArgumentException +++ * if <tt>start</tt> has not already been visited by this +++ * visitor (by the {@link #visitLabel visitLabel} method). +++ */ +++ public void visitLineNumber(int line, Label start) { +++ if (mv != null) { +++ mv.visitLineNumber(line, start); +++ } +++ } +++ +++ /** +++ * Visits the maximum stack size and the maximum number of local variables +++ * of the method. +++ * +++ * @param maxStack +++ * maximum stack size of the method. +++ * @param maxLocals +++ * maximum number of local variables for the method.
+++ */ +++ public void visitMaxs(int maxStack, int maxLocals) { +++ if (mv != null) { +++ mv.visitMaxs(maxStack, maxLocals); +++ } +++ } +++ +++ /** +++ * Visits the end of the method. This method, which is the last one to be +++ * called, is used to inform the visitor that all the annotations and +++ * attributes of the method have been visited. +++ */ +++ public void visitEnd() { +++ if (mv != null) { +++ mv.visitEnd(); +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/MethodWriter.java b/contrib/asm/src/org/objectweb/asm/MethodWriter.java ++new file mode 100644 ++index 0000000..ceca3f8 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/MethodWriter.java ++@@ -0,0 +1,2915 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * A {@link MethodVisitor} that generates methods in bytecode form. Each visit +++ * method of this class appends the bytecode corresponding to the visited +++ * instruction to a byte vector, in the order these methods are called. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++class MethodWriter extends MethodVisitor { +++ +++ /** +++ * Pseudo access flag used to denote constructors. +++ */ +++ static final int ACC_CONSTRUCTOR = 0x80000; +++ +++ /** +++ * Frame has exactly the same locals as the previous stack map frame and +++ * number of stack items is zero. +++ */ +++ static final int SAME_FRAME = 0; // to 63 (0-3f) +++ +++ /** +++ * Frame has exactly the same locals as the previous stack map frame and +++ * number of stack items is 1 +++ */ +++ static final int SAME_LOCALS_1_STACK_ITEM_FRAME = 64; // to 127 (40-7f) +++ +++ /** +++ * Reserved for future use +++ */ +++ static final int RESERVED = 128; +++ +++ /** +++ * Frame has exactly the same locals as the previous stack map frame and +++ * number of stack items is 1. Offset is bigger than 63; +++ */ +++ static final int SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED = 247; // f7 +++ +++ /** +++ * Frame where current locals are the same as the locals in the previous +++ * frame, except that the k last locals are absent. The value of k is given +++ * by the formula 251-frame_type.
+++ */ +++ static final int CHOP_FRAME = 248; // to 250 (f8-fA) +++ +++ /** +++ * Frame has exactly the same locals as the previous stack map frame and +++ * number of stack items is zero. Offset is bigger than 63; +++ */ +++ static final int SAME_FRAME_EXTENDED = 251; // fb +++ +++ /** +++ * Frame where current locals are the same as the locals in the previous +++ * frame, except that k additional locals are defined. The value of k is +++ * given by the formula frame_type-251. +++ */ +++ static final int APPEND_FRAME = 252; // to 254 // fc-fe +++ +++ /** +++ * Full frame +++ */ +++ static final int FULL_FRAME = 255; // ff +++ +++ /** +++ * Indicates that the stack map frames must be recomputed from scratch. In +++ * this case the maximum stack size and number of local variables is also +++ * recomputed from scratch. +++ * +++ * @see #compute +++ */ +++ private static final int FRAMES = 0; +++ +++ /** +++ * Indicates that the maximum stack size and number of local variables must +++ * be automatically computed. +++ * +++ * @see #compute +++ */ +++ private static final int MAXS = 1; +++ +++ /** +++ * Indicates that nothing must be automatically computed. +++ * +++ * @see #compute +++ */ +++ private static final int NOTHING = 2; +++ +++ /** +++ * The class writer to which this method must be added. +++ */ +++ final ClassWriter cw; +++ +++ /** +++ * Access flags of this method. +++ */ +++ private int access; +++ +++ /** +++ * The index of the constant pool item that contains the name of this +++ * method. +++ */ +++ private final int name; +++ +++ /** +++ * The index of the constant pool item that contains the descriptor of this +++ * method. +++ */ +++ private final int desc; +++ +++ /** +++ * The descriptor of this method. +++ */ +++ private final String descriptor; +++ +++ /** +++ * The signature of this method.
+++ */ +++ String signature; +++ +++ /** +++ * If not zero, indicates that the code of this method must be copied from +++ * the ClassReader associated to this writer in <code>cw.cr</code>. More +++ * precisely, this field gives the index of the first byte to be copied from +++ * <code>cw.cr.b</code>. +++ */ +++ int classReaderOffset; +++ +++ /** +++ * If not zero, indicates that the code of this method must be copied from +++ * the ClassReader associated to this writer in <code>cw.cr</code>. More +++ * precisely, this field gives the number of bytes to be copied from +++ * <code>cw.cr.b</code>. +++ */ +++ int classReaderLength; +++ +++ /** +++ * Number of exceptions that can be thrown by this method. +++ */ +++ int exceptionCount; +++ +++ /** +++ * The exceptions that can be thrown by this method. More precisely, this +++ * array contains the indexes of the constant pool items that contain the +++ * internal names of these exception classes. +++ */ +++ int[] exceptions; +++ +++ /** +++ * The annotation default attribute of this method. May be <tt>null</tt>. +++ */ +++ private ByteVector annd; +++ +++ /** +++ * The runtime visible annotations of this method. May be <tt>null</tt>. +++ */ +++ private AnnotationWriter anns; +++ +++ /** +++ * The runtime invisible annotations of this method. May be <tt>null</tt>. +++ */ +++ private AnnotationWriter ianns; +++ +++ /** +++ * The runtime visible type annotations of this method. May be <tt>null</tt> +++ * . +++ */ +++ private AnnotationWriter tanns; +++ +++ /** +++ * The runtime invisible type annotations of this method. May be +++ * <tt>null</tt>. +++ */ +++ private AnnotationWriter itanns; +++ +++ /** +++ * The runtime visible parameter annotations of this method. May be +++ * <tt>null</tt>. +++ */ +++ private AnnotationWriter[] panns; +++ +++ /** +++ * The runtime invisible parameter annotations of this method. May be +++ * <tt>null</tt>.
+++ */ +++ private AnnotationWriter[] ipanns; +++ +++ /** +++ * The number of synthetic parameters of this method. +++ */ +++ private int synthetics; +++ +++ /** +++ * The non standard attributes of the method. +++ */ +++ private Attribute attrs; +++ +++ /** +++ * The bytecode of this method. +++ */ +++ private ByteVector code = new ByteVector(); +++ +++ /** +++ * Maximum stack size of this method. +++ */ +++ private int maxStack; +++ +++ /** +++ * Maximum number of local variables for this method. +++ */ +++ private int maxLocals; +++ +++ /** +++ * Number of local variables in the current stack map frame. +++ */ +++ private int currentLocals; +++ +++ /** +++ * Number of stack map frames in the StackMapTable attribute. +++ */ +++ private int frameCount; +++ +++ /** +++ * The StackMapTable attribute. +++ */ +++ private ByteVector stackMap; +++ +++ /** +++ * The offset of the last frame that was written in the StackMapTable +++ * attribute. +++ */ +++ private int previousFrameOffset; +++ +++ /** +++ * The last frame that was written in the StackMapTable attribute. +++ * +++ * @see #frame +++ */ +++ private int[] previousFrame; +++ +++ /** +++ * The current stack map frame. The first element contains the offset of the +++ * instruction to which the frame corresponds, the second element is the +++ * number of locals and the third one is the number of stack elements. The +++ * local variables start at index 3 and are followed by the operand stack +++ * values. In summary frame[0] = offset, frame[1] = nLocal, frame[2] = +++ * nStack, frame[3] = nLocal. All types are encoded as integers, with the +++ * same format as the one used in {@link Label}, but limited to BASE types. +++ */ +++ private int[] frame; +++ +++ /** +++ * Number of elements in the exception handler list. +++ */ +++ private int handlerCount; +++ +++ /** +++ * The first element in the exception handler list. 
+++ */ +++ private Handler firstHandler; +++ +++ /** +++ * The last element in the exception handler list. +++ */ +++ private Handler lastHandler; +++ +++ /** +++ * Number of entries in the MethodParameters attribute. +++ */ +++ private int methodParametersCount; +++ +++ /** +++ * The MethodParameters attribute. +++ */ +++ private ByteVector methodParameters; +++ +++ /** +++ * Number of entries in the LocalVariableTable attribute. +++ */ +++ private int localVarCount; +++ +++ /** +++ * The LocalVariableTable attribute. +++ */ +++ private ByteVector localVar; +++ +++ /** +++ * Number of entries in the LocalVariableTypeTable attribute. +++ */ +++ private int localVarTypeCount; +++ +++ /** +++ * The LocalVariableTypeTable attribute. +++ */ +++ private ByteVector localVarType; +++ +++ /** +++ * Number of entries in the LineNumberTable attribute. +++ */ +++ private int lineNumberCount; +++ +++ /** +++ * The LineNumberTable attribute. +++ */ +++ private ByteVector lineNumber; +++ +++ /** +++ * The start offset of the last visited instruction. +++ */ +++ private int lastCodeOffset; +++ +++ /** +++ * The runtime visible type annotations of the code. May be <tt>null</tt>. +++ */ +++ private AnnotationWriter ctanns; +++ +++ /** +++ * The runtime invisible type annotations of the code. May be <tt>null</tt>. +++ */ +++ private AnnotationWriter ictanns; +++ +++ /** +++ * The non standard attributes of the method's code. +++ */ +++ private Attribute cattrs; +++ +++ /** +++ * Indicates if some jump instructions are too small and need to be resized. +++ */ +++ private boolean resize; +++ +++ /** +++ * The number of subroutines in this method. +++ */ +++ private int subroutines; +++ +++ // ------------------------------------------------------------------------ +++ +++ /* +++ * Fields for the control flow graph analysis algorithm (used to compute the +++ * maximum stack size). 
A control flow graph contains one node per "basic +++ * block", and one edge per "jump" from one basic block to another. Each +++ * node (i.e., each basic block) is represented by the Label object that +++ * corresponds to the first instruction of this basic block. Each node also +++ * stores the list of its successors in the graph, as a linked list of Edge +++ * objects. +++ */ +++ +++ /** +++ * Indicates what must be automatically computed. +++ * +++ * @see #FRAMES +++ * @see #MAXS +++ * @see #NOTHING +++ */ +++ private final int compute; +++ +++ /** +++ * A list of labels. This list is the list of basic blocks in the method, +++ * i.e. a list of Label objects linked to each other by their +++ * {@link Label#successor} field, in the order they are visited by +++ * {@link MethodVisitor#visitLabel}, and starting with the first basic +++ * block. +++ */ +++ private Label labels; +++ +++ /** +++ * The previous basic block. +++ */ +++ private Label previousBlock; +++ +++ /** +++ * The current basic block. +++ */ +++ private Label currentBlock; +++ +++ /** +++ * The (relative) stack size after the last visited instruction. This size +++ * is relative to the beginning of the current basic block, i.e., the true +++ * stack size after the last visited instruction is equal to the +++ * {@link Label#inputStackTop beginStackSize} of the current basic block +++ * plus <tt>stackSize</tt>. +++ */ +++ private int stackSize; +++ +++ /** +++ * The (relative) maximum stack size after the last visited instruction. +++ * This size is relative to the beginning of the current basic block, i.e., +++ * the true maximum stack size after the last visited instruction is equal +++ * to the {@link Label#inputStackTop beginStackSize} of the current basic +++ * block plus <tt>maxStackSize</tt>.
+++ */ +++ private int maxStackSize; +++ +++ // ------------------------------------------------------------------------ +++ // Constructor +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a new {@link MethodWriter}. +++ * +++ * @param cw +++ * the class writer in which the method must be added. +++ * @param access +++ * the method's access flags (see {@link Opcodes}). +++ * @param name +++ * the method's name. +++ * @param desc +++ * the method's descriptor (see {@link Type}). +++ * @param signature +++ * the method's signature. May be <tt>null</tt>. +++ * @param exceptions +++ * the internal names of the method's exceptions. May be +++ * <tt>null</tt>. +++ * @param computeMaxs +++ * <tt>true</tt> if the maximum stack size and number of local +++ * variables must be automatically computed. +++ * @param computeFrames +++ * <tt>true</tt> if the stack map tables must be recomputed from +++ * scratch. +++ */ +++ MethodWriter(final ClassWriter cw, final int access, final String name, +++ final String desc, final String signature, +++ final String[] exceptions, final boolean computeMaxs, +++ final boolean computeFrames) { +++ super(Opcodes.ASM5); +++ if (cw.firstMethod == null) { +++ cw.firstMethod = this; +++ } else { +++ cw.lastMethod.mv = this; +++ } +++ cw.lastMethod = this; +++ this.cw = cw; +++ this.access = access; +++ if ("<init>".equals(name)) { +++ this.access |= ACC_CONSTRUCTOR; +++ } +++ this.name = cw.newUTF8(name); +++ this.desc = cw.newUTF8(desc); +++ this.descriptor = desc; +++ if (ClassReader.SIGNATURES) { +++ this.signature = signature; +++ } +++ if (exceptions != null && exceptions.length > 0) { +++ exceptionCount = exceptions.length; +++ this.exceptions = new int[exceptionCount]; +++ for (int i = 0; i < exceptionCount; ++i) { +++ this.exceptions[i] = cw.newClass(exceptions[i]); +++ } +++ } +++ this.compute = computeFrames ? FRAMES : (computeMaxs ? 
MAXS : NOTHING); +++ if (computeMaxs || computeFrames) { +++ // updates maxLocals +++ int size = Type.getArgumentsAndReturnSizes(descriptor) >> 2; +++ if ((access & Opcodes.ACC_STATIC) != 0) { +++ --size; +++ } +++ maxLocals = size; +++ currentLocals = size; +++ // creates and visits the label for the first basic block +++ labels = new Label(); +++ labels.status |= Label.PUSHED; +++ visitLabel(labels); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Implementation of the MethodVisitor abstract class +++ // ------------------------------------------------------------------------ +++ +++ @Override +++ public void visitParameter(String name, int access) { +++ if (methodParameters == null) { +++ methodParameters = new ByteVector(); +++ } +++ ++methodParametersCount; +++ methodParameters.putShort((name == null) ? 0 : cw.newUTF8(name)) +++ .putShort(access); +++ } +++ +++ @Override +++ public AnnotationVisitor visitAnnotationDefault() { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ annd = new ByteVector(); +++ return new AnnotationWriter(cw, false, annd, null, 0); +++ } +++ +++ @Override +++ public AnnotationVisitor visitAnnotation(final String desc, +++ final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, 2); +++ if (visible) { +++ aw.next = anns; +++ anns = aw; +++ } else { +++ aw.next = ianns; +++ ianns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public AnnotationVisitor visitTypeAnnotation(final int typeRef, +++ final TypePath typePath, final String desc, final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ AnnotationWriter.putTarget(typeRef, 
typePath, bv); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = tanns; +++ tanns = aw; +++ } else { +++ aw.next = itanns; +++ itanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public AnnotationVisitor visitParameterAnnotation(final int parameter, +++ final String desc, final boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ if ("Ljava/lang/Synthetic;".equals(desc)) { +++ // workaround for a bug in javac with synthetic parameters +++ // see ClassReader.readParameterAnnotations +++ synthetics = Math.max(synthetics, parameter + 1); +++ return new AnnotationWriter(cw, false, bv, null, 0); +++ } +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, 2); +++ if (visible) { +++ if (panns == null) { +++ panns = new AnnotationWriter[Type.getArgumentTypes(descriptor).length]; +++ } +++ aw.next = panns[parameter]; +++ panns[parameter] = aw; +++ } else { +++ if (ipanns == null) { +++ ipanns = new AnnotationWriter[Type.getArgumentTypes(descriptor).length]; +++ } +++ aw.next = ipanns[parameter]; +++ ipanns[parameter] = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public void visitAttribute(final Attribute attr) { +++ if (attr.isCodeAttribute()) { +++ attr.next = cattrs; +++ cattrs = attr; +++ } else { +++ attr.next = attrs; +++ attrs = attr; +++ } +++ } +++ +++ @Override +++ public void visitCode() { +++ } +++ +++ @Override +++ public void visitFrame(final int type, final int nLocal, +++ final Object[] local, final int nStack, final Object[] stack) { +++ if (!ClassReader.FRAMES || compute == FRAMES) { +++ return; +++ } +++ +++ if (type == Opcodes.F_NEW) { +++ if (previousFrame == null) { +++ 
visitImplicitFirstFrame(); +++ } +++ currentLocals = nLocal; +++ int frameIndex = startFrame(code.length, nLocal, nStack); +++ for (int i = 0; i < nLocal; ++i) { +++ if (local[i] instanceof String) { +++ frame[frameIndex++] = Frame.OBJECT +++ | cw.addType((String) local[i]); +++ } else if (local[i] instanceof Integer) { +++ frame[frameIndex++] = ((Integer) local[i]).intValue(); +++ } else { +++ frame[frameIndex++] = Frame.UNINITIALIZED +++ | cw.addUninitializedType("", +++ ((Label) local[i]).position); +++ } +++ } +++ for (int i = 0; i < nStack; ++i) { +++ if (stack[i] instanceof String) { +++ frame[frameIndex++] = Frame.OBJECT +++ | cw.addType((String) stack[i]); +++ } else if (stack[i] instanceof Integer) { +++ frame[frameIndex++] = ((Integer) stack[i]).intValue(); +++ } else { +++ frame[frameIndex++] = Frame.UNINITIALIZED +++ | cw.addUninitializedType("", +++ ((Label) stack[i]).position); +++ } +++ } +++ endFrame(); +++ } else { +++ int delta; +++ if (stackMap == null) { +++ stackMap = new ByteVector(); +++ delta = code.length; +++ } else { +++ delta = code.length - previousFrameOffset - 1; +++ if (delta < 0) { +++ if (type == Opcodes.F_SAME) { +++ return; +++ } else { +++ throw new IllegalStateException(); +++ } +++ } +++ } +++ +++ switch (type) { +++ case Opcodes.F_FULL: +++ currentLocals = nLocal; +++ stackMap.putByte(FULL_FRAME).putShort(delta).putShort(nLocal); +++ for (int i = 0; i < nLocal; ++i) { +++ writeFrameType(local[i]); +++ } +++ stackMap.putShort(nStack); +++ for (int i = 0; i < nStack; ++i) { +++ writeFrameType(stack[i]); +++ } +++ break; +++ case Opcodes.F_APPEND: +++ currentLocals += nLocal; +++ stackMap.putByte(SAME_FRAME_EXTENDED + nLocal).putShort(delta); +++ for (int i = 0; i < nLocal; ++i) { +++ writeFrameType(local[i]); +++ } +++ break; +++ case Opcodes.F_CHOP: +++ currentLocals -= nLocal; +++ stackMap.putByte(SAME_FRAME_EXTENDED - nLocal).putShort(delta); +++ break; +++ case Opcodes.F_SAME: +++ if (delta < 64) { +++ 
stackMap.putByte(delta); +++ } else { +++ stackMap.putByte(SAME_FRAME_EXTENDED).putShort(delta); +++ } +++ break; +++ case Opcodes.F_SAME1: +++ if (delta < 64) { +++ stackMap.putByte(SAME_LOCALS_1_STACK_ITEM_FRAME + delta); +++ } else { +++ stackMap.putByte(SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED) +++ .putShort(delta); +++ } +++ writeFrameType(stack[0]); +++ break; +++ } +++ +++ previousFrameOffset = code.length; +++ ++frameCount; +++ } +++ +++ maxStack = Math.max(maxStack, nStack); +++ maxLocals = Math.max(maxLocals, currentLocals); +++ } +++ +++ @Override +++ public void visitInsn(final int opcode) { +++ lastCodeOffset = code.length; +++ // adds the instruction to the bytecode of the method +++ code.putByte(opcode); +++ // update currentBlock +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, 0, null, null); +++ } else { +++ // updates current and max stack sizes +++ int size = stackSize + Frame.SIZE[opcode]; +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ // if opcode == ATHROW or xRETURN, ends current block (no successor) +++ if ((opcode >= Opcodes.IRETURN && opcode <= Opcodes.RETURN) +++ || opcode == Opcodes.ATHROW) { +++ noSuccessor(); +++ } +++ } +++ } +++ +++ @Override +++ public void visitIntInsn(final int opcode, final int operand) { +++ lastCodeOffset = code.length; +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, operand, null, null); +++ } else if (opcode != Opcodes.NEWARRAY) { +++ // updates current and max stack sizes only for BIPUSH and SIPUSH +++ // (stack size variation = 0 for NEWARRAY) +++ int size = stackSize + 1; +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ if (opcode == Opcodes.SIPUSH) { +++
code.put12(opcode, operand); +++ } else { // BIPUSH or NEWARRAY +++ code.put11(opcode, operand); +++ } +++ } +++ +++ @Override +++ public void visitVarInsn(final int opcode, final int var) { +++ lastCodeOffset = code.length; +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, var, null, null); +++ } else { +++ // updates current and max stack sizes +++ if (opcode == Opcodes.RET) { +++ // no stack change, but end of current block (no successor) +++ currentBlock.status |= Label.RET; +++ // save 'stackSize' here for future use +++ // (see {@link #findSubroutineSuccessors}) +++ currentBlock.inputStackTop = stackSize; +++ noSuccessor(); +++ } else { // xLOAD or xSTORE +++ int size = stackSize + Frame.SIZE[opcode]; +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ } +++ if (compute != NOTHING) { +++ // updates max locals +++ int n; +++ if (opcode == Opcodes.LLOAD || opcode == Opcodes.DLOAD +++ || opcode == Opcodes.LSTORE || opcode == Opcodes.DSTORE) { +++ n = var + 2; +++ } else { +++ n = var + 1; +++ } +++ if (n > maxLocals) { +++ maxLocals = n; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ if (var < 4 && opcode != Opcodes.RET) { +++ int opt; +++ if (opcode < Opcodes.ISTORE) { +++ /* ILOAD_0 */ +++ opt = 26 + ((opcode - Opcodes.ILOAD) << 2) + var; +++ } else { +++ /* ISTORE_0 */ +++ opt = 59 + ((opcode - Opcodes.ISTORE) << 2) + var; +++ } +++ code.putByte(opt); +++ } else if (var >= 256) { +++ code.putByte(196 /* WIDE */).put12(opcode, var); +++ } else { +++ code.put11(opcode, var); +++ } +++ if (opcode >= Opcodes.ISTORE && compute == FRAMES && handlerCount > 0) { +++ visitLabel(new Label()); +++ } +++ } +++ +++ @Override +++ public void visitTypeInsn(final int opcode, final String type) { +++ lastCodeOffset = code.length; +++ Item i = cw.newClassItem(type); +++ // Label currentBlock = 
this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, code.length, cw, i); +++ } else if (opcode == Opcodes.NEW) { +++ // updates current and max stack sizes only if opcode == NEW +++ // (no stack change for ANEWARRAY, CHECKCAST, INSTANCEOF) +++ int size = stackSize + 1; +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ code.put12(opcode, i.index); +++ } +++ +++ @Override +++ public void visitFieldInsn(final int opcode, final String owner, +++ final String name, final String desc) { +++ lastCodeOffset = code.length; +++ Item i = cw.newFieldItem(owner, name, desc); +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, 0, cw, i); +++ } else { +++ int size; +++ // computes the stack size variation +++ char c = desc.charAt(0); +++ switch (opcode) { +++ case Opcodes.GETSTATIC: +++ size = stackSize + (c == 'D' || c == 'J' ? 2 : 1); +++ break; +++ case Opcodes.PUTSTATIC: +++ size = stackSize + (c == 'D' || c == 'J' ? -2 : -1); +++ break; +++ case Opcodes.GETFIELD: +++ size = stackSize + (c == 'D' || c == 'J' ? 1 : 0); +++ break; +++ // case Constants.PUTFIELD: +++ default: +++ size = stackSize + (c == 'D' || c == 'J' ? 
-3 : -2); +++ break; +++ } +++ // updates current and max stack sizes +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ code.put12(opcode, i.index); +++ } +++ +++ @Override +++ public void visitMethodInsn(final int opcode, final String owner, +++ final String name, final String desc, final boolean itf) { +++ lastCodeOffset = code.length; +++ Item i = cw.newMethodItem(owner, name, desc, itf); +++ int argSize = i.intVal; +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, 0, cw, i); +++ } else { +++ /* +++ * computes the stack size variation. In order not to recompute +++ * several times this variation for the same Item, we use the +++ * intVal field of this item to store this variation, once it +++ * has been computed. More precisely this intVal field stores +++ * the sizes of the arguments and of the return value +++ * corresponding to desc. +++ */ +++ if (argSize == 0) { +++ // the above sizes have not been computed yet, +++ // so we compute them... +++ argSize = Type.getArgumentsAndReturnSizes(desc); +++ // ... 
and we save them in order +++ // not to recompute them in the future +++ i.intVal = argSize; +++ } +++ int size; +++ if (opcode == Opcodes.INVOKESTATIC) { +++ size = stackSize - (argSize >> 2) + (argSize & 0x03) + 1; +++ } else { +++ size = stackSize - (argSize >> 2) + (argSize & 0x03); +++ } +++ // updates current and max stack sizes +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ if (opcode == Opcodes.INVOKEINTERFACE) { +++ if (argSize == 0) { +++ argSize = Type.getArgumentsAndReturnSizes(desc); +++ i.intVal = argSize; +++ } +++ code.put12(Opcodes.INVOKEINTERFACE, i.index).put11(argSize >> 2, 0); +++ } else { +++ code.put12(opcode, i.index); +++ } +++ } +++ +++ @Override +++ public void visitInvokeDynamicInsn(final String name, final String desc, +++ final Handle bsm, final Object... bsmArgs) { +++ lastCodeOffset = code.length; +++ Item i = cw.newInvokeDynamicItem(name, desc, bsm, bsmArgs); +++ int argSize = i.intVal; +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(Opcodes.INVOKEDYNAMIC, 0, cw, i); +++ } else { +++ /* +++ * computes the stack size variation. In order not to recompute +++ * several times this variation for the same Item, we use the +++ * intVal field of this item to store this variation, once it +++ * has been computed. More precisely this intVal field stores +++ * the sizes of the arguments and of the return value +++ * corresponding to desc. +++ */ +++ if (argSize == 0) { +++ // the above sizes have not been computed yet, +++ // so we compute them... +++ argSize = Type.getArgumentsAndReturnSizes(desc); +++ // ... 
and we save them in order +++ // not to recompute them in the future +++ i.intVal = argSize; +++ } +++ int size = stackSize - (argSize >> 2) + (argSize & 0x03) + 1; +++ +++ // updates current and max stack sizes +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ code.put12(Opcodes.INVOKEDYNAMIC, i.index); +++ code.putShort(0); +++ } +++ +++ @Override +++ public void visitJumpInsn(final int opcode, final Label label) { +++ lastCodeOffset = code.length; +++ Label nextInsn = null; +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(opcode, 0, null, null); +++ // 'label' is the target of a jump instruction +++ label.getFirst().status |= Label.TARGET; +++ // adds 'label' as a successor of this basic block +++ addSuccessor(Edge.NORMAL, label); +++ if (opcode != Opcodes.GOTO) { +++ // creates a Label for the next basic block +++ nextInsn = new Label(); +++ } +++ } else { +++ if (opcode == Opcodes.JSR) { +++ if ((label.status & Label.SUBROUTINE) == 0) { +++ label.status |= Label.SUBROUTINE; +++ ++subroutines; +++ } +++ currentBlock.status |= Label.JSR; +++ addSuccessor(stackSize + 1, label); +++ // creates a Label for the next basic block +++ nextInsn = new Label(); +++ /* +++ * note that, by construction in this method, a JSR block +++ * has at least two successors in the control flow graph: +++ * the first one leads the next instruction after the JSR, +++ * while the second one leads to the JSR target. 
+++ */ +++ } else { +++ // updates current stack size (max stack size unchanged +++ // because stack size variation always negative in this +++ // case) +++ stackSize += Frame.SIZE[opcode]; +++ addSuccessor(stackSize, label); +++ } +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ if ((label.status & Label.RESOLVED) != 0 +++ && label.position - code.length < Short.MIN_VALUE) { +++ /* +++ * case of a backward jump with an offset < -32768. In this case we +++ * automatically replace GOTO with GOTO_W, JSR with JSR_W and IFxxx +++ * <l> with IFNOTxxx <l'> GOTO_W <l>, where IFNOTxxx is the +++ * "opposite" opcode of IFxxx (i.e., IFNE for IFEQ) and where <l'> +++ * designates the instruction just after the GOTO_W. +++ */ +++ if (opcode == Opcodes.GOTO) { +++ code.putByte(200); // GOTO_W +++ } else if (opcode == Opcodes.JSR) { +++ code.putByte(201); // JSR_W +++ } else { +++ // if the IF instruction is transformed into IFNOT GOTO_W the +++ // next instruction becomes the target of the IFNOT instruction +++ if (nextInsn != null) { +++ nextInsn.status |= Label.TARGET; +++ } +++ code.putByte(opcode <= 166 ? ((opcode + 1) ^ 1) - 1 +++ : opcode ^ 1); +++ code.putShort(8); // jump offset +++ code.putByte(200); // GOTO_W +++ } +++ label.put(this, code, code.length - 1, true); +++ } else { +++ /* +++ * case of a backward jump with an offset >= -32768, or of a forward +++ * jump with, of course, an unknown offset. In these cases we store +++ * the offset in 2 bytes (which will be increased in +++ * resizeInstructions, if needed). +++ */ +++ code.putByte(opcode); +++ label.put(this, code, code.length - 1, false); +++ } +++ if (currentBlock != null) { +++ if (nextInsn != null) { +++ // if the jump instruction is not a GOTO, the next instruction +++ // is also a successor of this instruction. 
Calling visitLabel +++ // adds the label of this next instruction as a successor of the +++ // current block, and starts a new basic block +++ visitLabel(nextInsn); +++ } +++ if (opcode == Opcodes.GOTO) { +++ noSuccessor(); +++ } +++ } +++ } +++ +++ @Override +++ public void visitLabel(final Label label) { +++ // resolves previous forward references to label, if any +++ resize |= label.resolve(this, code.length, code.data); +++ // updates currentBlock +++ if ((label.status & Label.DEBUG) != 0) { +++ return; +++ } +++ if (compute == FRAMES) { +++ if (currentBlock != null) { +++ if (label.position == currentBlock.position) { +++ // successive labels, do not start a new basic block +++ currentBlock.status |= (label.status & Label.TARGET); +++ label.frame = currentBlock.frame; +++ return; +++ } +++ // ends current block (with one new successor) +++ addSuccessor(Edge.NORMAL, label); +++ } +++ // begins a new current block +++ currentBlock = label; +++ if (label.frame == null) { +++ label.frame = new Frame(); +++ label.frame.owner = label; +++ } +++ // updates the basic block list +++ if (previousBlock != null) { +++ if (label.position == previousBlock.position) { +++ previousBlock.status |= (label.status & Label.TARGET); +++ label.frame = previousBlock.frame; +++ currentBlock = previousBlock; +++ return; +++ } +++ previousBlock.successor = label; +++ } +++ previousBlock = label; +++ } else if (compute == MAXS) { +++ if (currentBlock != null) { +++ // ends current block (with one new successor) +++ currentBlock.outputStackMax = maxStackSize; +++ addSuccessor(stackSize, label); +++ } +++ // begins a new current block +++ currentBlock = label; +++ // resets the relative current and max stack sizes +++ stackSize = 0; +++ maxStackSize = 0; +++ // updates the basic block list +++ if (previousBlock != null) { +++ previousBlock.successor = label; +++ } +++ previousBlock = label; +++ } +++ } +++ +++ @Override +++ public void visitLdcInsn(final Object cst) { +++ lastCodeOffset = 
code.length; +++ Item i = cw.newConstItem(cst); +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(Opcodes.LDC, 0, cw, i); +++ } else { +++ int size; +++ // computes the stack size variation +++ if (i.type == ClassWriter.LONG || i.type == ClassWriter.DOUBLE) { +++ size = stackSize + 2; +++ } else { +++ size = stackSize + 1; +++ } +++ // updates current and max stack sizes +++ if (size > maxStackSize) { +++ maxStackSize = size; +++ } +++ stackSize = size; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ int index = i.index; +++ if (i.type == ClassWriter.LONG || i.type == ClassWriter.DOUBLE) { +++ code.put12(20 /* LDC2_W */, index); +++ } else if (index >= 256) { +++ code.put12(19 /* LDC_W */, index); +++ } else { +++ code.put11(Opcodes.LDC, index); +++ } +++ } +++ +++ @Override +++ public void visitIincInsn(final int var, final int increment) { +++ lastCodeOffset = code.length; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(Opcodes.IINC, var, null, null); +++ } +++ } +++ if (compute != NOTHING) { +++ // updates max locals +++ int n = var + 1; +++ if (n > maxLocals) { +++ maxLocals = n; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ if ((var > 255) || (increment > 127) || (increment < -128)) { +++ code.putByte(196 /* WIDE */).put12(Opcodes.IINC, var) +++ .putShort(increment); +++ } else { +++ code.putByte(Opcodes.IINC).put11(var, increment); +++ } +++ } +++ +++ @Override +++ public void visitTableSwitchInsn(final int min, final int max, +++ final Label dflt, final Label... 
labels) { +++ lastCodeOffset = code.length; +++ // adds the instruction to the bytecode of the method +++ int source = code.length; +++ code.putByte(Opcodes.TABLESWITCH); +++ code.putByteArray(null, 0, (4 - code.length % 4) % 4); +++ dflt.put(this, code, source, true); +++ code.putInt(min).putInt(max); +++ for (int i = 0; i < labels.length; ++i) { +++ labels[i].put(this, code, source, true); +++ } +++ // updates currentBlock +++ visitSwitchInsn(dflt, labels); +++ } +++ +++ @Override +++ public void visitLookupSwitchInsn(final Label dflt, final int[] keys, +++ final Label[] labels) { +++ lastCodeOffset = code.length; +++ // adds the instruction to the bytecode of the method +++ int source = code.length; +++ code.putByte(Opcodes.LOOKUPSWITCH); +++ code.putByteArray(null, 0, (4 - code.length % 4) % 4); +++ dflt.put(this, code, source, true); +++ code.putInt(labels.length); +++ for (int i = 0; i < labels.length; ++i) { +++ code.putInt(keys[i]); +++ labels[i].put(this, code, source, true); +++ } +++ // updates currentBlock +++ visitSwitchInsn(dflt, labels); +++ } +++ +++ private void visitSwitchInsn(final Label dflt, final Label[] labels) { +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(Opcodes.LOOKUPSWITCH, 0, null, null); +++ // adds current block successors +++ addSuccessor(Edge.NORMAL, dflt); +++ dflt.getFirst().status |= Label.TARGET; +++ for (int i = 0; i < labels.length; ++i) { +++ addSuccessor(Edge.NORMAL, labels[i]); +++ labels[i].getFirst().status |= Label.TARGET; +++ } +++ } else { +++ // updates current stack size (max stack size unchanged) +++ --stackSize; +++ // adds current block successors +++ addSuccessor(stackSize, dflt); +++ for (int i = 0; i < labels.length; ++i) { +++ addSuccessor(stackSize, labels[i]); +++ } +++ } +++ // ends current block +++ noSuccessor(); +++ } +++ } +++ +++ @Override +++ public void visitMultiANewArrayInsn(final String desc, final int 
dims) { +++ lastCodeOffset = code.length; +++ Item i = cw.newClassItem(desc); +++ // Label currentBlock = this.currentBlock; +++ if (currentBlock != null) { +++ if (compute == FRAMES) { +++ currentBlock.frame.execute(Opcodes.MULTIANEWARRAY, dims, cw, i); +++ } else { +++ // updates current stack size (max stack size unchanged because +++ // stack size variation always negative or null) +++ stackSize += 1 - dims; +++ } +++ } +++ // adds the instruction to the bytecode of the method +++ code.put12(Opcodes.MULTIANEWARRAY, i.index).putByte(dims); +++ } +++ +++ @Override +++ public AnnotationVisitor visitInsnAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ typeRef = (typeRef & 0xFF0000FF) | (lastCodeOffset << 8); +++ AnnotationWriter.putTarget(typeRef, typePath, bv); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = ctanns; +++ ctanns = aw; +++ } else { +++ aw.next = ictanns; +++ ictanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public void visitTryCatchBlock(final Label start, final Label end, +++ final Label handler, final String type) { +++ ++handlerCount; +++ Handler h = new Handler(); +++ h.start = start; +++ h.end = end; +++ h.handler = handler; +++ h.desc = type; +++ h.type = type != null ? 
cw.newClass(type) : 0; +++ if (lastHandler == null) { +++ firstHandler = h; +++ } else { +++ lastHandler.next = h; +++ } +++ lastHandler = h; +++ } +++ +++ @Override +++ public AnnotationVisitor visitTryCatchAnnotation(int typeRef, +++ TypePath typePath, String desc, boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ AnnotationWriter.putTarget(typeRef, typePath, bv); +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = ctanns; +++ ctanns = aw; +++ } else { +++ aw.next = ictanns; +++ ictanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public void visitLocalVariable(final String name, final String desc, +++ final String signature, final Label start, final Label end, +++ final int index) { +++ if (signature != null) { +++ if (localVarType == null) { +++ localVarType = new ByteVector(); +++ } +++ ++localVarTypeCount; +++ localVarType.putShort(start.position) +++ .putShort(end.position - start.position) +++ .putShort(cw.newUTF8(name)).putShort(cw.newUTF8(signature)) +++ .putShort(index); +++ } +++ if (localVar == null) { +++ localVar = new ByteVector(); +++ } +++ ++localVarCount; +++ localVar.putShort(start.position) +++ .putShort(end.position - start.position) +++ .putShort(cw.newUTF8(name)).putShort(cw.newUTF8(desc)) +++ .putShort(index); +++ if (compute != NOTHING) { +++ // updates max locals +++ char c = desc.charAt(0); +++ int n = index + (c == 'J' || c == 'D' ? 
2 : 1); +++ if (n > maxLocals) { +++ maxLocals = n; +++ } +++ } +++ } +++ +++ @Override +++ public AnnotationVisitor visitLocalVariableAnnotation(int typeRef, +++ TypePath typePath, Label[] start, Label[] end, int[] index, +++ String desc, boolean visible) { +++ if (!ClassReader.ANNOTATIONS) { +++ return null; +++ } +++ ByteVector bv = new ByteVector(); +++ // write target_type and target_info +++ bv.putByte(typeRef >>> 24).putShort(start.length); +++ for (int i = 0; i < start.length; ++i) { +++ bv.putShort(start[i].position) +++ .putShort(end[i].position - start[i].position) +++ .putShort(index[i]); +++ } +++ if (typePath == null) { +++ bv.putByte(0); +++ } else { +++ int length = typePath.b[typePath.offset] * 2 + 1; +++ bv.putByteArray(typePath.b, typePath.offset, length); +++ } +++ // write type, and reserve space for values count +++ bv.putShort(cw.newUTF8(desc)).putShort(0); +++ AnnotationWriter aw = new AnnotationWriter(cw, true, bv, bv, +++ bv.length - 2); +++ if (visible) { +++ aw.next = ctanns; +++ ctanns = aw; +++ } else { +++ aw.next = ictanns; +++ ictanns = aw; +++ } +++ return aw; +++ } +++ +++ @Override +++ public void visitLineNumber(final int line, final Label start) { +++ if (lineNumber == null) { +++ lineNumber = new ByteVector(); +++ } +++ ++lineNumberCount; +++ lineNumber.putShort(start.position); +++ lineNumber.putShort(line); +++ } +++ +++ @Override +++ public void visitMaxs(final int maxStack, final int maxLocals) { +++ if (resize) { +++ // replaces the temporary jump opcodes introduced by Label.resolve. 
+++ if (ClassReader.RESIZE) { +++ resizeInstructions(); +++ } else { +++ throw new RuntimeException("Method code too large!"); +++ } +++ } +++ if (ClassReader.FRAMES && compute == FRAMES) { +++ // completes the control flow graph with exception handler blocks +++ Handler handler = firstHandler; +++ while (handler != null) { +++ Label l = handler.start.getFirst(); +++ Label h = handler.handler.getFirst(); +++ Label e = handler.end.getFirst(); +++ // computes the kind of the edges to 'h' +++ String t = handler.desc == null ? "java/lang/Throwable" +++ : handler.desc; +++ int kind = Frame.OBJECT | cw.addType(t); +++ // h is an exception handler +++ h.status |= Label.TARGET; +++ // adds 'h' as a successor of labels between 'start' and 'end' +++ while (l != e) { +++ // creates an edge to 'h' +++ Edge b = new Edge(); +++ b.info = kind; +++ b.successor = h; +++ // adds it to the successors of 'l' +++ b.next = l.successors; +++ l.successors = b; +++ // goes to the next label +++ l = l.successor; +++ } +++ handler = handler.next; +++ } +++ +++ // creates and visits the first (implicit) frame +++ Frame f = labels.frame; +++ Type[] args = Type.getArgumentTypes(descriptor); +++ f.initInputFrame(cw, access, args, this.maxLocals); +++ visitFrame(f); +++ +++ /* +++ * fix point algorithm: mark the first basic block as 'changed' +++ * (i.e. put it in the 'changed' list) and, while there are changed +++ * basic blocks, choose one, mark it as unchanged, and update its +++ * successors (which can be changed in the process). 
+++ */ +++ int max = 0; +++ Label changed = labels; +++ while (changed != null) { +++ // removes a basic block from the list of changed basic blocks +++ Label l = changed; +++ changed = changed.next; +++ l.next = null; +++ f = l.frame; +++ // a reachable jump target must be stored in the stack map +++ if ((l.status & Label.TARGET) != 0) { +++ l.status |= Label.STORE; +++ } +++ // all visited labels are reachable, by definition +++ l.status |= Label.REACHABLE; +++ // updates the (absolute) maximum stack size +++ int blockMax = f.inputStack.length + l.outputStackMax; +++ if (blockMax > max) { +++ max = blockMax; +++ } +++ // updates the successors of the current basic block +++ Edge e = l.successors; +++ while (e != null) { +++ Label n = e.successor.getFirst(); +++ boolean change = f.merge(cw, n.frame, e.info); +++ if (change && n.next == null) { +++ // if n has changed and is not already in the 'changed' +++ // list, adds it to this list +++ n.next = changed; +++ changed = n; +++ } +++ e = e.next; +++ } +++ } +++ +++ // visits all the frames that must be stored in the stack map +++ Label l = labels; +++ while (l != null) { +++ f = l.frame; +++ if ((l.status & Label.STORE) != 0) { +++ visitFrame(f); +++ } +++ if ((l.status & Label.REACHABLE) == 0) { +++ // finds start and end of dead basic block +++ Label k = l.successor; +++ int start = l.position; +++ int end = (k == null ? code.length : k.position) - 1; +++ // if non empty basic block +++ if (end >= start) { +++ max = Math.max(max, 1); +++ // replaces instructions with NOP ... 
NOP ATHROW +++ for (int i = start; i < end; ++i) { +++ code.data[i] = Opcodes.NOP; +++ } +++ code.data[end] = (byte) Opcodes.ATHROW; +++ // emits a frame for this unreachable block +++ int frameIndex = startFrame(start, 0, 1); +++ frame[frameIndex] = Frame.OBJECT +++ | cw.addType("java/lang/Throwable"); +++ endFrame(); +++ // removes the start-end range from the exception +++ // handlers +++ firstHandler = Handler.remove(firstHandler, l, k); +++ } +++ } +++ l = l.successor; +++ } +++ +++ handler = firstHandler; +++ handlerCount = 0; +++ while (handler != null) { +++ handlerCount += 1; +++ handler = handler.next; +++ } +++ +++ this.maxStack = max; +++ } else if (compute == MAXS) { +++ // completes the control flow graph with exception handler blocks +++ Handler handler = firstHandler; +++ while (handler != null) { +++ Label l = handler.start; +++ Label h = handler.handler; +++ Label e = handler.end; +++ // adds 'h' as a successor of labels between 'start' and 'end' +++ while (l != e) { +++ // creates an edge to 'h' +++ Edge b = new Edge(); +++ b.info = Edge.EXCEPTION; +++ b.successor = h; +++ // adds it to the successors of 'l' +++ if ((l.status & Label.JSR) == 0) { +++ b.next = l.successors; +++ l.successors = b; +++ } else { +++ // if l is a JSR block, adds b after the first two edges +++ // to preserve the hypothesis about JSR block successors +++ // order (see {@link #visitJumpInsn}) +++ b.next = l.successors.next.next; +++ l.successors.next.next = b; +++ } +++ // goes to the next label +++ l = l.successor; +++ } +++ handler = handler.next; +++ } +++ +++ if (subroutines > 0) { +++ // completes the control flow graph with the RET successors +++ /* +++ * first step: finds the subroutines. This step determines, for +++ * each basic block, to which subroutine(s) it belongs. 
+++ */ +++ // finds the basic blocks that belong to the "main" subroutine +++ int id = 0; +++ labels.visitSubroutine(null, 1, subroutines); +++ // finds the basic blocks that belong to the real subroutines +++ Label l = labels; +++ while (l != null) { +++ if ((l.status & Label.JSR) != 0) { +++ // the subroutine is defined by l's TARGET, not by l +++ Label subroutine = l.successors.next.successor; +++ // if this subroutine has not been visited yet... +++ if ((subroutine.status & Label.VISITED) == 0) { +++ // ...assigns it a new id and finds its basic blocks +++ id += 1; +++ subroutine.visitSubroutine(null, (id / 32L) << 32 +++ | (1L << (id % 32)), subroutines); +++ } +++ } +++ l = l.successor; +++ } +++ // second step: finds the successors of RET blocks +++ l = labels; +++ while (l != null) { +++ if ((l.status & Label.JSR) != 0) { +++ Label L = labels; +++ while (L != null) { +++ L.status &= ~Label.VISITED2; +++ L = L.successor; +++ } +++ // the subroutine is defined by l's TARGET, not by l +++ Label subroutine = l.successors.next.successor; +++ subroutine.visitSubroutine(l, 0, subroutines); +++ } +++ l = l.successor; +++ } +++ } +++ +++ /* +++ * control flow analysis algorithm: while the block stack is not +++ * empty, pop a block from this stack, update the max stack size, +++ * compute the true (non relative) begin stack size of the +++ * successors of this block, and push these successors onto the +++ * stack (unless they have already been pushed onto the stack). +++ * Note: by hypothesis, the {@link Label#inputStackTop} of the +++ * blocks in the block stack are the true (non relative) beginning +++ * stack sizes of these blocks. 
+++ */ +++ int max = 0; +++ Label stack = labels; +++ while (stack != null) { +++ // pops a block from the stack +++ Label l = stack; +++ stack = stack.next; +++ // computes the true (non relative) max stack size of this block +++ int start = l.inputStackTop; +++ int blockMax = start + l.outputStackMax; +++ // updates the global max stack size +++ if (blockMax > max) { +++ max = blockMax; +++ } +++ // analyzes the successors of the block +++ Edge b = l.successors; +++ if ((l.status & Label.JSR) != 0) { +++ // ignores the first edge of JSR blocks (virtual successor) +++ b = b.next; +++ } +++ while (b != null) { +++ l = b.successor; +++ // if this successor has not already been pushed... +++ if ((l.status & Label.PUSHED) == 0) { +++ // computes its true beginning stack size... +++ l.inputStackTop = b.info == Edge.EXCEPTION ? 1 : start +++ + b.info; +++ // ...and pushes it onto the stack +++ l.status |= Label.PUSHED; +++ l.next = stack; +++ stack = l; +++ } +++ b = b.next; +++ } +++ } +++ this.maxStack = Math.max(maxStack, max); +++ } else { +++ this.maxStack = maxStack; +++ this.maxLocals = maxLocals; +++ } +++ } +++ +++ @Override +++ public void visitEnd() { +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: control flow analysis algorithm +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Adds a successor to the {@link #currentBlock currentBlock} block. +++ * +++ * @param info +++ * information about the control flow edge to be added. +++ * @param successor +++ * the successor block to be added to the current block. +++ */ +++ private void addSuccessor(final int info, final Label successor) { +++ // creates and initializes an Edge object... 
+++ Edge b = new Edge(); +++ b.info = info; +++ b.successor = successor; +++ // ...and adds it to the successor list of the currentBlock block +++ b.next = currentBlock.successors; +++ currentBlock.successors = b; +++ } +++ +++ /** +++ * Ends the current basic block. This method must be used in the case where +++ * the current basic block does not have any successor. +++ */ +++ private void noSuccessor() { +++ if (compute == FRAMES) { +++ Label l = new Label(); +++ l.frame = new Frame(); +++ l.frame.owner = l; +++ l.resolve(this, code.length, code.data); +++ previousBlock.successor = l; +++ previousBlock = l; +++ } else { +++ currentBlock.outputStackMax = maxStackSize; +++ } +++ currentBlock = null; +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: stack map frames +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Visits a frame that has been computed from scratch. +++ * +++ * @param f +++ * the frame that must be visited. 
+++ */ +++ private void visitFrame(final Frame f) { +++ int i, t; +++ int nTop = 0; +++ int nLocal = 0; +++ int nStack = 0; +++ int[] locals = f.inputLocals; +++ int[] stacks = f.inputStack; +++ // computes the number of locals (ignores TOP types that are just after +++ // a LONG or a DOUBLE, and all trailing TOP types) +++ for (i = 0; i < locals.length; ++i) { +++ t = locals[i]; +++ if (t == Frame.TOP) { +++ ++nTop; +++ } else { +++ nLocal += nTop + 1; +++ nTop = 0; +++ } +++ if (t == Frame.LONG || t == Frame.DOUBLE) { +++ ++i; +++ } +++ } +++ // computes the stack size (ignores TOP types that are just after +++ // a LONG or a DOUBLE) +++ for (i = 0; i < stacks.length; ++i) { +++ t = stacks[i]; +++ ++nStack; +++ if (t == Frame.LONG || t == Frame.DOUBLE) { +++ ++i; +++ } +++ } +++ // visits the frame and its content +++ int frameIndex = startFrame(f.owner.position, nLocal, nStack); +++ for (i = 0; nLocal > 0; ++i, --nLocal) { +++ t = locals[i]; +++ frame[frameIndex++] = t; +++ if (t == Frame.LONG || t == Frame.DOUBLE) { +++ ++i; +++ } +++ } +++ for (i = 0; i < stacks.length; ++i) { +++ t = stacks[i]; +++ frame[frameIndex++] = t; +++ if (t == Frame.LONG || t == Frame.DOUBLE) { +++ ++i; +++ } +++ } +++ endFrame(); +++ } +++ +++ /** +++ * Visit the implicit first frame of this method. 
+++ */ +++ private void visitImplicitFirstFrame() { +++ // There can be at most descriptor.length() + 1 locals +++ int frameIndex = startFrame(0, descriptor.length() + 1, 0); +++ if ((access & Opcodes.ACC_STATIC) == 0) { +++ if ((access & ACC_CONSTRUCTOR) == 0) { +++ frame[frameIndex++] = Frame.OBJECT | cw.addType(cw.thisName); +++ } else { +++ frame[frameIndex++] = 6; // Opcodes.UNINITIALIZED_THIS; +++ } +++ } +++ int i = 1; +++ loop: while (true) { +++ int j = i; +++ switch (descriptor.charAt(i++)) { +++ case 'Z': +++ case 'C': +++ case 'B': +++ case 'S': +++ case 'I': +++ frame[frameIndex++] = 1; // Opcodes.INTEGER; +++ break; +++ case 'F': +++ frame[frameIndex++] = 2; // Opcodes.FLOAT; +++ break; +++ case 'J': +++ frame[frameIndex++] = 4; // Opcodes.LONG; +++ break; +++ case 'D': +++ frame[frameIndex++] = 3; // Opcodes.DOUBLE; +++ break; +++ case '[': +++ while (descriptor.charAt(i) == '[') { +++ ++i; +++ } +++ if (descriptor.charAt(i) == 'L') { +++ ++i; +++ while (descriptor.charAt(i) != ';') { +++ ++i; +++ } +++ } +++ frame[frameIndex++] = Frame.OBJECT +++ | cw.addType(descriptor.substring(j, ++i)); +++ break; +++ case 'L': +++ while (descriptor.charAt(i) != ';') { +++ ++i; +++ } +++ frame[frameIndex++] = Frame.OBJECT +++ | cw.addType(descriptor.substring(j + 1, i++)); +++ break; +++ default: +++ break loop; +++ } +++ } +++ frame[1] = frameIndex - 3; +++ endFrame(); +++ } +++ +++ /** +++ * Starts the visit of a stack map frame. +++ * +++ * @param offset +++ * the offset of the instruction to which the frame corresponds. +++ * @param nLocal +++ * the number of local variables in the frame. +++ * @param nStack +++ * the number of stack elements in the frame. +++ * @return the index of the next element to be written in this frame. 
+++ */ +++ private int startFrame(final int offset, final int nLocal, final int nStack) { +++ int n = 3 + nLocal + nStack; +++ if (frame == null || frame.length < n) { +++ frame = new int[n]; +++ } +++ frame[0] = offset; +++ frame[1] = nLocal; +++ frame[2] = nStack; +++ return 3; +++ } +++ +++ /** +++ * Checks if the visit of the current frame {@link #frame} is finished, and +++ * if yes, write it in the StackMapTable attribute. +++ */ +++ private void endFrame() { +++ if (previousFrame != null) { // do not write the first frame +++ if (stackMap == null) { +++ stackMap = new ByteVector(); +++ } +++ writeFrame(); +++ ++frameCount; +++ } +++ previousFrame = frame; +++ frame = null; +++ } +++ +++ /** +++ * Compress and writes the current frame {@link #frame} in the StackMapTable +++ * attribute. +++ */ +++ private void writeFrame() { +++ int clocalsSize = frame[1]; +++ int cstackSize = frame[2]; +++ if ((cw.version & 0xFFFF) < Opcodes.V1_6) { +++ stackMap.putShort(frame[0]).putShort(clocalsSize); +++ writeFrameTypes(3, 3 + clocalsSize); +++ stackMap.putShort(cstackSize); +++ writeFrameTypes(3 + clocalsSize, 3 + clocalsSize + cstackSize); +++ return; +++ } +++ int localsSize = previousFrame[1]; +++ int type = FULL_FRAME; +++ int k = 0; +++ int delta; +++ if (frameCount == 0) { +++ delta = frame[0]; +++ } else { +++ delta = frame[0] - previousFrame[0] - 1; +++ } +++ if (cstackSize == 0) { +++ k = clocalsSize - localsSize; +++ switch (k) { +++ case -3: +++ case -2: +++ case -1: +++ type = CHOP_FRAME; +++ localsSize = clocalsSize; +++ break; +++ case 0: +++ type = delta < 64 ? SAME_FRAME : SAME_FRAME_EXTENDED; +++ break; +++ case 1: +++ case 2: +++ case 3: +++ type = APPEND_FRAME; +++ break; +++ } +++ } else if (clocalsSize == localsSize && cstackSize == 1) { +++ type = delta < 63 ? 
SAME_LOCALS_1_STACK_ITEM_FRAME +++ : SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED; +++ } +++ if (type != FULL_FRAME) { +++ // verify if locals are the same +++ int l = 3; +++ for (int j = 0; j < localsSize; j++) { +++ if (frame[l] != previousFrame[l]) { +++ type = FULL_FRAME; +++ break; +++ } +++ l++; +++ } +++ } +++ switch (type) { +++ case SAME_FRAME: +++ stackMap.putByte(delta); +++ break; +++ case SAME_LOCALS_1_STACK_ITEM_FRAME: +++ stackMap.putByte(SAME_LOCALS_1_STACK_ITEM_FRAME + delta); +++ writeFrameTypes(3 + clocalsSize, 4 + clocalsSize); +++ break; +++ case SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED: +++ stackMap.putByte(SAME_LOCALS_1_STACK_ITEM_FRAME_EXTENDED).putShort( +++ delta); +++ writeFrameTypes(3 + clocalsSize, 4 + clocalsSize); +++ break; +++ case SAME_FRAME_EXTENDED: +++ stackMap.putByte(SAME_FRAME_EXTENDED).putShort(delta); +++ break; +++ case CHOP_FRAME: +++ stackMap.putByte(SAME_FRAME_EXTENDED + k).putShort(delta); +++ break; +++ case APPEND_FRAME: +++ stackMap.putByte(SAME_FRAME_EXTENDED + k).putShort(delta); +++ writeFrameTypes(3 + localsSize, 3 + clocalsSize); +++ break; +++ // case FULL_FRAME: +++ default: +++ stackMap.putByte(FULL_FRAME).putShort(delta).putShort(clocalsSize); +++ writeFrameTypes(3, 3 + clocalsSize); +++ stackMap.putShort(cstackSize); +++ writeFrameTypes(3 + clocalsSize, 3 + clocalsSize + cstackSize); +++ } +++ } +++ +++ /** +++ * Writes some types of the current frame {@link #frame} into the +++ * StackMapTableAttribute. This method converts types from the format used +++ * in {@link Label} to the format used in StackMapTable attributes. In +++ * particular, it converts type table indexes to constant pool indexes. +++ * +++ * @param start +++ * index of the first type in {@link #frame} to write. +++ * @param end +++ * index of last type in {@link #frame} to write (exclusive). 
+++ */ +++ private void writeFrameTypes(final int start, final int end) { +++ for (int i = start; i < end; ++i) { +++ int t = frame[i]; +++ int d = t & Frame.DIM; /* non-zero for array types: the else branch below emits one bracket per dimension */ +++ if (d == 0) { +++ int v = t & Frame.BASE_VALUE; +++ switch (t & Frame.BASE_KIND) { +++ case Frame.OBJECT: +++ stackMap.putByte(7).putShort( +++ cw.newClass(cw.typeTable[v].strVal1)); +++ break; +++ case Frame.UNINITIALIZED: +++ stackMap.putByte(8).putShort(cw.typeTable[v].intVal); +++ break; +++ default: +++ stackMap.putByte(v); +++ } +++ } else { +++ StringBuilder sb = new StringBuilder(); +++ d >>= 28; /* number of array dimensions */ +++ while (d-- > 0) { +++ sb.append('['); +++ } +++ if ((t & Frame.BASE_KIND) == Frame.OBJECT) { +++ sb.append('L'); +++ sb.append(cw.typeTable[t & Frame.BASE_VALUE].strVal1); +++ sb.append(';'); +++ } else { +++ switch (t & 0xF) { +++ case 1: +++ sb.append('I'); +++ break; +++ case 2: +++ sb.append('F'); +++ break; +++ case 3: +++ sb.append('D'); +++ break; +++ case 9: +++ sb.append('Z'); +++ break; +++ case 10: +++ sb.append('B'); +++ break; +++ case 11: +++ sb.append('C'); +++ break; +++ case 12: +++ sb.append('S'); +++ break; +++ default: +++ sb.append('J'); +++ } +++ } +++ stackMap.putByte(7).putShort(cw.newClass(sb.toString())); /* array types are written as a class entry named by the descriptor built above */ +++ } +++ } +++ } +++ +++ private void writeFrameType(final Object type) { /* Writes one type: a String as tag 7 plus a class index, an Integer as a single byte, anything else is cast to Label and written as tag 8 plus its position. */ +++ if (type instanceof String) { +++ stackMap.putByte(7).putShort(cw.newClass((String) type)); +++ } else if (type instanceof Integer) { +++ stackMap.putByte(((Integer) type).intValue()); +++ } else { +++ stackMap.putByte(8).putShort(((Label) type).position); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: dump bytecode array +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the size of the bytecode of this method. +++ * +++ * @return the size of the bytecode of this method.
+++ */ +++ final int getSize() { /* Size in bytes of what put() writes for this method; throws RuntimeException when the code array is too large. */ +++ if (classReaderOffset != 0) { /* method bytes are copied verbatim from the class reader: 6 bytes for access, name and desc plus the copied length */ +++ return 6 + classReaderLength; +++ } +++ int size = 8; +++ if (code.length > 0) { +++ if (code.length >= 65536) { /* JVMS 4.7.3: code_length must be less than 65536, so exactly 65536 bytes is already too large */ +++ throw new RuntimeException("Method code too large!"); +++ } +++ cw.newUTF8("Code"); +++ size += 18 + code.length + 8 * handlerCount; +++ if (localVar != null) { +++ cw.newUTF8("LocalVariableTable"); +++ size += 8 + localVar.length; +++ } +++ if (localVarType != null) { +++ cw.newUTF8("LocalVariableTypeTable"); +++ size += 8 + localVarType.length; +++ } +++ if (lineNumber != null) { +++ cw.newUTF8("LineNumberTable"); +++ size += 8 + lineNumber.length; +++ } +++ if (stackMap != null) { +++ boolean zip = (cw.version & 0xFFFF) >= Opcodes.V1_6; +++ cw.newUTF8(zip ? "StackMapTable" : "StackMap"); +++ size += 8 + stackMap.length; +++ } +++ if (ClassReader.ANNOTATIONS && ctanns != null) { +++ cw.newUTF8("RuntimeVisibleTypeAnnotations"); +++ size += 8 + ctanns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && ictanns != null) { +++ cw.newUTF8("RuntimeInvisibleTypeAnnotations"); +++ size += 8 + ictanns.getSize(); +++ } +++ if (cattrs != null) { +++ size += cattrs.getSize(cw, code.data, code.length, maxStack, +++ maxLocals); +++ } +++ } +++ if (exceptionCount > 0) { +++ cw.newUTF8("Exceptions"); +++ size += 8 + 2 * exceptionCount; +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ cw.newUTF8("Synthetic"); +++ size += 6; +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ cw.newUTF8("Deprecated"); +++ size += 6; +++ } +++ if (ClassReader.SIGNATURES && signature != null) { +++ cw.newUTF8("Signature"); +++ cw.newUTF8(signature); +++ size += 8; +++ } +++ if (methodParameters != null) { +++ cw.newUTF8("MethodParameters"); +++ size += 7 + methodParameters.length; +++ } +++ if (ClassReader.ANNOTATIONS && annd != null) { +++ cw.newUTF8("AnnotationDefault"); +++ size += 6
+ annd.length; +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ cw.newUTF8("RuntimeVisibleAnnotations"); +++ size += 8 + anns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ cw.newUTF8("RuntimeInvisibleAnnotations"); +++ size += 8 + ianns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ cw.newUTF8("RuntimeVisibleTypeAnnotations"); +++ size += 8 + tanns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ cw.newUTF8("RuntimeInvisibleTypeAnnotations"); +++ size += 8 + itanns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && panns != null) { +++ cw.newUTF8("RuntimeVisibleParameterAnnotations"); +++ size += 7 + 2 * (panns.length - synthetics); +++ for (int i = panns.length - 1; i >= synthetics; --i) { +++ size += panns[i] == null ? 0 : panns[i].getSize(); +++ } +++ } +++ if (ClassReader.ANNOTATIONS && ipanns != null) { +++ cw.newUTF8("RuntimeInvisibleParameterAnnotations"); +++ size += 7 + 2 * (ipanns.length - synthetics); +++ for (int i = ipanns.length - 1; i >= synthetics; --i) { +++ size += ipanns[i] == null ? 0 : ipanns[i].getSize(); +++ } +++ } +++ if (attrs != null) { +++ size += attrs.getSize(cw, null, 0, -1, -1); +++ } +++ return size; +++ } +++ +++ /** +++ * Puts the bytecode of this method in the given byte vector. +++ * +++ * @param out +++ * the byte vector into which the bytecode of this method must be +++ * copied.
+++ */ +++ final void put(final ByteVector out) { /* Serializes the method: access flags, name and descriptor indexes, then its attributes. */ +++ final int FACTOR = ClassWriter.TO_ACC_SYNTHETIC; +++ int mask = ACC_CONSTRUCTOR | Opcodes.ACC_DEPRECATED +++ | ClassWriter.ACC_SYNTHETIC_ATTRIBUTE +++ | ((access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) / FACTOR); +++ out.putShort(access & ~mask).putShort(name).putShort(desc); +++ if (classReaderOffset != 0) { /* method body is copied unchanged from the class reader buffer */ +++ out.putByteArray(cw.cr.b, classReaderOffset, classReaderLength); +++ return; +++ } +++ int attributeCount = 0; +++ if (code.length > 0) { +++ ++attributeCount; +++ } +++ if (exceptionCount > 0) { +++ ++attributeCount; +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ ++attributeCount; +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ ++attributeCount; +++ } +++ if (ClassReader.SIGNATURES && signature != null) { +++ ++attributeCount; +++ } +++ if (methodParameters != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && annd != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && panns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && ipanns != null) { +++ ++attributeCount; +++ } +++ if (attrs != null) { +++ attributeCount += attrs.getCount(); +++ } +++ out.putShort(attributeCount); +++ if (code.length > 0) { +++ int size = 12 + code.length + 8 * handlerCount; +++ if (localVar != null) { +++ size += 8 + localVar.length; +++ } +++ if (localVarType != null) { +++ size += 8 + localVarType.length; +++ } +++ if (lineNumber != null) { +++ size += 8 + lineNumber.length; +++ } +++ if (stackMap != null) { +++
size += 8 + stackMap.length; +++ } +++ if (ClassReader.ANNOTATIONS && ctanns != null) { +++ size += 8 + ctanns.getSize(); +++ } +++ if (ClassReader.ANNOTATIONS && ictanns != null) { +++ size += 8 + ictanns.getSize(); +++ } +++ if (cattrs != null) { +++ size += cattrs.getSize(cw, code.data, code.length, maxStack, +++ maxLocals); +++ } +++ out.putShort(cw.newUTF8("Code")).putInt(size); +++ out.putShort(maxStack).putShort(maxLocals); +++ out.putInt(code.length).putByteArray(code.data, 0, code.length); +++ out.putShort(handlerCount); +++ if (handlerCount > 0) { +++ Handler h = firstHandler; +++ while (h != null) { +++ out.putShort(h.start.position).putShort(h.end.position) +++ .putShort(h.handler.position).putShort(h.type); +++ h = h.next; +++ } +++ } +++ attributeCount = 0; /* reused from here on to count the attributes nested inside Code */ +++ if (localVar != null) { +++ ++attributeCount; +++ } +++ if (localVarType != null) { +++ ++attributeCount; +++ } +++ if (lineNumber != null) { +++ ++attributeCount; +++ } +++ if (stackMap != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && ctanns != null) { +++ ++attributeCount; +++ } +++ if (ClassReader.ANNOTATIONS && ictanns != null) { +++ ++attributeCount; +++ } +++ if (cattrs != null) { +++ attributeCount += cattrs.getCount(); +++ } +++ out.putShort(attributeCount); +++ if (localVar != null) { +++ out.putShort(cw.newUTF8("LocalVariableTable")); +++ out.putInt(localVar.length + 2).putShort(localVarCount); +++ out.putByteArray(localVar.data, 0, localVar.length); +++ } +++ if (localVarType != null) { +++ out.putShort(cw.newUTF8("LocalVariableTypeTable")); +++ out.putInt(localVarType.length + 2).putShort(localVarTypeCount); +++ out.putByteArray(localVarType.data, 0, localVarType.length); +++ } +++ if (lineNumber != null) { +++ out.putShort(cw.newUTF8("LineNumberTable")); +++ out.putInt(lineNumber.length + 2).putShort(lineNumberCount); +++ out.putByteArray(lineNumber.data, 0, lineNumber.length); +++ } +++ if (stackMap != null) { +++ boolean zip = (cw.version & 0xFFFF) >=
Opcodes.V1_6; +++ out.putShort(cw.newUTF8(zip ? "StackMapTable" : "StackMap")); +++ out.putInt(stackMap.length + 2).putShort(frameCount); +++ out.putByteArray(stackMap.data, 0, stackMap.length); +++ } +++ if (ClassReader.ANNOTATIONS && ctanns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleTypeAnnotations")); +++ ctanns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && ictanns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleTypeAnnotations")); +++ ictanns.put(out); +++ } +++ if (cattrs != null) { +++ cattrs.put(cw, code.data, code.length, maxStack, maxLocals, out); /* same (maxStack, maxLocals) order as the cattrs.getSize call above, so the bytes written match the size just computed */ +++ } +++ } +++ if (exceptionCount > 0) { +++ out.putShort(cw.newUTF8("Exceptions")).putInt( +++ 2 * exceptionCount + 2); +++ out.putShort(exceptionCount); +++ for (int i = 0; i < exceptionCount; ++i) { +++ out.putShort(exceptions[i]); +++ } +++ } +++ if ((access & Opcodes.ACC_SYNTHETIC) != 0) { +++ if ((cw.version & 0xFFFF) < Opcodes.V1_5 +++ || (access & ClassWriter.ACC_SYNTHETIC_ATTRIBUTE) != 0) { +++ out.putShort(cw.newUTF8("Synthetic")).putInt(0); +++ } +++ } +++ if ((access & Opcodes.ACC_DEPRECATED) != 0) { +++ out.putShort(cw.newUTF8("Deprecated")).putInt(0); +++ } +++ if (ClassReader.SIGNATURES && signature != null) { +++ out.putShort(cw.newUTF8("Signature")).putInt(2) +++ .putShort(cw.newUTF8(signature)); +++ } +++ if (methodParameters != null) { +++ out.putShort(cw.newUTF8("MethodParameters")); +++ out.putInt(methodParameters.length + 1).putByte( +++ methodParametersCount); +++ out.putByteArray(methodParameters.data, 0, methodParameters.length); +++ } +++ if (ClassReader.ANNOTATIONS && annd != null) { +++ out.putShort(cw.newUTF8("AnnotationDefault")); +++ out.putInt(annd.length); +++ out.putByteArray(annd.data, 0, annd.length); +++ } +++ if (ClassReader.ANNOTATIONS && anns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleAnnotations")); +++ anns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && ianns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleAnnotations"));
+++ ianns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && tanns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleTypeAnnotations")); +++ tanns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && itanns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleTypeAnnotations")); +++ itanns.put(out); +++ } +++ if (ClassReader.ANNOTATIONS && panns != null) { +++ out.putShort(cw.newUTF8("RuntimeVisibleParameterAnnotations")); +++ AnnotationWriter.put(panns, synthetics, out); +++ } +++ if (ClassReader.ANNOTATIONS && ipanns != null) { +++ out.putShort(cw.newUTF8("RuntimeInvisibleParameterAnnotations")); +++ AnnotationWriter.put(ipanns, synthetics, out); +++ } +++ if (attrs != null) { +++ attrs.put(cw, null, 0, -1, -1, out); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Utility methods: instruction resizing (used to handle GOTO_W and JSR_W) +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Resizes and replaces the temporary instructions inserted by +++ * {@link Label#resolve} for wide forward jumps, while keeping jump offsets +++ * and instruction addresses consistent. This may require to resize other +++ * existing instructions, or even to introduce new instructions: for +++ * example, increasing the size of an instruction by 2 at the middle of a +++ * method can increase the offset of an IFEQ instruction from 32766 to +++ * 32768, in which case IFEQ 32766 must be replaced with IFNE 8 GOTO_W +++ * 32765. This, in turn, may require to increase the size of another jump +++ * instruction, and so on... All these operations are handled automatically +++ * by this method. +++ * <p> +++ * <i>This method must be called after all the method that is being built +++ * has been visited</i>. In particular, the {@link Label Label} objects used +++ * to construct the method are no longer valid after this method has been +++ * called.
+++ */ +++ private void resizeInstructions() { +++ byte[] b = code.data; // bytecode of the method +++ int u, v, label; // indexes in b +++ int i, j; // loop indexes +++ /* +++ * 1st step: As explained above, resizing an instruction may require to +++ * resize another one, which may require to resize yet another one, and +++ * so on. The first step of the algorithm consists in finding all the +++ * instructions that need to be resized, without modifying the code. +++ * This is done by the following "fix point" algorithm: +++ * +++ * Parse the code to find the jump instructions whose offset will need +++ * more than 2 bytes to be stored (the future offset is computed from +++ * the current offset and from the number of bytes that will be inserted +++ * or removed between the source and target instructions). For each such +++ * instruction, adds an entry in (a copy of) the indexes and sizes +++ * arrays (if this has not already been done in a previous iteration!). +++ * +++ * If at least one entry has been added during the previous step, go +++ * back to the beginning, otherwise stop. +++ * +++ * In fact the real algorithm is complicated by the fact that the size +++ * of TABLESWITCH and LOOKUPSWITCH instructions depends on their +++ * position in the bytecode (because of padding). In order to ensure the +++ * convergence of the algorithm, the number of bytes to be added or +++ * removed from these instructions is overestimated during the previous +++ * loop, and computed exactly only after the loop is finished (this +++ * requires another pass to parse the bytecode of the method).
+++ */ +++ int[] allIndexes = new int[0]; // copy of indexes +++ int[] allSizes = new int[0]; // copy of sizes +++ boolean[] resize; // resize[u] is true once the instruction at offset u has been scheduled for resizing +++ int newOffset; // future offset of a jump instruction +++ +++ resize = new boolean[code.length]; +++ +++ // 3 = loop again, 2 = loop ended, 1 = last pass, 0 = done +++ int state = 3; +++ do { +++ if (state == 3) { +++ state = 2; +++ } +++ u = 0; +++ while (u < b.length) { +++ int opcode = b[u] & 0xFF; // opcode of current instruction +++ int insert = 0; // bytes to be added after this instruction +++ +++ switch (ClassWriter.TYPE[opcode]) { +++ case ClassWriter.NOARG_INSN: +++ case ClassWriter.IMPLVAR_INSN: +++ u += 1; +++ break; +++ case ClassWriter.LABEL_INSN: +++ if (opcode > 201) { +++ // converts temporary opcodes 202 to 217, 218 and +++ // 219 to IFEQ ... JSR (inclusive), IFNULL and +++ // IFNONNULL +++ opcode = opcode < 218 ? opcode - 49 : opcode - 20; +++ label = u + readUnsignedShort(b, u + 1); +++ } else { +++ label = u + readShort(b, u + 1); +++ } +++ newOffset = getNewOffset(allIndexes, allSizes, u, label); +++ if (newOffset < Short.MIN_VALUE +++ || newOffset > Short.MAX_VALUE) { +++ if (!resize[u]) { +++ if (opcode == Opcodes.GOTO || opcode == Opcodes.JSR) { +++ // two additional bytes will be required to +++ // replace this GOTO or JSR instruction with +++ // a GOTO_W or a JSR_W +++ insert = 2; +++ } else { +++ // five additional bytes will be required to +++ // replace this IFxxx <l> instruction with +++ // IFNOTxxx <l'> GOTO_W <l>, where IFNOTxxx +++ // is the "opposite" opcode of IFxxx (i.e., +++ // IFNE for IFEQ) and where <l'> designates +++ // the instruction just after the GOTO_W.
+++ insert = 5; +++ } +++ resize[u] = true; +++ } +++ } +++ u += 3; +++ break; +++ case ClassWriter.LABELW_INSN: +++ u += 5; +++ break; +++ case ClassWriter.TABL_INSN: +++ if (state == 1) { +++ // true number of bytes to be added (or removed) +++ // from this instruction = (future number of padding +++ // bytes - current number of padding bytes) - +++ // previously overestimated variation = +++ // = ((3 - newOffset%4) - (3 - u%4)) - u%4 +++ // = (-newOffset%4 + u%4) - u%4 +++ // = -(newOffset & 3) +++ newOffset = getNewOffset(allIndexes, allSizes, 0, u); +++ insert = -(newOffset & 3); +++ } else if (!resize[u]) { +++ // overestimation of the number of bytes to be +++ // added to this instruction = 3 - current number +++ // of padding bytes = 3 - (3 - u%4) = u%4 = u & 3 +++ insert = u & 3; +++ resize[u] = true; +++ } +++ // skips instruction +++ u = u + 4 - (u & 3); +++ u += 4 * (readInt(b, u + 8) - readInt(b, u + 4) + 1) + 12; +++ break; +++ case ClassWriter.LOOK_INSN: +++ if (state == 1) { +++ // like TABL_INSN +++ newOffset = getNewOffset(allIndexes, allSizes, 0, u); +++ insert = -(newOffset & 3); +++ } else if (!resize[u]) { +++ // like TABL_INSN +++ insert = u & 3; +++ resize[u] = true; +++ } +++ // skips instruction +++ u = u + 4 - (u & 3); +++ u += 8 * readInt(b, u + 4) + 8; +++ break; +++ case ClassWriter.WIDE_INSN: +++ opcode = b[u + 1] & 0xFF; +++ if (opcode == Opcodes.IINC) { +++ u += 6; +++ } else { +++ u += 4; +++ } +++ break; +++ case ClassWriter.VAR_INSN: +++ case ClassWriter.SBYTE_INSN: +++ case ClassWriter.LDC_INSN: +++ u += 2; +++ break; +++ case ClassWriter.SHORT_INSN: +++ case ClassWriter.LDCW_INSN: +++ case ClassWriter.FIELDORMETH_INSN: +++ case ClassWriter.TYPE_INSN: +++ case ClassWriter.IINC_INSN: +++ u += 3; +++ break; +++ case ClassWriter.ITFMETH_INSN: +++ case ClassWriter.INDYMETH_INSN: +++ u += 5; +++ break; +++ // case ClassWriter.MANA_INSN: +++ default: +++ u += 4; +++ break; +++ } +++ if (insert != 0) { +++ // adds a new (u, insert)
entry in the allIndexes and +++ // allSizes arrays +++ int[] newIndexes = new int[allIndexes.length + 1]; +++ int[] newSizes = new int[allSizes.length + 1]; +++ System.arraycopy(allIndexes, 0, newIndexes, 0, +++ allIndexes.length); +++ System.arraycopy(allSizes, 0, newSizes, 0, allSizes.length); +++ newIndexes[allIndexes.length] = u; /* u was already advanced past the instruction, which is the index form getNewOffset documents */ +++ newSizes[allSizes.length] = insert; +++ allIndexes = newIndexes; +++ allSizes = newSizes; +++ if (insert > 0) { +++ state = 3; /* an instruction grew, so other jumps may now be out of range: scan again */ +++ } +++ } +++ } +++ if (state < 3) { +++ --state; +++ } +++ } while (state != 0); +++ +++ // 2nd step: +++ // copies the bytecode of the method into a new bytevector, updates the +++ // offsets, and inserts (or removes) bytes as requested. +++ +++ ByteVector newCode = new ByteVector(code.length); +++ +++ u = 0; +++ while (u < code.length) { +++ int opcode = b[u] & 0xFF; +++ switch (ClassWriter.TYPE[opcode]) { +++ case ClassWriter.NOARG_INSN: +++ case ClassWriter.IMPLVAR_INSN: +++ newCode.putByte(opcode); +++ u += 1; +++ break; +++ case ClassWriter.LABEL_INSN: +++ if (opcode > 201) { +++ // changes temporary opcodes 202 to 217 (inclusive), 218 +++ // and 219 to IFEQ ... JSR (inclusive), IFNULL and +++ // IFNONNULL +++ opcode = opcode < 218 ? opcode - 49 : opcode - 20; +++ label = u + readUnsignedShort(b, u + 1); +++ } else { +++ label = u + readShort(b, u + 1); +++ } +++ newOffset = getNewOffset(allIndexes, allSizes, u, label); +++ if (resize[u]) { +++ // replaces GOTO with GOTO_W, JSR with JSR_W and IFxxx +++ // <l> with IFNOTxxx <l'> GOTO_W <l>, where IFNOTxxx is +++ // the "opposite" opcode of IFxxx (i.e., IFNE for IFEQ) +++ // and where <l'> designates the instruction just after +++ // the GOTO_W. +++ if (opcode == Opcodes.GOTO) { +++ newCode.putByte(200); // GOTO_W +++ } else if (opcode == Opcodes.JSR) { +++ newCode.putByte(201); // JSR_W +++ } else { +++ newCode.putByte(opcode <= 166 ?
((opcode + 1) ^ 1) - 1 +++ : opcode ^ 1); +++ newCode.putShort(8); // jump offset: 3 bytes for this IFNOTxxx plus 5 bytes for the GOTO_W +++ newCode.putByte(200); // GOTO_W +++ // newOffset now computed from start of GOTO_W +++ newOffset -= 3; +++ } +++ newCode.putInt(newOffset); +++ } else { +++ newCode.putByte(opcode); +++ newCode.putShort(newOffset); +++ } +++ u += 3; +++ break; +++ case ClassWriter.LABELW_INSN: +++ label = u + readInt(b, u + 1); +++ newOffset = getNewOffset(allIndexes, allSizes, u, label); +++ newCode.putByte(opcode); +++ newCode.putInt(newOffset); +++ u += 5; +++ break; +++ case ClassWriter.TABL_INSN: +++ // skips 0 to 3 padding bytes +++ v = u; +++ u = u + 4 - (v & 3); +++ // reads and copies instruction +++ newCode.putByte(Opcodes.TABLESWITCH); +++ newCode.putByteArray(null, 0, (4 - newCode.length % 4) % 4); +++ label = v + readInt(b, u); +++ u += 4; +++ newOffset = getNewOffset(allIndexes, allSizes, v, label); +++ newCode.putInt(newOffset); +++ j = readInt(b, u); +++ u += 4; +++ newCode.putInt(j); +++ j = readInt(b, u) - j + 1; +++ u += 4; +++ newCode.putInt(readInt(b, u - 4)); +++ for (; j > 0; --j) { +++ label = v + readInt(b, u); +++ u += 4; +++ newOffset = getNewOffset(allIndexes, allSizes, v, label); +++ newCode.putInt(newOffset); +++ } +++ break; +++ case ClassWriter.LOOK_INSN: +++ // skips 0 to 3 padding bytes +++ v = u; +++ u = u + 4 - (v & 3); +++ // reads and copies instruction +++ newCode.putByte(Opcodes.LOOKUPSWITCH); +++ newCode.putByteArray(null, 0, (4 - newCode.length % 4) % 4); +++ label = v + readInt(b, u); +++ u += 4; +++ newOffset = getNewOffset(allIndexes, allSizes, v, label); +++ newCode.putInt(newOffset); +++ j = readInt(b, u); +++ u += 4; +++ newCode.putInt(j); +++ for (; j > 0; --j) { +++ newCode.putInt(readInt(b, u)); +++ u += 4; +++ label = v + readInt(b, u); +++ u += 4; +++ newOffset = getNewOffset(allIndexes, allSizes, v, label); +++ newCode.putInt(newOffset); +++ } +++ break; +++ case ClassWriter.WIDE_INSN: +++ opcode = b[u + 1] & 0xFF; +++ if (opcode ==
Opcodes.IINC) { +++ newCode.putByteArray(b, u, 6); +++ u += 6; +++ } else { +++ newCode.putByteArray(b, u, 4); +++ u += 4; +++ } +++ break; +++ case ClassWriter.VAR_INSN: +++ case ClassWriter.SBYTE_INSN: +++ case ClassWriter.LDC_INSN: +++ newCode.putByteArray(b, u, 2); +++ u += 2; +++ break; +++ case ClassWriter.SHORT_INSN: +++ case ClassWriter.LDCW_INSN: +++ case ClassWriter.FIELDORMETH_INSN: +++ case ClassWriter.TYPE_INSN: +++ case ClassWriter.IINC_INSN: +++ newCode.putByteArray(b, u, 3); +++ u += 3; +++ break; +++ case ClassWriter.ITFMETH_INSN: +++ case ClassWriter.INDYMETH_INSN: +++ newCode.putByteArray(b, u, 5); +++ u += 5; +++ break; +++ // case MANA_INSN: +++ default: +++ newCode.putByteArray(b, u, 4); +++ u += 4; +++ break; +++ } +++ } +++ +++ // updates the stack map frame labels +++ if (compute == FRAMES) { +++ Label l = labels; +++ while (l != null) { +++ /* +++ * Detects the labels that are just after an IF instruction that +++ * has been resized with the IFNOT GOTO_W pattern. These labels +++ * are now the target of a jump instruction (the IFNOT +++ * instruction). Note that we need the original label position +++ * here. getNewOffset must therefore never have been called for +++ * this label. +++ */ +++ u = l.position - 3; +++ if (u >= 0 && resize[u]) { +++ l.status |= Label.TARGET; +++ } +++ getNewOffset(allIndexes, allSizes, l); +++ l = l.successor; +++ } +++ // Update the offsets in the uninitialized types +++ if (cw.typeTable != null) { +++ for (i = 0; i < cw.typeTable.length; ++i) { +++ Item item = cw.typeTable[i]; +++ if (item != null && item.type == ClassWriter.TYPE_UNINIT) { +++ item.intVal = getNewOffset(allIndexes, allSizes, 0, +++ item.intVal); +++ } +++ } +++ } +++ // The stack map frames are not serialized yet, so we don't need +++ // to update them. They will be serialized in visitMaxs. +++ } else if (frameCount > 0) { +++ /* +++ * Resizing an existing stack map frame table is really hard.
Not +++ * only the table must be parsed to update the offsets, but new +++ * frames may be needed for jump instructions that were inserted by +++ * this method. And updating the offsets or inserting frames can +++ * change the format of the following frames, in case of packed +++ * frames. In practice the whole table must be recomputed. For this +++ * the frames are marked as potentially invalid. This will cause the +++ * whole class to be reread and rewritten with the COMPUTE_FRAMES +++ * option (see the ClassWriter.toByteArray method). This is not very +++ * efficient but is much easier and requires much less code than any +++ * other method I can think of. +++ */ +++ cw.invalidFrames = true; +++ } +++ // updates the exception handler block labels +++ Handler h = firstHandler; +++ while (h != null) { +++ getNewOffset(allIndexes, allSizes, h.start); +++ getNewOffset(allIndexes, allSizes, h.end); +++ getNewOffset(allIndexes, allSizes, h.handler); +++ h = h.next; +++ } +++ // updates the instructions addresses in the +++ // local var and line number tables +++ for (i = 0; i < 2; ++i) { +++ ByteVector bv = i == 0 ?
localVar : localVarType; +++ if (bv != null) { +++ b = bv.data; +++ u = 0; +++ while (u < bv.length) { +++ label = readUnsignedShort(b, u); +++ newOffset = getNewOffset(allIndexes, allSizes, 0, label); +++ writeShort(b, u, newOffset); +++ label += readUnsignedShort(b, u + 2); +++ newOffset = getNewOffset(allIndexes, allSizes, 0, label) +++ - newOffset; +++ writeShort(b, u + 2, newOffset); +++ u += 10; /* entries are 10 bytes apart; only the first two shorts (start offset and length) are rewritten */ +++ } +++ } +++ } +++ if (lineNumber != null) { +++ b = lineNumber.data; +++ u = 0; +++ while (u < lineNumber.length) { +++ writeShort( +++ b, +++ u, +++ getNewOffset(allIndexes, allSizes, 0, +++ readUnsignedShort(b, u))); +++ u += 4; +++ } +++ } +++ // updates the labels of the other attributes +++ Attribute attr = cattrs; +++ while (attr != null) { +++ Label[] labels = attr.getLabels(); +++ if (labels != null) { +++ for (i = labels.length - 1; i >= 0; --i) { +++ getNewOffset(allIndexes, allSizes, labels[i]); +++ } +++ } +++ attr = attr.next; +++ } +++ +++ // replaces old bytecodes with new ones +++ code = newCode; +++ } +++ +++ /** +++ * Reads an unsigned short value in the given byte array (high byte first). +++ * +++ * @param b +++ * a byte array. +++ * @param index +++ * the start index of the value to be read. +++ * @return the read value. +++ */ +++ static int readUnsignedShort(final byte[] b, final int index) { +++ return ((b[index] & 0xFF) << 8) | (b[index + 1] & 0xFF); +++ } +++ +++ /** +++ * Reads a signed short value in the given byte array (high byte first). +++ * +++ * @param b +++ * a byte array. +++ * @param index +++ * the start index of the value to be read. +++ * @return the read value. +++ */ +++ static short readShort(final byte[] b, final int index) { +++ return (short) (((b[index] & 0xFF) << 8) | (b[index + 1] & 0xFF)); +++ } +++ +++ /** +++ * Reads a signed int value in the given byte array. +++ * +++ * @param b +++ * a byte array. +++ * @param index +++ * the start index of the value to be read. +++ * @return the read value.
+++ */ +++ static int readInt(final byte[] b, final int index) { /* b[index] is the most significant byte, b[index + 3] the least significant */ +++ return ((b[index] & 0xFF) << 24) | ((b[index + 1] & 0xFF) << 16) +++ | ((b[index + 2] & 0xFF) << 8) | (b[index + 3] & 0xFF); +++ } +++ +++ /** +++ * Writes a short value in the given byte array, high byte first. Only the 16 low-order bits of s are stored. +++ * +++ * @param b +++ * a byte array. +++ * @param index +++ * where the first byte of the short value must be written. +++ * @param s +++ * the value to be written in the given byte array. +++ */ +++ static void writeShort(final byte[] b, final int index, final int s) { +++ b[index] = (byte) (s >>> 8); +++ b[index + 1] = (byte) s; +++ } +++ +++ /** +++ * Computes the future value of a bytecode offset. +++ * <p> +++ * Note: it is possible to have several entries for the same instruction in +++ * the <tt>indexes</tt> and <tt>sizes</tt>: two entries (index=a,size=b) and +++ * (index=a,size=b') are equivalent to a single entry (index=a,size=b+b'). +++ * +++ * @param indexes +++ * current positions of the instructions to be resized. Each +++ * instruction must be designated by the index of its <i>last</i> +++ * byte, plus one (or, in other words, by the index of the +++ * <i>first</i> byte of the <i>next</i> instruction). +++ * @param sizes +++ * the number of bytes to be <i>added</i> to the above +++ * instructions. More precisely, for each i < <tt>len</tt>, +++ * <tt>sizes</tt>[i] bytes will be added at the end of the +++ * instruction designated by <tt>indexes</tt>[i] or, if +++ * <tt>sizes</tt>[i] is negative, the <i>last</i> | +++ * <tt>sizes[i]</tt>| bytes of the instruction will be removed +++ * (the instruction size <i>must not</i> become negative or +++ * null). +++ * @param begin +++ * index of the first byte of the source instruction. +++ * @param end +++ * index of the first byte of the target instruction. +++ * @return the future value of the given bytecode offset.
+++ */ +++ static int getNewOffset(final int[] indexes, final int[] sizes, +++ final int begin, final int end) { +++ int offset = end - begin; /* current distance, corrected below by every resize entry whose index lies between begin and end */ +++ for (int i = 0; i < indexes.length; ++i) { +++ if (begin < indexes[i] && indexes[i] <= end) { +++ // forward jump +++ offset += sizes[i]; +++ } else if (end < indexes[i] && indexes[i] <= begin) { +++ // backward jump +++ offset -= sizes[i]; +++ } +++ } +++ return offset; +++ } +++ +++ /** +++ * Updates the offset of the given label. Does nothing if the label is already flagged RESIZED. +++ * +++ * @param indexes +++ * current positions of the instructions to be resized. Each +++ * instruction must be designated by the index of its <i>last</i> +++ * byte, plus one (or, in other words, by the index of the +++ * <i>first</i> byte of the <i>next</i> instruction). +++ * @param sizes +++ * the number of bytes to be <i>added</i> to the above +++ * instructions. More precisely, for each i < <tt>len</tt>, +++ * <tt>sizes</tt>[i] bytes will be added at the end of the +++ * instruction designated by <tt>indexes</tt>[i] or, if +++ * <tt>sizes</tt>[i] is negative, the <i>last</i> | +++ * <tt>sizes[i]</tt>| bytes of the instruction will be removed +++ * (the instruction size <i>must not</i> become negative or +++ * null). +++ * @param label +++ * the label whose offset must be updated. +++ */ +++ static void getNewOffset(final int[] indexes, final int[] sizes, +++ final Label label) { +++ if ((label.status & Label.RESIZED) == 0) { /* the RESIZED flag ensures each label position is shifted at most once */ +++ label.position = getNewOffset(indexes, sizes, 0, label.position); +++ label.status |= Label.RESIZED; +++ } +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Opcodes.java b/contrib/asm/src/org/objectweb/asm/Opcodes.java ++new file mode 100644 ++index 0000000..e5c2b33 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Opcodes.java ++@@ -0,0 +1,361 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved.
+++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++/** +++ * Defines the JVM opcodes, access flags and array type codes. This interface +++ * does not define all the JVM opcodes because some opcodes are automatically +++ * handled. For example, the xLOAD and xSTORE opcodes are automatically replaced +++ * by xLOAD_n and xSTORE_n opcodes when possible. The xLOAD_n and xSTORE_n +++ * opcodes are therefore not defined in this interface. 
Likewise for LDC, +++ * automatically replaced by LDC_W or LDC2_W when necessary, WIDE, GOTO_W and +++ * JSR_W. +++ * +++ * @author Eric Bruneton +++ * @author Eugene Kuleshov +++ */ +++public interface Opcodes { +++ +++ // ASM API versions +++ +++ int ASM4 = 4 << 16 | 0 << 8 | 0; +++ int ASM5 = 5 << 16 | 0 << 8 | 0; +++ +++ // versions +++ +++ int V1_1 = 3 << 16 | 45; +++ int V1_2 = 0 << 16 | 46; +++ int V1_3 = 0 << 16 | 47; +++ int V1_4 = 0 << 16 | 48; +++ int V1_5 = 0 << 16 | 49; +++ int V1_6 = 0 << 16 | 50; +++ int V1_7 = 0 << 16 | 51; +++ int V1_8 = 0 << 16 | 52; +++ +++ // access flags +++ +++ int ACC_PUBLIC = 0x0001; // class, field, method +++ int ACC_PRIVATE = 0x0002; // class, field, method +++ int ACC_PROTECTED = 0x0004; // class, field, method +++ int ACC_STATIC = 0x0008; // field, method +++ int ACC_FINAL = 0x0010; // class, field, method, parameter +++ int ACC_SUPER = 0x0020; // class +++ int ACC_SYNCHRONIZED = 0x0020; // method +++ int ACC_VOLATILE = 0x0040; // field +++ int ACC_BRIDGE = 0x0040; // method +++ int ACC_VARARGS = 0x0080; // method +++ int ACC_TRANSIENT = 0x0080; // field +++ int ACC_NATIVE = 0x0100; // method +++ int ACC_INTERFACE = 0x0200; // class +++ int ACC_ABSTRACT = 0x0400; // class, method +++ int ACC_STRICT = 0x0800; // method +++ int ACC_SYNTHETIC = 0x1000; // class, field, method, parameter +++ int ACC_ANNOTATION = 0x2000; // class +++ int ACC_ENUM = 0x4000; // class(?) 
field inner +++ int ACC_MANDATED = 0x8000; // parameter +++ +++ // ASM specific pseudo access flags +++ +++ int ACC_DEPRECATED = 0x20000; // class, field, method +++ +++ // types for NEWARRAY +++ +++ int T_BOOLEAN = 4; +++ int T_CHAR = 5; +++ int T_FLOAT = 6; +++ int T_DOUBLE = 7; +++ int T_BYTE = 8; +++ int T_SHORT = 9; +++ int T_INT = 10; +++ int T_LONG = 11; +++ +++ // tags for Handle +++ +++ int H_GETFIELD = 1; +++ int H_GETSTATIC = 2; +++ int H_PUTFIELD = 3; +++ int H_PUTSTATIC = 4; +++ int H_INVOKEVIRTUAL = 5; +++ int H_INVOKESTATIC = 6; +++ int H_INVOKESPECIAL = 7; +++ int H_NEWINVOKESPECIAL = 8; +++ int H_INVOKEINTERFACE = 9; +++ +++ // stack map frame types +++ +++ /** +++ * Represents an expanded frame. See {@link ClassReader#EXPAND_FRAMES}. +++ */ +++ int F_NEW = -1; +++ +++ /** +++ * Represents a compressed frame with complete frame data. +++ */ +++ int F_FULL = 0; +++ +++ /** +++ * Represents a compressed frame where locals are the same as the locals in +++ * the previous frame, except that additional 1-3 locals are defined, and +++ * with an empty stack. +++ */ +++ int F_APPEND = 1; +++ +++ /** +++ * Represents a compressed frame where locals are the same as the locals in +++ * the previous frame, except that the last 1-3 locals are absent and with +++ * an empty stack. +++ */ +++ int F_CHOP = 2; +++ +++ /** +++ * Represents a compressed frame with exactly the same locals as the +++ * previous frame and with an empty stack. +++ */ +++ int F_SAME = 3; +++ +++ /** +++ * Represents a compressed frame with exactly the same locals as the +++ * previous frame and with a single value on the stack. 
+++ */ +++ int F_SAME1 = 4; +++ +++ Integer TOP = new Integer(0); +++ Integer INTEGER = new Integer(1); +++ Integer FLOAT = new Integer(2); +++ Integer DOUBLE = new Integer(3); +++ Integer LONG = new Integer(4); +++ Integer NULL = new Integer(5); +++ Integer UNINITIALIZED_THIS = new Integer(6); +++ +++ // opcodes // visit method (- = idem) +++ +++ int NOP = 0; // visitInsn +++ int ACONST_NULL = 1; // - +++ int ICONST_M1 = 2; // - +++ int ICONST_0 = 3; // - +++ int ICONST_1 = 4; // - +++ int ICONST_2 = 5; // - +++ int ICONST_3 = 6; // - +++ int ICONST_4 = 7; // - +++ int ICONST_5 = 8; // - +++ int LCONST_0 = 9; // - +++ int LCONST_1 = 10; // - +++ int FCONST_0 = 11; // - +++ int FCONST_1 = 12; // - +++ int FCONST_2 = 13; // - +++ int DCONST_0 = 14; // - +++ int DCONST_1 = 15; // - +++ int BIPUSH = 16; // visitIntInsn +++ int SIPUSH = 17; // - +++ int LDC = 18; // visitLdcInsn +++ // int LDC_W = 19; // - +++ // int LDC2_W = 20; // - +++ int ILOAD = 21; // visitVarInsn +++ int LLOAD = 22; // - +++ int FLOAD = 23; // - +++ int DLOAD = 24; // - +++ int ALOAD = 25; // - +++ // int ILOAD_0 = 26; // - +++ // int ILOAD_1 = 27; // - +++ // int ILOAD_2 = 28; // - +++ // int ILOAD_3 = 29; // - +++ // int LLOAD_0 = 30; // - +++ // int LLOAD_1 = 31; // - +++ // int LLOAD_2 = 32; // - +++ // int LLOAD_3 = 33; // - +++ // int FLOAD_0 = 34; // - +++ // int FLOAD_1 = 35; // - +++ // int FLOAD_2 = 36; // - +++ // int FLOAD_3 = 37; // - +++ // int DLOAD_0 = 38; // - +++ // int DLOAD_1 = 39; // - +++ // int DLOAD_2 = 40; // - +++ // int DLOAD_3 = 41; // - +++ // int ALOAD_0 = 42; // - +++ // int ALOAD_1 = 43; // - +++ // int ALOAD_2 = 44; // - +++ // int ALOAD_3 = 45; // - +++ int IALOAD = 46; // visitInsn +++ int LALOAD = 47; // - +++ int FALOAD = 48; // - +++ int DALOAD = 49; // - +++ int AALOAD = 50; // - +++ int BALOAD = 51; // - +++ int CALOAD = 52; // - +++ int SALOAD = 53; // - +++ int ISTORE = 54; // visitVarInsn +++ int LSTORE = 55; // - +++ int FSTORE = 56; // - +++ int 
DSTORE = 57; // - +++ int ASTORE = 58; // - +++ // int ISTORE_0 = 59; // - +++ // int ISTORE_1 = 60; // - +++ // int ISTORE_2 = 61; // - +++ // int ISTORE_3 = 62; // - +++ // int LSTORE_0 = 63; // - +++ // int LSTORE_1 = 64; // - +++ // int LSTORE_2 = 65; // - +++ // int LSTORE_3 = 66; // - +++ // int FSTORE_0 = 67; // - +++ // int FSTORE_1 = 68; // - +++ // int FSTORE_2 = 69; // - +++ // int FSTORE_3 = 70; // - +++ // int DSTORE_0 = 71; // - +++ // int DSTORE_1 = 72; // - +++ // int DSTORE_2 = 73; // - +++ // int DSTORE_3 = 74; // - +++ // int ASTORE_0 = 75; // - +++ // int ASTORE_1 = 76; // - +++ // int ASTORE_2 = 77; // - +++ // int ASTORE_3 = 78; // - +++ int IASTORE = 79; // visitInsn +++ int LASTORE = 80; // - +++ int FASTORE = 81; // - +++ int DASTORE = 82; // - +++ int AASTORE = 83; // - +++ int BASTORE = 84; // - +++ int CASTORE = 85; // - +++ int SASTORE = 86; // - +++ int POP = 87; // - +++ int POP2 = 88; // - +++ int DUP = 89; // - +++ int DUP_X1 = 90; // - +++ int DUP_X2 = 91; // - +++ int DUP2 = 92; // - +++ int DUP2_X1 = 93; // - +++ int DUP2_X2 = 94; // - +++ int SWAP = 95; // - +++ int IADD = 96; // - +++ int LADD = 97; // - +++ int FADD = 98; // - +++ int DADD = 99; // - +++ int ISUB = 100; // - +++ int LSUB = 101; // - +++ int FSUB = 102; // - +++ int DSUB = 103; // - +++ int IMUL = 104; // - +++ int LMUL = 105; // - +++ int FMUL = 106; // - +++ int DMUL = 107; // - +++ int IDIV = 108; // - +++ int LDIV = 109; // - +++ int FDIV = 110; // - +++ int DDIV = 111; // - +++ int IREM = 112; // - +++ int LREM = 113; // - +++ int FREM = 114; // - +++ int DREM = 115; // - +++ int INEG = 116; // - +++ int LNEG = 117; // - +++ int FNEG = 118; // - +++ int DNEG = 119; // - +++ int ISHL = 120; // - +++ int LSHL = 121; // - +++ int ISHR = 122; // - +++ int LSHR = 123; // - +++ int IUSHR = 124; // - +++ int LUSHR = 125; // - +++ int IAND = 126; // - +++ int LAND = 127; // - +++ int IOR = 128; // - +++ int LOR = 129; // - +++ int IXOR = 130; // - +++ int LXOR = 
131; // - +++ int IINC = 132; // visitIincInsn +++ int I2L = 133; // visitInsn +++ int I2F = 134; // - +++ int I2D = 135; // - +++ int L2I = 136; // - +++ int L2F = 137; // - +++ int L2D = 138; // - +++ int F2I = 139; // - +++ int F2L = 140; // - +++ int F2D = 141; // - +++ int D2I = 142; // - +++ int D2L = 143; // - +++ int D2F = 144; // - +++ int I2B = 145; // - +++ int I2C = 146; // - +++ int I2S = 147; // - +++ int LCMP = 148; // - +++ int FCMPL = 149; // - +++ int FCMPG = 150; // - +++ int DCMPL = 151; // - +++ int DCMPG = 152; // - +++ int IFEQ = 153; // visitJumpInsn +++ int IFNE = 154; // - +++ int IFLT = 155; // - +++ int IFGE = 156; // - +++ int IFGT = 157; // - +++ int IFLE = 158; // - +++ int IF_ICMPEQ = 159; // - +++ int IF_ICMPNE = 160; // - +++ int IF_ICMPLT = 161; // - +++ int IF_ICMPGE = 162; // - +++ int IF_ICMPGT = 163; // - +++ int IF_ICMPLE = 164; // - +++ int IF_ACMPEQ = 165; // - +++ int IF_ACMPNE = 166; // - +++ int GOTO = 167; // - +++ int JSR = 168; // - +++ int RET = 169; // visitVarInsn +++ int TABLESWITCH = 170; // visiTableSwitchInsn +++ int LOOKUPSWITCH = 171; // visitLookupSwitch +++ int IRETURN = 172; // visitInsn +++ int LRETURN = 173; // - +++ int FRETURN = 174; // - +++ int DRETURN = 175; // - +++ int ARETURN = 176; // - +++ int RETURN = 177; // - +++ int GETSTATIC = 178; // visitFieldInsn +++ int PUTSTATIC = 179; // - +++ int GETFIELD = 180; // - +++ int PUTFIELD = 181; // - +++ int INVOKEVIRTUAL = 182; // visitMethodInsn +++ int INVOKESPECIAL = 183; // - +++ int INVOKESTATIC = 184; // - +++ int INVOKEINTERFACE = 185; // - +++ int INVOKEDYNAMIC = 186; // visitInvokeDynamicInsn +++ int NEW = 187; // visitTypeInsn +++ int NEWARRAY = 188; // visitIntInsn +++ int ANEWARRAY = 189; // visitTypeInsn +++ int ARRAYLENGTH = 190; // visitInsn +++ int ATHROW = 191; // - +++ int CHECKCAST = 192; // visitTypeInsn +++ int INSTANCEOF = 193; // - +++ int MONITORENTER = 194; // visitInsn +++ int MONITOREXIT = 195; // - +++ // int WIDE = 196; // 
NOT VISITED +++ int MULTIANEWARRAY = 197; // visitMultiANewArrayInsn +++ int IFNULL = 198; // visitJumpInsn +++ int IFNONNULL = 199; // - +++ // int GOTO_W = 200; // - +++ // int JSR_W = 201; // - +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/Type.java b/contrib/asm/src/org/objectweb/asm/Type.java ++new file mode 100644 ++index 0000000..33a8bf0 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/Type.java ++@@ -0,0 +1,896 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2011 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++package org.objectweb.asm; +++ +++import java.lang.reflect.Constructor; +++import java.lang.reflect.Method; +++ +++/** +++ * A Java field or method type. This class can be used to make it easier to +++ * manipulate type and method descriptors. +++ * +++ * @author Eric Bruneton +++ * @author Chris Nokleberg +++ */ +++public class Type { +++ +++ /** +++ * The sort of the <tt>void</tt> type. See {@link #getSort getSort}. +++ */ +++ public static final int VOID = 0; +++ +++ /** +++ * The sort of the <tt>boolean</tt> type. See {@link #getSort getSort}. +++ */ +++ public static final int BOOLEAN = 1; +++ +++ /** +++ * The sort of the <tt>char</tt> type. See {@link #getSort getSort}. +++ */ +++ public static final int CHAR = 2; +++ +++ /** +++ * The sort of the <tt>byte</tt> type. See {@link #getSort getSort}. +++ */ +++ public static final int BYTE = 3; +++ +++ /** +++ * The sort of the <tt>short</tt> type. See {@link #getSort getSort}. +++ */ +++ public static final int SHORT = 4; +++ +++ /** +++ * The sort of the <tt>int</tt> type. See {@link #getSort getSort}. +++ */ +++ public static final int INT = 5; +++ +++ /** +++ * The sort of the <tt>float</tt> type. See {@link #getSort getSort}. +++ */ +++ public static final int FLOAT = 6; +++ +++ /** +++ * The sort of the <tt>long</tt> type. See {@link #getSort getSort}. +++ */ +++ public static final int LONG = 7; +++ +++ /** +++ * The sort of the <tt>double</tt> type. 
See {@link #getSort getSort}. +++ */ +++ public static final int DOUBLE = 8; +++ +++ /** +++ * The sort of array reference types. See {@link #getSort getSort}. +++ */ +++ public static final int ARRAY = 9; +++ +++ /** +++ * The sort of object reference types. See {@link #getSort getSort}. +++ */ +++ public static final int OBJECT = 10; +++ +++ /** +++ * The sort of method types. See {@link #getSort getSort}. +++ */ +++ public static final int METHOD = 11; +++ +++ /** +++ * The <tt>void</tt> type. +++ */ +++ public static final Type VOID_TYPE = new Type(VOID, null, ('V' << 24) +++ | (5 << 16) | (0 << 8) | 0, 1); +++ +++ /** +++ * The <tt>boolean</tt> type. +++ */ +++ public static final Type BOOLEAN_TYPE = new Type(BOOLEAN, null, ('Z' << 24) +++ | (0 << 16) | (5 << 8) | 1, 1); +++ +++ /** +++ * The <tt>char</tt> type. +++ */ +++ public static final Type CHAR_TYPE = new Type(CHAR, null, ('C' << 24) +++ | (0 << 16) | (6 << 8) | 1, 1); +++ +++ /** +++ * The <tt>byte</tt> type. +++ */ +++ public static final Type BYTE_TYPE = new Type(BYTE, null, ('B' << 24) +++ | (0 << 16) | (5 << 8) | 1, 1); +++ +++ /** +++ * The <tt>short</tt> type. +++ */ +++ public static final Type SHORT_TYPE = new Type(SHORT, null, ('S' << 24) +++ | (0 << 16) | (7 << 8) | 1, 1); +++ +++ /** +++ * The <tt>int</tt> type. +++ */ +++ public static final Type INT_TYPE = new Type(INT, null, ('I' << 24) +++ | (0 << 16) | (0 << 8) | 1, 1); +++ +++ /** +++ * The <tt>float</tt> type. +++ */ +++ public static final Type FLOAT_TYPE = new Type(FLOAT, null, ('F' << 24) +++ | (2 << 16) | (2 << 8) | 1, 1); +++ +++ /** +++ * The <tt>long</tt> type. +++ */ +++ public static final Type LONG_TYPE = new Type(LONG, null, ('J' << 24) +++ | (1 << 16) | (1 << 8) | 2, 1); +++ +++ /** +++ * The <tt>double</tt> type. 
+++ */ +++ public static final Type DOUBLE_TYPE = new Type(DOUBLE, null, ('D' << 24) +++ | (3 << 16) | (3 << 8) | 2, 1); +++ +++ // ------------------------------------------------------------------------ +++ // Fields +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * The sort of this Java type. +++ */ +++ private final int sort; +++ +++ /** +++ * A buffer containing the internal name of this Java type. This field is +++ * only used for reference types. +++ */ +++ private final char[] buf; +++ +++ /** +++ * The offset of the internal name of this Java type in {@link #buf buf} or, +++ * for primitive types, the size, descriptor and getOpcode offsets for this +++ * type (byte 0 contains the size, byte 1 the descriptor, byte 2 the offset +++ * for IALOAD or IASTORE, byte 3 the offset for all other instructions). +++ */ +++ private final int off; +++ +++ /** +++ * The length of the internal name of this Java type. +++ */ +++ private final int len; +++ +++ // ------------------------------------------------------------------------ +++ // Constructors +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Constructs a reference type. +++ * +++ * @param sort +++ * the sort of the reference type to be constructed. +++ * @param buf +++ * a buffer containing the descriptor of the previous type. +++ * @param off +++ * the offset of this descriptor in the previous buffer. +++ * @param len +++ * the length of this descriptor. +++ */ +++ private Type(final int sort, final char[] buf, final int off, final int len) { +++ this.sort = sort; +++ this.buf = buf; +++ this.off = off; +++ this.len = len; +++ } +++ +++ /** +++ * Returns the Java type corresponding to the given type descriptor. +++ * +++ * @param typeDescriptor +++ * a field or method type descriptor. +++ * @return the Java type corresponding to the given type descriptor. 
+++ */ +++ public static Type getType(final String typeDescriptor) { +++ return getType(typeDescriptor.toCharArray(), 0); +++ } +++ +++ /** +++ * Returns the Java type corresponding to the given internal name. +++ * +++ * @param internalName +++ * an internal name. +++ * @return the Java type corresponding to the given internal name. +++ */ +++ public static Type getObjectType(final String internalName) { +++ char[] buf = internalName.toCharArray(); +++ return new Type(buf[0] == '[' ? ARRAY : OBJECT, buf, 0, buf.length); +++ } +++ +++ /** +++ * Returns the Java type corresponding to the given method descriptor. +++ * Equivalent to <code>Type.getType(methodDescriptor)</code>. +++ * +++ * @param methodDescriptor +++ * a method descriptor. +++ * @return the Java type corresponding to the given method descriptor. +++ */ +++ public static Type getMethodType(final String methodDescriptor) { +++ return getType(methodDescriptor.toCharArray(), 0); +++ } +++ +++ /** +++ * Returns the Java method type corresponding to the given argument and +++ * return types. +++ * +++ * @param returnType +++ * the return type of the method. +++ * @param argumentTypes +++ * the argument types of the method. +++ * @return the Java type corresponding to the given argument and return +++ * types. +++ */ +++ public static Type getMethodType(final Type returnType, +++ final Type... argumentTypes) { +++ return getType(getMethodDescriptor(returnType, argumentTypes)); +++ } +++ +++ /** +++ * Returns the Java type corresponding to the given class. +++ * +++ * @param c +++ * a class. +++ * @return the Java type corresponding to the given class. 
+++ */ +++ public static Type getType(final Class<?> c) { +++ if (c.isPrimitive()) { +++ if (c == Integer.TYPE) { +++ return INT_TYPE; +++ } else if (c == Void.TYPE) { +++ return VOID_TYPE; +++ } else if (c == Boolean.TYPE) { +++ return BOOLEAN_TYPE; +++ } else if (c == Byte.TYPE) { +++ return BYTE_TYPE; +++ } else if (c == Character.TYPE) { +++ return CHAR_TYPE; +++ } else if (c == Short.TYPE) { +++ return SHORT_TYPE; +++ } else if (c == Double.TYPE) { +++ return DOUBLE_TYPE; +++ } else if (c == Float.TYPE) { +++ return FLOAT_TYPE; +++ } else /* if (c == Long.TYPE) */{ +++ return LONG_TYPE; +++ } +++ } else { +++ return getType(getDescriptor(c)); +++ } +++ } +++ +++ /** +++ * Returns the Java method type corresponding to the given constructor. +++ * +++ * @param c +++ * a {@link Constructor Constructor} object. +++ * @return the Java method type corresponding to the given constructor. +++ */ +++ public static Type getType(final Constructor<?> c) { +++ return getType(getConstructorDescriptor(c)); +++ } +++ +++ /** +++ * Returns the Java method type corresponding to the given method. +++ * +++ * @param m +++ * a {@link Method Method} object. +++ * @return the Java method type corresponding to the given method. +++ */ +++ public static Type getType(final Method m) { +++ return getType(getMethodDescriptor(m)); +++ } +++ +++ /** +++ * Returns the Java types corresponding to the argument types of the given +++ * method descriptor. +++ * +++ * @param methodDescriptor +++ * a method descriptor. +++ * @return the Java types corresponding to the argument types of the given +++ * method descriptor. 
+++ */ +++ public static Type[] getArgumentTypes(final String methodDescriptor) { +++ char[] buf = methodDescriptor.toCharArray(); +++ int off = 1; +++ int size = 0; +++ while (true) { +++ char car = buf[off++]; +++ if (car == ')') { +++ break; +++ } else if (car == 'L') { +++ while (buf[off++] != ';') { +++ } +++ ++size; +++ } else if (car != '[') { +++ ++size; +++ } +++ } +++ Type[] args = new Type[size]; +++ off = 1; +++ size = 0; +++ while (buf[off] != ')') { +++ args[size] = getType(buf, off); +++ off += args[size].len + (args[size].sort == OBJECT ? 2 : 0); +++ size += 1; +++ } +++ return args; +++ } +++ +++ /** +++ * Returns the Java types corresponding to the argument types of the given +++ * method. +++ * +++ * @param method +++ * a method. +++ * @return the Java types corresponding to the argument types of the given +++ * method. +++ */ +++ public static Type[] getArgumentTypes(final Method method) { +++ Class<?>[] classes = method.getParameterTypes(); +++ Type[] types = new Type[classes.length]; +++ for (int i = classes.length - 1; i >= 0; --i) { +++ types[i] = getType(classes[i]); +++ } +++ return types; +++ } +++ +++ /** +++ * Returns the Java type corresponding to the return type of the given +++ * method descriptor. +++ * +++ * @param methodDescriptor +++ * a method descriptor. +++ * @return the Java type corresponding to the return type of the given +++ * method descriptor. +++ */ +++ public static Type getReturnType(final String methodDescriptor) { +++ char[] buf = methodDescriptor.toCharArray(); +++ return getType(buf, methodDescriptor.indexOf(')') + 1); +++ } +++ +++ /** +++ * Returns the Java type corresponding to the return type of the given +++ * method. +++ * +++ * @param method +++ * a method. +++ * @return the Java type corresponding to the return type of the given +++ * method. 
+++ */ +++ public static Type getReturnType(final Method method) { +++ return getType(method.getReturnType()); +++ } +++ +++ /** +++ * Computes the size of the arguments and of the return value of a method. +++ * +++ * @param desc +++ * the descriptor of a method. +++ * @return the size of the arguments of the method (plus one for the +++ * implicit this argument), argSize, and the size of its return +++ * value, retSize, packed into a single int i = +++ * <tt>(argSize << 2) | retSize</tt> (argSize is therefore equal to +++ * <tt>i >> 2</tt>, and retSize to <tt>i & 0x03</tt>). +++ */ +++ public static int getArgumentsAndReturnSizes(final String desc) { +++ int n = 1; +++ int c = 1; +++ while (true) { +++ char car = desc.charAt(c++); +++ if (car == ')') { +++ car = desc.charAt(c); +++ return n << 2 +++ | (car == 'V' ? 0 : (car == 'D' || car == 'J' ? 2 : 1)); +++ } else if (car == 'L') { +++ while (desc.charAt(c++) != ';') { +++ } +++ n += 1; +++ } else if (car == '[') { +++ while ((car = desc.charAt(c)) == '[') { +++ ++c; +++ } +++ if (car == 'D' || car == 'J') { +++ n -= 1; +++ } +++ } else if (car == 'D' || car == 'J') { +++ n += 2; +++ } else { +++ n += 1; +++ } +++ } +++ } +++ +++ /** +++ * Returns the Java type corresponding to the given type descriptor. For +++ * method descriptors, buf is supposed to contain nothing more than the +++ * descriptor itself. +++ * +++ * @param buf +++ * a buffer containing a type descriptor. +++ * @param off +++ * the offset of this descriptor in the previous buffer. +++ * @return the Java type corresponding to the given type descriptor. 
+++ */ +++ private static Type getType(final char[] buf, final int off) { +++ int len; +++ switch (buf[off]) { +++ case 'V': +++ return VOID_TYPE; +++ case 'Z': +++ return BOOLEAN_TYPE; +++ case 'C': +++ return CHAR_TYPE; +++ case 'B': +++ return BYTE_TYPE; +++ case 'S': +++ return SHORT_TYPE; +++ case 'I': +++ return INT_TYPE; +++ case 'F': +++ return FLOAT_TYPE; +++ case 'J': +++ return LONG_TYPE; +++ case 'D': +++ return DOUBLE_TYPE; +++ case '[': +++ len = 1; +++ while (buf[off + len] == '[') { +++ ++len; +++ } +++ if (buf[off + len] == 'L') { +++ ++len; +++ while (buf[off + len] != ';') { +++ ++len; +++ } +++ } +++ return new Type(ARRAY, buf, off, len + 1); +++ case 'L': +++ len = 1; +++ while (buf[off + len] != ';') { +++ ++len; +++ } +++ return new Type(OBJECT, buf, off + 1, len - 1); +++ // case '(': +++ default: +++ return new Type(METHOD, buf, off, buf.length - off); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Accessors +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the sort of this Java type. +++ * +++ * @return {@link #VOID VOID}, {@link #BOOLEAN BOOLEAN}, {@link #CHAR CHAR}, +++ * {@link #BYTE BYTE}, {@link #SHORT SHORT}, {@link #INT INT}, +++ * {@link #FLOAT FLOAT}, {@link #LONG LONG}, {@link #DOUBLE DOUBLE}, +++ * {@link #ARRAY ARRAY}, {@link #OBJECT OBJECT} or {@link #METHOD +++ * METHOD}. +++ */ +++ public int getSort() { +++ return sort; +++ } +++ +++ /** +++ * Returns the number of dimensions of this array type. This method should +++ * only be used for an array type. +++ * +++ * @return the number of dimensions of this array type. +++ */ +++ public int getDimensions() { +++ int i = 1; +++ while (buf[off + i] == '[') { +++ ++i; +++ } +++ return i; +++ } +++ +++ /** +++ * Returns the type of the elements of this array type. This method should +++ * only be used for an array type. 
+++ * +++ * @return Returns the type of the elements of this array type. +++ */ +++ public Type getElementType() { +++ return getType(buf, off + getDimensions()); +++ } +++ +++ /** +++ * Returns the binary name of the class corresponding to this type. This +++ * method must not be used on method types. +++ * +++ * @return the binary name of the class corresponding to this type. +++ */ +++ public String getClassName() { +++ switch (sort) { +++ case VOID: +++ return "void"; +++ case BOOLEAN: +++ return "boolean"; +++ case CHAR: +++ return "char"; +++ case BYTE: +++ return "byte"; +++ case SHORT: +++ return "short"; +++ case INT: +++ return "int"; +++ case FLOAT: +++ return "float"; +++ case LONG: +++ return "long"; +++ case DOUBLE: +++ return "double"; +++ case ARRAY: +++ StringBuilder sb = new StringBuilder(getElementType().getClassName()); +++ for (int i = getDimensions(); i > 0; --i) { +++ sb.append("[]"); +++ } +++ return sb.toString(); +++ case OBJECT: +++ return new String(buf, off, len).replace('/', '.'); +++ default: +++ return null; +++ } +++ } +++ +++ /** +++ * Returns the internal name of the class corresponding to this object or +++ * array type. The internal name of a class is its fully qualified name (as +++ * returned by Class.getName(), where '.' are replaced by '/'. This method +++ * should only be used for an object or array type. +++ * +++ * @return the internal name of the class corresponding to this object type. +++ */ +++ public String getInternalName() { +++ return new String(buf, off, len); +++ } +++ +++ /** +++ * Returns the argument types of methods of this type. This method should +++ * only be used for method types. +++ * +++ * @return the argument types of methods of this type. +++ */ +++ public Type[] getArgumentTypes() { +++ return getArgumentTypes(getDescriptor()); +++ } +++ +++ /** +++ * Returns the return type of methods of this type. This method should only +++ * be used for method types. 
+++ * +++ * @return the return type of methods of this type. +++ */ +++ public Type getReturnType() { +++ return getReturnType(getDescriptor()); +++ } +++ +++ /** +++ * Returns the size of the arguments and of the return value of methods of +++ * this type. This method should only be used for method types. +++ * +++ * @return the size of the arguments (plus one for the implicit this +++ * argument), argSize, and the size of the return value, retSize, +++ * packed into a single +++ * int i = <tt>(argSize << 2) | retSize</tt> +++ * (argSize is therefore equal to <tt>i >> 2</tt>, +++ * and retSize to <tt>i & 0x03</tt>). +++ */ +++ public int getArgumentsAndReturnSizes() { +++ return getArgumentsAndReturnSizes(getDescriptor()); +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Conversion to type descriptors +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the descriptor corresponding to this Java type. +++ * +++ * @return the descriptor corresponding to this Java type. +++ */ +++ public String getDescriptor() { +++ StringBuffer buf = new StringBuffer(); +++ getDescriptor(buf); +++ return buf.toString(); +++ } +++ +++ /** +++ * Returns the descriptor corresponding to the given argument and return +++ * types. +++ * +++ * @param returnType +++ * the return type of the method. +++ * @param argumentTypes +++ * the argument types of the method. +++ * @return the descriptor corresponding to the given argument and return +++ * types. +++ */ +++ public static String getMethodDescriptor(final Type returnType, +++ final Type... 
argumentTypes) { +++ StringBuffer buf = new StringBuffer(); +++ buf.append('('); +++ for (int i = 0; i < argumentTypes.length; ++i) { +++ argumentTypes[i].getDescriptor(buf); +++ } +++ buf.append(')'); +++ returnType.getDescriptor(buf); +++ return buf.toString(); +++ } +++ +++ /** +++ * Appends the descriptor corresponding to this Java type to the given +++ * string buffer. +++ * +++ * @param buf +++ * the string buffer to which the descriptor must be appended. +++ */ +++ private void getDescriptor(final StringBuffer buf) { +++ if (this.buf == null) { +++ // descriptor is in byte 3 of 'off' for primitive types (buf == +++ // null) +++ buf.append((char) ((off & 0xFF000000) >>> 24)); +++ } else if (sort == OBJECT) { +++ buf.append('L'); +++ buf.append(this.buf, off, len); +++ buf.append(';'); +++ } else { // sort == ARRAY || sort == METHOD +++ buf.append(this.buf, off, len); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Direct conversion from classes to type descriptors, +++ // without intermediate Type objects +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the internal name of the given class. The internal name of a +++ * class is its fully qualified name, as returned by Class.getName(), where +++ * '.' are replaced by '/'. +++ * +++ * @param c +++ * an object or array class. +++ * @return the internal name of the given class. +++ */ +++ public static String getInternalName(final Class<?> c) { +++ return c.getName().replace('.', '/'); +++ } +++ +++ /** +++ * Returns the descriptor corresponding to the given Java type. +++ * +++ * @param c +++ * an object class, a primitive class or an array class. +++ * @return the descriptor corresponding to the given class. 
+++ */ +++ public static String getDescriptor(final Class<?> c) { +++ StringBuffer buf = new StringBuffer(); +++ getDescriptor(buf, c); +++ return buf.toString(); +++ } +++ +++ /** +++ * Returns the descriptor corresponding to the given constructor. +++ * +++ * @param c +++ * a {@link Constructor Constructor} object. +++ * @return the descriptor of the given constructor. +++ */ +++ public static String getConstructorDescriptor(final Constructor<?> c) { +++ Class<?>[] parameters = c.getParameterTypes(); +++ StringBuffer buf = new StringBuffer(); +++ buf.append('('); +++ for (int i = 0; i < parameters.length; ++i) { +++ getDescriptor(buf, parameters[i]); +++ } +++ return buf.append(")V").toString(); +++ } +++ +++ /** +++ * Returns the descriptor corresponding to the given method. +++ * +++ * @param m +++ * a {@link Method Method} object. +++ * @return the descriptor of the given method. +++ */ +++ public static String getMethodDescriptor(final Method m) { +++ Class<?>[] parameters = m.getParameterTypes(); +++ StringBuffer buf = new StringBuffer(); +++ buf.append('('); +++ for (int i = 0; i < parameters.length; ++i) { +++ getDescriptor(buf, parameters[i]); +++ } +++ buf.append(')'); +++ getDescriptor(buf, m.getReturnType()); +++ return buf.toString(); +++ } +++ +++ /** +++ * Appends the descriptor of the given class to the given string buffer. +++ * +++ * @param buf +++ * the string buffer to which the descriptor must be appended. +++ * @param c +++ * the class whose descriptor must be computed. 
+++ */ +++ private static void getDescriptor(final StringBuffer buf, final Class<?> c) { +++ Class<?> d = c; +++ while (true) { +++ if (d.isPrimitive()) { +++ char car; +++ if (d == Integer.TYPE) { +++ car = 'I'; +++ } else if (d == Void.TYPE) { +++ car = 'V'; +++ } else if (d == Boolean.TYPE) { +++ car = 'Z'; +++ } else if (d == Byte.TYPE) { +++ car = 'B'; +++ } else if (d == Character.TYPE) { +++ car = 'C'; +++ } else if (d == Short.TYPE) { +++ car = 'S'; +++ } else if (d == Double.TYPE) { +++ car = 'D'; +++ } else if (d == Float.TYPE) { +++ car = 'F'; +++ } else /* if (d == Long.TYPE) */{ +++ car = 'J'; +++ } +++ buf.append(car); +++ return; +++ } else if (d.isArray()) { +++ buf.append('['); +++ d = d.getComponentType(); +++ } else { +++ buf.append('L'); +++ String name = d.getName(); +++ int len = name.length(); +++ for (int i = 0; i < len; ++i) { +++ char car = name.charAt(i); +++ buf.append(car == '.' ? '/' : car); +++ } +++ buf.append(';'); +++ return; +++ } +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Corresponding size and opcodes +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Returns the size of values of this type. This method must not be used for +++ * method types. +++ * +++ * @return the size of values of this type, i.e., 2 for <tt>long</tt> and +++ * <tt>double</tt>, 0 for <tt>void</tt> and 1 otherwise. +++ */ +++ public int getSize() { +++ // the size is in byte 0 of 'off' for primitive types (buf == null) +++ return buf == null ? (off & 0xFF) : 1; +++ } +++ +++ /** +++ * Returns a JVM instruction opcode adapted to this Java type. This method +++ * must not be used for method types. +++ * +++ * @param opcode +++ * a JVM instruction opcode. This opcode must be one of ILOAD, +++ * ISTORE, IALOAD, IASTORE, IADD, ISUB, IMUL, IDIV, IREM, INEG, +++ * ISHL, ISHR, IUSHR, IAND, IOR, IXOR and IRETURN. 
+++ * @return an opcode that is similar to the given opcode, but adapted to +++ * this Java type. For example, if this type is <tt>float</tt> and +++ * <tt>opcode</tt> is IRETURN, this method returns FRETURN. +++ */ +++ public int getOpcode(final int opcode) { +++ if (opcode == Opcodes.IALOAD || opcode == Opcodes.IASTORE) { +++ // the offset for IALOAD or IASTORE is in byte 1 of 'off' for +++ // primitive types (buf == null) +++ return opcode + (buf == null ? (off & 0xFF00) >> 8 : 4); +++ } else { +++ // the offset for other instructions is in byte 2 of 'off' for +++ // primitive types (buf == null) +++ return opcode + (buf == null ? (off & 0xFF0000) >> 16 : 4); +++ } +++ } +++ +++ // ------------------------------------------------------------------------ +++ // Equals, hashCode and toString +++ // ------------------------------------------------------------------------ +++ +++ /** +++ * Tests if the given object is equal to this type. +++ * +++ * @param o +++ * the object to be compared to this type. +++ * @return <tt>true</tt> if the given object is equal to this type. +++ */ +++ @Override +++ public boolean equals(final Object o) { +++ if (this == o) { +++ return true; +++ } +++ if (!(o instanceof Type)) { +++ return false; +++ } +++ Type t = (Type) o; +++ if (sort != t.sort) { +++ return false; +++ } +++ if (sort >= ARRAY) { +++ if (len != t.len) { +++ return false; +++ } +++ for (int i = off, j = t.off, end = i + len; i < end; i++, j++) { +++ if (buf[i] != t.buf[j]) { +++ return false; +++ } +++ } +++ } +++ return true; +++ } +++ +++ /** +++ * Returns a hash code value for this type. +++ * +++ * @return a hash code value for this type. +++ */ +++ @Override +++ public int hashCode() { +++ int hc = 13 * sort; +++ if (sort >= ARRAY) { +++ for (int i = off, end = i + len; i < end; i++) { +++ hc = 17 * (hc + buf[i]); +++ } +++ } +++ return hc; +++ } +++ +++ /** +++ * Returns a string representation of this type. +++ * +++ * @return the descriptor of this type. 
+++ */ +++ @Override +++ public String toString() { +++ return getDescriptor(); +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/TypePath.java b/contrib/asm/src/org/objectweb/asm/TypePath.java ++new file mode 100644 ++index 0000000..d9c99b1 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/TypePath.java ++@@ -0,0 +1,196 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2013 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++ +++package org.objectweb.asm; +++ +++/** +++ * The path to a type argument, wildcard bound, array element type, or static +++ * inner type within an enclosing type. +++ * +++ * @author Eric Bruneton +++ */ +++public class TypePath { +++ +++ /** +++ * A type path step that steps into the element type of an array type. See +++ * {@link #getStep getStep}. +++ */ +++ public final static int ARRAY_ELEMENT = 0; +++ +++ /** +++ * A type path step that steps into the nested type of a class type. See +++ * {@link #getStep getStep}. +++ */ +++ public final static int INNER_TYPE = 1; +++ +++ /** +++ * A type path step that steps into the bound of a wildcard type. See +++ * {@link #getStep getStep}. +++ */ +++ public final static int WILDCARD_BOUND = 2; +++ +++ /** +++ * A type path step that steps into a type argument of a generic type. See +++ * {@link #getStep getStep}. +++ */ +++ public final static int TYPE_ARGUMENT = 3; +++ +++ /** +++ * The byte array where the path is stored, in Java class file format. +++ */ +++ byte[] b; +++ +++ /** +++ * The offset of the first byte of the type path in 'b'. +++ */ +++ int offset; +++ +++ /** +++ * Creates a new type path. +++ * +++ * @param b +++ * the byte array containing the type path in Java class file +++ * format. +++ * @param offset +++ * the offset of the first byte of the type path in 'b'. 
+++ */ +++ TypePath(byte[] b, int offset) { +++ this.b = b; +++ this.offset = offset; +++ } +++ +++ /** +++ * Returns the length of this path. +++ * +++ * @return the length of this path. +++ */ +++ public int getLength() { +++ return b[offset]; +++ } +++ +++ /** +++ * Returns the value of the given step of this path. +++ * +++ * @param index +++ * an index between 0 and {@link #getLength()}, exclusive. +++ * @return {@link #ARRAY_ELEMENT ARRAY_ELEMENT}, {@link #INNER_TYPE +++ * INNER_TYPE}, {@link #WILDCARD_BOUND WILDCARD_BOUND}, or +++ * {@link #TYPE_ARGUMENT TYPE_ARGUMENT}. +++ */ +++ public int getStep(int index) { +++ return b[offset + 2 * index + 1]; +++ } +++ +++ /** +++ * Returns the index of the type argument that the given step is stepping +++ * into. This method should only be used for steps whose value is +++ * {@link #TYPE_ARGUMENT TYPE_ARGUMENT}. +++ * +++ * @param index +++ * an index between 0 and {@link #getLength()}, exclusive. +++ * @return the index of the type argument that the given step is stepping +++ * into. +++ */ +++ public int getStepArgument(int index) { +++ return b[offset + 2 * index + 2]; +++ } +++ +++ /** +++ * Converts a type path in string form, in the format used by +++ * {@link #toString()}, into a TypePath object. +++ * +++ * @param typePath +++ * a type path in string form, in the format used by +++ * {@link #toString()}. May be null or empty. +++ * @return the corresponding TypePath object, or null if the path is empty. 
+++ */ +++ public static TypePath fromString(final String typePath) { +++ if (typePath == null || typePath.length() == 0) { +++ return null; +++ } +++ int n = typePath.length(); +++ ByteVector out = new ByteVector(n); +++ out.putByte(0); +++ for (int i = 0; i < n;) { +++ char c = typePath.charAt(i++); +++ if (c == '[') { +++ out.put11(ARRAY_ELEMENT, 0); +++ } else if (c == '.') { +++ out.put11(INNER_TYPE, 0); +++ } else if (c == '*') { +++ out.put11(WILDCARD_BOUND, 0); +++ } else if (c >= '0' && c <= '9') { +++ int typeArg = c - '0'; +++ while (i < n && (c = typePath.charAt(i)) >= '0' && c <= '9') { +++ typeArg = typeArg * 10 + c - '0'; +++ i += 1; +++ } +++ if (i < n && typePath.charAt(i) == ';') { +++ i += 1; +++ } +++ out.put11(TYPE_ARGUMENT, typeArg); +++ } +++ } +++ out.data[0] = (byte) (out.length / 2); +++ return new TypePath(out.data, 0); +++ } +++ +++ /** +++ * Returns a string representation of this type path. {@link #ARRAY_ELEMENT +++ * ARRAY_ELEMENT} steps are represented with '[', {@link #INNER_TYPE +++ * INNER_TYPE} steps with '.', {@link #WILDCARD_BOUND WILDCARD_BOUND} steps +++ * with '*' and {@link #TYPE_ARGUMENT TYPE_ARGUMENT} steps with their type +++ * argument index in decimal form followed by ';'. 
+++ */ +++ @Override +++ public String toString() { +++ int length = getLength(); +++ StringBuilder result = new StringBuilder(length * 2); +++ for (int i = 0; i < length; ++i) { +++ switch (getStep(i)) { +++ case ARRAY_ELEMENT: +++ result.append('['); +++ break; +++ case INNER_TYPE: +++ result.append('.'); +++ break; +++ case WILDCARD_BOUND: +++ result.append('*'); +++ break; +++ case TYPE_ARGUMENT: +++ result.append(getStepArgument(i)).append(';'); +++ break; +++ default: +++ result.append('_'); +++ } +++ } +++ return result.toString(); +++ } +++} ++diff --git a/contrib/asm/src/org/objectweb/asm/TypeReference.java b/contrib/asm/src/org/objectweb/asm/TypeReference.java ++new file mode 100644 ++index 0000000..dff76c0 ++--- /dev/null +++++ b/contrib/asm/src/org/objectweb/asm/TypeReference.java ++@@ -0,0 +1,452 @@ +++/*** +++ * ASM: a very small and fast Java bytecode manipulation framework +++ * Copyright (c) 2000-2013 INRIA, France Telecom +++ * All rights reserved. +++ * +++ * Redistribution and use in source and binary forms, with or without +++ * modification, are permitted provided that the following conditions +++ * are met: +++ * 1. Redistributions of source code must retain the above copyright +++ * notice, this list of conditions and the following disclaimer. +++ * 2. Redistributions in binary form must reproduce the above copyright +++ * notice, this list of conditions and the following disclaimer in the +++ * documentation and/or other materials provided with the distribution. +++ * 3. Neither the name of the copyright holders nor the names of its +++ * contributors may be used to endorse or promote products derived from +++ * this software without specific prior written permission. +++ * +++ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +++ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +++ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +++ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +++ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +++ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +++ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +++ * THE POSSIBILITY OF SUCH DAMAGE. +++ */ +++ +++package org.objectweb.asm; +++ +++/** +++ * A reference to a type appearing in a class, field or method declaration, or +++ * on an instruction. Such a reference designates the part of the class where +++ * the referenced type is appearing (e.g. an 'extends', 'implements' or 'throws' +++ * clause, a 'new' instruction, a 'catch' clause, a type cast, a local variable +++ * declaration, etc). +++ * +++ * @author Eric Bruneton +++ */ +++public class TypeReference { +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * class. See {@link #getSort getSort}. +++ */ +++ public final static int CLASS_TYPE_PARAMETER = 0x00; +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * method. See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_TYPE_PARAMETER = 0x01; +++ +++ /** +++ * The sort of type references that target the super class of a class or one +++ * of the interfaces it implements. See {@link #getSort getSort}. +++ */ +++ public final static int CLASS_EXTENDS = 0x10; +++ +++ /** +++ * The sort of type references that target a bound of a type parameter of a +++ * generic class. See {@link #getSort getSort}. +++ */ +++ public final static int CLASS_TYPE_PARAMETER_BOUND = 0x11; +++ +++ /** +++ * The sort of type references that target a bound of a type parameter of a +++ * generic method. 
See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_TYPE_PARAMETER_BOUND = 0x12; +++ +++ /** +++ * The sort of type references that target the type of a field. See +++ * {@link #getSort getSort}. +++ */ +++ public final static int FIELD = 0x13; +++ +++ /** +++ * The sort of type references that target the return type of a method. See +++ * {@link #getSort getSort}. +++ */ +++ public final static int METHOD_RETURN = 0x14; +++ +++ /** +++ * The sort of type references that target the receiver type of a method. +++ * See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_RECEIVER = 0x15; +++ +++ /** +++ * The sort of type references that target the type of a formal parameter of +++ * a method. See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_FORMAL_PARAMETER = 0x16; +++ +++ /** +++ * The sort of type references that target the type of an exception declared +++ * in the throws clause of a method. See {@link #getSort getSort}. +++ */ +++ public final static int THROWS = 0x17; +++ +++ /** +++ * The sort of type references that target the type of a local variable in a +++ * method. See {@link #getSort getSort}. +++ */ +++ public final static int LOCAL_VARIABLE = 0x40; +++ +++ /** +++ * The sort of type references that target the type of a resource variable +++ * in a method. See {@link #getSort getSort}. +++ */ +++ public final static int RESOURCE_VARIABLE = 0x41; +++ +++ /** +++ * The sort of type references that target the type of the exception of a +++ * 'catch' clause in a method. See {@link #getSort getSort}. +++ */ +++ public final static int EXCEPTION_PARAMETER = 0x42; +++ +++ /** +++ * The sort of type references that target the type declared in an +++ * 'instanceof' instruction. See {@link #getSort getSort}. +++ */ +++ public final static int INSTANCEOF = 0x43; +++ +++ /** +++ * The sort of type references that target the type of the object created by +++ * a 'new' instruction. 
See {@link #getSort getSort}. +++ */ +++ public final static int NEW = 0x44; +++ +++ /** +++ * The sort of type references that target the receiver type of a +++ * constructor reference. See {@link #getSort getSort}. +++ */ +++ public final static int CONSTRUCTOR_REFERENCE = 0x45; +++ +++ /** +++ * The sort of type references that target the receiver type of a method +++ * reference. See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_REFERENCE = 0x46; +++ +++ /** +++ * The sort of type references that target the type declared in an explicit +++ * or implicit cast instruction. See {@link #getSort getSort}. +++ */ +++ public final static int CAST = 0x47; +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * constructor in a constructor call. See {@link #getSort getSort}. +++ */ +++ public final static int CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT = 0x48; +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * method in a method call. See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_INVOCATION_TYPE_ARGUMENT = 0x49; +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * constructor in a constructor reference. See {@link #getSort getSort}. +++ */ +++ public final static int CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT = 0x4A; +++ +++ /** +++ * The sort of type references that target a type parameter of a generic +++ * method in a method reference. See {@link #getSort getSort}. +++ */ +++ public final static int METHOD_REFERENCE_TYPE_ARGUMENT = 0x4B; +++ +++ /** +++ * The type reference value in Java class file format. +++ */ +++ private int value; +++ +++ /** +++ * Creates a new TypeReference. +++ * +++ * @param typeRef +++ * the int encoded value of the type reference, as received in a +++ * visit method related to type annotations, like +++ * visitTypeAnnotation. 
+++ */ +++ public TypeReference(int typeRef) { +++ this.value = typeRef; +++ } +++ +++ /** +++ * Returns a type reference of the given sort. +++ * +++ * @param sort +++ * {@link #FIELD FIELD}, {@link #METHOD_RETURN METHOD_RETURN}, +++ * {@link #METHOD_RECEIVER METHOD_RECEIVER}, +++ * {@link #LOCAL_VARIABLE LOCAL_VARIABLE}, +++ * {@link #RESOURCE_VARIABLE RESOURCE_VARIABLE}, +++ * {@link #INSTANCEOF INSTANCEOF}, {@link #NEW NEW}, +++ * {@link #CONSTRUCTOR_REFERENCE CONSTRUCTOR_REFERENCE}, or +++ * {@link #METHOD_REFERENCE METHOD_REFERENCE}. +++ * @return a type reference of the given sort. +++ */ +++ public static TypeReference newTypeReference(int sort) { +++ return new TypeReference(sort << 24); +++ } +++ +++ /** +++ * Returns a reference to a type parameter of a generic class or method. +++ * +++ * @param sort +++ * {@link #CLASS_TYPE_PARAMETER CLASS_TYPE_PARAMETER} or +++ * {@link #METHOD_TYPE_PARAMETER METHOD_TYPE_PARAMETER}. +++ * @param paramIndex +++ * the type parameter index. +++ * @return a reference to the given generic class or method type parameter. +++ */ +++ public static TypeReference newTypeParameterReference(int sort, +++ int paramIndex) { +++ return new TypeReference((sort << 24) | (paramIndex << 16)); +++ } +++ +++ /** +++ * Returns a reference to a type parameter bound of a generic class or +++ * method. +++ * +++ * @param sort +++ * {@link #CLASS_TYPE_PARAMETER_BOUND CLASS_TYPE_PARAMETER_BOUND} or +++ * {@link #METHOD_TYPE_PARAMETER_BOUND METHOD_TYPE_PARAMETER_BOUND}. +++ * @param paramIndex +++ * the type parameter index. +++ * @param boundIndex +++ * the type bound index within the above type parameters. +++ * @return a reference to the given generic class or method type parameter +++ * bound. 
+++ */ +++ public static TypeReference newTypeParameterBoundReference(int sort, +++ int paramIndex, int boundIndex) { +++ return new TypeReference((sort << 24) | (paramIndex << 16) +++ | (boundIndex << 8)); +++ } +++ +++ /** +++ * Returns a reference to the super class or to an interface of the +++ * 'implements' clause of a class. +++ * +++ * @param itfIndex +++ * the index of an interface in the 'implements' clause of a +++ * class, or -1 to reference the super class of the class. +++ * @return a reference to the given super type of a class. +++ */ +++ public static TypeReference newSuperTypeReference(int itfIndex) { +++ itfIndex &= 0xFFFF; +++ return new TypeReference((CLASS_EXTENDS << 24) | (itfIndex << 8)); +++ } +++ +++ /** +++ * Returns a reference to the type of a formal parameter of a method. +++ * +++ * @param paramIndex +++ * the formal parameter index. +++ * +++ * @return a reference to the type of the given method formal parameter. +++ */ +++ public static TypeReference newFormalParameterReference(int paramIndex) { +++ return new TypeReference((METHOD_FORMAL_PARAMETER << 24) +++ | (paramIndex << 16)); +++ } +++ +++ /** +++ * Returns a reference to the type of an exception, in a 'throws' clause of +++ * a method. +++ * +++ * @param exceptionIndex +++ * the index of an exception in a 'throws' clause of a method. +++ * +++ * @return a reference to the type of the given exception. +++ */ +++ public static TypeReference newExceptionReference(int exceptionIndex) { +++ return new TypeReference((THROWS << 24) | (exceptionIndex << 8)); +++ } +++ +++ /** +++ * Returns a reference to the type of the exception declared in a 'catch' +++ * clause of a method. +++ * +++ * @param tryCatchBlockIndex +++ * the index of a try catch block (using the order in which they +++ * are visited with visitTryCatchBlock). +++ * +++ * @return a reference to the type of the given exception. 
+++ */ +++ public static TypeReference newTryCatchReference(int tryCatchBlockIndex) { +++ return new TypeReference((EXCEPTION_PARAMETER << 24) +++ | (tryCatchBlockIndex << 8)); +++ } +++ +++ /** +++ * Returns a reference to the type of a type argument in a constructor or +++ * method call or reference. +++ * +++ * @param sort +++ * {@link #CAST CAST}, +++ * {@link #CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ * CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #METHOD_INVOCATION_TYPE_ARGUMENT +++ * METHOD_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ * CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT}, or +++ * {@link #METHOD_REFERENCE_TYPE_ARGUMENT +++ * METHOD_REFERENCE_TYPE_ARGUMENT}. +++ * @param argIndex +++ * the type argument index. +++ * +++ * @return a reference to the type of the given type argument. +++ */ +++ public static TypeReference newTypeArgumentReference(int sort, int argIndex) { +++ return new TypeReference((sort << 24) | argIndex); +++ } +++ +++ /** +++ * Returns the sort of this type reference. 
+++ * +++ * @return {@link #CLASS_TYPE_PARAMETER CLASS_TYPE_PARAMETER}, +++ * {@link #METHOD_TYPE_PARAMETER METHOD_TYPE_PARAMETER}, +++ * {@link #CLASS_EXTENDS CLASS_EXTENDS}, +++ * {@link #CLASS_TYPE_PARAMETER_BOUND CLASS_TYPE_PARAMETER_BOUND}, +++ * {@link #METHOD_TYPE_PARAMETER_BOUND METHOD_TYPE_PARAMETER_BOUND}, +++ * {@link #FIELD FIELD}, {@link #METHOD_RETURN METHOD_RETURN}, +++ * {@link #METHOD_RECEIVER METHOD_RECEIVER}, +++ * {@link #METHOD_FORMAL_PARAMETER METHOD_FORMAL_PARAMETER}, +++ * {@link #THROWS THROWS}, {@link #LOCAL_VARIABLE LOCAL_VARIABLE}, +++ * {@link #RESOURCE_VARIABLE RESOURCE_VARIABLE}, +++ * {@link #EXCEPTION_PARAMETER EXCEPTION_PARAMETER}, +++ * {@link #INSTANCEOF INSTANCEOF}, {@link #NEW NEW}, +++ * {@link #CONSTRUCTOR_REFERENCE CONSTRUCTOR_REFERENCE}, +++ * {@link #METHOD_REFERENCE METHOD_REFERENCE}, {@link #CAST CAST}, +++ * {@link #CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ * CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #METHOD_INVOCATION_TYPE_ARGUMENT +++ * METHOD_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ * CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT}, or +++ * {@link #METHOD_REFERENCE_TYPE_ARGUMENT +++ * METHOD_REFERENCE_TYPE_ARGUMENT}. +++ */ +++ public int getSort() { +++ return value >>> 24; +++ } +++ +++ /** +++ * Returns the index of the type parameter referenced by this type +++ * reference. This method must only be used for type references whose sort +++ * is {@link #CLASS_TYPE_PARAMETER CLASS_TYPE_PARAMETER}, +++ * {@link #METHOD_TYPE_PARAMETER METHOD_TYPE_PARAMETER}, +++ * {@link #CLASS_TYPE_PARAMETER_BOUND CLASS_TYPE_PARAMETER_BOUND} or +++ * {@link #METHOD_TYPE_PARAMETER_BOUND METHOD_TYPE_PARAMETER_BOUND}. +++ * +++ * @return a type parameter index. 
+++ */ +++ public int getTypeParameterIndex() { +++ return (value & 0x00FF0000) >> 16; +++ } +++ +++ /** +++ * Returns the index of the type parameter bound, within the type parameter +++ * {@link #getTypeParameterIndex}, referenced by this type reference. This +++ * method must only be used for type references whose sort is +++ * {@link #CLASS_TYPE_PARAMETER_BOUND CLASS_TYPE_PARAMETER_BOUND} or +++ * {@link #METHOD_TYPE_PARAMETER_BOUND METHOD_TYPE_PARAMETER_BOUND}. +++ * +++ * @return a type parameter bound index. +++ */ +++ public int getTypeParameterBoundIndex() { +++ return (value & 0x0000FF00) >> 8; +++ } +++ +++ /** +++ * Returns the index of the "super type" of a class that is referenced by +++ * this type reference. This method must only be used for type references +++ * whose sort is {@link #CLASS_EXTENDS CLASS_EXTENDS}. +++ * +++ * @return the index of an interface in the 'implements' clause of a class, +++ * or -1 if this type reference references the type of the super +++ * class. +++ */ +++ public int getSuperTypeIndex() { +++ return (short) ((value & 0x00FFFF00) >> 8); +++ } +++ +++ /** +++ * Returns the index of the formal parameter whose type is referenced by +++ * this type reference. This method must only be used for type references +++ * whose sort is {@link #METHOD_FORMAL_PARAMETER METHOD_FORMAL_PARAMETER}. +++ * +++ * @return a formal parameter index. +++ */ +++ public int getFormalParameterIndex() { +++ return (value & 0x00FF0000) >> 16; +++ } +++ +++ /** +++ * Returns the index of the exception, in a 'throws' clause of a method, +++ * whose type is referenced by this type reference. This method must only be +++ * used for type references whose sort is {@link #THROWS THROWS}. +++ * +++ * @return the index of an exception in the 'throws' clause of a method. 
+++ */ +++ public int getExceptionIndex() { +++ return (value & 0x00FFFF00) >> 8; +++ } +++ +++ /** +++ * Returns the index of the try catch block (using the order in which they +++ * are visited with visitTryCatchBlock), whose 'catch' type is referenced by +++ * this type reference. This method must only be used for type references +++ * whose sort is {@link #EXCEPTION_PARAMETER EXCEPTION_PARAMETER}. +++ * +++ * @return the index of a try catch block, in visitTryCatchBlock order. +++ */ +++ public int getTryCatchBlockIndex() { +++ return (value & 0x00FFFF00) >> 8; +++ } +++ +++ /** +++ * Returns the index of the type argument referenced by this type reference. +++ * This method must only be used for type references whose sort is +++ * {@link #CAST CAST}, {@link #CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT +++ * CONSTRUCTOR_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #METHOD_INVOCATION_TYPE_ARGUMENT METHOD_INVOCATION_TYPE_ARGUMENT}, +++ * {@link #CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT +++ * CONSTRUCTOR_REFERENCE_TYPE_ARGUMENT}, or +++ * {@link #METHOD_REFERENCE_TYPE_ARGUMENT METHOD_REFERENCE_TYPE_ARGUMENT}. +++ * +++ * @return a type argument index. +++ */ +++ public int getTypeArgumentIndex() { +++ return value & 0xFF; +++ } +++ +++ /** +++ * Returns the int encoded value of this type reference, suitable for use in +++ * visit methods related to type annotations, like visitTypeAnnotation. +++ * +++ * @return the int encoded value of this type reference. 
+++ */ +++ public int getValue() { +++ return value; +++ } +++} ++diff --git a/includes/inttypes.h b/includes/inttypes.h ++new file mode 100644 ++index 0000000..ead903f ++--- /dev/null +++++ b/includes/inttypes.h ++@@ -0,0 +1,305 @@ +++// ISO C9x compliant inttypes.h for Microsoft Visual Studio +++// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +++// +++// Copyright (c) 2006 Alexander Chemeris +++// +++// Redistribution and use in source and binary forms, with or without +++// modification, are permitted provided that the following conditions are met: +++// +++// 1. Redistributions of source code must retain the above copyright notice, +++// this list of conditions and the following disclaimer. +++// +++// 2. Redistributions in binary form must reproduce the above copyright +++// notice, this list of conditions and the following disclaimer in the +++// documentation and/or other materials provided with the distribution. +++// +++// 3. The name of the author may be used to endorse or promote products +++// derived from this software without specific prior written permission. +++// +++// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +++// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +++// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +++// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +++// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +++// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +++// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +++// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +++// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +++// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+++// +++/////////////////////////////////////////////////////////////////////////////// +++ +++#ifndef _MSC_VER // [ +++#error "Use this header only with Microsoft Visual C++ compilers!" +++#endif // _MSC_VER ] +++ +++#ifndef _MSC_INTTYPES_H_ // [ +++#define _MSC_INTTYPES_H_ +++ +++#if _MSC_VER > 1000 +++#pragma once +++#endif +++ +++#include "stdint.h" +++ +++// 7.8 Format conversion of integer types +++ +++typedef struct { +++ intmax_t quot; +++ intmax_t rem; +++} imaxdiv_t; +++ +++// 7.8.1 Macros for format specifiers +++ +++#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 +++ +++// The fprintf macros for signed integers are: +++#define PRId8 "d" +++#define PRIi8 "i" +++#define PRIdLEAST8 "d" +++#define PRIiLEAST8 "i" +++#define PRIdFAST8 "d" +++#define PRIiFAST8 "i" +++ +++#define PRId16 "hd" +++#define PRIi16 "hi" +++#define PRIdLEAST16 "hd" +++#define PRIiLEAST16 "hi" +++#define PRIdFAST16 "hd" +++#define PRIiFAST16 "hi" +++ +++#define PRId32 "I32d" +++#define PRIi32 "I32i" +++#define PRIdLEAST32 "I32d" +++#define PRIiLEAST32 "I32i" +++#define PRIdFAST32 "I32d" +++#define PRIiFAST32 "I32i" +++ +++#define PRId64 "I64d" +++#define PRIi64 "I64i" +++#define PRIdLEAST64 "I64d" +++#define PRIiLEAST64 "I64i" +++#define PRIdFAST64 "I64d" +++#define PRIiFAST64 "I64i" +++ +++#define PRIdMAX "I64d" +++#define PRIiMAX "I64i" +++ +++#define PRIdPTR "Id" +++#define PRIiPTR "Ii" +++ +++// The fprintf macros for unsigned integers are: +++#define PRIo8 "o" +++#define PRIu8 "u" +++#define PRIx8 "x" +++#define PRIX8 "X" +++#define PRIoLEAST8 "o" +++#define PRIuLEAST8 "u" +++#define PRIxLEAST8 "x" +++#define PRIXLEAST8 "X" +++#define PRIoFAST8 "o" +++#define PRIuFAST8 "u" +++#define PRIxFAST8 "x" +++#define PRIXFAST8 "X" +++ +++#define PRIo16 "ho" +++#define PRIu16 "hu" +++#define PRIx16 "hx" +++#define PRIX16 "hX" +++#define PRIoLEAST16 "ho" +++#define PRIuLEAST16 "hu" +++#define PRIxLEAST16 "hx" +++#define PRIXLEAST16 "hX" 
+++#define PRIoFAST16 "ho" +++#define PRIuFAST16 "hu" +++#define PRIxFAST16 "hx" +++#define PRIXFAST16 "hX" +++ +++#define PRIo32 "I32o" +++#define PRIu32 "I32u" +++#define PRIx32 "I32x" +++#define PRIX32 "I32X" +++#define PRIoLEAST32 "I32o" +++#define PRIuLEAST32 "I32u" +++#define PRIxLEAST32 "I32x" +++#define PRIXLEAST32 "I32X" +++#define PRIoFAST32 "I32o" +++#define PRIuFAST32 "I32u" +++#define PRIxFAST32 "I32x" +++#define PRIXFAST32 "I32X" +++ +++#define PRIo64 "I64o" +++#define PRIu64 "I64u" +++#define PRIx64 "I64x" +++#define PRIX64 "I64X" +++#define PRIoLEAST64 "I64o" +++#define PRIuLEAST64 "I64u" +++#define PRIxLEAST64 "I64x" +++#define PRIXLEAST64 "I64X" +++#define PRIoFAST64 "I64o" +++#define PRIuFAST64 "I64u" +++#define PRIxFAST64 "I64x" +++#define PRIXFAST64 "I64X" +++ +++#define PRIoMAX "I64o" +++#define PRIuMAX "I64u" +++#define PRIxMAX "I64x" +++#define PRIXMAX "I64X" +++ +++#define PRIoPTR "Io" +++#define PRIuPTR "Iu" +++#define PRIxPTR "Ix" +++#define PRIXPTR "IX" +++ +++// The fscanf macros for signed integers are: +++#define SCNd8 "d" +++#define SCNi8 "i" +++#define SCNdLEAST8 "d" +++#define SCNiLEAST8 "i" +++#define SCNdFAST8 "d" +++#define SCNiFAST8 "i" +++ +++#define SCNd16 "hd" +++#define SCNi16 "hi" +++#define SCNdLEAST16 "hd" +++#define SCNiLEAST16 "hi" +++#define SCNdFAST16 "hd" +++#define SCNiFAST16 "hi" +++ +++#define SCNd32 "ld" +++#define SCNi32 "li" +++#define SCNdLEAST32 "ld" +++#define SCNiLEAST32 "li" +++#define SCNdFAST32 "ld" +++#define SCNiFAST32 "li" +++ +++#define SCNd64 "I64d" +++#define SCNi64 "I64i" +++#define SCNdLEAST64 "I64d" +++#define SCNiLEAST64 "I64i" +++#define SCNdFAST64 "I64d" +++#define SCNiFAST64 "I64i" +++ +++#define SCNdMAX "I64d" +++#define SCNiMAX "I64i" +++ +++#ifdef _WIN64 // [ +++# define SCNdPTR "I64d" +++# define SCNiPTR "I64i" +++#else // _WIN64 ][ +++# define SCNdPTR "ld" +++# define SCNiPTR "li" +++#endif // _WIN64 ] +++ +++// The fscanf macros for unsigned integers are: +++#define SCNo8 "o" 
+++#define SCNu8 "u" +++#define SCNx8 "x" +++#define SCNX8 "X" +++#define SCNoLEAST8 "o" +++#define SCNuLEAST8 "u" +++#define SCNxLEAST8 "x" +++#define SCNXLEAST8 "X" +++#define SCNoFAST8 "o" +++#define SCNuFAST8 "u" +++#define SCNxFAST8 "x" +++#define SCNXFAST8 "X" +++ +++#define SCNo16 "ho" +++#define SCNu16 "hu" +++#define SCNx16 "hx" +++#define SCNX16 "hX" +++#define SCNoLEAST16 "ho" +++#define SCNuLEAST16 "hu" +++#define SCNxLEAST16 "hx" +++#define SCNXLEAST16 "hX" +++#define SCNoFAST16 "ho" +++#define SCNuFAST16 "hu" +++#define SCNxFAST16 "hx" +++#define SCNXFAST16 "hX" +++ +++#define SCNo32 "lo" +++#define SCNu32 "lu" +++#define SCNx32 "lx" +++#define SCNX32 "lX" +++#define SCNoLEAST32 "lo" +++#define SCNuLEAST32 "lu" +++#define SCNxLEAST32 "lx" +++#define SCNXLEAST32 "lX" +++#define SCNoFAST32 "lo" +++#define SCNuFAST32 "lu" +++#define SCNxFAST32 "lx" +++#define SCNXFAST32 "lX" +++ +++#define SCNo64 "I64o" +++#define SCNu64 "I64u" +++#define SCNx64 "I64x" +++#define SCNX64 "I64X" +++#define SCNoLEAST64 "I64o" +++#define SCNuLEAST64 "I64u" +++#define SCNxLEAST64 "I64x" +++#define SCNXLEAST64 "I64X" +++#define SCNoFAST64 "I64o" +++#define SCNuFAST64 "I64u" +++#define SCNxFAST64 "I64x" +++#define SCNXFAST64 "I64X" +++ +++#define SCNoMAX "I64o" +++#define SCNuMAX "I64u" +++#define SCNxMAX "I64x" +++#define SCNXMAX "I64X" +++ +++#ifdef _WIN64 // [ +++# define SCNoPTR "I64o" +++# define SCNuPTR "I64u" +++# define SCNxPTR "I64x" +++# define SCNXPTR "I64X" +++#else // _WIN64 ][ +++# define SCNoPTR "lo" +++# define SCNuPTR "lu" +++# define SCNxPTR "lx" +++# define SCNXPTR "lX" +++#endif // _WIN64 ] +++ +++#endif // __STDC_FORMAT_MACROS ] +++ +++// 7.8.2 Functions for greatest-width integer types +++ +++// 7.8.2.1 The imaxabs function +++#define imaxabs _abs64 +++ +++// 7.8.2.2 The imaxdiv function +++ +++// This is modified version of div() function from Microsoft's div.c found +++// in %MSVC.NET%\crt\src\div.c +++#ifdef STATIC_IMAXDIV // [ +++static +++#else // 
STATIC_IMAXDIV ][ +++_inline +++#endif // STATIC_IMAXDIV ] +++imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +++{ +++ imaxdiv_t result; +++ +++ result.quot = numer / denom; +++ result.rem = numer % denom; +++ +++ if (numer < 0 && result.rem > 0) { +++ // did division wrong; must fix up +++ ++result.quot; +++ result.rem -= denom; +++ } +++ +++ return result; +++} +++ +++// 7.8.2.3 The strtoimax and strtoumax functions +++#define strtoimax _strtoi64 +++#define strtoumax _strtoui64 +++ +++// 7.8.2.4 The wcstoimax and wcstoumax functions +++#define wcstoimax _wcstoi64 +++#define wcstoumax _wcstoui64 +++ +++ +++#endif // _MSC_INTTYPES_H_ ] ++diff --git a/libbluray.def b/libbluray.def ++new file mode 100644 ++index 0000000..d4c93cb ++--- /dev/null +++++ b/libbluray.def ++@@ -0,0 +1,63 @@ +++; libbluray.def ; declares the exports +++ +++LIBRARY "libbluray.dll" +++ +++EXPORTS +++ ; bluray.h +++ bd_get_version +++ bd_get_titles +++ bd_get_title_info +++ bd_get_playlist_info +++ bd_free_title_info +++ bd_open +++ bd_close +++ bd_seek +++ bd_seek_time +++ bd_find_seek_point +++ bd_read +++ bd_read_skip_still +++ bd_seek_chapter +++ bd_chapter_pos +++ bd_get_current_chapter +++ bd_seek_mark +++ bd_seek_playitem +++ bd_select_playlist +++ bd_select_title +++ bd_select_angle +++ bd_seamless_angle_change +++ bd_get_title_size +++ bd_get_current_title +++ bd_get_current_angle +++ bd_tell +++ bd_tell_time +++ bd_get_disc_info +++ bd_set_player_setting +++ bd_set_player_setting_str +++ bd_start_bdj +++ bd_stop_bdj +++ bd_get_event +++ bd_play +++ bd_read_ext +++ bd_play_title +++ bd_menu_call +++ bd_register_overlay_proc +++ bd_register_argb_overlay_proc +++ bd_set_scr +++ bd_user_input +++ bd_mouse_select +++ bd_get_sound_effect +++ bd_get_meta +++ bd_get_clpi +++ bd_read_clpi +++ bd_free_clpi +++ bd_read_mpls +++ bd_free_mpls +++ bd_read_mobj +++ bd_free_mobj +++ bd_get_clip_infos +++ bd_get_title_mpls +++ +++ ; additional functions +++ bd_set_debug_handler +++ 
bd_set_debug_mask +++ bd_get_debug_mask ++diff --git a/libbluray.vcxproj b/libbluray.vcxproj ++new file mode 100644 ++index 0000000..c778955 ++--- /dev/null +++++ b/libbluray.vcxproj ++@@ -0,0 +1,231 @@ +++<?xml version="1.0" encoding="utf-8"?> +++<Project DefaultTargets="Build" ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> +++ <ItemGroup Label="ProjectConfigurations"> +++ <ProjectConfiguration Include="DebugRelease|Win32"> +++ <Configuration>DebugRelease</Configuration> +++ <Platform>Win32</Platform> +++ </ProjectConfiguration> +++ <ProjectConfiguration Include="DebugRelease|x64"> +++ <Configuration>DebugRelease</Configuration> +++ <Platform>x64</Platform> +++ </ProjectConfiguration> +++ <ProjectConfiguration Include="Debug|Win32"> +++ <Configuration>Debug</Configuration> +++ <Platform>Win32</Platform> +++ </ProjectConfiguration> +++ <ProjectConfiguration Include="Debug|x64"> +++ <Configuration>Debug</Configuration> +++ <Platform>x64</Platform> +++ </ProjectConfiguration> +++ <ProjectConfiguration Include="Release|Win32"> +++ <Configuration>Release</Configuration> +++ <Platform>Win32</Platform> +++ </ProjectConfiguration> +++ <ProjectConfiguration Include="Release|x64"> +++ <Configuration>Release</Configuration> +++ <Platform>x64</Platform> +++ </ProjectConfiguration> +++ </ItemGroup> +++ <ItemGroup> +++ <ClInclude Include="src\file\dirs.h" /> +++ <ClInclude Include="src\file\dl.h" /> +++ <ClInclude Include="src\file\file.h" /> +++ <ClInclude Include="src\file\filesystem.h" /> +++ <ClInclude Include="src\file\mount.h" /> +++ <ClInclude Include="src\libbluray\bdnav\bdid_parse.h" /> +++ <ClInclude Include="src\libbluray\bdnav\bdparse.h" /> +++ <ClInclude Include="src\libbluray\bdnav\clpi_data.h" /> +++ <ClInclude Include="src\libbluray\bdnav\clpi_parse.h" /> +++ <ClInclude Include="src\libbluray\bdnav\extdata_parse.h" /> +++ <ClInclude Include="src\libbluray\bdnav\index_parse.h" /> +++ <ClInclude 
Include="src\libbluray\bdnav\meta_data.h" /> +++ <ClInclude Include="src\libbluray\bdnav\meta_parse.h" /> +++ <ClInclude Include="src\libbluray\bdnav\mpls_parse.h" /> +++ <ClInclude Include="src\libbluray\bdnav\navigation.h" /> +++ <ClInclude Include="src\libbluray\bdnav\sound_parse.h" /> +++ <ClInclude Include="src\libbluray\bdnav\uo_mask_table.h" /> +++ <ClInclude Include="src\libbluray\bluray-version.h" /> +++ <ClInclude Include="src\libbluray\bluray.h" /> +++ <ClInclude Include="src\libbluray\bluray_internal.h" /> +++ <ClInclude Include="src\libbluray\decoders\graphics_controller.h" /> +++ <ClInclude Include="src\libbluray\decoders\graphics_processor.h" /> +++ <ClInclude Include="src\libbluray\decoders\ig.h" /> +++ <ClInclude Include="src\libbluray\decoders\ig_decode.h" /> +++ <ClInclude Include="src\libbluray\decoders\m2ts_demux.h" /> +++ <ClInclude Include="src\libbluray\decoders\m2ts_filter.h" /> +++ <ClInclude Include="src\libbluray\decoders\overlay.h" /> +++ <ClInclude Include="src\libbluray\decoders\pes_buffer.h" /> +++ <ClInclude Include="src\libbluray\decoders\pg.h" /> +++ <ClInclude Include="src\libbluray\decoders\pg_decode.h" /> +++ <ClInclude Include="src\libbluray\decoders\rle.h" /> +++ <ClInclude Include="src\libbluray\decoders\textst.h" /> +++ <ClInclude Include="src\libbluray\decoders\textst_decode.h" /> +++ <ClInclude Include="src\libbluray\decoders\textst_render.h" /> +++ <ClInclude Include="src\libbluray\disc\aacs.h" /> +++ <ClInclude Include="src\libbluray\disc\bdplus.h" /> +++ <ClInclude Include="src\libbluray\disc\dec.h" /> +++ <ClInclude Include="src\libbluray\disc\disc.h" /> +++ <ClInclude Include="src\libbluray\disc\enc_info.h" /> +++ <ClInclude Include="src\libbluray\hdmv\hdmv_insn.h" /> +++ <ClInclude Include="src\libbluray\hdmv\hdmv_vm.h" /> +++ <ClInclude Include="src\libbluray\hdmv\mobj_parse.h" /> +++ <ClInclude Include="src\libbluray\keys.h" /> +++ <ClInclude Include="src\libbluray\register.h" /> +++ <ClInclude 
Include="src\util\array.h" /> +++ <ClInclude Include="src\util\attributes.h" /> +++ <ClInclude Include="src\util\bits.h" /> +++ <ClInclude Include="src\util\logging.h" /> +++ <ClInclude Include="src\util\log_control.h" /> +++ <ClInclude Include="src\util\macro.h" /> +++ <ClInclude Include="src\util\mutex.h" /> +++ <ClInclude Include="src\util\refcnt.h" /> +++ <ClInclude Include="src\util\strutl.h" /> +++ <ClInclude Include="src\util\time.h" /> +++ </ItemGroup> +++ <ItemGroup> +++ <ClCompile Include="src\file\dirs_win32.c" /> +++ <ClCompile Include="src\file\dir_win32.c" /> +++ <ClCompile Include="src\file\dl_win32.c" /> +++ <ClCompile Include="src\file\file.c" /> +++ <ClCompile Include="src\file\filesystem.c" /> +++ <ClCompile Include="src\file\file_win32.c" /> +++ <ClCompile Include="src\file\mount.c" /> +++ <ClCompile Include="src\libbluray\bdnav\bdid_parse.c" /> +++ <ClCompile Include="src\libbluray\bdnav\clpi_parse.c" /> +++ <ClCompile Include="src\libbluray\bdnav\extdata_parse.c" /> +++ <ClCompile Include="src\libbluray\bdnav\index_parse.c" /> +++ <ClCompile Include="src\libbluray\bdnav\meta_parse.c" /> +++ <ClCompile Include="src\libbluray\bdnav\mpls_parse.c" /> +++ <ClCompile Include="src\libbluray\bdnav\navigation.c" /> +++ <ClCompile Include="src\libbluray\bdnav\sound_parse.c" /> +++ <ClCompile Include="src\libbluray\bluray.c" /> +++ <ClCompile Include="src\libbluray\decoders\graphics_controller.c" /> +++ <ClCompile Include="src\libbluray\decoders\graphics_processor.c" /> +++ <ClCompile Include="src\libbluray\decoders\ig_decode.c" /> +++ <ClCompile Include="src\libbluray\decoders\m2ts_demux.c" /> +++ <ClCompile Include="src\libbluray\decoders\m2ts_filter.c" /> +++ <ClCompile Include="src\libbluray\decoders\pes_buffer.c" /> +++ <ClCompile Include="src\libbluray\decoders\pg_decode.c" /> +++ <ClCompile Include="src\libbluray\decoders\rle.c" /> +++ <ClCompile Include="src\libbluray\decoders\textst_decode.c" /> +++ <ClCompile 
Include="src\libbluray\decoders\textst_render.c" /> +++ <ClCompile Include="src\libbluray\disc\aacs.c" /> +++ <ClCompile Include="src\libbluray\disc\bdplus.c" /> +++ <ClCompile Include="src\libbluray\disc\dec.c" /> +++ <ClCompile Include="src\libbluray\disc\disc.c" /> +++ <ClCompile Include="src\libbluray\hdmv\hdmv_vm.c" /> +++ <ClCompile Include="src\libbluray\hdmv\mobj_parse.c" /> +++ <ClCompile Include="src\libbluray\hdmv\mobj_print.c" /> +++ <ClCompile Include="src\libbluray\register.c" /> +++ <ClCompile Include="src\util\array.c" /> +++ <ClCompile Include="src\util\bits.c" /> +++ <ClCompile Include="src\util\logging.c" /> +++ <ClCompile Include="src\util\mutex.c" /> +++ <ClCompile Include="src\util\refcnt.c" /> +++ <ClCompile Include="src\util\strutl.c" /> +++ <ClCompile Include="src\util\time.c" /> +++ </ItemGroup> +++ <ItemGroup> +++ <None Include="libbluray.def" /> +++ </ItemGroup> +++ <PropertyGroup Label="Globals"> +++ <ProjectGuid>{E1DA1B95-71F1-4C21-A271-121176925062}</ProjectGuid> +++ <Keyword>Win32Proj</Keyword> +++ <RootNamespace>libbluray</RootNamespace> +++ </PropertyGroup> +++ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> +++ <PropertyGroup Label="Configuration" Condition="'$(VisualStudioVersion)' == '11.0'"> +++ <PlatformToolset>v110_xp</PlatformToolset> +++ </PropertyGroup> +++ <PropertyGroup Label="Configuration" Condition="'$(VisualStudioVersion)' == '12.0'"> +++ <PlatformToolset>v120_xp</PlatformToolset> +++ </PropertyGroup> +++ <PropertyGroup Label="Configuration" Condition="'$(VisualStudioVersion)' == '14.0'"> +++ <PlatformToolset>v140_xp</PlatformToolset> +++ </PropertyGroup> +++ <PropertyGroup Condition="'$(Configuration)'=='Debug' Or '$(Configuration)'=='DebugRelease'" Label="Configuration"> +++ <ConfigurationType>DynamicLibrary</ConfigurationType> +++ <UseDebugLibraries>true</UseDebugLibraries> +++ <CharacterSet>Unicode</CharacterSet> +++ </PropertyGroup> +++ <PropertyGroup Condition="'$(Configuration)'=='Release'" 
Label="Configuration"> +++ <ConfigurationType>DynamicLibrary</ConfigurationType> +++ <UseDebugLibraries>false</UseDebugLibraries> +++ <WholeProgramOptimization>true</WholeProgramOptimization> +++ <CharacterSet>Unicode</CharacterSet> +++ </PropertyGroup> +++ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> +++ <ImportGroup Label="ExtensionSettings"> +++ </ImportGroup> +++ <ImportGroup Label="PropertySheets"> +++ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> +++ </ImportGroup> +++ <PropertyGroup Label="UserMacros" /> +++ <PropertyGroup Condition="'$(Configuration)'=='Debug' Or '$(Configuration)'=='DebugRelease'"> +++ <LinkIncremental>true</LinkIncremental> +++ <OutDir>$(SolutionDir)bin_$(PlatformName)d\</OutDir> +++ <IntDir>$(SolutionDir)bin_$(PlatformName)d\$(ProjectName)\</IntDir> +++ </PropertyGroup> +++ <PropertyGroup Condition="'$(Configuration)'=='Release'"> +++ <LinkIncremental>false</LinkIncremental> +++ <OutDir>$(SolutionDir)bin_$(PlatformName)\$(ProjectName)\</OutDir> +++ <IntDir>$(SolutionDir)bin_$(PlatformName)\$(ProjectName)\</IntDir> +++ </PropertyGroup> +++ <ItemDefinitionGroup Condition="'$(Configuration)'=='Debug' Or '$(Configuration)'=='DebugRelease'"> +++ <ClCompile> +++ <PrecompiledHeader> +++ </PrecompiledHeader> +++ <WarningLevel>Level3</WarningLevel> +++ <Optimization>Disabled</Optimization> +++ <PreprocessorDefinitions>HAVE_CONFIG_H;WIN32;_DEBUG;_WINDOWS;_USRDLL;LIBBLURAY_EXPORTS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions> +++ <PreprocessorDefinitions Condition="'$(VisualStudioVersion)' == '12.0'">__STDC_FORMAT_MACROS;%(PreprocessorDefinitions)</PreprocessorDefinitions> +++ <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)includes;$(ProjectDir)src;$(ProjectDir)src\libbluray;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> +++ 
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary> +++ <CompileAs Condition="'$(VisualStudioVersion)' == '12.0'">CompileAsCpp</CompileAs> +++ </ClCompile> +++ <Link> +++ <SubSystem>Windows</SubSystem> +++ <GenerateDebugInformation>true</GenerateDebugInformation> +++ <ModuleDefinitionFile>libbluray.def</ModuleDefinitionFile> +++ </Link> +++ <PostBuildEvent> +++ <Command>xcopy /I /Y "$(OutDir)$(TargetName).lib" "$(OutDir)lib\"</Command> +++ <Message>Copy .lib into library path</Message> +++ </PostBuildEvent> +++ </ItemDefinitionGroup> +++ <ItemDefinitionGroup Condition="'$(Configuration)'=='DebugRelease'"> +++ <ClCompile> +++ <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary> +++ </ClCompile> +++ </ItemDefinitionGroup> +++ <ItemDefinitionGroup Condition="'$(Configuration)'=='Release'"> +++ <ClCompile> +++ <WarningLevel>Level3</WarningLevel> +++ <Optimization>MaxSpeed</Optimization> +++ <FunctionLevelLinking>true</FunctionLevelLinking> +++ <IntrinsicFunctions>true</IntrinsicFunctions> +++ <PreprocessorDefinitions>HAVE_CONFIG_H;WIN32;NDEBUG;_WINDOWS;_USRDLL;LIBBLURAY_EXPORTS;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions> +++ <PreprocessorDefinitions Condition="'$(VisualStudioVersion)' == '12.0'">__STDC_FORMAT_MACROS;%(PreprocessorDefinitions)</PreprocessorDefinitions> +++ <AdditionalIncludeDirectories>$(ProjectDir);$(ProjectDir)includes;$(ProjectDir)src;$(ProjectDir)src\libbluray;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> +++ <RuntimeLibrary>MultiThreaded</RuntimeLibrary> +++ <EnableEnhancedInstructionSet Condition="'$(Platform)'=='Win32'">StreamingSIMDExtensions</EnableEnhancedInstructionSet> +++ <CompileAs Condition="'$(VisualStudioVersion)' == '12.0'">CompileAsCpp</CompileAs> +++ </ClCompile> +++ <Link> +++ <SubSystem>Windows</SubSystem> +++ <GenerateDebugInformation>true</GenerateDebugInformation> +++ <EnableCOMDATFolding>true</EnableCOMDATFolding> +++ <OptimizeReferences>true</OptimizeReferences> +++ 
<ModuleDefinitionFile>libbluray.def</ModuleDefinitionFile> +++ <SetChecksum>true</SetChecksum> +++ </Link> +++ <PostBuildEvent> +++ <Command>xcopy /I /Y "$(TargetDir)$(TargetName)$(TargetExt)" "$(OutDir)..\" +++xcopy /I /Y "$(TargetDir)$(TargetName).lib" "$(OutDir)..\lib\"</Command> +++ <Message>Copy .dll/.lib into library path</Message> +++ </PostBuildEvent> +++ </ItemDefinitionGroup> +++ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> +++ <ImportGroup Label="ExtensionTargets"> +++ </ImportGroup> +++</Project> ++\ No newline at end of file ++diff --git a/libbluray.vcxproj.filters b/libbluray.vcxproj.filters ++new file mode 100644 ++index 0000000..57ff16c ++--- /dev/null +++++ b/libbluray.vcxproj.filters ++@@ -0,0 +1,353 @@ +++<?xml version="1.0" encoding="utf-8"?> +++<Project ToolsVersion="12.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> +++ <ItemGroup> +++ <Filter Include="Source Files"> +++ <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier> +++ <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions> +++ </Filter> +++ <Filter Include="Header Files"> +++ <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier> +++ <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions> +++ </Filter> +++ <Filter Include="Resource Files"> +++ <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier> +++ <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions> +++ </Filter> +++ <Filter Include="Source Files\util"> +++ <UniqueIdentifier>{1ab0e905-7c04-4090-b385-6363dd1c961c}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Header Files\util"> +++ <UniqueIdentifier>{b8fa3348-a089-461f-9ef5-3d9df997b8e5}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Header Files\file"> +++ <UniqueIdentifier>{125333e9-0b5e-45f9-a444-f0aaaf547d9b}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Source Files\file"> +++ 
<UniqueIdentifier>{a743058f-f07a-4d0f-bab6-02dc57defda9}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Source Files\libbluray"> +++ <UniqueIdentifier>{7dacc7c4-ef59-452b-9e5b-392c9df07c98}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Header Files\libbluray"> +++ <UniqueIdentifier>{c7895c81-c186-4d5e-a8ff-645c6d55a731}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Header Files\libbluray\bdnav"> +++ <UniqueIdentifier>{c8619466-211b-4c85-9d30-d1b1a822d32e}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Source Files\libbluray\bdnav"> +++ <UniqueIdentifier>{8afb6919-994f-4d1f-9638-ce4a06d0b473}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Source Files\libbluray\decoders"> +++ <UniqueIdentifier>{0e9086a7-eebf-4b8e-a4fe-b1724d148877}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Header Files\libbluray\decoders"> +++ <UniqueIdentifier>{fc5e776b-0f32-493a-b823-240288288502}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Header Files\libbluray\hdmv"> +++ <UniqueIdentifier>{96d2d786-cd45-4856-937d-9e6f85ced241}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Source Files\libbluray\hdmv"> +++ <UniqueIdentifier>{9f4ea4ae-217a-4d97-a5f3-e561ce1e49cd}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Source Files\libbluray\disc"> +++ <UniqueIdentifier>{09e1b1b8-3aa3-4918-b157-3dfc0554ccbb}</UniqueIdentifier> +++ </Filter> +++ <Filter Include="Header Files\libbluray\disc"> +++ <UniqueIdentifier>{1e02e503-752e-4765-9dfb-8cc67a7b79f8}</UniqueIdentifier> +++ </Filter> +++ </ItemGroup> +++ <ItemGroup> +++ <ClInclude Include="src\util\attributes.h"> +++ <Filter>Header Files\util</Filter> +++ </ClInclude> +++ <ClInclude Include="src\util\strutl.h"> +++ <Filter>Header Files\util</Filter> +++ </ClInclude> +++ <ClInclude Include="src\util\bits.h"> +++ <Filter>Header Files\util</Filter> +++ </ClInclude> +++ <ClInclude Include="src\util\log_control.h"> +++ <Filter>Header Files\util</Filter> +++ </ClInclude> +++ <ClInclude 
Include="src\util\logging.h"> +++ <Filter>Header Files\util</Filter> +++ </ClInclude> +++ <ClInclude Include="src\util\macro.h"> +++ <Filter>Header Files\util</Filter> +++ </ClInclude> +++ <ClInclude Include="src\util\mutex.h"> +++ <Filter>Header Files\util</Filter> +++ </ClInclude> +++ <ClInclude Include="src\file\dl.h"> +++ <Filter>Header Files\file</Filter> +++ </ClInclude> +++ <ClInclude Include="src\file\filesystem.h"> +++ <Filter>Header Files\file</Filter> +++ </ClInclude> +++ <ClInclude Include="src\file\file.h"> +++ <Filter>Header Files\file</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bluray.h"> +++ <Filter>Header Files\libbluray</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\register.h"> +++ <Filter>Header Files\libbluray</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\keys.h"> +++ <Filter>Header Files\libbluray</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\bdparse.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\uo_mask_table.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\clpi_parse.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\index_parse.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\meta_data.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\meta_parse.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\mpls_parse.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\navigation.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\sound_parse.h"> +++ <Filter>Header 
Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\graphics_controller.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\pg_decode.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\graphics_processor.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\ig.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\ig_decode.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\m2ts_demux.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\overlay.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\pes_buffer.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\pg.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\hdmv\hdmv_insn.h"> +++ <Filter>Header Files\libbluray\hdmv</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\hdmv\mobj_parse.h"> +++ <Filter>Header Files\libbluray\hdmv</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\hdmv\hdmv_vm.h"> +++ <Filter>Header Files\libbluray\hdmv</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\bdid_parse.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\clpi_data.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bdnav\extdata_parse.h"> +++ <Filter>Header Files\libbluray\bdnav</Filter> +++ </ClInclude> +++ <ClInclude 
Include="src\libbluray\decoders\textst.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\textst_render.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\textst_decode.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\rle.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\util\refcnt.h"> +++ <Filter>Header Files\util</Filter> +++ </ClInclude> +++ <ClInclude Include="src\util\time.h"> +++ <Filter>Header Files\util</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bluray_internal.h"> +++ <Filter>Header Files\libbluray</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\decoders\m2ts_filter.h"> +++ <Filter>Header Files\libbluray\decoders</Filter> +++ </ClInclude> +++ <ClInclude Include="src\file\dirs.h"> +++ <Filter>Header Files\file</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\bluray-version.h"> +++ <Filter>Header Files\libbluray</Filter> +++ </ClInclude> +++ <ClInclude Include="src\util\array.h"> +++ <Filter>Header Files\util</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\disc\aacs.h"> +++ <Filter>Header Files\libbluray\disc</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\disc\bdplus.h"> +++ <Filter>Header Files\libbluray\disc</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\disc\dec.h"> +++ <Filter>Header Files\libbluray\disc</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\disc\disc.h"> +++ <Filter>Header Files\libbluray\disc</Filter> +++ </ClInclude> +++ <ClInclude Include="src\libbluray\disc\enc_info.h"> +++ <Filter>Header Files\libbluray\disc</Filter> +++ </ClInclude> +++ <ClInclude Include="src\file\mount.h"> +++ <Filter>Header Files\file</Filter> +++ </ClInclude> +++ </ItemGroup> +++ <ItemGroup> 
+++ <ClCompile Include="src\util\logging.c"> +++ <Filter>Source Files\util</Filter> +++ </ClCompile> +++ <ClCompile Include="src\util\strutl.c"> +++ <Filter>Source Files\util</Filter> +++ </ClCompile> +++ <ClCompile Include="src\file\filesystem.c"> +++ <Filter>Source Files\file</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\bluray.c"> +++ <Filter>Source Files\libbluray</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\register.c"> +++ <Filter>Source Files\libbluray</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\bdnav\clpi_parse.c"> +++ <Filter>Source Files\libbluray\bdnav</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\bdnav\sound_parse.c"> +++ <Filter>Source Files\libbluray\bdnav</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\bdnav\index_parse.c"> +++ <Filter>Source Files\libbluray\bdnav</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\bdnav\meta_parse.c"> +++ <Filter>Source Files\libbluray\bdnav</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\bdnav\mpls_parse.c"> +++ <Filter>Source Files\libbluray\bdnav</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\bdnav\navigation.c"> +++ <Filter>Source Files\libbluray\bdnav</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\decoders\graphics_controller.c"> +++ <Filter>Source Files\libbluray\decoders</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\decoders\pg_decode.c"> +++ <Filter>Source Files\libbluray\decoders</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\decoders\graphics_processor.c"> +++ <Filter>Source Files\libbluray\decoders</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\decoders\ig_decode.c"> +++ <Filter>Source Files\libbluray\decoders</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\decoders\m2ts_demux.c"> +++ <Filter>Source Files\libbluray\decoders</Filter> +++ </ClCompile> +++ <ClCompile 
Include="src\libbluray\decoders\pes_buffer.c"> +++ <Filter>Source Files\libbluray\decoders</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\hdmv\hdmv_vm.c"> +++ <Filter>Source Files\libbluray\hdmv</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\hdmv\mobj_print.c"> +++ <Filter>Source Files\libbluray\hdmv</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\hdmv\mobj_parse.c"> +++ <Filter>Source Files\libbluray\hdmv</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\bdnav\bdid_parse.c"> +++ <Filter>Source Files\libbluray\bdnav</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\bdnav\extdata_parse.c"> +++ <Filter>Source Files\libbluray\bdnav</Filter> +++ </ClCompile> +++ <ClCompile Include="src\util\bits.c"> +++ <Filter>Source Files\util</Filter> +++ </ClCompile> +++ <ClCompile Include="src\file\dir_win32.c"> +++ <Filter>Source Files\file</Filter> +++ </ClCompile> +++ <ClCompile Include="src\file\dirs_win32.c"> +++ <Filter>Source Files\file</Filter> +++ </ClCompile> +++ <ClCompile Include="src\file\dl_win32.c"> +++ <Filter>Source Files\file</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\decoders\textst_decode.c"> +++ <Filter>Source Files\libbluray\decoders</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\decoders\textst_render.c"> +++ <Filter>Source Files\libbluray\decoders</Filter> +++ </ClCompile> +++ <ClCompile Include="src\util\refcnt.c"> +++ <Filter>Source Files\util</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\decoders\m2ts_filter.c"> +++ <Filter>Source Files\libbluray\decoders</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\decoders\rle.c"> +++ <Filter>Source Files\libbluray\decoders</Filter> +++ </ClCompile> +++ <ClCompile Include="src\file\file_win32.c"> +++ <Filter>Source Files\file</Filter> +++ </ClCompile> +++ <ClCompile Include="src\file\file.c"> +++ <Filter>Source Files\file</Filter> +++ </ClCompile> +++ <ClCompile 
Include="src\util\array.c"> +++ <Filter>Source Files\util</Filter> +++ </ClCompile> +++ <ClCompile Include="src\util\mutex.c"> +++ <Filter>Source Files\util</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\disc\aacs.c"> +++ <Filter>Source Files\libbluray\disc</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\disc\bdplus.c"> +++ <Filter>Source Files\libbluray\disc</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\disc\dec.c"> +++ <Filter>Source Files\libbluray\disc</Filter> +++ </ClCompile> +++ <ClCompile Include="src\libbluray\disc\disc.c"> +++ <Filter>Source Files\libbluray\disc</Filter> +++ </ClCompile> +++ <ClCompile Include="src\util\time.c"> +++ <Filter>Source Files\util</Filter> +++ </ClCompile> +++ <ClCompile Include="src\file\mount.c"> +++ <Filter>Source Files\file</Filter> +++ </ClCompile> +++ </ItemGroup> +++ <ItemGroup> +++ <None Include="libbluray.def"> +++ <Filter>Source Files</Filter> +++ </None> +++ </ItemGroup> +++</Project> ++\ No newline at end of file ++diff --git a/src/devtools/bdj_test.c b/src/devtools/bdj_test.c ++new file mode 100644 ++index 0000000..d9ebd16 ++--- /dev/null +++++ b/src/devtools/bdj_test.c ++@@ -0,0 +1,67 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2010 William Hahne +++ * +++ * This program is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU General Public License +++ * as published by the Free Software Foundation; either version 2 +++ * of the License, or (at your option) any later version. +++ * +++ * This program is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +++ * GNU General Public License for more details. +++ * +++ * You should have received a copy of the GNU General Public License +++ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+++ * +++ * In addition, as a special exception, the copyright holders of libbluray +++ * gives permission to link the code of its release of libbluray with the +++ * OpenSSL project's "OpenSSL" library (or with modified versions of it +++ * that use the same license as the "OpenSSL" library), and distribute +++ * the linked executables. You must obey the GNU General Public License +++ * in all respects for all of the code used other than "OpenSSL". If you +++ * modify this file, you may extend this exception to your version of the +++ * file, but you are not obligated to do so. If you do not wish to do +++ * so, delete this exception statement from your version. +++ */ +++ +++#include <stdio.h> +++#include <stdlib.h> +++#include <unistd.h> +++ +++#include "libbluray/bluray.h" +++ +++#if defined(_WIN32) +++#include <windows.h> +++#define sleep(x) Sleep((x) * 1000) /* Win32 Sleep() takes milliseconds, sleep() takes seconds */ +++#endif +++ +++static void _usage(void) { +++ printf("Usage: [path to disc] [starting object]\n"); +++} +++ +++int main(int argc, char** argv) +++{ +++ if (argc < 3) { +++ _usage(); +++ return 0; +++ } +++ +++ printf("%s %s\n", argv[1], argv[2]); +++ +++ BLURAY* bd = bd_open(argv[1], NULL); +++ +++ bd_get_titles(bd, TITLES_ALL, 0); +++ +++ if (!bd_start_bdj(bd, argv[2])) { +++ printf("Failed to start BD-J application.\n"); +++ } else { +++ while (1) { sleep(20); } +++ bd_stop_bdj(bd); +++ } +++ +++ bd_close(bd); +++ +++ return 0; +++} ++diff --git a/src/devtools/bdjo_dump.c b/src/devtools/bdjo_dump.c ++new file mode 100644 ++index 0000000..c9c8141 ++--- /dev/null +++++ b/src/devtools/bdjo_dump.c ++@@ -0,0 +1,206 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2014 Petri Hintukainen <phintuka@users.sourceforge.net> +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version.
+++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. +++ */ +++ +++#include "libbluray/bluray.h" +++#include "libbluray/bdj/bdjo_data.h" +++ +++#include <stdio.h> +++#include <string.h> +++ +++static const char *_yes_no(int i) +++{ +++ return i > 0 ? "yes" : i < 0 ? "unknown" : "no"; +++} +++ +++static const char *_binding_str(int i) +++{ +++ switch (i) { +++ case 0: return "unbound"; +++ case 1: return "disc bound"; +++ case 3: return "title bound"; +++ default: return "???"; +++ } +++} +++ +++static const char *_visibility_str(int i) +++{ +++ switch (i) { +++ case 0: return "none"; +++ case 1: return "applications"; +++ case 2: return "user"; +++ default: return "???"; +++ } +++} +++ +++static void _terminal_info_print(const BDJO_TERMINAL_INFO *p) +++{ +++ printf("Terminal Info:\n"); +++ printf(" Default AWT font : %s\n", p->default_font); +++ printf(" initial HaVi config : %d\n", p->initial_havi_config_id); +++ printf(" Menu call mask : %d\n", p->menu_call_mask); +++ printf(" Title search mask : %d\n", p->title_search_mask); +++} +++ +++static void _app_cache_item_print(const BDJO_APP_CACHE_ITEM *p) +++{ +++ printf(" %3.3s: %s%s\n", +++ p->lang_code, p->ref_to_name, +++ p->type == 1 ? ".jar" : p->type == 2 ?
"/" : " (unknown type)"); +++} +++ +++static void _app_cache_info_print(const BDJO_APP_CACHE_INFO *p) +++{ +++ unsigned ii; +++ +++ printf("Application cache info:\n"); +++ for (ii = 0; ii < p->num_item; ii++) { +++ _app_cache_item_print(&p->item[ii]); +++ } +++} +++ +++static void _accessible_playlists_print(const BDJO_ACCESSIBLE_PLAYLISTS *p) +++{ +++ unsigned ii; +++ +++ printf("Accessible playlists:\n"); +++ printf(" Access to all : %s\n", _yes_no(p->access_to_all_flag)); +++ printf(" Autostart first : %s\n", _yes_no(p->autostart_first_playlist_flag)); +++ +++ if (p->num_pl) { +++ printf(" Playlists : %d\n", p->num_pl); +++ for (ii = 0; ii < p->num_pl; ii++) { +++ printf(" %s.mpls\n", p->pl[ii].name); +++ } +++ } +++} +++ +++static void _app_profile_print(BDJO_APP_PROFILE *p) +++{ +++ printf(" Profile %d Version %d.%d.%d\n", +++ p->profile_number, p->major_version, p->minor_version, p->micro_version); +++} +++ +++static void _app_print(const BDJO_APP *p) +++{ +++ unsigned ii; +++ +++ printf(" Control code: : %d (%s)\n", p->control_code, +++ p->control_code == 1 ? "autostart" : p->control_code == 2 ? "present" : "???"); +++ printf(" Type : %d (%s)\n", p->type, +++ p->type == 1 ? 
"BD-J App" : "???"); +++ printf(" Organization ID : %08X\n", p->org_id); +++ printf(" Application ID : %04X\n", p->app_id); +++ printf(" Priority : %d\n", p->priority); +++ printf(" Binding : %d (%s)\n", p->binding, _binding_str(p->binding)); +++ printf(" Visibility : %d (%s)\n", p->visibility, _visibility_str(p->visibility)); +++ +++ if (p->num_profile) { +++ printf(" Profiles:\n"); +++ for (ii = 0; ii < p->num_profile; ii++) { +++ _app_profile_print(&p->profile[ii]); +++ } +++ } +++ +++ if (p->num_name) { +++ printf(" Names:\n"); +++ for (ii = 0; ii < p->num_name; ii++) { +++ printf(" %s: %s\n", p->name[ii].lang, p->name[ii].name); +++ } +++ } +++ +++ printf(" Base directory : %s\n", p->base_dir); +++ printf(" Icon locator : %s\n", p->icon_locator); +++ printf(" Icon flags : 0x%04x\n", p->icon_flags); +++ printf(" Classpath extension : %s\n", p->classpath_extension); +++ printf(" Initial class : %s\n", p->initial_class); +++ printf(" Parameters : "); +++ for (ii = 0; ii < p->num_param; ii++) { +++ printf("%s ", p->param[ii].param); +++ } +++ printf("\n"); +++} +++ +++static void _app_management_table_print(const BDJO_APP_MANAGEMENT_TABLE *p) +++{ +++ unsigned ii; +++ +++ for (ii = 0; ii < p->num_app; ii++) { +++ printf("Application %u:\n", ii); +++ _app_print(&p->app[ii]); +++ } +++} +++ +++static void _key_interest_table_print(const BDJO_KEY_INTEREST_TABLE *p) +++{ +++ unsigned int v; +++ memcpy(&v, p, sizeof(unsigned int)); +++ if (v) { +++ printf("Key interest table:\n"); +++ printf(" %s%s%s%s%s%s%s%s%s%s%s\n", +++ p->vk_play ? "VK_PLAY " : "", +++ p->vk_stop ? "VK_STOP " : "", +++ p->vk_ffw ? "VK_FFW " : "", +++ p->vk_rew ? "VK_REW " : "", +++ p->vk_track_next ? "VK_TRACK_NEXT " : "", +++ p->vk_track_prev ? "VK_TRACK_PREV " : "", +++ p->vk_pause ? "VK_PAUSE " : "", +++ p->vk_still_off ? "VK_STILL_OFF " : "", +++ p->vk_sec_audio_ena_dis ? "VK_SEC_AUDIO " : "", +++ p->vk_sec_video_ena_dis ? "VK_SEC_VIDEO " : "", +++ p->pg_textst_ena_dis ? 
"VK_PG_TEXTST " : ""); +++ } +++} +++ +++static void _file_access_info_print(const BDJO_FILE_ACCESS_INFO *p) +++{ +++ printf("File access info:\n %s\n", p->path); +++} +++ +++static void _bdjo_print(const BDJO *p) +++{ +++ _terminal_info_print(&p->terminal_info); +++ _app_cache_info_print(&p->app_cache_info); +++ _accessible_playlists_print(&p->accessible_playlists); +++ _app_management_table_print(&p->app_table); +++ _key_interest_table_print(&p->key_interest_table); +++ _file_access_info_print(&p->file_access_info); +++} +++ +++int main(int argc, const char *argv[]) +++{ +++ if (argc < 2) { +++ fprintf(stderr, "usage: %s <bdjo_file>\n", argv[0]); +++ return 1; +++ } +++ +++ int cnt; +++ for (cnt = 1; cnt < argc; cnt++) { +++ +++ printf("%s\n", argv[cnt]); +++ +++ BDJO *bdjo = bd_read_bdjo(argv[cnt]); +++ if (bdjo) { +++ _bdjo_print(bdjo); +++ bd_free_bdjo(bdjo); +++ } +++ printf("\n"); +++ } +++ +++ return 0; +++} ++diff --git a/src/devtools/clpi_dump.c b/src/devtools/clpi_dump.c ++new file mode 100644 ++index 0000000..bd64783 ++--- /dev/null +++++ b/src/devtools/clpi_dump.c ++@@ -0,0 +1,487 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2009-2010 John Stebbins +++ * Copyright (C) 2012-2013 Petri Hintukainen <phintuka@users.sourceforge.net> +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. 
+++ */ +++ +++#include <stdio.h> +++#include <stdlib.h> +++#include <unistd.h> +++#include <inttypes.h> +++ +++#include "libbluray/bdnav/clpi_data.h" +++#include "libbluray/bluray.h" +++ +++#include "util.h" +++ +++static int verbose; +++ +++typedef struct { +++ int value; +++ const char *str; +++} VALUE_MAP; +++ +++static inline const char* +++_lookup_str(const VALUE_MAP *map, int val) +++{ +++ int ii; +++ +++ for (ii = 0; map[ii].str; ii++) { +++ if (val == map[ii].value) { +++ return map[ii].str; +++ } +++ } +++ return "?"; +++} +++ +++const VALUE_MAP codec_map[] = { +++ {0x01, "MPEG-1 Video"}, +++ {0x02, "MPEG-2 Video"}, +++ {0x03, "MPEG-1 Audio"}, +++ {0x04, "MPEG-2 Audio"}, +++ {0x80, "LPCM"}, +++ {0x81, "AC-3"}, +++ {0x82, "DTS"}, +++ {0x83, "TrueHD"}, +++ {0x84, "AC-3 Plus"}, +++ {0x85, "DTS-HD"}, +++ {0x86, "DTS-HD Master"}, +++ {0xa1, "AC-3 Plus for secondary audio"}, +++ {0xa2, "DTS-HD for secondary audio"}, +++ {0xea, "VC-1"}, +++ {0x1b, "H.264"}, +++ {0x20, "H.264 MVC dep."}, +++ {0x90, "Presentation Graphics"}, +++ {0x91, "Presentation Graphics"}, +++ {0x92, "Interactive Graphics"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP video_format_map[] = { +++ {0, "Reserved"}, +++ {1, "480i"}, +++ {2, "576i"}, +++ {3, "480p"}, +++ {4, "1080i"}, +++ {5, "720p"}, +++ {6, "1080p"}, +++ {7, "576p"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP video_rate_map[] = { +++ {0, "Reserved1"}, +++ {1, "23.976"}, +++ {2, "24"}, +++ {3, "25"}, +++ {4, "29.97"}, +++ {5, "Reserved2"}, +++ {6, "50"}, +++ {7, "59.94"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP video_aspect_map[] = { +++ {0, "Reserved1"}, +++ {1, "Reserved2"}, +++ {2, "4:3"}, +++ {3, "16:9"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP audio_format_map[] = { +++ {0, "Reserved1"}, +++ {1, "Mono"}, +++ {2, "Reserved2"}, +++ {3, "Stereo"}, +++ {4, "Reserved3"}, +++ {5, "Reserved4"}, +++ {6, "Multi Channel"}, +++ {12, "Combo"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP audio_rate_map[] = { +++ {0, "Reserved1"}, +++ 
{1, "48 Khz"}, +++ {2, "Reserved2"}, +++ {3, "Reserved3"}, +++ {4, "96 Khz"}, +++ {5, "192 Khz"}, +++ {12, "48/192 Khz"}, +++ {14, "48/96 Khz"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP application_type_map[] = { +++ {1, "Main TS for a main-path of Movie"}, +++ {2, "Main TS for a main-path of Time based slide show"}, +++ {3, "Main TS for a main-path of Browsable slide show"}, +++ {4, "Sub TS for a sub-path of Browsable slide show"}, +++ {5, "Sub TS for a sub-path of Interactive Graphics menu"}, +++ {6, "Sub TS for a sub-path of Text subtitle"}, +++ {7, "Sub TS for a sub-path of one or more elementary streams path"}, +++ {0, NULL}, +++}; +++ +++static void +++_show_stream(CLPI_PROG_STREAM *ss, int level) +++{ +++ indent_printf(level, "Codec (%04x): %s", ss->coding_type, +++ _lookup_str(codec_map, ss->coding_type)); +++ indent_printf(level, "PID: %04x", ss->pid); +++ switch (ss->coding_type) { +++ case 0x01: +++ case 0x02: +++ case 0xea: +++ case 0x1b: +++ case 0x20: +++ indent_printf(level, "Format %02x: %s", ss->format, +++ _lookup_str(video_format_map, ss->format)); +++ indent_printf(level, "Rate %02x: %s", ss->rate, +++ _lookup_str(video_rate_map, ss->rate)); +++ indent_printf(level, "Aspect %02x: %s", ss->aspect, +++ _lookup_str(video_aspect_map, ss->aspect)); +++ indent_printf(level, "oc_flag %02x", ss->oc_flag); +++ break; +++ +++ case 0x03: +++ case 0x04: +++ case 0x80: +++ case 0x81: +++ case 0x82: +++ case 0x83: +++ case 0x84: +++ case 0x85: +++ case 0x86: +++ case 0xa1: +++ case 0xa2: +++ indent_printf(level, "Format %02x: %s", ss->format, +++ _lookup_str(audio_format_map, ss->format)); +++ indent_printf(level, "Rate %02x: %s", ss->rate, +++ _lookup_str(audio_rate_map, ss->rate)); +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ case 0x90: +++ case 0x91: +++ case 0xa0: +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ case 0x92: +++ indent_printf(level, "Char Code: %02x", ss->char_code); +++ indent_printf(level, 
"Language: %s", ss->lang); +++ break; +++ +++ default: +++ fprintf(stderr, "unrecognized coding type %02x\n", ss->coding_type); +++ break; +++ }; +++} +++ +++static void +++_show_clip_info(CLPI_CL *cl, int level) +++{ +++ CLPI_CLIP_INFO *ci = &cl->clip; +++ int ii; +++ +++ indent_printf(level, "Clip Info"); +++ indent_printf(level+1, "Clip Stream Type: %02x", ci->clip_stream_type); +++ indent_printf(level+1, "Clip Application Type (%02x): %s", +++ ci->application_type, _lookup_str(application_type_map, ci->application_type)); +++ indent_printf(level+1, "is_ATC_delta: %s", ci->is_atc_delta ? "True" : "False"); +++ indent_printf(level+1, "ATC delta count: %d", ci->atc_delta_count); +++ indent_printf(level+1, "TS Recording Rate: %u", ci->ts_recording_rate); +++ indent_printf(level+1, "Number Source Packets: %u", ci->num_source_packets); +++ // Show ts type info +++ indent_printf(level+1, "TS Type Info"); +++ indent_printf(level+2, "Validity Flags %02x", ci->ts_type_info.validity); +++ indent_printf(level+2, "Format Id %s", ci->ts_type_info.format_id); +++ // Show cc5 thing +++ for (ii = 0; ii < ci->atc_delta_count; ii++) { +++ indent_printf(level+1, "ATC delta[ %d ]", ii); +++ indent_printf(level+2, "Delta %08x", ci->atc_delta[ii].delta); +++ indent_printf(level+2, "File Id %s", ci->atc_delta[ii].file_id); +++ indent_printf(level+2, "File Code %s", ci->atc_delta[ii].file_code); +++ } +++ // show fonts +++ if (cl->font_info.font_count) { +++ indent_printf(level+1, "Font files"); +++ for (ii = 0; ii < cl->font_info.font_count; ii++) { +++ indent_printf(level+2, "Font file %d: %s.otf", ii+1, cl->font_info.font[ii].file_id); +++ } +++ } +++ +++ printf("\n"); +++} +++ +++static void +++_show_seq_info(CLPI_SEQ_INFO *si, int level) +++{ +++ CLPI_ATC_SEQ *atc; +++ CLPI_STC_SEQ *stc; +++ int ii, jj; +++ +++ indent_printf(level, "Sequence Info"); +++ indent_printf(level+1, "Number ATC Sequences: %d", si->num_atc_seq); +++ for (ii = 0; ii < si->num_atc_seq; ii++) { +++ atc = 
&si->atc_seq[ii]; +++ indent_printf(level+1, "ATC Sequence %d", ii); +++ indent_printf(level+2, "SPN ATC Start: %u", atc->spn_atc_start); +++ indent_printf(level+2, "Offset STC Id: %d", atc->offset_stc_id); +++ indent_printf(level+2, "Number STC Sequences: %d", atc->num_stc_seq); +++ for (jj = 0; jj < atc->num_stc_seq; jj++) { +++ stc = &atc->stc_seq[jj]; +++ indent_printf(level+2, "ATC Sequence %d", jj); +++ indent_printf(level+3, "SPN STC Start: %u", stc->spn_stc_start); +++ indent_printf(level+3, "PCR PID: %04x", stc->pcr_pid); +++ indent_printf(level+3, "Presentation Start: %u", +++ stc->presentation_start_time); +++ indent_printf(level+3, "Presentation End: %u", +++ stc->presentation_end_time); +++ } +++ } +++} +++ +++static void +++_show_prog_info(CLPI_PROG_INFO *pi, int level) +++{ +++ CLPI_PROG *prog; +++ int ii, jj; +++ +++ indent_printf(level, "Program Info"); +++ indent_printf(level+1, "Number Programs: %d", pi->num_prog); +++ for (ii = 0; ii < pi->num_prog; ii++) { +++ prog = &pi->progs[ii]; +++ indent_printf(level+1, "Program %d", ii); +++ indent_printf(level+2, "SPN Program Sequence Start: %d", +++ prog->spn_program_sequence_start); +++ indent_printf(level+2, "Program Map PID: %d", prog->program_map_pid); +++ indent_printf(level+2, "Number Streams: %d", prog->num_streams); +++ indent_printf(level+2, "Number Groups: %d", prog->num_groups); +++ for (jj = 0; jj < prog->num_streams; jj++) { +++ indent_printf(level+2, "Stream %d", jj); +++ _show_stream(&prog->streams[jj], level+3); +++ } +++ } +++} +++ +++static void +++_show_extent_start(CLPI_EXTENT_START *es, int level) +++{ +++ unsigned int ii; +++ +++ indent_printf(level, "Extension data: Extent Start Point"); +++ +++ if (!es->num_point) { +++ indent_printf(level+1, "(no data)"); +++ +++ } else { +++ indent_printf(level+1, "Number of Start Points: %d", es->num_point); +++ +++ if (verbose) { +++ for (ii = 0; ii < es->num_point; ii++) { +++ indent_printf(level+1, "Extent %5d: SPN 0x%08X", ii, 
es->point[ii]); +++ } +++ } +++ } +++} +++ +++static void +++_show_cpi_info(CLPI_CPI *cpi, int level) +++{ +++ CLPI_EP_MAP_ENTRY *entry; +++ CLPI_EP_COARSE *coarse; +++ CLPI_EP_FINE *fine; +++ int ii, jj, kk; +++ +++ indent_printf(level, "CPI"); +++ indent_printf(level+1, "Number Stream PID: %d", cpi->num_stream_pid); +++ for (ii = 0; ii < cpi->num_stream_pid; ii++) { +++ entry = &cpi->entry[ii]; +++ indent_printf(level+1, "Stream: %d", ii); +++ indent_printf(level+2, "PID: %04x", entry->pid); +++ indent_printf(level+2, "EP Stream Type: %d", entry->ep_stream_type); +++ indent_printf(level+2, "Number EP Coarse: %d", entry->num_ep_coarse); +++ indent_printf(level+2, "Number EP Fine: %d", entry->num_ep_fine); +++ indent_printf(level+2, "EP Map Start: %d", +++ entry->ep_map_stream_start_addr); +++ for (jj = 0; jj < entry->num_ep_coarse; jj++) { +++ coarse = &entry->coarse[jj]; +++ indent_printf(level+2, "Coarse: %d", jj); +++ indent_printf(level+3, "Ref EP Fine: %d", coarse->ref_ep_fine_id); +++ indent_printf(level+3, "PTS EP: %d", coarse->pts_ep); +++ indent_printf(level+3, "SPN EP: %d", coarse->spn_ep); +++ } +++ for (jj = 0; jj < entry->num_ep_fine; jj++) { +++ fine = &entry->fine[jj]; +++ indent_printf(level+2, "Fine: %d", jj); +++ indent_printf(level+3, "Angle Change Point: %s", +++ fine->is_angle_change_point ? 
"True":"False"); +++ indent_printf(level+3, "I End Offset: %d", +++ fine->i_end_position_offset); +++ indent_printf(level+3, "PTS EP: %d", fine->pts_ep); +++ indent_printf(level+3, "SPN EP: %d", fine->spn_ep); +++ } +++ if (verbose) { +++ uint64_t pts; +++ uint32_t spn; +++ +++ indent_printf(level+2, "PTS - SPN Map"); +++ for (jj = 0; jj < entry->num_ep_coarse; jj++) { +++ int start, end; +++ +++ indent_printf(level+3, "Coarse: %d", jj); +++ coarse = &entry->coarse[jj]; +++ start = coarse->ref_ep_fine_id; +++ if (jj < entry->num_ep_coarse - 1) { +++ end = entry->coarse[jj+1].ref_ep_fine_id; +++ } else { +++ end = entry->num_ep_fine; +++ } +++ for (kk = start; kk < end; kk++) { +++ fine = &entry->fine[kk]; +++ pts = ((uint64_t) (coarse->pts_ep & ~0x01) << 19) + +++ ((uint64_t)fine->pts_ep << 9); +++ spn = (coarse->spn_ep & ~0x1FFFF) + fine->spn_ep; +++ indent_printf(level+4, "PTS %8"PRIu64"/%8"PRIu64" -- SPN %u", +++ pts, pts >> 1, spn); +++ } +++ } +++ } +++ } +++} +++ +++ +++static void +++_usage(char *cmd) +++{ +++ fprintf(stderr, +++"Usage: %s -vcspi <clpi file> [<clpi file> ...]\n" +++"With no options, produces no output (not very useful)\n" +++"Options:\n" +++" v - Verbose output.\n" +++" c - Shows the Clip Info structure\n" +++" s - Shows the Sequence Info structure\n" +++" p - Shows the Program Info structure\n" +++" i - Shows the CPI. 
PTS to SPN map\n" +++" e - Shows Extent Start Table\n" +++, cmd); +++ +++ exit(EXIT_FAILURE); +++} +++ +++#define OPTS "vcspie" +++ +++int +++main(int argc, char *argv[]) +++{ +++ CLPI_CL *cl; +++ int opt; +++ int opt_clip_info = 0, opt_seq_info = 0, opt_prog_info = 0; +++ int opt_cpi_info = 0, opt_extent_start = 0; +++ int ii; +++ +++ do { +++ opt = getopt(argc, argv, OPTS); +++ switch (opt) { +++ case -1: break; +++ +++ case 'v': +++ verbose = 1; +++ break; +++ +++ case 's': +++ opt_seq_info = 1; +++ break; +++ +++ case 'i': +++ opt_cpi_info = 1; +++ break; +++ +++ case 'c': +++ opt_clip_info = 1; +++ break; +++ +++ case 'p': +++ opt_prog_info = 1; +++ break; +++ +++ case 'e': +++ opt_extent_start = 1; +++ break; +++ +++ default: +++ _usage(argv[0]); +++ break; +++ } +++ } while (opt != -1); +++ +++ if (optind >= argc) { +++ _usage(argv[0]); +++ } +++ +++ for (ii = optind; ii < argc; ii++) { +++ cl = bd_read_clpi(argv[ii]); +++ if (cl == NULL) { +++ fprintf(stderr, "Parsing %s failed\n", argv[ii]); +++ continue; +++ } +++ if (opt_clip_info) { +++ // Show clip info +++ _show_clip_info(cl, 1); +++ } +++ if (opt_seq_info) { +++ // Show sequence info +++ _show_seq_info(&cl->sequence, 1); +++ } +++ if (opt_prog_info) { +++ // Show program info +++ _show_prog_info(&cl->program, 1); +++ } +++ if (opt_cpi_info) { +++ // Show cpi +++ _show_cpi_info(&cl->cpi, 1); +++ } +++ +++ if (opt_prog_info) { +++ if (cl->program_ss.num_prog) { +++ printf("\n"); +++ indent_printf(1, "Extension: Program Info SS"); +++ _show_prog_info(&cl->program_ss, 1); +++ } +++ } +++ if (opt_cpi_info) { +++ if (cl->program_ss.num_prog) { +++ printf("\n"); +++ indent_printf(1, "Extension: CPI SS"); +++ _show_cpi_info(&cl->cpi_ss, 1); +++ } +++ } +++ if (opt_extent_start) { +++ // Show extent start point +++ if (cl->extent_start.num_point > 0) { +++ _show_extent_start(&cl->extent_start, 1); +++ } +++ } +++ +++ bd_free_clpi(cl); +++ } +++ return 0; +++} +++ ++diff --git a/src/devtools/hdmv_test.c 
b/src/devtools/hdmv_test.c ++new file mode 100644 ++index 0000000..585ed70 ++--- /dev/null +++++ b/src/devtools/hdmv_test.c ++@@ -0,0 +1,257 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2010 hpi1 +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. +++ */ +++ +++#include <stdio.h> +++#include <stdlib.h> +++#include <string.h> +++#include <inttypes.h> +++ +++#include "util/log_control.h" +++#include "libbluray/bluray.h" +++ +++static void _print_event(BD_EVENT *ev) +++{ +++ switch (ev->event) { +++ case BD_EVENT_NONE: +++ break; +++ case BD_EVENT_ERROR: +++ printf("EVENT_ERROR:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_READ_ERROR: +++ printf("EVENT_READ_ERROR:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_ENCRYPTED: +++ printf("EVENT_ENCRYPTED:\t%d\n", ev->param); +++ break; +++ +++ /* current playback position */ +++ +++ case BD_EVENT_ANGLE: +++ printf("EVENT_ANGLE:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_TITLE: +++ printf("EVENT_TITLE:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_PLAYLIST: +++ printf("EVENT_PLAYLIST:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_PLAYITEM: +++ printf("EVENT_PLAYITEM:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_CHAPTER: +++ printf("EVENT_CHAPTER:\t%d\n", ev->param); +++ break; +++ +++ /* */ +++ +++ case BD_EVENT_STILL: +++ 
printf("EVENT_STILL:\t%d\n", ev->param); +++ break; +++ +++ case BD_EVENT_SEEK: +++ printf("EVENT_SEEK:\t%d\n", ev->param); +++ break; +++ +++ case BD_EVENT_STILL_TIME: +++ if (ev->param) { +++ printf("EVENT_STILL_TIME:\t%d\n", ev->param); +++ } else { +++ printf("EVENT_STILL_TIME:\tinfinite\n"); +++ } +++ break; +++ +++ /* stream selection */ +++ +++ case BD_EVENT_AUDIO_STREAM: +++ printf("EVENT_AUDIO_STREAM:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_IG_STREAM: +++ printf("EVENT_IG_STREAM:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_PG_TEXTST_STREAM: +++ printf("EVENT_PG_TEXTST_STREAM:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_SECONDARY_AUDIO_STREAM: +++ printf("EVENT_SECONDARY_AUDIO_STREAM:\t%d\n", ev->param); +++ break; +++ case BD_EVENT_SECONDARY_VIDEO_STREAM: +++ printf("EVENT_SECONDARY_VIDEO_STREAM:\t%d\n", ev->param); +++ break; +++ +++ case BD_EVENT_PG_TEXTST: +++ printf("EVENT_PG_TEXTST:\t%s\n", ev->param ? "enable" : "disable"); +++ break; +++ case BD_EVENT_SECONDARY_AUDIO: +++ printf("EVENT_SECONDARY_AUDIO:\t%s\n", ev->param ? "enable" : "disable"); +++ break; +++ case BD_EVENT_SECONDARY_VIDEO: +++ printf("EVENT_SECONDARY_VIDEO:\t%s\n", ev->param ? "enable" : "disable"); +++ break; +++ case BD_EVENT_SECONDARY_VIDEO_SIZE: +++ printf("EVENT_SECONDARY_VIDEO_SIZE:\t%s\n", ev->param==0 ? "PIP" : "fullscreen"); +++ break; +++ +++ default: +++ printf("UNKNOWN EVENT %d:\t%d\n", ev->event, ev->param); +++ break; +++ } +++ +++ fflush(stdout); +++} +++ +++static void _read_to_eof(BLURAY *bd) +++{ +++ BD_EVENT ev; +++ int bytes; +++ uint64_t total = 0; +++ uint8_t buf[6144]; +++ +++ bd_seek(bd, bd_get_title_size(bd) - 6144); +++ +++ do { +++ bytes = bd_read_ext(bd, buf, 6144, &ev); +++ total += bytes < 0 ? 
0 : bytes; +++ _print_event(&ev); +++ } while (bytes > 0); +++ +++ printf("_read_to_eof(): read %"PRIu64" bytes\n", total); +++} +++ +++static void _print_events(BLURAY *bd) +++{ +++ BD_EVENT ev; +++ +++ do { +++ bd_read_ext(bd, NULL, 0, &ev); +++ _print_event(&ev); +++ } while (ev.event != BD_EVENT_NONE && ev.event != BD_EVENT_ERROR); +++} +++ +++static void _play_pl(BLURAY *bd) +++{ +++ printf("Playing playlist\n"); +++ +++ fflush(stdout); +++ _read_to_eof(bd); +++ +++ printf("Playing playlist done\n\n"); +++ +++ _print_events(bd); +++ +++ printf("\n"); +++} +++ +++int main(int argc, char *argv[]) +++{ +++ int title = -1; +++ int verbose = 0; +++ int args = 0; +++ +++ /* +++ * parse arguments +++ */ +++ +++ if (argc < 2) { +++ printf("\nUsage:\n %s [-v] [-t <title>] <media_path> [<keyfile_path>]\n\n", argv[0]); +++ return -1; +++ } +++ +++ if (!strcmp(argv[1+args], "-v")) { +++ verbose = 1; +++ args++; +++ } +++ +++ if (!strcmp(argv[1+args], "-t")) { +++ args++; +++ title = atoi(argv[1+args]); +++ args++; +++ printf("Requested title %d\n", title); +++ } +++ +++ if (verbose) { +++ printf("Enabling verbose debug\n"); +++ bd_set_debug_mask(bd_get_debug_mask() | DBG_HDMV | DBG_BLURAY); +++ } +++ +++ printf("\n"); +++ +++ /* +++ * open and setup +++ */ +++ +++ BLURAY *bd = bd_open(argv[1+args], argv[2+args]); +++ +++ if (!bd) { +++ printf("bd_open(\'%s\') failed\n", argv[1]); +++ return -1; +++ } +++ +++ bd_set_player_setting (bd, BLURAY_PLAYER_SETTING_PARENTAL, 99); +++ bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_AUDIO_LANG, "eng"); +++ bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_PG_LANG, "eng"); +++ bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_MENU_LANG, "eng"); +++ bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_COUNTRY_CODE, NULL); +++ +++ /* +++ * play +++ */ +++ +++ printf("Running first play movie object\n"); +++ +++ fflush(stdout); +++ bd_play(bd); +++ +++ _print_events(bd); +++ +++ printf("\n"); +++ +++ /* +++ * play title +++ */ 
+++ +++ if (title >= 0) { +++ printf("Playing title %d\n", title); +++ +++ fflush(stdout); +++ bd_play_title(bd, title); +++ +++ _print_events(bd); +++ +++ printf("\n"); +++ } +++ +++ /* +++ * play playlist +++ */ +++ +++ _play_pl(bd); +++ +++ _play_pl(bd); +++ +++ _play_pl(bd); +++ +++ /* +++ * clean up +++ */ +++ +++ bd_close(bd); +++ +++ return 0; +++} +++ ++diff --git a/src/devtools/mobj_dump.c b/src/devtools/mobj_dump.c ++new file mode 100644 ++index 0000000..3eaf9f4 ++--- /dev/null +++++ b/src/devtools/mobj_dump.c ++@@ -0,0 +1,83 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2010 hpi1 +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. 
+++ */ +++ +++#include "libbluray/bluray.h" +++ +++#include "libbluray/hdmv/mobj_data.h" +++#include "libbluray/hdmv/mobj_print.h" +++ +++#include <stdio.h> +++#include <string.h> +++ +++static void _mobj_print(MOBJ_OBJECTS *objects, int disasm) +++{ +++ int o, c; +++ +++ printf("Number of objects: %d\n", objects->num_objects); +++ +++ for (o = 0; o < objects->num_objects; o++) { +++ +++ printf("Object %d:\n" +++ " number of commands: %d\n" +++ " resume intention flag: %d\n" +++ " menu call mask: %d\n" +++ " title search mask: %d\n", +++ o, objects->objects[o].num_cmds, +++ objects->objects[o].resume_intention_flag, +++ objects->objects[o].menu_call_mask, +++ objects->objects[o].title_search_mask); +++ +++ if (disasm) { +++ printf(" program:\n"); +++ for (c = 0; c < objects->objects[o].num_cmds; c++) { +++ char buf[256]; +++ mobj_sprint_cmd(buf, &objects->objects[o].cmds[c]); +++ printf(" %04d: %s\n", c, buf); +++ } +++ } +++ } +++} +++ +++int main(int argc, const char *argv[]) +++{ +++ int disasm = 0; +++ MOBJ_OBJECTS *mobj = NULL; +++ +++ if (argc < 2) { +++ fprintf(stderr, +++ "usage: %s [-d] <file>\n" +++ "Options:\n" +++ " d disassemble object code\n", +++ argv[0]); +++ return 1; +++ } +++ if (argc > 2) { +++ disasm = !strcmp(argv[1], "-d"); +++ } +++ +++ mobj = bd_read_mobj(argv[argc-1]); +++ +++ if (mobj) { +++ _mobj_print(mobj, disasm); +++ +++ bd_free_mobj(mobj); +++ } +++ +++ return 0; +++} ++diff --git a/src/devtools/mpls_dump.c b/src/devtools/mpls_dump.c ++new file mode 100644 ++index 0000000..405b6a1 ++--- /dev/null +++++ b/src/devtools/mpls_dump.c ++@@ -0,0 +1,799 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2009-2010 John Stebbins +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. 
+++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. +++ */ +++ +++#include <sys/stat.h> +++#include <dirent.h> +++#include <stdio.h> +++#include <stdlib.h> +++#include <unistd.h> +++#include <string.h> +++#include <libgen.h> +++ +++#include "libbluray/bdnav/mpls_parse.h" +++#include "libbluray/bluray.h" +++ +++#include "util.h" +++ +++#ifdef _WIN32 +++# define DIR_SEP "\\" +++# define PLAYLIST_DIR "\\BDMV\\PLAYLIST" +++#else +++# define DIR_SEP "/" +++# define PLAYLIST_DIR "/BDMV/PLAYLIST" +++#endif +++ +++ +++static int verbose; +++ +++typedef struct { +++ int value; +++ const char *str; +++} VALUE_MAP; +++ +++const VALUE_MAP codec_map[] = { +++ {0x01, "MPEG-1 Video"}, +++ {0x02, "MPEG-2 Video"}, +++ {0x03, "MPEG-1 Audio"}, +++ {0x04, "MPEG-2 Audio"}, +++ {0x80, "LPCM"}, +++ {0x81, "AC-3"}, +++ {0x82, "DTS"}, +++ {0x83, "TrueHD"}, +++ {0x84, "AC-3 Plus"}, +++ {0x85, "DTS-HD"}, +++ {0x86, "DTS-HD Master"}, +++ {0xa1, "AC-3 Plus for secondary audio"}, +++ {0xa2, "DTS-HD for secondary audio"}, +++ {0xea, "VC-1"}, +++ {0x1b, "H.264"}, +++ {0x90, "Presentation Graphics"}, +++ {0x91, "Interactive Graphics"}, +++ {0x92, "Text Subtitle"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP video_format_map[] = { +++ {0, "Reserved"}, +++ {1, "480i"}, +++ {2, "576i"}, +++ {3, "480p"}, +++ {4, "1080i"}, +++ {5, "720p"}, +++ {6, "1080p"}, +++ {7, "576p"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP video_rate_map[] = { +++ {0, "Reserved1"}, +++ {1, "23.976"}, +++ {2, "24"}, +++ {3, "25"}, +++ {4, "29.97"}, +++ {5, "Reserved2"}, +++ {6, "50"}, +++ {7, "59.94"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP 
audio_format_map[] = { +++ {0, "Reserved1"}, +++ {1, "Mono"}, +++ {2, "Reserved2"}, +++ {3, "Stereo"}, +++ {4, "Reserved3"}, +++ {5, "Reserved4"}, +++ {6, "Multi Channel"}, +++ {12, "Combo"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP audio_rate_map[] = { +++ {0, "Reserved1"}, +++ {1, "48 Khz"}, +++ {2, "Reserved2"}, +++ {3, "Reserved3"}, +++ {4, "96 Khz"}, +++ {5, "192 Khz"}, +++ {12, "48/192 Khz"}, +++ {14, "48/96 Khz"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP subpath_type_map[] = { +++ {2, "Primary audio of the Browsable slideshow"}, +++ {3, "Interactive Graphics presentation menu"}, +++ {4, "Text Subtitle"}, +++ {5, "Out-of-mux Synchronous elementary streams"}, +++ {6, "Out-of-mux Asynchronous Picture-in-Picture presentation"}, +++ {7, "In-mux Synchronous Picture-in-Picture presentation"}, +++ {8, "SS Video"}, +++ {0,NULL} +++}; +++ +++const VALUE_MAP playback_type_map[] = { +++ {1, "Sequential"}, +++ {2, "Random"}, +++ {3, "Shuffle"}, +++ {0, NULL} +++}; +++ +++const VALUE_MAP connection_type_map[] = { +++ {1, "Non-seamless"}, +++ {5, "Seamless"}, +++ {6, "Seamless"}, +++ {0, NULL} +++}; +++ +++static const char* +++_lookup_str(const VALUE_MAP *map, int val) +++{ +++ int ii; +++ +++ for (ii = 0; map[ii].str; ii++) { +++ if (val == map[ii].value) { +++ return map[ii].str; +++ } +++ } +++ return "?"; +++} +++ +++static char * +++_mk_path(const char *base, const char *sub) +++{ +++ size_t n1 = strlen(base); +++ size_t n2 = strlen(sub); +++ char *result = (char*)malloc(n1 + n2 + strlen(DIR_SEP) + 1); +++ strcpy(result, base); +++ strcat(result, DIR_SEP); +++ strcat(result, sub); +++ +++ return result; +++} +++ +++static void +++_show_stream(MPLS_STREAM *ss, int level) +++{ +++ indent_printf(level, "Codec (%04x): %s", ss->coding_type, +++ _lookup_str(codec_map, ss->coding_type)); +++ switch (ss->stream_type) { +++ case 1: +++ indent_printf(level, "PID: %04x", ss->pid); +++ break; +++ +++ case 2: +++ case 4: +++ indent_printf(level, "SubPath Id: %02x", 
ss->subpath_id); +++ indent_printf(level, "SubClip Id: %02x", ss->subclip_id); +++ indent_printf(level, "PID: %04x", ss->pid); +++ break; +++ +++ case 3: +++ indent_printf(level, "SubPath Id: %02x", ss->subpath_id); +++ indent_printf(level, "PID: %04x", ss->pid); +++ break; +++ +++ default: +++ fprintf(stderr, "unrecognized stream type %02x\n", ss->stream_type); +++ break; +++ }; +++ +++ switch (ss->coding_type) { +++ case 0x01: +++ case 0x02: +++ case 0xea: +++ case 0x1b: +++ indent_printf(level, "Format %02x: %s", ss->format, +++ _lookup_str(video_format_map, ss->format)); +++ indent_printf(level, "Rate %02x: %s", ss->rate, +++ _lookup_str(video_rate_map, ss->rate)); +++ break; +++ +++ case 0x03: +++ case 0x04: +++ case 0x80: +++ case 0x81: +++ case 0x82: +++ case 0x83: +++ case 0x84: +++ case 0x85: +++ case 0x86: +++ case 0xa1: +++ case 0xa2: +++ indent_printf(level, "Format %02x: %s", ss->format, +++ _lookup_str(audio_format_map, ss->format)); +++ indent_printf(level, "Rate %02x: %s", ss->rate, +++ _lookup_str(audio_rate_map, ss->rate)); +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ case 0x90: +++ case 0x91: +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ case 0x92: +++ indent_printf(level, "Char Code: %02x", ss->char_code); +++ indent_printf(level, "Language: %s", ss->lang); +++ break; +++ +++ default: +++ fprintf(stderr, "unrecognized coding type %02x\n", ss->coding_type); +++ break; +++ }; +++} +++ +++static void +++_show_details(MPLS_PL *pl, int level) +++{ +++ int ii, jj, kk; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ MPLS_PI *pi; +++ +++ pi = &pl->play_item[ii]; +++ indent_printf(level, "Clip Id %s", pi->clip[0].clip_id); +++ indent_printf(level+1, "Stc Id: %02x", pi->clip[0].stc_id); +++ indent_printf(level+1, "Connection Condition: %s (%02x)", +++ _lookup_str(connection_type_map, pi->connection_condition), +++ pi->connection_condition); +++ indent_printf(level+1, "In-Time: %d", pi->in_time); +++ 
indent_printf(level+1, "Out-Time: %d", pi->out_time); +++ if (pi->still_mode == 1) { +++ indent_printf(level+1, "Still time: %ds\n", pi->still_time); +++ } +++ if (pi->still_mode == 2) { +++ indent_printf(level+1, "Still time: infinite\n"); +++ } +++ if (pi->angle_count > 1) { +++ for (jj = 1; jj < pi->angle_count; jj++) { +++ indent_printf(level+1, "Angle %d:", jj); +++ indent_printf(level+2, "Clip Id %s", pi->clip[jj].clip_id); +++ indent_printf(level+2, "Stc Id: %02x", pi->clip[jj].stc_id); +++ } +++ } +++ for (jj = 0; jj < pi->stn.num_video; jj++) { +++ indent_printf(level+1, "Video Stream %d:", jj); +++ _show_stream(&pi->stn.video[jj], level + 2); +++ } +++ for (jj = 0; jj < pi->stn.num_audio; jj++) { +++ indent_printf(level+1, "Audio Stream %d:", jj); +++ _show_stream(&pi->stn.audio[jj], level + 2); +++ } +++ for (jj = 0; jj < pi->stn.num_ig; jj++) { +++ indent_printf(level+1, "Interactive Graphics Stream %d:", jj); +++ _show_stream(&pi->stn.ig[jj], level + 2); +++ } +++ for (jj = 0; jj < (pi->stn.num_pg + pi->stn.num_pip_pg); jj++) { +++ if (jj < pi->stn.num_pg) { +++ indent_printf(level+1, "Presentation Graphics Stream %d:", jj); +++ } else { +++ indent_printf(level+1, "PIP Presentation Graphics Stream %d:", jj); +++ } +++ _show_stream(&pi->stn.pg[jj], level + 2); +++ } +++ for (jj = 0; jj < pi->stn.num_secondary_video; jj++) { +++ indent_printf(level+1, "Secondary Video Stream %d:", jj); +++ _show_stream(&pi->stn.secondary_video[jj], level + 2); +++ for (kk = 0; kk < pi->stn.secondary_video[jj].sv_num_secondary_audio_ref; kk++) { +++ indent_printf(level+2, "Secondary Audio Ref %d: %d", kk,pi->stn.secondary_video[jj].sv_secondary_audio_ref[kk]); +++ } +++ for (kk = 0; kk < pi->stn.secondary_video[jj].sv_num_pip_pg_ref; kk++) { +++ indent_printf(level+2, "PIP Presentation Graphic Ref %d: %d", kk,pi->stn.secondary_video[jj].sv_pip_pg_ref[kk]); +++ } +++ } +++ for (jj = 0; jj < pi->stn.num_secondary_audio; jj++) { +++ indent_printf(level+1, "Secondary Audio 
Stream %d:", jj); +++ _show_stream(&pi->stn.secondary_audio[jj], level + 2); +++ for (kk = 0; kk < pi->stn.secondary_audio[jj].sa_num_primary_audio_ref; kk++) { +++ indent_printf(level+2, "Primary Audio Ref %d: %d", kk,pi->stn.secondary_audio[jj].sa_primary_audio_ref[kk]); +++ } +++ } +++ printf("\n"); +++ } +++} +++ +++static void +++_show_ai(MPLS_PL *pl, int level) +++{ +++ indent_printf(level, "Playback type: %s (%d)", +++ _lookup_str(playback_type_map, pl->app_info.playback_type), +++ pl->app_info.playback_type); +++ if (pl->app_info.playback_type == 2 || pl->app_info.playback_type == 3) { +++ indent_printf(level+1, "Playback count: %d", pl->app_info.playback_count); +++ } +++} +++ +++static void +++_show_marks(MPLS_PL *pl, int level) +++{ +++ int ii; +++ +++ indent_printf(level, "PlayMark Count %d", pl->mark_count); +++ for (ii = 0; ii < pl->mark_count; ii++) { +++ MPLS_PI *pi; +++ MPLS_PLM *plm; +++ int min; +++ double sec; +++ +++ plm = &pl->play_mark[ii]; +++ indent_printf(level, "PlayMark %d", ii); +++ indent_printf(level+1, "Type: %02x", plm->mark_type); +++ if (plm->play_item_ref < pl->list_count) { +++ pi = &pl->play_item[plm->play_item_ref]; +++ indent_printf(level+1, "PlayItem: %s", pi->clip[0].clip_id); +++ } else { +++ indent_printf(level+1, "PlayItem: Invalid reference"); +++ } +++ indent_printf(level+1, "Time (ticks): %u", plm->time); +++ min = plm->duration / (45000*60); +++ sec = (double)(plm->duration - min * 45000 * 60) / 45000; +++ indent_printf(level+1, "Duration (mm:ss.ms, ticks): %d:%.2f, %u", +++ min, sec, plm->duration); +++ printf("\n"); +++ } +++} +++ +++static void +++_show_clip_list(MPLS_PL *pl, int level) +++{ +++ int ii, jj; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ MPLS_PI *pi; +++ +++ pi = &pl->play_item[ii]; +++ if (verbose) { +++ uint32_t duration; +++ +++ duration = pi->out_time - pi->in_time; +++ indent_printf(level, "%s.m2ts -- Duration: %3d:%02d", +++ pi->clip[0].clip_id, +++ duration / (45000 * 60), (duration / 
45000) % 60); +++ } else { +++ indent_printf(level, "%s.m2ts", pi->clip[0].clip_id); +++ } +++ if (pi->angle_count > 1) { +++ for (jj = 1; jj < pi->angle_count; jj++) { +++ indent_printf(level+1, "Angle %d: %s.m2ts", jj+1, pi->clip[jj].clip_id); +++ } +++ } +++ } +++ printf("\n"); +++} +++ +++static void +++_show_sub_path(MPLS_SUB *sub, int level) +++{ +++ int ii; +++ +++ indent_printf(level+1, "Type: %d (%s)", sub->type, _lookup_str(subpath_type_map, sub->type)); +++ indent_printf(level+1, "Repeat: %d", sub->is_repeat); +++ indent_printf(level+1, "Sub playitem count: %d", sub->sub_playitem_count); +++ +++ for (ii = 0; ii < sub->sub_playitem_count; ii++) { +++ MPLS_SUB_PI *pi; +++ +++ pi = &sub->sub_play_item[ii]; +++ +++ if (verbose) { +++ indent_printf(level+1, "Sub playitem %d", ii); +++ indent_printf(level+2, "Clip Id %s", pi->clip[0].clip_id); +++ indent_printf(level+2, "Multi clip: %d", pi->is_multi_clip); +++ indent_printf(level+2, "Clip count: %d", pi->clip_count); +++ indent_printf(level+2, "Connection Condition: %s (%02x)", +++ _lookup_str(connection_type_map, pi->connection_condition), +++ pi->connection_condition); +++ indent_printf(level+2, "In-Time: %d", pi->in_time); +++ indent_printf(level+2, "Out-Time: %d", pi->out_time); +++ indent_printf(level+2, "Sync playitem Id: %d", pi->sync_play_item_id); +++ indent_printf(level+2, "Sync PTS: %d", pi->sync_pts); +++ } else { +++ indent_printf(level+1, "%s.m2ts", pi->clip[0].clip_id); +++ } +++ } +++} +++ +++static void +++_show_pip_metadata_block(MPLS_PIP_METADATA *block, int level) +++{ +++ int ii; +++ +++ indent_printf(level, "Clip ref: %d", block->clip_ref); +++ indent_printf(level, "Secondary video ref: %d", block->secondary_video_ref); +++ indent_printf(level, "Timeline type: %d", block->timeline_type); +++ indent_printf(level, "Luma key flag: %d", block->luma_key_flag); +++ if (block->luma_key_flag) { +++ indent_printf(level, "Upper limit luma key: %d", block->upper_limit_luma_key); +++ } +++ 
indent_printf(level, "Trick play flag: %d", block->trick_play_flag); +++ +++ for (ii = 0; ii < block->data_count; ii++) { +++ indent_printf(level, "data block %d:", ii); +++ indent_printf(level+1, "Timestamp: %d", block->data[ii].time); +++ indent_printf(level+1, "Horizontal position %d", block->data[ii].xpos); +++ indent_printf(level+1, "Vertical position: %d", block->data[ii].ypos); +++ indent_printf(level+1, "Scaling factor: %d", block->data[ii].scale_factor); +++ } +++} +++ +++static void +++_show_pip_metadata(MPLS_PL *pl, int level) +++{ +++ int ii; +++ +++ for (ii = 0; ii < pl->ext_pip_data_count; ii++) { +++ MPLS_PIP_METADATA *data; +++ +++ data = &pl->ext_pip_data[ii]; +++ +++ indent_printf(level, "PiP metadata block %d:", ii); +++ _show_pip_metadata_block(data, level+1); +++ } +++} +++ +++static void +++_show_sub_paths(MPLS_PL *pl, int level) +++{ +++ int ss; +++ +++ for (ss = 0; ss < pl->sub_count; ss++) { +++ MPLS_SUB *sub; +++ +++ sub = &pl->sub_path[ss]; +++ +++ indent_printf(level, "Sub Path %d:", ss); +++ _show_sub_path(sub, level+1); +++ } +++} +++ +++static void +++_show_sub_paths_ss(MPLS_PL *pl, int level) +++{ +++ int ss; +++ +++ for (ss = 0; ss < pl->ext_sub_count; ss++) { +++ MPLS_SUB *sub; +++ +++ sub = &pl->ext_sub_path[ss]; +++ +++ indent_printf(level, "Extension Sub Path %d:", ss); +++ _show_sub_path(sub, level+1); +++ } +++} +++ +++static uint32_t +++_pl_duration(MPLS_PL *pl) +++{ +++ int ii; +++ uint32_t duration = 0; +++ MPLS_PI *pi; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ pi = &pl->play_item[ii]; +++ duration += pi->out_time - pi->in_time; +++ } +++ return duration; +++} +++ +++static int +++_filter_dup(MPLS_PL *pl_list[], int count, MPLS_PL *pl) +++{ +++ int ii, jj; +++ +++ for (ii = 0; ii < count; ii++) { +++ if (pl->list_count != pl_list[ii]->list_count || +++ _pl_duration(pl) != _pl_duration(pl_list[ii])) { +++ continue; +++ } +++ for (jj = 0; jj < pl->list_count; jj++) { +++ MPLS_PI *pi1, *pi2; +++ +++ pi1 = 
&pl->play_item[jj]; +++ pi2 = &pl_list[ii]->play_item[jj]; +++ +++ if (memcmp(pi1->clip[0].clip_id, pi2->clip[0].clip_id, 5) != 0 || +++ pi1->in_time != pi2->in_time || +++ pi1->out_time != pi2->out_time) { +++ break; +++ } +++ } +++ if (jj != pl->list_count) { +++ continue; +++ } +++ return 0; +++ } +++ return 1; +++} +++ +++static int +++_find_repeats(MPLS_PL *pl, const char *m2ts) +++{ +++ int ii, count = 0; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ MPLS_PI *pi; +++ +++ pi = &pl->play_item[ii]; +++ // Ignore titles with repeated segments +++ if (strcmp(pi->clip[0].clip_id, m2ts) == 0) { +++ count++; +++ } +++ } +++ return count; +++} +++ +++static int +++_filter_short(MPLS_PL *pl, unsigned int seconds) +++{ +++ // Ignore short playlists +++ if (_pl_duration(pl) / 45000 <= seconds) { +++ return 0; +++ } +++ return 1; +++} +++ +++static int +++_filter_repeats(MPLS_PL *pl, int repeats) +++{ +++ int ii; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ MPLS_PI *pi; +++ +++ pi = &pl->play_item[ii]; +++ // Ignore titles with repeated segments +++ if (_find_repeats(pl, pi->clip[0].clip_id) > repeats) { +++ return 0; +++ } +++ } +++ return 1; +++} +++ +++static int clip_list = 0, playlist_info = 0, chapter_marks = 0, sub_paths = 0, pip_metadata = 0; +++static int repeats = 0, seconds = 0, dups = 0; +++ +++static MPLS_PL* +++_process_file(char *name, MPLS_PL *pl_list[], int pl_count) +++{ +++ MPLS_PL *pl; +++ +++ pl = bd_read_mpls(name); +++ if (pl == NULL) { +++ fprintf(stderr, "Parse failed: %s\n", name); +++ return NULL; +++ } +++ if (seconds) { +++ if (!_filter_short(pl, seconds)) { +++ bd_free_mpls(pl); +++ return NULL; +++ } +++ } +++ if (repeats) { +++ if (!_filter_repeats(pl, repeats)) { +++ bd_free_mpls(pl); +++ return NULL; +++ } +++ } +++ if (dups) { +++ if (!_filter_dup(pl_list, pl_count, pl)) { +++ bd_free_mpls(pl); +++ return NULL; +++ } +++ } +++ if (verbose) { +++ indent_printf(0, +++ "%s -- Num Clips: %3d , Duration: minutes %4u:%02u", 
+++ basename(name), +++ pl->list_count, +++ _pl_duration(pl) / (45000 * 60), +++ (_pl_duration(pl) / 45000) % 60); +++ _show_ai(pl, 1); +++ } else { +++ indent_printf(0, "%s -- Duration: minutes %4u:%02u", +++ basename(name), +++ _pl_duration(pl) / (45000 * 60), +++ (_pl_duration(pl) / 45000) % 60); +++ } +++ if (playlist_info) { +++ _show_details(pl, 1); +++ } +++ if (chapter_marks) { +++ _show_marks(pl, 1); +++ } +++ if (pip_metadata) { +++ _show_pip_metadata(pl, 1); +++ } +++ if (clip_list) { +++ _show_clip_list(pl, 1); +++ } +++ if (sub_paths) { +++ _show_sub_paths(pl, 1); +++ _show_sub_paths_ss(pl, 1); +++ } +++ return pl; +++} +++ +++static void +++_usage(char *cmd) +++{ +++ fprintf(stderr, +++"Usage: %s -vli <mpls file> [<mpls file> ...]\n" +++"With no options, produces a list of the playlist(s) with durations\n" +++"Options:\n" +++" v - Verbose output.\n" +++" l - Produces a list of the m2ts clips\n" +++" i - Dumps detailed information about each clip\n" +++" c - Show chapter marks\n" +++" p - Show sub paths\n" +++" P - Show picture-in-picture metadata\n" +++" r <N> - Filter out titles that have >N repeating clips\n" +++" d - Filter out duplicate titles\n" +++" s <seconds> - Filter out short titles\n" +++" f - Filter combination -r2 -d -s900\n" +++, cmd); +++ +++ exit(EXIT_FAILURE); +++} +++ +++#define OPTS "vlicpPfr:ds:" +++ +++static int +++_qsort_str_cmp(const void *a, const void *b) +++{ +++ const char *stra = *(char * const *)a; +++ const char *strb = *(char * const *)b; +++ +++ return strcmp(stra, strb); +++} +++ +++int +++main(int argc, char *argv[]) +++{ +++ MPLS_PL *pl; +++ int opt; +++ int ii, pl_ii; +++ MPLS_PL *pl_list[1000]; +++ struct stat st; +++ char *path = NULL; +++ DIR *dir = NULL; +++ +++ do { +++ opt = getopt(argc, argv, OPTS); +++ switch (opt) { +++ case -1: +++ break; +++ +++ case 'v': +++ verbose = 1; +++ break; +++ +++ case 'l': +++ clip_list = 1; +++ break; +++ +++ case 'i': +++ playlist_info = 1; +++ break; +++ +++ case 'c': +++ 
chapter_marks = 1; +++ break; +++ +++ case 'p': +++ sub_paths = 1; +++ break; +++ +++ case 'P': +++ pip_metadata = 1; +++ break; +++ +++ case 'd': +++ dups = 1; +++ break; +++ +++ case 'r': +++ repeats = atoi(optarg); +++ break; +++ +++ case 'f': +++ repeats = 2; +++ dups = 1; +++ seconds = 900; +++ break; +++ +++ case 's': +++ seconds = atoi(optarg); +++ break; +++ +++ default: +++ _usage(argv[0]); +++ break; +++ } +++ } while (opt != -1); +++ +++ if (optind >= argc) { +++ _usage(argv[0]); +++ } +++ +++ for (pl_ii = 0, ii = optind; pl_ii < 1000 && ii < argc; ii++) { +++ +++ if (stat(argv[ii], &st)) { +++ continue; +++ } +++ dir = NULL; +++ if (S_ISDIR(st.st_mode)) { +++ +++ printf("Directory: %s:\n", argv[ii]); +++ path = _mk_path(argv[ii], PLAYLIST_DIR); +++ if (path == NULL) { +++ fprintf(stderr, "Failed to find playlist path: %s\n", argv[ii]); +++ continue; +++ } +++ dir = opendir(path); +++ if (dir == NULL) { +++ fprintf(stderr, "Failed to open dir: %s\n", path); +++ free(path); +++ continue; +++ } +++ } +++ if (dir != NULL) { +++ char **dirlist = (char**)calloc(10001, sizeof(char*)); /* 10000 names + NULL sentinel */ +++ struct dirent *ent; +++ int jj = 0; +++ for (ent = readdir(dir); ent != NULL && jj < 10000; ent = readdir(dir)) { /* never overwrite the sentinel slot */ +++ dirlist[jj++] = strcpy((char*)malloc(strlen(ent->d_name) + 1), ent->d_name); /* +1 for the terminating NUL */ +++ } +++ qsort(dirlist, jj, sizeof(char*), _qsort_str_cmp); +++ for (jj = 0; dirlist[jj] != NULL; jj++) { +++ char *name = NULL; +++ name = _mk_path(path, dirlist[jj]); +++ free(dirlist[jj]); +++ if (stat(name, &st)) { +++ free(name); +++ continue; +++ } +++ if (!S_ISREG(st.st_mode)) { +++ free(name); +++ continue; +++ } +++ pl = _process_file(name, pl_list, pl_ii); +++ free(name); +++ if (pl != NULL) { +++ pl_list[pl_ii++] = pl; +++ } +++ } +++ free(dirlist); +++ free(path); +++ closedir(dir); +++ dir = NULL; +++ } else { +++ pl = _process_file(argv[ii], pl_list, pl_ii); +++ if (pl != NULL) { +++ pl_list[pl_ii++] = pl; +++ } +++ } +++ } +++ // Cleanup +++ for (ii = 0; ii < pl_ii; ii++) { +++ 
bd_free_mpls(pl_list[ii]); +++ } +++ return 0; +++} +++ ++diff --git a/src/devtools/util.c b/src/devtools/util.c ++new file mode 100644 ++index 0000000..aaa4c46 ++--- /dev/null +++++ b/src/devtools/util.c ++@@ -0,0 +1,40 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2009-2010 John Stebbins +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. +++ */ +++ +++#include <stdio.h> +++#include <stdarg.h> +++ +++#include "util.h" +++ +++void +++indent_printf(int level, const char *fmt, ...) +++{ +++ va_list ap; +++ int ii; +++ +++ for (ii = 0; ii < level; ii++) +++ { +++ printf(" "); +++ } +++ va_start(ap, fmt); +++ vprintf(fmt, ap); +++ va_end(ap); +++ printf("\n"); +++} +++ ++diff --git a/src/devtools/util.h b/src/devtools/util.h ++new file mode 100644 ++index 0000000..144f8ec ++--- /dev/null +++++ b/src/devtools/util.h ++@@ -0,0 +1,43 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2009-2010 John Stebbins +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. 
+++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. +++ */ +++ +++#include <stdint.h> +++ +++#include "util/attributes.h" +++ +++#if defined( __MINGW32__ ) +++# undef lseek +++# define lseek _lseeki64 +++# undef fseeko +++# define fseeko fseeko64 +++# undef ftello +++# define ftello ftello64 +++# define flockfile(...) +++# define funlockfile(...) +++# define getc_unlocked getc +++# undef off_t +++# define off_t off64_t +++# undef stat +++# define stat _stati64 +++# define fstat _fstati64 +++# define wstat _wstati64 +++#endif +++ +++void indent_printf(int level, const char *fmt, ...) BD_ATTR_FORMAT_PRINTF(2,3); +++ ++diff --git a/src/examples/bdj_test.c b/src/examples/bdj_test.c ++deleted file mode 100644 ++index d9ebd16..0000000 +++++ /dev/null ++@@ -1,67 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2010 William Hahne ++- * ++- * This program is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU General Public License ++- * as published by the Free Software Foundation; either version 2 ++- * of the License, or (at your option) any later version. ++- * ++- * This program is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++- * GNU General Public License for more details. ++- * ++- * You should have received a copy of the GNU General Public License ++- * along with this program. If not, see <http://www.gnu.org/licenses/>. 
++- * ++- * In addition, as a special exception, the copyright holders of libbluray ++- * gives permission to link the code of its release of libbluray with the ++- * OpenSSL project's "OpenSSL" library (or with modified versions of it ++- * that use the same license as the "OpenSSL" library), and distribute ++- * the linked executables. You must obey the GNU General Public License ++- * in all respects for all of the code used other than "OpenSSL". If you ++- * modify this file, you may extend this exception to your version of the ++- * file, but you are not obligated to do so. If you do not wish to do ++- * so, delete this exception statement from your version. ++- */ ++- ++-#include <stdio.h> ++-#include <stdlib.h> ++-#include <unistd.h> ++- ++-#include "libbluray/bluray.h" ++- ++-#if defined(_WIN32) ++-#include <windows.h> ++-#define sleep(x) Sleep(x) ++-#endif ++- ++-static void _usage(void) { ++- printf("Usage: [path to disc] [starting object]\n"); ++-} ++- ++-int main(int argc, char** argv) ++-{ ++- if (argc < 3) { ++- _usage(); ++- return 0; ++- } ++- ++- printf("%s %s\n", argv[1], argv[2]); ++- ++- BLURAY* bd = bd_open(argv[1], NULL); ++- ++- bd_get_titles(bd, TITLES_ALL, 0); ++- ++- if (!bd_start_bdj(bd, argv[2])) { ++- printf("Failed to start BD-J application.\n"); ++- } else { ++- while (1) { sleep(20); } ++- bd_stop_bdj(bd); ++- } ++- ++- bd_close(bd); ++- ++- return 0; ++-} ++diff --git a/src/examples/bdjo_dump.c b/src/examples/bdjo_dump.c ++deleted file mode 100644 ++index bcbd2af..0000000 +++++ /dev/null ++@@ -1,206 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2014 Petri Hintukainen <phintuka@users.sourceforge.net> ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. 
++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include "libbluray/bluray.h" ++-#include "libbluray/bdj/bdjo_data.h" ++- ++-#include <stdio.h> ++-#include <string.h> ++- ++-static const char *_yes_no(int i) ++-{ ++- return i > 0 ? "yes" : i < 0 ? "unknown" : "no"; ++-} ++- ++-static const char *_binding_str(int i) ++-{ ++- switch (i) { ++- case 0: return "unbound"; ++- case 1: return "disc bound"; ++- case 3: return "title bound"; ++- default: return "???"; ++- } ++-} ++- ++-static const char *_visibility_str(int i) ++-{ ++- switch (i) { ++- case 0: return "none"; ++- case 1: return "applications"; ++- case 2: return "user"; ++- default: return "???"; ++- } ++-} ++- ++-static void _terminal_info_print(const BDJO_TERMINAL_INFO *p) ++-{ ++- printf("Terminal Info:\n"); ++- printf(" Default AWT font : %s\n", p->default_font); ++- printf(" initial HaVi config : %d\n", p->initial_havi_config_id); ++- printf(" Menu call mask : %d\n", p->menu_call_mask); ++- printf(" Title search mask : %d\n", p->menu_call_mask); ++-} ++- ++-static void _app_cache_item_print(const BDJO_APP_CACHE_ITEM *p) ++-{ ++- printf(" %3.3s: %s%s\n", ++- p->lang_code, p->ref_to_name, ++- p->type == 1 ? ".jar" : p->type == 2 ? 
"/" : " (unknown type)"); ++-} ++- ++-static void _app_cache_info_print(const BDJO_APP_CACHE_INFO *p) ++-{ ++- unsigned ii; ++- ++- printf("Application cache info:\n"); ++- for (ii = 0; ii < p->num_item; ii++) { ++- _app_cache_item_print(&p->item[ii]); ++- } ++-} ++- ++-static void _accessible_playlists_print(const BDJO_ACCESSIBLE_PLAYLISTS *p) ++-{ ++- unsigned ii; ++- ++- printf("Accessible playlists:\n"); ++- printf(" Access to all : %s\n", _yes_no(p->access_to_all_flag)); ++- printf(" Autostart first : %s\n", _yes_no(p->autostart_first_playlist_flag)); ++- ++- if (p->num_pl) { ++- printf(" Playlists : %d\n", p->num_pl); ++- for (ii = 0; ii < p->num_pl; ii++) { ++- printf(" %s.mpls\n", p->pl[ii].name); ++- } ++- } ++-} ++- ++-static void _app_profile_print(BDJO_APP_PROFILE *p) ++-{ ++- printf(" Profile %d Version %d.%d.%d\n", ++- p->profile_number, p->major_version, p->minor_version, p->micro_version); ++-} ++- ++-static void _app_print(const BDJO_APP *p) ++-{ ++- unsigned ii; ++- ++- printf(" Control code: : %d (%s)\n", p->control_code, ++- p->control_code == 1 ? "autostart" : p->control_code == 2 ? "present" : "???"); ++- printf(" Type : %d (%s)\n", p->type, ++- p->type == 1 ? 
"BD-J App" : "???"); ++- printf(" Organization ID : %08X\n", p->org_id); ++- printf(" Application ID : %04X\n", p->app_id); ++- printf(" Priority : %d\n", p->priority); ++- printf(" Binding : %d (%s)\n", p->binding, _binding_str(p->binding)); ++- printf(" Visibility : %d (%s)\n", p->visibility, _visibility_str(p->visibility)); ++- ++- if (p->num_profile) { ++- printf(" Profiles:\n"); ++- for (ii = 0; ii < p->num_profile; ii++) { ++- _app_profile_print(&p->profile[ii]); ++- } ++- } ++- ++- if (p->num_name) { ++- printf(" Names:\n"); ++- for (ii = 0; ii < p->num_name; ii++) { ++- printf(" %s: %s\n", p->name[ii].lang, p->name[ii].name); ++- } ++- } ++- ++- printf(" Base directory : %s\n", p->base_dir); ++- printf(" Icon locator : %s\n", p->icon_locator); ++- printf(" Icon flags : 0x%04x\n", p->icon_flags); ++- printf(" Classpath extension : %s\n", p->classpath_extension); ++- printf(" Initial class : %s\n", p->initial_class); ++- printf(" Parameters : "); ++- for (ii = 0; ii < p->num_param; ii++) { ++- printf("%s ", p->param[ii].param); ++- } ++- printf("\n"); ++-} ++- ++-static void _app_management_table_print(const BDJO_APP_MANAGEMENT_TABLE *p) ++-{ ++- unsigned ii; ++- ++- for (ii = 0; ii < p->num_app; ii++) { ++- printf("Application %d:\n", ii); ++- _app_print(&p->app[ii]); ++- } ++-} ++- ++-static void _key_interest_table_print(const BDJO_KEY_INTEREST_TABLE *p) ++-{ ++- unsigned int v; ++- memcpy(&v, p, sizeof(unsigned int)); ++- if (v) { ++- printf("Key interest table:\n"); ++- printf(" %s%s%s%s%s%s%s%s%s%s%s\n", ++- p->vk_play ? "VK_PLAY " : "", ++- p->vk_stop ? "VK_STOP " : "", ++- p->vk_ffw ? "VK_FFW " : "", ++- p->vk_rew ? "VK_REW " : "", ++- p->vk_track_next ? "VK_TRACK_NEXT " : "", ++- p->vk_track_prev ? "VK_TRACK_PREV " : "", ++- p->vk_pause ? "VK_PAUSE " : "", ++- p->vk_still_off ? "VK_STILL_OFF " : "", ++- p->vk_sec_audio_ena_dis ? "VK_SEC_AUDIO " : "", ++- p->vk_sec_video_ena_dis ? "VK_SEC_VIDEO " : "", ++- p->pg_textst_ena_dis ? 
"VK_PG_TEXTST " : ""); ++- } ++-} ++- ++-static void _file_access_info_print(const BDJO_FILE_ACCESS_INFO *p) ++-{ ++- printf("File access info:\n %s\n", p->path); ++-} ++- ++-static void _bdjo_print(const BDJO *p) ++-{ ++- _terminal_info_print(&p->terminal_info); ++- _app_cache_info_print(&p->app_cache_info); ++- _accessible_playlists_print(&p->accessible_playlists); ++- _app_management_table_print(&p->app_table); ++- _key_interest_table_print(&p->key_interest_table); ++- _file_access_info_print(&p->file_access_info); ++-} ++- ++-int main(int argc, const char *argv[]) ++-{ ++- if (argc < 2) { ++- fprintf(stderr, "usage: %s <bdjo_file>\n", argv[0]); ++- return 1; ++- } ++- ++- int cnt; ++- for (cnt = 1; cnt < argc; cnt++) { ++- ++- printf("%s\n", argv[cnt]); ++- ++- BDJO *bdjo = bd_read_bdjo(argv[cnt]); ++- if (bdjo) { ++- _bdjo_print(bdjo); ++- bd_free_bdjo(bdjo); ++- } ++- printf("\n"); ++- } ++- ++- return 0; ++-} ++diff --git a/src/examples/clpi_dump.c b/src/examples/clpi_dump.c ++deleted file mode 100644 ++index bd64783..0000000 +++++ /dev/null ++@@ -1,487 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2009-2010 John Stebbins ++- * Copyright (C) 2012-2013 Petri Hintukainen <phintuka@users.sourceforge.net> ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. ++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. 
++- */ ++- ++-#include <stdio.h> ++-#include <stdlib.h> ++-#include <unistd.h> ++-#include <inttypes.h> ++- ++-#include "libbluray/bdnav/clpi_data.h" ++-#include "libbluray/bluray.h" ++- ++-#include "util.h" ++- ++-static int verbose; ++- ++-typedef struct { ++- int value; ++- const char *str; ++-} VALUE_MAP; ++- ++-static inline const char* ++-_lookup_str(const VALUE_MAP *map, int val) ++-{ ++- int ii; ++- ++- for (ii = 0; map[ii].str; ii++) { ++- if (val == map[ii].value) { ++- return map[ii].str; ++- } ++- } ++- return "?"; ++-} ++- ++-const VALUE_MAP codec_map[] = { ++- {0x01, "MPEG-1 Video"}, ++- {0x02, "MPEG-2 Video"}, ++- {0x03, "MPEG-1 Audio"}, ++- {0x04, "MPEG-2 Audio"}, ++- {0x80, "LPCM"}, ++- {0x81, "AC-3"}, ++- {0x82, "DTS"}, ++- {0x83, "TrueHD"}, ++- {0x84, "AC-3 Plus"}, ++- {0x85, "DTS-HD"}, ++- {0x86, "DTS-HD Master"}, ++- {0xa1, "AC-3 Plus for secondary audio"}, ++- {0xa2, "DTS-HD for secondary audio"}, ++- {0xea, "VC-1"}, ++- {0x1b, "H.264"}, ++- {0x20, "H.264 MVC dep."}, ++- {0x90, "Presentation Graphics"}, ++- {0x91, "Presentation Graphics"}, ++- {0x92, "Interactive Graphics"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP video_format_map[] = { ++- {0, "Reserved"}, ++- {1, "480i"}, ++- {2, "576i"}, ++- {3, "480p"}, ++- {4, "1080i"}, ++- {5, "720p"}, ++- {6, "1080p"}, ++- {7, "576p"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP video_rate_map[] = { ++- {0, "Reserved1"}, ++- {1, "23.976"}, ++- {2, "24"}, ++- {3, "25"}, ++- {4, "29.97"}, ++- {5, "Reserved2"}, ++- {6, "50"}, ++- {7, "59.94"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP video_aspect_map[] = { ++- {0, "Reserved1"}, ++- {1, "Reserved2"}, ++- {2, "4:3"}, ++- {3, "16:9"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP audio_format_map[] = { ++- {0, "Reserved1"}, ++- {1, "Mono"}, ++- {2, "Reserved2"}, ++- {3, "Stereo"}, ++- {4, "Reserved3"}, ++- {5, "Reserved4"}, ++- {6, "Multi Channel"}, ++- {12, "Combo"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP audio_rate_map[] = { ++- {0, "Reserved1"}, ++- 
{1, "48 Khz"}, ++- {2, "Reserved2"}, ++- {3, "Reserved3"}, ++- {4, "96 Khz"}, ++- {5, "192 Khz"}, ++- {12, "48/192 Khz"}, ++- {14, "48/96 Khz"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP application_type_map[] = { ++- {1, "Main TS for a main-path of Movie"}, ++- {2, "Main TS for a main-path of Time based slide show"}, ++- {3, "Main TS for a main-path of Browsable slide show"}, ++- {4, "Sub TS for a sub-path of Browsable slide show"}, ++- {5, "Sub TS for a sub-path of Interactive Graphics menu"}, ++- {6, "Sub TS for a sub-path of Text subtitle"}, ++- {7, "Sub TS for a sub-path of one or more elementary streams path"}, ++- {0, NULL}, ++-}; ++- ++-static void ++-_show_stream(CLPI_PROG_STREAM *ss, int level) ++-{ ++- indent_printf(level, "Codec (%04x): %s", ss->coding_type, ++- _lookup_str(codec_map, ss->coding_type)); ++- indent_printf(level, "PID: %04x", ss->pid); ++- switch (ss->coding_type) { ++- case 0x01: ++- case 0x02: ++- case 0xea: ++- case 0x1b: ++- case 0x20: ++- indent_printf(level, "Format %02x: %s", ss->format, ++- _lookup_str(video_format_map, ss->format)); ++- indent_printf(level, "Rate %02x: %s", ss->rate, ++- _lookup_str(video_rate_map, ss->rate)); ++- indent_printf(level, "Aspect %02x: %s", ss->aspect, ++- _lookup_str(video_aspect_map, ss->aspect)); ++- indent_printf(level, "oc_flag %02x", ss->oc_flag); ++- break; ++- ++- case 0x03: ++- case 0x04: ++- case 0x80: ++- case 0x81: ++- case 0x82: ++- case 0x83: ++- case 0x84: ++- case 0x85: ++- case 0x86: ++- case 0xa1: ++- case 0xa2: ++- indent_printf(level, "Format %02x: %s", ss->format, ++- _lookup_str(audio_format_map, ss->format)); ++- indent_printf(level, "Rate %02x: %s", ss->rate, ++- _lookup_str(audio_rate_map, ss->rate)); ++- indent_printf(level, "Language: %s", ss->lang); ++- break; ++- ++- case 0x90: ++- case 0x91: ++- case 0xa0: ++- indent_printf(level, "Language: %s", ss->lang); ++- break; ++- ++- case 0x92: ++- indent_printf(level, "Char Code: %02x", ss->char_code); ++- indent_printf(level, 
"Language: %s", ss->lang); ++- break; ++- ++- default: ++- fprintf(stderr, "unrecognized coding type %02x\n", ss->coding_type); ++- break; ++- }; ++-} ++- ++-static void ++-_show_clip_info(CLPI_CL *cl, int level) ++-{ ++- CLPI_CLIP_INFO *ci = &cl->clip; ++- int ii; ++- ++- indent_printf(level, "Clip Info"); ++- indent_printf(level+1, "Clip Stream Type: %02x", ci->clip_stream_type); ++- indent_printf(level+1, "Clip Application Type (%02x): %s", ++- ci->application_type, _lookup_str(application_type_map, ci->application_type)); ++- indent_printf(level+1, "is_ATC_delta: %s", ci->is_atc_delta ? "True" : "False"); ++- indent_printf(level+1, "ATC delta count: %d", ci->atc_delta_count); ++- indent_printf(level+1, "TS Recording Rate: %u", ci->ts_recording_rate); ++- indent_printf(level+1, "Number Source Packets: %u", ci->num_source_packets); ++- // Show ts type info ++- indent_printf(level+1, "TS Type Info"); ++- indent_printf(level+2, "Validity Flags %02x", ci->ts_type_info.validity); ++- indent_printf(level+2, "Format Id %s", ci->ts_type_info.format_id); ++- // Show cc5 thing ++- for (ii = 0; ii < ci->atc_delta_count; ii++) { ++- indent_printf(level+1, "ATC delta[ %d ]", ii); ++- indent_printf(level+2, "Delta %08x", ci->atc_delta[ii].delta); ++- indent_printf(level+2, "File Id %s", ci->atc_delta[ii].file_id); ++- indent_printf(level+2, "File Code %s", ci->atc_delta[ii].file_code); ++- } ++- // show fonts ++- if (cl->font_info.font_count) { ++- indent_printf(level+1, "Font files"); ++- for (ii = 0; ii < cl->font_info.font_count; ii++) { ++- indent_printf(level+2, "Font file %d: %s.otf", ii+1, cl->font_info.font[ii].file_id); ++- } ++- } ++- ++- printf("\n"); ++-} ++- ++-static void ++-_show_seq_info(CLPI_SEQ_INFO *si, int level) ++-{ ++- CLPI_ATC_SEQ *atc; ++- CLPI_STC_SEQ *stc; ++- int ii, jj; ++- ++- indent_printf(level, "Sequence Info"); ++- indent_printf(level+1, "Number ATC Sequences: %d", si->num_atc_seq); ++- for (ii = 0; ii < si->num_atc_seq; ii++) { ++- atc = 
&si->atc_seq[ii]; ++- indent_printf(level+1, "ATC Sequence %d", ii); ++- indent_printf(level+2, "SPN ATC Start: %u", atc->spn_atc_start); ++- indent_printf(level+2, "Offset STC Id: %d", atc->offset_stc_id); ++- indent_printf(level+2, "Number STC Sequences: %d", atc->num_stc_seq); ++- for (jj = 0; jj < atc->num_stc_seq; jj++) { ++- stc = &atc->stc_seq[jj]; ++- indent_printf(level+2, "ATC Sequence %d", jj); ++- indent_printf(level+3, "SPN STC Start: %u", stc->spn_stc_start); ++- indent_printf(level+3, "PCR PID: %04x", stc->pcr_pid); ++- indent_printf(level+3, "Presentation Start: %u", ++- stc->presentation_start_time); ++- indent_printf(level+3, "Presentation End: %u", ++- stc->presentation_end_time); ++- } ++- } ++-} ++- ++-static void ++-_show_prog_info(CLPI_PROG_INFO *pi, int level) ++-{ ++- CLPI_PROG *prog; ++- int ii, jj; ++- ++- indent_printf(level, "Program Info"); ++- indent_printf(level+1, "Number Programs: %d", pi->num_prog); ++- for (ii = 0; ii < pi->num_prog; ii++) { ++- prog = &pi->progs[ii]; ++- indent_printf(level+1, "Program %d", ii); ++- indent_printf(level+2, "SPN Program Sequence Start: %d", ++- prog->spn_program_sequence_start); ++- indent_printf(level+2, "Program Map PID: %d", prog->program_map_pid); ++- indent_printf(level+2, "Number Streams: %d", prog->num_streams); ++- indent_printf(level+2, "Number Groups: %d", prog->num_groups); ++- for (jj = 0; jj < prog->num_streams; jj++) { ++- indent_printf(level+2, "Stream %d", jj); ++- _show_stream(&prog->streams[jj], level+3); ++- } ++- } ++-} ++- ++-static void ++-_show_extent_start(CLPI_EXTENT_START *es, int level) ++-{ ++- unsigned int ii; ++- ++- indent_printf(level, "Extension data: Extent Start Point"); ++- ++- if (!es->num_point) { ++- indent_printf(level+1, "(no data)"); ++- ++- } else { ++- indent_printf(level+1, "Number of Start Points: %d", es->num_point); ++- ++- if (verbose) { ++- for (ii = 0; ii < es->num_point; ii++) { ++- indent_printf(level+1, "Extent %5d: SPN 0x%08X", ii, 
es->point[ii]); ++- } ++- } ++- } ++-} ++- ++-static void ++-_show_cpi_info(CLPI_CPI *cpi, int level) ++-{ ++- CLPI_EP_MAP_ENTRY *entry; ++- CLPI_EP_COARSE *coarse; ++- CLPI_EP_FINE *fine; ++- int ii, jj, kk; ++- ++- indent_printf(level, "CPI"); ++- indent_printf(level+1, "Number Stream PID: %d", cpi->num_stream_pid); ++- for (ii = 0; ii < cpi->num_stream_pid; ii++) { ++- entry = &cpi->entry[ii]; ++- indent_printf(level+1, "Stream: %d", ii); ++- indent_printf(level+2, "PID: %04x", entry->pid); ++- indent_printf(level+2, "EP Stream Type: %d", entry->ep_stream_type); ++- indent_printf(level+2, "Number EP Coarse: %d", entry->num_ep_coarse); ++- indent_printf(level+2, "Number EP Fine: %d", entry->num_ep_fine); ++- indent_printf(level+2, "EP Map Start: %d", ++- entry->ep_map_stream_start_addr); ++- for (jj = 0; jj < entry->num_ep_coarse; jj++) { ++- coarse = &entry->coarse[jj]; ++- indent_printf(level+2, "Coarse: %d", jj); ++- indent_printf(level+3, "Ref EP Fine: %d", coarse->ref_ep_fine_id); ++- indent_printf(level+3, "PTS EP: %d", coarse->pts_ep); ++- indent_printf(level+3, "SPN EP: %d", coarse->spn_ep); ++- } ++- for (jj = 0; jj < entry->num_ep_fine; jj++) { ++- fine = &entry->fine[jj]; ++- indent_printf(level+2, "Fine: %d", jj); ++- indent_printf(level+3, "Angle Change Point: %s", ++- fine->is_angle_change_point ? 
"True":"False"); ++- indent_printf(level+3, "I End Offset: %d", ++- fine->i_end_position_offset); ++- indent_printf(level+3, "PTS EP: %d", fine->pts_ep); ++- indent_printf(level+3, "SPN EP: %d", fine->spn_ep); ++- } ++- if (verbose) { ++- uint64_t pts; ++- uint32_t spn; ++- ++- indent_printf(level+2, "PTS - SPN Map"); ++- for (jj = 0; jj < entry->num_ep_coarse; jj++) { ++- int start, end; ++- ++- indent_printf(level+3, "Coarse: %d", jj); ++- coarse = &entry->coarse[jj]; ++- start = coarse->ref_ep_fine_id; ++- if (jj < entry->num_ep_coarse - 1) { ++- end = entry->coarse[jj+1].ref_ep_fine_id; ++- } else { ++- end = entry->num_ep_fine; ++- } ++- for (kk = start; kk < end; kk++) { ++- fine = &entry->fine[kk]; ++- pts = ((uint64_t) (coarse->pts_ep & ~0x01) << 19) + ++- ((uint64_t)fine->pts_ep << 9); ++- spn = (coarse->spn_ep & ~0x1FFFF) + fine->spn_ep; ++- indent_printf(level+4, "PTS %8"PRIu64"/%8"PRIu64" -- SPN %u", ++- pts, pts >> 1, spn); ++- } ++- } ++- } ++- } ++-} ++- ++- ++-static void ++-_usage(char *cmd) ++-{ ++- fprintf(stderr, ++-"Usage: %s -vcspi <clpi file> [<clpi file> ...]\n" ++-"With no options, produces no output (not very useful)\n" ++-"Options:\n" ++-" v - Verbose output.\n" ++-" c - Shows the Clip Info structure\n" ++-" s - Shows the Sequence Info structure\n" ++-" p - Shows the Program Info structure\n" ++-" i - Shows the CPI. 
PTS to SPN map\n" ++-" e - Shows Extent Start Table\n" ++-, cmd); ++- ++- exit(EXIT_FAILURE); ++-} ++- ++-#define OPTS "vcspie" ++- ++-int ++-main(int argc, char *argv[]) ++-{ ++- CLPI_CL *cl; ++- int opt; ++- int opt_clip_info = 0, opt_seq_info = 0, opt_prog_info = 0; ++- int opt_cpi_info = 0, opt_extent_start = 0; ++- int ii; ++- ++- do { ++- opt = getopt(argc, argv, OPTS); ++- switch (opt) { ++- case -1: break; ++- ++- case 'v': ++- verbose = 1; ++- break; ++- ++- case 's': ++- opt_seq_info = 1; ++- break; ++- ++- case 'i': ++- opt_cpi_info = 1; ++- break; ++- ++- case 'c': ++- opt_clip_info = 1; ++- break; ++- ++- case 'p': ++- opt_prog_info = 1; ++- break; ++- ++- case 'e': ++- opt_extent_start = 1; ++- break; ++- ++- default: ++- _usage(argv[0]); ++- break; ++- } ++- } while (opt != -1); ++- ++- if (optind >= argc) { ++- _usage(argv[0]); ++- } ++- ++- for (ii = optind; ii < argc; ii++) { ++- cl = bd_read_clpi(argv[ii]); ++- if (cl == NULL) { ++- fprintf(stderr, "Parsing %s failed\n", argv[ii]); ++- continue; ++- } ++- if (opt_clip_info) { ++- // Show clip info ++- _show_clip_info(cl, 1); ++- } ++- if (opt_seq_info) { ++- // Show sequence info ++- _show_seq_info(&cl->sequence, 1); ++- } ++- if (opt_prog_info) { ++- // Show program info ++- _show_prog_info(&cl->program, 1); ++- } ++- if (opt_cpi_info) { ++- // Show cpi ++- _show_cpi_info(&cl->cpi, 1); ++- } ++- ++- if (opt_prog_info) { ++- if (cl->program_ss.num_prog) { ++- printf("\n"); ++- indent_printf(1, "Extension: Program Info SS"); ++- _show_prog_info(&cl->program_ss, 1); ++- } ++- } ++- if (opt_cpi_info) { ++- if (cl->program_ss.num_prog) { ++- printf("\n"); ++- indent_printf(1, "Extension: CPI SS"); ++- _show_cpi_info(&cl->cpi_ss, 1); ++- } ++- } ++- if (opt_extent_start) { ++- // Show extent start point ++- if (cl->extent_start.num_point > 0) { ++- _show_extent_start(&cl->extent_start, 1); ++- } ++- } ++- ++- bd_free_clpi(cl); ++- } ++- return 0; ++-} ++- ++diff --git a/src/examples/hdmv_test.c 
b/src/examples/hdmv_test.c ++deleted file mode 100644 ++index 585ed70..0000000 +++++ /dev/null ++@@ -1,257 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2010 hpi1 ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. ++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include <stdio.h> ++-#include <stdlib.h> ++-#include <string.h> ++-#include <inttypes.h> ++- ++-#include "util/log_control.h" ++-#include "libbluray/bluray.h" ++- ++-static void _print_event(BD_EVENT *ev) ++-{ ++- switch (ev->event) { ++- case BD_EVENT_NONE: ++- break; ++- case BD_EVENT_ERROR: ++- printf("EVENT_ERROR:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_READ_ERROR: ++- printf("EVENT_READ_ERROR:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_ENCRYPTED: ++- printf("EVENT_ENCRYPTED:\t%d\n", ev->param); ++- break; ++- ++- /* current playback position */ ++- ++- case BD_EVENT_ANGLE: ++- printf("EVENT_ANGLE:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_TITLE: ++- printf("EVENT_TITLE:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_PLAYLIST: ++- printf("EVENT_PLAYLIST:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_PLAYITEM: ++- printf("EVENT_PLAYITEM:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_CHAPTER: ++- printf("EVENT_CHAPTER:\t%d\n", ev->param); ++- break; ++- ++- /* */ ++- ++- case BD_EVENT_STILL: ++- printf("EVENT_STILL:\t%d\n", 
ev->param); ++- break; ++- ++- case BD_EVENT_SEEK: ++- printf("EVENT_SEEK:\t%d\n", ev->param); ++- break; ++- ++- case BD_EVENT_STILL_TIME: ++- if (ev->param) { ++- printf("EVENT_STILL_TIME:\t%d\n", ev->param); ++- } else { ++- printf("EVENT_STILL_TIME:\tinfinite\n"); ++- } ++- break; ++- ++- /* stream selection */ ++- ++- case BD_EVENT_AUDIO_STREAM: ++- printf("EVENT_AUDIO_STREAM:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_IG_STREAM: ++- printf("EVENT_IG_STREAM:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_PG_TEXTST_STREAM: ++- printf("EVENT_PG_TEXTST_STREAM:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_SECONDARY_AUDIO_STREAM: ++- printf("EVENT_SECONDARY_AUDIO_STREAM:\t%d\n", ev->param); ++- break; ++- case BD_EVENT_SECONDARY_VIDEO_STREAM: ++- printf("EVENT_SECONDARY_VIDEO_STREAM:\t%d\n", ev->param); ++- break; ++- ++- case BD_EVENT_PG_TEXTST: ++- printf("EVENT_PG_TEXTST:\t%s\n", ev->param ? "enable" : "disable"); ++- break; ++- case BD_EVENT_SECONDARY_AUDIO: ++- printf("EVENT_SECONDARY_AUDIO:\t%s\n", ev->param ? "enable" : "disable"); ++- break; ++- case BD_EVENT_SECONDARY_VIDEO: ++- printf("EVENT_SECONDARY_VIDEO:\t%s\n", ev->param ? "enable" : "disable"); ++- break; ++- case BD_EVENT_SECONDARY_VIDEO_SIZE: ++- printf("EVENT_SECONDARY_VIDEO_SIZE:\t%s\n", ev->param==0 ? "PIP" : "fullscreen"); ++- break; ++- ++- default: ++- printf("UNKNOWN EVENT %d:\t%d\n", ev->event, ev->param); ++- break; ++- } ++- ++- fflush(stdout); ++-} ++- ++-static void _read_to_eof(BLURAY *bd) ++-{ ++- BD_EVENT ev; ++- int bytes; ++- uint64_t total = 0; ++- uint8_t buf[6144]; ++- ++- bd_seek(bd, bd_get_title_size(bd) - 6144); ++- ++- do { ++- bytes = bd_read_ext(bd, buf, 6144, &ev); ++- total += bytes < 0 ? 
0 : bytes; ++- _print_event(&ev); ++- } while (bytes > 0); ++- ++- printf("_read_to_eof(): read %"PRIu64" bytes\n", total); ++-} ++- ++-static void _print_events(BLURAY *bd) ++-{ ++- BD_EVENT ev; ++- ++- do { ++- bd_read_ext(bd, NULL, 0, &ev); ++- _print_event(&ev); ++- } while (ev.event != BD_EVENT_NONE && ev.event != BD_EVENT_ERROR); ++-} ++- ++-static void _play_pl(BLURAY *bd) ++-{ ++- printf("Playing playlist\n"); ++- ++- fflush(stdout); ++- _read_to_eof(bd); ++- ++- printf("Playing playlist done\n\n"); ++- ++- _print_events(bd); ++- ++- printf("\n"); ++-} ++- ++-int main(int argc, char *argv[]) ++-{ ++- int title = -1; ++- int verbose = 0; ++- int args = 0; ++- ++- /* ++- * parse arguments ++- */ ++- ++- if (argc < 2) { ++- printf("\nUsage:\n %s [-v] [-t <title>] <media_path> [<keyfile_path>]\n\n", argv[0]); ++- return -1; ++- } ++- ++- if (!strcmp(argv[1+args], "-v")) { ++- verbose = 1; ++- args++; ++- } ++- ++- if (!strcmp(argv[1+args], "-t")) { ++- args++; ++- title = atoi(argv[1+args]); ++- args++; ++- printf("Requested title %d\n", title); ++- } ++- ++- if (verbose) { ++- printf("Enabling verbose debug\n"); ++- bd_set_debug_mask(bd_get_debug_mask() | DBG_HDMV | DBG_BLURAY); ++- } ++- ++- printf("\n"); ++- ++- /* ++- * open and setup ++- */ ++- ++- BLURAY *bd = bd_open(argv[1+args], argv[2+args]); ++- ++- if (!bd) { ++- printf("bd_open(\'%s\') failed\n", argv[1]); ++- return -1; ++- } ++- ++- bd_set_player_setting (bd, BLURAY_PLAYER_SETTING_PARENTAL, 99); ++- bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_AUDIO_LANG, "eng"); ++- bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_PG_LANG, "eng"); ++- bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_MENU_LANG, "eng"); ++- bd_set_player_setting_str(bd, BLURAY_PLAYER_SETTING_COUNTRY_CODE, NULL); ++- ++- /* ++- * play ++- */ ++- ++- printf("Running first play movie object\n"); ++- ++- fflush(stdout); ++- bd_play(bd); ++- ++- _print_events(bd); ++- ++- printf("\n"); ++- ++- /* ++- * play title ++- */ 
++- ++- if (title >= 0) { ++- printf("Playing title %d\n", title); ++- ++- fflush(stdout); ++- bd_play_title(bd, title); ++- ++- _print_events(bd); ++- ++- printf("\n"); ++- } ++- ++- /* ++- * play playlist ++- */ ++- ++- _play_pl(bd); ++- ++- _play_pl(bd); ++- ++- _play_pl(bd); ++- ++- /* ++- * clean up ++- */ ++- ++- bd_close(bd); ++- ++- return 0; ++-} ++- ++diff --git a/src/examples/list_titles.c b/src/examples/list_titles.c ++index 2e0cae8..768f078 100644 ++--- a/src/examples/list_titles.c +++++ b/src/examples/list_titles.c ++@@ -78,7 +78,10 @@ int main(int argc, char *argv[]) ++ _usage(argv[0]); ++ } ++ bd = bd_open(bd_dir, NULL); ++- +++ if (!bd) { +++ fprintf(stderr, "bd_open(%s) failed\n", bd_dir); +++ exit(EXIT_FAILURE); +++ } ++ count = bd_get_titles(bd, flags, seconds); ++ main_title = bd_get_main_title(bd); ++ if (main_title >= 0) { ++@@ -89,7 +92,7 @@ int main(int argc, char *argv[]) ++ BLURAY_TITLE_INFO* ti; ++ ti = bd_get_title_info(bd, ii, 0); ++ printf( ++- "index: %d duration: %02"PRIu64":%02"PRIu64":%02"PRIu64" chapters: %3d angles: %2u clips: %3u (playlist: %05d.mpls) " +++ "index: %3d duration: %02"PRIu64":%02"PRIu64":%02"PRIu64" chapters: %3d angles: %2u clips: %3u (playlist: %05d.mpls) " ++ "V:%d A:%-2d PG:%-2d IG:%-2d SV:%d SA:%d\n", ++ ii + 1, ++ (ti->duration / 90000) / (3600), ++diff --git a/src/examples/mobj_dump.c b/src/examples/mobj_dump.c ++deleted file mode 100644 ++index 3eaf9f4..0000000 +++++ /dev/null ++@@ -1,83 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2010 hpi1 ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. 
++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include "libbluray/bluray.h" ++- ++-#include "libbluray/hdmv/mobj_data.h" ++-#include "libbluray/hdmv/mobj_print.h" ++- ++-#include <stdio.h> ++-#include <string.h> ++- ++-static void _mobj_print(MOBJ_OBJECTS *objects, int disasm) ++-{ ++- int o, c; ++- ++- printf("Number of objects: %d\n", objects->num_objects); ++- ++- for (o = 0; o < objects->num_objects; o++) { ++- ++- printf("Object %d:\n" ++- " number of commands: %d\n" ++- " resume intention flag: %d\n" ++- " menu call mask: %d\n" ++- " title search mask: %d\n", ++- o, objects->objects[o].num_cmds, ++- objects->objects[o].resume_intention_flag, ++- objects->objects[o].menu_call_mask, ++- objects->objects[o].title_search_mask); ++- ++- if (disasm) { ++- printf(" program:\n"); ++- for (c = 0; c < objects->objects[o].num_cmds; c++) { ++- char buf[256]; ++- mobj_sprint_cmd(buf, &objects->objects[o].cmds[c]); ++- printf(" %04d: %s\n", c, buf); ++- } ++- } ++- } ++-} ++- ++-int main(int argc, const char *argv[]) ++-{ ++- int disasm = 0; ++- MOBJ_OBJECTS *mobj = NULL; ++- ++- if (argc < 2) { ++- fprintf(stderr, ++- "usage: %s [-d] <file>\n" ++- "Options:\n" ++- " d disassemble object code\n", ++- argv[0]); ++- return 1; ++- } ++- if (argc > 2) { ++- disasm = !strcmp(argv[1], "-d"); ++- } ++- ++- mobj = bd_read_mobj(argv[argc-1]); ++- ++- if (mobj) { ++- _mobj_print(mobj, disasm); ++- ++- bd_free_mobj(mobj); ++- } ++- ++- return 0; ++-} ++diff --git a/src/examples/mpls_dump.c b/src/examples/mpls_dump.c ++deleted file mode 100644 ++index 619b6b2..0000000 +++++ /dev/null ++@@ 
-1,797 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2009-2010 John Stebbins ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. ++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include <sys/stat.h> ++-#include <dirent.h> ++-#include <stdio.h> ++-#include <stdlib.h> ++-#include <unistd.h> ++-#include <string.h> ++-#include <libgen.h> ++- ++-#include "libbluray/bdnav/mpls_parse.h" ++-#include "libbluray/bluray.h" ++- ++-#include "util.h" ++- ++-#ifdef _WIN32 ++-# define DIR_SEP "\\" ++-# define PLAYLIST_DIR "\\BDMV\\PLAYLIST" ++-#else ++-# define DIR_SEP "/" ++-# define PLAYLIST_DIR "/BDMV/PLAYLIST" ++-#endif ++- ++- ++-static int verbose; ++- ++-typedef struct { ++- int value; ++- const char *str; ++-} VALUE_MAP; ++- ++-const VALUE_MAP codec_map[] = { ++- {0x01, "MPEG-1 Video"}, ++- {0x02, "MPEG-2 Video"}, ++- {0x03, "MPEG-1 Audio"}, ++- {0x04, "MPEG-2 Audio"}, ++- {0x80, "LPCM"}, ++- {0x81, "AC-3"}, ++- {0x82, "DTS"}, ++- {0x83, "TrueHD"}, ++- {0x84, "AC-3 Plus"}, ++- {0x85, "DTS-HD"}, ++- {0x86, "DTS-HD Master"}, ++- {0xa1, "AC-3 Plus for secondary audio"}, ++- {0xa2, "DTS-HD for secondary audio"}, ++- {0xea, "VC-1"}, ++- {0x1b, "H.264"}, ++- {0x90, "Presentation Graphics"}, ++- {0x91, "Interactive Graphics"}, ++- {0x92, "Text Subtitle"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP video_format_map[] = { ++- {0, 
"Reserved"}, ++- {1, "480i"}, ++- {2, "576i"}, ++- {3, "480p"}, ++- {4, "1080i"}, ++- {5, "720p"}, ++- {6, "1080p"}, ++- {7, "576p"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP video_rate_map[] = { ++- {0, "Reserved1"}, ++- {1, "23.976"}, ++- {2, "24"}, ++- {3, "25"}, ++- {4, "29.97"}, ++- {5, "Reserved2"}, ++- {6, "50"}, ++- {7, "59.94"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP audio_format_map[] = { ++- {0, "Reserved1"}, ++- {1, "Mono"}, ++- {2, "Reserved2"}, ++- {3, "Stereo"}, ++- {4, "Reserved3"}, ++- {5, "Reserved4"}, ++- {6, "Multi Channel"}, ++- {12, "Combo"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP audio_rate_map[] = { ++- {0, "Reserved1"}, ++- {1, "48 Khz"}, ++- {2, "Reserved2"}, ++- {3, "Reserved3"}, ++- {4, "96 Khz"}, ++- {5, "192 Khz"}, ++- {12, "48/192 Khz"}, ++- {14, "48/96 Khz"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP subpath_type_map[] = { ++- {2, "Primary audio of the Browsable slideshow"}, ++- {3, "Interactive Graphics presentation menu"}, ++- {4, "Text Subtitle"}, ++- {5, "Out-of-mux Synchronous elementary streams"}, ++- {6, "Out-of-mux Asynchronous Picture-in-Picture presentation"}, ++- {7, "In-mux Synchronous Picture-in-Picture presentation"}, ++- {8, "SS Video"}, ++- {0,NULL} ++-}; ++- ++-const VALUE_MAP playback_type_map[] = { ++- {1, "Sequential"}, ++- {2, "Random"}, ++- {3, "Shuffle"}, ++- {0, NULL} ++-}; ++- ++-const VALUE_MAP connection_type_map[] = { ++- {1, "Non-seamless"}, ++- {5, "Seamless"}, ++- {6, "Seamless"}, ++- {0, NULL} ++-}; ++- ++-static const char* ++-_lookup_str(const VALUE_MAP *map, int val) ++-{ ++- int ii; ++- ++- for (ii = 0; map[ii].str; ii++) { ++- if (val == map[ii].value) { ++- return map[ii].str; ++- } ++- } ++- return "?"; ++-} ++- ++-static char * ++-_mk_path(const char *base, const char *sub) ++-{ ++- size_t n1 = strlen(base); ++- size_t n2 = strlen(sub); ++- char *result = (char*)malloc(n1 + n2 + strlen(DIR_SEP) + 1); ++- strcpy(result, base); ++- strcat(result, DIR_SEP); ++- strcat(result, sub); ++- 
++- return result; ++-} ++- ++-static void ++-_show_stream(MPLS_STREAM *ss, int level) ++-{ ++- indent_printf(level, "Codec (%04x): %s", ss->coding_type, ++- _lookup_str(codec_map, ss->coding_type)); ++- switch (ss->stream_type) { ++- case 1: ++- indent_printf(level, "PID: %04x", ss->pid); ++- break; ++- ++- case 2: ++- case 4: ++- indent_printf(level, "SubPath Id: %02x", ss->subpath_id); ++- indent_printf(level, "SubClip Id: %02x", ss->subclip_id); ++- indent_printf(level, "PID: %04x", ss->pid); ++- break; ++- ++- case 3: ++- indent_printf(level, "SubPath Id: %02x", ss->subpath_id); ++- indent_printf(level, "PID: %04x", ss->pid); ++- break; ++- ++- default: ++- fprintf(stderr, "unrecognized stream type %02x\n", ss->stream_type); ++- break; ++- }; ++- ++- switch (ss->coding_type) { ++- case 0x01: ++- case 0x02: ++- case 0xea: ++- case 0x1b: ++- indent_printf(level, "Format %02x: %s", ss->format, ++- _lookup_str(video_format_map, ss->format)); ++- indent_printf(level, "Rate %02x: %s", ss->rate, ++- _lookup_str(video_rate_map, ss->rate)); ++- break; ++- ++- case 0x03: ++- case 0x04: ++- case 0x80: ++- case 0x81: ++- case 0x82: ++- case 0x83: ++- case 0x84: ++- case 0x85: ++- case 0x86: ++- case 0xa1: ++- case 0xa2: ++- indent_printf(level, "Format %02x: %s", ss->format, ++- _lookup_str(audio_format_map, ss->format)); ++- indent_printf(level, "Rate %02x: %s", ss->rate, ++- _lookup_str(audio_rate_map, ss->rate)); ++- indent_printf(level, "Language: %s", ss->lang); ++- break; ++- ++- case 0x90: ++- case 0x91: ++- indent_printf(level, "Language: %s", ss->lang); ++- break; ++- ++- case 0x92: ++- indent_printf(level, "Char Code: %02x", ss->char_code); ++- indent_printf(level, "Language: %s", ss->lang); ++- break; ++- ++- default: ++- fprintf(stderr, "unrecognized coding type %02x\n", ss->coding_type); ++- break; ++- }; ++-} ++- ++-static void ++-_show_details(MPLS_PL *pl, int level) ++-{ ++- int ii, jj, kk; ++- ++- for (ii = 0; ii < pl->list_count; ii++) { ++- MPLS_PI *pi; 
++- ++- pi = &pl->play_item[ii]; ++- indent_printf(level, "Clip Id %s", pi->clip[0].clip_id); ++- indent_printf(level+1, "Stc Id: %02x", pi->clip[0].stc_id); ++- indent_printf(level+1, "Connection Condition: %s (%02x)", ++- _lookup_str(connection_type_map, pi->connection_condition), ++- pi->connection_condition); ++- indent_printf(level+1, "In-Time: %d", pi->in_time); ++- indent_printf(level+1, "Out-Time: %d", pi->out_time); ++- if (pi->still_mode == 1) { ++- indent_printf(level+1, "Still time: %ds\n", pi->still_time); ++- } ++- if (pi->still_mode == 2) { ++- indent_printf(level+1, "Still time: infinite\n"); ++- } ++- if (pi->angle_count > 1) { ++- for (jj = 1; jj < pi->angle_count; jj++) { ++- indent_printf(level+1, "Angle %d:", jj); ++- indent_printf(level+2, "Clip Id %s", pi->clip[jj].clip_id); ++- indent_printf(level+2, "Stc Id: %02x", pi->clip[jj].stc_id); ++- } ++- } ++- for (jj = 0; jj < pi->stn.num_video; jj++) { ++- indent_printf(level+1, "Video Stream %d:", jj); ++- _show_stream(&pi->stn.video[jj], level + 2); ++- } ++- for (jj = 0; jj < pi->stn.num_audio; jj++) { ++- indent_printf(level+1, "Audio Stream %d:", jj); ++- _show_stream(&pi->stn.audio[jj], level + 2); ++- } ++- for (jj = 0; jj < pi->stn.num_ig; jj++) { ++- indent_printf(level+1, "Interactive Graphics Stream %d:", jj); ++- _show_stream(&pi->stn.ig[jj], level + 2); ++- } ++- for (jj = 0; jj < (pi->stn.num_pg + pi->stn.num_pip_pg); jj++) { ++- if (jj < pi->stn.num_pg) { ++- indent_printf(level+1, "Presentation Graphics Stream %d:", jj); ++- } else { ++- indent_printf(level+1, "PIP Presentation Graphics Stream %d:", jj); ++- } ++- _show_stream(&pi->stn.pg[jj], level + 2); ++- } ++- for (jj = 0; jj < pi->stn.num_secondary_video; jj++) { ++- indent_printf(level+1, "Secondary Video Stream %d:", jj); ++- _show_stream(&pi->stn.secondary_video[jj], level + 2); ++- for (kk = 0; kk < pi->stn.secondary_video[jj].sv_num_secondary_audio_ref; kk++) { ++- indent_printf(level+2, "Secondary Audio Ref %d: %d", 
kk,pi->stn.secondary_video[jj].sv_secondary_audio_ref[kk]); ++- } ++- for (kk = 0; kk < pi->stn.secondary_video[jj].sv_num_pip_pg_ref; kk++) { ++- indent_printf(level+2, "PIP Presentation Graphic Ref %d: %d", kk,pi->stn.secondary_video[jj].sv_pip_pg_ref[kk]); ++- } ++- } ++- for (jj = 0; jj < pi->stn.num_secondary_audio; jj++) { ++- indent_printf(level+1, "Secondary Audio Stream %d:", jj); ++- _show_stream(&pi->stn.secondary_audio[jj], level + 2); ++- for (kk = 0; kk < pi->stn.secondary_audio[jj].sa_num_primary_audio_ref; kk++) { ++- indent_printf(level+2, "Primary Audio Ref %d: %d", kk,pi->stn.secondary_audio[jj].sa_primary_audio_ref[kk]); ++- } ++- } ++- printf("\n"); ++- } ++-} ++- ++-static void ++-_show_ai(MPLS_PL *pl, int level) ++-{ ++- indent_printf(level, "Playback type: %s (%d)", ++- _lookup_str(playback_type_map, pl->app_info.playback_type), ++- pl->app_info.playback_type); ++- if (pl->app_info.playback_type == 2 || pl->app_info.playback_type == 3) { ++- indent_printf(level+1, "Playback count: %d", pl->app_info.playback_count); ++- } ++-} ++- ++-static void ++-_show_marks(MPLS_PL *pl, int level) ++-{ ++- int ii; ++- ++- indent_printf(level, "PlayMark Count %d", pl->mark_count); ++- for (ii = 0; ii < pl->mark_count; ii++) { ++- MPLS_PI *pi; ++- MPLS_PLM *plm; ++- int min; ++- double sec; ++- ++- plm = &pl->play_mark[ii]; ++- indent_printf(level, "PlayMark %d", ii); ++- indent_printf(level+1, "Type: %02x", plm->mark_type); ++- if (plm->play_item_ref < pl->list_count) { ++- pi = &pl->play_item[plm->play_item_ref]; ++- indent_printf(level+1, "PlayItem: %s", pi->clip[0].clip_id); ++- } else { ++- indent_printf(level+1, "PlayItem: Invalid reference"); ++- } ++- indent_printf(level+1, "Time (ticks): %u", plm->time); ++- min = plm->duration / (45000*60); ++- sec = (double)(plm->duration - min * 45000 * 60) / 45000; ++- indent_printf(level+1, "Duration (mm:ss.ms, ticks): %d:%.2f, %u", ++- min, sec, plm->duration); ++- printf("\n"); ++- } ++-} ++- ++-static void 
++-_show_clip_list(MPLS_PL *pl, int level) ++-{ ++- int ii, jj; ++- ++- for (ii = 0; ii < pl->list_count; ii++) { ++- MPLS_PI *pi; ++- ++- pi = &pl->play_item[ii]; ++- if (verbose) { ++- uint32_t duration; ++- ++- duration = pi->out_time - pi->in_time; ++- indent_printf(level, "%s.m2ts -- Duration: %3d:%02d", ++- pi->clip[0].clip_id, ++- duration / (45000 * 60), (duration / 45000) % 60); ++- } else { ++- indent_printf(level, "%s.m2ts", pi->clip[0].clip_id); ++- } ++- if (pi->angle_count > 1) { ++- for (jj = 1; jj < pi->angle_count; jj++) { ++- indent_printf(level+1, "Angle %d: %s.m2ts", jj+1, pi->clip[jj].clip_id); ++- } ++- } ++- } ++- printf("\n"); ++-} ++- ++-static void ++-_show_sub_path(MPLS_SUB *sub, int level) ++-{ ++- int ii; ++- ++- indent_printf(level+1, "Type: %d (%s)", sub->type, _lookup_str(subpath_type_map, sub->type)); ++- indent_printf(level+1, "Repeat: %d", sub->is_repeat); ++- indent_printf(level+1, "Sub playitem count: %d", sub->sub_playitem_count); ++- ++- for (ii = 0; ii < sub->sub_playitem_count; ii++) { ++- MPLS_SUB_PI *pi; ++- ++- pi = &sub->sub_play_item[ii]; ++- ++- if (verbose) { ++- indent_printf(level+1, "Sub playitem %d", ii); ++- indent_printf(level+2, "Clip Id %s", pi->clip[0].clip_id); ++- indent_printf(level+2, "Multi clip: %d", pi->is_multi_clip); ++- indent_printf(level+2, "Clip count: %d", pi->clip_count); ++- indent_printf(level+2, "Connection Condition: %s (%02x)", ++- _lookup_str(connection_type_map, pi->connection_condition), ++- pi->connection_condition); ++- indent_printf(level+2, "In-Time: %d", pi->in_time); ++- indent_printf(level+2, "Out-Time: %d", pi->out_time); ++- indent_printf(level+2, "Sync playitem Id: %d", pi->sync_play_item_id); ++- indent_printf(level+2, "Sync PTS: %d", pi->sync_pts); ++- } else { ++- indent_printf(level+1, "%s.m2ts", pi->clip[0].clip_id); ++- } ++- } ++-} ++- ++-static void ++-_show_pip_metadata_block(MPLS_PIP_METADATA *block, int level) ++-{ ++- int ii; ++- ++- indent_printf(level, "Clip ref: 
%d", block->clip_ref); ++- indent_printf(level, "Secondary video ref: %d", block->secondary_video_ref); ++- indent_printf(level, "Timeline type: %d", block->timeline_type); ++- indent_printf(level, "Luma key flag: %d", block->luma_key_flag); ++- if (block->luma_key_flag) { ++- indent_printf(level, "Upper limit luma key: %d", block->upper_limit_luma_key); ++- } ++- indent_printf(level, "Trick play flag: %d", block->trick_play_flag); ++- ++- for (ii = 0; ii < block->data_count; ii++) { ++- indent_printf(level, "data block %d:", ii); ++- indent_printf(level+1, "Timestamp: %d", block->data[ii].time); ++- indent_printf(level+1, "Horizontal position %d", block->data[ii].xpos); ++- indent_printf(level+1, "Vertical position: %d", block->data[ii].ypos); ++- indent_printf(level+1, "Scaling factor: %d", block->data[ii].scale_factor); ++- } ++-} ++- ++-static void ++-_show_pip_metadata(MPLS_PL *pl, int level) ++-{ ++- int ii; ++- ++- for (ii = 0; ii < pl->ext_pip_data_count; ii++) { ++- MPLS_PIP_METADATA *data; ++- ++- data = &pl->ext_pip_data[ii]; ++- ++- indent_printf(level, "PiP metadata block %d:", ii); ++- _show_pip_metadata_block(data, level+1); ++- } ++-} ++- ++-static void ++-_show_sub_paths(MPLS_PL *pl, int level) ++-{ ++- int ss; ++- ++- for (ss = 0; ss < pl->sub_count; ss++) { ++- MPLS_SUB *sub; ++- ++- sub = &pl->sub_path[ss]; ++- ++- indent_printf(level, "Sub Path %d:", ss); ++- _show_sub_path(sub, level+1); ++- } ++-} ++- ++-static void ++-_show_sub_paths_ss(MPLS_PL *pl, int level) ++-{ ++- int ss; ++- ++- for (ss = 0; ss < pl->ext_sub_count; ss++) { ++- MPLS_SUB *sub; ++- ++- sub = &pl->ext_sub_path[ss]; ++- ++- indent_printf(level, "Extension Sub Path %d:", ss); ++- _show_sub_path(sub, level+1); ++- } ++-} ++- ++-static uint32_t ++-_pl_duration(MPLS_PL *pl) ++-{ ++- int ii; ++- uint32_t duration = 0; ++- MPLS_PI *pi; ++- ++- for (ii = 0; ii < pl->list_count; ii++) { ++- pi = &pl->play_item[ii]; ++- duration += pi->out_time - pi->in_time; ++- } ++- return 
duration; ++-} ++- ++-static int ++-_filter_dup(MPLS_PL *pl_list[], int count, MPLS_PL *pl) ++-{ ++- int ii, jj; ++- ++- for (ii = 0; ii < count; ii++) { ++- if (pl->list_count != pl_list[ii]->list_count || ++- _pl_duration(pl) != _pl_duration(pl_list[ii])) { ++- continue; ++- } ++- for (jj = 0; jj < pl->list_count; jj++) { ++- MPLS_PI *pi1, *pi2; ++- ++- pi1 = &pl->play_item[jj]; ++- pi2 = &pl_list[ii]->play_item[jj]; ++- ++- if (memcmp(pi1->clip[0].clip_id, pi2->clip[0].clip_id, 5) != 0 || ++- pi1->in_time != pi2->in_time || ++- pi1->out_time != pi2->out_time) { ++- break; ++- } ++- } ++- if (jj != pl->list_count) { ++- continue; ++- } ++- return 0; ++- } ++- return 1; ++-} ++- ++-static int ++-_find_repeats(MPLS_PL *pl, const char *m2ts) ++-{ ++- int ii, count = 0; ++- ++- for (ii = 0; ii < pl->list_count; ii++) { ++- MPLS_PI *pi; ++- ++- pi = &pl->play_item[ii]; ++- // Ignore titles with repeated segments ++- if (strcmp(pi->clip[0].clip_id, m2ts) == 0) { ++- count++; ++- } ++- } ++- return count; ++-} ++- ++-static int ++-_filter_short(MPLS_PL *pl, unsigned int seconds) ++-{ ++- // Ignore short playlists ++- if (_pl_duration(pl) / 45000 <= seconds) { ++- return 0; ++- } ++- return 1; ++-} ++- ++-static int ++-_filter_repeats(MPLS_PL *pl, int repeats) ++-{ ++- int ii; ++- ++- for (ii = 0; ii < pl->list_count; ii++) { ++- MPLS_PI *pi; ++- ++- pi = &pl->play_item[ii]; ++- // Ignore titles with repeated segments ++- if (_find_repeats(pl, pi->clip[0].clip_id) > repeats) { ++- return 0; ++- } ++- } ++- return 1; ++-} ++- ++-static int clip_list = 0, playlist_info = 0, chapter_marks = 0, sub_paths = 0, pip_metadata = 0; ++-static int repeats = 0, seconds = 0, dups = 0; ++- ++-static MPLS_PL* ++-_process_file(char *name, MPLS_PL *pl_list[], int pl_count) ++-{ ++- MPLS_PL *pl; ++- ++- pl = bd_read_mpls(name); ++- if (pl == NULL) { ++- fprintf(stderr, "Parse failed: %s\n", name); ++- return NULL; ++- } ++- if (seconds) { ++- if (!_filter_short(pl, seconds)) { ++- 
bd_free_mpls(pl); ++- return NULL; ++- } ++- } ++- if (repeats) { ++- if (!_filter_repeats(pl, repeats)) { ++- bd_free_mpls(pl); ++- return NULL; ++- } ++- } ++- if (dups) { ++- if (!_filter_dup(pl_list, pl_count, pl)) { ++- bd_free_mpls(pl); ++- return NULL; ++- } ++- } ++- if (verbose) { ++- indent_printf(0, ++- "%s -- Num Clips: %3d , Duration: minutes %4u:%02u", ++- basename(name), ++- pl->list_count, ++- _pl_duration(pl) / (45000 * 60), ++- (_pl_duration(pl) / 45000) % 60); ++- _show_ai(pl, 1); ++- } else { ++- indent_printf(0, "%s -- Duration: minutes %4u:%02u", ++- basename(name), ++- _pl_duration(pl) / (45000 * 60), ++- (_pl_duration(pl) / 45000) % 60); ++- } ++- if (playlist_info) { ++- _show_details(pl, 1); ++- } ++- if (chapter_marks) { ++- _show_marks(pl, 1); ++- } ++- if (pip_metadata) { ++- _show_pip_metadata(pl, 1); ++- } ++- if (clip_list) { ++- _show_clip_list(pl, 1); ++- } ++- if (sub_paths) { ++- _show_sub_paths(pl, 1); ++- _show_sub_paths_ss(pl, 1); ++- } ++- return pl; ++-} ++- ++-static void ++-_usage(char *cmd) ++-{ ++- fprintf(stderr, ++-"Usage: %s -vli <mpls file> [<mpls file> ...]\n" ++-"With no options, produces a list of the playlist(s) with durations\n" ++-"Options:\n" ++-" v - Verbose output.\n" ++-" l - Produces a list of the m2ts clips\n" ++-" i - Dumps detailed information about each clip\n" ++-" c - Show chapter marks\n" ++-" p - Show sub paths\n" ++-" P - Show picture-in-picture metadata\n" ++-" r <N> - Filter out titles that have >N repeating clips\n" ++-" d - Filter out duplicate titles\n" ++-" s <seconds> - Filter out short titles\n" ++-" f - Filter combination -r2 -d -s900\n" ++-, cmd); ++- ++- exit(EXIT_FAILURE); ++-} ++- ++-#define OPTS "vlicpPfr:ds:" ++- ++-static int ++-_qsort_str_cmp(const void *a, const void *b) ++-{ ++- const char *stra = *(char * const *)a; ++- const char *strb = *(char * const *)b; ++- ++- return strcmp(stra, strb); ++-} ++- ++-int ++-main(int argc, char *argv[]) ++-{ ++- MPLS_PL *pl; ++- int opt; ++- 
int ii, pl_ii; ++- MPLS_PL *pl_list[1000]; ++- struct stat st; ++- char *path = NULL; ++- DIR *dir = NULL; ++- ++- do { ++- opt = getopt(argc, argv, OPTS); ++- switch (opt) { ++- case -1: ++- break; ++- ++- case 'v': ++- verbose = 1; ++- break; ++- ++- case 'l': ++- clip_list = 1; ++- break; ++- ++- case 'i': ++- playlist_info = 1; ++- break; ++- ++- case 'c': ++- chapter_marks = 1; ++- break; ++- ++- case 'p': ++- sub_paths = 1; ++- break; ++- ++- case 'P': ++- pip_metadata = 1; ++- break; ++- ++- case 'd': ++- dups = 1; ++- break; ++- ++- case 'r': ++- repeats = atoi(optarg); ++- break; ++- ++- case 'f': ++- repeats = 2; ++- dups = 1; ++- seconds = 900; ++- break; ++- ++- case 's': ++- seconds = atoi(optarg); ++- break; ++- ++- default: ++- _usage(argv[0]); ++- break; ++- } ++- } while (opt != -1); ++- ++- if (optind >= argc) { ++- _usage(argv[0]); ++- } ++- ++- for (pl_ii = 0, ii = optind; pl_ii < 1000 && ii < argc; ii++) { ++- ++- if (stat(argv[ii], &st)) { ++- continue; ++- } ++- dir = NULL; ++- if (S_ISDIR(st.st_mode)) { ++- ++- printf("Directory: %s:\n", argv[ii]); ++- path = _mk_path(argv[ii], PLAYLIST_DIR); ++- if (path == NULL) { ++- fprintf(stderr, "Failed to find playlist path: %s\n", argv[ii]); ++- continue; ++- } ++- dir = opendir(path); ++- if (dir == NULL) { ++- fprintf(stderr, "Failed to open dir: %s\n", path); ++- free(path); ++- continue; ++- } ++- } ++- if (dir != NULL) { ++- char **dirlist = (char**)calloc(10001, sizeof(char*)); ++- struct dirent *ent; ++- int jj = 0; ++- for (ent = readdir(dir); ent != NULL; ent = readdir(dir)) { ++- dirlist[jj++] = strcpy((char*)malloc(strlen(ent->d_name)), ent->d_name); ++- } ++- qsort(dirlist, jj, sizeof(char*), _qsort_str_cmp); ++- for (jj = 0; dirlist[jj] != NULL; jj++) { ++- char *name = NULL; ++- name = _mk_path(path, dirlist[jj]); ++- free(dirlist[jj]); ++- if (stat(name, &st)) { ++- free(name); ++- continue; ++- } ++- if (!S_ISREG(st.st_mode)) { ++- free(name); ++- continue; ++- } ++- pl = 
_process_file(name, pl_list, pl_ii); ++- free(name); ++- if (pl != NULL) { ++- pl_list[pl_ii++] = pl; ++- } ++- } ++- free(dirlist); ++- free(path); ++- } else { ++- pl = _process_file(argv[ii], pl_list, pl_ii); ++- if (pl != NULL) { ++- pl_list[pl_ii++] = pl; ++- } ++- } ++- } ++- // Cleanup ++- for (ii = 0; ii < pl_ii; ii++) { ++- bd_free_mpls(pl_list[ii]); ++- } ++- return 0; ++-} ++- ++diff --git a/src/examples/util.c b/src/examples/util.c ++deleted file mode 100644 ++index aaa4c46..0000000 +++++ /dev/null ++@@ -1,40 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2009-2010 John Stebbins ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. ++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include <stdio.h> ++-#include <stdarg.h> ++- ++-#include "util.h" ++- ++-void ++-indent_printf(int level, const char *fmt, ...) 
++-{ ++- va_list ap; ++- int ii; ++- ++- for (ii = 0; ii < level; ii++) ++- { ++- printf(" "); ++- } ++- va_start(ap, fmt); ++- vprintf(fmt, ap); ++- va_end(ap); ++- printf("\n"); ++-} ++- ++diff --git a/src/examples/util.h b/src/examples/util.h ++deleted file mode 100644 ++index 144f8ec..0000000 +++++ /dev/null ++@@ -1,43 +0,0 @@ ++-/* ++- * This file is part of libbluray ++- * Copyright (C) 2009-2010 John Stebbins ++- * ++- * This library is free software; you can redistribute it and/or ++- * modify it under the terms of the GNU Lesser General Public ++- * License as published by the Free Software Foundation; either ++- * version 2.1 of the License, or (at your option) any later version. ++- * ++- * This library is distributed in the hope that it will be useful, ++- * but WITHOUT ANY WARRANTY; without even the implied warranty of ++- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++- * Lesser General Public License for more details. ++- * ++- * You should have received a copy of the GNU Lesser General Public ++- * License along with this library. If not, see ++- * <http://www.gnu.org/licenses/>. ++- */ ++- ++-#include <stdint.h> ++- ++-#include "util/attributes.h" ++- ++-#if defined( __MINGW32__ ) ++-# undef lseek ++-# define lseek _lseeki64 ++-# undef fseeko ++-# define fseeko fseeko64 ++-# undef ftello ++-# define ftello ftello64 ++-# define flockfile(...) ++-# define funlockfile(...) ++-# define getc_unlocked getc ++-# undef off_t ++-# define off_t off64_t ++-# undef stat ++-# define stat _stati64 ++-# define fstat _fstati64 ++-# define wstat _wstati64 ++-#endif ++- ++-void indent_printf(int level, const char *fmt, ...) 
BD_ATTR_FORMAT_PRINTF(2,3); ++- ++diff --git a/src/file/dir_win32.c b/src/file/dir_win32.c ++index 2690658..f42114d 100644 ++--- a/src/file/dir_win32.c +++++ b/src/file/dir_win32.c ++@@ -86,8 +86,8 @@ static BD_DIR_H *_dir_open_win32(const char* dirname) ++ ++ dir->internal = priv; ++ ++- wchar_t wfilespec[MAX_PATH]; ++- if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, filespec, -1, wfilespec, MAX_PATH)) +++ wchar_t wfilespec[4096 + 1] = {0}; +++ if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, filespec, -1, wfilespec, 4096)) ++ priv->handle = _wfindfirst(wfilespec, &priv->info); ++ else ++ priv->handle = -1; ++diff --git a/src/file/dirs_win32.c b/src/file/dirs_win32.c ++index 5279ea5..3d07251 100644 ++--- a/src/file/dirs_win32.c +++++ b/src/file/dirs_win32.c ++@@ -36,10 +36,18 @@ ++ ++ char *win32_get_font_dir(const char *font_file) ++ { ++- wchar_t wdir[MAX_PATH]; +++ wchar_t wdir[MAX_PATH+1] = {0}; ++ if (S_OK != SHGetFolderPathW(NULL, CSIDL_FONTS, NULL, SHGFP_TYPE_CURRENT, wdir)) { ++- GetWindowsDirectoryW(wdir, MAX_PATH); ++- wcscat(wdir, L"\\fonts"); +++ int lenght = GetWindowsDirectoryW(wdir, MAX_PATH); +++ if (lenght == 0 || lenght > (MAX_PATH - 8)) { +++ BD_DEBUG(DBG_FILE, "Font directory path too long!\n"); +++ return NULL; +++ } +++ if (!wcscat(wdir, L"\\fonts")) { +++ BD_DEBUG(DBG_FILE, "Could not construct font directory path!\n"); +++ return NULL; +++ } +++ ++ } ++ ++ int len = WideCharToMultiByte (CP_UTF8, 0, wdir, -1, NULL, 0, NULL, NULL); ++@@ -59,7 +67,7 @@ char *file_get_config_home(void) ++ ++ char *file_get_data_home(void) ++ { ++- wchar_t wdir[MAX_PATH]; +++ wchar_t wdir[MAX_PATH+1] = {0}; ++ ++ /* Get the "Application Data" folder for the user */ ++ if (S_OK == SHGetFolderPathW(NULL, CSIDL_APPDATA | CSIDL_FLAG_CREATE, ++@@ -84,7 +92,7 @@ char *file_get_cache_home(void) ++ const char *file_get_config_system(const char *dir) ++ { ++ static char *appdir = NULL; ++- wchar_t wdir[MAX_PATH]; +++ wchar_t wdir[MAX_PATH+1] = {0}; ++ ++ if 
(!dir) { ++ // first call ++diff --git a/src/file/dl_win32.c b/src/file/dl_win32.c ++index e4492e1..7f6ae73 100644 ++--- a/src/file/dl_win32.c +++++ b/src/file/dl_win32.c ++@@ -57,7 +57,7 @@ void *dl_dlopen(const char *path, const char *version) ++ { ++ (void)version; ++ ++- wchar_t wname[MAX_PATH]; +++ wchar_t wname[MAX_PATH+1] = {0}; ++ char *name; ++ void *result; ++ ++@@ -109,7 +109,7 @@ const char *dl_get_path(void) ++ if (!initialized) { ++ initialized = 1; ++ ++- static char path[MAX_PATH]; +++ static char path[MAX_PATH + 1]; ++ HMODULE hModule; ++ wchar_t wpath[MAX_PATH]; ++ ++diff --git a/src/file/file.c b/src/file/file.c ++index 15edfe0..2f85248 100644 ++--- a/src/file/file.c +++++ b/src/file/file.c ++@@ -52,6 +52,10 @@ int file_mkdirs(const char *path) ++ char *end = dir; ++ char *p; ++ +++ if (!dir) { +++ return -1; +++ } +++ ++ /* strip file name */ ++ if (!(end = strrchr(end, DIR_SEP_CHAR))) { ++ X_FREE(dir); ++diff --git a/src/file/file_posix.c b/src/file/file_posix.c ++index 753a8ce..2a79f6f 100644 ++--- a/src/file/file_posix.c +++++ b/src/file/file_posix.c ++@@ -38,6 +38,13 @@ ++ #include <sys/stat.h> ++ #include <fcntl.h> ++ +++#ifdef __ANDROID__ +++# undef lseek +++# define lseek lseek64 +++# undef off_t +++# define off_t off64_t +++#endif +++ ++ static void _file_close(BD_FILE_H *file) ++ { ++ if (file) { ++diff --git a/src/file/file_win32.c b/src/file/file_win32.c ++index 5eb52d7..c0f48e4 100644 ++--- a/src/file/file_win32.c +++++ b/src/file/file_win32.c ++@@ -97,9 +97,9 @@ static BD_FILE_H *_file_open(const char* filename, const char *mode) ++ { ++ BD_FILE_H *file; ++ FILE *fp; ++- wchar_t wfilename[MAX_PATH], wmode[8]; +++ wchar_t wfilename[4096 + 1] = {0}, wmode[8] = {0}; ++ ++- if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, filename, -1, wfilename, MAX_PATH) || +++ if (!MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, filename, -1, wfilename, 4096) || ++ !MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, mode, -1, wmode, 8)) { 
++ ++ BD_DEBUG(DBG_FILE, "Error opening file %s\n", filename); ++@@ -112,6 +112,9 @@ static BD_FILE_H *_file_open(const char* filename, const char *mode) ++ return NULL; ++ } ++ +++ // Set file buffer +++ setvbuf(fp, NULL, _IOFBF, 6144 * 10); +++ ++ file = calloc(1, sizeof(BD_FILE_H)); ++ if (!file) { ++ BD_DEBUG(DBG_FILE | DBG_CRIT, "Error opening file %s (out of memory)\n", filename); ++diff --git a/src/file/mount.c b/src/file/mount.c ++index 6382d8b..7575f89 100644 ++--- a/src/file/mount.c +++++ b/src/file/mount.c ++@@ -33,6 +33,7 @@ ++ ++ char *mount_get_mountpoint(const char *device_path) ++ { +++#ifndef __ANDROID__ ++ #ifdef HAVE_MNTENT_H ++ struct stat st; ++ if (stat (device_path, &st) ) { ++@@ -62,6 +63,7 @@ char *mount_get_mountpoint(const char *device_path) ++ endmntent (f); ++ } ++ #endif /* HAVE_MNTENT_H */ +++#endif /* __ANDROID__ */ ++ ++ return str_dup(device_path); ++ } ++diff --git a/src/libbluray/bdj/bdj.c b/src/libbluray/bdj/bdj.c ++index 3465c69..23d944a 100644 ++--- a/src/libbluray/bdj/bdj.c +++++ b/src/libbluray/bdj/bdj.c ++@@ -26,6 +26,7 @@ ++ ++ #include "native/register_native.h" ++ +++#include "file/file.h" ++ #include "file/dirs.h" ++ #include "file/dl.h" ++ #include "util/strutl.h" ++@@ -41,9 +42,6 @@ ++ #ifdef _WIN32 ++ #include <windows.h> ++ #include <winreg.h> ++-#define DIR_SEP "\\" ++-#else ++-#define DIR_SEP "/" ++ #endif ++ ++ #ifdef HAVE_BDJ_J2ME ++@@ -67,7 +65,7 @@ static void *_load_jvm_win32(const char **p_java_home) ++ ++ wchar_t buf_loc[4096] = L"SOFTWARE\\JavaSoft\\Java Runtime Environment\\"; ++ wchar_t buf_vers[128]; ++- +++ wchar_t java_path[4096] = L""; ++ char strbuf[256]; ++ ++ LONG r; ++@@ -77,14 +75,14 @@ static void *_load_jvm_win32(const char **p_java_home) ++ ++ r = RegOpenKeyExW(HKEY_LOCAL_MACHINE, buf_loc, 0, KEY_READ, &hkey); ++ if (r != ERROR_SUCCESS) { ++- BD_DEBUG(DBG_BDJ | DBG_CRIT, "Error opening registry key SOFTWARE\\JavaSoft\\Java Runtime Environment\\"); +++ BD_DEBUG(DBG_BDJ | DBG_CRIT, "Error 
opening registry key SOFTWARE\\JavaSoft\\Java Runtime Environment\\\n"); ++ return NULL; ++ } ++ ++ r = RegQueryValueExW(hkey, L"CurrentVersion", NULL, &lType, (LPBYTE)buf_vers, &dSize); ++ RegCloseKey(hkey); ++ if (r != ERROR_SUCCESS) { ++- BD_DEBUG(DBG_BDJ | DBG_CRIT, "CurrentVersion registry value not found"); +++ BD_DEBUG(DBG_BDJ | DBG_CRIT, "CurrentVersion registry value not found\n"); ++ return NULL; ++ } ++ ++@@ -97,7 +95,7 @@ static void *_load_jvm_win32(const char **p_java_home) ++ dSize = sizeof(buf_loc); ++ r = RegOpenKeyExW(HKEY_LOCAL_MACHINE, buf_loc, 0, KEY_READ, &hkey); ++ if (r != ERROR_SUCCESS) { ++- BD_DEBUG(DBG_BDJ | DBG_CRIT, "Error opening JRE version-specific registry key"); +++ BD_DEBUG(DBG_BDJ | DBG_CRIT, "Error opening JRE version-specific registry key\n"); ++ return NULL; ++ } ++ ++@@ -108,6 +106,9 @@ static void *_load_jvm_win32(const char **p_java_home) ++ WideCharToMultiByte(CP_UTF8, 0, buf_loc, -1, java_home, sizeof(java_home), NULL, NULL); ++ *p_java_home = java_home; ++ BD_DEBUG(DBG_BDJ, "JavaHome: %s\n", java_home); +++ +++ wcscat(java_path, buf_loc); +++ wcscat(java_path, L"\\bin"); ++ } ++ ++ dSize = sizeof(buf_loc); ++@@ -115,11 +116,13 @@ static void *_load_jvm_win32(const char **p_java_home) ++ RegCloseKey(hkey); ++ ++ if (r != ERROR_SUCCESS) { ++- BD_DEBUG(DBG_BDJ | DBG_CRIT, "RuntimeLib registry value not found"); +++ BD_DEBUG(DBG_BDJ | DBG_CRIT, "RuntimeLib registry value not found\n"); ++ return NULL; ++ } ++ +++ SetDllDirectoryW(java_path); ++ void *result = LoadLibraryW(buf_loc); +++ SetDllDirectoryW(NULL); ++ ++ WideCharToMultiByte(CP_UTF8, 0, buf_loc, -1, strbuf, sizeof(strbuf), NULL, NULL); ++ if (!result) { ++@@ -132,10 +135,43 @@ static void *_load_jvm_win32(const char **p_java_home) ++ } ++ #endif ++ +++#ifdef _WIN32 +++static inline char *_utf8_to_cp(const char *utf8) +++{ +++ int wlen = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, NULL, 0); +++ if (wlen == 0) { +++ return NULL; +++ } +++ +++ wchar_t *wide = (wchar_t 
*)malloc(wlen * sizeof(wchar_t)); +++ if (!wide) { +++ return NULL; +++ } +++ MultiByteToWideChar(CP_UTF8, 0, utf8, -1, wide, wlen); +++ +++ size_t len = WideCharToMultiByte(CP_ACP, 0, wide, -1, NULL, 0, NULL, NULL); +++ if (len == 0) { +++ X_FREE(wide); +++ return NULL; +++ } +++ +++ char *out = (char *)malloc(len); +++ if (out != NULL) { +++ WideCharToMultiByte(CP_ACP, 0, wide, -1, out, len, NULL, NULL); +++ } +++ X_FREE(wide); +++ return out; +++} +++#endif +++ ++ static void *_jvm_dlopen(const char *java_home, const char *jvm_dir, const char *jvm_lib) ++ { ++ if (java_home) { ++ char *path = str_printf("%s/%s/%s", java_home, jvm_dir, jvm_lib); +++ if (!path) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ return NULL; +++ } ++ BD_DEBUG(DBG_BDJ, "Opening %s ...\n", path); ++ void *h = dl_dlopen(path, NULL); ++ X_FREE(path); ++@@ -208,11 +244,17 @@ static void *_load_jvm(const char **p_java_home) ++ ++ static int _can_read_file(const char *fn) ++ { ++- FILE *fp = fopen(fn, "rb"); +++ BD_FILE_H *fp; +++ +++ if (!fn) { +++ return 0; +++ } +++ +++ fp = file_open(fn, "rb"); ++ if (fp) { ++- char b; ++- int result = (int)fread(&b, 1, 1, fp); ++- fclose(fp); +++ uint8_t b; +++ int result = (int)file_read(fp, &b, 1); +++ file_close(fp); ++ if (result == 1) { ++ return 1; ++ } ++@@ -460,6 +502,11 @@ static int _create_jvm(void *jvm_lib, const char *java_home, const char *jar_fil ++ } ++ ++ JavaVMOption* option = calloc(1, sizeof(JavaVMOption) * 20); +++ if (!option) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ return 0; +++ } +++ ++ int n = 0; ++ JavaVMInitArgs args; ++ option[n++].optionString = str_dup ("-Dawt.toolkit=java.awt.BDToolkit"); ++@@ -499,6 +546,17 @@ static int _create_jvm(void *jvm_lib, const char *java_home, const char *jar_fil ++ args.options = option; ++ args.ignoreUnrecognized = JNI_FALSE; // don't ignore unrecognized options ++ +++#ifdef _WIN32 +++ /* ... in windows, JVM options are not UTF8 but current system code page ... 
*/ +++ /* luckily, most BD-J options can be passed in as java strings later. But, not class path. */ +++ int ii; +++ for (ii = 0; ii < n; ii++) { +++ char *tmp = _utf8_to_cp(option[ii].optionString); +++ X_FREE(option[ii].optionString); +++ option[ii].optionString = tmp; +++ } +++#endif +++ ++ int result = JNI_CreateJavaVM_fp(jvm, (void**) env, &args); ++ ++ while (--n >= 0) { ++@@ -534,16 +592,22 @@ BDJAVA* bdj_open(const char *path, struct bluray *bd, ++ return 0; ++ } ++ +++ BDJAVA* bdjava = calloc(1, sizeof(BDJAVA)); +++ if (!bdjava) { +++ dl_dlclose(jvm_lib); +++ return NULL; +++ } +++ ++ JNIEnv* env = NULL; ++ JavaVM *jvm = NULL; ++ if (!_find_jvm(jvm_lib, &env, &jvm) && ++ !_create_jvm(jvm_lib, java_home, jar_file, &env, &jvm)) { ++ +++ X_FREE(bdjava); ++ dl_dlclose(jvm_lib); ++ return NULL; ++ } ++ ++- BDJAVA* bdjava = calloc(1, sizeof(BDJAVA)); ++ bdjava->h_libjvm = jvm_lib; ++ bdjava->jvm = jvm; ++ ++diff --git a/src/libbluray/bdj/bdj.h b/src/libbluray/bdj/bdj.h ++index 45fbfc5..f6cd97b 100644 ++--- a/src/libbluray/bdj/bdj.h +++++ b/src/libbluray/bdj/bdj.h ++@@ -45,10 +45,10 @@ typedef enum { ++ } BDJ_EVENT; ++ ++ typedef struct { ++- char *persistent_root; ++- char *cache_root; +++ char *persistent_root; /* BD-J Xlet persistent storage */ +++ char *cache_root; /* BD-J binding unit data area */ ++ ++- char *classpath; +++ char *classpath; /* BD-J implementation class path (location of libbluray.jar) */ ++ } BDJ_STORAGE; ++ ++ typedef struct bdjava_s BDJAVA; ++diff --git a/src/libbluray/bdj/build.xml b/src/libbluray/bdj/build.xml ++index c2764f7..938cd44 100644 ++--- a/src/libbluray/bdj/build.xml +++++ b/src/libbluray/bdj/build.xml ++@@ -7,6 +7,7 @@ ++ <property name="build" location="build"/> ++ <property name="dist" location="../../.libs"/> ++ <property name="src_awt" value=""/> +++ <property name="src_asm" value="../../../contrib/asm/src/"/> ++ <property name="bootclasspath" value=""/> ++ <property name="version" value=""/> ++ ++@@ -18,6 +19,12 @@ ++ ++ 
<target name="compile" depends="init" ++ description="compile the source " > +++ <javac srcdir="${src_asm}" destdir="${build}" debug="yes" +++ bootclasspath="${bootclasspath}" +++ source="1.5" target="1.5"> +++ <compilerarg value="-XDignore.symbol.file"/> +++ <compilerarg value="-Xlint:-deprecation"/> +++ </javac> ++ <javac srcdir="${src}${src_awt}" destdir="${build}" debug="yes" ++ bootclasspath="${bootclasspath}" ++ source="1.4" target="1.4"> ++diff --git a/src/libbluray/bdj/java-j2se/java/awt/BDGraphics.java b/src/libbluray/bdj/java-j2se/java/awt/BDGraphics.java ++index 54c24e5..77ce66e 100644 ++--- a/src/libbluray/bdj/java-j2se/java/awt/BDGraphics.java +++++ b/src/libbluray/bdj/java-j2se/java/awt/BDGraphics.java ++@@ -53,6 +53,7 @@ class BDGraphics extends BDGraphicsBase { ++ ++ public java.awt.font.FontRenderContext getFontRenderContext() ++ { +++ logger.unimplemented("getFontRenderContext"); ++ return null; ++ } ++ public void setPaint(Paint p) { ++diff --git a/src/libbluray/bdj/java/com/aacsla/bluray/online/ContentAttribute.java b/src/libbluray/bdj/java/com/aacsla/bluray/online/ContentAttribute.java ++index fbfa45d..34f59e4 100644 ++--- a/src/libbluray/bdj/java/com/aacsla/bluray/online/ContentAttribute.java +++++ b/src/libbluray/bdj/java/com/aacsla/bluray/online/ContentAttribute.java ++@@ -27,10 +27,33 @@ public class ContentAttribute { ++ } ++ ++ public byte[] getContentCertID() { +++ byte[] id = getContentCertID("AACS" + File.separator + "Content000.cer"); +++ if (id != null) { +++ return id; +++ } +++ +++ id = getContentCertID("MAKEMKV" + File.separator + "AACS" + File.separator + "Content000.cer"); +++ if (id != null) { +++ return id; +++ } +++ +++ id = getContentCertID("ANY!" 
+ File.separator + "Content000.cer"); +++ if (id != null) { +++ return id; +++ } +++ +++ return new byte[6]; +++ } +++ +++ private byte[] getContentCertID(String file) { ++ FileInputStream is = null; ++ try { ++ is = new FileInputStream( ++- System.getProperty("bluray.vfs.root") + File.separator + "AACS/Content000.cer"); +++ System.getProperty("bluray.vfs.root") + File.separator + file); +++ } catch (Exception e) { +++ return null; +++ } +++ try { ++ if (is.skip(14) != 14) ++ return null; ++ byte[] bytes = new byte[6]; ++diff --git a/src/libbluray/bdj/java/java/awt/BDFontMetrics.java b/src/libbluray/bdj/java/java/awt/BDFontMetrics.java ++index d2a91dc..fdcda44 100644 ++--- a/src/libbluray/bdj/java/java/awt/BDFontMetrics.java +++++ b/src/libbluray/bdj/java/java/awt/BDFontMetrics.java ++@@ -192,7 +192,12 @@ public class BDFontMetrics extends sun.font.FontDesignMetrics { ++ } ++ ++ static synchronized String[] getFontList() { ++- init(); +++ try { +++ init(); +++ } catch (Throwable t) { +++ System.err.println("getFontList() failed: " + t); +++ return new String[0]; +++ } ++ ++ ArrayList fontNames = new ArrayList(); ++ ++diff --git a/src/libbluray/bdj/java/java/awt/BDGraphicsBase.java b/src/libbluray/bdj/java/java/awt/BDGraphicsBase.java ++index 0c7d403..f7e60f7 100644 ++--- a/src/libbluray/bdj/java/java/awt/BDGraphicsBase.java +++++ b/src/libbluray/bdj/java/java/awt/BDGraphicsBase.java ++@@ -306,7 +306,7 @@ abstract class BDGraphicsBase extends Graphics2D implements ConstrainableGraphic ++ Rectangle rect = new Rectangle(x, y, length, 1); ++ rect = actualClip.intersection(rect); ++ ++- if (rect.width <= 0 || rect.height <= 0 || rect.x < 0 || rect.y < 0) { +++ if (rect.width <= 0 || rect.height <= 0 || rect.x < 0 || rect.y < 0 || backBuffer == null) { ++ return; ++ } ++ ++@@ -364,7 +364,7 @@ abstract class BDGraphicsBase extends Graphics2D implements ConstrainableGraphic ++ Rectangle rect = new Rectangle(x, y, length, 1); ++ rect = actualClip.intersection(rect); ++ ++- 
if (rect.width <= 0 || rect.height <= 0 || rect.x < 0 || rect.y < 0) { +++ if (rect.width <= 0 || rect.height <= 0 || rect.x < 0 || rect.y < 0 || backBuffer == null) { ++ return; ++ } ++ ++@@ -458,7 +458,7 @@ abstract class BDGraphicsBase extends Graphics2D implements ConstrainableGraphic ++ y += originY; ++ Rectangle rect = new Rectangle(x, y, w, h); ++ rect = actualClip.intersection(rect); ++- if (rect.isEmpty()) { +++ if (rect.isEmpty() || backBuffer == null) { ++ return; ++ } ++ x = rect.x; ++@@ -572,7 +572,7 @@ abstract class BDGraphicsBase extends Graphics2D implements ConstrainableGraphic ++ Rectangle rect = new Rectangle(x, y, w, h); ++ rect = actualClip.intersection(rect); ++ ++- if (rect.width <= 0 || rect.height <= 0) { +++ if (rect.width <= 0 || rect.height <= 0 || backBuffer == null) { ++ return; ++ } ++ ++diff --git a/src/libbluray/bdj/java/java/awt/BDImageConsumer.java b/src/libbluray/bdj/java/java/awt/BDImageConsumer.java ++index 59e2af3..a076873 100644 ++--- a/src/libbluray/bdj/java/java/awt/BDImageConsumer.java +++++ b/src/libbluray/bdj/java/java/awt/BDImageConsumer.java ++@@ -25,7 +25,7 @@ import java.awt.image.ImageObserver; ++ import java.awt.image.ImageConsumer; ++ import java.awt.image.ColorModel; ++ ++-public class BDImageConsumer extends BDImage implements ImageConsumer { +++class BDImageConsumer extends BDImage implements ImageConsumer { ++ private Hashtable properties; ++ private ImageProducer producer; ++ private int status; ++diff --git a/src/libbluray/bdj/java/java/awt/BDToolkitBase.java b/src/libbluray/bdj/java/java/awt/BDToolkitBase.java ++index e210dea..0f5e3e0 100644 ++--- a/src/libbluray/bdj/java/java/awt/BDToolkitBase.java +++++ b/src/libbluray/bdj/java/java/awt/BDToolkitBase.java ++@@ -124,6 +124,10 @@ abstract class BDToolkitBase extends Toolkit { ++ } ++ ++ public Image getImage(String filename) { +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("getImage(): no context " + Logger.dumpStack()); +++ } +++ 
++ if (cachedImages.containsKey(filename)) ++ return (Image)cachedImages.get(filename); ++ Image newImage = createImage(filename); ++@@ -133,6 +137,10 @@ abstract class BDToolkitBase extends Toolkit { ++ } ++ ++ public Image getImage(URL url) { +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("getImage(): no context " + Logger.dumpStack()); +++ } +++ ++ if (cachedImages.containsKey(url)) ++ return (Image)cachedImages.get(url); ++ Image newImage = createImage(url); ++@@ -142,6 +150,10 @@ abstract class BDToolkitBase extends Toolkit { ++ } ++ ++ public Image createImage(String filename) { +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("createImage(): no context " + Logger.dumpStack()); +++ } +++ ++ if (!new File(filename).isAbsolute()) { ++ String home = BDJXletContext.getCurrentXletHome(); ++ if (home != null) { ++@@ -161,6 +173,9 @@ abstract class BDToolkitBase extends Toolkit { ++ } ++ ++ public Image createImage(URL url) { +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("createImage(): no context " + Logger.dumpStack()); +++ } ++ ImageProducer ip = new URLImageSource(url); ++ Image newImage = createImage(ip); ++ return newImage; ++@@ -169,12 +184,20 @@ abstract class BDToolkitBase extends Toolkit { ++ public Image createImage(byte[] imagedata, ++ int imageoffset, ++ int imagelength) { +++ +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("createImage(): no context " + Logger.dumpStack()); +++ } +++ ++ ImageProducer ip = new ByteArrayImageSource(imagedata, imageoffset, imagelength); ++ Image newImage = createImage(ip); ++ return newImage; ++ } ++ ++ public Image createImage(ImageProducer producer) { +++ if (BDJXletContext.getCurrentContext() == null) { +++ logger.error("createImage(): no context " + Logger.dumpStack()); +++ } ++ return new BDImageConsumer(producer); ++ } ++ ++@@ -243,7 +266,7 @@ abstract class BDToolkitBase extends Toolkit { ++ } ++ } ++ ++- 
logger.warning("getSystemEventQueue(): no context"); +++ logger.warning("getSystemEventQueue(): no context from:" + logger.dumpStack()); ++ return eventQueue; ++ } ++ } ++diff --git a/src/libbluray/bdj/java/java/awt/BDWindowGraphics.java b/src/libbluray/bdj/java/java/awt/BDWindowGraphics.java ++index 743f441..26e7248 100644 ++--- a/src/libbluray/bdj/java/java/awt/BDWindowGraphics.java +++++ b/src/libbluray/bdj/java/java/awt/BDWindowGraphics.java ++@@ -39,6 +39,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void clearRect(int x, int y, int w, int h) { +++ if (window == null) return; ++ synchronized (window) { ++ super.clearRect(x, y, w, h); ++ window.notifyChanged(); ++@@ -46,6 +47,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void fillRect(int x, int y, int w, int h) { +++ if (window == null) return; ++ synchronized (window) { ++ super.fillRect(x, y, w, h); ++ window.notifyChanged(); ++@@ -53,6 +55,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawRect(int x, int y, int w, int h) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawRect(x, y, w, h); ++ window.notifyChanged(); ++@@ -60,6 +63,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawLine(int x1, int y1, int x2, int y2) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawLine(x1, y1, x2, y2); ++ window.notifyChanged(); ++@@ -67,6 +71,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void copyArea(int x, int y, int w, int h, int dx, int dy) { +++ if (window == null) return; ++ synchronized (window) { ++ super.copyArea(x, y, w, h, dx, dy); ++ window.notifyChanged(); ++@@ -74,6 +79,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawPolyline(int xPoints[], int yPoints[], int nPoints) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawPolyline(xPoints, yPoints, nPoints); ++ 
window.notifyChanged(); ++@@ -81,6 +87,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawPolygon(int xPoints[], int yPoints[], int nPoints) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawPolygon(xPoints, yPoints, nPoints); ++ window.notifyChanged(); ++@@ -88,6 +95,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void fillPolygon(int xPoints[], int yPoints[], int nPoints) { +++ if (window == null) return; ++ synchronized (window) { ++ super.fillPolygon(xPoints, yPoints, nPoints); ++ window.notifyChanged(); ++@@ -95,6 +103,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawOval(int x, int y, int w, int h) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawOval(x, y, w, h); ++ window.notifyChanged(); ++@@ -102,6 +111,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void fillOval(int x, int y, int w, int h) { +++ if (window == null) return; ++ synchronized (window) { ++ super.fillOval(x, y, w, h); ++ window.notifyChanged(); ++@@ -109,6 +119,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawArc(int x, int y, int w, int h, int startAngle, int endAngle) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawArc(x, y, w, h, startAngle, endAngle); ++ window.notifyChanged(); ++@@ -116,6 +127,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void fillArc(int x, int y, int w, int h, int startAngle, int endAngle) { +++ if (window == null) return; ++ synchronized (window) { ++ super.fillArc(x, y, w, h, startAngle, endAngle); ++ window.notifyChanged(); ++@@ -123,6 +135,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void drawRoundRect(int x, int y, int w, int h, int arcWidth, int arcHeight) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawRoundRect(x, y, w, h, arcWidth, arcHeight); ++ 
window.notifyChanged(); ++@@ -130,6 +143,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ public void fillRoundRect(int x, int y, int w, int h, int arcWidth, int arcHeight) { +++ if (window == null) return; ++ synchronized (window) { ++ super.fillRoundRect(x, y, w, h, arcWidth, arcHeight); ++ window.notifyChanged(); ++@@ -137,6 +151,7 @@ public class BDWindowGraphics extends BDGraphics { ++ } ++ ++ protected void drawStringN(long ftFace, String string, int x, int y, int rgb) { +++ if (window == null) return; ++ synchronized (window) { ++ super.drawStringN(ftFace, string, x, y, rgb); ++ window.notifyChanged(); ++@@ -154,6 +169,8 @@ public class BDWindowGraphics extends BDGraphics { ++ boolean flipX, boolean flipY, ++ Color bg, ImageObserver observer) { ++ +++ if (window == null) return true; +++ ++ synchronized (window) { ++ boolean complete = super.drawImageN( ++ img, dx, dy, dw, dh, sx, sy, sw, sh, ++diff --git a/src/libbluray/bdj/java/java/awt/Font.java b/src/libbluray/bdj/java/java/awt/Font.java ++index a126bc5..a952599 100644 ++--- a/src/libbluray/bdj/java/java/awt/Font.java +++++ b/src/libbluray/bdj/java/java/awt/Font.java ++@@ -198,6 +198,9 @@ public class Font implements java.io.Serializable { ++ public Font deriveFont(int style, int size) { ++ return new Font(name, style, size, fontFile, family); ++ } +++ public Font deriveFont(int style, float size) { +++ return new Font(name, style, (int)size, fontFile, family); +++ } ++ ++ /* constructor */ ++ private Font(String name, int style, int size, File fontFile, String family) { ++diff --git a/src/libbluray/bdj/java/javax/media/MediaLocator.java b/src/libbluray/bdj/java/javax/media/MediaLocator.java ++index a182e8d..245ac54 100644 ++--- a/src/libbluray/bdj/java/javax/media/MediaLocator.java +++++ b/src/libbluray/bdj/java/javax/media/MediaLocator.java ++@@ -25,11 +25,11 @@ import java.net.URL; ++ ++ public class MediaLocator implements Serializable ++ { ++- public MediaLocator(URL url) { +++ 
public MediaLocator(URL url) { ++ this(url.toExternalForm()); ++ } ++ ++- public MediaLocator(String locatorString) { +++ public MediaLocator(String locatorString) { ++ int index = locatorString.indexOf(":"); ++ if (index <= 0) ++ throw new IllegalArgumentException("Bad locator string."); ++@@ -56,7 +56,7 @@ public class MediaLocator implements Serializable ++ public String toExternalForm() { ++ return protocol + ":" + remainder; ++ } ++- +++ ++ private String protocol = ""; ++ private String remainder = ""; ++ ++diff --git a/src/libbluray/bdj/java/javax/tv/graphics/TVContainer.java b/src/libbluray/bdj/java/javax/tv/graphics/TVContainer.java ++index e036884..09971b6 100644 ++--- a/src/libbluray/bdj/java/javax/tv/graphics/TVContainer.java +++++ b/src/libbluray/bdj/java/javax/tv/graphics/TVContainer.java ++@@ -20,9 +20,10 @@ package javax.tv.graphics; ++ ++ import java.awt.Container; ++ import javax.tv.xlet.XletContext; ++-import org.havi.ui.HScene; ++ import org.havi.ui.HSceneFactory; ++ +++import org.videolan.BDJXletContext; +++ ++ public class TVContainer { ++ public static Container getRootContainer(XletContext context) ++ { ++@@ -30,7 +31,15 @@ public class TVContainer { ++ throw new NullPointerException(); ++ } ++ ++- HScene scene = HSceneFactory.getInstance().getDefaultHScene(); ++- return scene; +++ if (!(context instanceof BDJXletContext) || (BDJXletContext)context != BDJXletContext.getCurrentContext()) { +++ org.videolan.Logger.getLogger(TVContainer.class.getName()).error("wrong context"); +++ } +++ +++ /* GEM: return instance of org.havi.ui.HScene or NULL */ +++ HSceneFactory sf = HSceneFactory.getInstance(); +++ if (sf != null) { +++ return sf.getDefaultHScene(); +++ } +++ return null; ++ } ++ } ++diff --git a/src/libbluray/bdj/java/javax/tv/locator/LocatorImpl.java b/src/libbluray/bdj/java/javax/tv/locator/LocatorImpl.java ++index e14825c..a87269a 100644 ++--- a/src/libbluray/bdj/java/javax/tv/locator/LocatorImpl.java +++++ 
b/src/libbluray/bdj/java/javax/tv/locator/LocatorImpl.java ++@@ -24,7 +24,7 @@ public class LocatorImpl implements Locator { ++ this.url = url; ++ } ++ ++- public boolean hasMultipleTransformations() { +++ public boolean hasMultipleTransformations() { ++ return false; ++ } ++ ++diff --git a/src/libbluray/bdj/java/javax/tv/service/SIElement.java b/src/libbluray/bdj/java/javax/tv/service/SIElement.java ++index c2a0262..16140de 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/SIElement.java +++++ b/src/libbluray/bdj/java/javax/tv/service/SIElement.java ++@@ -26,7 +26,7 @@ public interface SIElement extends SIRetrievable ++ public Locator getLocator(); ++ ++ public boolean equals(Object obj); ++- +++ ++ public int hashCode(); ++ ++ public ServiceInformationType getServiceInformationType(); ++diff --git a/src/libbluray/bdj/java/javax/tv/service/SIManagerImpl.java b/src/libbluray/bdj/java/javax/tv/service/SIManagerImpl.java ++index 4016876..f9d4a32 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/SIManagerImpl.java +++++ b/src/libbluray/bdj/java/javax/tv/service/SIManagerImpl.java ++@@ -29,6 +29,7 @@ import javax.tv.service.navigation.ServiceListImpl; ++ import javax.tv.service.transport.Transport; ++ import javax.tv.service.transport.TransportImpl; ++ +++import org.bluray.net.BDLocator; ++ import org.bluray.ti.TitleImpl; ++ import org.videolan.Libbluray; ++ ++@@ -82,6 +83,15 @@ public class SIManagerImpl extends SIManager { ++ } ++ ++ public Service getService(Locator locator) throws InvalidLocatorException, SecurityException { +++ +++ BDLocator bdLocator = null; +++ try { +++ bdLocator = new BDLocator(locator.toExternalForm()); +++ } catch (org.davic.net.InvalidLocatorException e) { +++ System.err.println("invalid locator: " + locator.toExternalForm() + "\n" + org.videolan.Logger.dumpStack(e)); +++ throw new javax.tv.locator.InvalidLocatorException(locator); +++ } +++ ++ return titles.findService(locator); ++ } ++ ++diff --git 
a/src/libbluray/bdj/java/javax/tv/service/navigation/SIElementFilter.java b/src/libbluray/bdj/java/javax/tv/service/navigation/SIElementFilter.java ++index fdfa128..f0dc97c 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/navigation/SIElementFilter.java +++++ b/src/libbluray/bdj/java/javax/tv/service/navigation/SIElementFilter.java ++@@ -1,6 +1,7 @@ ++ /* ++ * This file is part of libbluray ++ * Copyright (C) 2010 William Hahne +++ * Copyright (C) 2015 Petri Hintukainen ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++@@ -25,9 +26,21 @@ import javax.tv.service.SIElement; ++ import javax.tv.service.SIRequest; ++ import javax.tv.service.SIRequestorImpl; ++ +++import org.bluray.net.BDLocator; +++ ++ public final class SIElementFilter extends ServiceFilter ++ { ++ public SIElementFilter(SIElement element) throws FilterNotSupportedException { +++ if (element == null) +++ throw new NullPointerException(); +++ +++ try { +++ new BDLocator(element.getLocator().toExternalForm()); +++ } catch (Exception e) { +++ System.err.println("Invalid SI element: " + e + " at " + org.videolan.Logger.dumpStack(e)); +++ throw new FilterNotSupportedException(); +++ } +++ ++ this.element = element; ++ } ++ ++@@ -37,9 +50,9 @@ public final class SIElementFilter extends ServiceFilter ++ ++ public boolean accept(Service service) { ++ SIRequestorImpl requestor = new SIRequestorImpl(); ++- +++ ++ SIRequest req = service.retrieveDetails(requestor); ++- +++ ++ // TODO: This may be a bit excessive ++ int timeout = 0; ++ while (!requestor.getResponse() && timeout < 1000) { ++@@ -48,27 +61,27 @@ public final class SIElementFilter extends ServiceFilter ++ } catch (InterruptedException e) { ++ // ignore ++ } ++- +++ ++ timeout++; ++ } ++- +++ ++ // if we still don't have a response just cancel the request ++ if (!requestor.getResponse()) { ++ if (req != null) ++ req.cancel(); ++ } ++- +++ ++ if 
(requestor.getResult() == null) ++ return false; ++- +++ ++ SIRetrievable[] rets = requestor.getResult(); ++ for (int i = 0; i < rets.length; i++) { ++ if (rets[i].equals(element)) ++ return true; ++ } ++- +++ ++ return false; ++ } ++- +++ ++ SIElement element; ++ } ++diff --git a/src/libbluray/bdj/java/javax/tv/service/navigation/ServiceTypeFilter.java b/src/libbluray/bdj/java/javax/tv/service/navigation/ServiceTypeFilter.java ++index 0333302..250821e 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/navigation/ServiceTypeFilter.java +++++ b/src/libbluray/bdj/java/javax/tv/service/navigation/ServiceTypeFilter.java ++@@ -24,6 +24,8 @@ import javax.tv.service.ServiceType; ++ public final class ServiceTypeFilter extends ServiceFilter { ++ public ServiceTypeFilter(ServiceType type) ++ { +++ if (type == null) +++ throw new NullPointerException(); ++ this.type = type; ++ } ++ ++diff --git a/src/libbluray/bdj/java/javax/tv/service/navigation/StreamType.java b/src/libbluray/bdj/java/javax/tv/service/navigation/StreamType.java ++index 5824b44..26dc166 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/navigation/StreamType.java +++++ b/src/libbluray/bdj/java/javax/tv/service/navigation/StreamType.java ++@@ -22,6 +22,8 @@ package javax.tv.service.navigation; ++ public class StreamType { ++ protected StreamType(String name) ++ { +++ if (name == null) +++ throw new NullPointerException(); ++ this.name = name; ++ } ++ ++diff --git a/src/libbluray/bdj/java/javax/tv/service/selection/ServiceContextFactoryImpl.java b/src/libbluray/bdj/java/javax/tv/service/selection/ServiceContextFactoryImpl.java ++index e1e8dea..2940db4 100644 ++--- a/src/libbluray/bdj/java/javax/tv/service/selection/ServiceContextFactoryImpl.java +++++ b/src/libbluray/bdj/java/javax/tv/service/selection/ServiceContextFactoryImpl.java ++@@ -33,12 +33,14 @@ public class ServiceContextFactoryImpl extends ServiceContextFactory { ++ synchronized (ServiceContextFactoryImpl.class) { ++ if (instance == null) 
++ instance = new ServiceContextFactoryImpl(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ public static void shutdown() { ++- instance = null; +++ synchronized (ServiceContextFactoryImpl.class) { +++ instance = null; +++ } ++ } ++ ++ public ServiceContext createServiceContext() ++@@ -60,10 +62,19 @@ public class ServiceContextFactoryImpl extends ServiceContextFactory { ++ } ++ ++ public ServiceContext[] getServiceContexts() { ++- SecurityManager sec = System.getSecurityManager(); ++- if (sec != null) ++- sec.checkPermission(new ServiceContextPermission("access", "own")); ++- return serviceContexts; +++ try { +++ SecurityManager sec = System.getSecurityManager(); +++ if (sec != null) +++ sec.checkPermission(new ServiceContextPermission("access", "own")); +++ +++ ServiceContext[] r = new ServiceContext[1]; +++ r[0] = serviceContexts[0]; +++ return r; +++ +++ } catch (Exception e) { +++ } +++ +++ return new ServiceContext[0]; ++ } ++ ++ private ServiceContext[] serviceContexts; ++diff --git a/src/libbluray/bdj/java/org/bluray/bdplus/Status.java b/src/libbluray/bdj/java/org/bluray/bdplus/Status.java ++index 3f5fcf6..b897b3e 100644 ++--- a/src/libbluray/bdj/java/org/bluray/bdplus/Status.java +++++ b/src/libbluray/bdj/java/org/bluray/bdplus/Status.java ++@@ -28,8 +28,8 @@ public class Status { ++ synchronized (Status.class) { ++ if (instance == null) ++ instance = new Status(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ public static void shutdown() { ++diff --git a/src/libbluray/bdj/java/org/bluray/net/BDLocator.java b/src/libbluray/bdj/java/org/bluray/net/BDLocator.java ++index 6b747cc..a0b593b 100644 ++--- a/src/libbluray/bdj/java/org/bluray/net/BDLocator.java +++++ b/src/libbluray/bdj/java/org/bluray/net/BDLocator.java ++@@ -29,11 +29,11 @@ public class BDLocator extends Locator { ++ super(url); ++ try { ++ ++- if (!url.startsWith("bd://")) ++- throw new InvalidLocatorException(); ++- String str = url.substring(5); ++- if (!parseJar(str) 
&& !parseSound(str) && !parsePlaylist(str)) ++- throw new InvalidLocatorException(); +++ if (!url.startsWith("bd://")) +++ throw new InvalidLocatorException(); +++ String str = url.substring(5); +++ if (!parseJar(str) && !parseSound(str) && !parsePlaylist(str)) +++ throw new InvalidLocatorException(); ++ ++ } catch (InvalidLocatorException e) { ++ System.err.println("Invalid locator: " + url); ++diff --git a/src/libbluray/bdj/java/org/bluray/storage/StorageManager.java b/src/libbluray/bdj/java/org/bluray/storage/StorageManager.java ++index 961c8ec..5ea1c57 100644 ++--- a/src/libbluray/bdj/java/org/bluray/storage/StorageManager.java +++++ b/src/libbluray/bdj/java/org/bluray/storage/StorageManager.java ++@@ -24,8 +24,8 @@ public class StorageManager { ++ synchronized (StorageManager.class) { ++ if (instance == null) ++ instance = new StorageManager(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ protected StorageManager() { ++diff --git a/src/libbluray/bdj/java/org/bluray/ti/selection/TitleContextImpl.java b/src/libbluray/bdj/java/org/bluray/ti/selection/TitleContextImpl.java ++index 0109d2b..48d70be 100644 ++--- a/src/libbluray/bdj/java/org/bluray/ti/selection/TitleContextImpl.java +++++ b/src/libbluray/bdj/java/org/bluray/ti/selection/TitleContextImpl.java ++@@ -38,9 +38,11 @@ import javax.tv.service.selection.ServiceContextPermission; ++ ++ import org.bluray.ti.Title; ++ import org.bluray.ti.TitleImpl; +++ ++ import org.videolan.BDJLoader; ++ import org.videolan.BDJLoaderCallback; ++ import org.videolan.BDJListeners; +++import org.videolan.Logger; ++ import org.videolan.media.content.PlayerManager; ++ ++ public class TitleContextImpl implements TitleContext { ++@@ -71,19 +73,30 @@ public class TitleContextImpl implements TitleContext { ++ } ++ ++ public void start(Title title, boolean restart) throws SecurityException { +++ logger.info("start(" + title.getName() + ", restart=" + restart + ")"); +++ ++ SecurityManager sm = System.getSecurityManager(); 
++ if (sm != null) { ++ sm.checkPermission(new SelectPermission(title.getLocator(), "own")); ++ } ++- ++- if (state == STATE_DESTROYED) +++ if (state == STATE_DESTROYED) { +++ logger.error("start() failed: Title Context already destroyed"); ++ throw new IllegalStateException(); +++ } +++ +++ if (!restart && (this.title == null || !title.equals(this.title))) { +++ /* force restarting of service bound Xlets when title changes */ +++ logger.info("start(): title changed, force restart"); +++ restart = true; +++ } +++ ++ TitleStartAction action = new TitleStartAction(this, (TitleImpl)title); ++ if (!BDJLoader.load((TitleImpl)title, restart, action)) ++ action.loaderDone(false); ++ } ++ ++ public void select(Service service) throws SecurityException { +++ logger.info("select(" + service.getName() + ")"); ++ start((Title)service, true); ++ } ++ ++@@ -96,6 +109,8 @@ public class TitleContextImpl implements TitleContext { ++ } ++ ++ public void stop() throws SecurityException { +++ logger.info("stop()"); +++ ++ SecurityManager sm = System.getSecurityManager(); ++ if (sm != null) { ++ sm.checkPermission(new ServiceContextPermission("stop", "own")); ++@@ -187,4 +202,6 @@ public class TitleContextImpl implements TitleContext { ++ private BDJListeners listeners = new BDJListeners(); ++ private TitleImpl title = null; ++ private int state = STATE_STOPPED; +++ +++ private static final Logger logger = Logger.getLogger(TitleContextImpl.class.getName()); ++ } ++diff --git a/src/libbluray/bdj/java/org/dvb/event/EventManager.java b/src/libbluray/bdj/java/org/dvb/event/EventManager.java ++index 2bf2ea4..844b72d 100644 ++--- a/src/libbluray/bdj/java/org/dvb/event/EventManager.java +++++ b/src/libbluray/bdj/java/org/dvb/event/EventManager.java ++@@ -40,8 +40,8 @@ public class EventManager implements ResourceServer { ++ synchronized (EventManager.class) { ++ if (instance == null) ++ instance = new EventManager(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ public static void 
shutdown() { ++@@ -254,7 +254,7 @@ public class EventManager implements ResourceServer { ++ return false; ++ } ++ ++- private class UserEventItem { +++ private static class UserEventItem { ++ public UserEventItem(BDJXletContext context, UserEventListener listener, ++ ResourceClient client, UserEventRepository userEvents) { ++ this.context = context; ++@@ -272,7 +272,7 @@ public class EventManager implements ResourceServer { ++ public UserEventRepository userEvents; ++ } ++ ++- private class UserEventAction extends BDJAction { +++ private static class UserEventAction extends BDJAction { ++ public UserEventAction(UserEventItem item, UserEvent event) { ++ this.listener = item.listener; ++ this.event = event; ++diff --git a/src/libbluray/bdj/java/org/dvb/io/persistent/FileAttributes.java b/src/libbluray/bdj/java/org/dvb/io/persistent/FileAttributes.java ++index 4c941ff..65c3d29 100644 ++--- a/src/libbluray/bdj/java/org/dvb/io/persistent/FileAttributes.java +++++ b/src/libbluray/bdj/java/org/dvb/io/persistent/FileAttributes.java ++@@ -78,9 +78,9 @@ public class FileAttributes { ++ { ++ boolean r = f.canRead(); ++ boolean w = f.canWrite(); ++- +++ ++ FileAccessPermissions permissions = new FileAccessPermissions(r, w, r, w, r, w); ++- +++ ++ return new FileAttributes(null, permissions, PRIORITY_LOW); ++ } ++ ++diff --git a/src/libbluray/bdj/java/org/dvb/user/UserPreferenceManager.java b/src/libbluray/bdj/java/org/dvb/user/UserPreferenceManager.java ++index 239c966..af86b4e 100644 ++--- a/src/libbluray/bdj/java/org/dvb/user/UserPreferenceManager.java +++++ b/src/libbluray/bdj/java/org/dvb/user/UserPreferenceManager.java ++@@ -31,8 +31,8 @@ public class UserPreferenceManager { ++ synchronized (UserPreferenceManager.class) { ++ if (instance == null) ++ instance = new UserPreferenceManager(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ public void read(Preference p) { ++diff --git a/src/libbluray/bdj/java/org/havi/ui/HScene.java 
b/src/libbluray/bdj/java/org/havi/ui/HScene.java ++index 7937d32..926781a 100644 ++--- a/src/libbluray/bdj/java/org/havi/ui/HScene.java +++++ b/src/libbluray/bdj/java/org/havi/ui/HScene.java ++@@ -239,8 +239,10 @@ public class HScene extends Container implements HComponentOrdering { ++ } ++ ++ public synchronized void dispose() { ++- if (null != BDJXletContext.getCurrentContext()) ++- HSceneFactory.getInstance().dispose(this); +++ HSceneFactory sf = HSceneFactory.getInstance(); +++ if (sf != null) { +++ sf.dispose(this); +++ } ++ } ++ ++ protected void disposeImpl() ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJAppProxy.java b/src/libbluray/bdj/java/org/videolan/BDJAppProxy.java ++index 135c000..72ba458 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJAppProxy.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJAppProxy.java ++@@ -384,8 +384,8 @@ class BDJAppProxy implements DVBJProxy, Runnable { ++ } catch (InterruptedException e) { ++ } ++ } +++ return done; ++ } ++- return done; ++ } ++ ++ public void release() { ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJAppsDatabase.java b/src/libbluray/bdj/java/org/videolan/BDJAppsDatabase.java ++index 13c088f..ca39f12 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJAppsDatabase.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJAppsDatabase.java ++@@ -35,8 +35,8 @@ public class BDJAppsDatabase extends AppsDatabase { ++ synchronized (BDJAppsDatabase.class) { ++ if (instance == null) ++ instance = new BDJAppsDatabase(); +++ return instance; ++ } ++- return instance; ++ } ++ ++ public int size() { ++@@ -106,5 +106,5 @@ public class BDJAppsDatabase extends AppsDatabase { ++ private BDJAppProxy[] appProxys = null; ++ private AppEntry[] appTable = null; ++ ++- protected static BDJAppsDatabase instance = null; +++ private static BDJAppsDatabase instance = null; ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJClassFileTransformer.java 
b/src/libbluray/bdj/java/org/videolan/BDJClassFileTransformer.java ++new file mode 100644 ++index 0000000..988e76e ++--- /dev/null +++++ b/src/libbluray/bdj/java/org/videolan/BDJClassFileTransformer.java ++@@ -0,0 +1,91 @@ +++/* +++ * This file is part of libbluray +++ * Copyright (C) 2015 Petri Hintukainen <phintuka@users.sourceforge.net> +++ * +++ * This library is free software; you can redistribute it and/or +++ * modify it under the terms of the GNU Lesser General Public +++ * License as published by the Free Software Foundation; either +++ * version 2.1 of the License, or (at your option) any later version. +++ * +++ * This library is distributed in the hope that it will be useful, +++ * but WITHOUT ANY WARRANTY; without even the implied warranty of +++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +++ * Lesser General Public License for more details. +++ * +++ * You should have received a copy of the GNU Lesser General Public +++ * License along with this library. If not, see +++ * <http://www.gnu.org/licenses/>. +++ */ +++ +++package org.videolan; +++ +++/** +++ * This is a class which is called by BDJClassLoader +++ * when ClassFormatError is thrown inside defineClass(). +++ * +++ * Some discs have invalid debug info in class files (broken by +++ * malfunctioning obfuscater ?). +++ * We strip debug info from the class and try to load it again. 
+++ * +++ * Penguins of MAdagascar: +++ * java.lang.ClassFormatError: Invalid index 0 in LocalVariableTable' +++ * in class file com/tcs/blr/bluray/pal/fox/controller/d +++ */ +++ +++import org.objectweb.asm.ClassReader; +++import org.objectweb.asm.ClassWriter; +++import org.objectweb.asm.ClassVisitor; +++import org.objectweb.asm.MethodVisitor; +++import org.objectweb.asm.Opcodes; +++import org.objectweb.asm.Attribute; +++ +++class BDJClassFileTransformer +++{ +++ public byte[] transform(byte[] b, int off, int len) +++ throws ClassFormatError +++ { +++ logger.info("Trying to transform broken class file (" + len + " bytes)"); +++ +++ byte[] r = new byte[len]; +++ for (int i = 0; i < len; i++) +++ r[i] = b[i+off]; +++ +++ try { +++ ClassReader cr = new ClassReader(r); +++ ClassWriter cw = new ClassWriter(cr, 0/*ClassWriter.COMPUTE_FRAMES | ClassWriter.COMPUTE_MAXS*/); +++ ClassVisitor cv = new MyClassVisitor(cw); +++ cr.accept(cv, ClassReader.SKIP_DEBUG); +++ return cw.toByteArray(); +++ } catch (Exception e) { +++ logger.error("Failed transforming class: " + e); +++ } +++ +++ return r; +++ } +++ +++ public class MyClassVisitor extends ClassVisitor { +++ public MyClassVisitor(ClassVisitor cv) { +++ super(Opcodes.ASM4, cv); +++ } +++ +++ public MethodVisitor visitMethod(int access, String name, String desc, +++ String signature, String[] exceptions) { +++ MethodVisitor mv = super.visitMethod(access, name, desc, signature, exceptions); +++ //System.err.println("visit method: " + name); +++ return new MyMethodVisitor(mv); +++ } +++ } +++ +++ public class MyMethodVisitor extends MethodVisitor { +++ public MyMethodVisitor(MethodVisitor mv) { +++ super(Opcodes.ASM4, mv); +++ } +++ +++ public void visitAttribute(Attribute attr) { +++ //System.err.println(" attribute: " + attr.type); +++ super.visitAttribute(attr); +++ } +++ } +++ +++ private static final Logger logger = Logger.getLogger(BDJClassFileTransformer.class.getName()); +++} ++diff --git 
a/src/libbluray/bdj/java/org/videolan/BDJClassLoader.java b/src/libbluray/bdj/java/org/videolan/BDJClassLoader.java ++index 733c3e5..2eb3844 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJClassLoader.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJClassLoader.java ++@@ -21,6 +21,7 @@ package org.videolan; ++ ++ import java.net.MalformedURLException; ++ +++import java.io.ByteArrayOutputStream; ++ import java.io.File; ++ import java.io.InputStream; ++ import java.io.IOException; ++@@ -126,7 +127,87 @@ public class BDJClassLoader extends URLClassLoader { ++ } ++ return c; ++ } ++- return super.loadClass(name); +++ +++ try { +++ return super.loadClass(name); +++ } catch (ClassNotFoundException e0) { +++ logger.error("ClassNotFoundException: " + name); +++ throw e0; +++ } catch (Error err) { +++ logger.error("FATAL: " + err); +++ throw err; +++ } +++ } +++ +++ private byte[] loadClassCode(String name) throws ClassNotFoundException { +++ String path = name.replace('.', '/').concat(".class"); +++ +++ URL res = super.findResource(path); +++ if (res == null) { +++ logger.error("loadClassCode(): resource for class " + name + "not found"); +++ throw new ClassNotFoundException(name); +++ } +++ +++ InputStream is = null; +++ ByteArrayOutputStream os = null; +++ try { +++ is = res.openStream(); +++ os = new ByteArrayOutputStream(); +++ byte[] buffer = new byte[0xffff]; +++ while (true) { +++ int r = is.read(buffer); +++ if (r == -1) break; +++ os.write(buffer, 0, r); +++ } +++ +++ return os.toByteArray(); +++ +++ } catch (Exception e) { +++ logger.error("loadClassCode(" + name + ") failed: " + e); +++ throw new ClassNotFoundException(name); +++ +++ } finally { +++ try { +++ if (is != null) +++ is.close(); +++ } catch (IOException ioe) { +++ } +++ try { +++ if (os != null) +++ os.close(); +++ } catch (IOException ioe) { +++ } +++ } +++ } +++ +++ protected Class findClass(String name) throws ClassNotFoundException { +++ try { +++ return super.findClass(name); +++ +++ } 
catch (ClassFormatError ce) { +++ +++ /* try to "fix" broken class file */ +++ /* if we got ClassFormatError, package was already created. */ +++ byte[] b = loadClassCode(name); +++ if (b == null) { +++ logger.error("loadClassCode(" + name + ") failed"); +++ /* this usually kills Xlet ... */ +++ throw ce; +++ } +++ try { +++ b = new BDJClassFileTransformer().transform(b, 0, b.length); +++ return defineClass(b, 0, b.length); +++ } catch (ThreadDeath td) { +++ throw td; +++ } catch (Throwable t) { +++ logger.error("Class rewriting failed: " + t); +++ throw new ClassNotFoundException(name); +++ } +++ +++ } catch (Error er) { +++ logger.error("Unexpected error: " + er + " " + Logger.dumpStack(er)); +++ throw er; +++ } ++ } ++ ++ public URL getResource(String name) { ++@@ -157,4 +238,6 @@ public class BDJClassLoader extends URLClassLoader { ++ } ++ ++ private String xletClass; +++ +++ private static final Logger logger = Logger.getLogger(BDJClassLoader.class.getName()); ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJListeners.java b/src/libbluray/bdj/java/org/videolan/BDJListeners.java ++index 77acf4d..ba3a9c5 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJListeners.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJListeners.java ++@@ -56,6 +56,8 @@ import org.dvb.media.SubtitleListener; ++ import org.dvb.media.SubtitleNotAvailableEvent; ++ import org.dvb.media.SubtitleNotSelectedEvent; ++ import org.dvb.media.SubtitleSelectedEvent; +++import org.dvb.media.VideoFormatListener; +++import org.dvb.media.VideoFormatEvent; ++ ++ public class BDJListeners { ++ private LinkedList listeners = new LinkedList(); ++@@ -220,6 +222,9 @@ public class BDJListeners { ++ event instanceof SubtitleNotSelectedEvent || event instanceof SubtitleSelectedEvent) { ++ ((SubtitleListener)listener).subtitleStatusChanged((EventObject)event); ++ +++ } else if (event instanceof VideoFormatEvent) { +++ 
((VideoFormatListener)listener).receiveVideoFormatEvent((VideoFormatEvent)event); +++ ++ } else if (event instanceof PSR102Status) { ++ ((StatusListener)listener).receive(((PSR102Status)event).value); ++ ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJLoader.java b/src/libbluray/bdj/java/org/videolan/BDJLoader.java ++index 22bd37a..b2bcff3 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJLoader.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJLoader.java ++@@ -44,8 +44,60 @@ import org.videolan.media.content.PlayerManager; ++ ++ public class BDJLoader { ++ +++ private static class FontCacheAction extends BDJAction { +++ public FontCacheAction(InputStream is) { +++ this.fontPath = null; +++ this.is = is; +++ } +++ public FontCacheAction(String fontPath) { +++ this.fontPath = fontPath; +++ this.is = null; +++ } +++ +++ protected void doAction() { +++ try { +++ if (this.is != null) { +++ this.cacheFile = addFontImpl(is); +++ } else { +++ this.cacheFile = addFontImpl(fontPath); +++ } +++ } catch (RuntimeException e) { +++ this.exception = e; +++ } +++ } +++ +++ public File execute() { +++ BDJActionManager.getInstance().putCommand(this); +++ waitEnd(); +++ if (exception != null) { +++ throw exception; +++ } +++ return cacheFile; +++ } +++ +++ private final String fontPath; +++ private final InputStream is; +++ private File cacheFile = null; +++ private RuntimeException exception = null; +++ } +++ ++ /* called by org.dvb.ui.FontFactory */ ++ public static File addFont(InputStream is) { +++ if (BDJXletContext.getCurrentContext() == null) +++ return addFontImpl(is); +++ /* dispatch cache request to privileged thread */ +++ return new FontCacheAction(is).execute(); +++ } +++ +++ /* called by org.dvb.ui.FontFactory */ +++ public static File addFont(String fontFile) { +++ if (BDJXletContext.getCurrentContext() == null) +++ return addFontImpl(fontFile); +++ /* dispatch cache request to privileged thread */ +++ return new FontCacheAction(fontFile).execute(); +++ } 
+++ +++ private static File addFontImpl(InputStream is) { ++ VFSCache localCache = vfsCache; ++ if (localCache != null) { ++ return localCache.addFont(is); ++@@ -53,8 +105,7 @@ public class BDJLoader { ++ return null; ++ } ++ ++- /* called by org.dvb.ui.FontFactory */ ++- public static File addFont(String fontFile) { +++ private static File addFontImpl(String fontFile) { ++ VFSCache localCache = vfsCache; ++ if (localCache != null) { ++ return localCache.addFont(fontFile); ++@@ -134,11 +185,6 @@ public class BDJLoader { ++ throw new InvalidObjectException("bdjo not loaded"); ++ AppEntry[] appTable = bdjo.getAppTable(); ++ ++- // initialize AppCaches ++- if (vfsCache != null) { ++- vfsCache.add(bdjo.getAppCaches()); ++- } ++- ++ // reuse appProxys ++ BDJAppProxy[] proxys = new BDJAppProxy[appTable.length]; ++ AppsDatabase db = AppsDatabase.getAppsDatabase(); ++@@ -147,6 +193,15 @@ public class BDJLoader { ++ AppID id = (AppID)ids.nextElement(); ++ BDJAppProxy proxy = (BDJAppProxy)db.getAppProxy(id); ++ AppEntry entry = (AppEntry)db.getAppAttributes(id); +++ if (proxy == null) { +++ logger.error("AppsDatabase corrupted!"); +++ continue; +++ } +++ if (entry == null) { +++ logger.error("AppsDatabase corrupted!"); +++ proxy.release(); +++ continue; +++ } ++ for (int i = 0; i < appTable.length; i++) { ++ if (id.equals(appTable[i].getIdentifier()) && ++ entry.getInitialClass().equals(appTable[i].getInitialClass())) { ++@@ -155,7 +210,6 @@ public class BDJLoader { ++ proxy.stop(true); ++ } else { ++ logger.info("Keeping xlet " + appTable[i].getInitialClass()); ++- proxy.getXletContext().update(appTable[i], bdjo.getAppCaches()); ++ proxys[i] = proxy; ++ proxy = null; ++ } ++@@ -180,6 +234,11 @@ public class BDJLoader { ++ Libbluray.setUOMask(terminfo.getMenuCallMask(), terminfo.getTitleSearchMask()); ++ Libbluray.setKeyInterest(bdjo.getKeyInterestTable()); ++ +++ // initialize AppCaches +++ if (vfsCache != null) { +++ vfsCache.add(bdjo.getAppCaches()); +++ } +++ ++ // 
initialize appProxys ++ for (int i = 0; i < appTable.length; i++) { ++ if (proxys[i] == null) { ++@@ -196,6 +255,7 @@ public class BDJLoader { ++ } ++ logger.info("Loaded class: " + appTable[i].getInitialClass() + p + " from " + appTable[i].getBasePath() + ".jar"); ++ } else { +++ proxys[i].getXletContext().update(appTable[i], bdjo.getAppCaches()); ++ logger.info("Reused class: " + appTable[i].getInitialClass() + " from " + appTable[i].getBasePath() + ".jar"); ++ } ++ } ++@@ -206,6 +266,19 @@ public class BDJLoader { ++ // notify AppsDatabase ++ ((BDJAppsDatabase)BDJAppsDatabase.getAppsDatabase()).newDatabase(bdjo, proxys); ++ +++ // auto start playlist +++ try { +++ PlayListTable plt = bdjo.getAccessiblePlaylists(); +++ if ((plt != null) && (plt.isAutostartFirst())) { +++ logger.info("Auto-starting playlist"); +++ String[] pl = plt.getPlayLists(); +++ if (pl.length > 0) +++ Manager.createPlayer(new MediaLocator(new BDLocator("bd://PLAYLIST:" + pl[0]))).start(); +++ } +++ } catch (Exception e) { +++ logger.error("loadN(): autoplaylist failed: " + e + "\n" + Logger.dumpStack(e)); +++ } +++ ++ // now run all the xlets ++ for (int i = 0; i < appTable.length; i++) { ++ int code = appTable[i].getControlCode(); ++@@ -222,15 +295,6 @@ public class BDJLoader { ++ ++ logger.info("Finished initializing and starting xlets."); ++ ++- // auto start playlist ++- PlayListTable plt = bdjo.getAccessiblePlaylists(); ++- if ((plt != null) && (plt.isAutostartFirst())) { ++- logger.info("Auto-starting playlist"); ++- String[] pl = plt.getPlayLists(); ++- if (pl.length > 0) ++- Manager.createPlayer(new MediaLocator(new BDLocator("bd://PLAYLIST:" + pl[0]))).start(); ++- } ++- ++ return true; ++ ++ } catch (Throwable e) { ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJSecurityManager.java b/src/libbluray/bdj/java/org/videolan/BDJSecurityManager.java ++index 38f8ac5..8a337ee 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJSecurityManager.java +++++ 
b/src/libbluray/bdj/java/org/videolan/BDJSecurityManager.java ++@@ -76,6 +76,14 @@ final class BDJSecurityManager extends SecurityManager { ++ } ++ deny(perm); ++ } +++ +++ // work around bug in openjdk 7 / 8 +++ // sun.awt.AWTAutoShutdown.notifyThreadBusy is missing doPrivileged() +++ // (fixed in jdk9 / http://hg.openjdk.java.net/jdk9/client/jdk/rev/5b613a3c04be ) +++ if (classDepth("sun.awt.AWTAutoShutdown") > 0) { +++ return; +++ } +++ ++ if (perm.implies(new RuntimePermission("modifyThreadGroup"))) { ++ /* do check here (no need to log failures) */ ++ super.checkPermission(perm); ++@@ -119,6 +127,10 @@ final class BDJSecurityManager extends SecurityManager { ++ return; ++ } ++ } +++ if (perm.getActions().contains("write")) { +++ /* write permissions are handled in checkWrite() */ +++ deny(perm); +++ } ++ } ++ ++ /* Networking */ ++@@ -180,6 +192,10 @@ final class BDJSecurityManager extends SecurityManager { ++ throw new SecurityException("exit denied"); ++ } ++ +++ public void checkSystemClipboardAccess() { +++ throw new SecurityException("clipboard access denied"); +++ } +++ ++ /* ++ * file read access ++ */ ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJThreadGroup.java b/src/libbluray/bdj/java/org/videolan/BDJThreadGroup.java ++index f4bc1dc..4943a7e 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJThreadGroup.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJThreadGroup.java ++@@ -20,9 +20,9 @@ ++ ++ package org.videolan; ++ ++-public class BDJThreadGroup extends ThreadGroup { +++class BDJThreadGroup extends ThreadGroup { ++ ++- public BDJThreadGroup(String name, BDJXletContext context) { +++ protected BDJThreadGroup(String name, BDJXletContext context) { ++ super(name); ++ this.context = context; ++ } ++@@ -45,15 +45,11 @@ public class BDJThreadGroup extends ThreadGroup { ++ } ++ } ++ ++- public BDJXletContext getContext() { +++ protected BDJXletContext getContext() { ++ return context; ++ } ++ ++- public void setContext(BDJXletContext 
context) { ++- this.context = context; ++- } ++- ++- public boolean waitForShutdown(int timeout, int maxThreads) { +++ protected boolean waitForShutdown(int timeout, int maxThreads) { ++ ++ if (parentOf(Thread.currentThread().getThreadGroup()) && maxThreads < 1) { ++ logger.error("Current Thread is contained within ThreadGroup to be disposed."); ++@@ -94,8 +90,6 @@ public class BDJThreadGroup extends ThreadGroup { ++ } catch (IllegalThreadStateException e) { ++ logger.error("ThreadGroup destroy failed: " + e); ++ } ++- ++- context = null; ++ } ++ ++ public void dumpThreads() { ++@@ -115,6 +109,6 @@ public class BDJThreadGroup extends ThreadGroup { ++ } ++ } ++ ++- private BDJXletContext context; +++ private final BDJXletContext context; ++ private static final Logger logger = Logger.getLogger(BDJThreadGroup.class.getName()); ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJUtil.java b/src/libbluray/bdj/java/org/videolan/BDJUtil.java ++index 507c2e7..cc17992 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJUtil.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJUtil.java ++@@ -25,20 +25,31 @@ public class BDJUtil { ++ /** ++ * Make a five digit zero padded string based on an integer ++ * Ex. integer 1 -> string "00001" ++- * +++ * ++ * @param id ++ * @return ++ */ ++- public static String makeFiveDigitStr(int id) +++ public static String makeFiveDigitStr(int id) ++ { +++ if (id < 0 || id > 99999) { +++ System.err.println("Invalid ID: " + id); +++ throw new IllegalArgumentException("Invalid ID " + id); +++ } +++ String s = "" + id; +++ while (s.length() < 5) { +++ s = "0" + s; +++ } +++ return s; +++ /* ++ DecimalFormat fmt = new DecimalFormat(); ++ fmt.setMaximumIntegerDigits(5); ++ fmt.setMinimumIntegerDigits(5); ++ fmt.setGroupingUsed(false); ++- +++ ++ return fmt.format(id); +++ */ ++ } ++- +++ ++ /** ++ * Make a path based on the disc root to an absolute path based on the filesystem of the computer ++ * Ex. 
/BDMV/JAR/00000.jar -> /bluray/disc/mount/point/BDMV/JAR/00000.jar ++@@ -47,6 +58,11 @@ public class BDJUtil { ++ */ ++ public static String discRootToFilesystem(String path) ++ { ++- return System.getProperty("bluray.vfs.root") + path; +++ String vfsRoot = System.getProperty("bluray.vfs.root"); +++ if (vfsRoot == null) { +++ System.err.println("discRootToFilesystem(): disc root not set !"); +++ return path; +++ } +++ return vfsRoot + path; ++ } ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/BDJXletContext.java b/src/libbluray/bdj/java/org/videolan/BDJXletContext.java ++index ae5b3a0..8ee818a 100644 ++--- a/src/libbluray/bdj/java/org/videolan/BDJXletContext.java +++++ b/src/libbluray/bdj/java/org/videolan/BDJXletContext.java ++@@ -70,7 +70,12 @@ public class BDJXletContext implements javax.tv.xlet.XletContext, javax.microedi ++ try { ++ int homeJarID = Integer.parseInt(home); ++ long time = System.currentTimeMillis(); ++- homeMountPoint = MountManager.mount(homeJarID, false) + java.io.File.separator; +++ homeMountPoint = MountManager.mount(homeJarID, false); +++ if (homeMountPoint == null) { +++ logger.error("Failed mounting " + home + ".jar"); +++ } else { +++ homeMountPoint = homeMountPoint + java.io.File.separator; +++ } ++ time = System.currentTimeMillis() - time; ++ logger.info("Mounted Xlet home directory from " + home + ".jar " + ++ "to " + homeMountPoint + "(" + time + "ms)"); ++@@ -80,6 +85,8 @@ public class BDJXletContext implements javax.tv.xlet.XletContext, javax.microedi ++ } ++ ++ public String getXletHome() { +++ if (homeMountPoint == null) +++ logger.error("Home directory not mounted!"); ++ return homeMountPoint; ++ } ++ ++@@ -102,6 +109,8 @@ public class BDJXletContext implements javax.tv.xlet.XletContext, javax.microedi ++ return Integer.toHexString(appid.getAID()); ++ else if (key.equals("org.dvb.application.appid")) ++ return appid; +++ +++ logger.error("unhandled getXletProperty(" + key + ")"); ++ return null; ++ } ++ ++diff --git 
a/src/libbluray/bdj/java/org/videolan/IxcRegistryImpl.java b/src/libbluray/bdj/java/org/videolan/IxcRegistryImpl.java ++index a9fe28d..bae986f 100644 ++--- a/src/libbluray/bdj/java/org/videolan/IxcRegistryImpl.java +++++ b/src/libbluray/bdj/java/org/videolan/IxcRegistryImpl.java ++@@ -260,7 +260,7 @@ public class IxcRegistryImpl { ++ return result; ++ } ++ ++- public class RemoteMethod implements Runnable +++ private class RemoteMethod implements Runnable ++ { ++ final BDJXletContext calleeContext; ++ final BDJXletContext callerContext; ++@@ -276,9 +276,11 @@ public class IxcRegistryImpl { ++ callerContext = BDJXletContext.getCurrentContext(); ++ if (callerContext == null) { ++ logger.error("caller context is null"); +++ throw new RemoteException("no caller context"); ++ } ++ if (context == null) { ++ logger.error("callee context is null"); +++ throw new RemoteException("no callee context"); ++ } ++ calleeContext = context; ++ ++@@ -426,6 +428,18 @@ public class IxcRegistryImpl { ++ throw new IllegalArgumentException("xc not current BDJXletContext"); ++ } ++ +++ if ("/7fff7669/4050/Messenger".equals(path)) { +++ /* known discs: +++ - Terminator Salvation +++ */ +++ try { +++ logger.error("Enabling Ixc delay hack for " + path); +++ Thread.sleep(200L); +++ } catch (InterruptedException ie) { +++ ie.printStackTrace(); +++ } +++ } +++ ++ WrappedRemoteObj wrappedObj = null; ++ synchronized (remoteObjects) { ++ if (!remoteObjects.containsKey(path)) { ++@@ -438,7 +452,7 @@ public class IxcRegistryImpl { ++ } ++ Object remoteObj = wrapOrCopy(wrappedObj, wrappedObj.context, (BDJXletContext)xc); ++ ++- Debug("IxcRegistry.lookup(" + path + ") => " + remoteObj); +++ Debug("IxcRegistry.lookup(" + path + ") => OK"); ++ ++ return (Remote)remoteObj; ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/Libbluray.java b/src/libbluray/bdj/java/org/videolan/Libbluray.java ++index 41af18f..6a97ad7 100644 ++--- a/src/libbluray/bdj/java/org/videolan/Libbluray.java +++++ 
b/src/libbluray/bdj/java/org/videolan/Libbluray.java ++@@ -24,6 +24,8 @@ import java.awt.BDFontMetrics; ++ import java.awt.BDToolkit; ++ import java.awt.event.KeyEvent; ++ import java.io.File; +++import java.util.HashMap; +++import java.util.Map; ++ import java.util.Vector; ++ ++ import javax.media.PackageManager; ++@@ -50,14 +52,7 @@ public class Libbluray { ++ ++ /* hook system properties: make "user.dir" point to current Xlet home directory */ ++ ++- private static boolean propertiesHooked = false; ++- ++ private static void hookProperties() { ++- if (propertiesHooked) { ++- return; ++- } ++- propertiesHooked = true; ++- ++ java.util.Properties p = new java.util.Properties(System.getProperties()) { ++ public String getProperty(String key) { ++ if (key.equals("user.dir")) { ++@@ -65,6 +60,7 @@ public class Libbluray { ++ if (ctx != null) { ++ return ctx.getXletHome(); ++ } +++ System.err.println("getProperty(user.dir): no context ! " + Logger.dumpStack()); ++ } ++ return super.getProperty(key); ++ } ++@@ -72,6 +68,28 @@ public class Libbluray { ++ System.setProperties(p); ++ } ++ +++ private static boolean initOnce = false; +++ private static void initOnce() { +++ if (initOnce) { +++ return; +++ } +++ initOnce = true; +++ +++ /* hook system properties (provide Xlet-specific user.dir) */ +++ try { +++ hookProperties(); +++ } catch (Throwable t) { +++ System.err.println("hookProperties() failed: " + t); +++ } +++ +++ /* hook sockets (limit network connections) */ +++ try { +++ BDJSocketFactory.init(); +++ } catch (Throwable t) { +++ System.err.println("Hooking socket factory failed: " + t + "\n" + Logger.dumpStack(t)); +++ } +++ } +++ ++ private static String canonicalize(String path, boolean create) { ++ try { ++ File dir = new File(path); ++@@ -89,7 +107,7 @@ public class Libbluray { ++ private static void init(long nativePointer, String discID, String discRoot, ++ String persistentRoot, String budaRoot) { ++ ++- hookProperties(); +++ initOnce(); ++ ++ /* set up 
directories */ ++ persistentRoot = canonicalize(persistentRoot, true); ++@@ -185,8 +203,6 @@ public class Libbluray { ++ ++ System.setProperty("bluray.network.connected", "YES"); ++ ++- BDJSocketFactory.init(); ++- ++ try { ++ System.setSecurityManager(new BDJSecurityManager(discRoot, persistentRoot, budaRoot)); ++ } catch (Exception ex) { ++@@ -228,6 +244,7 @@ public class Libbluray { ++ } ++ nativePointer = 0; ++ titleInfos = null; +++ bdjoFiles = null; ++ } ++ ++ /* ++@@ -296,6 +313,10 @@ public class Libbluray { ++ * Disc data ++ */ ++ +++ /* cache parsed .bdjo files */ +++ private static Map bdjoFiles = null; +++ private static Object bdjoFilesLock = new Object(); +++ ++ public static byte[] getAacsData(int type) { ++ return getAacsDataN(nativePointer, type); ++ } ++@@ -305,7 +326,23 @@ public class Libbluray { ++ } ++ ++ public static Bdjo getBdjo(String name) { ++- return getBdjoN(nativePointer, name + ".bdjo"); +++ Bdjo bdjo; +++ synchronized (bdjoFilesLock) { +++ if (bdjoFiles == null) { +++ bdjoFiles = new HashMap(); +++ } else { +++ bdjo = (Bdjo)bdjoFiles.get(name); +++ if (bdjo != null) { +++ return bdjo; +++ } +++ } +++ +++ bdjo = getBdjoN(nativePointer, name + ".bdjo"); +++ if (bdjo != null) { +++ bdjoFiles.put(name, bdjo); +++ } +++ return bdjo; +++ } ++ } ++ ++ public static String[] listBdFiles(String path, boolean onlyBdRom) { ++diff --git a/src/libbluray/bdj/java/org/videolan/MountManager.java b/src/libbluray/bdj/java/org/videolan/MountManager.java ++index 83d6870..6f6fd52 100644 ++--- a/src/libbluray/bdj/java/org/videolan/MountManager.java +++++ b/src/libbluray/bdj/java/org/videolan/MountManager.java ++@@ -185,6 +185,7 @@ public class MountManager { ++ new PrivilegedAction() { ++ public Object run() { ++ if (mountPoint.decRefCount() < 1) { +++ logger.error("Removing JAR " + id + " from mount cache"); ++ mountPoints.remove(id); ++ } ++ return null; ++@@ -221,7 +222,7 @@ public class MountManager { ++ if (mountPoint != null) { ++ return 
mountPoint.getMountPoint(); ++ } else { ++- logger.info("JAR " + jarId + " not mounted"); +++ logger.error("JAR " + jarId + " not mounted"); ++ } ++ return null; ++ } ++@@ -247,6 +248,7 @@ public class MountManager { ++ if (dir != null) { ++ return dir.getAbsolutePath(); ++ } +++ logger.error("getMountPoint(): already unmounted !"); ++ return null; ++ } ++ ++@@ -274,8 +276,8 @@ public class MountManager { ++ return classFiles; ++ } ++ ++- public boolean setClassFiles() { ++- return classFiles == true; +++ public void setClassFiles() { +++ classFiles = true; ++ } ++ ++ private File dir; ++diff --git a/src/libbluray/bdj/java/org/videolan/TitleInfo.java b/src/libbluray/bdj/java/org/videolan/TitleInfo.java ++index 1c1075b..10dc62a 100644 ++--- a/src/libbluray/bdj/java/org/videolan/TitleInfo.java +++++ b/src/libbluray/bdj/java/org/videolan/TitleInfo.java ++@@ -24,7 +24,7 @@ public class TitleInfo { ++ this.objType = objType; ++ this.playbackType = playbackType; ++ if (objType == OBJ_TYPE_BDJ) ++- this.bdjoName = (new java.text.DecimalFormat("00000")).format(idRef); +++ this.bdjoName = (BDJUtil.makeFiveDigitStr(idRef)); ++ else ++ this.hdmvOID = idRef; ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/VFSCache.java b/src/libbluray/bdj/java/org/videolan/VFSCache.java ++index 2bcfbe9..22fe1f0 100644 ++--- a/src/libbluray/bdj/java/org/videolan/VFSCache.java +++++ b/src/libbluray/bdj/java/org/videolan/VFSCache.java ++@@ -270,16 +270,19 @@ class VFSCache { ++ accessFileSynced(absPath); ++ } ++ ++- protected synchronized void accessFileSynced(String absPath) { +++ private synchronized void accessFileSynced(String absPath) { ++ ++ if (inAccessFile) { ++ /* avoid recursion from SecurityManager checks */ ++ return; ++ } ++ ++- inAccessFile = true; ++- accessFileImp(absPath); ++- inAccessFile = false; +++ try { +++ inAccessFile = true; +++ accessFileImp(absPath); +++ } finally { +++ inAccessFile = false; +++ } ++ } ++ ++ private void accessFileImp(String absPath) { ++@@ 
-297,7 +300,7 @@ class VFSCache { ++ } ++ ++ /* do not cache .m2ts streams */ ++- if (relPath.startsWith("BDMV" + File.separator + "STREAM" + File.separator)) { +++ if (relPath.startsWith(streamDir)) { ++ return; ++ } ++ ++@@ -352,6 +355,7 @@ class VFSCache { ++ ++ private static final String jarDir = "BDMV" + File.separator + "JAR" + File.separator; ++ private static final String fontDir = "BDMV" + File.separator + "AUXDATA" + File.separator; +++ private static final String streamDir = "BDMV" + File.separator + "STREAM" + File.separator; ++ ++ private static final Logger logger = Logger.getLogger(VFSCache.class.getName()); ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/media/content/BDHandler.java b/src/libbluray/bdj/java/org/videolan/media/content/BDHandler.java ++index 3d43579..92269f1 100644 ++--- a/src/libbluray/bdj/java/org/videolan/media/content/BDHandler.java +++++ b/src/libbluray/bdj/java/org/videolan/media/content/BDHandler.java ++@@ -67,10 +67,13 @@ public abstract class BDHandler implements Player, ServiceContentHandler { ++ ++ public BDHandler() { ++ ownerContext = BDJXletContext.getCurrentContext(); ++- ++- PlayerAction action = new PlayerAction(this, PlayerAction.ACTION_INIT, null); ++- BDJActionManager.getInstance().putCommand(action); ++- action.waitEnd(); +++ if (ownerContext == null) { +++ doInitAction(); +++ } else { +++ PlayerAction action = new PlayerAction(this, PlayerAction.ACTION_INIT, null); +++ BDJActionManager.getInstance().putCommand(action); +++ action.waitEnd(); +++ } ++ } ++ ++ private void doInitAction() { ++diff --git a/src/libbluray/bdj/java/org/videolan/media/content/PlayerManager.java b/src/libbluray/bdj/java/org/videolan/media/content/PlayerManager.java ++index cc06e84..d45358b 100644 ++--- a/src/libbluray/bdj/java/org/videolan/media/content/PlayerManager.java +++++ b/src/libbluray/bdj/java/org/videolan/media/content/PlayerManager.java ++@@ -19,6 +19,7 @@ ++ package org.videolan.media.content; ++ ++ import 
java.util.ArrayList; +++import org.videolan.Logger; ++ ++ public class PlayerManager { ++ ++@@ -81,7 +82,7 @@ public class PlayerManager { ++ return; ++ } ++ ++- System.err.println("unknown player type: " + player.getClass().getName()); +++ logger.error("unknown player type: " + player.getClass().getName()); ++ } ++ ++ protected boolean allocateResource(BDHandler player) { ++@@ -91,6 +92,9 @@ public class PlayerManager { ++ } ++ synchronized (playlistPlayerLock) { ++ if (playlistPlayer != null && player != playlistPlayer) { +++ +++ logger.info("allocateResource(): Stopping old playlist player"); +++ ++ playlistPlayer.stop(); ++ playlistPlayer.deallocate(); ++ } ++@@ -108,7 +112,7 @@ public class PlayerManager { ++ return true; ++ } ++ ++- System.err.println("unknown player type: " + player.getClass().getName()); +++ logger.error("allocateResource(): unknown player type: " + player.getClass().getName()); ++ return false; ++ } ++ ++@@ -153,4 +157,6 @@ public class PlayerManager { ++ } ++ } ++ } +++ +++ private static final Logger logger = Logger.getLogger(PlayerManager.class.getName()); ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/media/content/playlist/BackgroundVideoPresentationControlImpl.java b/src/libbluray/bdj/java/org/videolan/media/content/playlist/BackgroundVideoPresentationControlImpl.java ++index 05ae554..21f6de5 100644 ++--- a/src/libbluray/bdj/java/org/videolan/media/content/playlist/BackgroundVideoPresentationControlImpl.java +++++ b/src/libbluray/bdj/java/org/videolan/media/content/playlist/BackgroundVideoPresentationControlImpl.java ++@@ -53,6 +53,8 @@ public class BackgroundVideoPresentationControlImpl extends VideoControl ++ } ++ ++ public boolean setVideoTransformation(VideoTransformation transform) { +++ if (transform == null) +++ return false; ++ setClipRegion(transform.getClipRegion()); ++ HScreenPoint pos = transform.getVideoPosition(); ++ float[] scales = transform.getScalingFactors(); ++@@ -99,40 +101,40 @@ public class 
BackgroundVideoPresentationControlImpl extends VideoControl ++ return new AWTVideoSize( ++ new Rectangle(vd.width, vd.height), ++ new Rectangle(sd.width, sd.height)); ++- } +++ } ++ ++- public Dimension getSourceVideoSize() { ++- return getVideoSize(); ++- } +++ public Dimension getSourceVideoSize() { +++ return getVideoSize(); +++ } ++ ++- public boolean setSize(AWTVideoSize size) { ++- setClipRegion(size.getSource()); ++- setVideoArea(getNormalizedRectangle(getScreenSize(), size.getDestination())); ++- return true; ++- } +++ public boolean setSize(AWTVideoSize size) { +++ setClipRegion(size.getSource()); +++ setVideoArea(getNormalizedRectangle(getScreenSize(), size.getDestination())); +++ return true; +++ } ++ ++- public AWTVideoSize checkSize(AWTVideoSize size) { ++- Dimension vd = getInputVideoSize(); ++- Rectangle sr = size.getSource(); ++- if (sr.x < 0) +++ public AWTVideoSize checkSize(AWTVideoSize size) { +++ Dimension vd = getInputVideoSize(); +++ Rectangle sr = size.getSource(); +++ if (sr.x < 0) +++ sr.x = 0; +++ if ((sr.x + sr.width) > vd.width) { +++ sr.width = vd.width - sr.x; +++ if (sr.width <= 0) { ++ sr.x = 0; ++- if ((sr.x + sr.width) > vd.width) { ++- sr.width = vd.width - sr.x; ++- if (sr.width <= 0) { ++- sr.x = 0; ++- sr.width = 0; ++- } +++ sr.width = 0; ++ } ++- if (sr.y < 0) +++ } +++ if (sr.y < 0) +++ sr.y = 0; +++ if ((sr.y + sr.height) > vd.height) { +++ sr.height = vd.height - sr.y; +++ if (sr.height <= 0) { ++ sr.y = 0; ++- if ((sr.y + sr.height) > vd.height) { ++- sr.height = vd.height - sr.y; ++- if (sr.height <= 0) { ++- sr.y = 0; ++- sr.height = 0; ++- } +++ sr.height = 0; ++ } ++- Rectangle dr = size.getDestination(); ++- return new AWTVideoSize(sr, dr); ++ } +++ Rectangle dr = size.getDestination(); +++ return new AWTVideoSize(sr, dr); +++ } ++ } ++diff --git a/src/libbluray/bdj/java/org/videolan/media/content/playlist/Handler.java b/src/libbluray/bdj/java/org/videolan/media/content/playlist/Handler.java ++index 7e52949..8728628 
100644 ++--- a/src/libbluray/bdj/java/org/videolan/media/content/playlist/Handler.java +++++ b/src/libbluray/bdj/java/org/videolan/media/content/playlist/Handler.java ++@@ -72,6 +72,7 @@ public class Handler extends BDHandler { ++ synchronized (this) { ++ try { ++ locator = new BDLocator(source.getLocator().toExternalForm()); +++ currentLocator = null; ++ } catch (org.davic.net.InvalidLocatorException e) { ++ throw new IncompatibleSourceException(); ++ } ++@@ -294,7 +295,11 @@ public class Handler extends BDHandler { ++ ++ protected void doEndOfMediaReached(int playlist) { ++ synchronized (this) { ++- if (locator == null || locator.getPlayListId() != playlist) { +++ if (locator == null) { +++ System.err.println("endOfMedia(" + playlist + ") ignored: no current locator"); +++ return; +++ } +++ if (locator.getPlayListId() != playlist) { ++ System.err.println("endOfMedia ignored: playlist does not match (" + playlist + " != " + locator.getPlayListId()); ++ return; ++ } ++@@ -336,6 +341,7 @@ public class Handler extends BDHandler { ++ if (pi == null) ++ throw new InvalidPlayListException(); ++ this.locator = locator; +++ this.currentLocator = null; ++ baseMediaTime = 0; ++ if (state == Prefetched) ++ doPrefetch(); ++diff --git a/src/libbluray/bdj/java/org/videolan/media/content/video/dvb/mpeg/drip/BackgroundVideoPresentationControlImpl.java b/src/libbluray/bdj/java/org/videolan/media/content/video/dvb/mpeg/drip/BackgroundVideoPresentationControlImpl.java ++index 3596def..377aacc 100644 ++--- a/src/libbluray/bdj/java/org/videolan/media/content/video/dvb/mpeg/drip/BackgroundVideoPresentationControlImpl.java +++++ b/src/libbluray/bdj/java/org/videolan/media/content/video/dvb/mpeg/drip/BackgroundVideoPresentationControlImpl.java ++@@ -100,23 +100,23 @@ public class BackgroundVideoPresentationControlImpl extends VideoControl ++ Rectangle sr = size.getSource(); ++ if (sr.x < 0) ++ sr.x = 0; ++- if ((sr.x + sr.width) > vd.width) { ++- sr.width = vd.width - sr.x; ++- if 
(sr.width <= 0) { ++- sr.x = 0; ++- sr.width = 0; ++- } +++ if ((sr.x + sr.width) > vd.width) { +++ sr.width = vd.width - sr.x; +++ if (sr.width <= 0) { +++ sr.x = 0; +++ sr.width = 0; ++ } ++- if (sr.y < 0) +++ } +++ if (sr.y < 0) +++ sr.y = 0; +++ if ((sr.y + sr.height) > vd.height) { +++ sr.height = vd.height - sr.y; +++ if (sr.height <= 0) { ++ sr.y = 0; ++- if ((sr.y + sr.height) > vd.height) { ++- sr.height = vd.height - sr.y; ++- if (sr.height <= 0) { ++- sr.y = 0; ++- sr.height = 0; ++- } +++ sr.height = 0; ++ } ++- Rectangle dr = size.getDestination(); ++- return new AWTVideoSize(sr, dr); +++ } +++ Rectangle dr = size.getDestination(); +++ return new AWTVideoSize(sr, dr); ++ } ++ } ++diff --git a/src/libbluray/bdj/native/java_awt_BDFontMetrics.c b/src/libbluray/bdj/native/java_awt_BDFontMetrics.c ++index 3bbd3c3..f84a382 100644 ++--- a/src/libbluray/bdj/native/java_awt_BDFontMetrics.c +++++ b/src/libbluray/bdj/native/java_awt_BDFontMetrics.c ++@@ -135,7 +135,10 @@ static char *_win32_resolve_font(const char *family, int style) ++ ++ memset(&lf, 0, sizeof(lf)); ++ lf.lfCharSet = DEFAULT_CHARSET; ++- MultiByteToWideChar(CP_UTF8, 0, family, -1, lf.lfFaceName, sizeof(lf.lfFaceName)); +++ int length = MultiByteToWideChar(CP_UTF8, 0, family, -1, lf.lfFaceName, LF_FACESIZE); +++ if (!length) { +++ return NULL; +++ } ++ ++ hDC = GetDC(NULL); ++ EnumFontFamiliesExW(hDC, &lf, (FONTENUMPROCW)&EnumFontCallbackW, (LPARAM)&data, 0); ++diff --git a/src/libbluray/bdnav/clpi_parse.c b/src/libbluray/bdnav/clpi_parse.c ++index 365ec0f..f0826de 100644 ++--- a/src/libbluray/bdnav/clpi_parse.c +++++ b/src/libbluray/bdnav/clpi_parse.c ++@@ -39,6 +39,7 @@ ++ #define CLPI_SIG1 ('H' << 24 | 'D' << 16 | 'M' << 8 | 'V') ++ #define CLPI_SIG2A ('0' << 24 | '2' << 16 | '0' << 8 | '0') ++ #define CLPI_SIG2B ('0' << 24 | '1' << 16 | '0' << 8 | '0') +++#define CLPI_SIG2C ('0' << 24 | '2' << 16 | '4' << 8 | '0') ++ ++ static void ++ _human_readable_sig(char *sig, uint32_t s1, uint32_t s2) 
++@@ -129,7 +130,8 @@ _parse_header(BITSTREAM *bits, CLPI_CL *cl) ++ cl->type_indicator2 = bs_read(bits, 32); ++ if (cl->type_indicator != CLPI_SIG1 || ++ (cl->type_indicator2 != CLPI_SIG2A && ++- cl->type_indicator2 != CLPI_SIG2B)) { +++ cl->type_indicator2 != CLPI_SIG2B && +++ cl->type_indicator2 != CLPI_SIG2C)) { ++ ++ char sig[9]; ++ char expect[9]; ++@@ -223,7 +225,7 @@ _parse_sequence(BITSTREAM *bits, CLPI_CL *cl) ++ cl->sequence.num_atc_seq = bs_read(bits, 8); ++ ++ CLPI_ATC_SEQ *atc_seq; ++- atc_seq = malloc(cl->sequence.num_atc_seq * sizeof(CLPI_ATC_SEQ)); +++ atc_seq = calloc(cl->sequence.num_atc_seq, sizeof(CLPI_ATC_SEQ)); ++ cl->sequence.atc_seq = atc_seq; ++ for (ii = 0; ii < cl->sequence.num_atc_seq; ii++) { ++ atc_seq[ii].spn_atc_start = bs_read(bits, 32); ++@@ -254,7 +256,7 @@ _parse_program(BITSTREAM *bits, CLPI_PROG_INFO *program) ++ program->num_prog = bs_read(bits, 8); ++ ++ CLPI_PROG *progs; ++- progs = malloc(program->num_prog * sizeof(CLPI_PROG)); +++ progs = calloc(program->num_prog, sizeof(CLPI_PROG)); ++ program->progs = progs; ++ for (ii = 0; ii < program->num_prog; ii++) { ++ progs[ii].spn_program_sequence_start = bs_read(bits, 32); ++@@ -263,7 +265,7 @@ _parse_program(BITSTREAM *bits, CLPI_PROG_INFO *program) ++ progs[ii].num_groups = bs_read(bits, 8); ++ ++ CLPI_PROG_STREAM *ps; ++- ps = malloc(progs[ii].num_streams * sizeof(CLPI_PROG_STREAM)); +++ ps = calloc(progs[ii].num_streams, sizeof(CLPI_PROG_STREAM)); ++ progs[ii].streams = ps; ++ for (jj = 0; jj < progs[ii].num_streams; jj++) { ++ ps[jj].pid = bs_read(bits, 16); ++@@ -335,7 +337,7 @@ _parse_cpi(BITSTREAM *bits, CLPI_CPI *cpi) ++ cpi->num_stream_pid = bs_read(bits, 8); ++ ++ CLPI_EP_MAP_ENTRY *entry; ++- entry = malloc(cpi->num_stream_pid * sizeof(CLPI_EP_MAP_ENTRY)); +++ entry = calloc(cpi->num_stream_pid, sizeof(CLPI_EP_MAP_ENTRY)); ++ cpi->entry = entry; ++ for (ii = 0; ii < cpi->num_stream_pid; ii++) { ++ entry[ii].pid = bs_read(bits, 16); ++@@ -622,12 +624,12 @@ 
_clean_program(CLPI_PROG_INFO *p) ++ { ++ int ii; ++ ++- for (ii = 0; ii < p->num_prog; ii++) { ++- if (p->progs[ii].streams != NULL) { +++ if (p && p->progs) { +++ for (ii = 0; ii < p->num_prog; ii++) { ++ X_FREE(p->progs[ii].streams); ++ } +++ X_FREE(p->progs); ++ } ++- X_FREE(p->progs); ++ } ++ ++ static void ++@@ -635,15 +637,13 @@ _clean_cpi(CLPI_CPI *cpi) ++ { ++ int ii; ++ ++- for (ii = 0; ii < cpi->num_stream_pid; ii++) { ++- if (cpi->entry[ii].coarse != NULL) { +++ if (cpi && cpi->entry) { +++ for (ii = 0; ii < cpi->num_stream_pid; ii++) { ++ X_FREE(cpi->entry[ii].coarse); ++- } ++- if (cpi->entry[ii].fine != NULL) { ++ X_FREE(cpi->entry[ii].fine); ++ } +++ X_FREE(cpi->entry); ++ } ++- X_FREE(cpi->entry); ++ } ++ ++ void ++@@ -654,15 +654,12 @@ clpi_free(CLPI_CL *cl) ++ if (cl == NULL) { ++ return; ++ } ++- if (cl->clip.atc_delta != NULL) { ++- X_FREE(cl->clip.atc_delta); ++- } ++- for (ii = 0; ii < cl->sequence.num_atc_seq; ii++) { ++- if (cl->sequence.atc_seq[ii].stc_seq != NULL) { +++ X_FREE(cl->clip.atc_delta); +++ if (cl->sequence.atc_seq) { +++ for (ii = 0; ii < cl->sequence.num_atc_seq; ii++) { ++ X_FREE(cl->sequence.atc_seq[ii].stc_seq); ++ } ++- } ++- if (cl->sequence.atc_seq != NULL) { +++ ++ X_FREE(cl->sequence.atc_seq); ++ } ++ ++@@ -796,7 +793,7 @@ clpi_copy(const CLPI_CL* src_cl) ++ } ++ ++ dest_cl->sequence.num_atc_seq = src_cl->sequence.num_atc_seq; ++- dest_cl->sequence.atc_seq = malloc(src_cl->sequence.num_atc_seq * sizeof(CLPI_ATC_SEQ)); +++ dest_cl->sequence.atc_seq = calloc(src_cl->sequence.num_atc_seq, sizeof(CLPI_ATC_SEQ)); ++ for (ii = 0; ii < src_cl->sequence.num_atc_seq; ii++) { ++ dest_cl->sequence.atc_seq[ii].spn_atc_start = src_cl->sequence.atc_seq[ii].spn_atc_start; ++ dest_cl->sequence.atc_seq[ii].offset_stc_id = src_cl->sequence.atc_seq[ii].offset_stc_id; ++@@ -811,7 +808,7 @@ clpi_copy(const CLPI_CL* src_cl) ++ } ++ ++ dest_cl->program.num_prog = src_cl->program.num_prog; ++- dest_cl->program.progs = 
malloc(src_cl->program.num_prog * sizeof(CLPI_PROG)); +++ dest_cl->program.progs = calloc(src_cl->program.num_prog, sizeof(CLPI_PROG)); ++ for (ii = 0; ii < src_cl->program.num_prog; ii++) { ++ dest_cl->program.progs[ii].spn_program_sequence_start = src_cl->program.progs[ii].spn_program_sequence_start; ++ dest_cl->program.progs[ii].program_map_pid = src_cl->program.progs[ii].program_map_pid; ++@@ -831,7 +828,7 @@ clpi_copy(const CLPI_CL* src_cl) ++ } ++ ++ dest_cl->cpi.num_stream_pid = src_cl->cpi.num_stream_pid; ++- dest_cl->cpi.entry = malloc(src_cl->cpi.num_stream_pid * sizeof(CLPI_EP_MAP_ENTRY)); +++ dest_cl->cpi.entry = calloc(src_cl->cpi.num_stream_pid, sizeof(CLPI_EP_MAP_ENTRY)); ++ for (ii = 0; ii < dest_cl->cpi.num_stream_pid; ii++) { ++ dest_cl->cpi.entry[ii].pid = src_cl->cpi.entry[ii].pid; ++ dest_cl->cpi.entry[ii].ep_stream_type = src_cl->cpi.entry[ii].ep_stream_type; ++diff --git a/src/libbluray/bdnav/index_parse.c b/src/libbluray/bdnav/index_parse.c ++index 6c07ba1..64dc5e3 100644 ++--- a/src/libbluray/bdnav/index_parse.c +++++ b/src/libbluray/bdnav/index_parse.c ++@@ -103,8 +103,16 @@ static int _parse_index(BITSTREAM *bs, INDX_ROOT *index) ++ } ++ ++ index->num_titles = bs_read(bs, 16); +++ if (!index->num_titles) { +++ BD_DEBUG(DBG_CRIT, "empty index\n"); +++ return 0; +++ } ++ ++ index->titles = calloc(index->num_titles, sizeof(INDX_TITLE)); +++ if (!index->titles) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ return 0; +++ } ++ ++ for (i = 0; i < index->num_titles; i++) { ++ ++diff --git a/src/libbluray/bdnav/meta_parse.c b/src/libbluray/bdnav/meta_parse.c ++index 50b8c75..a9a7edc 100644 ++--- a/src/libbluray/bdnav/meta_parse.c +++++ b/src/libbluray/bdnav/meta_parse.c ++@@ -81,29 +81,35 @@ static void _parseManifestNode(xmlNode * a_node, META_DL *disclib) ++ } ++ else if (xmlStrEqual(cur_node->parent->name, BAD_CAST_CONST "tableOfContents")) { ++ if (xmlStrEqual(cur_node->name, BAD_CAST_CONST "titleName") && (tmp = xmlGetProp(cur_node, 
BAD_CAST_CONST "titleNumber"))) { ++- int i = disclib->toc_count; ++- disclib->toc_count++; ++- disclib->toc_entries = realloc(disclib->toc_entries, (disclib->toc_count*sizeof(META_TITLE))); ++- disclib->toc_entries[i].title_number = atoi((const char*)tmp); ++- disclib->toc_entries[i].title_name = (char*)xmlNodeGetContent(cur_node); +++ META_TITLE *new_entries = realloc(disclib->toc_entries, ((disclib->toc_count + 1)*sizeof(META_TITLE))); +++ if (new_entries) { +++ int i = disclib->toc_count; +++ disclib->toc_count++; +++ disclib->toc_entries = new_entries; +++ disclib->toc_entries[i].title_number = atoi((const char*)tmp); +++ disclib->toc_entries[i].title_name = (char*)xmlNodeGetContent(cur_node); +++ } ++ XML_FREE(tmp); ++ } ++ } ++ else if (xmlStrEqual(cur_node->parent->name, BAD_CAST_CONST "description")) { ++ if (xmlStrEqual(cur_node->name, BAD_CAST_CONST "thumbnail") && (tmp = xmlGetProp(cur_node, BAD_CAST_CONST "href"))) { ++- uint8_t i = disclib->thumb_count; ++- disclib->thumb_count++; ++- disclib->thumbnails = realloc(disclib->thumbnails, (disclib->thumb_count*sizeof(META_THUMBNAIL))); ++- disclib->thumbnails[i].path = (char *)tmp; ++- if ((tmp = xmlGetProp(cur_node, BAD_CAST_CONST "size"))) { ++- int x = 0, y = 0; ++- sscanf((const char*)tmp, "%ix%i", &x, &y); ++- disclib->thumbnails[i].xres = x; ++- disclib->thumbnails[i].yres = y; ++- XML_FREE(tmp); ++- } ++- else { ++- disclib->thumbnails[i].xres = disclib->thumbnails[i].yres = -1; +++ META_THUMBNAIL *new_thumbnails = realloc(disclib->thumbnails, ((disclib->thumb_count + 1)*sizeof(META_THUMBNAIL))); +++ if (new_thumbnails) { +++ uint8_t i = disclib->thumb_count; +++ disclib->thumb_count++; +++ disclib->thumbnails = new_thumbnails; +++ disclib->thumbnails[i].path = (char *)tmp; +++ if ((tmp = xmlGetProp(cur_node, BAD_CAST_CONST "size"))) { +++ int x = 0, y = 0; +++ sscanf((const char*)tmp, "%ix%i", &x, &y); +++ disclib->thumbnails[i].xres = x; +++ disclib->thumbnails[i].yres = y; +++ XML_FREE(tmp); +++ 
} +++ else { +++ disclib->thumbnails[i].xres = disclib->thumbnails[i].yres = -1; +++ } ++ } ++ } ++ } ++@@ -126,15 +132,18 @@ static void _findMetaXMLfiles(META_ROOT *meta, BD_DISC *disc) ++ if (ent.d_name[0] == '.') ++ continue; ++ else if (strncasecmp(ent.d_name, "bdmt_", 5) == 0) { ++- uint8_t i = meta->dl_count; ++- meta->dl_count++; ++- meta->dl_entries = realloc(meta->dl_entries, (meta->dl_count*sizeof(META_DL))); ++- memset(&meta->dl_entries[i], 0, sizeof(meta->dl_entries[i])); ++- ++- meta->dl_entries[i].filename = str_dup(ent.d_name); ++- strncpy(meta->dl_entries[i].language_code, ent.d_name+5,3); ++- meta->dl_entries[i].language_code[3] = '\0'; ++- str_tolower(meta->dl_entries[i].language_code); +++ META_DL *new_dl_entries = realloc(meta->dl_entries, ((meta->dl_count + 1)*sizeof(META_DL))); +++ if (new_dl_entries) { +++ uint8_t i = meta->dl_count; +++ meta->dl_count++; +++ meta->dl_entries = new_dl_entries; +++ memset(&meta->dl_entries[i], 0, sizeof(meta->dl_entries[i])); +++ +++ meta->dl_entries[i].filename = str_dup(ent.d_name); +++ strncpy(meta->dl_entries[i].language_code, ent.d_name+5,3); +++ meta->dl_entries[i].language_code[3] = '\0'; +++ str_tolower(meta->dl_entries[i].language_code); +++ } ++ } ++ } ++ dir_close(dir); ++@@ -145,6 +154,10 @@ META_ROOT *meta_parse(BD_DISC *disc) ++ { ++ #ifdef HAVE_LIBXML2 ++ META_ROOT *root = calloc(1, sizeof(META_ROOT)); +++ if (!root) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ return NULL; +++ } ++ root->dl_count = 0; ++ ++ xmlDocPtr doc; ++diff --git a/src/libbluray/bdnav/mpls_parse.c b/src/libbluray/bdnav/mpls_parse.c ++index da01f7b..8bfbb8c 100644 ++--- a/src/libbluray/bdnav/mpls_parse.c +++++ b/src/libbluray/bdnav/mpls_parse.c ++@@ -39,6 +39,7 @@ ++ #define MPLS_SIG1 ('M' << 24 | 'P' << 16 | 'L' << 8 | 'S') ++ #define MPLS_SIG2A ('0' << 24 | '2' << 16 | '0' << 8 | '0') ++ #define MPLS_SIG2B ('0' << 24 | '1' << 16 | '0' << 8 | '0') +++#define MPLS_SIG2C ('0' << 24 | '2' << 16 | '4' << 8 | '0') ++ ++ 
static void ++ _human_readable_sig(char *sig, uint32_t s1, uint32_t s2) ++@@ -137,8 +138,9 @@ _parse_appinfo(BITSTREAM *bits, MPLS_AI *ai) ++ ai->random_access_flag = bs_read(bits, 1); ++ ai->audio_mix_flag = bs_read(bits, 1); ++ ai->lossless_bypass_flag = bs_read(bits, 1); +++ ai->mvc_base_view_r_flag = bs_read(bits, 1); ++ // Reserved ++- bs_skip(bits, 13); +++ bs_skip(bits, 12); ++ bs_seek_byte(bits, pos + len); ++ return 1; ++ } ++@@ -155,7 +157,8 @@ _parse_header(BITSTREAM *bits, MPLS_PL *pl) ++ pl->type_indicator2 = bs_read(bits, 32); ++ if (pl->type_indicator != MPLS_SIG1 || ++ (pl->type_indicator2 != MPLS_SIG2A && ++- pl->type_indicator2 != MPLS_SIG2B)) { +++ pl->type_indicator2 != MPLS_SIG2B && +++ pl->type_indicator2 != MPLS_SIG2C)) { ++ ++ char sig[9]; ++ char expect[9]; ++@@ -259,6 +262,7 @@ _parse_stream(BITSTREAM *bits, MPLS_STREAM *s) ++ break; ++ }; ++ s->lang[3] = '\0'; +++ s->ss_offset_sequence_id = 0xFF; ++ ++ bs_seek_byte(bits, pos + len); ++ return 1; ++@@ -882,6 +886,99 @@ _parse_subpath_extension(BITSTREAM *bits, MPLS_PL *pl) ++ } ++ ++ static int +++_parse_stn_ss_extension(BITSTREAM *bits, MPLS_PL *pl) +++{ +++ int ii, s; +++ int64_t pos; +++ +++ for (ii = 0; ii < pl->list_count; ii++) { +++ uint32_t len = bs_read(bits, 16); +++ pos = bs_pos(bits) >> 3; +++ int Fixed_offset_during_PopUp_flag = bs_read(bits, 1); +++ bs_skip(bits, 15); // reserved +++ +++ for (s = 0; s < pl->play_item[ii].stn.num_video; s++) { +++ // stream_entry +++ uint32_t slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ // stream_attributes_ss +++ slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ bs_skip(bits, 10); // reserved +++ bs_skip(bits, 6); // number_of_offset_sequences +++ } +++ +++ for (s = 0; s < pl->play_item[ii].stn.num_pg; s++) { +++ pl->play_item[ii].stn.pg[s].ss_offset_sequence_id = bs_read(bits, 8); +++ +++ bs_skip(bits, 4); // reserved +++ bs_skip(bits, 1); // dialog_region_offset_valid_flag +++ int is_SS_PG = bs_read(bits, 1); 
+++ int is_top_AS_PG_textST = bs_read(bits, 1); +++ int is_bottom_AS_PG_textST = bs_read(bits, 1); +++ if (is_SS_PG) { +++ // stream_entry left eye +++ uint32_t slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ // stream_entry right eye +++ slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ bs_skip(bits, 8); // reserved +++ bs_skip(bits, 8); // PG offset +++ } +++ if (is_top_AS_PG_textST) { +++ // stream_entry +++ uint32_t slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ bs_skip(bits, 8); // reserved +++ bs_skip(bits, 8); // PG offset +++ } +++ if (is_bottom_AS_PG_textST) { +++ // stream_entry +++ uint32_t slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ bs_skip(bits, 8); // reserved +++ bs_skip(bits, 8); // PG offset +++ } +++ } +++ +++ for (s = 0; s < pl->play_item[ii].stn.num_ig; s++) { +++ if (Fixed_offset_during_PopUp_flag) +++ bs_skip(bits, 8); +++ else +++ pl->play_item[ii].stn.ig[s].ss_offset_sequence_id = bs_read(bits, 8); +++ +++ bs_skip(bits, 16); // IG_Plane_offset_during_BB_video +++ bs_skip(bits, 7); // reserved +++ int is_SS_IG = bs_read(bits, 1); +++ if (is_SS_IG) { +++ // stream_entry left eye +++ uint32_t slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ // stream_entry right eye +++ slen = bs_read(bits, 8); +++ bs_skip(bits, slen * 8); +++ +++ bs_skip(bits, 8); // reserved +++ bs_skip(bits, 8); // PG offset +++ } +++ } +++ +++ // Skip to next play item +++ bs_seek_byte(bits, pos + len); +++ } +++ +++ return 0; +++} +++ +++static int ++ _parse_mpls_extension(BITSTREAM *bits, int id1, int id2, void *handle) ++ { ++ MPLS_PL *pl = (MPLS_PL*)handle; ++@@ -895,7 +992,7 @@ _parse_mpls_extension(BITSTREAM *bits, int id1, int id2, void *handle) ++ ++ if (id1 == 2) { ++ if (id2 == 1) { ++- return 0; +++ return _parse_stn_ss_extension(bits, pl); ++ } ++ if (id2 == 2) { ++ // SubPath entries extension ++diff --git a/src/libbluray/bdnav/mpls_parse.h b/src/libbluray/bdnav/mpls_parse.h ++index 
f9f7a18..94add53 100644 ++--- a/src/libbluray/bdnav/mpls_parse.h +++++ b/src/libbluray/bdnav/mpls_parse.h ++@@ -49,6 +49,7 @@ typedef struct ++ uint8_t sv_num_pip_pg_ref; ++ uint8_t *sv_secondary_audio_ref; ++ uint8_t *sv_pip_pg_ref; +++ uint8_t ss_offset_sequence_id; ++ } MPLS_STREAM; ++ ++ typedef struct ++@@ -110,6 +111,7 @@ typedef struct ++ uint8_t random_access_flag; ++ uint8_t audio_mix_flag; ++ uint8_t lossless_bypass_flag; +++ uint8_t mvc_base_view_r_flag; ++ } MPLS_AI; ++ ++ typedef struct ++diff --git a/src/libbluray/bdnav/navigation.c b/src/libbluray/bdnav/navigation.c ++index db7fa9f..cfd7739 100644 ++--- a/src/libbluray/bdnav/navigation.c +++++ b/src/libbluray/bdnav/navigation.c ++@@ -174,6 +174,21 @@ _pl_duration(MPLS_PL *pl) ++ return duration; ++ } ++ +++static uint32_t +++_pl_chapter_count(MPLS_PL *pl) +++{ +++ unsigned ii, chapters = 0; +++ +++ // Count the number of "entry" marks (skipping "link" marks) +++ // This is the number of chapters +++ for (ii = 0; ii < pl->mark_count; ii++) { +++ if (pl->play_mark[ii].mark_type == BD_MARK_ENTRY) { +++ chapters++; +++ } +++ } +++ return chapters; +++} +++ ++ NAV_TITLE_LIST* nav_get_title_list(BD_DISC *disc, uint32_t flags, uint32_t min_title_length) ++ { ++ BD_DIR_H *dir; ++@@ -403,15 +418,20 @@ static void _fill_clip(NAV_TITLE *title, ++ strncpy(&clip->name[5], ".m2ts", 6); ++ clip->clip_id = atoi(mpls_clip[clip->angle].clip_id); ++ ++- file = str_printf("%s.clpi", mpls_clip[clip->angle].clip_id); ++ clpi_free(clip->cl); ++- clip->cl = clpi_get(title->disc, file); ++- X_FREE(file); +++ clip->cl = NULL; +++ +++ file = str_printf("%s.clpi", mpls_clip[clip->angle].clip_id); +++ if (file) { +++ clip->cl = clpi_get(title->disc, file); +++ X_FREE(file); +++ } ++ if (clip->cl == NULL) { ++ clip->start_pkt = 0; ++ clip->end_pkt = 0; ++ return; ++ } +++ ++ switch (connection_condition) { ++ case 5: ++ case 6: ++@@ -441,7 +461,7 @@ static void _fill_clip(NAV_TITLE *title, ++ NAV_TITLE* nav_title_open(BD_DISC
*disc, const char *playlist, unsigned angle) ++ { ++ NAV_TITLE *title = NULL; ++- unsigned ii, ss, chapters = 0; +++ unsigned ii, ss; ++ uint32_t pos = 0; ++ uint32_t time = 0; ++ ++@@ -501,15 +521,8 @@ NAV_TITLE* nav_title_open(BD_DISC *disc, const char *playlist, unsigned angle) ++ } ++ } ++ ++- // Count the number of "entry" marks (skipping "link" marks) ++- // This is the the number of chapters ++- for (ii = 0; ii < title->pl->mark_count; ii++) { ++- if (title->pl->play_mark[ii].mark_type == BD_MARK_ENTRY) { ++- chapters++; ++- } ++- } ++- title->chap_list.count = chapters; ++- title->chap_list.mark = calloc(chapters, sizeof(NAV_MARK)); +++ title->chap_list.count = _pl_chapter_count(title->pl); +++ title->chap_list.mark = calloc(title->chap_list.count, sizeof(NAV_MARK)); ++ title->mark_list.count = title->pl->mark_count; ++ title->mark_list.mark = calloc(title->pl->mark_count, sizeof(NAV_MARK)); ++ ++@@ -526,19 +539,29 @@ void nav_title_close(NAV_TITLE *title) ++ { ++ unsigned ii, ss; ++ ++- for (ss = 0; ss < title->sub_path_count; ss++) { ++- for (ii = 0; ii < title->sub_path[ss].clip_list.count; ii++) { ++- clpi_free(title->sub_path[ss].clip_list.clip[ii].cl); +++ if (!title) +++ return; +++ +++ if (title->sub_path) { +++ for (ss = 0; ss < title->sub_path_count; ss++) { +++ if (title->sub_path[ss].clip_list.clip) { +++ for (ii = 0; ii < title->sub_path[ss].clip_list.count; ii++) { +++ clpi_free(title->sub_path[ss].clip_list.clip[ii].cl); +++ } +++ X_FREE(title->sub_path[ss].clip_list.clip); +++ } ++ } ++- X_FREE(title->sub_path[ss].clip_list.clip); +++ X_FREE(title->sub_path); ++ } ++- X_FREE(title->sub_path); ++ ++- for (ii = 0; ii < title->pl->list_count; ii++) { ++- clpi_free(title->clip_list.clip[ii].cl); +++ if (title->clip_list.clip) { +++ for (ii = 0; ii < title->clip_list.count; ii++) { +++ clpi_free(title->clip_list.clip[ii].cl); +++ } +++ X_FREE(title->clip_list.clip); ++ } +++ ++ mpls_free(title->pl); ++- X_FREE(title->clip_list.clip); ++ 
X_FREE(title->chap_list.mark); ++ X_FREE(title->mark_list.mark); ++ X_FREE(title); ++diff --git a/src/libbluray/bdnav/sound_parse.c b/src/libbluray/bdnav/sound_parse.c ++index c1cbcfb..7c267da 100644 ++--- a/src/libbluray/bdnav/sound_parse.c +++++ b/src/libbluray/bdnav/sound_parse.c ++@@ -65,6 +65,7 @@ static int _sound_parse_attributes(BITSTREAM *bs, SOUND_OBJECT *obj) ++ ++ switch (i = bs_read(bs, 4)) { ++ default: BD_DEBUG(DBG_NAV, "unknown channel configuration code %d\n", i); +++ /* fall thru */ ++ case 1: obj->num_channels = 1; ++ break; ++ case 3: obj->num_channels = 2; ++@@ -72,11 +73,13 @@ static int _sound_parse_attributes(BITSTREAM *bs, SOUND_OBJECT *obj) ++ }; ++ switch (i = bs_read(bs, 4)) { ++ default: BD_DEBUG(DBG_NAV, "unknown sample rate code %d\n", i); +++ /* fall thru */ ++ case 1: obj->sample_rate = 48000; ++ break; ++ }; ++ switch (i = bs_read(bs, 2)) { ++ default: BD_DEBUG(DBG_NAV, "unknown bits per sample code %d\n", i); +++ /* fall thru */ ++ case 1: obj->bits_per_sample = 16; ++ break; ++ }; ++@@ -103,7 +106,15 @@ static int _sound_read_samples(BITSTREAM *bs, SOUND_OBJECT *obj) ++ uint32_t n; ++ uint32_t num_samples = obj->num_frames * obj->num_channels; ++ +++ if (!num_samples) { +++ return 1; +++ } +++ ++ obj->samples = calloc(num_samples, sizeof(uint16_t)); +++ if (!obj->samples) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ return 0; +++ } ++ ++ for (n = 0; n < num_samples; n++) { ++ obj->samples[n] = bs_read(bs, 16); ++@@ -116,13 +127,14 @@ void sound_free(SOUND_DATA **p) ++ { ++ if (p && *p) { ++ ++- unsigned i; ++- for (i = 0 ; i < (*p)->num_sounds; i++) { ++- X_FREE((*p)->sounds[i].samples); ++- } ++- ++- X_FREE((*p)->sounds); +++ if ((*p)->sounds) { +++ unsigned i; +++ for (i = 0 ; i < (*p)->num_sounds; i++) { +++ X_FREE((*p)->sounds[i].samples); +++ } ++ +++ X_FREE((*p)->sounds); +++ } ++ X_FREE(*p); ++ } ++ } ++@@ -150,21 +162,29 @@ static SOUND_DATA *_sound_parse(BD_FILE_H *fp) ++ bs_skip(&bs, 8); /* reserved */ ++ num_sounds 
= bs_read(&bs, 8); ++ ++- if (data_len < 1) { +++ if (data_len < 1 || num_sounds < 1) { ++ BD_DEBUG(DBG_NAV | DBG_CRIT, "empty database\n"); ++ goto error; ++ } ++ ++ data_offsets = calloc(num_sounds, sizeof(uint32_t)); ++ data = calloc(1, sizeof(SOUND_DATA)); +++ if (!data_offsets || !data) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ goto error; +++ } ++ data->num_sounds = num_sounds; ++ data->sounds = calloc(num_sounds, sizeof(SOUND_OBJECT)); +++ if (!data->sounds) { +++ BD_DEBUG(DBG_CRIT, "out of memory\n"); +++ goto error; +++ } ++ ++ /* parse headers */ ++ ++ for (i = 0; i < data->num_sounds; i++) { ++ if (!_sound_parse_index(&bs, data_offsets + i, &data->sounds[i])) { ++- BD_DEBUG(DBG_NAV | DBG_CRIT, "error parsing sound %d attribues\n", i); +++ BD_DEBUG(DBG_NAV | DBG_CRIT, "error parsing sound %d attributes\n", i); ++ goto error; ++ } ++ } ++diff --git a/src/libbluray/bluray.c b/src/libbluray/bluray.c ++index eba9c5e..27beed7 100644 ++--- a/src/libbluray/bluray.c +++++ b/src/libbluray/bluray.c ++@@ -42,6 +42,7 @@ ++ #include "hdmv/hdmv_vm.h" ++ #include "hdmv/mobj_parse.h" ++ #include "decoders/graphics_controller.h" +++#include "decoders/hdmv_pids.h" ++ #include "decoders/m2ts_filter.h" ++ #include "decoders/overlay.h" ++ #include "disc/disc.h" ++@@ -93,6 +94,7 @@ typedef struct { ++ /* */ ++ uint8_t eof_hit; ++ uint8_t encrypted_block_cnt; +++ uint8_t seek_flag; /* used to fine-tune first read after seek */ ++ ++ M2TS_FILTER *m2ts_filter; ++ } BD_STREAM; ++@@ -202,7 +204,9 @@ static void _init_event_queue(BLURAY *bd) ++ { ++ if (!bd->event_queue) { ++ bd->event_queue = calloc(1, sizeof(struct bd_event_queue_s)); ++- bd_mutex_init(&bd->event_queue->mutex); +++ if (bd->event_queue) { +++ bd_mutex_init(&bd->event_queue->mutex); +++ } ++ } else { ++ bd_mutex_lock(&bd->event_queue->mutex); ++ bd->event_queue->in = 0; ++@@ -794,7 +798,15 @@ static int _preload_m2ts(BLURAY *bd, BD_PRELOAD *p) ++ ++ /* allocate buffer */ ++ p->clip_size = 
(size_t)st.clip_size; ++- p->buf = realloc(p->buf, p->clip_size); +++ uint8_t* tmp = (uint8_t*)realloc(p->buf, p->clip_size); +++ if (!tmp) { +++ BD_DEBUG(DBG_BLURAY | DBG_CRIT, "_preload_m2ts(): out of memory\n"); +++ _close_m2ts(&st); +++ _close_preload(p); +++ return 0; +++ } +++ +++ p->buf = tmp; ++ ++ /* read clip to buffer */ ++ ++@@ -847,6 +859,7 @@ static int64_t _seek_stream(BLURAY *bd, BD_STREAM *st, ++ } ++ ++ st->int_buf_off = 6144; +++ st->seek_flag = 1; ++ ++ return st->clip_pos; ++ } ++@@ -939,6 +952,7 @@ static void _fill_disc_info(BLURAY *bd, BD_ENC_INFO *enc_info) ++ bd->disc_info.bdplus_handled = enc_info->bdplus_handled; ++ bd->disc_info.bdplus_gen = enc_info->bdplus_gen; ++ bd->disc_info.bdplus_date = enc_info->bdplus_date; +++ bd->disc_info.no_menu_support = enc_info->no_menu_support; ++ ++ bd->disc_info.udf_volume_id = disc_volume_id(bd->disc); ++ ++@@ -1085,6 +1099,10 @@ static void _fill_disc_info(BLURAY *bd, BD_ENC_INFO *enc_info) ++ indx_free(&index); ++ } ++ +++ if (!bd->disc_info.first_play_supported || !bd->disc_info.top_menu_supported) { +++ bd->disc_info.no_menu_support = 1; +++ } +++ ++ if (bd->disc_info.bdj_detected) { ++ BDID_DATA *bdid = bdid_get(bd->disc); /* parse id.bdmv */ ++ if (bdid) { ++@@ -1624,6 +1642,25 @@ int64_t bd_seek_time(BLURAY *bd, uint64_t tick) ++ return bd->s_pos; ++ } ++ +++int64_t bd_find_seek_point(BLURAY *bd, uint64_t tick) +++{ +++ uint32_t clip_pkt, out_pkt; +++ NAV_CLIP *clip; +++ +++ tick /= 2; +++ +++ if (bd->title && +++ tick < bd->title->duration) { +++ +++ // Find the closest access unit to the requested position +++ clip = nav_time_search(bd->title, (uint32_t)tick, &clip_pkt, &out_pkt); +++ +++ return (int64_t)out_pkt * 192; +++ } +++ +++ return bd->s_pos; +++} +++ ++ uint64_t bd_tell_time(BLURAY *bd) ++ { ++ uint32_t clip_pkt = 0, out_pkt = 0, out_time = 0; ++@@ -1956,6 +1993,19 @@ static int _bd_read(BLURAY *bd, unsigned char *buf, int len) ++ /* fatal error */ ++ return -1; ++ } +++ +++ /* 
finetune seek point (avoid skipping PAT/PMT/PCR) */ +++ if (BD_UNLIKELY(st->seek_flag)) { +++ st->seek_flag = 0; +++ +++ /* rewind if previous packets contain PAT/PMT/PCR */ +++ while (st->int_buf_off >= 192 && TS_PID(bd->int_buf + st->int_buf_off - 192) <= HDMV_PID_PCR) { +++ st->clip_pos -= 192; +++ st->int_buf_off -= 192; +++ bd->s_pos -= 192; +++ } +++ } +++ ++ } ++ if (size > (unsigned int)6144 - st->int_buf_off) { ++ size = 6144 - st->int_buf_off; ++@@ -2081,12 +2131,14 @@ static int _preload_textst_subpath(BLURAY *bd) ++ gc_add_font(bd->graphics_controller, NULL, -1); ++ for (ii = 0; ii < bd->st_textst.clip->cl->font_info.font_count; ii++) { ++ char *file = str_printf("%s.otf", bd->st_textst.clip->cl->font_info.font[ii].file_id); ++- uint8_t *data = NULL; ++- size_t size = disc_read_file(bd->disc, "BDMV" DIR_SEP "AUXDATA", file, &data); ++- if (data && size > 0 && gc_add_font(bd->graphics_controller, data, size) < 0) { ++- X_FREE(data); +++ if (file) { +++ uint8_t *data = NULL; +++ size_t size = disc_read_file(bd->disc, "BDMV" DIR_SEP "AUXDATA", file, &data); +++ if (data && size > 0 && gc_add_font(bd->graphics_controller, data, size) < 0) { +++ X_FREE(data); +++ } +++ X_FREE(file); ++ } ++- X_FREE(file); ++ } ++ gc_run(bd->graphics_controller, GC_CTRL_PG_CHARCODE, char_code, NULL); ++ ++@@ -2278,6 +2330,8 @@ static int _open_playlist(BLURAY *bd, const char *f_name, unsigned angle) ++ ++ _preload_subpaths(bd); ++ +++ bd->st0.seek_flag = 1; +++ ++ return 1; ++ } ++ return 0; ++@@ -2285,9 +2339,14 @@ static int _open_playlist(BLURAY *bd, const char *f_name, unsigned angle) ++ ++ int bd_select_playlist(BLURAY *bd, uint32_t playlist) ++ { ++- char *f_name = str_printf("%05d.mpls", playlist); +++ char *f_name; ++ int result; ++ +++ f_name = str_printf("%05d.mpls", playlist); +++ if (!f_name) { +++ return 0; +++ } +++ ++ bd_mutex_lock(&bd->mutex); ++ ++ if (bd->title_list) { ++@@ -2504,6 +2563,9 @@ uint32_t bd_get_titles(BLURAY *bd, uint8_t flags, uint32_t 
min_title_length) ++ ++ int bd_get_main_title(BLURAY *bd) ++ { +++ if (!bd) { +++ return -1; +++ } ++ if (bd->title_type != title_undef) { ++ BD_DEBUG(DBG_CRIT | DBG_BLURAY, "bd_get_main_title() can't be used with BluRay menus\n"); ++ } ++@@ -2571,6 +2633,7 @@ static BLURAY_TITLE_INFO* _fill_title_info(NAV_TITLE* title, uint32_t title_idx, ++ BLURAY_CLIP_INFO *ci = &title_info->clips[ii]; ++ NAV_CLIP *nc = &title->clip_list.clip[ii]; ++ +++ ci->idx = nc->clip_id; ++ ci->pkt_count = nc->end_pkt - nc->start_pkt; ++ ci->start_time = (uint64_t)nc->title_time * 2; ++ ci->in_time = (uint64_t)pi->in_time * 2; ++@@ -2597,6 +2660,8 @@ static BLURAY_TITLE_INFO* _fill_title_info(NAV_TITLE* title, uint32_t title_idx, ++ _copy_streams(nc, ci->sec_audio_streams, pi->stn.secondary_audio, ci->sec_audio_stream_count); ++ } ++ +++ title_info->mvc_base_view_r_flag = title->pl->app_info.mvc_base_view_r_flag; +++ ++ return title_info; ++ } ++ ++@@ -2637,9 +2702,14 @@ BLURAY_TITLE_INFO* bd_get_title_info(BLURAY *bd, uint32_t title_idx, unsigned an ++ ++ BLURAY_TITLE_INFO* bd_get_playlist_info(BLURAY *bd, uint32_t playlist, unsigned angle) ++ { ++- char *f_name = str_printf("%05d.mpls", playlist); +++ char *f_name; ++ BLURAY_TITLE_INFO *title_info; ++ +++ f_name = str_printf("%05d.mpls", playlist); +++ if (!f_name) { +++ return NULL; +++ } +++ ++ title_info = _get_title_info(bd, 0, playlist, f_name, angle); ++ ++ X_FREE(f_name); ++@@ -2694,9 +2764,9 @@ int bd_set_player_setting(BLURAY *bd, uint32_t idx, uint32_t value) ++ bd_mutex_lock(&bd->mutex); ++ ++ bd->decode_pg = !!value; ++- result = bd_psr_write_bits(bd->regs, PSR_PG_STREAM, ++- (!!value) << 31, ++- 0x80000000); +++ result = !bd_psr_write_bits(bd->regs, PSR_PG_STREAM, +++ (!!value) << 31, +++ 0x80000000); ++ ++ bd_mutex_unlock(&bd->mutex); ++ return result; ++@@ -2705,7 +2775,7 @@ int bd_set_player_setting(BLURAY *bd, uint32_t idx, uint32_t value) ++ for (i = 0; i < sizeof(map) / sizeof(map[0]); i++) { ++ if (idx == map[i].idx) 
{ ++ bd_mutex_lock(&bd->mutex); ++- result = !bd_psr_setting_write(bd->regs, idx, value); +++ result = !bd_psr_setting_write(bd->regs, map[i].psr, value); ++ bd_mutex_unlock(&bd->mutex); ++ return result; ++ } ++@@ -2756,6 +2826,9 @@ void bd_select_stream(BLURAY *bd, uint32_t stream_type, uint32_t stream_id, uint ++ bd_mutex_lock(&bd->mutex); ++ ++ switch (stream_type) { +++ case BLURAY_AUDIO_STREAM: +++ bd_psr_write(bd->regs, PSR_PRIMARY_AUDIO_ID, stream_id & 0xff); +++ break; ++ case BLURAY_PG_TEXTST_STREAM: ++ bd_psr_write_bits(bd->regs, PSR_PG_STREAM, ++ ((!!enable_flag)<<31) | (stream_id & 0xfff), ++@@ -3076,6 +3149,11 @@ static int _play_title(BLURAY *bd, unsigned title) ++ return 0; ++ } ++ +++ if (bd->disc_info.no_menu_support) { +++ BD_DEBUG(DBG_BLURAY | DBG_CRIT, "bd_play(): no menu support\n"); +++ return 0; +++ } +++ ++ /* first play object ? */ ++ if (title == BLURAY_TITLE_FIRST_PLAY) { ++ ++@@ -3203,6 +3281,12 @@ static int _try_play_title(BLURAY *bd, unsigned title) ++ int bd_play_title(BLURAY *bd, unsigned title) ++ { ++ int ret; +++ +++ if (title == BLURAY_TITLE_TOP_MENU) { +++ /* menu call uses different UO mask */ +++ return bd_menu_call(bd, -1); +++ } +++ ++ bd_mutex_lock(&bd->mutex); ++ ret = _try_play_title(bd, title); ++ bd_mutex_unlock(&bd->mutex); ++@@ -3561,7 +3645,37 @@ int bd_get_sound_effect(BLURAY *bd, unsigned sound_id, BLURAY_SOUND_EFFECT *effe ++ } ++ ++ /* ++- * +++ * Direct file access +++ */ +++ +++static int _bd_read_file(BLURAY *bd, const char *dir, const char *file, void **data, int64_t *size) +++{ +++ if (!bd || !bd->disc || !file || !data || !size) { +++ BD_DEBUG(DBG_CRIT, "Invalid arguments for bd_read_file()\n"); +++ return 0; +++ } +++ +++ *data = NULL; +++ *size = (int64_t)disc_read_file(bd->disc, dir, file, (uint8_t**)data); +++ if (!*data || *size < 0) { +++ BD_DEBUG(DBG_BLURAY, "bd_read_file() failed\n"); +++ X_FREE(*data); +++ return 0; +++ } +++ +++ BD_DEBUG(DBG_BLURAY, "bd_read_file(): read %"PRId64" bytes from 
%s"DIR_SEP"%s\n", +++ *size, dir, file); +++ return 1; +++} +++ +++int bd_read_file(BLURAY *bd, const char *path, void **data, int64_t *size) +++{ +++ return _bd_read_file(bd, NULL, path, data, size); +++} +++ +++ +++/* +++ * Metadata ++ */ ++ ++ const struct meta_dl *bd_get_meta(BLURAY *bd) ++@@ -3598,6 +3712,15 @@ const struct meta_dl *bd_get_meta(BLURAY *bd) ++ return meta; ++ } ++ +++int bd_get_meta_file(BLURAY *bd, const char *name, void **data, int64_t *size) +++{ +++ return _bd_read_file(bd, DIR_SEP "BDMV" DIR_SEP "META" DIR_SEP "DL", name, data, size); +++} +++ +++/* +++ * Database access +++ */ +++ ++ struct clpi_cl *bd_get_clpi(BLURAY *bd, unsigned clip_ref) ++ { ++ if (bd->title && clip_ref < bd->title->clip_list.count) { ++@@ -3655,3 +3778,28 @@ void bd_free_bdjo(struct bdjo_data *obj) ++ (void)obj; ++ #endif ++ } +++ +++int bd_get_clip_infos(BLURAY *bd, unsigned clip, uint64_t *clip_start_time, uint64_t *stream_start_time, uint64_t *pos, uint64_t *duration) +++{ +++ if (bd && bd->title && bd->title->clip_list.count > clip) { +++ if (clip_start_time) +++ *clip_start_time = (uint64_t)bd->title->clip_list.clip[clip].title_time << 1; +++ if (stream_start_time) +++ *stream_start_time = (uint64_t)bd->title->clip_list.clip[clip].in_time << 1; +++ if (pos) +++ *pos = (uint64_t)bd->title->clip_list.clip[clip].title_pkt * 192; +++ if (duration) +++ *duration = (uint64_t)bd->title->clip_list.clip[clip].duration << 1; +++ +++ return 1; +++ } +++ return 0; +++} +++ +++struct mpls_pl* bd_get_title_mpls(BLURAY * bd) +++{ +++ if (bd && bd->title) { +++ return bd->title->pl; +++ } +++ return NULL; +++} ++diff --git a/src/libbluray/bluray.h b/src/libbluray/bluray.h ++index 6ade74b..6e74df4 100644 ++--- a/src/libbluray/bluray.h +++++ b/src/libbluray/bluray.h ++@@ -32,6 +32,7 @@ extern "C" { ++ */ ++ ++ #include <stdint.h> +++#include "bdnav/clpi_data.h" ++ ++ #define TITLES_ALL 0 /**< all titles. */ ++ #define TITLES_FILTER_DUP_TITLE 0x01 /**< remove duplicate titles. 
*/ ++@@ -119,6 +120,9 @@ typedef struct { ++ char bdj_disc_id[33]; /* (BD-J) disc ID */ ++ ++ const char *udf_volume_id; /* optional UDF volume identifier */ +++ +++ uint8_t no_menu_support; /* 1 if this disc can't be played using on-disc menus */ +++ ++ } BLURAY_DISC_INFO; ++ ++ /* ++@@ -216,6 +220,7 @@ typedef struct bd_stream_info { ++ } BLURAY_STREAM_INFO; ++ ++ typedef struct bd_clip { +++ uint32_t idx; ++ uint32_t pkt_count; ++ uint8_t still_mode; ++ uint16_t still_time; /* seconds */ ++@@ -266,6 +271,8 @@ typedef struct bd_title_info { ++ ++ uint32_t mark_count; ++ BLURAY_TITLE_MARK *marks; +++ +++ uint8_t mvc_base_view_r_flag; ++ } BLURAY_TITLE_INFO; ++ ++ /* ++@@ -355,12 +362,29 @@ const BLURAY_DISC_INFO *bd_get_disc_info(BLURAY *bd); ++ * If information is provided in multiple languages, currently ++ * selected language (BLURAY_PLAYER_SETTING_MENU_LANG) is used. ++ * +++ * Referenced thumbnail images should be read with bd_get_meta_file(). +++ * ++ * @param bd BLURAY object ++ * @return META_DL (disclib) object, NULL on error ++ */ ++ struct meta_dl; ++ const struct meta_dl *bd_get_meta(BLURAY *bd); ++ +++/** +++ * +++ * Read metadata file from BluRay disc. +++ * +++ * Allocate large enough memory block and read file contents. +++ * Caller must free the memory block with free(). 
+++ * +++ * @param bd BLURAY object +++ * @param file_name name of metadata file +++ * @param data where to store pointer to file data +++ * @param size where to store file size +++ * @return 1 on success, 0 on error +++ */ +++int bd_get_meta_file(BLURAY *bd, const char *file_name, void **data, int64_t *size); +++ ++ ++ /* ++ * Title selection without on-disc menus ++@@ -441,6 +465,16 @@ uint32_t bd_get_current_title(BLURAY *bd); ++ ++ /** ++ * +++ * Find the byte position to specific time in 90Khz ticks +++ * +++ * @param bd BLURAY object +++ * @param tick tick count +++ * @return byte position +++ */ +++int64_t bd_find_seek_point(BLURAY *bd, uint64_t tick); +++ +++/** +++ * ++ * Read from currently selected title file, decrypt if possible ++ * ++ * @param bd BLURAY object ++@@ -536,6 +570,7 @@ void bd_seamless_angle_change(BLURAY *bd, unsigned angle); ++ * @param stream_id stream number (1..N) ++ * @param enable_flag set to 0 to disable streams of this type ++ */ +++#define BLURAY_AUDIO_STREAM 0 ++ #define BLURAY_PG_TEXTST_STREAM 1 ++ ++ void bd_select_stream(BLURAY *bd, uint32_t stream_type, uint32_t stream_id, uint32_t enable_flag); ++@@ -963,7 +998,6 @@ int bd_mouse_select(BLURAY *bd, int64_t pts, uint16_t x, uint16_t y); ++ ++ /* access to internal information */ ++ ++-struct clpi_cl; ++ /** ++ * ++ * Get copy of clip information for requested playitem. ++@@ -1001,6 +1035,43 @@ void bd_free_bdjo(struct bdjo_data *); ++ int bd_start_bdj(BLURAY *bd, const char* start_object); // start BD-J from the specified BD-J object (should be a 5 character string) ++ void bd_stop_bdj(BLURAY *bd); // shutdown BD-J and clean up resources ++ +++/** +++ * +++ * Read a file from BluRay Virtual File System. +++ * +++ * Allocate large enough memory block and read file contents. +++ * Caller must free the memory block with free().
+++ * +++ * @param bd BLURAY object +++ * @param path path to the file (relative to disc root) +++ * @param data where to store pointer to allocated data +++ * @param size where to store file size +++ * @return 1 on success, 0 on error +++ */ +++int bd_read_file(BLURAY *, const char *path, void **data, int64_t *size); +++ +++/** +++ * +++ * Get information about the clip +++ * +++ * @param bd BLURAY object +++ * @param clip clip index +++ * @param clip_start_time start of the clip (in the total title) (in 90khz) +++ * @param stream_start_time first pts in the clip (in 90khz) +++ * @param pos byte position of the clip (absolute) +++ * @param duration duration of the clip (in 90khz) +++ */ +++int bd_get_clip_infos(BLURAY *bd, unsigned clip, uint64_t *clip_start_time, uint64_t *stream_start_time, uint64_t *pos, uint64_t *duration); +++ +++/** +++ * Get the MPLS struct of the current title +++ * +++ * @param bd BLURAY object +++ * @return the MPLS struct +++ * +++ * Lifetime of the MPLS pointer is limited to the lifetime of the BD title +++ */ +++struct mpls_pl* bd_get_title_mpls(BLURAY * bd); ++ ++ #ifdef __cplusplus ++ } ++diff --git a/src/libbluray/decoders/graphics_controller.c b/src/libbluray/decoders/graphics_controller.c ++index dabde1c..d3c775a 100644 ++--- a/src/libbluray/decoders/graphics_controller.c +++++ b/src/libbluray/decoders/graphics_controller.c ++@@ -825,6 +825,8 @@ void gc_free(GRAPHICS_CONTROLLER **p) ++ ++ bd_mutex_destroy(&gc->mutex); ++ +++ X_FREE(gc->saved_bog_data); +++ ++ X_FREE(*p); ++ } ++ } ++diff --git a/src/libbluray/decoders/hdmv_pids.h b/src/libbluray/decoders/hdmv_pids.h ++index ac5bc6a..45a55f3 100644 ++--- a/src/libbluray/decoders/hdmv_pids.h +++++ b/src/libbluray/decoders/hdmv_pids.h ++@@ -61,5 +61,12 @@ ++ #define IS_HDMV_PID_IG(pid) ((pid) >= HDMV_PID_IG_FIRST && (pid) <= HDMV_PID_IG_LAST) ++ #define IS_HDMV_PID_TEXTST(pid) ((pid) == HDMV_PID_TEXTST) ++ +++/* +++ * Extract PID from HDMV MPEG-TS packet +++ */ +++ +++#define
TS_PID(buf) \ +++ ((((buf)[4+1] & 0x1f) << 8) | (buf)[4+2]) +++ ++ ++ #endif // _HDMV_PIDS_H_ ++diff --git a/src/libbluray/decoders/overlay.h b/src/libbluray/decoders/overlay.h ++index 6a31218..7daa478 100644 ++--- a/src/libbluray/decoders/overlay.h +++++ b/src/libbluray/decoders/overlay.h ++@@ -20,6 +20,10 @@ ++ #ifndef BD_OVERLAY_H_ ++ #define BD_OVERLAY_H_ ++ +++#ifdef __cplusplus +++extern "C" { +++#endif +++ ++ #include <stdint.h> ++ ++ #define BD_OVERLAY_INTERFACE_VERSION 2 ++@@ -199,4 +203,8 @@ typedef struct bd_argb_buffer_s { ++ ++ } BD_ARGB_BUFFER; ++ +++#ifdef __cplusplus +++} +++#endif +++ ++ #endif // BD_OVERLAY_H_ ++diff --git a/src/libbluray/decoders/textst_render.c b/src/libbluray/decoders/textst_render.c ++index 8d1527e..0e87d4b 100644 ++--- a/src/libbluray/decoders/textst_render.c +++++ b/src/libbluray/decoders/textst_render.c ++@@ -74,6 +74,10 @@ TEXTST_RENDER *textst_render_init(void) ++ #ifdef HAVE_FT2 ++ TEXTST_RENDER *p = calloc(1, sizeof(TEXTST_RENDER)); ++ +++ if (!p) { +++ return NULL; +++ } +++ ++ if (!FT_Init_FreeType(&p->ft_lib)) { ++ return p; ++ } ++diff --git a/src/libbluray/disc/aacs.c b/src/libbluray/disc/aacs.c ++index 217ef6f..9ae8efb 100644 ++--- a/src/libbluray/disc/aacs.c +++++ b/src/libbluray/disc/aacs.c ++@@ -47,6 +47,8 @@ struct bd_aacs { ++ fptr_p_void get_device_binding_id; ++ fptr_p_void get_device_nonce; ++ fptr_p_void get_media_key; +++ +++ int impl_id; ++ }; ++ ++ ++@@ -58,15 +60,19 @@ static void _libaacs_close(BD_AACS *p) ++ } ++ } ++ ++-void libaacs_unload(BD_AACS **p) +++static void _unload(BD_AACS *p) ++ { ++- if (p && *p) { ++- _libaacs_close(*p); +++ _libaacs_close(p); ++ ++- if ((*p)->h_libaacs) { ++- dl_dlclose((*p)->h_libaacs); ++- } +++ if (p->h_libaacs) { +++ dl_dlclose(p->h_libaacs); +++ } +++} ++ +++void libaacs_unload(BD_AACS **p) +++{ +++ if (p && *p) { +++ _unload(*p); ++ X_FREE(*p); ++ } ++ } ++@@ -82,7 +88,7 @@ int libaacs_required(void *have_file_handle, int (*have_file)(void *, const char ++ 
return 0; ++ } ++ ++-static void *_open_libaacs(void) +++static void *_open_libaacs(int *impl_id) ++ { ++ const char * const libaacs[] = { ++ getenv("LIBAACS_PATH"), ++@@ -91,10 +97,11 @@ static void *_open_libaacs(void) ++ }; ++ unsigned ii; ++ ++- for (ii = 0; ii < sizeof(libaacs) / sizeof(libaacs[0]); ii++) { +++ for (ii = *impl_id; ii < sizeof(libaacs) / sizeof(libaacs[0]); ii++) { ++ if (libaacs[ii]) { ++ void *handle = dl_dlopen(libaacs[ii], "0"); ++ if (handle) { +++ *impl_id = ii; ++ BD_DEBUG(DBG_BLURAY, "Using %s for AACS\n", libaacs[ii]); ++ return handle; ++ } ++@@ -105,11 +112,15 @@ static void *_open_libaacs(void) ++ return NULL; ++ } ++ ++-BD_AACS *libaacs_load(void) +++static BD_AACS *_load(int impl_id) ++ { ++ BD_AACS *p = calloc(1, sizeof(BD_AACS)); +++ if (!p) { +++ return NULL; +++ } +++ p->impl_id = impl_id; ++ ++- p->h_libaacs = _open_libaacs(); +++ p->h_libaacs = _open_libaacs(&p->impl_id); ++ if (!p->h_libaacs) { ++ X_FREE(p); ++ return NULL; ++@@ -140,6 +151,11 @@ BD_AACS *libaacs_load(void) ++ return p; ++ } ++ +++BD_AACS *libaacs_load(void) +++{ +++ return _load(0); +++} +++ ++ int libaacs_open(BD_AACS *p, const char *device, ++ void *file_open_handle, void *file_open_fp, ++ const char *keyfile_path) ++@@ -177,6 +193,22 @@ int libaacs_open(BD_AACS *p, const char *device, ++ BD_DEBUG(DBG_BLURAY, "aacs_open() not found\n"); ++ } ++ +++ if (error_code) { +++ /* failed. try next aacs implementation if available. 
*/ +++ BD_AACS *p2 = _load(p->impl_id + 1); +++ if (p2) { +++ if (!libaacs_open(p2, device, file_open_handle, file_open_fp, keyfile_path)) { +++ /* succeed - swap implementations */ +++ _unload(p); +++ *p = *p2; +++ X_FREE(p2); +++ return 0; +++ } +++ /* failed - report original errors */ +++ libaacs_unload(&p2); +++ } +++ } +++ ++ if (p->aacs) { ++ if (aacs_get_mkb_version) { ++ p->mkbv = aacs_get_mkb_version(p->aacs); ++diff --git a/src/libbluray/disc/bdplus.c b/src/libbluray/disc/bdplus.c ++index b8c4d57..363719f 100644 ++--- a/src/libbluray/disc/bdplus.c +++++ b/src/libbluray/disc/bdplus.c ++@@ -107,6 +107,9 @@ static void *_libbdplus_open(void) ++ BD_BDPLUS *libbdplus_load(void) ++ { ++ BD_BDPLUS *p = calloc(1, sizeof(BD_BDPLUS)); +++ if (!p) { +++ return NULL; +++ } ++ ++ BD_DEBUG(DBG_BDPLUS, "attempting to load libbdplus\n"); ++ ++@@ -241,10 +244,12 @@ BD_BDPLUS_ST *libbdplus_m2ts(BD_BDPLUS *p, uint32_t clip_id, uint64_t pos) ++ if (!p->m2ts) { ++ /* use old API */ ++ BD_BDPLUS_ST *ret = calloc(1, sizeof(BD_BDPLUS_ST)); ++- ret->lib = p; ++- ret->st = NULL; ++- p->title(p->bdplus, clip_id); ++- p->seek(p->bdplus, pos); +++ if (ret) { +++ ret->lib = p; +++ ret->st = NULL; +++ p->title(p->bdplus, clip_id); +++ p->seek(p->bdplus, pos); +++ } ++ return ret; ++ } ++ ++@@ -258,9 +263,11 @@ BD_BDPLUS_ST *libbdplus_m2ts(BD_BDPLUS *p, uint32_t clip_id, uint64_t pos) ++ p->m2ts_close(st); ++ } else { ++ BD_BDPLUS_ST *ret = calloc(1, sizeof(BD_BDPLUS_ST)); ++- ret->lib = p; ++- ret->st = st; ++- BD_DEBUG(DBG_BLURAY | DBG_CRIT, "BD+ active for clip %05d.m2ts\n", clip_id); +++ if (ret) { +++ ret->lib = p; +++ ret->st = st; +++ BD_DEBUG(DBG_BLURAY | DBG_CRIT, "BD+ active for clip %05d.m2ts\n", clip_id); +++ } ++ return ret; ++ } ++ } ++diff --git a/src/libbluray/disc/dec.c b/src/libbluray/disc/dec.c ++index 694646e..1c8a601 100644 ++--- a/src/libbluray/disc/dec.c +++++ b/src/libbluray/disc/dec.c ++@@ -158,6 +158,10 @@ static int _bdrom_have_file(void *p, const char *dir, 
const char *file) ++ char *path; ++ ++ path = str_printf("%s" DIR_SEP "%s", dir, file); +++ if (!path) { +++ return 0; +++ } +++ ++ fp = dev->pf_file_open_bdrom(dev->file_open_bdrom_handle, path); ++ X_FREE(path); ++ ++@@ -175,6 +179,8 @@ static int _libaacs_init(BD_DEC *dec, struct dec_dev *dev, ++ int result; ++ const uint8_t *disc_id; ++ +++ memset(i, 0, sizeof(*i)); +++ ++ libaacs_unload(&dec->aacs); ++ ++ i->aacs_detected = libaacs_required((void*)dev, _bdrom_have_file); ++@@ -201,7 +207,7 @@ static int _libaacs_init(BD_DEC *dec, struct dec_dev *dev, ++ } ++ ++ if (result) { ++- BD_DEBUG(DBG_BLURAY | DBG_CRIT, "aacs_open() failed!\n"); +++ BD_DEBUG(DBG_BLURAY | DBG_CRIT, "aacs_open() failed: %d!\n", result); ++ libaacs_unload(&dec->aacs); ++ return 0; ++ } ++@@ -255,6 +261,13 @@ static int _libbdplus_init(BD_DEC *dec, struct dec_dev *dev, ++ i->bdplus_gen = libbdplus_get_gen(dec->bdplus); ++ i->bdplus_date = libbdplus_get_date(dec->bdplus); ++ i->bdplus_handled = 1; +++ +++ if (i->bdplus_date == 0) { +++ // libmmbd -> no menu support +++ BD_DEBUG(DBG_BLURAY | DBG_CRIT, "WARNING: using libmmbd for BD+. 
On-disc menus will not work.\n"); +++ i->no_menu_support = 1; +++ } +++ ++ return 1; ++ } ++ ++diff --git a/src/libbluray/disc/disc.c b/src/libbluray/disc/disc.c ++index ecd53e3..757b6ed 100644 ++--- a/src/libbluray/disc/disc.c +++++ b/src/libbluray/disc/disc.c ++@@ -65,7 +65,11 @@ static BD_FILE_H *_bdrom_open_path(void *p, const char *rel_path) ++ char *abs_path; ++ ++ abs_path = str_printf("%s%s", disc->disc_root, rel_path); ++- fp = file_open(abs_path, "rb"); +++ if (!abs_path) { +++ return NULL; +++ } +++ +++ fp = file_open(abs_path, "rbS"); ++ X_FREE(abs_path); ++ ++ return fp; ++@@ -78,6 +82,10 @@ static BD_DIR_H *_bdrom_open_dir(void *p, const char *dir) ++ char *path; ++ ++ path = str_printf("%s%s", disc->disc_root, dir); +++ if (!path) { +++ return NULL; +++ } +++ ++ dp = dir_open(path); ++ X_FREE(path); ++ ++@@ -96,8 +104,10 @@ static BD_FILE_H *_overlay_open_path(BD_DISC *p, const char *rel_path) ++ ++ if (p->overlay_root) { ++ char *abs_path = str_printf("%s%s", p->overlay_root, rel_path); ++- fp = file_open(abs_path, "rb"); ++- X_FREE(abs_path); +++ if (abs_path) { +++ fp = file_open(abs_path, "rb"); +++ X_FREE(abs_path); +++ } ++ } ++ ++ bd_mutex_unlock(&p->ovl_mutex); ++@@ -113,8 +123,10 @@ static BD_DIR_H *_overlay_open_dir(BD_DISC *p, const char *dir) ++ ++ if (p->overlay_root) { ++ char *abs_path = str_printf("%s%s", p->disc_root, dir); ++- dp = dir_open_default()(abs_path); ++- X_FREE(abs_path); +++ if (abs_path) { +++ dp = dir_open_default()(abs_path); +++ X_FREE(abs_path); +++ } ++ } ++ ++ bd_mutex_unlock(&p->ovl_mutex); ++@@ -165,7 +177,7 @@ static void _comb_dir_append(BD_DIR_H *dp, BD_DIRENT *entry) ++ } ++ ++ /* append */ ++- priv = realloc(priv, sizeof(*priv) + priv->count * sizeof(BD_DIRENT)); +++ priv = realloc(dp->internal, sizeof(*priv) + priv->count * sizeof(BD_DIRENT)); ++ if (!priv) { ++ return; ++ } ++@@ -183,6 +195,10 @@ static BD_DIR_H *_combine_dirs(BD_DIR_H *ovl, BD_DIR_H *rom) ++ dp->read = _comb_dir_read; ++ dp->close = 
_comb_dir_close; ++ dp->internal = calloc(1, sizeof(COMB_DIR)); +++ if (!dp->internal) { +++ X_FREE(dp); +++ goto out; +++ } ++ ++ while (!dir_read(ovl, &entry)) { ++ _comb_dir_append(dp, &entry); ++@@ -191,6 +207,8 @@ static BD_DIR_H *_combine_dirs(BD_DIR_H *ovl, BD_DIR_H *rom) ++ _comb_dir_append(dp, &entry); ++ } ++ } +++ +++ out: ++ dir_close(ovl); ++ dir_close(rom); ++ ++@@ -342,6 +360,10 @@ BD_FILE_H *disc_open_file(BD_DISC *p, const char *dir, const char *file) ++ char *path; ++ ++ path = str_printf("%s" DIR_SEP "%s", dir, file); +++ if (!path) { +++ return NULL; +++ } +++ ++ fp = disc_open_path(p, path); ++ X_FREE(path); ++ ++@@ -377,7 +399,11 @@ size_t disc_read_file(BD_DISC *disc, const char *dir, const char *file, ++ ++ *data = NULL; ++ ++- fp = disc_open_file(disc, dir, file); +++ if (dir) { +++ fp = disc_open_file(disc, dir, file); +++ } else { +++ fp = disc_open_path(disc, file); +++ } ++ if (!fp) { ++ return 0; ++ } ++@@ -454,7 +480,7 @@ int disc_cache_bdrom_file(BD_DISC *p, const char *rel_path, const char *cache_pa ++ BD_DEBUG(DBG_FILE | DBG_CRIT, "error caching file %s\n", rel_path); ++ file_close(fp_out); ++ file_close(fp_in); ++- file_unlink(cache_path); +++ (void)file_unlink(cache_path); ++ return -1; ++ } ++ } ++diff --git a/src/libbluray/disc/enc_info.h b/src/libbluray/disc/enc_info.h ++index d45d891..47ca94f 100644 ++--- a/src/libbluray/disc/enc_info.h +++++ b/src/libbluray/disc/enc_info.h ++@@ -34,6 +34,8 @@ typedef struct bd_enc_info { ++ uint8_t disc_id[20]; ++ uint8_t bdplus_gen; ++ uint32_t bdplus_date; +++ +++ uint8_t no_menu_support; ++ } BD_ENC_INFO; ++ ++ #endif /* _BD_DISC_ENC_INFO_H_ */ ++diff --git a/src/libbluray/disc/udf_fs.c b/src/libbluray/disc/udf_fs.c ++index 1eec761..3e438ca 100644 ++--- a/src/libbluray/disc/udf_fs.c +++++ b/src/libbluray/disc/udf_fs.c ++@@ -67,6 +67,9 @@ static int64_t _file_read(BD_FILE_H *file, uint8_t *buf, int64_t size) ++ BD_FILE_H *udf_file_open(void *udf, const char *filename) ++ { ++ BD_FILE_H 
*file = calloc(1, sizeof(BD_FILE_H)); +++ if (!file) { +++ return NULL; +++ } ++ ++ BD_DEBUG(DBG_FILE, "Opening UDF file %s... (%p)\n", filename, (void*)file); ++ ++@@ -116,6 +119,9 @@ static int _dir_read(BD_DIR_H *dir, BD_DIRENT *entry) ++ BD_DIR_H *udf_dir_open(void *udf, const char* dirname) ++ { ++ BD_DIR_H *dir = calloc(1, sizeof(BD_DIR_H)); +++ if (!dir) { +++ return NULL; +++ } ++ ++ BD_DEBUG(DBG_DIR, "Opening UDF dir %s... (%p)\n", dirname, (void*)dir); ++ ++diff --git a/src/libbluray/hdmv/mobj_print.c b/src/libbluray/hdmv/mobj_print.c ++index 5c5313e..4361a76 100644 ++--- a/src/libbluray/hdmv/mobj_print.c +++++ b/src/libbluray/hdmv/mobj_print.c ++@@ -159,6 +159,7 @@ static const char * const psr_info[128] = { ++ /* PSR127 */ NULL, ++ }; ++ +++#if 0 ++ static const char * const insn_groups[4] = { ++ "BRANCH", ++ "COMPARE", ++@@ -175,6 +176,7 @@ static const char * const insn_group_set[8] = { ++ "SET", ++ "SETSYSTEM", ++ }; +++#endif ++ ++ static const char * const insn_opt_set[32] = { ++ NULL, ++diff --git a/src/util/logging.c b/src/util/logging.c ++index b8ef1f5..62e6b59 100644 ++--- a/src/util/logging.c +++++ b/src/util/logging.c ++@@ -81,19 +81,34 @@ void bd_debug(const char *file, int line, uint32_t mask, const char *format, ... ++ ++ if (mask & debug_mask) { ++ const char *f = strrchr(file, DIR_SEP_CHAR); ++- char buffer[4096], *pt = buffer; +++ char buffer[4096]; ++ va_list args; +++ int len, len2; ++ ++- pt += sprintf(buffer, "%s:%d: ", f ? f + 1 : file, line); +++ len = sprintf(buffer, "%s:%d: ", f ? 
f + 1 : file, line); +++ if (len < 0) { +++ return; +++ } ++ ++ va_start(args, format); ++- vsnprintf(pt, sizeof(buffer) - (size_t)(intptr_t)(pt - buffer) - 1, format, args); +++ len2 = vsnprintf(buffer + len, sizeof(buffer) - len - 1, format, args); ++ va_end(args); ++ +++ if (len2 < 0) { +++ return; +++ } +++ ++ if (log_func) { +++ buffer[sizeof(buffer)-1] = 0; ++ log_func(buffer); +++ ++ } else { ++- fprintf(logfile, "%s", buffer); +++ len += len2; +++ if ((size_t)len >= sizeof(buffer)) { +++ len = sizeof(buffer); +++ } +++ +++ fwrite(buffer, len, 1, logfile); ++ } ++ } ++ } ++diff --git a/src/util/refcnt.h b/src/util/refcnt.h ++index b839eba..9164921 100644 ++--- a/src/util/refcnt.h +++++ b/src/util/refcnt.h ++@@ -20,6 +20,10 @@ ++ #ifndef BD_REFCNT_H_ ++ #define BD_REFCNT_H_ ++ +++#ifdef __cplusplus +++extern "C" { +++#endif +++ ++ #include "attributes.h" ++ ++ #include <stddef.h> ++@@ -53,4 +57,8 @@ void bd_refcnt_inc(const void *obj); ++ void bd_refcnt_dec(const void *obj); ++ #endif ++ +++#ifdef __cplusplus +++} +++#endif +++ ++ #endif // BD_REFCNT_H_ + +From 5e19f7192303245587548c2564d1e1711019a565 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 2 Mar 2016 19:40:47 +0000 +Subject: [PATCH 48/67] [VideoPlayer] Added new msdk-mvc decoder. 
+ +--- + xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.cpp | 61 ++++++++++++++++++++++ + xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h | 4 ++ + 2 files changed, 65 insertions(+) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.cpp +index 83b1c5639c80020cd53a30844b4f1bb0b45507cb..b075c263d54c7078b69b86127ee023d42f8d5d20 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.cpp +@@ -24,6 +24,7 @@ + #include "utils/log.h" + #include "cores/FFmpeg.h" + #include "Util.h" ++#include <assert.h> + + #ifdef TARGET_WINDOWS + #pragma comment(lib, "avcodec.lib") +@@ -37,6 +38,7 @@ + + extern "C" { + #include "libswscale/swscale.h" ++#include "libavutil/intreadwrite.h" + } + + // allocate a new picture (AV_PIX_FMT_YUV420P) +@@ -402,6 +404,65 @@ double CDVDCodecUtils::NormalizeFrameduration(double frameduration, bool *match) + } + } + ++bool CDVDCodecUtils::IsH264AnnexB(std::string format, AVStream *avstream) ++{ ++ assert(avstream->codec->codec_id == AV_CODEC_ID_H264 || avstream->codec->codec_id == AV_CODEC_ID_H264_MVC); ++ if (avstream->codec->extradata_size < 4) ++ return true; ++ if (avstream->codec->extradata[0] == 1) ++ return false; ++ if (format == "avi") ++ { ++ BYTE *src = avstream->codec->extradata; ++ unsigned startcode = AV_RB32(src); ++ if (startcode == 0x00000001 || (startcode & 0xffffff00) == 0x00000100) ++ return true; ++ if (avstream->codec->codec_tag == MKTAG('A', 'V', 'C', '1') || avstream->codec->codec_tag == MKTAG('a', 'v', 'c', '1')) ++ return false; ++ } ++ return true; ++} ++ ++bool CDVDCodecUtils::ProcessH264MVCExtradata(uint8_t *data, int data_size, uint8_t **mvc_data, int *mvc_data_size) ++{ ++ uint8_t* extradata = data; ++ int extradata_size = data_size; ++ ++ if (extradata_size > 4 && *(char *)extradata == 1) ++ { ++ // Find "mvcC" atom ++ uint32_t state = -1; ++ int i = 0; ++ for (; i < extradata_size; i++) ++ { ++ 
state = (state << 8) | extradata[i]; ++ if (state == MKBETAG('m', 'v', 'c', 'C')) ++ break; ++ } ++ if (i >= 8 && i < extradata_size) ++ { ++ // Update pointers to the start of the mvcC atom ++ extradata = extradata + i - 7; ++ extradata_size = extradata_size - i + 7; ++ // verify size atom and actual size ++ if (extradata_size >= 14 && (AV_RB32(extradata) + 4) <= extradata_size) ++ { ++ extradata += 8; ++ extradata_size -= 8; ++ if (*(char *)extradata == 1) ++ { ++ if (mvc_data) ++ *mvc_data = extradata; ++ if (mvc_data_size) ++ *mvc_data_size = extradata_size; ++ return true; ++ } ++ } ++ } ++ } ++ return false; ++} ++ + struct EFormatMap { + AVPixelFormat pix_fmt; + ERenderFormat format; +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h b/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h +index eb76a6fe73f6c884540807cfb93c7a3ecc4eea90..7e24c2364e8d2efa9b8351afc041aa14404d5e51 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/DVDCodecUtils.h +@@ -24,6 +24,7 @@ + #include "cores/VideoPlayer/VideoRenderers/RenderFormats.h" + + struct YV12Image; ++class AVStream; + + class CDVDCodecUtils + { +@@ -42,6 +43,9 @@ public: + + static double NormalizeFrameduration(double frameduration, bool *match = NULL); + ++ static bool IsH264AnnexB(std::string format, AVStream *avstream); ++ static bool ProcessH264MVCExtradata(uint8_t *extradata, int extradata_size, uint8_t **mvc_extradata = nullptr, int *mvc_extradata_size = nullptr); ++ + static ERenderFormat EFormatFromPixfmt(int fmt); + static int PixfmtFromEFormat(ERenderFormat format); + }; + +From 38596e0f01d1235fe66c4891818f203e676bf6af Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sun, 6 Mar 2016 12:54:59 +0000 +Subject: [PATCH 49/67] mvc: Automatically enable stereo mode + +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 6 +++++- + xbmc/cores/omxplayer/OMXVideo.cpp | 6 +++++- + 2 files changed, 10 insertions(+), 2 
deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index 8691b086a46fcdd03eee809a53ea9b20f74dcc05..b4e2c57d406297f75c5dfc0217f4d33507cb6755 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -571,13 +571,17 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + switch (hints.codec) + { + case AV_CODEC_ID_H264: ++ case AV_CODEC_ID_H264_MVC: + // H.264 + m_codingType = MMAL_ENCODING_H264; + m_pFormatName = "mmal-h264"; +- if (CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_SUPPORTMVC)) ++ if ((hints.codec_tag == MKTAG('M', 'V', 'C', '1') || hints.codec_tag == MKTAG('A', 'M', 'V', 'C')) && ++ CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_SUPPORTMVC)) + { + m_codingType = MMAL_ENCODING_MVC; + m_pFormatName= "mmal-mvc"; ++ if (hints.stereo_mode == "mono") ++ hints.stereo_mode = "mvc_lr"; + } + break; + case AV_CODEC_ID_H263: +diff --git a/xbmc/cores/omxplayer/OMXVideo.cpp b/xbmc/cores/omxplayer/OMXVideo.cpp +index b2bb0a832f5a722bb9de2a48e21e96d5d74e71b8..f8f26a891f6610de83ec143ec4b51f0aea5424de 100644 +--- a/xbmc/cores/omxplayer/OMXVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXVideo.cpp +@@ -401,6 +401,7 @@ bool COMXVideo::Open(CDVDStreamInfo &hints, OMXClock *clock, EDEINTERLACEMODE de + switch (hints.codec) + { + case AV_CODEC_ID_H264: ++ case AV_CODEC_ID_H264_MVC: + { + switch(hints.profile) + { +@@ -437,10 +438,13 @@ bool COMXVideo::Open(CDVDStreamInfo &hints, OMXClock *clock, EDEINTERLACEMODE de + break; + } + } +- if (CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_SUPPORTMVC)) ++ if ((hints.codec_tag == MKTAG('M', 'V', 'C', '1') || hints.codec_tag == MKTAG('A', 'M', 'V', 'C')) && ++ CSettings::GetInstance().GetBool(CSettings::SETTING_VIDEOPLAYER_SUPPORTMVC)) + { + m_codingType = OMX_VIDEO_CodingMVC; + m_video_codec_name = "omx-mvc"; 
++ if (hints.stereo_mode == "mono") ++ hints.stereo_mode = "mvc_lr"; + } + break; + case AV_CODEC_ID_MPEG4: + +From 25afb65c978ae78c35cfcfd10cb355ba88d42f7a Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Thu, 24 Mar 2016 13:02:58 +0000 +Subject: [PATCH 50/67] ffmpeg: mvc: fix for pixelation from packets with no + pts/dts + +--- + .../73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch | 24 ++++++++++++++++++++++ + tools/depends/target/ffmpeg/Makefile | 4 +++- + 2 files changed, 27 insertions(+), 1 deletion(-) + create mode 100644 tools/depends/target/ffmpeg/73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch + +diff --git a/tools/depends/target/ffmpeg/73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch b/tools/depends/target/ffmpeg/73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..5240cf58ce40c28d12354db63b7e29143ba46978 +--- /dev/null ++++ b/tools/depends/target/ffmpeg/73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch +@@ -0,0 +1,24 @@ ++From 73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105 Mon Sep 17 00:00:00 2001 ++From: Hendrik Leppkes <h.leppkes@gmail.com> ++Date: Mon, 1 Sep 2014 11:39:09 +0200 ++Subject: [PATCH] h264_parser: force grabing a new timestamp until a frame ++ start was found ++ ++--- ++ libavcodec/h264_parser.c | 3 +++ ++ 1 file changed, 3 insertions(+) ++ ++diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c ++index 2fd3f2b..7165652 100644 ++--- a/libavcodec/h264_parser.c +++++ b/libavcodec/h264_parser.c ++@@ -525,6 +525,9 @@ static int h264_parse(AVCodecParserContext *s, ++ } else { ++ next = h264_find_frame_end(p, buf, buf_size); ++ +++ if (next == END_NOT_FOUND && pc->frame_start_found == 0) +++ s->fetch_timestamp = 1; +++ ++ if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) { ++ *poutbuf = NULL; ++ *poutbuf_size = 0; +diff --git a/tools/depends/target/ffmpeg/Makefile b/tools/depends/target/ffmpeg/Makefile +index 
92d9437b36eaa4e655990f7e68634e0bbf4d9605..99f375ba5d5b40eecdd423ac5787276e534ad4d7 100644 +--- a/tools/depends/target/ffmpeg/Makefile ++++ b/tools/depends/target/ffmpeg/Makefile +@@ -6,7 +6,8 @@ DEPS= ../../Makefile.include FFMPEG-VERSION Makefile \ + pfcd_hevc_optimisations.patch \ + 0001-Squashed-commit-of-the-following.patch \ + 0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch 0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch \ +- h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch ++ h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch \ ++ 73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch + + # set to "yes" to enable patching + # we don't apply patches until we move to a vanilla ffmpeg tarball +@@ -91,6 +92,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); patch -p1 < ../0001-avcodec-add-h264_mvc-codec-id-and-profiles.patch + cd $(PLATFORM); patch -p1 < ../0001-h264_parser-add-support-for-parsing-h264-mvc-NALUs.patch + cd $(PLATFORM); patch -p1 < ../h264_parser_fix_parsing_of_mvc_slices_in_some_corner_cases.patch ++ cd $(PLATFORM); patch -p1 < ../73fde6f9f3d01f7fc0f3ae4b66f6c725f9fb1105.patch + + cd $(PLATFORM);\ + CFLAGS="$(CFLAGS)" CXXFLAGS="$(CXXFLAGS)" CPPFLAGS="$(CPPFLAGS)" LDFLAGS="$(LDFLAGS)" \ + +From bcc3e1b01501c7ca65525ae31ecdb7a6028f8e84 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 9 Mar 2016 13:08:44 +0000 +Subject: [PATCH 51/67] stereoscopicmanager: remove hardwarebased for rbp + +--- + xbmc/guilib/StereoscopicsManager.cpp | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xbmc/guilib/StereoscopicsManager.cpp b/xbmc/guilib/StereoscopicsManager.cpp +index 6eb0752994bc5f8c47efbbf211120af0a0720d0c..9426604f6460651f54cc035476e69530b2ea8493 100644 +--- a/xbmc/guilib/StereoscopicsManager.cpp ++++ b/xbmc/guilib/StereoscopicsManager.cpp +@@ -72,8 +72,10 @@ static const struct StereoModeMap VideoModeToGuiModeMap[] = + { "anaglyph_yellow_blue", 
RENDER_STEREO_MODE_ANAGLYPH_YELLOW_BLUE }, + { "block_lr", RENDER_STEREO_MODE_OFF }, // unsupported + { "block_rl", RENDER_STEREO_MODE_OFF }, // unsupported ++#ifndef TARGET_RASPBERRY_PI + { "mvc_lr", RENDER_STEREO_MODE_HARDWAREBASED }, + { "mvc_rl", RENDER_STEREO_MODE_HARDWAREBASED }, ++#endif + { "mvc_lr", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback + { "mvc_rl", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback + {} + +From 5ca0d3a5d247af35d48a4375131117466ad56f09 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 17 May 2016 19:24:08 +0100 +Subject: [PATCH 52/67] stereoscopics: Switch to using block_lr for mvc to + match makemkv + +See: http://forum.kodi.tv/showthread.php?tid=221407&pid=2339656#pid2339656 +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 2 +- + xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp | 2 +- + xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp | 12 ++++-------- + xbmc/cores/omxplayer/OMXVideo.cpp | 2 +- + xbmc/guilib/StereoscopicsManager.cpp | 11 +++++------ + 5 files changed, 12 insertions(+), 17 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index b4e2c57d406297f75c5dfc0217f4d33507cb6755..470083b2256d23488ca476cebfe8d3ef9f62377e 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -581,7 +581,7 @@ bool CMMALVideo::Open(CDVDStreamInfo &hints, CDVDCodecOptions &options) + m_codingType = MMAL_ENCODING_MVC; + m_pFormatName= "mmal-mvc"; + if (hints.stereo_mode == "mono") +- hints.stereo_mode = "mvc_lr"; ++ hints.stereo_mode = "block_lr"; + } + break; + case AV_CODEC_ID_H263: +diff --git a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +index 54e4d0b66680a08c1e4c1be343fabe4371aec6af..5798ba2ede172c89d18b6997874117301a8b6a37 100644 +--- 
a/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDDemuxers/DVDDemuxFFmpeg.cpp +@@ -1387,7 +1387,7 @@ CDemuxStream* CDVDDemuxFFmpeg::AddStream(int streamIdx) + CDVDInputStreamBluray *bluRay = static_cast<CDVDInputStreamBluray*>(m_pInput); + if (bluRay->HasMVC()) + { +- st->stereo_mode = bluRay->AreEyesFlipped() ? "mvc_rl" : "mvc_lr"; ++ st->stereo_mode = bluRay->AreEyesFlipped() ? "block_rl" : "block_lr"; + mvcStream = static_cast<CDVDDemuxMVC*>(bluRay->GetDemuxMVC())->GetAVStream(); + } + } +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp +index 04ceed1504c2d81aaa165d232e128c410b9fdc2c..49f7f7ca7e144a259f6d06bd11cd97aa0b3242aa 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/RenderFlags.cpp +@@ -102,10 +102,8 @@ namespace RenderManager { + convert["right_left"] = CONF_FLAGS_STEREO_MODE_SBS | CONF_FLAGS_STEREO_CADANCE_RIGHT_LEFT; + convert["anaglyph_green_magenta"] = 0u; + convert["anaglyph_yellow_blue"] = 0u; +- convert["block_lr"] = 0u; +- convert["block_rl"] = 0u; +- convert["mvc_lr"] = 0u; +- convert["mvc_rl"] = 0u; ++ convert["block_lr"] = CONF_FLAGS_STEREO_CADANCE_LEFT_RIGHT; ++ convert["block_rl"] = CONF_FLAGS_STEREO_CADANCE_RIGHT_LEFT; + } + return convert[mode]; + } +@@ -125,10 +123,8 @@ namespace RenderManager { + convert["row_interleaved_lr"] = "row_interleaved_rl"; + convert["col_interleaved_rl"] = "col_interleaved_lr"; + convert["col_interleaved_lr"] = "col_interleaved_rl"; +- convert["block_lr"] = "block_lr"; +- convert["block_rl"] = "block_rl"; +- convert["mvc_lr"] = "mvc_rl"; +- convert["mvc_rl"] = "mvc_lr"; ++ convert["block_lr"] = "block_rl"; ++ convert["block_rl"] = "block_lr"; + } + std::string res = convert[mode]; + if(res.empty()) +diff --git a/xbmc/cores/omxplayer/OMXVideo.cpp b/xbmc/cores/omxplayer/OMXVideo.cpp +index 
f8f26a891f6610de83ec143ec4b51f0aea5424de..de15bfff05d23949d6e6f4304b15aa7d79120dc2 100644 +--- a/xbmc/cores/omxplayer/OMXVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXVideo.cpp +@@ -444,7 +444,7 @@ bool COMXVideo::Open(CDVDStreamInfo &hints, OMXClock *clock, EDEINTERLACEMODE de + m_codingType = OMX_VIDEO_CodingMVC; + m_video_codec_name = "omx-mvc"; + if (hints.stereo_mode == "mono") +- hints.stereo_mode = "mvc_lr"; ++ hints.stereo_mode = "block_lr"; + } + break; + case AV_CODEC_ID_MPEG4: +diff --git a/xbmc/guilib/StereoscopicsManager.cpp b/xbmc/guilib/StereoscopicsManager.cpp +index 9426604f6460651f54cc035476e69530b2ea8493..cc929b599125a44ac128713fd4331782d9931791 100644 +--- a/xbmc/guilib/StereoscopicsManager.cpp ++++ b/xbmc/guilib/StereoscopicsManager.cpp +@@ -70,14 +70,13 @@ static const struct StereoModeMap VideoModeToGuiModeMap[] = + { "anaglyph_cyan_red", RENDER_STEREO_MODE_ANAGLYPH_RED_CYAN }, + { "anaglyph_green_magenta", RENDER_STEREO_MODE_ANAGLYPH_GREEN_MAGENTA }, + { "anaglyph_yellow_blue", RENDER_STEREO_MODE_ANAGLYPH_YELLOW_BLUE }, +- { "block_lr", RENDER_STEREO_MODE_OFF }, // unsupported +- { "block_rl", RENDER_STEREO_MODE_OFF }, // unsupported + #ifndef TARGET_RASPBERRY_PI +- { "mvc_lr", RENDER_STEREO_MODE_HARDWAREBASED }, +- { "mvc_rl", RENDER_STEREO_MODE_HARDWAREBASED }, ++ { "block_lr", RENDER_STEREO_MODE_HARDWAREBASED }, ++ { "block_rl", RENDER_STEREO_MODE_HARDWAREBASED }, ++#else ++ { "block_lr", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback ++ { "block_rl", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback + #endif +- { "mvc_lr", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback +- { "mvc_rl", RENDER_STEREO_MODE_SPLIT_HORIZONTAL }, // fallback + {} + }; + + +From 0a3a48ddcd0cbcd263105f844d27afa720da9bf2 Mon Sep 17 00:00:00 2001 +From: Anton Fedchin <anightik@gmail.com> +Date: Thu, 10 Mar 2016 18:11:33 +0300 +Subject: [PATCH 53/67] fixup! Revert supporting crappy tab/sbs subtitles. this + fixes regular subtitles. 
+ +--- + .../VideoPlayer/DVDCodecs/Overlay/DVDOverlayCodecFFmpeg.cpp | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Overlay/DVDOverlayCodecFFmpeg.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Overlay/DVDOverlayCodecFFmpeg.cpp +index 3a080d06c90b0762482816928642e6de7810b539..7c0b70777556ac7694e7fc511cd4bb189fc42e08 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Overlay/DVDOverlayCodecFFmpeg.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Overlay/DVDOverlayCodecFFmpeg.cpp +@@ -243,20 +243,20 @@ CDVDOverlay* CDVDOverlayCodecFFmpeg::GetOverlay() + } + } + +- RENDER_STEREO_MODE render_stereo_mode = g_graphicsContext.GetStereoMode(); ++ /*RENDER_STEREO_MODE render_stereo_mode = g_graphicsContext.GetStereoMode(); + if (render_stereo_mode != RENDER_STEREO_MODE_OFF) + { +- if (rect.h > m_height / 2) ++ if ((rect.h - rect.y) > m_height / 2) + { + m_height /= 2; + rect.h /= 2; + } +- else if (rect.w > m_width / 2) ++ else if ((rect.w - rect.x) > m_width / 2) + { + m_width /= 2; + rect.w /= 2; + } +- } ++ }*/ + + CDVDOverlayImage* overlay = new CDVDOverlayImage(); + +@@ -290,6 +290,7 @@ CDVDOverlay* CDVDOverlayCodecFFmpeg::GetOverlay() + + m_SubtitleIndex++; + ++ CLog::Log(LOGDEBUG, "Overlay: x:%d y:%d w:%d h:%d", overlay->x, overlay->y, overlay->width, overlay->height); + return overlay; + } + + +From 49a3522fd02f0d6e4adb10ec413df1bf5e181421 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 10 Feb 2015 15:29:16 +0000 +Subject: [PATCH 54/67] [libcec] Add repeating keypress patch from popcornmix' + repo + +--- + tools/depends/target/libcec/Makefile | 1 + + tools/depends/target/libcec/popcornmix.patch | 859 +++++++++++++++++++++++++++ + 2 files changed, 860 insertions(+) + create mode 100644 tools/depends/target/libcec/popcornmix.patch + +diff --git a/tools/depends/target/libcec/Makefile b/tools/depends/target/libcec/Makefile +index 
f54af9e7ed3d0a9bef922517728c8b8db51d9d75..ddf996361ad5b46dd2b33fb035b2ed133914a612 100644 +--- a/tools/depends/target/libcec/Makefile ++++ b/tools/depends/target/libcec/Makefile +@@ -21,6 +21,7 @@ $(TARBALLS_LOCATION)/$(ARCHIVE): + $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + rm -rf $(PLATFORM); mkdir -p $(PLATFORM)/build + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) ++ cd $(PLATFORM); patch -p1 < ../popcornmix.patch + cd $(PLATFORM)/build; $(CMAKE) -DBUILD_SHARED_LIBS=1 -DSKIP_PYTHON_WRAPPER:STRING=1 -DCMAKE_INSTALL_LIBDIR=$(PREFIX)/lib .. + + $(LIBDYLIB): $(PLATFORM) +diff --git a/tools/depends/target/libcec/popcornmix.patch b/tools/depends/target/libcec/popcornmix.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..8366a696562a934144cc9a21ea6f2cab3c69e655 +--- /dev/null ++++ b/tools/depends/target/libcec/popcornmix.patch +@@ -0,0 +1,859 @@ ++From ec982e9800ae312972d306b67779215a2add6cde Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Fri, 24 Oct 2014 13:45:21 +0100 ++Subject: [PATCH 1/6] Make released key polling wait for exact time until key ++ gets released ++ ++--- ++ src/libcec/CECClient.cpp | 16 ++++++++++++++-- ++ src/libcec/CECClient.h | 2 +- ++ src/libcec/CECProcessor.cpp | 8 +++++--- ++ src/libcec/LibCEC.cpp | 10 ++++++++-- ++ src/libcec/LibCEC.h | 4 +++- ++ 5 files changed, 31 insertions(+), 9 deletions(-) ++ ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index 35c2d3e..e307c0e 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -1067,7 +1067,7 @@ void CCECClient::SetCurrentButton(const cec_user_control_code iButtonCode) ++ AddKey(key); ++ } ++ ++-void CCECClient::CheckKeypressTimeout(void) +++uint16_t CCECClient::CheckKeypressTimeout(void) ++ { ++ cec_keypress key; ++ ++@@ -1091,12 +1091,24 @@ void CCECClient::CheckKeypressTimeout(void) ++ } ++ else ++ { ++- return; +++ // time when this keypress will 
be released and we'd like to be called again +++ unsigned int timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; +++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton == comboKey && iTimeoutMs > 0) +++ timeout = iTimeoutMs - (iNow - m_buttontime) + 1; +++ else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey) +++ timeout = CEC_BUTTON_TIMEOUT - (iNow - m_buttontime) + 1; +++ if (timeout > CEC_PROCESSOR_SIGNAL_WAIT_TIME) +++ { +++ LIB_CEC->AddLog(CEC_LOG_ERROR, "Unexpected timeout: %d (%.3f %.3f %.3f) k:%02x", timeout, iNow*1e-3, m_buttontime*1e-3, CEC_BUTTON_TIMEOUT*1e-3, m_iCurrentButton); +++ timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; +++ } +++ return timeout; ++ } ++ } ++ ++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key auto-released: %s (%1x)", ToString(key.keycode), key.keycode); ++ QueueAddKey(key); +++ return CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ } ++ ++ bool CCECClient::EnableCallbacks(void *cbParam, ICECCallbacks *callbacks) ++diff --git a/src/libcec/CECClient.h b/src/libcec/CECClient.h ++index 12f8a3b..c9ce5e3 100644 ++--- a/src/libcec/CECClient.h +++++ b/src/libcec/CECClient.h ++@@ -272,7 +272,7 @@ namespace CEC ++ virtual void AddKey(bool bSendComboKey = false); ++ virtual void AddKey(const cec_keypress &key); ++ virtual void SetCurrentButton(const cec_user_control_code iButtonCode); ++- virtual void CheckKeypressTimeout(void); +++ virtual uint16_t CheckKeypressTimeout(void); ++ virtual void SourceActivated(const cec_logical_address logicalAddress); ++ virtual void SourceDeactivated(const cec_logical_address logicalAddress); ++ ++diff --git a/src/libcec/CECProcessor.cpp b/src/libcec/CECProcessor.cpp ++index 99f71aa..604b950 100644 ++--- a/src/libcec/CECProcessor.cpp +++++ b/src/libcec/CECProcessor.cpp ++@@ -52,7 +52,6 @@ ++ using namespace CEC; ++ using namespace PLATFORM; ++ ++-#define CEC_PROCESSOR_SIGNAL_WAIT_TIME 1000 ++ #define ACTIVE_SOURCE_CHECK_INTERVAL 500 ++ #define TV_PRESENT_CHECK_INTERVAL 30000 ++ ++@@ 
-260,6 +259,7 @@ bool CCECProcessor::OnCommandReceived(const cec_command &command) ++ ++ void *CCECProcessor::Process(void) ++ { +++ uint16_t timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ m_libcec->AddLog(CEC_LOG_DEBUG, "processor thread started"); ++ ++ if (!m_connCheck) ++@@ -274,13 +274,13 @@ void *CCECProcessor::Process(void) ++ while (!IsStopped() && m_communication->IsOpen()) ++ { ++ // wait for a new incoming command, and process it ++- if (m_inBuffer.Pop(command, CEC_PROCESSOR_SIGNAL_WAIT_TIME)) +++ if (m_inBuffer.Pop(command, timeout)) ++ ProcessCommand(command); ++ ++ if (CECInitialised() && !IsStopped()) ++ { ++ // check clients for keypress timeouts ++- m_libcec->CheckKeypressTimeout(); +++ timeout = m_libcec->CheckKeypressTimeout(); ++ ++ // check if we need to replace handlers ++ ReplaceHandlers(); ++@@ -311,6 +311,8 @@ void *CCECProcessor::Process(void) ++ tvPresentCheck.Init(TV_PRESENT_CHECK_INTERVAL); ++ } ++ } +++ else +++ timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ } ++ ++ return NULL; ++diff --git a/src/libcec/LibCEC.cpp b/src/libcec/LibCEC.cpp ++index af36b79..5ccb8dd 100644 ++--- a/src/libcec/LibCEC.cpp +++++ b/src/libcec/LibCEC.cpp ++@@ -361,11 +361,17 @@ bool CLibCEC::IsValidPhysicalAddress(uint16_t iPhysicalAddress) ++ iPhysicalAddress <= CEC_MAX_PHYSICAL_ADDRESS; ++ } ++ ++-void CLibCEC::CheckKeypressTimeout(void) +++uint16_t CLibCEC::CheckKeypressTimeout(void) ++ { +++ uint16_t timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ // check all clients ++ for (std::vector<CECClientPtr>::iterator it = m_clients.begin(); it != m_clients.end(); it++) ++- (*it)->CheckKeypressTimeout(); +++ { +++ uint16_t t = (*it)->CheckKeypressTimeout(); +++ if (t < timeout) +++ timeout = t; +++ } +++ return timeout; ++ } ++ ++ void CLibCEC::AddLog(const cec_log_level level, const char *strFormat, ...) 
++diff --git a/src/libcec/LibCEC.h b/src/libcec/LibCEC.h ++index 6d9a229..d9d1e7b 100644 ++--- a/src/libcec/LibCEC.h +++++ b/src/libcec/LibCEC.h ++@@ -39,6 +39,8 @@ ++ #include "CECTypeUtils.h" ++ #include <memory> ++ +++#define CEC_PROCESSOR_SIGNAL_WAIT_TIME 1000 +++ ++ namespace CEC ++ { ++ class CAdapterCommunication; ++@@ -125,7 +127,7 @@ namespace CEC ++ ++ void AddLog(const cec_log_level level, const char *strFormat, ...); ++ void AddCommand(const cec_command &command); ++- void CheckKeypressTimeout(void); +++ uint16_t CheckKeypressTimeout(void); ++ void Alert(const libcec_alert type, const libcec_parameter ¶m); ++ ++ static bool IsValidPhysicalAddress(uint16_t iPhysicalAddress); ++-- ++1.9.1 ++ ++ ++From 41f0f3ec9ac136da3565c96fd5a7075499f3938d Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Fri, 24 Oct 2014 13:51:34 +0100 ++Subject: [PATCH 2/6] Keep track of time since initial button press and last ++ button update ++ ++--- ++ src/libcec/CECClient.cpp | 44 +++++++++++++++++++++++++++----------------- ++ src/libcec/CECClient.h | 3 ++- ++ 2 files changed, 29 insertions(+), 18 deletions(-) ++ ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index e307c0e..e7935b9 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -54,7 +54,8 @@ CCECClient::CCECClient(CCECProcessor *processor, const libcec_configuration &con ++ m_bInitialised(false), ++ m_bRegistered(false), ++ m_iCurrentButton(CEC_USER_CONTROL_CODE_UNKNOWN), ++- m_buttontime(0), +++ m_initialButtontime(0), +++ m_updateButtontime(0), ++ m_iPreventForwardingPowerOffCommand(0), ++ m_iLastKeypressTime(0) ++ { ++@@ -981,9 +982,10 @@ void CCECClient::AddKey(bool bSendComboKey /* = false */) ++ CLockObject lock(m_mutex); ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN) ++ { ++- key.duration = (unsigned int) (GetTimeMs() - m_buttontime); +++ unsigned int duration = (unsigned int) (GetTimeMs() - m_updateButtontime); +++ key.duration = 
(unsigned int) (GetTimeMs() - m_initialButtontime); ++ ++- if (key.duration > m_configuration.iComboKeyTimeoutMs || +++ if (duration > m_configuration.iComboKeyTimeoutMs || ++ m_configuration.iComboKeyTimeoutMs == 0 || ++ m_iCurrentButton != m_configuration.comboKey || ++ bSendComboKey) ++@@ -991,14 +993,15 @@ void CCECClient::AddKey(bool bSendComboKey /* = false */) ++ key.keycode = m_iCurrentButton; ++ ++ m_iCurrentButton = CEC_USER_CONTROL_CODE_UNKNOWN; ++- m_buttontime = 0; +++ m_initialButtontime = 0; +++ m_updateButtontime = 0; ++ } ++ } ++ } ++ ++ if (key.keycode != CEC_USER_CONTROL_CODE_UNKNOWN) ++ { ++- LIB_CEC->AddLog(CEC_LOG_DEBUG, "key released: %s (%1x)", ToString(key.keycode), key.keycode); +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key released: %s (%1x) D:%dms", ToString(key.keycode), key.keycode, key.duration); ++ QueueAddKey(key); ++ } ++ } ++@@ -1012,7 +1015,7 @@ void CCECClient::AddKey(const cec_keypress &key) ++ AddKey(); ++ return; ++ } ++- +++ bool isrepeat = false; ++ cec_keypress transmitKey(key); ++ cec_user_control_code comboKey(m_configuration.clientVersion >= LIBCEC_VERSION_TO_UINT(2, 0, 5) ? ++ m_configuration.comboKey : CEC_USER_CONTROL_CODE_STOP); ++@@ -1035,22 +1038,27 @@ void CCECClient::AddKey(const cec_keypress &key) ++ AddKey(true); ++ } ++ +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key pressed: %s (%1x) current(%lx) duration(%d)", ToString(transmitKey.keycode), transmitKey.keycode, m_iCurrentButton, key.duration); +++ ++ if (m_iCurrentButton == key.keycode) ++ { ++- m_buttontime = GetTimeMs(); +++ m_updateButtontime = GetTimeMs(); +++ isrepeat = true; ++ } ++ else ++ { ++- AddKey(); +++ if (m_iCurrentButton != transmitKey.keycode) +++ AddKey(); ++ if (key.duration == 0) ++ { ++ m_iCurrentButton = transmitKey.keycode; ++- m_buttontime = m_iCurrentButton == CEC_USER_CONTROL_CODE_UNKNOWN || key.duration > 0 ? 0 : GetTimeMs(); +++ m_initialButtontime = m_iCurrentButton == CEC_USER_CONTROL_CODE_UNKNOWN || key.duration > 0 ? 
0 : GetTimeMs(); +++ m_updateButtontime = m_initialButtontime; ++ } ++ } ++ } ++ ++- if (key.keycode != comboKey || key.duration > 0) +++ if (!isrepeat && (key.keycode != comboKey || key.duration > 0)) ++ { ++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key pressed: %s (%1x)", ToString(transmitKey.keycode), transmitKey.keycode); ++ QueueAddKey(transmitKey); ++@@ -1074,32 +1082,34 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ { ++ CLockObject lock(m_mutex); ++ uint64_t iNow = GetTimeMs(); +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "%s T:%.3f", __FUNCTION__, iNow*1e-3); ++ cec_user_control_code comboKey(m_configuration.clientVersion >= LIBCEC_VERSION_TO_UINT(2, 0, 5) ? ++ m_configuration.comboKey : CEC_USER_CONTROL_CODE_STOP); ++ uint32_t iTimeoutMs(m_configuration.clientVersion >= LIBCEC_VERSION_TO_UINT(2, 0, 5) ? ++ m_configuration.iComboKeyTimeoutMs : CEC_DEFAULT_COMBO_TIMEOUT_MS); ++ ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- ((m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_buttontime > iTimeoutMs) || ++- (m_iCurrentButton != comboKey && iNow - m_buttontime > CEC_BUTTON_TIMEOUT))) +++ ((m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime > iTimeoutMs) || +++ (m_iCurrentButton != comboKey && iNow - m_updateButtontime > CEC_BUTTON_TIMEOUT))) ++ { ++- key.duration = (unsigned int) (iNow - m_buttontime); +++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = m_iCurrentButton; ++ ++ m_iCurrentButton = CEC_USER_CONTROL_CODE_UNKNOWN; ++- m_buttontime = 0; +++ m_initialButtontime = 0; +++ m_updateButtontime = 0; ++ } ++ else ++ { ++ // time when this keypress will be released and we'd like to be called again ++ unsigned int timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton == comboKey && iTimeoutMs > 0) ++- timeout = iTimeoutMs - (iNow - m_buttontime) + 1; +++ timeout = iTimeoutMs - (iNow - m_updateButtontime) + 1; ++ else if 
(m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey) ++- timeout = CEC_BUTTON_TIMEOUT - (iNow - m_buttontime) + 1; +++ timeout = CEC_BUTTON_TIMEOUT - (iNow - m_updateButtontime) + 1; ++ if (timeout > CEC_PROCESSOR_SIGNAL_WAIT_TIME) ++ { ++- LIB_CEC->AddLog(CEC_LOG_ERROR, "Unexpected timeout: %d (%.3f %.3f %.3f) k:%02x", timeout, iNow*1e-3, m_buttontime*1e-3, CEC_BUTTON_TIMEOUT*1e-3, m_iCurrentButton); +++ LIB_CEC->AddLog(CEC_LOG_ERROR, "Unexpected timeout: %d (%.3f %.3f %.3f) k:%02x", timeout, iNow*1e-3, m_updateButtontime*1e-3, CEC_BUTTON_TIMEOUT*1e-3, m_iCurrentButton); ++ timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ } ++ return timeout; ++diff --git a/src/libcec/CECClient.h b/src/libcec/CECClient.h ++index c9ce5e3..611c68b 100644 ++--- a/src/libcec/CECClient.h +++++ b/src/libcec/CECClient.h ++@@ -404,7 +404,8 @@ namespace CEC ++ PLATFORM::CMutex m_mutex; /**< mutex for changes to this instance */ ++ PLATFORM::CMutex m_cbMutex; /**< mutex that is held when doing anything with callbacks */ ++ cec_user_control_code m_iCurrentButton; /**< the control code of the button that's currently held down (if any) */ ++- int64_t m_buttontime; /**< the timestamp when the button was pressed (in seconds since epoch), or 0 if none was pressed. */ +++ int64_t m_initialButtontime; /**< the timestamp when the button was initially pressed (in seconds since epoch), or 0 if none was pressed. */ +++ int64_t m_updateButtontime; /**< the timestamp when the button was updated (in seconds since epoch), or 0 if none was pressed. 
*/ ++ int64_t m_iPreventForwardingPowerOffCommand; /**< prevent forwarding standby commands until this time */ ++ int64_t m_iLastKeypressTime; /**< last time a key press was sent to the client */ ++ cec_keypress m_lastKeypress; /**< the last key press that was sent to the client */ ++-- ++1.9.1 ++ ++ ++From 273ead6980b69eddf98810eb1eb33d94a7d74fce Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Tue, 28 Oct 2014 00:09:18 +0000 ++Subject: [PATCH 3/6] Support repeating button presses with configurable repeat ++ rate ++ ++--- ++ include/cectypes.h | 6 ++ ++ src/libcec/CECClient.cpp | 100 +++++++++++++++++++---- ++ src/libcec/CECClient.h | 6 +- ++ src/libcec/implementations/CECCommandHandler.cpp | 2 +- ++ 4 files changed, 96 insertions(+), 18 deletions(-) ++ ++diff --git a/include/cectypes.h b/include/cectypes.h ++index acff259..8f098ef 100644 ++--- a/include/cectypes.h +++++ b/include/cectypes.h ++@@ -1493,6 +1493,8 @@ struct libcec_configuration ++ XXX changed meaning in 2.2.0 to not break binary compatibility. next major (3.0) release will fix it in a nicer way */ ++ cec_user_control_code comboKey; /*!< key code that initiates combo keys. defaults to CEC_USER_CONTROL_CODE_F1_BLUE. CEC_USER_CONTROL_CODE_UNKNOWN to disable. added in 2.0.5 */ ++ uint32_t iComboKeyTimeoutMs; /*!< timeout until the combo key is sent as normal keypress */ +++ uint32_t iButtonRepeatRateMs; /*!< rate at which buttons autorepeat. 
0 means rely on CEC device */ +++ uint32_t iButtonReleaseDelayMs;/*!< duration after last update until a button is considered released */ ++ ++ #ifdef __cplusplus ++ libcec_configuration(void) { Clear(); } ++@@ -1527,6 +1529,8 @@ struct libcec_configuration ++ cecVersion == other.cecVersion && ++ adapterType == other.adapterType && ++ iDoubleTapTimeout50Ms == other.iDoubleTapTimeout50Ms && +++ iButtonRepeatRateMs == other.iButtonRepeatRateMs && +++ iButtonReleaseDelayMs == other.iButtonReleaseDelayMs && ++ (other.clientVersion <= LIBCEC_VERSION_TO_UINT(2, 0, 4) || comboKey == other.comboKey) && ++ (other.clientVersion <= LIBCEC_VERSION_TO_UINT(2, 0, 4) || iComboKeyTimeoutMs == other.iComboKeyTimeoutMs) && ++ (other.clientVersion < LIBCEC_VERSION_TO_UINT(2, 1, 0) || bPowerOnScreensaver == other.bPowerOnScreensaver)); ++@@ -1567,6 +1571,8 @@ struct libcec_configuration ++ iDoubleTapTimeout50Ms = CEC_DOUBLE_TAP_TIMEOUT_50_MS; ++ comboKey = CEC_USER_CONTROL_CODE_STOP; ++ iComboKeyTimeoutMs = CEC_DEFAULT_COMBO_TIMEOUT_MS; +++ iButtonRepeatRateMs = 0; +++ iButtonReleaseDelayMs = CEC_BUTTON_TIMEOUT; ++ ++ memset(strDeviceName, 0, 13); ++ deviceTypes.Clear(); ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index e7935b9..598628d 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -56,6 +56,10 @@ CCECClient::CCECClient(CCECProcessor *processor, const libcec_configuration &con ++ m_iCurrentButton(CEC_USER_CONTROL_CODE_UNKNOWN), ++ m_initialButtontime(0), ++ m_updateButtontime(0), +++ m_repeatButtontime(0), +++ m_releaseButtontime(0), +++ m_pressedButtoncount(0), +++ m_releasedButtoncount(0), ++ m_iPreventForwardingPowerOffCommand(0), ++ m_iLastKeypressTime(0) ++ { ++@@ -851,6 +855,9 @@ bool CCECClient::GetCurrentConfiguration(libcec_configuration &configuration) ++ configuration.bMonitorOnly = m_configuration.bMonitorOnly; ++ configuration.cecVersion = m_configuration.cecVersion; ++ configuration.adapterType = 
m_configuration.adapterType; +++ configuration.iDoubleTapTimeout50Ms = m_configuration.iDoubleTapTimeout50Ms; +++ configuration.iButtonRepeatRateMs = m_configuration.iButtonRepeatRateMs; +++ configuration.iButtonReleaseDelayMs = m_configuration.iButtonReleaseDelayMs; ++ ++ return true; ++ } ++@@ -894,6 +901,9 @@ bool CCECClient::SetConfiguration(const libcec_configuration &configuration) ++ m_configuration.cecVersion = configuration.cecVersion; ++ m_configuration.adapterType = configuration.adapterType; ++ m_configuration.iDoubleTapTimeout50Ms = configuration.iDoubleTapTimeout50Ms; +++ m_configuration.iButtonRepeatRateMs = configuration.iButtonRepeatRateMs; +++ m_configuration.iButtonReleaseDelayMs = configuration.iButtonReleaseDelayMs; +++ ++ m_configuration.deviceTypes.Add(configuration.deviceTypes[0]); ++ ++ if (m_configuration.clientVersion >= LIBCEC_VERSION_TO_UINT(2, 0, 5)) ++@@ -950,6 +960,7 @@ bool CCECClient::SetConfiguration(const libcec_configuration &configuration) ++ primary->ActivateSource(); ++ } ++ +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "%s: %d:%d:%d", __FUNCTION__, DoubleTapTimeoutMS(), m_configuration.iButtonRepeatRateMs, m_configuration.iButtonReleaseDelayMs); ++ return true; ++ } ++ ++@@ -973,11 +984,15 @@ void CCECClient::AddCommand(const cec_command &command) ++ } ++ } ++ ++-void CCECClient::AddKey(bool bSendComboKey /* = false */) +++void CCECClient::AddKey(bool bSendComboKey /* = false */, bool bButtonRelease /* = false */) ++ { ++ cec_keypress key; ++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++ +++ // we ignore button releases when supporting repeating keys +++ if (bButtonRelease && m_configuration.iButtonRepeatRateMs && m_configuration.iButtonReleaseDelayMs) +++ return; +++ ++ { ++ CLockObject lock(m_mutex); ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN) ++@@ -995,6 +1010,10 @@ void CCECClient::AddKey(bool bSendComboKey /* = false */) ++ m_iCurrentButton = CEC_USER_CONTROL_CODE_UNKNOWN; ++ m_initialButtontime = 0; ++ 
m_updateButtontime = 0; +++ m_repeatButtontime = 0; +++ m_releaseButtontime = 0; +++ m_pressedButtoncount = 0; +++ m_releasedButtoncount = 0; ++ } ++ } ++ } ++@@ -1012,6 +1031,7 @@ void CCECClient::AddKey(const cec_keypress &key) ++ key.keycode < CEC_USER_CONTROL_CODE_SELECT) ++ { ++ // send back the previous key if there is one +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "Unexpected key %s (%1x) D:%dms", ToString(key.keycode), key.keycode, key.duration); ++ AddKey(); ++ return; ++ } ++@@ -1035,7 +1055,10 @@ void CCECClient::AddKey(const cec_keypress &key) ++ transmitKey.keycode = CEC_USER_CONTROL_CODE_DOT; ++ // default, send back the previous key ++ else +++ { +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "Combo key %s (%1x) D%dms:", ToString(key.keycode), key.keycode, key.duration); ++ AddKey(true); +++ } ++ } ++ ++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key pressed: %s (%1x) current(%lx) duration(%d)", ToString(transmitKey.keycode), transmitKey.keycode, m_iCurrentButton, key.duration); ++@@ -1043,17 +1066,44 @@ void CCECClient::AddKey(const cec_keypress &key) ++ if (m_iCurrentButton == key.keycode) ++ { ++ m_updateButtontime = GetTimeMs(); ++- isrepeat = true; +++ m_releaseButtontime = m_updateButtontime + (m_configuration.iButtonReleaseDelayMs ? 
m_configuration.iButtonReleaseDelayMs : CEC_BUTTON_TIMEOUT); +++ // want to have seen some updated before considering a repeat +++ if (m_configuration.iButtonRepeatRateMs) +++ { +++ if (!m_repeatButtontime && m_pressedButtoncount > 1) +++ m_repeatButtontime = m_initialButtontime + DoubleTapTimeoutMS(); +++ isrepeat = true; +++ } +++ m_pressedButtoncount++; ++ } ++ else ++ { ++ if (m_iCurrentButton != transmitKey.keycode) +++ { +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "Changed key %s (%1x) D:%dms cur:%lx", ToString(transmitKey.keycode), transmitKey.keycode, transmitKey.duration, m_iCurrentButton); ++ AddKey(); +++ } ++ if (key.duration == 0) ++ { ++ m_iCurrentButton = transmitKey.keycode; ++- m_initialButtontime = m_iCurrentButton == CEC_USER_CONTROL_CODE_UNKNOWN || key.duration > 0 ? 0 : GetTimeMs(); ++- m_updateButtontime = m_initialButtontime; +++ if (m_iCurrentButton == CEC_USER_CONTROL_CODE_UNKNOWN) +++ { +++ m_initialButtontime = 0; +++ m_updateButtontime = 0; +++ m_repeatButtontime = 0; +++ m_releaseButtontime = 0; +++ m_pressedButtoncount = 0; +++ m_releasedButtoncount = 0; +++ } +++ else +++ { +++ m_initialButtontime = GetTimeMs(); +++ m_updateButtontime = m_initialButtontime; +++ m_repeatButtontime = 0; // set this on next update +++ m_releaseButtontime = m_initialButtontime + (m_configuration.iButtonReleaseDelayMs ? 
m_configuration.iButtonReleaseDelayMs : CEC_BUTTON_TIMEOUT); +++ m_pressedButtoncount = 1; +++ m_releasedButtoncount = 0; +++ } ++ } ++ } ++ } ++@@ -1072,12 +1122,16 @@ void CCECClient::SetCurrentButton(const cec_user_control_code iButtonCode) ++ key.duration = 0; ++ key.keycode = iButtonCode; ++ +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "SetCurrentButton %s (%1x) D:%dms cur:%lx", ToString(key.keycode), key.keycode, key.duration, (unsigned long) m_iCurrentButton); ++ AddKey(key); ++ } ++ ++ uint16_t CCECClient::CheckKeypressTimeout(void) ++ { +++ // time when we'd like to be called again +++ unsigned int timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ cec_keypress key; +++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++ ++ { ++ CLockObject lock(m_mutex); ++@@ -1089,8 +1143,8 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_configuration.iComboKeyTimeoutMs : CEC_DEFAULT_COMBO_TIMEOUT_MS); ++ ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- ((m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime > iTimeoutMs) || ++- (m_iCurrentButton != comboKey && iNow - m_updateButtontime > CEC_BUTTON_TIMEOUT))) +++ ((m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime >= iTimeoutMs) || +++ (m_iCurrentButton != comboKey && m_releaseButtontime && iNow >= (uint64_t)m_releaseButtontime))) ++ { ++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = m_iCurrentButton; ++@@ -1098,27 +1152,41 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_iCurrentButton = CEC_USER_CONTROL_CODE_UNKNOWN; ++ m_initialButtontime = 0; ++ m_updateButtontime = 0; +++ m_repeatButtontime = 0; +++ m_releaseButtontime = 0; +++ m_pressedButtoncount = 0; +++ m_releasedButtoncount = 0; +++ } +++ else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && +++ (m_iCurrentButton != comboKey && m_repeatButtontime && iNow >= (uint64_t)m_repeatButtontime)) +++ { +++ key.duration = 0; +++ key.keycode = m_iCurrentButton; +++ m_repeatButtontime = iNow +
m_configuration.iButtonRepeatRateMs; +++ timeout = std::min((uint64_t)timeout, m_repeatButtontime - iNow); ++ } ++ else ++ { ++- // time when this keypress will be released and we'd like to be called again ++- unsigned int timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton == comboKey && iTimeoutMs > 0) ++- timeout = iTimeoutMs - (iNow - m_updateButtontime) + 1; ++- else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey) ++- timeout = CEC_BUTTON_TIMEOUT - (iNow - m_updateButtontime) + 1; +++ timeout = std::min((uint64_t)timeout, m_updateButtontime - iNow + iTimeoutMs); +++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey && m_releaseButtontime) +++ timeout = std::min((uint64_t)timeout, m_releaseButtontime - iNow); +++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey && m_repeatButtontime) +++ timeout = std::min((uint64_t)timeout, m_repeatButtontime - iNow); ++ if (timeout > CEC_PROCESSOR_SIGNAL_WAIT_TIME) ++ { ++- LIB_CEC->AddLog(CEC_LOG_ERROR, "Unexpected timeout: %d (%.3f %.3f %.3f) k:%02x", timeout, iNow*1e-3, m_updateButtontime*1e-3, CEC_BUTTON_TIMEOUT*1e-3, m_iCurrentButton); +++ LIB_CEC->AddLog(CEC_LOG_ERROR, "Unexpected timeout: %d (%.3f %.3f %.3f) k:%02x", timeout, iNow*1e-3, m_updateButtontime*1e-3, m_releaseButtontime*1e-3, m_iCurrentButton); ++ timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ } ++- return timeout; ++ } +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key %s: %s (%1x) timeout:%dms (rel:%d,rep:%d,prs:%d,rel:%d)", key.keycode == CEC_USER_CONTROL_CODE_UNKNOWN ? "idle" : key.duration ? "released" : "repeated", +++ ToString(m_iCurrentButton), m_iCurrentButton, timeout, (int)(m_releaseButtontime ? m_releaseButtontime - iNow : 0), (int)(m_repeatButtontime ? 
m_repeatButtontime - iNow : 0), m_pressedButtoncount, m_releasedButtoncount); ++ } ++ ++- LIB_CEC->AddLog(CEC_LOG_DEBUG, "key auto-released: %s (%1x)", ToString(key.keycode), key.keycode); ++- QueueAddKey(key); ++- return CEC_PROCESSOR_SIGNAL_WAIT_TIME; +++ if (key.keycode != CEC_USER_CONTROL_CODE_UNKNOWN) +++ QueueAddKey(key); +++ +++ return timeout; ++ } ++ ++ bool CCECClient::EnableCallbacks(void *cbParam, ICECCallbacks *callbacks) ++diff --git a/src/libcec/CECClient.h b/src/libcec/CECClient.h ++index 611c68b..adeb5af 100644 ++--- a/src/libcec/CECClient.h +++++ b/src/libcec/CECClient.h ++@@ -269,7 +269,7 @@ namespace CEC ++ // callbacks ++ virtual void Alert(const libcec_alert type, const libcec_parameter &param) { QueueAlert(type, param); } ++ virtual void AddLog(const cec_log_message &message) { QueueAddLog(message); } ++- virtual void AddKey(bool bSendComboKey = false); +++ virtual void AddKey(bool bSendComboKey = false, bool bButtonRelease = false); ++ virtual void AddKey(const cec_keypress &key); ++ virtual void SetCurrentButton(const cec_user_control_code iButtonCode); ++ virtual uint16_t CheckKeypressTimeout(void); ++@@ -406,6 +406,10 @@ namespace CEC ++ cec_user_control_code m_iCurrentButton; /**< the control code of the button that's currently held down (if any) */ ++ int64_t m_initialButtontime; /**< the timestamp when the button was initially pressed (in seconds since epoch), or 0 if none was pressed. */ ++ int64_t m_updateButtontime; /**< the timestamp when the button was updated (in seconds since epoch), or 0 if none was pressed. */ +++ int64_t m_repeatButtontime; /**< the timestamp when the button will next repeat (in seconds since epoch), or 0 if repeat is disabled. */ +++ int64_t m_releaseButtontime; /**< the timestamp when the button will be released (in seconds since epoch), or 0 if none was pressed. */ +++ int32_t m_pressedButtoncount; /**< the number of times a button released message has been seen for this press.
*/ +++ int32_t m_releasedButtoncount; /**< the number of times a button pressed message has been seen for this press. */ ++ int64_t m_iPreventForwardingPowerOffCommand; /**< prevent forwarding standby commands until this time */ ++ int64_t m_iLastKeypressTime; /**< last time a key press was sent to the client */ ++ cec_keypress m_lastKeypress; /**< the last key press that was sent to the client */ ++diff --git a/src/libcec/implementations/CECCommandHandler.cpp b/src/libcec/implementations/CECCommandHandler.cpp ++index 6d6244e..d64186f 100644 ++--- a/src/libcec/implementations/CECCommandHandler.cpp +++++ b/src/libcec/implementations/CECCommandHandler.cpp ++@@ -770,7 +770,7 @@ int CCECCommandHandler::HandleUserControlRelease(const cec_command &command) ++ ++ CECClientPtr client = m_processor->GetClient(command.destination); ++ if (client) ++- client->AddKey(); +++ client->AddKey(false, true); ++ ++ return COMMAND_HANDLED; ++ } ++-- ++1.9.1 ++ ++ ++From 3336d0827f7fd159430f3431642b07090c06c869 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Tue, 28 Oct 2014 01:21:35 +0000 ++Subject: [PATCH 4/6] Skip double press removal. It is handled through other ++ means. 
++ ++--- ++ src/libcec/CECClient.cpp | 18 +----------------- ++ src/libcec/CECClient.h | 2 -- ++ 2 files changed, 1 insertion(+), 19 deletions(-) ++ ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index 598628d..dccd874 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -60,11 +60,8 @@ CCECClient::CCECClient(CCECProcessor *processor, const libcec_configuration &con ++ m_releaseButtontime(0), ++ m_pressedButtoncount(0), ++ m_releasedButtoncount(0), ++- m_iPreventForwardingPowerOffCommand(0), ++- m_iLastKeypressTime(0) +++ m_iPreventForwardingPowerOffCommand(0) ++ { ++- m_lastKeypress.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++- m_lastKeypress.duration = 0; ++ m_configuration.Clear(); ++ // set the initial configuration ++ SetConfiguration(configuration); ++@@ -1647,20 +1644,7 @@ void CCECClient::CallbackAddKey(const cec_keypress &key) ++ { ++ CLockObject lock(m_cbMutex); ++ if (m_configuration.callbacks && m_configuration.callbacks->CBCecKeyPress) ++- { ++- // prevent double taps ++- int64_t now = GetTimeMs(); ++- if (m_lastKeypress.keycode != key.keycode || ++- key.duration > 0 || ++- now - m_iLastKeypressTime >= DoubleTapTimeoutMS()) ++- { ++- // no double tap ++- if (key.duration == 0) ++- m_iLastKeypressTime = now; ++- m_lastKeypress = key; ++ m_configuration.callbacks->CBCecKeyPress(m_configuration.callbackParam, key); ++- } ++- } ++ } ++ ++ void CCECClient::CallbackAddLog(const cec_log_message &message) ++diff --git a/src/libcec/CECClient.h b/src/libcec/CECClient.h ++index adeb5af..43a713b 100644 ++--- a/src/libcec/CECClient.h +++++ b/src/libcec/CECClient.h ++@@ -411,8 +411,6 @@ namespace CEC ++ int32_t m_pressedButtoncount; /**< the number of times a button released message has been seen for this press. */ ++ int32_t m_releasedButtoncount; /**< the number of times a button pressed message has been seen for this press. 
*/ ++ int64_t m_iPreventForwardingPowerOffCommand; /**< prevent forwarding standby commands until this time */ ++- int64_t m_iLastKeypressTime; /**< last time a key press was sent to the client */ ++- cec_keypress m_lastKeypress; /**< the last key press that was sent to the client */ ++ PLATFORM::SyncedBuffer<CCallbackWrap*> m_callbackCalls; ++ }; ++ } ++-- ++1.9.1 ++ ++ ++From 0dd0234f620a546bfa843172648383f83d88088c Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Mon, 3 Nov 2014 23:28:04 +0000 ++Subject: [PATCH 5/6] Pass through duration on all button repeats ++ ++--- ++ src/libcec/CECClient.cpp | 34 ++++++++++++++++++++++++---------- ++ 1 file changed, 24 insertions(+), 10 deletions(-) ++ ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index dccd874..1946148 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -986,10 +986,6 @@ void CCECClient::AddKey(bool bSendComboKey /* = false */, bool bButtonRelease /* ++ cec_keypress key; ++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++ ++- // we ignore button releases when supporting repeating keys ++- if (bButtonRelease && m_configuration.iButtonRepeatRateMs && m_configuration.iButtonReleaseDelayMs) ++- return; ++- ++ { ++ CLockObject lock(m_mutex); ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN) ++@@ -1015,6 +1011,10 @@ void CCECClient::AddKey(bool bSendComboKey /* = false */, bool bButtonRelease /* ++ } ++ } ++ +++ // we don't forward releases when supporting repeating keys +++ if (bButtonRelease && m_configuration.iButtonRepeatRateMs) +++ return; +++ ++ if (key.keycode != CEC_USER_CONTROL_CODE_UNKNOWN) ++ { ++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key released: %s (%1x) D:%dms", ToString(key.keycode), key.keycode, key.duration); ++@@ -1107,7 +1107,7 @@ void CCECClient::AddKey(const cec_keypress &key) ++ ++ if (!isrepeat && (key.keycode != comboKey || key.duration > 0)) ++ { ++- LIB_CEC->AddLog(CEC_LOG_DEBUG, "key pressed: %s (%1x)", 
ToString(transmitKey.keycode), transmitKey.keycode); +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "key pressed: %s (%1x, %d)", ToString(transmitKey.keycode), transmitKey.keycode, transmitKey.duration); ++ QueueAddKey(transmitKey); ++ } ++ } ++@@ -1129,6 +1129,7 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ unsigned int timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ cec_keypress key; ++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; +++ key.duration = 0; ++ ++ { ++ CLockObject lock(m_mutex); ++@@ -1140,8 +1141,7 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_configuration.iComboKeyTimeoutMs : CEC_DEFAULT_COMBO_TIMEOUT_MS); ++ ++ if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- ((m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime >= iTimeoutMs) || ++- (m_iCurrentButton != comboKey && m_releaseButtontime && iNow >= (uint64_t)m_releaseButtontime))) +++ m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime >= iTimeoutMs) ++ { ++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = m_iCurrentButton; ++@@ -1155,9 +1155,23 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_releasedButtoncount = 0; ++ } ++ else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && +++ m_iCurrentButton != comboKey && m_releaseButtontime && iNow >= (uint64_t)m_releaseButtontime) +++ { +++ key.duration = (unsigned int) (iNow - m_initialButtontime); +++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; +++ +++ m_iCurrentButton = CEC_USER_CONTROL_CODE_UNKNOWN; +++ m_initialButtontime = 0; +++ m_updateButtontime = 0; +++ m_repeatButtontime = 0; +++ m_releaseButtontime = 0; +++ m_pressedButtoncount = 0; +++ m_releasedButtoncount = 0; +++ } +++ else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++ (m_iCurrentButton != comboKey && m_repeatButtontime && iNow >= (uint64_t)m_repeatButtontime)) ++ { ++- key.duration = 0; +++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = 
m_iCurrentButton; ++ m_repeatButtontime = iNow + m_configuration.iButtonRepeatRateMs; ++ timeout = std::min((uint64_t)timeout, m_repeatButtontime - iNow); ++@@ -1176,8 +1190,8 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ timeout = CEC_PROCESSOR_SIGNAL_WAIT_TIME; ++ } ++ } ++- LIB_CEC->AddLog(CEC_LOG_DEBUG, "key %s: %s (%1x) timeout:%dms (rel:%d,rep:%d,prs:%d,rel:%d)", key.keycode == CEC_USER_CONTROL_CODE_UNKNOWN ? "idle" : key.duration ? "released" : "repeated", ++- ToString(m_iCurrentButton), m_iCurrentButton, timeout, (int)(m_releaseButtontime ? m_releaseButtontime - iNow : 0), (int)(m_repeatButtontime ? m_repeatButtontime - iNow : 0), m_pressedButtoncount, m_releasedButtoncount); +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "Key %s: %s (duration:%d) (%1x) timeout:%dms (rel:%d,rep:%d,prs:%d,rel:%d)", ToString(m_iCurrentButton), key.keycode == CEC_USER_CONTROL_CODE_UNKNOWN ? "idle" : m_repeatButtontime ? "repeated" : "released", key.duration, +++ m_iCurrentButton, timeout, (int)(m_releaseButtontime ? m_releaseButtontime - iNow : 0), (int)(m_repeatButtontime ? 
m_repeatButtontime - iNow : 0), m_pressedButtoncount, m_releasedButtoncount); ++ } ++ ++ if (key.keycode != CEC_USER_CONTROL_CODE_UNKNOWN) ++-- ++1.9.1 ++ ++ ++From 1ea01f59d8186d4d53af41961aaccbbc11651115 Mon Sep 17 00:00:00 2001 ++From: popcornmix <popcornmix@gmail.com> ++Date: Wed, 5 Nov 2014 21:04:25 +0000 ++Subject: [PATCH 6/6] squash: Fix for stop needing to be pressed twice ++ ++--- ++ src/libcec/CECClient.cpp | 17 ++++++++--------- ++ 1 file changed, 8 insertions(+), 9 deletions(-) ++ ++diff --git a/src/libcec/CECClient.cpp b/src/libcec/CECClient.cpp ++index 1946148..f4f114b 100644 ++--- a/src/libcec/CECClient.cpp +++++ b/src/libcec/CECClient.cpp ++@@ -1131,6 +1131,8 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++ key.duration = 0; ++ +++ if (m_iCurrentButton == CEC_USER_CONTROL_CODE_UNKNOWN) +++ return timeout; ++ { ++ CLockObject lock(m_mutex); ++ uint64_t iNow = GetTimeMs(); ++@@ -1140,8 +1142,7 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ uint32_t iTimeoutMs(m_configuration.clientVersion >= LIBCEC_VERSION_TO_UINT(2, 0, 5) ? 
++ m_configuration.iComboKeyTimeoutMs : CEC_DEFAULT_COMBO_TIMEOUT_MS); ++ ++- if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime >= iTimeoutMs) +++ if (m_iCurrentButton == comboKey && iTimeoutMs > 0 && iNow - m_updateButtontime >= iTimeoutMs) ++ { ++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = m_iCurrentButton; ++@@ -1154,8 +1155,7 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_pressedButtoncount = 0; ++ m_releasedButtoncount = 0; ++ } ++- else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- m_iCurrentButton != comboKey && m_releaseButtontime && iNow >= (uint64_t)m_releaseButtontime) +++ else if (m_iCurrentButton != comboKey && m_releaseButtontime && iNow >= (uint64_t)m_releaseButtontime) ++ { ++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = CEC_USER_CONTROL_CODE_UNKNOWN; ++@@ -1168,8 +1168,7 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ m_pressedButtoncount = 0; ++ m_releasedButtoncount = 0; ++ } ++- else if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && ++- (m_iCurrentButton != comboKey && m_repeatButtontime && iNow >= (uint64_t)m_repeatButtontime)) +++ else if (m_iCurrentButton != comboKey && m_repeatButtontime && iNow >= (uint64_t)m_repeatButtontime) ++ { ++ key.duration = (unsigned int) (iNow - m_initialButtontime); ++ key.keycode = m_iCurrentButton; ++@@ -1178,11 +1177,11 @@ uint16_t CCECClient::CheckKeypressTimeout(void) ++ } ++ else ++ { ++- if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton == comboKey && iTimeoutMs > 0) +++ if (m_iCurrentButton == comboKey && iTimeoutMs > 0) ++ timeout = std::min((uint64_t)timeout, m_updateButtontime - iNow + iTimeoutMs); ++- if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey && m_releaseButtontime) +++ if (m_iCurrentButton != comboKey && m_releaseButtontime) ++ timeout = 
std::min((uint64_t)timeout, m_releaseButtontime - iNow); ++- if (m_iCurrentButton != CEC_USER_CONTROL_CODE_UNKNOWN && m_iCurrentButton != comboKey && m_repeatButtontime) +++ if (m_iCurrentButton != comboKey && m_repeatButtontime) ++ timeout = std::min((uint64_t)timeout, m_repeatButtontime - iNow); ++ if (timeout > CEC_PROCESSOR_SIGNAL_WAIT_TIME) ++ { ++-- ++1.9.1 ++ + +From fcfb4a5068565c3ca935cf16932f6f45f34a33d0 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 19 Mar 2016 17:15:29 +0000 +Subject: [PATCH 55/67] cec: hack: pretend bump to 3.1.0 + +--- + tools/depends/target/libcec/Makefile | 1 + + tools/depends/target/libcec/bump.patch | 21 +++++++++++++++++++++ + 2 files changed, 22 insertions(+) + create mode 100644 tools/depends/target/libcec/bump.patch + +diff --git a/tools/depends/target/libcec/Makefile b/tools/depends/target/libcec/Makefile +index ddf996361ad5b46dd2b33fb035b2ed133914a612..39ba882d0c7e270b4d1d1d566027cbaffb76b587 100644 +--- a/tools/depends/target/libcec/Makefile ++++ b/tools/depends/target/libcec/Makefile +@@ -22,6 +22,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + rm -rf $(PLATFORM); mkdir -p $(PLATFORM)/build + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); patch -p1 < ../popcornmix.patch ++ cd $(PLATFORM); patch -p1 < ../bump.patch + cd $(PLATFORM)/build; $(CMAKE) -DBUILD_SHARED_LIBS=1 -DSKIP_PYTHON_WRAPPER:STRING=1 -DCMAKE_INSTALL_LIBDIR=$(PREFIX)/lib .. 
+ + $(LIBDYLIB): $(PLATFORM) +diff --git a/tools/depends/target/libcec/bump.patch b/tools/depends/target/libcec/bump.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..9e55e51068e7befd9d4ff003156ce1ff4cc56c0e +--- /dev/null ++++ b/tools/depends/target/libcec/bump.patch +@@ -0,0 +1,21 @@ ++commit 49a1728feabca68b8424a8b22abec9ee87b9aa99 ++Author: Lars Op den Kamp <lars@opdenkamp.eu> ++Date: Wed Jan 20 01:06:50 2016 +0100 ++ ++ bump to 3.1.0 ++ ++diff --git a/CMakeLists.txt b/CMakeLists.txt ++index 23d71fc..173f625 100644 ++--- a/CMakeLists.txt +++++ b/CMakeLists.txt ++@@ -2,8 +2,8 @@ project(libcec) ++ cmake_minimum_required(VERSION 2.8.9) ++ ++ set(LIBCEC_VERSION_MAJOR 3) ++-set(LIBCEC_VERSION_MINOR 0) ++-set(LIBCEC_VERSION_PATCH 0) +++set(LIBCEC_VERSION_MINOR 1) +++set(LIBCEC_VERSION_PATCH 0) ++ ++ # cec-client ++ add_subdirectory(src/cec-client) + +From 5dc7976451fc1ab8c7aeac2d9b4090a71e5a857d Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 28 Oct 2014 00:19:40 +0000 +Subject: [PATCH 56/67] [cec] Add settings for configuring button repeats + +--- + addons/resource.language.en_gb/resources/strings.po | 17 +++++++++++++++-- + system/peripherals.xml | 4 +++- + xbmc/peripherals/devices/PeripheralCecAdapter.cpp | 16 ++++++++++++++++ + 3 files changed, 34 insertions(+), 3 deletions(-) + +diff --git a/addons/resource.language.en_gb/resources/strings.po b/addons/resource.language.en_gb/resources/strings.po +index 8cb9f8503c29c54cd0cb55018f867a45248c649f..a4c4387b0a78e4dc9ed875e72c4ce72dd2741fe2 100644 +--- a/addons/resource.language.en_gb/resources/strings.po ++++ b/addons/resource.language.en_gb/resources/strings.po +@@ -19392,8 +19392,6 @@ msgctxt "#38111" + msgid "This category contains other settings for the GUI interface" + msgstr "" + +-#empty strings from id 38112 to 38999 +- + #: system/settings/settings.xml + msgctxt "#39000" + msgid "HD and up" +@@ -19414,6 +19412,21 @@ msgctxt "#39003" + msgid "Accelerate 
h264" + msgstr "" + ++#: system/peripherals.xml ++msgctxt "#38050" ++msgid "Remote button press delay before repeating (ms)" ++msgstr "" ++ ++#: system/peripherals.xml ++msgctxt "#38051" ++msgid "Remote button press repeat rate (ms)" ++msgstr "" ++ ++#: system/peripherals.xml ++msgctxt "#38052" ++msgid "Remote button press release time (ms)" ++msgstr "" ++ + msgctxt "#38190" + msgid "Extract thumbnails from video files" + msgstr "" +diff --git a/system/peripherals.xml b/system/peripherals.xml +index ec3c3fe39db5f2272b3a9e49b34de3a4a063aab0..c3dbae029d397ab2e6948296df64b7a6f174b2af 100644 +--- a/system/peripherals.xml ++++ b/system/peripherals.xml +@@ -31,7 +31,9 @@ + <setting key="device_type" type="int" value="1" configurable="0" /> + <setting key="wake_devices_advanced" type="string" value="" configurable="0" /> + <setting key="standby_devices_advanced" type="string" value="" configurable="0" /> +- <setting key="double_tap_timeout_ms" type="int" min="0" value="300" configurable="0" /> ++ <setting key="double_tap_timeout_ms" type="int" min="50" max="1000" step="50" value="300" label="38050" order="16" /> ++ <setting key="button_repeat_rate_ms" type="int" min="0" max="250" step="10" value="0" label="38051" order="17" /> ++ <setting key="button_release_delay_ms" type="int" min="0" max="500" step="50" value="0" label="38052" order="18" /> + </peripheral> + + <peripheral vendor_product="2548:1001,2548:1002" bus="usb" name="Pulse-Eight CEC Adapter" mapTo="cec"> +diff --git a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +index f784bded97de9491d3eaaee2fb6efc86e74dd07b..8ab327c34e08a14c598b758a67384f1c6a838e6c 100644 +--- a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp ++++ b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +@@ -1284,6 +1284,20 @@ void CPeripheralCecAdapter::SetConfigurationFromLibCEC(const CEC::libcec_configu + m_configuration.bSendInactiveSource = config.bSendInactiveSource; + bChanged |= 
SetSetting("send_inactive_source", m_configuration.bSendInactiveSource == 1); + ++#if defined(CEC_DOUBLE_TAP_TIMEOUT_MS_OLD) ++ m_configuration.iDoubleTapTimeout50Ms = config.iDoubleTapTimeout50Ms; ++ bChanged |= SetSetting("double_tap_timeout_ms", (int)m_configuration.iDoubleTapTimeout50Ms * 50); ++#else ++ m_configuration.iDoubleTapTimeoutMs = config.iDoubleTapTimeoutMs; ++ bChanged |= SetSetting("double_tap_timeout_ms", (int)m_configuration.iDoubleTapTimeoutMs); ++#endif ++ ++ m_configuration.iButtonRepeatRateMs = config.iButtonRepeatRateMs; ++ bChanged |= SetSetting("button_repeat_rate_ms", (int)m_configuration.iButtonRepeatRateMs); ++ ++ m_configuration.iButtonReleaseDelayMs = config.iButtonReleaseDelayMs; ++ bChanged |= SetSetting("button_release_delay_ms", (int)m_configuration.iButtonReleaseDelayMs); ++ + m_configuration.iFirmwareVersion = config.iFirmwareVersion; + m_configuration.bShutdownOnStandby = config.bShutdownOnStandby; + +@@ -1388,6 +1402,8 @@ void CPeripheralCecAdapter::SetConfigurationFromSettings(void) + // backwards compatibility. 
will be removed once the next major release of libCEC is out + m_configuration.iDoubleTapTimeoutMs = GetSettingInt("double_tap_timeout_ms"); + #endif ++ m_configuration.iButtonRepeatRateMs = GetSettingInt("button_repeat_rate_ms"); ++ m_configuration.iButtonReleaseDelayMs = GetSettingInt("button_release_delay_ms"); + + if (GetSettingBool("pause_playback_on_deactivate")) + { + +From c43daf2021b96de898d3522b5248108f9d9af488 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Mon, 3 Nov 2014 23:17:46 +0000 +Subject: [PATCH 57/67] [cec] Don't discard buttons when repeat mode is enabled + +--- + xbmc/peripherals/devices/PeripheralCecAdapter.cpp | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +index 8ab327c34e08a14c598b758a67384f1c6a838e6c..8b04a37a803c2f0ff15de35a10186e3dc9c0d130 100644 +--- a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp ++++ b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +@@ -776,7 +776,10 @@ void CPeripheralCecAdapter::PushCecKeypress(const CecButtonPress &key) + CLog::Log(LOGDEBUG, "%s - received key %2x duration %d", __FUNCTION__, key.iButton, key.iDuration); + + CSingleLock lock(m_critSection); +- if (key.iDuration > 0) ++ // avoid the queue getting too long ++ if (m_configuration.iButtonRepeatRateMs && m_buttonQueue.size() > 5) ++ return; ++ if (m_configuration.iButtonRepeatRateMs == 0 && key.iDuration > 0) + { + if (m_currentButton.iButton == key.iButton && m_currentButton.iDuration == 0) + { + +From 8e198d48296f237f20faa59de880eaef75752459 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Tue, 4 Nov 2014 18:50:00 +0000 +Subject: [PATCH 58/67] [cec] Temp - more logging + +--- + xbmc/peripherals/devices/PeripheralCecAdapter.cpp | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp 
b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +index 8b04a37a803c2f0ff15de35a10186e3dc9c0d130..259649721512e744fd89bfe66af6bc6324c82653 100644 +--- a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp ++++ b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +@@ -773,12 +773,15 @@ void CPeripheralCecAdapter::GetNextKey(void) + + void CPeripheralCecAdapter::PushCecKeypress(const CecButtonPress &key) + { +- CLog::Log(LOGDEBUG, "%s - received key %2x duration %d", __FUNCTION__, key.iButton, key.iDuration); ++ CLog::Log(LOGDEBUG, "%s - received key %2x duration %d (rep:%d size:%d)", __FUNCTION__, key.iButton, key.iDuration, m_configuration.iButtonRepeatRateMs, (int)m_buttonQueue.size()); + + CSingleLock lock(m_critSection); + // avoid the queue getting too long + if (m_configuration.iButtonRepeatRateMs && m_buttonQueue.size() > 5) ++ { ++ CLog::Log(LOGDEBUG, "%s - discarded key %2x", __FUNCTION__, key.iButton); + return; ++ } + if (m_configuration.iButtonRepeatRateMs == 0 && key.iDuration > 0) + { + if (m_currentButton.iButton == key.iButton && m_currentButton.iDuration == 0) +@@ -787,6 +790,7 @@ void CPeripheralCecAdapter::PushCecKeypress(const CecButtonPress &key) + if (m_bHasButton) + m_currentButton.iDuration = key.iDuration; + // ignore this one, since it's already been handled by xbmc ++ CLog::Log(LOGDEBUG, "%s - ignored key %2x", __FUNCTION__, key.iButton); + return; + } + // if we received a keypress with a duration set, try to find the same one without a duration set, and replace it +@@ -797,6 +801,7 @@ void CPeripheralCecAdapter::PushCecKeypress(const CecButtonPress &key) + if ((*it).iDuration == 0) + { + // replace this entry ++ CLog::Log(LOGDEBUG, "%s - replaced key %2x", __FUNCTION__, key.iButton); + (*it).iDuration = key.iDuration; + return; + } +@@ -806,6 +811,7 @@ void CPeripheralCecAdapter::PushCecKeypress(const CecButtonPress &key) + } + } + ++ CLog::Log(LOGDEBUG, "%s - added key %2x", __FUNCTION__, key.iButton); + m_buttonQueue.push_back(key); + } + + 
+From d61469373a92dbcb58bc8e38fc8d921106f61943 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 22 Jan 2016 12:29:41 +0000 +Subject: [PATCH 59/67] [cec] Update for libcec 3.1.0 + +--- + configure.ac | 4 ++-- + xbmc/peripherals/devices/PeripheralCecAdapter.cpp | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/configure.ac b/configure.ac +index d498f958b83813cbf5fce0a86bf07743665b5ed4..277c97f72b20650ba6a594e6363b9a863e0310a8 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -1433,9 +1433,9 @@ if test "x$use_libcec" != "xno"; then + # libcec is dyloaded, so we need to check for its headers and link any depends. + if test "x$use_libcec" != "xno"; then + if test "x$use_libcec" != "xauto"; then +- PKG_CHECK_MODULES([CEC],[libcec >= 3.0.0],,[use_libcec="no";AC_MSG_ERROR($libcec_disabled)]) ++ PKG_CHECK_MODULES([CEC],[libcec >= 3.1.0],,[use_libcec="no";AC_MSG_ERROR($libcec_disabled)]) + else +- PKG_CHECK_MODULES([CEC],[libcec >= 3.0.0],,[use_libcec="no";AC_MSG_RESULT($libcec_disabled)]) ++ PKG_CHECK_MODULES([CEC],[libcec >= 3.1.0],,[use_libcec="no";AC_MSG_RESULT($libcec_disabled)]) + fi + + if test "x$use_libcec" != "xno"; then +diff --git a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +index 259649721512e744fd89bfe66af6bc6324c82653..ae7fd02ea17cb11318083f853d6b1641af4ecadb 100644 +--- a/xbmc/peripherals/devices/PeripheralCecAdapter.cpp ++++ b/xbmc/peripherals/devices/PeripheralCecAdapter.cpp +@@ -43,7 +43,7 @@ using namespace PERIPHERALS; + using namespace ANNOUNCEMENT; + using namespace CEC; + +-#define CEC_LIB_SUPPORTED_VERSION LIBCEC_VERSION_TO_UINT(3, 0, 0) ++#define CEC_LIB_SUPPORTED_VERSION LIBCEC_VERSION_TO_UINT(3, 1, 0) + + /* time in seconds to ignore standby commands from devices after the screensaver has been activated */ + #define SCREENSAVER_TIMEOUT 20 +@@ -1326,7 +1326,7 @@ void CPeripheralCecAdapter::SetConfigurationFromLibCEC(const 
CEC::libcec_configu + void CPeripheralCecAdapter::SetConfigurationFromSettings(void) + { + // client version matches the version of libCEC that we originally used the API from +- m_configuration.clientVersion = LIBCEC_VERSION_TO_UINT(3, 0, 0); ++ m_configuration.clientVersion = CEC_LIB_SUPPORTED_VERSION; + + // device name 'XBMC' + snprintf(m_configuration.strDeviceName, 13, "%s", GetSettingString("device_name").c_str()); + +From f50610a41e776cb15acbb2740587cf65b47811d0 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sat, 19 Mar 2016 14:46:41 +0000 +Subject: [PATCH 60/67] libcec: use system audio mode request instead of power + on to start AVR reliable + +--- + tools/depends/target/libcec/208.patch | 38 +++++++++++++++++++++++++++++++++++ + tools/depends/target/libcec/Makefile | 1 + + 2 files changed, 39 insertions(+) + create mode 100644 tools/depends/target/libcec/208.patch + +diff --git a/tools/depends/target/libcec/208.patch b/tools/depends/target/libcec/208.patch +new file mode 100644 +index 0000000000000000000000000000000000000000..3dc5adf022e80c3337ad69b7c7d7346daafbfdd3 +--- /dev/null ++++ b/tools/depends/target/libcec/208.patch +@@ -0,0 +1,38 @@ ++From f70c4d76e1d9c0219a3927b6b66090b7575e7933 Mon Sep 17 00:00:00 2001 ++From: Gerald Dachs <gda@dachsweb.de> ++Date: Thu, 17 Mar 2016 12:12:51 +0100 ++Subject: [PATCH] use system audio mode request instead of power on to start ++ AVR reliable ++ ++--- ++ src/libcec/devices/CECBusDevice.cpp | 13 +++++++++---- ++ 1 file changed, 9 insertions(+), 4 deletions(-) ++ ++diff --git a/src/libcec/devices/CECBusDevice.cpp b/src/libcec/devices/CECBusDevice.cpp ++index 55939d1..e2d5ea3 100644 ++--- a/src/libcec/devices/CECBusDevice.cpp +++++ b/src/libcec/devices/CECBusDevice.cpp ++@@ -1025,14 +1025,19 @@ bool CCECBusDevice::ActivateSource(uint64_t iDelay /* = 0 */) ++ bool bReturn(true); ++ if (iDelay == 0) ++ { ++- /** some AVRs fail to be powered up by the TV when it powers up. 
power up the AVR explicitly */ +++ /** send system audio mode request if AVR exists */ ++ if (m_iLogicalAddress != CECDEVICE_AUDIOSYSTEM) ++ { ++ CCECBusDevice* audioSystem(m_processor->GetDevice(CECDEVICE_AUDIOSYSTEM)); ++- if (audioSystem && audioSystem->IsPresent() && audioSystem->GetPowerStatus(m_iLogicalAddress) != CEC_POWER_STATUS_ON) +++ if (audioSystem && audioSystem->IsPresent()) ++ { ++- LIB_CEC->AddLog(CEC_LOG_DEBUG, "powering up the AVR"); ++- audioSystem->PowerOn(m_iLogicalAddress); +++ cec_command command; +++ +++ LIB_CEC->AddLog(CEC_LOG_DEBUG, "sending system audio mode request for '%s'", ToString(m_iLogicalAddress)); +++ cec_command::Format(command, m_iLogicalAddress, CECDEVICE_AUDIOSYSTEM, CEC_OPCODE_SYSTEM_AUDIO_MODE_REQUEST); +++ command.parameters.PushBack((uint8_t) ((m_iPhysicalAddress >> 8) & 0xFF)); +++ command.parameters.PushBack((uint8_t) (m_iPhysicalAddress & 0xFF)); +++ bReturn = m_handler->Transmit(command, false, false); ++ } ++ } ++ +diff --git a/tools/depends/target/libcec/Makefile b/tools/depends/target/libcec/Makefile +index 39ba882d0c7e270b4d1d1d566027cbaffb76b587..4565dc9f6fc0b3e6b49133443c19e10767d475eb 100644 +--- a/tools/depends/target/libcec/Makefile ++++ b/tools/depends/target/libcec/Makefile +@@ -23,6 +23,7 @@ $(PLATFORM): $(TARBALLS_LOCATION)/$(ARCHIVE) $(DEPS) + cd $(PLATFORM); $(ARCHIVE_TOOL) $(ARCHIVE_TOOL_FLAGS) $(TARBALLS_LOCATION)/$(ARCHIVE) + cd $(PLATFORM); patch -p1 < ../popcornmix.patch + cd $(PLATFORM); patch -p1 < ../bump.patch ++ cd $(PLATFORM); patch -p1 < ../208.patch + cd $(PLATFORM)/build; $(CMAKE) -DBUILD_SHARED_LIBS=1 -DSKIP_PYTHON_WRAPPER:STRING=1 -DCMAKE_INSTALL_LIBDIR=$(PREFIX)/lib .. 
+ + $(LIBDYLIB): $(PLATFORM) + +From b3074634af438e1dd9de238718364d82d4ee46e2 Mon Sep 17 00:00:00 2001 +From: Rainer Hochecker <fernetmenta@online.de> +Date: Tue, 22 Mar 2016 09:51:52 +0100 +Subject: [PATCH 61/67] python: use kodi provided cert if available + +--- + xbmc/interfaces/python/XBPython.cpp | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/xbmc/interfaces/python/XBPython.cpp b/xbmc/interfaces/python/XBPython.cpp +index d762bf4f8fdca2a1081026089977ae8987c88b66..ff4ed7db26845905108ea0ae504e4f589f9c7d0f 100644 +--- a/xbmc/interfaces/python/XBPython.cpp ++++ b/xbmc/interfaces/python/XBPython.cpp +@@ -593,9 +593,12 @@ bool XBPython::OnScriptInitialized(ILanguageInvoker *invoker) + CEnvironment::putenv(buf); + buf = "OS=win32"; + CEnvironment::putenv(buf); ++#endif + +-#elif defined(TARGET_ANDROID) +- setenv("SSL_CERT_FILE", CSpecialProtocol::TranslatePath("special://xbmc/system/certs/cacert.pem").c_str(), 1); ++#if !defined(TARGET_WINDOWS) ++ // use Kodi provided cert if available ++ if (XFILE::CFile::Exists("special://xbmc/system/certs/cacert.pem")) ++ setenv("SSL_CERT_FILE", CSpecialProtocol::TranslatePath("special://xbmc/system/certs/cacert.pem").c_str(), 1); + #endif + + if (PyEval_ThreadsInitialized()) + +From bd545157e573f1904ac552693524a4bce8789c5d Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 25 May 2016 18:31:17 +0100 +Subject: [PATCH 62/67] rbp: Hard code the number of buffers to improve audio + sync + +--- + system/settings/rbp.xml | 6 ++++++ + xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp | 4 ++++ + 2 files changed, 10 insertions(+) + +diff --git a/system/settings/rbp.xml b/system/settings/rbp.xml +index 2e6c903df5e4d2cd064466db0ef55deada5cdc80..29d8f92c123875a83eae4832c1f6246a6deefc3c 100644 +--- a/system/settings/rbp.xml ++++ b/system/settings/rbp.xml +@@ -92,6 +92,12 @@ + <control type="toggle" /> + </setting> + </group> ++ <group id="3"> ++ <setting 
id="videoscreen.noofbuffers"> ++ <visible>false</visible> ++ <default>2</default> <!-- double buffered --> ++ </setting> ++ </group> + </category> + <category id="audio"> + <group id="1"> +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp +index db537d33a5d55fc856bbd3ec0a7846df3bb060be..ee34c0b31da3b05fabae5e47ad51db2f09e682c3 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/RenderManager.cpp +@@ -1066,7 +1066,11 @@ void CRenderManager::UpdateDisplayLatency() + refresh = 0; // No idea about refresh rate when windowed, just get the default latency + m_displayLatency = (double) g_advancedSettings.GetDisplayLatency(refresh); + ++#ifdef TARGET_RASPBERRY_PI ++ int buffers = CSettings::GetInstance().GetBool("videoplayer.usedisplayasclock") ? 1:2; ++#else + int buffers = g_Windowing.NoOfBuffers(); ++#endif + m_displayLatency += (buffers - 1) / fps; + + } + +From cb6cdf5bf5a392fffc7e58631a4767ab8836ea02 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 8 Jun 2016 01:11:26 +0100 +Subject: [PATCH 63/67] omxvideo: Remove call to AutoInterlaceMethod. 
Treat + auto as advanced + +--- + xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp | 2 +- + xbmc/cores/omxplayer/OMXVideo.cpp | 5 ++++- + 2 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +index d65857779628debfc85b47b8dd283513edb5a319..523e52c27de2711ca03c6b06767c940be6e3d177 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +@@ -632,7 +632,7 @@ bool CMMALRenderer::Supports(ESCALINGMETHOD method) + + EINTERLACEMETHOD CMMALRenderer::AutoInterlaceMethod() + { +- return m_sourceWidth * m_sourceHeight <= 576 * 720 ? VS_INTERLACEMETHOD_MMAL_ADVANCED : VS_INTERLACEMETHOD_MMAL_BOB; ++ return VS_INTERLACEMETHOD_MMAL_ADVANCED; + } + + void CMMALRenderer::SetVideoRect(const CRect& InSrcRect, const CRect& InDestRect) +diff --git a/xbmc/cores/omxplayer/OMXVideo.cpp b/xbmc/cores/omxplayer/OMXVideo.cpp +index de15bfff05d23949d6e6f4304b15aa7d79120dc2..79685835382422d0a22d7b75d7c1408e2c053403 100644 +--- a/xbmc/cores/omxplayer/OMXVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXVideo.cpp +@@ -242,7 +242,10 @@ bool COMXVideo::PortSettingsChanged(ResolutionUpdateInfo &resinfo) + + if(m_deinterlace) + { +- EINTERLACEMETHOD interlace_method = m_renderManager.AutoInterlaceMethod(CMediaSettings::GetInstance().GetCurrentVideoSettings().m_InterlaceMethod); ++ EINTERLACEMETHOD interlace_method = CMediaSettings::GetInstance().GetCurrentVideoSettings().m_InterlaceMethod; ++ if (interlace_method == VS_INTERLACEMETHOD_AUTO) ++ interlace_method = VS_INTERLACEMETHOD_MMAL_ADVANCED; ++ + bool advanced_deinterlace = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED || interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF; + bool half_framerate = interlace_method == VS_INTERLACEMETHOD_MMAL_ADVANCED_HALF || interlace_method == 
VS_INTERLACEMETHOD_MMAL_BOB_HALF; + + +From 120051ba33cc7dc2885b3bf3abf49c4903bee0f8 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Wed, 15 Jun 2016 23:41:43 +0100 +Subject: [PATCH 64/67] mmal_codec: Use EOS through codec to determine drain is + complete + +Rather than relying on a timeout from codec, feed an EOS through to ensure all frames have been returned +--- + .../VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 72 ++++++++++++++-------- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h | 3 + + 2 files changed, 49 insertions(+), 26 deletions(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index 470083b2256d23488ca476cebfe8d3ef9f62377e..cd0d30d77cc1cd8803ccde317bcc2f3cd61000e4 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -117,6 +117,9 @@ CMMALVideo::CMMALVideo(CProcessInfo &processInfo) : CDVDVideoCodec(processInfo) + m_fps = 0.0f; + m_num_decoded = 0; + m_codecControlFlags = 0; ++ m_got_eos = false; ++ m_packet_num = 0; ++ m_packet_num_eos = ~0; + } + + CMMALVideo::~CMMALVideo() +@@ -243,7 +246,7 @@ void CMMALVideo::dec_output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf + { + if (!(buffer->cmd == 0 && buffer->length > 0)) + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) +- CLog::Log(LOGDEBUG, "%s::%s port:%p buffer %p, len %d cmd:%x", CLASSNAME, __func__, port, buffer, buffer->length, buffer->cmd); ++ CLog::Log(LOGDEBUG, "%s::%s port:%p buffer %p, len %d cmd:%x flags:%x", CLASSNAME, __func__, port, buffer, buffer->length, buffer->cmd, buffer->flags); + + bool kept = false; + +@@ -288,6 +291,12 @@ void CMMALVideo::dec_output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf + kept = true; + } + } ++ if (buffer->flags & MMAL_BUFFER_HEADER_FLAG_EOS) ++ { ++ CSingleLock lock(m_output_mutex); ++ m_got_eos = true; ++ m_output_cond.notifyAll(); ++ } + } + else if 
(buffer->cmd == MMAL_EVENT_FORMAT_CHANGED) + { +@@ -790,11 +799,18 @@ int CMMALVideo::Decode(uint8_t* pData, int iSize, double dts, double pts) + + MMAL_BUFFER_HEADER_T *buffer; + MMAL_STATUS_T status; +- ++ bool drain = (m_codecControlFlags & DVD_CODEC_CTRL_DRAIN) ? true : false; ++ bool send_eos = drain && !m_got_eos && m_packet_num_eos != m_packet_num; ++ // we don't get an EOS response if no packets have been sent ++ if (m_packet_num == 0) ++ { ++ send_eos = false; ++ m_got_eos = true; ++ } + Prime(); + while (1) + { +- if (pData) ++ if (pData || send_eos) + { + // 500ms timeout + { +@@ -817,17 +833,25 @@ int CMMALVideo::Decode(uint8_t* pData, int iSize, double dts, double pts) + if (m_dropState) + buffer->flags |= MMAL_BUFFER_HEADER_FLAG_USER3; + +- memcpy(buffer->data, pData, buffer->length); ++ if (pData) ++ memcpy(buffer->data, pData, buffer->length); + iSize -= buffer->length; + pData += buffer->length; + + if (iSize == 0) ++ { ++ m_packet_num++; + buffer->flags |= MMAL_BUFFER_HEADER_FLAG_FRAME_END; +- ++ if (send_eos) ++ { ++ buffer->flags |= MMAL_BUFFER_HEADER_FLAG_EOS; ++ m_packet_num_eos = m_packet_num; ++ m_got_eos = false; ++ } ++ } + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) + CLog::Log(LOGDEBUG, "%s::%s - %-8p %-6d/%-6d dts:%.3f pts:%.3f flags:%x ready_queue(%d)", + CLASSNAME, __func__, buffer, buffer->length, iSize, dts == DVD_NOPTS_VALUE ? 0.0 : dts*1e-6, pts == DVD_NOPTS_VALUE ? 
0.0 : pts*1e-6, buffer->flags, m_output_ready.size()); +- assert((int)buffer->length > 0); + status = mmal_port_send_buffer(m_dec_input, buffer); + if (status != MMAL_SUCCESS) + { +@@ -879,36 +903,28 @@ int CMMALVideo::Decode(uint8_t* pData, int iSize, double dts, double pts) + bool full = queued > DVD_MSEC_TO_TIME(1000); + int ret = 0; + +- unsigned int pics = m_output_ready.size(); +- if (m_preroll && (pics >= GetAllowedReferences() || m_codecControlFlags & DVD_CODEC_CTRL_DRAIN)) +- m_preroll = false; +- if (pics > 0 && !m_preroll) +- ret |= VC_PICTURE; +- if ((m_preroll || pics <= 1) && mmal_queue_length(m_dec_input_pool->queue) > 0 && !(m_codecControlFlags & DVD_CODEC_CTRL_DRAIN)) +- ret |= VC_BUFFER; +- +- bool slept = false; +- if (!ret) ++ XbmcThreads::EndTime delay(500); ++ while (!ret && !delay.IsTimePast()) + { +- slept = true; ++ unsigned int pics = m_output_ready.size(); ++ if (m_preroll && (pics >= GetAllowedReferences() || drain)) ++ m_preroll = false; ++ if (pics > 0 && !m_preroll) ++ ret |= VC_PICTURE; ++ if ((m_preroll || pics <= 1) && mmal_queue_length(m_dec_input_pool->queue) > 0 && (!drain || m_got_eos || m_packet_num_eos != m_packet_num)) ++ ret |= VC_BUFFER; ++ if (!ret) + { + // otherwise we busy spin + lock.Leave(); + CSingleLock output_lock(m_output_mutex); +- m_output_cond.wait(output_lock, 30); ++ m_output_cond.wait(output_lock, delay.MillisLeft()); + lock.Enter(); + } +- unsigned int pics = m_output_ready.size(); +- if (m_preroll && (pics >= GetAllowedReferences() || m_codecControlFlags & DVD_CODEC_CTRL_DRAIN)) +- m_preroll = false; +- if (pics > 0 && !m_preroll) +- ret |= VC_PICTURE; +- if ((m_preroll || pics <= 1) && (mmal_queue_length(m_dec_input_pool->queue) > 0 || m_codecControlFlags & DVD_CODEC_CTRL_DRAIN)) +- ret |= VC_BUFFER; + } + + if (g_advancedSettings.CanLogComponent(LOGVIDEO)) +- CLog::Log(LOGDEBUG, "%s::%s - ret(%x) pics(%d) inputs(%d) slept(%d) queued(%.2f) (%.2f:%.2f) full(%d) flags(%x) preroll(%d)", CLASSNAME, __func__, 
ret, m_output_ready.size(), mmal_queue_length(m_dec_input_pool->queue), slept, queued*1e-6, m_demuxerPts*1e-6, m_decoderPts*1e-6, full, m_codecControlFlags, m_preroll); ++ CLog::Log(LOGDEBUG, "%s::%s - ret(%x) pics(%d) inputs(%d) slept(%2d) queued(%.2f) (%.2f:%.2f) full(%d) flags(%x) preroll(%d) eos(%d %d/%d)", CLASSNAME, __func__, ret, m_output_ready.size(), mmal_queue_length(m_dec_input_pool->queue), 500-delay.MillisLeft(), queued*1e-6, m_demuxerPts*1e-6, m_decoderPts*1e-6, full, m_codecControlFlags, m_preroll, m_got_eos, m_packet_num, m_packet_num_eos); + + return ret; + } +@@ -981,6 +997,10 @@ void CMMALVideo::Reset(void) + m_demuxerPts = DVD_NOPTS_VALUE; + m_codecControlFlags = 0; + m_dropState = false; ++ m_num_decoded = 0; ++ m_got_eos = false; ++ m_packet_num = 0; ++ m_packet_num_eos = ~0; + m_preroll = !m_hints.stills && (m_speed == DVD_PLAYSPEED_NORMAL || m_speed == DVD_PLAYSPEED_PAUSE); + } + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h +index d008c6c538819a05be9925ab8cd342b131e511d8..122a5e24f5ffb1bf2415867ec98d8e5104339ab1 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.h +@@ -134,6 +134,9 @@ protected: + int m_codecControlFlags; + bool m_dropState; + bool m_preroll; ++ bool m_got_eos; ++ uint32_t m_packet_num; ++ uint32_t m_packet_num_eos; + + CCriticalSection m_sharedSection; + MMAL_COMPONENT_T *m_dec; + +From d91a21ae571cc2bf2c5103c8c72fc1262379f4b5 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 17 Jun 2016 16:23:25 +0100 +Subject: [PATCH 65/67] rbp: Update transposed video scaling to match other + platforms + +--- + .../VideoRenderers/HwDecRender/MMALRenderer.cpp | 29 ++++++++++++++++++---- + xbmc/cores/omxplayer/OMXPlayerVideo.cpp | 29 ++++++++++++++++++---- + 2 files changed, 48 insertions(+), 10 deletions(-) + +diff --git 
a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +index 523e52c27de2711ca03c6b06767c940be6e3d177..8a4bf24625a57b11908f4f38588fb348581556a6 100644 +--- a/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp ++++ b/xbmc/cores/VideoPlayer/VideoRenderers/HwDecRender/MMALRenderer.cpp +@@ -656,11 +656,30 @@ void CMMALRenderer::SetVideoRect(const CRect& InSrcRect, const CRect& InDestRect + // fix up transposed video + if (m_renderOrientation == 90 || m_renderOrientation == 270) + { +- float diff = (DestRect.Height() - DestRect.Width()) * 0.5f; +- DestRect.x1 -= diff; +- DestRect.x2 += diff; +- DestRect.y1 += diff; +- DestRect.y2 -= diff; ++ float newWidth, newHeight; ++ float aspectRatio = GetAspectRatio(); ++ // clamp width if too wide ++ if (DestRect.Height() > DestRect.Width()) ++ { ++ newWidth = DestRect.Width(); // clamp to the width of the old dest rect ++ newHeight = newWidth * aspectRatio; ++ } ++ else // else clamp to height ++ { ++ newHeight = DestRect.Height(); // clamp to the height of the old dest rect ++ newWidth = newHeight / aspectRatio; ++ } ++ ++ // calculate the center point of the view and offsets ++ float centerX = DestRect.x1 + DestRect.Width() * 0.5f; ++ float centerY = DestRect.y1 + DestRect.Height() * 0.5f; ++ float diffX = newWidth * 0.5f; ++ float diffY = newHeight * 0.5f; ++ ++ DestRect.x1 = centerX - diffX; ++ DestRect.x2 = centerX + diffX; ++ DestRect.y1 = centerY - diffY; ++ DestRect.y2 = centerY + diffY; + } + + // check if destination rect or video view mode has changed +diff --git a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +index d61dc4f2668f8aca91bce79cfb631034061c491c..ed138297b49c8d3e6b42a1f1fa5fa08bd01be11b 100644 +--- a/xbmc/cores/omxplayer/OMXPlayerVideo.cpp ++++ b/xbmc/cores/omxplayer/OMXPlayerVideo.cpp +@@ -640,11 +640,30 @@ void OMXPlayerVideo::SetVideoRect(const CRect &InSrcRect, const 
CRect &InDestRec + // fix up transposed video + if (m_hints.orientation == 90 || m_hints.orientation == 270) + { +- float diff = (DestRect.Height() - DestRect.Width()) * 0.5f; +- DestRect.x1 -= diff; +- DestRect.x2 += diff; +- DestRect.y1 += diff; +- DestRect.y2 -= diff; ++ float newWidth, newHeight; ++ float aspectRatio = GetAspectRatio(); ++ // clamp width if too wide ++ if (DestRect.Height() > DestRect.Width()) ++ { ++ newWidth = DestRect.Width(); // clamp to the width of the old dest rect ++ newHeight = newWidth * aspectRatio; ++ } ++ else // else clamp to height ++ { ++ newHeight = DestRect.Height(); // clamp to the height of the old dest rect ++ newWidth = newHeight / aspectRatio; ++ } ++ ++ // calculate the center point of the view and offsets ++ float centerX = DestRect.x1 + DestRect.Width() * 0.5f; ++ float centerY = DestRect.y1 + DestRect.Height() * 0.5f; ++ float diffX = newWidth * 0.5f; ++ float diffY = newHeight * 0.5f; ++ ++ DestRect.x1 = centerX - diffX; ++ DestRect.x2 = centerX + diffX; ++ DestRect.y1 = centerY - diffY; ++ DestRect.y2 = centerY + diffY; + } + + // check if destination rect or video view mode has changed + +From bd128ae3789c33616ae6ebd55fbae13984f98477 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Sun, 19 Jun 2016 16:53:49 +0100 +Subject: [PATCH 66/67] mmalcodec: Add another buffer when deinterlacing + +See: http://forum.kodi.tv/showthread.php?tid=276372 +--- + xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +index cd0d30d77cc1cd8803ccde317bcc2f3cd61000e4..6fd59a64dd48c05d1ccc3183adc5deda16e930a2 100644 +--- a/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp ++++ b/xbmc/cores/VideoPlayer/DVDCodecs/Video/MMALCodec.cpp +@@ -400,7 +400,7 @@ bool CMMALVideo::CreateDeinterlace(EINTERLACEMETHOD interlace_method) + 
m_deint_input->userdata = (struct MMAL_PORT_USERDATA_T *)this; + + // Image_fx assumed 3 frames of context. simple deinterlace doesn't require this +- status = mmal_port_parameter_set_uint32(m_deint_input, MMAL_PARAMETER_EXTRA_BUFFERS, GetAllowedReferences() - 5 + advanced_deinterlace ? 2:0); ++ status = mmal_port_parameter_set_uint32(m_deint_input, MMAL_PARAMETER_EXTRA_BUFFERS, 1 + GetAllowedReferences() - 5 + (advanced_deinterlace ? 2:0)); + if (status != MMAL_SUCCESS) + CLog::Log(LOGERROR, "%s::%s Failed to enable extra buffers on %s (status=%x %s)", CLASSNAME, __func__, m_deint_input->name, status, mmal_status_to_string(status)); + + +From a89d7094bd34a58451effaa3fbbc72651888ea23 Mon Sep 17 00:00:00 2001 +From: popcornmix <popcornmix@gmail.com> +Date: Fri, 1 Jul 2016 13:15:36 +0100 +Subject: [PATCH 67/67] UNSTABLE: This is a placeholder. Commits after this + point are considered experimental. + +--- + .placeholder | 0 + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 .placeholder + +diff --git a/.placeholder b/.placeholder +new file mode 100644 +index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391