From beb3e824769409b087d692666b5fe2d2ad9c8ff2 Mon Sep 17 00:00:00 2001 From: Stephan Raue Date: Sun, 7 Jun 2009 22:13:55 +0200 Subject: [PATCH] variuos test to gst-ffmpeg (not working) --- packages/linux/patches/drm_via_chrome9.diff | 5150 ++++++++++++++++++ packages/multimedia/ffmpeg/10_mt-decode.diff | 2875 ++++++++++ packages/multimedia/ffmpeg/build | 65 + packages/multimedia/ffmpeg/install | 19 + packages/multimedia/ffmpeg/url | 1 + 5 files changed, 8110 insertions(+) create mode 100644 packages/linux/patches/drm_via_chrome9.diff create mode 100644 packages/multimedia/ffmpeg/10_mt-decode.diff create mode 100755 packages/multimedia/ffmpeg/build create mode 100755 packages/multimedia/ffmpeg/install create mode 100644 packages/multimedia/ffmpeg/url diff --git a/packages/linux/patches/drm_via_chrome9.diff b/packages/linux/patches/drm_via_chrome9.diff new file mode 100644 index 0000000000..ce141d6dc6 --- /dev/null +++ b/packages/linux/patches/drm_via_chrome9.diff @@ -0,0 +1,5150 @@ +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/Kconfig linux-2.6.30-rc7.patch/drivers/gpu/drm/Kconfig +--- linux-2.6.30-rc7/drivers/gpu/drm/Kconfig 2009-05-23 23:47:00.000000000 +0200 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/Kconfig 2009-05-27 14:23:46.064053992 +0200 +@@ -123,6 +123,13 @@ + Choose this option if you have a Via unichrome or compatible video + chipset. If M is selected the module will be called via. + ++config DRM_VIA_CHROME9 ++ tristate "Via chrome9 video cards" ++ depends on DRM ++ help ++ Choose this option if you have a Via chrome9 or compatible video ++ chipset. If M is selected the module will be called via_chrome9. 
++ + config DRM_SAVAGE + tristate "Savage video cards" + depends on DRM +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/Makefile linux-2.6.30-rc7.patch/drivers/gpu/drm/Makefile +--- linux-2.6.30-rc7/drivers/gpu/drm/Makefile 2009-05-23 23:47:00.000000000 +0200 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/Makefile 2009-05-27 14:24:17.464054221 +0200 +@@ -26,4 +26,5 @@ + obj-$(CONFIG_DRM_SIS) += sis/ + obj-$(CONFIG_DRM_SAVAGE)+= savage/ + obj-$(CONFIG_DRM_VIA) +=via/ ++obj-$(CONFIG_DRM_VIA_CHROME9) +=via_chrome9/ + +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/Makefile linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/Makefile +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/Makefile 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/Makefile 2008-12-03 03:08:31.000000000 +0100 +@@ -0,0 +1,8 @@ ++# ++# Makefile for the drm device driver. This driver provides support for the ++# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. ++ ++ccflags-y := -Iinclude/drm ++via_chrome9-y := via_chrome9_drv.o via_chrome9_drm.o via_chrome9_mm.o via_chrome9_dma.o via_chrome9_verifier.o ++ ++obj-$(CONFIG_DRM_VIA_CHROME9) += via_chrome9.o +\ Kein Zeilenumbruch am Dateiende. +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_3d_reg.h linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_3d_reg.h +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_3d_reg.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_3d_reg.h 2008-12-03 03:08:31.000000000 +0100 +@@ -0,0 +1,407 @@ ++/* ++ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved. ++ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person ++ * obtaining a copy of this software and associated documentation ++ * files (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, ++ * copy, modify, merge, publish, distribute, sub license, ++ * and/or sell copies of the Software, and to permit persons to ++ * whom the Software is furnished to do so, subject to the ++ * following conditions: ++ * ++ * The above copyright notice and this permission notice ++ * (including the next paragraph) shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. IN NO EVENT SHALL VIA, S3 GRAPHICS, AND/OR ++ * ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ++ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#ifndef VIA_CHROME9_3D_REG_H ++#define VIA_CHROME9_3D_REG_H ++#define GetMMIORegister(base, offset) \ ++ (*(__volatile__ unsigned int *)(void *)(((unsigned char *)(base)) + \ ++ (offset))) ++#define SetMMIORegister(base, offset, val) \ ++ (*(__volatile__ unsigned int *)(void *)(((unsigned char *)(base)) + \ ++ (offset)) = (val)) ++ ++#define GetMMIORegisterU8(base, offset) \ ++ (*(__volatile__ unsigned char *)(void *)(((unsigned char *)(base)) + \ ++ (offset))) ++#define SetMMIORegisterU8(base, offset, val) \ ++ (*(__volatile__ unsigned char *)(void *)(((unsigned char *)(base)) + \ ++ (offset)) = (val)) ++ ++#define BCI_SEND(bci, value) (*(bci)++ = (unsigned long)(value)) ++#define BCI_SET_STREAM_REGISTER(bci_base, bci_index, reg_value) \ ++do { \ ++ unsigned long cmd; \ ++ \ ++ cmd = (0x90000000 \ ++ | (1<<16) /* stream processor register */ \ ++ | (bci_index & 0x3FFC)); /* MMIO register address */ \ ++ BCI_SEND(bci_base, cmd); \ ++ BCI_SEND(bci_base, reg_value); \ ++ } while (0) ++ ++/* Command Header Type */ ++ ++#define INV_AGPHeader0 0xFE000000 ++#define INV_AGPHeader1 0xFE010000 ++#define INV_AGPHeader2 0xFE020000 ++#define INV_AGPHeader3 0xFE030000 ++#define INV_AGPHeader4 0xFE040000 ++#define INV_AGPHeader5 0xFE050000 ++#define INV_AGPHeader6 0xFE060000 ++#define INV_AGPHeader7 0xFE070000 ++#define INV_AGPHeader82 0xFE820000 ++#define INV_AGPHeader_MASK 0xFFFF0000 ++#define INV_DUMMY_MASK 0xFF000000 ++ ++/*send pause address of AGP ring command buffer via_chrome9 this IO port*/ ++#define INV_REG_PCIPAUSE 0x294 ++#define INV_REG_PCIPAUSE_ENABLE 0x4 ++ ++#define INV_CMDBUF_THRESHOLD (8) ++#define INV_QW_PAUSE_ALIGN 0x40 ++ ++/* Transmission IO Space*/ ++#define INV_REG_CR_TRANS 0x041C ++#define INV_REG_CR_BEGIN 0x0420 ++#define INV_REG_CR_END 0x0438 ++ ++#define INV_REG_3D_TRANS 0x043C ++#define INV_REG_3D_BEGIN 0x0440 ++#define INV_REG_3D_END 0x06FC ++#define INV_REG_23D_WAIT 0x326C ++/*3D / 2D ID Control (Only For Group A)*/ ++#define 
INV_REG_2D3D_ID_CTRL 0x060 ++ ++ ++/* Engine Status */ ++ ++#define INV_RB_ENG_STATUS 0x0400 ++#define INV_ENG_BUSY_HQV0 0x00040000 ++#define INV_ENG_BUSY_HQV1 0x00020000 ++#define INV_ENG_BUSY_CR 0x00000010 ++#define INV_ENG_BUSY_MPEG 0x00000008 ++#define INV_ENG_BUSY_VQ 0x00000004 ++#define INV_ENG_BUSY_2D 0x00000002 ++#define INV_ENG_BUSY_3D 0x00001FE1 ++#define INV_ENG_BUSY_ALL \ ++ (INV_ENG_BUSY_2D | INV_ENG_BUSY_3D | INV_ENG_BUSY_CR) ++ ++/* Command Queue Status*/ ++#define INV_RB_VQ_STATUS 0x0448 ++#define INV_VQ_FULL 0x40000000 ++ ++/* AGP command buffer pointer current position*/ ++#define INV_RB_AGPCMD_CURRADDR 0x043C ++ ++/* AGP command buffer status*/ ++#define INV_RB_AGPCMD_STATUS 0x0444 ++#define INV_AGPCMD_InPause 0x80000000 ++ ++/*AGP command buffer pause address*/ ++#define INV_RB_AGPCMD_PAUSEADDR 0x045C ++ ++/*AGP command buffer jump address*/ ++#define INV_RB_AGPCMD_JUMPADDR 0x0460 ++ ++/*AGP command buffer start address*/ ++#define INV_RB_AGPCMD_STARTADDR 0x0464 ++ ++ ++/* Constants */ ++#define NUMBER_OF_EVENT_TAGS 1024 ++#define NUMBER_OF_APERTURES_CLB 16 ++ ++/* Register definition */ ++#define HW_SHADOW_ADDR 0x8520 ++#define HW_GARTTABLE_ADDR 0x8540 ++ ++#define INV_HSWFlag_DBGMASK 0x00000FFF ++#define INV_HSWFlag_ENCODEMASK 0x007FFFF0 ++#define INV_HSWFlag_ADDRSHFT 8 ++#define INV_HSWFlag_DECODEMASK \ ++ (INV_HSWFlag_ENCODEMASK << INV_HSWFlag_ADDRSHFT) ++#define INV_HSWFlag_ADDR_ENCODE(x) 0xCC000000 ++#define INV_HSWFlag_ADDR_DECODE(x) \ ++ (((unsigned int)x & INV_HSWFlag_DECODEMASK) >> INV_HSWFlag_ADDRSHFT) ++ ++ ++#define INV_SubA_HAGPBstL 0x60000000 ++#define INV_SubA_HAGPBstH 0x61000000 ++#define INV_SubA_HAGPBendL 0x62000000 ++#define INV_SubA_HAGPBendH 0x63000000 ++#define INV_SubA_HAGPBpL 0x64000000 ++#define INV_SubA_HAGPBpID 0x65000000 ++#define INV_HAGPBpID_PAUSE 0x00000000 ++#define INV_HAGPBpID_JUMP 0x00000100 ++#define INV_HAGPBpID_STOP 0x00000200 ++ ++#define INV_HAGPBpH_MASK 0x000000FF ++#define INV_HAGPBpH_SHFT 0 ++ 
++#define INV_SubA_HAGPBjumpL 0x66000000 ++#define INV_SubA_HAGPBjumpH 0x67000000 ++#define INV_HAGPBjumpH_MASK 0x000000FF ++#define INV_HAGPBjumpH_SHFT 0 ++ ++#define INV_SubA_HFthRCM 0x68000000 ++#define INV_HFthRCM_MASK 0x003F0000 ++#define INV_HFthRCM_SHFT 16 ++#define INV_HFthRCM_8 0x00080000 ++#define INV_HFthRCM_10 0x000A0000 ++#define INV_HFthRCM_18 0x00120000 ++#define INV_HFthRCM_24 0x00180000 ++#define INV_HFthRCM_32 0x00200000 ++ ++#define INV_HAGPBClear 0x00000008 ++ ++#define INV_HRSTTrig_RestoreAGP 0x00000004 ++#define INV_HRSTTrig_RestoreAll 0x00000002 ++#define INV_HAGPBTrig 0x00000001 ++ ++#define INV_ParaSubType_MASK 0xff000000 ++#define INV_ParaType_MASK 0x00ff0000 ++#define INV_ParaOS_MASK 0x0000ff00 ++#define INV_ParaAdr_MASK 0x000000ff ++#define INV_ParaSubType_SHIFT 24 ++#define INV_ParaType_SHIFT 16 ++#define INV_ParaOS_SHIFT 8 ++#define INV_ParaAdr_SHIFT 0 ++ ++#define INV_ParaType_Vdata 0x00000000 ++#define INV_ParaType_Attr 0x00010000 ++#define INV_ParaType_Tex 0x00020000 ++#define INV_ParaType_Pal 0x00030000 ++#define INV_ParaType_FVF 0x00040000 ++#define INV_ParaType_PreCR 0x00100000 ++#define INV_ParaType_CR 0x00110000 ++#define INV_ParaType_Cfg 0x00fe0000 ++#define INV_ParaType_Dummy 0x00300000 ++ ++#define INV_SubType_Tex0 0x00000000 ++#define INV_SubType_Tex1 0x00000001 ++#define INV_SubType_Tex2 0x00000002 ++#define INV_SubType_Tex3 0x00000003 ++#define INV_SubType_Tex4 0x00000004 ++#define INV_SubType_Tex5 0x00000005 ++#define INV_SubType_Tex6 0x00000006 ++#define INV_SubType_Tex7 0x00000007 ++#define INV_SubType_General 0x000000fe ++#define INV_SubType_TexSample 0x00000020 ++ ++#define INV_HWBasL_MASK 0x00FFFFFF ++#define INV_HWBasH_MASK 0xFF000000 ++#define INV_HWBasH_SHFT 24 ++#define INV_HWBasL(x) ((unsigned int)(x) & INV_HWBasL_MASK) ++#define INV_HWBasH(x) ((unsigned int)(x) >> INV_HWBasH_SHFT) ++#define INV_HWBas256(x) ((unsigned int)(x) >> 8) ++#define INV_HWPit32(x) ((unsigned int)(x) >> 5) ++ ++/* Read Back Register 
Setting */ ++#define INV_SubA_HSetRBGID 0x02000000 ++#define INV_HSetRBGID_CR 0x00000000 ++#define INV_HSetRBGID_FE 0x00000001 ++#define INV_HSetRBGID_PE 0x00000002 ++#define INV_HSetRBGID_RC 0x00000003 ++#define INV_HSetRBGID_PS 0x00000004 ++#define INV_HSetRBGID_XE 0x00000005 ++#define INV_HSetRBGID_BE 0x00000006 ++ ++ ++struct drm_clb_event_tag_info { ++ unsigned int *linear_address; ++ unsigned int *event_tag_linear_address; ++ int usage[NUMBER_OF_EVENT_TAGS]; ++ unsigned int pid[NUMBER_OF_EVENT_TAGS]; ++}; ++ ++static inline int is_agp_header(unsigned int data) ++{ ++ switch (data & INV_AGPHeader_MASK) { ++ case INV_AGPHeader0: ++ case INV_AGPHeader1: ++ case INV_AGPHeader2: ++ case INV_AGPHeader3: ++ case INV_AGPHeader4: ++ case INV_AGPHeader5: ++ case INV_AGPHeader6: ++ case INV_AGPHeader7: ++ return 1; ++ default: ++ return 0; ++ } ++} ++ ++/* Header0: 2D */ ++#define ADDCmdHeader0_INVI(pCmd, dwCount) \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned long *)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader0; \ ++ *(pCmd)++ = (dwCount); \ ++ *(pCmd)++ = 0; \ ++ *(pCmd)++ = (unsigned int)INV_HSWFlag_ADDR_ENCODE(pCmd); \ ++} ++ ++/* Header1: 2D */ ++#define ADDCmdHeader1_INVI(pCmd, dwAddr, dwCount) \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned long *)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader1 | (dwAddr); \ ++ *(pCmd)++ = (dwCount); \ ++ *(pCmd)++ = 0; \ ++ *(pCmd)++ = (unsigned int)INV_HSWFlag_ADDR_ENCODE(pCmd); \ ++} ++ ++/* Header2: CR/3D */ ++#define ADDCmdHeader2_INVI(pCmd, dwAddr, dwType) \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned int)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader2 | ((dwAddr)+4); \ ++ *(pCmd)++ = (dwAddr); \ ++ *(pCmd)++ = (dwType); \ ++ *(pCmd)++ = (unsigned 
int)INV_HSWFlag_ADDR_ENCODE(pCmd); \ ++} ++ ++/* Header2: CR/3D with SW Flag */ ++#define ADDCmdHeader2_SWFlag_INVI(pCmd, dwAddr, dwType, dwSWFlag) \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned long *)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader2 | ((dwAddr)+4); \ ++ *(pCmd)++ = (dwAddr); \ ++ *(pCmd)++ = (dwType); \ ++ *(pCmd)++ = (dwSWFlag); \ ++} ++ ++ ++/* Header3: 3D */ ++#define ADDCmdHeader3_INVI(pCmd, dwType, dwStart, dwCount) \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned long *)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader3 | INV_REG_3D_TRANS; \ ++ *(pCmd)++ = (dwCount); \ ++ *(pCmd)++ = (dwType) | ((dwStart) & 0xFFFF); \ ++ *(pCmd)++ = (unsigned int)INV_HSWFlag_ADDR_ENCODE(pCmd); \ ++} ++ ++/* Header3: 3D with SW Flag */ ++#define ADDCmdHeader3_SWFlag_INVI(pCmd, dwType, dwStart, dwSWFlag, dwCount) \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned long *)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader3 | INV_REG_3D_TRANS; \ ++ *(pCmd)++ = (dwCount); \ ++ *(pCmd)++ = (dwType) | ((dwStart) & 0xFFFF); \ ++ *(pCmd)++ = (dwSWFlag); \ ++} ++ ++/* Header4: DVD */ ++#define ADDCmdHeader4_INVI(pCmd, dwAddr, dwCount, id) \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned long *)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader4 | (dwAddr); \ ++ *(pCmd)++ = (dwCount); \ ++ *(pCmd)++ = (id); \ ++ *(pCmd)++ = 0; \ ++} ++ ++/* Header5: DVD */ ++#define ADDCmdHeader5_INVI(pCmd, dwQWcount, id) \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned long *)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader5; \ ++ *(pCmd)++ = (dwQWcount); \ ++ *(pCmd)++ = (id); \ ++ *(pCmd)++ = 0; 
\ ++} ++ ++/* Header6: DEBUG */ ++#define ADDCmdHeader6_INVI(pCmd) \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned long *)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader6; \ ++ *(pCmd)++ = 0; \ ++ *(pCmd)++ = 0; \ ++ *(pCmd)++ = 0; \ ++} ++ ++/* Header7: DMA */ ++#define ADDCmdHeader7_INVI(pCmd, dwQWcount, id) \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned long *)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader7; \ ++ *(pCmd)++ = (dwQWcount); \ ++ *(pCmd)++ = (id); \ ++ *(pCmd)++ = 0; \ ++} ++ ++/* Header82: Branch buffer */ ++#define ADDCmdHeader82_INVI(pCmd, dwAddr, dwType); \ ++{ \ ++ /* 4 unsigned int align, insert NULL Command for padding */ \ ++ while (((unsigned long *)(pCmd)) & 0xF) { \ ++ *(pCmd)++ = 0xCC000000; \ ++ } \ ++ *(pCmd)++ = INV_AGPHeader82 | ((dwAddr)+4); \ ++ *(pCmd)++ = (dwAddr); \ ++ *(pCmd)++ = (dwType); \ ++ *(pCmd)++ = 0xCC000000; \ ++} ++ ++ ++#define ADD2DCmd_INVI(pCmd, dwAddr, dwCmd) \ ++{ \ ++ *(pCmd)++ = (dwAddr); \ ++ *(pCmd)++ = (dwCmd); \ ++} ++ ++#define ADDCmdData_INVI(pCmd, dwCmd) (*(pCmd)++ = (dwCmd)) ++ ++#define ADDCmdDataStream_INVI(pCmdBuf, pCmd, dwCount) \ ++{ \ ++ memcpy((pCmdBuf), (pCmd), ((dwCount)<<2)); \ ++ (pCmdBuf) += (dwCount); \ ++} ++ ++#endif +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_dma.c linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_dma.c +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_dma.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_dma.c 2008-12-03 03:08:32.000000000 +0100 +@@ -0,0 +1,1285 @@ ++/* ++ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved. ++ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person ++ * obtaining a copy of this software and associated documentation ++ * files (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, ++ * copy, modify, merge, publish, distribute, sub license, ++ * and/or sell copies of the Software, and to permit persons to ++ * whom the Software is furnished to do so, subject to the ++ * following conditions: ++ * ++ * The above copyright notice and this permission notice ++ * (including the next paragraph) shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. IN NO EVENT SHALL VIA, S3 GRAPHICS, AND/OR ++ * ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ++ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#include "drmP.h" ++#include "drm.h" ++#include "via_chrome9_drm.h" ++#include "via_chrome9_drv.h" ++#include "via_chrome9_3d_reg.h" ++#include "via_chrome9_dma.h" ++ ++#define NULLCOMMANDNUMBER 256 ++unsigned int NULL_COMMAND_INV[4] = ++ { 0xCC000000, 0xCD000000, 0xCE000000, 0xCF000000 }; ++ ++void ++via_chrome9ke_assert(int a) ++{ ++} ++ ++unsigned int ++ProtectSizeValue(unsigned int size) ++{ ++ unsigned int i; ++ for (i = 0; i < 8; i++) ++ if ((size > (1 << (i + 12))) ++ && (size <= (1 << (i + 13)))) ++ return i + 1; ++ return 0; ++} ++ ++void via_chrome9_dma_init_inv(struct drm_device *dev) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *)dev->dev_private; ++ struct drm_via_chrome9_DMA_manager *lpcmDMAManager = ++ dev_priv->dma_manager; ++ ++ if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ unsigned int *pGARTTable; ++ unsigned int i, entries, GARTOffset; ++ unsigned char sr6a, sr6b, sr6c, sr6f, sr7b; ++ unsigned int *addrlinear; ++ unsigned int size, alignedoffset; ++ ++ entries = dev_priv->pagetable_map.pagetable_size / ++ sizeof(unsigned int); ++ pGARTTable = dev_priv->pagetable_map.pagetable_handle; ++ ++ GARTOffset = dev_priv->pagetable_map.pagetable_offset; ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr6c &= (~0x80); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); ++ ++ sr6a = (unsigned char)((GARTOffset & 0xff000) >> 12); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6a); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6a); ++ ++ sr6b = (unsigned char)((GARTOffset & 0xff00000) >> 20); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6b); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6b); ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr6c |= ((unsigned char)((GARTOffset >> 28) & 0x01)); ++ 
SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x7b); ++ sr7b = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr7b &= (~0x0f); ++ sr7b |= ProtectSizeValue(dev_priv-> ++ pagetable_map.pagetable_size); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr7b); ++ ++ for (i = 0; i < entries; i++) ++ writel(0x80000000, pGARTTable+i); ++ ++ /*flush*/ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6f); ++ do { ++ sr6f = GetMMIORegisterU8(dev_priv->mmio->handle, ++ 0x83c5); ++ } while (sr6f & 0x80); ++ ++ sr6f |= 0x80; ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6f); ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr6c |= 0x80; ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); ++ ++ if (dev_priv->drm_agp_type != DRM_AGP_DISABLED) { ++ size = lpcmDMAManager->DMASize * sizeof(unsigned int) + ++ dev_priv->agp_size; ++ alignedoffset = 0; ++ entries = (size + PAGE_SIZE - 1) / PAGE_SIZE; ++ addrlinear = ++ (unsigned int *)dev_priv->pcie_vmalloc_nocache; ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = ++ GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr6c &= (~0x80); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6f); ++ do { ++ sr6f = GetMMIORegisterU8(dev_priv->mmio->handle, ++ 0x83c5); ++ } while (sr6f & 0x80); ++ ++ for (i = 0; i < entries; i++) ++ writel(page_to_pfn(vmalloc_to_page( ++ (void *)addrlinear + PAGE_SIZE * i)) & ++ 0x3fffffff, pGARTTable + i + alignedoffset); ++ ++ sr6f |= 0x80; ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6f); ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = ++ GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr6c |= 0x80; ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); ++ } ++ ++ } ++ ++ if 
(dev_priv->drm_agp_type == DRM_AGP_DOUBLE_BUFFER) ++ SetAGPDoubleCmd_inv(dev); ++ else if (dev_priv->drm_agp_type == DRM_AGP_RING_BUFFER) ++ SetAGPRingCmdRegs_inv(dev); ++ ++ return ; ++} ++ ++static unsigned int ++InitPCIEGART(struct drm_via_chrome9_private *dev_priv) ++{ ++ unsigned int *pGARTTable; ++ unsigned int i, entries, GARTOffset; ++ unsigned char sr6a, sr6b, sr6c, sr6f, sr7b; ++ ++ if (!dev_priv->pagetable_map.pagetable_size) ++ return 0; ++ ++ entries = dev_priv->pagetable_map.pagetable_size / sizeof(unsigned int); ++ ++ pGARTTable = ++ ioremap_nocache(dev_priv->fb_base_address + ++ dev_priv->pagetable_map.pagetable_offset, ++ dev_priv->pagetable_map.pagetable_size); ++ if (pGARTTable) ++ dev_priv->pagetable_map.pagetable_handle = pGARTTable; ++ else ++ return 0; ++ ++ /*set gart table base */ ++ GARTOffset = dev_priv->pagetable_map.pagetable_offset; ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr6c &= (~0x80); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); ++ ++ sr6a = (unsigned char) ((GARTOffset & 0xff000) >> 12); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6a); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6a); ++ ++ sr6b = (unsigned char) ((GARTOffset & 0xff00000) >> 20); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6b); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6b); ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr6c |= ((unsigned char) ((GARTOffset >> 28) & 0x01)); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x7b); ++ sr7b = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr7b &= (~0x0f); ++ sr7b |= ProtectSizeValue(dev_priv->pagetable_map.pagetable_size); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr7b); ++ ++ for (i = 0; i < entries; i++) ++ 
writel(0x80000000, pGARTTable + i); ++ /*flush */ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6f); ++ do { ++ sr6f = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ } ++ while (sr6f & 0x80) ++ ; ++ ++ sr6f |= 0x80; ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6f); ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr6c |= 0x80; ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); ++ ++ return 1; ++} ++ ++ ++static unsigned int * ++AllocAndBindPCIEMemory(struct drm_via_chrome9_private *dev_priv, ++ unsigned int size, unsigned int offset) ++{ ++ unsigned int *addrlinear; ++ unsigned int *pGARTTable; ++ unsigned int entries, alignedoffset, i; ++ unsigned char sr6c, sr6f; ++ ++ if (!size) ++ return NULL; ++ ++ entries = (size + PAGE_SIZE - 1) / PAGE_SIZE; ++ alignedoffset = (offset + PAGE_SIZE - 1) / PAGE_SIZE; ++ ++ if ((entries + alignedoffset) > ++ (dev_priv->pagetable_map.pagetable_size / sizeof(unsigned int))) ++ return NULL; ++ ++ addrlinear = ++ __vmalloc(entries * PAGE_SIZE, GFP_KERNEL | __GFP_HIGHMEM, ++ PAGE_KERNEL_NOCACHE); ++ ++ if (!addrlinear) ++ return NULL; ++ ++ pGARTTable = dev_priv->pagetable_map.pagetable_handle; ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ sr6c &= (~0x80); ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6f); ++ do { ++ sr6f = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ } ++ while (sr6f & 0x80) ++ ; ++ ++ for (i = 0; i < entries; i++) ++ writel(page_to_pfn ++ (vmalloc_to_page((void *) addrlinear + PAGE_SIZE * i)) & ++ 0x3fffffff, pGARTTable + i + alignedoffset); ++ ++ sr6f |= 0x80; ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6f); ++ ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ 
sr6c |= 0x80; ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); ++ ++ return addrlinear; ++ ++} ++ ++void ++SetAGPDoubleCmd_inv(struct drm_device *dev) ++{ ++ /* we now don't use double buffer */ ++ return; ++} ++ ++void ++SetAGPRingCmdRegs_inv(struct drm_device *dev) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ struct drm_via_chrome9_DMA_manager *lpcmDMAManager = ++ (struct drm_via_chrome9_DMA_manager *) dev_priv->dma_manager; ++ unsigned int AGPBufLinearBase = 0, AGPBufPhysicalBase = 0; ++ unsigned long *pFree; ++ unsigned int dwStart, dwEnd, dwPause, AGPCurrAddr, AGPCurStat, CurrAGP; ++ unsigned int dwReg60, dwReg61, dwReg62, dwReg63, ++ dwReg64, dwReg65, dwJump; ++ ++ lpcmDMAManager->pFree = lpcmDMAManager->pBeg; ++ ++ AGPBufLinearBase = (unsigned int) lpcmDMAManager->addr_linear; ++ AGPBufPhysicalBase = ++ (dev_priv->chip_agp == ++ CHIP_PCIE) ? 0 : (unsigned int) dev->agp->base + ++ lpcmDMAManager->pPhysical; ++ /*add shadow offset */ ++ ++ CurrAGP = ++ GetMMIORegister(dev_priv->mmio->handle, INV_RB_AGPCMD_CURRADDR); ++ AGPCurStat = ++ GetMMIORegister(dev_priv->mmio->handle, INV_RB_AGPCMD_STATUS); ++ ++ if (AGPCurStat & INV_AGPCMD_InPause) { ++ AGPCurrAddr = ++ GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ pFree = (unsigned long *) (AGPBufLinearBase + AGPCurrAddr - ++ AGPBufPhysicalBase); ++ ADDCmdHeader2_INVI(pFree, INV_REG_CR_TRANS, INV_ParaType_Dummy); ++ if (dev_priv->chip_sub_index == CHIP_H6S2) ++ do { ++ ADDCmdData_INVI(pFree, 0xCCCCCCC0); ++ ADDCmdData_INVI(pFree, 0xDDD00000); ++ } ++ while ((u32)((unsigned int) pFree) & 0x7f) ++ ; ++ /*for 8*128bit aligned */ ++ else ++ do { ++ ADDCmdData_INVI(pFree, 0xCCCCCCC0); ++ ADDCmdData_INVI(pFree, 0xDDD00000); ++ } ++ while ((u32) ((unsigned int) pFree) & 0x1f) ++ ; ++ /*for 256bit aligned */ ++ dwPause = ++ (u32) (((unsigned int) pFree) - AGPBufLinearBase + ++ AGPBufPhysicalBase - 16); ++ ++ dwReg64 = 
INV_SubA_HAGPBpL | INV_HWBasL(dwPause); ++ dwReg65 = ++ INV_SubA_HAGPBpID | INV_HWBasH(dwPause) | ++ INV_HAGPBpID_STOP; ++ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_TRANS, ++ INV_ParaType_PreCR); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ dwReg64); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ dwReg65); ++ ++ while (GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_ENG_STATUS) & INV_ENG_BUSY_ALL) ++ ; ++ } ++ dwStart = ++ (u32) ((unsigned int) lpcmDMAManager->pBeg - AGPBufLinearBase + ++ AGPBufPhysicalBase); ++ dwEnd = (u32) ((unsigned int) lpcmDMAManager->pEnd - AGPBufLinearBase + ++ AGPBufPhysicalBase); ++ ++ lpcmDMAManager->pFree = lpcmDMAManager->pBeg; ++ if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ ADDCmdHeader2_INVI(lpcmDMAManager->pFree, INV_REG_CR_TRANS, ++ INV_ParaType_Dummy); ++ do { ++ ADDCmdData_INVI(lpcmDMAManager->pFree, 0xCCCCCCC0); ++ ADDCmdData_INVI(lpcmDMAManager->pFree, 0xDDD00000); ++ } ++ while ((u32)((unsigned long *) lpcmDMAManager->pFree) & 0x7f) ++ ; ++ } ++ dwJump = 0xFFFFFFF0; ++ dwPause = ++ (u32)(((unsigned int) lpcmDMAManager->pFree) - ++ 16 - AGPBufLinearBase + AGPBufPhysicalBase); ++ ++ DRM_DEBUG("dwStart = %08x, dwEnd = %08x, dwPause = %08x\n", dwStart, ++ dwEnd, dwPause); ++ ++ dwReg60 = INV_SubA_HAGPBstL | INV_HWBasL(dwStart); ++ dwReg61 = INV_SubA_HAGPBstH | INV_HWBasH(dwStart); ++ dwReg62 = INV_SubA_HAGPBendL | INV_HWBasL(dwEnd); ++ dwReg63 = INV_SubA_HAGPBendH | INV_HWBasH(dwEnd); ++ dwReg64 = INV_SubA_HAGPBpL | INV_HWBasL(dwPause); ++ dwReg65 = INV_SubA_HAGPBpID | INV_HWBasH(dwPause) | INV_HAGPBpID_PAUSE; ++ ++ if (dev_priv->chip_sub_index == CHIP_H6S2) ++ dwReg60 |= 0x01; ++ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_TRANS, ++ INV_ParaType_PreCR); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg60); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg61); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg62); ++ 
SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg63); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg64); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg65); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ INV_SubA_HAGPBjumpL | INV_HWBasL(dwJump)); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ INV_SubA_HAGPBjumpH | INV_HWBasH(dwJump)); ++ ++ /* Trigger AGP cycle */ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ INV_SubA_HFthRCM | INV_HFthRCM_10 | INV_HAGPBTrig); ++ ++ /*for debug */ ++ CurrAGP = ++ GetMMIORegister(dev_priv->mmio->handle, INV_RB_AGPCMD_CURRADDR); ++ ++ lpcmDMAManager->pInUseBySW = lpcmDMAManager->pFree; ++} ++ ++/* Do hw intialization and determine whether to use dma or mmio to ++talk with hw */ ++int ++via_chrome9_hw_init(struct drm_device *dev, ++ struct drm_via_chrome9_init *init) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ unsigned retval = 0; ++ unsigned int *pGARTTable, *addrlinear = NULL; ++ int pages; ++ struct drm_clb_event_tag_info *event_tag_info; ++ struct drm_via_chrome9_DMA_manager *lpcmDMAManager = NULL; ++ ++ if (init->chip_agp == CHIP_PCIE) { ++ dev_priv->pagetable_map.pagetable_offset = ++ init->garttable_offset; ++ dev_priv->pagetable_map.pagetable_size = init->garttable_size; ++ dev_priv->agp_size = init->agp_tex_size; ++ /*Henry :prepare for PCIE texture buffer */ ++ } else { ++ dev_priv->pagetable_map.pagetable_offset = 0; ++ dev_priv->pagetable_map.pagetable_size = 0; ++ } ++ ++ dev_priv->dma_manager = ++ kmalloc(sizeof(struct drm_via_chrome9_DMA_manager), GFP_KERNEL); ++ if (!dev_priv->dma_manager) { ++ DRM_ERROR("could not allocate system for dma_manager!\n"); ++ return -ENOMEM; ++ } ++ ++ lpcmDMAManager = ++ (struct drm_via_chrome9_DMA_manager *) dev_priv->dma_manager; ++ ((struct drm_via_chrome9_DMA_manager *) ++ dev_priv->dma_manager)->DMASize = 
init->DMA_size; ++ ((struct drm_via_chrome9_DMA_manager *) ++ dev_priv->dma_manager)->pPhysical = init->DMA_phys_address; ++ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_TRANS, 0x00110000); ++ if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ 0x06000000); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ 0x07100000); ++ } else { ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ 0x02000000); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ 0x03100000); ++ } ++ ++ /* Specify fence command read back ID */ ++ /* Default the read back ID is CR */ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_TRANS, ++ INV_ParaType_PreCR); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ INV_SubA_HSetRBGID | INV_HSetRBGID_CR); ++ ++ DRM_DEBUG("begin to init\n"); ++ ++ if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ dev_priv->pcie_vmalloc_nocache = 0; ++ if (dev_priv->pagetable_map.pagetable_size) ++ retval = InitPCIEGART(dev_priv); ++ ++ if (retval && dev_priv->drm_agp_type != DRM_AGP_DISABLED) { ++ addrlinear = ++ AllocAndBindPCIEMemory(dev_priv, ++ lpcmDMAManager->DMASize + ++ dev_priv->agp_size, 0); ++ if (addrlinear) { ++ dev_priv->pcie_vmalloc_nocache = (unsigned long) ++ addrlinear; ++ } else { ++ dev_priv->bci_buffer = ++ vmalloc(MAX_BCI_BUFFER_SIZE); ++ dev_priv->drm_agp_type = DRM_AGP_DISABLED; ++ } ++ } else { ++ dev_priv->bci_buffer = vmalloc(MAX_BCI_BUFFER_SIZE); ++ dev_priv->drm_agp_type = DRM_AGP_DISABLED; ++ } ++ } else { ++ if (dev_priv->drm_agp_type != DRM_AGP_DISABLED) { ++ pGARTTable = NULL; ++ addrlinear = (unsigned int *) ++ ioremap(dev->agp->base + ++ lpcmDMAManager->pPhysical, ++ lpcmDMAManager->DMASize); ++ dev_priv->bci_buffer = NULL; ++ } else { ++ dev_priv->bci_buffer = vmalloc(MAX_BCI_BUFFER_SIZE); ++ /*Homer, BCI path always use this block of memory8 */ ++ } ++ } ++ ++ /*till here we have known whether support dma or not */ ++ 
pages = dev->sg->pages; ++ event_tag_info = vmalloc(sizeof(struct drm_clb_event_tag_info)); ++ memset(event_tag_info, 0, sizeof(struct drm_clb_event_tag_info)); ++ if (!event_tag_info) ++ return DRM_ERROR(" event_tag_info allocate error!"); ++ ++ /* aligned to 16k alignment */ ++ event_tag_info->linear_address = ++ (int ++ *) (((unsigned int) dev_priv->shadow_map.shadow_handle + ++ 0x3fff) & 0xffffc000); ++ event_tag_info->event_tag_linear_address = ++ event_tag_info->linear_address + 3; ++ dev_priv->event_tag_info = (void *) event_tag_info; ++ dev_priv->max_apertures = NUMBER_OF_APERTURES_CLB; ++ ++ /* Initialize DMA data structure */ ++ lpcmDMAManager->DMASize /= sizeof(unsigned int); ++ lpcmDMAManager->pBeg = addrlinear; ++ lpcmDMAManager->pFree = lpcmDMAManager->pBeg; ++ lpcmDMAManager->pInUseBySW = lpcmDMAManager->pBeg; ++ lpcmDMAManager->pInUseByHW = lpcmDMAManager->pBeg; ++ lpcmDMAManager->LastIssuedEventTag = (unsigned int) (unsigned long *) ++ lpcmDMAManager->pBeg; ++ lpcmDMAManager->ppInUseByHW = ++ (unsigned int **) ((char *) (dev_priv->mmio->handle) + ++ INV_RB_AGPCMD_CURRADDR); ++ lpcmDMAManager->bDMAAgp = dev_priv->chip_agp; ++ lpcmDMAManager->addr_linear = (unsigned int *) addrlinear; ++ ++ if (dev_priv->drm_agp_type == DRM_AGP_DOUBLE_BUFFER) { ++ lpcmDMAManager->MaxKickoffSize = lpcmDMAManager->DMASize >> 1; ++ lpcmDMAManager->pEnd = ++ lpcmDMAManager->addr_linear + ++ (lpcmDMAManager->DMASize >> 1) - 1; ++ SetAGPDoubleCmd_inv(dev); ++ if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ DRM_INFO("DMA buffer initialized finished. "); ++ DRM_INFO("Use PCIE Double Buffer type!\n"); ++ DRM_INFO("Total PCIE DMA buffer size = %8d bytes. \n", ++ lpcmDMAManager->DMASize << 2); ++ } else { ++ DRM_INFO("DMA buffer initialized finished. "); ++ DRM_INFO("Use AGP Double Buffer type!\n"); ++ DRM_INFO("Total AGP DMA buffer size = %8d bytes. 
\n", ++ lpcmDMAManager->DMASize << 2); ++ } ++ } else if (dev_priv->drm_agp_type == DRM_AGP_RING_BUFFER) { ++ lpcmDMAManager->MaxKickoffSize = lpcmDMAManager->DMASize; ++ lpcmDMAManager->pEnd = ++ lpcmDMAManager->addr_linear + lpcmDMAManager->DMASize; ++ SetAGPRingCmdRegs_inv(dev); ++ if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ DRM_INFO("DMA buffer initialized finished. \n"); ++ DRM_INFO("Use PCIE Ring Buffer type!"); ++ DRM_INFO("Total PCIE DMA buffer size = %8d bytes. \n", ++ lpcmDMAManager->DMASize << 2); ++ } else { ++ DRM_INFO("DMA buffer initialized finished. "); ++ DRM_INFO("Use AGP Ring Buffer type!\n"); ++ DRM_INFO("Total AGP DMA buffer size = %8d bytes. \n", ++ lpcmDMAManager->DMASize << 2); ++ } ++ } else if (dev_priv->drm_agp_type == DRM_AGP_DISABLED) { ++ lpcmDMAManager->MaxKickoffSize = 0x0; ++ if (dev_priv->chip_sub_index == CHIP_H6S2) ++ DRM_INFO("PCIE init failed! Use PCI\n"); ++ else ++ DRM_INFO("AGP init failed! Use PCI\n"); ++ } ++ return 0; ++} ++ ++static void ++kickoff_bci_inv(struct drm_device *dev, ++ struct drm_via_chrome9_flush *dma_info) ++{ ++ u32 HdType, dwQWCount, i, dwCount, Addr1, Addr2, SWPointer, ++ SWPointerEnd; ++ unsigned long *pCmdData; ++ int result; ++ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ /*pCmdData = __s3gke_vmalloc(dma_info->cmd_size<<2); */ ++ pCmdData = dev_priv->bci_buffer; ++ ++ if (!pCmdData) ++ return; ++ result = copy_from_user((int *) pCmdData, dma_info->usermode_dma_buf, ++ dma_info->cmd_size << 2); ++ if (result) { ++ DRM_ERROR("In function kickoff_bci_inv,\ ++ copy_from_user is fault. \n"); ++ return ; ++ } ++#if VIA_CHROME9_VERIFY_ENABLE ++ result = via_chrome9_verify_command_stream( ++ (const uint32_t *)pCmdData, dma_info->cmd_size << 2, ++ dev, dev_priv->chip_sub_index == CHIP_H6S2 ? 
0 : 1); ++ if (result) { ++ DRM_ERROR("The command has the security issue \n"); ++ return ; ++ } ++#endif ++ SWPointer = 0; ++ SWPointerEnd = (u32) dma_info->cmd_size; ++ while (SWPointer < SWPointerEnd) { ++ HdType = pCmdData[SWPointer] & INV_AGPHeader_MASK; ++ switch (HdType) { ++ case INV_AGPHeader0: ++ case INV_AGPHeader5: ++ dwQWCount = pCmdData[SWPointer + 1]; ++ SWPointer += 4; ++ ++ for (i = 0; i < dwQWCount; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, ++ pCmdData[SWPointer], ++ pCmdData[SWPointer + 1]); ++ SWPointer += 2; ++ } ++ break; ++ ++ case INV_AGPHeader1: ++ dwCount = pCmdData[SWPointer + 1]; ++ Addr1 = 0x0; ++ SWPointer += 4; /* skip 128-bit. */ ++ ++ for (; dwCount > 0; dwCount--, SWPointer++, ++ Addr1 += 4) { ++ SetMMIORegister(dev_priv->hostBlt->handle, ++ Addr1, pCmdData[SWPointer]); ++ } ++ break; ++ ++ case INV_AGPHeader4: ++ dwCount = pCmdData[SWPointer + 1]; ++ Addr1 = pCmdData[SWPointer] & 0x0000FFFF; ++ SWPointer += 4; /* skip 128-bit. */ ++ ++ for (; dwCount > 0; dwCount--, SWPointer++) ++ SetMMIORegister(dev_priv->mmio->handle, Addr1, ++ pCmdData[SWPointer]); ++ break; ++ ++ case INV_AGPHeader2: ++ Addr1 = pCmdData[SWPointer + 1] & 0xFFFF; ++ Addr2 = pCmdData[SWPointer] & 0xFFFF; ++ ++ /* Write first data (either ParaType or whatever) to ++ Addr1 */ ++ SetMMIORegister(dev_priv->mmio->handle, Addr1, ++ pCmdData[SWPointer + 2]); ++ SWPointer += 4; ++ ++ /* The following data are all written to Addr2, ++ until another header is met */ ++ while (!is_agp_header(pCmdData[SWPointer]) ++ && (SWPointer < SWPointerEnd)) { ++ SetMMIORegister(dev_priv->mmio->handle, Addr2, ++ pCmdData[SWPointer]); ++ SWPointer++; ++ } ++ break; ++ ++ case INV_AGPHeader3: ++ Addr1 = pCmdData[SWPointer] & 0xFFFF; ++ Addr2 = Addr1 + 4; ++ dwCount = pCmdData[SWPointer + 1]; ++ ++ /* Write first data (either ParaType or whatever) to ++ Addr1 */ ++ SetMMIORegister(dev_priv->mmio->handle, Addr1, ++ pCmdData[SWPointer + 2]); ++ SWPointer += 4; ++ ++ for (i = 0; i < 
dwCount; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, Addr2, ++ pCmdData[SWPointer]); ++ SWPointer++; ++ } ++ break; ++ ++ case INV_AGPHeader6: ++ break; ++ ++ case INV_AGPHeader7: ++ break; ++ ++ default: ++ SWPointer += 4; /* Advance to next header */ ++ } ++ ++ SWPointer = (SWPointer + 3) & ~3; ++ } ++} ++ ++void ++kickoff_dma_db_inv(struct drm_device *dev) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ struct drm_via_chrome9_DMA_manager *lpcmDMAManager = ++ dev_priv->dma_manager; ++ ++ u32 BufferSize = (u32) (lpcmDMAManager->pFree - lpcmDMAManager->pBeg); ++ ++ unsigned int AGPBufLinearBase = ++ (unsigned int) lpcmDMAManager->addr_linear; ++ unsigned int AGPBufPhysicalBase = ++ (unsigned int) dev->agp->base + lpcmDMAManager->pPhysical; ++ /*add shadow offset */ ++ ++ unsigned int dwStart, dwEnd, dwPause; ++ unsigned int dwReg60, dwReg61, dwReg62, dwReg63, dwReg64, dwReg65; ++ unsigned int CR_Status; ++ ++ if (BufferSize == 0) ++ return; ++ ++ /* 256-bit alignment of AGP pause address */ ++ if ((u32) ((unsigned long *) lpcmDMAManager->pFree) & 0x1f) { ++ ADDCmdHeader2_INVI(lpcmDMAManager->pFree, INV_REG_CR_TRANS, ++ INV_ParaType_Dummy); ++ do { ++ ADDCmdData_INVI(lpcmDMAManager->pFree, 0xCCCCCCC0); ++ ADDCmdData_INVI(lpcmDMAManager->pFree, 0xDDD00000); ++ } ++ while (((unsigned int) lpcmDMAManager->pFree) & 0x1f) ++ ; ++ } ++ ++ dwStart = ++ (u32) (unsigned long *)lpcmDMAManager->pBeg - ++ AGPBufLinearBase + AGPBufPhysicalBase; ++ dwEnd = (u32) (unsigned long *)lpcmDMAManager->pEnd - ++ AGPBufLinearBase + AGPBufPhysicalBase; ++ dwPause = ++ (u32)(unsigned long *)lpcmDMAManager->pFree - ++ AGPBufLinearBase + AGPBufPhysicalBase - 4; ++ ++ dwReg60 = INV_SubA_HAGPBstL | INV_HWBasL(dwStart); ++ dwReg61 = INV_SubA_HAGPBstH | INV_HWBasH(dwStart); ++ dwReg62 = INV_SubA_HAGPBendL | INV_HWBasL(dwEnd); ++ dwReg63 = INV_SubA_HAGPBendH | INV_HWBasH(dwEnd); ++ dwReg64 = INV_SubA_HAGPBpL | INV_HWBasL(dwPause); 
++ dwReg65 = INV_SubA_HAGPBpID | INV_HWBasH(dwPause) | INV_HAGPBpID_STOP; ++ ++ /* wait CR idle */ ++ CR_Status = GetMMIORegister(dev_priv->mmio->handle, INV_RB_ENG_STATUS); ++ while (CR_Status & INV_ENG_BUSY_CR) ++ CR_Status = ++ GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_ENG_STATUS); ++ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_TRANS, ++ INV_ParaType_PreCR); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg60); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg61); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg62); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg63); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg64); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg65); ++ ++ /* Trigger AGP cycle */ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ INV_SubA_HFthRCM | INV_HFthRCM_10 | INV_HAGPBTrig); ++ ++ if (lpcmDMAManager->pBeg == lpcmDMAManager->addr_linear) { ++ /* The second AGP command buffer */ ++ lpcmDMAManager->pBeg = ++ lpcmDMAManager->addr_linear + ++ (lpcmDMAManager->DMASize >> 2); ++ lpcmDMAManager->pEnd = ++ lpcmDMAManager->addr_linear + lpcmDMAManager->DMASize; ++ lpcmDMAManager->pFree = lpcmDMAManager->pBeg; ++ } else { ++ /* The first AGP command buffer */ ++ lpcmDMAManager->pBeg = lpcmDMAManager->addr_linear; ++ lpcmDMAManager->pEnd = ++ lpcmDMAManager->addr_linear + ++ (lpcmDMAManager->DMASize / 2) - 1; ++ lpcmDMAManager->pFree = lpcmDMAManager->pBeg; ++ } ++ CR_Status = GetMMIORegister(dev_priv->mmio->handle, INV_RB_ENG_STATUS); ++} ++ ++ ++void ++kickoff_dma_ring_inv(struct drm_device *dev) ++{ ++ unsigned int dwPause, dwReg64, dwReg65; ++ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ struct drm_via_chrome9_DMA_manager *lpcmDMAManager = ++ dev_priv->dma_manager; ++ ++ unsigned int AGPBufLinearBase = ++ (unsigned int) lpcmDMAManager->addr_linear; 
++ unsigned int AGPBufPhysicalBase = ++ (dev_priv->chip_agp == ++ CHIP_PCIE) ? 0 : (unsigned int) dev->agp->base + ++ lpcmDMAManager->pPhysical; ++ /*add shadow offset */ ++ ++ /* 256-bit alignment of AGP pause address */ ++ if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ if ((u32) ++ ((unsigned long *) lpcmDMAManager->pFree) & 0x7f) { ++ ADDCmdHeader2_INVI(lpcmDMAManager->pFree, ++ INV_REG_CR_TRANS, ++ INV_ParaType_Dummy); ++ do { ++ ADDCmdData_INVI(lpcmDMAManager->pFree, ++ 0xCCCCCCC0); ++ ADDCmdData_INVI(lpcmDMAManager->pFree, ++ 0xDDD00000); ++ } ++ while ((u32)((unsigned long *) lpcmDMAManager->pFree) & ++ 0x7f) ++ ; ++ } ++ } else { ++ if ((u32) ++ ((unsigned long *) lpcmDMAManager->pFree) & 0x1f) { ++ ADDCmdHeader2_INVI(lpcmDMAManager->pFree, ++ INV_REG_CR_TRANS, ++ INV_ParaType_Dummy); ++ do { ++ ADDCmdData_INVI(lpcmDMAManager->pFree, ++ 0xCCCCCCC0); ++ ADDCmdData_INVI(lpcmDMAManager->pFree, ++ 0xDDD00000); ++ } ++ while ((u32)((unsigned long *) lpcmDMAManager->pFree) & ++ 0x1f) ++ ; ++ } ++ } ++ ++ ++ dwPause = (u32) ((unsigned long *) lpcmDMAManager->pFree) ++ - AGPBufLinearBase + AGPBufPhysicalBase - 16; ++ ++ dwReg64 = INV_SubA_HAGPBpL | INV_HWBasL(dwPause); ++ dwReg65 = INV_SubA_HAGPBpID | INV_HWBasH(dwPause) | INV_HAGPBpID_PAUSE; ++ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_TRANS, ++ INV_ParaType_PreCR); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg64); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg65); ++ ++ lpcmDMAManager->pInUseBySW = lpcmDMAManager->pFree; ++} ++ ++static int ++waitchipidle_inv(struct drm_via_chrome9_private *dev_priv) ++{ ++ unsigned int count = 50000; ++ unsigned int eng_status; ++ unsigned int engine_busy; ++ ++ do { ++ eng_status = ++ GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_ENG_STATUS); ++ engine_busy = eng_status & INV_ENG_BUSY_ALL; ++ count--; ++ } ++ while (engine_busy && count) ++ ; ++ if (count && engine_busy == 0) ++ return 0; ++ return -1; ++} ++ ++void 
++get_space_db_inv(struct drm_device *dev, ++ struct cmd_get_space *lpcmGetSpaceData) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ struct drm_via_chrome9_DMA_manager *lpcmDMAManager = ++ dev_priv->dma_manager; ++ ++ unsigned int dwRequestSize = lpcmGetSpaceData->dwRequestSize; ++ if (dwRequestSize > lpcmDMAManager->MaxKickoffSize) { ++ DRM_INFO("too big DMA buffer request!!!\n"); ++ via_chrome9ke_assert(0); ++ *lpcmGetSpaceData->pCmdData = (unsigned int) NULL; ++ return; ++ } ++ ++ if ((lpcmDMAManager->pFree + dwRequestSize) > ++ (lpcmDMAManager->pEnd - INV_CMDBUF_THRESHOLD * 2)) ++ kickoff_dma_db_inv(dev); ++ ++ *lpcmGetSpaceData->pCmdData = (unsigned int) lpcmDMAManager->pFree; ++} ++ ++void ++RewindRingAGP_inv(struct drm_device *dev) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ struct drm_via_chrome9_DMA_manager *lpcmDMAManager = ++ dev_priv->dma_manager; ++ ++ unsigned int AGPBufLinearBase = ++ (unsigned int) lpcmDMAManager->addr_linear; ++ unsigned int AGPBufPhysicalBase = ++ (dev_priv->chip_agp == ++ CHIP_PCIE) ? 
0 : (unsigned int) dev->agp->base + ++ lpcmDMAManager->pPhysical; ++ /*add shadow offset */ ++ ++ unsigned int dwPause, dwJump; ++ unsigned int dwReg66, dwReg67; ++ unsigned int dwReg64, dwReg65; ++ ++ ADDCmdHeader2_INVI(lpcmDMAManager->pFree, INV_REG_CR_TRANS, ++ INV_ParaType_Dummy); ++ ADDCmdData_INVI(lpcmDMAManager->pFree, 0xCCCCCCC7); ++ if (dev_priv->chip_sub_index == CHIP_H6S2) ++ while ((unsigned int) lpcmDMAManager->pFree & 0x7F) ++ ADDCmdData_INVI(lpcmDMAManager->pFree, 0xCCCCCCC7); ++ else ++ while ((unsigned int) lpcmDMAManager->pFree & 0x1F) ++ ADDCmdData_INVI(lpcmDMAManager->pFree, 0xCCCCCCC7); ++ dwJump = ((u32) ((unsigned long *) lpcmDMAManager->pFree)) ++ - AGPBufLinearBase + AGPBufPhysicalBase - 16; ++ ++ lpcmDMAManager->pFree = lpcmDMAManager->pBeg; ++ ++ dwPause = ((u32) ((unsigned long *) lpcmDMAManager->pFree)) ++ - AGPBufLinearBase + AGPBufPhysicalBase - 16; ++ ++ dwReg64 = INV_SubA_HAGPBpL | INV_HWBasL(dwPause); ++ dwReg65 = INV_SubA_HAGPBpID | INV_HWBasH(dwPause) | INV_HAGPBpID_PAUSE; ++ ++ dwReg66 = INV_SubA_HAGPBjumpL | INV_HWBasL(dwJump); ++ dwReg67 = INV_SubA_HAGPBjumpH | INV_HWBasH(dwJump); ++ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_TRANS, ++ INV_ParaType_PreCR); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg66); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg67); ++ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg64); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, dwReg65); ++ lpcmDMAManager->pInUseBySW = lpcmDMAManager->pFree; ++} ++ ++ ++void ++get_space_ring_inv(struct drm_device *dev, ++ struct cmd_get_space *lpcmGetSpaceData) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ struct drm_via_chrome9_DMA_manager *lpcmDMAManager = ++ dev_priv->dma_manager; ++ unsigned int dwUnFlushed; ++ unsigned int dwRequestSize = lpcmGetSpaceData->dwRequestSize; ++ ++ unsigned int AGPBufLinearBase = ++ 
(unsigned int) lpcmDMAManager->addr_linear; ++ unsigned int AGPBufPhysicalBase = ++ (dev_priv->chip_agp == ++ CHIP_PCIE) ? 0 : (unsigned int) dev->agp->base + ++ lpcmDMAManager->pPhysical; ++ /*add shadow offset */ ++ u32 BufStart, BufEnd, CurSW, CurHW, NextSW, BoundaryCheck; ++ ++ dwUnFlushed = ++ (unsigned int) (lpcmDMAManager->pFree - lpcmDMAManager->pBeg); ++ /*default bEnableModuleSwitch is on for metro,is off for rest */ ++ /*cmHW_Module_Switch is context-wide variable which is enough for 2d/3d ++ switch in a context. */ ++ /*But we must keep the dma buffer being wrapped head and tail by 3d cmds ++ when it is kicked off to kernel mode. */ ++ /*Get DMA Space (If requested, or no BCI space and BCI not forced. */ ++ ++ if (dwRequestSize > lpcmDMAManager->MaxKickoffSize) { ++ DRM_INFO("too big DMA buffer request!!!\n"); ++ via_chrome9ke_assert(0); ++ *lpcmGetSpaceData->pCmdData = 0; ++ return; ++ } ++ ++ if (dwUnFlushed + dwRequestSize > lpcmDMAManager->MaxKickoffSize) ++ kickoff_dma_ring_inv(dev); ++ ++ BufStart = ++ (u32)((unsigned int) lpcmDMAManager->pBeg) - AGPBufLinearBase + ++ AGPBufPhysicalBase; ++ BufEnd = (u32)((unsigned int) lpcmDMAManager->pEnd) - AGPBufLinearBase + ++ AGPBufPhysicalBase; ++ dwRequestSize = lpcmGetSpaceData->dwRequestSize << 2; ++ NextSW = (u32) ((unsigned int) lpcmDMAManager->pFree) + dwRequestSize + ++ INV_CMDBUF_THRESHOLD * 8 - AGPBufLinearBase + ++ AGPBufPhysicalBase; ++ ++ CurSW = (u32)((unsigned int) lpcmDMAManager->pFree) - AGPBufLinearBase + ++ AGPBufPhysicalBase; ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, INV_RB_AGPCMD_CURRADDR); ++ ++ if (NextSW >= BufEnd) { ++ kickoff_dma_ring_inv(dev); ++ CurSW = (u32) ((unsigned int) lpcmDMAManager->pFree) - ++ AGPBufLinearBase + AGPBufPhysicalBase; ++ /* make sure the last rewind is completed */ ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ while (CurHW > CurSW) ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ 
/* Sometime the value read from HW is unreliable, ++ so need double confirm. */ ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ while (CurHW > CurSW) ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ BoundaryCheck = ++ BufStart + dwRequestSize + INV_QW_PAUSE_ALIGN * 16; ++ if (BoundaryCheck >= BufEnd) ++ /* If an empty command buffer can't hold ++ the request data. */ ++ via_chrome9ke_assert(0); ++ else { ++ /* We need to guarntee the new commands have no chance ++ to override the unexected commands or wait until there ++ is no unexecuted commands in agp buffer */ ++ if (CurSW <= BoundaryCheck) { ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ while (CurHW < CurSW) ++ CurHW = GetMMIORegister( ++ dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ /*Sometime the value read from HW is unreliable, ++ so need double confirm. */ ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ while (CurHW < CurSW) { ++ CurHW = GetMMIORegister( ++ dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ } ++ RewindRingAGP_inv(dev); ++ CurSW = (u32) ((unsigned long *) ++ lpcmDMAManager->pFree) - ++ AGPBufLinearBase + AGPBufPhysicalBase; ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ /* Waiting until hw pointer jump to start ++ and hw pointer will */ ++ /* equal to sw pointer */ ++ while (CurHW != CurSW) { ++ CurHW = GetMMIORegister( ++ dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ } ++ } else { ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ ++ while (CurHW <= BoundaryCheck) { ++ CurHW = GetMMIORegister( ++ dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ } ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ /* Sometime the value read from HW is ++ unreliable, so need double confirm. 
*/ ++ while (CurHW <= BoundaryCheck) { ++ CurHW = GetMMIORegister( ++ dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ } ++ RewindRingAGP_inv(dev); ++ } ++ } ++ } else { ++ /* no need to rewind Ensure unexecuted agp commands will ++ not be override by new ++ agp commands */ ++ CurSW = (u32) ((unsigned int) lpcmDMAManager->pFree) - ++ AGPBufLinearBase + AGPBufPhysicalBase; ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ ++ while ((CurHW > CurSW) && (CurHW <= NextSW)) ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ ++ /* Sometime the value read from HW is unreliable, ++ so need double confirm. */ ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ while ((CurHW > CurSW) && (CurHW <= NextSW)) ++ CurHW = GetMMIORegister(dev_priv->mmio->handle, ++ INV_RB_AGPCMD_CURRADDR); ++ } ++ /*return the space handle */ ++ *lpcmGetSpaceData->pCmdData = (unsigned int) lpcmDMAManager->pFree; ++} ++ ++void ++release_space_inv(struct drm_device *dev, ++ struct cmd_release_space *lpcmReleaseSpaceData) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ struct drm_via_chrome9_DMA_manager *lpcmDMAManager = ++ dev_priv->dma_manager; ++ unsigned int dwReleaseSize = lpcmReleaseSpaceData->dwReleaseSize; ++ int i = 0; ++ ++ lpcmDMAManager->pFree += dwReleaseSize; ++ ++ /* aligned address */ ++ while (((unsigned int) lpcmDMAManager->pFree) & 0xF) { ++ /* not in 4 unsigned ints (16 Bytes) align address, ++ insert NULL Commands */ ++ *lpcmDMAManager->pFree++ = NULL_COMMAND_INV[i & 0x3]; ++ i++; ++ } ++ ++ if ((dev_priv->chip_sub_index == CHIP_H5 || ++ dev_priv->chip_sub_index == CHIP_H6S2) && ++ (dev_priv->drm_agp_type == DRM_AGP_RING_BUFFER)) { ++ ADDCmdHeader2_INVI(lpcmDMAManager->pFree, INV_REG_CR_TRANS, ++ INV_ParaType_Dummy); ++ for (i = 0; i < NULLCOMMANDNUMBER; i++) ++ ADDCmdData_INVI(lpcmDMAManager->pFree, 0xCC000000); ++ } ++} ++ 
++int ++via_chrome9_ioctl_flush(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_flush *dma_info = data; ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ int ret = 0; ++ int result = 0; ++ struct cmd_get_space getspace; ++ struct cmd_release_space releasespace; ++ unsigned long *pCmdData = NULL; ++ ++ switch (dma_info->dma_cmd_type) { ++ /* Copy DMA buffer to BCI command buffer */ ++ case flush_bci: ++ case flush_bci_and_wait: ++ if (dma_info->cmd_size <= 0) ++ return 0; ++ if (dma_info->cmd_size > MAX_BCI_BUFFER_SIZE) { ++ DRM_INFO("too big BCI space request!!!\n"); ++ return 0; ++ } ++ ++ kickoff_bci_inv(dev, dma_info); ++ waitchipidle_inv(dev_priv); ++ break; ++ /* Use DRM DMA buffer manager to kick off DMA directly */ ++ case dma_kickoff: ++ break; ++ ++ /* Copy user mode DMA buffer to kernel DMA buffer, ++ then kick off DMA */ ++ case flush_dma_buffer: ++ case flush_dma_and_wait: ++ if (dma_info->cmd_size <= 0) ++ return 0; ++ ++ getspace.dwRequestSize = dma_info->cmd_size; ++ if ((dev_priv->chip_sub_index == CHIP_H5 || ++ dev_priv->chip_sub_index == CHIP_H6S2) && ++ (dev_priv->drm_agp_type == DRM_AGP_RING_BUFFER)) ++ getspace.dwRequestSize += (NULLCOMMANDNUMBER + 4); ++ /*henry:Patch for VT3293 agp ring buffer stability */ ++ getspace.pCmdData = (unsigned int *) &pCmdData; ++ ++ if (dev_priv->drm_agp_type == DRM_AGP_DOUBLE_BUFFER) ++ get_space_db_inv(dev, &getspace); ++ else if (dev_priv->drm_agp_type == DRM_AGP_RING_BUFFER) ++ get_space_ring_inv(dev, &getspace); ++ if (pCmdData) { ++ /*copy data from userspace to kernel-dma-agp buffer */ ++ result = copy_from_user((int *) ++ pCmdData, ++ dma_info->usermode_dma_buf, ++ dma_info->cmd_size << 2); ++ if (result) { ++ DRM_ERROR("In function via_chrome9_ioctl_flush,\ ++ copy_from_user is fault. 
\n"); ++ return -EINVAL; ++ } ++ ++#if VIA_CHROME9_VERIFY_ENABLE ++ result = via_chrome9_verify_command_stream( ++ (const uint32_t *)pCmdData, dma_info->cmd_size << 2, ++ dev, dev_priv->chip_sub_index == CHIP_H6S2 ? 0 : 1); ++ if (result) { ++ DRM_ERROR("The user command has security issue.\n"); ++ return -EINVAL; ++ } ++#endif ++ ++ releasespace.dwReleaseSize = dma_info->cmd_size; ++ release_space_inv(dev, &releasespace); ++ if (dev_priv->drm_agp_type == DRM_AGP_DOUBLE_BUFFER) ++ kickoff_dma_db_inv(dev); ++ else if (dev_priv->drm_agp_type == DRM_AGP_RING_BUFFER) ++ kickoff_dma_ring_inv(dev); ++ ++ if (dma_info->dma_cmd_type == flush_dma_and_wait) ++ waitchipidle_inv(dev_priv); ++ } else { ++ DRM_INFO("No enough DMA space"); ++ ret = -ENOMEM; ++ } ++ break; ++ ++ default: ++ DRM_INFO("Invalid DMA buffer type"); ++ ret = -EINVAL; ++ break; ++ } ++ return ret; ++} ++ ++int ++via_chrome9_ioctl_free(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ return 0; ++} ++ ++int ++via_chrome9_ioctl_wait_chip_idle(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ ++ waitchipidle_inv(dev_priv); ++ /* maybe_bug here, do we always return 0 */ ++ return 0; ++} ++ ++int ++via_chrome9_ioctl_flush_cache(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ return 0; ++} +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_dma.h linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_dma.h +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_dma.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_dma.h 2008-12-03 03:08:32.000000000 +0100 +@@ -0,0 +1,69 @@ ++/* ++ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved. ++ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person ++ * obtaining a copy of this software and associated documentation ++ * files (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, ++ * copy, modify, merge, publish, distribute, sub license, ++ * and/or sell copies of the Software, and to permit persons to ++ * whom the Software is furnished to do so, subject to the ++ * following conditions: ++ * ++ * The above copyright notice and this permission notice ++ * (including the next paragraph) shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. IN NO EVENT SHALL VIA, S3 GRAPHICS, AND/OR ++ * ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ++ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++#ifndef _VIA_CHROME9_DMA_H_ ++#define _VIA_CHROME9_DMA_H_ ++ ++#define MAX_BCI_BUFFER_SIZE (16 * 1024 * 1024) ++ ++enum cmd_request_type { ++ CM_REQUEST_BCI, ++ CM_REQUEST_DMA, ++ CM_REQUEST_RB, ++ CM_REQUEST_RB_FORCED_DMA, ++ CM_REQUEST_NOTAVAILABLE ++}; ++ ++struct cmd_get_space { ++ unsigned int dwRequestSize; ++ enum cmd_request_type hint; ++ __volatile__ unsigned int *pCmdData; ++}; ++ ++struct cmd_release_space { ++ unsigned int dwReleaseSize; ++}; ++ ++extern int via_chrome9_hw_init(struct drm_device *dev, ++ struct drm_via_chrome9_init *init); ++extern int via_chrome9_ioctl_flush(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++extern int via_chrome9_ioctl_free(struct drm_device *dev, void *data, ++ struct drm_file *file_prev); ++extern int via_chrome9_ioctl_wait_chip_idle(struct drm_device *dev, ++ void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_flush_cache(struct drm_device *dev, ++ void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_flush(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++extern int via_chrome9_ioctl_free(struct drm_device *dev, void *data, ++ struct drm_file *file_priv); ++extern unsigned int ProtectSizeValue(unsigned int size); ++extern void SetAGPDoubleCmd_inv(struct drm_device *dev); ++extern void SetAGPRingCmdRegs_inv(struct drm_device *dev); ++extern void via_chrome9_dma_init_inv(struct drm_device *dev); ++ ++#endif +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_drm.c linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_drm.c +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_drm.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_drm.c 2008-12-03 03:08:32.000000000 +0100 +@@ -0,0 +1,950 @@ ++/* ++ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved. ++ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person ++ * obtaining a copy of this software and associated documentation ++ * files (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, ++ * copy, modify, merge, publish, distribute, sub license, ++ * and/or sell copies of the Software, and to permit persons to ++ * whom the Software is furnished to do so, subject to the ++ * following conditions: ++ * ++ * The above copyright notice and this permission notice ++ * (including the next paragraph) shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. IN NO EVENT SHALL VIA, S3 GRAPHICS, AND/OR ++ * ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ++ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++#include "drmP.h" ++#include "via_chrome9_drm.h" ++#include "via_chrome9_drv.h" ++#include "via_chrome9_mm.h" ++#include "via_chrome9_dma.h" ++#include "via_chrome9_3d_reg.h" ++ ++#define VIA_CHROME9DRM_VIDEO_STARTADDRESS_ALIGNMENT 10 ++ ++void *via_chrome9_dev_v4l; ++void *via_chrome9_filepriv_v4l; ++ ++void __via_chrome9ke_udelay(unsigned long usecs) ++{ ++ unsigned long start; ++ unsigned long stop; ++ unsigned long period; ++ unsigned long wait_period; ++ struct timespec tval; ++ ++#ifdef NDELAY_LIMIT ++#define UDELAY_LIMIT (NDELAY_LIMIT/1000) /* supposed to be 10 msec */ ++#else ++#define UDELAY_LIMIT (10000) /* 10 msec */ ++#endif ++ ++ if (usecs > UDELAY_LIMIT) { ++ start = jiffies; ++ tval.tv_sec = usecs / 1000000; ++ tval.tv_nsec = (usecs - tval.tv_sec * 1000000) * 1000; ++ wait_period = timespec_to_jiffies(&tval); ++ do { ++ stop = jiffies; ++ ++ if (stop < start) ++ period = ((unsigned long)-1 - start) + stop + 1; ++ else ++ period = stop - start; ++ ++ } while (period < wait_period); ++ } else ++ udelay(usecs); /* delay value might get checked once again */ ++} ++ ++int via_chrome9_ioctl_process_exit(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ return 0; ++} ++ ++int via_chrome9_ioctl_restore_primary(struct drm_device *dev, ++ void *data, struct drm_file *file_priv) ++{ ++ return 0; ++} ++ ++void Initialize3DEngine(struct drm_via_chrome9_private *dev_priv) ++{ ++ int i; ++ unsigned int StageOfTexture; ++ ++ if (dev_priv->chip_sub_index == CHIP_H5 || ++ dev_priv->chip_sub_index == CHIP_H5S1) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ 0x00010000); ++ ++ for (i = 0; i <= 0x8A; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (unsigned int) i << 24); ++ } ++ ++ /* Initial Texture Stage Setting*/ ++ for (StageOfTexture = 0; StageOfTexture < 0xf; ++ StageOfTexture++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00020000 | 0x00000000 | ++ (StageOfTexture & 0xf)<<24)); ++ /* *((unsigned 
int volatile*)(pMapIOPort+HC_REG_TRANS_SET)) = ++ (0x00020000 | HC_ParaSubType_Tex0 | (StageOfTexture & ++ 0xf)<<24);*/ ++ for (i = 0 ; i <= 0x30 ; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (unsigned int) i << 24); ++ } ++ } ++ ++ /* Initial Texture Sampler Setting*/ ++ for (StageOfTexture = 0; StageOfTexture < 0xf; ++ StageOfTexture++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00020000 | 0x00020000 | ++ (StageOfTexture & 0xf)<<24)); ++ /* *((unsigned int volatile*)(pMapIOPort+ ++ HC_REG_TRANS_SET)) = (0x00020000 | 0x00020000 | ++ ( StageOfTexture & 0xf)<<24);*/ ++ for (i = 0 ; i <= 0x30 ; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (unsigned int) i << 24); ++ } ++ } ++ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00020000 | 0xfe000000)); ++ /* *((unsigned int volatile*)(pMapIOPort+HC_REG_TRANS_SET)) = ++ (0x00020000 | HC_ParaSubType_TexGen);*/ ++ for (i = 0 ; i <= 0x13 ; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (unsigned int) i << 24); ++ /* *((unsigned int volatile*)(pMapIOPort+ ++ HC_REG_Hpara0)) = ((unsigned int) i << 24);*/ ++ } ++ ++ /* Initial Gamma Table Setting*/ ++ /* Initial Gamma Table Setting*/ ++ /* 5 + 4 = 9 (12) dwords*/ ++ /* sRGB texture is not directly support by H3 hardware. 
++ We have to set the deGamma table for texture sampling.*/ ++ ++ /* degamma table*/ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00030000 | 0x15000000)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (0x40000000 | (30 << 20) | (15 << 10) | (5))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((119 << 20) | (81 << 10) | (52))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((283 << 20) | (219 << 10) | (165))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((535 << 20) | (441 << 10) | (357))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((119 << 20) | (884 << 20) | (757 << 10) | ++ (640))); ++ ++ /* gamma table*/ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00030000 | 0x17000000)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (0x40000000 | (13 << 20) | (13 << 10) | (13))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (0x40000000 | (26 << 20) | (26 << 10) | (26))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (0x40000000 | (39 << 20) | (39 << 10) | (39))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((51 << 20) | (51 << 10) | (51))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((71 << 20) | (71 << 10) | (71))); ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (87 << 20) | (87 << 10) | (87)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (113 << 20) | (113 << 10) | (113)); ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (135 << 20) | (135 << 10) | (135)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (170 << 20) | (170 << 10) | (170)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (199 << 20) | (199 << 10) | (199)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (246 << 20) | (246 << 10) | (246)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (284 << 20) | (284 << 10) | (284)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (317 << 20) | (317 << 10) | (317)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (347 << 
20) | (347 << 10) | (347)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (373 << 20) | (373 << 10) | (373)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (398 << 20) | (398 << 10) | (398)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (442 << 20) | (442 << 10) | (442)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (481 << 20) | (481 << 10) | (481)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (517 << 20) | (517 << 10) | (517)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (550 << 20) | (550 << 10) | (550)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (609 << 20) | (609 << 10) | (609)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (662 << 20) | (662 << 10) | (662)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (709 << 20) | (709 << 10) | (709)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (753 << 20) | (753 << 10) | (753)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (794 << 20) | (794 << 10) | (794)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (832 << 20) | (832 << 10) | (832)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (868 << 20) | (868 << 10) | (868)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (902 << 20) | (902 << 10) | (902)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (934 << 20) | (934 << 10) | (934)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (966 << 20) | (966 << 10) | (966)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (996 << 20) | (996 << 10) | (996)); ++ ++ ++ /* ++ For Interrupt Restore only All types of write through ++ regsiters should be write header data to hardware at ++ least before it can restore. H/W will automatically ++ record the header to write through state buffer for ++ resture usage. ++ By Jaren: ++ HParaType = 8'h03, HParaSubType = 8'h00 ++ 8'h11 ++ 8'h12 ++ 8'h14 ++ 8'h15 ++ 8'h17 ++ HParaSubType 8'h12, 8'h15 is initialized. ++ [HWLimit] ++ 1. 
All these write through registers can't be partial ++ update. ++ 2. All these write through must be AGP command ++ 16 entries : 4 128-bit data */ ++ ++ /* Initialize INV_ParaSubType_TexPal */ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00030000 | 0x00000000)); ++ for (i = 0; i < 16; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x00000000); ++ } ++ ++ /* Initialize INV_ParaSubType_4X4Cof */ ++ /* 32 entries : 8 128-bit data */ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00030000 | 0x11000000)); ++ for (i = 0; i < 32; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x00000000); ++ } ++ ++ /* Initialize INV_ParaSubType_StipPal */ ++ /* 5 entries : 2 128-bit data */ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00030000 | 0x14000000)); ++ for (i = 0; i < (5+3); i++) { ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, 0x00000000); ++ } ++ ++ /* primitive setting & vertex format*/ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00040000 | 0x14000000)); ++ for (i = 0; i < 52; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, ((unsigned int) i << 24)); ++ } ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ 0x00fe0000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x4000840f); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x47000400); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x44000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x46000000); ++ ++ /* setting Misconfig*/ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ 0x00fe0000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x00001004); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0800004b); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0a000049); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0b0000fb); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0c000001); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0d0000cb); ++ 
SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0e000009); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x10000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x110000ff); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x12000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x130000db); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x14000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x15000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x16000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x17000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x18000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x19000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x20000000); ++ } else if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ 0x00010000); ++ for (i = 0; i <= 0x9A; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (unsigned int) i << 24); ++ } ++ ++ /* Initial Texture Stage Setting*/ ++ for (StageOfTexture = 0; StageOfTexture <= 0xf; ++ StageOfTexture++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00020000 | 0x00000000 | ++ (StageOfTexture & 0xf)<<24)); ++ for (i = 0 ; i <= 0x30 ; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (unsigned int) i << 24); ++ } ++ } ++ ++ /* Initial Texture Sampler Setting*/ ++ for (StageOfTexture = 0; StageOfTexture <= 0xf; ++ StageOfTexture++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00020000 | 0x20000000 | ++ (StageOfTexture & 0xf)<<24)); ++ for (i = 0 ; i <= 0x36 ; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (unsigned int) i << 24); ++ } ++ } ++ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00020000 | 0xfe000000)); ++ for (i = 0 ; i <= 0x13 ; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (unsigned int) i << 24); ++ /* *((unsigned int volatile*)(pMapIOPort+ ++ HC_REG_Hpara0)) 
=((unsigned int) i << 24);*/ ++ } ++ ++ /* Initial Gamma Table Setting*/ ++ /* Initial Gamma Table Setting*/ ++ /* 5 + 4 = 9 (12) dwords*/ ++ /* sRGB texture is not directly support by ++ H3 hardware.*/ ++ /* We have to set the deGamma table for texture ++ sampling.*/ ++ ++ /* degamma table*/ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00030000 | 0x15000000)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (0x40000000 | (30 << 20) | (15 << 10) | (5))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((119 << 20) | (81 << 10) | (52))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((283 << 20) | (219 << 10) | (165))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((535 << 20) | (441 << 10) | (357))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((119 << 20) | (884 << 20) | (757 << 10) ++ | (640))); ++ ++ /* gamma table*/ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00030000 | 0x17000000)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (0x40000000 | (13 << 20) | (13 << 10) | (13))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (0x40000000 | (26 << 20) | (26 << 10) | (26))); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (0x40000000 | (39 << 20) | (39 << 10) | (39))); ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, ((51 << 20) | (51 << 10) | (51))); ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, ((71 << 20) | (71 << 10) | (71))); ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (87 << 20) | (87 << 10) | (87)); ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (113 << 20) | (113 << 10) | (113)); ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (135 << 20) | (135 << 10) | (135)); ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (170 << 20) | (170 << 10) | (170)); ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (199 << 20) | (199 << 10) | (199)); ++ SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (246 << 20) | (246 << 10) | (246)); ++ 
SetMMIORegister(dev_priv->mmio->handle, ++ 0x440, (284 << 20) | (284 << 10) | (284)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (317 << 20) | (317 << 10) | (317)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (347 << 20) | (347 << 10) | (347)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (373 << 20) | (373 << 10) | (373)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (398 << 20) | (398 << 10) | (398)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (442 << 20) | (442 << 10) | (442)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (481 << 20) | (481 << 10) | (481)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (517 << 20) | (517 << 10) | (517)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (550 << 20) | (550 << 10) | (550)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (609 << 20) | (609 << 10) | (609)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (662 << 20) | (662 << 10) | (662)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (709 << 20) | (709 << 10) | (709)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (753 << 20) | (753 << 10) | (753)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (794 << 20) | (794 << 10) | (794)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (832 << 20) | (832 << 10) | (832)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (868 << 20) | (868 << 10) | (868)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (902 << 20) | (902 << 10) | (902)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (934 << 20) | (934 << 10) | (934)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (966 << 20) | (966 << 10) | (966)); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ (996 << 20) | (996 << 10) | (996)); ++ ++ ++ /* For Interrupt Restore only ++ All types of write through regsiters should be write ++ header data to hardware at least before it can restore. 
++ H/W will automatically record the header to write ++ through state buffer for restureusage. ++ By Jaren: ++ HParaType = 8'h03, HParaSubType = 8'h00 ++ 8'h11 ++ 8'h12 ++ 8'h14 ++ 8'h15 ++ 8'h17 ++ HParaSubType 8'h12, 8'h15 is initialized. ++ [HWLimit] ++ 1. All these write through registers can't be partial ++ update. ++ 2. All these write through must be AGP command ++ 16 entries : 4 128-bit data */ ++ ++ /* Initialize INV_ParaSubType_TexPal */ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00030000 | 0x00000000)); ++ for (i = 0; i < 16; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x00000000); ++ } ++ ++ /* Initialize INV_ParaSubType_4X4Cof */ ++ /* 32 entries : 8 128-bit data */ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00030000 | 0x11000000)); ++ for (i = 0; i < 32; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x00000000); ++ } ++ ++ /* Initialize INV_ParaSubType_StipPal */ ++ /* 5 entries : 2 128-bit data */ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00030000 | 0x14000000)); ++ for (i = 0; i < (5+3); i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x00000000); ++ } ++ ++ /* primitive setting & vertex format*/ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00040000)); ++ for (i = 0; i <= 0x62; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((unsigned int) i << 24)); ++ } ++ ++ /*ParaType 0xFE - Configure and Misc Setting*/ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00fe0000)); ++ for (i = 0; i <= 0x47; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((unsigned int) i << 24)); ++ } ++ /*ParaType 0x11 - Frame Buffer Auto-Swapping and ++ Command Regulator Misc*/ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ (0x00110000)); ++ for (i = 0; i <= 0x20; i++) { ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ ((unsigned int) i << 24)); ++ } ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ 0x00fe0000); ++ 
SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x4000840f); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x47000404); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x44000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x46000005); ++ ++ /* setting Misconfig*/ ++ SetMMIORegister(dev_priv->mmio->handle, 0x43C, ++ 0x00fe0000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x00001004); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x08000249); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0a0002c9); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0b0002fb); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0c000000); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0d0002cb); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x0e000009); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x10000049); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x110002ff); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x12000008); ++ SetMMIORegister(dev_priv->mmio->handle, 0x440, ++ 0x130002db); ++ } ++} ++ ++int via_chrome9_drm_resume(struct pci_dev *pci) ++{ ++ struct drm_device *dev = (struct drm_device *)pci_get_drvdata(pci); ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *)dev->dev_private; ++ ++ if (!dev_priv->initialized) ++ return 0; ++ ++ Initialize3DEngine(dev_priv); ++ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_TRANS, 0x00110000); ++ if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ 0x06000000); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ 0x07100000); ++ } else{ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ 0x02000000); ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_BEGIN, ++ 0x03100000); ++ } ++ ++ ++ SetMMIORegister(dev_priv->mmio->handle, INV_REG_CR_TRANS, ++ INV_ParaType_PreCR); ++ SetMMIORegister(dev_priv->mmio->handle, 
INV_REG_CR_BEGIN, ++ INV_SubA_HSetRBGID | INV_HSetRBGID_CR); ++ ++ if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ unsigned int i; ++ /* Here restore SR66~SR6F SR79~SR7B */ ++ for (i = 0; i < 10; i++) { ++ SetMMIORegisterU8(dev_priv->mmio->handle, ++ 0x83c4, 0x66 + i); ++ SetMMIORegisterU8(dev_priv->mmio->handle, ++ 0x83c5, dev_priv->gti_backup[i]); ++ } ++ ++ for (i = 0; i < 3; i++) { ++ SetMMIORegisterU8(dev_priv->mmio->handle, ++ 0x83c4, 0x79 + i); ++ SetMMIORegisterU8(dev_priv->mmio->handle, ++ 0x83c5, dev_priv->gti_backup[10 + i]); ++ } ++ } ++ ++ via_chrome9_dma_init_inv(dev); ++ ++ return 0; ++} ++ ++int via_chrome9_drm_suspend(struct pci_dev *pci, ++ pm_message_t state) ++{ ++ int i; ++ struct drm_device *dev = (struct drm_device *)pci_get_drvdata(pci); ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *)dev->dev_private; ++ ++ if (!dev_priv->initialized) ++ return 0; ++ ++ if (dev_priv->chip_sub_index != CHIP_H6S2) ++ return 0; ++ ++ /* Save registers from SR66~SR6F */ ++ for (i = 0; i < 10; i++) { ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x66 + i); ++ dev_priv->gti_backup[i] = ++ GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ } ++ ++ /* Save registers from SR79~SR7B */ ++ for (i = 0; i < 3; i++) { ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x79 + i); ++ dev_priv->gti_backup[10 + i] = ++ GetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5); ++ } ++ ++ return 0; ++} ++ ++int via_chrome9_driver_load(struct drm_device *dev, ++ unsigned long chipset) ++{ ++ struct drm_via_chrome9_private *dev_priv; ++ int ret = 0; ++ static int associate; ++ ++ if (!associate) { ++ pci_set_drvdata(dev->pdev, dev); ++ dev->pdev->driver = &dev->driver->pci_driver; ++ associate = 1; ++ } ++ ++ dev->counters += 4; ++ dev->types[6] = _DRM_STAT_IRQ; ++ dev->types[7] = _DRM_STAT_PRIMARY; ++ dev->types[8] = _DRM_STAT_SECONDARY; ++ dev->types[9] = _DRM_STAT_DMA; ++ ++ dev_priv = drm_calloc(1, sizeof(struct 
drm_via_chrome9_private), ++ DRM_MEM_DRIVER); ++ if (dev_priv == NULL) ++ return -ENOMEM; ++ ++ /* Clear */ ++ memset(dev_priv, 0, sizeof(struct drm_via_chrome9_private)); ++ ++ dev_priv->dev = dev; ++ dev->dev_private = (void *)dev_priv; ++ ++ dev_priv->chip_index = chipset; ++ ++ ret = drm_sman_init(&dev_priv->sman, 2, 12, 8); ++ if (ret) ++ drm_free(dev_priv, sizeof(*dev_priv), DRM_MEM_DRIVER); ++ return ret; ++} ++ ++int via_chrome9_driver_unload(struct drm_device *dev) ++{ ++ struct drm_via_chrome9_private *dev_priv = dev->dev_private; ++ ++ drm_sman_takedown(&dev_priv->sman); ++ ++ drm_free(dev_priv, sizeof(struct drm_via_chrome9_private), ++ DRM_MEM_DRIVER); ++ ++ dev->dev_private = 0; ++ ++ return 0; ++} ++ ++static int via_chrome9_initialize(struct drm_device *dev, ++ struct drm_via_chrome9_init *init) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *)dev->dev_private; ++ ++ dev_priv->chip_agp = init->chip_agp; ++ dev_priv->chip_index = init->chip_index; ++ dev_priv->chip_sub_index = init->chip_sub_index; ++ ++ dev_priv->usec_timeout = init->usec_timeout; ++ dev_priv->front_offset = init->front_offset; ++ dev_priv->back_offset = init->back_offset >> ++ VIA_CHROME9DRM_VIDEO_STARTADDRESS_ALIGNMENT << ++ VIA_CHROME9DRM_VIDEO_STARTADDRESS_ALIGNMENT; ++ dev_priv->available_fb_size = init->available_fb_size - ++ (init->available_fb_size % ++ (1 << VIA_CHROME9DRM_VIDEO_STARTADDRESS_ALIGNMENT)); ++ dev_priv->depth_offset = init->depth_offset; ++ ++ /* Find all the map added first, doing this is necessary to ++ intialize hw */ ++ if (via_chrome9_map_init(dev, init)) { ++ DRM_ERROR("function via_chrome9_map_init ERROR !\n"); ++ goto error; ++ } ++ ++ /* Necessary information has been gathered for initialize hw */ ++ if (via_chrome9_hw_init(dev, init)) { ++ DRM_ERROR("function via_chrome9_hw_init ERROR !\n"); ++ goto error; ++ } ++ ++ /* After hw intialization, we have kown whether to use agp ++ or to use pcie for texture */ ++ if 
(via_chrome9_heap_management_init(dev, init)) { ++ DRM_ERROR("function \ ++ via_chrome9_heap_management_init ERROR !\n"); ++ goto error; ++ } ++ ++ dev_priv->initialized = 1; ++ ++ return 0; ++ ++error: ++ /* all the error recover has been processed in relevant function, ++ so here just return error */ ++ return -EINVAL; ++} ++ ++static void via_chrome9_cleanup(struct drm_device *dev, ++ struct drm_via_chrome9_init *init) ++{ ++ struct drm_via_chrome9_DMA_manager *lpcmDMAManager = NULL; ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *)dev->dev_private; ++ DRM_DEBUG("function via_chrome9_cleanup run!\n"); ++ ++ if (!dev_priv) ++ return ; ++ ++ lpcmDMAManager = ++ (struct drm_via_chrome9_DMA_manager *)dev_priv->dma_manager; ++ if (dev_priv->pcie_vmalloc_nocache) { ++ vfree((void *)dev_priv->pcie_vmalloc_nocache); ++ dev_priv->pcie_vmalloc_nocache = 0; ++ if (lpcmDMAManager) ++ lpcmDMAManager->addr_linear = NULL; ++ } ++ ++ if (dev_priv->pagetable_map.pagetable_handle) { ++ iounmap(dev_priv->pagetable_map.pagetable_handle); ++ dev_priv->pagetable_map.pagetable_handle = NULL; ++ } ++ ++ if (lpcmDMAManager && lpcmDMAManager->addr_linear) { ++ iounmap(lpcmDMAManager->addr_linear); ++ lpcmDMAManager->addr_linear = NULL; ++ } ++ ++ kfree(lpcmDMAManager); ++ dev_priv->dma_manager = NULL; ++ ++ if (dev_priv->event_tag_info) { ++ vfree(dev_priv->event_tag_info); ++ dev_priv->event_tag_info = NULL; ++ } ++ ++ if (dev_priv->bci_buffer) { ++ vfree(dev_priv->bci_buffer); ++ dev_priv->bci_buffer = NULL; ++ } ++ ++ via_chrome9_memory_destroy_heap(dev, dev_priv); ++ ++ /* After cleanup, it should to set the value to null */ ++ dev_priv->sarea = dev_priv->mmio = dev_priv->hostBlt = ++ dev_priv->fb = dev_priv->front = dev_priv->back = ++ dev_priv->depth = dev_priv->agp_tex = ++ dev_priv->shadow_map.shadow = 0; ++ dev_priv->sarea_priv = 0; ++ dev_priv->initialized = 0; ++} ++ ++/* ++Do almost everything intialize here,include: ++1.intialize all addmaps 
in private data structure ++2.intialize memory heap management for video agp/pcie ++3.intialize hw for dma(pcie/agp) function ++ ++Note:all this function will dispatch into relevant function ++*/ ++int via_chrome9_ioctl_init(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_init *init = (struct drm_via_chrome9_init *)data; ++ ++ switch (init->func) { ++ case VIA_CHROME9_INIT: ++ if (via_chrome9_initialize(dev, init)) { ++ DRM_ERROR("function via_chrome9_initialize error\n"); ++ return -1; ++ } ++ via_chrome9_filepriv_v4l = (void *)file_priv; ++ via_chrome9_dev_v4l = (void *)dev; ++ break; ++ ++ case VIA_CHROME9_CLEANUP: ++ via_chrome9_cleanup(dev, init); ++ via_chrome9_filepriv_v4l = 0; ++ via_chrome9_dev_v4l = 0; ++ break; ++ ++ default: ++ return -1; ++ } ++ ++ return 0; ++} ++ ++int via_chrome9_ioctl_allocate_event_tag(struct drm_device *dev, ++ void *data, struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_event_tag *event_tag = data; ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *)dev->dev_private; ++ struct drm_clb_event_tag_info *event_tag_info = ++ dev_priv->event_tag_info; ++ unsigned int *event_addr = 0, i = 0; ++ ++ for (i = 0; i < NUMBER_OF_EVENT_TAGS; i++) { ++ if (!event_tag_info->usage[i]) ++ break; ++ } ++ ++ if (i < NUMBER_OF_EVENT_TAGS) { ++ event_tag_info->usage[i] = 1; ++ event_tag->event_offset = i; ++ event_tag->last_sent_event_value.event_low = 0; ++ event_tag->current_event_value.event_low = 0; ++ event_addr = event_tag_info->linear_address + ++ event_tag->event_offset * 4; ++ *event_addr = 0; ++ return 0; ++ } else { ++ return -7; ++ } ++ ++ return 0; ++} ++ ++int via_chrome9_ioctl_free_event_tag(struct drm_device *dev, ++ void *data, struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *)dev->dev_private; ++ struct drm_clb_event_tag_info *event_tag_info = ++ dev_priv->event_tag_info; ++ struct 
drm_via_chrome9_event_tag *event_tag = data; ++ ++ event_tag_info->usage[event_tag->event_offset] = 0; ++ return 0; ++} ++ ++void via_chrome9_lastclose(struct drm_device *dev) ++{ ++ via_chrome9_cleanup(dev, 0); ++ return ; ++} ++ ++static int via_chrome9_do_wait_vblank(struct drm_via_chrome9_private ++ *dev_priv) ++{ ++ int i; ++ ++ for (i = 0; i < dev_priv->usec_timeout; i++) { ++ VIA_CHROME9_WRITE8(0x83d4, 0x34); ++ if ((VIA_CHROME9_READ8(0x83d5)) & 0x8) ++ return 0; ++ __via_chrome9ke_udelay(1); ++ } ++ ++ return -1; ++} ++ ++void via_chrome9_preclose(struct drm_device *dev, struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ struct drm_via_chrome9_sarea *sarea_priv = NULL; ++ ++ if (!dev_priv) ++ return ; ++ ++ sarea_priv = dev_priv->sarea_priv; ++ if (!sarea_priv) ++ return ; ++ ++ if ((sarea_priv->page_flip == 1) && ++ (sarea_priv->current_page != VIA_CHROME9_FRONT)) { ++ __volatile__ unsigned long *bci_base; ++ if (via_chrome9_do_wait_vblank(dev_priv)) ++ return; ++ ++ bci_base = (__volatile__ unsigned long *)(dev_priv->bci); ++ ++ BCI_SET_STREAM_REGISTER(bci_base, 0x81c4, 0xc0000000); ++ BCI_SET_STREAM_REGISTER(bci_base, 0x81c0, ++ dev_priv->front_offset); ++ BCI_SEND(bci_base, 0x64000000);/* wait vsync */ ++ ++ sarea_priv->current_page = VIA_CHROME9_FRONT; ++ } ++} ++ ++int via_chrome9_is_agp(struct drm_device *dev) ++{ ++ /* filter out pcie group which has no AGP device */ ++ if (dev->pci_device == 0x1122 || dev->pci_device == 0x5122) { ++ dev->driver->driver_features &= ++ ~(DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_REQUIRE_AGP); ++ return 0; ++ } ++ return 1; ++} ++ +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_drm.h linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_drm.h +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_drm.h 1970-01-01 01:00:00.000000000 +0100 ++++ 
linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_drm.h 2008-12-03 03:08:32.000000000 +0100 +@@ -0,0 +1,443 @@ ++/* ++ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved. ++ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person ++ * obtaining a copy of this software and associated documentation ++ * files (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, ++ * copy, modify, merge, publish, distribute, sub license, ++ * and/or sell copies of the Software, and to permit persons to ++ * whom the Software is furnished to do so, subject to the ++ * following conditions: ++ * ++ * The above copyright notice and this permission notice ++ * (including the next paragraph) shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. IN NO EVENT SHALL VIA, S3 GRAPHICS, AND/OR ++ * ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ++ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++#ifndef _VIA_CHROME9_DRM_H_ ++#define _VIA_CHROME9_DRM_H_ ++ ++/* WARNING: These defines must be the same as what the Xserver uses. ++ * if you change them, you must change the defines in the Xserver. 
++ */ ++ ++#ifndef _VIA_CHROME9_DEFINES_ ++#define _VIA_CHROME9_DEFINES_ ++ ++#ifndef __KERNEL__ ++#include "via_drmclient.h" ++#endif ++ ++#define VIA_CHROME9_NR_SAREA_CLIPRECTS 8 ++#define VIA_CHROME9_NR_XVMC_PORTS 10 ++#define VIA_CHROME9_NR_XVMC_LOCKS 5 ++#define VIA_CHROME9_MAX_CACHELINE_SIZE 64 ++#define XVMCLOCKPTR(saPriv,lockNo) \ ++ ((__volatile__ struct drm_hw_lock *) \ ++ (((((unsigned long) (saPriv)->XvMCLockArea) + \ ++ (VIA_CHROME9_MAX_CACHELINE_SIZE - 1)) & \ ++ ~(VIA_CHROME9_MAX_CACHELINE_SIZE - 1)) + \ ++ VIA_CHROME9_MAX_CACHELINE_SIZE*(lockNo))) ++ ++/* Each region is a minimum of 64k, and there are at most 64 of them. ++ */ ++#define VIA_CHROME9_NR_TEX_REGIONS 64 ++#define VIA_CHROME9_LOG_MIN_TEX_REGION_SIZE 16 ++#endif ++ ++#define VIA_CHROME9_UPLOAD_TEX0IMAGE 0x1 /* handled clientside */ ++#define VIA_CHROME9_UPLOAD_TEX1IMAGE 0x2 /* handled clientside */ ++#define VIA_CHROME9_UPLOAD_CTX 0x4 ++#define VIA_CHROME9_UPLOAD_BUFFERS 0x8 ++#define VIA_CHROME9_UPLOAD_TEX0 0x10 ++#define VIA_CHROME9_UPLOAD_TEX1 0x20 ++#define VIA_CHROME9_UPLOAD_CLIPRECTS 0x40 ++#define VIA_CHROME9_UPLOAD_ALL 0xff ++ ++/* VIA_CHROME9 specific ioctls */ ++#define DRM_VIA_CHROME9_ALLOCMEM 0x00 ++#define DRM_VIA_CHROME9_FREEMEM 0x01 ++#define DRM_VIA_CHROME9_FREE 0x02 ++#define DRM_VIA_CHROME9_ALLOCATE_EVENT_TAG 0x03 ++#define DRM_VIA_CHROME9_FREE_EVENT_TAG 0x04 ++#define DRM_VIA_CHROME9_ALLOCATE_APERTURE 0x05 ++#define DRM_VIA_CHROME9_FREE_APERTURE 0x06 ++#define DRM_VIA_CHROME9_ALLOCATE_VIDEO_MEM 0x07 ++#define DRM_VIA_CHROME9_FREE_VIDEO_MEM 0x08 ++#define DRM_VIA_CHROME9_WAIT_CHIP_IDLE 0x09 ++#define DRM_VIA_CHROME9_PROCESS_EXIT 0x0A ++#define DRM_VIA_CHROME9_RESTORE_PRIMARY 0x0B ++#define DRM_VIA_CHROME9_FLUSH_CACHE 0x0C ++#define DRM_VIA_CHROME9_INIT 0x0D ++#define DRM_VIA_CHROME9_FLUSH 0x0E ++#define DRM_VIA_CHROME9_CHECKVIDMEMSIZE 0x0F ++#define DRM_VIA_CHROME9_PCIEMEMCTRL 0x10 ++#define DRM_VIA_CHROME9_AUTH_MAGIC 0x11 ++#define DRM_VIA_CHROME9_GET_PCI_ID 0x12 
++#define DRM_VIA_CHROME9_INIT_JUDGE 0x16 ++#define DRM_VIA_CHROME9_DMA 0x17 ++ ++#define DRM_IOCTL_VIA_CHROME9_INIT \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_INIT, \ ++ struct drm_via_chrome9_init) ++#define DRM_IOCTL_VIA_CHROME9_FLUSH \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_FLUSH, \ ++ struct drm_via_chrome9_flush) ++#define DRM_IOCTL_VIA_CHROME9_FREE \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_FREE, int) ++#define DRM_IOCTL_VIA_CHROME9_ALLOCATE_EVENT_TAG \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_ALLOCATE_EVENT_TAG, \ ++ struct drm_event_via_chrome9_tag) ++#define DRM_IOCTL_VIA_CHROME9_FREE_EVENT_TAG \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_FREE_EVENT_TAG, \ ++ struct drm_event_via_chrome9_tag) ++#define DRM_IOCTL_VIA_CHROME9_ALLOCATE_APERTURE \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_ALLOCATE_APERTURE, \ ++ struct drm_via_chrome9_aperture) ++#define DRM_IOCTL_VIA_CHROME9_FREE_APERTURE \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_FREE_APERTURE, \ ++ struct drm_via_chrome9_aperture) ++#define DRM_IOCTL_VIA_CHROME9_ALLOCATE_VIDEO_MEM \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_ALLOCATE_VIDEO_MEM, \ ++ struct drm_via_chrome9_memory_alloc) ++#define DRM_IOCTL_VIA_CHROME9_FREE_VIDEO_MEM \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_FREE_VIDEO_MEM, \ ++ struct drm_via_chrome9_memory_alloc) ++#define DRM_IOCTL_VIA_CHROME9_WAIT_CHIP_IDLE \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_WAIT_CHIP_IDLE, int) ++#define DRM_IOCTL_VIA_CHROME9_PROCESS_EXIT \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_PROCESS_EXIT, int) ++#define DRM_IOCTL_VIA_CHROME9_RESTORE_PRIMARY \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_RESTORE_PRIMARY, int) ++#define DRM_IOCTL_VIA_CHROME9_FLUSH_CACHE \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_FLUSH_CACHE, int) ++#define DRM_IOCTL_VIA_CHROME9_ALLOCMEM \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_ALLOCMEM, int) ++#define DRM_IOCTL_VIA_CHROME9_FREEMEM \ ++ DRM_IOW(DRM_COMMAND_BASE + 
DRM_VIA_CHROME9_FREEMEM, int) ++#define DRM_IOCTL_VIA_CHROME9_CHECK_VIDMEM_SIZE \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_CHECKVIDMEMSIZE, \ ++ struct drm_via_chrome9_memory_alloc) ++#define DRM_IOCTL_VIA_CHROME9_PCIEMEMCTRL \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_PCIEMEMCTRL,\ ++ drm_via_chrome9_pciemem_ctrl_t) ++#define DRM_IOCTL_VIA_CHROME9_AUTH_MAGIC \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_AUTH_MAGIC, drm_auth_t) ++#define DRM_IOCTL_VIA_CHROME9_GET_PCI_ID \ ++ DRM_IOW(DRM_COMMAND_BASE + DRM_VIA_CHROME9_GET_PCI_ID, \ ++ struct get_pci_id_struct) ++#define DRM_IOCTL_VIA_CHROME9_INIT_JUDGE \ ++ DRM_IOR(DRM_COMMAND_BASE + DRM_VIA_CHROME9_INIT_JUDGE, int) ++#define DRM_IOCTL_VIA_CHROME9_DMA \ ++ DRM_IO(DRM_COMMAND_BASE + DRM_VIA_CHROME9_DMA, int) ++ ++enum S3GCHIPIDS { ++ CHIP_UNKNOWN = -1, ++ CHIP_CMODEL, /*Model for any chip. */ ++ CHIP_CLB, /*Columbia */ ++ CHIP_DST, /*Destination */ ++ CHIP_CSR, /*Castlerock */ ++ CHIP_INV, /*Innovation (H3) */ ++ CHIP_H5, /*Innovation (H5) */ ++ CHIP_H5S1, /*Innovation (H5S1) */ ++ CHIP_H6S2, /*Innovation (H6S2) */ ++ CHIP_CMS, /*Columbia MS */ ++ CHIP_METRO, /*Metropolis */ ++ CHIP_MANHATTAN, /*manhattan */ ++ CHIP_MATRIX, /*matrix */ ++ CHIP_EVO, /*change for GCC 4.1 -add- 07.02.12*/ ++ CHIP_H6S1, /*Innovation (H6S1)*/ ++ CHIP_DST2, /*Destination-2 */ ++ CHIP_LAST /*Maximum number of chips supported. 
*/ ++}; ++ ++enum VIA_CHROME9CHIPBUS { ++ CHIP_PCI, ++ CHIP_AGP, ++ CHIP_PCIE ++}; ++ ++struct drm_via_chrome9_init { ++ enum { ++ VIA_CHROME9_INIT = 0x01, ++ VIA_CHROME9_CLEANUP = 0x02 ++ } func; ++ int chip_agp; ++ int chip_index; ++ int chip_sub_index; ++ int usec_timeout; ++ unsigned int sarea_priv_offset; ++ unsigned int fb_cpp; ++ unsigned int front_offset; ++ unsigned int back_offset; ++ unsigned int depth_offset; ++ unsigned int mmio_handle; ++ unsigned int dma_handle; ++ unsigned int fb_handle; ++ unsigned int front_handle; ++ unsigned int back_handle; ++ unsigned int depth_handle; ++ ++ unsigned int fb_tex_offset; ++ unsigned int fb_tex_size; ++ ++ unsigned int agp_tex_size; ++ unsigned int agp_tex_handle; ++ unsigned int shadow_size; ++ unsigned int shadow_handle; ++ unsigned int garttable_size; ++ unsigned int garttable_offset; ++ unsigned long available_fb_size; ++ unsigned long fb_base_address; ++ unsigned int DMA_size; ++ unsigned long DMA_phys_address; ++ enum { ++ AGP_RING_BUFFER, ++ AGP_DOUBLE_BUFFER, ++ AGP_DISABLED ++ } agp_type; ++ unsigned int hostBlt_handle; ++}; ++ ++enum dma_cmd_type { ++ flush_bci = 0, ++ flush_bci_and_wait, ++ dma_kickoff, ++ flush_dma_buffer, ++ flush_dma_and_wait ++}; ++ ++struct drm_via_chrome9_flush { ++ enum dma_cmd_type dma_cmd_type; ++ /* command buffer index */ ++ int cmd_idx; ++ /* command buffer offset */ ++ int cmd_offset; ++ /* command dword size,command always from beginning */ ++ int cmd_size; ++ /* if use dma kick off,it is dma kick off command */ ++ unsigned long dma_kickoff[2]; ++ /* user mode DMA buffer pointer */ ++ unsigned int *usermode_dma_buf; ++}; ++ ++struct event_value { ++ int event_low; ++ int event_high; ++}; ++ ++struct drm_via_chrome9_event_tag { ++ unsigned int event_size; /* event tag size */ ++ int event_offset; /* event tag id */ ++ struct event_value last_sent_event_value; ++ struct event_value current_event_value; ++ int query_mask0; ++ int query_mask1; ++ int query_Id1; ++}; ++ ++/* 
Indices into buf.Setup where various bits of state are mirrored per ++ * context and per buffer. These can be fired at the card as a unit, ++ * or in a piecewise fashion as required. ++ */ ++ ++#define VIA_CHROME9_TEX_SETUP_SIZE 8 ++ ++/* Flags for clear ioctl ++ */ ++#define VIA_CHROME9_FRONT 0x1 ++#define VIA_CHROME9_BACK 0x2 ++#define VIA_CHROME9_DEPTH 0x4 ++#define VIA_CHROME9_STENCIL 0x8 ++#define VIA_CHROME9_MEM_VIDEO 0 /* matches drm constant */ ++#define VIA_CHROME9_MEM_AGP 1 /* matches drm constant */ ++#define VIA_CHROME9_MEM_SYSTEM 2 ++#define VIA_CHROME9_MEM_MIXED 3 ++#define VIA_CHROME9_MEM_UNKNOWN 4 ++ ++struct drm_via_chrome9_agp { ++ uint32_t offset; ++ uint32_t size; ++}; ++ ++struct drm_via_chrome9_fb { ++ uint32_t offset; ++ uint32_t size; ++}; ++ ++struct drm_via_chrome9_mem { ++ uint32_t context; ++ uint32_t type; ++ uint32_t size; ++ unsigned long index; ++ unsigned long offset; ++}; ++ ++struct drm_via_chrome9_aperture { ++ /*IN: The frame buffer offset of the surface. */ ++ int surface_offset; ++ /*IN: Surface pitch in byte, */ ++ int pitch; ++ /*IN: Surface width in pixel */ ++ int width; ++ /*IN: Surface height in pixel */ ++ int height; ++ /*IN: Surface color format, Columbia has more color formats */ ++ int color_format; ++ /*IN: Rotation degrees, only for Columbia */ ++ int rotation_degree; ++ /*IN Is the PCIE Video, for MATRIX support NONLOCAL Aperture */ ++ int isPCIEVIDEO; ++ /*IN: Is the surface tilled, only for Columbia */ ++ int is_tiled; ++ /*IN: Only allocate apertur, not hardware setup. 
*/ ++ int allocate_only; ++ /* OUT: linear address for aperture */ ++ unsigned int *aperture_linear_address; ++ /*OUT: The pitch of the aperture,for CPU write not for GE */ ++ int aperture_pitch; ++ /*OUT: The index of the aperture */ ++ int aperture_handle; ++ int apertureID; ++ /* always =0xAAAAAAAA */ ++ /* Aligned surface's width(in pixel) */ ++ int width_aligned; ++ /* Aligned surface's height(in pixel) */ ++ int height_aligned; ++}; ++ ++/* ++ Some fileds of this data structure has no meaning now since ++ we have managed heap based on mechanism provided by DRM ++ Remain what it was to keep consistent with 3D driver interface. ++*/ ++struct drm_via_chrome9_memory_alloc { ++ enum { ++ memory_heap_video = 0, ++ memory_heap_agp, ++ memory_heap_pcie_video, ++ memory_heap_pcie, ++ max_memory_heaps ++ } heap_type; ++ struct { ++ void *lpL1Node; ++ unsigned int alcL1Tag; ++ unsigned int usageCount; ++ unsigned int dwVersion; ++ unsigned int dwResHandle; ++ unsigned int dwProcessID; ++ } heap_info; ++ unsigned int flags; ++ unsigned int size; ++ unsigned int physaddress; ++ unsigned int offset; ++ unsigned int align; ++ void *linearaddress; ++}; ++ ++struct drm_via_chrome9_dma_init { ++ enum { ++ VIA_CHROME9_INIT_DMA = 0x01, ++ VIA_CHROME9_CLEANUP_DMA = 0x02, ++ VIA_CHROME9_DMA_INITIALIZED = 0x03 ++ } func; ++ ++ unsigned long offset; ++ unsigned long size; ++ unsigned long reg_pause_addr; ++}; ++ ++struct drm_via_chrome9_cmdbuffer { ++ char __user *buf; ++ unsigned long size; ++}; ++ ++/* Warning: If you change the SAREA structure you must change the Xserver ++ * structure as well */ ++ ++struct drm_via_chrome9_tex_region { ++ unsigned char next, prev; /* indices to form a circular LRU */ ++ unsigned char inUse; /* owned by a client, or free? 
*/ ++ int age; /* tracked by clients to update local LRU's */ ++}; ++ ++struct drm_via_chrome9_sarea { ++ int page_flip; ++ int current_page; ++ unsigned int req_drawable;/* the X drawable id */ ++ unsigned int req_draw_buffer;/* VIA_CHROME9_FRONT or VIA_CHROME9_BACK */ ++ /* Last context that uploaded state */ ++ int ctx_owner; ++}; ++ ++struct drm_via_chrome9_cmdbuf_size { ++ enum { ++ VIA_CHROME9_CMDBUF_SPACE = 0x01, ++ VIA_CHROME9_CMDBUF_LAG = 0x02 ++ } func; ++ int wait; ++ uint32_t size; ++}; ++ ++struct drm_via_chrome9_DMA_manager { ++ unsigned int *addr_linear; ++ unsigned int DMASize; ++ unsigned int bDMAAgp; ++ unsigned int LastIssuedEventTag; ++ unsigned int *pBeg; ++ unsigned int *pInUseByHW; ++ unsigned int **ppInUseByHW; ++ unsigned int *pInUseBySW; ++ unsigned int *pFree; ++ unsigned int *pEnd; ++ ++ unsigned long pPhysical; ++ unsigned int MaxKickoffSize; ++}; ++ ++struct get_pci_id_struct { ++ unsigned int x; ++ unsigned int y; ++ unsigned int z; ++ unsigned int f; ++}; ++ ++ ++extern void *via_chrome9_dev_v4l; ++extern void *via_chrome9_filepriv_v4l; ++extern int via_chrome9_ioctl_wait_chip_idle(struct drm_device *dev, ++ void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_init(struct drm_device *dev, ++ void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_allocate_event_tag(struct drm_device ++ *dev, void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_free_event_tag(struct drm_device *dev, ++ void *data, struct drm_file *file_priv); ++extern int via_chrome9_driver_load(struct drm_device *dev, ++ unsigned long chipset); ++extern int via_chrome9_driver_unload(struct drm_device *dev); ++extern int via_chrome9_ioctl_process_exit(struct drm_device *dev, ++ void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_restore_primary(struct drm_device *dev, ++ void *data, struct drm_file *file_priv); ++extern int via_chrome9_drm_resume(struct pci_dev *dev); ++extern int 
via_chrome9_drm_suspend(struct pci_dev *dev, ++ pm_message_t state); ++extern void __via_chrome9ke_udelay(unsigned long usecs); ++extern void via_chrome9_lastclose(struct drm_device *dev); ++extern void via_chrome9_preclose(struct drm_device *dev, ++ struct drm_file *file_priv); ++extern int via_chrome9_is_agp(struct drm_device *dev); ++ ++ ++#endif /* _VIA_CHROME9_DRM_H_ */ +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_drv.c linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_drv.c +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_drv.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_drv.c 2009-05-27 14:49:50.635061021 +0200 +@@ -0,0 +1,218 @@ ++/* ++ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved. ++ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person ++ * obtaining a copy of this software and associated documentation ++ * files (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, ++ * copy, modify, merge, publish, distribute, sub license, ++ * and/or sell copies of the Software, and to permit persons to ++ * whom the Software is furnished to do so, subject to the ++ * following conditions: ++ * ++ * The above copyright notice and this permission notice ++ * (including the next paragraph) shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. 
IN NO EVENT SHALL VIA, S3 GRAPHICS, AND/OR ++ * ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ++ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#include "drmP.h" ++#include "via_chrome9_drm.h" ++#include "via_chrome9_drv.h" ++#include "via_chrome9_dma.h" ++#include "via_chrome9_mm.h" ++#include "via_chrome9_3d_reg.h" ++ ++#define RING_BUFFER_INIT_FLAG 1 ++#define RING_BUFFER_CLEANUP_FLAG 2 ++ ++int via_chrome9_drm_authmagic(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ return 0; ++} ++ ++int via_chrome9_drm_get_pci_id(struct drm_device *dev, ++ void *data, struct drm_file *file_priv) ++{ ++ unsigned int *reg_val = data; ++ outl(0x8000002C, 0xCF8); ++ *reg_val = inl(0xCFC); ++ outl(0x8000012C, 0xCF8); ++ *(reg_val+1) = inl(0xCFC); ++ outl(0x8000022C, 0xCF8); ++ *(reg_val+2) = inl(0xCFC); ++ outl(0x8000052C, 0xCF8); ++ *(reg_val+3) = inl(0xCFC); ++ ++ return 0; ++} ++int via_chrome9_drm_judge(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ ++ if (dev_priv->initialized) ++ *(int *)data = 1; ++ else ++ *(int *)data = -1; ++ return 0; ++} ++ ++int via_chrome9_dma_init(struct drm_device *dev, void *data, ++ struct drm_file *file_priv) ++{ ++ int tmp; ++ unsigned char sr6c; ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *)dev->dev_private; ++ tmp = *((int *)data); ++ ++ switch (tmp) { ++ case RING_BUFFER_INIT_FLAG: ++ via_chrome9_dma_init_inv(dev); ++ break; ++ case RING_BUFFER_CLEANUP_FLAG: ++ if (dev_priv->chip_sub_index == CHIP_H6S2) { ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c4, 0x6c); ++ sr6c = GetMMIORegisterU8(dev_priv->mmio->handle, ++ 0x83c5); ++ sr6c &= 0x7F; ++ SetMMIORegisterU8(dev_priv->mmio->handle, 0x83c5, sr6c); 
++ } ++ break; ++ } ++ return 0; ++} ++ ++ ++ ++struct drm_ioctl_desc via_chrome9_ioctls[] = { ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_INIT, via_chrome9_ioctl_init, ++ DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),/* via_chrome9_map.c*/ ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_FLUSH, via_chrome9_ioctl_flush, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_FREE, via_chrome9_ioctl_free, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_ALLOCATE_EVENT_TAG, ++ via_chrome9_ioctl_allocate_event_tag, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_FREE_EVENT_TAG, ++ via_chrome9_ioctl_free_event_tag, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_ALLOCATE_APERTURE, ++ via_chrome9_ioctl_allocate_aperture, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_FREE_APERTURE, ++ via_chrome9_ioctl_free_aperture, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_ALLOCATE_VIDEO_MEM, ++ via_chrome9_ioctl_allocate_mem_wrapper, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_FREE_VIDEO_MEM, ++ via_chrome9_ioctl_free_mem_wrapper, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_WAIT_CHIP_IDLE, ++ via_chrome9_ioctl_wait_chip_idle, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_PROCESS_EXIT, ++ via_chrome9_ioctl_process_exit, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_RESTORE_PRIMARY, ++ via_chrome9_ioctl_restore_primary, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_FLUSH_CACHE, ++ via_chrome9_ioctl_flush_cache, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_ALLOCMEM, ++ via_chrome9_ioctl_allocate_mem_base, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_FREEMEM, ++ via_chrome9_ioctl_freemem_base, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_CHECKVIDMEMSIZE, ++ via_chrome9_ioctl_check_vidmem_size, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_PCIEMEMCTRL, ++ via_chrome9_ioctl_pciemem_ctrl, DRM_AUTH), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_AUTH_MAGIC, via_chrome9_drm_authmagic, 0), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_GET_PCI_ID, ++ via_chrome9_drm_get_pci_id, 0), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_INIT_JUDGE, via_chrome9_drm_judge, 0), ++ DRM_IOCTL_DEF(DRM_VIA_CHROME9_DMA, 
via_chrome9_dma_init, 0) ++}; ++ ++int via_chrome9_max_ioctl = DRM_ARRAY_SIZE(via_chrome9_ioctls); ++ ++static struct pci_device_id pciidlist[] = { ++ {0x1106, 0x3225, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, ++ {0x1106, 0x3230, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VIA_CHROME9_DX9_0}, ++ {0x1106, 0x3371, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, ++ {0x1106, 0x1122, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VIA_CHROME9_PCIE_GROUP}, ++ {0x1106, 0x5122, PCI_ANY_ID, PCI_ANY_ID, 0, 0, VIA_CHROME9_PCIE_GROUP}, ++ {0, 0, 0} ++}; ++ ++int via_chrome9_driver_open(struct drm_device *dev, ++ struct drm_file *priv) ++{ ++ priv->authenticated = 1; ++ return 0; ++} ++ ++static struct drm_driver driver = { ++ .driver_features = DRIVER_USE_AGP | DRIVER_REQUIRE_AGP | ++ DRIVER_HAVE_DMA | DRIVER_FB_DMA | DRIVER_USE_MTRR, ++ .open = via_chrome9_driver_open, ++ .load = via_chrome9_driver_load, ++ .unload = via_chrome9_driver_unload, ++ .device_is_agp = via_chrome9_is_agp, ++ .reclaim_buffers = drm_core_reclaim_buffers, ++ .reclaim_buffers_locked = NULL, ++ .reclaim_buffers_idlelocked = via_chrome9_reclaim_buffers_locked, ++ .lastclose = via_chrome9_lastclose, ++ .preclose = via_chrome9_preclose, ++ .get_map_ofs = drm_core_get_map_ofs, ++ .get_reg_ofs = drm_core_get_reg_ofs, ++ .ioctls = via_chrome9_ioctls, ++ .fops = { ++ .owner = THIS_MODULE, ++ .open = drm_open, ++ .release = drm_release, ++ .ioctl = drm_ioctl, ++ .mmap = drm_mmap, ++ .poll = drm_poll, ++ .fasync = drm_fasync, ++ }, ++ .pci_driver = { ++ .name = DRIVER_NAME, ++ .id_table = pciidlist, ++ .resume = via_chrome9_drm_resume, ++ .suspend = via_chrome9_drm_suspend, ++ }, ++ ++ .name = DRIVER_NAME, ++ .desc = DRIVER_DESC, ++ .date = DRIVER_DATE, ++ .major = DRIVER_MAJOR, ++ .minor = DRIVER_MINOR, ++ .patchlevel = DRIVER_PATCHLEVEL, ++}; ++ ++static int __init via_chrome9_init(void) ++{ ++ driver.num_ioctls = via_chrome9_max_ioctl; ++#if VIA_CHROME9_VERIFY_ENABLE ++ via_chrome9_init_command_verifier(); ++ DRM_INFO("via_chrome9 verify function enabled. 
\n"); ++#endif ++ driver.dev_priv_size = sizeof(struct drm_via_chrome9_private); ++ return drm_init(&driver); ++} ++ ++static void __exit via_chrome9_exit(void) ++{ ++ drm_exit(&driver); ++} ++ ++module_init(via_chrome9_init); ++module_exit(via_chrome9_exit); ++ ++MODULE_AUTHOR(DRIVER_AUTHOR); ++MODULE_DESCRIPTION(DRIVER_DESC); ++MODULE_LICENSE("GPL and additional rights"); +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_drv.h linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_drv.h +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_drv.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_drv.h 2008-12-03 03:08:32.000000000 +0100 +@@ -0,0 +1,150 @@ ++/* ++ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved. ++ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person ++ * obtaining a copy of this software and associated documentation ++ * files (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, ++ * copy, modify, merge, publish, distribute, sub license, ++ * and/or sell copies of the Software, and to permit persons to ++ * whom the Software is furnished to do so, subject to the ++ * following conditions: ++ * ++ * The above copyright notice and this permission notice ++ * (including the next paragraph) shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. 
IN NO EVENT SHALL VIA, S3 GRAPHICS, AND/OR ++ * ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ++ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++ */ ++#ifndef _VIA_CHROME9_DRV_H_ ++#define _VIA_CHROME9_DRV_H_ ++ ++#include "drm_sman.h" ++#include "via_chrome9_verifier.h" ++#define DRIVER_AUTHOR "Various" ++ ++#define DRIVER_NAME "via_chrome9" ++#define DRIVER_DESC "VIA_CHROME9 Unichrome / Pro" ++#define DRIVER_DATE "20080415" ++ ++#define DRIVER_MAJOR 2 ++#define DRIVER_MINOR 11 ++#define DRIVER_PATCHLEVEL 1 ++ ++#define via_chrome9_FIRE_BUF_SIZE 1024 ++#define via_chrome9_NUM_IRQS 4 ++ ++#define MAX_MEMORY_HEAPS 4 ++#define NUMBER_OF_APERTURES 32 ++ ++/*typedef struct drm_via_chrome9_shadow_map drm_via_chrome9_shadow_map_t;*/ ++struct drm_via_chrome9_shadow_map { ++ struct drm_map *shadow; ++ unsigned int shadow_size; ++ unsigned int *shadow_handle; ++}; ++ ++/*typedef struct drm_via_chrome9_pagetable_map ++ *drm_via_chrome9_pagetable_map_t; ++ */ ++struct drm_via_chrome9_pagetable_map { ++ unsigned int pagetable_offset; ++ unsigned int pagetable_size; ++ unsigned int *pagetable_handle; ++ unsigned int mmt_register; ++}; ++ ++/*typedef struct drm_via_chrome9_private drm_via_chrome9_private_t;*/ ++struct drm_via_chrome9_private { ++ int chip_agp; ++ int chip_index; ++ int chip_sub_index; ++ ++ unsigned long front_offset; ++ unsigned long back_offset; ++ unsigned long depth_offset; ++ unsigned long fb_base_address; ++ unsigned long available_fb_size; ++ int usec_timeout; ++ int max_apertures; ++ struct drm_sman sman; ++ unsigned int alignment; ++ /* bit[31]:0:indicate no alignment needed,1:indicate ++ alignment needed and size is bit[0:30]*/ ++ ++ struct drm_map *sarea; ++ struct drm_via_chrome9_sarea *sarea_priv; ++ ++ struct drm_map *mmio; ++ struct drm_map *hostBlt; ++ struct drm_map *fb; ++ struct drm_map *front; ++ struct 
drm_map *back; ++ struct drm_map *depth; ++ struct drm_map *agp_tex; ++ unsigned int agp_size; ++ unsigned int agp_offset; ++ ++ struct semaphore *drm_s3g_sem; ++ ++ struct drm_via_chrome9_shadow_map shadow_map; ++ struct drm_via_chrome9_pagetable_map pagetable_map; ++ ++ char *bci; ++ ++ int aperture_usage[NUMBER_OF_APERTURES]; ++ void *event_tag_info; ++ ++ /* DMA buffer manager */ ++ void *dma_manager; ++ /* Indicate agp/pcie heap initialization flag */ ++ int agp_initialized; ++ /* Indicate video heap initialization flag */ ++ int vram_initialized; ++ ++ unsigned long pcie_vmalloc_addr; ++ ++ /* pointer to device information */ ++ void *dev; ++ /* if agp init fail, go ahead and force dri use PCI*/ ++ enum { ++ DRM_AGP_RING_BUFFER, ++ DRM_AGP_DOUBLE_BUFFER, ++ DRM_AGP_DISABLED ++ } drm_agp_type; ++ /*end*/ ++#if VIA_CHROME9_VERIFY_ENABLE ++ struct drm_via_chrome9_state hc_state; ++#endif ++ unsigned long *bci_buffer; ++ unsigned long pcie_vmalloc_nocache; ++ unsigned char gti_backup[13]; ++ int initialized; ++ ++}; ++ ++ ++enum via_chrome9_family { ++ VIA_CHROME9_OTHER = 0, /* Baseline */ ++ VIA_CHROME9_PRO_GROUP_A,/* Another video engine and DMA commands */ ++ VIA_CHROME9_DX9_0, ++ VIA_CHROME9_PCIE_GROUP ++}; ++ ++/* VIA_CHROME9 MMIO register access */ ++#define VIA_CHROME9_BASE ((dev_priv->mmio)) ++ ++#define VIA_CHROME9_READ(reg) DRM_READ32(VIA_CHROME9_BASE, reg) ++#define VIA_CHROME9_WRITE(reg, val) DRM_WRITE32(VIA_CHROME9_BASE, reg, val) ++#define VIA_CHROME9_READ8(reg) DRM_READ8(VIA_CHROME9_BASE, reg) ++#define VIA_CHROME9_WRITE8(reg, val) DRM_WRITE8(VIA_CHROME9_BASE, reg, val) ++ ++#endif +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_mm.c linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_mm.c +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_mm.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_mm.c 2008-12-03 03:08:33.000000000 +0100 +@@ -0,0 +1,435 @@ 
++/* ++ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved. ++ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved. ++ * ++ * Permission is hereby granted, free of charge, to any person ++ * obtaining a copy of this software and associated documentation ++ * files (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, ++ * copy, modify, merge, publish, distribute, sub license, ++ * and/or sell copies of the Software, and to permit persons to ++ * whom the Software is furnished to do so, subject to the ++ * following conditions: ++ * ++ * The above copyright notice and this permission notice ++ * (including the next paragraph) shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. IN NO EVENT SHALL VIA, S3 GRAPHICS, AND/OR ++ * ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ++ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++ ++#include "drmP.h" ++#include "via_chrome9_drm.h" ++#include "via_chrome9_drv.h" ++#include "drm_sman.h" ++#include "via_chrome9_mm.h" ++ ++#define VIA_CHROME9_MM_GRANULARITY 4 ++#define VIA_CHROME9_MM_GRANULARITY_MASK ((1 << VIA_CHROME9_MM_GRANULARITY) - 1) ++ ++ ++int via_chrome9_map_init(struct drm_device *dev, ++ struct drm_via_chrome9_init *init) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *)dev->dev_private; ++ ++ dev_priv->sarea = drm_getsarea(dev); ++ if (!dev_priv->sarea) { ++ DRM_ERROR("could not find sarea!\n"); ++ goto error; ++ } ++ dev_priv->sarea_priv = ++ (struct drm_via_chrome9_sarea *)((unsigned char *)dev_priv-> ++ sarea->handle + init->sarea_priv_offset); ++ ++ dev_priv->fb = drm_core_findmap(dev, init->fb_handle); ++ if (!dev_priv->fb) { ++ DRM_ERROR("could not find framebuffer!\n"); ++ goto error; ++ } ++ /* Frame buffer physical base address */ ++ dev_priv->fb_base_address = init->fb_base_address; ++ ++ if (init->shadow_size) { ++ /* find apg shadow region mappings */ ++ dev_priv->shadow_map.shadow = drm_core_findmap(dev, init-> ++ shadow_handle); ++ if (!dev_priv->shadow_map.shadow) { ++ DRM_ERROR("could not shadow map!\n"); ++ goto error; ++ } ++ dev_priv->shadow_map.shadow_size = init->shadow_size; ++ dev_priv->shadow_map.shadow_handle = (unsigned int *)dev_priv-> ++ shadow_map.shadow->handle; ++ init->shadow_handle = dev_priv->shadow_map.shadow->offset; ++ } ++ if (init->agp_tex_size && init->chip_agp != CHIP_PCIE) { ++ /* find apg texture buffer mappings */ ++ dev_priv->agp_tex = drm_core_findmap(dev, init->agp_tex_handle); ++ dev_priv->agp_size = init->agp_tex_size; ++ dev_priv->agp_offset = init->agp_tex_handle; ++ if (!dev_priv->agp_tex) { ++ DRM_ERROR("could not find agp texture map !\n"); ++ goto error; ++ } ++ } ++ /* find mmio/dma mappings */ ++ dev_priv->mmio = drm_core_findmap(dev, init->mmio_handle); ++ if (!dev_priv->mmio) { ++ DRM_ERROR("failed to find mmio region!\n"); ++ 
goto error; ++ } ++ ++ dev_priv->hostBlt = drm_core_findmap(dev, init->hostBlt_handle); ++ if (!dev_priv->hostBlt) { ++ DRM_ERROR("failed to find host bitblt region!\n"); ++ goto error; ++ } ++ ++ dev_priv->drm_agp_type = init->agp_type; ++ if (init->agp_type != AGP_DISABLED && init->chip_agp != CHIP_PCIE) { ++ dev->agp_buffer_map = drm_core_findmap(dev, init->dma_handle); ++ if (!dev->agp_buffer_map) { ++ DRM_ERROR("failed to find dma buffer region!\n"); ++ goto error; ++ } ++ } ++ ++ dev_priv->bci = (char *)dev_priv->mmio->handle + 0x10000; ++ ++ return 0; ++ ++error: ++ /* do cleanup here, refine_later */ ++ return -EINVAL; ++} ++ ++int via_chrome9_heap_management_init(struct drm_device *dev, ++ struct drm_via_chrome9_init *init) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ int ret = 0; ++ ++ /* video memory management. range: 0 ---- video_whole_size */ ++ mutex_lock(&dev->struct_mutex); ++ ret = drm_sman_set_range(&dev_priv->sman, VIA_CHROME9_MEM_VIDEO, ++ 0, dev_priv->available_fb_size >> VIA_CHROME9_MM_GRANULARITY); ++ if (ret) { ++ DRM_ERROR("VRAM memory manager initialization ******ERROR\ ++ !******\n"); ++ mutex_unlock(&dev->struct_mutex); ++ goto error; ++ } ++ dev_priv->vram_initialized = 1; ++ /* agp/pcie heap management. 
++ note:because agp is contradict with pcie, so only one is enough ++ for managing both of them.*/ ++ init->agp_type = dev_priv->drm_agp_type; ++ if (init->agp_type != AGP_DISABLED && dev_priv->agp_size) { ++ ret = drm_sman_set_range(&dev_priv->sman, VIA_CHROME9_MEM_AGP, ++ 0, dev_priv->agp_size >> VIA_CHROME9_MM_GRANULARITY); ++ if (ret) { ++ DRM_ERROR("AGP/PCIE memory manager initialization ******ERROR\ ++ !******\n"); ++ mutex_unlock(&dev->struct_mutex); ++ goto error; ++ } ++ dev_priv->agp_initialized = 1; ++ } ++ mutex_unlock(&dev->struct_mutex); ++ return 0; ++ ++error: ++ /* Do error recover here, refine_later */ ++ return -EINVAL; ++} ++ ++ ++void via_chrome9_memory_destroy_heap(struct drm_device *dev, ++ struct drm_via_chrome9_private *dev_priv) ++{ ++ mutex_lock(&dev->struct_mutex); ++ drm_sman_cleanup(&dev_priv->sman); ++ dev_priv->vram_initialized = 0; ++ dev_priv->agp_initialized = 0; ++ mutex_unlock(&dev->struct_mutex); ++} ++ ++void via_chrome9_reclaim_buffers_locked(struct drm_device *dev, ++ struct drm_file *file_priv) ++{ ++ return; ++} ++ ++int via_chrome9_ioctl_allocate_aperture(struct drm_device *dev, ++ void *data, struct drm_file *file_priv) ++{ ++ return 0; ++} ++ ++int via_chrome9_ioctl_free_aperture(struct drm_device *dev, ++ void *data, struct drm_file *file_priv) ++{ ++ return 0; ++} ++ ++ ++/* Allocate memory from DRM module for video playing */ ++int via_chrome9_ioctl_allocate_mem_base(struct drm_device *dev, ++void *data, struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_mem *mem = data; ++ struct drm_memblock_item *item; ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ unsigned long tmpSize = 0, offset = 0, alignment = 0; ++ /* modify heap_type to agp for pcie, since we treat pcie/agp heap ++ no difference in heap management */ ++ if (mem->type == memory_heap_pcie) { ++ if (dev_priv->chip_agp != CHIP_PCIE) { ++ DRM_ERROR("User want to alloc memory from pcie heap \ ++ but 
via_chrome9.ko has no this heap exist.\n"); ++ return -EINVAL; ++ } ++ mem->type = memory_heap_agp; ++ } ++ ++ if (mem->type > VIA_CHROME9_MEM_AGP) { ++ DRM_ERROR("Unknown memory type allocation\n"); ++ return -EINVAL; ++ } ++ mutex_lock(&dev->struct_mutex); ++ if (0 == ((mem->type == VIA_CHROME9_MEM_VIDEO) ? ++ dev_priv->vram_initialized : dev_priv->agp_initialized)) { ++ DRM_ERROR("Attempt to allocate from uninitialized\ ++ memory manager.\n"); ++ mutex_unlock(&dev->struct_mutex); ++ return -EINVAL; ++ } ++ tmpSize = (mem->size + VIA_CHROME9_MM_GRANULARITY_MASK) >> ++ VIA_CHROME9_MM_GRANULARITY; ++ mem->size = tmpSize << VIA_CHROME9_MM_GRANULARITY; ++ alignment = (dev_priv->alignment & 0x80000000) ? dev_priv-> ++ alignment & 0x7FFFFFFF : 0; ++ alignment /= (1 << VIA_CHROME9_MM_GRANULARITY); ++ item = drm_sman_alloc(&dev_priv->sman, mem->type, tmpSize, alignment, ++ (unsigned long)file_priv); ++ mutex_unlock(&dev->struct_mutex); ++ /* alloc failed */ ++ if (!item) { ++ DRM_ERROR("Allocate memory failed ******ERROR******.\n"); ++ return -ENOMEM; ++ } ++ /* Till here every thing is ok, we check the memory type allocated ++ and return appropriate value to user mode Here the value return to ++ user is very difficult to operate. BE CAREFULLY!!! */ ++ /* offset is used by user mode ap to calculate the virtual address ++ which is used to access the memory allocated */ ++ mem->index = item->user_hash.key; ++ offset = item->mm->offset(item->mm, item->mm_info) << ++ VIA_CHROME9_MM_GRANULARITY; ++ switch (mem->type) { ++ case VIA_CHROME9_MEM_VIDEO: ++ mem->offset = offset + dev_priv->back_offset; ++ break; ++ case VIA_CHROME9_MEM_AGP: ++ /* return different value to user according to the chip type */ ++ if (dev_priv->chip_agp == CHIP_PCIE) { ++ mem->offset = offset + ++ ((struct drm_via_chrome9_DMA_manager *)dev_priv-> ++ dma_manager)->DMASize * sizeof(unsigned long); ++ } else { ++ mem->offset = offset; ++ } ++ break; ++ default: ++ /* Strange thing happen! Faint. Code bug! 
*/ ++ DRM_ERROR("Enter here is impossible ******\ ++ ERROR******.\n"); ++ return -EINVAL; ++ } ++ /*DONE. Need we call function copy_to_user ?NO. We can't even ++ touch user's space.But we are lucky, since kernel drm:drm_ioctl ++ will to the job for us. */ ++ return 0; ++} ++ ++/* Allocate video/AGP/PCIE memory from heap management */ ++int via_chrome9_ioctl_allocate_mem_wrapper(struct drm_device ++ *dev, void *data, struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_memory_alloc *memory_alloc = ++ (struct drm_via_chrome9_memory_alloc *)data; ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ struct drm_via_chrome9_mem mem; ++ ++ mem.size = memory_alloc->size; ++ mem.type = memory_alloc->heap_type; ++ dev_priv->alignment = memory_alloc->align | 0x80000000; ++ if (via_chrome9_ioctl_allocate_mem_base(dev, &mem, file_priv)) { ++ DRM_ERROR("Allocate memory error!.\n"); ++ return -ENOMEM; ++ } ++ dev_priv->alignment = 0; ++ /* Till here every thing is ok, we check the memory type allocated and ++ return appropriate value to user mode Here the value return to user is ++ very difficult to operate. 
BE CAREFULLY!!!*/ ++ /* offset is used by user mode ap to calculate the virtual address ++ which is used to access the memory allocated */ ++ memory_alloc->offset = mem.offset; ++ memory_alloc->heap_info.lpL1Node = (void *)mem.index; ++ memory_alloc->size = mem.size; ++ switch (memory_alloc->heap_type) { ++ case VIA_CHROME9_MEM_VIDEO: ++ memory_alloc->physaddress = memory_alloc->offset + ++ dev_priv->fb_base_address; ++ memory_alloc->linearaddress = (void *)memory_alloc->physaddress; ++ break; ++ case VIA_CHROME9_MEM_AGP: ++ /* return different value to user according to the chip type */ ++ if (dev_priv->chip_agp == CHIP_PCIE) { ++ memory_alloc->physaddress = memory_alloc->offset; ++ memory_alloc->linearaddress = (void *)memory_alloc-> ++ physaddress; ++ } else { ++ memory_alloc->physaddress = dev->agp->base + ++ memory_alloc->offset + ++ ((struct drm_via_chrome9_DMA_manager *) ++ dev_priv->dma_manager)->DMASize * sizeof(unsigned long); ++ memory_alloc->linearaddress = ++ (void *)memory_alloc->physaddress; ++ } ++ break; ++ default: ++ /* Strange thing happen! Faint. Code bug! 
*/ ++ DRM_ERROR("Enter here is impossible ******ERROR******.\n"); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++int via_chrome9_ioctl_free_mem_wrapper(struct drm_device *dev, ++ void *data, struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_memory_alloc *memory_alloc = data; ++ struct drm_via_chrome9_mem mem; ++ ++ mem.index = (unsigned long)memory_alloc->heap_info.lpL1Node; ++ if (via_chrome9_ioctl_freemem_base(dev, &mem, file_priv)) { ++ DRM_ERROR("function free_mem_wrapper error.\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++int via_chrome9_ioctl_freemem_base(struct drm_device *dev, ++ void *data, struct drm_file *file_priv) ++{ ++ struct drm_via_chrome9_private *dev_priv = dev->dev_private; ++ struct drm_via_chrome9_mem *mem = data; ++ int ret; ++ ++ mutex_lock(&dev->struct_mutex); ++ ret = drm_sman_free_key(&dev_priv->sman, mem->index); ++ mutex_unlock(&dev->struct_mutex); ++ DRM_DEBUG("free = 0x%lx\n", mem->index); ++ ++ return ret; ++} ++ ++int via_chrome9_ioctl_check_vidmem_size(struct drm_device *dev, ++ void *data, struct drm_file *file_priv) ++{ ++ return 0; ++} ++ ++int via_chrome9_ioctl_pciemem_ctrl(struct drm_device *dev, ++ void *data, struct drm_file *file_priv) ++{ ++ int result = 0; ++ struct drm_via_chrome9_private *dev_priv = dev->dev_private; ++ struct drm_via_chrome9_pciemem_ctrl *pcie_memory_ctrl = data; ++ switch (pcie_memory_ctrl->ctrl_type) { ++ case pciemem_copy_from_user: ++ result = copy_from_user((void *)( ++ dev_priv->pcie_vmalloc_nocache+ ++ pcie_memory_ctrl->pcieoffset), ++ pcie_memory_ctrl->usermode_data, ++ pcie_memory_ctrl->size); ++ break; ++ case pciemem_copy_to_user: ++ result = copy_to_user(pcie_memory_ctrl->usermode_data, ++ (void *)(dev_priv->pcie_vmalloc_nocache+ ++ pcie_memory_ctrl->pcieoffset), ++ pcie_memory_ctrl->size); ++ break; ++ case pciemem_memset: ++ memset((void *)(dev_priv->pcie_vmalloc_nocache + ++ pcie_memory_ctrl->pcieoffset), ++ pcie_memory_ctrl->memsetdata, ++ pcie_memory_ctrl->size); ++ 
break; ++ default: ++ break; ++ } ++ return 0; ++} ++ ++ ++int via_fb_alloc(struct drm_via_chrome9_mem *mem) ++{ ++ struct drm_device *dev = (struct drm_device *)via_chrome9_dev_v4l; ++ struct drm_via_chrome9_private *dev_priv; ++ ++ if (!dev || !dev->dev_private || !via_chrome9_filepriv_v4l) { ++ DRM_ERROR("V4L work before X initialize DRM module !!!\n"); ++ return -EINVAL; ++ } ++ ++ dev_priv = (struct drm_via_chrome9_private *)dev->dev_private; ++ if (!dev_priv->vram_initialized || ++ mem->type != VIA_CHROME9_MEM_VIDEO) { ++ DRM_ERROR("the memory type from V4L is error !!!\n"); ++ return -EINVAL; ++ } ++ ++ if (via_chrome9_ioctl_allocate_mem_base(dev, ++ mem, via_chrome9_filepriv_v4l)) { ++ DRM_ERROR("DRM module allocate memory error for V4L!!!\n"); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++EXPORT_SYMBOL(via_fb_alloc); ++ ++int via_fb_free(struct drm_via_chrome9_mem *mem) ++{ ++ struct drm_device *dev = (struct drm_device *)via_chrome9_dev_v4l; ++ struct drm_via_chrome9_private *dev_priv; ++ ++ if (!dev || !dev->dev_private || !via_chrome9_filepriv_v4l) ++ return -EINVAL; ++ ++ dev_priv = (struct drm_via_chrome9_private *)dev->dev_private; ++ if (!dev_priv->vram_initialized || ++ mem->type != VIA_CHROME9_MEM_VIDEO) ++ return -EINVAL; ++ ++ if (via_chrome9_ioctl_freemem_base(dev, mem, via_chrome9_filepriv_v4l)) ++ return -EINVAL; ++ ++ return 0; ++} ++EXPORT_SYMBOL(via_fb_free); +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_mm.h linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_mm.h +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_mm.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_mm.h 2008-12-03 03:08:33.000000000 +0100 +@@ -0,0 +1,67 @@ ++/* ++ * Copyright 1998-2003 VIA Technologies, Inc. All Rights Reserved. ++ * Copyright 2001-2003 S3 Graphics, Inc. All Rights Reserved. 
++ * ++ * Permission is hereby granted, free of charge, to any person ++ * obtaining a copy of this software and associated documentation ++ * files (the "Software"), to deal in the Software without ++ * restriction, including without limitation the rights to use, ++ * copy, modify, merge, publish, distribute, sub license, ++ * and/or sell copies of the Software, and to permit persons to ++ * whom the Software is furnished to do so, subject to the ++ * following conditions: ++ * ++ * The above copyright notice and this permission notice ++ * (including the next paragraph) shall be included in all ++ * copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, ++ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES ++ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ * NON-INFRINGEMENT. IN NO EVENT SHALL VIA, S3 GRAPHICS, AND/OR ++ * ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ++ * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
++ */ ++#ifndef _VIA_CHROME9_MM_H_ ++#define _VIA_CHROME9_MM_H_ ++struct drm_via_chrome9_pciemem_ctrl { ++ enum { ++ pciemem_copy_from_user = 0, ++ pciemem_copy_to_user, ++ pciemem_memset, ++ } ctrl_type; ++ unsigned int pcieoffset; ++ unsigned int size;/*in Byte*/ ++ unsigned char memsetdata;/*for memset*/ ++ void *usermode_data;/*user mode data pointer*/ ++}; ++ ++extern int via_chrome9_map_init(struct drm_device *dev, ++ struct drm_via_chrome9_init *init); ++extern int via_chrome9_heap_management_init(struct drm_device ++ *dev, struct drm_via_chrome9_init *init); ++extern void via_chrome9_memory_destroy_heap(struct drm_device ++ *dev, struct drm_via_chrome9_private *dev_priv); ++extern int via_chrome9_ioctl_check_vidmem_size(struct drm_device ++ *dev, void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_pciemem_ctrl(struct drm_device *dev, ++ void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_allocate_aperture(struct drm_device ++ *dev, void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_free_aperture(struct drm_device *dev, ++ void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_allocate_mem_base(struct drm_device ++ *dev, void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_allocate_mem_wrapper( ++ struct drm_device *dev, void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_freemem_base(struct drm_device ++ *dev, void *data, struct drm_file *file_priv); ++extern int via_chrome9_ioctl_free_mem_wrapper(struct drm_device ++ *dev, void *data, struct drm_file *file_priv); ++extern void via_chrome9_reclaim_buffers_locked(struct drm_device ++ *dev, struct drm_file *file_priv); ++ ++#endif ++ +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_verifier.c linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_verifier.c +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_verifier.c 1970-01-01 01:00:00.000000000 +0100 
++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_verifier.c 2008-12-03 03:08:33.000000000 +0100 +@@ -0,0 +1,982 @@ ++/* ++* Copyright 2004 The Unichrome Project. All Rights Reserved. ++* Copyright 2005 Thomas Hellstrom. All Rights Reserved. ++* ++* Permission is hereby granted, free of charge, to any person obtaining a ++* copy of this software and associated documentation files (the "Software"), ++* to deal in the Software without restriction, including without limitation ++* the rights to use, copy, modify, merge, publish, distribute, sub license, ++* and/or sell copies of the Software, and to permit persons to whom the ++* Software is furnished to do so, subject to the following conditions: ++* ++* The above copyright notice and this permission notice (including the ++* next paragraph) shall be included in all copies or substantial portions ++* of the Software. ++* ++* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL ++* THE AUTHOR(S), AND/OR THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES ++* OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++* DEALINGS IN THE SOFTWARE. ++* ++* This code was written using docs obtained under NDA from VIA Inc. ++* ++* Don't run this code directly on an AGP buffer. Due to cache problems it will ++* be very slow. 
++*/ ++ ++#include "via_chrome9_3d_reg.h" ++#include "drmP.h" ++#include "drm.h" ++#include "via_chrome9_drm.h" ++#include "via_chrome9_verifier.h" ++#include "via_chrome9_drv.h" ++ ++#if VIA_CHROME9_VERIFY_ENABLE ++ ++enum verifier_state { ++ state_command, ++ state_header0, ++ state_header1, ++ state_header2, ++ state_header3, ++ state_header4, ++ state_header5, ++ state_header6, ++ state_header7, ++ state_error ++}; ++ ++enum hazard { ++ no_check = 0, ++ check_render_target_addr0, ++ check_render_target_addr1, ++ check_render_target_addr_mode, ++ check_z_buffer_addr0, ++ check_z_buffer_addr1, ++ check_z_buffer_addr_mode, ++ check_zocclusion_addr0, ++ check_zocclusion_addr1, ++ check_coarse_z_addr0, ++ check_coarse_z_addr1, ++ check_fvf_addr_mode, ++ check_t_level0_facen_addr0, ++ check_fence_cmd_addr0, ++ check_fence_cmd_addr1, ++ check_fence_cmd_addr2, ++ forbidden_command ++}; ++ ++/* ++ * Associates each hazard above with a possible multi-command ++ * sequence. For example an address that is split over multiple ++ * commands and that needs to be checked at the first command ++ * that does not include any part of the address. 
++ */ ++ ++static enum drm_via_chrome9_sequence seqs[] = { ++ no_sequence, ++ dest_address, ++ dest_address, ++ dest_address, ++ z_address, ++ z_address, ++ z_address, ++ zocclusion_address, ++ zocclusion_address, ++ coarse_z_address, ++ coarse_z_address, ++ fvf_address, ++ tex_address, ++ fence_cmd_address, ++ fence_cmd_address, ++ fence_cmd_address, ++ no_sequence ++}; ++ ++struct hz_init { ++ unsigned int code; ++ enum hazard hz; ++}; ++/* for atrribute other than context hazard detect */ ++static struct hz_init init_table1[] = { ++ {0xcc, no_check}, ++ {0xcd, no_check}, ++ {0xce, no_check}, ++ {0xcf, no_check}, ++ {0xdd, no_check}, ++ {0xee, no_check}, ++ {0x00, no_check}, ++ {0x01, no_check}, ++ {0x10, check_z_buffer_addr0}, ++ {0x11, check_z_buffer_addr1}, ++ {0x12, check_z_buffer_addr_mode}, ++ {0x13, no_check}, ++ {0x14, no_check}, ++ {0x15, no_check}, ++ {0x16, no_check}, ++ {0x17, no_check}, ++ {0x18, no_check}, ++ {0x19, no_check}, ++ {0x1a, no_check}, ++ {0x1b, no_check}, ++ {0x1c, no_check}, ++ {0x1d, no_check}, ++ {0x1e, no_check}, ++ {0x1f, no_check}, ++ {0x20, no_check}, ++ {0x21, check_zocclusion_addr0}, ++ {0x22, check_zocclusion_addr1}, ++ {0x23, no_check}, ++ {0x24, no_check}, ++ {0x25, no_check}, ++ {0x26, no_check}, ++ {0x27, no_check}, ++ /* H5 only*/ ++ {0x28, no_check}, ++ {0x29, check_coarse_z_addr0}, ++ {0x2a, check_coarse_z_addr1}, ++ {0x33, no_check}, ++ {0x34, no_check}, ++ {0x35, no_check}, ++ {0x36, no_check}, ++ {0x37, no_check}, ++ {0x38, no_check}, ++ {0x39, no_check}, ++ {0x3A, no_check}, ++ {0x3B, no_check}, ++ {0x3C, no_check}, ++ {0x3D, no_check}, ++ {0x3E, no_check}, ++ {0x3F, no_check}, ++ /*render target check */ ++ {0x50, check_render_target_addr0}, ++ /* H5/H6 different */ ++ {0x51, check_render_target_addr_mode}, ++ {0x52, check_render_target_addr1}, ++ {0x53, no_check}, ++ {0x58, check_render_target_addr0}, ++ {0x59, check_render_target_addr_mode}, ++ {0x5a, check_render_target_addr1}, ++ {0x5b, no_check}, ++ {0x60, 
check_render_target_addr0}, ++ {0x61, check_render_target_addr_mode}, ++ {0x62, check_render_target_addr1}, ++ {0x63, no_check}, ++ {0x68, check_render_target_addr0}, ++ {0x69, check_render_target_addr_mode}, ++ {0x6a, check_render_target_addr1}, ++ {0x6b, no_check}, ++ {0x70, no_check}, ++ {0x71, no_check}, ++ {0x72, no_check}, ++ {0x73, no_check}, ++ {0x74, no_check}, ++ {0x75, no_check}, ++ {0x76, no_check}, ++ {0x77, no_check}, ++ {0x78, no_check}, ++ {0x80, no_check}, ++ {0x81, no_check}, ++ {0x82, no_check}, ++ {0x83, no_check}, ++ {0x84, no_check}, ++ {0x85, no_check}, ++ {0x86, no_check}, ++ {0x87, no_check}, ++ {0x88, no_check}, ++ {0x89, no_check}, ++ {0x8a, no_check}, ++ {0x90, no_check}, ++ {0x91, no_check}, ++ {0x92, no_check}, ++ {0x93, no_check}, ++ {0x94, no_check}, ++ {0x95, no_check}, ++ {0x96, no_check}, ++ {0x97, no_check}, ++ {0x98, no_check}, ++ {0x99, no_check}, ++ {0x9a, no_check}, ++ {0x9b, no_check}, ++ {0xaa, no_check} ++}; ++ ++/* for texture stage's hazard detect */ ++static struct hz_init init_table2[] = { ++ {0xcc, no_check}, ++ {0xcd, no_check}, ++ {0xce, no_check}, ++ {0xcf, no_check}, ++ {0xdd, no_check}, ++ {0xee, no_check}, ++ {0x00, no_check}, ++ {0x01, no_check}, ++ {0x02, no_check}, ++ {0x03, no_check}, ++ {0x04, no_check}, ++ {0x05, no_check}, ++ /* H5/H6 diffent */ ++ {0x18, check_t_level0_facen_addr0}, ++ {0x20, no_check}, ++ {0x21, no_check}, ++ {0x22, no_check}, ++ {0x30, no_check}, ++ {0x50, no_check}, ++ {0x51, no_check}, ++ {0x9b, no_check}, ++}; ++ ++/*Check for flexible vertex format */ ++static struct hz_init init_table3[] = { ++ {0xcc, no_check}, ++ {0xcd, no_check}, ++ {0xce, no_check}, ++ {0xcf, no_check}, ++ {0xdd, no_check}, ++ {0xee, no_check}, ++ /* H5/H6 different */ ++ {0x00, check_fvf_addr_mode}, ++ {0x01, no_check}, ++ {0x02, no_check}, ++ {0x03, no_check}, ++ {0x04, no_check}, ++ {0x05, no_check}, ++ {0x08, no_check}, ++ {0x09, no_check}, ++ {0x0a, no_check}, ++ {0x0b, no_check}, ++ {0x0c, no_check}, ++ 
{0x0d, no_check}, ++ {0x0e, no_check}, ++ {0x0f, no_check}, ++ {0x10, no_check}, ++ {0x11, no_check}, ++ {0x12, no_check}, ++ {0x13, no_check}, ++ {0x14, no_check}, ++ {0x15, no_check}, ++ {0x16, no_check}, ++ {0x17, no_check}, ++ {0x18, no_check}, ++ {0x19, no_check}, ++ {0x1a, no_check}, ++ {0x1b, no_check}, ++ {0x1c, no_check}, ++ {0x1d, no_check}, ++ {0x1e, no_check}, ++ {0x1f, no_check}, ++ {0x20, no_check}, ++ {0x21, no_check}, ++ {0x22, no_check}, ++ {0x23, no_check}, ++ {0x24, no_check}, ++ {0x25, no_check}, ++ {0x26, no_check}, ++ {0x27, no_check}, ++ {0x28, no_check}, ++ {0x29, no_check}, ++ {0x2a, no_check}, ++ {0x2b, no_check}, ++ {0x2c, no_check}, ++ {0x2d, no_check}, ++ {0x2e, no_check}, ++ {0x2f, no_check}, ++ {0x40, no_check}, ++ {0x41, no_check}, ++ {0x42, no_check}, ++ {0x43, no_check}, ++ {0x44, no_check}, ++ {0x45, no_check}, ++ {0x46, no_check}, ++ {0x47, no_check}, ++ {0x48, no_check}, ++ {0x50, no_check}, ++ {0x51, no_check}, ++ {0x52, no_check}, ++ {0x60, no_check}, ++ {0x61, no_check}, ++ {0x62, no_check}, ++ {0x9b, no_check}, ++ {0xaa, no_check} ++}; ++/*Check for 364 fence command id*/ ++static struct hz_init init_table4[] = { ++ {0xcc, no_check}, ++ {0xcd, no_check}, ++ {0xce, no_check}, ++ {0xcf, no_check}, ++ {0xdd, no_check}, ++ {0xee, no_check}, ++ {0x00, no_check}, ++ {0x01, check_fence_cmd_addr0}, ++ {0x02, check_fence_cmd_addr1}, ++ {0x03, check_fence_cmd_addr2}, ++ {0x10, no_check}, ++ {0x11, no_check}, ++ {0x12, no_check}, ++ {0x13, no_check}, ++ {0x14, no_check}, ++ {0x18, no_check}, ++ {0x19, no_check}, ++ {0x1a, no_check}, ++ {0x1b, no_check}, ++ {0x1c, no_check}, ++ {0x20, no_check}, ++ {0xab, no_check}, ++ {0xaa, no_check} ++}; ++ ++/*Check for 353 fence command id*/ ++static struct hz_init init_table5[] = { ++ {0xcc, no_check}, ++ {0xcd, no_check}, ++ {0xce, no_check}, ++ {0xcf, no_check}, ++ {0xdd, no_check}, ++ {0xee, no_check}, ++ {0x00, no_check}, ++ {0x01, no_check}, ++ {0x02, no_check}, ++ {0x03, no_check}, ++ {0x04, 
check_fence_cmd_addr0}, ++ {0x05, check_fence_cmd_addr1}, ++ {0x06, no_check}, ++ {0x07, check_fence_cmd_addr2}, ++ {0x08, no_check}, ++ {0x09, no_check}, ++ {0x0a, no_check}, ++ {0x0b, no_check}, ++ {0x0c, no_check}, ++ {0x0d, no_check}, ++ {0x0e, no_check}, ++ {0x0f, no_check}, ++ {0x10, no_check}, ++ {0x11, no_check}, ++ {0x12, no_check}, ++ {0x18, no_check}, ++ {0x19, no_check}, ++ {0x1a, no_check}, ++ {0x30, no_check}, ++ {0x31, no_check}, ++ {0x32, no_check}, ++ {0x68, no_check}, ++ {0x69, no_check}, ++ {0x6a, no_check}, ++ {0x6b, no_check}, ++ {0xab, no_check}, ++ {0xaa, no_check} ++}; ++ ++static enum hazard init_table_01_00[256]; ++static enum hazard init_table_02_0n[256]; ++static enum hazard init_table_04_00[256]; ++static enum hazard init_table_11_364[256]; ++static enum hazard init_table_11_353[256]; ++ ++/*Require fence command id location reside in the shadow system memory */ ++static inline int ++check_fence_cmd_addr_range(struct drm_via_chrome9_state *seq, ++ unsigned long fence_cmd_add, unsigned long size, struct drm_device *dev) ++{ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *)dev->dev_private; ++ if (!dev_priv->shadow_map.shadow) ++ return -1; ++ if ((fence_cmd_add < dev_priv->shadow_map.shadow->offset) || ++ (fence_cmd_add + size > ++ dev_priv->shadow_map.shadow->offset + ++ dev_priv->shadow_map.shadow->size)) ++ return -1; ++ return 0; ++} ++ ++/* ++ * Currently we only catch the fence cmd's address, which will ++ * access system memory inevitably. ++ * NOTE:No care about AGP address.(we just think all AGP access are safe now). 
++ */ ++ ++static inline int finish_current_sequence(struct drm_via_chrome9_state *cur_seq) ++{ ++ switch (cur_seq->unfinished) { ++ case fence_cmd_address: ++ if (cur_seq->fence_need_check) ++ if (check_fence_cmd_addr_range(cur_seq, ++ cur_seq->fence_cmd_addr, 4, cur_seq->dev)) ++ return -EINVAL; ++ break; ++ default: ++ break; ++ } ++ cur_seq->unfinished = no_sequence; ++ return 0; ++} ++/* Only catch the cmd which potentially access the system memory, and treat all ++ * the other cmds are safe. ++ */ ++static inline int ++investigate_hazard(uint32_t cmd, enum hazard hz, ++ struct drm_via_chrome9_state *cur_seq) ++{ ++ register uint32_t tmp; ++ ++ if (cur_seq->unfinished && (cur_seq->unfinished != seqs[hz])) { ++ int ret = finish_current_sequence(cur_seq); ++ if (ret) ++ return ret; ++ } ++ ++ switch (hz) { ++ case check_render_target_addr0: ++ tmp = ((cmd >> 24) - 0x50) >> 3; ++ cur_seq->unfinished = dest_address; ++ cur_seq->render_target_addr[tmp] = cmd << 8; ++ break; ++ case check_render_target_addr1: ++ cur_seq->unfinished = dest_address; ++ tmp = ((cmd >> 24) - 0x50) >> 3; ++ cur_seq->render_target_pitch[tmp] = (cmd & 0x000001FF) >> 5; ++ break; ++ case check_render_target_addr_mode: ++ cur_seq->unfinished = dest_address; ++ if (!cur_seq->agp) ++ if (((cmd & 0x00300000) >> 20) == 2) { ++ DRM_ERROR("Attempt to place \ ++ render target in system memory\n"); ++ return -EINVAL; ++ } ++ break; ++ case check_z_buffer_addr0: ++ cur_seq->unfinished = z_address; ++ break; ++ case check_z_buffer_addr1: ++ cur_seq->unfinished = z_address; ++ if ((cmd & 0x00000003) == 2) { ++ DRM_ERROR("Attempt to place \ ++ Z buffer in system memory\n"); ++ return -EINVAL; ++ } ++ break; ++ case check_z_buffer_addr_mode: ++ cur_seq->unfinished = z_address; ++ if (((cmd & 0x00000060) >> 5) == 2) { ++ DRM_ERROR("Attempt to place \ ++ stencil buffer in system memory\n"); ++ return -EINVAL; ++ } ++ break; ++ case check_zocclusion_addr0: ++ cur_seq->unfinished = zocclusion_address; ++ 
break; ++ case check_zocclusion_addr1: ++ cur_seq->unfinished = zocclusion_address; ++ if (((cmd & 0x00c00000) >> 22) == 2) { ++ DRM_ERROR("Attempt to access system memory\n"); ++ return -EINVAL; ++ } ++ break; ++ case check_coarse_z_addr0: ++ cur_seq->unfinished = coarse_z_address; ++ if (((cmd & 0x00300000) >> 20) == 2) ++ return -EINVAL; ++ break; ++ case check_coarse_z_addr1: ++ cur_seq->unfinished = coarse_z_address; ++ break; ++ case check_fvf_addr_mode: ++ cur_seq->unfinished = fvf_address; ++ if (!cur_seq->agp) ++ if (((cmd & 0x0000c000) >> 14) == 2) { ++ DRM_ERROR("Attempt to place \ ++ fvf buffer in system memory\n"); ++ return -EINVAL; ++ } ++ break; ++ case check_t_level0_facen_addr0: ++ cur_seq->unfinished = tex_address; ++ if (!cur_seq->agp) ++ if ((cmd & 0x00000003) == 2 || ++ ((cmd & 0x0000000c) >> 2) == 2 || ++ ((cmd & 0x00000030) >> 4) == 2 || ++ ((cmd & 0x000000c0) >> 6) == 2 || ++ ((cmd & 0x0000c000) >> 14) == 2 || ++ ((cmd & 0x00030000) >> 16) == 2) { ++ DRM_ERROR("Attempt to place \ ++ texture buffer in system memory\n"); ++ return -EINVAL; ++ } ++ break; ++ case check_fence_cmd_addr0: ++ cur_seq->unfinished = fence_cmd_address; ++ if (cur_seq->agp) ++ cur_seq->fence_cmd_addr = ++ (cur_seq->fence_cmd_addr & 0xFF000000) | ++ (cmd & 0x00FFFFFF); ++ else ++ cur_seq->fence_cmd_addr = ++ (cur_seq->fence_cmd_addr & 0x00FFFFFF) | ++ ((cmd & 0x000000FF) << 24); ++ break; ++ case check_fence_cmd_addr1: ++ cur_seq->unfinished = fence_cmd_address; ++ if (!cur_seq->agp) ++ cur_seq->fence_cmd_addr = ++ (cur_seq->fence_cmd_addr & 0xFF000000) | ++ (cmd & 0x00FFFFFF); ++ break; ++ case check_fence_cmd_addr2: ++ cur_seq->unfinished = fence_cmd_address; ++ if (cmd & 0x00040000) ++ cur_seq->fence_need_check = 1; ++ else ++ cur_seq->fence_need_check = 0; ++ break; ++ default: ++ /*We think all the other cmd are safe.*/ ++ return 0; ++ } ++ return 0; ++} ++ ++static inline int verify_mmio_address(uint32_t address) ++{ ++ if ((address > 0x3FF) && (address < 0xC00)) 
{ ++ DRM_ERROR("Invalid VIDEO DMA command. " ++ "Attempt to access 3D- or command burst area.\n"); ++ return 1; ++ } else if ((address > 0xDFF) && (address < 0x1200)) { ++ DRM_ERROR("Invalid VIDEO DMA command. " ++ "Attempt to access PCI DMA area.\n"); ++ return 1; ++ } else if ((address > 0x1DFF) && (address < 0x2200)) { ++ DRM_ERROR("Invalid VIDEO DMA command. " ++ "Attempt to access CBU ROTATE SPACE registers.\n"); ++ return 1; ++ } else if ((address > 0x23FF) && (address < 0x3200)) { ++ DRM_ERROR("Invalid VIDEO DMA command. " ++ "Attempt to access PCI DMA2 area..\n"); ++ return 1; ++ } else if (address > 0x33FF) { ++ DRM_ERROR("Invalid VIDEO DMA command. " ++ "Attempt to access VGA registers.\n"); ++ return 1; ++ } ++ return 0; ++} ++ ++static inline int is_dummy_cmd(uint32_t cmd) ++{ ++ if ((cmd & INV_DUMMY_MASK) == 0xCC000000 || ++ (cmd & INV_DUMMY_MASK) == 0xCD000000 || ++ (cmd & INV_DUMMY_MASK) == 0xCE000000 || ++ (cmd & INV_DUMMY_MASK) == 0xCF000000 || ++ (cmd & INV_DUMMY_MASK) == 0xDD000000) ++ return 1; ++ return 0; ++} ++ ++static inline int ++verify_2d_tail(uint32_t const **buffer, const uint32_t *buf_end, ++ uint32_t dwords) ++{ ++ const uint32_t *buf = *buffer; ++ ++ if (buf_end - buf < dwords) { ++ DRM_ERROR("Illegal termination of 2d command.\n"); ++ return 1; ++ } ++ ++ while (dwords--) { ++ if (!is_dummy_cmd(*buf++)) { ++ DRM_ERROR("Illegal 2d command tail.\n"); ++ return 1; ++ } ++ } ++ ++ *buffer = buf; ++ return 0; ++} ++ ++static inline int ++verify_video_tail(uint32_t const **buffer, const uint32_t *buf_end, ++ uint32_t dwords) ++{ ++ const uint32_t *buf = *buffer; ++ ++ if (buf_end - buf < dwords) { ++ DRM_ERROR("Illegal termination of video command.\n"); ++ return 1; ++ } ++ while (dwords--) { ++ if (*buf && !is_dummy_cmd(*buf)) { ++ DRM_ERROR("Illegal video command tail.\n"); ++ return 1; ++ } ++ buf++; ++ } ++ *buffer = buf; ++ return 0; ++} ++ ++static inline enum verifier_state ++via_chrome9_check_header0(uint32_t const **buffer, const 
uint32_t *buf_end) ++{ ++ const uint32_t *buf = *buffer; ++ uint32_t cmd, qword, dword; ++ ++ qword = *(buf+1); ++ buf += 4; ++ dword = qword << 1; ++ ++ if (buf_end - buf < dword) ++ return state_error; ++ ++ while (qword-- > 0) { ++ cmd = *buf; ++ /* Is this consition too restrict? */ ++ if ((cmd & 0xFFFF) > 0x1FF) { ++ DRM_ERROR("Invalid header0 command io address 0x%x \ ++ Attempt to access non-2D mmio area.\n", cmd); ++ return state_error; ++ } ++ buf += 2; ++ } ++ ++ if ((dword & 3) && verify_2d_tail(&buf, buf_end, 4 - (dword & 0x3))) ++ return state_error; ++ ++ *buffer = buf; ++ return state_command; ++} ++ ++static inline enum verifier_state ++via_chrome9_check_header1(uint32_t const **buffer, const uint32_t *buf_end) ++{ ++ uint32_t dword; ++ const uint32_t *buf = *buffer; ++ ++ dword = *(buf + 1); ++ buf += 4; ++ ++ if (buf + dword > buf_end) ++ return state_error; ++ ++ buf += dword; ++ ++ if ((dword & 0x3) && verify_2d_tail(&buf, buf_end, 4 - (dword & 0x3))) ++ return state_error; ++ ++ *buffer = buf; ++ return state_command; ++} ++ ++static inline enum verifier_state ++via_chrome9_check_header2(uint32_t const **buffer, ++ const uint32_t *buf_end, struct drm_via_chrome9_state *hc_state) ++{ ++ uint32_t cmd1, cmd2; ++ enum hazard hz; ++ const uint32_t *buf = *buffer; ++ const enum hazard *hz_table; ++ ++ if ((buf_end - buf) < 4) { ++ DRM_ERROR ++ ("Illegal termination of DMA HALCYON_HEADER2 sequence.\n"); ++ return state_error; ++ } ++ cmd1 = *buf & 0x0000FFFF; ++ cmd2 = *++buf & 0x0000FFFF; ++ if (((cmd1 != INV_REG_CR_BEGIN) && (cmd1 != INV_REG_3D_BEGIN)) || ++ ((cmd2 != INV_REG_CR_TRANS) && (cmd2 != INV_REG_3D_TRANS))) { ++ DRM_ERROR ++ ("Illegal IO address of DMA HALCYON_HEADER2 sequence.\n"); ++ return state_error; ++ } ++ /* Advance to get paratype and subparatype */ ++ cmd1 = *++buf & 0xFFFF0000; ++ ++ switch (cmd1) { ++ case INV_ParaType_Attr: ++ buf += 2; ++ hz_table = init_table_01_00; ++ break; ++ case (INV_ParaType_Tex | (INV_SubType_Tex0 << 
24)): ++ case (INV_ParaType_Tex | (INV_SubType_Tex1 << 24)): ++ case (INV_ParaType_Tex | (INV_SubType_Tex2 << 24)): ++ case (INV_ParaType_Tex | (INV_SubType_Tex3 << 24)): ++ case (INV_ParaType_Tex | (INV_SubType_Tex4 << 24)): ++ case (INV_ParaType_Tex | (INV_SubType_Tex5 << 24)): ++ case (INV_ParaType_Tex | (INV_SubType_Tex6 << 24)): ++ case (INV_ParaType_Tex | (INV_SubType_Tex7 << 24)): ++ buf += 2; ++ hc_state->texture_index = (cmd1 & INV_ParaSubType_MASK) >> 24; ++ hz_table = init_table_02_0n; ++ break; ++ case INV_ParaType_FVF: ++ buf += 2; ++ hz_table = init_table_04_00; ++ break; ++ case INV_ParaType_CR: ++ buf += 2; ++ if (hc_state->agp) ++ hz_table = init_table_11_364; ++ else ++ hz_table = init_table_11_353; ++ break; ++ case INV_ParaType_Dummy: ++ buf += 2; ++ while ((buf < buf_end) && !is_agp_header(*buf)) ++ if (!is_dummy_cmd(*buf)) ++ return state_error; ++ else ++ buf++; ++ ++ if ((buf_end > buf) && ((buf_end - buf) & 0x3)) ++ return state_error; ++ return state_command; ++ /* We think cases below are all safe. So we feedback only when these ++ these cmd has another header there. 
++ */ ++ case INV_ParaType_Vdata: ++ case (INV_ParaType_Tex | ++ ((INV_SubType_Tex0 | INV_SubType_TexSample) << 24)): ++ case (INV_ParaType_Tex | ++ ((INV_SubType_Tex1 | INV_SubType_TexSample) << 24)): ++ case (INV_ParaType_Tex | ++ ((INV_SubType_Tex2 | INV_SubType_TexSample) << 24)): ++ case (INV_ParaType_Tex | ++ ((INV_SubType_Tex3 | INV_SubType_TexSample) << 24)): ++ case (INV_ParaType_Tex | ++ ((INV_SubType_Tex4 | INV_SubType_TexSample) << 24)): ++ case (INV_ParaType_Tex | ++ ((INV_SubType_Tex5 | INV_SubType_TexSample) << 24)): ++ case (INV_ParaType_Tex | ++ ((INV_SubType_Tex6 | INV_SubType_TexSample) << 24)): ++ case (INV_ParaType_Tex | ++ ((INV_SubType_Tex7 | INV_SubType_TexSample) << 24)): ++ case (INV_ParaType_Tex | (INV_SubType_General << 24)): ++ case INV_ParaType_Pal: ++ case INV_ParaType_PreCR: ++ case INV_ParaType_Cfg: ++ default: ++ buf += 2; ++ while ((buf < buf_end) && !is_agp_header(*buf)) ++ buf++; ++ *buffer = buf; ++ return state_command; ++ } ++ ++ while (buf < buf_end && !is_agp_header(*buf)) { ++ cmd1 = *buf++; ++ hz = hz_table[cmd1 >> 24]; ++ if (hz) { ++ if (investigate_hazard(cmd1, hz, hc_state)) ++ return state_error; ++ } else if (hc_state->unfinished && ++ finish_current_sequence(hc_state)) ++ return state_error; ++ ++ } ++ if (hc_state->unfinished && finish_current_sequence(hc_state)) ++ return state_error; ++ *buffer = buf; ++ return state_command; ++} ++ ++static inline enum verifier_state ++via_chrome9_check_header3(uint32_t const **buffer, ++ const uint32_t *buf_end) ++{ ++ const uint32_t *buf = *buffer; ++ ++ buf += 4; ++ while (buf < buf_end && !is_agp_header(*buf)) ++ buf += 4; ++ ++ *buffer = buf; ++ return state_command; ++} ++ ++ ++static inline enum verifier_state ++via_chrome9_check_vheader4(uint32_t const **buffer, ++ const uint32_t *buf_end) ++{ ++ uint32_t data; ++ const uint32_t *buf = *buffer; ++ ++ if (buf_end - buf < 4) { ++ DRM_ERROR("Illegal termination of video header4 command\n"); ++ return state_error; ++ } ++ 
++ data = *buf++ & ~INV_AGPHeader_MASK; ++ if (verify_mmio_address(data)) ++ return state_error; ++ ++ data = *buf; ++ buf += 2; ++ ++ if (*buf++ != 0x00000000) { ++ DRM_ERROR("Illegal header4 header data\n"); ++ return state_error; ++ } ++ ++ if (buf_end - buf < data) ++ return state_error; ++ buf += data; ++ ++ if ((data & 3) && verify_video_tail(&buf, buf_end, 4 - (data & 3))) ++ return state_error; ++ *buffer = buf; ++ return state_command; ++ ++} ++ ++static inline enum verifier_state ++via_chrome9_check_vheader5(uint32_t const **buffer, const uint32_t *buf_end) ++{ ++ uint32_t data; ++ const uint32_t *buf = *buffer; ++ uint32_t i; ++ ++ if (buf_end - buf < 4) { ++ DRM_ERROR("Illegal termination of video header5 command\n"); ++ return state_error; ++ } ++ ++ data = *++buf; ++ buf += 2; ++ ++ if (*buf++ != 0x00000000) { ++ DRM_ERROR("Illegal header5 header data\n"); ++ return state_error; ++ } ++ if ((buf_end - buf) < (data << 1)) { ++ DRM_ERROR("Illegal termination of video header5 command\n"); ++ return state_error; ++ } ++ for (i = 0; i < data; ++i) { ++ if (verify_mmio_address(*buf++)) ++ return state_error; ++ buf++; ++ } ++ data <<= 1; ++ if ((data & 3) && verify_video_tail(&buf, buf_end, 4 - (data & 3))) ++ return state_error; ++ *buffer = buf; ++ return state_command; ++} ++ ++int ++via_chrome9_verify_command_stream(const uint32_t *buf, ++ unsigned int size, struct drm_device *dev, int agp) ++{ ++ ++ struct drm_via_chrome9_private *dev_priv = ++ (struct drm_via_chrome9_private *) dev->dev_private; ++ struct drm_via_chrome9_state *hc_state = &dev_priv->hc_state; ++ struct drm_via_chrome9_state saved_state = *hc_state; ++ uint32_t cmd; ++ const uint32_t *buf_end = buf + (size >> 2); ++ enum verifier_state state = state_command; ++ ++ hc_state->dev = dev; ++ hc_state->unfinished = no_sequence; ++ hc_state->agp = agp; ++ ++ while (buf < buf_end) { ++ ++ switch (state) { ++ case state_header0: ++ state = via_chrome9_check_header0(&buf, buf_end); ++ break; ++ 
case state_header1: ++ state = via_chrome9_check_header1(&buf, buf_end); ++ break; ++ case state_header2: ++ state = via_chrome9_check_header2(&buf, ++ buf_end, hc_state); ++ break; ++ case state_header3: ++ state = via_chrome9_check_header3(&buf, buf_end); ++ break; ++ case state_header4: ++ state = via_chrome9_check_vheader4(&buf, buf_end); ++ break; ++ case state_header5: ++ state = via_chrome9_check_vheader5(&buf, buf_end); ++ break; ++ case state_header6: ++ case state_header7: ++ DRM_ERROR("Unimplemented Header 6/7 command.\n"); ++ state = state_error; ++ break; ++ case state_command: ++ cmd = *buf; ++ if (INV_AGPHeader2 == (cmd & INV_AGPHeader_MASK)) ++ state = state_header2; ++ else if (INV_AGPHeader1 == (cmd & INV_AGPHeader_MASK)) ++ state = state_header1; ++ else if (INV_AGPHeader5 == (cmd & INV_AGPHeader_MASK)) ++ state = state_header5; ++ else if (INV_AGPHeader6 == (cmd & INV_AGPHeader_MASK)) ++ state = state_header6; ++ else if (INV_AGPHeader3 == (cmd & INV_AGPHeader_MASK)) ++ state = state_header3; ++ else if (INV_AGPHeader4 == (cmd & INV_AGPHeader_MASK)) ++ state = state_header4; ++ else if (INV_AGPHeader7 == (cmd & INV_AGPHeader_MASK)) ++ state = state_header7; ++ else if (INV_AGPHeader0 == (cmd & INV_AGPHeader_MASK)) ++ state = state_header0; ++ else { ++ DRM_ERROR("Invalid command sequence\n"); ++ state = state_error; ++ } ++ break; ++ case state_error: ++ default: ++ *hc_state = saved_state; ++ return -EINVAL; ++ } ++ } ++ if (state == state_error) { ++ *hc_state = saved_state; ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++ ++static void ++setup_hazard_table(struct hz_init init_table[], ++enum hazard table[], int size) ++{ ++ int i; ++ ++ for (i = 0; i < 256; ++i) ++ table[i] = forbidden_command; ++ ++ for (i = 0; i < size; ++i) ++ table[init_table[i].code] = init_table[i].hz; ++} ++ ++void via_chrome9_init_command_verifier(void) ++{ ++ setup_hazard_table(init_table1, init_table_01_00, ++ sizeof(init_table1) / sizeof(struct hz_init)); ++ 
setup_hazard_table(init_table2, init_table_02_0n, ++ sizeof(init_table2) / sizeof(struct hz_init)); ++ setup_hazard_table(init_table3, init_table_04_00, ++ sizeof(init_table3) / sizeof(struct hz_init)); ++ setup_hazard_table(init_table4, init_table_11_364, ++ sizeof(init_table4) / sizeof(struct hz_init)); ++ setup_hazard_table(init_table5, init_table_11_353, ++ sizeof(init_table5) / sizeof(struct hz_init)); ++} ++ ++#endif +diff -Naur linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_verifier.h linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_verifier.h +--- linux-2.6.30-rc7/drivers/gpu/drm/via_chrome9/via_chrome9_verifier.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.30-rc7.patch/drivers/gpu/drm/via_chrome9/via_chrome9_verifier.h 2008-12-03 03:08:33.000000000 +0100 +@@ -0,0 +1,61 @@ ++/* ++* Copyright 2004 The Unichrome Project. All Rights Reserved. ++* ++* Permission is hereby granted, free of charge, to any person obtaining a ++* copy of this software and associated documentation files (the "Software"), ++* to deal in the Software without restriction, including without limitation ++* the rights to use, copy, modify, merge, publish, distribute, sub license, ++* and/or sell copies of the Software, and to permit persons to whom the ++* Software is furnished to do so, subject to the following conditions: ++* ++* The above copyright notice and this permission notice (including the ++* next paragraph) shall be included in all copies or substantial portions ++* of the Software. ++* ++* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL ++* THE UNICHROME PROJECT, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, ++* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR ++* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ++* THE USE OR OTHER DEALINGS IN THE SOFTWARE. ++* ++* Author: Scott Fang 2008. ++*/ ++ ++#ifndef _via_chrome9_VERIFIER_H_ ++#define _via_chrome9_VERIFIER_H_ ++ ++#define VIA_CHROME9_VERIFY_ENABLE 0 ++ ++enum drm_via_chrome9_sequence { ++ no_sequence = 0, ++ z_address, ++ dest_address, ++ tex_address, ++ zocclusion_address, ++ coarse_z_address, ++ fvf_address, ++ fence_cmd_address ++}; ++ ++struct drm_via_chrome9_state { ++ uint32_t texture_index; ++ uint32_t render_target_addr[4]; ++ uint32_t render_target_pitch[4]; ++ uint32_t vb_addr; ++ uint32_t fence_cmd_addr; ++ uint32_t fence_need_check; ++ enum drm_via_chrome9_sequence unfinished; ++ int agp_texture; ++ int multitex; ++ struct drm_device *dev; ++ int agp; ++ const uint32_t *buf_start; ++}; ++ ++extern int via_chrome9_verify_command_stream(const uint32_t *buf, ++ unsigned int size, struct drm_device *dev, int agp); ++void via_chrome9_init_command_verifier(void); ++ ++#endif diff --git a/packages/multimedia/ffmpeg/10_mt-decode.diff b/packages/multimedia/ffmpeg/10_mt-decode.diff new file mode 100644 index 0000000000..4e9606d208 --- /dev/null +++ b/packages/multimedia/ffmpeg/10_mt-decode.diff @@ -0,0 +1,2875 @@ +diff -Naur ffmpeg-r17861.orig/ffplay.c ffmpeg-r17861/ffplay.c +--- ffmpeg-r17861.orig/ffplay.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/ffplay.c 2009-03-07 15:28:18.000000000 +0100 +@@ -1751,6 +1751,9 @@ + enc->skip_loop_filter= skip_loop_filter; + enc->error_recognition= error_recognition; + enc->error_concealment= error_concealment; ++ if(thread_count>1) ++ avcodec_thread_init(enc, thread_count); ++ enc->thread_count= thread_count; + + set_context_opts(enc, avcodec_opts[enc->codec_type], 0); + +@@ -1775,9 +1778,6 @@ + is->audio_src_fmt= SAMPLE_FMT_S16; 
+ } + +- if(thread_count>1) +- avcodec_thread_init(enc, thread_count); +- enc->thread_count= thread_count; + ic->streams[stream_index]->discard = AVDISCARD_DEFAULT; + switch(enc->codec_type) { + case CODEC_TYPE_AUDIO: +diff -Naur ffmpeg-r17861.orig/libavcodec/avcodec.h ffmpeg-r17861/libavcodec/avcodec.h +--- ffmpeg-r17861.orig/libavcodec/avcodec.h 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/avcodec.h 2009-03-07 15:28:18.000000000 +0100 +@@ -534,6 +534,10 @@ + * Codec can export data for HW decoding (VDPAU). + */ + #define CODEC_CAP_HWACCEL_VDPAU 0x0080 ++/** ++ * Codec supports frame-based multithreading. ++ */ ++#define CODEC_CAP_FRAME_THREADS 0x0100 + + //The following defines may change, don't expect compatibility if you use them. + #define MB_TYPE_INTRA4x4 0x0001 +@@ -805,8 +809,21 @@ + * - encoding: unused\ + * - decoding: Read by user.\ + */\ +- int64_t reordered_opaque;\ +- ++ int64_t reordered_opaque;\ ++\ ++ /**\ ++ * the AVCodecContext which ff_get_buffer was last called on\ ++ * - encoding: Set by libavcodec.\ ++ * - decoding: Set by libavcodec.\ ++ */\ ++ struct AVCodecContext *owner;\ ++\ ++ /**\ ++ * used by multithreading to store frame-specific info\ ++ * - encoding: Set by libavcodec.\ ++ * - decoding: Set by libavcodec.\ ++ */\ ++ void *thread_opaque; + + #define FF_QSCALE_TYPE_MPEG1 0 + #define FF_QSCALE_TYPE_MPEG2 1 +@@ -960,7 +977,7 @@ + * If non NULL, 'draw_horiz_band' is called by the libavcodec + * decoder to draw a horizontal band. It improves cache usage. Not + * all codecs can do that. You must check the codec capabilities +- * beforehand. ++ * beforehand. May be called by different threads at the same time. + * The function is also used by hardware acceleration APIs. + * It is called at least once during frame decoding to pass + * the data needed for hardware render. +@@ -995,7 +1012,7 @@ + * Samples per packet, initialized when calling 'init'. 
+ */ + int frame_size; +- int frame_number; ///< audio or video frame number ++ int frame_number; ///< Number of audio or video frames returned so far + int real_pict_num; ///< Returns the real picture number of previous encoded frame. + + /** +@@ -1205,6 +1222,8 @@ + * If pic.reference is set then the frame will be read later by libavcodec. + * avcodec_align_dimensions() should be used to find the required width and + * height, as they normally need to be rounded up to the next multiple of 16. ++ * May be called by different threads if frame threading is enabled, but not ++ * by more than one at the same time. + * - encoding: unused + * - decoding: Set by libavcodec., user can override. + */ +@@ -1213,7 +1232,9 @@ + /** + * Called to release buffers which were allocated with get_buffer. + * A released buffer can be reused in get_buffer(). +- * pic.data[*] must be set to NULL. ++ * pic.data[*] must be set to NULL. May be called by different threads ++ * if frame threading is enabled, but not more than one at the same time. ++ * + * - encoding: unused + * - decoding: Set by libavcodec., user can override. + */ +@@ -2332,6 +2353,32 @@ + * Set to time_base ticks per frame. Default 1, e.g., H.264/MPEG-2 set it to 2. + */ + int ticks_per_frame; ++ ++ /** ++ * Whether this is a copy of the context which had init() called on it. ++ * This is used by multithreading - shared tables and picture pointers ++ * should be freed from the original context only. ++ * - encoding: Set by libavcodec. ++ * - decoding: Set by libavcodec. ++ */ ++ int is_copy; ++ ++ /** ++ * Which multithreading methods to use, for codecs that support more than one. ++ * - encoding: Set by user, otherwise the default is used. ++ * - decoding: Set by user, otherwise the default is used. 
++ */ ++ int thread_type; ++#define FF_THREAD_FRAME 1 //< Decode more than one frame at once ++#define FF_THREAD_SLICE 2 //< Decode more than one part of a single frame at once ++#define FF_THREAD_DEFAULT 3 //< Use both if possible. ++ ++ /** ++ * Which multithreading methods are actually active at the moment. ++ * - encoding: Set by libavcodec. ++ * - decoding: Set by libavcodec. ++ */ ++ int active_thread_type; + } AVCodecContext; + + /** +@@ -2374,6 +2421,26 @@ + const int *supported_samplerates; ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0 + const enum SampleFormat *sample_fmts; ///< array of supported sample formats, or NULL if unknown, array is terminated by -1 + const int64_t *channel_layouts; ///< array of support channel layouts, or NULL if unknown. array is terminated by 0 ++ ++ /** ++ * @defgroup framethreading Frame threading support functions. ++ * @{ ++ */ ++ /** ++ * If the codec allocates writable tables in init(), define init_copy() to re-allocate ++ * them in the copied contexts. Before calling it, priv_data will be set to a copy of ++ * the original. ++ */ ++ int (*init_copy)(AVCodecContext *); ++ /** ++ * Copy all necessary context variables from the last thread before starting the next one. ++ * If the codec doesn't define this, the next thread will start automatically; otherwise, ++ * the codec must call ff_report_frame_setup_done(). Do not assume anything about the ++ * contents of priv data except that it has been copied from the original some time after ++ * codec init. Will not be called if frame threading is disabled. 
++ */ ++ int (*update_context)(AVCodecContext *, AVCodecContext *from); ++ /** @} */ + } AVCodec; + + /** +diff -Naur ffmpeg-r17861.orig/libavcodec/beosthread.c ffmpeg-r17861/libavcodec/beosthread.c +--- ffmpeg-r17861.orig/libavcodec/beosthread.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/beosthread.c 2009-03-07 15:28:18.000000000 +0100 +@@ -121,7 +121,13 @@ + int i; + ThreadContext *c; + ++ if(!(s->thread_type & FF_THREAD_SLICE)){ ++ av_log(s, AV_LOG_WARNING, "The requested thread algorithm is not supported with this thread library.\n"); ++ return 0; ++ } ++ + s->thread_count= thread_count; ++ s->active_thread_type= FF_THREAD_SLICE; + + assert(!s->thread_opaque); + c= av_mallocz(sizeof(ThreadContext)*thread_count); +diff -Naur ffmpeg-r17861.orig/libavcodec/dsputil.c ffmpeg-r17861/libavcodec/dsputil.c +--- ffmpeg-r17861.orig/libavcodec/dsputil.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/dsputil.c 2009-03-07 15:28:18.000000000 +0100 +@@ -438,7 +438,7 @@ + + /* draw the edges of width 'w' of an image of size width, height */ + //FIXME check that this is ok for mpeg4 interlaced +-static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w) ++static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides) + { + uint8_t *ptr, *last_line; + int i; +@@ -446,8 +446,8 @@ + last_line = buf + (height - 1) * wrap; + for(i=0;ipict_type == FF_B_TYPE){ +- while(s->next_picture.mbskip_table[ s->mb_index2xy[ mb_num ] ]) mb_num++; ++ int mb_x = 0, mb_y = 0; ++ ++ while(s->next_picture.mbskip_table[ s->mb_index2xy[ mb_num ] ]) { ++ if (!mb_x) ff_await_frame_progress((AVFrame*)s->next_picture_ptr, mb_y++); ++ mb_num++; ++ if (++mb_x == s->mb_width) mb_x = 0; ++ } ++ + if(mb_num >= s->mb_num) return -1; // slice contains just skipped MBs which where already decoded + } + +@@ -4351,6 +4359,8 @@ + s->last_mv[i][1][0]= + s->last_mv[i][1][1]= 0; + } ++ ++ 
ff_await_frame_progress((AVFrame*)s->next_picture_ptr, s->mb_y); + } + + /* if we skipped it in the future P Frame than skip it now too */ +@@ -4530,6 +4540,12 @@ + if(s->codec_id==CODEC_ID_MPEG4){ + if(mpeg4_is_resync(s)){ + const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1; ++ ++ if(s->pict_type==FF_B_TYPE){ ++ ff_await_frame_progress((AVFrame*)s->next_picture_ptr, ++ (s->mb_x + delta >= s->mb_width) ? FFMIN(s->mb_y+1, s->mb_height-1) : s->mb_y); ++ } ++ + if(s->pict_type==FF_B_TYPE && s->next_picture.mbskip_table[xy + delta]) + return SLICE_OK; + return SLICE_END; +diff -Naur ffmpeg-r17861.orig/libavcodec/h263dec.c ffmpeg-r17861/libavcodec/h263dec.c +--- ffmpeg-r17861.orig/libavcodec/h263dec.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/h263dec.c 2009-03-07 15:28:18.000000000 +0100 +@@ -32,6 +32,7 @@ + #include "h263_parser.h" + #include "mpeg4video_parser.h" + #include "msmpeg4.h" ++#include "thread.h" + + //#define DEBUG + //#define PRINT_FRAME_TIME +@@ -235,6 +236,7 @@ + if(++s->mb_x >= s->mb_width){ + s->mb_x=0; + ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size); ++ MPV_report_decode_progress(s); + s->mb_y++; + } + return 0; +@@ -255,6 +257,7 @@ + } + + ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size); ++ MPV_report_decode_progress(s); + + s->mb_x= 0; + } +@@ -626,6 +629,8 @@ + return -1; + } + ++ if (!s->divx_packed) ff_report_frame_setup_done(avctx); ++ + #ifdef DEBUG + av_log(avctx, AV_LOG_DEBUG, "qscale=%d\n", s->qscale); + #endif +@@ -737,9 +742,10 @@ + NULL, + ff_h263_decode_end, + ff_h263_decode_frame, +- CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, ++ CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY | CODEC_CAP_FRAME_THREADS, + .flush= ff_mpeg_flush, + .long_name= NULL_IF_CONFIG_SMALL("MPEG-4 part 2"), ++ .update_context= ONLY_IF_THREADS_ENABLED(ff_mpeg_update_context), + .pix_fmts= ff_hwaccel_pixfmt_list_420, + }; + +diff -Naur 
ffmpeg-r17861.orig/libavcodec/h264.c ffmpeg-r17861/libavcodec/h264.c +--- ffmpeg-r17861.orig/libavcodec/h264.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/h264.c 2009-03-07 15:28:18.000000000 +0100 +@@ -35,6 +35,7 @@ + #include "golomb.h" + #include "mathops.h" + #include "rectangle.h" ++#include "thread.h" + #include "vdpau_internal.h" + + #include "cabac.h" +@@ -1597,6 +1598,141 @@ + return h->pps.chroma_qp_table[t][qscale]; + } + ++static inline int mc_dir_part_y(H264Context *h, Picture *pic, int n, int height, ++ int y_offset, int list){ ++ int my= h->mv_cache[list][ scan8[n] ][1] + 4*y_offset; ++ int filter_height= 6; ++ int extra_height= h->emu_edge_height; ++ const int full_my= my>>2; ++ ++ if(!pic->data[0]) return -1; ++ ++ if(full_my < (extra_height + filter_height)){ ++ my = abs(my) + extra_height*4; ++ } ++ ++ return (my + height * 4 + filter_height * 4 + 1) >> 2; ++} ++ ++static inline void mc_part_y(H264Context *h, int refs[2][48], int n, int height, ++ int y_offset, int list0, int list1){ ++ MpegEncContext * const s = &h->s; ++ int my; ++ ++ y_offset += 16*(s->mb_y >> FIELD_PICTURE); ++ ++ if(list0){ ++ int ref_n = h->ref_cache[0][ scan8[n] ], my; ++ Picture *ref= &h->ref_list[0][ref_n]; ++ ++ // Error resilience puts the current picture in the ref list. ++ // Don't try to wait on these as it will cause a deadlock. ++ // Fields can wait on each other, though. 
++ if(ref->thread_opaque != s->current_picture.thread_opaque || ++ (ref->reference&3) != s->picture_structure) { ++ my = mc_dir_part_y(h, ref, n, height, y_offset, 0); ++ refs[0][ref_n] = FFMAX(refs[0][ref_n], my); ++ } ++ } ++ ++ if(list1){ ++ int ref_n = h->ref_cache[1][ scan8[n] ]; ++ Picture *ref= &h->ref_list[1][ref_n]; ++ ++ if(ref->thread_opaque != s->current_picture.thread_opaque || ++ (ref->reference&3) != s->picture_structure) { ++ my = mc_dir_part_y(h, ref, n, height, y_offset, 1); ++ refs[1][ref_n] = FFMAX(refs[1][ref_n], my); ++ } ++ } ++} ++ ++/** ++ * Wait until all reference frames are available for MC operations. ++ * ++ * @param h the H264 context ++ */ ++static void avail_motion(H264Context *h){ ++ MpegEncContext * const s = &h->s; ++ const int mb_xy= h->mb_xy; ++ const int mb_type= s->current_picture.mb_type[mb_xy]; ++ int refs[2][48]; ++ int ref, list; ++ ++ memset(refs, -1, sizeof(refs)); ++ ++ if(IS_16X16(mb_type)){ ++ mc_part_y(h, refs, 0, 16, 0, ++ IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); ++ }else if(IS_16X8(mb_type)){ ++ mc_part_y(h, refs, 0, 8, 0, ++ IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); ++ mc_part_y(h, refs, 8, 8, 8, ++ IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); ++ }else if(IS_8X16(mb_type)){ ++ mc_part_y(h, refs, 0, 16, 0, ++ IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); ++ mc_part_y(h, refs, 4, 16, 0, ++ IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); ++ }else{ ++ int i; ++ ++ assert(IS_8X8(mb_type)); ++ ++ for(i=0; i<4; i++){ ++ const int sub_mb_type= h->sub_mb_type[i]; ++ const int n= 4*i; ++ int y_offset= (i&2)<<2; ++ ++ if(IS_SUB_8X8(sub_mb_type)){ ++ mc_part_y(h, refs, n , 8, y_offset, ++ IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); ++ }else if(IS_SUB_8X4(sub_mb_type)){ ++ mc_part_y(h, refs, n , 4, y_offset, ++ IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); ++ mc_part_y(h, refs, n+2, 4, y_offset+4, ++ IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); ++ }else if(IS_SUB_4X8(sub_mb_type)){ 
++ mc_part_y(h, refs, n , 8, y_offset, ++ IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); ++ mc_part_y(h, refs, n+1, 8, y_offset, ++ IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); ++ }else{ ++ int j; ++ assert(IS_SUB_4X4(sub_mb_type)); ++ for(j=0; j<4; j++){ ++ int sub_y_offset= y_offset + 2*(j&2); ++ mc_part_y(h, refs, n+j, 4, sub_y_offset, ++ IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); ++ } ++ } ++ } ++ } ++ ++ for(list=1; list>=0; list--){ ++ for(ref=0; ref<48; ref++){ ++ int row = refs[list][ref]; ++ if(row >= 0){ ++ Picture *ref_pic = &h->ref_list[list][ref]; ++ int ref_field = ref_pic->reference - 1; ++ int ref_field_picture = ref_pic->field_picture; ++ int pic_height = 16*s->mb_height >> ref_field_picture; ++ ++ if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields ++ ff_await_field_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1); ++ ff_await_field_progress((AVFrame*)ref_pic, FFMIN((row >> 1) , pic_height-1), 0); ++ }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame ++ ff_await_field_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field , pic_height-1), 0); ++ }else if(FIELD_PICTURE){ ++ ff_await_field_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field); ++ }else{ ++ ff_await_field_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0); ++ } ++ } ++ } ++ } ++} ++ + static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list, + uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, + int src_x_offset, int src_y_offset, +@@ -1800,6 +1936,7 @@ + + assert(IS_INTER(mb_type)); + ++ if(USE_FRAME_THREADING(s->avctx)) avail_motion(h); + prefetch_motion(h, 0); + + if(IS_16X16(mb_type)){ +@@ -2202,7 +2339,7 @@ + if(avctx->extradata_size > 0 && avctx->extradata && + *(char *)avctx->extradata == 1){ + h->is_avc = 1; +- h->got_avcC = 0; ++ h->got_extradata = 0; + } else { + h->is_avc = 0; + } 
+@@ -2220,6 +2357,101 @@ + return 0; + } + ++static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base) ++{ ++ int i; ++ ++ for (i=0; istart_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field) ++static int decode_update_context(AVCodecContext *dst, AVCodecContext *src){ ++ H264Context *h= dst->priv_data, *h1= src->priv_data; ++ MpegEncContext * const s = &h->s, * const s1 = &h1->s; ++ int inited = s->context_initialized, err; ++ ++ if(!s1->context_initialized) return 0; ++ ++ err = ff_mpeg_update_context(dst, src); ++ if(err) return err; ++ ++ //FIXME handle width/height changing ++ if(!inited){ ++ int i; ++ memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc ++ memset(h->sps_buffers, 0, sizeof(h->sps_buffers)); ++ memset(h->pps_buffers, 0, sizeof(h->pps_buffers)); ++ alloc_tables(h); ++ context_init(h); ++ ++ for(i=0; i<2; i++){ ++ h->rbsp_buffer[i] = NULL; ++ h->rbsp_buffer_size[i] = 0; ++ } ++ ++ h->thread_context[0] = h; ++ ++ // frame_start may not be called for the next thread (if it's decoding a bottom field) ++ // so this has to be allocated here ++ h->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); ++ } ++ ++ //extradata/NAL handling ++ h->is_avc = h1->is_avc; ++ h->got_extradata = h1->got_extradata; ++ ++ //SPS/PPS ++ copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS)); ++ h->sps = h1->sps; ++ copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS)); ++ h->pps = h1->pps; ++ ++ //Dequantization matrices ++ //FIXME these are big - can they be only copied when PPS changes? 
++ copy_fields(h, h1, dequant4_buffer, dequant4_coeff); ++ ++ //POC timing ++ copy_fields(h, h1, poc_lsb, use_weight); ++ ++ //reference lists ++ copy_fields(h, h1, ref_count, intra_gb); ++ ++ copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1); ++ copy_picture_range(h->long_ref, h1->long_ref, 32, s, s1); ++ copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1); ++ ++ h->last_slice_type = h1->last_slice_type; ++ ++ if(!s->current_picture_ptr) return 0; ++ ++ if(!s->dropable) { ++ execute_ref_pic_marking(h, h->mmco, h->mmco_index); ++ h->prev_poc_msb = h->poc_msb; ++ h->prev_poc_lsb = h->poc_lsb; ++ } ++ h->prev_frame_num_offset= h->frame_num_offset; ++ h->prev_frame_num = h->frame_num; ++ if(h->next_output_pic) h->outputed_poc = h->next_output_pic->poc; ++ ++ return 0; ++} ++ + static int frame_start(H264Context *h){ + MpegEncContext * const s = &h->s; + int i; +@@ -2251,11 +2483,11 @@ + /* can't be in alloc_tables because linesize isn't known there. + * FIXME: redo bipred weight to not require extra buffer? 
*/ + for(i = 0; i < s->avctx->thread_count; i++) +- if(!h->thread_context[i]->s.obmc_scratchpad) ++ if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad) + h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize); + + /* some macroblocks will be accessed before they're available */ +- if(FRAME_MBAFF || s->avctx->thread_count > 1) ++ if(FRAME_MBAFF || USE_AVCODEC_EXECUTE(s->avctx)) + memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table)); + + // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1; +@@ -2271,11 +2503,154 @@ + + s->current_picture_ptr->field_poc[0]= + s->current_picture_ptr->field_poc[1]= INT_MAX; ++ ++ h->next_output_pic = NULL; ++ + assert(s->current_picture_ptr->long_ref==0); + + return 0; + } + ++/** ++ * Run setup operations that must be run after slice header decoding. ++ * This includes finding the next displayed frame. ++ * ++ * @param h h264 master context ++ */ ++static void decode_postinit(H264Context *h){ ++ MpegEncContext * const s = &h->s; ++ Picture *out = s->current_picture_ptr; ++ Picture *cur = s->current_picture_ptr; ++ int i, pics, cross_idr, out_of_order, out_idx; ++ ++ s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264; ++ s->current_picture_ptr->pict_type= s->pict_type; ++ ++ // Don't do anything if it's the first field or we've already been called. ++ // FIXME the first field should call ff_report_frame_setup_done() even if it skips the rest ++ if (h->next_output_pic) return; ++ ++ if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { ++ ff_report_frame_setup_done(s->avctx); ++ return; ++ } ++ ++ cur->repeat_pict = 0; ++ ++ /* Signal interlacing information externally. */ ++ /* Prioritize picture timing SEI information over used decoding process if it exists. 
*/ ++ if(h->sps.pic_struct_present_flag){ ++ switch (h->sei_pic_struct) ++ { ++ case SEI_PIC_STRUCT_FRAME: ++ cur->interlaced_frame = 0; ++ break; ++ case SEI_PIC_STRUCT_TOP_FIELD: ++ case SEI_PIC_STRUCT_BOTTOM_FIELD: ++ case SEI_PIC_STRUCT_TOP_BOTTOM: ++ case SEI_PIC_STRUCT_BOTTOM_TOP: ++ cur->interlaced_frame = 1; ++ break; ++ case SEI_PIC_STRUCT_TOP_BOTTOM_TOP: ++ case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM: ++ // Signal the possibility of telecined film externally (pic_struct 5,6) ++ // From these hints, let the applications decide if they apply deinterlacing. ++ cur->repeat_pict = 1; ++ cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE; ++ break; ++ case SEI_PIC_STRUCT_FRAME_DOUBLING: ++ // Force progressive here, as doubling interlaced frame is a bad idea. ++ cur->interlaced_frame = 0; ++ cur->repeat_pict = 2; ++ break; ++ case SEI_PIC_STRUCT_FRAME_TRIPLING: ++ cur->interlaced_frame = 0; ++ cur->repeat_pict = 4; ++ break; ++ } ++ }else{ ++ /* Derive interlacing flag from used decoding process. */ ++ cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE; ++ } ++ ++ if (cur->field_poc[0] != cur->field_poc[1]){ ++ /* Derive top_field_first from field pocs. */ ++ cur->top_field_first = cur->field_poc[0] < cur->field_poc[1]; ++ }else{ ++ if(cur->interlaced_frame || h->sps.pic_struct_present_flag){ ++ /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. 
*/ ++ if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM ++ || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP) ++ cur->top_field_first = 1; ++ else ++ cur->top_field_first = 0; ++ }else{ ++ /* Most likely progressive */ ++ cur->top_field_first = 0; ++ } ++ } ++ ++ //FIXME do something with unavailable reference frames ++ ++ /* Sort B-frames into display order */ ++ ++ if(h->sps.bitstream_restriction_flag ++ && s->avctx->has_b_frames < h->sps.num_reorder_frames){ ++ s->avctx->has_b_frames = h->sps.num_reorder_frames; ++ s->low_delay = 0; ++ } ++ ++ if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT ++ && !h->sps.bitstream_restriction_flag){ ++ s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT; ++ s->low_delay= 0; ++ } ++ ++ pics = 0; ++ while(h->delayed_pic[pics]) pics++; ++ ++ assert(pics <= MAX_DELAYED_PIC_COUNT); ++ ++ h->delayed_pic[pics++] = cur; ++ if(cur->reference == 0) ++ cur->reference = DELAYED_PIC_REF; ++ ++ out = h->delayed_pic[0]; ++ out_idx = 0; ++ for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++) ++ if(h->delayed_pic[i]->poc < out->poc){ ++ out = h->delayed_pic[i]; ++ out_idx = i; ++ } ++ cross_idr = !h->delayed_pic[0]->poc || !!h->delayed_pic[i] || h->delayed_pic[0]->key_frame; ++ ++ out_of_order = !cross_idr && out->poc < h->outputed_poc; ++ ++ if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames) ++ { } ++ else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) ++ || (s->low_delay && ++ ((!cross_idr && out->poc > h->outputed_poc + 2) ++ || cur->pict_type == FF_B_TYPE))) ++ { ++ s->low_delay = 0; ++ s->avctx->has_b_frames++; ++ } ++ ++ if(out_of_order || pics > s->avctx->has_b_frames){ ++ out->reference &= ~DELAYED_PIC_REF; ++ for(i=out_idx; h->delayed_pic[i]; i++) ++ h->delayed_pic[i] = h->delayed_pic[i+1]; ++ } ++ if(!out_of_order && pics > s->avctx->has_b_frames){ ++ h->next_output_pic = out; ++ }else{ 
++ av_log(s->avctx, AV_LOG_DEBUG, "no picture\n"); ++ } ++ ++ ff_report_frame_setup_done(s->avctx); ++} ++ + static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){ + MpegEncContext * const s = &h->s; + int i; +@@ -3754,20 +4129,25 @@ + init_scan_tables(h); + alloc_tables(h); + +- for(i = 1; i < s->avctx->thread_count; i++) { +- H264Context *c; +- c = h->thread_context[i] = av_malloc(sizeof(H264Context)); +- memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext)); +- memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext)); +- c->sps = h->sps; +- c->pps = h->pps; +- init_scan_tables(c); +- clone_tables(c, h); +- } +- +- for(i = 0; i < s->avctx->thread_count; i++) +- if(context_init(h->thread_context[i]) < 0) ++ if (!USE_AVCODEC_EXECUTE(s->avctx)) { ++ if (context_init(h) < 0) + return -1; ++ } else { ++ for(i = 1; i < s->avctx->thread_count; i++) { ++ H264Context *c; ++ c = h->thread_context[i] = av_malloc(sizeof(H264Context)); ++ memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext)); ++ memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext)); ++ c->sps = h->sps; ++ c->pps = h->pps; ++ init_scan_tables(c); ++ clone_tables(c, h); ++ } ++ ++ for(i = 0; i < s->avctx->thread_count; i++) ++ if(context_init(h->thread_context[i]) < 0) ++ return -1; ++ } + + s->avctx->width = s->width; + s->avctx->height = s->height; +@@ -3802,6 +4182,10 @@ + h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME; + + if(h0->current_slice == 0){ ++ if(h->frame_num != h->prev_frame_num && ++ (h->prev_frame_num+1)%(1<sps.log2_max_frame_num) < (h->frame_num - h->sps.ref_frame_count)) ++ h->prev_frame_num = h->frame_num - h->sps.ref_frame_count - 1; ++ + while(h->frame_num != h->prev_frame_num && + h->frame_num != (h->prev_frame_num+1)%(1<sps.log2_max_frame_num)){ + av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num); +@@ -3810,6 +4194,8 @@ 
+ h->prev_frame_num++; + h->prev_frame_num %= 1<sps.log2_max_frame_num; + s->current_picture_ptr->frame_num= h->prev_frame_num; ++ ff_report_field_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0); ++ ff_report_field_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1); + execute_ref_pic_marking(h, NULL, 0); + } + +@@ -3943,6 +4329,7 @@ + if(h->slice_type_nos!=FF_I_TYPE && decode_ref_pic_list_reordering(h) < 0) + return -1; + ++ /* + if(h->slice_type_nos!=FF_I_TYPE){ + s->last_picture_ptr= &h->ref_list[0][0]; + ff_copy_picture(&s->last_picture, s->last_picture_ptr); +@@ -3951,6 +4338,7 @@ + s->next_picture_ptr= &h->ref_list[1][0]; + ff_copy_picture(&s->next_picture, s->next_picture_ptr); + } ++ */ + + if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE ) + || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) ) +@@ -4067,11 +4455,15 @@ + +(h->ref_list[j][i].reference&3); + } + +- h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; ++ //FIXME: fix draw_edges+PAFF+frame threads ++ h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && USE_FRAME_THREADING(s->avctx))) ? 0 : 16; + h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width; + + s->avctx->refs= h->sps.ref_frame_count; + ++ if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && h->slice_num==1) ++ decode_postinit(h); ++ + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n", + h->slice_num, +@@ -6605,6 +6997,40 @@ + #endif + } + ++/** ++ * Draw edges and report progress for the last MB row. 
++ */ ++static void decode_finish_row(H264Context *h){ ++ MpegEncContext * const s = &h->s; ++ int top = 16*(s->mb_y >> FIELD_PICTURE); ++ int height = 16 << FRAME_MBAFF; ++ int deblock_border = (16 + 4) << FRAME_MBAFF; ++ int pic_height = 16*s->mb_height >> FIELD_PICTURE; ++ ++ if (h->deblocking_filter) { ++ if((top + height) >= pic_height) ++ height += deblock_border; ++ ++ top -= deblock_border; ++ } ++ ++ if (top >= pic_height || (top + height) < h->emu_edge_height) ++ return; ++ ++ height = FFMIN(height, pic_height - top); ++ if (top < h->emu_edge_height) { ++ height = top+height; ++ top = 0; ++ } ++ ++ ff_draw_horiz_band(s, top, height); ++ ++ if (s->dropable) return; ++ ++ ff_report_field_progress((AVFrame*)s->current_picture_ptr, top + height - 1, ++ s->picture_structure==PICT_BOTTOM_FIELD); ++} ++ + static int decode_slice(struct AVCodecContext *avctx, void *arg){ + H264Context *h = *(void**)arg; + MpegEncContext * const s = &h->s; +@@ -6666,7 +7092,7 @@ + + if( ++s->mb_x >= s->mb_width ) { + s->mb_x = 0; +- ff_draw_horiz_band(s, 16*s->mb_y, 16); ++ decode_finish_row(h); + ++s->mb_y; + if(FIELD_OR_MBAFF_PICTURE) { + ++s->mb_y; +@@ -6703,7 +7129,7 @@ + + if(++s->mb_x >= s->mb_width){ + s->mb_x=0; +- ff_draw_horiz_band(s, 16*s->mb_y, 16); ++ decode_finish_row(h); + ++s->mb_y; + if(FIELD_OR_MBAFF_PICTURE) { + ++s->mb_y; +@@ -7404,7 +7830,7 @@ + H264Context *hx; ///< thread context + int context_count = 0; + +- h->max_contexts = avctx->thread_count; ++ h->max_contexts = USE_AVCODEC_EXECUTE(s->avctx) ? 
avctx->thread_count : 1; + #if 0 + int i; + for(i=0; i<50; i++){ +@@ -7631,7 +8057,9 @@ + Picture *out; + int i, out_idx; + +-//FIXME factorize this with the output code below ++ s->current_picture_ptr = NULL; ++ ++//FIXME factorize this with the output code + out = h->delayed_pic[0]; + out_idx = 0; + for(i=1; h->delayed_pic[i] && (h->delayed_pic[i]->poc && !h->delayed_pic[i]->key_frame); i++) +@@ -7651,7 +8079,7 @@ + return 0; + } + +- if(h->is_avc && !h->got_avcC) { ++ if(h->is_avc && !h->got_extradata) { + int i, cnt, nalsize; + unsigned char *p = avctx->extradata; + if(avctx->extradata_size < 7) { +@@ -7689,13 +8117,13 @@ + // Now store right nal length size, that will be use to parse all other nals + h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1; + // Do not reparse avcC +- h->got_avcC = 1; ++ h->got_extradata = 1; + } + +- if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){ ++ if(!h->got_extradata && !h->is_avc && s->avctx->extradata_size){ + if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0) + return -1; +- h->got_avcC = 1; ++ h->got_extradata = 1; + } + + buf_index=decode_nal_units(h, buf, buf_size); +@@ -7709,25 +8137,26 @@ + } + + if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){ +- Picture *out = s->current_picture_ptr; +- Picture *cur = s->current_picture_ptr; +- int i, pics, cross_idr, out_of_order, out_idx; + +- s->mb_y= 0; ++ if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h); + +- s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264; +- s->current_picture_ptr->pict_type= s->pict_type; ++ s->mb_y= 0; ++ ff_report_field_progress((AVFrame*)s->current_picture_ptr, (16*s->mb_height >> FIELD_PICTURE) - 1, ++ s->picture_structure==PICT_BOTTOM_FIELD); + + if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) + ff_vdpau_h264_set_reference_frames(s); + +- if(!s->dropable) { +- execute_ref_pic_marking(h, h->mmco, h->mmco_index); +- 
h->prev_poc_msb= h->poc_msb; +- h->prev_poc_lsb= h->poc_lsb; ++ if(!USE_FRAME_THREADING(avctx)){ ++ if(!s->dropable) { ++ execute_ref_pic_marking(h, h->mmco, h->mmco_index); ++ h->prev_poc_msb= h->poc_msb; ++ h->prev_poc_lsb= h->poc_lsb; ++ } ++ h->prev_frame_num_offset= h->frame_num_offset; ++ h->prev_frame_num= h->frame_num; ++ if(h->next_output_pic) h->outputed_poc = h->next_output_pic->poc; + } +- h->prev_frame_num_offset= h->frame_num_offset; +- h->prev_frame_num= h->frame_num; + + if (avctx->hwaccel) { + if (avctx->hwaccel->end_frame(avctx) < 0) +@@ -7754,6 +8183,17 @@ + + MPV_frame_end(s); + ++#if 1 /* MT */ ++ if (!h->next_output_pic) { ++ /* Wait for second field. */ ++ *data_size = 0; ++ } else { ++ *data_size = sizeof(AVFrame); ++ *pict = *(AVFrame*)h->next_output_pic; ++ } ++#endif ++ ++#if 0 /* Original */ + if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) { + /* Wait for second field. */ + *data_size = 0; +@@ -7870,6 +8310,7 @@ + av_log(avctx, AV_LOG_DEBUG, "no picture\n"); + } + } ++#endif + } + + assert(pict->data[0] || !*data_size); +@@ -8112,9 +8553,10 @@ + NULL, + decode_end, + decode_frame, +- /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY, ++ /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_FRAME_THREADS, + .flush= flush_dpb, + .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"), ++ .update_context = ONLY_IF_THREADS_ENABLED(decode_update_context), + .pix_fmts= ff_hwaccel_pixfmt_list_420, + }; + +diff -Naur ffmpeg-r17861.orig/libavcodec/h264.h ffmpeg-r17861/libavcodec/h264.h +--- ffmpeg-r17861.orig/libavcodec/h264.h 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/h264.h 2009-03-07 15:28:18.000000000 +0100 +@@ -250,7 +250,7 @@ + * Used to parse AVC variant of h264 + */ + int is_avc; ///< this flag is != 0 if codec is avc1 +- int got_avcC; ///< flag used to parse avcC data only once ++ int got_extradata; ///< flag used to parse extradata only once 
+ int nal_length_size; ///< Number of bytes used for nal length (1, 2 or 4) + + int chroma_qp[2]; //QPc +@@ -411,6 +411,7 @@ + according to picture reordering in slice header */ + int ref2frm[MAX_SLICES][2][64]; ///< reference to frame number lists, used in the loop filter, the first 2 are for -2,-1 + Picture *delayed_pic[MAX_DELAYED_PIC_COUNT+2]; //FIXME size? ++ Picture *next_output_pic; + int outputed_poc; + + /** +diff -Naur ffmpeg-r17861.orig/libavcodec/mdec.c ffmpeg-r17861/libavcodec/mdec.c +--- ffmpeg-r17861.orig/libavcodec/mdec.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/mdec.c 2009-03-07 15:28:18.000000000 +0100 +@@ -234,6 +234,18 @@ + return 0; + } + ++static av_cold int decode_init_copy(AVCodecContext *avctx){ ++ MDECContext * const a = avctx->priv_data; ++ AVFrame *p = (AVFrame*)&a->picture; ++ ++ avctx->coded_frame= p; ++ a->avctx= avctx; ++ ++ p->qscale_table= av_mallocz( p->qstride * a->mb_height); ++ ++ return 0; ++} ++ + static av_cold int decode_end(AVCodecContext *avctx){ + MDECContext * const a = avctx->priv_data; + +@@ -253,7 +265,8 @@ + NULL, + decode_end, + decode_frame, +- CODEC_CAP_DR1, ++ CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS, + .long_name= NULL_IF_CONFIG_SMALL("Sony PlayStation MDEC (Motion DECoder)"), ++ .init_copy= ONLY_IF_THREADS_ENABLED(decode_init_copy) + }; + +diff -Naur ffmpeg-r17861.orig/libavcodec/mimic.c ffmpeg-r17861/libavcodec/mimic.c +--- ffmpeg-r17861.orig/libavcodec/mimic.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/mimic.c 2009-03-07 15:28:18.000000000 +0100 +@@ -27,6 +27,7 @@ + #include "bitstream.h" + #include "bytestream.h" + #include "dsputil.h" ++#include "thread.h" + + #define MIMIC_HEADER_SIZE 20 + +@@ -51,6 +52,10 @@ + ScanTable scantable; + DSPContext dsp; + VLC vlc; ++ ++ /* Kept in the context so multithreading can have a constant to read from */ ++ int next_cur_index; ++ int next_prev_index; + } MimicContext; + + static const uint32_t huffcodes[] = { +@@ -121,6 
+126,21 @@ + return 0; + } + ++static int mimic_decode_update_context(AVCodecContext *avctx, AVCodecContext *avctx_from) ++{ ++ MimicContext *dst = avctx->priv_data, *src = avctx_from->priv_data; ++ ++ dst->cur_index = src->next_cur_index; ++ dst->prev_index = src->next_prev_index; ++ ++ memcpy(dst->buf_ptrs, src->buf_ptrs, sizeof(src->buf_ptrs)); ++ memcpy(dst->flipped_ptrs, src->flipped_ptrs, sizeof(src->flipped_ptrs)); ++ ++ memset(&dst->buf_ptrs[dst->cur_index], 0, sizeof(AVFrame)); ++ ++ return 0; ++} ++ + static const int8_t vlcdec_lookup[9][64] = { + { 0, }, + { -1, 1, }, +@@ -205,7 +225,7 @@ + static int decode(MimicContext *ctx, int quality, int num_coeffs, + int is_iframe) + { +- int y, x, plane; ++ int y, x, plane, cur_row = 0; + + for(plane = 0; plane < 3; plane++) { + const int is_chroma = !!plane; +@@ -236,6 +256,7 @@ + int index = (ctx->cur_index+backref)&15; + uint8_t *p = ctx->flipped_ptrs[index].data[0]; + ++ ff_await_frame_progress(&ctx->buf_ptrs[index], cur_row); + if(p) { + p += src - + ctx->flipped_ptrs[ctx->prev_index].data[plane]; +@@ -246,6 +267,7 @@ + } + } + } else { ++ ff_await_frame_progress(&ctx->buf_ptrs[ctx->prev_index], cur_row); + ctx->dsp.put_pixels_tab[1][0](dst, src, stride, 8); + } + src += 8; +@@ -253,6 +275,8 @@ + } + src += (stride - ctx->num_hblocks[plane])<<3; + dst += (stride - ctx->num_hblocks[plane])<<3; ++ ++ ff_report_frame_progress(&ctx->buf_ptrs[ctx->cur_index], cur_row++); + } + } + +@@ -324,14 +348,19 @@ + } + + ctx->buf_ptrs[ctx->cur_index].reference = 1; +- if(avctx->get_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index])) { ++ if(ff_get_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index])) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return -1; + } + ++ ctx->next_prev_index = ctx->cur_index; ++ ctx->next_cur_index = (ctx->cur_index - 1) & 15; ++ + prepare_avpic(ctx, &ctx->flipped_ptrs[ctx->cur_index], + (AVPicture*) &ctx->buf_ptrs[ctx->cur_index]); + ++ ff_report_frame_setup_done(avctx); ++ + ctx->swap_buf = 
av_fast_realloc(ctx->swap_buf, &ctx->swap_buf_size, + swap_buf_size + FF_INPUT_BUFFER_PADDING_SIZE); + if(!ctx->swap_buf) +@@ -343,7 +372,7 @@ + init_get_bits(&ctx->gb, ctx->swap_buf, swap_buf_size << 3); + + if(!decode(ctx, quality, num_coeffs, !is_pframe)) { +- avctx->release_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index]); ++ ff_release_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index]); + return -1; + } + +@@ -351,13 +380,12 @@ + *(AVFrame*)data = ctx->buf_ptrs[ctx->cur_index]; + *data_size = sizeof(AVFrame); + +- ctx->prev_index = ctx->cur_index; +- ctx->cur_index--; +- ctx->cur_index &= 15; ++ ctx->prev_index = ctx->next_prev_index; ++ ctx->cur_index = ctx->next_cur_index; + + /* Only release frames that aren't used for backreferences anymore */ + if(ctx->buf_ptrs[ctx->cur_index].data[0]) +- avctx->release_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index]); ++ ff_release_buffer(avctx, &ctx->buf_ptrs[ctx->cur_index]); + + return buf_size; + } +@@ -368,9 +396,12 @@ + int i; + + av_free(ctx->swap_buf); ++ ++ if(avctx->is_copy) return 0; ++ + for(i = 0; i < 16; i++) + if(ctx->buf_ptrs[i].data[0]) +- avctx->release_buffer(avctx, &ctx->buf_ptrs[i]); ++ ff_release_buffer(avctx, &ctx->buf_ptrs[i]); + free_vlc(&ctx->vlc); + + return 0; +@@ -385,6 +416,7 @@ + NULL, + mimic_decode_end, + mimic_decode_frame, +- CODEC_CAP_DR1, ++ CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS, + .long_name = NULL_IF_CONFIG_SMALL("Mimic"), ++ .update_context = ONLY_IF_THREADS_ENABLED(mimic_decode_update_context) + }; +diff -Naur ffmpeg-r17861.orig/libavcodec/mpeg12.c ffmpeg-r17861/libavcodec/mpeg12.c +--- ffmpeg-r17861.orig/libavcodec/mpeg12.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/mpeg12.c 2009-03-07 15:28:18.000000000 +0100 +@@ -37,6 +37,7 @@ + #include "bytestream.h" + #include "vdpau_internal.h" + #include "xvmc_internal.h" ++#include "thread.h" + + //#undef NDEBUG + //#include +@@ -1191,6 +1192,27 @@ + return 0; + } + ++static int mpeg_decode_update_context(AVCodecContext 
*avctx, AVCodecContext *avctx_from) ++{ ++ Mpeg1Context *ctx = avctx->priv_data, *ctx_from = avctx_from->priv_data; ++ MpegEncContext *s = &ctx->mpeg_enc_ctx, *s1 = &ctx_from->mpeg_enc_ctx; ++ int err; ++ ++ if(!ctx_from->mpeg_enc_ctx_allocated || !s1->context_initialized) ++ return 0; ++ ++ err = ff_mpeg_update_context(avctx, avctx_from); ++ if(err) return err; ++ ++ if(!ctx->mpeg_enc_ctx_allocated) ++ memcpy(s + 1, s1 + 1, sizeof(Mpeg1Context) - sizeof(MpegEncContext)); ++ ++ if(!(s->pict_type == FF_B_TYPE || s->low_delay)) ++ s->picture_number++; ++ ++ return 0; ++} ++ + static void quant_matrix_rebuild(uint16_t *matrix, const uint8_t *old_perm, + const uint8_t *new_perm){ + uint16_t temp_matrix[64]; +@@ -1631,6 +1653,8 @@ + } + + *s->current_picture_ptr->pan_scan= s1->pan_scan; ++ ++ if (USE_FRAME_THREADING(avctx)) ff_report_frame_setup_done(avctx); + }else{ //second field + int i; + +@@ -1803,6 +1827,7 @@ + const int mb_size= 16>>s->avctx->lowres; + + ff_draw_horiz_band(s, mb_size*s->mb_y, mb_size); ++ MPV_report_decode_progress(s); + + s->mb_x = 0; + s->mb_y++; +@@ -1957,7 +1982,7 @@ + *pict= *(AVFrame*)s->current_picture_ptr; + ff_print_debug_info(s, pict); + } else { +- s->picture_number++; ++ if (!USE_FRAME_THREADING(avctx)) s->picture_number++; + /* latency of 1 frame for I- and P-frames */ + /* XXX: use another variable than picture_number */ + if (s->last_picture_ptr != NULL) { +@@ -2324,7 +2349,7 @@ + buf_ptr = ff_find_start_code(buf_ptr,buf_end, &start_code); + if (start_code > 0x1ff){ + if(s2->pict_type != FF_B_TYPE || avctx->skip_frame <= AVDISCARD_DEFAULT){ +- if(avctx->thread_count > 1){ ++ if(USE_AVCODEC_EXECUTE(avctx)){ + int i; + + avctx->execute(avctx, slice_decode_thread, (void**)&(s2->thread_context[0]), NULL, s->slice_count, sizeof(void*)); +@@ -2425,7 +2450,7 @@ + break; + } + +- if(avctx->thread_count > 1){ ++ if(USE_AVCODEC_EXECUTE(avctx)){ + int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count; + 
if(threshold <= mb_y){ + MpegEncContext *thread_context= s2->thread_context[s->slice_count]; +@@ -2475,9 +2500,10 @@ + NULL, + mpeg_decode_end, + mpeg_decode_frame, +- CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY, ++ CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY | CODEC_CAP_FRAME_THREADS, + .flush= ff_mpeg_flush, + .long_name= NULL_IF_CONFIG_SMALL("MPEG-1 video"), ++ .update_context= ONLY_IF_THREADS_ENABLED(mpeg_decode_update_context), + }; + + AVCodec mpeg2video_decoder = { +@@ -2511,7 +2537,7 @@ + + #if CONFIG_MPEG_XVMC_DECODER + static av_cold int mpeg_mc_decode_init(AVCodecContext *avctx){ +- if( avctx->thread_count > 1) ++ if( USE_AVCODEC_EXECUTE(avctx) ) + return -1; + if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) ) + return -1; +diff -Naur ffmpeg-r17861.orig/libavcodec/mpegvideo.c ffmpeg-r17861/libavcodec/mpegvideo.c +--- ffmpeg-r17861.orig/libavcodec/mpegvideo.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/mpegvideo.c 2009-03-07 15:28:18.000000000 +0100 +@@ -35,6 +35,7 @@ + #include "msmpeg4.h" + #include "faandct.h" + #include "xvmc_internal.h" ++#include "thread.h" + #include + + //#undef NDEBUG +@@ -169,7 +170,7 @@ + */ + static void free_frame_buffer(MpegEncContext *s, Picture *pic) + { +- s->avctx->release_buffer(s->avctx, (AVFrame*)pic); ++ ff_release_buffer(s->avctx, (AVFrame*)pic); + } + + /** +@@ -179,7 +180,7 @@ + { + int r; + +- r = s->avctx->get_buffer(s->avctx, (AVFrame*)pic); ++ r = ff_get_buffer(s->avctx, (AVFrame*)pic); + + if (r<0 || !pic->age || !pic->type || !pic->data[0]) { + av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]); +@@ -392,6 +393,80 @@ + //STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads + } + ++int ff_mpeg_update_context(AVCodecContext *dst, AVCodecContext *src) ++{ ++ MpegEncContext *s = dst->priv_data, *s1 = 
src->priv_data; ++ ++ if(!s1->context_initialized) return 0; ++ ++ //FIXME can parameters change on I-frames? in that case dst may need a reinit ++ if(!s->context_initialized){ ++ memcpy(s, s1, sizeof(MpegEncContext)); ++ ++ s->avctx = dst; ++ s->picture_range_start += MAX_PICTURE_COUNT; ++ s->picture_range_end += MAX_PICTURE_COUNT; ++ s->bitstream_buffer = NULL; ++ s->bitstream_buffer_size = s->allocated_bitstream_buffer_size = 0; ++ ++ MPV_common_init(s); ++ } ++ ++ s->avctx->coded_height = s1->avctx->coded_height; ++ s->avctx->coded_width = s1->avctx->coded_width; ++ s->avctx->width = s1->avctx->width; ++ s->avctx->height = s1->avctx->height; ++ ++ s->coded_picture_number = s1->coded_picture_number; ++ s->picture_number = s1->picture_number; ++ s->input_picture_number = s1->input_picture_number; ++ ++ memcpy(s->picture, s1->picture, s1->picture_count * sizeof(Picture)); ++ memcpy(&s->last_picture, &s1->last_picture, (char*)&s1->last_picture_ptr - (char*)&s1->last_picture); ++ ++ s->last_picture_ptr = REBASE_PICTURE(s1->last_picture_ptr, s, s1); ++ s->current_picture_ptr = REBASE_PICTURE(s1->current_picture_ptr, s, s1); ++ s->next_picture_ptr = REBASE_PICTURE(s1->next_picture_ptr, s, s1); ++ ++ memcpy(s->prev_pict_types, s1->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE); ++ ++ //Error/bug resilience ++ s->next_p_frame_damaged = s1->next_p_frame_damaged; ++ s->workaround_bugs = s1->workaround_bugs; ++ ++ //MPEG4 timing info ++ memcpy(&s->time_increment_bits, &s1->time_increment_bits, (char*)&s1->shape - (char*)&s1->time_increment_bits); ++ ++ //B-frame info ++ s->max_b_frames = s1->max_b_frames; ++ s->low_delay = s1->low_delay; ++ s->dropable = s1->dropable; ++ ++ //DivX handling (doesn't work) ++ s->divx_packed = s1->divx_packed; ++ ++ if(s1->bitstream_buffer){ ++ s->bitstream_buffer = av_fast_realloc(s->bitstream_buffer, &s->allocated_bitstream_buffer_size, s1->allocated_bitstream_buffer_size+FF_INPUT_BUFFER_PADDING_SIZE); ++ s->bitstream_buffer_size = 
s1->bitstream_buffer_size; ++ memcpy(s->bitstream_buffer, s1->bitstream_buffer, s1->bitstream_buffer_size); ++ memset(s->bitstream_buffer+s->bitstream_buffer_size, 0, FF_INPUT_BUFFER_PADDING_SIZE); ++ } ++ ++ //MPEG2/interlacing info ++ memcpy(&s->progressive_sequence, &s1->progressive_sequence, (char*)&s1->rtp_mode - (char*)&s1->progressive_sequence); ++ ++ if(!s1->first_field){ ++ s->last_pict_type= s1->pict_type; ++ if (s1->current_picture_ptr) s->last_lambda_for[s1->pict_type] = s1->current_picture_ptr->quality; ++ ++ if(s1->pict_type!=FF_B_TYPE){ ++ s->last_non_b_pict_type= s1->pict_type; ++ } ++ } ++ ++ return 0; ++} ++ + /** + * sets the given MpegEncContext to common defaults (same for encoding and decoding). + * the changed fields will not depend upon the prior state of the MpegEncContext. +@@ -412,6 +487,9 @@ + + s->f_code = 1; + s->b_code = 1; ++ ++ s->picture_range_start = 0; ++ s->picture_range_end = MAX_PICTURE_COUNT; + } + + /** +@@ -437,7 +515,8 @@ + return -1; + } + +- if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){ ++ if(USE_AVCODEC_EXECUTE(s->avctx) && ++ (s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height))){ + av_log(s->avctx, AV_LOG_ERROR, "too many threads\n"); + return -1; + } +@@ -536,7 +615,8 @@ + CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t)) + } + } +- CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture)) ++ s->picture_count = MAX_PICTURE_COUNT * FFMAX(1, s->avctx->thread_count); ++ CHECKED_ALLOCZ(s->picture, s->picture_count * sizeof(Picture)) + + CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t)) + +@@ -600,20 +680,26 @@ + } + + s->context_initialized = 1; +- + s->thread_context[0]= s; +- threads = s->avctx->thread_count; + +- for(i=1; ithread_context[i]= av_malloc(sizeof(MpegEncContext)); +- memcpy(s->thread_context[i], s, sizeof(MpegEncContext)); +- } ++ if (USE_AVCODEC_EXECUTE(s->avctx)) { ++ threads 
= s->avctx->thread_count; ++ ++ for(i=1; ithread_context[i]= av_malloc(sizeof(MpegEncContext)); ++ memcpy(s->thread_context[i], s, sizeof(MpegEncContext)); ++ } + +- for(i=0; ithread_context[i], s) < 0) +- goto fail; +- s->thread_context[i]->start_mb_y= (s->mb_height*(i ) + s->avctx->thread_count/2) / s->avctx->thread_count; +- s->thread_context[i]->end_mb_y = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count; ++ for(i=0; ithread_context[i], s) < 0) ++ goto fail; ++ s->thread_context[i]->start_mb_y= (s->mb_height*(i ) + s->avctx->thread_count/2) / s->avctx->thread_count; ++ s->thread_context[i]->end_mb_y = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count; ++ } ++ } else { ++ if(init_duplicate_context(s, s) < 0) goto fail; ++ s->start_mb_y = 0; ++ s->end_mb_y = s->mb_height; + } + + return 0; +@@ -627,12 +713,14 @@ + { + int i, j, k; + +- for(i=0; iavctx->thread_count; i++){ +- free_duplicate_context(s->thread_context[i]); +- } +- for(i=1; iavctx->thread_count; i++){ +- av_freep(&s->thread_context[i]); +- } ++ if (USE_AVCODEC_EXECUTE(s->avctx)) { ++ for(i=0; iavctx->thread_count; i++){ ++ free_duplicate_context(s->thread_context[i]); ++ } ++ for(i=1; iavctx->thread_count; i++){ ++ av_freep(&s->thread_context[i]); ++ } ++ } else free_duplicate_context(s); + + av_freep(&s->parse_context.buffer); + s->parse_context.buffer_size=0; +@@ -688,8 +776,8 @@ + av_freep(&s->reordered_input_picture); + av_freep(&s->dct_offset); + +- if(s->picture){ +- for(i=0; ipicture && !s->avctx->is_copy){ ++ for(i=0; ipicture_count; i++){ + free_picture(s, &s->picture[i]); + } + } +@@ -801,14 +889,14 @@ + int i; + + if(shared){ +- for(i=0; ipicture_range_start; ipicture_range_end; i++){ + if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i; + } + }else{ +- for(i=0; ipicture_range_start; ipicture_range_end; i++){ + if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME + } +- for(i=0; ipicture_range_start; 
ipicture_range_end; i++){ + if(s->picture[i].data[0]==NULL) return i; + } + } +@@ -865,7 +953,7 @@ + /* release forgotten pictures */ + /* if(mpeg124/h263) */ + if(!s->encoding){ +- for(i=0; ipicture_count; i++){ + if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){ + av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n"); + free_frame_buffer(s, &s->picture[i]); +@@ -877,7 +965,7 @@ + alloc: + if(!s->encoding){ + /* release non reference frames */ +- for(i=0; ipicture_count; i++){ + if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){ + free_frame_buffer(s, &s->picture[i]); + } +@@ -906,6 +994,7 @@ + s->current_picture_ptr= (Picture*)pic; + s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic + s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence; ++ s->current_picture_ptr->field_picture= s->picture_structure != PICT_FRAME; + } + + s->current_picture_ptr->pict_type= s->pict_type; +@@ -981,19 +1070,21 @@ + void MPV_frame_end(MpegEncContext *s) + { + int i; +- /* draw edge for correct motion prediction if outside */ ++ /* redraw edges for the frame if decoding didn't complete */ + //just to make sure that all data is rendered. 
+ if(CONFIG_MPEG_XVMC_DECODER && s->avctx->xvmc_acceleration){ + ff_xvmc_field_end(s); + }else if(!s->avctx->hwaccel + && !(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU) ++ && s->error_count + && s->unrestricted_mv + && s->current_picture.reference + && !s->intra_only + && !(s->flags&CODEC_FLAG_EMU_EDGE)) { +- s->dsp.draw_edges(s->current_picture.data[0], s->linesize , s->h_edge_pos , s->v_edge_pos , EDGE_WIDTH ); +- s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2); +- s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2); ++ int edges = EDGE_BOTTOM | EDGE_TOP, h = s->v_edge_pos; ++ s->dsp.draw_edges(s->current_picture_ptr->data[0], s->linesize , s->h_edge_pos , h , EDGE_WIDTH , edges); ++ s->dsp.draw_edges(s->current_picture_ptr->data[1], s->uvlinesize, s->h_edge_pos>>1, h>>1, EDGE_WIDTH/2, edges); ++ s->dsp.draw_edges(s->current_picture_ptr->data[2], s->uvlinesize, s->h_edge_pos>>1, h>>1, EDGE_WIDTH/2, edges); + } + emms_c(); + +@@ -1015,7 +1106,7 @@ + + if(s->encoding){ + /* release non-reference frames */ +- for(i=0; ipicture_count; i++){ + if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){ + free_frame_buffer(s, &s->picture[i]); + } +@@ -1689,6 +1780,43 @@ + } + } + ++/** ++ * find the lowest MB row referenced in the MVs ++ */ ++int MPV_lowest_referenced_row(MpegEncContext *s, int dir) ++{ ++ int my_max = INT_MIN, my_min = INT_MAX, qpel_shift = !s->quarter_sample; ++ int my, off, i, mvs; ++ ++ if (s->picture_structure != PICT_FRAME) goto unhandled; ++ ++ switch (s->mv_type) { ++ case MV_TYPE_16X16: ++ mvs = 1; ++ break; ++ case MV_TYPE_16X8: ++ mvs = 2; ++ break; ++ case MV_TYPE_8X8: ++ mvs = 4; ++ break; ++ default: ++ goto unhandled; ++ } ++ ++ for (i = 0; i < mvs; i++) { ++ my = s->mv[dir][i][1]<> 6; ++ ++ return FFMIN(FFMAX(s->mb_y + off, 0), s->mb_height-1); ++unhandled: ++ 
return s->mb_height-1; ++} ++ + /* put block[] to dest[] */ + static inline void put_dct(MpegEncContext *s, + DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale) +@@ -1852,6 +1980,16 @@ + /* motion handling */ + /* decoding or more than one mb_type (MC was already done otherwise) */ + if(!s->encoding){ ++ ++ if(USE_FRAME_THREADING(s->avctx)) { ++ if (s->mv_dir & MV_DIR_FORWARD) { ++ ff_await_frame_progress((AVFrame*)s->last_picture_ptr, MPV_lowest_referenced_row(s, 0)); ++ } ++ if (s->mv_dir & MV_DIR_BACKWARD) { ++ ff_await_frame_progress((AVFrame*)s->next_picture_ptr, MPV_lowest_referenced_row(s, 1)); ++ } ++ } ++ + if(lowres_flag){ + h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab; + +@@ -2016,18 +2154,31 @@ + * @param h is the normal height, this will be reduced automatically if needed for the last row + */ + void ff_draw_horiz_band(MpegEncContext *s, int y, int h){ ++ if(s->picture_structure != PICT_FRAME){ ++ h <<= 1; ++ y <<= 1; ++ } ++ ++ if (s->unrestricted_mv && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) { ++ int sides = 0, edge_h; ++ if (y==0) sides |= EDGE_TOP; ++ if (y + h >= s->v_edge_pos) sides |= EDGE_BOTTOM; ++ ++ edge_h= FFMIN(h, s->v_edge_pos - y); ++ ++ s->dsp.draw_edges(s->current_picture_ptr->data[0] + y *s->linesize , s->linesize , s->h_edge_pos , edge_h , EDGE_WIDTH , sides); ++ s->dsp.draw_edges(s->current_picture_ptr->data[1] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides); ++ s->dsp.draw_edges(s->current_picture_ptr->data[2] + (y>>1)*s->uvlinesize, s->uvlinesize, s->h_edge_pos>>1, edge_h>>1, EDGE_WIDTH/2, sides); ++ } ++ ++ h= FFMIN(h, s->avctx->height - y); ++ ++ if(s->picture_structure != PICT_FRAME && s->first_field && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return; ++ + if (s->avctx->draw_horiz_band) { + AVFrame *src; + int offset[4]; + +- if(s->picture_structure != PICT_FRAME){ +- h <<= 1; +- y <<= 1; +- if(s->first_field && 
!(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return; +- } +- +- h= FFMIN(h, s->avctx->height - y); +- + if(s->pict_type==FF_B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) + src= (AVFrame*)s->current_picture_ptr; + else if(s->last_picture_ptr) +@@ -2086,7 +2237,7 @@ + if(s==NULL || s->picture==NULL) + return; + +- for(i=0; ipicture_count; i++){ + if(s->picture[i].data[0] && ( s->picture[i].type == FF_BUFFER_TYPE_INTERNAL + || s->picture[i].type == FF_BUFFER_TYPE_USER)) + free_frame_buffer(s, &s->picture[i]); +@@ -2339,3 +2490,9 @@ + s->y_dc_scale= s->y_dc_scale_table[ qscale ]; + s->c_dc_scale= s->c_dc_scale_table[ s->chroma_qscale ]; + } ++ ++void MPV_report_decode_progress(MpegEncContext *s) ++{ ++ if (s->pict_type != FF_B_TYPE && !s->partitioned_frame) ++ ff_report_frame_progress((AVFrame*)s->current_picture_ptr, s->mb_y); ++} +diff -Naur ffmpeg-r17861.orig/libavcodec/mpegvideo_enc.c ffmpeg-r17861/libavcodec/mpegvideo_enc.c +--- ffmpeg-r17861.orig/libavcodec/mpegvideo_enc.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/mpegvideo_enc.c 2009-03-07 15:28:18.000000000 +0100 +@@ -35,6 +35,7 @@ + #include "msmpeg4.h" + #include "h263.h" + #include "faandct.h" ++#include "thread.h" + #include "aandcttab.h" + #include + +@@ -1185,9 +1186,9 @@ + { + MpegEncContext *s = avctx->priv_data; + AVFrame *pic_arg = data; +- int i, stuffing_count; ++ int i, stuffing_count, context_count = USE_AVCODEC_EXECUTE(avctx) ? 
avctx->thread_count : 1; + +- for(i=0; ithread_count; i++){ ++ for(i=0; ithread_context[i]->start_mb_y; + int end_y= s->thread_context[i]-> end_mb_y; + int h= s->mb_height; +@@ -1250,7 +1251,7 @@ + s->last_non_b_time= s->time - s->pp_time; + } + // av_log(NULL, AV_LOG_ERROR, "R:%d ", s->next_lambda); +- for(i=0; ithread_count; i++){ ++ for(i=0; ithread_context[i]->pb; + init_put_bits(pb, pb->buf, pb->buf_end - pb->buf); + } +@@ -2705,6 +2706,7 @@ + { + int i; + int bits; ++ int context_count = USE_AVCODEC_EXECUTE(s->avctx) ? s->avctx->thread_count : 1; + + s->picture_number = picture_number; + +@@ -2744,7 +2746,7 @@ + } + + s->mb_intra=0; //for the rate distortion & bit compare functions +- for(i=1; iavctx->thread_count; i++){ ++ for(i=1; ithread_context[i], s); + } + +@@ -2757,11 +2759,11 @@ + s->lambda2= (s->lambda2* (int64_t)s->avctx->me_penalty_compensation + 128)>>8; + if(s->pict_type != FF_B_TYPE && s->avctx->me_threshold==0){ + if((s->avctx->pre_me && s->last_non_b_pict_type==FF_I_TYPE) || s->avctx->pre_me==2){ +- s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count, sizeof(void*)); ++ s->avctx->execute(s->avctx, pre_estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, context_count, sizeof(void*)); + } + } + +- s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count, sizeof(void*)); ++ s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, context_count, sizeof(void*)); + }else /* if(s->pict_type == FF_I_TYPE) */{ + /* I-Frame */ + for(i=0; imb_stride*s->mb_height; i++) +@@ -2769,10 +2771,10 @@ + + if(!s->fixed_qscale){ + /* finding spatial complexity for I-frame rate control */ +- s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count, sizeof(void*)); ++ s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, 
context_count, sizeof(void*)); + } + } +- for(i=1; iavctx->thread_count; i++){ ++ for(i=1; ithread_context[i]); + } + s->current_picture.mc_mb_var_sum= s->current_picture_ptr->mc_mb_var_sum= s->me.mc_mb_var_sum_temp; +@@ -2906,11 +2908,11 @@ + bits= put_bits_count(&s->pb); + s->header_bits= bits - s->last_bits; + +- for(i=1; iavctx->thread_count; i++){ ++ for(i=1; ithread_context[i], s); + } +- s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count, sizeof(void*)); +- for(i=1; iavctx->thread_count; i++){ ++ s->avctx->execute(s->avctx, encode_thread, (void**)&(s->thread_context[0]), NULL, context_count, sizeof(void*)); ++ for(i=1; ithread_context[i]); + } + emms_c(); +diff -Naur ffmpeg-r17861.orig/libavcodec/mpegvideo.h ffmpeg-r17861/libavcodec/mpegvideo.h +--- ffmpeg-r17861.orig/libavcodec/mpegvideo.h 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/mpegvideo.h 2009-03-07 15:28:18.000000000 +0100 +@@ -121,6 +121,7 @@ + int ref_poc[2][2][16]; ///< h264 POCs of the frames used as reference (FIXME need per slice) + int ref_count[2][2]; ///< number of entries in ref_poc (FIXME need per slice) + int mbaff; ///< h264 1 -> MBAFF frame 0-> not MBAFF ++ int field_picture; ///< whether or not the picture was encoded in seperate fields + + int mb_var_sum; ///< sum of MB variance for current frame + int mc_mb_var_sum; ///< motion compensated MB variance for current frame +@@ -245,6 +246,9 @@ + Picture **input_picture; ///< next pictures on display order for encoding + Picture **reordered_input_picture; ///< pointer to the next pictures in codedorder for encoding + ++ int picture_count; ///< number of allocated pictures (MAX_PICTURE_COUNT * avctx->thread_count) ++ int picture_range_start, picture_range_end; ///< the part of picture that this context can allocate in ++ + int start_mb_y; ///< start mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y) + int end_mb_y; ///< end mb_y of 
this thread (so current thread should process start_mb_y <= row < end_mb_y) + struct MpegEncContext *thread_context[MAX_THREADS]; +@@ -671,6 +675,7 @@ + void (*denoise_dct)(struct MpegEncContext *s, DCTELEM *block); + } MpegEncContext; + ++#define REBASE_PICTURE(pic, new_ctx, old_ctx) (pic ? &new_ctx->picture[pic - old_ctx->picture] : NULL) + + void MPV_decode_defaults(MpegEncContext *s); + int MPV_common_init(MpegEncContext *s); +@@ -695,6 +700,9 @@ + int ff_find_unused_picture(MpegEncContext *s, int shared); + void ff_denoise_dct(MpegEncContext *s, DCTELEM *block); + void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src); ++int MPV_lowest_referenced_row(MpegEncContext *s, int dir); ++void MPV_report_decode_progress(MpegEncContext *s); ++int ff_mpeg_update_context(AVCodecContext *dst, AVCodecContext *src); + const uint8_t *ff_find_start_code(const uint8_t *p, const uint8_t *end, uint32_t *state); + + void ff_er_frame_start(MpegEncContext *s); +diff -Naur ffmpeg-r17861.orig/libavcodec/options.c ffmpeg-r17861/libavcodec/options.c +--- ffmpeg-r17861.orig/libavcodec/options.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/options.c 2009-03-07 15:28:18.000000000 +0100 +@@ -394,6 +394,9 @@ + {"rc_max_vbv_use", NULL, OFFSET(rc_max_available_vbv_use), FF_OPT_TYPE_FLOAT, 1.0/3, 0.0, FLT_MAX, V|E}, + {"rc_min_vbv_use", NULL, OFFSET(rc_min_vbv_overflow_use), FF_OPT_TYPE_FLOAT, 3, 0.0, FLT_MAX, V|E}, + {"ticks_per_frame", NULL, OFFSET(ticks_per_frame), FF_OPT_TYPE_INT, 1, 1, INT_MAX, A|V|E|D}, ++{"thread_type", "select multithreading type", OFFSET(thread_type), FF_OPT_TYPE_INT, FF_THREAD_DEFAULT, 0, INT_MAX, V|E|D, "thread_type"}, ++{"slice", NULL, 0, FF_OPT_TYPE_CONST, FF_THREAD_SLICE, INT_MIN, INT_MAX, V|E|D, "thread_type"}, ++{"frame", NULL, 0, FF_OPT_TYPE_CONST, FF_THREAD_FRAME, INT_MIN, INT_MAX, V|E|D, "thread_type"}, + {NULL}, + }; + +diff -Naur ffmpeg-r17861.orig/libavcodec/os2thread.c ffmpeg-r17861/libavcodec/os2thread.c +--- 
ffmpeg-r17861.orig/libavcodec/os2thread.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/os2thread.c 2009-03-07 15:28:18.000000000 +0100 +@@ -114,7 +114,13 @@ + ThreadContext *c; + uint32_t threadid; + ++ if(!(s->thread_type & FF_THREAD_SLICE)){ ++ av_log(s, AV_LOG_WARNING, "The requested thread algorithm is not supported with this thread library.\n"); ++ return 0; ++ } ++ + s->thread_count= thread_count; ++ s->active_thread_type= FF_THREAD_SLICE; + + assert(!s->thread_opaque); + c= av_mallocz(sizeof(ThreadContext)*thread_count); +diff -Naur ffmpeg-r17861.orig/libavcodec/pthread.c ffmpeg-r17861/libavcodec/pthread.c +--- ffmpeg-r17861.orig/libavcodec/pthread.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/pthread.c 2009-03-07 15:28:18.000000000 +0100 +@@ -1,5 +1,6 @@ + /* + * Copyright (c) 2004 Roman Shaposhnik ++ * Copyright (c) 2008 Alexander Strange (astrange@ithinksw.com) + * + * Many thanks to Steven M. Schultz for providing clever ideas and + * to Michael Niedermayer for writing initial +@@ -24,6 +25,9 @@ + #include + + #include "avcodec.h" ++#include "thread.h" ++ ++#define MAX_DELAYED_RELEASED_BUFFERS 32 + + typedef int (action_func)(AVCodecContext *c, void *arg); + +@@ -43,6 +47,61 @@ + int done; + } ThreadContext; + ++typedef struct PerThreadContext { ++ pthread_t thread; ++ pthread_cond_t input_cond; ///< Used to wait for a new frame from the main thread. ++ pthread_cond_t progress_cond; ///< Used by child threads to wait for decoding/encoding progress. ++ pthread_cond_t output_cond; ///< Used by the main thread to wait for frames to finish. ++ ++ pthread_mutex_t mutex; ///< Mutex used to protect the contents of the PerThreadContext. ++ pthread_mutex_t progress_mutex; ///< Mutex used to protect frame progress values and progress_cond. ++ ++ AVCodecContext *avctx; ///< Context used to decode frames passed to this thread. ++ ++ uint8_t *buf; ///< Input frame (for decoding) or output (for encoding). 
++ int buf_size; ++ int allocated_buf_size; ++ ++ AVFrame picture; ///< Output frame (for decoding) or input (for encoding). ++ int got_picture; ///< The output of got_picture_ptr from the last avcodec_decode_video() call (for decoding). ++ int result; ///< The result of the last codec decode/encode() call. ++ ++ struct FrameThreadContext *parent; ++ ++ enum { ++ STATE_INPUT_READY, ///< Set when the thread is sleeping. ++ STATE_SETTING_UP, ///< Set before the codec has called ff_report_frame_setup_done(). ++ STATE_SETUP_FINISHED /**< ++ * Set after the codec has called ff_report_frame_setup_done(). ++ * At this point it is safe to start the next thread. ++ */ ++ } state; ++ ++ /** ++ * Array of frames passed to ff_release_buffer(), ++ * to be released later. ++ */ ++ AVFrame released_buffers[MAX_DELAYED_RELEASED_BUFFERS]; ++ int num_released_buffers; ++} PerThreadContext; ++ ++typedef struct FrameThreadContext { ++ PerThreadContext *threads; ///< The contexts for frame decoding threads. ++ PerThreadContext *prev_thread; ///< The last thread submit_frame() was called on. ++ ++ int next_decoding; ///< The next context to submit frames to. ++ int next_finished; ///< The next context to return output from. ++ ++ int delaying; /** ++ * Set for the first N frames, where N is the number of threads. ++ * While it is set, ff_en/decode_frame_threaded won't return any results. ++ */ ++ ++ pthread_mutex_t buffer_mutex; ///< Mutex used to protect get/release_buffer(). ++ ++ int die; ///< Set to cause threads to exit. 
++} FrameThreadContext; ++ + static void* attribute_align_arg worker(void *v) + { + AVCodecContext *avctx = v; +@@ -81,7 +140,7 @@ + pthread_mutex_unlock(&c->current_job_lock); + } + +-void avcodec_thread_free(AVCodecContext *avctx) ++static void thread_free(AVCodecContext *avctx) + { + ThreadContext *c = avctx->thread_opaque; + int i; +@@ -106,6 +165,9 @@ + ThreadContext *c= avctx->thread_opaque; + int dummy_ret; + ++ if (!USE_AVCODEC_EXECUTE(avctx) || avctx->thread_count <= 1) ++ return avcodec_default_execute(avctx, func, arg, ret, job_count, job_size); ++ + if (job_count <= 0) + return 0; + +@@ -130,7 +192,7 @@ + return 0; + } + +-int avcodec_thread_init(AVCodecContext *avctx, int thread_count) ++static int thread_init(AVCodecContext *avctx, int thread_count) + { + int i; + ThreadContext *c; +@@ -169,3 +231,491 @@ + avctx->execute = avcodec_thread_execute; + return 0; + } ++ ++/** ++ * Read and decode frames from the main thread until fctx->die is set. ++ * ff_report_frame_setup_done() is called before decoding if the codec ++ * doesn't define update_context(). To simplify codecs and avoid deadlock ++ * bugs, progress is set to INT_MAX on all returned frames. 
++ */ ++static attribute_align_arg void *frame_worker_thread(void *arg) ++{ ++ PerThreadContext * volatile p = arg; ++ AVCodecContext *avctx = p->avctx; ++ FrameThreadContext * volatile fctx = p->parent; ++ AVCodec *codec = avctx->codec; ++ ++ while (1) { ++ pthread_mutex_lock(&p->mutex); ++ while (p->state == STATE_INPUT_READY && !fctx->die) ++ pthread_cond_wait(&p->input_cond, &p->mutex); ++ pthread_mutex_unlock(&p->mutex); ++ ++ if (fctx->die) break; ++ ++ if (!codec->update_context) ff_report_frame_setup_done(avctx); ++ ++ pthread_mutex_lock(&p->mutex); ++ p->result = codec->decode(avctx, &p->picture, &p->got_picture, p->buf, p->buf_size); ++ ++ if (p->state == STATE_SETTING_UP) ff_report_frame_setup_done(avctx); ++ if (p->got_picture) { ++ ff_report_field_progress(&p->picture, INT_MAX, 0); ++ ff_report_field_progress(&p->picture, INT_MAX, 1); ++ } ++ ++ p->buf_size = 0; ++ p->state = STATE_INPUT_READY; ++ ++ pthread_mutex_lock(&p->progress_mutex); ++ pthread_cond_signal(&p->output_cond); ++ pthread_mutex_unlock(&p->progress_mutex); ++ pthread_mutex_unlock(&p->mutex); ++ }; ++ ++ return NULL; ++} ++ ++static int frame_thread_init(AVCodecContext *avctx) ++{ ++ FrameThreadContext *fctx; ++ AVCodecContext *src = avctx; ++ AVCodec *codec = avctx->codec; ++ int i, thread_count = avctx->thread_count, err = 0; ++ ++ avctx->thread_opaque = fctx = av_mallocz(sizeof(FrameThreadContext)); ++ fctx->delaying = 1; ++ pthread_mutex_init(&fctx->buffer_mutex, NULL); ++ ++ fctx->threads = av_mallocz(sizeof(PerThreadContext) * thread_count); ++ ++ for (i = 0; i < thread_count; i++) { ++ AVCodecContext *copy = av_malloc(sizeof(AVCodecContext)); ++ PerThreadContext *p = &fctx->threads[i]; ++ ++ pthread_mutex_init(&p->mutex, NULL); ++ pthread_mutex_init(&p->progress_mutex, NULL); ++ pthread_cond_init(&p->input_cond, NULL); ++ pthread_cond_init(&p->progress_cond, NULL); ++ pthread_cond_init(&p->output_cond, NULL); ++ ++ p->parent = fctx; ++ p->avctx = copy; ++ ++ *copy = *src; ++ 
copy->thread_opaque = p; ++ ++ if (!i) { ++ src = copy; ++ ++ if (codec->init) ++ err = codec->init(copy); ++ } else { ++ copy->is_copy = 1; ++ copy->priv_data = av_malloc(codec->priv_data_size); ++ memcpy(copy->priv_data, src->priv_data, codec->priv_data_size); ++ ++ if (codec->init_copy) ++ err = codec->init_copy(copy); ++ } ++ ++ if (err) goto error; ++ ++ pthread_create(&p->thread, NULL, frame_worker_thread, p); ++ } ++ ++ return 0; ++ ++error: ++ avctx->thread_count = i; ++ avcodec_thread_free(avctx); ++ ++ return err; ++} ++ ++/** ++ * Update a thread's context from the last thread. This is used for returning ++ * frames and for starting new decoding jobs after the previous one finishes ++ * predecoding. ++ * ++ * @param dst The destination context. ++ * @param src The source context. ++ * @param for_user Whether or not dst is the user-visible context. update_context won't be called and some pointers will be copied. ++ */ ++static int update_context_from_copy(AVCodecContext *dst, AVCodecContext *src, int for_user) ++{ ++ int err = 0; ++#define COPY(f) dst->f = src->f; ++#define COPY_FIELDS(s, e) memcpy(&dst->s, &src->s, (char*)&dst->e - (char*)&dst->s); ++ ++ //coded_width/height are not copied here, so that codecs' update_context can see when they change ++ //many encoding parameters could be theoretically changed during encode, but aren't copied ATM ++ ++ COPY(sub_id); ++ COPY(width); ++ COPY(height); ++ COPY(pix_fmt); ++ COPY(real_pict_num); //necessary? ++ COPY(delay); ++ COPY(max_b_frames); ++ ++ COPY_FIELDS(mv_bits, opaque); ++ ++ COPY(has_b_frames); ++ COPY(bits_per_coded_sample); ++ COPY(sample_aspect_ratio); ++ COPY(idct_algo); ++ if (for_user) COPY(coded_frame); ++ memcpy(dst->error, src->error, sizeof(src->error)); ++ COPY(last_predictor_count); //necessary? 
++ COPY(dtg_active_format); ++ COPY(color_table_id); ++ COPY(profile); ++ COPY(level); ++ COPY(bits_per_raw_sample); ++ ++ if (!for_user) { ++ if (dst->codec->update_context) ++ err = dst->codec->update_context(dst, src); ++ } ++ ++ return err; ++} ++ ++///Update the next decoding thread with values set by the user ++static void update_context_from_user(AVCodecContext *dst, AVCodecContext *src) ++{ ++ COPY(hurry_up); ++ COPY_FIELDS(skip_loop_filter, bidir_refine); ++ COPY(frame_number); ++ COPY(reordered_opaque); ++} ++ ++/// Release all frames passed to ff_release_buffer() ++static void handle_delayed_releases(PerThreadContext *p) ++{ ++ FrameThreadContext *fctx = p->parent; ++ ++ while (p->num_released_buffers > 0) { ++ AVFrame *f = &p->released_buffers[--p->num_released_buffers]; ++ ++ av_freep(&f->thread_opaque); ++ ++ pthread_mutex_lock(&fctx->buffer_mutex); ++ f->owner->release_buffer(f->owner, f); ++ pthread_mutex_unlock(&fctx->buffer_mutex); ++ } ++} ++ ++/// Submit a frame to the next decoding thread ++static int submit_frame(PerThreadContext * volatile p, const uint8_t *buf, int buf_size) ++{ ++ FrameThreadContext *fctx = p->parent; ++ PerThreadContext *prev_thread = fctx->prev_thread; ++ AVCodec *codec = p->avctx->codec; ++ int err = 0; ++ ++ if (!buf_size && !(codec->capabilities & CODEC_CAP_DELAY)) return 0; ++ ++ pthread_mutex_lock(&p->mutex); ++ if (prev_thread) { ++ pthread_mutex_lock(&prev_thread->progress_mutex); ++ while (prev_thread->state == STATE_SETTING_UP) ++ pthread_cond_wait(&prev_thread->progress_cond, &prev_thread->progress_mutex); ++ pthread_mutex_unlock(&prev_thread->progress_mutex); ++ ++ err = update_context_from_copy(p->avctx, prev_thread->avctx, 0); ++ if (err) return err; ++ } ++ ++ //FIXME: the client API should allow copy-on-write ++ p->buf = av_fast_realloc(p->buf, &p->allocated_buf_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE); ++ memcpy(p->buf, buf, buf_size); ++ memset(p->buf + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE); ++ 
p->buf_size = buf_size; ++ ++ handle_delayed_releases(p); ++ ++ p->state = STATE_SETTING_UP; ++ pthread_cond_signal(&p->input_cond); ++ pthread_mutex_unlock(&p->mutex); ++ ++ fctx->prev_thread = p; ++ ++ return err; ++} ++ ++int ff_decode_frame_threaded(AVCodecContext *avctx, ++ void *data, int *data_size, ++ const uint8_t *buf, int buf_size) ++{ ++ FrameThreadContext *fctx; ++ PerThreadContext * volatile p; ++ int thread_count = avctx->thread_count, err = 0; ++ int returning_thread; ++ ++ if (!avctx->thread_opaque) frame_thread_init(avctx); ++ fctx = avctx->thread_opaque; ++ returning_thread = fctx->next_finished; ++ ++ p = &fctx->threads[fctx->next_decoding]; ++ update_context_from_user(p->avctx, avctx); ++ err = submit_frame(p, buf, buf_size); ++ if (err) return err; ++ ++ fctx->next_decoding++; ++ ++ if (fctx->delaying) { ++ if (fctx->next_decoding >= (thread_count-1)) fctx->delaying = 0; ++ ++ *data_size=0; ++ return 0; ++ } ++ ++ //If it's draining frames at EOF, ignore null frames from the codec. ++ //Only return one when we've run out of codec frames to return. 
++ do { ++ p = &fctx->threads[returning_thread++]; ++ ++ pthread_mutex_lock(&p->progress_mutex); ++ while (p->state != STATE_INPUT_READY) ++ pthread_cond_wait(&p->output_cond, &p->progress_mutex); ++ pthread_mutex_unlock(&p->progress_mutex); ++ ++ *(AVFrame*)data = p->picture; ++ *data_size = p->got_picture; ++ ++ avcodec_get_frame_defaults(&p->picture); ++ p->got_picture = 0; ++ ++ if (returning_thread >= thread_count) returning_thread = 0; ++ } while (!buf_size && !*data_size && returning_thread != fctx->next_finished); ++ ++ update_context_from_copy(avctx, p->avctx, 1); ++ ++ if (fctx->next_decoding >= thread_count) fctx->next_decoding = 0; ++ fctx->next_finished = returning_thread; ++ ++ return p->result; ++} ++ ++void ff_report_field_progress(AVFrame *f, int n, int field) ++{ ++ PerThreadContext *p = f->owner->thread_opaque; ++ int *progress = f->thread_opaque; ++ ++ if (progress[field] >= n) return; ++ ++ pthread_mutex_lock(&p->progress_mutex); ++ progress[field] = n; ++ pthread_cond_broadcast(&p->progress_cond); ++ pthread_mutex_unlock(&p->progress_mutex); ++} ++ ++void ff_await_field_progress(AVFrame *f, int n, int field) ++{ ++ PerThreadContext *p = f->owner->thread_opaque; ++ int * volatile progress = f->thread_opaque; ++ ++ if (progress[field] >= n) return; ++ ++ pthread_mutex_lock(&p->progress_mutex); ++ while (progress[field] < n) ++ pthread_cond_wait(&p->progress_cond, &p->progress_mutex); ++ pthread_mutex_unlock(&p->progress_mutex); ++} ++ ++void ff_report_frame_progress(AVFrame *f, int n) ++{ ++ ff_report_field_progress(f, n, 0); ++} ++ ++void ff_await_frame_progress(AVFrame *f, int n) ++{ ++ ff_await_field_progress(f, n, 0); ++} ++ ++void ff_report_frame_setup_done(AVCodecContext *avctx) { ++ PerThreadContext *p = avctx->thread_opaque; ++ ++ if (!USE_FRAME_THREADING(avctx)) return; ++ ++ pthread_mutex_lock(&p->progress_mutex); ++ p->state = STATE_SETUP_FINISHED; ++ pthread_cond_broadcast(&p->progress_cond); ++ 
pthread_mutex_unlock(&p->progress_mutex); ++} ++ ++/// Wait for all threads to finish decoding ++static void park_frame_worker_threads(FrameThreadContext *fctx, int thread_count) ++{ ++ int i; ++ ++ for (i = 0; i < thread_count; i++) { ++ PerThreadContext *p = &fctx->threads[i]; ++ ++ pthread_mutex_lock(&p->progress_mutex); ++ while (p->state != STATE_INPUT_READY) ++ pthread_cond_wait(&p->output_cond, &p->progress_mutex); ++ pthread_mutex_unlock(&p->progress_mutex); ++ } ++} ++ ++static void frame_thread_free(AVCodecContext *avctx) ++{ ++ FrameThreadContext *fctx = avctx->thread_opaque; ++ AVCodec *codec = avctx->codec; ++ int i; ++ ++ park_frame_worker_threads(fctx, avctx->thread_count); ++ ++ if (fctx->prev_thread && fctx->prev_thread != fctx->threads) ++ update_context_from_copy(fctx->threads->avctx, fctx->prev_thread->avctx, 0); ++ ++ fctx->die = 1; ++ ++ for (i = 0; i < avctx->thread_count; i++) { ++ PerThreadContext *p = &fctx->threads[i]; ++ ++ pthread_mutex_lock(&p->mutex); ++ pthread_cond_signal(&p->input_cond); ++ pthread_mutex_unlock(&p->mutex); ++ ++ pthread_join(p->thread, NULL); ++ ++ if (codec->close) ++ codec->close(p->avctx); ++ ++ handle_delayed_releases(p); ++ } ++ ++ for (i = 0; i < avctx->thread_count; i++) { ++ PerThreadContext *p = &fctx->threads[i]; ++ ++ avcodec_default_free_buffers(p->avctx); ++ ++ pthread_mutex_destroy(&p->mutex); ++ pthread_mutex_destroy(&p->progress_mutex); ++ pthread_cond_destroy(&p->input_cond); ++ pthread_cond_destroy(&p->progress_cond); ++ pthread_cond_destroy(&p->output_cond); ++ av_freep(&p->buf); ++ ++ if (i) ++ av_freep(&p->avctx->priv_data); ++ ++ av_freep(&p->avctx); ++ } ++ ++ av_freep(&fctx->threads); ++ pthread_mutex_destroy(&fctx->buffer_mutex); ++ av_freep(&avctx->thread_opaque); ++} ++ ++void ff_frame_thread_flush(AVCodecContext *avctx) ++{ ++ FrameThreadContext *fctx = avctx->thread_opaque; ++ ++ if (!avctx->thread_opaque) return; ++ ++ park_frame_worker_threads(fctx, avctx->thread_count); ++ ++ if 
(fctx->prev_thread && fctx->prev_thread != fctx->threads) ++ update_context_from_copy(fctx->threads->avctx, fctx->prev_thread->avctx, 0); ++ ++ fctx->next_decoding = fctx->next_finished = 0; ++ fctx->delaying = 1; ++ fctx->prev_thread = NULL; ++} ++ ++int ff_get_buffer(AVCodecContext *avctx, AVFrame *f) ++{ ++ int ret, *progress; ++ PerThreadContext *p = avctx->thread_opaque; ++ ++ f->owner = avctx; ++ f->thread_opaque = progress = av_malloc(sizeof(int)*2); ++ ++ if (!USE_FRAME_THREADING(avctx)) { ++ progress[0] = ++ progress[1] = INT_MAX; ++ return avctx->get_buffer(avctx, f); ++ } ++ ++ progress[0] = ++ progress[1] = -1; ++ ++ pthread_mutex_lock(&p->parent->buffer_mutex); ++ ret = avctx->get_buffer(avctx, f); ++ pthread_mutex_unlock(&p->parent->buffer_mutex); ++ ++ /* ++ * The buffer list isn't shared between threads, ++ * so age doesn't mean what codecs expect it to mean. ++ * Disable it for now. ++ */ ++ f->age = INT_MAX; ++ ++ return ret; ++} ++ ++void ff_release_buffer(AVCodecContext *avctx, AVFrame *f) ++{ ++ PerThreadContext *p = avctx->thread_opaque; ++ ++ if (!USE_FRAME_THREADING(avctx)) { ++ av_freep(&f->thread_opaque); ++ avctx->release_buffer(avctx, f); ++ return; ++ } ++ ++ if (p->num_released_buffers >= MAX_DELAYED_RELEASED_BUFFERS) { ++ av_log(p->avctx, AV_LOG_ERROR, "too many delayed release_buffer calls!\n"); ++ return; ++ } ++ ++ if(avctx->debug & FF_DEBUG_BUFFERS) ++ av_log(avctx, AV_LOG_DEBUG, "delayed_release_buffer called on pic %p, %d buffers used\n", ++ f, f->owner->internal_buffer_count); ++ ++ p->released_buffers[p->num_released_buffers++] = *f; ++ memset(f->data, 0, sizeof(f->data)); ++} ++ ++/// Set the threading algorithm used, or none if an algorithm was set but no thread count. 
++static void validate_thread_parameters(AVCodecContext *avctx) ++{ ++ int frame_threading_supported = (avctx->codec->capabilities & CODEC_CAP_FRAME_THREADS) ++ && !(avctx->flags & CODEC_FLAG_TRUNCATED) ++ && !(avctx->flags & CODEC_FLAG_LOW_DELAY) ++ && !(avctx->flags2 & CODEC_FLAG2_CHUNKS); ++ if (avctx->thread_count <= 1) ++ avctx->active_thread_type = 0; ++ else if (frame_threading_supported && (avctx->thread_type & FF_THREAD_FRAME)) ++ avctx->active_thread_type = FF_THREAD_FRAME; ++ else ++ avctx->active_thread_type = FF_THREAD_SLICE; ++} ++ ++int avcodec_thread_init(AVCodecContext *avctx, int thread_count) ++{ ++ avctx->thread_count = thread_count; ++ ++ if (avctx->codec) { ++ validate_thread_parameters(avctx); ++ ++ // frame_thread_init must be called after codec init ++ if (USE_AVCODEC_EXECUTE(avctx)) ++ return thread_init(avctx, thread_count); ++ } ++ ++ return 0; ++} ++ ++void avcodec_thread_free(AVCodecContext *avctx) ++{ ++ if (USE_FRAME_THREADING(avctx)) ++ frame_thread_free(avctx); ++ else ++ thread_free(avctx); ++} +diff -Naur ffmpeg-r17861.orig/libavcodec/snow.c ffmpeg-r17861/libavcodec/snow.c +--- ffmpeg-r17861.orig/libavcodec/snow.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/snow.c 2009-03-07 15:28:18.000000000 +0100 +@@ -4117,9 +4117,9 @@ + int h= s->avctx->height; + + if(s->current_picture.data[0]){ +- s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH ); +- s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2); +- s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2); ++ s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH , EDGE_TOP|EDGE_BOTTOM); ++ s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2, EDGE_TOP|EDGE_BOTTOM); ++ s->dsp.draw_edges(s->current_picture.data[2], 
s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2, EDGE_TOP|EDGE_BOTTOM); + } + + tmp= s->last_picture[s->max_ref_frames-1]; +diff -Naur ffmpeg-r17861.orig/libavcodec/thread.h ffmpeg-r17861/libavcodec/thread.h +--- ffmpeg-r17861.orig/libavcodec/thread.h 1970-01-01 01:00:00.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/thread.h 2009-03-07 15:28:18.000000000 +0100 +@@ -0,0 +1,136 @@ ++/* ++ * Multithreading support ++ * Copyright (c) 2008 Alexander Strange ++ * ++ * This file is part of FFmpeg. ++ * ++ * FFmpeg is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * FFmpeg is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with FFmpeg; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++/** ++ * @file thread.h ++ * Multithreading support header. ++ * @author Alexander Strange ++ */ ++ ++#ifndef AVCODEC_THREAD_H ++#define AVCODEC_THREAD_H ++ ++#include "config.h" ++#include "avcodec.h" ++ ++/** ++ * Wait for all decoding threads to finish and then reset the internal state. ++ */ ++void ff_frame_thread_flush(AVCodecContext *avctx); ++ ++/** ++ * Submit a new frame for multithreaded decoding. Parameters ++ * are the same as avcodec_decode_video(). The result will be ++ * what the codec output X frames ago, where X is the number ++ * of threads. 
++ */ ++int ff_decode_frame_threaded(AVCodecContext *avctx, ++ void *data, int *data_size, ++ const uint8_t *buf, int buf_size); ++ ++#if HAVE_PTHREADS ++ ++/** ++ * If the codec defines update_context, call this after doing ++ * all setup work for the next thread. update_context will be ++ * called sometime afterwards, after which no variable read by ++ * it may be changed by the codec. ++ */ ++void ff_report_frame_setup_done(AVCodecContext *avctx); ++ ++/** ++ * Call this function after decoding some part of a frame. ++ * Subsequent calls with lower values for \p progress will be ignored. ++ * ++ * @param f The frame being decoded ++ * @param progress The highest-numbered part finished so far ++ */ ++void ff_report_frame_progress(AVFrame *f, int progress); ++ ++/** ++ * Call this function before accessing some part of a reference frame. ++ * On return, all parts up to the requested number will be available. ++ */ ++void ff_await_frame_progress(AVFrame *f, int progress); ++ ++/** ++ * Equivalent of ff_report_frame_progress() for pictures whose fields ++ * are stored in separate frames. ++ * ++ * @param f The frame containing the current field ++ * @param progress The highest-numbered part finished so far ++ * @param field The current field. 0 for top field/frame, 1 for bottom. ++ */ ++void ff_report_field_progress(AVFrame *f, int progress, int field); ++ ++/** ++ * Equivalent of ff_await_frame_progress() for pictures whose fields ++ * are stored in separate frames. ++ */ ++void ff_await_field_progress(AVFrame *f, int progress, int field); ++ ++/** ++ * Allocate a frame with avctx->get_buffer() and set ++ * values needed for multithreading. Codecs must call ++ * this instead of using get_buffer() directly if ++ * frame threading is enabled. ++ */ ++int ff_get_buffer(AVCodecContext *avctx, AVFrame *f); ++ ++/** ++ * Release a frame at a later time, after all earlier ++ * decoding threads have completed. On return, \p f->data ++ * will be cleared.
Codec must call this instead of using ++ * release_buffer() directly if frame threading is enabled. ++ */ ++void ff_release_buffer(AVCodecContext *avctx, AVFrame *f); ++ ++///True if frame threading is active. ++#define USE_FRAME_THREADING(avctx) (avctx->active_thread_type == FF_THREAD_FRAME) ++///True if calling AVCodecContext execute() will run in parallel. ++#define USE_AVCODEC_EXECUTE(avctx) (avctx->active_thread_type == FF_THREAD_SLICE) ++ ++#else ++ ++//Stub out these functions for systems without pthreads ++static inline void ff_report_frame_setup_done(AVCodecContext *avctx) {} ++static inline void ff_report_frame_progress(AVFrame *f, int progress) {} ++static inline void ff_report_field_progress(AVFrame *f, int progress, int field) {} ++static inline void ff_await_frame_progress(AVFrame *f, int progress) {} ++static inline void ff_await_field_progress(AVFrame *f, int progress, int field) {} ++ ++static inline int ff_get_buffer(AVCodecContext *avctx, AVFrame *f) ++{ ++ f->owner = avctx; ++ return avctx->get_buffer(avctx, f); ++} ++ ++static inline void ff_release_buffer(AVCodecContext *avctx, AVFrame *f) ++{ ++ f->owner->release_buffer(f->owner, f); ++} ++ ++#define USE_FRAME_THREADING(avctx) 0 ++#define USE_AVCODEC_EXECUTE(avctx) (HAVE_THREADS && avctx->active_thread_type) ++ ++#endif ++ ++#endif /* AVCODEC_THREAD_H */ +diff -Naur ffmpeg-r17861.orig/libavcodec/utils.c ffmpeg-r17861/libavcodec/utils.c +--- ffmpeg-r17861.orig/libavcodec/utils.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/utils.c 2009-03-07 15:28:18.000000000 +0100 +@@ -35,6 +35,7 @@ + #include "dsputil.h" + #include "opt.h" + #include "imgconvert.h" ++#include "thread.h" + #include "audioconvert.h" + #include "internal.h" + #include +@@ -329,6 +330,7 @@ + assert(pic->type==FF_BUFFER_TYPE_INTERNAL); + assert(s->internal_buffer_count); + ++ if(s->internal_buffer){ + buf = NULL; /* avoids warning */ + for(i=0; iinternal_buffer_count; i++){ //just 3-5 checks so is not worth 
to optimize + buf= &((InternalBuffer*)s->internal_buffer)[i]; +@@ -340,6 +342,7 @@ + last = &((InternalBuffer*)s->internal_buffer)[s->internal_buffer_count]; + + FFSWAP(InternalBuffer, *buf, *last); ++ } + + for(i=0; i<4; i++){ + pic->data[i]=NULL; +@@ -453,7 +456,17 @@ + avctx->codec = codec; + avctx->codec_id = codec->id; + avctx->frame_number = 0; +- if(avctx->codec->init){ ++ ++ if (HAVE_THREADS && avctx->thread_count>1 && !avctx->thread_opaque) { ++ ret = avcodec_thread_init(avctx, avctx->thread_count); ++ if (ret < 0) { ++ av_freep(&avctx->priv_data); ++ avctx->codec= NULL; ++ goto end; ++ } ++ } ++ ++ if(avctx->codec->init && !USE_FRAME_THREADING(avctx)){ + ret = avctx->codec->init(avctx); + if (ret < 0) { + av_freep(&avctx->priv_data); +@@ -521,12 +534,15 @@ + const uint8_t *buf, int buf_size) + { + int ret; ++ int threaded = USE_FRAME_THREADING(avctx); + + *got_picture_ptr= 0; + if((avctx->coded_width||avctx->coded_height) && avcodec_check_dimensions(avctx,avctx->coded_width,avctx->coded_height)) + return -1; +- if((avctx->codec->capabilities & CODEC_CAP_DELAY) || buf_size){ +- ret = avctx->codec->decode(avctx, picture, got_picture_ptr, ++ if((avctx->codec->capabilities & CODEC_CAP_DELAY) || buf_size || threaded){ ++ if (threaded) ret = ff_decode_frame_threaded(avctx, picture, ++ got_picture_ptr, buf, buf_size); ++ else ret = avctx->codec->decode(avctx, picture, got_picture_ptr, + buf, buf_size); + + emms_c(); //needed to avoid an emms_c() call before every return; +@@ -592,11 +608,12 @@ + + if (HAVE_THREADS && avctx->thread_opaque) + avcodec_thread_free(avctx); +- if (avctx->codec->close) ++ if (avctx->codec->close && !USE_FRAME_THREADING(avctx)) + avctx->codec->close(avctx); + avcodec_default_free_buffers(avctx); + av_freep(&avctx->priv_data); + avctx->codec = NULL; ++ avctx->active_thread_type = 0; + entangled_thread_counter--; + return 0; + } +@@ -830,6 +847,8 @@ + + void avcodec_flush_buffers(AVCodecContext *avctx) + { ++ 
if(USE_FRAME_THREADING(avctx)) ++ ff_frame_thread_flush(avctx); + if(avctx->codec->flush) + avctx->codec->flush(avctx); + } +diff -Naur ffmpeg-r17861.orig/libavcodec/w32thread.c ffmpeg-r17861/libavcodec/w32thread.c +--- ffmpeg-r17861.orig/libavcodec/w32thread.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/w32thread.c 2009-03-07 15:28:18.000000000 +0100 +@@ -104,7 +104,13 @@ + ThreadContext *c; + uint32_t threadid; + ++ if(!(s->thread_type & FF_THREAD_SLICE)){ ++ av_log(s, AV_LOG_WARNING, "The requested thread algorithm is not supported with this thread library.\n"); ++ return 0; ++ } ++ + s->thread_count= thread_count; ++ s->active_thread_type= FF_THREAD_SLICE; + + assert(!s->thread_opaque); + c= av_mallocz(sizeof(ThreadContext)*thread_count); +diff -Naur ffmpeg-r17861.orig/libavcodec/x86/dsputil_mmx.c ffmpeg-r17861/libavcodec/x86/dsputil_mmx.c +--- ffmpeg-r17861.orig/libavcodec/x86/dsputil_mmx.c 2009-03-07 15:22:29.000000000 +0100 ++++ ffmpeg-r17861/libavcodec/x86/dsputil_mmx.c 2009-03-07 15:28:18.000000000 +0100 +@@ -761,7 +761,7 @@ + + /* draw the edges of width 'w' of an image of size width, height + this mmx version can only handle w==8 || w==16 */ +-static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) ++static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w, int sides) + { + uint8_t *ptr, *last_line; + int i; +@@ -816,34 +816,39 @@ + + for(i=0;i