diff --git a/packages/linux/patches/linux-2.6.36-rc7-git5-drm_nouveau_upstream-20101014.diff b/packages/linux/patches/linux-2.6.36-rc7-git5-drm_nouveau_upstream-20101014.diff
new file mode 100644
index 0000000000..b229daa65d
--- /dev/null
+++ b/packages/linux/patches/linux-2.6.36-rc7-git5-drm_nouveau_upstream-20101014.diff
@@ -0,0 +1,16106 @@
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/Kconfig linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/Kconfig
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/Kconfig	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/Kconfig	2010-10-15 02:04:44.280991202 +0200
+@@ -10,6 +10,7 @@
+ 	select FB
+ 	select FRAMEBUFFER_CONSOLE if !EMBEDDED
+ 	select FB_BACKLIGHT if DRM_NOUVEAU_BACKLIGHT
++	select ACPI_VIDEO if ACPI
+ 	help
+ 	  Choose this option for open-source nVidia support.
+
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/Makefile linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/Makefile
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/Makefile	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/Makefile	2010-10-15 02:04:44.281991214 +0200
+@@ -9,7 +9,8 @@
+              nouveau_bo.o nouveau_fence.o nouveau_gem.o nouveau_ttm.o \
+              nouveau_hw.o nouveau_calc.o nouveau_bios.o nouveau_i2c.o \
+              nouveau_display.o nouveau_connector.o nouveau_fbcon.o \
+-             nouveau_dp.o \
++             nouveau_dp.o nouveau_ramht.o \
++             nouveau_pm.o nouveau_volt.o nouveau_perf.o nouveau_temp.o \
+              nv04_timer.o \
+              nv04_mc.o nv40_mc.o nv50_mc.o \
+              nv04_fb.o nv10_fb.o nv30_fb.o nv40_fb.o nv50_fb.o nvc0_fb.o \
+@@ -23,7 +24,8 @@
+              nv04_dac.o nv04_dfp.o nv04_tv.o nv17_tv.o nv17_tv_modes.o \
+              nv04_crtc.o nv04_display.o nv04_cursor.o nv04_fbcon.o \
+              nv10_gpio.o nv50_gpio.o \
+-             nv50_calc.o
++             nv50_calc.o \
++             nv04_pm.o nv50_pm.o nva3_pm.o
+
+ nouveau-$(CONFIG_DRM_NOUVEAU_DEBUG) += nouveau_debugfs.o
+ nouveau-$(CONFIG_COMPAT) += nouveau_ioc32.o
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_acpi.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_acpi.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_acpi.c	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_acpi.c	2010-10-15 02:04:44.288991301 +0200
+@@ -292,6 +292,6 @@
+ 	if (ret < 0)
+ 		return ret;
+
+-	nv_connector->edid = edid;
++	nv_connector->edid = kmemdup(edid, EDID_LENGTH, GFP_KERNEL);
+ 	return 0;
+ }
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_bios.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_bios.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_bios.c	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_bios.c	2010-10-15 02:04:44.310991574 +0200
+@@ -43,9 +43,6 @@
+ #define BIOSLOG(sip, fmt, arg...) NV_DEBUG(sip->dev, fmt, ##arg)
+ #define LOG_OLD_VALUE(x)
+
+-#define ROM16(x) le16_to_cpu(*(uint16_t *)&(x))
+-#define ROM32(x) le32_to_cpu(*(uint32_t *)&(x))
+-
+ struct init_exec {
+ 	bool execute;
+ 	bool repeat;
+@@ -272,12 +269,6 @@
+ 	int (*handler)(struct nvbios *, uint16_t, struct init_exec *);
+ };
+
+-struct bit_entry {
+-	uint8_t id[2];
+-	uint16_t length;
+-	uint16_t offset;
+-};
+-
+ static int parse_init_table(struct nvbios *, unsigned int, struct init_exec *);
+
+ #define MACRO_INDEX_SIZE	2
+@@ -1231,7 +1222,7 @@
+ 			return 3;
+ 		}
+
+-		if (cond & 1)
++		if (!(cond & 1))
+ 			iexec->execute = false;
+ 	}
+ 		break;
+@@ -4675,6 +4666,92 @@
+ 	return 0;
+ }
+
++struct pll_mapping {
++	u8  type;
++	u32 reg;
++};
++
++static struct pll_mapping nv04_pll_mapping[] = {
++	{ PLL_CORE  , NV_PRAMDAC_NVPLL_COEFF },
++	{ PLL_MEMORY, NV_PRAMDAC_MPLL_COEFF },
++	{ PLL_VPLL0 , NV_PRAMDAC_VPLL_COEFF },
++	{ PLL_VPLL1 , NV_RAMDAC_VPLL2 },
++	{}
++};
++
++static struct pll_mapping nv40_pll_mapping[] = {
++	{ PLL_CORE  , 0x004000 },
++	{ PLL_MEMORY, 0x004020 },
++	{ PLL_VPLL0 , NV_PRAMDAC_VPLL_COEFF },
++	{ PLL_VPLL1 , NV_RAMDAC_VPLL2 },
++	{}
++};
++
++static struct pll_mapping nv50_pll_mapping[] = {
++	{ PLL_CORE  , 0x004028 },
++	{ PLL_SHADER, 0x004020 },
++	{ PLL_UNK03 , 0x004000 },
++	{ PLL_MEMORY, 0x004008 },
++	{ PLL_UNK40 , 0x00e810 },
++	{ PLL_UNK41 , 0x00e818 },
++	{ PLL_UNK42 , 0x00e824 },
++	{ PLL_VPLL0 , 0x614100 },
++	{ PLL_VPLL1 , 0x614900 },
++	{}
++};
++
++static struct pll_mapping nv84_pll_mapping[] = {
++	{ PLL_CORE  , 0x004028 },
++	{ PLL_SHADER, 0x004020 },
++	{ PLL_MEMORY, 0x004008 },
++	{ PLL_UNK05 , 0x004030 },
++	{ PLL_UNK41 , 0x00e818 },
++	{ PLL_VPLL0 , 0x614100 },
++	{ PLL_VPLL1 , 0x614900 },
++	{}
++};
++
++u32
++get_pll_register(struct drm_device *dev, enum pll_types type)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	struct nvbios *bios = &dev_priv->vbios;
++	struct pll_mapping *map;
++	int i;
++
++	if (dev_priv->card_type < NV_40)
++		map = nv04_pll_mapping;
++	else
++	if (dev_priv->card_type < NV_50)
++		map = nv40_pll_mapping;
++	else {
++		u8 *plim = &bios->data[bios->pll_limit_tbl_ptr];
++
++		if (plim[0] >= 0x30) {
++			u8 *entry = plim + plim[1];
++			for (i = 0; i < plim[3]; i++, entry += plim[2]) {
++				if (entry[0] == type)
++					return ROM32(entry[3]);
++			}
++
++			return 0;
++		}
++
++		if (dev_priv->chipset == 0x50)
++			map = nv50_pll_mapping;
++		else
++			map = nv84_pll_mapping;
++	}
++
++	while (map->reg) {
++		if (map->type == type)
++			return map->reg;
++		map++;
++	}
++
++	return 0;
++}
++
+ int get_pll_limits(struct drm_device *dev, uint32_t limit_match, struct pll_lims *pll_lim)
+ {
+ 	/*
+@@ -4750,6 +4827,17 @@
+ 	/* initialize all members to zero */
+ 	memset(pll_lim, 0, sizeof(struct pll_lims));
+
++	/* if we were passed a type rather than a register, figure
++	 * out the register and store it
++	 */
++	if (limit_match > PLL_MAX)
++		pll_lim->reg = limit_match;
++	else {
++		pll_lim->reg = get_pll_register(dev, limit_match);
++		if (!pll_lim->reg)
++			return -ENOENT;
++	}
++
+ 	if (pll_lim_ver == 0x10 || pll_lim_ver == 0x11) {
+ 		uint8_t *pll_rec = &bios->data[bios->pll_limit_tbl_ptr + headerlen + recordlen * pllindex];
+
+@@ -4785,7 +4873,6 @@
+ 		pll_lim->max_usable_log2p = 0x6;
+ 	} else if (pll_lim_ver == 0x20 || pll_lim_ver == 0x21) {
+ 		uint16_t plloffs = bios->pll_limit_tbl_ptr + headerlen;
+-		uint32_t reg = 0; /* default match */
+ 		uint8_t *pll_rec;
+ 		int i;
+
+@@ -4797,37 +4884,22 @@
+ 			NV_WARN(dev, "Default PLL limit entry has non-zero "
+ 				     "register field\n");
+
+-		if (limit_match > MAX_PLL_TYPES)
+-			/* we've been passed a reg as the match */
+-			reg = limit_match;
+-		else /* limit match is a pll type */
+-			for (i = 1; i < entries && !reg; i++) {
+-				uint32_t cmpreg = ROM32(bios->data[plloffs + recordlen * i]);
+-
+-				if (limit_match == NVPLL &&
+-				    (cmpreg == NV_PRAMDAC_NVPLL_COEFF || cmpreg == 0x4000))
+-					reg = cmpreg;
+-				if (limit_match == MPLL &&
+-				    (cmpreg == NV_PRAMDAC_MPLL_COEFF || cmpreg == 0x4020))
+-					reg = cmpreg;
+-				if (limit_match == VPLL1 &&
+-				    (cmpreg == NV_PRAMDAC_VPLL_COEFF || cmpreg == 0x4010))
+-					reg = cmpreg;
+-				if (limit_match == VPLL2 &&
+-				    (cmpreg == NV_RAMDAC_VPLL2 || cmpreg == 0x4018))
+-					reg = cmpreg;
+-			}
+-
+ 		for (i = 1; i < entries; i++)
+-			if (ROM32(bios->data[plloffs + recordlen * i]) == reg) {
++			if (ROM32(bios->data[plloffs + recordlen * i]) == pll_lim->reg) {
+ 				pllindex = i;
+ 				break;
+ 			}
+
++		if ((dev_priv->card_type >= NV_50) && (pllindex == 0)) {
++			NV_ERROR(dev, "Register 0x%08x not found in PLL "
++				 "limits table", pll_lim->reg);
++			return -ENOENT;
++		}
++
+ 		pll_rec = &bios->data[plloffs + recordlen * pllindex];
+
+ 		BIOSLOG(bios, "Loading PLL limits for reg 0x%08x\n",
+-			pllindex ? reg : 0);
++			pllindex ? pll_lim->reg : 0);
+
+ 		/*
+ 		 * Frequencies are stored in tables in MHz, kHz are more
+@@ -4877,8 +4949,8 @@
+ 		if (cv == 0x51 && !pll_lim->refclk) {
+ 			uint32_t sel_clk = bios_rd32(bios, NV_PRAMDAC_SEL_CLK);
+
+-			if (((limit_match == NV_PRAMDAC_VPLL_COEFF || limit_match == VPLL1) && sel_clk & 0x20) ||
+-			    ((limit_match == NV_RAMDAC_VPLL2 || limit_match == VPLL2) && sel_clk & 0x80)) {
++			if ((pll_lim->reg == NV_PRAMDAC_VPLL_COEFF && sel_clk & 0x20) ||
++			    (pll_lim->reg == NV_RAMDAC_VPLL2 && sel_clk & 0x80)) {
+ 				if (bios_idxprt_rd(bios, NV_CIO_CRX__COLOR, NV_CIO_CRE_CHIP_ID_INDEX) < 0xa3)
+ 					pll_lim->refclk = 200000;
+ 				else
+@@ -4891,10 +4963,10 @@
+ 		int i;
+
+ 		BIOSLOG(bios, "Loading PLL limits for register 0x%08x\n",
+-			limit_match);
++			pll_lim->reg);
+
+ 		for (i = 0; i < entries; i++, entry += recordlen) {
+-			if (ROM32(entry[3]) == limit_match) {
++			if (ROM32(entry[3]) == pll_lim->reg) {
+ 				record = &bios->data[ROM16(entry[1])];
+ 				break;
+ 			}
+@@ -4902,7 +4974,7 @@
+
+ 		if (!record) {
+ 			NV_ERROR(dev, "Register 0x%08x not found in PLL "
+-				 "limits table", limit_match);
++				 "limits table", pll_lim->reg);
+ 			return -ENOENT;
+ 		}
+
+@@ -4931,10 +5003,10 @@
+ 		int i;
+
+ 		BIOSLOG(bios, "Loading PLL limits for register 0x%08x\n",
+-			limit_match);
++			pll_lim->reg);
+
+ 		for (i = 0; i < entries; i++, entry += recordlen) {
+-			if (ROM32(entry[3]) == limit_match) {
++			if (ROM32(entry[3]) == pll_lim->reg) {
+ 				record = &bios->data[ROM16(entry[1])];
+ 				break;
+ 			}
+@@ -4942,7 +5014,7 @@
+
+ 		if (!record) {
+ 			NV_ERROR(dev, "Register 0x%08x not found in PLL "
+-				 "limits table", limit_match);
++				 "limits table", pll_lim->reg);
+ 			return -ENOENT;
+ 		}
+
+@@ -5293,7 +5365,7 @@
+ 	if (bitentry->length < 0x5)
+ 		return 0;
+
+-	if (bitentry->id[1] < 2) {
++	if (bitentry->version < 2) {
+ 		bios->ram_restrict_group_count = bios->data[bitentry->offset + 2];
+ 		bios->ram_restrict_tbl_ptr = ROM16(bios->data[bitentry->offset + 3]);
+ 	} else {
+@@ -5403,27 +5475,40 @@
+
+ #define BIT_TABLE(id, funcid) ((struct bit_table){ id, parse_bit_##funcid##_tbl_entry })
+
++int
++bit_table(struct drm_device *dev, u8 id, struct bit_entry *bit)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	struct nvbios *bios = &dev_priv->vbios;
++	u8 entries, *entry;
++
++	entries = bios->data[bios->offset + 10];
++	entry   = &bios->data[bios->offset + 12];
++	while (entries--) {
++		if (entry[0] == id) {
++			bit->id = entry[0];
++			bit->version = entry[1];
++			bit->length = ROM16(entry[2]);
++			bit->offset = ROM16(entry[4]);
++			bit->data = ROMPTR(bios, entry[4]);
++			return 0;
++		}
++
++		entry += bios->data[bios->offset + 9];
++	}
++
++	return -ENOENT;
++}
++
+ static int
+ parse_bit_table(struct nvbios *bios, const uint16_t bitoffset,
+ 		struct bit_table *table)
+ {
+ 	struct drm_device *dev = bios->dev;
+-	uint8_t maxentries = bios->data[bitoffset + 4];
+-	int i, offset;
+ 	struct bit_entry bitentry;
+
+-	for (i = 0, offset = bitoffset + 6; i < maxentries; i++, offset += 6) {
+-		bitentry.id[0] = bios->data[offset];
+-
+-		if (bitentry.id[0] != table->id)
+-			continue;
+-
+-		bitentry.id[1] = bios->data[offset + 1];
+-		bitentry.length = ROM16(bios->data[offset + 2]);
+-		bitentry.offset = ROM16(bios->data[offset + 4]);
+-
++	if (bit_table(dev, table->id, &bitentry) == 0)
+ 		return table->parse_fn(dev, bios, &bitentry);
+-	}
+
+ 	NV_INFO(dev, "BIT table '%c' not found\n", table->id);
+ 	return -ENOSYS;
+@@ -5683,8 +5768,14 @@
+ static struct dcb_gpio_entry *
+ new_gpio_entry(struct nvbios *bios)
+ {
++	struct drm_device *dev = bios->dev;
+ 	struct dcb_gpio_table *gpio = &bios->dcb.gpio;
+
++	if (gpio->entries >= DCB_MAX_NUM_GPIO_ENTRIES) {
++		NV_ERROR(dev, "exceeded maximum number of gpio entries!!\n");
++		return NULL;
++	}
++
+ 	return &gpio->entry[gpio->entries++];
+ }
+
+@@ -5706,113 +5797,90 @@
+ }
+
+ static void
+-parse_dcb30_gpio_entry(struct nvbios *bios, uint16_t offset)
+-{
+-	struct dcb_gpio_entry *gpio;
+-	uint16_t ent = ROM16(bios->data[offset]);
+-	uint8_t line = ent & 0x1f,
+-		tag = ent >> 5 & 0x3f,
+-		flags = ent >> 11 & 0x1f;
+-
+-	if (tag == 0x3f)
+-		return;
+-
+-	gpio = new_gpio_entry(bios);
+-
+-	gpio->tag = tag;
+-	gpio->line = line;
+-	gpio->invert = flags != 4;
+-	gpio->entry = ent;
+-}
+-
+-static void
+-parse_dcb40_gpio_entry(struct nvbios *bios, uint16_t offset)
+-{
+-	uint32_t entry = ROM32(bios->data[offset]);
+-	struct dcb_gpio_entry *gpio;
+-
+-	if ((entry & 0x0000ff00) == 0x0000ff00)
+-		return;
+-
+-	gpio = new_gpio_entry(bios);
+-	gpio->tag = (entry & 0x0000ff00) >> 8;
+-	gpio->line = (entry & 0x0000001f) >> 0;
+-	gpio->state_default = (entry & 0x01000000) >> 24;
+-	gpio->state[0] = (entry & 0x18000000) >> 27;
+-	gpio->state[1] = (entry & 0x60000000) >> 29;
+-	gpio->entry = entry;
+-}
+-
+-static void
+ parse_dcb_gpio_table(struct nvbios *bios)
+ {
+ 	struct drm_device *dev = bios->dev;
+-	uint16_t gpio_table_ptr = bios->dcb.gpio_table_ptr;
+-	uint8_t *gpio_table = &bios->data[gpio_table_ptr];
+-	int header_len = gpio_table[1],
+-	    entries = gpio_table[2],
+-	    entry_len = gpio_table[3];
+-	void (*parse_entry)(struct nvbios *, uint16_t) = NULL;
++	struct dcb_gpio_entry *e;
++	u8 headerlen, entries, recordlen;
++	u8 *dcb, *gpio = NULL, *entry;
+ 	int i;
+
+-	if (bios->dcb.version >= 0x40) {
+-		if (gpio_table_ptr && entry_len != 4) {
+-			NV_WARN(dev, "Invalid DCB GPIO table entry length.\n");
+-			return;
+-		}
+-
+-		parse_entry = parse_dcb40_gpio_entry;
++	dcb = ROMPTR(bios, bios->data[0x36]);
++	if (dcb[0] >= 0x30) {
++		gpio = ROMPTR(bios, dcb[10]);
++		if (!gpio)
++			goto no_table;
++
++		headerlen = gpio[1];
++		entries   = gpio[2];
++		recordlen = gpio[3];
++	} else
++	if (dcb[0] >= 0x22 && dcb[-1] >= 0x13) {
++		gpio = ROMPTR(bios, dcb[-15]);
++		if (!gpio)
++			goto no_table;
++
++		headerlen = 3;
++		entries   = gpio[2];
++		recordlen = gpio[1];
++	} else
++	if (dcb[0] >= 0x22) {
++		/* No GPIO table present, parse the TVDAC GPIO data. */
++		uint8_t *tvdac_gpio = &dcb[-5];

-	} else if (bios->dcb.version >= 0x30) {
-		if (gpio_table_ptr && entry_len != 2) {
-			NV_WARN(dev, "Invalid DCB GPIO table entry length.\n");
-			return;
++		if (tvdac_gpio[0] & 1) {
++			e = new_gpio_entry(bios);
++			e->tag = DCB_GPIO_TVDAC0;
++			e->line = tvdac_gpio[1] >> 4;
++			e->invert = tvdac_gpio[0] & 2;
+ 		}
+
+-		parse_entry = parse_dcb30_gpio_entry;
+-
+-	} else if (bios->dcb.version >= 0x22) {
+-		/*
+-		 * DCBs older than v3.0 don't really have a GPIO
+-		 * table, instead they keep some GPIO info at fixed
+-		 * locations.
+-		 */
+-		uint16_t dcbptr = ROM16(bios->data[0x36]);
+-		uint8_t *tvdac_gpio = &bios->data[dcbptr - 5];
++		goto no_table;
++	} else {
++		NV_DEBUG(dev, "no/unknown gpio table on DCB 0x%02x\n", dcb[0]);
++		goto no_table;
++	}
+
+-		if (tvdac_gpio[0] & 1) {
+-			struct dcb_gpio_entry *gpio = new_gpio_entry(bios);
++	entry = gpio + headerlen;
++	for (i = 0; i < entries; i++, entry += recordlen) {
++		e = new_gpio_entry(bios);
++		if (!e)
++			break;
+
+-			gpio->tag = DCB_GPIO_TVDAC0;
+-			gpio->line = tvdac_gpio[1] >> 4;
+-			gpio->invert = tvdac_gpio[0] & 2;
+-		}
+-	} else {
+-		/*
+-		 * No systematic way to store GPIO info on pre-v2.2
+-		 * DCBs, try to match the PCI device IDs.
+-		 */
++		if (gpio[0] < 0x40) {
++			e->entry = ROM16(entry[0]);
++			e->tag = (e->entry & 0x07e0) >> 5;
++			if (e->tag == 0x3f) {
++				bios->dcb.gpio.entries--;
++				continue;
++			}
+
+-		/* Apple iMac G4 NV18 */
+-		if (nv_match_device(dev, 0x0189, 0x10de, 0x0010)) {
+-			struct dcb_gpio_entry *gpio = new_gpio_entry(bios);
++			e->line = (e->entry & 0x001f);
++			e->invert = ((e->entry & 0xf800) >> 11) != 4;
++		} else {
++			e->entry = ROM32(entry[0]);
++			e->tag = (e->entry & 0x0000ff00) >> 8;
++			if (e->tag == 0xff) {
++				bios->dcb.gpio.entries--;
++				continue;
++			}
+
+-			gpio->tag = DCB_GPIO_TVDAC0;
+-			gpio->line = 4;
++			e->line = (e->entry & 0x0000001f) >> 0;
++			e->state_default = (e->entry & 0x01000000) >> 24;
++			e->state[0] = (e->entry & 0x18000000) >> 27;
++			e->state[1] = (e->entry & 0x60000000) >> 29;
+ 		}
+-
+ 	}
+
+-	if (!gpio_table_ptr)
+-		return;
+-
+-	if (entries > DCB_MAX_NUM_GPIO_ENTRIES) {
+-		NV_WARN(dev, "Too many entries in the DCB GPIO table.\n");
+-		entries = DCB_MAX_NUM_GPIO_ENTRIES;
++no_table:
++	/* Apple iMac G4 NV18 */
++	if (nv_match_device(dev, 0x0189, 0x10de, 0x0010)) {
++		e = new_gpio_entry(bios);
++		if (e) {
++			e->tag = DCB_GPIO_TVDAC0;
++			e->line = 4;
++		}
+ 	}
+-
+-	for (i = 0; i < entries; i++)
+-		parse_entry(bios, gpio_table_ptr + header_len + entry_len * i);
+ }
+
+ struct dcb_connector_table_entry *
+@@ -5971,7 +6039,6 @@
+ 			if (type != cte->type)
+ 				NV_WARN(dev, " -> type 0x%02x\n", cte->type);
+ 		}
+-
+ 	}
+ }
+
+@@ -6680,6 +6747,8 @@
+ 			bit_signature, sizeof(bit_signature));
+ 	if (offset) {
+ 		NV_TRACE(dev, "BIT BIOS found\n");
++		bios->type = NVBIOS_BIT;
++		bios->offset = offset;
+ 		return parse_bit_structure(bios, offset + 6);
+ 	}
+
+@@ -6687,6 +6756,8 @@
+ 			bmp_signature, sizeof(bmp_signature));
+ 	if (offset) {
+ 		NV_TRACE(dev, "BMP BIOS found\n");
++		bios->type = NVBIOS_BMP;
++		bios->offset = offset;
+ 		return parse_bmp_structure(dev, bios, offset);
+ 	}
+
+@@ -6806,6 +6877,8 @@
+ 			"running VBIOS init tables.\n");
+ 		bios->execute = true;
+ 	}
++	if (nouveau_force_post)
++		bios->execute = true;
+
+ 	ret = nouveau_run_vbios_init(dev);
+ 	if (ret)
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_bios.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_bios.h
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_bios.h	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_bios.h	2010-10-15 02:04:44.320991698 +0200
+@@ -34,6 +34,20 @@
+
+ #define DCB_LOC_ON_CHIP 0
+
++#define ROM16(x) le16_to_cpu(*(uint16_t *)&(x))
++#define ROM32(x) le32_to_cpu(*(uint32_t *)&(x))
++#define ROMPTR(bios, x) (ROM16(x) ? &(bios)->data[ROM16(x)] : NULL)
++
++struct bit_entry {
++	uint8_t  id;
++	uint8_t  version;
++	uint16_t length;
++	uint16_t offset;
++	uint8_t *data;
++};
++
++int bit_table(struct drm_device *, u8 id, struct bit_entry *);
++
+ struct dcb_i2c_entry {
+ 	uint32_t entry;
+ 	uint8_t port_type;
+@@ -170,16 +184,28 @@
+ 	LVDS_PANEL_OFF
+ };
+
+-/* changing these requires matching changes to reg tables in nv_get_clock */
+-#define MAX_PLL_TYPES	4
++/* these match types in pll limits table version 0x40,
++ * nouveau uses them on all chipsets internally where a
++ * specific pll needs to be referenced, but the exact
++ * register isn't known.
++ */
+ enum pll_types {
+-	NVPLL,
+-	MPLL,
+-	VPLL1,
+-	VPLL2
++	PLL_CORE   = 0x01,
++	PLL_SHADER = 0x02,
++	PLL_UNK03  = 0x03,
++	PLL_MEMORY = 0x04,
++	PLL_UNK05  = 0x05,
++	PLL_UNK40  = 0x40,
++	PLL_UNK41  = 0x41,
++	PLL_UNK42  = 0x42,
++	PLL_VPLL0  = 0x80,
++	PLL_VPLL1  = 0x81,
++	PLL_MAX    = 0xff
+ };
+
+ struct pll_lims {
++	u32 reg;
++
+ 	struct {
+ 		int minfreq;
+ 		int maxfreq;
+@@ -212,6 +238,11 @@
+
+ struct nvbios {
+ 	struct drm_device *dev;
++	enum {
++		NVBIOS_BMP,
++		NVBIOS_BIT
++	} type;
++	uint16_t offset;
+
+ 	uint8_t chip_version;
+
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_bo.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_bo.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_bo.c	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_bo.c	2010-10-15 02:04:44.322991724 +0200
+@@ -36,21 +36,6 @@
+ #include
+ #include
+
+-int
+-nouveau_bo_sync_gpu(struct nouveau_bo *nvbo, struct nouveau_channel *chan)
+-{
+-	struct nouveau_fence *prev_fence = nvbo->bo.sync_obj;
+-	int ret;
+-
+-	if (!prev_fence || nouveau_fence_channel(prev_fence) == chan)
+-		return 0;
+-
+-	spin_lock(&nvbo->bo.lock);
+-	ret = ttm_bo_wait(&nvbo->bo, false, false, false);
+-	spin_unlock(&nvbo->bo.lock);
+-	return ret;
+-}
+-
+ static void
+ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
+ {
+@@ -58,8 +43,6 @@
+ 	struct drm_device *dev = dev_priv->dev;
+ 	struct nouveau_bo *nvbo = nouveau_bo(bo);
+
+-	ttm_bo_kunmap(&nvbo->kmap);
+-
+ 	if (unlikely(nvbo->gem))
+ 		DRM_ERROR("bo %p still attached to GEM object\n", bo);
+
+@@ -160,12 +143,12 @@
+ 	nvbo->no_vm = no_vm;
+ 	nvbo->tile_mode = tile_mode;
+ 	nvbo->tile_flags = tile_flags;
++	nvbo->bo.bdev = &dev_priv->ttm.bdev;
+
+-	nouveau_bo_fixup_align(dev, tile_mode, tile_flags, &align, &size);
++	nouveau_bo_fixup_align(dev, tile_mode, nouveau_bo_tile_layout(nvbo),
++			       &align, &size);
+ 	align >>= PAGE_SHIFT;
+
+-	nvbo->placement.fpfn = 0;
+-	nvbo->placement.lpfn = mappable ? dev_priv->fb_mappable_pages : 0;
+ 	nouveau_bo_placement_set(nvbo, flags, 0);
+
+ 	nvbo->channel = chan;
+@@ -195,6 +178,31 @@
+ 		pl[(*n)++] = TTM_PL_FLAG_SYSTEM | flags;
+ }
+
++static void
++set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
++{
++	struct drm_nouveau_private *dev_priv = nouveau_bdev(nvbo->bo.bdev);
++
++	if (dev_priv->card_type == NV_10 &&
++	    nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM)) {
++		/*
++		 * Make sure that the color and depth buffers are handled
++		 * by independent memory controller units. Up to a 9x
++		 * speed up when alpha-blending and depth-test are enabled
++		 * at the same time.
++		 */
++		int vram_pages = dev_priv->vram_size >> PAGE_SHIFT;
++
++		if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) {
++			nvbo->placement.fpfn = vram_pages / 2;
++			nvbo->placement.lpfn = ~0;
++		} else {
++			nvbo->placement.fpfn = 0;
++			nvbo->placement.lpfn = vram_pages / 2;
++		}
++	}
++}
++
+ void
+ nouveau_bo_placement_set(struct nouveau_bo *nvbo, uint32_t type, uint32_t busy)
+ {
+@@ -209,6 +217,8 @@
+ 	pl->busy_placement = nvbo->busy_placements;
+ 	set_placement_list(nvbo->busy_placements, &pl->num_busy_placement,
+ 			   type | busy, flags);
++
++	set_placement_range(nvbo, type);
+ }
+
+ int
+@@ -305,7 +315,8 @@
+ void
+ nouveau_bo_unmap(struct nouveau_bo *nvbo)
+ {
+-	ttm_bo_kunmap(&nvbo->kmap);
++	if (nvbo)
++		ttm_bo_kunmap(&nvbo->kmap);
+ }
+
+ u16
+@@ -404,7 +415,10 @@
+ 		man->available_caching = TTM_PL_FLAG_UNCACHED |
+ 					 TTM_PL_FLAG_WC;
+ 		man->default_caching = TTM_PL_FLAG_WC;
+-		man->gpu_offset = dev_priv->vm_vram_base;
++		if (dev_priv->card_type == NV_50)
++			man->gpu_offset = 0x40000000;
++		else
++			man->gpu_offset = 0;
+ 		break;
+ 	case TTM_PL_TT:
+ 		switch (dev_priv->gart_info.type) {
+@@ -469,19 +483,26 @@
+ 	if (ret)
+ 		return ret;
+
+-	ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, NULL,
+-					evict || (nvbo->channel &&
+-						  nvbo->channel != chan),
++	if (nvbo->channel) {
++		ret = nouveau_fence_sync(fence, nvbo->channel);
++		if (ret)
++			goto out;
++	}
++
++	ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, NULL, evict,
+ 					no_wait_reserve, no_wait_gpu, new_mem);
++out:
+ 	nouveau_fence_unref((void *)&fence);
+ 	return ret;
+ }
+
+ static inline uint32_t
+-nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
+-		      struct ttm_mem_reg *mem)
++nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
++		      struct nouveau_channel *chan, struct ttm_mem_reg *mem)
+ {
+-	if (chan == nouveau_bdev(nvbo->bo.bdev)->channel) {
++	struct nouveau_bo *nvbo = nouveau_bo(bo);
++
++	if (nvbo->no_vm) {
+ 		if (mem->mem_type == TTM_PL_TT)
+ 			return NvDmaGART;
+ 		return NvDmaVRAM;
+@@ -493,87 +514,191 @@
+ }
+
+ static int
+-nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
+-		     bool no_wait_reserve, bool no_wait_gpu,
+-		     struct ttm_mem_reg *new_mem)
++nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
++		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+ {
+-	struct nouveau_bo *nvbo = nouveau_bo(bo);
+ 	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+-	struct ttm_mem_reg *old_mem = &bo->mem;
+-	struct nouveau_channel *chan;
+-	uint64_t src_offset, dst_offset;
+-	uint32_t page_count;
++	struct nouveau_bo *nvbo = nouveau_bo(bo);
++	u64 length = (new_mem->num_pages << PAGE_SHIFT);
++	u64 src_offset, dst_offset;
+ 	int ret;
+
+-	chan = nvbo->channel;
+-	if (!chan || nvbo->tile_flags || nvbo->no_vm)
+-		chan = dev_priv->channel;
+-
+ 	src_offset = old_mem->mm_node->start << PAGE_SHIFT;
+ 	dst_offset = new_mem->mm_node->start << PAGE_SHIFT;
+-	if (chan != dev_priv->channel) {
+-		if (old_mem->mem_type == TTM_PL_TT)
+-			src_offset += dev_priv->vm_gart_base;
+-		else
++	if (!nvbo->no_vm) {
++		if (old_mem->mem_type == TTM_PL_VRAM)
+ 			src_offset += dev_priv->vm_vram_base;
+-
+-		if (new_mem->mem_type == TTM_PL_TT)
+-			dst_offset += dev_priv->vm_gart_base;
+ 		else
++			src_offset += dev_priv->vm_gart_base;
++
++		if (new_mem->mem_type == TTM_PL_VRAM)
+ 			dst_offset += dev_priv->vm_vram_base;
++		else
++			dst_offset += dev_priv->vm_gart_base;
+ 	}
+
+ 	ret = RING_SPACE(chan, 3);
+ 	if (ret)
+ 		return ret;
+-	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
+-	OUT_RING(chan, nouveau_bo_mem_ctxdma(nvbo, chan, old_mem));
+-	OUT_RING(chan, nouveau_bo_mem_ctxdma(nvbo, chan, new_mem));
+
+-	if (dev_priv->card_type >= NV_50) {
+-		ret = RING_SPACE(chan, 4);
++	BEGIN_RING(chan, NvSubM2MF, 0x0184, 2);
++	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_mem));
++	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_mem));
++
++	while (length) {
++		u32 amount, stride, height;
++
++		amount  = min(length, (u64)(4 * 1024 * 1024));
++		stride  = 16 * 4;
++		height  = amount / stride;
++
++		if (new_mem->mem_type == TTM_PL_VRAM &&
++		    nouveau_bo_tile_layout(nvbo)) {
++			ret = RING_SPACE(chan, 8);
++			if (ret)
++				return ret;
++
++			BEGIN_RING(chan, NvSubM2MF, 0x0200, 7);
++			OUT_RING  (chan, 0);
++			OUT_RING  (chan, 0);
++			OUT_RING  (chan, stride);
++			OUT_RING  (chan, height);
++			OUT_RING  (chan, 1);
++			OUT_RING  (chan, 0);
++			OUT_RING  (chan, 0);
++		} else {
++			ret = RING_SPACE(chan, 2);
++			if (ret)
++				return ret;
++
++			BEGIN_RING(chan, NvSubM2MF, 0x0200, 1);
++			OUT_RING  (chan, 1);
++		}
++		if (old_mem->mem_type == TTM_PL_VRAM &&
++		    nouveau_bo_tile_layout(nvbo)) {
++			ret = RING_SPACE(chan, 8);
++			if (ret)
++				return ret;
++
++			BEGIN_RING(chan, NvSubM2MF, 0x021c, 7);
++			OUT_RING  (chan, 0);
++			OUT_RING  (chan, 0);
++			OUT_RING  (chan, stride);
++			OUT_RING  (chan, height);
++			OUT_RING  (chan, 1);
++			OUT_RING  (chan, 0);
++			OUT_RING  (chan, 0);
++		} else {
++			ret = RING_SPACE(chan, 2);
++			if (ret)
++				return ret;
++
++			BEGIN_RING(chan, NvSubM2MF, 0x021c, 1);
++			OUT_RING  (chan, 1);
++		}
++
++		ret = RING_SPACE(chan, 14);
+ 		if (ret)
+ 			return ret;
+-		BEGIN_RING(chan, NvSubM2MF, 0x0200, 1);
+-		OUT_RING(chan, 1);
+-		BEGIN_RING(chan, NvSubM2MF, 0x021c, 1);
+-		OUT_RING(chan, 1);
++
++		BEGIN_RING(chan, NvSubM2MF, 0x0238, 2);
++		OUT_RING  (chan, upper_32_bits(src_offset));
++		OUT_RING  (chan, upper_32_bits(dst_offset));
++		BEGIN_RING(chan, NvSubM2MF, 0x030c, 8);
++		OUT_RING  (chan, lower_32_bits(src_offset));
++		OUT_RING  (chan, lower_32_bits(dst_offset));
++		OUT_RING  (chan, stride);
++		OUT_RING  (chan, stride);
++		OUT_RING  (chan, stride);
++		OUT_RING  (chan, height);
++		OUT_RING  (chan, 0x00000101);
++		OUT_RING  (chan, 0x00000000);
++		BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
++		OUT_RING  (chan, 0);
++
++		length -= amount;
++		src_offset += amount;
++		dst_offset += amount;
+ 	}
+
++	return 0;
++}
++
++static int
++nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
++		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
++{
++	u32 src_offset = old_mem->mm_node->start << PAGE_SHIFT;
++	u32 dst_offset = new_mem->mm_node->start << PAGE_SHIFT;
++	u32 page_count = new_mem->num_pages;
++	int ret;
++
++	ret = RING_SPACE(chan, 3);
++	if (ret)
++		return ret;
++
++	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
++	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_mem));
++	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_mem));
++
+ 	page_count = new_mem->num_pages;
+ 	while (page_count) {
+ 		int line_count = (page_count > 2047) ? 2047 : page_count;

-		if (dev_priv->card_type >= NV_50) {
-			ret = RING_SPACE(chan, 3);
-			if (ret)
-				return ret;
-			BEGIN_RING(chan, NvSubM2MF, 0x0238, 2);
-			OUT_RING(chan, upper_32_bits(src_offset));
-			OUT_RING(chan, upper_32_bits(dst_offset));
-		}
+ 		ret = RING_SPACE(chan, 11);
+ 		if (ret)
+ 			return ret;
++
+ 		BEGIN_RING(chan, NvSubM2MF,
+ 			   NV_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8);
+-		OUT_RING(chan, lower_32_bits(src_offset));
+-		OUT_RING(chan, lower_32_bits(dst_offset));
+-		OUT_RING(chan, PAGE_SIZE); /* src_pitch */
+-		OUT_RING(chan, PAGE_SIZE); /* dst_pitch */
+-		OUT_RING(chan, PAGE_SIZE); /* line_length */
+-		OUT_RING(chan, line_count);
+-		OUT_RING(chan, (1<<8)|(1<<0));
+-		OUT_RING(chan, 0);
++		OUT_RING  (chan, src_offset);
++		OUT_RING  (chan, dst_offset);
++		OUT_RING  (chan, PAGE_SIZE); /* src_pitch */
++		OUT_RING  (chan, PAGE_SIZE); /* dst_pitch */
++		OUT_RING  (chan, PAGE_SIZE); /* line_length */
++		OUT_RING  (chan, line_count);
++		OUT_RING  (chan, 0x00000101);
++		OUT_RING  (chan, 0x00000000);
+ 		BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_NOP, 1);
+-		OUT_RING(chan, 0);
++		OUT_RING  (chan, 0);
+
+ 		page_count -= line_count;
+ 		src_offset += (PAGE_SIZE * line_count);
+ 		dst_offset += (PAGE_SIZE * line_count);
+ 	}
+
+-	return nouveau_bo_move_accel_cleanup(chan, nvbo, evict, no_wait_reserve, no_wait_gpu, new_mem);
++	return 0;
++}
++
++static int
++nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
++		     bool no_wait_reserve, bool no_wait_gpu,
++		     struct ttm_mem_reg *new_mem)
++{
++	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
++	struct nouveau_bo *nvbo = nouveau_bo(bo);
++	struct nouveau_channel *chan;
++	int ret;
++
++	chan = nvbo->channel;
++	if (!chan || nvbo->no_vm) {
++		chan = dev_priv->channel;
++		mutex_lock(&chan->mutex);
++	}
++
++	if (dev_priv->card_type < NV_50)
++		ret = nv04_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
++	else
++		ret = nv50_bo_move_m2mf(chan, bo, &bo->mem, new_mem);
++	if (ret == 0) {
++		ret = nouveau_bo_move_accel_cleanup(chan, nvbo, evict,
++						    no_wait_reserve,
++						    no_wait_gpu, new_mem);
++	}
++
++	if (chan == dev_priv->channel)
++		mutex_unlock(&chan->mutex);
++	return ret;
+ }
+
+ static int
+@@ -674,7 +799,8 @@
+ 	if (dev_priv->card_type == NV_50) {
+ 		ret = nv50_mem_vm_bind_linear(dev,
+ 					      offset + dev_priv->vm_vram_base,
+-					      new_mem->size, nvbo->tile_flags,
++					      new_mem->size,
++					      nouveau_bo_tile_layout(nvbo),
+ 					      offset);
+ 		if (ret)
+ 			return ret;
+@@ -719,12 +845,6 @@
+ 	if (ret)
+ 		return ret;
+
+-	/* Software copy if the card isn't up and running yet. */
+-	if (!dev_priv->channel) {
+-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
+-		goto out;
+-	}
+-
+ 	/* Fake bo copy. */
+ 	if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
+ 		BUG_ON(bo->mem.mm_node != NULL);
+@@ -733,6 +853,12 @@
+ 		goto out;
+ 	}
+
++	/* Software copy if the card isn't up and running yet. */
++	if (!dev_priv->channel) {
++		ret = ttm_bo_move_memcpy(bo, evict, no_wait_reserve, no_wait_gpu, new_mem);
++		goto out;
++	}
++
+ 	/* Hardware assisted copy. */
+ 	if (new_mem->mem_type == TTM_PL_SYSTEM)
+ 		ret = nouveau_bo_move_flipd(bo, evict, intr, no_wait_reserve, no_wait_gpu, new_mem);
+@@ -808,7 +934,27 @@
+ static int
+ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
+ {
+-	return 0;
++	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
++	struct nouveau_bo *nvbo = nouveau_bo(bo);
++
++	/* as long as the bo isn't in vram, and isn't tiled, we've got
++	 * nothing to do here.
++	 */
++	if (bo->mem.mem_type != TTM_PL_VRAM) {
++		if (dev_priv->card_type < NV_50 ||
++		    !nouveau_bo_tile_layout(nvbo))
++			return 0;
++	}
++
++	/* make sure bo is in mappable vram */
++	if (bo->mem.mm_node->start + bo->mem.num_pages < dev_priv->fb_mappable_pages)
++		return 0;
++
++
++	nvbo->placement.fpfn = 0;
++	nvbo->placement.lpfn = dev_priv->fb_mappable_pages;
++	nouveau_bo_placement_set(nvbo, TTM_PL_VRAM, 0);
++	return ttm_bo_validate(bo, &nvbo->placement, false, true, false);
+ }
+
+ struct ttm_bo_driver nouveau_bo_driver = {
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_calc.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_calc.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_calc.c	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_calc.c	2010-10-15 02:04:44.323991737 +0200
+@@ -198,8 +198,8 @@
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 	struct nv_fifo_info fifo_data;
+ 	struct nv_sim_state sim_data;
+-	int MClk = nouveau_hw_get_clock(dev, MPLL);
+-	int NVClk = nouveau_hw_get_clock(dev, NVPLL);
++	int MClk = nouveau_hw_get_clock(dev, PLL_MEMORY);
++	int NVClk = nouveau_hw_get_clock(dev, PLL_CORE);
+ 	uint32_t cfg1 = nvReadFB(dev, NV04_PFB_CFG1);
+
+ 	sim_data.pclk_khz = VClk;
+@@ -234,7 +234,7 @@
+ }
+
+ static void
+-nv30_update_arb(int *burst, int *lwm)
++nv20_update_arb(int *burst, int *lwm)
+ {
+ 	unsigned int fifo_size, burst_size, graphics_lwm;
+
+@@ -251,14 +251,14 @@
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+-	if (dev_priv->card_type < NV_30)
++	if (dev_priv->card_type < NV_20)
+ 		nv04_update_arb(dev, vclk, bpp, burst, lwm);
+ 	else if ((dev->pci_device & 0xfff0) == 0x0240 /*CHIPSET_C51*/ ||
+ 		 (dev->pci_device & 0xfff0) == 0x03d0 /*CHIPSET_C512*/) {
+ 		*burst = 128;
+ 		*lwm = 0x0480;
+ 	} else
+-		nv30_update_arb(burst, lwm);
++		nv20_update_arb(burst, lwm);
+ }
+
+ static int
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_channel.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_channel.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_channel.c	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_channel.c	2010-10-15 02:04:44.326991773 +0200
+@@ -70,14 +70,8 @@
+ 		chan->pushbuf_base = pb->bo.mem.mm_node->start << PAGE_SHIFT;
+ 	}
+
+-	ret = nouveau_gpuobj_ref_add(dev, chan, 0, pushbuf, &chan->pushbuf);
+-	if (ret) {
+-		NV_ERROR(dev, "Error referencing pushbuf ctxdma: %d\n", ret);
+-		if (pushbuf != dev_priv->gart_info.sg_ctxdma)
+-			nouveau_gpuobj_del(dev, &pushbuf);
+-		return ret;
+-	}
+-
++	nouveau_gpuobj_ref(pushbuf, &chan->pushbuf);
++	nouveau_gpuobj_ref(NULL, &pushbuf);
+ 	return 0;
+ }
+
+@@ -113,53 +107,54 @@
+ int
+ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret,
+ 		      struct drm_file *file_priv,
+-		      uint32_t vram_handle, uint32_t tt_handle)
++		      uint32_t vram_handle, uint32_t gart_handle)
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+ 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+ 	struct nouveau_channel *chan;
+-	int channel, user;
+-	int ret;
++	unsigned long flags;
++	int user, ret;
+
+-	/*
+-	 * Alright, here is the full story
+-	 * Nvidia cards have multiple hw fifo contexts (praise them for that,
+-	 * no complicated crash-prone context switches)
+-	 * We allocate a new context for each app and let it write to it
+-	 * directly (woo, full userspace command submission !)
+-	 * When there are no more contexts, you lost
+-	 */
+-	for (channel = 0; channel < pfifo->channels; channel++) {
+-		if (dev_priv->fifos[channel] == NULL)
++	/* allocate and lock channel structure */
++	chan = kzalloc(sizeof(*chan), GFP_KERNEL);
++	if (!chan)
++		return -ENOMEM;
++	chan->dev = dev;
++	chan->file_priv = file_priv;
++	chan->vram_handle = vram_handle;
++	chan->gart_handle = gart_handle;
++
++	atomic_set(&chan->refcount, 1);
++	mutex_init(&chan->mutex);
++	mutex_lock(&chan->mutex);
++
++	/* allocate hw channel id */
++	spin_lock_irqsave(&dev_priv->channels.lock, flags);
++	for (chan->id = 0; chan->id < pfifo->channels; chan->id++) {
++		if (!dev_priv->channels.ptr[chan->id]) {
++			dev_priv->channels.ptr[chan->id] = chan;
+ 			break;
++		}
+ 	}
++	spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
+
+-	/* no more fifos. you lost. */
+-	if (channel == pfifo->channels)
+-		return -EINVAL;
++	if (chan->id == pfifo->channels) {
++		mutex_unlock(&chan->mutex);
++		kfree(chan);
++		return -ENODEV;
++	}
+
+-	dev_priv->fifos[channel] = kzalloc(sizeof(struct nouveau_channel),
+-					   GFP_KERNEL);
+-	if (!dev_priv->fifos[channel])
+-		return -ENOMEM;
+-	chan = dev_priv->fifos[channel];
++	NV_DEBUG(dev, "initialising channel %d\n", chan->id);
+ 	INIT_LIST_HEAD(&chan->nvsw.vbl_wait);
+ 	INIT_LIST_HEAD(&chan->fence.pending);
+-	chan->dev = dev;
+-	chan->id = channel;
+-	chan->file_priv = file_priv;
+-	chan->vram_handle = vram_handle;
+-	chan->gart_handle = tt_handle;
+-
+-	NV_INFO(dev, "Allocating FIFO number %d\n", channel);
+
+ 	/* Allocate DMA push buffer */
+ 	chan->pushbuf_bo = nouveau_channel_user_pushbuf_alloc(dev);
+ 	if (!chan->pushbuf_bo) {
+ 		ret = -ENOMEM;
+ 		NV_ERROR(dev, "pushbuf %d\n", ret);
+-		nouveau_channel_free(chan);
++		nouveau_channel_put(&chan);
+ 		return ret;
+ 	}
+
+@@ -167,18 +162,18 @@
+
+ 	/* Locate channel's user control regs */
+ 	if (dev_priv->card_type < NV_40)
+-		user = NV03_USER(channel);
++		user = NV03_USER(chan->id);
+ 	else
+ 	if (dev_priv->card_type < NV_50)
+-		user = NV40_USER(channel);
++		user = NV40_USER(chan->id);
+ 	else
+-		user = NV50_USER(channel);
++		user = NV50_USER(chan->id);
+
+ 	chan->user = ioremap(pci_resource_start(dev->pdev, 0) + user,
+ 								PAGE_SIZE);
+ 	if (!chan->user) {
+ 		NV_ERROR(dev, "ioremap of regs failed.\n");
+-		nouveau_channel_free(chan);
++		nouveau_channel_put(&chan);
+ 		return -ENOMEM;
+ 	}
+ 	chan->user_put = 0x40;
+@@ -188,15 +183,15 @@
+ 	ret = nouveau_notifier_init_channel(chan);
+ 	if (ret) {
+ 		NV_ERROR(dev, "ntfy %d\n", ret);
+-		nouveau_channel_free(chan);
++		nouveau_channel_put(&chan);
+ 		return ret;
+ 	}
+
+ 	/* Setup channel's default objects */
+-	ret = nouveau_gpuobj_channel_init(chan, vram_handle, tt_handle);
++	ret = nouveau_gpuobj_channel_init(chan, vram_handle, gart_handle);
+ 	if (ret) {
+ 		NV_ERROR(dev, "gpuobj %d\n", ret);
+-		nouveau_channel_free(chan);
++		nouveau_channel_put(&chan);
+ 		return ret;
+ 	}
+
+@@ -204,7 +199,7 @@
+ 	ret = nouveau_channel_pushbuf_ctxdma_init(chan);
+ 	if (ret) {
+ 		NV_ERROR(dev, "pbctxdma %d\n", ret);
+-		nouveau_channel_free(chan);
++		nouveau_channel_put(&chan);
+ 		return ret;
+ 	}
+
+@@ -214,14 +209,14 @@
+ 	/* Create a graphics context for new channel */
+ 	ret = pgraph->create_context(chan);
+ 	if (ret) {
+-		nouveau_channel_free(chan);
++		nouveau_channel_put(&chan);
+ 		return ret;
+ 	}
+
+ 	/* Construct inital RAMFC for new channel */
+ 	ret = pfifo->create_context(chan);
+ 	if (ret) {
+-		nouveau_channel_free(chan);
++		nouveau_channel_put(&chan);
+ 		return ret;
+ 	}
+
+@@ -229,35 +224,72 @@
+
+ 	ret = nouveau_dma_init(chan);
+ 	if (!ret)
+-		ret = nouveau_fence_init(chan);
++		ret = nouveau_fence_channel_init(chan);
+ 	if (ret) {
+-		nouveau_channel_free(chan);
++		nouveau_channel_put(&chan);
+ 		return ret;
+ 	}
+
+ 	nouveau_debugfs_channel_init(chan);
+
+-	NV_INFO(dev, "%s: initialised FIFO %d\n", __func__, channel);
++	NV_DEBUG(dev, "channel %d initialised\n", chan->id);
+ 	*chan_ret = chan;
+ 	return 0;
+ }
+
+-/* stops a fifo */
++struct nouveau_channel *
++nouveau_channel_get(struct drm_device *dev, struct drm_file *file_priv, int id)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	struct nouveau_channel *chan = ERR_PTR(-ENODEV);
++	unsigned long flags;
++
++	spin_lock_irqsave(&dev_priv->channels.lock, flags);
++	chan = dev_priv->channels.ptr[id];
++
++	if (unlikely(!chan || atomic_read(&chan->refcount) == 0)) {
++		spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
++		return ERR_PTR(-EINVAL);
++	}
++
++	if (unlikely(file_priv && chan->file_priv != file_priv)) {
++		spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
++		return ERR_PTR(-EINVAL);
++	}
++
++	atomic_inc(&chan->refcount);
++	spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
++
++	mutex_lock(&chan->mutex);
++	return chan;
++}
++
+ void
+-nouveau_channel_free(struct nouveau_channel *chan)
++nouveau_channel_put(struct nouveau_channel **pchan)
+ {
++	struct nouveau_channel *chan = *pchan;
+ 	struct drm_device *dev = chan->dev;
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+ 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
++	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+ 	unsigned long flags;
+ 	int ret;
+
+-	NV_INFO(dev, "%s: freeing fifo %d\n", __func__, chan->id);
++	/* unlock the channel */
++	mutex_unlock(&chan->mutex);
++
++	/* decrement the refcount, and we're done if there's still refs */
++	if (likely(!atomic_dec_and_test(&chan->refcount))) {
++		*pchan = NULL;
++		return;
++	}
+
++	/* noone wants the channel anymore */
++	NV_DEBUG(dev, "freeing channel %d\n", chan->id);
+ 	nouveau_debugfs_channel_fini(chan);
++	*pchan = NULL;
+
+-	/* Give outstanding push buffers a chance to complete */
++	/* give it chance to idle */
+ 	nouveau_fence_update(chan);
+ 	if (chan->fence.sequence != chan->fence.sequence_ack) {
+ 		struct nouveau_fence *fence = NULL;
+@@ -272,13 +304,13 @@
+ 			NV_ERROR(dev, "Failed to idle channel %d.\n", chan->id);
+ 	}
+
+-	/* Ensure all outstanding fences are signaled.  They should be if the
++	/* ensure all outstanding fences are signaled.  they should be if the
+ 	 * above attempts at idling were OK, but if we failed this'll tell TTM
+ 	 * we're done with the buffers.
+ 	 */
+-	nouveau_fence_fini(chan);
++	nouveau_fence_channel_fini(chan);
+
+-	/* This will prevent pfifo from switching channels. */
++	/* boot it off the hardware */
+ 	pfifo->reassign(dev, false);
+
+ 	/* We want to give pgraph a chance to idle and get rid of all potential
+@@ -307,9 +339,17 @@
+
+ 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+-	/* Release the channel's resources */
+-	nouveau_gpuobj_ref_del(dev, &chan->pushbuf);
++	/* aside from its resources, the channel should now be dead,
++	 * remove it from the channel list
++	 */
++	spin_lock_irqsave(&dev_priv->channels.lock, flags);
++	dev_priv->channels.ptr[chan->id] = NULL;
++	spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
++
++	/* destroy any resources the channel owned */
++	nouveau_gpuobj_ref(NULL, &chan->pushbuf);
+ 	if (chan->pushbuf_bo) {
++		nouveau_bo_unmap(chan->pushbuf_bo);
+ 		nouveau_bo_unpin(chan->pushbuf_bo);
+ 		nouveau_bo_ref(NULL, &chan->pushbuf_bo);
+ 	}
+@@ -318,7 +358,6 @@
+ 	if (chan->user)
+ 		iounmap(chan->user);
+
+-	dev_priv->fifos[chan->id] = NULL;
+ 	kfree(chan);
+ }
+
+@@ -328,31 +367,20 @@
+ {
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+ 	struct nouveau_engine *engine = &dev_priv->engine;
++	struct nouveau_channel *chan;
+ 	int i;
+
+ 	NV_DEBUG(dev, "clearing FIFO enables from file_priv\n");
+ 	for (i = 0; i < engine->fifo.channels; i++) {
+-		struct nouveau_channel *chan = dev_priv->fifos[i];
++		chan = nouveau_channel_get(dev, file_priv, i);
++		if (IS_ERR(chan))
++			continue;
+
+-		if (chan && chan->file_priv == file_priv)
+-			nouveau_channel_free(chan);
++		atomic_dec(&chan->refcount);
++		nouveau_channel_put(&chan);
+ 	}
+ }
+
+-int
+-nouveau_channel_owner(struct drm_device *dev, struct drm_file *file_priv,
+-		      int channel)
+-{
+-	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_engine *engine = &dev_priv->engine;
+-
+-	if (channel >= engine->fifo.channels)
+-		return 0;
+-	if (dev_priv->fifos[channel] == NULL)
+-		return 0;
+-
+-	return (dev_priv->fifos[channel]->file_priv == file_priv);
+-}
+
+ /***********************************
+  * ioctls wrapping the functions
+@@ -400,24 +428,26 @@
+ 	/* Named memory object area */
+ 	ret = drm_gem_handle_create(file_priv, chan->notifier_bo->gem,
+ 				    &init->notifier_handle);
+-	if (ret) {
+-		nouveau_channel_free(chan);
+-		return ret;
+-	}
+
+-	return 0;
++	if (ret == 0)
++		atomic_inc(&chan->refcount); /* userspace reference */
++	nouveau_channel_put(&chan);
++	return ret;
+ }
+
+ static int
+ nouveau_ioctl_fifo_free(struct drm_device *dev, void *data,
+ 			struct drm_file *file_priv)
+ {
+-	struct drm_nouveau_channel_free *cfree = data;
++	struct drm_nouveau_channel_free *req = data;
+ 	struct nouveau_channel *chan;
+
+-	NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(cfree->channel, file_priv, chan);
++	chan = nouveau_channel_get(dev, file_priv, req->channel);
++	if (IS_ERR(chan))
++		return PTR_ERR(chan);
+
+-	nouveau_channel_free(chan);
++	atomic_dec(&chan->refcount);
++	nouveau_channel_put(&chan);
+ 	return 0;
+ }
+
+@@ -426,18 +456,18 @@
+  ***********************************/
+
+ struct drm_ioctl_desc nouveau_ioctls[] = {
+-	DRM_IOCTL_DEF_DRV(NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_AUTH),
+-	DRM_IOCTL_DEF_DRV(NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+-	DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_AUTH),
+-	DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_FREE, nouveau_ioctl_fifo_free, DRM_AUTH),
+-	DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_ioctl_grobj_alloc, DRM_AUTH),
+-	DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_ioctl_notifier_alloc, DRM_AUTH),
+-	DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_GETPARAM, nouveau_ioctl_getparam, DRM_UNLOCKED|DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_SETPARAM, nouveau_ioctl_setparam, DRM_UNLOCKED|DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_ALLOC, nouveau_ioctl_fifo_alloc, DRM_UNLOCKED|DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_CHANNEL_FREE, nouveau_ioctl_fifo_free, DRM_UNLOCKED|DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_ioctl_grobj_alloc, DRM_UNLOCKED|DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_ioctl_notifier_alloc, DRM_UNLOCKED|DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_ioctl_gpuobj_free, DRM_UNLOCKED|DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_UNLOCKED|DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_UNLOCKED|DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_UNLOCKED|DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_UNLOCKED|DRM_AUTH),
++	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_UNLOCKED|DRM_AUTH),
+ };
+
+ int nouveau_max_ioctl = DRM_ARRAY_SIZE(nouveau_ioctls);
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_connector.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_connector.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_connector.c	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_connector.c	2010-10-15 02:04:44.327991785 +0200
+@@ -76,6 +76,22 @@
+ 	return NULL;
+ }
+
++/*TODO: This could use improvement, and learn to handle the fixed
++ *      BIOS tables etc.  It's fine currently, for its only user.
++ */
++int
++nouveau_connector_bpp(struct drm_connector *connector)
++{
++	struct nouveau_connector *nv_connector = nouveau_connector(connector);
++
++	if (nv_connector->edid && nv_connector->edid->revision >= 4) {
++		u8 bpc = ((nv_connector->edid->input & 0x70) >> 3) + 4;
++		if (bpc > 4)
++			return bpc;
++	}
++
++	return 18;
++}
+
+ static void
+ nouveau_connector_destroy(struct drm_connector *drm_connector)
+@@ -130,6 +146,36 @@
+ 	return NULL;
+ }
+
++static struct nouveau_encoder *
++nouveau_connector_of_detect(struct drm_connector *connector)
++{
++#ifdef __powerpc__
++	struct drm_device *dev = connector->dev;
++	struct nouveau_connector *nv_connector = nouveau_connector(connector);
++	struct nouveau_encoder *nv_encoder;
++	struct device_node *cn, *dn = pci_device_to_OF_node(dev->pdev);
++
++	if (!dn ||
++	    !((nv_encoder = find_encoder_by_type(connector, OUTPUT_TMDS)) ||
++	      (nv_encoder = find_encoder_by_type(connector, OUTPUT_ANALOG))))
++		return NULL;
++
++	for_each_child_of_node(dn, cn) {
++		const char *name = of_get_property(cn, "name", NULL);
++		const void *edid = of_get_property(cn, "EDID", NULL);
++		int idx = name ? name[strlen(name) - 1] - 'A' : 0;
++
++		if (nv_encoder->dcb->i2c_index == idx && edid) {
++			nv_connector->edid =
++				kmemdup(edid, EDID_LENGTH, GFP_KERNEL);
++			of_node_put(cn);
++			return nv_encoder;
++		}
++	}
++#endif
++	return NULL;
++}
++
+ static void
+ nouveau_connector_set_encoder(struct drm_connector *connector,
+ 			      struct nouveau_encoder *nv_encoder)
+@@ -225,6 +271,12 @@
+ 		return connector_status_connected;
+ 	}
+
++	nv_encoder = nouveau_connector_of_detect(connector);
++	if (nv_encoder) {
++		nouveau_connector_set_encoder(connector, nv_encoder);
++		return connector_status_connected;
++	}
++
+ detect_analog:
+ 	nv_encoder = find_encoder_by_type(connector, OUTPUT_ANALOG);
+ 	if (!nv_encoder && !nouveau_tv_disable)
+@@ -589,11 +641,28 @@
+ 	return ret;
+ }
+
++static unsigned
++get_tmds_link_bandwidth(struct drm_connector *connector)
++{
++	struct nouveau_connector *nv_connector = nouveau_connector(connector);
++	struct drm_nouveau_private *dev_priv = connector->dev->dev_private;
++	struct dcb_entry *dcb = nv_connector->detected_encoder->dcb;
++
++	if (dcb->location != DCB_LOC_ON_CHIP ||
++	    dev_priv->chipset >= 0x46)
++		return 165000;
++	else if (dev_priv->chipset >= 0x40)
++		return 155000;
++	else if (dev_priv->chipset >= 0x18)
++		return 135000;
++	else
++		return 112000;
++}
++
+ static int
+ nouveau_connector_mode_valid(struct drm_connector *connector,
+ 			     struct drm_display_mode *mode)
+ {
+-	struct drm_nouveau_private *dev_priv = connector->dev->dev_private;
+ 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
+ 	struct nouveau_encoder *nv_encoder = nv_connector->detected_encoder;
+ 	struct drm_encoder *encoder = to_drm_encoder(nv_encoder);
+@@ -611,11 +680,9 @@
+ 		max_clock = 400000;
+ 		break;
+ 	case OUTPUT_TMDS:
+-		if ((dev_priv->card_type >= NV_50 && !nouveau_duallink) ||
+-		    !nv_encoder->dcb->duallink_possible)
+-			max_clock = 165000;
+-		else
+-			max_clock = 330000;
++		max_clock = get_tmds_link_bandwidth(connector);
++		if (nouveau_duallink && nv_encoder->dcb->duallink_possible)
++			max_clock *= 2;
+ 		break;
+ 	case OUTPUT_ANALOG:
+ 		max_clock = nv_encoder->dcb->crtconf.maxfreq;
+@@ -630,7 +697,7 @@
+ 		else
+ 			max_clock = nv_encoder->dp.link_nr * 162000;
+
+-		clock *= 3;
++		clock = clock * nouveau_connector_bpp(connector) / 8;
+ 		break;
+ 	default:
+ 		BUG_ON(1);
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_connector.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_connector.h
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_connector.h	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_connector.h	2010-10-15 02:04:44.329991810 +0200
+@@ -55,4 +55,7 @@
+ void
+ nouveau_connector_set_polling(struct drm_connector *);
+
++int
++nouveau_connector_bpp(struct drm_connector *);
++
+ #endif /* __NOUVEAU_CONNECTOR_H__ */
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_debugfs.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_debugfs.c	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_debugfs.c	2010-10-15 02:04:44.330991823 +0200
+@@ -157,7 +157,23 @@
+ 	return 0;
+ }
+
++static int
++nouveau_debugfs_evict_vram(struct seq_file *m, void *data)
++{
++	struct drm_info_node *node = (struct drm_info_node *) m->private;
++	struct drm_nouveau_private *dev_priv = node->minor->dev->dev_private;
++	int ret;
++
++	ret = ttm_bo_evict_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM);
++	if (ret)
++		seq_printf(m, "failed: %d", ret);
++	else
++		seq_printf(m, "succeeded\n");
++	return 0;
++}
++
+ static struct drm_info_list nouveau_debugfs_list[] = {
++	{ "evict_vram", nouveau_debugfs_evict_vram, 0, NULL },
+ 	{ "chipset", nouveau_debugfs_chipset_info, 0, NULL },
+ 	{ "memory", nouveau_debugfs_memory_info, 0, NULL },
+ 	{ "vbios.rom", nouveau_debugfs_vbios_image, 0, NULL },
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_dma.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_dma.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_dma.c	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_dma.c	2010-10-15 02:04:44.331991836 +0200
+@@ -28,6 +28,7 @@
+ #include "drm.h"
+ #include "nouveau_drv.h"
+ #include "nouveau_dma.h"
++#include "nouveau_ramht.h"
+
+ void
+ nouveau_dma_pre_init(struct nouveau_channel *chan)
+@@ -58,26 +59,17 @@
+ {
+ 	struct drm_device *dev = chan->dev;
+ 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+-	struct nouveau_gpuobj *m2mf = NULL;
+-	struct nouveau_gpuobj *nvsw = NULL;
++	struct nouveau_gpuobj *obj = NULL;
+ 	int ret, i;
+
+ 	/* Create NV_MEMORY_TO_MEMORY_FORMAT for buffer moves */
+ 	ret = nouveau_gpuobj_gr_new(chan, dev_priv->card_type < NV_50 ?
+-				    0x0039 : 0x5039, &m2mf);
++				    0x0039 : 0x5039, &obj);
+ 	if (ret)
+ 		return ret;
+
+-	ret = nouveau_gpuobj_ref_add(dev, chan, NvM2MF, m2mf, NULL);
+-	if (ret)
+-		return ret;
+-
+-	/* Create an NV_SW object for various sync purposes */
+-	ret = nouveau_gpuobj_sw_new(chan, NV_SW, &nvsw);
+-	if (ret)
+-		return ret;
+-
+-	ret = nouveau_gpuobj_ref_add(dev, chan, NvSw, nvsw, NULL);
++	ret = nouveau_ramht_insert(chan, NvM2MF, obj);
++	nouveau_gpuobj_ref(NULL, &obj);
+ 	if (ret)
+ 		return ret;
+
+@@ -91,11 +83,6 @@
+ 	if (ret)
+ 		return ret;
+
+-	/* Map M2MF notifier object - fbcon. */
+-	ret = nouveau_bo_map(chan->notifier_bo);
+-	if (ret)
+-		return ret;
+-
+ 	/* Insert NOPS for NOUVEAU_DMA_SKIPS */
+ 	ret = RING_SPACE(chan, NOUVEAU_DMA_SKIPS);
+ 	if (ret)
+@@ -113,13 +100,6 @@
+ 	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
+ 	OUT_RING(chan, NvNotify0);
+
+-	/* Initialise NV_SW */
+-	ret = RING_SPACE(chan, 2);
+-	if (ret)
+-		return ret;
+-	BEGIN_RING(chan, NvSubSw, 0, 1);
+-	OUT_RING(chan, NvSw);
+-
+ 	/* Sit back and pray the channel works.. */
+ 	FIRE_RING(chan);
+
+@@ -217,7 +197,7 @@
+
+ 		chan->dma.ib_free = get - chan->dma.ib_put;
+ 		if (chan->dma.ib_free <= 0)
+-			chan->dma.ib_free += chan->dma.ib_max + 1;
++			chan->dma.ib_free += chan->dma.ib_max;
+ 	}
+
+ 	return 0;
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_dma.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_dma.h
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_dma.h	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_dma.h	2010-10-15 02:04:44.333991860 +0200
+@@ -72,6 +72,7 @@
+ 	NvGdiRect	= 0x8000000c,
+ 	NvImageBlit	= 0x8000000d,
+ 	NvSw		= 0x8000000e,
++	NvSema		= 0x8000000f,
+
+ 	/* G80+ display objects */
+ 	NvEvoVRAM	= 0x01000000,
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_dp.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_dp.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_dp.c	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_dp.c	2010-10-15 02:04:44.334991872 +0200
+@@ -317,7 +317,8 @@
+ 		return false;
+
+ 	config[0] = nv_encoder->dp.link_nr;
+-	if (nv_encoder->dp.dpcd_version >= 0x11)
++	if (nv_encoder->dp.dpcd_version >= 0x11 &&
++	    nv_encoder->dp.enhanced_frame)
+ 		config[0] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+
+ 	ret = nouveau_dp_lane_count_set(encoder, config[0]);
+@@ -468,10 +469,12 @@
+ 	    !nv_encoder->dcb->dpconf.link_bw)
+ 		nv_encoder->dp.link_bw = DP_LINK_BW_1_62;
+
+-	nv_encoder->dp.link_nr = dpcd[2] & 0xf;
++	nv_encoder->dp.link_nr = dpcd[2] & DP_MAX_LANE_COUNT_MASK;
+ 	if (nv_encoder->dp.link_nr > nv_encoder->dcb->dpconf.link_nr)
+ 		nv_encoder->dp.link_nr = nv_encoder->dcb->dpconf.link_nr;
+
++	nv_encoder->dp.enhanced_frame = (dpcd[2] & DP_ENHANCED_FRAME_CAP);
++
+ 	return true;
+ }
+
+@@ -524,7 +527,8 @@
+ 	nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl | 0x80000000);
+ 	nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl);
+ 	nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl | 0x00010000);
+-	if (!nv_wait(NV50_AUXCH_CTRL(index), 0x00010000, 0x00000000)) {
++	if (!nv_wait(dev, NV50_AUXCH_CTRL(index),
++		     0x00010000, 0x00000000)) {
+ 		NV_ERROR(dev, "expected bit 16 == 0, got 0x%08x\n",
+ 			 nv_rd32(dev, NV50_AUXCH_CTRL(index)));
+ 		ret = -EBUSY;
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_drv.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_drv.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_drv.c	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_drv.c	2010-10-15 02:04:44.336991898 +0200
+@@ -31,13 +31,14 @@
+ #include "nouveau_hw.h"
+ #include "nouveau_fb.h"
+ #include "nouveau_fbcon.h"
++#include "nouveau_pm.h"
+ #include "nv50_display.h"
+
+ #include "drm_pciids.h"
+
+-MODULE_PARM_DESC(noagp, "Disable AGP");
+-int nouveau_noagp;
+-module_param_named(noagp, nouveau_noagp, int, 0400);
++MODULE_PARM_DESC(agpmode, "AGP mode (0 to disable AGP)");
++int nouveau_agpmode = -1;
++module_param_named(agpmode, nouveau_agpmode, int, 0400);
+
+ MODULE_PARM_DESC(modeset, "Enable kernel modesetting");
+ static int nouveau_modeset = -1; /* kms */
+@@ -79,6 +80,10 @@
+ int nouveau_nofbaccel = 0;
+ module_param_named(nofbaccel, nouveau_nofbaccel, int, 0400);
+
++MODULE_PARM_DESC(force_post, "Force POST");
++int nouveau_force_post = 0;
++module_param_named(force_post, nouveau_force_post, int, 0400);
++
+ MODULE_PARM_DESC(override_conntype, "Ignore DCB connector type");
+ int nouveau_override_conntype = 0;
+ module_param_named(override_conntype, nouveau_override_conntype, int, 0400);
+@@ -102,6 +107,14 @@
+ int nouveau_reg_debug;
+ module_param_named(reg_debug, nouveau_reg_debug, int, 0600);
+
++MODULE_PARM_DESC(perflvl, "Performance level (default: boot)\n");
++char *nouveau_perflvl;
++module_param_named(perflvl, nouveau_perflvl, charp, 0400);
++
++MODULE_PARM_DESC(perflvl_wr, "Allow perflvl changes (warning: dangerous!)\n");
++int nouveau_perflvl_wr;
++module_param_named(perflvl_wr, nouveau_perflvl_wr, int, 0400);
++
+ int nouveau_fbpercrtc;
+ #if 0
+ module_param_named(fbpercrtc, nouveau_fbpercrtc, int, 0400);
+@@ -182,9 +195,8 @@
+ 	for (i = 0; i < pfifo->channels; i++) {
+ 		struct nouveau_fence *fence = NULL;
+
+-		chan = dev_priv->fifos[i];
+-		if (!chan || (dev_priv->card_type >= NV_50 &&
+-			      chan == dev_priv->fifos[0]))
++		chan = dev_priv->channels.ptr[i];
++		if (!chan || !chan->pushbuf_bo)
+ 			continue;
+
+ 		ret = nouveau_fence_new(chan, &fence, true);
+@@ -271,6 +283,8 @@
+ 	if (ret)
+ 		return ret;
+
++	nouveau_pm_resume(dev);
++
+ 	if (dev_priv->gart_info.type == NOUVEAU_GART_AGP) {
+ 		ret = nouveau_mem_init_agp(dev);
+ 		if (ret) {
+@@ -298,7 +312,7 @@
+ 		int j;
+
+ 		for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+-			chan = dev_priv->fifos[i];
++			chan = dev_priv->channels.ptr[i];
+ 			if (!chan || !chan->pushbuf_bo)
+ 				continue;
+
+@@ -434,6 +448,12 @@
+ 	if (!nouveau_modeset)
+ 		return 0;
+
++#if defined(CONFIG_FRAMEBUFFER_CONSOLE_MODULE)
++	request_module("fbcon");
++#elif !defined(CONFIG_FRAMEBUFFER_CONSOLE)
++	printk(KERN_INFO "CONFIG_FRAMEBUFFER_CONSOLE was not enabled.  You won't get any console output.\n");
++#endif
++
+ 	nouveau_register_dsm_handler();
+ 	return drm_init(&driver);
+ }
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_drv.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_drv.h
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_drv.h	2010-10-06 22:39:52.000000000 +0200
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_drv.h	2010-10-15 02:04:44.340991947 +0200
+@@ -96,10 +96,12 @@
+ 	struct nouveau_tile_reg *tile;
+
+ 	struct drm_gem_object *gem;
+-	struct drm_file *cpu_filp;
+ 	int pin_refcnt;
+ };
+
++#define nouveau_bo_tile_layout(nvbo) \
++	((nvbo)->tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK)
++
+ static inline struct nouveau_bo *
+ nouveau_bo(struct ttm_buffer_object *bo)
+ {
+@@ -133,22 +135,24 @@
+ #define NVOBJ_ENGINE_DISPLAY	2
+ #define NVOBJ_ENGINE_INT	0xdeadbeef
+
+-#define NVOBJ_FLAG_ALLOW_NO_REFS	(1 << 0)
+ #define NVOBJ_FLAG_ZERO_ALLOC		(1 << 1)
+ #define NVOBJ_FLAG_ZERO_FREE		(1 << 2)
+-#define NVOBJ_FLAG_FAKE			(1 << 3)
+ struct nouveau_gpuobj {
++	struct drm_device *dev;
++	struct kref refcount;
+ 	struct list_head list;
+
+-	struct nouveau_channel *im_channel;
+ 	struct drm_mm_node *im_pramin;
+ 	struct nouveau_bo *im_backing;
+-	uint32_t im_backing_start;
+ 	uint32_t *im_backing_suspend;
+ 	int im_bound;
+
+ 	uint32_t flags;
+-	int refcount;
++
++	u32 size;
++	u32 pinst;
++	u32 cinst;
++	u64 vinst;
+
+ 	uint32_t engine;
+ 	uint32_t class;
+@@ -157,20 +161,13 @@
+ 	void *priv;
+ };
+
+-struct nouveau_gpuobj_ref {
+-	struct list_head list;
+-
+-	struct nouveau_gpuobj *gpuobj;
+-	uint32_t instance;
+-
+-	struct nouveau_channel *channel;
+-	int handle;
+-};
+-
+ struct nouveau_channel {
+ 	struct drm_device *dev;
+ 	int id;
+
++	atomic_t refcount;
++	struct mutex mutex;
++
+ 	/* owner of this fifo */
+ 	struct drm_file *file_priv;
+ 	/* mapping of the fifo itself */
+@@ -192,33 +189,32 @@
+ 	} fence;
+
+ 	/* DMA push buffer */
+-	struct nouveau_gpuobj_ref *pushbuf;
+-	struct nouveau_bo         *pushbuf_bo;
+-	uint32_t                   pushbuf_base;
++	struct nouveau_gpuobj *pushbuf;
++	struct nouveau_bo     *pushbuf_bo;
++	uint32_t               pushbuf_base;
+
+ 	/* Notifier memory */
+ 	struct nouveau_bo *notifier_bo;
+ 	struct drm_mm notifier_heap;
+
+ 	/* PFIFO context */
+-	struct nouveau_gpuobj_ref *ramfc;
+-	struct nouveau_gpuobj_ref *cache;
++	struct nouveau_gpuobj *ramfc;
++	struct nouveau_gpuobj *cache;
+
+ 	/* PGRAPH context */
+ 	/* XXX may be merge 2 pointers as private data ??? */
+-	struct nouveau_gpuobj_ref *ramin_grctx;
++	struct nouveau_gpuobj *ramin_grctx;
+ 	void *pgraph_ctx;
+
+ 	/* NV50 VM */
+-	struct nouveau_gpuobj     *vm_pd;
+-	struct nouveau_gpuobj_ref *vm_gart_pt;
+-	struct nouveau_gpuobj_ref *vm_vram_pt[NV50_VM_VRAM_NR];
++	struct nouveau_gpuobj *vm_pd;
++	struct nouveau_gpuobj *vm_gart_pt;
++	struct nouveau_gpuobj *vm_vram_pt[NV50_VM_VRAM_NR];
+
+ 	/* Objects */
+-	struct nouveau_gpuobj_ref *ramin; /* Private instmem */
+-	struct drm_mm              ramin_heap; /* Private PRAMIN heap */
+-	struct nouveau_gpuobj_ref *ramht; /* Hash table */
+-	struct list_head           ramht_refs; /* Objects referenced by RAMHT */
++	struct nouveau_gpuobj *ramin; /* Private instmem */
++	struct drm_mm          ramin_heap; /* Private PRAMIN heap */
++	struct nouveau_ramht  *ramht; /* Hash table */
+
+ 	/* GPU object info for stuff used in-kernel (mm_enabled) */
+ 	uint32_t m2mf_ntfy;
+@@ -296,7 +292,7 @@
+ struct nouveau_fifo_engine {
+ 	int channels;
+
+-	struct nouveau_gpuobj_ref *playlist[2];
++	struct nouveau_gpuobj *playlist[2];
+ 	int cur_playlist;
+
+ 	int  (*init)(struct drm_device *);
+@@ -305,7 +301,6 @@
+ 	void (*disable)(struct drm_device *);
+ 	void (*enable)(struct drm_device *);
+ 	bool (*reassign)(struct drm_device *, bool enable);
+-	bool (*cache_flush)(struct drm_device *dev);
+ 	bool (*cache_pull)(struct drm_device *dev, bool enable);
+
+ 	int  (*channel_id)(struct drm_device *);
+@@ -334,7 +329,7 @@
+ 	int grctx_size;
+
+ 	/* NV2x/NV3x context table (0x400780) */
+-	struct nouveau_gpuobj_ref *ctx_table;
++	struct nouveau_gpuobj *ctx_table;
+
+ 	int  (*init)(struct drm_device *);
+ 	void (*takedown)(struct drm_device *);
+@@ -369,6 +364,92 @@
+ 	void (*irq_enable)(struct drm_device *, enum dcb_gpio_tag, bool on);
+ };
+
++struct nouveau_pm_voltage_level {
++	u8 voltage;
++	u8 vid;
++};
++
++struct nouveau_pm_voltage {
++	bool supported;
++	u8 vid_mask;
++
++	struct nouveau_pm_voltage_level *level;
++	int nr_level;
++};
++
++#define NOUVEAU_PM_MAX_LEVEL 8
++struct nouveau_pm_level {
++	struct device_attribute dev_attr;
++	char name[32];
++	int id;
++
++	u32 core;
++	u32 memory;
++	u32 shader;
++	u32 unk05;
++
++	u8 voltage;
++	u8 fanspeed;
++
++	u16 memscript;
++};
++
++struct nouveau_pm_temp_sensor_constants {
++	u16 offset_constant;
++	s16 offset_mult;
++	u16 offset_div;
++	u16 slope_mult;
++	u16 slope_div;
++};
++
++struct nouveau_pm_threshold_temp {
++	s16 critical;
++	s16 down_clock;
++	s16 fan_boost;
++};
++
++struct nouveau_pm_memtiming {
++	u32 reg_100220;
++	u32 reg_100224;
++	u32 reg_100228;
++	u32 reg_10022c;
++	u32 reg_100230;
++	u32 reg_100234;
++	u32 reg_100238;
++	u32 reg_10023c;
++};
++
++struct nouveau_pm_memtimings {
++	bool supported;
++	struct nouveau_pm_memtiming *timing;
++	int nr_timing;
++};
++
++struct nouveau_pm_engine {
++	struct nouveau_pm_voltage voltage;
++	struct nouveau_pm_level perflvl[NOUVEAU_PM_MAX_LEVEL];
++	int nr_perflvl;
++	struct nouveau_pm_memtimings memtimings;
++	struct nouveau_pm_temp_sensor_constants sensor_constants;
++	struct nouveau_pm_threshold_temp threshold_temp;
++
++	struct nouveau_pm_level boot;
++	struct nouveau_pm_level *cur;
++
++	struct 
device *hwmon; ++ struct notifier_block acpi_nb; ++ ++ int (*clock_get)(struct drm_device *, u32 id); ++ void *(*clock_pre)(struct drm_device *, struct nouveau_pm_level *, ++ u32 id, int khz); ++ void (*clock_set)(struct drm_device *, void *); ++ int (*voltage_get)(struct drm_device *); ++ int (*voltage_set)(struct drm_device *, int voltage); ++ int (*fanspeed_get)(struct drm_device *); ++ int (*fanspeed_set)(struct drm_device *, int fanspeed); ++ int (*temp_get)(struct drm_device *); ++}; ++ + struct nouveau_engine { + struct nouveau_instmem_engine instmem; + struct nouveau_mc_engine mc; +@@ -378,6 +459,7 @@ + struct nouveau_fifo_engine fifo; + struct nouveau_display_engine display; + struct nouveau_gpio_engine gpio; ++ struct nouveau_pm_engine pm; + }; + + struct nouveau_pll_vals { +@@ -409,13 +491,13 @@ + }; + + struct nv04_crtc_reg { +- unsigned char MiscOutReg; /* */ ++ unsigned char MiscOutReg; + uint8_t CRTC[0xa0]; + uint8_t CR58[0x10]; + uint8_t Sequencer[5]; + uint8_t Graphics[9]; + uint8_t Attribute[21]; +- unsigned char DAC[768]; /* Internal Colorlookuptable */ ++ unsigned char DAC[768]; + + /* PCRTC regs */ + uint32_t fb_start; +@@ -463,43 +545,9 @@ + }; + + struct nv04_mode_state { +- uint32_t bpp; +- uint32_t width; +- uint32_t height; +- uint32_t interlace; +- uint32_t repaint0; +- uint32_t repaint1; +- uint32_t screen; +- uint32_t scale; +- uint32_t dither; +- uint32_t extra; +- uint32_t fifo; +- uint32_t pixel; +- uint32_t horiz; +- int arbitration0; +- int arbitration1; +- uint32_t pll; +- uint32_t pllB; +- uint32_t vpll; +- uint32_t vpll2; +- uint32_t vpllB; +- uint32_t vpll2B; ++ struct nv04_crtc_reg crtc_reg[2]; + uint32_t pllsel; + uint32_t sel_clk; +- uint32_t general; +- uint32_t crtcOwner; +- uint32_t head; +- uint32_t head2; +- uint32_t cursorConfig; +- uint32_t cursor0; +- uint32_t cursor1; +- uint32_t cursor2; +- uint32_t timingH; +- uint32_t timingV; +- uint32_t displayV; +- uint32_t crtcSync; +- +- struct nv04_crtc_reg crtc_reg[2]; + }; + + enum nouveau_card_type { +@@ -522,8 +570,14 @@ + int flags; + + void __iomem *mmio; ++ ++ spinlock_t ramin_lock; + void __iomem *ramin; +- uint32_t ramin_size; ++ u32 ramin_size; ++ u32 ramin_base; ++ bool ramin_available; ++ struct drm_mm ramin_heap; ++ struct list_head gpuobj_list; + + struct nouveau_bo *vga_ram; + +@@ -540,8 +594,16 @@ + atomic_t validate_sequence; + } ttm; + +- int fifo_alloc_count; +- struct nouveau_channel *fifos[NOUVEAU_MAX_CHANNEL_NR]; ++ struct { ++ spinlock_t lock; ++ struct drm_mm heap; ++ struct nouveau_bo *bo; ++ } fence; ++ ++ struct { ++ spinlock_t lock; ++ struct nouveau_channel *ptr[NOUVEAU_MAX_CHANNEL_NR]; ++ } channels; + + struct nouveau_engine engine; + struct nouveau_channel *channel; +@@ -550,15 +612,11 @@ + spinlock_t context_switch_lock; + + /* RAMIN configuration, RAMFC, RAMHT and RAMRO offsets */ +- struct nouveau_gpuobj *ramht; ++ struct nouveau_ramht *ramht; ++ struct nouveau_gpuobj *ramfc; ++ struct nouveau_gpuobj *ramro; ++ + uint32_t ramin_rsvd_vram; +- uint32_t ramht_offset; +- uint32_t ramht_size; +- uint32_t ramht_bits; +- uint32_t ramfc_offset; +- uint32_t ramfc_size; +- uint32_t ramro_offset; +- uint32_t ramro_size; + + struct { + enum { +@@ -576,14 +634,12 @@ + } gart_info; + + /* nv10-nv40 tiling regions */ +- struct { +- struct nouveau_tile_reg reg[NOUVEAU_MAX_TILE_NR]; +- spinlock_t lock; +- } tile; ++ struct nouveau_tile_reg tile[NOUVEAU_MAX_TILE_NR]; + + /* VRAM/fb configuration */ + uint64_t vram_size; + uint64_t vram_sys_base; ++ u32 vram_rblock_size; + + 
uint64_t fb_phys; + uint64_t fb_available_size; +@@ -600,10 +656,6 @@ + struct nouveau_gpuobj *vm_vram_pt[NV50_VM_VRAM_NR]; + int vm_vram_pt_nr; + +- struct drm_mm ramin_heap; +- +- struct list_head gpuobj_list; +- + struct nvbios vbios; + + struct nv04_mode_state mode_reg; +@@ -634,6 +686,12 @@ + }; + + static inline struct drm_nouveau_private * ++nouveau_private(struct drm_device *dev) ++{ ++ return dev->dev_private; ++} ++ ++static inline struct drm_nouveau_private * + nouveau_bdev(struct ttm_bo_device *bd) + { + return container_of(bd, struct drm_nouveau_private, ttm.bdev); +@@ -658,18 +716,8 @@ + return 0; + } + +-#define NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(id, cl, ch) do { \ +- struct drm_nouveau_private *nv = dev->dev_private; \ +- if (!nouveau_channel_owner(dev, (cl), (id))) { \ +- NV_ERROR(dev, "pid %d doesn't own channel %d\n", \ +- DRM_CURRENTPID, (id)); \ +- return -EPERM; \ +- } \ +- (ch) = nv->fifos[(id)]; \ +-} while (0) +- + /* nouveau_drv.c */ +-extern int nouveau_noagp; ++extern int nouveau_agpmode; + extern int nouveau_duallink; + extern int nouveau_uscript_lvds; + extern int nouveau_uscript_tmds; +@@ -683,7 +731,10 @@ + extern int nouveau_ignorelid; + extern int nouveau_nofbaccel; + extern int nouveau_noaccel; ++extern int nouveau_force_post; + extern int nouveau_override_conntype; ++extern char *nouveau_perflvl; ++extern int nouveau_perflvl_wr; + + extern int nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state); + extern int nouveau_pci_resume(struct pci_dev *pdev); +@@ -704,8 +755,10 @@ + extern int nouveau_card_init(struct drm_device *); + + /* nouveau_mem.c */ +-extern int nouveau_mem_detect(struct drm_device *dev); +-extern int nouveau_mem_init(struct drm_device *); ++extern int nouveau_mem_vram_init(struct drm_device *); ++extern void nouveau_mem_vram_fini(struct drm_device *); ++extern int nouveau_mem_gart_init(struct drm_device *); ++extern void nouveau_mem_gart_fini(struct drm_device *); + extern int nouveau_mem_init_agp(struct drm_device *); + extern int nouveau_mem_reset_agp(struct drm_device *); + extern void nouveau_mem_close(struct drm_device *); +@@ -737,19 +790,18 @@ + extern struct drm_ioctl_desc nouveau_ioctls[]; + extern int nouveau_max_ioctl; + extern void nouveau_channel_cleanup(struct drm_device *, struct drm_file *); +-extern int nouveau_channel_owner(struct drm_device *, struct drm_file *, +- int channel); + extern int nouveau_channel_alloc(struct drm_device *dev, + struct nouveau_channel **chan, + struct drm_file *file_priv, + uint32_t fb_ctxdma, uint32_t tt_ctxdma); +-extern void nouveau_channel_free(struct nouveau_channel *); ++extern struct nouveau_channel * ++nouveau_channel_get(struct drm_device *, struct drm_file *, int id); ++extern void nouveau_channel_put(struct nouveau_channel **); + + /* nouveau_object.c */ + extern int nouveau_gpuobj_early_init(struct drm_device *); + extern int nouveau_gpuobj_init(struct drm_device *); + extern void nouveau_gpuobj_takedown(struct drm_device *); +-extern void nouveau_gpuobj_late_takedown(struct drm_device *); + extern int nouveau_gpuobj_suspend(struct drm_device *dev); + extern void nouveau_gpuobj_suspend_cleanup(struct drm_device *dev); + extern void nouveau_gpuobj_resume(struct drm_device *dev); +@@ -759,24 +811,11 @@ + extern int nouveau_gpuobj_new(struct drm_device *, struct nouveau_channel *, + uint32_t size, int align, uint32_t flags, + struct nouveau_gpuobj **); +-extern int nouveau_gpuobj_del(struct drm_device *, struct nouveau_gpuobj **); +-extern int 
nouveau_gpuobj_ref_add(struct drm_device *, struct nouveau_channel *, +- uint32_t handle, struct nouveau_gpuobj *, +- struct nouveau_gpuobj_ref **); +-extern int nouveau_gpuobj_ref_del(struct drm_device *, +- struct nouveau_gpuobj_ref **); +-extern int nouveau_gpuobj_ref_find(struct nouveau_channel *, uint32_t handle, +- struct nouveau_gpuobj_ref **ref_ret); +-extern int nouveau_gpuobj_new_ref(struct drm_device *, +- struct nouveau_channel *alloc_chan, +- struct nouveau_channel *ref_chan, +- uint32_t handle, uint32_t size, int align, +- uint32_t flags, struct nouveau_gpuobj_ref **); +-extern int nouveau_gpuobj_new_fake(struct drm_device *, +- uint32_t p_offset, uint32_t b_offset, +- uint32_t size, uint32_t flags, +- struct nouveau_gpuobj **, +- struct nouveau_gpuobj_ref**); ++extern void nouveau_gpuobj_ref(struct nouveau_gpuobj *, ++ struct nouveau_gpuobj **); ++extern int nouveau_gpuobj_new_fake(struct drm_device *, u32 pinst, u64 vinst, ++ u32 size, u32 flags, ++ struct nouveau_gpuobj **); + extern int nouveau_gpuobj_dma_new(struct nouveau_channel *, int class, + uint64_t offset, uint64_t size, int access, + int target, struct nouveau_gpuobj **); +@@ -879,6 +918,7 @@ + enum dcb_gpio_tag); + extern struct dcb_connector_table_entry * + nouveau_bios_connector_entry(struct drm_device *, int index); ++extern u32 get_pll_register(struct drm_device *, enum pll_types); + extern int get_pll_limits(struct drm_device *, uint32_t limit_match, + struct pll_lims *); + extern int nouveau_bios_run_display_table(struct drm_device *, +@@ -925,6 +965,10 @@ + extern void nv40_fb_takedown(struct drm_device *); + extern void nv40_fb_set_region_tiling(struct drm_device *, int, uint32_t, + uint32_t, uint32_t); ++/* nv50_fb.c */ ++extern int nv50_fb_init(struct drm_device *); ++extern void nv50_fb_takedown(struct drm_device *); ++extern void nv50_fb_vm_trap(struct drm_device *, int display, const char *); + + /* nv50_fb.c */ + extern int nv50_fb_init(struct drm_device *); +@@ -939,7 +983,6 @@ + extern void nv04_fifo_disable(struct drm_device *); + extern void nv04_fifo_enable(struct drm_device *); + extern bool nv04_fifo_reassign(struct drm_device *, bool); +-extern bool nv04_fifo_cache_flush(struct drm_device *); + extern bool nv04_fifo_cache_pull(struct drm_device *, bool); + extern int nv04_fifo_channel_id(struct drm_device *); + extern int nv04_fifo_create_context(struct nouveau_channel *); +@@ -977,7 +1020,6 @@ + extern void nvc0_fifo_disable(struct drm_device *); + extern void nvc0_fifo_enable(struct drm_device *); + extern bool nvc0_fifo_reassign(struct drm_device *, bool); +-extern bool nvc0_fifo_cache_flush(struct drm_device *); + extern bool nvc0_fifo_cache_pull(struct drm_device *, bool); + extern int nvc0_fifo_channel_id(struct drm_device *); + extern int nvc0_fifo_create_context(struct nouveau_channel *); +@@ -1165,19 +1207,24 @@ + extern void nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val); + extern u32 nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index); + extern void nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val); +-extern int nouveau_bo_sync_gpu(struct nouveau_bo *, struct nouveau_channel *); + + /* nouveau_fence.c */ + struct nouveau_fence; +-extern int nouveau_fence_init(struct nouveau_channel *); +-extern void nouveau_fence_fini(struct nouveau_channel *); ++extern int nouveau_fence_init(struct drm_device *); ++extern void nouveau_fence_fini(struct drm_device *); ++extern int nouveau_fence_channel_init(struct nouveau_channel *); ++extern void 
nouveau_fence_channel_fini(struct nouveau_channel *); + extern void nouveau_fence_update(struct nouveau_channel *); + extern int nouveau_fence_new(struct nouveau_channel *, struct nouveau_fence **, + bool emit); + extern int nouveau_fence_emit(struct nouveau_fence *); ++extern void nouveau_fence_work(struct nouveau_fence *fence, ++ void (*work)(void *priv, bool signalled), ++ void *priv); + struct nouveau_channel *nouveau_fence_channel(struct nouveau_fence *); + extern bool nouveau_fence_signalled(void *obj, void *arg); + extern int nouveau_fence_wait(void *obj, void *arg, bool lazy, bool intr); ++extern int nouveau_fence_sync(struct nouveau_fence *, struct nouveau_channel *); + extern int nouveau_fence_flush(void *obj, void *arg); + extern void nouveau_fence_unref(void **obj); + extern void *nouveau_fence_ref(void *obj); +@@ -1255,12 +1302,11 @@ + iowrite32_native(val, dev_priv->mmio + reg); + } + +-static inline void nv_mask(struct drm_device *dev, u32 reg, u32 mask, u32 val) ++static inline u32 nv_mask(struct drm_device *dev, u32 reg, u32 mask, u32 val) + { + u32 tmp = nv_rd32(dev, reg); +- tmp &= ~mask; +- tmp |= val; +- nv_wr32(dev, reg, tmp); ++ nv_wr32(dev, reg, (tmp & ~mask) | val); ++ return tmp; + } + + static inline u8 nv_rd08(struct drm_device *dev, unsigned reg) +@@ -1275,7 +1321,7 @@ + iowrite8(val, dev_priv->mmio + reg); + } + +-#define nv_wait(reg, mask, val) \ ++#define nv_wait(dev, reg, mask, val) \ + nouveau_wait_until(dev, 2000000000ULL, (reg), (mask), (val)) + + /* PRAMIN access */ +@@ -1292,17 +1338,8 @@ + } + + /* object access */ +-static inline u32 nv_ro32(struct drm_device *dev, struct nouveau_gpuobj *obj, +- unsigned index) +-{ +- return nv_ri32(dev, obj->im_pramin->start + index * 4); +-} +- +-static inline void nv_wo32(struct drm_device *dev, struct nouveau_gpuobj *obj, +- unsigned index, u32 val) +-{ +- nv_wi32(dev, obj->im_pramin->start + index * 4, val); +-} ++extern u32 nv_ro32(struct nouveau_gpuobj *, u32 offset); ++extern void nv_wo32(struct nouveau_gpuobj *, u32 offset, u32 val); + + /* + * Logging +@@ -1403,6 +1440,7 @@ + #define NV_SW_SEMAPHORE_OFFSET 0x00000064 + #define NV_SW_SEMAPHORE_ACQUIRE 0x00000068 + #define NV_SW_SEMAPHORE_RELEASE 0x0000006c ++#define NV_SW_YIELD 0x00000080 + #define NV_SW_DMA_VBLSEM 0x0000018c + #define NV_SW_VBLSEM_OFFSET 0x00000400 + #define NV_SW_VBLSEM_RELEASE_VALUE 0x00000404 +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_encoder.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_encoder.h +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_encoder.h 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_encoder.h 2010-10-15 02:04:44.343991985 +0200 +@@ -55,6 +55,7 @@ + int dpcd_version; + int link_nr; + int link_bw; ++ bool enhanced_frame; + } dp; + }; + }; +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_fbcon.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_fbcon.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_fbcon.c 2010-10-15 02:00:52.945120028 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_fbcon.c 2010-10-15 02:04:44.345992009 +0200 +@@ -49,6 +49,96 @@ + #include "nouveau_fbcon.h" + #include "nouveau_dma.h" + ++static void ++nouveau_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) ++{ ++ struct nouveau_fbdev *nfbdev = info->par; ++ struct drm_device *dev = nfbdev->dev; ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ int ret; ++ ++ if (info->state != FBINFO_STATE_RUNNING) 
++ return; ++ ++ ret = -ENODEV; ++ if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) && ++ mutex_trylock(&dev_priv->channel->mutex)) { ++ if (dev_priv->chipset < NV_50) ++ ret = nv04_fbcon_fillrect(info, rect); ++ else ++ if (dev_priv->chipset < NV_C0) ++ ret = nv50_fbcon_fillrect(info, rect); ++ mutex_unlock(&dev_priv->channel->mutex); ++ } ++ ++ if (ret == 0) ++ return; ++ ++ if (ret != -ENODEV) ++ nouveau_fbcon_gpu_lockup(info); ++ cfb_fillrect(info, rect); ++} ++ ++static void ++nouveau_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *image) ++{ ++ struct nouveau_fbdev *nfbdev = info->par; ++ struct drm_device *dev = nfbdev->dev; ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ int ret; ++ ++ if (info->state != FBINFO_STATE_RUNNING) ++ return; ++ ++ ret = -ENODEV; ++ if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) && ++ mutex_trylock(&dev_priv->channel->mutex)) { ++ if (dev_priv->chipset < NV_50) ++ ret = nv04_fbcon_copyarea(info, image); ++ else ++ if (dev_priv->chipset < NV_C0) ++ ret = nv50_fbcon_copyarea(info, image); ++ mutex_unlock(&dev_priv->channel->mutex); ++ } ++ ++ if (ret == 0) ++ return; ++ ++ if (ret != -ENODEV) ++ nouveau_fbcon_gpu_lockup(info); ++ cfb_copyarea(info, image); ++} ++ ++static void ++nouveau_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) ++{ ++ struct nouveau_fbdev *nfbdev = info->par; ++ struct drm_device *dev = nfbdev->dev; ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ int ret; ++ ++ if (info->state != FBINFO_STATE_RUNNING) ++ return; ++ ++ ret = -ENODEV; ++ if (!in_interrupt() && !(info->flags & FBINFO_HWACCEL_DISABLED) && ++ mutex_trylock(&dev_priv->channel->mutex)) { ++ if (dev_priv->chipset < NV_50) ++ ret = nv04_fbcon_imageblit(info, image); ++ else ++ if (dev_priv->chipset < NV_C0) ++ ret = nv50_fbcon_imageblit(info, image); ++ mutex_unlock(&dev_priv->channel->mutex); ++ } ++ ++ if (ret == 0) ++ return; ++ ++ if (ret != -ENODEV) ++ nouveau_fbcon_gpu_lockup(info); ++ cfb_imageblit(info, image); ++} ++ + static int + nouveau_fbcon_sync(struct fb_info *info) + { +@@ -58,12 +148,17 @@ + struct nouveau_channel *chan = dev_priv->channel; + int ret, i; + +- if (!chan || !chan->accel_done || ++ if (!chan || !chan->accel_done || in_interrupt() || + info->state != FBINFO_STATE_RUNNING || + info->flags & FBINFO_HWACCEL_DISABLED) + return 0; + +- if (RING_SPACE(chan, 4)) { ++ if (!mutex_trylock(&chan->mutex)) ++ return 0; ++ ++ ret = RING_SPACE(chan, 4); ++ if (ret) { ++ mutex_unlock(&chan->mutex); + nouveau_fbcon_gpu_lockup(info); + return 0; + } +@@ -74,6 +169,7 @@ + OUT_RING(chan, 0); + nouveau_bo_wr32(chan->notifier_bo, chan->m2mf_ntfy + 3, 0xffffffff); + FIRE_RING(chan); ++ mutex_unlock(&chan->mutex); + + ret = -EBUSY; + for (i = 0; i < 100000; i++) { +@@ -97,36 +193,22 @@ + .owner = THIS_MODULE, + .fb_check_var = drm_fb_helper_check_var, + .fb_set_par = drm_fb_helper_set_par, +- .fb_fillrect = cfb_fillrect, +- .fb_copyarea = cfb_copyarea, +- .fb_imageblit = cfb_imageblit, ++ .fb_fillrect = nouveau_fbcon_fillrect, ++ .fb_copyarea = nouveau_fbcon_copyarea, ++ .fb_imageblit = nouveau_fbcon_imageblit, + .fb_sync = nouveau_fbcon_sync, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, + .fb_setcmap = drm_fb_helper_setcmap, + }; + +-static struct fb_ops nv04_fbcon_ops = { ++static struct fb_ops nouveau_fbcon_sw_ops = { + .owner = THIS_MODULE, + .fb_check_var = drm_fb_helper_check_var, + .fb_set_par = drm_fb_helper_set_par, +- 
.fb_fillrect = nv04_fbcon_fillrect, +- .fb_copyarea = nv04_fbcon_copyarea, +- .fb_imageblit = nv04_fbcon_imageblit, +- .fb_sync = nouveau_fbcon_sync, +- .fb_pan_display = drm_fb_helper_pan_display, +- .fb_blank = drm_fb_helper_blank, +- .fb_setcmap = drm_fb_helper_setcmap, +-}; +- +-static struct fb_ops nv50_fbcon_ops = { +- .owner = THIS_MODULE, +- .fb_check_var = drm_fb_helper_check_var, +- .fb_set_par = drm_fb_helper_set_par, +- .fb_fillrect = nv50_fbcon_fillrect, +- .fb_copyarea = nv50_fbcon_copyarea, +- .fb_imageblit = nv50_fbcon_imageblit, +- .fb_sync = nouveau_fbcon_sync, ++ .fb_fillrect = cfb_fillrect, ++ .fb_copyarea = cfb_copyarea, ++ .fb_imageblit = cfb_imageblit, + .fb_pan_display = drm_fb_helper_pan_display, + .fb_blank = drm_fb_helper_blank, + .fb_setcmap = drm_fb_helper_setcmap, +@@ -251,7 +333,7 @@ + FBINFO_HWACCEL_FILLRECT | + FBINFO_HWACCEL_IMAGEBLIT; + info->flags |= FBINFO_CAN_FORCE_OUTPUT; +- info->fbops = &nouveau_fbcon_ops; ++ info->fbops = &nouveau_fbcon_sw_ops; + info->fix.smem_start = dev->mode_config.fb_base + nvbo->bo.offset - + dev_priv->vm_vram_base; + info->fix.smem_len = size; +@@ -279,19 +361,18 @@ + info->pixmap.flags = FB_PIXMAP_SYSTEM; + info->pixmap.scan_align = 1; + ++ mutex_unlock(&dev->struct_mutex); ++ + if (dev_priv->channel && !nouveau_nofbaccel) { +- switch (dev_priv->card_type) { +- case NV_C0: +- break; +- case NV_50: +- nv50_fbcon_accel_init(info); +- info->fbops = &nv50_fbcon_ops; +- break; +- default: +- nv04_fbcon_accel_init(info); +- info->fbops = &nv04_fbcon_ops; +- break; +- }; ++ ret = -ENODEV; ++ if (dev_priv->card_type < NV_50) ++ ret = nv04_fbcon_accel_init(info); ++ else ++ if (dev_priv->card_type < NV_C0) ++ ret = nv50_fbcon_accel_init(info); ++ ++ if (ret == 0) ++ info->fbops = &nouveau_fbcon_ops; + } + + nouveau_fbcon_zfill(dev, nfbdev); +@@ -302,7 +383,6 @@ + nouveau_fb->base.height, + nvbo->bo.offset, nvbo); + +- mutex_unlock(&dev->struct_mutex); + vga_switcheroo_client_fb_set(dev->pdev, info); + return 0; + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_fbcon.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_fbcon.h +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_fbcon.h 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_fbcon.h 2010-10-15 02:04:44.348992046 +0200 +@@ -40,13 +40,13 @@ + + void nouveau_fbcon_restore(void); + +-void nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region); +-void nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect); +-void nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image); ++int nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region); ++int nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect); ++int nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image); + int nv04_fbcon_accel_init(struct fb_info *info); +-void nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect); +-void nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region); +-void nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image); ++int nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect); ++int nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region); ++int nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image); + int nv50_fbcon_accel_init(struct fb_info *info); + + void nouveau_fbcon_gpu_lockup(struct fb_info *info); +diff -Naur 
linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_fence.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_fence.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_fence.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_fence.c 2010-10-15 02:04:44.350992072 +0200 +@@ -28,9 +28,11 @@ + #include "drm.h" + + #include "nouveau_drv.h" ++#include "nouveau_ramht.h" + #include "nouveau_dma.h" + +-#define USE_REFCNT (dev_priv->card_type >= NV_10) ++#define USE_REFCNT(dev) (nouveau_private(dev)->chipset >= 0x10) ++#define USE_SEMA(dev) (nouveau_private(dev)->chipset >= 0x17) + + struct nouveau_fence { + struct nouveau_channel *channel; +@@ -39,6 +41,15 @@ + + uint32_t sequence; + bool signalled; ++ ++ void (*work)(void *priv, bool signalled); ++ void *priv; ++}; ++ ++struct nouveau_semaphore { ++ struct kref ref; ++ struct drm_device *dev; ++ struct drm_mm_node *mem; + }; + + static inline struct nouveau_fence * +@@ -59,14 +70,13 @@ + void + nouveau_fence_update(struct nouveau_channel *chan) + { +- struct drm_nouveau_private *dev_priv = chan->dev->dev_private; +- struct list_head *entry, *tmp; +- struct nouveau_fence *fence; ++ struct drm_device *dev = chan->dev; ++ struct nouveau_fence *tmp, *fence; + uint32_t sequence; + + spin_lock(&chan->fence.lock); + +- if (USE_REFCNT) ++ if (USE_REFCNT(dev)) + sequence = nvchan_rd32(chan, 0x48); + else + sequence = atomic_read(&chan->fence.last_sequence_irq); +@@ -75,12 +85,14 @@ + goto out; + chan->fence.sequence_ack = sequence; + +- list_for_each_safe(entry, tmp, &chan->fence.pending) { +- fence = list_entry(entry, struct nouveau_fence, entry); +- ++ list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) { + sequence = fence->sequence; + fence->signalled = true; + list_del(&fence->entry); ++ ++ if (unlikely(fence->work)) ++ fence->work(fence->priv, true); ++ + kref_put(&fence->refcount, nouveau_fence_del); + + if (sequence == chan->fence.sequence_ack) +@@ -121,8 +133,8 @@ + int + nouveau_fence_emit(struct nouveau_fence *fence) + { +- struct drm_nouveau_private *dev_priv = fence->channel->dev->dev_private; + struct nouveau_channel *chan = fence->channel; ++ struct drm_device *dev = chan->dev; + int ret; + + ret = RING_SPACE(chan, 2); +@@ -143,7 +155,7 @@ + list_add_tail(&fence->entry, &chan->fence.pending); + spin_unlock(&chan->fence.lock); + +- BEGIN_RING(chan, NvSubSw, USE_REFCNT ? 0x0050 : 0x0150, 1); ++ BEGIN_RING(chan, NvSubSw, USE_REFCNT(dev) ? 
0x0050 : 0x0150, 1); + OUT_RING(chan, fence->sequence); + FIRE_RING(chan); + +@@ -151,6 +163,25 @@ + } + + void ++nouveau_fence_work(struct nouveau_fence *fence, ++ void (*work)(void *priv, bool signalled), ++ void *priv) ++{ ++ BUG_ON(fence->work); ++ ++ spin_lock(&fence->channel->fence.lock); ++ ++ if (fence->signalled) { ++ work(priv, true); ++ } else { ++ fence->work = work; ++ fence->priv = priv; ++ } ++ ++ spin_unlock(&fence->channel->fence.lock); ++} ++ ++void + nouveau_fence_unref(void **sync_obj) + { + struct nouveau_fence *fence = nouveau_fence(*sync_obj); +@@ -213,6 +244,177 @@ + return ret; + } + ++static struct nouveau_semaphore * ++alloc_semaphore(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_semaphore *sema; ++ int ret; ++ ++ if (!USE_SEMA(dev)) ++ return NULL; ++ ++ sema = kmalloc(sizeof(*sema), GFP_KERNEL); ++ if (!sema) ++ goto fail; ++ ++ ret = drm_mm_pre_get(&dev_priv->fence.heap); ++ if (ret) ++ goto fail; ++ ++ spin_lock(&dev_priv->fence.lock); ++ sema->mem = drm_mm_search_free(&dev_priv->fence.heap, 4, 0, 0); ++ if (sema->mem) ++ sema->mem = drm_mm_get_block_atomic(sema->mem, 4, 0); ++ spin_unlock(&dev_priv->fence.lock); ++ ++ if (!sema->mem) ++ goto fail; ++ ++ kref_init(&sema->ref); ++ sema->dev = dev; ++ nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 0); ++ ++ return sema; ++fail: ++ kfree(sema); ++ return NULL; ++} ++ ++static void ++free_semaphore(struct kref *ref) ++{ ++ struct nouveau_semaphore *sema = ++ container_of(ref, struct nouveau_semaphore, ref); ++ struct drm_nouveau_private *dev_priv = sema->dev->dev_private; ++ ++ spin_lock(&dev_priv->fence.lock); ++ drm_mm_put_block(sema->mem); ++ spin_unlock(&dev_priv->fence.lock); ++ ++ kfree(sema); ++} ++ ++static void ++semaphore_work(void *priv, bool signalled) ++{ ++ struct nouveau_semaphore *sema = priv; ++ struct drm_nouveau_private *dev_priv = sema->dev->dev_private; ++ ++ if (unlikely(!signalled)) ++ nouveau_bo_wr32(dev_priv->fence.bo, sema->mem->start / 4, 1); ++ ++ kref_put(&sema->ref, free_semaphore); ++} ++ ++static int ++emit_semaphore(struct nouveau_channel *chan, int method, ++ struct nouveau_semaphore *sema) ++{ ++ struct drm_nouveau_private *dev_priv = sema->dev->dev_private; ++ struct nouveau_fence *fence; ++ bool smart = (dev_priv->card_type >= NV_50); ++ int ret; ++ ++ ret = RING_SPACE(chan, smart ? 8 : 4); ++ if (ret) ++ return ret; ++ ++ if (smart) { ++ BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); ++ OUT_RING(chan, NvSema); ++ } ++ BEGIN_RING(chan, NvSubSw, NV_SW_SEMAPHORE_OFFSET, 1); ++ OUT_RING(chan, sema->mem->start); ++ ++ if (smart && method == NV_SW_SEMAPHORE_ACQUIRE) { ++ /* ++ * NV50 tries to be too smart and context-switch ++ * between semaphores instead of doing a "first come, ++ * first served" strategy like previous cards ++ * do. ++ * ++ * That's bad because the ACQUIRE latency can get as ++ * large as the PFIFO context time slice in the ++ * typical DRI2 case where you have several ++ * outstanding semaphores at the same moment. ++ * ++ * If we're going to ACQUIRE, force the card to ++ * context switch before, just in case the matching ++ * RELEASE is already scheduled to be executed in ++ * another channel. 
++ */ ++ BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1); ++ OUT_RING(chan, 0); ++ } ++ ++ BEGIN_RING(chan, NvSubSw, method, 1); ++ OUT_RING(chan, 1); ++ ++ if (smart && method == NV_SW_SEMAPHORE_RELEASE) { ++ /* ++ * Force the card to context switch, there may be ++ * another channel waiting for the semaphore we just ++ * released. ++ */ ++ BEGIN_RING(chan, NvSubSw, NV_SW_YIELD, 1); ++ OUT_RING(chan, 0); ++ } ++ ++ /* Delay semaphore destruction until its work is done */ ++ ret = nouveau_fence_new(chan, &fence, true); ++ if (ret) ++ return ret; ++ ++ kref_get(&sema->ref); ++ nouveau_fence_work(fence, semaphore_work, sema); ++ nouveau_fence_unref((void *)&fence); ++ ++ return 0; ++} ++ ++int ++nouveau_fence_sync(struct nouveau_fence *fence, ++ struct nouveau_channel *wchan) ++{ ++ struct nouveau_channel *chan = nouveau_fence_channel(fence); ++ struct drm_device *dev = wchan->dev; ++ struct nouveau_semaphore *sema; ++ int ret; ++ ++ if (likely(!fence || chan == wchan || ++ nouveau_fence_signalled(fence, NULL))) ++ return 0; ++ ++ sema = alloc_semaphore(dev); ++ if (!sema) { ++ /* Early card or broken userspace, fall back to ++ * software sync. */ ++ return nouveau_fence_wait(fence, NULL, false, false); ++ } ++ ++ /* try to take chan's mutex, if we can't take it right away ++ * we have to fallback to software sync to prevent locking ++ * order issues ++ */ ++ if (!mutex_trylock(&chan->mutex)) { ++ free_semaphore(&sema->ref); ++ return nouveau_fence_wait(fence, NULL, false, false); ++ } ++ ++ /* Make wchan wait until it gets signalled */ ++ ret = emit_semaphore(wchan, NV_SW_SEMAPHORE_ACQUIRE, sema); ++ if (ret) ++ goto out; ++ ++ /* Signal the semaphore from chan */ ++ ret = emit_semaphore(chan, NV_SW_SEMAPHORE_RELEASE, sema); ++ mutex_unlock(&chan->mutex); ++out: ++ kref_put(&sema->ref, free_semaphore); ++ return ret; ++} ++ + int + nouveau_fence_flush(void *sync_obj, void *sync_arg) + { +@@ -220,26 +422,123 @@ + } + + int +-nouveau_fence_init(struct nouveau_channel *chan) ++nouveau_fence_channel_init(struct nouveau_channel *chan) + { ++ struct drm_device *dev = chan->dev; ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_gpuobj *obj = NULL; ++ int ret; ++ ++ /* Create an NV_SW object for various sync purposes */ ++ ret = nouveau_gpuobj_sw_new(chan, NV_SW, &obj); ++ if (ret) ++ return ret; ++ ++ ret = nouveau_ramht_insert(chan, NvSw, obj); ++ nouveau_gpuobj_ref(NULL, &obj); ++ if (ret) ++ return ret; ++ ++ ret = RING_SPACE(chan, 2); ++ if (ret) ++ return ret; ++ BEGIN_RING(chan, NvSubSw, 0, 1); ++ OUT_RING(chan, NvSw); ++ ++ /* Create a DMA object for the shared cross-channel sync area. 
*/ ++ if (USE_SEMA(dev)) { ++ struct drm_mm_node *mem = dev_priv->fence.bo->bo.mem.mm_node; ++ ++ ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, ++ mem->start << PAGE_SHIFT, ++ mem->size << PAGE_SHIFT, ++ NV_DMA_ACCESS_RW, ++ NV_DMA_TARGET_VIDMEM, &obj); ++ if (ret) ++ return ret; ++ ++ ret = nouveau_ramht_insert(chan, NvSema, obj); ++ nouveau_gpuobj_ref(NULL, &obj); ++ if (ret) ++ return ret; ++ ++ ret = RING_SPACE(chan, 2); ++ if (ret) ++ return ret; ++ BEGIN_RING(chan, NvSubSw, NV_SW_DMA_SEMAPHORE, 1); ++ OUT_RING(chan, NvSema); ++ } ++ ++ FIRE_RING(chan); ++ + INIT_LIST_HEAD(&chan->fence.pending); + spin_lock_init(&chan->fence.lock); + atomic_set(&chan->fence.last_sequence_irq, 0); ++ + return 0; + } + + void +-nouveau_fence_fini(struct nouveau_channel *chan) ++nouveau_fence_channel_fini(struct nouveau_channel *chan) + { +- struct list_head *entry, *tmp; +- struct nouveau_fence *fence; +- +- list_for_each_safe(entry, tmp, &chan->fence.pending) { +- fence = list_entry(entry, struct nouveau_fence, entry); ++ struct nouveau_fence *tmp, *fence; + ++ list_for_each_entry_safe(fence, tmp, &chan->fence.pending, entry) { + fence->signalled = true; + list_del(&fence->entry); ++ ++ if (unlikely(fence->work)) ++ fence->work(fence->priv, false); ++ + kref_put(&fence->refcount, nouveau_fence_del); + } + } + ++int ++nouveau_fence_init(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ int ret; ++ ++ /* Create a shared VRAM heap for cross-channel sync. */ ++ if (USE_SEMA(dev)) { ++ ret = nouveau_bo_new(dev, NULL, 4096, 0, TTM_PL_FLAG_VRAM, ++ 0, 0, false, true, &dev_priv->fence.bo); ++ if (ret) ++ return ret; ++ ++ ret = nouveau_bo_pin(dev_priv->fence.bo, TTM_PL_FLAG_VRAM); ++ if (ret) ++ goto fail; ++ ++ ret = nouveau_bo_map(dev_priv->fence.bo); ++ if (ret) ++ goto fail; ++ ++ ret = drm_mm_init(&dev_priv->fence.heap, 0, ++ dev_priv->fence.bo->bo.mem.size); ++ if (ret) ++ goto fail; ++ ++ spin_lock_init(&dev_priv->fence.lock); ++ } ++ ++ return 0; ++fail: ++ nouveau_bo_unmap(dev_priv->fence.bo); ++ nouveau_bo_ref(NULL, &dev_priv->fence.bo); ++ return ret; ++} ++ ++void ++nouveau_fence_fini(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ ++ if (USE_SEMA(dev)) { ++ drm_mm_takedown(&dev_priv->fence.heap); ++ nouveau_bo_unmap(dev_priv->fence.bo); ++ nouveau_bo_unpin(dev_priv->fence.bo); ++ nouveau_bo_ref(NULL, &dev_priv->fence.bo); ++ } ++} +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_gem.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_gem.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_gem.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_gem.c 2010-10-15 02:04:44.352992096 +0200 +@@ -48,9 +48,6 @@ + return; + nvbo->gem = NULL; + +- if (unlikely(nvbo->cpu_filp)) +- ttm_bo_synccpu_write_release(bo); +- + if (unlikely(nvbo->pin_refcnt)) { + nvbo->pin_refcnt = 1; + nouveau_bo_unpin(nvbo); +@@ -107,23 +104,29 @@ + } + + static bool +-nouveau_gem_tile_flags_valid(struct drm_device *dev, uint32_t tile_flags) { +- switch (tile_flags) { +- case 0x0000: +- case 0x1800: +- case 0x2800: +- case 0x4800: +- case 0x7000: +- case 0x7400: +- case 0x7a00: +- case 0xe000: +- break; +- default: +- NV_ERROR(dev, "bad page flags: 0x%08x\n", tile_flags); +- return false; ++nouveau_gem_tile_flags_valid(struct drm_device *dev, uint32_t tile_flags) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ ++ if (dev_priv->card_type >= NV_50) { ++ 
switch (tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK) { ++ case 0x0000: ++ case 0x1800: ++ case 0x2800: ++ case 0x4800: ++ case 0x7000: ++ case 0x7400: ++ case 0x7a00: ++ case 0xe000: ++ return true; ++ } ++ } else { ++ if (!(tile_flags & NOUVEAU_GEM_TILE_LAYOUT_MASK)) ++ return true; + } + +- return true; ++ NV_ERROR(dev, "bad page flags: 0x%08x\n", tile_flags); ++ return false; + } + + int +@@ -140,11 +143,6 @@ + if (unlikely(dev_priv->ttm.bdev.dev_mapping == NULL)) + dev_priv->ttm.bdev.dev_mapping = dev_priv->dev->dev_mapping; + +- if (req->channel_hint) { +- NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(req->channel_hint, +- file_priv, chan); +- } +- + if (req->info.domain & NOUVEAU_GEM_DOMAIN_VRAM) + flags |= TTM_PL_FLAG_VRAM; + if (req->info.domain & NOUVEAU_GEM_DOMAIN_GART) +@@ -155,10 +153,18 @@ + if (!nouveau_gem_tile_flags_valid(dev, req->info.tile_flags)) + return -EINVAL; + ++ if (req->channel_hint) { ++ chan = nouveau_channel_get(dev, file_priv, req->channel_hint); ++ if (IS_ERR(chan)) ++ return PTR_ERR(chan); ++ } ++ + ret = nouveau_gem_new(dev, chan, req->info.size, req->align, flags, + req->info.tile_mode, req->info.tile_flags, false, + (req->info.domain & NOUVEAU_GEM_DOMAIN_MAPPABLE), + &nvbo); ++ if (chan) ++ nouveau_channel_put(&chan); + if (ret) + return ret; + +@@ -293,14 +299,15 @@ + return -EINVAL; + } + +- ret = ttm_bo_reserve(&nvbo->bo, false, false, true, sequence); ++ ret = ttm_bo_reserve(&nvbo->bo, true, false, true, sequence); + if (ret) { + validate_fini(op, NULL); +- if (ret == -EAGAIN) +- ret = ttm_bo_wait_unreserved(&nvbo->bo, false); ++ if (unlikely(ret == -EAGAIN)) ++ ret = ttm_bo_wait_unreserved(&nvbo->bo, true); + drm_gem_object_unreference_unlocked(gem); +- if (ret) { +- NV_ERROR(dev, "fail reserve\n"); ++ if (unlikely(ret)) { ++ if (ret != -ERESTARTSYS) ++ NV_ERROR(dev, "fail reserve\n"); + return ret; + } + goto retry; +@@ -325,25 +332,6 @@ + validate_fini(op, NULL); + return -EINVAL; + } +- +- if (unlikely(atomic_read(&nvbo->bo.cpu_writers) > 0)) { +- validate_fini(op, NULL); +- +- if (nvbo->cpu_filp == file_priv) { +- NV_ERROR(dev, "bo %p mapped by process trying " +- "to validate it!\n", nvbo); +- return -EINVAL; +- } +- +- mutex_unlock(&drm_global_mutex); +- ret = ttm_bo_wait_cpu(&nvbo->bo, false); +- mutex_lock(&drm_global_mutex); +- if (ret) { +- NV_ERROR(dev, "fail wait_cpu\n"); +- return ret; +- } +- goto retry; +- } + } + + return 0; +@@ -362,7 +350,7 @@ + list_for_each_entry(nvbo, list, entry) { + struct drm_nouveau_gem_pushbuf_bo *b = &pbbo[nvbo->pbbo_index]; + +- ret = nouveau_bo_sync_gpu(nvbo, chan); ++ ret = nouveau_fence_sync(nvbo->bo.sync_obj, chan); + if (unlikely(ret)) { + NV_ERROR(dev, "fail pre-validate sync\n"); + return ret; +@@ -378,14 +366,15 @@ + + nvbo->channel = (b->read_domains & (1 << 31)) ? 
NULL : chan; + ret = ttm_bo_validate(&nvbo->bo, &nvbo->placement, +- false, false, false); ++ true, false, false); + nvbo->channel = NULL; + if (unlikely(ret)) { +- NV_ERROR(dev, "fail ttm_validate\n"); ++ if (ret != -ERESTARTSYS) ++ NV_ERROR(dev, "fail ttm_validate\n"); + return ret; + } + +- ret = nouveau_bo_sync_gpu(nvbo, chan); ++ ret = nouveau_fence_sync(nvbo->bo.sync_obj, chan); + if (unlikely(ret)) { + NV_ERROR(dev, "fail post-validate sync\n"); + return ret; +@@ -433,13 +422,15 @@ + + ret = validate_init(chan, file_priv, pbbo, nr_buffers, op); + if (unlikely(ret)) { +- NV_ERROR(dev, "validate_init\n"); ++ if (ret != -ERESTARTSYS) ++ NV_ERROR(dev, "validate_init\n"); + return ret; + } + + ret = validate_list(chan, &op->vram_list, pbbo, user_buffers); + if (unlikely(ret < 0)) { +- NV_ERROR(dev, "validate vram_list\n"); ++ if (ret != -ERESTARTSYS) ++ NV_ERROR(dev, "validate vram_list\n"); + validate_fini(op, NULL); + return ret; + } +@@ -447,7 +438,8 @@ + + ret = validate_list(chan, &op->gart_list, pbbo, user_buffers); + if (unlikely(ret < 0)) { +- NV_ERROR(dev, "validate gart_list\n"); ++ if (ret != -ERESTARTSYS) ++ NV_ERROR(dev, "validate gart_list\n"); + validate_fini(op, NULL); + return ret; + } +@@ -455,7 +447,8 @@ + + ret = validate_list(chan, &op->both_list, pbbo, user_buffers); + if (unlikely(ret < 0)) { +- NV_ERROR(dev, "validate both_list\n"); ++ if (ret != -ERESTARTSYS) ++ NV_ERROR(dev, "validate both_list\n"); + validate_fini(op, NULL); + return ret; + } +@@ -579,7 +572,9 @@ + struct nouveau_fence *fence = NULL; + int i, j, ret = 0, do_reloc = 0; + +- NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(req->channel, file_priv, chan); ++ chan = nouveau_channel_get(dev, file_priv, req->channel); ++ if (IS_ERR(chan)) ++ return PTR_ERR(chan); + + req->vram_available = dev_priv->fb_aper_free; + req->gart_available = dev_priv->gart_info.aper_free; +@@ -589,28 +584,34 @@ + if (unlikely(req->nr_push > NOUVEAU_GEM_MAX_PUSH)) { + NV_ERROR(dev, "pushbuf push count exceeds limit: %d max %d\n", + req->nr_push, NOUVEAU_GEM_MAX_PUSH); ++ nouveau_channel_put(&chan); + return -EINVAL; + } + + if (unlikely(req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS)) { + NV_ERROR(dev, "pushbuf bo count exceeds limit: %d max %d\n", + req->nr_buffers, NOUVEAU_GEM_MAX_BUFFERS); ++ nouveau_channel_put(&chan); + return -EINVAL; + } + + if (unlikely(req->nr_relocs > NOUVEAU_GEM_MAX_RELOCS)) { + NV_ERROR(dev, "pushbuf reloc count exceeds limit: %d max %d\n", + req->nr_relocs, NOUVEAU_GEM_MAX_RELOCS); ++ nouveau_channel_put(&chan); + return -EINVAL; + } + + push = u_memcpya(req->push, req->nr_push, sizeof(*push)); +- if (IS_ERR(push)) ++ if (IS_ERR(push)) { ++ nouveau_channel_put(&chan); + return PTR_ERR(push); ++ } + + bo = u_memcpya(req->buffers, req->nr_buffers, sizeof(*bo)); + if (IS_ERR(bo)) { + kfree(push); ++ nouveau_channel_put(&chan); + return PTR_ERR(bo); + } + +@@ -633,7 +634,8 @@ + ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers, + req->nr_buffers, &op, &do_reloc); + if (ret) { +- NV_ERROR(dev, "validate: %d\n", ret); ++ if (ret != -ERESTARTSYS) ++ NV_ERROR(dev, "validate: %d\n", ret); + goto out; + } + +@@ -744,6 +746,7 @@ + req->suffix1 = 0x00000000; + } + ++ nouveau_channel_put(&chan); + return ret; + } + +@@ -775,26 +778,9 @@ + return -ENOENT; + nvbo = nouveau_gem_object(gem); + +- if (nvbo->cpu_filp) { +- if (nvbo->cpu_filp == file_priv) +- goto out; +- +- ret = ttm_bo_wait_cpu(&nvbo->bo, no_wait); +- if (ret) +- goto out; +- } +- +- if (req->flags & NOUVEAU_GEM_CPU_PREP_NOBLOCK) { +- 
spin_lock(&nvbo->bo.lock); +- ret = ttm_bo_wait(&nvbo->bo, false, false, no_wait); +- spin_unlock(&nvbo->bo.lock); +- } else { +- ret = ttm_bo_synccpu_write_grab(&nvbo->bo, no_wait); +- if (ret == 0) +- nvbo->cpu_filp = file_priv; +- } +- +-out: ++ spin_lock(&nvbo->bo.lock); ++ ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait); ++ spin_unlock(&nvbo->bo.lock); + drm_gem_object_unreference_unlocked(gem); + return ret; + } +@@ -803,26 +789,7 @@ + nouveau_gem_ioctl_cpu_fini(struct drm_device *dev, void *data, + struct drm_file *file_priv) + { +- struct drm_nouveau_gem_cpu_prep *req = data; +- struct drm_gem_object *gem; +- struct nouveau_bo *nvbo; +- int ret = -EINVAL; +- +- gem = drm_gem_object_lookup(dev, file_priv, req->handle); +- if (!gem) +- return -ENOENT; +- nvbo = nouveau_gem_object(gem); +- +- if (nvbo->cpu_filp != file_priv) +- goto out; +- nvbo->cpu_filp = NULL; +- +- ttm_bo_synccpu_write_release(&nvbo->bo); +- ret = 0; +- +-out: +- drm_gem_object_unreference_unlocked(gem); +- return ret; ++ return 0; + } + + int +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_grctx.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_grctx.h +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_grctx.h 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_grctx.h 2010-10-15 02:04:44.354992121 +0200 +@@ -126,7 +126,7 @@ + reg = (reg - 0x00400000) / 4; + reg = (reg - ctx->ctxprog_reg) + ctx->ctxvals_base; + +- nv_wo32(ctx->dev, ctx->data, reg, val); ++ nv_wo32(ctx->data, reg * 4, val); + } + #endif + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_hw.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_hw.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_hw.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_hw.c 2010-10-15 02:04:44.356992146 +0200 +@@ -305,7 +305,7 @@ + bool mpll = Preg == 0x4020; + uint32_t oldPval = nvReadMC(dev, Preg); + uint32_t NMNM = pv->NM2 << 16 | pv->NM1; +- uint32_t Pval = (oldPval & (mpll ? ~(0x11 << 16) : ~(1 << 16))) | ++ uint32_t Pval = (oldPval & (mpll ? 
~(0x77 << 16) : ~(7 << 16))) | + 0xc << 28 | pv->log2P << 16; + uint32_t saved4600 = 0; + /* some cards have different maskc040s */ +@@ -427,22 +427,12 @@ + struct nouveau_pll_vals *pllvals) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; +- const uint32_t nv04_regs[MAX_PLL_TYPES] = { NV_PRAMDAC_NVPLL_COEFF, +- NV_PRAMDAC_MPLL_COEFF, +- NV_PRAMDAC_VPLL_COEFF, +- NV_RAMDAC_VPLL2 }; +- const uint32_t nv40_regs[MAX_PLL_TYPES] = { 0x4000, +- 0x4020, +- NV_PRAMDAC_VPLL_COEFF, +- NV_RAMDAC_VPLL2 }; +- uint32_t reg1, pll1, pll2 = 0; ++ uint32_t reg1 = get_pll_register(dev, plltype), pll1, pll2 = 0; + struct pll_lims pll_lim; + int ret; + +- if (dev_priv->card_type < NV_40) +- reg1 = nv04_regs[plltype]; +- else +- reg1 = nv40_regs[plltype]; ++ if (reg1 == 0) ++ return -ENOENT; + + pll1 = nvReadMC(dev, reg1); + +@@ -491,8 +481,10 @@ + nouveau_hw_get_clock(struct drm_device *dev, enum pll_types plltype) + { + struct nouveau_pll_vals pllvals; ++ int ret; + +- if (plltype == MPLL && (dev->pci_device & 0x0ff0) == CHIPSET_NFORCE) { ++ if (plltype == PLL_MEMORY && ++ (dev->pci_device & 0x0ff0) == CHIPSET_NFORCE) { + uint32_t mpllP; + + pci_read_config_dword(pci_get_bus_and_slot(0, 3), 0x6c, &mpllP); +@@ -501,14 +493,17 @@ + + return 400000 / mpllP; + } else +- if (plltype == MPLL && (dev->pci_device & 0xff0) == CHIPSET_NFORCE2) { ++ if (plltype == PLL_MEMORY && ++ (dev->pci_device & 0xff0) == CHIPSET_NFORCE2) { + uint32_t clock; + + pci_read_config_dword(pci_get_bus_and_slot(0, 5), 0x4c, &clock); + return clock; + } + +- nouveau_hw_get_pllvals(dev, plltype, &pllvals); ++ ret = nouveau_hw_get_pllvals(dev, plltype, &pllvals); ++ if (ret) ++ return ret; + + return nouveau_hw_pllvals_to_clk(&pllvals); + } +@@ -524,11 +519,11 @@ + + struct pll_lims pll_lim; + struct nouveau_pll_vals pv; +- uint32_t pllreg = head ? NV_RAMDAC_VPLL2 : NV_PRAMDAC_VPLL_COEFF; ++ enum pll_types pll = head ? PLL_VPLL1 : PLL_VPLL0; + +- if (get_pll_limits(dev, head ? VPLL2 : VPLL1, &pll_lim)) ++ if (get_pll_limits(dev, pll, &pll_lim)) + return; +- nouveau_hw_get_pllvals(dev, head ? VPLL2 : VPLL1, &pv); ++ nouveau_hw_get_pllvals(dev, pll, &pv); + + if (pv.M1 >= pll_lim.vco1.min_m && pv.M1 <= pll_lim.vco1.max_m && + pv.N1 >= pll_lim.vco1.min_n && pv.N1 <= pll_lim.vco1.max_n && +@@ -541,7 +536,7 @@ + pv.M1 = pll_lim.vco1.max_m; + pv.N1 = pll_lim.vco1.min_n; + pv.log2P = pll_lim.max_usable_log2p; +- nouveau_hw_setpll(dev, pllreg, &pv); ++ nouveau_hw_setpll(dev, pll_lim.reg, &pv); + } + + /* +@@ -661,7 +656,7 @@ + if (dev_priv->card_type >= NV_10) + regp->nv10_cursync = NVReadRAMDAC(dev, head, NV_RAMDAC_NV10_CURSYNC); + +- nouveau_hw_get_pllvals(dev, head ? VPLL2 : VPLL1, ®p->pllvals); ++ nouveau_hw_get_pllvals(dev, head ? 
PLL_VPLL1 : PLL_VPLL0, ®p->pllvals); + state->pllsel = NVReadRAMDAC(dev, 0, NV_PRAMDAC_PLL_COEFF_SELECT); + if (nv_two_heads(dev)) + state->sel_clk = NVReadRAMDAC(dev, 0, NV_PRAMDAC_SEL_CLK); +@@ -866,10 +861,11 @@ + rd_cio_state(dev, head, regp, NV_CIO_CRE_FFLWM__INDEX); + rd_cio_state(dev, head, regp, NV_CIO_CRE_21); + +- if (dev_priv->card_type >= NV_30) { ++ if (dev_priv->card_type >= NV_20) + rd_cio_state(dev, head, regp, NV_CIO_CRE_47); ++ ++ if (dev_priv->card_type >= NV_30) + rd_cio_state(dev, head, regp, 0x9f); +- } + + rd_cio_state(dev, head, regp, NV_CIO_CRE_49); + rd_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR0_INDEX); +@@ -976,10 +972,11 @@ + wr_cio_state(dev, head, regp, NV_CIO_CRE_FF_INDEX); + wr_cio_state(dev, head, regp, NV_CIO_CRE_FFLWM__INDEX); + +- if (dev_priv->card_type >= NV_30) { ++ if (dev_priv->card_type >= NV_20) + wr_cio_state(dev, head, regp, NV_CIO_CRE_47); ++ ++ if (dev_priv->card_type >= NV_30) + wr_cio_state(dev, head, regp, 0x9f); +- } + + wr_cio_state(dev, head, regp, NV_CIO_CRE_49); + wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR0_INDEX); +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_i2c.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_i2c.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_i2c.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_i2c.c 2010-10-15 02:04:44.357992158 +0200 +@@ -299,7 +299,10 @@ + + int + nouveau_i2c_identify(struct drm_device *dev, const char *what, +- struct i2c_board_info *info, int index) ++ struct i2c_board_info *info, ++ bool (*match)(struct nouveau_i2c_chan *, ++ struct i2c_board_info *), ++ int index) + { + struct nouveau_i2c_chan *i2c = nouveau_i2c_find(dev, index); + int i; +@@ -307,7 +310,8 @@ + NV_DEBUG(dev, "Probing %ss on I2C bus: %d\n", what, index); + + for (i = 0; info[i].addr; i++) { +- if (nouveau_probe_i2c_addr(i2c, info[i].addr)) { ++ if (nouveau_probe_i2c_addr(i2c, info[i].addr) && ++ (!match || match(i2c, &info[i]))) { + NV_INFO(dev, "Detected %s: %s\n", what, info[i].type); + return i; + } +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_i2c.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_i2c.h +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_i2c.h 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_i2c.h 2010-10-15 02:04:44.359992183 +0200 +@@ -44,7 +44,10 @@ + struct nouveau_i2c_chan *nouveau_i2c_find(struct drm_device *, int index); + bool nouveau_probe_i2c_addr(struct nouveau_i2c_chan *i2c, int addr); + int nouveau_i2c_identify(struct drm_device *dev, const char *what, +- struct i2c_board_info *info, int index); ++ struct i2c_board_info *info, ++ bool (*match)(struct nouveau_i2c_chan *, ++ struct i2c_board_info *), ++ int index); + + extern const struct i2c_algorithm nouveau_dp_i2c_algo; + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_irq.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_irq.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_irq.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_irq.c 2010-10-15 02:04:44.361992209 +0200 +@@ -35,12 +35,20 @@ + #include "nouveau_drm.h" + #include "nouveau_drv.h" + #include "nouveau_reg.h" ++#include "nouveau_ramht.h" + #include + + /* needed for hotplug irq */ + #include "nouveau_connector.h" + #include "nv50_display.h" + ++static DEFINE_RATELIMIT_STATE(nouveau_ratelimit_state, 3 * HZ, 20); ++ ++static int 
nouveau_ratelimit(void) ++{ ++ return __ratelimit(&nouveau_ratelimit_state); ++} ++ + void + nouveau_irq_preinstall(struct drm_device *dev) + { +@@ -99,35 +107,49 @@ + } + + static bool +-nouveau_fifo_swmthd(struct nouveau_channel *chan, uint32_t addr, uint32_t data) ++nouveau_fifo_swmthd(struct drm_device *dev, u32 chid, u32 addr, u32 data) + { +- struct drm_device *dev = chan->dev; ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_channel *chan = NULL; ++ struct nouveau_gpuobj *obj; ++ unsigned long flags; + const int subc = (addr >> 13) & 0x7; + const int mthd = addr & 0x1ffc; ++ bool handled = false; ++ u32 engine; + +- if (mthd == 0x0000) { +- struct nouveau_gpuobj_ref *ref = NULL; ++ spin_lock_irqsave(&dev_priv->channels.lock, flags); ++ if (likely(chid >= 0 && chid < dev_priv->engine.fifo.channels)) ++ chan = dev_priv->channels.ptr[chid]; ++ if (unlikely(!chan)) ++ goto out; ++ ++ switch (mthd) { ++ case 0x0000: /* bind object to subchannel */ ++ obj = nouveau_ramht_find(chan, data); ++ if (unlikely(!obj || obj->engine != NVOBJ_ENGINE_SW)) ++ break; + +- if (nouveau_gpuobj_ref_find(chan, data, &ref)) +- return false; ++ chan->sw_subchannel[subc] = obj->class; ++ engine = 0x0000000f << (subc * 4); + +- if (ref->gpuobj->engine != NVOBJ_ENGINE_SW) +- return false; ++ nv_mask(dev, NV04_PFIFO_CACHE1_ENGINE, engine, 0x00000000); ++ handled = true; ++ break; ++ default: ++ engine = nv_rd32(dev, NV04_PFIFO_CACHE1_ENGINE); ++ if (unlikely(((engine >> (subc * 4)) & 0xf) != 0)) ++ break; + +- chan->sw_subchannel[subc] = ref->gpuobj->class; +- nv_wr32(dev, NV04_PFIFO_CACHE1_ENGINE, nv_rd32(dev, +- NV04_PFIFO_CACHE1_ENGINE) & ~(0xf << subc * 4)); +- return true; ++ if (!nouveau_call_method(chan, chan->sw_subchannel[subc], ++ mthd, data)) ++ handled = true; ++ break; + } + +- /* hw object */ +- if (nv_rd32(dev, NV04_PFIFO_CACHE1_ENGINE) & (1 << (subc*4))) +- return false; +- +- if (nouveau_call_method(chan, chan->sw_subchannel[subc], mthd, data)) +- return false; +- +- return true; ++out: ++ spin_unlock_irqrestore(&dev_priv->channels.lock, flags); ++ return handled; + } + + static void +@@ -140,14 +162,11 @@ + + reassign = nv_rd32(dev, NV03_PFIFO_CACHES) & 1; + while ((status = nv_rd32(dev, NV03_PFIFO_INTR_0)) && (cnt++ < 100)) { +- struct nouveau_channel *chan = NULL; + uint32_t chid, get; + + nv_wr32(dev, NV03_PFIFO_CACHES, 0); + + chid = engine->fifo.channel_id(dev); +- if (chid >= 0 && chid < engine->fifo.channels) +- chan = dev_priv->fifos[chid]; + get = nv_rd32(dev, NV03_PFIFO_CACHE1_GET); + + if (status & NV_PFIFO_INTR_CACHE_ERROR) { +@@ -174,7 +193,7 @@ + NV40_PFIFO_CACHE1_DATA(ptr)); + } + +- if (!chan || !nouveau_fifo_swmthd(chan, mthd, data)) { ++ if (!nouveau_fifo_swmthd(dev, chid, mthd, data)) { + NV_INFO(dev, "PFIFO_CACHE_ERROR - Ch %d/%d " + "Mthd 0x%04x Data 0x%08x\n", + chid, (mthd >> 13) & 7, mthd & 0x1ffc, +@@ -200,16 +219,47 @@ + } + + if (status & NV_PFIFO_INTR_DMA_PUSHER) { +- NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d\n", chid); ++ u32 dma_get = nv_rd32(dev, 0x003244); ++ u32 dma_put = nv_rd32(dev, 0x003240); ++ u32 push = nv_rd32(dev, 0x003220); ++ u32 state = nv_rd32(dev, 0x003228); ++ ++ if (dev_priv->card_type == NV_50) { ++ u32 ho_get = nv_rd32(dev, 0x003328); ++ u32 ho_put = nv_rd32(dev, 0x003320); ++ u32 ib_get = nv_rd32(dev, 0x003334); ++ u32 ib_put = nv_rd32(dev, 0x003330); ++ ++ if (nouveau_ratelimit()) ++ NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%02x%08x " ++ "Put 0x%02x%08x IbGet 0x%08x IbPut 0x%08x " ++ "State 0x%08x Push 0x%08x\n", 
++ chid, ho_get, dma_get, ho_put, ++ dma_put, ib_get, ib_put, state, ++ push); ++ ++ /* METHOD_COUNT, in DMA_STATE on earlier chipsets */ ++ nv_wr32(dev, 0x003364, 0x00000000); ++ if (dma_get != dma_put || ho_get != ho_put) { ++ nv_wr32(dev, 0x003244, dma_put); ++ nv_wr32(dev, 0x003328, ho_put); ++ } else ++ if (ib_get != ib_put) { ++ nv_wr32(dev, 0x003334, ib_put); ++ } ++ } else { ++ NV_INFO(dev, "PFIFO_DMA_PUSHER - Ch %d Get 0x%08x " ++ "Put 0x%08x State 0x%08x Push 0x%08x\n", ++ chid, dma_get, dma_put, state, push); + +- status &= ~NV_PFIFO_INTR_DMA_PUSHER; +- nv_wr32(dev, NV03_PFIFO_INTR_0, +- NV_PFIFO_INTR_DMA_PUSHER); ++ if (dma_get != dma_put) ++ nv_wr32(dev, 0x003244, dma_put); ++ } + +- nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_STATE, 0x00000000); +- if (nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT) != get) +- nv_wr32(dev, NV04_PFIFO_CACHE1_DMA_GET, +- get + 4); ++ nv_wr32(dev, 0x003228, 0x00000000); ++ nv_wr32(dev, 0x003220, 0x00000001); ++ nv_wr32(dev, 0x002100, NV_PFIFO_INTR_DMA_PUSHER); ++ status &= ~NV_PFIFO_INTR_DMA_PUSHER; + } + + if (status & NV_PFIFO_INTR_SEMAPHORE) { +@@ -226,9 +276,18 @@ + nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, 1); + } + ++ if (dev_priv->card_type == NV_50) { ++ if (status & 0x00000010) { ++ nv50_fb_vm_trap(dev, 1, "PFIFO_BAR_FAULT"); ++ status &= ~0x00000010; ++ nv_wr32(dev, 0x002100, 0x00000010); ++ } ++ } ++ + if (status) { +- NV_INFO(dev, "PFIFO_INTR 0x%08x - Ch %d\n", +- status, chid); ++ if (nouveau_ratelimit()) ++ NV_INFO(dev, "PFIFO_INTR 0x%08x - Ch %d\n", ++ status, chid); + nv_wr32(dev, NV03_PFIFO_INTR_0, status); + status = 0; + } +@@ -342,6 +401,8 @@ + nouveau_graph_chid_from_grctx(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_channel *chan; ++ unsigned long flags; + uint32_t inst; + int i; + +@@ -351,27 +412,29 @@ + if (dev_priv->card_type < NV_50) { + inst = (nv_rd32(dev, 0x40032c) & 0xfffff) << 4; + ++ spin_lock_irqsave(&dev_priv->channels.lock, flags); + for (i = 0; i < dev_priv->engine.fifo.channels; i++) { +- struct nouveau_channel *chan = dev_priv->fifos[i]; +- ++ chan = dev_priv->channels.ptr[i]; + if (!chan || !chan->ramin_grctx) + continue; + +- if (inst == chan->ramin_grctx->instance) ++ if (inst == chan->ramin_grctx->pinst) + break; + } ++ spin_unlock_irqrestore(&dev_priv->channels.lock, flags); + } else { + inst = (nv_rd32(dev, 0x40032c) & 0xfffff) << 12; + ++ spin_lock_irqsave(&dev_priv->channels.lock, flags); + for (i = 0; i < dev_priv->engine.fifo.channels; i++) { +- struct nouveau_channel *chan = dev_priv->fifos[i]; +- ++ chan = dev_priv->channels.ptr[i]; + if (!chan || !chan->ramin) + continue; + +- if (inst == chan->ramin->instance) ++ if (inst == chan->ramin->vinst) + break; + } ++ spin_unlock_irqrestore(&dev_priv->channels.lock, flags); + } + + +@@ -393,7 +456,8 @@ + else + channel = nouveau_graph_chid_from_grctx(dev); + +- if (channel >= engine->fifo.channels || !dev_priv->fifos[channel]) { ++ if (channel >= engine->fifo.channels || ++ !dev_priv->channels.ptr[channel]) { + NV_ERROR(dev, "AIII, invalid/inactive channel id %d\n", channel); + return -EINVAL; + } +@@ -476,14 +540,19 @@ + struct nouveau_pgraph_trap *trap) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; ++ unsigned long flags; ++ int ret = -EINVAL; + +- if (trap->channel < 0 || +- trap->channel >= dev_priv->engine.fifo.channels || +- !dev_priv->fifos[trap->channel]) +- return -ENODEV; ++ spin_lock_irqsave(&dev_priv->channels.lock, flags); ++ if (trap->channel > 0 && ++ trap->channel < 
dev_priv->engine.fifo.channels && ++ dev_priv->channels.ptr[trap->channel]) { ++ ret = nouveau_call_method(dev_priv->channels.ptr[trap->channel], ++ trap->class, trap->mthd, trap->data); ++ } ++ spin_unlock_irqrestore(&dev_priv->channels.lock, flags); + +- return nouveau_call_method(dev_priv->fifos[trap->channel], +- trap->class, trap->mthd, trap->data); ++ return ret; + } + + static inline void +@@ -505,13 +574,6 @@ + nouveau_graph_dump_trap_info(dev, "PGRAPH_NOTIFY", &trap); + } + +-static DEFINE_RATELIMIT_STATE(nouveau_ratelimit_state, 3 * HZ, 20); +- +-static int nouveau_ratelimit(void) +-{ +- return __ratelimit(&nouveau_ratelimit_state); +-} +- + + static inline void + nouveau_pgraph_intr_error(struct drm_device *dev, uint32_t nsource) +@@ -605,40 +667,6 @@ + nv_wr32(dev, NV03_PMC_INTR_0, NV_PMC_INTR_0_PGRAPH_PENDING); + } + +-static void +-nv50_pfb_vm_trap(struct drm_device *dev, int display, const char *name) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- uint32_t trap[6]; +- int i, ch; +- uint32_t idx = nv_rd32(dev, 0x100c90); +- if (idx & 0x80000000) { +- idx &= 0xffffff; +- if (display) { +- for (i = 0; i < 6; i++) { +- nv_wr32(dev, 0x100c90, idx | i << 24); +- trap[i] = nv_rd32(dev, 0x100c94); +- } +- for (ch = 0; ch < dev_priv->engine.fifo.channels; ch++) { +- struct nouveau_channel *chan = dev_priv->fifos[ch]; +- +- if (!chan || !chan->ramin) +- continue; +- +- if (trap[1] == chan->ramin->instance >> 12) +- break; +- } +- NV_INFO(dev, "%s - VM: Trapped %s at %02x%04x%04x status %08x %08x channel %d\n", +- name, (trap[5]&0x100?"read":"write"), +- trap[5]&0xff, trap[4]&0xffff, +- trap[3]&0xffff, trap[0], trap[2], ch); +- } +- nv_wr32(dev, 0x100c90, idx | 0x80000000); +- } else if (display) { +- NV_INFO(dev, "%s - no VM fault?\n", name); +- } +-} +- + static struct nouveau_enum_names nv50_mp_exec_error_names[] = + { + { 3, "STACK_UNDERFLOW" }, +@@ -711,7 +739,7 @@ + tps++; + switch (type) { + case 6: /* texture error... unknown for now */ +- nv50_pfb_vm_trap(dev, display, name); ++ nv50_fb_vm_trap(dev, display, name); + if (display) { + NV_ERROR(dev, "magic set %d:\n", i); + for (r = ustatus_addr + 4; r <= ustatus_addr + 0x10; r += 4) +@@ -734,7 +762,7 @@ + uint32_t e1c = nv_rd32(dev, ustatus_addr + 0x14); + uint32_t e20 = nv_rd32(dev, ustatus_addr + 0x18); + uint32_t e24 = nv_rd32(dev, ustatus_addr + 0x1c); +- nv50_pfb_vm_trap(dev, display, name); ++ nv50_fb_vm_trap(dev, display, name); + /* 2d engine destination */ + if (ustatus & 0x00000010) { + if (display) { +@@ -817,7 +845,7 @@ + + /* Known to be triggered by screwed up NOTIFY and COND... 
*/ + if (ustatus & 0x00000001) { +- nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_DISPATCH_FAULT"); ++ nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_DISPATCH_FAULT"); + nv_wr32(dev, 0x400500, 0); + if (nv_rd32(dev, 0x400808) & 0x80000000) { + if (display) { +@@ -842,7 +870,7 @@ + ustatus &= ~0x00000001; + } + if (ustatus & 0x00000002) { +- nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_DISPATCH_QUERY"); ++ nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_DISPATCH_QUERY"); + nv_wr32(dev, 0x400500, 0); + if (nv_rd32(dev, 0x40084c) & 0x80000000) { + if (display) { +@@ -884,15 +912,15 @@ + NV_INFO(dev, "PGRAPH_TRAP_M2MF - no ustatus?\n"); + } + if (ustatus & 0x00000001) { +- nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_NOTIFY"); ++ nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_NOTIFY"); + ustatus &= ~0x00000001; + } + if (ustatus & 0x00000002) { +- nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_IN"); ++ nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_IN"); + ustatus &= ~0x00000002; + } + if (ustatus & 0x00000004) { +- nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_OUT"); ++ nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_M2MF_OUT"); + ustatus &= ~0x00000004; + } + NV_INFO (dev, "PGRAPH_TRAP_M2MF - %08x %08x %08x %08x\n", +@@ -917,7 +945,7 @@ + NV_INFO(dev, "PGRAPH_TRAP_VFETCH - no ustatus?\n"); + } + if (ustatus & 0x00000001) { +- nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_VFETCH_FAULT"); ++ nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_VFETCH_FAULT"); + NV_INFO (dev, "PGRAPH_TRAP_VFETCH_FAULT - %08x %08x %08x %08x\n", + nv_rd32(dev, 0x400c00), + nv_rd32(dev, 0x400c08), +@@ -939,7 +967,7 @@ + NV_INFO(dev, "PGRAPH_TRAP_STRMOUT - no ustatus?\n"); + } + if (ustatus & 0x00000001) { +- nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_STRMOUT_FAULT"); ++ nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_STRMOUT_FAULT"); + NV_INFO (dev, "PGRAPH_TRAP_STRMOUT_FAULT - %08x %08x %08x %08x\n", + nv_rd32(dev, 0x401804), + nv_rd32(dev, 0x401808), +@@ -964,7 +992,7 @@ + NV_INFO(dev, "PGRAPH_TRAP_CCACHE - no ustatus?\n"); + } + if (ustatus & 0x00000001) { +- nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_CCACHE_FAULT"); ++ nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_CCACHE_FAULT"); + NV_INFO (dev, "PGRAPH_TRAP_CCACHE_FAULT - %08x %08x %08x %08x %08x %08x %08x\n", + nv_rd32(dev, 0x405800), + nv_rd32(dev, 0x405804), +@@ -986,7 +1014,7 @@ + * remaining, so try to handle it anyway. Perhaps related to that + * unknown DMA slot on tesla? 
*/ + if (status & 0x20) { +- nv50_pfb_vm_trap(dev, display, "PGRAPH_TRAP_UNKC04"); ++ nv50_fb_vm_trap(dev, display, "PGRAPH_TRAP_UNKC04"); + ustatus = nv_rd32(dev, 0x402000) & 0x7fffffff; + if (display) + NV_INFO(dev, "PGRAPH_TRAP_UNKC04 - Unhandled ustatus 0x%08x\n", ustatus); +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_mem.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_mem.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_mem.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_mem.c 2010-10-15 02:04:44.363992233 +0200 +@@ -33,7 +33,9 @@ + #include "drmP.h" + #include "drm.h" + #include "drm_sarea.h" ++ + #include "nouveau_drv.h" ++#include "nouveau_pm.h" + + /* + * NV10-NV40 tiling helpers +@@ -47,18 +49,14 @@ + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; +- struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i]; ++ struct nouveau_tile_reg *tile = &dev_priv->tile[i]; + + tile->addr = addr; + tile->size = size; + tile->used = !!pitch; + nouveau_fence_unref((void **)&tile->fence); + +- if (!pfifo->cache_flush(dev)) +- return; +- + pfifo->reassign(dev, false); +- pfifo->cache_flush(dev); + pfifo->cache_pull(dev, false); + + nouveau_wait_for_idle(dev); +@@ -76,34 +74,36 @@ + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fb_engine *pfb = &dev_priv->engine.fb; +- struct nouveau_tile_reg *tile = dev_priv->tile.reg, *found = NULL; +- int i; ++ struct nouveau_tile_reg *found = NULL; ++ unsigned long i, flags; + +- spin_lock(&dev_priv->tile.lock); ++ spin_lock_irqsave(&dev_priv->context_switch_lock, flags); + + for (i = 0; i < pfb->num_tiles; i++) { +- if (tile[i].used) ++ struct nouveau_tile_reg *tile = &dev_priv->tile[i]; ++ ++ if (tile->used) + /* Tile region in use. */ + continue; + +- if (tile[i].fence && +- !nouveau_fence_signalled(tile[i].fence, NULL)) ++ if (tile->fence && ++ !nouveau_fence_signalled(tile->fence, NULL)) + /* Pending tile region. */ + continue; + +- if (max(tile[i].addr, addr) < +- min(tile[i].addr + tile[i].size, addr + size)) ++ if (max(tile->addr, addr) < ++ min(tile->addr + tile->size, addr + size)) + /* Kill an intersecting tile region. */ + nv10_mem_set_region_tiling(dev, i, 0, 0, 0); + + if (pitch && !found) { + /* Free tile region. 
*/ + nv10_mem_set_region_tiling(dev, i, addr, size, pitch); +- found = &tile[i]; ++ found = tile; + } + } + +- spin_unlock(&dev_priv->tile.lock); ++ spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags); + + return found; + } +@@ -169,8 +169,9 @@ + virt += (end - pte); + + while (pte < end) { +- nv_wo32(dev, pgt, pte++, offset_l); +- nv_wo32(dev, pgt, pte++, offset_h); ++ nv_wo32(pgt, (pte * 4) + 0, offset_l); ++ nv_wo32(pgt, (pte * 4) + 4, offset_h); ++ pte += 2; + } + } + } +@@ -203,8 +204,10 @@ + pages -= (end - pte); + virt += (end - pte) << 15; + +- while (pte < end) +- nv_wo32(dev, pgt, pte++, 0); ++ while (pte < end) { ++ nv_wo32(pgt, (pte * 4), 0); ++ pte++; ++ } + } + dev_priv->engine.instmem.flush(dev); + +@@ -218,7 +221,7 @@ + * Cleanup everything + */ + void +-nouveau_mem_close(struct drm_device *dev) ++nouveau_mem_vram_fini(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + +@@ -229,6 +232,19 @@ + + nouveau_ttm_global_release(dev_priv); + ++ if (dev_priv->fb_mtrr >= 0) { ++ drm_mtrr_del(dev_priv->fb_mtrr, ++ pci_resource_start(dev->pdev, 1), ++ pci_resource_len(dev->pdev, 1), DRM_MTRR_WC); ++ dev_priv->fb_mtrr = -1; ++ } ++} ++ ++void ++nouveau_mem_gart_fini(struct drm_device *dev) ++{ ++ nouveau_sgdma_takedown(dev); ++ + if (drm_core_has_AGP(dev) && dev->agp) { + struct drm_agp_mem *entry, *tempe; + +@@ -248,13 +264,6 @@ + dev->agp->acquired = 0; + dev->agp->enabled = 0; + } +- +- if (dev_priv->fb_mtrr) { +- drm_mtrr_del(dev_priv->fb_mtrr, +- pci_resource_start(dev->pdev, 1), +- pci_resource_len(dev->pdev, 1), DRM_MTRR_WC); +- dev_priv->fb_mtrr = -1; +- } + } + + static uint32_t +@@ -305,8 +314,62 @@ + return 0; + } + +-/* returns the amount of FB ram in bytes */ +-int ++static void ++nv50_vram_preinit(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ int i, parts, colbits, rowbitsa, rowbitsb, banks; ++ u64 rowsize, predicted; ++ u32 r0, r4, rt, ru; ++ ++ r0 = nv_rd32(dev, 0x100200); ++ r4 = nv_rd32(dev, 0x100204); ++ rt = nv_rd32(dev, 0x100250); ++ ru = nv_rd32(dev, 0x001540); ++ NV_DEBUG(dev, "memcfg 0x%08x 0x%08x 0x%08x 0x%08x\n", r0, r4, rt, ru); ++ ++ for (i = 0, parts = 0; i < 8; i++) { ++ if (ru & (0x00010000 << i)) ++ parts++; ++ } ++ ++ colbits = (r4 & 0x0000f000) >> 12; ++ rowbitsa = ((r4 & 0x000f0000) >> 16) + 8; ++ rowbitsb = ((r4 & 0x00f00000) >> 20) + 8; ++ banks = ((r4 & 0x01000000) ? 8 : 4); ++ ++ rowsize = parts * banks * (1 << colbits) * 8; ++ predicted = rowsize << rowbitsa; ++ if (r0 & 0x00000004) ++ predicted += rowsize << rowbitsb; ++ ++ if (predicted != dev_priv->vram_size) { ++ NV_WARN(dev, "memory controller reports %dMiB VRAM\n", ++ (u32)(dev_priv->vram_size >> 20)); ++ NV_WARN(dev, "we calculated %dMiB VRAM\n", ++ (u32)(predicted >> 20)); ++ } ++ ++ dev_priv->vram_rblock_size = rowsize >> 12; ++ if (rt & 1) ++ dev_priv->vram_rblock_size *= 3; ++ ++ NV_DEBUG(dev, "rblock %lld bytes\n", ++ (u64)dev_priv->vram_rblock_size << 12); ++} ++ ++static void ++nvaa_vram_preinit(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ ++ /* To our knowledge, there's no large scale reordering of pages ++ * that occurs on IGP chipsets. 
++ */ ++ dev_priv->vram_rblock_size = 1; ++} ++ ++static int + nouveau_mem_detect(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; +@@ -325,9 +388,18 @@ + dev_priv->vram_size = nv_rd32(dev, NV04_PFB_FIFO_DATA); + dev_priv->vram_size |= (dev_priv->vram_size & 0xff) << 32; + dev_priv->vram_size &= 0xffffffff00ll; +- if (dev_priv->chipset == 0xaa || dev_priv->chipset == 0xac) { ++ ++ switch (dev_priv->chipset) { ++ case 0xaa: ++ case 0xac: ++ case 0xaf: + dev_priv->vram_sys_base = nv_rd32(dev, 0x100e10); + dev_priv->vram_sys_base <<= 12; ++ nvaa_vram_preinit(dev); ++ break; ++ default: ++ nv50_vram_preinit(dev); ++ break; + } + } else { + dev_priv->vram_size = nv_rd32(dev, 0x10f20c) << 20; +@@ -345,6 +417,33 @@ + return -ENOMEM; + } + ++#if __OS_HAS_AGP ++static unsigned long ++get_agp_mode(struct drm_device *dev, unsigned long mode) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ ++ /* ++ * FW seems to be broken on nv18, it makes the card lock up ++ * randomly. ++ */ ++ if (dev_priv->chipset == 0x18) ++ mode &= ~PCI_AGP_COMMAND_FW; ++ ++ /* ++ * AGP mode set in the command line. ++ */ ++ if (nouveau_agpmode > 0) { ++ bool agpv3 = mode & 0x8; ++ int rate = agpv3 ? nouveau_agpmode / 4 : nouveau_agpmode; ++ ++ mode = (mode & ~0x7) | (rate & 0x7); ++ } ++ ++ return mode; ++} ++#endif ++ + int + nouveau_mem_reset_agp(struct drm_device *dev) + { +@@ -355,7 +454,8 @@ + /* First of all, disable fast writes, otherwise if it's + * already enabled in the AGP bridge and we disable the card's + * AGP controller we might be locking ourselves out of it. */ +- if (nv_rd32(dev, NV04_PBUS_PCI_NV_19) & PCI_AGP_COMMAND_FW) { ++ if ((nv_rd32(dev, NV04_PBUS_PCI_NV_19) | ++ dev->agp->mode) & PCI_AGP_COMMAND_FW) { + struct drm_agp_info info; + struct drm_agp_mode mode; + +@@ -363,7 +463,7 @@ + if (ret) + return ret; + +- mode.mode = info.mode & ~PCI_AGP_COMMAND_FW; ++ mode.mode = get_agp_mode(dev, info.mode) & ~PCI_AGP_COMMAND_FW; + ret = drm_agp_enable(dev, mode); + if (ret) + return ret; +@@ -418,7 +518,7 @@ + } + + /* see agp.h for the AGPSTAT_* modes available */ +- mode.mode = info.mode; ++ mode.mode = get_agp_mode(dev, info.mode); + ret = drm_agp_enable(dev, mode); + if (ret) { + NV_ERROR(dev, "Unable to enable AGP: %d\n", ret); +@@ -433,24 +533,27 @@ + } + + int +-nouveau_mem_init(struct drm_device *dev) ++nouveau_mem_vram_init(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct ttm_bo_device *bdev = &dev_priv->ttm.bdev; +- int ret, dma_bits = 32; +- +- dev_priv->fb_phys = pci_resource_start(dev->pdev, 1); +- dev_priv->gart_info.type = NOUVEAU_GART_NONE; ++ int ret, dma_bits; + + if (dev_priv->card_type >= NV_50 && + pci_dma_supported(dev->pdev, DMA_BIT_MASK(40))) + dma_bits = 40; ++ else ++ dma_bits = 32; + + ret = pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(dma_bits)); +- if (ret) { +- NV_ERROR(dev, "Error setting DMA mask: %d\n", ret); ++ if (ret) ++ return ret; ++ ++ ret = nouveau_mem_detect(dev); ++ if (ret) + return ret; +- } ++ ++ dev_priv->fb_phys = pci_resource_start(dev->pdev, 1); + + ret = nouveau_ttm_global_init(dev_priv); + if (ret) +@@ -465,8 +568,6 @@ + return ret; + } + +- spin_lock_init(&dev_priv->tile.lock); +- + dev_priv->fb_available_size = dev_priv->vram_size; + dev_priv->fb_mappable_pages = dev_priv->fb_available_size; + if (dev_priv->fb_mappable_pages > pci_resource_len(dev->pdev, 1)) +@@ -474,7 +575,16 @@ + pci_resource_len(dev->pdev, 1); + dev_priv->fb_mappable_pages >>= PAGE_SHIFT; + +- 
/* remove reserved space at end of vram from available amount */ ++ /* reserve space at end of VRAM for PRAMIN */ ++ if (dev_priv->chipset == 0x40 || dev_priv->chipset == 0x47 || ++ dev_priv->chipset == 0x49 || dev_priv->chipset == 0x4b) ++ dev_priv->ramin_rsvd_vram = (2 * 1024 * 1024); ++ else ++ if (dev_priv->card_type >= NV_40) ++ dev_priv->ramin_rsvd_vram = (1 * 1024 * 1024); ++ else ++ dev_priv->ramin_rsvd_vram = (512 * 1024); ++ + dev_priv->fb_available_size -= dev_priv->ramin_rsvd_vram; + dev_priv->fb_aper_free = dev_priv->fb_available_size; + +@@ -495,9 +605,23 @@ + nouveau_bo_ref(NULL, &dev_priv->vga_ram); + } + +- /* GART */ ++ dev_priv->fb_mtrr = drm_mtrr_add(pci_resource_start(dev->pdev, 1), ++ pci_resource_len(dev->pdev, 1), ++ DRM_MTRR_WC); ++ return 0; ++} ++ ++int ++nouveau_mem_gart_init(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct ttm_bo_device *bdev = &dev_priv->ttm.bdev; ++ int ret; ++ ++ dev_priv->gart_info.type = NOUVEAU_GART_NONE; ++ + #if !defined(__powerpc__) && !defined(__ia64__) +- if (drm_device_is_agp(dev) && dev->agp && !nouveau_noagp) { ++ if (drm_device_is_agp(dev) && dev->agp && nouveau_agpmode) { + ret = nouveau_mem_init_agp(dev); + if (ret) + NV_ERROR(dev, "Error initialising AGP: %d\n", ret); +@@ -523,11 +647,150 @@ + return ret; + } + +- dev_priv->fb_mtrr = drm_mtrr_add(pci_resource_start(dev->pdev, 1), +- pci_resource_len(dev->pdev, 1), +- DRM_MTRR_WC); +- + return 0; + } + ++void ++nouveau_mem_timing_init(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ struct nouveau_pm_memtimings *memtimings = &pm->memtimings; ++ struct nvbios *bios = &dev_priv->vbios; ++ struct bit_entry P; ++ u8 tUNK_0, tUNK_1, tUNK_2; ++ u8 tRP; /* Byte 3 */ ++ u8 tRAS; /* Byte 5 */ ++ u8 tRFC; /* Byte 7 */ ++ u8 tRC; /* Byte 9 */ ++ u8 tUNK_10, tUNK_11, tUNK_12, tUNK_13, tUNK_14; ++ u8 tUNK_18, tUNK_19, tUNK_20, tUNK_21; ++ u8 *mem = NULL, *entry; ++ int i, recordlen, entries; ++ ++ if (bios->type == NVBIOS_BIT) { ++ if (bit_table(dev, 'P', &P)) ++ return; ++ ++ if (P.version == 1) ++ mem = ROMPTR(bios, P.data[4]); ++ else ++ if (P.version == 2) ++ mem = ROMPTR(bios, P.data[8]); ++ else { ++ NV_WARN(dev, "unknown mem for BIT P %d\n", P.version); ++ } ++ } else { ++ NV_DEBUG(dev, "BMP version too old for memory\n"); ++ return; ++ } ++ ++ if (!mem) { ++ NV_DEBUG(dev, "memory timing table pointer invalid\n"); ++ return; ++ } + ++ if (mem[0] != 0x10) { ++ NV_WARN(dev, "memory timing table 0x%02x unknown\n", mem[0]); ++ return; ++ } ++ ++ /* validate record length */ ++ entries = mem[2]; ++ recordlen = mem[3]; ++ if (recordlen < 15) { ++ NV_ERROR(dev, "mem timing table length unknown: %d\n", mem[3]); ++ return; ++ } ++ ++ /* parse vbios entries into common format */ ++ memtimings->timing = ++ kcalloc(entries, sizeof(*memtimings->timing), GFP_KERNEL); ++ if (!memtimings->timing) ++ return; ++ ++ entry = mem + mem[1]; ++ for (i = 0; i < entries; i++, entry += recordlen) { ++ struct nouveau_pm_memtiming *timing = &pm->memtimings.timing[i]; ++ if (entry[0] == 0) ++ continue; ++ ++ tUNK_18 = 1; ++ tUNK_19 = 1; ++ tUNK_20 = 0; ++ tUNK_21 = 0; ++ switch (min(recordlen, 21)) { ++ case 21: ++ tUNK_21 = entry[21]; ++ case 20: ++ tUNK_20 = entry[20]; ++ case 19: ++ tUNK_19 = entry[19]; ++ case 18: ++ tUNK_18 = entry[18]; ++ default: ++ tUNK_0 = entry[0]; ++ tUNK_1 = entry[1]; ++ tUNK_2 = entry[2]; ++ tRP = entry[3]; ++ tRAS = entry[5]; ++ tRFC = 
entry[7]; ++ tRC = entry[9]; ++ tUNK_10 = entry[10]; ++ tUNK_11 = entry[11]; ++ tUNK_12 = entry[12]; ++ tUNK_13 = entry[13]; ++ tUNK_14 = entry[14]; ++ break; ++ } ++ ++ timing->reg_100220 = (tRC << 24 | tRFC << 16 | tRAS << 8 | tRP); ++ ++ /* XXX: I don't trust the -1's and +1's... they must come ++ * from somewhere! */ ++ timing->reg_100224 = ((tUNK_0 + tUNK_19 + 1) << 24 | ++ tUNK_18 << 16 | ++ (tUNK_1 + tUNK_19 + 1) << 8 | ++ (tUNK_2 - 1)); ++ ++ timing->reg_100228 = (tUNK_12 << 16 | tUNK_11 << 8 | tUNK_10); ++ if(recordlen > 19) { ++ timing->reg_100228 += (tUNK_19 - 1) << 24; ++ } else { ++ timing->reg_100228 += tUNK_12 << 24; ++ } ++ ++ /* XXX: reg_10022c */ ++ ++ timing->reg_100230 = (tUNK_20 << 24 | tUNK_21 << 16 | ++ tUNK_13 << 8 | tUNK_13); ++ ++ /* XXX: +6? */ ++ timing->reg_100234 = (tRAS << 24 | (tUNK_19 + 6) << 8 | tRC); ++ if(tUNK_10 > tUNK_11) { ++ timing->reg_100234 += tUNK_10 << 16; ++ } else { ++ timing->reg_100234 += tUNK_11 << 16; ++ } ++ ++ /* XXX; reg_100238, reg_10023c */ ++ NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x %08x\n", i, ++ timing->reg_100220, timing->reg_100224, ++ timing->reg_100228, timing->reg_10022c); ++ NV_DEBUG(dev, " 230: %08x %08x %08x %08x\n", ++ timing->reg_100230, timing->reg_100234, ++ timing->reg_100238, timing->reg_10023c); ++ } ++ ++ memtimings->nr_timing = entries; ++ memtimings->supported = true; ++} ++ ++void ++nouveau_mem_timing_fini(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_memtimings *mem = &dev_priv->engine.pm.memtimings; ++ ++ kfree(mem->timing); ++} +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_notifier.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_notifier.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_notifier.c 2010-10-15 02:00:53.004120745 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_notifier.c 2010-10-15 02:04:44.365992257 +0200 +@@ -28,6 +28,7 @@ + #include "drmP.h" + #include "drm.h" + #include "nouveau_drv.h" ++#include "nouveau_ramht.h" + + int + nouveau_notifier_init_channel(struct nouveau_channel *chan) +@@ -146,11 +147,11 @@ + nobj->dtor = nouveau_notifier_gpuobj_dtor; + nobj->priv = mem; + +- ret = nouveau_gpuobj_ref_add(dev, chan, handle, nobj, NULL); ++ ret = nouveau_ramht_insert(chan, handle, nobj); ++ nouveau_gpuobj_ref(NULL, &nobj); + if (ret) { +- nouveau_gpuobj_del(dev, &nobj); + drm_mm_put_block(mem); +- NV_ERROR(dev, "Error referencing notifier ctxdma: %d\n", ret); ++ NV_ERROR(dev, "Error adding notifier to ramht: %d\n", ret); + return ret; + } + +@@ -184,11 +185,11 @@ + struct nouveau_channel *chan; + int ret; + +- NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(na->channel, file_priv, chan); ++ chan = nouveau_channel_get(dev, file_priv, na->channel); ++ if (IS_ERR(chan)) ++ return PTR_ERR(chan); + + ret = nouveau_notifier_alloc(chan, na->handle, na->size, &na->offset); +- if (ret) +- return ret; +- +- return 0; ++ nouveau_channel_put(&chan); ++ return ret; + } +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_object.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_object.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_object.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_object.c 2010-10-15 02:04:44.369992308 +0200 +@@ -34,6 +34,7 @@ + #include "drm.h" + #include "nouveau_drv.h" + #include "nouveau_drm.h" ++#include "nouveau_ramht.h" + + /* NVidia uses context objects to drive drawing operations. 
+ +@@ -65,137 +66,6 @@ + The key into the hash table depends on the object handle and channel id and + is given as: + */ +-static uint32_t +-nouveau_ramht_hash_handle(struct drm_device *dev, int channel, uint32_t handle) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- uint32_t hash = 0; +- int i; +- +- NV_DEBUG(dev, "ch%d handle=0x%08x\n", channel, handle); +- +- for (i = 32; i > 0; i -= dev_priv->ramht_bits) { +- hash ^= (handle & ((1 << dev_priv->ramht_bits) - 1)); +- handle >>= dev_priv->ramht_bits; +- } +- +- if (dev_priv->card_type < NV_50) +- hash ^= channel << (dev_priv->ramht_bits - 4); +- hash <<= 3; +- +- NV_DEBUG(dev, "hash=0x%08x\n", hash); +- return hash; +-} +- +-static int +-nouveau_ramht_entry_valid(struct drm_device *dev, struct nouveau_gpuobj *ramht, +- uint32_t offset) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- uint32_t ctx = nv_ro32(dev, ramht, (offset + 4)/4); +- +- if (dev_priv->card_type < NV_40) +- return ((ctx & NV_RAMHT_CONTEXT_VALID) != 0); +- return (ctx != 0); +-} +- +-static int +-nouveau_ramht_insert(struct drm_device *dev, struct nouveau_gpuobj_ref *ref) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem; +- struct nouveau_channel *chan = ref->channel; +- struct nouveau_gpuobj *ramht = chan->ramht ? chan->ramht->gpuobj : NULL; +- uint32_t ctx, co, ho; +- +- if (!ramht) { +- NV_ERROR(dev, "No hash table!\n"); +- return -EINVAL; +- } +- +- if (dev_priv->card_type < NV_40) { +- ctx = NV_RAMHT_CONTEXT_VALID | (ref->instance >> 4) | +- (chan->id << NV_RAMHT_CONTEXT_CHANNEL_SHIFT) | +- (ref->gpuobj->engine << NV_RAMHT_CONTEXT_ENGINE_SHIFT); +- } else +- if (dev_priv->card_type < NV_50) { +- ctx = (ref->instance >> 4) | +- (chan->id << NV40_RAMHT_CONTEXT_CHANNEL_SHIFT) | +- (ref->gpuobj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT); +- } else { +- if (ref->gpuobj->engine == NVOBJ_ENGINE_DISPLAY) { +- ctx = (ref->instance << 10) | 2; +- } else { +- ctx = (ref->instance >> 4) | +- ((ref->gpuobj->engine << +- NV40_RAMHT_CONTEXT_ENGINE_SHIFT)); +- } +- } +- +- co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle); +- do { +- if (!nouveau_ramht_entry_valid(dev, ramht, co)) { +- NV_DEBUG(dev, +- "insert ch%d 0x%08x: h=0x%08x, c=0x%08x\n", +- chan->id, co, ref->handle, ctx); +- nv_wo32(dev, ramht, (co + 0)/4, ref->handle); +- nv_wo32(dev, ramht, (co + 4)/4, ctx); +- +- list_add_tail(&ref->list, &chan->ramht_refs); +- instmem->flush(dev); +- return 0; +- } +- NV_DEBUG(dev, "collision ch%d 0x%08x: h=0x%08x\n", +- chan->id, co, nv_ro32(dev, ramht, co/4)); +- +- co += 8; +- if (co >= dev_priv->ramht_size) +- co = 0; +- } while (co != ho); +- +- NV_ERROR(dev, "RAMHT space exhausted. ch=%d\n", chan->id); +- return -ENOMEM; +-} +- +-static void +-nouveau_ramht_remove(struct drm_device *dev, struct nouveau_gpuobj_ref *ref) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem; +- struct nouveau_channel *chan = ref->channel; +- struct nouveau_gpuobj *ramht = chan->ramht ? 
chan->ramht->gpuobj : NULL; +- uint32_t co, ho; +- +- if (!ramht) { +- NV_ERROR(dev, "No hash table!\n"); +- return; +- } +- +- co = ho = nouveau_ramht_hash_handle(dev, chan->id, ref->handle); +- do { +- if (nouveau_ramht_entry_valid(dev, ramht, co) && +- (ref->handle == nv_ro32(dev, ramht, (co/4)))) { +- NV_DEBUG(dev, +- "remove ch%d 0x%08x: h=0x%08x, c=0x%08x\n", +- chan->id, co, ref->handle, +- nv_ro32(dev, ramht, (co + 4))); +- nv_wo32(dev, ramht, (co + 0)/4, 0x00000000); +- nv_wo32(dev, ramht, (co + 4)/4, 0x00000000); +- +- list_del(&ref->list); +- instmem->flush(dev); +- return; +- } +- +- co += 8; +- if (co >= dev_priv->ramht_size) +- co = 0; +- } while (co != ho); +- list_del(&ref->list); +- +- NV_ERROR(dev, "RAMHT entry not found. ch=%d, handle=0x%08x\n", +- chan->id, ref->handle); +-} + + int + nouveau_gpuobj_new(struct drm_device *dev, struct nouveau_channel *chan, +@@ -205,7 +75,7 @@ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_engine *engine = &dev_priv->engine; + struct nouveau_gpuobj *gpuobj; +- struct drm_mm *pramin = NULL; ++ struct drm_mm_node *ramin = NULL; + int ret; + + NV_DEBUG(dev, "ch%d size=%u align=%d flags=0x%08x\n", +@@ -218,69 +88,102 @@ + if (!gpuobj) + return -ENOMEM; + NV_DEBUG(dev, "gpuobj %p\n", gpuobj); ++ gpuobj->dev = dev; + gpuobj->flags = flags; +- gpuobj->im_channel = chan; ++ kref_init(&gpuobj->refcount); ++ gpuobj->size = size; + ++ spin_lock(&dev_priv->ramin_lock); + list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list); ++ spin_unlock(&dev_priv->ramin_lock); + +- /* Choose between global instmem heap, and per-channel private +- * instmem heap. On ramin_heap; ++ ++ ramin = drm_mm_search_free(&chan->ramin_heap, size, align, 0); ++ if (ramin) ++ ramin = drm_mm_get_block(ramin, size, align); ++ ++ if (!ramin) { ++ nouveau_gpuobj_ref(NULL, &gpuobj); ++ return -ENOMEM; ++ } + } else { + NV_DEBUG(dev, "global heap\n"); +- pramin = &dev_priv->ramin_heap; + ++ /* allocate backing pages, sets vinst */ + ret = engine->instmem.populate(dev, gpuobj, &size); + if (ret) { +- nouveau_gpuobj_del(dev, &gpuobj); ++ nouveau_gpuobj_ref(NULL, &gpuobj); + return ret; + } +- } + +- /* Allocate a chunk of the PRAMIN aperture */ +- gpuobj->im_pramin = drm_mm_search_free(pramin, size, align, 0); +- if (gpuobj->im_pramin) +- gpuobj->im_pramin = drm_mm_get_block(gpuobj->im_pramin, size, align); ++ /* try and get aperture space */ ++ do { ++ if (drm_mm_pre_get(&dev_priv->ramin_heap)) ++ return -ENOMEM; + +- if (!gpuobj->im_pramin) { +- nouveau_gpuobj_del(dev, &gpuobj); +- return -ENOMEM; ++ spin_lock(&dev_priv->ramin_lock); ++ ramin = drm_mm_search_free(&dev_priv->ramin_heap, size, ++ align, 0); ++ if (ramin == NULL) { ++ spin_unlock(&dev_priv->ramin_lock); ++ nouveau_gpuobj_ref(NULL, &gpuobj); ++ return ret; ++ } ++ ++ ramin = drm_mm_get_block_atomic(ramin, size, align); ++ spin_unlock(&dev_priv->ramin_lock); ++ } while (ramin == NULL); ++ ++ /* on nv50 it's ok to fail, we have a fallback path */ ++ if (!ramin && dev_priv->card_type < NV_50) { ++ nouveau_gpuobj_ref(NULL, &gpuobj); ++ return -ENOMEM; ++ } + } + +- if (!chan) { ++ /* if we got a chunk of the aperture, map pages into it */ ++ gpuobj->im_pramin = ramin; ++ if (!chan && gpuobj->im_pramin && dev_priv->ramin_available) { + ret = engine->instmem.bind(dev, gpuobj); + if (ret) { +- nouveau_gpuobj_del(dev, &gpuobj); ++ nouveau_gpuobj_ref(NULL, &gpuobj); + return ret; + } + } + ++ /* calculate the various different addresses for the object */ ++ if (chan) { ++ gpuobj->pinst = 
chan->ramin->pinst; ++ if (gpuobj->pinst != ~0) ++ gpuobj->pinst += gpuobj->im_pramin->start; ++ ++ if (dev_priv->card_type < NV_50) { ++ gpuobj->cinst = gpuobj->pinst; ++ } else { ++ gpuobj->cinst = gpuobj->im_pramin->start; ++ gpuobj->vinst = gpuobj->im_pramin->start + ++ chan->ramin->vinst; ++ } ++ } else { ++ if (gpuobj->im_pramin) ++ gpuobj->pinst = gpuobj->im_pramin->start; ++ else ++ gpuobj->pinst = ~0; ++ gpuobj->cinst = 0xdeadbeef; ++ } ++ + if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) { + int i; + +- for (i = 0; i < gpuobj->im_pramin->size; i += 4) +- nv_wo32(dev, gpuobj, i/4, 0); ++ for (i = 0; i < gpuobj->size; i += 4) ++ nv_wo32(gpuobj, i, 0); + engine->instmem.flush(dev); + } + +- *gpuobj_ret = gpuobj; +- return 0; +-} +- +-int +-nouveau_gpuobj_early_init(struct drm_device *dev) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- +- NV_DEBUG(dev, "\n"); +- +- INIT_LIST_HEAD(&dev_priv->gpuobj_list); + ++ *gpuobj_ret = gpuobj; + return 0; + } + +@@ -288,18 +191,12 @@ + nouveau_gpuobj_init(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; +- int ret; + + NV_DEBUG(dev, "\n"); + +- if (dev_priv->card_type < NV_50) { +- ret = nouveau_gpuobj_new_fake(dev, +- dev_priv->ramht_offset, ~0, dev_priv->ramht_size, +- NVOBJ_FLAG_ZERO_ALLOC | NVOBJ_FLAG_ALLOW_NO_REFS, +- &dev_priv->ramht, NULL); +- if (ret) +- return ret; +- } ++ INIT_LIST_HEAD(&dev_priv->gpuobj_list); ++ spin_lock_init(&dev_priv->ramin_lock); ++ dev_priv->ramin_base = ~0; + + return 0; + } +@@ -311,297 +208,89 @@ + + NV_DEBUG(dev, "\n"); + +- nouveau_gpuobj_del(dev, &dev_priv->ramht); ++ BUG_ON(!list_empty(&dev_priv->gpuobj_list)); + } + +-void +-nouveau_gpuobj_late_takedown(struct drm_device *dev) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_gpuobj *gpuobj = NULL; +- struct list_head *entry, *tmp; + +- NV_DEBUG(dev, "\n"); +- +- list_for_each_safe(entry, tmp, &dev_priv->gpuobj_list) { +- gpuobj = list_entry(entry, struct nouveau_gpuobj, list); +- +- NV_ERROR(dev, "gpuobj %p still exists at takedown, refs=%d\n", +- gpuobj, gpuobj->refcount); +- gpuobj->refcount = 0; +- nouveau_gpuobj_del(dev, &gpuobj); +- } +-} +- +-int +-nouveau_gpuobj_del(struct drm_device *dev, struct nouveau_gpuobj **pgpuobj) ++static void ++nouveau_gpuobj_del(struct kref *ref) + { ++ struct nouveau_gpuobj *gpuobj = ++ container_of(ref, struct nouveau_gpuobj, refcount); ++ struct drm_device *dev = gpuobj->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_engine *engine = &dev_priv->engine; +- struct nouveau_gpuobj *gpuobj; + int i; + +- NV_DEBUG(dev, "gpuobj %p\n", pgpuobj ? 
*pgpuobj : NULL); +- +- if (!dev_priv || !pgpuobj || !(*pgpuobj)) +- return -EINVAL; +- gpuobj = *pgpuobj; +- +- if (gpuobj->refcount != 0) { +- NV_ERROR(dev, "gpuobj refcount is %d\n", gpuobj->refcount); +- return -EINVAL; +- } ++ NV_DEBUG(dev, "gpuobj %p\n", gpuobj); + + if (gpuobj->im_pramin && (gpuobj->flags & NVOBJ_FLAG_ZERO_FREE)) { +- for (i = 0; i < gpuobj->im_pramin->size; i += 4) +- nv_wo32(dev, gpuobj, i/4, 0); ++ for (i = 0; i < gpuobj->size; i += 4) ++ nv_wo32(gpuobj, i, 0); + engine->instmem.flush(dev); + } + + if (gpuobj->dtor) + gpuobj->dtor(dev, gpuobj); + +- if (gpuobj->im_backing && !(gpuobj->flags & NVOBJ_FLAG_FAKE)) ++ if (gpuobj->im_backing) + engine->instmem.clear(dev, gpuobj); + +- if (gpuobj->im_pramin) { +- if (gpuobj->flags & NVOBJ_FLAG_FAKE) +- kfree(gpuobj->im_pramin); +- else +- drm_mm_put_block(gpuobj->im_pramin); +- } +- ++ spin_lock(&dev_priv->ramin_lock); ++ if (gpuobj->im_pramin) ++ drm_mm_put_block(gpuobj->im_pramin); + list_del(&gpuobj->list); ++ spin_unlock(&dev_priv->ramin_lock); + +- *pgpuobj = NULL; + kfree(gpuobj); +- return 0; +-} +- +-static int +-nouveau_gpuobj_instance_get(struct drm_device *dev, +- struct nouveau_channel *chan, +- struct nouveau_gpuobj *gpuobj, uint32_t *inst) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_gpuobj *cpramin; +- +- /* card_type < NV_50) { +- *inst = gpuobj->im_pramin->start; +- return 0; +- } +- +- if (chan && gpuobj->im_channel != chan) { +- NV_ERROR(dev, "Channel mismatch: obj %d, ref %d\n", +- gpuobj->im_channel->id, chan->id); +- return -EINVAL; +- } +- +- /* NV50 channel-local instance */ +- if (chan) { +- cpramin = chan->ramin->gpuobj; +- *inst = gpuobj->im_pramin->start - cpramin->im_pramin->start; +- return 0; +- } +- +- /* NV50 global (VRAM) instance */ +- if (!gpuobj->im_channel) { +- /* ...from global heap */ +- if (!gpuobj->im_backing) { +- NV_ERROR(dev, "AII, no VRAM backing gpuobj\n"); +- return -EINVAL; +- } +- *inst = gpuobj->im_backing_start; +- return 0; +- } else { +- /* ...from local heap */ +- cpramin = gpuobj->im_channel->ramin->gpuobj; +- *inst = cpramin->im_backing_start + +- (gpuobj->im_pramin->start - cpramin->im_pramin->start); +- return 0; +- } +- +- return -EINVAL; +-} +- +-int +-nouveau_gpuobj_ref_add(struct drm_device *dev, struct nouveau_channel *chan, +- uint32_t handle, struct nouveau_gpuobj *gpuobj, +- struct nouveau_gpuobj_ref **ref_ret) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_gpuobj_ref *ref; +- uint32_t instance; +- int ret; +- +- NV_DEBUG(dev, "ch%d h=0x%08x gpuobj=%p\n", +- chan ? 
chan->id : -1, handle, gpuobj); +- +- if (!dev_priv || !gpuobj || (ref_ret && *ref_ret != NULL)) +- return -EINVAL; +- +- if (!chan && !ref_ret) +- return -EINVAL; +- +- if (gpuobj->engine == NVOBJ_ENGINE_SW && !gpuobj->im_pramin) { +- /* sw object */ +- instance = 0x40; +- } else { +- ret = nouveau_gpuobj_instance_get(dev, chan, gpuobj, &instance); +- if (ret) +- return ret; +- } +- +- ref = kzalloc(sizeof(*ref), GFP_KERNEL); +- if (!ref) +- return -ENOMEM; +- INIT_LIST_HEAD(&ref->list); +- ref->gpuobj = gpuobj; +- ref->channel = chan; +- ref->instance = instance; +- +- if (!ref_ret) { +- ref->handle = handle; +- +- ret = nouveau_ramht_insert(dev, ref); +- if (ret) { +- kfree(ref); +- return ret; +- } +- } else { +- ref->handle = ~0; +- *ref_ret = ref; +- } +- +- ref->gpuobj->refcount++; +- return 0; +-} +- +-int nouveau_gpuobj_ref_del(struct drm_device *dev, struct nouveau_gpuobj_ref **pref) +-{ +- struct nouveau_gpuobj_ref *ref; +- +- NV_DEBUG(dev, "ref %p\n", pref ? *pref : NULL); +- +- if (!dev || !pref || *pref == NULL) +- return -EINVAL; +- ref = *pref; +- +- if (ref->handle != ~0) +- nouveau_ramht_remove(dev, ref); +- +- if (ref->gpuobj) { +- ref->gpuobj->refcount--; +- +- if (ref->gpuobj->refcount == 0) { +- if (!(ref->gpuobj->flags & NVOBJ_FLAG_ALLOW_NO_REFS)) +- nouveau_gpuobj_del(dev, &ref->gpuobj); +- } +- } +- +- *pref = NULL; +- kfree(ref); +- return 0; +-} +- +-int +-nouveau_gpuobj_new_ref(struct drm_device *dev, +- struct nouveau_channel *oc, struct nouveau_channel *rc, +- uint32_t handle, uint32_t size, int align, +- uint32_t flags, struct nouveau_gpuobj_ref **ref) +-{ +- struct nouveau_gpuobj *gpuobj = NULL; +- int ret; +- +- ret = nouveau_gpuobj_new(dev, oc, size, align, flags, &gpuobj); +- if (ret) +- return ret; +- +- ret = nouveau_gpuobj_ref_add(dev, rc, handle, gpuobj, ref); +- if (ret) { +- nouveau_gpuobj_del(dev, &gpuobj); +- return ret; +- } +- +- return 0; + } + +-int +-nouveau_gpuobj_ref_find(struct nouveau_channel *chan, uint32_t handle, +- struct nouveau_gpuobj_ref **ref_ret) ++void ++nouveau_gpuobj_ref(struct nouveau_gpuobj *ref, struct nouveau_gpuobj **ptr) + { +- struct nouveau_gpuobj_ref *ref; +- struct list_head *entry, *tmp; +- +- list_for_each_safe(entry, tmp, &chan->ramht_refs) { +- ref = list_entry(entry, struct nouveau_gpuobj_ref, list); ++ if (ref) ++ kref_get(&ref->refcount); + +- if (ref->handle == handle) { +- if (ref_ret) +- *ref_ret = ref; +- return 0; +- } +- } ++ if (*ptr) ++ kref_put(&(*ptr)->refcount, nouveau_gpuobj_del); + +- return -EINVAL; ++ *ptr = ref; + } + + int +-nouveau_gpuobj_new_fake(struct drm_device *dev, uint32_t p_offset, +- uint32_t b_offset, uint32_t size, +- uint32_t flags, struct nouveau_gpuobj **pgpuobj, +- struct nouveau_gpuobj_ref **pref) ++nouveau_gpuobj_new_fake(struct drm_device *dev, u32 pinst, u64 vinst, ++ u32 size, u32 flags, struct nouveau_gpuobj **pgpuobj) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_gpuobj *gpuobj = NULL; + int i; + + NV_DEBUG(dev, +- "p_offset=0x%08x b_offset=0x%08x size=0x%08x flags=0x%08x\n", +- p_offset, b_offset, size, flags); ++ "pinst=0x%08x vinst=0x%010llx size=0x%08x flags=0x%08x\n", ++ pinst, vinst, size, flags); + + gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL); + if (!gpuobj) + return -ENOMEM; + NV_DEBUG(dev, "gpuobj %p\n", gpuobj); +- gpuobj->im_channel = NULL; +- gpuobj->flags = flags | NVOBJ_FLAG_FAKE; +- +- list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list); +- +- if (p_offset != ~0) { +- gpuobj->im_pramin = kzalloc(sizeof(struct 
drm_mm_node), +- GFP_KERNEL); +- if (!gpuobj->im_pramin) { +- nouveau_gpuobj_del(dev, &gpuobj); +- return -ENOMEM; +- } +- gpuobj->im_pramin->start = p_offset; +- gpuobj->im_pramin->size = size; +- } +- +- if (b_offset != ~0) { +- gpuobj->im_backing = (struct nouveau_bo *)-1; +- gpuobj->im_backing_start = b_offset; +- } ++ gpuobj->dev = dev; ++ gpuobj->flags = flags; ++ kref_init(&gpuobj->refcount); ++ gpuobj->size = size; ++ gpuobj->pinst = pinst; ++ gpuobj->cinst = 0xdeadbeef; ++ gpuobj->vinst = vinst; + + if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) { +- for (i = 0; i < gpuobj->im_pramin->size; i += 4) +- nv_wo32(dev, gpuobj, i/4, 0); ++ for (i = 0; i < gpuobj->size; i += 4) ++ nv_wo32(gpuobj, i, 0); + dev_priv->engine.instmem.flush(dev); + } + +- if (pref) { +- i = nouveau_gpuobj_ref_add(dev, NULL, 0, gpuobj, pref); +- if (i) { +- nouveau_gpuobj_del(dev, &gpuobj); +- return i; +- } +- } +- +- if (pgpuobj) +- *pgpuobj = gpuobj; ++ spin_lock(&dev_priv->ramin_lock); ++ list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list); ++ spin_unlock(&dev_priv->ramin_lock); ++ *pgpuobj = gpuobj; + return 0; + } + +@@ -685,14 +374,12 @@ + adjust = offset & 0x00000fff; + frame = offset & ~0x00000fff; + +- nv_wo32(dev, *gpuobj, 0, ((1<<12) | (1<<13) | +- (adjust << 20) | +- (access << 14) | +- (target << 16) | +- class)); +- nv_wo32(dev, *gpuobj, 1, size - 1); +- nv_wo32(dev, *gpuobj, 2, frame | pte_flags); +- nv_wo32(dev, *gpuobj, 3, frame | pte_flags); ++ nv_wo32(*gpuobj, 0, ((1<<12) | (1<<13) | (adjust << 20) | ++ (access << 14) | (target << 16) | ++ class)); ++ nv_wo32(*gpuobj, 4, size - 1); ++ nv_wo32(*gpuobj, 8, frame | pte_flags); ++ nv_wo32(*gpuobj, 12, frame | pte_flags); + } else { + uint64_t limit = offset + size - 1; + uint32_t flags0, flags5; +@@ -705,12 +392,12 @@ + flags5 = 0x00080000; + } + +- nv_wo32(dev, *gpuobj, 0, flags0 | class); +- nv_wo32(dev, *gpuobj, 1, lower_32_bits(limit)); +- nv_wo32(dev, *gpuobj, 2, lower_32_bits(offset)); +- nv_wo32(dev, *gpuobj, 3, ((upper_32_bits(limit) & 0xff) << 24) | +- (upper_32_bits(offset) & 0xff)); +- nv_wo32(dev, *gpuobj, 5, flags5); ++ nv_wo32(*gpuobj, 0, flags0 | class); ++ nv_wo32(*gpuobj, 4, lower_32_bits(limit)); ++ nv_wo32(*gpuobj, 8, lower_32_bits(offset)); ++ nv_wo32(*gpuobj, 12, ((upper_32_bits(limit) & 0xff) << 24) | ++ (upper_32_bits(offset) & 0xff)); ++ nv_wo32(*gpuobj, 20, flags5); + } + + instmem->flush(dev); +@@ -741,7 +428,7 @@ + *o_ret = 0; + } else + if (dev_priv->gart_info.type == NOUVEAU_GART_SGDMA) { +- *gpuobj = dev_priv->gart_info.sg_ctxdma; ++ nouveau_gpuobj_ref(dev_priv->gart_info.sg_ctxdma, gpuobj); + if (offset & ~0xffffffffULL) { + NV_ERROR(dev, "obj offset exceeds 32-bits\n"); + return -EINVAL; +@@ -829,25 +516,25 @@ + } + + if (dev_priv->card_type >= NV_50) { +- nv_wo32(dev, *gpuobj, 0, class); +- nv_wo32(dev, *gpuobj, 5, 0x00010000); ++ nv_wo32(*gpuobj, 0, class); ++ nv_wo32(*gpuobj, 20, 0x00010000); + } else { + switch (class) { + case NV_CLASS_NULL: +- nv_wo32(dev, *gpuobj, 0, 0x00001030); +- nv_wo32(dev, *gpuobj, 1, 0xFFFFFFFF); ++ nv_wo32(*gpuobj, 0, 0x00001030); ++ nv_wo32(*gpuobj, 4, 0xFFFFFFFF); + break; + default: + if (dev_priv->card_type >= NV_40) { +- nv_wo32(dev, *gpuobj, 0, class); ++ nv_wo32(*gpuobj, 0, class); + #ifdef __BIG_ENDIAN +- nv_wo32(dev, *gpuobj, 2, 0x01000000); ++ nv_wo32(*gpuobj, 8, 0x01000000); + #endif + } else { + #ifdef __BIG_ENDIAN +- nv_wo32(dev, *gpuobj, 0, class | 0x00080000); ++ nv_wo32(*gpuobj, 0, class | 0x00080000); + #else +- nv_wo32(dev, *gpuobj, 0, class); ++ nv_wo32(*gpuobj, 0, 
class); + #endif + } + } +@@ -873,10 +560,15 @@ + gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL); + if (!gpuobj) + return -ENOMEM; ++ gpuobj->dev = chan->dev; + gpuobj->engine = NVOBJ_ENGINE_SW; + gpuobj->class = class; ++ kref_init(&gpuobj->refcount); ++ gpuobj->cinst = 0x40; + ++ spin_lock(&dev_priv->ramin_lock); + list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list); ++ spin_unlock(&dev_priv->ramin_lock); + *gpuobj_ret = gpuobj; + return 0; + } +@@ -886,7 +578,6 @@ + { + struct drm_device *dev = chan->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_gpuobj *pramin = NULL; + uint32_t size; + uint32_t base; + int ret; +@@ -911,18 +602,16 @@ + size += 0x1000; + } + +- ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, size, 0x1000, 0, +- &chan->ramin); ++ ret = nouveau_gpuobj_new(dev, NULL, size, 0x1000, 0, &chan->ramin); + if (ret) { + NV_ERROR(dev, "Error allocating channel PRAMIN: %d\n", ret); + return ret; + } +- pramin = chan->ramin->gpuobj; + +- ret = drm_mm_init(&chan->ramin_heap, pramin->im_pramin->start + base, size); ++ ret = drm_mm_init(&chan->ramin_heap, base, size); + if (ret) { + NV_ERROR(dev, "Error creating PRAMIN heap: %d\n", ret); +- nouveau_gpuobj_ref_del(dev, &chan->ramin); ++ nouveau_gpuobj_ref(NULL, &chan->ramin); + return ret; + } + +@@ -939,8 +628,6 @@ + struct nouveau_gpuobj *vram = NULL, *tt = NULL; + int ret, i; + +- INIT_LIST_HEAD(&chan->ramht_refs); +- + NV_DEBUG(dev, "ch%d vram=0x%08x tt=0x%08x\n", chan->id, vram_h, tt_h); + + /* Allocate a chunk of memory for per-channel object storage */ +@@ -956,41 +643,38 @@ + * locations determined during init. + */ + if (dev_priv->card_type >= NV_50) { +- uint32_t vm_offset, pde; ++ u32 pgd_offs = (dev_priv->chipset == 0x50) ? 0x1400 : 0x0200; ++ u64 vm_vinst = chan->ramin->vinst + pgd_offs; ++ u32 vm_pinst = chan->ramin->pinst; ++ u32 pde; + +- vm_offset = (dev_priv->chipset & 0xf0) == 0x50 ? 
0x1400 : 0x200; +- vm_offset += chan->ramin->gpuobj->im_pramin->start; ++ if (vm_pinst != ~0) ++ vm_pinst += pgd_offs; + +- ret = nouveau_gpuobj_new_fake(dev, vm_offset, ~0, 0x4000, +- 0, &chan->vm_pd, NULL); ++ ret = nouveau_gpuobj_new_fake(dev, vm_pinst, vm_vinst, 0x4000, ++ 0, &chan->vm_pd); + if (ret) + return ret; + for (i = 0; i < 0x4000; i += 8) { +- nv_wo32(dev, chan->vm_pd, (i+0)/4, 0x00000000); +- nv_wo32(dev, chan->vm_pd, (i+4)/4, 0xdeadcafe); ++ nv_wo32(chan->vm_pd, i + 0, 0x00000000); ++ nv_wo32(chan->vm_pd, i + 4, 0xdeadcafe); + } + +- pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 2; +- ret = nouveau_gpuobj_ref_add(dev, NULL, 0, +- dev_priv->gart_info.sg_ctxdma, +- &chan->vm_gart_pt); +- if (ret) +- return ret; +- nv_wo32(dev, chan->vm_pd, pde++, +- chan->vm_gart_pt->instance | 0x03); +- nv_wo32(dev, chan->vm_pd, pde++, 0x00000000); ++ nouveau_gpuobj_ref(dev_priv->gart_info.sg_ctxdma, ++ &chan->vm_gart_pt); ++ pde = (dev_priv->vm_gart_base / (512*1024*1024)) * 8; ++ nv_wo32(chan->vm_pd, pde + 0, chan->vm_gart_pt->vinst | 3); ++ nv_wo32(chan->vm_pd, pde + 4, 0x00000000); + +- pde = (dev_priv->vm_vram_base / (512*1024*1024)) * 2; ++ pde = (dev_priv->vm_vram_base / (512*1024*1024)) * 8; + for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) { +- ret = nouveau_gpuobj_ref_add(dev, NULL, 0, +- dev_priv->vm_vram_pt[i], +- &chan->vm_vram_pt[i]); +- if (ret) +- return ret; ++ nouveau_gpuobj_ref(dev_priv->vm_vram_pt[i], ++ &chan->vm_vram_pt[i]); + +- nv_wo32(dev, chan->vm_pd, pde++, +- chan->vm_vram_pt[i]->instance | 0x61); +- nv_wo32(dev, chan->vm_pd, pde++, 0x00000000); ++ nv_wo32(chan->vm_pd, pde + 0, ++ chan->vm_vram_pt[i]->vinst | 0x61); ++ nv_wo32(chan->vm_pd, pde + 4, 0x00000000); ++ pde += 8; + } + + instmem->flush(dev); +@@ -998,15 +682,17 @@ + + /* RAMHT */ + if (dev_priv->card_type < NV_50) { +- ret = nouveau_gpuobj_ref_add(dev, NULL, 0, dev_priv->ramht, +- &chan->ramht); ++ nouveau_ramht_ref(dev_priv->ramht, &chan->ramht, NULL); ++ } else { ++ struct nouveau_gpuobj *ramht = NULL; ++ ++ ret = nouveau_gpuobj_new(dev, chan, 0x8000, 16, ++ NVOBJ_FLAG_ZERO_ALLOC, &ramht); + if (ret) + return ret; +- } else { +- ret = nouveau_gpuobj_new_ref(dev, chan, chan, 0, +- 0x8000, 16, +- NVOBJ_FLAG_ZERO_ALLOC, +- &chan->ramht); ++ ++ ret = nouveau_ramht_new(dev, ramht, &chan->ramht); ++ nouveau_gpuobj_ref(NULL, &ramht); + if (ret) + return ret; + } +@@ -1023,24 +709,32 @@ + } + } else { + ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, +- 0, dev_priv->fb_available_size, +- NV_DMA_ACCESS_RW, +- NV_DMA_TARGET_VIDMEM, &vram); ++ 0, dev_priv->fb_available_size, ++ NV_DMA_ACCESS_RW, ++ NV_DMA_TARGET_VIDMEM, &vram); + if (ret) { + NV_ERROR(dev, "Error creating VRAM ctxdma: %d\n", ret); + return ret; + } + } + +- ret = nouveau_gpuobj_ref_add(dev, chan, vram_h, vram, NULL); ++ ret = nouveau_ramht_insert(chan, vram_h, vram); ++ nouveau_gpuobj_ref(NULL, &vram); + if (ret) { +- NV_ERROR(dev, "Error referencing VRAM ctxdma: %d\n", ret); ++ NV_ERROR(dev, "Error adding VRAM ctxdma to RAMHT: %d\n", ret); + return ret; + } + + /* TT memory ctxdma */ + if (dev_priv->card_type >= NV_50) { +- tt = vram; ++ ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, ++ 0, dev_priv->vm_end, ++ NV_DMA_ACCESS_RW, ++ NV_DMA_TARGET_AGP, &tt); ++ if (ret) { ++ NV_ERROR(dev, "Error creating VRAM ctxdma: %d\n", ret); ++ return ret; ++ } + } else + if (dev_priv->gart_info.type != NOUVEAU_GART_NONE) { + ret = nouveau_gpuobj_gart_dma_new(chan, 0, +@@ -1056,9 +750,10 @@ + return ret; + } + +- ret = 
nouveau_gpuobj_ref_add(dev, chan, tt_h, tt, NULL); ++ ret = nouveau_ramht_insert(chan, tt_h, tt); ++ nouveau_gpuobj_ref(NULL, &tt); + if (ret) { +- NV_ERROR(dev, "Error referencing TT ctxdma: %d\n", ret); ++ NV_ERROR(dev, "Error adding TT ctxdma to RAMHT: %d\n", ret); + return ret; + } + +@@ -1070,33 +765,23 @@ + { + struct drm_nouveau_private *dev_priv = chan->dev->dev_private; + struct drm_device *dev = chan->dev; +- struct list_head *entry, *tmp; +- struct nouveau_gpuobj_ref *ref; + int i; + + NV_DEBUG(dev, "ch%d\n", chan->id); + +- if (!chan->ramht_refs.next) ++ if (!chan->ramht) + return; + +- list_for_each_safe(entry, tmp, &chan->ramht_refs) { +- ref = list_entry(entry, struct nouveau_gpuobj_ref, list); +- +- nouveau_gpuobj_ref_del(dev, &ref); +- } +- +- nouveau_gpuobj_ref_del(dev, &chan->ramht); ++ nouveau_ramht_ref(NULL, &chan->ramht, chan); + +- nouveau_gpuobj_del(dev, &chan->vm_pd); +- nouveau_gpuobj_ref_del(dev, &chan->vm_gart_pt); ++ nouveau_gpuobj_ref(NULL, &chan->vm_pd); ++ nouveau_gpuobj_ref(NULL, &chan->vm_gart_pt); + for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) +- nouveau_gpuobj_ref_del(dev, &chan->vm_vram_pt[i]); ++ nouveau_gpuobj_ref(NULL, &chan->vm_vram_pt[i]); + + if (chan->ramin_heap.free_stack.next) + drm_mm_takedown(&chan->ramin_heap); +- if (chan->ramin) +- nouveau_gpuobj_ref_del(dev, &chan->ramin); +- ++ nouveau_gpuobj_ref(NULL, &chan->ramin); + } + + int +@@ -1117,17 +802,17 @@ + } + + list_for_each_entry(gpuobj, &dev_priv->gpuobj_list, list) { +- if (!gpuobj->im_backing || (gpuobj->flags & NVOBJ_FLAG_FAKE)) ++ if (!gpuobj->im_backing) + continue; + +- gpuobj->im_backing_suspend = vmalloc(gpuobj->im_pramin->size); ++ gpuobj->im_backing_suspend = vmalloc(gpuobj->size); + if (!gpuobj->im_backing_suspend) { + nouveau_gpuobj_resume(dev); + return -ENOMEM; + } + +- for (i = 0; i < gpuobj->im_pramin->size / 4; i++) +- gpuobj->im_backing_suspend[i] = nv_ro32(dev, gpuobj, i); ++ for (i = 0; i < gpuobj->size; i += 4) ++ gpuobj->im_backing_suspend[i/4] = nv_ro32(gpuobj, i); + } + + return 0; +@@ -1172,8 +857,8 @@ + if (!gpuobj->im_backing_suspend) + continue; + +- for (i = 0; i < gpuobj->im_pramin->size / 4; i++) +- nv_wo32(dev, gpuobj, i, gpuobj->im_backing_suspend[i]); ++ for (i = 0; i < gpuobj->size; i += 4) ++ nv_wo32(gpuobj, i, gpuobj->im_backing_suspend[i/4]); + dev_priv->engine.instmem.flush(dev); + } + +@@ -1191,8 +876,6 @@ + struct nouveau_channel *chan; + int ret; + +- NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(init->channel, file_priv, chan); +- + if (init->handle == ~0) + return -EINVAL; + +@@ -1208,45 +891,96 @@ + return -EPERM; + } + +- if (nouveau_gpuobj_ref_find(chan, init->handle, NULL) == 0) +- return -EEXIST; ++ chan = nouveau_channel_get(dev, file_priv, init->channel); ++ if (IS_ERR(chan)) ++ return PTR_ERR(chan); ++ ++ if (nouveau_ramht_find(chan, init->handle)) { ++ ret = -EEXIST; ++ goto out; ++ } + + if (!grc->software) + ret = nouveau_gpuobj_gr_new(chan, grc->id, &gr); + else + ret = nouveau_gpuobj_sw_new(chan, grc->id, &gr); +- + if (ret) { + NV_ERROR(dev, "Error creating object: %d (%d/0x%08x)\n", + ret, init->channel, init->handle); +- return ret; ++ goto out; + } + +- ret = nouveau_gpuobj_ref_add(dev, chan, init->handle, gr, NULL); ++ ret = nouveau_ramht_insert(chan, init->handle, gr); ++ nouveau_gpuobj_ref(NULL, &gr); + if (ret) { + NV_ERROR(dev, "Error referencing object: %d (%d/0x%08x)\n", + ret, init->channel, init->handle); +- nouveau_gpuobj_del(dev, &gr); +- return ret; + } + +- return 0; ++out: ++ nouveau_channel_put(&chan); ++ return ret; + 
} + + int nouveau_ioctl_gpuobj_free(struct drm_device *dev, void *data, + struct drm_file *file_priv) + { + struct drm_nouveau_gpuobj_free *objfree = data; +- struct nouveau_gpuobj_ref *ref; + struct nouveau_channel *chan; + int ret; + +- NOUVEAU_GET_USER_CHANNEL_WITH_RETURN(objfree->channel, file_priv, chan); ++ chan = nouveau_channel_get(dev, file_priv, objfree->channel); ++ if (IS_ERR(chan)) ++ return PTR_ERR(chan); + +- ret = nouveau_gpuobj_ref_find(chan, objfree->handle, &ref); +- if (ret) +- return ret; +- nouveau_gpuobj_ref_del(dev, &ref); ++ ret = nouveau_ramht_remove(chan, objfree->handle); ++ nouveau_channel_put(&chan); ++ return ret; ++} + +- return 0; ++u32 ++nv_ro32(struct nouveau_gpuobj *gpuobj, u32 offset) ++{ ++ struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private; ++ struct drm_device *dev = gpuobj->dev; ++ ++ if (gpuobj->pinst == ~0 || !dev_priv->ramin_available) { ++ u64 ptr = gpuobj->vinst + offset; ++ u32 base = ptr >> 16; ++ u32 val; ++ ++ spin_lock(&dev_priv->ramin_lock); ++ if (dev_priv->ramin_base != base) { ++ dev_priv->ramin_base = base; ++ nv_wr32(dev, 0x001700, dev_priv->ramin_base); ++ } ++ val = nv_rd32(dev, 0x700000 + (ptr & 0xffff)); ++ spin_unlock(&dev_priv->ramin_lock); ++ return val; ++ } ++ ++ return nv_ri32(dev, gpuobj->pinst + offset); ++} ++ ++void ++nv_wo32(struct nouveau_gpuobj *gpuobj, u32 offset, u32 val) ++{ ++ struct drm_nouveau_private *dev_priv = gpuobj->dev->dev_private; ++ struct drm_device *dev = gpuobj->dev; ++ ++ if (gpuobj->pinst == ~0 || !dev_priv->ramin_available) { ++ u64 ptr = gpuobj->vinst + offset; ++ u32 base = ptr >> 16; ++ ++ spin_lock(&dev_priv->ramin_lock); ++ if (dev_priv->ramin_base != base) { ++ dev_priv->ramin_base = base; ++ nv_wr32(dev, 0x001700, dev_priv->ramin_base); ++ } ++ nv_wr32(dev, 0x700000 + (ptr & 0xffff), val); ++ spin_unlock(&dev_priv->ramin_lock); ++ return; ++ } ++ ++ nv_wi32(dev, gpuobj->pinst + offset, val); + } +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_perf.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_perf.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_perf.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_perf.c 2010-10-15 02:04:44.372992344 +0200 +@@ -0,0 +1,205 @@ ++/* ++ * Copyright 2010 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ *
++ * Authors: Ben Skeggs
++ */
++
++#include "drmP.h"
++
++#include "nouveau_drv.h"
++#include "nouveau_pm.h"
++
++static void
++legacy_perf_init(struct drm_device *dev)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	struct nvbios *bios = &dev_priv->vbios;
++	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++	char *perf, *entry, *bmp = &bios->data[bios->offset];
++	int headerlen, use_straps;
++
++	if (bmp[5] < 0x5 || bmp[6] < 0x14) {
++		NV_DEBUG(dev, "BMP version too old for perf\n");
++		return;
++	}
++
++	perf = ROMPTR(bios, bmp[0x73]);
++	if (!perf) {
++		NV_DEBUG(dev, "No memclock table pointer found.\n");
++		return;
++	}
++
++	switch (perf[0]) {
++	case 0x12:
++	case 0x14:
++	case 0x18:
++		use_straps = 0;
++		headerlen = 1;
++		break;
++	case 0x01:
++		use_straps = perf[1] & 1;
++		headerlen = (use_straps ? 8 : 2);
++		break;
++	default:
++		NV_WARN(dev, "Unknown memclock table version %x.\n", perf[0]);
++		return;
++	}
++
++	entry = perf + headerlen;
++	if (use_straps)
++		entry += (nv_rd32(dev, NV_PEXTDEV_BOOT_0) & 0x3c) >> 1;
++
++	sprintf(pm->perflvl[0].name, "performance_level_0");
++	pm->perflvl[0].memory = ROM16(entry[0]) * 20;
++	pm->nr_perflvl = 1;
++}
++
++void
++nouveau_perf_init(struct drm_device *dev)
++{
++	struct drm_nouveau_private *dev_priv = dev->dev_private;
++	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++	struct nvbios *bios = &dev_priv->vbios;
++	struct bit_entry P;
++	u8 version, headerlen, recordlen, entries;
++	u8 *perf, *entry;
++	int vid, i;
++
++	if (bios->type == NVBIOS_BIT) {
++		if (bit_table(dev, 'P', &P))
++			return;
++
++		if (P.version != 1 && P.version != 2) {
++			NV_WARN(dev, "unknown perf for BIT P %d\n", P.version);
++			return;
++		}
++
++		perf = ROMPTR(bios, P.data[0]);
++		version = perf[0];
++		headerlen = perf[1];
++		if (version < 0x40) {
++			recordlen = perf[3] + (perf[4] * perf[5]);
++			entries = perf[2];
++		} else {
++			recordlen = perf[2] + (perf[3] * perf[4]);
++			entries = perf[5];
++		}
++	} else {
++		if (bios->data[bios->offset + 6] < 0x25) {
++			legacy_perf_init(dev);
++			return;
++		}
++
++		perf = ROMPTR(bios, bios->data[bios->offset + 0x94]);
++		if (!perf) {
++			NV_DEBUG(dev, "perf table pointer invalid\n");
++			return;
++		}
++
++		version = perf[1];
++		headerlen = perf[0];
++		recordlen = perf[3];
++		entries = perf[2];
++	}
++
++	entry = perf + headerlen;
++	for (i = 0; i < entries; i++) {
++		struct nouveau_pm_level *perflvl = &pm->perflvl[pm->nr_perflvl];
++
++		if (entry[0] == 0xff) {
++			entry += recordlen;
++			continue;
++		}
++
++		switch (version) {
++		case 0x12:
++		case 0x13:
++		case 0x15:
++			perflvl->fanspeed = entry[55];
++			perflvl->voltage = entry[56];
++			perflvl->core = ROM32(entry[1]) * 10;
++			perflvl->memory = ROM32(entry[5]) * 20;
++			break;
++		case 0x21:
++		case 0x23:
++		case 0x24:
++			perflvl->fanspeed = entry[4];
++			perflvl->voltage = entry[5];
++			perflvl->core = ROM16(entry[6]) * 1000;
++
++			if (dev_priv->chipset == 0x49 ||
++			    dev_priv->chipset == 0x4b)
++				perflvl->memory = ROM16(entry[11]) * 1000;
++			else
++				perflvl->memory = ROM16(entry[11]) * 2000;
++
++			break;
++		case 0x25:
++			perflvl->fanspeed = entry[4];
++			perflvl->voltage = entry[5];
++			perflvl->core = ROM16(entry[6]) * 1000;
++			perflvl->shader = ROM16(entry[10]) * 1000;
++			perflvl->memory = ROM16(entry[12]) * 1000;
++			break;
++		case 0x30:
++			perflvl->memscript = ROM16(entry[2]);
++		case 0x35:
++			perflvl->fanspeed = entry[6];
++			perflvl->voltage = entry[7];
++			perflvl->core = ROM16(entry[8]) * 1000;
++			perflvl->shader = ROM16(entry[10]) * 1000;
++			perflvl->memory = ROM16(entry[12]) * 1000;
++			/*XXX: confirm on 0x35 */
++			perflvl->unk05 = ROM16(entry[16]) * 1000;
++			break;
++		case 0x40:
++#define subent(n) entry[perf[2] + ((n) * perf[3])]
++			perflvl->fanspeed = 0; /*XXX*/
++			perflvl->voltage = entry[2];
++			perflvl->core = (ROM16(subent(0)) & 0xfff) * 1000;
++			perflvl->shader = (ROM16(subent(1)) & 0xfff) * 1000;
++			perflvl->memory = (ROM16(subent(2)) & 0xfff) * 1000;
++			break;
++		}
++
++		/* make sure vid is valid */
++		if (pm->voltage.supported && perflvl->voltage) {
++			vid = nouveau_volt_vid_lookup(dev, perflvl->voltage);
++			if (vid < 0) {
++				NV_DEBUG(dev, "drop perflvl %d, bad vid\n", i);
++				entry += recordlen;
++				continue;
++			}
++		}
++
++		snprintf(perflvl->name, sizeof(perflvl->name),
++			 "performance_level_%d", i);
++		perflvl->id = i;
++		pm->nr_perflvl++;
++
++		entry += recordlen;
++	}
++}
++
++void
++nouveau_perf_fini(struct drm_device *dev)
++{
++}
+diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_pm.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_pm.c
+--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_pm.c	1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_pm.c	2010-10-15 02:04:44.377992406 +0200
+@@ -0,0 +1,548 @@
++/*
++ * Copyright 2010 Red Hat Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ * ++ * Authors: Ben Skeggs ++ */ ++ ++#include "drmP.h" ++ ++#include "nouveau_drv.h" ++#include "nouveau_pm.h" ++ ++#ifdef CONFIG_ACPI ++#include ++#endif ++#include ++#include ++#include ++ ++static int ++nouveau_pm_clock_set(struct drm_device *dev, struct nouveau_pm_level *perflvl, ++ u8 id, u32 khz) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ void *pre_state; ++ ++ if (khz == 0) ++ return 0; ++ ++ pre_state = pm->clock_pre(dev, perflvl, id, khz); ++ if (IS_ERR(pre_state)) ++ return PTR_ERR(pre_state); ++ ++ if (pre_state) ++ pm->clock_set(dev, pre_state); ++ return 0; ++} ++ ++static int ++nouveau_pm_perflvl_set(struct drm_device *dev, struct nouveau_pm_level *perflvl) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ int ret; ++ ++ if (perflvl == pm->cur) ++ return 0; ++ ++ if (pm->voltage.supported && pm->voltage_set && perflvl->voltage) { ++ ret = pm->voltage_set(dev, perflvl->voltage); ++ if (ret) { ++ NV_ERROR(dev, "voltage_set %d failed: %d\n", ++ perflvl->voltage, ret); ++ } ++ } ++ ++ nouveau_pm_clock_set(dev, perflvl, PLL_CORE, perflvl->core); ++ nouveau_pm_clock_set(dev, perflvl, PLL_SHADER, perflvl->shader); ++ nouveau_pm_clock_set(dev, perflvl, PLL_MEMORY, perflvl->memory); ++ nouveau_pm_clock_set(dev, perflvl, PLL_UNK05, perflvl->unk05); ++ ++ pm->cur = perflvl; ++ return 0; ++} ++ ++static int ++nouveau_pm_profile_set(struct drm_device *dev, const char *profile) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ struct nouveau_pm_level *perflvl = NULL; ++ ++ /* safety precaution, for now */ ++ if (nouveau_perflvl_wr != 7777) ++ return -EPERM; ++ ++ if (!pm->clock_set) ++ return -EINVAL; ++ ++ if (!strncmp(profile, "boot", 4)) ++ perflvl = &pm->boot; ++ else { ++ int pl = simple_strtol(profile, NULL, 10); ++ int i; ++ ++ for (i = 0; i < pm->nr_perflvl; i++) { ++ if (pm->perflvl[i].id == pl) { ++ perflvl = &pm->perflvl[i]; ++ break; ++ } ++ } ++ ++ if (!perflvl) ++ return -EINVAL; ++ } ++ ++ NV_INFO(dev, "setting performance level: %s\n", profile); ++ return nouveau_pm_perflvl_set(dev, perflvl); ++} ++ ++static int ++nouveau_pm_perflvl_get(struct drm_device *dev, struct nouveau_pm_level *perflvl) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ int ret; ++ ++ if (!pm->clock_get) ++ return -EINVAL; ++ ++ memset(perflvl, 0, sizeof(*perflvl)); ++ ++ ret = pm->clock_get(dev, PLL_CORE); ++ if (ret > 0) ++ perflvl->core = ret; ++ ++ ret = pm->clock_get(dev, PLL_MEMORY); ++ if (ret > 0) ++ perflvl->memory = ret; ++ ++ ret = pm->clock_get(dev, PLL_SHADER); ++ if (ret > 0) ++ perflvl->shader = ret; ++ ++ ret = pm->clock_get(dev, PLL_UNK05); ++ if (ret > 0) ++ perflvl->unk05 = ret; ++ ++ if (pm->voltage.supported && pm->voltage_get) { ++ ret = pm->voltage_get(dev); ++ if (ret > 0) ++ perflvl->voltage = ret; ++ } ++ ++ return 0; ++} ++ ++static void ++nouveau_pm_perflvl_info(struct nouveau_pm_level *perflvl, char *ptr, int len) ++{ ++ char c[16], s[16], v[16], f[16]; ++ ++ c[0] = '\0'; ++ if (perflvl->core) ++ snprintf(c, sizeof(c), " core %dMHz", perflvl->core / 1000); ++ ++ s[0] = '\0'; ++ if (perflvl->shader) ++ snprintf(s, sizeof(s), " shader %dMHz", perflvl->shader / 1000); ++ ++ v[0] = '\0'; ++ if (perflvl->voltage) ++ snprintf(v, sizeof(v), " voltage %dmV", perflvl->voltage * 10); 
++
++ f[0] = '\0';
++ if (perflvl->fanspeed)
++ snprintf(f, sizeof(f), " fanspeed %d%%", perflvl->fanspeed);
++
++ snprintf(ptr, len, "memory %dMHz%s%s%s%s\n", perflvl->memory / 1000,
++ c, s, v, f);
++}
++
++static ssize_t
++nouveau_pm_get_perflvl_info(struct device *d,
++ struct device_attribute *a, char *buf)
++{
++ struct nouveau_pm_level *perflvl = (struct nouveau_pm_level *)a;
++ char *ptr = buf;
++ int len = PAGE_SIZE;
++
++ snprintf(ptr, len, "%d: ", perflvl->id);
++ ptr += strlen(buf);
++ len -= strlen(buf);
++
++ nouveau_pm_perflvl_info(perflvl, ptr, len);
++ return strlen(buf);
++}
++
++static ssize_t
++nouveau_pm_get_perflvl(struct device *d, struct device_attribute *a, char *buf)
++{
++ struct drm_device *dev = pci_get_drvdata(to_pci_dev(d));
++ struct drm_nouveau_private *dev_priv = dev->dev_private;
++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++ struct nouveau_pm_level cur;
++ int len = PAGE_SIZE, ret;
++ char *ptr = buf;
++
++ if (!pm->cur)
++ snprintf(ptr, len, "setting: boot\n");
++ else if (pm->cur == &pm->boot)
++ snprintf(ptr, len, "setting: boot\nc: ");
++ else
++ snprintf(ptr, len, "setting: static %d\nc: ", pm->cur->id);
++ ptr += strlen(buf);
++ len -= strlen(buf);
++
++ ret = nouveau_pm_perflvl_get(dev, &cur);
++ if (ret == 0)
++ nouveau_pm_perflvl_info(&cur, ptr, len);
++ return strlen(buf);
++}
++
++static ssize_t
++nouveau_pm_set_perflvl(struct device *d, struct device_attribute *a,
++ const char *buf, size_t count)
++{
++ struct drm_device *dev = pci_get_drvdata(to_pci_dev(d));
++ int ret;
++
++ ret = nouveau_pm_profile_set(dev, buf);
++ if (ret)
++ return ret;
++ return strlen(buf);
++}
++
++static DEVICE_ATTR(performance_level, S_IRUGO | S_IWUSR,
++ nouveau_pm_get_perflvl, nouveau_pm_set_perflvl);
++
++static int
++nouveau_sysfs_init(struct drm_device *dev)
++{
++ struct drm_nouveau_private *dev_priv = dev->dev_private;
++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++ struct device *d = &dev->pdev->dev;
++ int ret, i;
++
++ ret = device_create_file(d, &dev_attr_performance_level);
++ if (ret)
++ return ret;
++
++ for (i = 0; i < pm->nr_perflvl; i++) {
++ struct nouveau_pm_level *perflvl = &pm->perflvl[i];
++
++ perflvl->dev_attr.attr.name = perflvl->name;
++ perflvl->dev_attr.attr.mode = S_IRUGO;
++ perflvl->dev_attr.show = nouveau_pm_get_perflvl_info;
++ perflvl->dev_attr.store = NULL;
++ sysfs_attr_init(&perflvl->dev_attr.attr);
++
++ ret = device_create_file(d, &perflvl->dev_attr);
++ if (ret) {
++ NV_ERROR(dev, "failed pervlvl %d sysfs: %d\n",
++ perflvl->id, i);
++ perflvl->dev_attr.attr.name = NULL;
++ nouveau_pm_fini(dev);
++ return ret;
++ }
++ }
++
++ return 0;
++}
++
++static void
++nouveau_sysfs_fini(struct drm_device *dev)
++{
++ struct drm_nouveau_private *dev_priv = dev->dev_private;
++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++ struct device *d = &dev->pdev->dev;
++ int i;
++
++ device_remove_file(d, &dev_attr_performance_level);
++ for (i = 0; i < pm->nr_perflvl; i++) {
++ struct nouveau_pm_level *pl = &pm->perflvl[i];
++
++ if (!pl->dev_attr.attr.name)
++ break;
++
++ device_remove_file(d, &pl->dev_attr);
++ }
++}
++
++static ssize_t
++nouveau_hwmon_show_temp(struct device *d, struct device_attribute *a, char *buf)
++{
++ struct drm_device *dev = dev_get_drvdata(d);
++ struct drm_nouveau_private *dev_priv = dev->dev_private;
++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++
++ return snprintf(buf, PAGE_SIZE, "%d\n", pm->temp_get(dev)*1000);
++}
++static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, nouveau_hwmon_show_temp,
++ NULL, 0);
++
++static ssize_t
++nouveau_hwmon_max_temp(struct device *d, struct device_attribute *a, char *buf)
++{
++ struct drm_device *dev = dev_get_drvdata(d);
++ struct drm_nouveau_private *dev_priv = dev->dev_private;
++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++ struct nouveau_pm_threshold_temp *temp = &pm->threshold_temp;
++
++ return snprintf(buf, PAGE_SIZE, "%d\n", temp->down_clock*1000);
++}
++static ssize_t
++nouveau_hwmon_set_max_temp(struct device *d, struct device_attribute *a,
++ const char *buf, size_t count)
++{
++ struct drm_device *dev = dev_get_drvdata(d);
++ struct drm_nouveau_private *dev_priv = dev->dev_private;
++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++ struct nouveau_pm_threshold_temp *temp = &pm->threshold_temp;
++ long value;
++
++ if (strict_strtol(buf, 10, &value) == -EINVAL)
++ return count;
++
++ temp->down_clock = value/1000;
++
++ nouveau_temp_safety_checks(dev);
++
++ return count;
++}
++static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO | S_IWUSR, nouveau_hwmon_max_temp,
++ nouveau_hwmon_set_max_temp,
++ 0);
++
++static ssize_t
++nouveau_hwmon_critical_temp(struct device *d, struct device_attribute *a,
++ char *buf)
++{
++ struct drm_device *dev = dev_get_drvdata(d);
++ struct drm_nouveau_private *dev_priv = dev->dev_private;
++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++ struct nouveau_pm_threshold_temp *temp = &pm->threshold_temp;
++
++ return snprintf(buf, PAGE_SIZE, "%d\n", temp->critical*1000);
++}
++static ssize_t
++nouveau_hwmon_set_critical_temp(struct device *d, struct device_attribute *a,
++ const char *buf,
++ size_t count)
++{
++ struct drm_device *dev = dev_get_drvdata(d);
++ struct drm_nouveau_private *dev_priv = dev->dev_private;
++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++ struct nouveau_pm_threshold_temp *temp = &pm->threshold_temp;
++ long value;
++
++ if (strict_strtol(buf, 10, &value) == -EINVAL)
++ return count;
++
++ temp->critical = value/1000;
++
++ nouveau_temp_safety_checks(dev);
++
++ return count;
++}
++static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO | S_IWUSR,
++ nouveau_hwmon_critical_temp,
++ nouveau_hwmon_set_critical_temp,
++ 0);
++
++static ssize_t nouveau_hwmon_show_name(struct device *dev,
++ struct device_attribute *attr,
++ char *buf)
++{
++ return sprintf(buf, "nouveau\n");
++}
++static SENSOR_DEVICE_ATTR(name, S_IRUGO, nouveau_hwmon_show_name, NULL, 0);
++
++static ssize_t nouveau_hwmon_show_update_rate(struct device *dev,
++ struct device_attribute *attr,
++ char *buf)
++{
++ return sprintf(buf, "1000\n");
++}
++static SENSOR_DEVICE_ATTR(update_rate, S_IRUGO,
++ nouveau_hwmon_show_update_rate,
++ NULL, 0);
++
++static struct attribute *hwmon_attributes[] = {
++ &sensor_dev_attr_temp1_input.dev_attr.attr,
++ &sensor_dev_attr_temp1_max.dev_attr.attr,
++ &sensor_dev_attr_temp1_crit.dev_attr.attr,
++ &sensor_dev_attr_name.dev_attr.attr,
++ &sensor_dev_attr_update_rate.dev_attr.attr,
++ NULL
++};
++
++static const struct attribute_group hwmon_attrgroup = {
++ .attrs = hwmon_attributes,
++};
++
++static int
++nouveau_hwmon_init(struct drm_device *dev)
++{
++ struct drm_nouveau_private *dev_priv = dev->dev_private;
++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
++ struct device *hwmon_dev;
++ int ret;
++
++ if (!pm->temp_get)
++ return -ENODEV;
++
++ hwmon_dev = hwmon_device_register(&dev->pdev->dev);
++ if (IS_ERR(hwmon_dev)) {
++ ret = PTR_ERR(hwmon_dev);
++ NV_ERROR(dev,
++ "Unable to register hwmon device: %d\n", ret);
++ return ret;
++ } ++ dev_set_drvdata(hwmon_dev, dev); ++ ret = sysfs_create_group(&hwmon_dev->kobj, ++ &hwmon_attrgroup); ++ if (ret) { ++ NV_ERROR(dev, ++ "Unable to create hwmon sysfs file: %d\n", ret); ++ hwmon_device_unregister(hwmon_dev); ++ return ret; ++ } ++ ++ pm->hwmon = hwmon_dev; ++ ++ return 0; ++} ++ ++static void ++nouveau_hwmon_fini(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ ++ if (pm->hwmon) { ++ sysfs_remove_group(&pm->hwmon->kobj, &hwmon_attrgroup); ++ hwmon_device_unregister(pm->hwmon); ++ } ++} ++ ++#ifdef CONFIG_ACPI ++static int ++nouveau_pm_acpi_event(struct notifier_block *nb, unsigned long val, void *data) ++{ ++ struct drm_nouveau_private *dev_priv = ++ container_of(nb, struct drm_nouveau_private, engine.pm.acpi_nb); ++ struct drm_device *dev = dev_priv->dev; ++ struct acpi_bus_event *entry = (struct acpi_bus_event *)data; ++ ++ if (strcmp(entry->device_class, "ac_adapter") == 0) { ++ bool ac = power_supply_is_system_supplied(); ++ ++ NV_DEBUG(dev, "power supply changed: %s\n", ac ? "AC" : "DC"); ++ } ++ ++ return NOTIFY_OK; ++} ++#endif ++ ++int ++nouveau_pm_init(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ char info[256]; ++ int ret, i; ++ ++ nouveau_volt_init(dev); ++ nouveau_perf_init(dev); ++ nouveau_temp_init(dev); ++ nouveau_mem_timing_init(dev); ++ ++ NV_INFO(dev, "%d available performance level(s)\n", pm->nr_perflvl); ++ for (i = 0; i < pm->nr_perflvl; i++) { ++ nouveau_pm_perflvl_info(&pm->perflvl[i], info, sizeof(info)); ++ NV_INFO(dev, "%d: %s", pm->perflvl[i].id, info); ++ } ++ ++ /* determine current ("boot") performance level */ ++ ret = nouveau_pm_perflvl_get(dev, &pm->boot); ++ if (ret == 0) { ++ pm->cur = &pm->boot; ++ ++ nouveau_pm_perflvl_info(&pm->boot, info, sizeof(info)); ++ NV_INFO(dev, "c: %s", info); ++ } ++ ++ /* switch performance levels now if requested */ ++ if (nouveau_perflvl != NULL) { ++ ret = nouveau_pm_profile_set(dev, nouveau_perflvl); ++ if (ret) { ++ NV_ERROR(dev, "error setting perflvl \"%s\": %d\n", ++ nouveau_perflvl, ret); ++ } ++ } ++ ++ nouveau_sysfs_init(dev); ++ nouveau_hwmon_init(dev); ++#ifdef CONFIG_ACPI ++ pm->acpi_nb.notifier_call = nouveau_pm_acpi_event; ++ register_acpi_notifier(&pm->acpi_nb); ++#endif ++ ++ return 0; ++} ++ ++void ++nouveau_pm_fini(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ ++ if (pm->cur != &pm->boot) ++ nouveau_pm_perflvl_set(dev, &pm->boot); ++ ++ nouveau_mem_timing_fini(dev); ++ nouveau_temp_fini(dev); ++ nouveau_perf_fini(dev); ++ nouveau_volt_fini(dev); ++ ++#ifdef CONFIG_ACPI ++ unregister_acpi_notifier(&pm->acpi_nb); ++#endif ++ nouveau_hwmon_fini(dev); ++ nouveau_sysfs_fini(dev); ++} ++ ++void ++nouveau_pm_resume(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ struct nouveau_pm_level *perflvl; ++ ++ if (pm->cur == &pm->boot) ++ return; ++ ++ perflvl = pm->cur; ++ pm->cur = &pm->boot; ++ nouveau_pm_perflvl_set(dev, perflvl); ++} +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_pm.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_pm.h +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_pm.h 1970-01-01 01:00:00.000000000 +0100 ++++ 
linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_pm.h 2010-10-15 02:04:44.379992432 +0200 +@@ -0,0 +1,74 @@ ++/* ++ * Copyright 2010 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: Ben Skeggs ++ */ ++ ++#ifndef __NOUVEAU_PM_H__ ++#define __NOUVEAU_PM_H__ ++ ++/* nouveau_pm.c */ ++int nouveau_pm_init(struct drm_device *dev); ++void nouveau_pm_fini(struct drm_device *dev); ++void nouveau_pm_resume(struct drm_device *dev); ++ ++/* nouveau_volt.c */ ++void nouveau_volt_init(struct drm_device *); ++void nouveau_volt_fini(struct drm_device *); ++int nouveau_volt_vid_lookup(struct drm_device *, int voltage); ++int nouveau_volt_lvl_lookup(struct drm_device *, int vid); ++int nouveau_voltage_gpio_get(struct drm_device *); ++int nouveau_voltage_gpio_set(struct drm_device *, int voltage); ++ ++/* nouveau_perf.c */ ++void nouveau_perf_init(struct drm_device *); ++void nouveau_perf_fini(struct drm_device *); ++ ++/* nouveau_mem.c */ ++void nouveau_mem_timing_init(struct drm_device *); ++void nouveau_mem_timing_fini(struct drm_device *); ++ ++/* nv04_pm.c */ ++int nv04_pm_clock_get(struct drm_device *, u32 id); ++void *nv04_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *, ++ u32 id, int khz); ++void nv04_pm_clock_set(struct drm_device *, void *); ++ ++/* nv50_pm.c */ ++int nv50_pm_clock_get(struct drm_device *, u32 id); ++void *nv50_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *, ++ u32 id, int khz); ++void nv50_pm_clock_set(struct drm_device *, void *); ++ ++/* nva3_pm.c */ ++int nva3_pm_clock_get(struct drm_device *, u32 id); ++void *nva3_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *, ++ u32 id, int khz); ++void nva3_pm_clock_set(struct drm_device *, void *); ++ ++/* nouveau_temp.c */ ++void nouveau_temp_init(struct drm_device *dev); ++void nouveau_temp_fini(struct drm_device *dev); ++void nouveau_temp_safety_checks(struct drm_device *dev); ++int nv40_temp_get(struct drm_device *dev); ++int nv84_temp_get(struct drm_device *dev); ++ ++#endif +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_ramht.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_ramht.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_ramht.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_ramht.c 2010-10-15 02:04:44.381992457 +0200 +@@ -0,0 +1,307 @@ ++/* ++ * Copyright 2010 Red Hat Inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: Ben Skeggs ++ */ ++ ++#include "drmP.h" ++ ++#include "nouveau_drv.h" ++#include "nouveau_ramht.h" ++ ++static u32 ++nouveau_ramht_hash_handle(struct nouveau_channel *chan, u32 handle) ++{ ++ struct drm_device *dev = chan->dev; ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_ramht *ramht = chan->ramht; ++ u32 hash = 0; ++ int i; ++ ++ NV_DEBUG(dev, "ch%d handle=0x%08x\n", chan->id, handle); ++ ++ for (i = 32; i > 0; i -= ramht->bits) { ++ hash ^= (handle & ((1 << ramht->bits) - 1)); ++ handle >>= ramht->bits; ++ } ++ ++ if (dev_priv->card_type < NV_50) ++ hash ^= chan->id << (ramht->bits - 4); ++ hash <<= 3; ++ ++ NV_DEBUG(dev, "hash=0x%08x\n", hash); ++ return hash; ++} ++ ++static int ++nouveau_ramht_entry_valid(struct drm_device *dev, struct nouveau_gpuobj *ramht, ++ u32 offset) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ u32 ctx = nv_ro32(ramht, offset + 4); ++ ++ if (dev_priv->card_type < NV_40) ++ return ((ctx & NV_RAMHT_CONTEXT_VALID) != 0); ++ return (ctx != 0); ++} ++ ++static int ++nouveau_ramht_entry_same_channel(struct nouveau_channel *chan, ++ struct nouveau_gpuobj *ramht, u32 offset) ++{ ++ struct drm_nouveau_private *dev_priv = chan->dev->dev_private; ++ u32 ctx = nv_ro32(ramht, offset + 4); ++ ++ if (dev_priv->card_type >= NV_50) ++ return true; ++ else if (dev_priv->card_type >= NV_40) ++ return chan->id == ++ ((ctx >> NV40_RAMHT_CONTEXT_CHANNEL_SHIFT) & 0x1f); ++ else ++ return chan->id == ++ ((ctx >> NV_RAMHT_CONTEXT_CHANNEL_SHIFT) & 0x1f); ++} ++ ++int ++nouveau_ramht_insert(struct nouveau_channel *chan, u32 handle, ++ struct nouveau_gpuobj *gpuobj) ++{ ++ struct drm_device *dev = chan->dev; ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem; ++ struct nouveau_ramht_entry *entry; ++ struct nouveau_gpuobj *ramht = chan->ramht->gpuobj; ++ unsigned long flags; ++ u32 ctx, co, ho; ++ ++ if (nouveau_ramht_find(chan, handle)) ++ return -EEXIST; ++ ++ entry = kmalloc(sizeof(*entry), GFP_KERNEL); ++ if (!entry) ++ return -ENOMEM; ++ entry->channel = chan; ++ entry->gpuobj = NULL; ++ entry->handle = handle; ++ nouveau_gpuobj_ref(gpuobj, &entry->gpuobj); ++ ++ if (dev_priv->card_type < NV_40) { ++ ctx = NV_RAMHT_CONTEXT_VALID | (gpuobj->cinst >> 4) | ++ (chan->id << NV_RAMHT_CONTEXT_CHANNEL_SHIFT) | ++ (gpuobj->engine << 
NV_RAMHT_CONTEXT_ENGINE_SHIFT); ++ } else ++ if (dev_priv->card_type < NV_50) { ++ ctx = (gpuobj->cinst >> 4) | ++ (chan->id << NV40_RAMHT_CONTEXT_CHANNEL_SHIFT) | ++ (gpuobj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT); ++ } else { ++ if (gpuobj->engine == NVOBJ_ENGINE_DISPLAY) { ++ ctx = (gpuobj->cinst << 10) | 2; ++ } else { ++ ctx = (gpuobj->cinst >> 4) | ++ ((gpuobj->engine << ++ NV40_RAMHT_CONTEXT_ENGINE_SHIFT)); ++ } ++ } ++ ++ spin_lock_irqsave(&chan->ramht->lock, flags); ++ list_add(&entry->head, &chan->ramht->entries); ++ ++ co = ho = nouveau_ramht_hash_handle(chan, handle); ++ do { ++ if (!nouveau_ramht_entry_valid(dev, ramht, co)) { ++ NV_DEBUG(dev, ++ "insert ch%d 0x%08x: h=0x%08x, c=0x%08x\n", ++ chan->id, co, handle, ctx); ++ nv_wo32(ramht, co + 0, handle); ++ nv_wo32(ramht, co + 4, ctx); ++ ++ spin_unlock_irqrestore(&chan->ramht->lock, flags); ++ instmem->flush(dev); ++ return 0; ++ } ++ NV_DEBUG(dev, "collision ch%d 0x%08x: h=0x%08x\n", ++ chan->id, co, nv_ro32(ramht, co)); ++ ++ co += 8; ++ if (co >= ramht->size) ++ co = 0; ++ } while (co != ho); ++ ++ NV_ERROR(dev, "RAMHT space exhausted. ch=%d\n", chan->id); ++ list_del(&entry->head); ++ spin_unlock_irqrestore(&chan->ramht->lock, flags); ++ kfree(entry); ++ return -ENOMEM; ++} ++ ++static struct nouveau_ramht_entry * ++nouveau_ramht_remove_entry(struct nouveau_channel *chan, u32 handle) ++{ ++ struct nouveau_ramht *ramht = chan ? chan->ramht : NULL; ++ struct nouveau_ramht_entry *entry; ++ unsigned long flags; ++ ++ if (!ramht) ++ return NULL; ++ ++ spin_lock_irqsave(&ramht->lock, flags); ++ list_for_each_entry(entry, &ramht->entries, head) { ++ if (entry->channel == chan && ++ (!handle || entry->handle == handle)) { ++ list_del(&entry->head); ++ spin_unlock_irqrestore(&ramht->lock, flags); ++ ++ return entry; ++ } ++ } ++ spin_unlock_irqrestore(&ramht->lock, flags); ++ ++ return NULL; ++} ++ ++static void ++nouveau_ramht_remove_hash(struct nouveau_channel *chan, u32 handle) ++{ ++ struct drm_device *dev = chan->dev; ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_instmem_engine *instmem = &dev_priv->engine.instmem; ++ struct nouveau_gpuobj *ramht = chan->ramht->gpuobj; ++ unsigned long flags; ++ u32 co, ho; ++ ++ spin_lock_irqsave(&chan->ramht->lock, flags); ++ co = ho = nouveau_ramht_hash_handle(chan, handle); ++ do { ++ if (nouveau_ramht_entry_valid(dev, ramht, co) && ++ nouveau_ramht_entry_same_channel(chan, ramht, co) && ++ (handle == nv_ro32(ramht, co))) { ++ NV_DEBUG(dev, ++ "remove ch%d 0x%08x: h=0x%08x, c=0x%08x\n", ++ chan->id, co, handle, nv_ro32(ramht, co + 4)); ++ nv_wo32(ramht, co + 0, 0x00000000); ++ nv_wo32(ramht, co + 4, 0x00000000); ++ instmem->flush(dev); ++ goto out; ++ } ++ ++ co += 8; ++ if (co >= ramht->size) ++ co = 0; ++ } while (co != ho); ++ ++ NV_ERROR(dev, "RAMHT entry not found. 
ch=%d, handle=0x%08x\n", ++ chan->id, handle); ++out: ++ spin_unlock_irqrestore(&chan->ramht->lock, flags); ++} ++ ++int ++nouveau_ramht_remove(struct nouveau_channel *chan, u32 handle) ++{ ++ struct nouveau_ramht_entry *entry; ++ ++ entry = nouveau_ramht_remove_entry(chan, handle); ++ if (!entry) ++ return -ENOENT; ++ ++ nouveau_ramht_remove_hash(chan, entry->handle); ++ nouveau_gpuobj_ref(NULL, &entry->gpuobj); ++ kfree(entry); ++ return 0; ++} ++ ++struct nouveau_gpuobj * ++nouveau_ramht_find(struct nouveau_channel *chan, u32 handle) ++{ ++ struct nouveau_ramht *ramht = chan->ramht; ++ struct nouveau_ramht_entry *entry; ++ struct nouveau_gpuobj *gpuobj = NULL; ++ unsigned long flags; ++ ++ if (unlikely(!chan->ramht)) ++ return NULL; ++ ++ spin_lock_irqsave(&ramht->lock, flags); ++ list_for_each_entry(entry, &chan->ramht->entries, head) { ++ if (entry->channel == chan && entry->handle == handle) { ++ gpuobj = entry->gpuobj; ++ break; ++ } ++ } ++ spin_unlock_irqrestore(&ramht->lock, flags); ++ ++ return gpuobj; ++} ++ ++int ++nouveau_ramht_new(struct drm_device *dev, struct nouveau_gpuobj *gpuobj, ++ struct nouveau_ramht **pramht) ++{ ++ struct nouveau_ramht *ramht; ++ ++ ramht = kzalloc(sizeof(*ramht), GFP_KERNEL); ++ if (!ramht) ++ return -ENOMEM; ++ ++ ramht->dev = dev; ++ kref_init(&ramht->refcount); ++ ramht->bits = drm_order(gpuobj->size / 8); ++ INIT_LIST_HEAD(&ramht->entries); ++ spin_lock_init(&ramht->lock); ++ nouveau_gpuobj_ref(gpuobj, &ramht->gpuobj); ++ ++ *pramht = ramht; ++ return 0; ++} ++ ++static void ++nouveau_ramht_del(struct kref *ref) ++{ ++ struct nouveau_ramht *ramht = ++ container_of(ref, struct nouveau_ramht, refcount); ++ ++ nouveau_gpuobj_ref(NULL, &ramht->gpuobj); ++ kfree(ramht); ++} ++ ++void ++nouveau_ramht_ref(struct nouveau_ramht *ref, struct nouveau_ramht **ptr, ++ struct nouveau_channel *chan) ++{ ++ struct nouveau_ramht_entry *entry; ++ struct nouveau_ramht *ramht; ++ ++ if (ref) ++ kref_get(&ref->refcount); ++ ++ ramht = *ptr; ++ if (ramht) { ++ while ((entry = nouveau_ramht_remove_entry(chan, 0))) { ++ nouveau_ramht_remove_hash(chan, entry->handle); ++ nouveau_gpuobj_ref(NULL, &entry->gpuobj); ++ kfree(entry); ++ } ++ ++ kref_put(&ramht->refcount, nouveau_ramht_del); ++ } ++ *ptr = ref; ++} +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_ramht.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_ramht.h +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_ramht.h 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_ramht.h 2010-10-15 02:04:44.382992469 +0200 +@@ -0,0 +1,55 @@ ++/* ++ * Copyright 2010 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: Ben Skeggs ++ */ ++ ++#ifndef __NOUVEAU_RAMHT_H__ ++#define __NOUVEAU_RAMHT_H__ ++ ++struct nouveau_ramht_entry { ++ struct list_head head; ++ struct nouveau_channel *channel; ++ struct nouveau_gpuobj *gpuobj; ++ u32 handle; ++}; ++ ++struct nouveau_ramht { ++ struct drm_device *dev; ++ struct kref refcount; ++ spinlock_t lock; ++ struct nouveau_gpuobj *gpuobj; ++ struct list_head entries; ++ int bits; ++}; ++ ++extern int nouveau_ramht_new(struct drm_device *, struct nouveau_gpuobj *, ++ struct nouveau_ramht **); ++extern void nouveau_ramht_ref(struct nouveau_ramht *, struct nouveau_ramht **, ++ struct nouveau_channel *unref_channel); ++ ++extern int nouveau_ramht_insert(struct nouveau_channel *, u32 handle, ++ struct nouveau_gpuobj *); ++extern int nouveau_ramht_remove(struct nouveau_channel *, u32 handle); ++extern struct nouveau_gpuobj * ++nouveau_ramht_find(struct nouveau_channel *chan, u32 handle); ++ ++#endif +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_reg.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_reg.h +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_reg.h 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_reg.h 2010-10-15 02:04:44.385992505 +0200 +@@ -551,6 +551,8 @@ + #define NV10_PFIFO_CACHE1_DMA_SUBROUTINE 0x0000324C + #define NV03_PFIFO_CACHE1_PULL0 0x00003240 + #define NV04_PFIFO_CACHE1_PULL0 0x00003250 ++# define NV04_PFIFO_CACHE1_PULL0_HASH_FAILED 0x00000010 ++# define NV04_PFIFO_CACHE1_PULL0_HASH_BUSY 0x00001000 + #define NV03_PFIFO_CACHE1_PULL1 0x00003250 + #define NV04_PFIFO_CACHE1_PULL1 0x00003254 + #define NV04_PFIFO_CACHE1_HASH 0x00003258 +@@ -785,15 +787,12 @@ + #define NV50_PDISPLAY_DAC_MODE_CTRL_C(i) (0x00610b5c + (i) * 0x8) + #define NV50_PDISPLAY_SOR_MODE_CTRL_P(i) (0x00610b70 + (i) * 0x8) + #define NV50_PDISPLAY_SOR_MODE_CTRL_C(i) (0x00610b74 + (i) * 0x8) ++#define NV50_PDISPLAY_EXT_MODE_CTRL_P(i) (0x00610b80 + (i) * 0x8) ++#define NV50_PDISPLAY_EXT_MODE_CTRL_C(i) (0x00610b84 + (i) * 0x8) + #define NV50_PDISPLAY_DAC_MODE_CTRL2_P(i) (0x00610bdc + (i) * 0x8) + #define NV50_PDISPLAY_DAC_MODE_CTRL2_C(i) (0x00610be0 + (i) * 0x8) +- + #define NV90_PDISPLAY_SOR_MODE_CTRL_P(i) (0x00610794 + (i) * 0x8) + #define NV90_PDISPLAY_SOR_MODE_CTRL_C(i) (0x00610798 + (i) * 0x8) +-#define NV90_PDISPLAY_DAC_MODE_CTRL_P(i) (0x00610b58 + (i) * 0x8) +-#define NV90_PDISPLAY_DAC_MODE_CTRL_C(i) (0x00610b5c + (i) * 0x8) +-#define NV90_PDISPLAY_DAC_MODE_CTRL2_P(i) (0x00610b80 + (i) * 0x8) +-#define NV90_PDISPLAY_DAC_MODE_CTRL2_C(i) (0x00610b84 + (i) * 0x8) + + #define NV50_PDISPLAY_CRTC_CLK 0x00614000 + #define NV50_PDISPLAY_CRTC_CLK_CTRL1(i) ((i) * 0x800 + 0x614100) +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_sgdma.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_sgdma.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_sgdma.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_sgdma.c 2010-10-15 02:04:44.387992531 +0200 +@@ -105,11 +105,13 @@ + uint32_t offset_h = upper_32_bits(dma_offset); + + for (j = 0; j < PAGE_SIZE / NV_CTXDMA_PAGE_SIZE; j++) { +- if (dev_priv->card_type < NV_50) +- nv_wo32(dev, gpuobj, pte++, offset_l | 3); +- else 
{ +- nv_wo32(dev, gpuobj, pte++, offset_l | 0x21); +- nv_wo32(dev, gpuobj, pte++, offset_h & 0xff); ++ if (dev_priv->card_type < NV_50) { ++ nv_wo32(gpuobj, (pte * 4) + 0, offset_l | 3); ++ pte += 1; ++ } else { ++ nv_wo32(gpuobj, (pte * 4) + 0, offset_l | 0x21); ++ nv_wo32(gpuobj, (pte * 4) + 4, offset_h & 0xff); ++ pte += 2; + } + + dma_offset += NV_CTXDMA_PAGE_SIZE; +@@ -145,11 +147,13 @@ + dma_addr_t dma_offset = dev_priv->gart_info.sg_dummy_bus; + + for (j = 0; j < PAGE_SIZE / NV_CTXDMA_PAGE_SIZE; j++) { +- if (dev_priv->card_type < NV_50) +- nv_wo32(dev, gpuobj, pte++, dma_offset | 3); +- else { +- nv_wo32(dev, gpuobj, pte++, dma_offset | 0x21); +- nv_wo32(dev, gpuobj, pte++, 0x00000000); ++ if (dev_priv->card_type < NV_50) { ++ nv_wo32(gpuobj, (pte * 4) + 0, dma_offset | 3); ++ pte += 1; ++ } else { ++ nv_wo32(gpuobj, (pte * 4) + 0, 0x00000000); ++ nv_wo32(gpuobj, (pte * 4) + 4, 0x00000000); ++ pte += 2; + } + + dma_offset += NV_CTXDMA_PAGE_SIZE; +@@ -230,7 +234,6 @@ + } + + ret = nouveau_gpuobj_new(dev, NULL, obj_size, 16, +- NVOBJ_FLAG_ALLOW_NO_REFS | + NVOBJ_FLAG_ZERO_ALLOC | + NVOBJ_FLAG_ZERO_FREE, &gpuobj); + if (ret) { +@@ -239,9 +242,9 @@ + } + + dev_priv->gart_info.sg_dummy_page = +- alloc_page(GFP_KERNEL|__GFP_DMA32); ++ alloc_page(GFP_KERNEL|__GFP_DMA32|__GFP_ZERO); + if (!dev_priv->gart_info.sg_dummy_page) { +- nouveau_gpuobj_del(dev, &gpuobj); ++ nouveau_gpuobj_ref(NULL, &gpuobj); + return -ENOMEM; + } + +@@ -250,29 +253,34 @@ + pci_map_page(pdev, dev_priv->gart_info.sg_dummy_page, 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(pdev, dev_priv->gart_info.sg_dummy_bus)) { +- nouveau_gpuobj_del(dev, &gpuobj); ++ nouveau_gpuobj_ref(NULL, &gpuobj); + return -EFAULT; + } + + if (dev_priv->card_type < NV_50) { ++ /* special case, allocated from global instmem heap so ++ * cinst is invalid, we use it on all channels though so ++ * cinst needs to be valid, set it the same as pinst ++ */ ++ gpuobj->cinst = gpuobj->pinst; ++ + /* Maybe use NV_DMA_TARGET_AGP for PCIE? NVIDIA do this, and + * confirmed to work on c51. Perhaps means NV_DMA_TARGET_PCIE + * on those cards? 
*/ +- nv_wo32(dev, gpuobj, 0, NV_CLASS_DMA_IN_MEMORY | +- (1 << 12) /* PT present */ | +- (0 << 13) /* PT *not* linear */ | +- (NV_DMA_ACCESS_RW << 14) | +- (NV_DMA_TARGET_PCI << 16)); +- nv_wo32(dev, gpuobj, 1, aper_size - 1); ++ nv_wo32(gpuobj, 0, NV_CLASS_DMA_IN_MEMORY | ++ (1 << 12) /* PT present */ | ++ (0 << 13) /* PT *not* linear */ | ++ (NV_DMA_ACCESS_RW << 14) | ++ (NV_DMA_TARGET_PCI << 16)); ++ nv_wo32(gpuobj, 4, aper_size - 1); + for (i = 2; i < 2 + (aper_size >> 12); i++) { +- nv_wo32(dev, gpuobj, i, +- dev_priv->gart_info.sg_dummy_bus | 3); ++ nv_wo32(gpuobj, i * 4, ++ dev_priv->gart_info.sg_dummy_bus | 3); + } + } else { + for (i = 0; i < obj_size; i += 8) { +- nv_wo32(dev, gpuobj, (i+0)/4, +- dev_priv->gart_info.sg_dummy_bus | 0x21); +- nv_wo32(dev, gpuobj, (i+4)/4, 0); ++ nv_wo32(gpuobj, i + 0, 0x00000000); ++ nv_wo32(gpuobj, i + 4, 0x00000000); + } + } + dev_priv->engine.instmem.flush(dev); +@@ -298,7 +306,7 @@ + dev_priv->gart_info.sg_dummy_bus = 0; + } + +- nouveau_gpuobj_del(dev, &dev_priv->gart_info.sg_ctxdma); ++ nouveau_gpuobj_ref(NULL, &dev_priv->gart_info.sg_ctxdma); + } + + int +@@ -308,9 +316,9 @@ + struct nouveau_gpuobj *gpuobj = dev_priv->gart_info.sg_ctxdma; + int pte; + +- pte = (offset >> NV_CTXDMA_PAGE_SHIFT); ++ pte = (offset >> NV_CTXDMA_PAGE_SHIFT) << 2; + if (dev_priv->card_type < NV_50) { +- *page = nv_ro32(dev, gpuobj, (pte + 2)) & ~NV_CTXDMA_PAGE_MASK; ++ *page = nv_ro32(gpuobj, (pte + 8)) & ~NV_CTXDMA_PAGE_MASK; + return 0; + } + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_state.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_state.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_state.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_state.c 2010-10-15 02:04:44.389992556 +0200 +@@ -35,6 +35,8 @@ + #include "nouveau_drv.h" + #include "nouveau_drm.h" + #include "nouveau_fbcon.h" ++#include "nouveau_ramht.h" ++#include "nouveau_pm.h" + #include "nv50_display.h" + + static void nouveau_stub_takedown(struct drm_device *dev) {} +@@ -78,7 +80,6 @@ + engine->fifo.disable = nv04_fifo_disable; + engine->fifo.enable = nv04_fifo_enable; + engine->fifo.reassign = nv04_fifo_reassign; +- engine->fifo.cache_flush = nv04_fifo_cache_flush; + engine->fifo.cache_pull = nv04_fifo_cache_pull; + engine->fifo.channel_id = nv04_fifo_channel_id; + engine->fifo.create_context = nv04_fifo_create_context; +@@ -95,6 +96,9 @@ + engine->gpio.get = NULL; + engine->gpio.set = NULL; + engine->gpio.irq_enable = NULL; ++ engine->pm.clock_get = nv04_pm_clock_get; ++ engine->pm.clock_pre = nv04_pm_clock_pre; ++ engine->pm.clock_set = nv04_pm_clock_set; + break; + case 0x10: + engine->instmem.init = nv04_instmem_init; +@@ -130,7 +134,6 @@ + engine->fifo.disable = nv04_fifo_disable; + engine->fifo.enable = nv04_fifo_enable; + engine->fifo.reassign = nv04_fifo_reassign; +- engine->fifo.cache_flush = nv04_fifo_cache_flush; + engine->fifo.cache_pull = nv04_fifo_cache_pull; + engine->fifo.channel_id = nv10_fifo_channel_id; + engine->fifo.create_context = nv10_fifo_create_context; +@@ -147,6 +150,9 @@ + engine->gpio.get = nv10_gpio_get; + engine->gpio.set = nv10_gpio_set; + engine->gpio.irq_enable = NULL; ++ engine->pm.clock_get = nv04_pm_clock_get; ++ engine->pm.clock_pre = nv04_pm_clock_pre; ++ engine->pm.clock_set = nv04_pm_clock_set; + break; + case 0x20: + engine->instmem.init = nv04_instmem_init; +@@ -182,7 +188,6 @@ + engine->fifo.disable = nv04_fifo_disable; + engine->fifo.enable = nv04_fifo_enable; + 
engine->fifo.reassign = nv04_fifo_reassign; +- engine->fifo.cache_flush = nv04_fifo_cache_flush; + engine->fifo.cache_pull = nv04_fifo_cache_pull; + engine->fifo.channel_id = nv10_fifo_channel_id; + engine->fifo.create_context = nv10_fifo_create_context; +@@ -199,6 +204,9 @@ + engine->gpio.get = nv10_gpio_get; + engine->gpio.set = nv10_gpio_set; + engine->gpio.irq_enable = NULL; ++ engine->pm.clock_get = nv04_pm_clock_get; ++ engine->pm.clock_pre = nv04_pm_clock_pre; ++ engine->pm.clock_set = nv04_pm_clock_set; + break; + case 0x30: + engine->instmem.init = nv04_instmem_init; +@@ -234,7 +242,6 @@ + engine->fifo.disable = nv04_fifo_disable; + engine->fifo.enable = nv04_fifo_enable; + engine->fifo.reassign = nv04_fifo_reassign; +- engine->fifo.cache_flush = nv04_fifo_cache_flush; + engine->fifo.cache_pull = nv04_fifo_cache_pull; + engine->fifo.channel_id = nv10_fifo_channel_id; + engine->fifo.create_context = nv10_fifo_create_context; +@@ -251,6 +258,11 @@ + engine->gpio.get = nv10_gpio_get; + engine->gpio.set = nv10_gpio_set; + engine->gpio.irq_enable = NULL; ++ engine->pm.clock_get = nv04_pm_clock_get; ++ engine->pm.clock_pre = nv04_pm_clock_pre; ++ engine->pm.clock_set = nv04_pm_clock_set; ++ engine->pm.voltage_get = nouveau_voltage_gpio_get; ++ engine->pm.voltage_set = nouveau_voltage_gpio_set; + break; + case 0x40: + case 0x60: +@@ -287,7 +299,6 @@ + engine->fifo.disable = nv04_fifo_disable; + engine->fifo.enable = nv04_fifo_enable; + engine->fifo.reassign = nv04_fifo_reassign; +- engine->fifo.cache_flush = nv04_fifo_cache_flush; + engine->fifo.cache_pull = nv04_fifo_cache_pull; + engine->fifo.channel_id = nv10_fifo_channel_id; + engine->fifo.create_context = nv40_fifo_create_context; +@@ -304,6 +315,12 @@ + engine->gpio.get = nv10_gpio_get; + engine->gpio.set = nv10_gpio_set; + engine->gpio.irq_enable = NULL; ++ engine->pm.clock_get = nv04_pm_clock_get; ++ engine->pm.clock_pre = nv04_pm_clock_pre; ++ engine->pm.clock_set = nv04_pm_clock_set; ++ engine->pm.voltage_get = nouveau_voltage_gpio_get; ++ engine->pm.voltage_set = nouveau_voltage_gpio_set; ++ engine->pm.temp_get = nv40_temp_get; + break; + case 0x50: + case 0x80: /* gotta love NVIDIA's consistency.. 
*/ +@@ -358,6 +375,27 @@ + engine->gpio.get = nv50_gpio_get; + engine->gpio.set = nv50_gpio_set; + engine->gpio.irq_enable = nv50_gpio_irq_enable; ++ switch (dev_priv->chipset) { ++ case 0xa3: ++ case 0xa5: ++ case 0xa8: ++ case 0xaf: ++ engine->pm.clock_get = nva3_pm_clock_get; ++ engine->pm.clock_pre = nva3_pm_clock_pre; ++ engine->pm.clock_set = nva3_pm_clock_set; ++ break; ++ default: ++ engine->pm.clock_get = nv50_pm_clock_get; ++ engine->pm.clock_pre = nv50_pm_clock_pre; ++ engine->pm.clock_set = nv50_pm_clock_set; ++ break; ++ } ++ engine->pm.voltage_get = nouveau_voltage_gpio_get; ++ engine->pm.voltage_set = nouveau_voltage_gpio_set; ++ if (dev_priv->chipset >= 0x84) ++ engine->pm.temp_get = nv84_temp_get; ++ else ++ engine->pm.temp_get = nv40_temp_get; + break; + case 0xC0: + engine->instmem.init = nvc0_instmem_init; +@@ -437,16 +475,14 @@ + nouveau_card_init_channel(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_gpuobj *gpuobj; ++ struct nouveau_gpuobj *gpuobj = NULL; + int ret; + + ret = nouveau_channel_alloc(dev, &dev_priv->channel, +- (struct drm_file *)-2, +- NvDmaFB, NvDmaTT); ++ (struct drm_file *)-2, NvDmaFB, NvDmaTT); + if (ret) + return ret; + +- gpuobj = NULL; + ret = nouveau_gpuobj_dma_new(dev_priv->channel, NV_CLASS_DMA_IN_MEMORY, + 0, dev_priv->vram_size, + NV_DMA_ACCESS_RW, NV_DMA_TARGET_VIDMEM, +@@ -454,28 +490,27 @@ + if (ret) + goto out_err; + +- ret = nouveau_gpuobj_ref_add(dev, dev_priv->channel, NvDmaVRAM, +- gpuobj, NULL); ++ ret = nouveau_ramht_insert(dev_priv->channel, NvDmaVRAM, gpuobj); ++ nouveau_gpuobj_ref(NULL, &gpuobj); + if (ret) + goto out_err; + +- gpuobj = NULL; + ret = nouveau_gpuobj_gart_dma_new(dev_priv->channel, 0, + dev_priv->gart_info.aper_size, + NV_DMA_ACCESS_RW, &gpuobj, NULL); + if (ret) + goto out_err; + +- ret = nouveau_gpuobj_ref_add(dev, dev_priv->channel, NvDmaGART, +- gpuobj, NULL); ++ ret = nouveau_ramht_insert(dev_priv->channel, NvDmaGART, gpuobj); ++ nouveau_gpuobj_ref(NULL, &gpuobj); + if (ret) + goto out_err; + ++ mutex_unlock(&dev_priv->channel->mutex); + return 0; ++ + out_err: +- nouveau_gpuobj_del(dev, &gpuobj); +- nouveau_channel_free(dev_priv->channel); +- dev_priv->channel = NULL; ++ nouveau_channel_put(&dev_priv->channel); + return ret; + } + +@@ -522,6 +557,7 @@ + if (ret) + goto out; + engine = &dev_priv->engine; ++ spin_lock_init(&dev_priv->channels.lock); + spin_lock_init(&dev_priv->context_switch_lock); + + /* Make the CRTCs and I2C buses accessible */ +@@ -534,35 +570,28 @@ + if (ret) + goto out_display_early; + +- ret = nouveau_mem_detect(dev); ++ nouveau_pm_init(dev); ++ ++ ret = nouveau_mem_vram_init(dev); + if (ret) + goto out_bios; + +- ret = nouveau_gpuobj_early_init(dev); ++ ret = nouveau_gpuobj_init(dev); + if (ret) +- goto out_bios; ++ goto out_vram; + +- /* Initialise instance memory, must happen before mem_init so we +- * know exactly how much VRAM we're able to use for "normal" +- * purposes. +- */ + ret = engine->instmem.init(dev); + if (ret) +- goto out_gpuobj_early; ++ goto out_gpuobj; + +- /* Setup the memory manager */ +- ret = nouveau_mem_init(dev); ++ ret = nouveau_mem_gart_init(dev); + if (ret) + goto out_instmem; + +- ret = nouveau_gpuobj_init(dev); +- if (ret) +- goto out_mem; +- + /* PMC */ + ret = engine->mc.init(dev); + if (ret) +- goto out_gpuobj; ++ goto out_gart; + + /* PGPIO */ + ret = engine->gpio.init(dev); +@@ -611,9 +640,13 @@ + /* what about PVIDEO/PCRTC/PRAMDAC etc? 
*/ + + if (!engine->graph.accel_blocked) { +- ret = nouveau_card_init_channel(dev); ++ ret = nouveau_fence_init(dev); + if (ret) + goto out_irq; ++ ++ ret = nouveau_card_init_channel(dev); ++ if (ret) ++ goto out_fence; + } + + ret = nouveau_backlight_init(dev); +@@ -624,6 +657,8 @@ + drm_kms_helper_poll_init(dev); + return 0; + ++out_fence: ++ nouveau_fence_fini(dev); + out_irq: + drm_irq_uninstall(dev); + out_display: +@@ -642,16 +677,16 @@ + engine->gpio.takedown(dev); + out_mc: + engine->mc.takedown(dev); +-out_gpuobj: +- nouveau_gpuobj_takedown(dev); +-out_mem: +- nouveau_sgdma_takedown(dev); +- nouveau_mem_close(dev); ++out_gart: ++ nouveau_mem_gart_fini(dev); + out_instmem: + engine->instmem.takedown(dev); +-out_gpuobj_early: +- nouveau_gpuobj_late_takedown(dev); ++out_gpuobj: ++ nouveau_gpuobj_takedown(dev); ++out_vram: ++ nouveau_mem_vram_fini(dev); + out_bios: ++ nouveau_pm_fini(dev); + nouveau_bios_takedown(dev); + out_display_early: + engine->display.late_takedown(dev); +@@ -667,9 +702,9 @@ + + nouveau_backlight_exit(dev); + +- if (dev_priv->channel) { +- nouveau_channel_free(dev_priv->channel); +- dev_priv->channel = NULL; ++ if (!engine->graph.accel_blocked) { ++ nouveau_fence_fini(dev); ++ nouveau_channel_put(&dev_priv->channel); + } + + if (!nouveau_noaccel) { +@@ -686,15 +721,15 @@ + ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM); + ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_TT); + mutex_unlock(&dev->struct_mutex); +- nouveau_sgdma_takedown(dev); ++ nouveau_mem_gart_fini(dev); + +- nouveau_gpuobj_takedown(dev); +- nouveau_mem_close(dev); + engine->instmem.takedown(dev); ++ nouveau_gpuobj_takedown(dev); ++ nouveau_mem_vram_fini(dev); + + drm_irq_uninstall(dev); + +- nouveau_gpuobj_late_takedown(dev); ++ nouveau_pm_fini(dev); + nouveau_bios_takedown(dev); + + vga_client_register(dev->pdev, NULL, NULL, NULL); +@@ -1006,6 +1041,9 @@ + case NOUVEAU_GETPARAM_PTIMER_TIME: + getparam->value = dev_priv->engine.timer.read(dev); + break; ++ case NOUVEAU_GETPARAM_HAS_BO_USAGE: ++ getparam->value = 1; ++ break; + case NOUVEAU_GETPARAM_GRAPH_UNITS: + /* NV40 and NV50 versions are quite different, but register + * address is the same. User is supposed to know the card +@@ -1016,7 +1054,7 @@ + } + /* FALLTHRU */ + default: +- NV_ERROR(dev, "unknown parameter %lld\n", getparam->param); ++ NV_DEBUG(dev, "unknown parameter %lld\n", getparam->param); + return -EINVAL; + } + +@@ -1031,7 +1069,7 @@ + + switch (setparam->param) { + default: +- NV_ERROR(dev, "unknown parameter %lld\n", setparam->param); ++ NV_DEBUG(dev, "unknown parameter %lld\n", setparam->param); + return -EINVAL; + } + +@@ -1057,7 +1095,7 @@ + /* Waits for PGRAPH to go completely idle */ + bool nouveau_wait_for_idle(struct drm_device *dev) + { +- if (!nv_wait(NV04_PGRAPH_STATUS, 0xffffffff, 0x00000000)) { ++ if (!nv_wait(dev, NV04_PGRAPH_STATUS, 0xffffffff, 0x00000000)) { + NV_ERROR(dev, "PGRAPH idle timed out with status 0x%08x\n", + nv_rd32(dev, NV04_PGRAPH_STATUS)); + return false; +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_temp.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_temp.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_temp.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_temp.c 2010-10-15 02:04:44.391992580 +0200 +@@ -0,0 +1,309 @@ ++/* ++ * Copyright 2010 PathScale inc. 
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: Martin Peres ++ */ ++ ++#include "drmP.h" ++ ++#include "nouveau_drv.h" ++#include "nouveau_pm.h" ++ ++static void ++nouveau_temp_vbios_parse(struct drm_device *dev, u8 *temp) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ struct nouveau_pm_temp_sensor_constants *sensor = &pm->sensor_constants; ++ struct nouveau_pm_threshold_temp *temps = &pm->threshold_temp; ++ int i, headerlen, recordlen, entries; ++ ++ if (!temp) { ++ NV_DEBUG(dev, "temperature table pointer invalid\n"); ++ return; ++ } ++ ++ /* Set the default sensor's contants */ ++ sensor->offset_constant = 0; ++ sensor->offset_mult = 1; ++ sensor->offset_div = 1; ++ sensor->slope_mult = 1; ++ sensor->slope_div = 1; ++ ++ /* Set the default temperature thresholds */ ++ temps->critical = 110; ++ temps->down_clock = 100; ++ temps->fan_boost = 90; ++ ++ /* Set the known default values to setup the temperature sensor */ ++ if (dev_priv->card_type >= NV_40) { ++ switch (dev_priv->chipset) { ++ case 0x43: ++ sensor->offset_mult = 32060; ++ sensor->offset_div = 1000; ++ sensor->slope_mult = 792; ++ sensor->slope_div = 1000; ++ break; ++ ++ case 0x44: ++ case 0x47: ++ case 0x4a: ++ sensor->offset_mult = 27839; ++ sensor->offset_div = 1000; ++ sensor->slope_mult = 780; ++ sensor->slope_div = 1000; ++ break; ++ ++ case 0x46: ++ sensor->offset_mult = -24775; ++ sensor->offset_div = 100; ++ sensor->slope_mult = 467; ++ sensor->slope_div = 10000; ++ break; ++ ++ case 0x49: ++ sensor->offset_mult = -25051; ++ sensor->offset_div = 100; ++ sensor->slope_mult = 458; ++ sensor->slope_div = 10000; ++ break; ++ ++ case 0x4b: ++ sensor->offset_mult = -24088; ++ sensor->offset_div = 100; ++ sensor->slope_mult = 442; ++ sensor->slope_div = 10000; ++ break; ++ ++ case 0x50: ++ sensor->offset_mult = -22749; ++ sensor->offset_div = 100; ++ sensor->slope_mult = 431; ++ sensor->slope_div = 10000; ++ break; ++ } ++ } ++ ++ headerlen = temp[1]; ++ recordlen = temp[2]; ++ entries = temp[3]; ++ temp = temp + headerlen; ++ ++ /* Read the entries from the table */ ++ for (i = 0; i < entries; i++) { ++ u16 value = ROM16(temp[1]); ++ ++ switch (temp[0]) { ++ case 0x01: ++ if ((value & 0x8f) == 0) ++ sensor->offset_constant = (value >> 9) & 0x7f; ++ break; ++ ++ case 0x04: ++ if ((value & 0xf00f) == 0xa000) /* core */ ++ temps->critical = (value&0x0ff0) >> 4; ++ break; ++ ++ case 
0x07: ++ if ((value & 0xf00f) == 0xa000) /* core */ ++ temps->down_clock = (value&0x0ff0) >> 4; ++ break; ++ ++ case 0x08: ++ if ((value & 0xf00f) == 0xa000) /* core */ ++ temps->fan_boost = (value&0x0ff0) >> 4; ++ break; ++ ++ case 0x10: ++ sensor->offset_mult = value; ++ break; ++ ++ case 0x11: ++ sensor->offset_div = value; ++ break; ++ ++ case 0x12: ++ sensor->slope_mult = value; ++ break; ++ ++ case 0x13: ++ sensor->slope_div = value; ++ break; ++ } ++ temp += recordlen; ++ } ++ ++ nouveau_temp_safety_checks(dev); ++} ++ ++static int ++nv40_sensor_setup(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ struct nouveau_pm_temp_sensor_constants *sensor = &pm->sensor_constants; ++ u32 offset = sensor->offset_mult / sensor->offset_div; ++ u32 sensor_calibration; ++ ++ /* set up the sensors */ ++ sensor_calibration = 120 - offset - sensor->offset_constant; ++ sensor_calibration = sensor_calibration * sensor->slope_div / ++ sensor->slope_mult; ++ ++ if (dev_priv->chipset >= 0x46) ++ sensor_calibration |= 0x80000000; ++ else ++ sensor_calibration |= 0x10000000; ++ ++ nv_wr32(dev, 0x0015b0, sensor_calibration); ++ ++ /* Wait for the sensor to update */ ++ msleep(5); ++ ++ /* read */ ++ return nv_rd32(dev, 0x0015b4) & 0x1fff; ++} ++ ++int ++nv40_temp_get(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ struct nouveau_pm_temp_sensor_constants *sensor = &pm->sensor_constants; ++ int offset = sensor->offset_mult / sensor->offset_div; ++ int core_temp; ++ ++ if (dev_priv->chipset >= 0x50) { ++ core_temp = nv_rd32(dev, 0x20008); ++ } else { ++ core_temp = nv_rd32(dev, 0x0015b4) & 0x1fff; ++ /* Setup the sensor if the temperature is 0 */ ++ if (core_temp == 0) ++ core_temp = nv40_sensor_setup(dev); ++ } ++ ++ core_temp = core_temp * sensor->slope_mult / sensor->slope_div; ++ core_temp = core_temp + offset + sensor->offset_constant; ++ ++ return core_temp; ++} ++ ++int ++nv84_temp_get(struct drm_device *dev) ++{ ++ return nv_rd32(dev, 0x20400); ++} ++ ++void ++nouveau_temp_safety_checks(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ struct nouveau_pm_threshold_temp *temps = &pm->threshold_temp; ++ ++ if (temps->critical > 120) ++ temps->critical = 120; ++ else if (temps->critical < 80) ++ temps->critical = 80; ++ ++ if (temps->down_clock > 110) ++ temps->down_clock = 110; ++ else if (temps->down_clock < 60) ++ temps->down_clock = 60; ++ ++ if (temps->fan_boost > 100) ++ temps->fan_boost = 100; ++ else if (temps->fan_boost < 40) ++ temps->fan_boost = 40; ++} ++ ++static bool ++probe_monitoring_device(struct nouveau_i2c_chan *i2c, ++ struct i2c_board_info *info) ++{ ++ char modalias[16] = "i2c:"; ++ struct i2c_client *client; ++ ++ strlcat(modalias, info->type, sizeof(modalias)); ++ request_module(modalias); ++ ++ client = i2c_new_device(&i2c->adapter, info); ++ if (!client) ++ return false; ++ ++ if (!client->driver || client->driver->detect(client, info)) { ++ i2c_unregister_device(client); ++ return false; ++ } ++ ++ return true; ++} ++ ++static void ++nouveau_temp_probe_i2c(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct dcb_table *dcb = &dev_priv->vbios.dcb; ++ struct i2c_board_info info[] = { ++ { I2C_BOARD_INFO("w83l785ts", 0x2d) }, ++ { 
I2C_BOARD_INFO("w83781d", 0x2d) }, ++ { I2C_BOARD_INFO("f75375", 0x2e) }, ++ { I2C_BOARD_INFO("adt7473", 0x2e) }, ++ { I2C_BOARD_INFO("lm99", 0x4c) }, ++ { } ++ }; ++ int idx = (dcb->version >= 0x40 ? ++ dcb->i2c_default_indices & 0xf : 2); ++ ++ nouveau_i2c_identify(dev, "monitoring device", info, ++ probe_monitoring_device, idx); ++} ++ ++void ++nouveau_temp_init(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nvbios *bios = &dev_priv->vbios; ++ struct bit_entry P; ++ u8 *temp = NULL; ++ ++ if (bios->type == NVBIOS_BIT) { ++ if (bit_table(dev, 'P', &P)) ++ return; ++ ++ if (P.version == 1) ++ temp = ROMPTR(bios, P.data[12]); ++ else if (P.version == 2) ++ temp = ROMPTR(bios, P.data[16]); ++ else ++ NV_WARN(dev, "unknown temp for BIT P %d\n", P.version); ++ ++ nouveau_temp_vbios_parse(dev, temp); ++ } ++ ++ nouveau_temp_probe_i2c(dev); ++} ++ ++void ++nouveau_temp_fini(struct drm_device *dev) ++{ ++ ++} +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_volt.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_volt.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nouveau_volt.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nouveau_volt.c 2010-10-15 02:04:44.392992593 +0200 +@@ -0,0 +1,212 @@ ++/* ++ * Copyright 2010 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: Ben Skeggs ++ */ ++ ++#include "drmP.h" ++ ++#include "nouveau_drv.h" ++#include "nouveau_pm.h" ++ ++static const enum dcb_gpio_tag vidtag[] = { 0x04, 0x05, 0x06, 0x1a }; ++static int nr_vidtag = sizeof(vidtag) / sizeof(vidtag[0]); ++ ++int ++nouveau_voltage_gpio_get(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_gpio_engine *gpio = &dev_priv->engine.gpio; ++ struct nouveau_pm_voltage *volt = &dev_priv->engine.pm.voltage; ++ u8 vid = 0; ++ int i; ++ ++ for (i = 0; i < nr_vidtag; i++) { ++ if (!(volt->vid_mask & (1 << i))) ++ continue; ++ ++ vid |= gpio->get(dev, vidtag[i]) << i; ++ } ++ ++ return nouveau_volt_lvl_lookup(dev, vid); ++} ++ ++int ++nouveau_voltage_gpio_set(struct drm_device *dev, int voltage) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_gpio_engine *gpio = &dev_priv->engine.gpio; ++ struct nouveau_pm_voltage *volt = &dev_priv->engine.pm.voltage; ++ int vid, i; ++ ++ vid = nouveau_volt_vid_lookup(dev, voltage); ++ if (vid < 0) ++ return vid; ++ ++ for (i = 0; i < nr_vidtag; i++) { ++ if (!(volt->vid_mask & (1 << i))) ++ continue; ++ ++ gpio->set(dev, vidtag[i], !!(vid & (1 << i))); ++ } ++ ++ return 0; ++} ++ ++int ++nouveau_volt_vid_lookup(struct drm_device *dev, int voltage) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_voltage *volt = &dev_priv->engine.pm.voltage; ++ int i; ++ ++ for (i = 0; i < volt->nr_level; i++) { ++ if (volt->level[i].voltage == voltage) ++ return volt->level[i].vid; ++ } ++ ++ return -ENOENT; ++} ++ ++int ++nouveau_volt_lvl_lookup(struct drm_device *dev, int vid) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_voltage *volt = &dev_priv->engine.pm.voltage; ++ int i; ++ ++ for (i = 0; i < volt->nr_level; i++) { ++ if (volt->level[i].vid == vid) ++ return volt->level[i].voltage; ++ } ++ ++ return -ENOENT; ++} ++ ++void ++nouveau_volt_init(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_engine *pm = &dev_priv->engine.pm; ++ struct nouveau_pm_voltage *voltage = &pm->voltage; ++ struct nvbios *bios = &dev_priv->vbios; ++ struct bit_entry P; ++ u8 *volt = NULL, *entry; ++ int i, headerlen, recordlen, entries, vidmask, vidshift; ++ ++ if (bios->type == NVBIOS_BIT) { ++ if (bit_table(dev, 'P', &P)) ++ return; ++ ++ if (P.version == 1) ++ volt = ROMPTR(bios, P.data[16]); ++ else ++ if (P.version == 2) ++ volt = ROMPTR(bios, P.data[12]); ++ else { ++ NV_WARN(dev, "unknown volt for BIT P %d\n", P.version); ++ } ++ } else { ++ if (bios->data[bios->offset + 6] < 0x27) { ++ NV_DEBUG(dev, "BMP version too old for voltage\n"); ++ return; ++ } ++ ++ volt = ROMPTR(bios, bios->data[bios->offset + 0x98]); ++ } ++ ++ if (!volt) { ++ NV_DEBUG(dev, "voltage table pointer invalid\n"); ++ return; ++ } ++ ++ switch (volt[0]) { ++ case 0x10: ++ case 0x11: ++ case 0x12: ++ headerlen = 5; ++ recordlen = volt[1]; ++ entries = volt[2]; ++ vidshift = 0; ++ vidmask = volt[4]; ++ break; ++ case 0x20: ++ headerlen = volt[1]; ++ recordlen = volt[3]; ++ entries = volt[2]; ++ vidshift = 0; /* could be vidshift like 0x30? 
*/ ++ vidmask = volt[5]; ++ break; ++ case 0x30: ++ headerlen = volt[1]; ++ recordlen = volt[2]; ++ entries = volt[3]; ++ vidshift = hweight8(volt[5]); ++ vidmask = volt[4]; ++ break; ++ default: ++ NV_WARN(dev, "voltage table 0x%02x unknown\n", volt[0]); ++ return; ++ } ++ ++ /* validate vid mask */ ++ voltage->vid_mask = vidmask; ++ if (!voltage->vid_mask) ++ return; ++ ++ i = 0; ++ while (vidmask) { ++ if (i > nr_vidtag) { ++ NV_DEBUG(dev, "vid bit %d unknown\n", i); ++ return; ++ } ++ ++ if (!nouveau_bios_gpio_entry(dev, vidtag[i])) { ++ NV_DEBUG(dev, "vid bit %d has no gpio tag\n", i); ++ return; ++ } ++ ++ vidmask >>= 1; ++ i++; ++ } ++ ++ /* parse vbios entries into common format */ ++ voltage->level = kcalloc(entries, sizeof(*voltage->level), GFP_KERNEL); ++ if (!voltage->level) ++ return; ++ ++ entry = volt + headerlen; ++ for (i = 0; i < entries; i++, entry += recordlen) { ++ voltage->level[i].voltage = entry[0]; ++ voltage->level[i].vid = entry[1] >> vidshift; ++ } ++ voltage->nr_level = entries; ++ voltage->supported = true; ++} ++ ++void ++nouveau_volt_fini(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_pm_voltage *volt = &dev_priv->engine.pm.voltage; ++ ++ kfree(volt->level); ++} +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_crtc.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_crtc.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_crtc.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_crtc.c 2010-10-15 02:04:44.394992618 +0200 +@@ -109,7 +109,7 @@ + struct nouveau_pll_vals *pv = ®p->pllvals; + struct pll_lims pll_lim; + +- if (get_pll_limits(dev, nv_crtc->index ? VPLL2 : VPLL1, &pll_lim)) ++ if (get_pll_limits(dev, nv_crtc->index ? 
PLL_VPLL1 : PLL_VPLL0, &pll_lim)) + return; + + /* NM2 == 0 is used to determine single stage mode on two stage plls */ +@@ -718,6 +718,7 @@ + + drm_crtc_cleanup(crtc); + ++ nouveau_bo_unmap(nv_crtc->cursor.nvbo); + nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo); + kfree(nv_crtc); + } +@@ -826,7 +827,7 @@ + crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FF_INDEX); + crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FFLWM__INDEX); + +- if (dev_priv->card_type >= NV_30) { ++ if (dev_priv->card_type >= NV_20) { + regp->CRTC[NV_CIO_CRE_47] = arb_lwm >> 8; + crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_47); + } +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_dac.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_dac.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_dac.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_dac.c 2010-10-15 02:04:44.396992642 +0200 +@@ -291,6 +291,8 @@ + msleep(5); + + sample = NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset); ++ /* do it again just in case it's a residual current */ ++ sample &= NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset); + + temp = NVReadRAMDAC(dev, head, NV_PRAMDAC_TEST_CONTROL); + NVWriteRAMDAC(dev, head, NV_PRAMDAC_TEST_CONTROL, +@@ -343,22 +345,13 @@ + { + struct drm_encoder_helper_funcs *helper = encoder->helper_private; + struct drm_device *dev = encoder->dev; +- struct drm_nouveau_private *dev_priv = dev->dev_private; + int head = nouveau_crtc(encoder->crtc)->index; +- struct nv04_crtc_reg *crtcstate = dev_priv->mode_reg.crtc_reg; + + helper->dpms(encoder, DRM_MODE_DPMS_OFF); + + nv04_dfp_disable(dev, head); +- +- /* Some NV4x have unknown values (0x3f, 0x50, 0x54, 0x6b, 0x79, 0x7f) +- * at LCD__INDEX which we don't alter +- */ +- if (!(crtcstate[head].CRTC[NV_CIO_CRE_LCD__INDEX] & 0x44)) +- crtcstate[head].CRTC[NV_CIO_CRE_LCD__INDEX] = 0; + } + +- + static void nv04_dac_mode_set(struct drm_encoder *encoder, + struct drm_display_mode *mode, + struct drm_display_mode *adjusted_mode) +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_dfp.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_dfp.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_dfp.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_dfp.c 2010-10-15 02:04:44.398992668 +0200 +@@ -104,6 +104,8 @@ + } + /* don't inadvertently turn it on when state written later */ + crtcstate[head].fp_control = FP_TG_CONTROL_OFF; ++ crtcstate[head].CRTC[NV_CIO_CRE_LCD__INDEX] &= ++ ~NV_CIO_CRE_LCD_ROUTE_MASK; + } + + void nv04_dfp_update_fp_control(struct drm_encoder *encoder, int mode) +@@ -183,14 +185,15 @@ + struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder); + struct nouveau_connector *nv_connector = nouveau_encoder_connector_get(nv_encoder); + +- /* For internal panels and gpu scaling on DVI we need the native mode */ +- if (nv_connector->scaling_mode != DRM_MODE_SCALE_NONE) { +- if (!nv_connector->native_mode) +- return false; ++ if (!nv_connector->native_mode || ++ nv_connector->scaling_mode == DRM_MODE_SCALE_NONE || ++ mode->hdisplay > nv_connector->native_mode->hdisplay || ++ mode->vdisplay > nv_connector->native_mode->vdisplay) { ++ nv_encoder->mode = *adjusted_mode; ++ ++ } else { + nv_encoder->mode = *nv_connector->native_mode; + adjusted_mode->clock = nv_connector->native_mode->clock; +- } else { +- nv_encoder->mode = *adjusted_mode; + } + + return true; +@@ -253,26 +256,21 @@ + + nv04_dfp_prepare_sel_clk(dev, nv_encoder, head); + +- /* Some NV4x have unknown 
values (0x3f, 0x50, 0x54, 0x6b, 0x79, 0x7f) +- * at LCD__INDEX which we don't alter +- */ +- if (!(*cr_lcd & 0x44)) { +- *cr_lcd = 0x3; ++ *cr_lcd = (*cr_lcd & ~NV_CIO_CRE_LCD_ROUTE_MASK) | 0x3; + +- if (nv_two_heads(dev)) { +- if (nv_encoder->dcb->location == DCB_LOC_ON_CHIP) +- *cr_lcd |= head ? 0x0 : 0x8; +- else { +- *cr_lcd |= (nv_encoder->dcb->or << 4) & 0x30; +- if (nv_encoder->dcb->type == OUTPUT_LVDS) +- *cr_lcd |= 0x30; +- if ((*cr_lcd & 0x30) == (*cr_lcd_oth & 0x30)) { +- /* avoid being connected to both crtcs */ +- *cr_lcd_oth &= ~0x30; +- NVWriteVgaCrtc(dev, head ^ 1, +- NV_CIO_CRE_LCD__INDEX, +- *cr_lcd_oth); +- } ++ if (nv_two_heads(dev)) { ++ if (nv_encoder->dcb->location == DCB_LOC_ON_CHIP) ++ *cr_lcd |= head ? 0x0 : 0x8; ++ else { ++ *cr_lcd |= (nv_encoder->dcb->or << 4) & 0x30; ++ if (nv_encoder->dcb->type == OUTPUT_LVDS) ++ *cr_lcd |= 0x30; ++ if ((*cr_lcd & 0x30) == (*cr_lcd_oth & 0x30)) { ++ /* avoid being connected to both crtcs */ ++ *cr_lcd_oth &= ~0x30; ++ NVWriteVgaCrtc(dev, head ^ 1, ++ NV_CIO_CRE_LCD__INDEX, ++ *cr_lcd_oth); + } + } + } +@@ -640,7 +638,7 @@ + get_tmds_slave(encoder)) + return; + +- type = nouveau_i2c_identify(dev, "TMDS transmitter", info, 2); ++ type = nouveau_i2c_identify(dev, "TMDS transmitter", info, NULL, 2); + if (type < 0) + return; + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_fbcon.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_fbcon.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_fbcon.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_fbcon.c 2010-10-15 02:04:44.399992681 +0200 +@@ -25,54 +25,42 @@ + #include "drmP.h" + #include "nouveau_drv.h" + #include "nouveau_dma.h" ++#include "nouveau_ramht.h" + #include "nouveau_fbcon.h" + +-void ++int + nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region) + { + struct nouveau_fbdev *nfbdev = info->par; + struct drm_device *dev = nfbdev->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_channel *chan = dev_priv->channel; ++ int ret; + +- if (info->state != FBINFO_STATE_RUNNING) +- return; +- +- if (!(info->flags & FBINFO_HWACCEL_DISABLED) && RING_SPACE(chan, 4)) { +- nouveau_fbcon_gpu_lockup(info); +- } +- +- if (info->flags & FBINFO_HWACCEL_DISABLED) { +- cfb_copyarea(info, region); +- return; +- } ++ ret = RING_SPACE(chan, 4); ++ if (ret) ++ return ret; + + BEGIN_RING(chan, NvSubImageBlit, 0x0300, 3); + OUT_RING(chan, (region->sy << 16) | region->sx); + OUT_RING(chan, (region->dy << 16) | region->dx); + OUT_RING(chan, (region->height << 16) | region->width); + FIRE_RING(chan); ++ return 0; + } + +-void ++int + nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) + { + struct nouveau_fbdev *nfbdev = info->par; + struct drm_device *dev = nfbdev->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_channel *chan = dev_priv->channel; ++ int ret; + +- if (info->state != FBINFO_STATE_RUNNING) +- return; +- +- if (!(info->flags & FBINFO_HWACCEL_DISABLED) && RING_SPACE(chan, 7)) { +- nouveau_fbcon_gpu_lockup(info); +- } +- +- if (info->flags & FBINFO_HWACCEL_DISABLED) { +- cfb_fillrect(info, rect); +- return; +- } ++ ret = RING_SPACE(chan, 7); ++ if (ret) ++ return ret; + + BEGIN_RING(chan, NvSubGdiRect, 0x02fc, 1); + OUT_RING(chan, (rect->rop != ROP_COPY) ? 
1 : 3); +@@ -86,9 +74,10 @@ + OUT_RING(chan, (rect->dx << 16) | rect->dy); + OUT_RING(chan, (rect->width << 16) | rect->height); + FIRE_RING(chan); ++ return 0; + } + +-void ++int + nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) + { + struct nouveau_fbdev *nfbdev = info->par; +@@ -100,23 +89,14 @@ + uint32_t dsize; + uint32_t width; + uint32_t *data = (uint32_t *)image->data; ++ int ret; + +- if (info->state != FBINFO_STATE_RUNNING) +- return; +- +- if (image->depth != 1) { +- cfb_imageblit(info, image); +- return; +- } +- +- if (!(info->flags & FBINFO_HWACCEL_DISABLED) && RING_SPACE(chan, 8)) { +- nouveau_fbcon_gpu_lockup(info); +- } ++ if (image->depth != 1) ++ return -ENODEV; + +- if (info->flags & FBINFO_HWACCEL_DISABLED) { +- cfb_imageblit(info, image); +- return; +- } ++ ret = RING_SPACE(chan, 8); ++ if (ret) ++ return ret; + + width = ALIGN(image->width, 8); + dsize = ALIGN(width * image->height, 32) >> 5; +@@ -143,11 +123,9 @@ + while (dsize) { + int iter_len = dsize > 128 ? 128 : dsize; + +- if (RING_SPACE(chan, iter_len + 1)) { +- nouveau_fbcon_gpu_lockup(info); +- cfb_imageblit(info, image); +- return; +- } ++ ret = RING_SPACE(chan, iter_len + 1); ++ if (ret) ++ return ret; + + BEGIN_RING(chan, NvSubGdiRect, 0x0c00, iter_len); + OUT_RINGp(chan, data, iter_len); +@@ -156,6 +134,7 @@ + } + + FIRE_RING(chan); ++ return 0; + } + + static int +@@ -169,11 +148,9 @@ + if (ret) + return ret; + +- ret = nouveau_gpuobj_ref_add(dev, dev_priv->channel, handle, obj, NULL); +- if (ret) +- return ret; +- +- return 0; ++ ret = nouveau_ramht_insert(dev_priv->channel, handle, obj); ++ nouveau_gpuobj_ref(NULL, &obj); ++ return ret; + } + + int +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_fifo.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_fifo.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_fifo.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_fifo.c 2010-10-15 02:04:44.401992705 +0200 +@@ -27,8 +27,9 @@ + #include "drmP.h" + #include "drm.h" + #include "nouveau_drv.h" ++#include "nouveau_ramht.h" + +-#define NV04_RAMFC(c) (dev_priv->ramfc_offset + ((c) * NV04_RAMFC__SIZE)) ++#define NV04_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV04_RAMFC__SIZE)) + #define NV04_RAMFC__SIZE 32 + #define NV04_RAMFC_DMA_PUT 0x00 + #define NV04_RAMFC_DMA_GET 0x04 +@@ -38,10 +39,8 @@ + #define NV04_RAMFC_ENGINE 0x14 + #define NV04_RAMFC_PULL1_ENGINE 0x18 + +-#define RAMFC_WR(offset, val) nv_wo32(dev, chan->ramfc->gpuobj, \ +- NV04_RAMFC_##offset/4, (val)) +-#define RAMFC_RD(offset) nv_ro32(dev, chan->ramfc->gpuobj, \ +- NV04_RAMFC_##offset/4) ++#define RAMFC_WR(offset, val) nv_wo32(chan->ramfc, NV04_RAMFC_##offset, (val)) ++#define RAMFC_RD(offset) nv_ro32(chan->ramfc, NV04_RAMFC_##offset) + + void + nv04_fifo_disable(struct drm_device *dev) +@@ -72,37 +71,32 @@ + } + + bool +-nv04_fifo_cache_flush(struct drm_device *dev) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer; +- uint64_t start = ptimer->read(dev); +- +- do { +- if (nv_rd32(dev, NV03_PFIFO_CACHE1_GET) == +- nv_rd32(dev, NV03_PFIFO_CACHE1_PUT)) +- return true; +- +- } while (ptimer->read(dev) - start < 100000000); +- +- NV_ERROR(dev, "Timeout flushing the PFIFO cache.\n"); +- +- return false; +-} +- +-bool + nv04_fifo_cache_pull(struct drm_device *dev, bool enable) + { +- uint32_t pull = nv_rd32(dev, NV04_PFIFO_CACHE1_PULL0); ++ int pull = nv_mask(dev, NV04_PFIFO_CACHE1_PULL0, 1, 
enable); ++ ++ if (!enable) { ++ /* In some cases the PFIFO puller may be left in an ++ * inconsistent state if you try to stop it when it's ++ * busy translating handles. Sometimes you get a ++ * PFIFO_CACHE_ERROR, sometimes it just fails silently ++ * sending incorrect instance offsets to PGRAPH after ++ * it's started up again. To avoid the latter we ++ * invalidate the most recently calculated instance. ++ */ ++ if (!nv_wait(dev, NV04_PFIFO_CACHE1_PULL0, ++ NV04_PFIFO_CACHE1_PULL0_HASH_BUSY, 0)) ++ NV_ERROR(dev, "Timeout idling the PFIFO puller.\n"); ++ ++ if (nv_rd32(dev, NV04_PFIFO_CACHE1_PULL0) & ++ NV04_PFIFO_CACHE1_PULL0_HASH_FAILED) ++ nv_wr32(dev, NV03_PFIFO_INTR_0, ++ NV_PFIFO_INTR_CACHE_ERROR); + +- if (enable) { +- nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, pull | 1); +- } else { +- nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, pull & ~1); + nv_wr32(dev, NV04_PFIFO_CACHE1_HASH, 0); + } + +- return !!(pull & 1); ++ return pull & 1; + } + + int +@@ -130,7 +124,7 @@ + NV04_RAMFC__SIZE, + NVOBJ_FLAG_ZERO_ALLOC | + NVOBJ_FLAG_ZERO_FREE, +- NULL, &chan->ramfc); ++ &chan->ramfc); + if (ret) + return ret; + +@@ -139,7 +133,7 @@ + /* Setup initial state */ + RAMFC_WR(DMA_PUT, chan->pushbuf_base); + RAMFC_WR(DMA_GET, chan->pushbuf_base); +- RAMFC_WR(DMA_INSTANCE, chan->pushbuf->instance >> 4); ++ RAMFC_WR(DMA_INSTANCE, chan->pushbuf->pinst >> 4); + RAMFC_WR(DMA_FETCH, (NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | + NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | + NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 | +@@ -161,7 +155,7 @@ + nv_wr32(dev, NV04_PFIFO_MODE, + nv_rd32(dev, NV04_PFIFO_MODE) & ~(1 << chan->id)); + +- nouveau_gpuobj_ref_del(dev, &chan->ramfc); ++ nouveau_gpuobj_ref(NULL, &chan->ramfc); + } + + static void +@@ -214,7 +208,7 @@ + if (chid < 0 || chid >= dev_priv->engine.fifo.channels) + return 0; + +- chan = dev_priv->fifos[chid]; ++ chan = dev_priv->channels.ptr[chid]; + if (!chan) { + NV_ERROR(dev, "Inactive channel on PFIFO: %d\n", chid); + return -EINVAL; +@@ -264,10 +258,10 @@ + struct drm_nouveau_private *dev_priv = dev->dev_private; + + nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ | +- ((dev_priv->ramht_bits - 9) << 16) | +- (dev_priv->ramht_offset >> 8)); +- nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro_offset>>8); +- nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc_offset >> 8); ++ ((dev_priv->ramht->bits - 9) << 16) | ++ (dev_priv->ramht->gpuobj->pinst >> 8)); ++ nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8); ++ nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc->pinst >> 8); + } + + static void +@@ -295,7 +289,7 @@ + pfifo->reassign(dev, true); + + for (i = 0; i < dev_priv->engine.fifo.channels; i++) { +- if (dev_priv->fifos[i]) { ++ if (dev_priv->channels.ptr[i]) { + uint32_t mode = nv_rd32(dev, NV04_PFIFO_MODE); + nv_wr32(dev, NV04_PFIFO_MODE, mode | (1 << i)); + } +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_graph.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_graph.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_graph.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_graph.c 2010-10-15 02:04:44.402992717 +0200 +@@ -357,7 +357,7 @@ + if (chid >= dev_priv->engine.fifo.channels) + return NULL; + +- return dev_priv->fifos[chid]; ++ return dev_priv->channels.ptr[chid]; + } + + void +@@ -376,7 +376,7 @@ + + /* Load context for next channel */ + chid = dev_priv->engine.fifo.channel_id(dev); +- chan = dev_priv->fifos[chid]; ++ chan = dev_priv->channels.ptr[chid]; + if (chan) + 
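The NV03_PFIFO_RAMHT write above packs three fields: a search-depth code in bits 24 and up, log2 of the entry count minus 9 in bits 16 and up, and the table's instance address in 256-byte units. A standalone sketch of that packing, using the 4 KiB hash table at PRAMIN+0x10000 that nv04_instmem_init() creates later in this patch:

#include <stdint.h>
#include <stdio.h>

static uint32_t ramht_reg(uint32_t bits, uint32_t pinst)
{
        return (0x03 << 24) |           /* search 128 */
               ((bits - 9) << 16) |     /* 9 bits -> 512 entries */
               (pinst >> 8);            /* instance address / 256 */
}

int main(void)
{
        printf("0x%08x\n", (unsigned)ramht_reg(9, 0x10000)); /* 0x03000100 */
        return 0;
}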
nv04_graph_load_context(chan); + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_instmem.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_instmem.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_instmem.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_instmem.c 2010-10-15 02:04:44.404992741 +0200 +@@ -1,6 +1,7 @@ + #include "drmP.h" + #include "drm.h" + #include "nouveau_drv.h" ++#include "nouveau_ramht.h" + + /* returns the size of fifo context */ + static int +@@ -17,102 +18,51 @@ + return 32; + } + +-static void +-nv04_instmem_determine_amount(struct drm_device *dev) ++int nv04_instmem_init(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; +- int i; ++ struct nouveau_gpuobj *ramht = NULL; ++ u32 offset, length; ++ int ret; + +- /* Figure out how much instance memory we need */ +- if (dev_priv->card_type >= NV_40) { +- /* We'll want more instance memory than this on some NV4x cards. +- * There's a 16MB aperture to play with that maps onto the end +- * of vram. For now, only reserve a small piece until we know +- * more about what each chipset requires. +- */ +- switch (dev_priv->chipset) { +- case 0x40: +- case 0x47: +- case 0x49: +- case 0x4b: +- dev_priv->ramin_rsvd_vram = (2 * 1024 * 1024); +- break; +- default: +- dev_priv->ramin_rsvd_vram = (1 * 1024 * 1024); +- break; +- } +- } else { +- /*XXX: what *are* the limits on <nv40 cards? +- */ +- dev_priv->ramin_rsvd_vram = (512 * 1024); +- } +- NV_DEBUG(dev, "RAMIN size: %dKiB\n", dev_priv->ramin_rsvd_vram >> 10); ++ /* RAMIN always available */ ++ dev_priv->ramin_available = true; + +- /* Clear all of it, except the BIOS image that's in the first 64KiB */ +- for (i = 64 * 1024; i < dev_priv->ramin_rsvd_vram; i += 4) +- nv_wi32(dev, i, 0x00000000); +-} ++ /* Setup shared RAMHT */ ++ ret = nouveau_gpuobj_new_fake(dev, 0x10000, ~0, 4096, ++ NVOBJ_FLAG_ZERO_ALLOC, &ramht); ++ if (ret) ++ return ret; + +-static void +-nv04_instmem_configure_fixed_tables(struct drm_device *dev) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_engine *engine = &dev_priv->engine; ++ ret = nouveau_ramht_new(dev, ramht, &dev_priv->ramht); ++ nouveau_gpuobj_ref(NULL, &ramht); ++ if (ret) ++ return ret; + +- /* FIFO hash table (RAMHT) +- * use 4k hash table at RAMIN+0x10000 +- * TODO: extend the hash table +- */ +- dev_priv->ramht_offset = 0x10000; +- dev_priv->ramht_bits = 9; +- dev_priv->ramht_size = (1 << dev_priv->ramht_bits); /* nr entries */ +- dev_priv->ramht_size *= 8; /* 2 32-bit values per entry in RAMHT */ +- NV_DEBUG(dev, "RAMHT offset=0x%x, size=%d\n", dev_priv->ramht_offset, +- dev_priv->ramht_size); +- +- /* FIFO runout table (RAMRO) - 512k at 0x11200 */ +- dev_priv->ramro_offset = 0x11200; +- dev_priv->ramro_size = 512; +- NV_DEBUG(dev, "RAMRO offset=0x%x, size=%d\n", dev_priv->ramro_offset, +- dev_priv->ramro_size); +- +- /* FIFO context table (RAMFC) +- * NV40 : Not sure exactly how to position RAMFC on some cards, +- * 0x30002 seems to position it at RAMIN+0x20000 on these +- * cards. RAMFC is 4kb (32 fifos, 128byte entries).
+- * Others: Position RAMFC at RAMIN+0x11400 +- */ +- dev_priv->ramfc_size = engine->fifo.channels * +- nouveau_fifo_ctx_size(dev); ++ /* And RAMRO */ ++ ret = nouveau_gpuobj_new_fake(dev, 0x11200, ~0, 512, ++ NVOBJ_FLAG_ZERO_ALLOC, &dev_priv->ramro); ++ if (ret) ++ return ret; ++ ++ /* And RAMFC */ ++ length = dev_priv->engine.fifo.channels * nouveau_fifo_ctx_size(dev); + switch (dev_priv->card_type) { + case NV_40: +- dev_priv->ramfc_offset = 0x20000; ++ offset = 0x20000; + break; +- case NV_30: +- case NV_20: +- case NV_10: +- case NV_04: + default: +- dev_priv->ramfc_offset = 0x11400; ++ offset = 0x11400; + break; + } +- NV_DEBUG(dev, "RAMFC offset=0x%x, size=%d\n", dev_priv->ramfc_offset, +- dev_priv->ramfc_size); +-} + +-int nv04_instmem_init(struct drm_device *dev) +-{ +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- uint32_t offset; +- int ret; +- +- nv04_instmem_determine_amount(dev); +- nv04_instmem_configure_fixed_tables(dev); ++ ret = nouveau_gpuobj_new_fake(dev, offset, ~0, length, ++ NVOBJ_FLAG_ZERO_ALLOC, &dev_priv->ramfc); ++ if (ret) ++ return ret; + +- /* Create a heap to manage RAMIN allocations, we don't allocate +- * the space that was reserved for RAMHT/FC/RO. +- */ +- offset = dev_priv->ramfc_offset + dev_priv->ramfc_size; ++ /* Only allow space after RAMFC to be used for object allocation */ ++ offset += length; + + /* It appears RAMRO (or something?) is controlled by 0x2220/0x2230 + * on certain NV4x chipsets as well as RAMFC. When 0x2230 == 0 +@@ -140,46 +90,34 @@ + void + nv04_instmem_takedown(struct drm_device *dev) + { ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ ++ nouveau_ramht_ref(NULL, &dev_priv->ramht, NULL); ++ nouveau_gpuobj_ref(NULL, &dev_priv->ramro); ++ nouveau_gpuobj_ref(NULL, &dev_priv->ramfc); + } + + int +-nv04_instmem_populate(struct drm_device *dev, struct nouveau_gpuobj *gpuobj, uint32_t *sz) ++nv04_instmem_populate(struct drm_device *dev, struct nouveau_gpuobj *gpuobj, ++ uint32_t *sz) + { +- if (gpuobj->im_backing) +- return -EINVAL; +- + return 0; + } + + void + nv04_instmem_clear(struct drm_device *dev, struct nouveau_gpuobj *gpuobj) + { +- struct drm_nouveau_private *dev_priv = dev->dev_private; +- +- if (gpuobj && gpuobj->im_backing) { +- if (gpuobj->im_bound) +- dev_priv->engine.instmem.unbind(dev, gpuobj); +- gpuobj->im_backing = NULL; +- } + } + + int + nv04_instmem_bind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj) + { +- if (!gpuobj->im_pramin || gpuobj->im_bound) +- return -EINVAL; +- +- gpuobj->im_bound = 1; + return 0; + } + + int + nv04_instmem_unbind(struct drm_device *dev, struct nouveau_gpuobj *gpuobj) + { +- if (gpuobj->im_bound == 0) +- return -EINVAL; +- +- gpuobj->im_bound = 0; + return 0; + } + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_pm.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_pm.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_pm.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_pm.c 2010-10-15 02:04:44.405992754 +0200 +@@ -0,0 +1,81 @@ ++/* ++ * Copyright 2010 Red Hat Inc. 
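For reference, the fixed PRAMIN carve-up that nv04_instmem_init() above re-creates with nouveau_gpuobj_new_fake(): RAMHT at 0x10000 (4096 bytes), RAMRO at 0x11200 (512 bytes), RAMFC at 0x11400 (0x20000 on NV40), with the allocator heap starting right after RAMFC. A standalone sketch; the channel count and per-channel context size are assumed NV04-class values, the real driver takes both from dev_priv:

#include <stdio.h>

int main(void)
{
        unsigned ramht = 0x10000, ramht_len = 4096;     /* shared hash table */
        unsigned ramro = 0x11200, ramro_len = 512;      /* runout table */
        unsigned ramfc = 0x11400;                       /* 0x20000 on NV40 */
        unsigned channels = 16, ctx_size = 32;          /* assumed NV04 values */
        unsigned heap = ramfc + channels * ctx_size;    /* object space begins */

        printf("RAMHT 0x%05x+0x%x RAMRO 0x%05x+0x%x RAMFC 0x%05x heap 0x%05x\n",
               ramht, ramht_len, ramro, ramro_len, ramfc, heap);
        return 0;
}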
++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: Ben Skeggs ++ */ ++ ++#include "drmP.h" ++#include "nouveau_drv.h" ++#include "nouveau_hw.h" ++#include "nouveau_pm.h" ++ ++struct nv04_pm_state { ++ struct pll_lims pll; ++ struct nouveau_pll_vals calc; ++}; ++ ++int ++nv04_pm_clock_get(struct drm_device *dev, u32 id) ++{ ++ return nouveau_hw_get_clock(dev, id); ++} ++ ++void * ++nv04_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl, ++ u32 id, int khz) ++{ ++ struct nv04_pm_state *state; ++ int ret; ++ ++ state = kzalloc(sizeof(*state), GFP_KERNEL); ++ if (!state) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = get_pll_limits(dev, id, &state->pll); ++ if (ret) { ++ kfree(state); ++ return (ret == -ENOENT) ? 
NULL : ERR_PTR(ret); ++ } ++ ++ ret = nouveau_calc_pll_mnp(dev, &state->pll, khz, &state->calc); ++ if (!ret) { ++ kfree(state); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ return state; ++} ++ ++void ++nv04_pm_clock_set(struct drm_device *dev, void *pre_state) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nv04_pm_state *state = pre_state; ++ u32 reg = state->pll.reg; ++ ++ /* thank the insane nouveau_hw_setpll() interface for this */ ++ if (dev_priv->card_type >= NV_40) ++ reg += 4; ++ ++ nouveau_hw_setpll(dev, reg, &state->calc); ++ kfree(state); ++} ++ +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_tv.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_tv.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv04_tv.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv04_tv.c 2010-10-15 02:04:44.407992780 +0200 +@@ -49,8 +49,8 @@ + + int nv04_tv_identify(struct drm_device *dev, int i2c_index) + { +- return nouveau_i2c_identify(dev, "TV encoder", +- nv04_tv_encoder_info, i2c_index); ++ return nouveau_i2c_identify(dev, "TV encoder", nv04_tv_encoder_info, ++ NULL, i2c_index); + } + + +@@ -99,12 +99,10 @@ + + state->tv_setup = 0; + +- if (bind) { +- state->CRTC[NV_CIO_CRE_LCD__INDEX] = 0; ++ if (bind) + state->CRTC[NV_CIO_CRE_49] |= 0x10; +- } else { ++ else + state->CRTC[NV_CIO_CRE_49] &= ~0x10; +- } + + NVWriteVgaCrtc(dev, head, NV_CIO_CRE_LCD__INDEX, + state->CRTC[NV_CIO_CRE_LCD__INDEX]); +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv10_fifo.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv10_fifo.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv10_fifo.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv10_fifo.c 2010-10-15 02:04:44.408992792 +0200 +@@ -27,8 +27,9 @@ + #include "drmP.h" + #include "drm.h" + #include "nouveau_drv.h" ++#include "nouveau_ramht.h" + +-#define NV10_RAMFC(c) (dev_priv->ramfc_offset + ((c) * NV10_RAMFC__SIZE)) ++#define NV10_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV10_RAMFC__SIZE)) + #define NV10_RAMFC__SIZE ((dev_priv->chipset) >= 0x17 ? 
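The nv04_pm_clock_pre()/nv04_pm_clock_set() pair above follows a prepare/commit split: everything that can fail (PLL limit lookup, M/N/P calculation, allocation) happens in pre, so set only has to program the hardware and free the state. A minimal standalone imitation of the pattern, with stand-in arithmetic rather than the real nouveau_calc_pll_mnp() math:

#include <stdio.h>
#include <stdlib.h>

struct state { int n, m, p; };          /* computed coefficients */

static void *clock_pre(int khz)         /* may fail; returns NULL or state */
{
        struct state *s = malloc(sizeof(*s));
        if (!s)
                return NULL;
        s->n = khz / 1000;              /* stand-in for the PLL solver */
        s->m = 1;
        s->p = 0;
        return s;
}

static void clock_set(void *pre)        /* cannot fail; just commits */
{
        struct state *s = pre;
        printf("program N=%d M=%d P=%d\n", s->n, s->m, s->p);
        free(s);
}

int main(void)
{
        void *s = clock_pre(300000);
        if (s)
                clock_set(s);
        return 0;
}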
64 : 32) + + int +@@ -48,7 +49,7 @@ + + ret = nouveau_gpuobj_new_fake(dev, NV10_RAMFC(chan->id), ~0, + NV10_RAMFC__SIZE, NVOBJ_FLAG_ZERO_ALLOC | +- NVOBJ_FLAG_ZERO_FREE, NULL, &chan->ramfc); ++ NVOBJ_FLAG_ZERO_FREE, &chan->ramfc); + if (ret) + return ret; + +@@ -57,7 +58,7 @@ + */ + nv_wi32(dev, fc + 0, chan->pushbuf_base); + nv_wi32(dev, fc + 4, chan->pushbuf_base); +- nv_wi32(dev, fc + 12, chan->pushbuf->instance >> 4); ++ nv_wi32(dev, fc + 12, chan->pushbuf->pinst >> 4); + nv_wi32(dev, fc + 20, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | + NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | + NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 | +@@ -80,7 +81,7 @@ + nv_wr32(dev, NV04_PFIFO_MODE, + nv_rd32(dev, NV04_PFIFO_MODE) & ~(1 << chan->id)); + +- nouveau_gpuobj_ref_del(dev, &chan->ramfc); ++ nouveau_gpuobj_ref(NULL, &chan->ramfc); + } + + static void +@@ -202,14 +203,14 @@ + struct drm_nouveau_private *dev_priv = dev->dev_private; + + nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ | +- ((dev_priv->ramht_bits - 9) << 16) | +- (dev_priv->ramht_offset >> 8)); +- nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro_offset>>8); ++ ((dev_priv->ramht->bits - 9) << 16) | ++ (dev_priv->ramht->gpuobj->pinst >> 8)); ++ nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8); + + if (dev_priv->chipset < 0x17) { +- nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc_offset >> 8); ++ nv_wr32(dev, NV03_PFIFO_RAMFC, dev_priv->ramfc->pinst >> 8); + } else { +- nv_wr32(dev, NV03_PFIFO_RAMFC, (dev_priv->ramfc_offset >> 8) | ++ nv_wr32(dev, NV03_PFIFO_RAMFC, (dev_priv->ramfc->pinst >> 8) | + (1 << 16) /* 64 Bytes entry*/); + /* XXX nvidia blob set bit 18, 21,23 for nv20 & nv30 */ + } +@@ -240,7 +241,7 @@ + pfifo->reassign(dev, true); + + for (i = 0; i < dev_priv->engine.fifo.channels; i++) { +- if (dev_priv->fifos[i]) { ++ if (dev_priv->channels.ptr[i]) { + uint32_t mode = nv_rd32(dev, NV04_PFIFO_MODE); + nv_wr32(dev, NV04_PFIFO_MODE, mode | (1 << i)); + } +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv10_graph.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv10_graph.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv10_graph.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv10_graph.c 2010-10-15 02:04:44.410992816 +0200 +@@ -802,8 +802,8 @@ + + /* Load context for next channel */ + chid = (nv_rd32(dev, NV04_PGRAPH_TRAPPED_ADDR) >> 20) & 0x1f; +- chan = dev_priv->fifos[chid]; +- if (chan) ++ chan = dev_priv->channels.ptr[chid]; ++ if (chan && chan->pgraph_ctx) + nv10_graph_load_context(chan); + + pgraph->fifo_access(dev, true); +@@ -833,7 +833,7 @@ + if (chid >= dev_priv->engine.fifo.channels) + return NULL; + +- return dev_priv->fifos[chid]; ++ return dev_priv->channels.ptr[chid]; + } + + int nv10_graph_create_context(struct nouveau_channel *chan) +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv17_tv.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv17_tv.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv17_tv.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv17_tv.c 2010-10-15 02:04:44.412992842 +0200 +@@ -193,55 +193,56 @@ + } + } + +-static const struct { +- int hdisplay; +- int vdisplay; +-} modes[] = { +- { 640, 400 }, +- { 640, 480 }, +- { 720, 480 }, +- { 720, 576 }, +- { 800, 600 }, +- { 1024, 768 }, +- { 1280, 720 }, +- { 1280, 1024 }, +- { 1920, 1080 } +-}; +- +-static int nv17_tv_get_modes(struct drm_encoder *encoder, +- struct drm_connector *connector) ++static int nv17_tv_get_ld_modes(struct drm_encoder 
*encoder, ++ struct drm_connector *connector) + { + struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder); +- struct drm_display_mode *mode; +- struct drm_display_mode *output_mode; ++ struct drm_display_mode *mode, *tv_mode; + int n = 0; +- int i; +- +- if (tv_norm->kind != CTV_ENC_MODE) { +- struct drm_display_mode *tv_mode; + +- for (tv_mode = nv17_tv_modes; tv_mode->hdisplay; tv_mode++) { +- mode = drm_mode_duplicate(encoder->dev, tv_mode); ++ for (tv_mode = nv17_tv_modes; tv_mode->hdisplay; tv_mode++) { ++ mode = drm_mode_duplicate(encoder->dev, tv_mode); + +- mode->clock = tv_norm->tv_enc_mode.vrefresh * +- mode->htotal / 1000 * +- mode->vtotal / 1000; ++ mode->clock = tv_norm->tv_enc_mode.vrefresh * ++ mode->htotal / 1000 * ++ mode->vtotal / 1000; + +- if (mode->flags & DRM_MODE_FLAG_DBLSCAN) +- mode->clock *= 2; ++ if (mode->flags & DRM_MODE_FLAG_DBLSCAN) ++ mode->clock *= 2; + +- if (mode->hdisplay == tv_norm->tv_enc_mode.hdisplay && +- mode->vdisplay == tv_norm->tv_enc_mode.vdisplay) +- mode->type |= DRM_MODE_TYPE_PREFERRED; ++ if (mode->hdisplay == tv_norm->tv_enc_mode.hdisplay && ++ mode->vdisplay == tv_norm->tv_enc_mode.vdisplay) ++ mode->type |= DRM_MODE_TYPE_PREFERRED; + +- drm_mode_probed_add(connector, mode); +- n++; +- } +- return n; ++ drm_mode_probed_add(connector, mode); ++ n++; + } + +- /* tv_norm->kind == CTV_ENC_MODE */ +- output_mode = &tv_norm->ctv_enc_mode.mode; ++ return n; ++} ++ ++static int nv17_tv_get_hd_modes(struct drm_encoder *encoder, ++ struct drm_connector *connector) ++{ ++ struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder); ++ struct drm_display_mode *output_mode = &tv_norm->ctv_enc_mode.mode; ++ struct drm_display_mode *mode; ++ const struct { ++ int hdisplay; ++ int vdisplay; ++ } modes[] = { ++ { 640, 400 }, ++ { 640, 480 }, ++ { 720, 480 }, ++ { 720, 576 }, ++ { 800, 600 }, ++ { 1024, 768 }, ++ { 1280, 720 }, ++ { 1280, 1024 }, ++ { 1920, 1080 } ++ }; ++ int i, n = 0; ++ + for (i = 0; i < ARRAY_SIZE(modes); i++) { + if (modes[i].hdisplay > output_mode->hdisplay || + modes[i].vdisplay > output_mode->vdisplay) +@@ -251,11 +252,12 @@ + modes[i].vdisplay == output_mode->vdisplay) { + mode = drm_mode_duplicate(encoder->dev, output_mode); + mode->type |= DRM_MODE_TYPE_PREFERRED; ++ + } else { + mode = drm_cvt_mode(encoder->dev, modes[i].hdisplay, +- modes[i].vdisplay, 60, false, +- output_mode->flags & DRM_MODE_FLAG_INTERLACE, +- false); ++ modes[i].vdisplay, 60, false, ++ (output_mode->flags & ++ DRM_MODE_FLAG_INTERLACE), false); + } + + /* CVT modes are sometimes unsuitable... 
*/ +@@ -266,6 +268,7 @@ + - mode->hdisplay) * 9 / 10) & ~7; + mode->hsync_end = mode->hsync_start + 8; + } ++ + if (output_mode->vdisplay >= 1024) { + mode->vtotal = output_mode->vtotal; + mode->vsync_start = output_mode->vsync_start; +@@ -276,9 +279,21 @@ + drm_mode_probed_add(connector, mode); + n++; + } ++ + return n; + } + ++static int nv17_tv_get_modes(struct drm_encoder *encoder, ++ struct drm_connector *connector) ++{ ++ struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder); ++ ++ if (tv_norm->kind == CTV_ENC_MODE) ++ return nv17_tv_get_hd_modes(encoder, connector); ++ else ++ return nv17_tv_get_ld_modes(encoder, connector); ++} ++ + static int nv17_tv_mode_valid(struct drm_encoder *encoder, + struct drm_display_mode *mode) + { +@@ -408,15 +423,8 @@ + + } + +- /* Some NV4x have unknown values (0x3f, 0x50, 0x54, 0x6b, 0x79, 0x7f) +- * at LCD__INDEX which we don't alter +- */ +- if (!(*cr_lcd & 0x44)) { +- if (tv_norm->kind == CTV_ENC_MODE) +- *cr_lcd = 0x1 | (head ? 0x0 : 0x8); +- else +- *cr_lcd = 0; +- } ++ if (tv_norm->kind == CTV_ENC_MODE) ++ *cr_lcd |= 0x1 | (head ? 0x0 : 0x8); + + /* Set the DACCLK register */ + dacclk = (NVReadRAMDAC(dev, 0, dacclk_off) & ~0x30) | 0x1; +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv17_tv.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv17_tv.h +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv17_tv.h 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv17_tv.h 2010-10-15 02:04:44.414992866 +0200 +@@ -127,7 +127,8 @@ + + /* TV hardware access functions */ + +-static inline void nv_write_ptv(struct drm_device *dev, uint32_t reg, uint32_t val) ++static inline void nv_write_ptv(struct drm_device *dev, uint32_t reg, ++ uint32_t val) + { + nv_wr32(dev, reg, val); + } +@@ -137,7 +138,8 @@ + return nv_rd32(dev, reg); + } + +-static inline void nv_write_tv_enc(struct drm_device *dev, uint8_t reg, uint8_t val) ++static inline void nv_write_tv_enc(struct drm_device *dev, uint8_t reg, ++ uint8_t val) + { + nv_write_ptv(dev, NV_PTV_TV_INDEX, reg); + nv_write_ptv(dev, NV_PTV_TV_DATA, val); +@@ -149,8 +151,11 @@ + return nv_read_ptv(dev, NV_PTV_TV_DATA); + } + +-#define nv_load_ptv(dev, state, reg) nv_write_ptv(dev, NV_PTV_OFFSET + 0x##reg, state->ptv_##reg) +-#define nv_save_ptv(dev, state, reg) state->ptv_##reg = nv_read_ptv(dev, NV_PTV_OFFSET + 0x##reg) +-#define nv_load_tv_enc(dev, state, reg) nv_write_tv_enc(dev, 0x##reg, state->tv_enc[0x##reg]) ++#define nv_load_ptv(dev, state, reg) \ ++ nv_write_ptv(dev, NV_PTV_OFFSET + 0x##reg, state->ptv_##reg) ++#define nv_save_ptv(dev, state, reg) \ ++ state->ptv_##reg = nv_read_ptv(dev, NV_PTV_OFFSET + 0x##reg) ++#define nv_load_tv_enc(dev, state, reg) \ ++ nv_write_tv_enc(dev, 0x##reg, state->tv_enc[0x##reg]) + + #endif +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv17_tv_modes.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv17_tv_modes.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv17_tv_modes.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv17_tv_modes.c 2010-10-15 02:04:44.415992878 +0200 +@@ -336,12 +336,17 @@ + struct filter_params *p = &fparams[k][j]; + + for (i = 0; i < 7; i++) { +- int64_t c = (p->k1 + p->ki*i + p->ki2*i*i + p->ki3*i*i*i) +- + (p->kr + p->kir*i + p->ki2r*i*i + p->ki3r*i*i*i)*rs[k] +- + (p->kf + p->kif*i + p->ki2f*i*i + p->ki3f*i*i*i)*flicker +- + (p->krf + p->kirf*i + p->ki2rf*i*i + p->ki3rf*i*i*i)*flicker*rs[k]; ++ int64_t c = (p->k1 + p->ki*i + p->ki2*i*i + ++ p->ki3*i*i*i) ++ + 
(p->kr + p->kir*i + p->ki2r*i*i + ++ p->ki3r*i*i*i) * rs[k] ++ + (p->kf + p->kif*i + p->ki2f*i*i + ++ p->ki3f*i*i*i) * flicker ++ + (p->krf + p->kirf*i + p->ki2rf*i*i + ++ p->ki3rf*i*i*i) * flicker * rs[k]; + +- (*filters[k])[j][i] = (c + id5/2) >> 39 & (0x1 << 31 | 0x7f << 9); ++ (*filters[k])[j][i] = (c + id5/2) >> 39 ++ & (0x1 << 31 | 0x7f << 9); + } + } + } +@@ -349,7 +354,8 @@ + + /* Hardware state saving/restoring */ + +-static void tv_save_filter(struct drm_device *dev, uint32_t base, uint32_t regs[4][7]) ++static void tv_save_filter(struct drm_device *dev, uint32_t base, ++ uint32_t regs[4][7]) + { + int i, j; + uint32_t offsets[] = { base, base + 0x1c, base + 0x40, base + 0x5c }; +@@ -360,7 +366,8 @@ + } + } + +-static void tv_load_filter(struct drm_device *dev, uint32_t base, uint32_t regs[4][7]) ++static void tv_load_filter(struct drm_device *dev, uint32_t base, ++ uint32_t regs[4][7]) + { + int i, j; + uint32_t offsets[] = { base, base + 0x1c, base + 0x40, base + 0x5c }; +@@ -504,10 +511,10 @@ + break; + } + +- regs->tv_enc[0x20] = interpolate(0, tv_norm->tv_enc_mode.tv_enc[0x20], 255, +- tv_enc->saturation); +- regs->tv_enc[0x22] = interpolate(0, tv_norm->tv_enc_mode.tv_enc[0x22], 255, +- tv_enc->saturation); ++ regs->tv_enc[0x20] = interpolate(0, tv_norm->tv_enc_mode.tv_enc[0x20], ++ 255, tv_enc->saturation); ++ regs->tv_enc[0x22] = interpolate(0, tv_norm->tv_enc_mode.tv_enc[0x22], ++ 255, tv_enc->saturation); + regs->tv_enc[0x25] = tv_enc->hue * 255 / 100; + + nv_load_ptv(dev, regs, 204); +@@ -541,7 +548,8 @@ + int head = nouveau_crtc(encoder->crtc)->index; + struct nv04_crtc_reg *regs = &dev_priv->mode_reg.crtc_reg[head]; + struct drm_display_mode *crtc_mode = &encoder->crtc->mode; +- struct drm_display_mode *output_mode = &get_tv_norm(encoder)->ctv_enc_mode.mode; ++ struct drm_display_mode *output_mode = ++ &get_tv_norm(encoder)->ctv_enc_mode.mode; + int overscan, hmargin, vmargin, hratio, vratio; + + /* The rescaler doesn't do the right thing for interlaced modes. 
*/ +@@ -553,13 +561,15 @@ + hmargin = (output_mode->hdisplay - crtc_mode->hdisplay) / 2; + vmargin = (output_mode->vdisplay - crtc_mode->vdisplay) / 2; + +- hmargin = interpolate(0, min(hmargin, output_mode->hdisplay/20), hmargin, +- overscan); +- vmargin = interpolate(0, min(vmargin, output_mode->vdisplay/20), vmargin, +- overscan); +- +- hratio = crtc_mode->hdisplay * 0x800 / (output_mode->hdisplay - 2*hmargin); +- vratio = crtc_mode->vdisplay * 0x800 / (output_mode->vdisplay - 2*vmargin) & ~3; ++ hmargin = interpolate(0, min(hmargin, output_mode->hdisplay/20), ++ hmargin, overscan); ++ vmargin = interpolate(0, min(vmargin, output_mode->vdisplay/20), ++ vmargin, overscan); ++ ++ hratio = crtc_mode->hdisplay * 0x800 / ++ (output_mode->hdisplay - 2*hmargin); ++ vratio = crtc_mode->vdisplay * 0x800 / ++ (output_mode->vdisplay - 2*vmargin) & ~3; + + regs->fp_horiz_regs[FP_VALID_START] = hmargin; + regs->fp_horiz_regs[FP_VALID_END] = output_mode->hdisplay - hmargin - 1; +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv20_graph.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv20_graph.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv20_graph.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv20_graph.c 2010-10-15 02:04:44.418992917 +0200 +@@ -37,49 +37,49 @@ + { + int i; + +- nv_wo32(dev, ctx, 0x033c/4, 0xffff0000); +- nv_wo32(dev, ctx, 0x03a0/4, 0x0fff0000); +- nv_wo32(dev, ctx, 0x03a4/4, 0x0fff0000); +- nv_wo32(dev, ctx, 0x047c/4, 0x00000101); +- nv_wo32(dev, ctx, 0x0490/4, 0x00000111); +- nv_wo32(dev, ctx, 0x04a8/4, 0x44400000); ++ nv_wo32(ctx, 0x033c, 0xffff0000); ++ nv_wo32(ctx, 0x03a0, 0x0fff0000); ++ nv_wo32(ctx, 0x03a4, 0x0fff0000); ++ nv_wo32(ctx, 0x047c, 0x00000101); ++ nv_wo32(ctx, 0x0490, 0x00000111); ++ nv_wo32(ctx, 0x04a8, 0x44400000); + for (i = 0x04d4; i <= 0x04e0; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00030303); ++ nv_wo32(ctx, i, 0x00030303); + for (i = 0x04f4; i <= 0x0500; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00080000); ++ nv_wo32(ctx, i, 0x00080000); + for (i = 0x050c; i <= 0x0518; i += 4) +- nv_wo32(dev, ctx, i/4, 0x01012000); ++ nv_wo32(ctx, i, 0x01012000); + for (i = 0x051c; i <= 0x0528; i += 4) +- nv_wo32(dev, ctx, i/4, 0x000105b8); ++ nv_wo32(ctx, i, 0x000105b8); + for (i = 0x052c; i <= 0x0538; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00080008); ++ nv_wo32(ctx, i, 0x00080008); + for (i = 0x055c; i <= 0x0598; i += 4) +- nv_wo32(dev, ctx, i/4, 0x07ff0000); +- nv_wo32(dev, ctx, 0x05a4/4, 0x4b7fffff); +- nv_wo32(dev, ctx, 0x05fc/4, 0x00000001); +- nv_wo32(dev, ctx, 0x0604/4, 0x00004000); +- nv_wo32(dev, ctx, 0x0610/4, 0x00000001); +- nv_wo32(dev, ctx, 0x0618/4, 0x00040000); +- nv_wo32(dev, ctx, 0x061c/4, 0x00010000); ++ nv_wo32(ctx, i, 0x07ff0000); ++ nv_wo32(ctx, 0x05a4, 0x4b7fffff); ++ nv_wo32(ctx, 0x05fc, 0x00000001); ++ nv_wo32(ctx, 0x0604, 0x00004000); ++ nv_wo32(ctx, 0x0610, 0x00000001); ++ nv_wo32(ctx, 0x0618, 0x00040000); ++ nv_wo32(ctx, 0x061c, 0x00010000); + for (i = 0x1c1c; i <= 0x248c; i += 16) { +- nv_wo32(dev, ctx, (i + 0)/4, 0x10700ff9); +- nv_wo32(dev, ctx, (i + 4)/4, 0x0436086c); +- nv_wo32(dev, ctx, (i + 8)/4, 0x000c001b); +- } +- nv_wo32(dev, ctx, 0x281c/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2830/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x285c/4, 0x40000000); +- nv_wo32(dev, ctx, 0x2860/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2864/4, 0x3f000000); +- nv_wo32(dev, ctx, 0x286c/4, 0x40000000); +- nv_wo32(dev, ctx, 0x2870/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2878/4, 0xbf800000); +- nv_wo32(dev, ctx, 0x2880/4, 
0xbf800000); +- nv_wo32(dev, ctx, 0x34a4/4, 0x000fe000); +- nv_wo32(dev, ctx, 0x3530/4, 0x000003f8); +- nv_wo32(dev, ctx, 0x3540/4, 0x002fe000); ++ nv_wo32(ctx, (i + 0), 0x10700ff9); ++ nv_wo32(ctx, (i + 4), 0x0436086c); ++ nv_wo32(ctx, (i + 8), 0x000c001b); ++ } ++ nv_wo32(ctx, 0x281c, 0x3f800000); ++ nv_wo32(ctx, 0x2830, 0x3f800000); ++ nv_wo32(ctx, 0x285c, 0x40000000); ++ nv_wo32(ctx, 0x2860, 0x3f800000); ++ nv_wo32(ctx, 0x2864, 0x3f000000); ++ nv_wo32(ctx, 0x286c, 0x40000000); ++ nv_wo32(ctx, 0x2870, 0x3f800000); ++ nv_wo32(ctx, 0x2878, 0xbf800000); ++ nv_wo32(ctx, 0x2880, 0xbf800000); ++ nv_wo32(ctx, 0x34a4, 0x000fe000); ++ nv_wo32(ctx, 0x3530, 0x000003f8); ++ nv_wo32(ctx, 0x3540, 0x002fe000); + for (i = 0x355c; i <= 0x3578; i += 4) +- nv_wo32(dev, ctx, i/4, 0x001c527c); ++ nv_wo32(ctx, i, 0x001c527c); + } + + static void +@@ -87,58 +87,58 @@ + { + int i; + +- nv_wo32(dev, ctx, 0x035c/4, 0xffff0000); +- nv_wo32(dev, ctx, 0x03c0/4, 0x0fff0000); +- nv_wo32(dev, ctx, 0x03c4/4, 0x0fff0000); +- nv_wo32(dev, ctx, 0x049c/4, 0x00000101); +- nv_wo32(dev, ctx, 0x04b0/4, 0x00000111); +- nv_wo32(dev, ctx, 0x04c8/4, 0x00000080); +- nv_wo32(dev, ctx, 0x04cc/4, 0xffff0000); +- nv_wo32(dev, ctx, 0x04d0/4, 0x00000001); +- nv_wo32(dev, ctx, 0x04e4/4, 0x44400000); +- nv_wo32(dev, ctx, 0x04fc/4, 0x4b800000); ++ nv_wo32(ctx, 0x035c, 0xffff0000); ++ nv_wo32(ctx, 0x03c0, 0x0fff0000); ++ nv_wo32(ctx, 0x03c4, 0x0fff0000); ++ nv_wo32(ctx, 0x049c, 0x00000101); ++ nv_wo32(ctx, 0x04b0, 0x00000111); ++ nv_wo32(ctx, 0x04c8, 0x00000080); ++ nv_wo32(ctx, 0x04cc, 0xffff0000); ++ nv_wo32(ctx, 0x04d0, 0x00000001); ++ nv_wo32(ctx, 0x04e4, 0x44400000); ++ nv_wo32(ctx, 0x04fc, 0x4b800000); + for (i = 0x0510; i <= 0x051c; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00030303); ++ nv_wo32(ctx, i, 0x00030303); + for (i = 0x0530; i <= 0x053c; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00080000); ++ nv_wo32(ctx, i, 0x00080000); + for (i = 0x0548; i <= 0x0554; i += 4) +- nv_wo32(dev, ctx, i/4, 0x01012000); ++ nv_wo32(ctx, i, 0x01012000); + for (i = 0x0558; i <= 0x0564; i += 4) +- nv_wo32(dev, ctx, i/4, 0x000105b8); ++ nv_wo32(ctx, i, 0x000105b8); + for (i = 0x0568; i <= 0x0574; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00080008); ++ nv_wo32(ctx, i, 0x00080008); + for (i = 0x0598; i <= 0x05d4; i += 4) +- nv_wo32(dev, ctx, i/4, 0x07ff0000); +- nv_wo32(dev, ctx, 0x05e0/4, 0x4b7fffff); +- nv_wo32(dev, ctx, 0x0620/4, 0x00000080); +- nv_wo32(dev, ctx, 0x0624/4, 0x30201000); +- nv_wo32(dev, ctx, 0x0628/4, 0x70605040); +- nv_wo32(dev, ctx, 0x062c/4, 0xb0a09080); +- nv_wo32(dev, ctx, 0x0630/4, 0xf0e0d0c0); +- nv_wo32(dev, ctx, 0x0664/4, 0x00000001); +- nv_wo32(dev, ctx, 0x066c/4, 0x00004000); +- nv_wo32(dev, ctx, 0x0678/4, 0x00000001); +- nv_wo32(dev, ctx, 0x0680/4, 0x00040000); +- nv_wo32(dev, ctx, 0x0684/4, 0x00010000); ++ nv_wo32(ctx, i, 0x07ff0000); ++ nv_wo32(ctx, 0x05e0, 0x4b7fffff); ++ nv_wo32(ctx, 0x0620, 0x00000080); ++ nv_wo32(ctx, 0x0624, 0x30201000); ++ nv_wo32(ctx, 0x0628, 0x70605040); ++ nv_wo32(ctx, 0x062c, 0xb0a09080); ++ nv_wo32(ctx, 0x0630, 0xf0e0d0c0); ++ nv_wo32(ctx, 0x0664, 0x00000001); ++ nv_wo32(ctx, 0x066c, 0x00004000); ++ nv_wo32(ctx, 0x0678, 0x00000001); ++ nv_wo32(ctx, 0x0680, 0x00040000); ++ nv_wo32(ctx, 0x0684, 0x00010000); + for (i = 0x1b04; i <= 0x2374; i += 16) { +- nv_wo32(dev, ctx, (i + 0)/4, 0x10700ff9); +- nv_wo32(dev, ctx, (i + 4)/4, 0x0436086c); +- nv_wo32(dev, ctx, (i + 8)/4, 0x000c001b); +- } +- nv_wo32(dev, ctx, 0x2704/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2718/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2744/4, 
0x40000000); +- nv_wo32(dev, ctx, 0x2748/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x274c/4, 0x3f000000); +- nv_wo32(dev, ctx, 0x2754/4, 0x40000000); +- nv_wo32(dev, ctx, 0x2758/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2760/4, 0xbf800000); +- nv_wo32(dev, ctx, 0x2768/4, 0xbf800000); +- nv_wo32(dev, ctx, 0x308c/4, 0x000fe000); +- nv_wo32(dev, ctx, 0x3108/4, 0x000003f8); +- nv_wo32(dev, ctx, 0x3468/4, 0x002fe000); ++ nv_wo32(ctx, (i + 0), 0x10700ff9); ++ nv_wo32(ctx, (i + 4), 0x0436086c); ++ nv_wo32(ctx, (i + 8), 0x000c001b); ++ } ++ nv_wo32(ctx, 0x2704, 0x3f800000); ++ nv_wo32(ctx, 0x2718, 0x3f800000); ++ nv_wo32(ctx, 0x2744, 0x40000000); ++ nv_wo32(ctx, 0x2748, 0x3f800000); ++ nv_wo32(ctx, 0x274c, 0x3f000000); ++ nv_wo32(ctx, 0x2754, 0x40000000); ++ nv_wo32(ctx, 0x2758, 0x3f800000); ++ nv_wo32(ctx, 0x2760, 0xbf800000); ++ nv_wo32(ctx, 0x2768, 0xbf800000); ++ nv_wo32(ctx, 0x308c, 0x000fe000); ++ nv_wo32(ctx, 0x3108, 0x000003f8); ++ nv_wo32(ctx, 0x3468, 0x002fe000); + for (i = 0x3484; i <= 0x34a0; i += 4) +- nv_wo32(dev, ctx, i/4, 0x001c527c); ++ nv_wo32(ctx, i, 0x001c527c); + } + + static void +@@ -146,49 +146,49 @@ + { + int i; + +- nv_wo32(dev, ctx, 0x033c/4, 0xffff0000); +- nv_wo32(dev, ctx, 0x03a0/4, 0x0fff0000); +- nv_wo32(dev, ctx, 0x03a4/4, 0x0fff0000); +- nv_wo32(dev, ctx, 0x047c/4, 0x00000101); +- nv_wo32(dev, ctx, 0x0490/4, 0x00000111); +- nv_wo32(dev, ctx, 0x04a8/4, 0x44400000); ++ nv_wo32(ctx, 0x033c, 0xffff0000); ++ nv_wo32(ctx, 0x03a0, 0x0fff0000); ++ nv_wo32(ctx, 0x03a4, 0x0fff0000); ++ nv_wo32(ctx, 0x047c, 0x00000101); ++ nv_wo32(ctx, 0x0490, 0x00000111); ++ nv_wo32(ctx, 0x04a8, 0x44400000); + for (i = 0x04d4; i <= 0x04e0; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00030303); ++ nv_wo32(ctx, i, 0x00030303); + for (i = 0x04f4; i <= 0x0500; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00080000); ++ nv_wo32(ctx, i, 0x00080000); + for (i = 0x050c; i <= 0x0518; i += 4) +- nv_wo32(dev, ctx, i/4, 0x01012000); ++ nv_wo32(ctx, i, 0x01012000); + for (i = 0x051c; i <= 0x0528; i += 4) +- nv_wo32(dev, ctx, i/4, 0x000105b8); ++ nv_wo32(ctx, i, 0x000105b8); + for (i = 0x052c; i <= 0x0538; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00080008); ++ nv_wo32(ctx, i, 0x00080008); + for (i = 0x055c; i <= 0x0598; i += 4) +- nv_wo32(dev, ctx, i/4, 0x07ff0000); +- nv_wo32(dev, ctx, 0x05a4/4, 0x4b7fffff); +- nv_wo32(dev, ctx, 0x05fc/4, 0x00000001); +- nv_wo32(dev, ctx, 0x0604/4, 0x00004000); +- nv_wo32(dev, ctx, 0x0610/4, 0x00000001); +- nv_wo32(dev, ctx, 0x0618/4, 0x00040000); +- nv_wo32(dev, ctx, 0x061c/4, 0x00010000); ++ nv_wo32(ctx, i, 0x07ff0000); ++ nv_wo32(ctx, 0x05a4, 0x4b7fffff); ++ nv_wo32(ctx, 0x05fc, 0x00000001); ++ nv_wo32(ctx, 0x0604, 0x00004000); ++ nv_wo32(ctx, 0x0610, 0x00000001); ++ nv_wo32(ctx, 0x0618, 0x00040000); ++ nv_wo32(ctx, 0x061c, 0x00010000); + for (i = 0x1a9c; i <= 0x22fc; i += 16) { /*XXX: check!! 
*/ +- nv_wo32(dev, ctx, (i + 0)/4, 0x10700ff9); +- nv_wo32(dev, ctx, (i + 4)/4, 0x0436086c); +- nv_wo32(dev, ctx, (i + 8)/4, 0x000c001b); +- } +- nv_wo32(dev, ctx, 0x269c/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x26b0/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x26dc/4, 0x40000000); +- nv_wo32(dev, ctx, 0x26e0/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x26e4/4, 0x3f000000); +- nv_wo32(dev, ctx, 0x26ec/4, 0x40000000); +- nv_wo32(dev, ctx, 0x26f0/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x26f8/4, 0xbf800000); +- nv_wo32(dev, ctx, 0x2700/4, 0xbf800000); +- nv_wo32(dev, ctx, 0x3024/4, 0x000fe000); +- nv_wo32(dev, ctx, 0x30a0/4, 0x000003f8); +- nv_wo32(dev, ctx, 0x33fc/4, 0x002fe000); ++ nv_wo32(ctx, (i + 0), 0x10700ff9); ++ nv_wo32(ctx, (i + 4), 0x0436086c); ++ nv_wo32(ctx, (i + 8), 0x000c001b); ++ } ++ nv_wo32(ctx, 0x269c, 0x3f800000); ++ nv_wo32(ctx, 0x26b0, 0x3f800000); ++ nv_wo32(ctx, 0x26dc, 0x40000000); ++ nv_wo32(ctx, 0x26e0, 0x3f800000); ++ nv_wo32(ctx, 0x26e4, 0x3f000000); ++ nv_wo32(ctx, 0x26ec, 0x40000000); ++ nv_wo32(ctx, 0x26f0, 0x3f800000); ++ nv_wo32(ctx, 0x26f8, 0xbf800000); ++ nv_wo32(ctx, 0x2700, 0xbf800000); ++ nv_wo32(ctx, 0x3024, 0x000fe000); ++ nv_wo32(ctx, 0x30a0, 0x000003f8); ++ nv_wo32(ctx, 0x33fc, 0x002fe000); + for (i = 0x341c; i <= 0x3438; i += 4) +- nv_wo32(dev, ctx, i/4, 0x001c527c); ++ nv_wo32(ctx, i, 0x001c527c); + } + + static void +@@ -196,57 +196,57 @@ + { + int i; + +- nv_wo32(dev, ctx, 0x0410/4, 0x00000101); +- nv_wo32(dev, ctx, 0x0424/4, 0x00000111); +- nv_wo32(dev, ctx, 0x0428/4, 0x00000060); +- nv_wo32(dev, ctx, 0x0444/4, 0x00000080); +- nv_wo32(dev, ctx, 0x0448/4, 0xffff0000); +- nv_wo32(dev, ctx, 0x044c/4, 0x00000001); +- nv_wo32(dev, ctx, 0x0460/4, 0x44400000); +- nv_wo32(dev, ctx, 0x048c/4, 0xffff0000); ++ nv_wo32(ctx, 0x0410, 0x00000101); ++ nv_wo32(ctx, 0x0424, 0x00000111); ++ nv_wo32(ctx, 0x0428, 0x00000060); ++ nv_wo32(ctx, 0x0444, 0x00000080); ++ nv_wo32(ctx, 0x0448, 0xffff0000); ++ nv_wo32(ctx, 0x044c, 0x00000001); ++ nv_wo32(ctx, 0x0460, 0x44400000); ++ nv_wo32(ctx, 0x048c, 0xffff0000); + for (i = 0x04e0; i < 0x04e8; i += 4) +- nv_wo32(dev, ctx, i/4, 0x0fff0000); +- nv_wo32(dev, ctx, 0x04ec/4, 0x00011100); ++ nv_wo32(ctx, i, 0x0fff0000); ++ nv_wo32(ctx, 0x04ec, 0x00011100); + for (i = 0x0508; i < 0x0548; i += 4) +- nv_wo32(dev, ctx, i/4, 0x07ff0000); +- nv_wo32(dev, ctx, 0x0550/4, 0x4b7fffff); +- nv_wo32(dev, ctx, 0x058c/4, 0x00000080); +- nv_wo32(dev, ctx, 0x0590/4, 0x30201000); +- nv_wo32(dev, ctx, 0x0594/4, 0x70605040); +- nv_wo32(dev, ctx, 0x0598/4, 0xb8a89888); +- nv_wo32(dev, ctx, 0x059c/4, 0xf8e8d8c8); +- nv_wo32(dev, ctx, 0x05b0/4, 0xb0000000); ++ nv_wo32(ctx, i, 0x07ff0000); ++ nv_wo32(ctx, 0x0550, 0x4b7fffff); ++ nv_wo32(ctx, 0x058c, 0x00000080); ++ nv_wo32(ctx, 0x0590, 0x30201000); ++ nv_wo32(ctx, 0x0594, 0x70605040); ++ nv_wo32(ctx, 0x0598, 0xb8a89888); ++ nv_wo32(ctx, 0x059c, 0xf8e8d8c8); ++ nv_wo32(ctx, 0x05b0, 0xb0000000); + for (i = 0x0600; i < 0x0640; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00010588); ++ nv_wo32(ctx, i, 0x00010588); + for (i = 0x0640; i < 0x0680; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00030303); ++ nv_wo32(ctx, i, 0x00030303); + for (i = 0x06c0; i < 0x0700; i += 4) +- nv_wo32(dev, ctx, i/4, 0x0008aae4); ++ nv_wo32(ctx, i, 0x0008aae4); + for (i = 0x0700; i < 0x0740; i += 4) +- nv_wo32(dev, ctx, i/4, 0x01012000); ++ nv_wo32(ctx, i, 0x01012000); + for (i = 0x0740; i < 0x0780; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00080008); +- nv_wo32(dev, ctx, 0x085c/4, 0x00040000); +- nv_wo32(dev, ctx, 0x0860/4, 0x00010000); ++ nv_wo32(ctx, i, 
0x00080008); ++ nv_wo32(ctx, 0x085c, 0x00040000); ++ nv_wo32(ctx, 0x0860, 0x00010000); + for (i = 0x0864; i < 0x0874; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00040004); ++ nv_wo32(ctx, i, 0x00040004); + for (i = 0x1f18; i <= 0x3088 ; i += 16) { +- nv_wo32(dev, ctx, i/4 + 0, 0x10700ff9); +- nv_wo32(dev, ctx, i/4 + 1, 0x0436086c); +- nv_wo32(dev, ctx, i/4 + 2, 0x000c001b); ++ nv_wo32(ctx, i + 0, 0x10700ff9); ++ nv_wo32(ctx, i + 1, 0x0436086c); ++ nv_wo32(ctx, i + 2, 0x000c001b); + } + for (i = 0x30b8; i < 0x30c8; i += 4) +- nv_wo32(dev, ctx, i/4, 0x0000ffff); +- nv_wo32(dev, ctx, 0x344c/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x3808/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x381c/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x3848/4, 0x40000000); +- nv_wo32(dev, ctx, 0x384c/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x3850/4, 0x3f000000); +- nv_wo32(dev, ctx, 0x3858/4, 0x40000000); +- nv_wo32(dev, ctx, 0x385c/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x3864/4, 0xbf800000); +- nv_wo32(dev, ctx, 0x386c/4, 0xbf800000); ++ nv_wo32(ctx, i, 0x0000ffff); ++ nv_wo32(ctx, 0x344c, 0x3f800000); ++ nv_wo32(ctx, 0x3808, 0x3f800000); ++ nv_wo32(ctx, 0x381c, 0x3f800000); ++ nv_wo32(ctx, 0x3848, 0x40000000); ++ nv_wo32(ctx, 0x384c, 0x3f800000); ++ nv_wo32(ctx, 0x3850, 0x3f000000); ++ nv_wo32(ctx, 0x3858, 0x40000000); ++ nv_wo32(ctx, 0x385c, 0x3f800000); ++ nv_wo32(ctx, 0x3864, 0xbf800000); ++ nv_wo32(ctx, 0x386c, 0xbf800000); + } + + static void +@@ -254,57 +254,57 @@ + { + int i; + +- nv_wo32(dev, ctx, 0x040c/4, 0x01000101); +- nv_wo32(dev, ctx, 0x0420/4, 0x00000111); +- nv_wo32(dev, ctx, 0x0424/4, 0x00000060); +- nv_wo32(dev, ctx, 0x0440/4, 0x00000080); +- nv_wo32(dev, ctx, 0x0444/4, 0xffff0000); +- nv_wo32(dev, ctx, 0x0448/4, 0x00000001); +- nv_wo32(dev, ctx, 0x045c/4, 0x44400000); +- nv_wo32(dev, ctx, 0x0480/4, 0xffff0000); ++ nv_wo32(ctx, 0x040c, 0x01000101); ++ nv_wo32(ctx, 0x0420, 0x00000111); ++ nv_wo32(ctx, 0x0424, 0x00000060); ++ nv_wo32(ctx, 0x0440, 0x00000080); ++ nv_wo32(ctx, 0x0444, 0xffff0000); ++ nv_wo32(ctx, 0x0448, 0x00000001); ++ nv_wo32(ctx, 0x045c, 0x44400000); ++ nv_wo32(ctx, 0x0480, 0xffff0000); + for (i = 0x04d4; i < 0x04dc; i += 4) +- nv_wo32(dev, ctx, i/4, 0x0fff0000); +- nv_wo32(dev, ctx, 0x04e0/4, 0x00011100); ++ nv_wo32(ctx, i, 0x0fff0000); ++ nv_wo32(ctx, 0x04e0, 0x00011100); + for (i = 0x04fc; i < 0x053c; i += 4) +- nv_wo32(dev, ctx, i/4, 0x07ff0000); +- nv_wo32(dev, ctx, 0x0544/4, 0x4b7fffff); +- nv_wo32(dev, ctx, 0x057c/4, 0x00000080); +- nv_wo32(dev, ctx, 0x0580/4, 0x30201000); +- nv_wo32(dev, ctx, 0x0584/4, 0x70605040); +- nv_wo32(dev, ctx, 0x0588/4, 0xb8a89888); +- nv_wo32(dev, ctx, 0x058c/4, 0xf8e8d8c8); +- nv_wo32(dev, ctx, 0x05a0/4, 0xb0000000); ++ nv_wo32(ctx, i, 0x07ff0000); ++ nv_wo32(ctx, 0x0544, 0x4b7fffff); ++ nv_wo32(ctx, 0x057c, 0x00000080); ++ nv_wo32(ctx, 0x0580, 0x30201000); ++ nv_wo32(ctx, 0x0584, 0x70605040); ++ nv_wo32(ctx, 0x0588, 0xb8a89888); ++ nv_wo32(ctx, 0x058c, 0xf8e8d8c8); ++ nv_wo32(ctx, 0x05a0, 0xb0000000); + for (i = 0x05f0; i < 0x0630; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00010588); ++ nv_wo32(ctx, i, 0x00010588); + for (i = 0x0630; i < 0x0670; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00030303); ++ nv_wo32(ctx, i, 0x00030303); + for (i = 0x06b0; i < 0x06f0; i += 4) +- nv_wo32(dev, ctx, i/4, 0x0008aae4); ++ nv_wo32(ctx, i, 0x0008aae4); + for (i = 0x06f0; i < 0x0730; i += 4) +- nv_wo32(dev, ctx, i/4, 0x01012000); ++ nv_wo32(ctx, i, 0x01012000); + for (i = 0x0730; i < 0x0770; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00080008); +- nv_wo32(dev, ctx, 0x0850/4, 0x00040000); +- nv_wo32(dev, 
ctx, 0x0854/4, 0x00010000); ++ nv_wo32(ctx, i, 0x00080008); ++ nv_wo32(ctx, 0x0850, 0x00040000); ++ nv_wo32(ctx, 0x0854, 0x00010000); + for (i = 0x0858; i < 0x0868; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00040004); ++ nv_wo32(ctx, i, 0x00040004); + for (i = 0x15ac; i <= 0x271c ; i += 16) { +- nv_wo32(dev, ctx, i/4 + 0, 0x10700ff9); +- nv_wo32(dev, ctx, i/4 + 1, 0x0436086c); +- nv_wo32(dev, ctx, i/4 + 2, 0x000c001b); ++ nv_wo32(ctx, i + 0, 0x10700ff9); ++ nv_wo32(ctx, i + 1, 0x0436086c); ++ nv_wo32(ctx, i + 2, 0x000c001b); + } + for (i = 0x274c; i < 0x275c; i += 4) +- nv_wo32(dev, ctx, i/4, 0x0000ffff); +- nv_wo32(dev, ctx, 0x2ae0/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2e9c/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2eb0/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2edc/4, 0x40000000); +- nv_wo32(dev, ctx, 0x2ee0/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2ee4/4, 0x3f000000); +- nv_wo32(dev, ctx, 0x2eec/4, 0x40000000); +- nv_wo32(dev, ctx, 0x2ef0/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x2ef8/4, 0xbf800000); +- nv_wo32(dev, ctx, 0x2f00/4, 0xbf800000); ++ nv_wo32(ctx, i, 0x0000ffff); ++ nv_wo32(ctx, 0x2ae0, 0x3f800000); ++ nv_wo32(ctx, 0x2e9c, 0x3f800000); ++ nv_wo32(ctx, 0x2eb0, 0x3f800000); ++ nv_wo32(ctx, 0x2edc, 0x40000000); ++ nv_wo32(ctx, 0x2ee0, 0x3f800000); ++ nv_wo32(ctx, 0x2ee4, 0x3f000000); ++ nv_wo32(ctx, 0x2eec, 0x40000000); ++ nv_wo32(ctx, 0x2ef0, 0x3f800000); ++ nv_wo32(ctx, 0x2ef8, 0xbf800000); ++ nv_wo32(ctx, 0x2f00, 0xbf800000); + } + + static void +@@ -312,57 +312,57 @@ + { + int i; + +- nv_wo32(dev, ctx, 0x040c/4, 0x00000101); +- nv_wo32(dev, ctx, 0x0420/4, 0x00000111); +- nv_wo32(dev, ctx, 0x0424/4, 0x00000060); +- nv_wo32(dev, ctx, 0x0440/4, 0x00000080); +- nv_wo32(dev, ctx, 0x0444/4, 0xffff0000); +- nv_wo32(dev, ctx, 0x0448/4, 0x00000001); +- nv_wo32(dev, ctx, 0x045c/4, 0x44400000); +- nv_wo32(dev, ctx, 0x0488/4, 0xffff0000); ++ nv_wo32(ctx, 0x040c, 0x00000101); ++ nv_wo32(ctx, 0x0420, 0x00000111); ++ nv_wo32(ctx, 0x0424, 0x00000060); ++ nv_wo32(ctx, 0x0440, 0x00000080); ++ nv_wo32(ctx, 0x0444, 0xffff0000); ++ nv_wo32(ctx, 0x0448, 0x00000001); ++ nv_wo32(ctx, 0x045c, 0x44400000); ++ nv_wo32(ctx, 0x0488, 0xffff0000); + for (i = 0x04dc; i < 0x04e4; i += 4) +- nv_wo32(dev, ctx, i/4, 0x0fff0000); +- nv_wo32(dev, ctx, 0x04e8/4, 0x00011100); ++ nv_wo32(ctx, i, 0x0fff0000); ++ nv_wo32(ctx, 0x04e8, 0x00011100); + for (i = 0x0504; i < 0x0544; i += 4) +- nv_wo32(dev, ctx, i/4, 0x07ff0000); +- nv_wo32(dev, ctx, 0x054c/4, 0x4b7fffff); +- nv_wo32(dev, ctx, 0x0588/4, 0x00000080); +- nv_wo32(dev, ctx, 0x058c/4, 0x30201000); +- nv_wo32(dev, ctx, 0x0590/4, 0x70605040); +- nv_wo32(dev, ctx, 0x0594/4, 0xb8a89888); +- nv_wo32(dev, ctx, 0x0598/4, 0xf8e8d8c8); +- nv_wo32(dev, ctx, 0x05ac/4, 0xb0000000); ++ nv_wo32(ctx, i, 0x07ff0000); ++ nv_wo32(ctx, 0x054c, 0x4b7fffff); ++ nv_wo32(ctx, 0x0588, 0x00000080); ++ nv_wo32(ctx, 0x058c, 0x30201000); ++ nv_wo32(ctx, 0x0590, 0x70605040); ++ nv_wo32(ctx, 0x0594, 0xb8a89888); ++ nv_wo32(ctx, 0x0598, 0xf8e8d8c8); ++ nv_wo32(ctx, 0x05ac, 0xb0000000); + for (i = 0x0604; i < 0x0644; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00010588); ++ nv_wo32(ctx, i, 0x00010588); + for (i = 0x0644; i < 0x0684; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00030303); ++ nv_wo32(ctx, i, 0x00030303); + for (i = 0x06c4; i < 0x0704; i += 4) +- nv_wo32(dev, ctx, i/4, 0x0008aae4); ++ nv_wo32(ctx, i, 0x0008aae4); + for (i = 0x0704; i < 0x0744; i += 4) +- nv_wo32(dev, ctx, i/4, 0x01012000); ++ nv_wo32(ctx, i, 0x01012000); + for (i = 0x0744; i < 0x0784; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00080008); +- 
nv_wo32(dev, ctx, 0x0860/4, 0x00040000); +- nv_wo32(dev, ctx, 0x0864/4, 0x00010000); ++ nv_wo32(ctx, i, 0x00080008); ++ nv_wo32(ctx, 0x0860, 0x00040000); ++ nv_wo32(ctx, 0x0864, 0x00010000); + for (i = 0x0868; i < 0x0878; i += 4) +- nv_wo32(dev, ctx, i/4, 0x00040004); ++ nv_wo32(ctx, i, 0x00040004); + for (i = 0x1f1c; i <= 0x308c ; i += 16) { +- nv_wo32(dev, ctx, i/4 + 0, 0x10700ff9); +- nv_wo32(dev, ctx, i/4 + 1, 0x0436086c); +- nv_wo32(dev, ctx, i/4 + 2, 0x000c001b); ++ nv_wo32(ctx, i + 0, 0x10700ff9); ++ nv_wo32(ctx, i + 4, 0x0436086c); ++ nv_wo32(ctx, i + 8, 0x000c001b); + } + for (i = 0x30bc; i < 0x30cc; i += 4) +- nv_wo32(dev, ctx, i/4, 0x0000ffff); +- nv_wo32(dev, ctx, 0x3450/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x380c/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x3820/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x384c/4, 0x40000000); +- nv_wo32(dev, ctx, 0x3850/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x3854/4, 0x3f000000); +- nv_wo32(dev, ctx, 0x385c/4, 0x40000000); +- nv_wo32(dev, ctx, 0x3860/4, 0x3f800000); +- nv_wo32(dev, ctx, 0x3868/4, 0xbf800000); +- nv_wo32(dev, ctx, 0x3870/4, 0xbf800000); ++ nv_wo32(ctx, i, 0x0000ffff); ++ nv_wo32(ctx, 0x3450, 0x3f800000); ++ nv_wo32(ctx, 0x380c, 0x3f800000); ++ nv_wo32(ctx, 0x3820, 0x3f800000); ++ nv_wo32(ctx, 0x384c, 0x40000000); ++ nv_wo32(ctx, 0x3850, 0x3f800000); ++ nv_wo32(ctx, 0x3854, 0x3f000000); ++ nv_wo32(ctx, 0x385c, 0x40000000); ++ nv_wo32(ctx, 0x3860, 0x3f800000); ++ nv_wo32(ctx, 0x3868, 0xbf800000); ++ nv_wo32(ctx, 0x3870, 0xbf800000); + } + + int +@@ -372,7 +372,7 @@ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; + void (*ctx_init)(struct drm_device *, struct nouveau_gpuobj *); +- unsigned int idoffs = 0x28/4; ++ unsigned int idoffs = 0x28; + int ret; + + switch (dev_priv->chipset) { +@@ -403,21 +403,19 @@ + BUG_ON(1); + } + +- ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size, +- 16, NVOBJ_FLAG_ZERO_ALLOC, +- &chan->ramin_grctx); ++ ret = nouveau_gpuobj_new(dev, chan, pgraph->grctx_size, 16, ++ NVOBJ_FLAG_ZERO_ALLOC, &chan->ramin_grctx); + if (ret) + return ret; + + /* Initialise default context values */ +- ctx_init(dev, chan->ramin_grctx->gpuobj); ++ ctx_init(dev, chan->ramin_grctx); + + /* nv20: nv_wo32(dev, chan->ramin_grctx->gpuobj, 10, chan->id<<24); */ +- nv_wo32(dev, chan->ramin_grctx->gpuobj, idoffs, +- (chan->id << 24) | 0x1); /* CTX_USER */ ++ nv_wo32(chan->ramin_grctx, idoffs, ++ (chan->id << 24) | 0x1); /* CTX_USER */ + +- nv_wo32(dev, pgraph->ctx_table->gpuobj, chan->id, +- chan->ramin_grctx->instance >> 4); ++ nv_wo32(pgraph->ctx_table, chan->id * 4, chan->ramin_grctx->pinst >> 4); + return 0; + } + +@@ -428,10 +426,8 @@ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; + +- if (chan->ramin_grctx) +- nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx); +- +- nv_wo32(dev, pgraph->ctx_table->gpuobj, chan->id, 0); ++ nouveau_gpuobj_ref(NULL, &chan->ramin_grctx); ++ nv_wo32(pgraph->ctx_table, chan->id * 4, 0); + } + + int +@@ -442,7 +438,7 @@ + + if (!chan->ramin_grctx) + return -EINVAL; +- inst = chan->ramin_grctx->instance >> 4; ++ inst = chan->ramin_grctx->pinst >> 4; + + nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, inst); + nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_XFER, +@@ -465,7 +461,7 @@ + chan = pgraph->channel(dev); + if (!chan) + return 0; +- inst = chan->ramin_grctx->instance >> 4; ++ inst = chan->ramin_grctx->pinst >> 4; + + nv_wr32(dev, 
NV20_PGRAPH_CHANNEL_CTX_POINTER, inst); + nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_XFER, +@@ -552,15 +548,15 @@ + + if (!pgraph->ctx_table) { + /* Create Context Pointer Table */ +- ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 32 * 4, 16, +- NVOBJ_FLAG_ZERO_ALLOC, +- &pgraph->ctx_table); ++ ret = nouveau_gpuobj_new(dev, NULL, 32 * 4, 16, ++ NVOBJ_FLAG_ZERO_ALLOC, ++ &pgraph->ctx_table); + if (ret) + return ret; + } + + nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_TABLE, +- pgraph->ctx_table->instance >> 4); ++ pgraph->ctx_table->pinst >> 4); + + nv20_graph_rdi(dev); + +@@ -646,7 +642,7 @@ + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; + +- nouveau_gpuobj_ref_del(dev, &pgraph->ctx_table); ++ nouveau_gpuobj_ref(NULL, &pgraph->ctx_table); + } + + int +@@ -681,15 +677,15 @@ + + if (!pgraph->ctx_table) { + /* Create Context Pointer Table */ +- ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 32 * 4, 16, +- NVOBJ_FLAG_ZERO_ALLOC, +- &pgraph->ctx_table); ++ ret = nouveau_gpuobj_new(dev, NULL, 32 * 4, 16, ++ NVOBJ_FLAG_ZERO_ALLOC, ++ &pgraph->ctx_table); + if (ret) + return ret; + } + + nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_TABLE, +- pgraph->ctx_table->instance >> 4); ++ pgraph->ctx_table->pinst >> 4); + + nv_wr32(dev, NV03_PGRAPH_INTR , 0xFFFFFFFF); + nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF); +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv40_fifo.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv40_fifo.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv40_fifo.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv40_fifo.c 2010-10-15 02:04:44.420992941 +0200 +@@ -27,8 +27,9 @@ + #include "drmP.h" + #include "nouveau_drv.h" + #include "nouveau_drm.h" ++#include "nouveau_ramht.h" + +-#define NV40_RAMFC(c) (dev_priv->ramfc_offset + ((c) * NV40_RAMFC__SIZE)) ++#define NV40_RAMFC(c) (dev_priv->ramfc->pinst + ((c) * NV40_RAMFC__SIZE)) + #define NV40_RAMFC__SIZE 128 + + int +@@ -42,7 +43,7 @@ + + ret = nouveau_gpuobj_new_fake(dev, NV40_RAMFC(chan->id), ~0, + NV40_RAMFC__SIZE, NVOBJ_FLAG_ZERO_ALLOC | +- NVOBJ_FLAG_ZERO_FREE, NULL, &chan->ramfc); ++ NVOBJ_FLAG_ZERO_FREE, &chan->ramfc); + if (ret) + return ret; + +@@ -50,7 +51,7 @@ + + nv_wi32(dev, fc + 0, chan->pushbuf_base); + nv_wi32(dev, fc + 4, chan->pushbuf_base); +- nv_wi32(dev, fc + 12, chan->pushbuf->instance >> 4); ++ nv_wi32(dev, fc + 12, chan->pushbuf->pinst >> 4); + nv_wi32(dev, fc + 24, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES | + NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES | + NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 | +@@ -58,7 +59,7 @@ + NV_PFIFO_CACHE1_BIG_ENDIAN | + #endif + 0x30000000 /* no idea.. 
*/); +- nv_wi32(dev, fc + 56, chan->ramin_grctx->instance >> 4); ++ nv_wi32(dev, fc + 56, chan->ramin_grctx->pinst >> 4); + nv_wi32(dev, fc + 60, 0x0001FFFF); + + /* enable the fifo dma operation */ +@@ -77,8 +78,7 @@ + nv_wr32(dev, NV04_PFIFO_MODE, + nv_rd32(dev, NV04_PFIFO_MODE) & ~(1 << chan->id)); + +- if (chan->ramfc) +- nouveau_gpuobj_ref_del(dev, &chan->ramfc); ++ nouveau_gpuobj_ref(NULL, &chan->ramfc); + } + + static void +@@ -241,9 +241,9 @@ + struct drm_nouveau_private *dev_priv = dev->dev_private; + + nv_wr32(dev, NV03_PFIFO_RAMHT, (0x03 << 24) /* search 128 */ | +- ((dev_priv->ramht_bits - 9) << 16) | +- (dev_priv->ramht_offset >> 8)); +- nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro_offset>>8); ++ ((dev_priv->ramht->bits - 9) << 16) | ++ (dev_priv->ramht->gpuobj->pinst >> 8)); ++ nv_wr32(dev, NV03_PFIFO_RAMRO, dev_priv->ramro->pinst >> 8); + + switch (dev_priv->chipset) { + case 0x47: +@@ -271,7 +271,7 @@ + nv_wr32(dev, 0x2230, 0); + nv_wr32(dev, NV40_PFIFO_RAMFC, + ((dev_priv->vram_size - 512 * 1024 + +- dev_priv->ramfc_offset) >> 16) | (3 << 16)); ++ dev_priv->ramfc->pinst) >> 16) | (3 << 16)); + break; + } + } +@@ -301,7 +301,7 @@ + pfifo->reassign(dev, true); + + for (i = 0; i < dev_priv->engine.fifo.channels; i++) { +- if (dev_priv->fifos[i]) { ++ if (dev_priv->channels.ptr[i]) { + uint32_t mode = nv_rd32(dev, NV04_PFIFO_MODE); + nv_wr32(dev, NV04_PFIFO_MODE, mode | (1 << i)); + } +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv40_graph.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv40_graph.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv40_graph.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv40_graph.c 2010-10-15 02:04:44.421992953 +0200 +@@ -42,10 +42,10 @@ + inst = (inst & NV40_PGRAPH_CTXCTL_CUR_INSTANCE) << 4; + + for (i = 0; i < dev_priv->engine.fifo.channels; i++) { +- struct nouveau_channel *chan = dev_priv->fifos[i]; ++ struct nouveau_channel *chan = dev_priv->channels.ptr[i]; + + if (chan && chan->ramin_grctx && +- chan->ramin_grctx->instance == inst) ++ chan->ramin_grctx->pinst == inst) + return chan; + } + +@@ -61,27 +61,25 @@ + struct nouveau_grctx ctx = {}; + int ret; + +- ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size, +- 16, NVOBJ_FLAG_ZERO_ALLOC, +- &chan->ramin_grctx); ++ ret = nouveau_gpuobj_new(dev, chan, pgraph->grctx_size, 16, ++ NVOBJ_FLAG_ZERO_ALLOC, &chan->ramin_grctx); + if (ret) + return ret; + + /* Initialise default context values */ + ctx.dev = chan->dev; + ctx.mode = NOUVEAU_GRCTX_VALS; +- ctx.data = chan->ramin_grctx->gpuobj; ++ ctx.data = chan->ramin_grctx; + nv40_grctx_init(&ctx); + +- nv_wo32(dev, chan->ramin_grctx->gpuobj, 0, +- chan->ramin_grctx->gpuobj->im_pramin->start); ++ nv_wo32(chan->ramin_grctx, 0, chan->ramin_grctx->pinst); + return 0; + } + + void + nv40_graph_destroy_context(struct nouveau_channel *chan) + { +- nouveau_gpuobj_ref_del(chan->dev, &chan->ramin_grctx); ++ nouveau_gpuobj_ref(NULL, &chan->ramin_grctx); + } + + static int +@@ -135,7 +133,7 @@ + + if (!chan->ramin_grctx) + return -EINVAL; +- inst = chan->ramin_grctx->instance >> 4; ++ inst = chan->ramin_grctx->pinst >> 4; + + ret = nv40_graph_transfer_context(dev, inst, 0); + if (ret) +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv40_grctx.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv40_grctx.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv40_grctx.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv40_grctx.c 2010-10-15 
02:04:44.425993003 +0200 +@@ -596,13 +596,13 @@ + + offset += 0x0280/4; + for (i = 0; i < 16; i++, offset += 2) +- nv_wo32(dev, obj, offset, 0x3f800000); ++ nv_wo32(obj, offset * 4, 0x3f800000); + + for (vs = 0; vs < vs_nr; vs++, offset += vs_len) { + for (i = 0; i < vs_nr_b0 * 6; i += 6) +- nv_wo32(dev, obj, offset + b0_offset + i, 0x00000001); ++ nv_wo32(obj, (offset + b0_offset + i) * 4, 0x00000001); + for (i = 0; i < vs_nr_b1 * 4; i += 4) +- nv_wo32(dev, obj, offset + b1_offset + i, 0x3f800000); ++ nv_wo32(obj, (offset + b1_offset + i) * 4, 0x3f800000); + } + } + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_calc.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_calc.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_calc.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_calc.c 2010-10-15 02:04:44.426993016 +0200 +@@ -51,24 +51,28 @@ + int *N, int *fN, int *M, int *P) + { + fixed20_12 fb_div, a, b; ++ u32 refclk = pll->refclk / 10; ++ u32 max_vco_freq = pll->vco1.maxfreq / 10; ++ u32 max_vco_inputfreq = pll->vco1.max_inputfreq / 10; ++ clk /= 10; + +- *P = pll->vco1.maxfreq / clk; ++ *P = max_vco_freq / clk; + if (*P > pll->max_p) + *P = pll->max_p; + if (*P < pll->min_p) + *P = pll->min_p; + +- /* *M = ceil(refclk / pll->vco.max_inputfreq); */ +- a.full = dfixed_const(pll->refclk); +- b.full = dfixed_const(pll->vco1.max_inputfreq); ++ /* *M = floor((refclk + max_vco_inputfreq) / max_vco_inputfreq); */ ++ a.full = dfixed_const(refclk + max_vco_inputfreq); ++ b.full = dfixed_const(max_vco_inputfreq); + a.full = dfixed_div(a, b); +- a.full = dfixed_ceil(a); ++ a.full = dfixed_floor(a); + *M = dfixed_trunc(a); + + /* fb_div = (vco * *M) / refclk; */ + fb_div.full = dfixed_const(clk * *P); + fb_div.full = dfixed_mul(fb_div, a); +- a.full = dfixed_const(pll->refclk); ++ a.full = dfixed_const(refclk); + fb_div.full = dfixed_div(fb_div, a); + + /* *N = floor(fb_div); */ +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_crtc.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_crtc.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_crtc.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_crtc.c 2010-10-15 02:04:44.428993040 +0200 +@@ -266,15 +266,10 @@ + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct pll_lims pll; +- uint32_t reg, reg1, reg2; ++ uint32_t reg1, reg2; + int ret, N1, M1, N2, M2, P; + +- if (dev_priv->chipset < NV_C0) +- reg = NV50_PDISPLAY_CRTC_CLK_CTRL1(head); +- else +- reg = 0x614140 + (head * 0x800); +- +- ret = get_pll_limits(dev, reg, &pll); ++ ret = get_pll_limits(dev, PLL_VPLL0 + head, &pll); + if (ret) + return ret; + +@@ -286,11 +281,11 @@ + NV_DEBUG(dev, "pclk %d out %d NM1 %d %d NM2 %d %d P %d\n", + pclk, ret, N1, M1, N2, M2, P); + +- reg1 = nv_rd32(dev, reg + 4) & 0xff00ff00; +- reg2 = nv_rd32(dev, reg + 8) & 0x8000ff00; +- nv_wr32(dev, reg, 0x10000611); +- nv_wr32(dev, reg + 4, reg1 | (M1 << 16) | N1); +- nv_wr32(dev, reg + 8, reg2 | (P << 28) | (M2 << 16) | N2); ++ reg1 = nv_rd32(dev, pll.reg + 4) & 0xff00ff00; ++ reg2 = nv_rd32(dev, pll.reg + 8) & 0x8000ff00; ++ nv_wr32(dev, pll.reg + 0, 0x10000611); ++ nv_wr32(dev, pll.reg + 4, reg1 | (M1 << 16) | N1); ++ nv_wr32(dev, pll.reg + 8, reg2 | (P << 28) | (M2 << 16) | N2); + } else + if (dev_priv->chipset < NV_C0) { + ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P); +@@ -300,10 +295,10 @@ + NV_DEBUG(dev, "pclk %d out %d N %d fN 0x%04x M %d P %d\n", + pclk, ret, N1, N2, M1, 
P); + +- reg1 = nv_rd32(dev, reg + 4) & 0xffc00000; +- nv_wr32(dev, reg, 0x50000610); +- nv_wr32(dev, reg + 4, reg1 | (P << 16) | (M1 << 8) | N1); +- nv_wr32(dev, reg + 8, N2); ++ reg1 = nv_rd32(dev, pll.reg + 4) & 0xffc00000; ++ nv_wr32(dev, pll.reg + 0, 0x50000610); ++ nv_wr32(dev, pll.reg + 4, reg1 | (P << 16) | (M1 << 8) | N1); ++ nv_wr32(dev, pll.reg + 8, N2); + } else { + ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P); + if (ret <= 0) +@@ -312,9 +307,9 @@ + NV_DEBUG(dev, "pclk %d out %d N %d fN 0x%04x M %d P %d\n", + pclk, ret, N1, N2, M1, P); + +- nv_mask(dev, reg + 0x0c, 0x00000000, 0x00000100); +- nv_wr32(dev, reg + 0x04, (P << 16) | (N1 << 8) | M1); +- nv_wr32(dev, reg + 0x10, N2 << 16); ++ nv_mask(dev, pll.reg + 0x0c, 0x00000000, 0x00000100); ++ nv_wr32(dev, pll.reg + 0x04, (P << 16) | (N1 << 8) | M1); ++ nv_wr32(dev, pll.reg + 0x10, N2 << 16); + } + + return 0; +@@ -338,7 +333,9 @@ + + nv50_cursor_fini(nv_crtc); + ++ nouveau_bo_unmap(nv_crtc->lut.nvbo); + nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo); ++ nouveau_bo_unmap(nv_crtc->cursor.nvbo); + nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo); + kfree(nv_crtc->mode); + kfree(nv_crtc); +@@ -536,7 +533,7 @@ + } + + nv_crtc->fb.offset = fb->nvbo->bo.offset - dev_priv->vm_vram_base; +- nv_crtc->fb.tile_flags = fb->nvbo->tile_flags; ++ nv_crtc->fb.tile_flags = nouveau_bo_tile_layout(fb->nvbo); + nv_crtc->fb.cpp = drm_fb->bits_per_pixel / 8; + if (!nv_crtc->fb.blanked && dev_priv->chipset != 0x50) { + ret = RING_SPACE(evo, 2); +@@ -568,7 +565,7 @@ + fb->nvbo->tile_mode); + } + if (dev_priv->chipset == 0x50) +- OUT_RING(evo, (fb->nvbo->tile_flags << 8) | format); ++ OUT_RING(evo, (nv_crtc->fb.tile_flags << 8) | format); + else + OUT_RING(evo, format); + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_cursor.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_cursor.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_cursor.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_cursor.c 2010-10-15 02:04:44.429993052 +0200 +@@ -147,7 +147,7 @@ + NV_DEBUG_KMS(dev, "\n"); + + nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx), 0); +- if (!nv_wait(NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx), ++ if (!nv_wait(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx), + NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, 0)) { + NV_ERROR(dev, "timeout: CURSOR_CTRL2_STATUS == 0\n"); + NV_ERROR(dev, "CURSOR_CTRL2 = 0x%08x\n", +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_dac.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_dac.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_dac.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_dac.c 2010-10-15 02:04:44.430993065 +0200 +@@ -79,7 +79,7 @@ + + nv_wr32(dev, NV50_PDISPLAY_DAC_DPMS_CTRL(or), + 0x00150000 | NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING); +- if (!nv_wait(NV50_PDISPLAY_DAC_DPMS_CTRL(or), ++ if (!nv_wait(dev, NV50_PDISPLAY_DAC_DPMS_CTRL(or), + NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING, 0)) { + NV_ERROR(dev, "timeout: DAC_DPMS_CTRL_PENDING(%d) == 0\n", or); + NV_ERROR(dev, "DAC_DPMS_CTRL(%d) = 0x%08x\n", or, +@@ -130,7 +130,7 @@ + NV_DEBUG_KMS(dev, "or %d mode %d\n", or, mode); + + /* wait for it to be done */ +- if (!nv_wait(NV50_PDISPLAY_DAC_DPMS_CTRL(or), ++ if (!nv_wait(dev, NV50_PDISPLAY_DAC_DPMS_CTRL(or), + NV50_PDISPLAY_DAC_DPMS_CTRL_PENDING, 0)) { + NV_ERROR(dev, "timeout: DAC_DPMS_CTRL_PENDING(%d) == 0\n", or); + NV_ERROR(dev, "DAC_DPMS_CTRL(%d) = 0x%08x\n", or, +diff -Naur 
linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_display.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_display.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_display.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_display.c 2010-10-15 02:04:44.433993102 +0200 +@@ -30,8 +30,22 @@ + #include "nouveau_connector.h" + #include "nouveau_fb.h" + #include "nouveau_fbcon.h" ++#include "nouveau_ramht.h" + #include "drm_crtc_helper.h" + ++static inline int ++nv50_sor_nr(struct drm_device *dev) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ ++ if (dev_priv->chipset < 0x90 || ++ dev_priv->chipset == 0x92 || ++ dev_priv->chipset == 0xa0) ++ return 2; ++ ++ return 4; ++} ++ + static void + nv50_evo_channel_del(struct nouveau_channel **pchan) + { +@@ -42,6 +56,7 @@ + *pchan = NULL; + + nouveau_gpuobj_channel_takedown(chan); ++ nouveau_bo_unmap(chan->pushbuf_bo); + nouveau_bo_ref(NULL, &chan->pushbuf_bo); + + if (chan->user) +@@ -65,23 +80,23 @@ + return ret; + obj->engine = NVOBJ_ENGINE_DISPLAY; + +- ret = nouveau_gpuobj_ref_add(dev, evo, name, obj, NULL); +- if (ret) { +- nouveau_gpuobj_del(dev, &obj); +- return ret; +- } +- +- nv_wo32(dev, obj, 0, (tile_flags << 22) | (magic_flags << 16) | class); +- nv_wo32(dev, obj, 1, limit); +- nv_wo32(dev, obj, 2, offset); +- nv_wo32(dev, obj, 3, 0x00000000); +- nv_wo32(dev, obj, 4, 0x00000000); ++ nv_wo32(obj, 0, (tile_flags << 22) | (magic_flags << 16) | class); ++ nv_wo32(obj, 4, limit); ++ nv_wo32(obj, 8, offset); ++ nv_wo32(obj, 12, 0x00000000); ++ nv_wo32(obj, 16, 0x00000000); + if (dev_priv->card_type < NV_C0) +- nv_wo32(dev, obj, 5, 0x00010000); ++ nv_wo32(obj, 20, 0x00010000); + else +- nv_wo32(dev, obj, 5, 0x00020000); ++ nv_wo32(obj, 20, 0x00020000); + dev_priv->engine.instmem.flush(dev); + ++ ret = nouveau_ramht_insert(evo, name, obj); ++ nouveau_gpuobj_ref(NULL, &obj); ++ if (ret) { ++ return ret; ++ } ++ + return 0; + } + +@@ -89,6 +104,7 @@ + nv50_evo_channel_new(struct drm_device *dev, struct nouveau_channel **pchan) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; ++ struct nouveau_gpuobj *ramht = NULL; + struct nouveau_channel *chan; + int ret; + +@@ -102,32 +118,35 @@ + chan->user_get = 4; + chan->user_put = 0; + +- INIT_LIST_HEAD(&chan->ramht_refs); +- +- ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 32768, 0x1000, +- NVOBJ_FLAG_ZERO_ALLOC, &chan->ramin); ++ ret = nouveau_gpuobj_new(dev, NULL, 32768, 0x1000, ++ NVOBJ_FLAG_ZERO_ALLOC, &chan->ramin); + if (ret) { + NV_ERROR(dev, "Error allocating EVO channel memory: %d\n", ret); + nv50_evo_channel_del(pchan); + return ret; + } + +- ret = drm_mm_init(&chan->ramin_heap, +- chan->ramin->gpuobj->im_pramin->start, 32768); ++ ret = drm_mm_init(&chan->ramin_heap, 0, 32768); + if (ret) { + NV_ERROR(dev, "Error initialising EVO PRAMIN heap: %d\n", ret); + nv50_evo_channel_del(pchan); + return ret; + } + +- ret = nouveau_gpuobj_new_ref(dev, chan, chan, 0, 4096, 16, +- 0, &chan->ramht); ++ ret = nouveau_gpuobj_new(dev, chan, 4096, 16, 0, &ramht); + if (ret) { + NV_ERROR(dev, "Unable to allocate EVO RAMHT: %d\n", ret); + nv50_evo_channel_del(pchan); + return ret; + } + ++ ret = nouveau_ramht_new(dev, ramht, &chan->ramht); ++ nouveau_gpuobj_ref(NULL, &ramht); ++ if (ret) { ++ nv50_evo_channel_del(pchan); ++ return ret; ++ } ++ + if (dev_priv->chipset != 0x50) { + ret = nv50_evo_dmaobj_new(chan, 0x3d, NvEvoFB16, 0x70, 0x19, + 0, 0xffffffff); +@@ -227,11 +246,11 @@ + nv_wr32(dev, 0x006101d0 + (i * 0x04), val); + } + /* 
SOR */ +- for (i = 0; i < 4; i++) { ++ for (i = 0; i < nv50_sor_nr(dev); i++) { + val = nv_rd32(dev, 0x0061c000 + (i * 0x800)); + nv_wr32(dev, 0x006101e0 + (i * 0x04), val); + } +- /* Something not yet in use, tv-out maybe. */ ++ /* EXT */ + for (i = 0; i < 3; i++) { + val = nv_rd32(dev, 0x0061e000 + (i * 0x800)); + nv_wr32(dev, 0x006101f0 + (i * 0x04), val); +@@ -260,7 +279,7 @@ + if (nv_rd32(dev, NV50_PDISPLAY_INTR_1) & 0x100) { + nv_wr32(dev, NV50_PDISPLAY_INTR_1, 0x100); + nv_wr32(dev, 0x006194e8, nv_rd32(dev, 0x006194e8) & ~1); +- if (!nv_wait(0x006194e8, 2, 0)) { ++ if (!nv_wait(dev, 0x006194e8, 2, 0)) { + NV_ERROR(dev, "timeout: (0x6194e8 & 2) != 0\n"); + NV_ERROR(dev, "0x6194e8 = 0x%08x\n", + nv_rd32(dev, 0x6194e8)); +@@ -291,7 +310,8 @@ + + nv_wr32(dev, NV50_PDISPLAY_CTRL_STATE, NV50_PDISPLAY_CTRL_STATE_ENABLE); + nv_wr32(dev, NV50_PDISPLAY_CHANNEL_STAT(0), 0x1000b03); +- if (!nv_wait(NV50_PDISPLAY_CHANNEL_STAT(0), 0x40000000, 0x40000000)) { ++ if (!nv_wait(dev, NV50_PDISPLAY_CHANNEL_STAT(0), ++ 0x40000000, 0x40000000)) { + NV_ERROR(dev, "timeout: (0x610200 & 0x40000000) == 0x40000000\n"); + NV_ERROR(dev, "0x610200 = 0x%08x\n", + nv_rd32(dev, NV50_PDISPLAY_CHANNEL_STAT(0))); +@@ -300,7 +320,7 @@ + + for (i = 0; i < 2; i++) { + nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), 0x2000); +- if (!nv_wait(NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), ++ if (!nv_wait(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), + NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, 0)) { + NV_ERROR(dev, "timeout: CURSOR_CTRL2_STATUS == 0\n"); + NV_ERROR(dev, "CURSOR_CTRL2 = 0x%08x\n", +@@ -310,7 +330,7 @@ + + nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), + NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_ON); +- if (!nv_wait(NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), ++ if (!nv_wait(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), + NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, + NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS_ACTIVE)) { + NV_ERROR(dev, "timeout: " +@@ -321,7 +341,7 @@ + } + } + +- nv_wr32(dev, NV50_PDISPLAY_OBJECTS, (evo->ramin->instance >> 8) | 9); ++ nv_wr32(dev, NV50_PDISPLAY_OBJECTS, (evo->ramin->vinst >> 8) | 9); + + /* initialise fifo */ + nv_wr32(dev, NV50_PDISPLAY_CHANNEL_DMA_CB(0), +@@ -330,7 +350,7 @@ + NV50_PDISPLAY_CHANNEL_DMA_CB_VALID); + nv_wr32(dev, NV50_PDISPLAY_CHANNEL_UNK2(0), 0x00010000); + nv_wr32(dev, NV50_PDISPLAY_CHANNEL_UNK3(0), 0x00000002); +- if (!nv_wait(0x610200, 0x80000000, 0x00000000)) { ++ if (!nv_wait(dev, 0x610200, 0x80000000, 0x00000000)) { + NV_ERROR(dev, "timeout: (0x610200 & 0x80000000) == 0\n"); + NV_ERROR(dev, "0x610200 = 0x%08x\n", nv_rd32(dev, 0x610200)); + return -EBUSY; +@@ -370,7 +390,7 @@ + BEGIN_RING(evo, 0, NV50_EVO_CRTC(0, UNK082C), 1); + OUT_RING(evo, 0); + FIRE_RING(evo); +- if (!nv_wait(0x640004, 0xffffffff, evo->dma.put << 2)) ++ if (!nv_wait(dev, 0x640004, 0xffffffff, evo->dma.put << 2)) + NV_ERROR(dev, "evo pushbuf stalled\n"); + + /* enable clock change interrupts. 
*/ +@@ -424,7 +444,7 @@ + continue; + + nv_wr32(dev, NV50_PDISPLAY_INTR_1, mask); +- if (!nv_wait(NV50_PDISPLAY_INTR_1, mask, mask)) { ++ if (!nv_wait(dev, NV50_PDISPLAY_INTR_1, mask, mask)) { + NV_ERROR(dev, "timeout: (0x610024 & 0x%08x) == " + "0x%08x\n", mask, mask); + NV_ERROR(dev, "0x610024 = 0x%08x\n", +@@ -434,14 +454,14 @@ + + nv_wr32(dev, NV50_PDISPLAY_CHANNEL_STAT(0), 0); + nv_wr32(dev, NV50_PDISPLAY_CTRL_STATE, 0); +- if (!nv_wait(NV50_PDISPLAY_CHANNEL_STAT(0), 0x1e0000, 0)) { ++ if (!nv_wait(dev, NV50_PDISPLAY_CHANNEL_STAT(0), 0x1e0000, 0)) { + NV_ERROR(dev, "timeout: (0x610200 & 0x1e0000) == 0\n"); + NV_ERROR(dev, "0x610200 = 0x%08x\n", + nv_rd32(dev, NV50_PDISPLAY_CHANNEL_STAT(0))); + } + + for (i = 0; i < 3; i++) { +- if (!nv_wait(NV50_PDISPLAY_SOR_DPMS_STATE(i), ++ if (!nv_wait(dev, NV50_PDISPLAY_SOR_DPMS_STATE(i), + NV50_PDISPLAY_SOR_DPMS_STATE_WAIT, 0)) { + NV_ERROR(dev, "timeout: SOR_DPMS_STATE_WAIT(%d) == 0\n", i); + NV_ERROR(dev, "SOR_DPMS_STATE(%d) = 0x%08x\n", i, +@@ -710,7 +730,7 @@ + or = i; + } + +- for (i = 0; type == OUTPUT_ANY && i < 4; i++) { ++ for (i = 0; type == OUTPUT_ANY && i < nv50_sor_nr(dev); i++) { + if (dev_priv->chipset < 0x90 || + dev_priv->chipset == 0x92 || + dev_priv->chipset == 0xa0) +@@ -841,7 +861,7 @@ + or = i; + } + +- for (i = 0; type == OUTPUT_ANY && i < 4; i++) { ++ for (i = 0; type == OUTPUT_ANY && i < nv50_sor_nr(dev); i++) { + if (dev_priv->chipset < 0x90 || + dev_priv->chipset == 0x92 || + dev_priv->chipset == 0xa0) +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_fb.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_fb.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_fb.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_fb.c 2010-10-15 02:04:44.435993127 +0200 +@@ -20,6 +20,7 @@ + case 0x50: + nv_wr32(dev, 0x100c90, 0x0707ff); + break; ++ case 0xa3: + case 0xa5: + case 0xa8: + nv_wr32(dev, 0x100c90, 0x0d0fff); +@@ -36,3 +37,46 @@ + nv50_fb_takedown(struct drm_device *dev) + { + } ++ ++void ++nv50_fb_vm_trap(struct drm_device *dev, int display, const char *name) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ unsigned long flags; ++ u32 trap[6], idx, chinst; ++ int i, ch; ++ ++ idx = nv_rd32(dev, 0x100c90); ++ if (!(idx & 0x80000000)) ++ return; ++ idx &= 0x00ffffff; ++ ++ for (i = 0; i < 6; i++) { ++ nv_wr32(dev, 0x100c90, idx | i << 24); ++ trap[i] = nv_rd32(dev, 0x100c94); ++ } ++ nv_wr32(dev, 0x100c90, idx | 0x80000000); ++ ++ if (!display) ++ return; ++ ++ chinst = (trap[2] << 16) | trap[1]; ++ ++ spin_lock_irqsave(&dev_priv->channels.lock, flags); ++ for (ch = 0; ch < dev_priv->engine.fifo.channels; ch++) { ++ struct nouveau_channel *chan = dev_priv->channels.ptr[ch]; ++ ++ if (!chan || !chan->ramin) ++ continue; ++ ++ if (chinst == chan->ramin->vinst >> 12) ++ break; ++ } ++ spin_unlock_irqrestore(&dev_priv->channels.lock, flags); ++ ++ NV_INFO(dev, "%s - VM: Trapped %s at %02x%04x%04x status %08x " ++ "channel %d (0x%08x)\n", ++ name, (trap[5] & 0x100 ? 
"read" : "write"), ++ trap[5] & 0xff, trap[4] & 0xffff, trap[3] & 0xffff, ++ trap[0], ch, chinst); ++} +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_fbcon.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_fbcon.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_fbcon.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_fbcon.c 2010-10-15 02:04:44.436993140 +0200 +@@ -1,28 +1,21 @@ + #include "drmP.h" + #include "nouveau_drv.h" + #include "nouveau_dma.h" ++#include "nouveau_ramht.h" + #include "nouveau_fbcon.h" + +-void ++int + nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect) + { + struct nouveau_fbdev *nfbdev = info->par; + struct drm_device *dev = nfbdev->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_channel *chan = dev_priv->channel; ++ int ret; + +- if (info->state != FBINFO_STATE_RUNNING) +- return; +- +- if (!(info->flags & FBINFO_HWACCEL_DISABLED) && +- RING_SPACE(chan, rect->rop == ROP_COPY ? 7 : 11)) { +- nouveau_fbcon_gpu_lockup(info); +- } +- +- if (info->flags & FBINFO_HWACCEL_DISABLED) { +- cfb_fillrect(info, rect); +- return; +- } ++ ret = RING_SPACE(chan, rect->rop == ROP_COPY ? 7 : 11); ++ if (ret) ++ return ret; + + if (rect->rop != ROP_COPY) { + BEGIN_RING(chan, NvSub2D, 0x02ac, 1); +@@ -44,27 +37,21 @@ + OUT_RING(chan, 3); + } + FIRE_RING(chan); ++ return 0; + } + +-void ++int + nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region) + { + struct nouveau_fbdev *nfbdev = info->par; + struct drm_device *dev = nfbdev->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_channel *chan = dev_priv->channel; ++ int ret; + +- if (info->state != FBINFO_STATE_RUNNING) +- return; +- +- if (!(info->flags & FBINFO_HWACCEL_DISABLED) && RING_SPACE(chan, 12)) { +- nouveau_fbcon_gpu_lockup(info); +- } +- +- if (info->flags & FBINFO_HWACCEL_DISABLED) { +- cfb_copyarea(info, region); +- return; +- } ++ ret = RING_SPACE(chan, 12); ++ if (ret) ++ return ret; + + BEGIN_RING(chan, NvSub2D, 0x0110, 1); + OUT_RING(chan, 0); +@@ -79,9 +66,10 @@ + OUT_RING(chan, 0); + OUT_RING(chan, region->sy); + FIRE_RING(chan); ++ return 0; + } + +-void ++int + nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image) + { + struct nouveau_fbdev *nfbdev = info->par; +@@ -91,23 +79,14 @@ + uint32_t width, dwords, *data = (uint32_t *)image->data; + uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel)); + uint32_t *palette = info->pseudo_palette; ++ int ret; + +- if (info->state != FBINFO_STATE_RUNNING) +- return; +- +- if (image->depth != 1) { +- cfb_imageblit(info, image); +- return; +- } +- +- if (!(info->flags & FBINFO_HWACCEL_DISABLED) && RING_SPACE(chan, 11)) { +- nouveau_fbcon_gpu_lockup(info); +- } ++ if (image->depth != 1) ++ return -ENODEV; + +- if (info->flags & FBINFO_HWACCEL_DISABLED) { +- cfb_imageblit(info, image); +- return; +- } ++ ret = RING_SPACE(chan, 11); ++ if (ret) ++ return ret; + + width = ALIGN(image->width, 32); + dwords = (width * image->height) >> 5; +@@ -133,11 +112,9 @@ + while (dwords) { + int push = dwords > 2047 ? 
2047 : dwords; + +- if (RING_SPACE(chan, push + 1)) { +- nouveau_fbcon_gpu_lockup(info); +- cfb_imageblit(info, image); +- return; +- } ++ ret = RING_SPACE(chan, push + 1); ++ if (ret) ++ return ret; + + dwords -= push; + +@@ -147,6 +124,7 @@ + } + + FIRE_RING(chan); ++ return 0; + } + + int +@@ -193,7 +171,8 @@ + if (ret) + return ret; + +- ret = nouveau_gpuobj_ref_add(dev, dev_priv->channel, Nv2D, eng2d, NULL); ++ ret = nouveau_ramht_insert(dev_priv->channel, Nv2D, eng2d); ++ nouveau_gpuobj_ref(NULL, &eng2d); + if (ret) + return ret; + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_fifo.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_fifo.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_fifo.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_fifo.c 2010-10-15 02:04:44.439993176 +0200 +@@ -27,13 +27,14 @@ + #include "drmP.h" + #include "drm.h" + #include "nouveau_drv.h" ++#include "nouveau_ramht.h" + + static void + nv50_fifo_playlist_update(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo; +- struct nouveau_gpuobj_ref *cur; ++ struct nouveau_gpuobj *cur; + int i, nr; + + NV_DEBUG(dev, "\n"); +@@ -43,12 +44,15 @@ + + /* We never schedule channel 0 or 127 */ + for (i = 1, nr = 0; i < 127; i++) { +- if (dev_priv->fifos[i] && dev_priv->fifos[i]->ramfc) +- nv_wo32(dev, cur->gpuobj, nr++, i); ++ if (dev_priv->channels.ptr[i] && ++ dev_priv->channels.ptr[i]->ramfc) { ++ nv_wo32(cur, (nr * 4), i); ++ nr++; ++ } + } + dev_priv->engine.instmem.flush(dev); + +- nv_wr32(dev, 0x32f4, cur->instance >> 12); ++ nv_wr32(dev, 0x32f4, cur->vinst >> 12); + nv_wr32(dev, 0x32ec, nr); + nv_wr32(dev, 0x2500, 0x101); + } +@@ -57,15 +61,15 @@ + nv50_fifo_channel_enable(struct drm_device *dev, int channel) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_channel *chan = dev_priv->fifos[channel]; ++ struct nouveau_channel *chan = dev_priv->channels.ptr[channel]; + uint32_t inst; + + NV_DEBUG(dev, "ch%d\n", channel); + + if (dev_priv->chipset == 0x50) +- inst = chan->ramfc->instance >> 12; ++ inst = chan->ramfc->vinst >> 12; + else +- inst = chan->ramfc->instance >> 8; ++ inst = chan->ramfc->vinst >> 8; + + nv_wr32(dev, NV50_PFIFO_CTX_TABLE(channel), inst | + NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED); +@@ -115,7 +119,7 @@ + NV_DEBUG(dev, "\n"); + + for (i = 0; i < NV50_PFIFO_CTX_TABLE__SIZE; i++) { +- if (dev_priv->fifos[i]) ++ if (dev_priv->channels.ptr[i]) + nv50_fifo_channel_enable(dev, i); + else + nv50_fifo_channel_disable(dev, i); +@@ -163,19 +167,19 @@ + goto just_reset; + } + +- ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 128*4, 0x1000, +- NVOBJ_FLAG_ZERO_ALLOC, +- &pfifo->playlist[0]); ++ ret = nouveau_gpuobj_new(dev, NULL, 128*4, 0x1000, ++ NVOBJ_FLAG_ZERO_ALLOC, ++ &pfifo->playlist[0]); + if (ret) { + NV_ERROR(dev, "error creating playlist 0: %d\n", ret); + return ret; + } + +- ret = nouveau_gpuobj_new_ref(dev, NULL, NULL, 0, 128*4, 0x1000, +- NVOBJ_FLAG_ZERO_ALLOC, +- &pfifo->playlist[1]); ++ ret = nouveau_gpuobj_new(dev, NULL, 128*4, 0x1000, ++ NVOBJ_FLAG_ZERO_ALLOC, ++ &pfifo->playlist[1]); + if (ret) { +- nouveau_gpuobj_ref_del(dev, &pfifo->playlist[0]); ++ nouveau_gpuobj_ref(NULL, &pfifo->playlist[0]); + NV_ERROR(dev, "error creating playlist 1: %d\n", ret); + return ret; + } +@@ -203,8 +207,8 @@ + if (!pfifo->playlist[0]) + return; + +- nouveau_gpuobj_ref_del(dev, &pfifo->playlist[0]); +- 
nouveau_gpuobj_ref_del(dev, &pfifo->playlist[1]); ++ nouveau_gpuobj_ref(NULL, &pfifo->playlist[0]); ++ nouveau_gpuobj_ref(NULL, &pfifo->playlist[1]); + } + + int +@@ -226,59 +230,54 @@ + NV_DEBUG(dev, "ch%d\n", chan->id); + + if (dev_priv->chipset == 0x50) { +- uint32_t ramin_poffset = chan->ramin->gpuobj->im_pramin->start; +- uint32_t ramin_voffset = chan->ramin->gpuobj->im_backing_start; +- +- ret = nouveau_gpuobj_new_fake(dev, ramin_poffset, ramin_voffset, +- 0x100, NVOBJ_FLAG_ZERO_ALLOC | +- NVOBJ_FLAG_ZERO_FREE, &ramfc, ++ ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst, ++ chan->ramin->vinst, 0x100, ++ NVOBJ_FLAG_ZERO_ALLOC | ++ NVOBJ_FLAG_ZERO_FREE, + &chan->ramfc); + if (ret) + return ret; + +- ret = nouveau_gpuobj_new_fake(dev, ramin_poffset + 0x0400, +- ramin_voffset + 0x0400, 4096, +- 0, NULL, &chan->cache); ++ ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst + 0x0400, ++ chan->ramin->vinst + 0x0400, ++ 4096, 0, &chan->cache); + if (ret) + return ret; + } else { +- ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, 0x100, 256, +- NVOBJ_FLAG_ZERO_ALLOC | +- NVOBJ_FLAG_ZERO_FREE, +- &chan->ramfc); ++ ret = nouveau_gpuobj_new(dev, chan, 0x100, 256, ++ NVOBJ_FLAG_ZERO_ALLOC | ++ NVOBJ_FLAG_ZERO_FREE, &chan->ramfc); + if (ret) + return ret; +- ramfc = chan->ramfc->gpuobj; + +- ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, 4096, 1024, +- 0, &chan->cache); ++ ret = nouveau_gpuobj_new(dev, chan, 4096, 1024, ++ 0, &chan->cache); + if (ret) + return ret; + } ++ ramfc = chan->ramfc; + + spin_lock_irqsave(&dev_priv->context_switch_lock, flags); + +- nv_wo32(dev, ramfc, 0x48/4, chan->pushbuf->instance >> 4); +- nv_wo32(dev, ramfc, 0x80/4, (0 << 27) /* 4KiB */ | +- (4 << 24) /* SEARCH_FULL */ | +- (chan->ramht->instance >> 4)); +- nv_wo32(dev, ramfc, 0x44/4, 0x2101ffff); +- nv_wo32(dev, ramfc, 0x60/4, 0x7fffffff); +- nv_wo32(dev, ramfc, 0x40/4, 0x00000000); +- nv_wo32(dev, ramfc, 0x7c/4, 0x30000001); +- nv_wo32(dev, ramfc, 0x78/4, 0x00000000); +- nv_wo32(dev, ramfc, 0x3c/4, 0x403f6078); +- nv_wo32(dev, ramfc, 0x50/4, chan->pushbuf_base + +- chan->dma.ib_base * 4); +- nv_wo32(dev, ramfc, 0x54/4, drm_order(chan->dma.ib_max + 1) << 16); ++ nv_wo32(ramfc, 0x48, chan->pushbuf->cinst >> 4); ++ nv_wo32(ramfc, 0x80, ((chan->ramht->bits - 9) << 27) | ++ (4 << 24) /* SEARCH_FULL */ | ++ (chan->ramht->gpuobj->cinst >> 4)); ++ nv_wo32(ramfc, 0x44, 0x2101ffff); ++ nv_wo32(ramfc, 0x60, 0x7fffffff); ++ nv_wo32(ramfc, 0x40, 0x00000000); ++ nv_wo32(ramfc, 0x7c, 0x30000001); ++ nv_wo32(ramfc, 0x78, 0x00000000); ++ nv_wo32(ramfc, 0x3c, 0x403f6078); ++ nv_wo32(ramfc, 0x50, chan->pushbuf_base + chan->dma.ib_base * 4); ++ nv_wo32(ramfc, 0x54, drm_order(chan->dma.ib_max + 1) << 16); + + if (dev_priv->chipset != 0x50) { +- nv_wo32(dev, chan->ramin->gpuobj, 0, chan->id); +- nv_wo32(dev, chan->ramin->gpuobj, 1, +- chan->ramfc->instance >> 8); ++ nv_wo32(chan->ramin, 0, chan->id); ++ nv_wo32(chan->ramin, 4, chan->ramfc->vinst >> 8); + +- nv_wo32(dev, ramfc, 0x88/4, chan->cache->instance >> 10); +- nv_wo32(dev, ramfc, 0x98/4, chan->ramin->instance >> 12); ++ nv_wo32(ramfc, 0x88, chan->cache->vinst >> 10); ++ nv_wo32(ramfc, 0x98, chan->ramin->vinst >> 12); + } + + dev_priv->engine.instmem.flush(dev); +@@ -293,12 +292,13 @@ + nv50_fifo_destroy_context(struct nouveau_channel *chan) + { + struct drm_device *dev = chan->dev; +- struct nouveau_gpuobj_ref *ramfc = chan->ramfc; ++ struct nouveau_gpuobj *ramfc = NULL; + + NV_DEBUG(dev, "ch%d\n", chan->id); + + /* This will ensure the channel is seen as disabled. 
*/ +- chan->ramfc = NULL; ++ nouveau_gpuobj_ref(chan->ramfc, &ramfc); ++ nouveau_gpuobj_ref(NULL, &chan->ramfc); + nv50_fifo_channel_disable(dev, chan->id); + + /* Dummy channel, also used on ch 127 */ +@@ -306,8 +306,8 @@ + nv50_fifo_channel_disable(dev, 127); + nv50_fifo_playlist_update(dev); + +- nouveau_gpuobj_ref_del(dev, &ramfc); +- nouveau_gpuobj_ref_del(dev, &chan->cache); ++ nouveau_gpuobj_ref(NULL, &ramfc); ++ nouveau_gpuobj_ref(NULL, &chan->cache); + } + + int +@@ -315,63 +315,63 @@ + { + struct drm_device *dev = chan->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_gpuobj *ramfc = chan->ramfc->gpuobj; +- struct nouveau_gpuobj *cache = chan->cache->gpuobj; ++ struct nouveau_gpuobj *ramfc = chan->ramfc; ++ struct nouveau_gpuobj *cache = chan->cache; + int ptr, cnt; + + NV_DEBUG(dev, "ch%d\n", chan->id); + +- nv_wr32(dev, 0x3330, nv_ro32(dev, ramfc, 0x00/4)); +- nv_wr32(dev, 0x3334, nv_ro32(dev, ramfc, 0x04/4)); +- nv_wr32(dev, 0x3240, nv_ro32(dev, ramfc, 0x08/4)); +- nv_wr32(dev, 0x3320, nv_ro32(dev, ramfc, 0x0c/4)); +- nv_wr32(dev, 0x3244, nv_ro32(dev, ramfc, 0x10/4)); +- nv_wr32(dev, 0x3328, nv_ro32(dev, ramfc, 0x14/4)); +- nv_wr32(dev, 0x3368, nv_ro32(dev, ramfc, 0x18/4)); +- nv_wr32(dev, 0x336c, nv_ro32(dev, ramfc, 0x1c/4)); +- nv_wr32(dev, 0x3370, nv_ro32(dev, ramfc, 0x20/4)); +- nv_wr32(dev, 0x3374, nv_ro32(dev, ramfc, 0x24/4)); +- nv_wr32(dev, 0x3378, nv_ro32(dev, ramfc, 0x28/4)); +- nv_wr32(dev, 0x337c, nv_ro32(dev, ramfc, 0x2c/4)); +- nv_wr32(dev, 0x3228, nv_ro32(dev, ramfc, 0x30/4)); +- nv_wr32(dev, 0x3364, nv_ro32(dev, ramfc, 0x34/4)); +- nv_wr32(dev, 0x32a0, nv_ro32(dev, ramfc, 0x38/4)); +- nv_wr32(dev, 0x3224, nv_ro32(dev, ramfc, 0x3c/4)); +- nv_wr32(dev, 0x324c, nv_ro32(dev, ramfc, 0x40/4)); +- nv_wr32(dev, 0x2044, nv_ro32(dev, ramfc, 0x44/4)); +- nv_wr32(dev, 0x322c, nv_ro32(dev, ramfc, 0x48/4)); +- nv_wr32(dev, 0x3234, nv_ro32(dev, ramfc, 0x4c/4)); +- nv_wr32(dev, 0x3340, nv_ro32(dev, ramfc, 0x50/4)); +- nv_wr32(dev, 0x3344, nv_ro32(dev, ramfc, 0x54/4)); +- nv_wr32(dev, 0x3280, nv_ro32(dev, ramfc, 0x58/4)); +- nv_wr32(dev, 0x3254, nv_ro32(dev, ramfc, 0x5c/4)); +- nv_wr32(dev, 0x3260, nv_ro32(dev, ramfc, 0x60/4)); +- nv_wr32(dev, 0x3264, nv_ro32(dev, ramfc, 0x64/4)); +- nv_wr32(dev, 0x3268, nv_ro32(dev, ramfc, 0x68/4)); +- nv_wr32(dev, 0x326c, nv_ro32(dev, ramfc, 0x6c/4)); +- nv_wr32(dev, 0x32e4, nv_ro32(dev, ramfc, 0x70/4)); +- nv_wr32(dev, 0x3248, nv_ro32(dev, ramfc, 0x74/4)); +- nv_wr32(dev, 0x2088, nv_ro32(dev, ramfc, 0x78/4)); +- nv_wr32(dev, 0x2058, nv_ro32(dev, ramfc, 0x7c/4)); +- nv_wr32(dev, 0x2210, nv_ro32(dev, ramfc, 0x80/4)); ++ nv_wr32(dev, 0x3330, nv_ro32(ramfc, 0x00)); ++ nv_wr32(dev, 0x3334, nv_ro32(ramfc, 0x04)); ++ nv_wr32(dev, 0x3240, nv_ro32(ramfc, 0x08)); ++ nv_wr32(dev, 0x3320, nv_ro32(ramfc, 0x0c)); ++ nv_wr32(dev, 0x3244, nv_ro32(ramfc, 0x10)); ++ nv_wr32(dev, 0x3328, nv_ro32(ramfc, 0x14)); ++ nv_wr32(dev, 0x3368, nv_ro32(ramfc, 0x18)); ++ nv_wr32(dev, 0x336c, nv_ro32(ramfc, 0x1c)); ++ nv_wr32(dev, 0x3370, nv_ro32(ramfc, 0x20)); ++ nv_wr32(dev, 0x3374, nv_ro32(ramfc, 0x24)); ++ nv_wr32(dev, 0x3378, nv_ro32(ramfc, 0x28)); ++ nv_wr32(dev, 0x337c, nv_ro32(ramfc, 0x2c)); ++ nv_wr32(dev, 0x3228, nv_ro32(ramfc, 0x30)); ++ nv_wr32(dev, 0x3364, nv_ro32(ramfc, 0x34)); ++ nv_wr32(dev, 0x32a0, nv_ro32(ramfc, 0x38)); ++ nv_wr32(dev, 0x3224, nv_ro32(ramfc, 0x3c)); ++ nv_wr32(dev, 0x324c, nv_ro32(ramfc, 0x40)); ++ nv_wr32(dev, 0x2044, nv_ro32(ramfc, 0x44)); ++ nv_wr32(dev, 0x322c, nv_ro32(ramfc, 0x48)); ++ nv_wr32(dev, 
0x3234, nv_ro32(ramfc, 0x4c)); ++ nv_wr32(dev, 0x3340, nv_ro32(ramfc, 0x50)); ++ nv_wr32(dev, 0x3344, nv_ro32(ramfc, 0x54)); ++ nv_wr32(dev, 0x3280, nv_ro32(ramfc, 0x58)); ++ nv_wr32(dev, 0x3254, nv_ro32(ramfc, 0x5c)); ++ nv_wr32(dev, 0x3260, nv_ro32(ramfc, 0x60)); ++ nv_wr32(dev, 0x3264, nv_ro32(ramfc, 0x64)); ++ nv_wr32(dev, 0x3268, nv_ro32(ramfc, 0x68)); ++ nv_wr32(dev, 0x326c, nv_ro32(ramfc, 0x6c)); ++ nv_wr32(dev, 0x32e4, nv_ro32(ramfc, 0x70)); ++ nv_wr32(dev, 0x3248, nv_ro32(ramfc, 0x74)); ++ nv_wr32(dev, 0x2088, nv_ro32(ramfc, 0x78)); ++ nv_wr32(dev, 0x2058, nv_ro32(ramfc, 0x7c)); ++ nv_wr32(dev, 0x2210, nv_ro32(ramfc, 0x80)); + +- cnt = nv_ro32(dev, ramfc, 0x84/4); ++ cnt = nv_ro32(ramfc, 0x84); + for (ptr = 0; ptr < cnt; ptr++) { + nv_wr32(dev, NV40_PFIFO_CACHE1_METHOD(ptr), +- nv_ro32(dev, cache, (ptr * 2) + 0)); ++ nv_ro32(cache, (ptr * 8) + 0)); + nv_wr32(dev, NV40_PFIFO_CACHE1_DATA(ptr), +- nv_ro32(dev, cache, (ptr * 2) + 1)); ++ nv_ro32(cache, (ptr * 8) + 4)); + } + nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, cnt << 2); + nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0); + + /* guessing that all the 0x34xx regs aren't on NV50 */ + if (dev_priv->chipset != 0x50) { +- nv_wr32(dev, 0x340c, nv_ro32(dev, ramfc, 0x88/4)); +- nv_wr32(dev, 0x3400, nv_ro32(dev, ramfc, 0x8c/4)); +- nv_wr32(dev, 0x3404, nv_ro32(dev, ramfc, 0x90/4)); +- nv_wr32(dev, 0x3408, nv_ro32(dev, ramfc, 0x94/4)); +- nv_wr32(dev, 0x3410, nv_ro32(dev, ramfc, 0x98/4)); ++ nv_wr32(dev, 0x340c, nv_ro32(ramfc, 0x88)); ++ nv_wr32(dev, 0x3400, nv_ro32(ramfc, 0x8c)); ++ nv_wr32(dev, 0x3404, nv_ro32(ramfc, 0x90)); ++ nv_wr32(dev, 0x3408, nv_ro32(ramfc, 0x94)); ++ nv_wr32(dev, 0x3410, nv_ro32(ramfc, 0x98)); + } + + nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, chan->id | (1<<16)); +@@ -393,68 +393,69 @@ + if (chid < 1 || chid >= dev_priv->engine.fifo.channels - 1) + return 0; + +- chan = dev_priv->fifos[chid]; ++ chan = dev_priv->channels.ptr[chid]; + if (!chan) { + NV_ERROR(dev, "Inactive channel on PFIFO: %d\n", chid); + return -EINVAL; + } + NV_DEBUG(dev, "ch%d\n", chan->id); +- ramfc = chan->ramfc->gpuobj; +- cache = chan->cache->gpuobj; ++ ramfc = chan->ramfc; ++ cache = chan->cache; + +- nv_wo32(dev, ramfc, 0x00/4, nv_rd32(dev, 0x3330)); +- nv_wo32(dev, ramfc, 0x04/4, nv_rd32(dev, 0x3334)); +- nv_wo32(dev, ramfc, 0x08/4, nv_rd32(dev, 0x3240)); +- nv_wo32(dev, ramfc, 0x0c/4, nv_rd32(dev, 0x3320)); +- nv_wo32(dev, ramfc, 0x10/4, nv_rd32(dev, 0x3244)); +- nv_wo32(dev, ramfc, 0x14/4, nv_rd32(dev, 0x3328)); +- nv_wo32(dev, ramfc, 0x18/4, nv_rd32(dev, 0x3368)); +- nv_wo32(dev, ramfc, 0x1c/4, nv_rd32(dev, 0x336c)); +- nv_wo32(dev, ramfc, 0x20/4, nv_rd32(dev, 0x3370)); +- nv_wo32(dev, ramfc, 0x24/4, nv_rd32(dev, 0x3374)); +- nv_wo32(dev, ramfc, 0x28/4, nv_rd32(dev, 0x3378)); +- nv_wo32(dev, ramfc, 0x2c/4, nv_rd32(dev, 0x337c)); +- nv_wo32(dev, ramfc, 0x30/4, nv_rd32(dev, 0x3228)); +- nv_wo32(dev, ramfc, 0x34/4, nv_rd32(dev, 0x3364)); +- nv_wo32(dev, ramfc, 0x38/4, nv_rd32(dev, 0x32a0)); +- nv_wo32(dev, ramfc, 0x3c/4, nv_rd32(dev, 0x3224)); +- nv_wo32(dev, ramfc, 0x40/4, nv_rd32(dev, 0x324c)); +- nv_wo32(dev, ramfc, 0x44/4, nv_rd32(dev, 0x2044)); +- nv_wo32(dev, ramfc, 0x48/4, nv_rd32(dev, 0x322c)); +- nv_wo32(dev, ramfc, 0x4c/4, nv_rd32(dev, 0x3234)); +- nv_wo32(dev, ramfc, 0x50/4, nv_rd32(dev, 0x3340)); +- nv_wo32(dev, ramfc, 0x54/4, nv_rd32(dev, 0x3344)); +- nv_wo32(dev, ramfc, 0x58/4, nv_rd32(dev, 0x3280)); +- nv_wo32(dev, ramfc, 0x5c/4, nv_rd32(dev, 0x3254)); +- nv_wo32(dev, ramfc, 0x60/4, nv_rd32(dev, 0x3260)); +- nv_wo32(dev, ramfc, 0x64/4, 
nv_rd32(dev, 0x3264)); +- nv_wo32(dev, ramfc, 0x68/4, nv_rd32(dev, 0x3268)); +- nv_wo32(dev, ramfc, 0x6c/4, nv_rd32(dev, 0x326c)); +- nv_wo32(dev, ramfc, 0x70/4, nv_rd32(dev, 0x32e4)); +- nv_wo32(dev, ramfc, 0x74/4, nv_rd32(dev, 0x3248)); +- nv_wo32(dev, ramfc, 0x78/4, nv_rd32(dev, 0x2088)); +- nv_wo32(dev, ramfc, 0x7c/4, nv_rd32(dev, 0x2058)); +- nv_wo32(dev, ramfc, 0x80/4, nv_rd32(dev, 0x2210)); ++ nv_wo32(ramfc, 0x00, nv_rd32(dev, 0x3330)); ++ nv_wo32(ramfc, 0x04, nv_rd32(dev, 0x3334)); ++ nv_wo32(ramfc, 0x08, nv_rd32(dev, 0x3240)); ++ nv_wo32(ramfc, 0x0c, nv_rd32(dev, 0x3320)); ++ nv_wo32(ramfc, 0x10, nv_rd32(dev, 0x3244)); ++ nv_wo32(ramfc, 0x14, nv_rd32(dev, 0x3328)); ++ nv_wo32(ramfc, 0x18, nv_rd32(dev, 0x3368)); ++ nv_wo32(ramfc, 0x1c, nv_rd32(dev, 0x336c)); ++ nv_wo32(ramfc, 0x20, nv_rd32(dev, 0x3370)); ++ nv_wo32(ramfc, 0x24, nv_rd32(dev, 0x3374)); ++ nv_wo32(ramfc, 0x28, nv_rd32(dev, 0x3378)); ++ nv_wo32(ramfc, 0x2c, nv_rd32(dev, 0x337c)); ++ nv_wo32(ramfc, 0x30, nv_rd32(dev, 0x3228)); ++ nv_wo32(ramfc, 0x34, nv_rd32(dev, 0x3364)); ++ nv_wo32(ramfc, 0x38, nv_rd32(dev, 0x32a0)); ++ nv_wo32(ramfc, 0x3c, nv_rd32(dev, 0x3224)); ++ nv_wo32(ramfc, 0x40, nv_rd32(dev, 0x324c)); ++ nv_wo32(ramfc, 0x44, nv_rd32(dev, 0x2044)); ++ nv_wo32(ramfc, 0x48, nv_rd32(dev, 0x322c)); ++ nv_wo32(ramfc, 0x4c, nv_rd32(dev, 0x3234)); ++ nv_wo32(ramfc, 0x50, nv_rd32(dev, 0x3340)); ++ nv_wo32(ramfc, 0x54, nv_rd32(dev, 0x3344)); ++ nv_wo32(ramfc, 0x58, nv_rd32(dev, 0x3280)); ++ nv_wo32(ramfc, 0x5c, nv_rd32(dev, 0x3254)); ++ nv_wo32(ramfc, 0x60, nv_rd32(dev, 0x3260)); ++ nv_wo32(ramfc, 0x64, nv_rd32(dev, 0x3264)); ++ nv_wo32(ramfc, 0x68, nv_rd32(dev, 0x3268)); ++ nv_wo32(ramfc, 0x6c, nv_rd32(dev, 0x326c)); ++ nv_wo32(ramfc, 0x70, nv_rd32(dev, 0x32e4)); ++ nv_wo32(ramfc, 0x74, nv_rd32(dev, 0x3248)); ++ nv_wo32(ramfc, 0x78, nv_rd32(dev, 0x2088)); ++ nv_wo32(ramfc, 0x7c, nv_rd32(dev, 0x2058)); ++ nv_wo32(ramfc, 0x80, nv_rd32(dev, 0x2210)); + + put = (nv_rd32(dev, NV03_PFIFO_CACHE1_PUT) & 0x7ff) >> 2; + get = (nv_rd32(dev, NV03_PFIFO_CACHE1_GET) & 0x7ff) >> 2; + ptr = 0; + while (put != get) { +- nv_wo32(dev, cache, ptr++, +- nv_rd32(dev, NV40_PFIFO_CACHE1_METHOD(get))); +- nv_wo32(dev, cache, ptr++, +- nv_rd32(dev, NV40_PFIFO_CACHE1_DATA(get))); ++ nv_wo32(cache, ptr + 0, ++ nv_rd32(dev, NV40_PFIFO_CACHE1_METHOD(get))); ++ nv_wo32(cache, ptr + 4, ++ nv_rd32(dev, NV40_PFIFO_CACHE1_DATA(get))); + get = (get + 1) & 0x1ff; ++ ptr += 8; + } + + /* guessing that all the 0x34xx regs aren't on NV50 */ + if (dev_priv->chipset != 0x50) { +- nv_wo32(dev, ramfc, 0x84/4, ptr >> 1); +- nv_wo32(dev, ramfc, 0x88/4, nv_rd32(dev, 0x340c)); +- nv_wo32(dev, ramfc, 0x8c/4, nv_rd32(dev, 0x3400)); +- nv_wo32(dev, ramfc, 0x90/4, nv_rd32(dev, 0x3404)); +- nv_wo32(dev, ramfc, 0x94/4, nv_rd32(dev, 0x3408)); +- nv_wo32(dev, ramfc, 0x98/4, nv_rd32(dev, 0x3410)); ++ nv_wo32(ramfc, 0x84, ptr >> 3); ++ nv_wo32(ramfc, 0x88, nv_rd32(dev, 0x340c)); ++ nv_wo32(ramfc, 0x8c, nv_rd32(dev, 0x3400)); ++ nv_wo32(ramfc, 0x90, nv_rd32(dev, 0x3404)); ++ nv_wo32(ramfc, 0x94, nv_rd32(dev, 0x3408)); ++ nv_wo32(ramfc, 0x98, nv_rd32(dev, 0x3410)); + } + + dev_priv->engine.instmem.flush(dev); +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_graph.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_graph.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_graph.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_graph.c 2010-10-15 02:04:44.441993202 +0200 +@@ -27,7 +27,7 @@ + #include "drmP.h" + #include 
"drm.h" + #include "nouveau_drv.h" +- ++#include "nouveau_ramht.h" + #include "nouveau_grctx.h" + + static void +@@ -181,7 +181,7 @@ + /* Be sure we're not in the middle of a context switch or bad things + * will happen, such as unloading the wrong pgraph context. + */ +- if (!nv_wait(0x400300, 0x00000001, 0x00000000)) ++ if (!nv_wait(dev, 0x400300, 0x00000001, 0x00000000)) + NV_ERROR(dev, "Ctxprog is still running\n"); + + inst = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR); +@@ -190,9 +190,9 @@ + inst = (inst & NV50_PGRAPH_CTXCTL_CUR_INSTANCE) << 12; + + for (i = 0; i < dev_priv->engine.fifo.channels; i++) { +- struct nouveau_channel *chan = dev_priv->fifos[i]; ++ struct nouveau_channel *chan = dev_priv->channels.ptr[i]; + +- if (chan && chan->ramin && chan->ramin->instance == inst) ++ if (chan && chan->ramin && chan->ramin->vinst == inst) + return chan; + } + +@@ -204,36 +204,34 @@ + { + struct drm_device *dev = chan->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_gpuobj *ramin = chan->ramin->gpuobj; +- struct nouveau_gpuobj *obj; ++ struct nouveau_gpuobj *ramin = chan->ramin; + struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph; + struct nouveau_grctx ctx = {}; + int hdr, ret; + + NV_DEBUG(dev, "ch%d\n", chan->id); + +- ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pgraph->grctx_size, +- 0x1000, NVOBJ_FLAG_ZERO_ALLOC | +- NVOBJ_FLAG_ZERO_FREE, &chan->ramin_grctx); ++ ret = nouveau_gpuobj_new(dev, chan, pgraph->grctx_size, 0x1000, ++ NVOBJ_FLAG_ZERO_ALLOC | ++ NVOBJ_FLAG_ZERO_FREE, &chan->ramin_grctx); + if (ret) + return ret; +- obj = chan->ramin_grctx->gpuobj; + + hdr = (dev_priv->chipset == 0x50) ? 0x200 : 0x20; +- nv_wo32(dev, ramin, (hdr + 0x00)/4, 0x00190002); +- nv_wo32(dev, ramin, (hdr + 0x04)/4, chan->ramin_grctx->instance + +- pgraph->grctx_size - 1); +- nv_wo32(dev, ramin, (hdr + 0x08)/4, chan->ramin_grctx->instance); +- nv_wo32(dev, ramin, (hdr + 0x0c)/4, 0); +- nv_wo32(dev, ramin, (hdr + 0x10)/4, 0); +- nv_wo32(dev, ramin, (hdr + 0x14)/4, 0x00010000); ++ nv_wo32(ramin, hdr + 0x00, 0x00190002); ++ nv_wo32(ramin, hdr + 0x04, chan->ramin_grctx->vinst + ++ pgraph->grctx_size - 1); ++ nv_wo32(ramin, hdr + 0x08, chan->ramin_grctx->vinst); ++ nv_wo32(ramin, hdr + 0x0c, 0); ++ nv_wo32(ramin, hdr + 0x10, 0); ++ nv_wo32(ramin, hdr + 0x14, 0x00010000); + + ctx.dev = chan->dev; + ctx.mode = NOUVEAU_GRCTX_VALS; +- ctx.data = obj; ++ ctx.data = chan->ramin_grctx; + nv50_grctx_init(&ctx); + +- nv_wo32(dev, obj, 0x00000/4, chan->ramin->instance >> 12); ++ nv_wo32(chan->ramin_grctx, 0x00000, chan->ramin->vinst >> 12); + + dev_priv->engine.instmem.flush(dev); + return 0; +@@ -248,14 +246,14 @@ + + NV_DEBUG(dev, "ch%d\n", chan->id); + +- if (!chan->ramin || !chan->ramin->gpuobj) ++ if (!chan->ramin) + return; + + for (i = hdr; i < hdr + 24; i += 4) +- nv_wo32(dev, chan->ramin->gpuobj, i/4, 0); ++ nv_wo32(chan->ramin, i, 0); + dev_priv->engine.instmem.flush(dev); + +- nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx); ++ nouveau_gpuobj_ref(NULL, &chan->ramin_grctx); + } + + static int +@@ -282,7 +280,7 @@ + int + nv50_graph_load_context(struct nouveau_channel *chan) + { +- uint32_t inst = chan->ramin->instance >> 12; ++ uint32_t inst = chan->ramin->vinst >> 12; + + NV_DEBUG(chan->dev, "ch%d\n", chan->id); + return nv50_graph_do_load_context(chan->dev, inst); +@@ -327,15 +325,16 @@ + nv50_graph_nvsw_dma_vblsem(struct nouveau_channel *chan, int grclass, + int mthd, uint32_t data) + { +- struct nouveau_gpuobj_ref *ref = NULL; ++ struct nouveau_gpuobj 
*gpuobj; + +- if (nouveau_gpuobj_ref_find(chan, data, &ref)) ++ gpuobj = nouveau_ramht_find(chan, data); ++ if (!gpuobj) + return -ENOENT; + +- if (nouveau_notifier_offset(ref->gpuobj, NULL)) ++ if (nouveau_notifier_offset(gpuobj, NULL)) + return -EINVAL; + +- chan->nvsw.vblsem = ref->gpuobj; ++ chan->nvsw.vblsem = gpuobj; + chan->nvsw.vblsem_offset = ~0; + return 0; + } +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_grctx.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_grctx.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_grctx.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_grctx.c 2010-10-15 02:04:44.448993289 +0200 +@@ -103,6 +103,9 @@ + #include "nouveau_drv.h" + #include "nouveau_grctx.h" + ++#define IS_NVA3F(x) (((x) > 0xa0 && (x) < 0xaa) || (x) == 0xaf) ++#define IS_NVAAF(x) ((x) >= 0xaa && (x) <= 0xac) ++ + /* + * This code deals with PGRAPH contexts on NV50 family cards. Like NV40, it's + * the GPU itself that does context-switching, but it needs a special +@@ -182,6 +185,7 @@ + case 0xa8: + case 0xaa: + case 0xac: ++ case 0xaf: + break; + default: + NV_ERROR(ctx->dev, "I don't know how to make a ctxprog for " +@@ -268,6 +272,9 @@ + */ + + static void ++nv50_graph_construct_mmio_ddata(struct nouveau_grctx *ctx); ++ ++static void + nv50_graph_construct_mmio(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +@@ -286,7 +293,7 @@ + gr_def(ctx, 0x400840, 0xffe806a8); + } + gr_def(ctx, 0x400844, 0x00000002); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) ++ if (IS_NVA3F(dev_priv->chipset)) + gr_def(ctx, 0x400894, 0x00001000); + gr_def(ctx, 0x4008e8, 0x00000003); + gr_def(ctx, 0x4008ec, 0x00001000); +@@ -299,13 +306,15 @@ + + if (dev_priv->chipset >= 0xa0) + cp_ctx(ctx, 0x400b00, 0x1); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { ++ if (IS_NVA3F(dev_priv->chipset)) { + cp_ctx(ctx, 0x400b10, 0x1); + gr_def(ctx, 0x400b10, 0x0001629d); + cp_ctx(ctx, 0x400b20, 0x1); + gr_def(ctx, 0x400b20, 0x0001629d); + } + ++ nv50_graph_construct_mmio_ddata(ctx); ++ + /* 0C00: VFETCH */ + cp_ctx(ctx, 0x400c08, 0x2); + gr_def(ctx, 0x400c08, 0x0000fe0c); +@@ -314,7 +323,7 @@ + if (dev_priv->chipset < 0xa0) { + cp_ctx(ctx, 0x401008, 0x4); + gr_def(ctx, 0x401014, 0x00001000); +- } else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa) { ++ } else if (!IS_NVA3F(dev_priv->chipset)) { + cp_ctx(ctx, 0x401008, 0x5); + gr_def(ctx, 0x401018, 0x00001000); + } else { +@@ -368,10 +377,13 @@ + case 0xa3: + case 0xa5: + case 0xa8: ++ case 0xaf: + gr_def(ctx, 0x401c00, 0x142500df); + break; + } + ++ /* 2000 */ ++ + /* 2400 */ + cp_ctx(ctx, 0x402400, 0x1); + if (dev_priv->chipset == 0x50) +@@ -380,12 +392,12 @@ + cp_ctx(ctx, 0x402408, 0x2); + gr_def(ctx, 0x402408, 0x00000600); + +- /* 2800 */ ++ /* 2800: CSCHED */ + cp_ctx(ctx, 0x402800, 0x1); + if (dev_priv->chipset == 0x50) + gr_def(ctx, 0x402800, 0x00000006); + +- /* 2C00 */ ++ /* 2C00: ZCULL */ + cp_ctx(ctx, 0x402c08, 0x6); + if (dev_priv->chipset != 0x50) + gr_def(ctx, 0x402c14, 0x01000000); +@@ -396,23 +408,23 @@ + cp_ctx(ctx, 0x402ca0, 0x2); + if (dev_priv->chipset < 0xa0) + gr_def(ctx, 0x402ca0, 0x00000400); +- else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa) ++ else if (!IS_NVA3F(dev_priv->chipset)) + gr_def(ctx, 0x402ca0, 0x00000800); + else + gr_def(ctx, 0x402ca0, 0x00000400); + cp_ctx(ctx, 0x402cac, 0x4); + +- /* 3000 */ ++ /* 3000: ENG2D */ + cp_ctx(ctx, 0x403004, 0x1); + gr_def(ctx, 
0x403004, 0x00000001); + +- /* 3404 */ ++ /* 3400 */ + if (dev_priv->chipset >= 0xa0) { + cp_ctx(ctx, 0x403404, 0x1); + gr_def(ctx, 0x403404, 0x00000001); + } + +- /* 5000 */ ++ /* 5000: CCACHE */ + cp_ctx(ctx, 0x405000, 0x1); + switch (dev_priv->chipset) { + case 0x50: +@@ -425,6 +437,7 @@ + case 0xa8: + case 0xaa: + case 0xac: ++ case 0xaf: + gr_def(ctx, 0x405000, 0x000e0080); + break; + case 0x86: +@@ -441,210 +454,6 @@ + cp_ctx(ctx, 0x405024, 0x1); + cp_ctx(ctx, 0x40502c, 0x1); + +- /* 5400 or maybe 4800 */ +- if (dev_priv->chipset == 0x50) { +- offset = 0x405400; +- cp_ctx(ctx, 0x405400, 0xea); +- } else if (dev_priv->chipset < 0x94) { +- offset = 0x405400; +- cp_ctx(ctx, 0x405400, 0xcb); +- } else if (dev_priv->chipset < 0xa0) { +- offset = 0x405400; +- cp_ctx(ctx, 0x405400, 0xcc); +- } else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- offset = 0x404800; +- cp_ctx(ctx, 0x404800, 0xda); +- } else { +- offset = 0x405400; +- cp_ctx(ctx, 0x405400, 0xd4); +- } +- gr_def(ctx, offset + 0x0c, 0x00000002); +- gr_def(ctx, offset + 0x10, 0x00000001); +- if (dev_priv->chipset >= 0x94) +- offset += 4; +- gr_def(ctx, offset + 0x1c, 0x00000001); +- gr_def(ctx, offset + 0x20, 0x00000100); +- gr_def(ctx, offset + 0x38, 0x00000002); +- gr_def(ctx, offset + 0x3c, 0x00000001); +- gr_def(ctx, offset + 0x40, 0x00000001); +- gr_def(ctx, offset + 0x50, 0x00000001); +- gr_def(ctx, offset + 0x54, 0x003fffff); +- gr_def(ctx, offset + 0x58, 0x00001fff); +- gr_def(ctx, offset + 0x60, 0x00000001); +- gr_def(ctx, offset + 0x64, 0x00000001); +- gr_def(ctx, offset + 0x6c, 0x00000001); +- gr_def(ctx, offset + 0x70, 0x00000001); +- gr_def(ctx, offset + 0x74, 0x00000001); +- gr_def(ctx, offset + 0x78, 0x00000004); +- gr_def(ctx, offset + 0x7c, 0x00000001); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- offset += 4; +- gr_def(ctx, offset + 0x80, 0x00000001); +- gr_def(ctx, offset + 0x84, 0x00000001); +- gr_def(ctx, offset + 0x88, 0x00000007); +- gr_def(ctx, offset + 0x8c, 0x00000001); +- gr_def(ctx, offset + 0x90, 0x00000007); +- gr_def(ctx, offset + 0x94, 0x00000001); +- gr_def(ctx, offset + 0x98, 0x00000001); +- gr_def(ctx, offset + 0x9c, 0x00000001); +- if (dev_priv->chipset == 0x50) { +- gr_def(ctx, offset + 0xb0, 0x00000001); +- gr_def(ctx, offset + 0xb4, 0x00000001); +- gr_def(ctx, offset + 0xbc, 0x00000001); +- gr_def(ctx, offset + 0xc0, 0x0000000a); +- gr_def(ctx, offset + 0xd0, 0x00000040); +- gr_def(ctx, offset + 0xd8, 0x00000002); +- gr_def(ctx, offset + 0xdc, 0x00000100); +- gr_def(ctx, offset + 0xe0, 0x00000001); +- gr_def(ctx, offset + 0xe4, 0x00000100); +- gr_def(ctx, offset + 0x100, 0x00000001); +- gr_def(ctx, offset + 0x124, 0x00000004); +- gr_def(ctx, offset + 0x13c, 0x00000001); +- gr_def(ctx, offset + 0x140, 0x00000100); +- gr_def(ctx, offset + 0x148, 0x00000001); +- gr_def(ctx, offset + 0x154, 0x00000100); +- gr_def(ctx, offset + 0x158, 0x00000001); +- gr_def(ctx, offset + 0x15c, 0x00000100); +- gr_def(ctx, offset + 0x164, 0x00000001); +- gr_def(ctx, offset + 0x170, 0x00000100); +- gr_def(ctx, offset + 0x174, 0x00000001); +- gr_def(ctx, offset + 0x17c, 0x00000001); +- gr_def(ctx, offset + 0x188, 0x00000002); +- gr_def(ctx, offset + 0x190, 0x00000001); +- gr_def(ctx, offset + 0x198, 0x00000001); +- gr_def(ctx, offset + 0x1ac, 0x00000003); +- offset += 0xd0; +- } else { +- gr_def(ctx, offset + 0xb0, 0x00000001); +- gr_def(ctx, offset + 0xb4, 0x00000100); +- gr_def(ctx, offset + 0xbc, 0x00000001); +- gr_def(ctx, offset + 0xc8, 0x00000100); +- gr_def(ctx, offset + 
0xcc, 0x00000001); +- gr_def(ctx, offset + 0xd0, 0x00000100); +- gr_def(ctx, offset + 0xd8, 0x00000001); +- gr_def(ctx, offset + 0xe4, 0x00000100); +- } +- gr_def(ctx, offset + 0xf8, 0x00000004); +- gr_def(ctx, offset + 0xfc, 0x00000070); +- gr_def(ctx, offset + 0x100, 0x00000080); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- offset += 4; +- gr_def(ctx, offset + 0x114, 0x0000000c); +- if (dev_priv->chipset == 0x50) +- offset -= 4; +- gr_def(ctx, offset + 0x11c, 0x00000008); +- gr_def(ctx, offset + 0x120, 0x00000014); +- if (dev_priv->chipset == 0x50) { +- gr_def(ctx, offset + 0x124, 0x00000026); +- offset -= 0x18; +- } else { +- gr_def(ctx, offset + 0x128, 0x00000029); +- gr_def(ctx, offset + 0x12c, 0x00000027); +- gr_def(ctx, offset + 0x130, 0x00000026); +- gr_def(ctx, offset + 0x134, 0x00000008); +- gr_def(ctx, offset + 0x138, 0x00000004); +- gr_def(ctx, offset + 0x13c, 0x00000027); +- } +- gr_def(ctx, offset + 0x148, 0x00000001); +- gr_def(ctx, offset + 0x14c, 0x00000002); +- gr_def(ctx, offset + 0x150, 0x00000003); +- gr_def(ctx, offset + 0x154, 0x00000004); +- gr_def(ctx, offset + 0x158, 0x00000005); +- gr_def(ctx, offset + 0x15c, 0x00000006); +- gr_def(ctx, offset + 0x160, 0x00000007); +- gr_def(ctx, offset + 0x164, 0x00000001); +- gr_def(ctx, offset + 0x1a8, 0x000000cf); +- if (dev_priv->chipset == 0x50) +- offset -= 4; +- gr_def(ctx, offset + 0x1d8, 0x00000080); +- gr_def(ctx, offset + 0x1dc, 0x00000004); +- gr_def(ctx, offset + 0x1e0, 0x00000004); +- if (dev_priv->chipset == 0x50) +- offset -= 4; +- else +- gr_def(ctx, offset + 0x1e4, 0x00000003); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- gr_def(ctx, offset + 0x1ec, 0x00000003); +- offset += 8; +- } +- gr_def(ctx, offset + 0x1e8, 0x00000001); +- if (dev_priv->chipset == 0x50) +- offset -= 4; +- gr_def(ctx, offset + 0x1f4, 0x00000012); +- gr_def(ctx, offset + 0x1f8, 0x00000010); +- gr_def(ctx, offset + 0x1fc, 0x0000000c); +- gr_def(ctx, offset + 0x200, 0x00000001); +- gr_def(ctx, offset + 0x210, 0x00000004); +- gr_def(ctx, offset + 0x214, 0x00000002); +- gr_def(ctx, offset + 0x218, 0x00000004); +- if (dev_priv->chipset >= 0xa0) +- offset += 4; +- gr_def(ctx, offset + 0x224, 0x003fffff); +- gr_def(ctx, offset + 0x228, 0x00001fff); +- if (dev_priv->chipset == 0x50) +- offset -= 0x20; +- else if (dev_priv->chipset >= 0xa0) { +- gr_def(ctx, offset + 0x250, 0x00000001); +- gr_def(ctx, offset + 0x254, 0x00000001); +- gr_def(ctx, offset + 0x258, 0x00000002); +- offset += 0x10; +- } +- gr_def(ctx, offset + 0x250, 0x00000004); +- gr_def(ctx, offset + 0x254, 0x00000014); +- gr_def(ctx, offset + 0x258, 0x00000001); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- offset += 4; +- gr_def(ctx, offset + 0x264, 0x00000002); +- if (dev_priv->chipset >= 0xa0) +- offset += 8; +- gr_def(ctx, offset + 0x270, 0x00000001); +- gr_def(ctx, offset + 0x278, 0x00000002); +- gr_def(ctx, offset + 0x27c, 0x00001000); +- if (dev_priv->chipset == 0x50) +- offset -= 0xc; +- else { +- gr_def(ctx, offset + 0x280, 0x00000e00); +- gr_def(ctx, offset + 0x284, 0x00001000); +- gr_def(ctx, offset + 0x288, 0x00001e00); +- } +- gr_def(ctx, offset + 0x290, 0x00000001); +- gr_def(ctx, offset + 0x294, 0x00000001); +- gr_def(ctx, offset + 0x298, 0x00000001); +- gr_def(ctx, offset + 0x29c, 0x00000001); +- gr_def(ctx, offset + 0x2a0, 0x00000001); +- gr_def(ctx, offset + 0x2b0, 0x00000200); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- gr_def(ctx, offset + 0x2b4, 0x00000200); +- offset += 4; +- } +- if 
(dev_priv->chipset < 0xa0) { +- gr_def(ctx, offset + 0x2b8, 0x00000001); +- gr_def(ctx, offset + 0x2bc, 0x00000070); +- gr_def(ctx, offset + 0x2c0, 0x00000080); +- gr_def(ctx, offset + 0x2cc, 0x00000001); +- gr_def(ctx, offset + 0x2d0, 0x00000070); +- gr_def(ctx, offset + 0x2d4, 0x00000080); +- } else { +- gr_def(ctx, offset + 0x2b8, 0x00000001); +- gr_def(ctx, offset + 0x2bc, 0x000000f0); +- gr_def(ctx, offset + 0x2c0, 0x000000ff); +- gr_def(ctx, offset + 0x2cc, 0x00000001); +- gr_def(ctx, offset + 0x2d0, 0x000000f0); +- gr_def(ctx, offset + 0x2d4, 0x000000ff); +- gr_def(ctx, offset + 0x2dc, 0x00000009); +- offset += 4; +- } +- gr_def(ctx, offset + 0x2e4, 0x00000001); +- gr_def(ctx, offset + 0x2e8, 0x000000cf); +- gr_def(ctx, offset + 0x2f0, 0x00000001); +- gr_def(ctx, offset + 0x300, 0x000000cf); +- gr_def(ctx, offset + 0x308, 0x00000002); +- gr_def(ctx, offset + 0x310, 0x00000001); +- gr_def(ctx, offset + 0x318, 0x00000001); +- gr_def(ctx, offset + 0x320, 0x000000cf); +- gr_def(ctx, offset + 0x324, 0x000000cf); +- gr_def(ctx, offset + 0x328, 0x00000001); +- + /* 6000? */ + if (dev_priv->chipset == 0x50) + cp_ctx(ctx, 0x4063e0, 0x1); +@@ -661,7 +470,7 @@ + gr_def(ctx, 0x406818, 0x00000f80); + else + gr_def(ctx, 0x406818, 0x00001f80); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) ++ if (IS_NVA3F(dev_priv->chipset)) + gr_def(ctx, 0x40681c, 0x00000030); + cp_ctx(ctx, 0x406830, 0x3); + } +@@ -706,7 +515,7 @@ + + if (dev_priv->chipset < 0xa0) + cp_ctx(ctx, 0x407094 + (i<<8), 1); +- else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) ++ else if (!IS_NVA3F(dev_priv->chipset)) + cp_ctx(ctx, 0x407094 + (i<<8), 3); + else { + cp_ctx(ctx, 0x407094 + (i<<8), 4); +@@ -799,6 +608,7 @@ + case 0xa8: + case 0xaa: + case 0xac: ++ case 0xaf: + gr_def(ctx, offset + 0x1c, 0x300c0000); + break; + } +@@ -825,7 +635,7 @@ + gr_def(ctx, base + 0x304, 0x00007070); + else if (dev_priv->chipset < 0xa0) + gr_def(ctx, base + 0x304, 0x00027070); +- else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) ++ else if (!IS_NVA3F(dev_priv->chipset)) + gr_def(ctx, base + 0x304, 0x01127070); + else + gr_def(ctx, base + 0x304, 0x05127070); +@@ -849,7 +659,7 @@ + if (dev_priv->chipset < 0xa0) { + cp_ctx(ctx, base + 0x340, 9); + offset = base + 0x340; +- } else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) { ++ } else if (!IS_NVA3F(dev_priv->chipset)) { + cp_ctx(ctx, base + 0x33c, 0xb); + offset = base + 0x344; + } else { +@@ -880,7 +690,7 @@ + gr_def(ctx, offset + 0x0, 0x000001f0); + gr_def(ctx, offset + 0x4, 0x00000001); + gr_def(ctx, offset + 0x8, 0x00000003); +- if (dev_priv->chipset == 0x50 || dev_priv->chipset >= 0xaa) ++ if (dev_priv->chipset == 0x50 || IS_NVAAF(dev_priv->chipset)) + gr_def(ctx, offset + 0xc, 0x00008000); + gr_def(ctx, offset + 0x14, 0x00039e00); + cp_ctx(ctx, offset + 0x1c, 2); +@@ -892,7 +702,7 @@ + + if (dev_priv->chipset >= 0xa0) { + cp_ctx(ctx, base + 0x54c, 2); +- if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) ++ if (!IS_NVA3F(dev_priv->chipset)) + gr_def(ctx, base + 0x54c, 0x003fe006); + else + gr_def(ctx, base + 0x54c, 0x003fe007); +@@ -948,6 +758,336 @@ + } + } + ++static void ++dd_emit(struct nouveau_grctx *ctx, int num, uint32_t val) { ++ int i; ++ if (val && ctx->mode == NOUVEAU_GRCTX_VALS) ++ for (i = 0; i < num; i++) ++ nv_wo32(ctx->data, 4 * (ctx->ctxvals_pos + i), val); ++ ctx->ctxvals_pos += num; ++} ++ ++static void ++nv50_graph_construct_mmio_ddata(struct nouveau_grctx *ctx) ++{ ++ struct drm_nouveau_private *dev_priv = 
ctx->dev->dev_private; ++ int base, num; ++ base = ctx->ctxvals_pos; ++ ++ /* tesla state */ ++ dd_emit(ctx, 1, 0); /* 00000001 UNK0F90 */ ++ dd_emit(ctx, 1, 0); /* 00000001 UNK135C */ ++ ++ /* SRC_TIC state */ ++ dd_emit(ctx, 1, 0); /* 00000007 SRC_TILE_MODE_Z */ ++ dd_emit(ctx, 1, 2); /* 00000007 SRC_TILE_MODE_Y */ ++ dd_emit(ctx, 1, 1); /* 00000001 SRC_LINEAR #1 */ ++ dd_emit(ctx, 1, 0); /* 000000ff SRC_ADDRESS_HIGH */ ++ dd_emit(ctx, 1, 0); /* 00000001 SRC_SRGB */ ++ if (dev_priv->chipset >= 0x94) ++ dd_emit(ctx, 1, 0); /* 00000003 eng2d UNK0258 */ ++ dd_emit(ctx, 1, 1); /* 00000fff SRC_DEPTH */ ++ dd_emit(ctx, 1, 0x100); /* 0000ffff SRC_HEIGHT */ ++ ++ /* turing state */ ++ dd_emit(ctx, 1, 0); /* 0000000f TEXTURES_LOG2 */ ++ dd_emit(ctx, 1, 0); /* 0000000f SAMPLERS_LOG2 */ ++ dd_emit(ctx, 1, 0); /* 000000ff CB_DEF_ADDRESS_HIGH */ ++ dd_emit(ctx, 1, 0); /* ffffffff CB_DEF_ADDRESS_LOW */ ++ dd_emit(ctx, 1, 0); /* ffffffff SHARED_SIZE */ ++ dd_emit(ctx, 1, 2); /* ffffffff REG_MODE */ ++ dd_emit(ctx, 1, 1); /* 0000ffff BLOCK_ALLOC_THREADS */ ++ dd_emit(ctx, 1, 1); /* 00000001 LANES32 */ ++ dd_emit(ctx, 1, 0); /* 000000ff UNK370 */ ++ dd_emit(ctx, 1, 0); /* 000000ff USER_PARAM_UNK */ ++ dd_emit(ctx, 1, 0); /* 000000ff USER_PARAM_COUNT */ ++ dd_emit(ctx, 1, 1); /* 000000ff UNK384 bits 8-15 */ ++ dd_emit(ctx, 1, 0x3fffff); /* 003fffff TIC_LIMIT */ ++ dd_emit(ctx, 1, 0x1fff); /* 000fffff TSC_LIMIT */ ++ dd_emit(ctx, 1, 0); /* 0000ffff CB_ADDR_INDEX */ ++ dd_emit(ctx, 1, 1); /* 000007ff BLOCKDIM_X */ ++ dd_emit(ctx, 1, 1); /* 000007ff BLOCKDIM_XMY */ ++ dd_emit(ctx, 1, 0); /* 00000001 BLOCKDIM_XMY_OVERFLOW */ ++ dd_emit(ctx, 1, 1); /* 0003ffff BLOCKDIM_XMYMZ */ ++ dd_emit(ctx, 1, 1); /* 000007ff BLOCKDIM_Y */ ++ dd_emit(ctx, 1, 1); /* 0000007f BLOCKDIM_Z */ ++ dd_emit(ctx, 1, 4); /* 000000ff CP_REG_ALLOC_TEMP */ ++ dd_emit(ctx, 1, 1); /* 00000001 BLOCKDIM_DIRTY */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ dd_emit(ctx, 1, 0); /* 00000003 UNK03E8 */ ++ dd_emit(ctx, 1, 1); /* 0000007f BLOCK_ALLOC_HALFWARPS */ ++ dd_emit(ctx, 1, 1); /* 00000007 LOCAL_WARPS_NO_CLAMP */ ++ dd_emit(ctx, 1, 7); /* 00000007 LOCAL_WARPS_LOG_ALLOC */ ++ dd_emit(ctx, 1, 1); /* 00000007 STACK_WARPS_NO_CLAMP */ ++ dd_emit(ctx, 1, 7); /* 00000007 STACK_WARPS_LOG_ALLOC */ ++ dd_emit(ctx, 1, 1); /* 00001fff BLOCK_ALLOC_REGSLOTS_PACKED */ ++ dd_emit(ctx, 1, 1); /* 00001fff BLOCK_ALLOC_REGSLOTS_STRIDED */ ++ dd_emit(ctx, 1, 1); /* 000007ff BLOCK_ALLOC_THREADS */ ++ ++ /* compat 2d state */ ++ if (dev_priv->chipset == 0x50) { ++ dd_emit(ctx, 4, 0); /* 0000ffff clip X, Y, W, H */ ++ ++ dd_emit(ctx, 1, 1); /* ffffffff chroma COLOR_FORMAT */ ++ ++ dd_emit(ctx, 1, 1); /* ffffffff pattern COLOR_FORMAT */ ++ dd_emit(ctx, 1, 0); /* ffffffff pattern SHAPE */ ++ dd_emit(ctx, 1, 1); /* ffffffff pattern PATTERN_SELECT */ ++ ++ dd_emit(ctx, 1, 0xa); /* ffffffff surf2d SRC_FORMAT */ ++ dd_emit(ctx, 1, 0); /* ffffffff surf2d DMA_SRC */ ++ dd_emit(ctx, 1, 0); /* 000000ff surf2d SRC_ADDRESS_HIGH */ ++ dd_emit(ctx, 1, 0); /* ffffffff surf2d SRC_ADDRESS_LOW */ ++ dd_emit(ctx, 1, 0x40); /* 0000ffff surf2d SRC_PITCH */ ++ dd_emit(ctx, 1, 0); /* 0000000f surf2d SRC_TILE_MODE_Z */ ++ dd_emit(ctx, 1, 2); /* 0000000f surf2d SRC_TILE_MODE_Y */ ++ dd_emit(ctx, 1, 0x100); /* ffffffff surf2d SRC_HEIGHT */ ++ dd_emit(ctx, 1, 1); /* 00000001 surf2d SRC_LINEAR */ ++ dd_emit(ctx, 1, 0x100); /* ffffffff surf2d SRC_WIDTH */ ++ ++ dd_emit(ctx, 1, 0); /* 0000ffff gdirect CLIP_B_X */ ++ dd_emit(ctx, 1, 0); /* 0000ffff gdirect CLIP_B_Y */ ++ dd_emit(ctx, 1, 0); /* 
0000ffff gdirect CLIP_C_X */ ++ dd_emit(ctx, 1, 0); /* 0000ffff gdirect CLIP_C_Y */ ++ dd_emit(ctx, 1, 0); /* 0000ffff gdirect CLIP_D_X */ ++ dd_emit(ctx, 1, 0); /* 0000ffff gdirect CLIP_D_Y */ ++ dd_emit(ctx, 1, 1); /* ffffffff gdirect COLOR_FORMAT */ ++ dd_emit(ctx, 1, 0); /* ffffffff gdirect OPERATION */ ++ dd_emit(ctx, 1, 0); /* 0000ffff gdirect POINT_X */ ++ dd_emit(ctx, 1, 0); /* 0000ffff gdirect POINT_Y */ ++ ++ dd_emit(ctx, 1, 0); /* 0000ffff blit SRC_Y */ ++ dd_emit(ctx, 1, 0); /* ffffffff blit OPERATION */ ++ ++ dd_emit(ctx, 1, 0); /* ffffffff ifc OPERATION */ ++ ++ dd_emit(ctx, 1, 0); /* ffffffff iifc INDEX_FORMAT */ ++ dd_emit(ctx, 1, 0); /* ffffffff iifc LUT_OFFSET */ ++ dd_emit(ctx, 1, 4); /* ffffffff iifc COLOR_FORMAT */ ++ dd_emit(ctx, 1, 0); /* ffffffff iifc OPERATION */ ++ } ++ ++ /* m2mf state */ ++ dd_emit(ctx, 1, 0); /* ffffffff m2mf LINE_COUNT */ ++ dd_emit(ctx, 1, 0); /* ffffffff m2mf LINE_LENGTH_IN */ ++ dd_emit(ctx, 2, 0); /* ffffffff m2mf OFFSET_IN, OFFSET_OUT */ ++ dd_emit(ctx, 1, 1); /* ffffffff m2mf TILING_DEPTH_OUT */ ++ dd_emit(ctx, 1, 0x100); /* ffffffff m2mf TILING_HEIGHT_OUT */ ++ dd_emit(ctx, 1, 0); /* ffffffff m2mf TILING_POSITION_OUT_Z */ ++ dd_emit(ctx, 1, 1); /* 00000001 m2mf LINEAR_OUT */ ++ dd_emit(ctx, 2, 0); /* 0000ffff m2mf TILING_POSITION_OUT_X, Y */ ++ dd_emit(ctx, 1, 0x100); /* ffffffff m2mf TILING_PITCH_OUT */ ++ dd_emit(ctx, 1, 1); /* ffffffff m2mf TILING_DEPTH_IN */ ++ dd_emit(ctx, 1, 0x100); /* ffffffff m2mf TILING_HEIGHT_IN */ ++ dd_emit(ctx, 1, 0); /* ffffffff m2mf TILING_POSITION_IN_Z */ ++ dd_emit(ctx, 1, 1); /* 00000001 m2mf LINEAR_IN */ ++ dd_emit(ctx, 2, 0); /* 0000ffff m2mf TILING_POSITION_IN_X, Y */ ++ dd_emit(ctx, 1, 0x100); /* ffffffff m2mf TILING_PITCH_IN */ ++ ++ /* more compat 2d state */ ++ if (dev_priv->chipset == 0x50) { ++ dd_emit(ctx, 1, 1); /* ffffffff line COLOR_FORMAT */ ++ dd_emit(ctx, 1, 0); /* ffffffff line OPERATION */ ++ ++ dd_emit(ctx, 1, 1); /* ffffffff triangle COLOR_FORMAT */ ++ dd_emit(ctx, 1, 0); /* ffffffff triangle OPERATION */ ++ ++ dd_emit(ctx, 1, 0); /* 0000000f sifm TILE_MODE_Z */ ++ dd_emit(ctx, 1, 2); /* 0000000f sifm TILE_MODE_Y */ ++ dd_emit(ctx, 1, 0); /* 000000ff sifm FORMAT_FILTER */ ++ dd_emit(ctx, 1, 1); /* 000000ff sifm FORMAT_ORIGIN */ ++ dd_emit(ctx, 1, 0); /* 0000ffff sifm SRC_PITCH */ ++ dd_emit(ctx, 1, 1); /* 00000001 sifm SRC_LINEAR */ ++ dd_emit(ctx, 1, 0); /* 000000ff sifm SRC_OFFSET_HIGH */ ++ dd_emit(ctx, 1, 0); /* ffffffff sifm SRC_OFFSET */ ++ dd_emit(ctx, 1, 0); /* 0000ffff sifm SRC_HEIGHT */ ++ dd_emit(ctx, 1, 0); /* 0000ffff sifm SRC_WIDTH */ ++ dd_emit(ctx, 1, 3); /* ffffffff sifm COLOR_FORMAT */ ++ dd_emit(ctx, 1, 0); /* ffffffff sifm OPERATION */ ++ ++ dd_emit(ctx, 1, 0); /* ffffffff sifc OPERATION */ ++ } ++ ++ /* tesla state */ ++ dd_emit(ctx, 1, 0); /* 0000000f GP_TEXTURES_LOG2 */ ++ dd_emit(ctx, 1, 0); /* 0000000f GP_SAMPLERS_LOG2 */ ++ dd_emit(ctx, 1, 0); /* 000000ff */ ++ dd_emit(ctx, 1, 0); /* ffffffff */ ++ dd_emit(ctx, 1, 4); /* 000000ff UNK12B0_0 */ ++ dd_emit(ctx, 1, 0x70); /* 000000ff UNK12B0_1 */ ++ dd_emit(ctx, 1, 0x80); /* 000000ff UNK12B0_3 */ ++ dd_emit(ctx, 1, 0); /* 000000ff UNK12B0_2 */ ++ dd_emit(ctx, 1, 0); /* 0000000f FP_TEXTURES_LOG2 */ ++ dd_emit(ctx, 1, 0); /* 0000000f FP_SAMPLERS_LOG2 */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ dd_emit(ctx, 1, 0); /* ffffffff */ ++ dd_emit(ctx, 1, 0); /* 0000007f MULTISAMPLE_SAMPLES_LOG2 */ ++ } else { ++ dd_emit(ctx, 1, 0); /* 0000000f MULTISAMPLE_SAMPLES_LOG2 */ ++ } ++ dd_emit(ctx, 1, 0xc); /* 000000ff 
SEMANTIC_COLOR.BFC0_ID */ ++ if (dev_priv->chipset != 0x50) ++ dd_emit(ctx, 1, 0); /* 00000001 SEMANTIC_COLOR.CLMP_EN */ ++ dd_emit(ctx, 1, 8); /* 000000ff SEMANTIC_COLOR.COLR_NR */ ++ dd_emit(ctx, 1, 0x14); /* 000000ff SEMANTIC_COLOR.FFC0_ID */ ++ if (dev_priv->chipset == 0x50) { ++ dd_emit(ctx, 1, 0); /* 000000ff SEMANTIC_LAYER */ ++ dd_emit(ctx, 1, 0); /* 00000001 */ ++ } else { ++ dd_emit(ctx, 1, 0); /* 00000001 SEMANTIC_PTSZ.ENABLE */ ++ dd_emit(ctx, 1, 0x29); /* 000000ff SEMANTIC_PTSZ.PTSZ_ID */ ++ dd_emit(ctx, 1, 0x27); /* 000000ff SEMANTIC_PRIM */ ++ dd_emit(ctx, 1, 0x26); /* 000000ff SEMANTIC_LAYER */ ++ dd_emit(ctx, 1, 8); /* 0000000f SEMANTIC_CLIP.CLIP_HIGH */ ++ dd_emit(ctx, 1, 4); /* 000000ff SEMANTIC_CLIP.CLIP_LO */ ++ dd_emit(ctx, 1, 0x27); /* 000000ff UNK0FD4 */ ++ dd_emit(ctx, 1, 0); /* 00000001 UNK1900 */ ++ } ++ dd_emit(ctx, 1, 0); /* 00000007 RT_CONTROL_MAP0 */ ++ dd_emit(ctx, 1, 1); /* 00000007 RT_CONTROL_MAP1 */ ++ dd_emit(ctx, 1, 2); /* 00000007 RT_CONTROL_MAP2 */ ++ dd_emit(ctx, 1, 3); /* 00000007 RT_CONTROL_MAP3 */ ++ dd_emit(ctx, 1, 4); /* 00000007 RT_CONTROL_MAP4 */ ++ dd_emit(ctx, 1, 5); /* 00000007 RT_CONTROL_MAP5 */ ++ dd_emit(ctx, 1, 6); /* 00000007 RT_CONTROL_MAP6 */ ++ dd_emit(ctx, 1, 7); /* 00000007 RT_CONTROL_MAP7 */ ++ dd_emit(ctx, 1, 1); /* 0000000f RT_CONTROL_COUNT */ ++ dd_emit(ctx, 8, 0); /* 00000001 RT_HORIZ_UNK */ ++ dd_emit(ctx, 8, 0); /* ffffffff RT_ADDRESS_LOW */ ++ dd_emit(ctx, 1, 0xcf); /* 000000ff RT_FORMAT */ ++ dd_emit(ctx, 7, 0); /* 000000ff RT_FORMAT */ ++ if (dev_priv->chipset != 0x50) ++ dd_emit(ctx, 3, 0); /* 1, 1, 1 */ ++ else ++ dd_emit(ctx, 2, 0); /* 1, 1 */ ++ dd_emit(ctx, 1, 0); /* ffffffff GP_ENABLE */ ++ dd_emit(ctx, 1, 0x80); /* 0000ffff GP_VERTEX_OUTPUT_COUNT */ ++ dd_emit(ctx, 1, 4); /* 000000ff GP_REG_ALLOC_RESULT */ ++ dd_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ dd_emit(ctx, 1, 3); /* 00000003 */ ++ dd_emit(ctx, 1, 0); /* 00000001 UNK1418. Alone. 
*/ ++ } ++ if (dev_priv->chipset != 0x50) ++ dd_emit(ctx, 1, 3); /* 00000003 UNK15AC */ ++ dd_emit(ctx, 1, 1); /* ffffffff RASTERIZE_ENABLE */ ++ dd_emit(ctx, 1, 0); /* 00000001 FP_CONTROL.EXPORTS_Z */ ++ if (dev_priv->chipset != 0x50) ++ dd_emit(ctx, 1, 0); /* 00000001 FP_CONTROL.MULTIPLE_RESULTS */ ++ dd_emit(ctx, 1, 0x12); /* 000000ff FP_INTERPOLANT_CTRL.COUNT */ ++ dd_emit(ctx, 1, 0x10); /* 000000ff FP_INTERPOLANT_CTRL.COUNT_NONFLAT */ ++ dd_emit(ctx, 1, 0xc); /* 000000ff FP_INTERPOLANT_CTRL.OFFSET */ ++ dd_emit(ctx, 1, 1); /* 00000001 FP_INTERPOLANT_CTRL.UMASK.W */ ++ dd_emit(ctx, 1, 0); /* 00000001 FP_INTERPOLANT_CTRL.UMASK.X */ ++ dd_emit(ctx, 1, 0); /* 00000001 FP_INTERPOLANT_CTRL.UMASK.Y */ ++ dd_emit(ctx, 1, 0); /* 00000001 FP_INTERPOLANT_CTRL.UMASK.Z */ ++ dd_emit(ctx, 1, 4); /* 000000ff FP_RESULT_COUNT */ ++ dd_emit(ctx, 1, 2); /* ffffffff REG_MODE */ ++ dd_emit(ctx, 1, 4); /* 000000ff FP_REG_ALLOC_TEMP */ ++ if (dev_priv->chipset >= 0xa0) ++ dd_emit(ctx, 1, 0); /* ffffffff */ ++ dd_emit(ctx, 1, 0); /* 00000001 GP_BUILTIN_RESULT_EN.LAYER_IDX */ ++ dd_emit(ctx, 1, 0); /* ffffffff STRMOUT_ENABLE */ ++ dd_emit(ctx, 1, 0x3fffff); /* 003fffff TIC_LIMIT */ ++ dd_emit(ctx, 1, 0x1fff); /* 000fffff TSC_LIMIT */ ++ dd_emit(ctx, 1, 0); /* 00000001 VERTEX_TWO_SIDE_ENABLE*/ ++ if (dev_priv->chipset != 0x50) ++ dd_emit(ctx, 8, 0); /* 00000001 */ ++ if (dev_priv->chipset >= 0xa0) { ++ dd_emit(ctx, 1, 1); /* 00000007 VTX_ATTR_DEFINE.COMP */ ++ dd_emit(ctx, 1, 1); /* 00000007 VTX_ATTR_DEFINE.SIZE */ ++ dd_emit(ctx, 1, 2); /* 00000007 VTX_ATTR_DEFINE.TYPE */ ++ dd_emit(ctx, 1, 0); /* 000000ff VTX_ATTR_DEFINE.ATTR */ ++ } ++ dd_emit(ctx, 1, 4); /* 0000007f VP_RESULT_MAP_SIZE */ ++ dd_emit(ctx, 1, 0x14); /* 0000001f ZETA_FORMAT */ ++ dd_emit(ctx, 1, 1); /* 00000001 ZETA_ENABLE */ ++ dd_emit(ctx, 1, 0); /* 0000000f VP_TEXTURES_LOG2 */ ++ dd_emit(ctx, 1, 0); /* 0000000f VP_SAMPLERS_LOG2 */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ dd_emit(ctx, 1, 0); /* 00000001 */ ++ dd_emit(ctx, 1, 2); /* 00000003 POLYGON_MODE_BACK */ ++ if (dev_priv->chipset >= 0xa0) ++ dd_emit(ctx, 1, 0); /* 00000003 VTX_ATTR_DEFINE.SIZE - 1 */ ++ dd_emit(ctx, 1, 0); /* 0000ffff CB_ADDR_INDEX */ ++ if (dev_priv->chipset >= 0xa0) ++ dd_emit(ctx, 1, 0); /* 00000003 */ ++ dd_emit(ctx, 1, 0); /* 00000001 CULL_FACE_ENABLE */ ++ dd_emit(ctx, 1, 1); /* 00000003 CULL_FACE */ ++ dd_emit(ctx, 1, 0); /* 00000001 FRONT_FACE */ ++ dd_emit(ctx, 1, 2); /* 00000003 POLYGON_MODE_FRONT */ ++ dd_emit(ctx, 1, 0x1000); /* 00007fff UNK141C */ ++ if (dev_priv->chipset != 0x50) { ++ dd_emit(ctx, 1, 0xe00); /* 7fff */ ++ dd_emit(ctx, 1, 0x1000); /* 7fff */ ++ dd_emit(ctx, 1, 0x1e00); /* 7fff */ ++ } ++ dd_emit(ctx, 1, 0); /* 00000001 BEGIN_END_ACTIVE */ ++ dd_emit(ctx, 1, 1); /* 00000001 POLYGON_MODE_??? */ ++ dd_emit(ctx, 1, 1); /* 000000ff GP_REG_ALLOC_TEMP / 4 rounded up */ ++ dd_emit(ctx, 1, 1); /* 000000ff FP_REG_ALLOC_TEMP... without /4? 
*/ ++ dd_emit(ctx, 1, 1); /* 000000ff VP_REG_ALLOC_TEMP / 4 rounded up */ ++ dd_emit(ctx, 1, 1); /* 00000001 */ ++ dd_emit(ctx, 1, 0); /* 00000001 */ ++ dd_emit(ctx, 1, 0); /* 00000001 VTX_ATTR_MASK_UNK0 nonempty */ ++ dd_emit(ctx, 1, 0); /* 00000001 VTX_ATTR_MASK_UNK1 nonempty */ ++ dd_emit(ctx, 1, 0x200); /* 0003ffff GP_VERTEX_OUTPUT_COUNT*GP_REG_ALLOC_RESULT */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ dd_emit(ctx, 1, 0x200); ++ dd_emit(ctx, 1, 0); /* 00000001 */ ++ if (dev_priv->chipset < 0xa0) { ++ dd_emit(ctx, 1, 1); /* 00000001 */ ++ dd_emit(ctx, 1, 0x70); /* 000000ff */ ++ dd_emit(ctx, 1, 0x80); /* 000000ff */ ++ dd_emit(ctx, 1, 0); /* 000000ff */ ++ dd_emit(ctx, 1, 0); /* 00000001 */ ++ dd_emit(ctx, 1, 1); /* 00000001 */ ++ dd_emit(ctx, 1, 0x70); /* 000000ff */ ++ dd_emit(ctx, 1, 0x80); /* 000000ff */ ++ dd_emit(ctx, 1, 0); /* 000000ff */ ++ } else { ++ dd_emit(ctx, 1, 1); /* 00000001 */ ++ dd_emit(ctx, 1, 0xf0); /* 000000ff */ ++ dd_emit(ctx, 1, 0xff); /* 000000ff */ ++ dd_emit(ctx, 1, 0); /* 000000ff */ ++ dd_emit(ctx, 1, 0); /* 00000001 */ ++ dd_emit(ctx, 1, 1); /* 00000001 */ ++ dd_emit(ctx, 1, 0xf0); /* 000000ff */ ++ dd_emit(ctx, 1, 0xff); /* 000000ff */ ++ dd_emit(ctx, 1, 0); /* 000000ff */ ++ dd_emit(ctx, 1, 9); /* 0000003f UNK114C.COMP,SIZE */ ++ } ++ ++ /* eng2d state */ ++ dd_emit(ctx, 1, 0); /* 00000001 eng2d COLOR_KEY_ENABLE */ ++ dd_emit(ctx, 1, 0); /* 00000007 eng2d COLOR_KEY_FORMAT */ ++ dd_emit(ctx, 1, 1); /* ffffffff eng2d DST_DEPTH */ ++ dd_emit(ctx, 1, 0xcf); /* 000000ff eng2d DST_FORMAT */ ++ dd_emit(ctx, 1, 0); /* ffffffff eng2d DST_LAYER */ ++ dd_emit(ctx, 1, 1); /* 00000001 eng2d DST_LINEAR */ ++ dd_emit(ctx, 1, 0); /* 00000007 eng2d PATTERN_COLOR_FORMAT */ ++ dd_emit(ctx, 1, 0); /* 00000007 eng2d OPERATION */ ++ dd_emit(ctx, 1, 0); /* 00000003 eng2d PATTERN_SELECT */ ++ dd_emit(ctx, 1, 0xcf); /* 000000ff eng2d SIFC_FORMAT */ ++ dd_emit(ctx, 1, 0); /* 00000001 eng2d SIFC_BITMAP_ENABLE */ ++ dd_emit(ctx, 1, 2); /* 00000003 eng2d SIFC_BITMAP_UNK808 */ ++ dd_emit(ctx, 1, 0); /* ffffffff eng2d BLIT_DU_DX_FRACT */ ++ dd_emit(ctx, 1, 1); /* ffffffff eng2d BLIT_DU_DX_INT */ ++ dd_emit(ctx, 1, 0); /* ffffffff eng2d BLIT_DV_DY_FRACT */ ++ dd_emit(ctx, 1, 1); /* ffffffff eng2d BLIT_DV_DY_INT */ ++ dd_emit(ctx, 1, 0); /* 00000001 eng2d BLIT_CONTROL_FILTER */ ++ dd_emit(ctx, 1, 0xcf); /* 000000ff eng2d DRAW_COLOR_FORMAT */ ++ dd_emit(ctx, 1, 0xcf); /* 000000ff eng2d SRC_FORMAT */ ++ dd_emit(ctx, 1, 1); /* 00000001 eng2d SRC_LINEAR #2 */ ++ ++ num = ctx->ctxvals_pos - base; ++ ctx->ctxvals_pos = base; ++ if (IS_NVA3F(dev_priv->chipset)) ++ cp_ctx(ctx, 0x404800, num); ++ else ++ cp_ctx(ctx, 0x405400, num); ++} ++ + /* + * xfer areas. These are a pain. + * +@@ -990,28 +1130,33 @@ + * without the help of ctxprog. + */ + +-static inline void ++static void + xf_emit(struct nouveau_grctx *ctx, int num, uint32_t val) { + int i; + if (val && ctx->mode == NOUVEAU_GRCTX_VALS) + for (i = 0; i < num; i++) +- nv_wo32(ctx->dev, ctx->data, ctx->ctxvals_pos + (i << 3), val); ++ nv_wo32(ctx->data, 4 * (ctx->ctxvals_pos + (i << 3)), val); + ctx->ctxvals_pos += num << 3; + } + + /* Gene declarations... 
*/ + ++static void nv50_graph_construct_gene_dispatch(struct nouveau_grctx *ctx); + static void nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx); +-static void nv50_graph_construct_gene_unk1(struct nouveau_grctx *ctx); +-static void nv50_graph_construct_gene_unk2(struct nouveau_grctx *ctx); +-static void nv50_graph_construct_gene_unk3(struct nouveau_grctx *ctx); +-static void nv50_graph_construct_gene_unk4(struct nouveau_grctx *ctx); +-static void nv50_graph_construct_gene_unk5(struct nouveau_grctx *ctx); +-static void nv50_graph_construct_gene_unk6(struct nouveau_grctx *ctx); +-static void nv50_graph_construct_gene_unk7(struct nouveau_grctx *ctx); +-static void nv50_graph_construct_gene_unk8(struct nouveau_grctx *ctx); +-static void nv50_graph_construct_gene_unk9(struct nouveau_grctx *ctx); +-static void nv50_graph_construct_gene_unk10(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_ccache(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_unk10xx(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_unk14xx(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_zcull(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_clipid(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_unk24xx(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_vfetch(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_eng2d(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_csched(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_unk1cxx(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_strmout(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_unk34xx(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_ropm1(struct nouveau_grctx *ctx); ++static void nv50_graph_construct_gene_ropm2(struct nouveau_grctx *ctx); + static void nv50_graph_construct_gene_ropc(struct nouveau_grctx *ctx); + static void nv50_graph_construct_xfer_tp(struct nouveau_grctx *ctx); + +@@ -1030,102 +1175,32 @@ + if (dev_priv->chipset < 0xa0) { + /* Strand 0 */ + ctx->ctxvals_pos = offset; +- switch (dev_priv->chipset) { +- case 0x50: +- xf_emit(ctx, 0x99, 0); +- break; +- case 0x84: +- case 0x86: +- xf_emit(ctx, 0x384, 0); +- break; +- case 0x92: +- case 0x94: +- case 0x96: +- case 0x98: +- xf_emit(ctx, 0x380, 0); +- break; +- } +- nv50_graph_construct_gene_m2mf (ctx); +- switch (dev_priv->chipset) { +- case 0x50: +- case 0x84: +- case 0x86: +- case 0x98: +- xf_emit(ctx, 0x4c4, 0); +- break; +- case 0x92: +- case 0x94: +- case 0x96: +- xf_emit(ctx, 0x984, 0); +- break; +- } +- nv50_graph_construct_gene_unk5(ctx); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 0xa, 0); +- else +- xf_emit(ctx, 0xb, 0); +- nv50_graph_construct_gene_unk4(ctx); +- nv50_graph_construct_gene_unk3(ctx); ++ nv50_graph_construct_gene_dispatch(ctx); ++ nv50_graph_construct_gene_m2mf(ctx); ++ nv50_graph_construct_gene_unk24xx(ctx); ++ nv50_graph_construct_gene_clipid(ctx); ++ nv50_graph_construct_gene_zcull(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + + /* Strand 1 */ + ctx->ctxvals_pos = offset + 0x1; +- nv50_graph_construct_gene_unk6(ctx); +- nv50_graph_construct_gene_unk7(ctx); +- nv50_graph_construct_gene_unk8(ctx); +- switch (dev_priv->chipset) { +- case 0x50: +- case 0x92: +- xf_emit(ctx, 0xfb, 0); +- break; +- case 0x84: +- xf_emit(ctx, 0xd3, 0); +- break; +- case 0x94: +- case 0x96: +- xf_emit(ctx, 
0xab, 0); +- break; +- case 0x86: +- case 0x98: +- xf_emit(ctx, 0x6b, 0); +- break; +- } +- xf_emit(ctx, 2, 0x4e3bfdf); +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 0x0fac6881); +- xf_emit(ctx, 0xb, 0); +- xf_emit(ctx, 2, 0x4e3bfdf); ++ nv50_graph_construct_gene_vfetch(ctx); ++ nv50_graph_construct_gene_eng2d(ctx); ++ nv50_graph_construct_gene_csched(ctx); ++ nv50_graph_construct_gene_ropm1(ctx); ++ nv50_graph_construct_gene_ropm2(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + + /* Strand 2 */ + ctx->ctxvals_pos = offset + 0x2; +- switch (dev_priv->chipset) { +- case 0x50: +- case 0x92: +- xf_emit(ctx, 0xa80, 0); +- break; +- case 0x84: +- xf_emit(ctx, 0xa7e, 0); +- break; +- case 0x94: +- case 0x96: +- xf_emit(ctx, 0xa7c, 0); +- break; +- case 0x86: +- case 0x98: +- xf_emit(ctx, 0xa7a, 0); +- break; +- } +- xf_emit(ctx, 1, 0x3fffff); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x1fff); +- xf_emit(ctx, 0xe, 0); +- nv50_graph_construct_gene_unk9(ctx); +- nv50_graph_construct_gene_unk2(ctx); +- nv50_graph_construct_gene_unk1(ctx); +- nv50_graph_construct_gene_unk10(ctx); ++ nv50_graph_construct_gene_ccache(ctx); ++ nv50_graph_construct_gene_unk1cxx(ctx); ++ nv50_graph_construct_gene_strmout(ctx); ++ nv50_graph_construct_gene_unk14xx(ctx); ++ nv50_graph_construct_gene_unk10xx(ctx); ++ nv50_graph_construct_gene_unk34xx(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + +@@ -1150,86 +1225,46 @@ + } else { + /* Strand 0 */ + ctx->ctxvals_pos = offset; +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 0x385, 0); +- else +- xf_emit(ctx, 0x384, 0); ++ nv50_graph_construct_gene_dispatch(ctx); + nv50_graph_construct_gene_m2mf(ctx); +- xf_emit(ctx, 0x950, 0); +- nv50_graph_construct_gene_unk10(ctx); +- xf_emit(ctx, 1, 0x0fac6881); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 3, 0); +- } +- nv50_graph_construct_gene_unk8(ctx); +- if (dev_priv->chipset == 0xa0) +- xf_emit(ctx, 0x189, 0); +- else if (dev_priv->chipset == 0xa3) +- xf_emit(ctx, 0xd5, 0); +- else if (dev_priv->chipset == 0xa5) +- xf_emit(ctx, 0x99, 0); +- else if (dev_priv->chipset == 0xaa) +- xf_emit(ctx, 0x65, 0); +- else +- xf_emit(ctx, 0x6d, 0); +- nv50_graph_construct_gene_unk9(ctx); ++ nv50_graph_construct_gene_unk34xx(ctx); ++ nv50_graph_construct_gene_csched(ctx); ++ nv50_graph_construct_gene_unk1cxx(ctx); ++ nv50_graph_construct_gene_strmout(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + + /* Strand 1 */ + ctx->ctxvals_pos = offset + 1; +- nv50_graph_construct_gene_unk1(ctx); ++ nv50_graph_construct_gene_unk10xx(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + + /* Strand 2 */ + ctx->ctxvals_pos = offset + 2; +- if (dev_priv->chipset == 0xa0) { +- nv50_graph_construct_gene_unk2(ctx); +- } +- xf_emit(ctx, 0x36, 0); +- nv50_graph_construct_gene_unk5(ctx); ++ if (dev_priv->chipset == 0xa0) ++ nv50_graph_construct_gene_unk14xx(ctx); ++ nv50_graph_construct_gene_unk24xx(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + + /* Strand 3 */ + ctx->ctxvals_pos = offset + 3; +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- nv50_graph_construct_gene_unk6(ctx); ++ nv50_graph_construct_gene_vfetch(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + + /* Strand 4 */ + ctx->ctxvals_pos = offset + 4; +- if (dev_priv->chipset == 0xa0) +- 
xf_emit(ctx, 0xa80, 0); +- else if (dev_priv->chipset == 0xa3) +- xf_emit(ctx, 0xa7c, 0); +- else +- xf_emit(ctx, 0xa7a, 0); +- xf_emit(ctx, 1, 0x3fffff); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x1fff); ++ nv50_graph_construct_gene_ccache(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + + /* Strand 5 */ + ctx->ctxvals_pos = offset + 5; +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x0fac6881); +- xf_emit(ctx, 0xb, 0); +- xf_emit(ctx, 2, 0x4e3bfdf); +- xf_emit(ctx, 3, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 2, 0x4e3bfdf); +- xf_emit(ctx, 2, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 1, 0); ++ nv50_graph_construct_gene_ropm2(ctx); ++ nv50_graph_construct_gene_ropm1(ctx); ++ /* per-ROP context */ + for (i = 0; i < 8; i++) + if (units & (1<<(i+16))) + nv50_graph_construct_gene_ropc(ctx); +@@ -1238,10 +1273,9 @@ + + /* Strand 6 */ + ctx->ctxvals_pos = offset + 6; +- nv50_graph_construct_gene_unk3(ctx); +- xf_emit(ctx, 0xb, 0); +- nv50_graph_construct_gene_unk4(ctx); +- nv50_graph_construct_gene_unk7(ctx); ++ nv50_graph_construct_gene_zcull(ctx); ++ nv50_graph_construct_gene_clipid(ctx); ++ nv50_graph_construct_gene_eng2d(ctx); + if (units & (1 << 0)) + nv50_graph_construct_xfer_tp(ctx); + if (units & (1 << 1)) +@@ -1269,7 +1303,7 @@ + if (units & (1 << 9)) + nv50_graph_construct_xfer_tp(ctx); + } else { +- nv50_graph_construct_gene_unk2(ctx); ++ nv50_graph_construct_gene_unk14xx(ctx); + } + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; +@@ -1290,9 +1324,70 @@ + */ + + static void ++nv50_graph_construct_gene_dispatch(struct nouveau_grctx *ctx) ++{ ++ /* start of strand 0 */ ++ struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; ++ /* SEEK */ ++ if (dev_priv->chipset == 0x50) ++ xf_emit(ctx, 5, 0); ++ else if (!IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 6, 0); ++ else ++ xf_emit(ctx, 4, 0); ++ /* SEEK */ ++ /* the PGRAPH's internal FIFO */ ++ if (dev_priv->chipset == 0x50) ++ xf_emit(ctx, 8*3, 0); ++ else ++ xf_emit(ctx, 0x100*3, 0); ++ /* and another bonus slot?!? */ ++ xf_emit(ctx, 3, 0); ++ /* and YET ANOTHER bonus slot? */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 3, 0); ++ /* SEEK */ ++ /* CTX_SWITCH: caches of gr objects bound to subchannels. 
8 values, last used index */ ++ xf_emit(ctx, 9, 0); ++ /* SEEK */ ++ xf_emit(ctx, 9, 0); ++ /* SEEK */ ++ xf_emit(ctx, 9, 0); ++ /* SEEK */ ++ xf_emit(ctx, 9, 0); ++ /* SEEK */ ++ if (dev_priv->chipset < 0x90) ++ xf_emit(ctx, 4, 0); ++ /* SEEK */ ++ xf_emit(ctx, 2, 0); ++ /* SEEK */ ++ xf_emit(ctx, 6*2, 0); ++ xf_emit(ctx, 2, 0); ++ /* SEEK */ ++ xf_emit(ctx, 2, 0); ++ /* SEEK */ ++ xf_emit(ctx, 6*2, 0); ++ xf_emit(ctx, 2, 0); ++ /* SEEK */ ++ if (dev_priv->chipset == 0x50) ++ xf_emit(ctx, 0x1c, 0); ++ else if (dev_priv->chipset < 0xa0) ++ xf_emit(ctx, 0x1e, 0); ++ else ++ xf_emit(ctx, 0x22, 0); ++ /* SEEK */ ++ xf_emit(ctx, 0x15, 0); ++} ++ ++static void + nv50_graph_construct_gene_m2mf(struct nouveau_grctx *ctx) + { +- /* m2mf state */ ++ /* Strand 0, right after dispatch */ ++ struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; ++ int smallm2mf = 0; ++ if (dev_priv->chipset < 0x92 || dev_priv->chipset == 0x98) ++ smallm2mf = 1; ++ /* SEEK */ + xf_emit (ctx, 1, 0); /* DMA_NOTIFY instance >> 4 */ + xf_emit (ctx, 1, 0); /* DMA_BUFFER_IN instance >> 4 */ + xf_emit (ctx, 1, 0); /* DMA_BUFFER_OUT instance >> 4 */ +@@ -1319,427 +1414,975 @@ + xf_emit (ctx, 1, 0); /* TILING_POSITION_OUT */ + xf_emit (ctx, 1, 0); /* OFFSET_IN_HIGH */ + xf_emit (ctx, 1, 0); /* OFFSET_OUT_HIGH */ ++ /* SEEK */ ++ if (smallm2mf) ++ xf_emit(ctx, 0x40, 0); /* 20 * ffffffff, 3ffff */ ++ else ++ xf_emit(ctx, 0x100, 0); /* 80 * ffffffff, 3ffff */ ++ xf_emit(ctx, 4, 0); /* 1f/7f, 0, 1f/7f, 0 [1f for smallm2mf, 7f otherwise] */ ++ /* SEEK */ ++ if (smallm2mf) ++ xf_emit(ctx, 0x400, 0); /* ffffffff */ ++ else ++ xf_emit(ctx, 0x800, 0); /* ffffffff */ ++ xf_emit(ctx, 4, 0); /* ff/1ff, 0, 0, 0 [ff for smallm2mf, 1ff otherwise] */ ++ /* SEEK */ ++ xf_emit(ctx, 0x40, 0); /* 20 * bits ffffffff, 3ffff */ ++ xf_emit(ctx, 0x6, 0); /* 1f, 0, 1f, 0, 1f, 0 */ + } + + static void +-nv50_graph_construct_gene_unk1(struct nouveau_grctx *ctx) ++nv50_graph_construct_gene_ccache(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +- /* end of area 2 on pre-NVA0, area 1 on NVAx */ +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x80); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 0x80c14); +- xf_emit(ctx, 1, 0); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 1, 0x3ff); +- else +- xf_emit(ctx, 1, 0x7ff); ++ xf_emit(ctx, 2, 0); /* RO */ ++ xf_emit(ctx, 0x800, 0); /* ffffffff */ + switch (dev_priv->chipset) { + case 0x50: +- case 0x86: +- case 0x98: +- case 0xaa: +- case 0xac: +- xf_emit(ctx, 0x542, 0); ++ case 0x92: ++ case 0xa0: ++ xf_emit(ctx, 0x2b, 0); + break; + case 0x84: +- case 0x92: ++ xf_emit(ctx, 0x29, 0); ++ break; + case 0x94: + case 0x96: +- xf_emit(ctx, 0x942, 0); +- break; +- case 0xa0: + case 0xa3: +- xf_emit(ctx, 0x2042, 0); ++ xf_emit(ctx, 0x27, 0); + break; ++ case 0x86: ++ case 0x98: + case 0xa5: + case 0xa8: +- xf_emit(ctx, 0x842, 0); ++ case 0xaa: ++ case 0xac: ++ case 0xaf: ++ xf_emit(ctx, 0x25, 0); + break; + } +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x80); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x27); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x26); +- xf_emit(ctx, 3, 0); ++ /* CB bindings, 0x80 of them. 
first word is address >> 8, second is ++ * size >> 4 | valid << 24 */ ++ xf_emit(ctx, 0x100, 0); /* ffffffff CB_DEF */ ++ xf_emit(ctx, 1, 0); /* 0000007f CB_ADDR_BUFFER */ ++ xf_emit(ctx, 1, 0); /* 0 */ ++ xf_emit(ctx, 0x30, 0); /* ff SET_PROGRAM_CB */ ++ xf_emit(ctx, 1, 0); /* 3f last SET_PROGRAM_CB */ ++ xf_emit(ctx, 4, 0); /* RO */ ++ xf_emit(ctx, 0x100, 0); /* ffffffff */ ++ xf_emit(ctx, 8, 0); /* 1f, 0, 0, ... */ ++ xf_emit(ctx, 8, 0); /* ffffffff */ ++ xf_emit(ctx, 4, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 3 */ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_CODE_CB */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_TIC */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_TSC */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINKED_TSC */ ++ xf_emit(ctx, 1, 0); /* 000000ff TIC_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff TIC_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 0x3fffff); /* 003fffff TIC_LIMIT */ ++ xf_emit(ctx, 1, 0); /* 000000ff TSC_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff TSC_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 0x1fff); /* 000fffff TSC_LIMIT */ ++ xf_emit(ctx, 1, 0); /* 000000ff VP_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff VP_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 0); /* 00ffffff VP_START_ID */ ++ xf_emit(ctx, 1, 0); /* 000000ff CB_DEF_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff CB_DEF_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 000000ff GP_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff GP_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 0); /* 00ffffff GP_START_ID */ ++ xf_emit(ctx, 1, 0); /* 000000ff FP_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff FP_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 0); /* 00ffffff FP_START_ID */ + } + + static void +-nv50_graph_construct_gene_unk10(struct nouveau_grctx *ctx) ++nv50_graph_construct_gene_unk10xx(struct nouveau_grctx *ctx) + { ++ struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; ++ int i; + /* end of area 2 on pre-NVA0, area 1 on NVAx */ +- xf_emit(ctx, 0x10, 0x04000000); +- xf_emit(ctx, 0x24, 0); +- xf_emit(ctx, 2, 0x04e3bfdf); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x1fe21); ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 4); /* 0000007f VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0x80); /* 0000ffff GP_VERTEX_OUTPUT_COUNT */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_REG_ALLOC_RESULT */ ++ xf_emit(ctx, 1, 0x80c14); /* 01ffffff SEMANTIC_COLOR */ ++ xf_emit(ctx, 1, 0); /* 00000001 VERTEX_TWO_SIDE_ENABLE */ ++ if (dev_priv->chipset == 0x50) ++ xf_emit(ctx, 1, 0x3ff); ++ else ++ xf_emit(ctx, 1, 0x7ff); /* 000007ff */ ++ xf_emit(ctx, 1, 0); /* 111/113 */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ for (i = 0; i < 8; i++) { ++ switch (dev_priv->chipset) { ++ case 0x50: ++ case 0x86: ++ case 0x98: ++ case 0xaa: ++ case 0xac: ++ xf_emit(ctx, 0xa0, 0); /* ffffffff */ ++ break; ++ case 0x84: ++ case 0x92: ++ case 0x94: ++ case 0x96: ++ xf_emit(ctx, 0x120, 0); ++ break; ++ case 0xa5: ++ case 0xa8: ++ xf_emit(ctx, 0x100, 0); /* ffffffff */ ++ break; ++ case 0xa0: ++ case 0xa3: ++ case 0xaf: ++ xf_emit(ctx, 0x400, 0); /* ffffffff */ ++ break; ++ } ++ xf_emit(ctx, 4, 0); /* 3f, 0, 0, 0 */ ++ xf_emit(ctx, 4, 0); /* ffffffff */ ++ } ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 4); /* 0000007f VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0x80); /* 0000ffff GP_VERTEX_OUTPUT_COUNT */ ++ xf_emit(ctx, 1, 4); /* 000000ff 
GP_REG_ALLOC_TEMP */ ++ xf_emit(ctx, 1, 1); /* 00000001 RASTERIZE_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1900 */ ++ xf_emit(ctx, 1, 0x27); /* 000000ff UNK0FD4 */ ++ xf_emit(ctx, 1, 0); /* 0001ffff GP_BUILTIN_RESULT_EN */ ++ xf_emit(ctx, 1, 0x26); /* 000000ff SEMANTIC_LAYER */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++} ++ ++static void ++nv50_graph_construct_gene_unk34xx(struct nouveau_grctx *ctx) ++{ ++ struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; ++ /* end of area 2 on pre-NVA0, area 1 on NVAx */ ++ xf_emit(ctx, 1, 0); /* 00000001 VIEWPORT_CLIP_RECTS_EN */ ++ xf_emit(ctx, 1, 0); /* 00000003 VIEWPORT_CLIP_MODE */ ++ xf_emit(ctx, 0x10, 0x04000000); /* 07ffffff VIEWPORT_CLIP_HORIZ*8, VIEWPORT_CLIP_VERT*8 */ ++ xf_emit(ctx, 1, 0); /* 00000001 POLYGON_STIPPLE_ENABLE */ ++ xf_emit(ctx, 0x20, 0); /* ffffffff POLYGON_STIPPLE */ ++ xf_emit(ctx, 2, 0); /* 00007fff WINDOW_OFFSET_XY */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 1, 0x04e3bfdf); /* ffffffff UNK0D64 */ ++ xf_emit(ctx, 1, 0x04e3bfdf); /* ffffffff UNK0DF4 */ ++ xf_emit(ctx, 1, 0); /* 00000003 WINDOW_ORIGIN */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ xf_emit(ctx, 1, 0x1fe21); /* 0001ffff tesla UNK0FAC */ ++ if (dev_priv->chipset >= 0xa0) ++ xf_emit(ctx, 1, 0x0fac6881); ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 1, 1); ++ xf_emit(ctx, 3, 0); ++ } + } + + static void +-nv50_graph_construct_gene_unk2(struct nouveau_grctx *ctx) ++nv50_graph_construct_gene_unk14xx(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; + /* middle of area 2 on pre-NVA0, beginning of area 2 on NVA0, area 7 on >NVA0 */ + if (dev_priv->chipset != 0x50) { +- xf_emit(ctx, 5, 0); +- xf_emit(ctx, 1, 0x80c14); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x804); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0x8100c12); +- } +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x10); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 3, 0); +- else +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 0x804); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0x1a); ++ xf_emit(ctx, 5, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0x80c14); /* 01ffffff SEMANTIC_COLOR */ ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 0); /* 000003ff */ ++ xf_emit(ctx, 1, 0x804); /* 00000fff SEMANTIC_CLIP */ ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ xf_emit(ctx, 2, 4); /* 7f, ff */ ++ xf_emit(ctx, 1, 0x8100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ ++ } ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 4); /* 0000007f VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0x10); /* 7f/ff VIEW_VOLUME_CLIP_CTRL */ ++ xf_emit(ctx, 1, 0); /* 000000ff VP_CLIP_DISTANCE_ENABLE */ + if (dev_priv->chipset != 0x50) +- xf_emit(ctx, 1, 0x7f); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0x80c14); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x8100c12); +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x10); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0x8100c12); +- xf_emit(ctx, 6, 0); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 1, 0x3ff); +- else +- xf_emit(ctx, 1, 0x7ff); +- xf_emit(ctx, 1, 0x80c14); +- xf_emit(ctx, 0x38, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x10); +- xf_emit(ctx, 0x38, 0); +- xf_emit(ctx, 2, 0x88); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 4); +- 
xf_emit(ctx, 0x16, 0); +- xf_emit(ctx, 1, 0x26); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x3f800000); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 4, 0); +- else +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 0x1a); +- xf_emit(ctx, 1, 0x10); ++ xf_emit(ctx, 1, 0); /* 3ff */ ++ xf_emit(ctx, 1, 0); /* 000000ff tesla UNK1940 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK0D7C */ ++ xf_emit(ctx, 1, 0x804); /* 00000fff SEMANTIC_CLIP */ ++ xf_emit(ctx, 1, 1); /* 00000001 VIEWPORT_TRANSFORM_EN */ ++ xf_emit(ctx, 1, 0x1a); /* 0000001f POLYGON_MODE */ + if (dev_priv->chipset != 0x50) +- xf_emit(ctx, 0x28, 0); +- else +- xf_emit(ctx, 0x25, 0); +- xf_emit(ctx, 1, 0x52); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x26); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x1a); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x00ffff00); +- xf_emit(ctx, 1, 0); ++ xf_emit(ctx, 1, 0x7f); /* 000000ff tesla UNK0FFC */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 1); /* 00000001 SHADE_MODEL */ ++ xf_emit(ctx, 1, 0x80c14); /* 01ffffff SEMANTIC_COLOR */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1900 */ ++ xf_emit(ctx, 1, 0x8100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ ++ xf_emit(ctx, 1, 4); /* 0000007f VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0x10); /* 7f/ff VIEW_VOLUME_CLIP_CTRL */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK0D7C */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK0F8C */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 1); /* 00000001 VIEWPORT_TRANSFORM_EN */ ++ xf_emit(ctx, 1, 0x8100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ ++ xf_emit(ctx, 4, 0); /* ffffffff NOPERSPECTIVE_BITMAP */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1900 */ ++ xf_emit(ctx, 1, 0); /* 0000000f */ ++ if (dev_priv->chipset == 0x50) ++ xf_emit(ctx, 1, 0x3ff); /* 000003ff tesla UNK0D68 */ ++ else ++ xf_emit(ctx, 1, 0x7ff); /* 000007ff tesla UNK0D68 */ ++ xf_emit(ctx, 1, 0x80c14); /* 01ffffff SEMANTIC_COLOR */ ++ xf_emit(ctx, 1, 0); /* 00000001 VERTEX_TWO_SIDE_ENABLE */ ++ xf_emit(ctx, 0x30, 0); /* ffffffff VIEWPORT_SCALE: X0, Y0, Z0, X1, Y1, ... */ ++ xf_emit(ctx, 3, 0); /* f, 0, 0 */ ++ xf_emit(ctx, 3, 0); /* ffffffff last VIEWPORT_SCALE? 
*/ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 1); /* 00000001 VIEWPORT_TRANSFORM_EN */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1900 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1924 */ ++ xf_emit(ctx, 1, 0x10); /* 000000ff VIEW_VOLUME_CLIP_CTRL */ ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ xf_emit(ctx, 0x30, 0); /* ffffffff VIEWPORT_TRANSLATE */ ++ xf_emit(ctx, 3, 0); /* f, 0, 0 */ ++ xf_emit(ctx, 3, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 2, 0x88); /* 000001ff tesla UNK19D8 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1924 */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 4); /* 0000000f CULL_MODE */ ++ xf_emit(ctx, 2, 0); /* 07ffffff SCREEN_SCISSOR */ ++ xf_emit(ctx, 2, 0); /* 00007fff WINDOW_OFFSET_XY */ ++ xf_emit(ctx, 1, 0); /* 00000003 WINDOW_ORIGIN */ ++ xf_emit(ctx, 0x10, 0); /* 00000001 SCISSOR_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 0001ffff GP_BUILTIN_RESULT_EN */ ++ xf_emit(ctx, 1, 0x26); /* 000000ff SEMANTIC_LAYER */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1900 */ ++ xf_emit(ctx, 1, 0); /* 0000000f */ ++ xf_emit(ctx, 1, 0x3f800000); /* ffffffff LINE_WIDTH */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_STIPPLE_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_SMOOTH_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000007 MULTISAMPLE_SAMPLES_LOG2 */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 0x1a); /* 0000001f POLYGON_MODE */ ++ xf_emit(ctx, 1, 0x10); /* 000000ff VIEW_VOLUME_CLIP_CTRL */ ++ if (dev_priv->chipset != 0x50) { ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 0); /* 000003ff */ ++ } ++ xf_emit(ctx, 0x20, 0); /* 10xbits ffffffff, 3fffff. SCISSOR_* */ ++ xf_emit(ctx, 1, 0); /* f */ ++ xf_emit(ctx, 1, 0); /* 0? 
*/ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 003fffff */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 0x52); /* 000001ff SEMANTIC_PTSZ */ ++ xf_emit(ctx, 1, 0); /* 0001ffff GP_BUILTIN_RESULT_EN */ ++ xf_emit(ctx, 1, 0x26); /* 000000ff SEMANTIC_LAYER */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1900 */ ++ xf_emit(ctx, 1, 4); /* 0000007f VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0x1a); /* 0000001f POLYGON_MODE */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_SMOOTH_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_STIPPLE_ENABLE */ ++ xf_emit(ctx, 1, 0x00ffff00); /* 00ffffff LINE_STIPPLE_PATTERN */ ++ xf_emit(ctx, 1, 0); /* 0000000f */ + } + + static void +-nv50_graph_construct_gene_unk3(struct nouveau_grctx *ctx) ++nv50_graph_construct_gene_zcull(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +- /* end of area 0 on pre-NVA0, beginning of area 6 on NVAx */ +- xf_emit(ctx, 1, 0x3f); +- xf_emit(ctx, 0xa, 0); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 2, 0x04000000); +- xf_emit(ctx, 8, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 4); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 0x10, 0); +- else +- xf_emit(ctx, 0x11, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0x1001); +- xf_emit(ctx, 4, 0xffff); +- xf_emit(ctx, 0x20, 0); +- xf_emit(ctx, 0x10, 0x3f800000); +- xf_emit(ctx, 1, 0x10); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 1, 0); +- else +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 3); +- xf_emit(ctx, 2, 0); ++ /* end of strand 0 on pre-NVA0, beginning of strand 6 on NVAx */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0x3f); /* 0000003f UNK1590 */ ++ xf_emit(ctx, 1, 0); /* 00000001 ALPHA_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000007 MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 00000007 STENCIL_BACK_FUNC_FUNC */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_FUNC_MASK */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_FUNC_REF */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_MASK */ ++ xf_emit(ctx, 3, 0); /* 00000007 STENCIL_BACK_OP_FAIL, ZFAIL, ZPASS */ ++ xf_emit(ctx, 1, 2); /* 00000003 tesla UNK143C */ ++ xf_emit(ctx, 2, 0x04000000); /* 07ffffff tesla UNK0D6C */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 1, 0); /* 00000001 CLIPID_ENABLE */ ++ xf_emit(ctx, 2, 0); /* ffffffff DEPTH_BOUNDS */ ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 0); /* 00000007 DEPTH_TEST_FUNC */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 0000000f CULL_MODE */ ++ xf_emit(ctx, 1, 0); /* 0000ffff */ ++ xf_emit(ctx, 1, 0); /* 00000001 UNK0FB0 */ ++ xf_emit(ctx, 1, 0); /* 00000001 POLYGON_STIPPLE_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 00000007 FP_CONTROL */ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 0001ffff GP_BUILTIN_RESULT_EN */ ++ xf_emit(ctx, 1, 0); /* 000000ff CLEAR_STENCIL */ ++ xf_emit(ctx, 1, 0); /* 00000007 STENCIL_FRONT_FUNC_FUNC */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_FUNC_MASK */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_FUNC_REF */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_MASK */ ++ xf_emit(ctx, 3, 0); /* 00000007 STENCIL_FRONT_OP_FAIL, ZFAIL, ZPASS */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_BACK_ENABLE */ ++ 
xf_emit(ctx, 1, 0); /* ffffffff CLEAR_DEPTH */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ if (dev_priv->chipset != 0x50) ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK1108 */ ++ xf_emit(ctx, 1, 0); /* 00000001 SAMPLECNT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 ZETA_ENABLE */ ++ xf_emit(ctx, 1, 0x1001); /* 00001fff ZETA_ARRAY_MODE */ ++ /* SEEK */ ++ xf_emit(ctx, 4, 0xffff); /* 0000ffff MSAA_MASK */ ++ xf_emit(ctx, 0x10, 0); /* 00000001 SCISSOR_ENABLE */ ++ xf_emit(ctx, 0x10, 0); /* ffffffff DEPTH_RANGE_NEAR */ ++ xf_emit(ctx, 0x10, 0x3f800000); /* ffffffff DEPTH_RANGE_FAR */ ++ xf_emit(ctx, 1, 0x10); /* 7f/ff/3ff VIEW_VOLUME_CLIP_CTRL */ ++ xf_emit(ctx, 1, 0); /* 00000001 VIEWPORT_CLIP_RECTS_EN */ ++ xf_emit(ctx, 1, 3); /* 00000003 FP_CTRL_UNK196C */ ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK1968 */ ++ if (dev_priv->chipset != 0x50) ++ xf_emit(ctx, 1, 0); /* 0fffffff tesla UNK1104 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK151C */ + } + + static void +-nv50_graph_construct_gene_unk4(struct nouveau_grctx *ctx) ++nv50_graph_construct_gene_clipid(struct nouveau_grctx *ctx) + { +- /* middle of area 0 on pre-NVA0, middle of area 6 on NVAx */ +- xf_emit(ctx, 2, 0x04000000); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x80); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 0x80); +- xf_emit(ctx, 1, 0); ++ /* middle of strand 0 on pre-NVA0 [after 24xx], middle of area 6 on NVAx */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 00000007 UNK0FB4 */ ++ /* SEEK */ ++ xf_emit(ctx, 4, 0); /* 07ffffff CLIPID_REGION_HORIZ */ ++ xf_emit(ctx, 4, 0); /* 07ffffff CLIPID_REGION_VERT */ ++ xf_emit(ctx, 2, 0); /* 07ffffff SCREEN_SCISSOR */ ++ xf_emit(ctx, 2, 0x04000000); /* 07ffffff UNK1508 */ ++ xf_emit(ctx, 1, 0); /* 00000001 CLIPID_ENABLE */ ++ xf_emit(ctx, 1, 0x80); /* 00003fff CLIPID_WIDTH */ ++ xf_emit(ctx, 1, 0); /* 000000ff CLIPID_ID */ ++ xf_emit(ctx, 1, 0); /* 000000ff CLIPID_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff CLIPID_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 0x80); /* 00003fff CLIPID_HEIGHT */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_CLIPID */ + } + + static void +-nv50_graph_construct_gene_unk5(struct nouveau_grctx *ctx) ++nv50_graph_construct_gene_unk24xx(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +- /* middle of area 0 on pre-NVA0 [after m2mf], end of area 2 on NVAx */ +- xf_emit(ctx, 2, 4); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 0x1c4d, 0); +- else +- xf_emit(ctx, 0x1c4b, 0); +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0x8100c12); ++ int i; ++ /* middle of strand 0 on pre-NVA0 [after m2mf], end of strand 2 on NVAx */ ++ /* SEEK */ ++ xf_emit(ctx, 0x33, 0); ++ /* SEEK */ ++ xf_emit(ctx, 2, 0); ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 0000007f VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ /* SEEK */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 4, 0); /* RO */ ++ xf_emit(ctx, 0xe10, 0); /* 190 * 9: 8*ffffffff, 7ff */ ++ xf_emit(ctx, 1, 0); /* 1ff */ ++ xf_emit(ctx, 8, 0); /* 0? */ ++ xf_emit(ctx, 9, 0); /* ffffffff, 7ff */ ++ ++ xf_emit(ctx, 4, 0); /* RO */ ++ xf_emit(ctx, 0xe10, 0); /* 190 * 9: 8*ffffffff, 7ff */ ++ xf_emit(ctx, 1, 0); /* 1ff */ ++ xf_emit(ctx, 8, 0); /* 0? 
*/ ++ xf_emit(ctx, 9, 0); /* ffffffff, 7ff */ ++ } ++ else ++ { ++ xf_emit(ctx, 0xc, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 0xe10, 0); /* 190 * 9: 8*ffffffff, 7ff */ ++ xf_emit(ctx, 1, 0); /* 1ff */ ++ xf_emit(ctx, 8, 0); /* 0? */ ++ ++ /* SEEK */ ++ xf_emit(ctx, 0xc, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 0xe10, 0); /* 190 * 9: 8*ffffffff, 7ff */ ++ xf_emit(ctx, 1, 0); /* 1ff */ ++ xf_emit(ctx, 8, 0); /* 0? */ ++ } ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 4); /* 0000007f VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0x8100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ + if (dev_priv->chipset != 0x50) +- xf_emit(ctx, 1, 3); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x8100c12); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x80c14); +- xf_emit(ctx, 1, 1); ++ xf_emit(ctx, 1, 3); /* 00000003 tesla UNK1100 */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0x8100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ ++ xf_emit(ctx, 1, 0); /* 0000000f VP_GP_BUILTIN_ATTR_EN */ ++ xf_emit(ctx, 1, 0x80c14); /* 01ffffff SEMANTIC_COLOR */ ++ xf_emit(ctx, 1, 1); /* 00000001 */ ++ /* SEEK */ + if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0x80c14); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x8100c12); +- xf_emit(ctx, 1, 0x27); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 0x3c1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 0x16, 0); +- xf_emit(ctx, 1, 0x8100c12); +- xf_emit(ctx, 1, 0); ++ xf_emit(ctx, 2, 4); /* 000000ff */ ++ xf_emit(ctx, 1, 0x80c14); /* 01ffffff SEMANTIC_COLOR */ ++ xf_emit(ctx, 1, 0); /* 00000001 VERTEX_TWO_SIDE_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 POINT_SPRITE_ENABLE */ ++ xf_emit(ctx, 1, 0x8100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ ++ xf_emit(ctx, 1, 0x27); /* 000000ff SEMANTIC_PRIM_ID */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 0000000f */ ++ xf_emit(ctx, 1, 1); /* 00000001 */ ++ for (i = 0; i < 10; i++) { ++ /* SEEK */ ++ xf_emit(ctx, 0x40, 0); /* ffffffff */ ++ xf_emit(ctx, 0x10, 0); /* 3, 0, 0.... 
*/ ++ xf_emit(ctx, 0x10, 0); /* ffffffff */ ++ } ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 00000001 POINT_SPRITE_CTRL */ ++ xf_emit(ctx, 1, 1); /* 00000001 */ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 4, 0); /* ffffffff NOPERSPECTIVE_BITMAP */ ++ xf_emit(ctx, 0x10, 0); /* 00ffffff POINT_COORD_REPLACE_MAP */ ++ xf_emit(ctx, 1, 0); /* 00000003 WINDOW_ORIGIN */ ++ xf_emit(ctx, 1, 0x8100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ ++ if (dev_priv->chipset != 0x50) ++ xf_emit(ctx, 1, 0); /* 000003ff */ + } + + static void +-nv50_graph_construct_gene_unk6(struct nouveau_grctx *ctx) ++nv50_graph_construct_gene_vfetch(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +- /* beginning of area 1 on pre-NVA0 [after m2mf], area 3 on NVAx */ +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 0xf); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 8, 0); +- else +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 0x20); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 0x11, 0); ++ int acnt = 0x10, rep, i; ++ /* beginning of strand 1 on pre-NVA0, strand 3 on NVAx */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ acnt = 0x20; ++ /* SEEK */ ++ if (dev_priv->chipset >= 0xa0) { ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK13A4 */ ++ xf_emit(ctx, 1, 1); /* 00000fff tesla UNK1318 */ ++ } ++ xf_emit(ctx, 1, 0); /* ffffffff VERTEX_BUFFER_FIRST */ ++ xf_emit(ctx, 1, 0); /* 00000001 PRIMITIVE_RESTART_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 UNK0DE8 */ ++ xf_emit(ctx, 1, 0); /* ffffffff PRIMITIVE_RESTART_INDEX */ ++ xf_emit(ctx, 1, 0xf); /* ffffffff VP_ATTR_EN */ ++ xf_emit(ctx, (acnt/8)-1, 0); /* ffffffff VP_ATTR_EN */ ++ xf_emit(ctx, acnt/8, 0); /* ffffffff VTX_ATTR_MASK_UNK0DD0 */ ++ xf_emit(ctx, 1, 0); /* 0000000f VP_GP_BUILTIN_ATTR_EN */ ++ xf_emit(ctx, 1, 0x20); /* 0000ffff tesla UNK129C */ ++ xf_emit(ctx, 1, 0); /* 000000ff turing UNK370??? 
*/ ++ xf_emit(ctx, 1, 0); /* 0000ffff turing USER_PARAM_COUNT */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ /* SEEK */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 0xb, 0); /* RO */ + else if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 0xf, 0); ++ xf_emit(ctx, 0x9, 0); /* RO */ + else +- xf_emit(ctx, 0xe, 0); +- xf_emit(ctx, 1, 0x1a); +- xf_emit(ctx, 0xd, 0); +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 8); +- xf_emit(ctx, 1, 0); ++ xf_emit(ctx, 0x8, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 00000001 EDGE_FLAG */ ++ xf_emit(ctx, 1, 0); /* 00000001 PROVOKING_VERTEX_LAST */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0x1a); /* 0000001f POLYGON_MODE */ ++ /* SEEK */ ++ xf_emit(ctx, 0xc, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 7f/ff */ ++ xf_emit(ctx, 1, 4); /* 7f/ff VP_REG_ALLOC_RESULT */ ++ xf_emit(ctx, 1, 4); /* 7f/ff VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0); /* 0000000f VP_GP_BUILTIN_ATTR_EN */ ++ xf_emit(ctx, 1, 4); /* 000001ff UNK1A28 */ ++ xf_emit(ctx, 1, 8); /* 000001ff UNK0DF0 */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ + if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 1, 0x3ff); ++ xf_emit(ctx, 1, 0x3ff); /* 3ff tesla UNK0D68 */ + else +- xf_emit(ctx, 1, 0x7ff); ++ xf_emit(ctx, 1, 0x7ff); /* 7ff tesla UNK0D68 */ + if (dev_priv->chipset == 0xa8) +- xf_emit(ctx, 1, 0x1e00); +- xf_emit(ctx, 0xc, 0); +- xf_emit(ctx, 1, 0xf); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 0x125, 0); +- else if (dev_priv->chipset < 0xa0) +- xf_emit(ctx, 0x126, 0); +- else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa) +- xf_emit(ctx, 0x124, 0); +- else +- xf_emit(ctx, 0x1f7, 0); +- xf_emit(ctx, 1, 0xf); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 3, 0); +- else +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 0xa1, 0); +- else +- xf_emit(ctx, 0x5a, 0); +- xf_emit(ctx, 1, 0xf); ++ xf_emit(ctx, 1, 0x1e00); /* 7fff */ ++ /* SEEK */ ++ xf_emit(ctx, 0xc, 0); /* RO or close */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0xf); /* ffffffff VP_ATTR_EN */ ++ xf_emit(ctx, (acnt/8)-1, 0); /* ffffffff VP_ATTR_EN */ ++ xf_emit(ctx, 1, 0); /* 0000000f VP_GP_BUILTIN_ATTR_EN */ ++ if (dev_priv->chipset > 0x50 && dev_priv->chipset < 0xa0) ++ xf_emit(ctx, 2, 0); /* ffffffff */ ++ else ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK0FD8 */ ++ /* SEEK */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 0x10, 0); /* 0? */ ++ xf_emit(ctx, 2, 0); /* weird... */ ++ xf_emit(ctx, 2, 0); /* RO */ ++ } else { ++ xf_emit(ctx, 8, 0); /* 0? */ ++ xf_emit(ctx, 1, 0); /* weird... */ ++ xf_emit(ctx, 2, 0); /* RO */ ++ } ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* ffffffff VB_ELEMENT_BASE */ ++ xf_emit(ctx, 1, 0); /* ffffffff UNK1438 */ ++ xf_emit(ctx, acnt, 0); /* 1 tesla UNK1000 */ ++ if (dev_priv->chipset >= 0xa0) ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1118? */ ++ /* SEEK */ ++ xf_emit(ctx, acnt, 0); /* ffffffff VERTEX_ARRAY_UNK90C */ ++ xf_emit(ctx, 1, 0); /* f/1f */ ++ /* SEEK */ ++ xf_emit(ctx, acnt, 0); /* ffffffff VERTEX_ARRAY_UNK90C */ ++ xf_emit(ctx, 1, 0); /* f/1f */ ++ /* SEEK */ ++ xf_emit(ctx, acnt, 0); /* RO */ ++ xf_emit(ctx, 2, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK111C? 
*/ ++ xf_emit(ctx, 1, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 000000ff UNK15F4_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff UNK15F4_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 0); /* 000000ff UNK0F84_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff UNK0F84_ADDRESS_LOW */ ++ /* SEEK */ ++ xf_emit(ctx, acnt, 0); /* 00003fff VERTEX_ARRAY_ATTRIB_OFFSET */ ++ xf_emit(ctx, 3, 0); /* f/1f */ ++ /* SEEK */ ++ xf_emit(ctx, acnt, 0); /* 00000fff VERTEX_ARRAY_STRIDE */ ++ xf_emit(ctx, 3, 0); /* f/1f */ ++ /* SEEK */ ++ xf_emit(ctx, acnt, 0); /* ffffffff VERTEX_ARRAY_LOW */ ++ xf_emit(ctx, 3, 0); /* f/1f */ ++ /* SEEK */ ++ xf_emit(ctx, acnt, 0); /* 000000ff VERTEX_ARRAY_HIGH */ ++ xf_emit(ctx, 3, 0); /* f/1f */ ++ /* SEEK */ ++ xf_emit(ctx, acnt, 0); /* ffffffff VERTEX_LIMIT_LOW */ ++ xf_emit(ctx, 3, 0); /* f/1f */ ++ /* SEEK */ ++ xf_emit(ctx, acnt, 0); /* 000000ff VERTEX_LIMIT_HIGH */ ++ xf_emit(ctx, 3, 0); /* f/1f */ ++ /* SEEK */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, acnt, 0); /* f */ ++ xf_emit(ctx, 3, 0); /* f/1f */ ++ } ++ /* SEEK */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 2, 0); /* RO */ ++ else ++ xf_emit(ctx, 5, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* ffff DMA_VTXBUF */ ++ /* SEEK */ ++ if (dev_priv->chipset < 0xa0) { ++ xf_emit(ctx, 0x41, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 0x11, 0); /* RO */ ++ } else if (!IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 0x50, 0); /* RO */ ++ else ++ xf_emit(ctx, 0x58, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0xf); /* ffffffff VP_ATTR_EN */ ++ xf_emit(ctx, (acnt/8)-1, 0); /* ffffffff VP_ATTR_EN */ ++ xf_emit(ctx, 1, 1); /* 1 UNK0DEC */ ++ /* SEEK */ ++ xf_emit(ctx, acnt*4, 0); /* ffffffff VTX_ATTR */ ++ xf_emit(ctx, 4, 0); /* f/1f, 0, 0, 0 */ ++ /* SEEK */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 0x1d, 0); /* RO */ ++ else ++ xf_emit(ctx, 0x16, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0xf); /* ffffffff VP_ATTR_EN */ ++ xf_emit(ctx, (acnt/8)-1, 0); /* ffffffff VP_ATTR_EN */ ++ /* SEEK */ + if (dev_priv->chipset < 0xa0) +- xf_emit(ctx, 0x834, 0); +- else if (dev_priv->chipset == 0xa0) +- xf_emit(ctx, 0x1873, 0); +- else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 0x8ba, 0); ++ xf_emit(ctx, 8, 0); /* RO */ ++ else if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 0xc, 0); /* RO */ ++ else ++ xf_emit(ctx, 7, 0); /* RO */ ++ /* SEEK */ ++ xf_emit(ctx, 0xa, 0); /* RO */ ++ if (dev_priv->chipset == 0xa0) ++ rep = 0xc; ++ else ++ rep = 4; ++ for (i = 0; i < rep; i++) { ++ /* SEEK */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 0x20, 0); /* ffffffff */ ++ xf_emit(ctx, 0x200, 0); /* ffffffff */ ++ xf_emit(ctx, 4, 0); /* 7f/ff, 0, 0, 0 */ ++ xf_emit(ctx, 4, 0); /* ffffffff */ ++ } ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 113/111 */ ++ xf_emit(ctx, 1, 0xf); /* ffffffff VP_ATTR_EN */ ++ xf_emit(ctx, (acnt/8)-1, 0); /* ffffffff VP_ATTR_EN */ ++ xf_emit(ctx, acnt/8, 0); /* ffffffff VTX_ATTR_MASK_UNK0DD0 */ ++ xf_emit(ctx, 1, 0); /* 0000000f VP_GP_BUILTIN_ATTR_EN */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ /* SEEK */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 7, 0); /* weird... */ + else +- xf_emit(ctx, 0x833, 0); +- xf_emit(ctx, 1, 0xf); +- xf_emit(ctx, 0xf, 0); ++ xf_emit(ctx, 5, 0); /* weird... 
*/ + } + + static void +-nv50_graph_construct_gene_unk7(struct nouveau_grctx *ctx) ++nv50_graph_construct_gene_eng2d(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +- /* middle of area 1 on pre-NVA0 [after m2mf], middle of area 6 on NVAx */ +- xf_emit(ctx, 2, 0); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 2, 1); +- else +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 2, 0x100); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 8); +- xf_emit(ctx, 5, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 3, 1); +- xf_emit(ctx, 1, 0xcf); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 6, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 3, 1); +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0x15); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 0x4444480); +- xf_emit(ctx, 0x37, 0); ++ /* middle of strand 1 on pre-NVA0 [after vfetch], middle of strand 6 on NVAx */ ++ /* SEEK */ ++ xf_emit(ctx, 2, 0); /* 0001ffff CLIP_X, CLIP_Y */ ++ xf_emit(ctx, 2, 0); /* 0000ffff CLIP_W, CLIP_H */ ++ xf_emit(ctx, 1, 0); /* 00000001 CLIP_ENABLE */ ++ if (dev_priv->chipset < 0xa0) { ++ /* this is useless on everything but the original NV50, ++ * guess they forgot to nuke it. Or just didn't bother. */ ++ xf_emit(ctx, 2, 0); /* 0000ffff IFC_CLIP_X, Y */ ++ xf_emit(ctx, 2, 1); /* 0000ffff IFC_CLIP_W, H */ ++ xf_emit(ctx, 1, 0); /* 00000001 IFC_CLIP_ENABLE */ ++ } ++ xf_emit(ctx, 1, 1); /* 00000001 DST_LINEAR */ ++ xf_emit(ctx, 1, 0x100); /* 0001ffff DST_WIDTH */ ++ xf_emit(ctx, 1, 0x100); /* 0001ffff DST_HEIGHT */ ++ xf_emit(ctx, 1, 0x11); /* 3f[NV50]/7f[NV84+] DST_FORMAT */ ++ xf_emit(ctx, 1, 0); /* 0001ffff DRAW_POINT_X */ ++ xf_emit(ctx, 1, 8); /* 0000000f DRAW_UNK58C */ ++ xf_emit(ctx, 1, 0); /* 000fffff SIFC_DST_X_FRACT */ ++ xf_emit(ctx, 1, 0); /* 0001ffff SIFC_DST_X_INT */ ++ xf_emit(ctx, 1, 0); /* 000fffff SIFC_DST_Y_FRACT */ ++ xf_emit(ctx, 1, 0); /* 0001ffff SIFC_DST_Y_INT */ ++ xf_emit(ctx, 1, 0); /* 000fffff SIFC_DX_DU_FRACT */ ++ xf_emit(ctx, 1, 1); /* 0001ffff SIFC_DX_DU_INT */ ++ xf_emit(ctx, 1, 0); /* 000fffff SIFC_DY_DV_FRACT */ ++ xf_emit(ctx, 1, 1); /* 0001ffff SIFC_DY_DV_INT */ ++ xf_emit(ctx, 1, 1); /* 0000ffff SIFC_WIDTH */ ++ xf_emit(ctx, 1, 1); /* 0000ffff SIFC_HEIGHT */ ++ xf_emit(ctx, 1, 0xcf); /* 000000ff SIFC_FORMAT */ ++ xf_emit(ctx, 1, 2); /* 00000003 SIFC_BITMAP_UNK808 */ ++ xf_emit(ctx, 1, 0); /* 00000003 SIFC_BITMAP_LINE_PACK_MODE */ ++ xf_emit(ctx, 1, 0); /* 00000001 SIFC_BITMAP_LSB_FIRST */ ++ xf_emit(ctx, 1, 0); /* 00000001 SIFC_BITMAP_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 0000ffff BLIT_DST_X */ ++ xf_emit(ctx, 1, 0); /* 0000ffff BLIT_DST_Y */ ++ xf_emit(ctx, 1, 0); /* 000fffff BLIT_DU_DX_FRACT */ ++ xf_emit(ctx, 1, 1); /* 0001ffff BLIT_DU_DX_INT */ ++ xf_emit(ctx, 1, 0); /* 000fffff BLIT_DV_DY_FRACT */ ++ xf_emit(ctx, 1, 1); /* 0001ffff BLIT_DV_DY_INT */ ++ xf_emit(ctx, 1, 1); /* 0000ffff BLIT_DST_W */ ++ xf_emit(ctx, 1, 1); /* 0000ffff BLIT_DST_H */ ++ xf_emit(ctx, 1, 0); /* 000fffff BLIT_SRC_X_FRACT */ ++ xf_emit(ctx, 1, 0); /* 0001ffff BLIT_SRC_X_INT */ ++ xf_emit(ctx, 1, 0); /* 000fffff BLIT_SRC_Y_FRACT */ ++ xf_emit(ctx, 1, 0); /* 00000001 UNK888 */ ++ xf_emit(ctx, 1, 4); /* 0000003f UNK884 */ ++ xf_emit(ctx, 1, 0); /* 00000007 UNK880 */ ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK0FB8 */ ++ xf_emit(ctx, 1, 0x15); /* 000000ff tesla UNK128C */ ++ xf_emit(ctx, 2, 0); /* 00000007, ffff0ff3 */ ++ 
xf_emit(ctx, 1, 0); /* 00000001 UNK260 */ ++ xf_emit(ctx, 1, 0x4444480); /* 1fffffff UNK870 */ ++ /* SEEK */ ++ xf_emit(ctx, 0x10, 0); ++ /* SEEK */ ++ xf_emit(ctx, 0x27, 0); + } + + static void +-nv50_graph_construct_gene_unk8(struct nouveau_grctx *ctx) ++nv50_graph_construct_gene_csched(struct nouveau_grctx *ctx) + { +- /* middle of area 1 on pre-NVA0 [after m2mf], middle of area 0 on NVAx */ +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 0x8100c12); +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 0x100); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x10001); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x10001); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0x10001); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 2); ++ struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; ++ /* middle of strand 1 on pre-NVA0 [after eng2d], middle of strand 0 on NVAx */ ++ /* SEEK */ ++ xf_emit(ctx, 2, 0); /* 00007fff WINDOW_OFFSET_XY... what is it doing here??? */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1924 */ ++ xf_emit(ctx, 1, 0); /* 00000003 WINDOW_ORIGIN */ ++ xf_emit(ctx, 1, 0x8100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ ++ xf_emit(ctx, 1, 0); /* 000003ff */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* ffffffff turing UNK364 */ ++ xf_emit(ctx, 1, 0); /* 0000000f turing UNK36C */ ++ xf_emit(ctx, 1, 0); /* 0000ffff USER_PARAM_COUNT */ ++ xf_emit(ctx, 1, 0x100); /* 00ffffff turing UNK384 */ ++ xf_emit(ctx, 1, 0); /* 0000000f turing UNK2A0 */ ++ xf_emit(ctx, 1, 0); /* 0000ffff GRIDID */ ++ xf_emit(ctx, 1, 0x10001); /* ffffffff GRIDDIM_XY */ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0x10001); /* ffffffff BLOCKDIM_XY */ ++ xf_emit(ctx, 1, 1); /* 0000ffff BLOCKDIM_Z */ ++ xf_emit(ctx, 1, 0x10001); /* 00ffffff BLOCK_ALLOC */ ++ xf_emit(ctx, 1, 1); /* 00000001 LANES32 */ ++ xf_emit(ctx, 1, 4); /* 000000ff FP_REG_ALLOC_TEMP */ ++ xf_emit(ctx, 1, 2); /* 00000003 REG_MODE */ ++ /* SEEK */ ++ xf_emit(ctx, 0x40, 0); /* ffffffff USER_PARAM */ ++ switch (dev_priv->chipset) { ++ case 0x50: ++ case 0x92: ++ xf_emit(ctx, 8, 0); /* 7, 0, 0, 0, ... */ ++ xf_emit(ctx, 0x80, 0); /* fff */ ++ xf_emit(ctx, 2, 0); /* ff, fff */ ++ xf_emit(ctx, 0x10*2, 0); /* ffffffff, 1f */ ++ break; ++ case 0x84: ++ xf_emit(ctx, 8, 0); /* 7, 0, 0, 0, ... */ ++ xf_emit(ctx, 0x60, 0); /* fff */ ++ xf_emit(ctx, 2, 0); /* ff, fff */ ++ xf_emit(ctx, 0xc*2, 0); /* ffffffff, 1f */ ++ break; ++ case 0x94: ++ case 0x96: ++ xf_emit(ctx, 8, 0); /* 7, 0, 0, 0, ... */ ++ xf_emit(ctx, 0x40, 0); /* fff */ ++ xf_emit(ctx, 2, 0); /* ff, fff */ ++ xf_emit(ctx, 8*2, 0); /* ffffffff, 1f */ ++ break; ++ case 0x86: ++ case 0x98: ++ xf_emit(ctx, 4, 0); /* f, 0, 0, 0 */ ++ xf_emit(ctx, 0x10, 0); /* fff */ ++ xf_emit(ctx, 2, 0); /* ff, fff */ ++ xf_emit(ctx, 2*2, 0); /* ffffffff, 1f */ ++ break; ++ case 0xa0: ++ xf_emit(ctx, 8, 0); /* 7, 0, 0, 0, ... */ ++ xf_emit(ctx, 0xf0, 0); /* fff */ ++ xf_emit(ctx, 2, 0); /* ff, fff */ ++ xf_emit(ctx, 0x1e*2, 0); /* ffffffff, 1f */ ++ break; ++ case 0xa3: ++ xf_emit(ctx, 8, 0); /* 7, 0, 0, 0, ... */ ++ xf_emit(ctx, 0x60, 0); /* fff */ ++ xf_emit(ctx, 2, 0); /* ff, fff */ ++ xf_emit(ctx, 0xc*2, 0); /* ffffffff, 1f */ ++ break; ++ case 0xa5: ++ case 0xaf: ++ xf_emit(ctx, 8, 0); /* 7, 0, 0, 0, ... 
*/ ++ xf_emit(ctx, 0x30, 0); /* fff */ ++ xf_emit(ctx, 2, 0); /* ff, fff */ ++ xf_emit(ctx, 6*2, 0); /* ffffffff, 1f */ ++ break; ++ case 0xaa: ++ xf_emit(ctx, 0x12, 0); ++ break; ++ case 0xa8: ++ case 0xac: ++ xf_emit(ctx, 4, 0); /* f, 0, 0, 0 */ ++ xf_emit(ctx, 0x10, 0); /* fff */ ++ xf_emit(ctx, 2, 0); /* ff, fff */ ++ xf_emit(ctx, 2*2, 0); /* ffffffff, 1f */ ++ break; ++ } ++ xf_emit(ctx, 1, 0); /* 0000000f */ ++ xf_emit(ctx, 1, 0); /* 00000000 */ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 0000001f */ ++ xf_emit(ctx, 4, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 00000003 turing UNK35C */ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 4, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 00000003 turing UNK35C */ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 000000ff */ + } + + static void +-nv50_graph_construct_gene_unk9(struct nouveau_grctx *ctx) ++nv50_graph_construct_gene_unk1cxx(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +- /* middle of area 2 on pre-NVA0 [after m2mf], end of area 0 on NVAx */ +- xf_emit(ctx, 1, 0x3f800000); +- xf_emit(ctx, 6, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 0x1a); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 0x12, 0); +- xf_emit(ctx, 1, 0x00ffff00); +- xf_emit(ctx, 6, 0); +- xf_emit(ctx, 1, 0xf); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0x0fac6881); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 0xf, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 2, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 3); ++ xf_emit(ctx, 2, 0); /* 00007fff WINDOW_OFFSET_XY */ ++ xf_emit(ctx, 1, 0x3f800000); /* ffffffff LINE_WIDTH */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_SMOOTH_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1658 */ ++ xf_emit(ctx, 1, 0); /* 00000001 POLYGON_SMOOTH_ENABLE */ ++ xf_emit(ctx, 3, 0); /* 00000001 POLYGON_OFFSET_*_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 0000000f CULL_MODE */ ++ xf_emit(ctx, 1, 0x1a); /* 0000001f POLYGON_MODE */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 0); /* 00000001 POINT_SPRITE_ENABLE */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK165C */ ++ xf_emit(ctx, 0x10, 0); /* 00000001 SCISSOR_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_STIPPLE_ENABLE */ ++ xf_emit(ctx, 1, 0x00ffff00); /* 00ffffff LINE_STIPPLE_PATTERN */ ++ xf_emit(ctx, 1, 0); /* ffffffff POLYGON_OFFSET_UNITS */ ++ xf_emit(ctx, 1, 0); /* ffffffff POLYGON_OFFSET_FACTOR */ ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK1668 */ ++ xf_emit(ctx, 2, 0); /* 07ffffff SCREEN_SCISSOR */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1900 */ ++ xf_emit(ctx, 1, 0xf); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 7, 0); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 1, 0x0fac6881); /* 0fffffff RT_CONTROL */ ++ xf_emit(ctx, 1, 0x11); /* 0000007f RT_FORMAT */ ++ xf_emit(ctx, 7, 0); /* 0000007f RT_FORMAT */ ++ xf_emit(ctx, 8, 0); /* 00000001 RT_HORIZ_LINEAR */ ++ xf_emit(ctx, 1, 4); /* 00000007 FP_CONTROL */ ++ xf_emit(ctx, 1, 0); /* 00000001 ALPHA_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000007 ALPHA_TEST_FUNC */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 3); /* 00000003 UNK16B4 */ + else if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 2, 0x04000000); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 5); +- xf_emit(ctx, 1, 0x52); +- if (dev_priv->chipset == 0x50) { +- xf_emit(ctx, 0x13, 0); +- } else 
{ +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 1); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 0x11, 0); +- else +- xf_emit(ctx, 0x10, 0); ++ xf_emit(ctx, 1, 1); /* 00000001 UNK16B4 */ ++ xf_emit(ctx, 1, 0); /* 00000003 MULTISAMPLE_CTRL */ ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK0F90 */ ++ xf_emit(ctx, 1, 2); /* 00000003 tesla UNK143C */ ++ xf_emit(ctx, 2, 0x04000000); /* 07ffffff tesla UNK0D6C */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_MASK */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 SAMPLECNT_ENABLE */ ++ xf_emit(ctx, 1, 5); /* 0000000f UNK1408 */ ++ xf_emit(ctx, 1, 0x52); /* 000001ff SEMANTIC_PTSZ */ ++ xf_emit(ctx, 1, 0); /* ffffffff POINT_SIZE */ ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 0); /* 00000007 tesla UNK0FB4 */ ++ if (dev_priv->chipset != 0x50) { ++ xf_emit(ctx, 1, 0); /* 3ff */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK1110 */ + } +- xf_emit(ctx, 0x10, 0x3f800000); +- xf_emit(ctx, 1, 0x10); +- xf_emit(ctx, 0x26, 0); +- xf_emit(ctx, 1, 0x8100c12); +- xf_emit(ctx, 1, 5); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 4, 0xffff); ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK1928 */ ++ xf_emit(ctx, 0x10, 0); /* ffffffff DEPTH_RANGE_NEAR */ ++ xf_emit(ctx, 0x10, 0x3f800000); /* ffffffff DEPTH_RANGE_FAR */ ++ xf_emit(ctx, 1, 0x10); /* 000000ff VIEW_VOLUME_CLIP_CTRL */ ++ xf_emit(ctx, 0x20, 0); /* 07ffffff VIEWPORT_HORIZ, then VIEWPORT_VERT. (W&0x3fff)<<13 | (X&0x1fff). */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK187C */ ++ xf_emit(ctx, 1, 0); /* 00000003 WINDOW_ORIGIN */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_BACK_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_MASK */ ++ xf_emit(ctx, 1, 0x8100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ ++ xf_emit(ctx, 1, 5); /* 0000000f tesla UNK1220 */ ++ xf_emit(ctx, 1, 0); /* 00000007 MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 000000ff tesla UNK1A20 */ ++ xf_emit(ctx, 1, 1); /* 00000001 ZETA_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 VERTEX_TWO_SIDE_ENABLE */ ++ xf_emit(ctx, 4, 0xffff); /* 0000ffff MSAA_MASK */ + if (dev_priv->chipset != 0x50) +- xf_emit(ctx, 1, 3); ++ xf_emit(ctx, 1, 3); /* 00000003 tesla UNK1100 */ + if (dev_priv->chipset < 0xa0) +- xf_emit(ctx, 0x1f, 0); +- else if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 0xc, 0); +- else +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 0x00ffff00); +- xf_emit(ctx, 1, 0x1a); ++ xf_emit(ctx, 0x1c, 0); /* RO */ ++ else if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 0x9, 0); ++ xf_emit(ctx, 1, 0); /* 00000001 UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_SMOOTH_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_STIPPLE_ENABLE */ ++ xf_emit(ctx, 1, 0x00ffff00); /* 00ffffff LINE_STIPPLE_PATTERN */ ++ xf_emit(ctx, 1, 0x1a); /* 0000001f POLYGON_MODE */ ++ xf_emit(ctx, 1, 0); /* 00000003 WINDOW_ORIGIN */ + if (dev_priv->chipset != 0x50) { +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 3); ++ xf_emit(ctx, 1, 3); /* 00000003 tesla UNK1100 */ ++ xf_emit(ctx, 1, 0); /* 3ff */ + } ++ /* XXX: the following block could belong either to unk1cxx, or ++ * to STRMOUT. Rather hard to tell. 
*/ + if (dev_priv->chipset < 0xa0) +- xf_emit(ctx, 0x26, 0); +- else +- xf_emit(ctx, 0x3c, 0); +- xf_emit(ctx, 1, 0x102); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 4, 4); +- if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 8, 0); +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 1, 0x3ff); ++ xf_emit(ctx, 0x25, 0); + else +- xf_emit(ctx, 1, 0x7ff); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x102); +- xf_emit(ctx, 9, 0); +- xf_emit(ctx, 4, 4); +- xf_emit(ctx, 0x2c, 0); ++ xf_emit(ctx, 0x3b, 0); ++} ++ ++static void ++nv50_graph_construct_gene_strmout(struct nouveau_grctx *ctx) ++{ ++ struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; ++ xf_emit(ctx, 1, 0x102); /* 0000ffff STRMOUT_BUFFER_CTRL */ ++ xf_emit(ctx, 1, 0); /* ffffffff STRMOUT_PRIMITIVE_COUNT */ ++ xf_emit(ctx, 4, 4); /* 000000ff STRMOUT_NUM_ATTRIBS */ ++ if (dev_priv->chipset >= 0xa0) { ++ xf_emit(ctx, 4, 0); /* ffffffff UNK1A8C */ ++ xf_emit(ctx, 4, 0); /* ffffffff UNK1780 */ ++ } ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 4); /* 0000007f VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ if (dev_priv->chipset == 0x50) ++ xf_emit(ctx, 1, 0x3ff); /* 000003ff tesla UNK0D68 */ ++ else ++ xf_emit(ctx, 1, 0x7ff); /* 000007ff tesla UNK0D68 */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0x102); /* 0000ffff STRMOUT_BUFFER_CTRL */ ++ xf_emit(ctx, 1, 0); /* ffffffff STRMOUT_PRIMITIVE_COUNT */ ++ xf_emit(ctx, 4, 0); /* 000000ff STRMOUT_ADDRESS_HIGH */ ++ xf_emit(ctx, 4, 0); /* ffffffff STRMOUT_ADDRESS_LOW */ ++ xf_emit(ctx, 4, 4); /* 000000ff STRMOUT_NUM_ATTRIBS */ ++ if (dev_priv->chipset >= 0xa0) { ++ xf_emit(ctx, 4, 0); /* ffffffff UNK1A8C */ ++ xf_emit(ctx, 4, 0); /* ffffffff UNK1780 */ ++ } ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_STRMOUT */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_QUERY */ ++ xf_emit(ctx, 1, 0); /* 000000ff QUERY_ADDRESS_HIGH */ ++ xf_emit(ctx, 2, 0); /* ffffffff QUERY_ADDRESS_LOW QUERY_COUNTER */ ++ xf_emit(ctx, 2, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ /* SEEK */ ++ xf_emit(ctx, 0x20, 0); /* ffffffff STRMOUT_MAP */ ++ xf_emit(ctx, 1, 0); /* 0000000f */ ++ xf_emit(ctx, 1, 0); /* 00000000? 
*/ ++ xf_emit(ctx, 2, 0); /* ffffffff */ ++} ++ ++static void ++nv50_graph_construct_gene_ropm1(struct nouveau_grctx *ctx) ++{ ++ struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; ++ xf_emit(ctx, 1, 0x4e3bfdf); /* ffffffff UNK0D64 */ ++ xf_emit(ctx, 1, 0x4e3bfdf); /* ffffffff UNK0DF4 */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ xf_emit(ctx, 1, 0); /* 000003ff */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 0x11); /* 000000ff tesla UNK1968 */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ ++} ++ ++static void ++nv50_graph_construct_gene_ropm2(struct nouveau_grctx *ctx) ++{ ++ struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_QUERY */ ++ xf_emit(ctx, 1, 0x0fac6881); /* 0fffffff RT_CONTROL */ ++ xf_emit(ctx, 2, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 000000ff QUERY_ADDRESS_HIGH */ ++ xf_emit(ctx, 2, 0); /* ffffffff QUERY_ADDRESS_LOW, COUNTER */ ++ xf_emit(ctx, 1, 0); /* 00000001 SAMPLECNT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 7 */ ++ /* SEEK */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_QUERY */ ++ xf_emit(ctx, 1, 0); /* 000000ff QUERY_ADDRESS_HIGH */ ++ xf_emit(ctx, 2, 0); /* ffffffff QUERY_ADDRESS_LOW, COUNTER */ ++ xf_emit(ctx, 1, 0x4e3bfdf); /* ffffffff UNK0D64 */ ++ xf_emit(ctx, 1, 0x4e3bfdf); /* ffffffff UNK0DF4 */ ++ xf_emit(ctx, 1, 0); /* 00000001 eng2d UNK260 */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 0x11); /* 000000ff tesla UNK1968 */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ + } + + static void +@@ -1749,443 +2392,709 @@ + int magic2; + if (dev_priv->chipset == 0x50) { + magic2 = 0x00003e60; +- } else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) { ++ } else if (!IS_NVA3F(dev_priv->chipset)) { + magic2 = 0x001ffe67; + } else { + magic2 = 0x00087e67; + } +- xf_emit(ctx, 8, 0); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, magic2); +- xf_emit(ctx, 4, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 7, 0); +- if (dev_priv->chipset >= 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 0x15); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0x10); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 4, 0); ++ xf_emit(ctx, 1, 0); /* f/7 MUTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 00000007 STENCIL_BACK_FUNC_FUNC */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_FUNC_MASK */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_MASK */ ++ xf_emit(ctx, 3, 0); /* 00000007 STENCIL_BACK_OP_FAIL, ZFAIL, ZPASS */ ++ xf_emit(ctx, 1, 2); /* 00000003 tesla UNK143C */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 1, magic2); /* 001fffff tesla UNK0F78 */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_BOUNDS_EN */ ++ xf_emit(ctx, 1, 0); /* 00000007 DEPTH_TEST_FUNC */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE_ENABLE */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ xf_emit(ctx, 1, 0); /* 00000007 STENCIL_FRONT_FUNC_FUNC */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_FUNC_MASK */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_MASK */ ++ xf_emit(ctx, 3, 0); /* 00000007 STENCIL_FRONT_OP_FAIL, ZFAIL, ZPASS */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ if (dev_priv->chipset >= 0xa0 && !IS_NVAAF(dev_priv->chipset)) ++ 
xf_emit(ctx, 1, 0x15); /* 000000ff */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_BACK_ENABLE */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK15B4 */ ++ xf_emit(ctx, 1, 0x10); /* 3ff/ff VIEW_VOLUME_CLIP_CTRL */ ++ xf_emit(ctx, 1, 0); /* ffffffff CLEAR_DEPTH */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 ZETA_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ + if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x92 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa0) { +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 0x400); +- xf_emit(ctx, 1, 0x300); +- xf_emit(ctx, 1, 0x1001); ++ xf_emit(ctx, 3, 0); /* ff, ffffffff, ffffffff */ ++ xf_emit(ctx, 1, 4); /* 7 */ ++ xf_emit(ctx, 1, 0x400); /* fffffff */ ++ xf_emit(ctx, 1, 0x300); /* ffff */ ++ xf_emit(ctx, 1, 0x1001); /* 1fff */ + if (dev_priv->chipset != 0xa0) { +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 0); ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 0); /* 0000000f UNK15C8 */ + else +- xf_emit(ctx, 1, 0x15); ++ xf_emit(ctx, 1, 0x15); /* ff */ + } +- xf_emit(ctx, 3, 0); + } +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 8, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0x10); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 0x13, 0); +- xf_emit(ctx, 1, 0x10); +- xf_emit(ctx, 0x10, 0); +- xf_emit(ctx, 0x10, 0x3f800000); +- xf_emit(ctx, 0x19, 0); +- xf_emit(ctx, 1, 0x10); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x3f); +- xf_emit(ctx, 6, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); ++ xf_emit(ctx, 1, 0); /* 00000007 MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 00000007 STENCIL_BACK_FUNC_FUNC */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_FUNC_MASK */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 1, 2); /* 00000003 tesla UNK143C */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_BOUNDS_EN */ ++ xf_emit(ctx, 1, 0); /* 00000007 DEPTH_TEST_FUNC */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000007 STENCIL_FRONT_FUNC_FUNC */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_FUNC_MASK */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_BACK_ENABLE */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK15B4 */ ++ xf_emit(ctx, 1, 0x10); /* 7f/ff VIEW_VOLUME_CLIP_CTRL */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 ZETA_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1900 */ ++ xf_emit(ctx, 1, 0); /* 00000007 STENCIL_BACK_FUNC_FUNC */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_FUNC_MASK */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_FUNC_REF */ ++ xf_emit(ctx, 2, 0); /* ffffffff DEPTH_BOUNDS */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_BOUNDS_EN */ ++ xf_emit(ctx, 1, 0); /* 00000007 DEPTH_TEST_FUNC */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 0000000f */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK0FB0 */ ++ xf_emit(ctx, 1, 0); /* 00000007 STENCIL_FRONT_FUNC_FUNC */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_FUNC_MASK */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_FUNC_REF */ ++ 
xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_BACK_ENABLE */ ++ xf_emit(ctx, 1, 0x10); /* 7f/ff VIEW_VOLUME_CLIP_CTRL */ ++ xf_emit(ctx, 0x10, 0); /* ffffffff DEPTH_RANGE_NEAR */ ++ xf_emit(ctx, 0x10, 0x3f800000); /* ffffffff DEPTH_RANGE_FAR */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 0); /* 00000007 MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000007 STENCIL_BACK_FUNC_FUNC */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_FUNC_MASK */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_FUNC_REF */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_MASK */ ++ xf_emit(ctx, 3, 0); /* 00000007 STENCIL_BACK_OP_FAIL, ZFAIL, ZPASS */ ++ xf_emit(ctx, 2, 0); /* ffffffff DEPTH_BOUNDS */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_BOUNDS_EN */ ++ xf_emit(ctx, 1, 0); /* 00000007 DEPTH_TEST_FUNC */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 000000ff CLEAR_STENCIL */ ++ xf_emit(ctx, 1, 0); /* 00000007 STENCIL_FRONT_FUNC_FUNC */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_FUNC_MASK */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_FUNC_REF */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_MASK */ ++ xf_emit(ctx, 3, 0); /* 00000007 STENCIL_FRONT_OP_FAIL, ZFAIL, ZPASS */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_BACK_ENABLE */ ++ xf_emit(ctx, 1, 0x10); /* 7f/ff VIEW_VOLUME_CLIP_CTRL */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 0x3f); /* 0000003f UNK1590 */ ++ xf_emit(ctx, 1, 0); /* 00000007 MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 2, 0); /* ffff0ff3, ffff */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK0FB0 */ ++ xf_emit(ctx, 1, 0); /* 0001ffff GP_BUILTIN_RESULT_EN */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK15B4 */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 ZETA_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ffffffff CLEAR_DEPTH */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK19CC */ + if (dev_priv->chipset >= 0xa0) { + xf_emit(ctx, 2, 0); + xf_emit(ctx, 1, 0x1001); + xf_emit(ctx, 0xb, 0); + } else { +- xf_emit(ctx, 0xc, 0); ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 00000007 MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 8, 0); /* 00000001 BLEND_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ } ++ xf_emit(ctx, 1, 0x11); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 7, 0); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 1, 0xf); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 7, 0); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f */ ++ xf_emit(ctx, 1, 0); /* 00000001 LOGIC_OP_ENABLE */ ++ if (dev_priv->chipset != 0x50) { ++ xf_emit(ctx, 1, 0); /* 0000000f LOGIC_OP */ ++ xf_emit(ctx, 1, 0); /* 000000ff */ + } +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0xf); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0x11); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 4, 0); +- else +- xf_emit(ctx, 6, 0); +- xf_emit(ctx, 3, 1); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, magic2); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x0fac6881); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 0x18, 1); +- xf_emit(ctx, 8, 2); +- xf_emit(ctx, 8, 1); 
+- xf_emit(ctx, 8, 2); +- xf_emit(ctx, 8, 1); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 5, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 0x16, 0); ++ xf_emit(ctx, 1, 0); /* 00000007 OPERATION */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ ++ xf_emit(ctx, 1, 0); /* 00000003 UNK0F90 */ ++ xf_emit(ctx, 2, 1); /* 00000007 BLEND_EQUATION_RGB, ALPHA */ ++ xf_emit(ctx, 1, 1); /* 00000001 UNK133C */ ++ xf_emit(ctx, 1, 2); /* 0000001f BLEND_FUNC_SRC_RGB */ ++ xf_emit(ctx, 1, 1); /* 0000001f BLEND_FUNC_DST_RGB */ ++ xf_emit(ctx, 1, 2); /* 0000001f BLEND_FUNC_SRC_ALPHA */ ++ xf_emit(ctx, 1, 1); /* 0000001f BLEND_FUNC_DST_ALPHA */ ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ xf_emit(ctx, 1, magic2); /* 001fffff tesla UNK0F78 */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ ++ xf_emit(ctx, 1, 0x0fac6881); /* 0fffffff RT_CONTROL */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK12E4 */ ++ xf_emit(ctx, 8, 1); /* 00000007 IBLEND_EQUATION_RGB */ ++ xf_emit(ctx, 8, 1); /* 00000007 IBLEND_EQUATION_ALPHA */ ++ xf_emit(ctx, 8, 1); /* 00000001 IBLEND_UNK00 */ ++ xf_emit(ctx, 8, 2); /* 0000001f IBLEND_FUNC_SRC_RGB */ ++ xf_emit(ctx, 8, 1); /* 0000001f IBLEND_FUNC_DST_RGB */ ++ xf_emit(ctx, 8, 2); /* 0000001f IBLEND_FUNC_SRC_ALPHA */ ++ xf_emit(ctx, 8, 1); /* 0000001f IBLEND_FUNC_DST_ALPHA */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1140 */ ++ xf_emit(ctx, 2, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ xf_emit(ctx, 1, 0); /* 0000000f */ ++ xf_emit(ctx, 1, 0); /* 00000003 */ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 2, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 0); /* 000003ff */ ++ } else if (dev_priv->chipset >= 0xa0) { ++ xf_emit(ctx, 2, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ xf_emit(ctx, 1, 0); /* 00000003 */ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 2, 0); /* 00000001 */ + } else { +- if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 0x1b, 0); +- else +- xf_emit(ctx, 0x15, 0); +- } +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 2, 1); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 2, 1); ++ xf_emit(ctx, 1, 0); /* 00000007 MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK1430 */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ ++ } ++ xf_emit(ctx, 4, 0); /* ffffffff CLEAR_COLOR */ ++ xf_emit(ctx, 4, 0); /* ffffffff BLEND_COLOR A R G B */ ++ xf_emit(ctx, 1, 0); /* 00000fff eng2d UNK2B0 */ + if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 4, 0); +- else +- xf_emit(ctx, 3, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- xf_emit(ctx, 0x10, 1); +- xf_emit(ctx, 8, 2); +- xf_emit(ctx, 0x10, 1); +- xf_emit(ctx, 8, 2); +- xf_emit(ctx, 8, 1); +- xf_emit(ctx, 3, 0); +- } +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 0x5b, 0); ++ xf_emit(ctx, 2, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 0); /* 000003ff */ ++ xf_emit(ctx, 8, 0); /* 00000001 BLEND_ENABLE */ ++ xf_emit(ctx, 1, 1); /* 00000001 UNK133C */ ++ xf_emit(ctx, 1, 2); /* 0000001f BLEND_FUNC_SRC_RGB */ ++ xf_emit(ctx, 1, 1); /* 0000001f BLEND_FUNC_DST_RGB */ ++ xf_emit(ctx, 1, 1); /* 00000007 BLEND_EQUATION_RGB */ ++ xf_emit(ctx, 1, 2); /* 0000001f BLEND_FUNC_SRC_ALPHA */ ++ xf_emit(ctx, 1, 1); /* 0000001f BLEND_FUNC_DST_ALPHA */ ++ xf_emit(ctx, 1, 1); /* 00000007 BLEND_EQUATION_ALPHA */ ++ xf_emit(ctx, 1, 0); /* 00000001 UNK19C0 */ ++ xf_emit(ctx, 1, 0); /* 00000001 LOGIC_OP_ENABLE */ ++ 
xf_emit(ctx, 1, 0); /* 0000000f LOGIC_OP */ ++ if (dev_priv->chipset >= 0xa0) ++ xf_emit(ctx, 1, 0); /* 00000001 UNK12E4? NVA3+ only? */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 8, 1); /* 00000001 IBLEND_UNK00 */ ++ xf_emit(ctx, 8, 1); /* 00000007 IBLEND_EQUATION_RGB */ ++ xf_emit(ctx, 8, 2); /* 0000001f IBLEND_FUNC_SRC_RGB */ ++ xf_emit(ctx, 8, 1); /* 0000001f IBLEND_FUNC_DST_RGB */ ++ xf_emit(ctx, 8, 1); /* 00000007 IBLEND_EQUATION_ALPHA */ ++ xf_emit(ctx, 8, 2); /* 0000001f IBLEND_FUNC_SRC_ALPHA */ ++ xf_emit(ctx, 8, 1); /* 0000001f IBLEND_FUNC_DST_ALPHA */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK15C4 */ ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1140 */ ++ } ++ xf_emit(ctx, 1, 0x11); /* 3f/7f DST_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 DST_LINEAR */ ++ xf_emit(ctx, 1, 0); /* 00000007 PATTERN_COLOR_FORMAT */ ++ xf_emit(ctx, 2, 0); /* ffffffff PATTERN_MONO_COLOR */ ++ xf_emit(ctx, 1, 0); /* 00000001 PATTERN_MONO_FORMAT */ ++ xf_emit(ctx, 2, 0); /* ffffffff PATTERN_MONO_BITMAP */ ++ xf_emit(ctx, 1, 0); /* 00000003 PATTERN_SELECT */ ++ xf_emit(ctx, 1, 0); /* 000000ff ROP */ ++ xf_emit(ctx, 1, 0); /* ffffffff BETA1 */ ++ xf_emit(ctx, 1, 0); /* ffffffff BETA4 */ ++ xf_emit(ctx, 1, 0); /* 00000007 OPERATION */ ++ xf_emit(ctx, 0x50, 0); /* 10x ffffff, ffffff, ffffff, ffffff, 3 PATTERN */ + } + + static void +-nv50_graph_construct_xfer_tp_x1(struct nouveau_grctx *ctx) ++nv50_graph_construct_xfer_unk84xx(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; + int magic3; +- if (dev_priv->chipset == 0x50) ++ switch (dev_priv->chipset) { ++ case 0x50: + magic3 = 0x1000; +- else if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa8) ++ break; ++ case 0x86: ++ case 0x98: ++ case 0xa8: ++ case 0xaa: ++ case 0xac: ++ case 0xaf: + magic3 = 0x1e00; +- else ++ break; ++ default: + magic3 = 0; +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 4); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 0x24, 0); ++ } ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 7f/ff[NVA0+] VP_REG_ALLOC_RESULT */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 0); /* 111/113[NVA0+] */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 0x1f, 0); /* ffffffff */ + else if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 0x14, 0); ++ xf_emit(ctx, 0x0f, 0); /* ffffffff */ + else +- xf_emit(ctx, 0x15, 0); +- xf_emit(ctx, 2, 4); ++ xf_emit(ctx, 0x10, 0); /* fffffff VP_RESULT_MAP_1 up */ ++ xf_emit(ctx, 2, 0); /* f/1f[NVA3], fffffff/ffffffff[NVA0+] */ ++ xf_emit(ctx, 1, 4); /* 7f/ff VP_REG_ALLOC_RESULT */ ++ xf_emit(ctx, 1, 4); /* 7f/ff VP_RESULT_MAP_SIZE */ + if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 1, 0x03020100); ++ xf_emit(ctx, 1, 0x03020100); /* ffffffff */ + else +- xf_emit(ctx, 1, 0x00608080); +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 2, 4); +- xf_emit(ctx, 1, 0x80); ++ xf_emit(ctx, 1, 0x00608080); /* fffffff VP_RESULT_MAP_0 */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 2, 0); /* 111/113, 7f/ff */ ++ xf_emit(ctx, 1, 4); /* 7f/ff VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_REG_ALLOC_RESULT */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ 
xf_emit(ctx, 1, 0x80); /* 0000ffff GP_VERTEX_OUTPUT_COUNT */ + if (magic3) +- xf_emit(ctx, 1, magic3); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 0x24, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 0x80); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 0x03020100); +- xf_emit(ctx, 1, 3); ++ xf_emit(ctx, 1, magic3); /* 00007fff tesla UNK141C */ ++ xf_emit(ctx, 1, 4); /* 7f/ff VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 0); /* 111/113 */ ++ xf_emit(ctx, 0x1f, 0); /* ffffffff GP_RESULT_MAP_1 up */ ++ xf_emit(ctx, 1, 0); /* 0000001f */ ++ xf_emit(ctx, 1, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_REG_ALLOC_RESULT */ ++ xf_emit(ctx, 1, 0x80); /* 0000ffff GP_VERTEX_OUTPUT_COUNT */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0x03020100); /* ffffffff GP_RESULT_MAP_0 */ ++ xf_emit(ctx, 1, 3); /* 00000003 GP_OUTPUT_PRIMITIVE_TYPE */ + if (magic3) +- xf_emit(ctx, 1, magic3); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 3); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 4); ++ xf_emit(ctx, 1, magic3); /* 7fff tesla UNK141C */ ++ xf_emit(ctx, 1, 4); /* 7f/ff VP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 0); /* 00000001 PROVOKING_VERTEX_LAST */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 0); /* 111/113 */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 3); /* 00000003 GP_OUTPUT_PRIMITIVE_TYPE */ ++ xf_emit(ctx, 1, 0); /* 00000001 PROVOKING_VERTEX_LAST */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK13A0 */ ++ xf_emit(ctx, 1, 4); /* 7f/ff VP_REG_ALLOC_RESULT */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ xf_emit(ctx, 1, 0); /* 111/113 */ + if (dev_priv->chipset == 0x94 || dev_priv->chipset == 0x96) +- xf_emit(ctx, 0x1024, 0); ++ xf_emit(ctx, 0x1020, 0); /* 4 x (0x400 x 0xffffffff, ff, 0, 0, 0, 4 x ffffffff) */ + else if (dev_priv->chipset < 0xa0) +- xf_emit(ctx, 0xa24, 0); +- else if (dev_priv->chipset == 0xa0 || dev_priv->chipset >= 0xaa) +- xf_emit(ctx, 0x214, 0); +- else +- xf_emit(ctx, 0x414, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 3); +- xf_emit(ctx, 2, 0); ++ xf_emit(ctx, 0xa20, 0); /* 4 x (0x280 x 0xffffffff, ff, 0, 0, 0, 4 x ffffffff) */ ++ else if (!IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 0x210, 0); /* ffffffff */ ++ else ++ xf_emit(ctx, 0x410, 0); /* ffffffff */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 000000ff GP_RESULT_MAP_SIZE */ ++ xf_emit(ctx, 1, 3); /* 00000003 GP_OUTPUT_PRIMITIVE_TYPE */ ++ xf_emit(ctx, 1, 0); /* 00000001 PROVOKING_VERTEX_LAST */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ + } + + static void +-nv50_graph_construct_xfer_tp_x2(struct nouveau_grctx *ctx) ++nv50_graph_construct_xfer_tprop(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; + int magic1, magic2; + if (dev_priv->chipset == 0x50) { + magic1 = 0x3ff; + magic2 = 0x00003e60; +- } else if (dev_priv->chipset <= 0xa0 || dev_priv->chipset >= 0xaa) { ++ } else if (!IS_NVA3F(dev_priv->chipset)) { + magic1 = 0x7ff; + magic2 = 0x001ffe67; + } else { + magic1 = 0x7ff; + magic2 = 0x00087e67; + } +- xf_emit(ctx, 3, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 0xc, 0); +- xf_emit(ctx, 1, 0xf); +- 
xf_emit(ctx, 0xb, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 4, 0xffff); +- xf_emit(ctx, 8, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 5, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 2, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- xf_emit(ctx, 1, 3); +- xf_emit(ctx, 1, 0); +- } else if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 0xa, 0); +- xf_emit(ctx, 2, 1); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 2, 1); +- xf_emit(ctx, 1, 2); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 0x18, 1); +- xf_emit(ctx, 8, 2); +- xf_emit(ctx, 8, 1); +- xf_emit(ctx, 8, 2); +- xf_emit(ctx, 8, 1); +- xf_emit(ctx, 1, 0); +- } +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0x0fac6881); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 3, 0xcf); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 0xa, 0); +- xf_emit(ctx, 2, 1); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 2, 1); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 8, 1); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0x0fac6881); +- xf_emit(ctx, 1, 0xf); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, magic2); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x11); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 2, 1); +- else +- xf_emit(ctx, 1, 1); ++ xf_emit(ctx, 1, 0); /* 00000007 ALPHA_TEST_FUNC */ ++ xf_emit(ctx, 1, 0); /* ffffffff ALPHA_TEST_REF */ ++ xf_emit(ctx, 1, 0); /* 00000001 ALPHA_TEST_ENABLE */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 1); /* 0000000f UNK16A0 */ ++ xf_emit(ctx, 1, 0); /* 7/f MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_BACK_MASK */ ++ xf_emit(ctx, 3, 0); /* 00000007 STENCIL_BACK_OP_FAIL, ZFAIL, ZPASS */ ++ xf_emit(ctx, 4, 0); /* ffffffff BLEND_COLOR */ ++ xf_emit(ctx, 1, 0); /* 00000001 UNK19C0 */ ++ xf_emit(ctx, 1, 0); /* 00000001 UNK0FDC */ ++ xf_emit(ctx, 1, 0xf); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 7, 0); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 LOGIC_OP_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ff[NV50]/3ff[NV84+] */ ++ xf_emit(ctx, 1, 4); /* 00000007 FP_CONTROL */ ++ xf_emit(ctx, 4, 0xffff); /* 0000ffff MSAA_MASK */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_MASK */ ++ xf_emit(ctx, 3, 0); /* 00000007 STENCIL_FRONT_OP_FAIL, ZFAIL, ZPASS */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_BACK_ENABLE */ ++ xf_emit(ctx, 2, 0); /* 00007fff WINDOW_OFFSET_XY */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK19CC */ ++ xf_emit(ctx, 1, 0); /* 7 */ ++ xf_emit(ctx, 1, 0); /* 00000001 SAMPLECNT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 ZETA_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ffffffff COLOR_KEY */ ++ xf_emit(ctx, 1, 0); /* 00000001 COLOR_KEY_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000007 COLOR_KEY_FORMAT */ ++ xf_emit(ctx, 2, 0); /* ffffffff SIFC_BITMAP_COLOR */ ++ xf_emit(ctx, 1, 1); /* 00000001 SIFC_BITMAP_WRITE_BIT0_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000007 
ALPHA_TEST_FUNC */ ++ xf_emit(ctx, 1, 0); /* 00000001 ALPHA_TEST_ENABLE */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 1, 3); /* 00000003 tesla UNK16B4 */ ++ xf_emit(ctx, 1, 0); /* 00000003 */ ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK1298 */ ++ } else if (dev_priv->chipset >= 0xa0) { ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK16B4 */ ++ xf_emit(ctx, 1, 0); /* 00000003 */ ++ } else { ++ xf_emit(ctx, 1, 0); /* 00000003 MULTISAMPLE_CTRL */ ++ } ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 8, 0); /* 00000001 BLEND_ENABLE */ ++ xf_emit(ctx, 1, 1); /* 0000001f BLEND_FUNC_DST_ALPHA */ ++ xf_emit(ctx, 1, 1); /* 00000007 BLEND_EQUATION_ALPHA */ ++ xf_emit(ctx, 1, 2); /* 0000001f BLEND_FUNC_SRC_ALPHA */ ++ xf_emit(ctx, 1, 1); /* 0000001f BLEND_FUNC_DST_RGB */ ++ xf_emit(ctx, 1, 1); /* 00000007 BLEND_EQUATION_RGB */ ++ xf_emit(ctx, 1, 2); /* 0000001f BLEND_FUNC_SRC_RGB */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 1, 0); /* 00000001 UNK12E4 */ ++ xf_emit(ctx, 8, 1); /* 00000007 IBLEND_EQUATION_RGB */ ++ xf_emit(ctx, 8, 1); /* 00000007 IBLEND_EQUATION_ALPHA */ ++ xf_emit(ctx, 8, 1); /* 00000001 IBLEND_UNK00 */ ++ xf_emit(ctx, 8, 2); /* 0000001f IBLEND_SRC_RGB */ ++ xf_emit(ctx, 8, 1); /* 0000001f IBLEND_DST_RGB */ ++ xf_emit(ctx, 8, 2); /* 0000001f IBLEND_SRC_ALPHA */ ++ xf_emit(ctx, 8, 1); /* 0000001f IBLEND_DST_ALPHA */ ++ xf_emit(ctx, 1, 0); /* 00000001 UNK1140 */ ++ } ++ xf_emit(ctx, 1, 1); /* 00000001 UNK133C */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 7, 0); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 1, 0x0fac6881); /* 0fffffff RT_CONTROL */ ++ xf_emit(ctx, 1, 0); /* 00000001 LOGIC_OP_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ ++ xf_emit(ctx, 1, 4); /* 00000007 FP_CONTROL */ ++ xf_emit(ctx, 1, 0); /* 00000003 UNK0F90 */ ++ xf_emit(ctx, 1, 0); /* 00000001 FRAMEBUFFER_SRGB */ ++ xf_emit(ctx, 1, 0); /* 7 */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f DST_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 DST_LINEAR */ ++ xf_emit(ctx, 1, 0); /* 00000007 OPERATION */ ++ xf_emit(ctx, 1, 0xcf); /* 000000ff SIFC_FORMAT */ ++ xf_emit(ctx, 1, 0xcf); /* 000000ff DRAW_COLOR_FORMAT */ ++ xf_emit(ctx, 1, 0xcf); /* 000000ff SRC_FORMAT */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ ++ xf_emit(ctx, 1, 0); /* 7/f[NVA3] MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 8, 0); /* 00000001 BLEND_ENABLE */ ++ xf_emit(ctx, 1, 1); /* 0000001f BLEND_FUNC_DST_ALPHA */ ++ xf_emit(ctx, 1, 1); /* 00000007 BLEND_EQUATION_ALPHA */ ++ xf_emit(ctx, 1, 2); /* 0000001f BLEND_FUNC_SRC_ALPHA */ ++ xf_emit(ctx, 1, 1); /* 0000001f BLEND_FUNC_DST_RGB */ ++ xf_emit(ctx, 1, 1); /* 00000007 BLEND_EQUATION_RGB */ ++ xf_emit(ctx, 1, 2); /* 0000001f BLEND_FUNC_SRC_RGB */ ++ xf_emit(ctx, 1, 1); /* 00000001 UNK133C */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 8, 1); /* 00000001 UNK19E0 */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 7, 0); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 1, 0x0fac6881); /* 0fffffff RT_CONTROL */ ++ xf_emit(ctx, 1, 0xf); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 7, 0); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 1, magic2); /* 001fffff tesla UNK0F78 */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_BOUNDS_EN */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f DST_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 DST_LINEAR */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 1); 
/* 0000001f tesla UNK169C */ + if(dev_priv->chipset == 0x50) +- xf_emit(ctx, 1, 0); +- else +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 5, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0x0fac6881); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, magic1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 2, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 0x28, 0); +- xf_emit(ctx, 8, 8); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0x0fac6881); +- xf_emit(ctx, 8, 0x400); +- xf_emit(ctx, 8, 0x300); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0xf); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0x20); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 1, 0x100); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x40); +- xf_emit(ctx, 1, 0x100); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 3); +- xf_emit(ctx, 4, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, magic2); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 1, 0x0fac6881); +- xf_emit(ctx, 9, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0x400); +- xf_emit(ctx, 1, 0x300); +- xf_emit(ctx, 1, 0x1001); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 4, 0); ++ xf_emit(ctx, 1, 0); /* ff */ + else +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0x0fac6881); +- xf_emit(ctx, 1, 0xf); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- xf_emit(ctx, 0x15, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 3, 0); +- } else +- xf_emit(ctx, 0x17, 0); ++ xf_emit(ctx, 3, 0); /* 1, 7, 3ff */ ++ xf_emit(ctx, 1, 4); /* 00000007 FP_CONTROL */ ++ xf_emit(ctx, 1, 0); /* 00000003 UNK0F90 */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ xf_emit(ctx, 1, 0); /* 00000001 SAMPLECNT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 ZETA_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ ++ xf_emit(ctx, 1, 0); /* 7/f MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 7, 0); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 1, 0x0fac6881); /* 0fffffff RT_CONTROL */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_BOUNDS_EN */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE_ENABLE */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f DST_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 DST_LINEAR */ ++ xf_emit(ctx, 1, 0); /* 000fffff BLIT_DU_DX_FRACT */ ++ xf_emit(ctx, 1, 1); /* 0001ffff BLIT_DU_DX_INT */ ++ xf_emit(ctx, 1, 0); /* 000fffff BLIT_DV_DY_FRACT */ ++ xf_emit(ctx, 1, 1); /* 0001ffff BLIT_DV_DY_INT */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ ++ xf_emit(ctx, 1, magic1); /* 3ff/7ff tesla UNK0D68 */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK15B4 */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 1); 
/* 00000001 ZETA_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ xf_emit(ctx, 8, 0); /* 0000ffff DMA_COLOR */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_GLOBAL */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_LOCAL */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_STACK */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_DST */ ++ xf_emit(ctx, 1, 0); /* 7 */ ++ xf_emit(ctx, 1, 0); /* 7/f MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 8, 0); /* 000000ff RT_ADDRESS_HIGH */ ++ xf_emit(ctx, 8, 0); /* ffffffff RT_LAYER_STRIDE */ ++ xf_emit(ctx, 8, 0); /* ffffffff RT_ADDRESS_LOW */ ++ xf_emit(ctx, 8, 8); /* 0000007f RT_TILE_MODE */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 7, 0); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 1, 0x0fac6881); /* 0fffffff RT_CONTROL */ ++ xf_emit(ctx, 8, 0x400); /* 0fffffff RT_HORIZ */ ++ xf_emit(ctx, 8, 0x300); /* 0000ffff RT_VERT */ ++ xf_emit(ctx, 1, 1); /* 00001fff RT_ARRAY_MODE */ ++ xf_emit(ctx, 1, 0xf); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 7, 0); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 1, 0x20); /* 00000fff DST_TILE_MODE */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f DST_FORMAT */ ++ xf_emit(ctx, 1, 0x100); /* 0001ffff DST_HEIGHT */ ++ xf_emit(ctx, 1, 0); /* 000007ff DST_LAYER */ ++ xf_emit(ctx, 1, 1); /* 00000001 DST_LINEAR */ ++ xf_emit(ctx, 1, 0); /* ffffffff DST_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 0); /* 000000ff DST_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0x40); /* 0007ffff DST_PITCH */ ++ xf_emit(ctx, 1, 0x100); /* 0001ffff DST_WIDTH */ ++ xf_emit(ctx, 1, 0); /* 0000ffff */ ++ xf_emit(ctx, 1, 3); /* 00000003 tesla UNK15AC */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ ++ xf_emit(ctx, 1, 0); /* 0001ffff GP_BUILTIN_RESULT_EN */ ++ xf_emit(ctx, 1, 0); /* 00000003 UNK0F90 */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ xf_emit(ctx, 1, magic2); /* 001fffff tesla UNK0F78 */ ++ xf_emit(ctx, 1, 0); /* 7/f MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 1, 2); /* 00000003 tesla UNK143C */ ++ xf_emit(ctx, 1, 0x0fac6881); /* 0fffffff RT_CONTROL */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_ZETA */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_BOUNDS_EN */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE_ENABLE */ ++ xf_emit(ctx, 2, 0); /* ffff, ff/3ff */ ++ xf_emit(ctx, 1, 0); /* 0001ffff GP_BUILTIN_RESULT_EN */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 000000ff STENCIL_FRONT_MASK */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK15B4 */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ xf_emit(ctx, 1, 0); /* ffffffff ZETA_LAYER_STRIDE */ ++ xf_emit(ctx, 1, 0); /* 000000ff ZETA_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff ZETA_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 4); /* 00000007 ZETA_TILE_MODE */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 ZETA_ENABLE */ ++ xf_emit(ctx, 1, 0x400); /* 0fffffff ZETA_HORIZ */ ++ xf_emit(ctx, 1, 0x300); /* 0000ffff ZETA_VERT */ ++ xf_emit(ctx, 1, 0x1001); /* 00001fff ZETA_ARRAY_MODE */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ ++ xf_emit(ctx, 1, 0); /* 7/f MULTISAMPLE_SAMPLES_LOG2 */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 0); /* 00000001 */ ++ 
xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 7, 0); /* 3f/7f RT_FORMAT */ ++ xf_emit(ctx, 1, 0x0fac6881); /* 0fffffff RT_CONTROL */ ++ xf_emit(ctx, 1, 0xf); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 7, 0); /* 0000000f COLOR_MASK */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ ++ xf_emit(ctx, 8, 0); /* 00000001 BLEND_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000003 UNK0F90 */ ++ xf_emit(ctx, 1, 0); /* 00000001 FRAMEBUFFER_SRGB */ ++ xf_emit(ctx, 1, 0); /* 7 */ ++ xf_emit(ctx, 1, 0); /* 00000001 LOGIC_OP_ENABLE */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 1, 0); /* 00000001 UNK1140 */ ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ } ++ xf_emit(ctx, 1, 0); /* 7/f MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000001 UNK1534 */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ + if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 1, 0x0fac6881); +- xf_emit(ctx, 1, magic2); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 2, 1); +- xf_emit(ctx, 3, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 2, 1); +- else +- xf_emit(ctx, 1, 1); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 2, 0); +- else if (dev_priv->chipset != 0x50) +- xf_emit(ctx, 1, 0); +-} +- +-static void +-nv50_graph_construct_xfer_tp_x3(struct nouveau_grctx *ctx) +-{ +- struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 2, 0); +- else +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 0x2a712488); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x4085c000); +- xf_emit(ctx, 1, 0x40); +- xf_emit(ctx, 1, 0x100); +- xf_emit(ctx, 1, 0x10100); +- xf_emit(ctx, 1, 0x02800000); ++ xf_emit(ctx, 1, 0x0fac6881); /* fffffff */ ++ xf_emit(ctx, 1, magic2); /* 001fffff tesla UNK0F78 */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_BOUNDS_EN */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE_ENABLE */ ++ xf_emit(ctx, 1, 0x11); /* 3f/7f DST_FORMAT */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK0FB0 */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ ++ xf_emit(ctx, 1, 4); /* 00000007 FP_CONTROL */ ++ xf_emit(ctx, 1, 0); /* 00000001 STENCIL_FRONT_ENABLE */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK15B4 */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK19CC */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ xf_emit(ctx, 1, 0); /* 00000001 SAMPLECNT_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 0000000f ZETA_FORMAT */ ++ xf_emit(ctx, 1, 1); /* 00000001 ZETA_ENABLE */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ xf_emit(ctx, 1, 0); /* 0000000f tesla UNK15C8 */ ++ } ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A3C */ ++ if (dev_priv->chipset >= 0xa0) { ++ xf_emit(ctx, 3, 0); /* 7/f, 1, ffff0ff3 */ ++ xf_emit(ctx, 1, 0xfac6881); /* fffffff */ ++ xf_emit(ctx, 4, 0); /* 1, 1, 1, 3ff */ ++ xf_emit(ctx, 1, 4); /* 7 */ ++ xf_emit(ctx, 1, 0); /* 1 */ ++ xf_emit(ctx, 2, 1); /* 1 */ ++ xf_emit(ctx, 2, 0); /* 7, f */ ++ xf_emit(ctx, 1, 1); /* 1 */ ++ xf_emit(ctx, 1, 0); /* 7/f */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 0x9, 0); /* 1 */ ++ else ++ xf_emit(ctx, 0x8, 0); /* 1 */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 8, 1); /* 1 */ ++ xf_emit(ctx, 1, 0x11); /* 7f */ ++ xf_emit(ctx, 7, 0); /* 7f */ ++ xf_emit(ctx, 1, 0xfac6881); /* fffffff */ 
++ xf_emit(ctx, 1, 0xf); /* f */ ++ xf_emit(ctx, 7, 0); /* f */ ++ xf_emit(ctx, 1, 0x11); /* 7f */ ++ xf_emit(ctx, 1, 1); /* 1 */ ++ xf_emit(ctx, 5, 0); /* 1, 7, 3ff, 3, 7 */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 1, 0); /* 00000001 UNK1140 */ ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ } ++ } + } + + static void +-nv50_graph_construct_xfer_tp_x4(struct nouveau_grctx *ctx) ++nv50_graph_construct_xfer_tex(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +- xf_emit(ctx, 2, 0x04e3bfdf); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x00ffff00); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 2, 1); +- else +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 0x00ffff00); +- xf_emit(ctx, 8, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0x30201000); +- xf_emit(ctx, 1, 0x70605040); +- xf_emit(ctx, 1, 0xb8a89888); +- xf_emit(ctx, 1, 0xf8e8d8c8); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x1a); ++ xf_emit(ctx, 2, 0); /* 1 LINKED_TSC. yes, 2. */ ++ if (dev_priv->chipset != 0x50) ++ xf_emit(ctx, 1, 0); /* 3 */ ++ xf_emit(ctx, 1, 1); /* 1ffff BLIT_DU_DX_INT */ ++ xf_emit(ctx, 1, 0); /* fffff BLIT_DU_DX_FRACT */ ++ xf_emit(ctx, 1, 1); /* 1ffff BLIT_DV_DY_INT */ ++ xf_emit(ctx, 1, 0); /* fffff BLIT_DV_DY_FRACT */ ++ if (dev_priv->chipset == 0x50) ++ xf_emit(ctx, 1, 0); /* 3 BLIT_CONTROL */ ++ else ++ xf_emit(ctx, 2, 0); /* 3ff, 1 */ ++ xf_emit(ctx, 1, 0x2a712488); /* ffffffff SRC_TIC_0 */ ++ xf_emit(ctx, 1, 0); /* ffffffff SRC_TIC_1 */ ++ xf_emit(ctx, 1, 0x4085c000); /* ffffffff SRC_TIC_2 */ ++ xf_emit(ctx, 1, 0x40); /* ffffffff SRC_TIC_3 */ ++ xf_emit(ctx, 1, 0x100); /* ffffffff SRC_TIC_4 */ ++ xf_emit(ctx, 1, 0x10100); /* ffffffff SRC_TIC_5 */ ++ xf_emit(ctx, 1, 0x02800000); /* ffffffff SRC_TIC_6 */ ++ xf_emit(ctx, 1, 0); /* ffffffff SRC_TIC_7 */ ++ if (dev_priv->chipset == 0x50) { ++ xf_emit(ctx, 1, 0); /* 00000001 turing UNK358 */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A34? */ ++ xf_emit(ctx, 1, 0); /* 00000003 turing UNK37C tesla UNK1690 */ ++ xf_emit(ctx, 1, 0); /* 00000003 BLIT_CONTROL */ ++ xf_emit(ctx, 1, 0); /* 00000001 turing UNK32C tesla UNK0F94 */ ++ } else if (!IS_NVAAF(dev_priv->chipset)) { ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A34? 
*/ ++ xf_emit(ctx, 1, 0); /* 00000003 */ ++ xf_emit(ctx, 1, 0); /* 000003ff */ ++ xf_emit(ctx, 1, 0); /* 00000003 */ ++ xf_emit(ctx, 1, 0); /* 000003ff */ ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK1664 / turing UNK03E8 */ ++ xf_emit(ctx, 1, 0); /* 00000003 */ ++ xf_emit(ctx, 1, 0); /* 000003ff */ ++ } else { ++ xf_emit(ctx, 0x6, 0); ++ } ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A34 */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_TEXTURE */ ++ xf_emit(ctx, 1, 0); /* 0000ffff DMA_SRC */ + } + + static void +-nv50_graph_construct_xfer_tp_x5(struct nouveau_grctx *ctx) ++nv50_graph_construct_xfer_unk8cxx(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 0xfac6881); +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 2, 1); +- xf_emit(ctx, 2, 0); +- xf_emit(ctx, 1, 1); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 0xb, 0); +- else +- xf_emit(ctx, 0xa, 0); +- xf_emit(ctx, 8, 1); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0xfac6881); +- xf_emit(ctx, 1, 0xf); +- xf_emit(ctx, 7, 0); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 1, 1); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- xf_emit(ctx, 6, 0); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 6, 0); +- } else { +- xf_emit(ctx, 0xb, 0); +- } ++ xf_emit(ctx, 1, 0); /* 00000001 UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 7/f MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 2, 0); /* 7, ffff0ff3 */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE */ ++ xf_emit(ctx, 1, 0x04e3bfdf); /* ffffffff UNK0D64 */ ++ xf_emit(ctx, 1, 0x04e3bfdf); /* ffffffff UNK0DF4 */ ++ xf_emit(ctx, 1, 1); /* 00000001 UNK15B4 */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_STIPPLE_ENABLE */ ++ xf_emit(ctx, 1, 0x00ffff00); /* 00ffffff LINE_STIPPLE_PATTERN */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK0F98 */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK1668 */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_STIPPLE_ENABLE */ ++ xf_emit(ctx, 1, 0x00ffff00); /* 00ffffff LINE_STIPPLE_PATTERN */ ++ xf_emit(ctx, 1, 0); /* 00000001 POLYGON_SMOOTH_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 7/f MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1658 */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINE_SMOOTH_ENABLE */ ++ xf_emit(ctx, 1, 0); /* ffff0ff3 */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 DEPTH_WRITE */ ++ xf_emit(ctx, 1, 1); /* 00000001 UNK15B4 */ ++ xf_emit(ctx, 1, 0); /* 00000001 POINT_SPRITE_ENABLE */ ++ xf_emit(ctx, 1, 1); /* 00000001 tesla UNK165C */ ++ xf_emit(ctx, 1, 0x30201000); /* ffffffff tesla UNK1670 */ ++ xf_emit(ctx, 1, 0x70605040); /* ffffffff tesla UNK1670 */ ++ xf_emit(ctx, 1, 0xb8a89888); /* ffffffff tesla UNK1670 */ ++ xf_emit(ctx, 1, 0xf8e8d8c8); /* ffffffff tesla UNK1670 */ ++ xf_emit(ctx, 1, 0); /* 00000001 VERTEX_TWO_SIDE_ENABLE */ ++ xf_emit(ctx, 1, 0x1a); /* 0000001f POLYGON_MODE */ + } + + static void +@@ -2193,108 +3102,136 @@ + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; + if (dev_priv->chipset < 0xa0) { +- nv50_graph_construct_xfer_tp_x1(ctx); +- nv50_graph_construct_xfer_tp_x2(ctx); +- nv50_graph_construct_xfer_tp_x3(ctx); +- if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 0xf, 0); +- else +- xf_emit(ctx, 0x12, 0); +- 
nv50_graph_construct_xfer_tp_x4(ctx); ++ nv50_graph_construct_xfer_unk84xx(ctx); ++ nv50_graph_construct_xfer_tprop(ctx); ++ nv50_graph_construct_xfer_tex(ctx); ++ nv50_graph_construct_xfer_unk8cxx(ctx); + } else { +- nv50_graph_construct_xfer_tp_x3(ctx); +- if (dev_priv->chipset < 0xaa) +- xf_emit(ctx, 0xc, 0); +- else +- xf_emit(ctx, 0xa, 0); +- nv50_graph_construct_xfer_tp_x2(ctx); +- nv50_graph_construct_xfer_tp_x5(ctx); +- nv50_graph_construct_xfer_tp_x4(ctx); +- nv50_graph_construct_xfer_tp_x1(ctx); ++ nv50_graph_construct_xfer_tex(ctx); ++ nv50_graph_construct_xfer_tprop(ctx); ++ nv50_graph_construct_xfer_unk8cxx(ctx); ++ nv50_graph_construct_xfer_unk84xx(ctx); + } + } + + static void +-nv50_graph_construct_xfer_tp2(struct nouveau_grctx *ctx) ++nv50_graph_construct_xfer_mpc(struct nouveau_grctx *ctx) + { + struct drm_nouveau_private *dev_priv = ctx->dev->dev_private; +- int i, mpcnt; +- if (dev_priv->chipset == 0x98 || dev_priv->chipset == 0xaa) +- mpcnt = 1; +- else if (dev_priv->chipset < 0xa0 || dev_priv->chipset >= 0xa8) +- mpcnt = 2; +- else +- mpcnt = 3; ++ int i, mpcnt = 2; ++ switch (dev_priv->chipset) { ++ case 0x98: ++ case 0xaa: ++ mpcnt = 1; ++ break; ++ case 0x50: ++ case 0x84: ++ case 0x86: ++ case 0x92: ++ case 0x94: ++ case 0x96: ++ case 0xa8: ++ case 0xac: ++ mpcnt = 2; ++ break; ++ case 0xa0: ++ case 0xa3: ++ case 0xa5: ++ case 0xaf: ++ mpcnt = 3; ++ break; ++ } + for (i = 0; i < mpcnt; i++) { +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x80); +- xf_emit(ctx, 1, 0x80007004); +- xf_emit(ctx, 1, 0x04000400); ++ xf_emit(ctx, 1, 0); /* ff */ ++ xf_emit(ctx, 1, 0x80); /* ffffffff tesla UNK1404 */ ++ xf_emit(ctx, 1, 0x80007004); /* ffffffff tesla UNK12B0 */ ++ xf_emit(ctx, 1, 0x04000400); /* ffffffff */ + if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 1, 0xc0); +- xf_emit(ctx, 1, 0x1000); +- xf_emit(ctx, 2, 0); +- if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset >= 0xa8) { +- xf_emit(ctx, 1, 0xe00); +- xf_emit(ctx, 1, 0x1e00); ++ xf_emit(ctx, 1, 0xc0); /* 00007fff tesla UNK152C */ ++ xf_emit(ctx, 1, 0x1000); /* 0000ffff tesla UNK0D60 */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A30 */ ++ if (dev_priv->chipset == 0x86 || dev_priv->chipset == 0x98 || dev_priv->chipset == 0xa8 || IS_NVAAF(dev_priv->chipset)) { ++ xf_emit(ctx, 1, 0xe00); /* 7fff */ ++ xf_emit(ctx, 1, 0x1e00); /* 7fff */ + } +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 2, 0); ++ xf_emit(ctx, 1, 1); /* 000000ff VP_REG_ALLOC_TEMP */ ++ xf_emit(ctx, 1, 0); /* 00000001 LINKED_TSC */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ + if (dev_priv->chipset == 0x50) +- xf_emit(ctx, 2, 0x1000); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 2); +- if (dev_priv->chipset >= 0xaa) +- xf_emit(ctx, 0xb, 0); ++ xf_emit(ctx, 2, 0x1000); /* 7fff tesla UNK141C */ ++ xf_emit(ctx, 1, 1); /* 000000ff GP_REG_ALLOC_TEMP */ ++ xf_emit(ctx, 1, 0); /* 00000001 GP_ENABLE */ ++ xf_emit(ctx, 1, 4); /* 000000ff FP_REG_ALLOC_TEMP */ ++ xf_emit(ctx, 1, 2); /* 00000003 REG_MODE */ ++ if (IS_NVAAF(dev_priv->chipset)) ++ xf_emit(ctx, 0xb, 0); /* RO */ + else if (dev_priv->chipset >= 0xa0) +- xf_emit(ctx, 0xc, 0); ++ xf_emit(ctx, 0xc, 0); /* RO */ + else +- xf_emit(ctx, 0xa, 0); ++ xf_emit(ctx, 0xa, 0); /* RO */ + } +- xf_emit(ctx, 1, 0x08100c12); +- xf_emit(ctx, 1, 0); ++ xf_emit(ctx, 1, 0x08100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ + if (dev_priv->chipset >= 0xa0) { +- xf_emit(ctx, 1, 0x1fe21); +- } +- 
xf_emit(ctx, 5, 0); +- xf_emit(ctx, 4, 0xffff); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 2, 0x10001); +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 0x1fe21); +- xf_emit(ctx, 1, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 1); +- xf_emit(ctx, 4, 0); +- xf_emit(ctx, 1, 0x08100c12); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 8, 0); +- xf_emit(ctx, 1, 0xfac6881); +- xf_emit(ctx, 1, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) +- xf_emit(ctx, 1, 3); +- xf_emit(ctx, 3, 0); +- xf_emit(ctx, 1, 4); +- xf_emit(ctx, 9, 0); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 2, 1); +- xf_emit(ctx, 1, 2); +- xf_emit(ctx, 3, 1); +- xf_emit(ctx, 1, 0); +- if (dev_priv->chipset > 0xa0 && dev_priv->chipset < 0xaa) { +- xf_emit(ctx, 8, 2); +- xf_emit(ctx, 0x10, 1); +- xf_emit(ctx, 8, 2); +- xf_emit(ctx, 0x18, 1); +- xf_emit(ctx, 3, 0); ++ xf_emit(ctx, 1, 0x1fe21); /* 0003ffff tesla UNK0FAC */ + } +- xf_emit(ctx, 1, 4); ++ xf_emit(ctx, 3, 0); /* 7fff, 0, 0 */ ++ xf_emit(ctx, 1, 0); /* 00000001 tesla UNK1534 */ ++ xf_emit(ctx, 1, 0); /* 7/f MULTISAMPLE_SAMPLES_LOG2 */ ++ xf_emit(ctx, 4, 0xffff); /* 0000ffff MSAA_MASK */ ++ xf_emit(ctx, 1, 1); /* 00000001 LANES32 */ ++ xf_emit(ctx, 1, 0x10001); /* 00ffffff BLOCK_ALLOC */ ++ xf_emit(ctx, 1, 0x10001); /* ffffffff BLOCKDIM_XY */ ++ xf_emit(ctx, 1, 1); /* 0000ffff BLOCKDIM_Z */ ++ xf_emit(ctx, 1, 0); /* ffffffff SHARED_SIZE */ ++ xf_emit(ctx, 1, 0x1fe21); /* 1ffff/3ffff[NVA0+] tesla UNk0FAC */ ++ xf_emit(ctx, 1, 0); /* ffffffff tesla UNK1A34 */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 1); /* 0000001f tesla UNK169C */ ++ xf_emit(ctx, 1, 0); /* ff/3ff */ ++ xf_emit(ctx, 1, 0); /* 1 LINKED_TSC */ ++ xf_emit(ctx, 1, 0); /* ff FP_ADDRESS_HIGH */ ++ xf_emit(ctx, 1, 0); /* ffffffff FP_ADDRESS_LOW */ ++ xf_emit(ctx, 1, 0x08100c12); /* 1fffffff FP_INTERPOLANT_CTRL */ ++ xf_emit(ctx, 1, 4); /* 00000007 FP_CONTROL */ ++ xf_emit(ctx, 1, 0); /* 000000ff FRAG_COLOR_CLAMP_EN */ ++ xf_emit(ctx, 1, 2); /* 00000003 REG_MODE */ ++ xf_emit(ctx, 1, 0x11); /* 0000007f RT_FORMAT */ ++ xf_emit(ctx, 7, 0); /* 0000007f RT_FORMAT */ ++ xf_emit(ctx, 1, 0); /* 00000007 */ ++ xf_emit(ctx, 1, 0xfac6881); /* 0fffffff RT_CONTROL */ ++ xf_emit(ctx, 1, 0); /* 00000003 MULTISAMPLE_CTRL */ ++ if (IS_NVA3F(dev_priv->chipset)) ++ xf_emit(ctx, 1, 3); /* 00000003 tesla UNK16B4 */ ++ xf_emit(ctx, 1, 0); /* 00000001 ALPHA_TEST_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000007 ALPHA_TEST_FUNC */ ++ xf_emit(ctx, 1, 0); /* 00000001 FRAMEBUFFER_SRGB */ ++ xf_emit(ctx, 1, 4); /* ffffffff tesla UNK1400 */ ++ xf_emit(ctx, 8, 0); /* 00000001 BLEND_ENABLE */ ++ xf_emit(ctx, 1, 0); /* 00000001 LOGIC_OP_ENABLE */ ++ xf_emit(ctx, 1, 2); /* 0000001f BLEND_FUNC_SRC_RGB */ ++ xf_emit(ctx, 1, 1); /* 0000001f BLEND_FUNC_DST_RGB */ ++ xf_emit(ctx, 1, 1); /* 00000007 BLEND_EQUATION_RGB */ ++ xf_emit(ctx, 1, 2); /* 0000001f BLEND_FUNC_SRC_ALPHA */ ++ xf_emit(ctx, 1, 1); /* 0000001f BLEND_FUNC_DST_ALPHA */ ++ xf_emit(ctx, 1, 1); /* 00000007 BLEND_EQUATION_ALPHA */ ++ xf_emit(ctx, 1, 1); /* 00000001 UNK133C */ ++ if (IS_NVA3F(dev_priv->chipset)) { ++ xf_emit(ctx, 1, 0); /* 00000001 UNK12E4 */ ++ xf_emit(ctx, 8, 2); /* 0000001f IBLEND_FUNC_SRC_RGB */ ++ xf_emit(ctx, 8, 1); /* 0000001f IBLEND_FUNC_DST_RGB */ ++ xf_emit(ctx, 8, 1); /* 00000007 IBLEND_EQUATION_RGB */ ++ xf_emit(ctx, 8, 2); /* 0000001f IBLEND_FUNC_SRC_ALPHA */ ++ xf_emit(ctx, 8, 1); /* 0000001f IBLEND_FUNC_DST_ALPHA */ ++ 
xf_emit(ctx, 8, 1); /* 00000007 IBLEND_EQUATION_ALPHA */ ++ xf_emit(ctx, 8, 1); /* 00000001 IBLEND_UNK00 */ ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK1928 */ ++ xf_emit(ctx, 1, 0); /* 00000001 UNK1140 */ ++ } ++ xf_emit(ctx, 1, 0); /* 00000003 tesla UNK0F90 */ ++ xf_emit(ctx, 1, 4); /* 000000ff FP_RESULT_COUNT */ ++ /* XXX: demagic this part some day */ + if (dev_priv->chipset == 0x50) + xf_emit(ctx, 0x3a0, 0); + else if (dev_priv->chipset < 0x94) +@@ -2303,9 +3240,9 @@ + xf_emit(ctx, 0x39f, 0); + else + xf_emit(ctx, 0x3a3, 0); +- xf_emit(ctx, 1, 0x11); +- xf_emit(ctx, 1, 0); +- xf_emit(ctx, 1, 1); ++ xf_emit(ctx, 1, 0x11); /* 3f/7f DST_FORMAT */ ++ xf_emit(ctx, 1, 0); /* 7 OPERATION */ ++ xf_emit(ctx, 1, 1); /* 1 DST_LINEAR */ + xf_emit(ctx, 0x2d, 0); + } + +@@ -2323,52 +3260,56 @@ + if (dev_priv->chipset < 0xa0) { + for (i = 0; i < 8; i++) { + ctx->ctxvals_pos = offset + i; ++ /* that little bugger belongs to csched. No idea ++ * what it's doing here. */ + if (i == 0) +- xf_emit(ctx, 1, 0x08100c12); ++ xf_emit(ctx, 1, 0x08100c12); /* FP_INTERPOLANT_CTRL */ + if (units & (1 << i)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + } + } else { + /* Strand 0: TPs 0, 1 */ + ctx->ctxvals_pos = offset; +- xf_emit(ctx, 1, 0x08100c12); ++ /* that little bugger belongs to csched. No idea ++ * what it's doing here. */ ++ xf_emit(ctx, 1, 0x08100c12); /* FP_INTERPOLANT_CTRL */ + if (units & (1 << 0)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if (units & (1 << 1)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + +- /* Strand 0: TPs 2, 3 */ ++ /* Strand 1: TPs 2, 3 */ + ctx->ctxvals_pos = offset + 1; + if (units & (1 << 2)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if (units & (1 << 3)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + +- /* Strand 0: TPs 4, 5, 6 */ ++ /* Strand 2: TPs 4, 5, 6 */ + ctx->ctxvals_pos = offset + 2; + if (units & (1 << 4)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if (units & (1 << 5)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if (units & (1 << 6)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + +- /* Strand 0: TPs 7, 8, 9 */ ++ /* Strand 3: TPs 7, 8, 9 */ + ctx->ctxvals_pos = offset + 3; + if (units & (1 << 7)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if (units & (1 << 8)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if (units & (1 << 9)) +- nv50_graph_construct_xfer_tp2(ctx); ++ nv50_graph_construct_xfer_mpc(ctx); + if ((ctx->ctxvals_pos-offset)/8 > size) + size = (ctx->ctxvals_pos-offset)/8; + } +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_instmem.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_instmem.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_instmem.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_instmem.c 2010-10-15 02:04:44.451993326 +0200 +@@ -32,39 +32,87 @@ + struct nv50_instmem_priv { + uint32_t save1700[5]; /* 0x1700->0x1710 
*/ + +- struct nouveau_gpuobj_ref *pramin_pt; +- struct nouveau_gpuobj_ref *pramin_bar; +- struct nouveau_gpuobj_ref *fb_bar; ++ struct nouveau_gpuobj *pramin_pt; ++ struct nouveau_gpuobj *pramin_bar; ++ struct nouveau_gpuobj *fb_bar; + }; + +-#define NV50_INSTMEM_PAGE_SHIFT 12 +-#define NV50_INSTMEM_PAGE_SIZE (1 << NV50_INSTMEM_PAGE_SHIFT) +-#define NV50_INSTMEM_PT_SIZE(a) (((a) >> 12) << 3) +- +-/*NOTE: - Assumes 0x1700 already covers the correct MiB of PRAMIN +- */ +-#define BAR0_WI32(g, o, v) do { \ +- uint32_t offset; \ +- if ((g)->im_backing) { \ +- offset = (g)->im_backing_start; \ +- } else { \ +- offset = chan->ramin->gpuobj->im_backing_start; \ +- offset += (g)->im_pramin->start; \ +- } \ +- offset += (o); \ +- nv_wr32(dev, NV_RAMIN + (offset & 0xfffff), (v)); \ +-} while (0) ++static void ++nv50_channel_del(struct nouveau_channel **pchan) ++{ ++ struct nouveau_channel *chan; ++ ++ chan = *pchan; ++ *pchan = NULL; ++ if (!chan) ++ return; ++ ++ nouveau_gpuobj_ref(NULL, &chan->ramfc); ++ nouveau_gpuobj_ref(NULL, &chan->vm_pd); ++ if (chan->ramin_heap.free_stack.next) ++ drm_mm_takedown(&chan->ramin_heap); ++ nouveau_gpuobj_ref(NULL, &chan->ramin); ++ kfree(chan); ++} ++ ++static int ++nv50_channel_new(struct drm_device *dev, u32 size, ++ struct nouveau_channel **pchan) ++{ ++ struct drm_nouveau_private *dev_priv = dev->dev_private; ++ u32 pgd = (dev_priv->chipset == 0x50) ? 0x1400 : 0x0200; ++ u32 fc = (dev_priv->chipset == 0x50) ? 0x0000 : 0x4200; ++ struct nouveau_channel *chan; ++ int ret; ++ ++ chan = kzalloc(sizeof(*chan), GFP_KERNEL); ++ if (!chan) ++ return -ENOMEM; ++ chan->dev = dev; ++ ++ ret = nouveau_gpuobj_new(dev, NULL, size, 0x1000, 0, &chan->ramin); ++ if (ret) { ++ nv50_channel_del(&chan); ++ return ret; ++ } ++ ++ ret = drm_mm_init(&chan->ramin_heap, 0x6000, chan->ramin->size); ++ if (ret) { ++ nv50_channel_del(&chan); ++ return ret; ++ } ++ ++ ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst == ~0 ? ~0 : ++ chan->ramin->pinst + pgd, ++ chan->ramin->vinst + pgd, ++ 0x4000, NVOBJ_FLAG_ZERO_ALLOC, ++ &chan->vm_pd); ++ if (ret) { ++ nv50_channel_del(&chan); ++ return ret; ++ } ++ ++ ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst == ~0 ? ~0 : ++ chan->ramin->pinst + fc, ++ chan->ramin->vinst + fc, 0x100, ++ NVOBJ_FLAG_ZERO_ALLOC, &chan->ramfc); ++ if (ret) { ++ nv50_channel_del(&chan); ++ return ret; ++ } ++ ++ *pchan = chan; ++ return 0; ++} + + int + nv50_instmem_init(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_channel *chan; +- uint32_t c_offset, c_size, c_ramfc, c_vmpd, c_base, pt_size; +- uint32_t save_nv001700; +- uint64_t v; + struct nv50_instmem_priv *priv; ++ struct nouveau_channel *chan; + int ret, i; ++ u32 tmp; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) +@@ -75,212 +123,115 @@ + for (i = 0x1700; i <= 0x1710; i += 4) + priv->save1700[(i-0x1700)/4] = nv_rd32(dev, i); + +- /* Reserve the last MiB of VRAM, we should probably try to avoid +- * setting up the below tables over the top of the VBIOS image at +- * some point. +- */ +- dev_priv->ramin_rsvd_vram = 1 << 20; +- c_offset = dev_priv->vram_size - dev_priv->ramin_rsvd_vram; +- c_size = 128 << 10; +- c_vmpd = ((dev_priv->chipset & 0xf0) == 0x50) ? 0x1400 : 0x200; +- c_ramfc = ((dev_priv->chipset & 0xf0) == 0x50) ? 
0x0 : 0x20; +- c_base = c_vmpd + 0x4000; +- pt_size = NV50_INSTMEM_PT_SIZE(dev_priv->ramin_size); +- +- NV_DEBUG(dev, " Rsvd VRAM base: 0x%08x\n", c_offset); +- NV_DEBUG(dev, " VBIOS image: 0x%08x\n", +- (nv_rd32(dev, 0x619f04) & ~0xff) << 8); +- NV_DEBUG(dev, " Aperture size: %d MiB\n", dev_priv->ramin_size >> 20); +- NV_DEBUG(dev, " PT size: %d KiB\n", pt_size >> 10); +- +- /* Determine VM layout, we need to do this first to make sure +- * we allocate enough memory for all the page tables. +- */ +- dev_priv->vm_gart_base = roundup(NV50_VM_BLOCK, NV50_VM_BLOCK); +- dev_priv->vm_gart_size = NV50_VM_BLOCK; +- +- dev_priv->vm_vram_base = dev_priv->vm_gart_base + dev_priv->vm_gart_size; +- dev_priv->vm_vram_size = dev_priv->vram_size; +- if (dev_priv->vm_vram_size > NV50_VM_MAX_VRAM) +- dev_priv->vm_vram_size = NV50_VM_MAX_VRAM; +- dev_priv->vm_vram_size = roundup(dev_priv->vm_vram_size, NV50_VM_BLOCK); +- dev_priv->vm_vram_pt_nr = dev_priv->vm_vram_size / NV50_VM_BLOCK; +- +- dev_priv->vm_end = dev_priv->vm_vram_base + dev_priv->vm_vram_size; +- +- NV_DEBUG(dev, "NV50VM: GART 0x%016llx-0x%016llx\n", +- dev_priv->vm_gart_base, +- dev_priv->vm_gart_base + dev_priv->vm_gart_size - 1); +- NV_DEBUG(dev, "NV50VM: VRAM 0x%016llx-0x%016llx\n", +- dev_priv->vm_vram_base, +- dev_priv->vm_vram_base + dev_priv->vm_vram_size - 1); +- +- c_size += dev_priv->vm_vram_pt_nr * (NV50_VM_BLOCK / 65536 * 8); +- +- /* Map BAR0 PRAMIN aperture over the memory we want to use */ +- save_nv001700 = nv_rd32(dev, NV50_PUNK_BAR0_PRAMIN); +- nv_wr32(dev, NV50_PUNK_BAR0_PRAMIN, (c_offset >> 16)); +- +- /* Create a fake channel, and use it as our "dummy" channels 0/127. +- * The main reason for creating a channel is so we can use the gpuobj +- * code. However, it's probably worth noting that NVIDIA also setup +- * their channels 0/127 with the same values they configure here. +- * So, there may be some other reason for doing this. +- * +- * Have to create the entire channel manually, as the real channel +- * creation code assumes we have PRAMIN access, and we don't until +- * we're done here. 
+- */ +- chan = kzalloc(sizeof(*chan), GFP_KERNEL); +- if (!chan) ++ /* Global PRAMIN heap */ ++ ret = drm_mm_init(&dev_priv->ramin_heap, 0, dev_priv->ramin_size); ++ if (ret) { ++ NV_ERROR(dev, "Failed to init RAMIN heap\n"); + return -ENOMEM; +- chan->id = 0; +- chan->dev = dev; +- chan->file_priv = (struct drm_file *)-2; +- dev_priv->fifos[0] = dev_priv->fifos[127] = chan; +- +- INIT_LIST_HEAD(&chan->ramht_refs); ++ } + +- /* Channel's PRAMIN object + heap */ +- ret = nouveau_gpuobj_new_fake(dev, 0, c_offset, c_size, 0, +- NULL, &chan->ramin); ++ /* we need a channel to plug into the hw to control the BARs */ ++ ret = nv50_channel_new(dev, 128*1024, &dev_priv->channels.ptr[0]); + if (ret) + return ret; ++ chan = dev_priv->channels.ptr[127] = dev_priv->channels.ptr[0]; + +- if (drm_mm_init(&chan->ramin_heap, c_base, c_size - c_base)) +- return -ENOMEM; +- +- /* RAMFC + zero channel's PRAMIN up to start of VM pagedir */ +- ret = nouveau_gpuobj_new_fake(dev, c_ramfc, c_offset + c_ramfc, +- 0x4000, 0, NULL, &chan->ramfc); ++ /* allocate page table for PRAMIN BAR */ ++ ret = nouveau_gpuobj_new(dev, chan, (dev_priv->ramin_size >> 12) * 8, ++ 0x1000, NVOBJ_FLAG_ZERO_ALLOC, ++ &priv->pramin_pt); + if (ret) + return ret; + +- for (i = 0; i < c_vmpd; i += 4) +- BAR0_WI32(chan->ramin->gpuobj, i, 0); ++ nv_wo32(chan->vm_pd, 0x0000, priv->pramin_pt->vinst | 0x63); ++ nv_wo32(chan->vm_pd, 0x0004, 0); + +- /* VM page directory */ +- ret = nouveau_gpuobj_new_fake(dev, c_vmpd, c_offset + c_vmpd, +- 0x4000, 0, &chan->vm_pd, NULL); ++ /* DMA object for PRAMIN BAR */ ++ ret = nouveau_gpuobj_new(dev, chan, 6*4, 16, 0, &priv->pramin_bar); + if (ret) + return ret; +- for (i = 0; i < 0x4000; i += 8) { +- BAR0_WI32(chan->vm_pd, i + 0x00, 0x00000000); +- BAR0_WI32(chan->vm_pd, i + 0x04, 0x00000000); +- } ++ nv_wo32(priv->pramin_bar, 0x00, 0x7fc00000); ++ nv_wo32(priv->pramin_bar, 0x04, dev_priv->ramin_size - 1); ++ nv_wo32(priv->pramin_bar, 0x08, 0x00000000); ++ nv_wo32(priv->pramin_bar, 0x0c, 0x00000000); ++ nv_wo32(priv->pramin_bar, 0x10, 0x00000000); ++ nv_wo32(priv->pramin_bar, 0x14, 0x00000000); + +- /* PRAMIN page table, cheat and map into VM at 0x0000000000. +- * We map the entire fake channel into the start of the PRAMIN BAR +- */ +- ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, pt_size, 0x1000, +- 0, &priv->pramin_pt); ++ /* map channel into PRAMIN, gpuobj didn't do it for us */ ++ ret = nv50_instmem_bind(dev, chan->ramin); + if (ret) + return ret; + +- v = c_offset | 1; +- if (dev_priv->vram_sys_base) { +- v += dev_priv->vram_sys_base; +- v |= 0x30; ++ /* poke regs... 
*/ ++ nv_wr32(dev, 0x001704, 0x00000000 | (chan->ramin->vinst >> 12)); ++ nv_wr32(dev, 0x001704, 0x40000000 | (chan->ramin->vinst >> 12)); ++ nv_wr32(dev, 0x00170c, 0x80000000 | (priv->pramin_bar->cinst >> 4)); ++ ++ tmp = nv_ri32(dev, 0); ++ nv_wi32(dev, 0, ~tmp); ++ if (nv_ri32(dev, 0) != ~tmp) { ++ NV_ERROR(dev, "PRAMIN readback failed\n"); ++ return -EIO; + } ++ nv_wi32(dev, 0, tmp); + +- i = 0; +- while (v < dev_priv->vram_sys_base + c_offset + c_size) { +- BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, lower_32_bits(v)); +- BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, upper_32_bits(v)); +- v += 0x1000; +- i += 8; +- } ++ dev_priv->ramin_available = true; + +- while (i < pt_size) { +- BAR0_WI32(priv->pramin_pt->gpuobj, i + 0, 0x00000000); +- BAR0_WI32(priv->pramin_pt->gpuobj, i + 4, 0x00000000); +- i += 8; +- } ++ /* Determine VM layout */ ++ dev_priv->vm_gart_base = roundup(NV50_VM_BLOCK, NV50_VM_BLOCK); ++ dev_priv->vm_gart_size = NV50_VM_BLOCK; ++ ++ dev_priv->vm_vram_base = dev_priv->vm_gart_base + dev_priv->vm_gart_size; ++ dev_priv->vm_vram_size = dev_priv->vram_size; ++ if (dev_priv->vm_vram_size > NV50_VM_MAX_VRAM) ++ dev_priv->vm_vram_size = NV50_VM_MAX_VRAM; ++ dev_priv->vm_vram_size = roundup(dev_priv->vm_vram_size, NV50_VM_BLOCK); ++ dev_priv->vm_vram_pt_nr = dev_priv->vm_vram_size / NV50_VM_BLOCK; + +- BAR0_WI32(chan->vm_pd, 0x00, priv->pramin_pt->instance | 0x63); +- BAR0_WI32(chan->vm_pd, 0x04, 0x00000000); ++ dev_priv->vm_end = dev_priv->vm_vram_base + dev_priv->vm_vram_size; ++ ++ NV_DEBUG(dev, "NV50VM: GART 0x%016llx-0x%016llx\n", ++ dev_priv->vm_gart_base, ++ dev_priv->vm_gart_base + dev_priv->vm_gart_size - 1); ++ NV_DEBUG(dev, "NV50VM: VRAM 0x%016llx-0x%016llx\n", ++ dev_priv->vm_vram_base, ++ dev_priv->vm_vram_base + dev_priv->vm_vram_size - 1); + + /* VRAM page table(s), mapped into VM at +1GiB */ + for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) { +- ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, +- NV50_VM_BLOCK/65536*8, 0, 0, +- &chan->vm_vram_pt[i]); ++ ret = nouveau_gpuobj_new(dev, NULL, NV50_VM_BLOCK / 0x10000 * 8, ++ 0, NVOBJ_FLAG_ZERO_ALLOC, ++ &chan->vm_vram_pt[i]); + if (ret) { +- NV_ERROR(dev, "Error creating VRAM page tables: %d\n", +- ret); ++ NV_ERROR(dev, "Error creating VRAM PGT: %d\n", ret); + dev_priv->vm_vram_pt_nr = i; + return ret; + } +- dev_priv->vm_vram_pt[i] = chan->vm_vram_pt[i]->gpuobj; ++ dev_priv->vm_vram_pt[i] = chan->vm_vram_pt[i]; + +- for (v = 0; v < dev_priv->vm_vram_pt[i]->im_pramin->size; +- v += 4) +- BAR0_WI32(dev_priv->vm_vram_pt[i], v, 0); +- +- BAR0_WI32(chan->vm_pd, 0x10 + (i*8), +- chan->vm_vram_pt[i]->instance | 0x61); +- BAR0_WI32(chan->vm_pd, 0x14 + (i*8), 0); ++ nv_wo32(chan->vm_pd, 0x10 + (i*8), ++ chan->vm_vram_pt[i]->vinst | 0x61); ++ nv_wo32(chan->vm_pd, 0x14 + (i*8), 0); + } + +- /* DMA object for PRAMIN BAR */ +- ret = nouveau_gpuobj_new_ref(dev, chan, chan, 0, 6*4, 16, 0, +- &priv->pramin_bar); +- if (ret) +- return ret; +- BAR0_WI32(priv->pramin_bar->gpuobj, 0x00, 0x7fc00000); +- BAR0_WI32(priv->pramin_bar->gpuobj, 0x04, dev_priv->ramin_size - 1); +- BAR0_WI32(priv->pramin_bar->gpuobj, 0x08, 0x00000000); +- BAR0_WI32(priv->pramin_bar->gpuobj, 0x0c, 0x00000000); +- BAR0_WI32(priv->pramin_bar->gpuobj, 0x10, 0x00000000); +- BAR0_WI32(priv->pramin_bar->gpuobj, 0x14, 0x00000000); +- + /* DMA object for FB BAR */ +- ret = nouveau_gpuobj_new_ref(dev, chan, chan, 0, 6*4, 16, 0, +- &priv->fb_bar); ++ ret = nouveau_gpuobj_new(dev, chan, 6*4, 16, 0, &priv->fb_bar); + if (ret) + return ret; +- BAR0_WI32(priv->fb_bar->gpuobj, 
0x00, 0x7fc00000); +- BAR0_WI32(priv->fb_bar->gpuobj, 0x04, 0x40000000 + +- pci_resource_len(dev->pdev, 1) - 1); +- BAR0_WI32(priv->fb_bar->gpuobj, 0x08, 0x40000000); +- BAR0_WI32(priv->fb_bar->gpuobj, 0x0c, 0x00000000); +- BAR0_WI32(priv->fb_bar->gpuobj, 0x10, 0x00000000); +- BAR0_WI32(priv->fb_bar->gpuobj, 0x14, 0x00000000); ++ nv_wo32(priv->fb_bar, 0x00, 0x7fc00000); ++ nv_wo32(priv->fb_bar, 0x04, 0x40000000 + ++ pci_resource_len(dev->pdev, 1) - 1); ++ nv_wo32(priv->fb_bar, 0x08, 0x40000000); ++ nv_wo32(priv->fb_bar, 0x0c, 0x00000000); ++ nv_wo32(priv->fb_bar, 0x10, 0x00000000); ++ nv_wo32(priv->fb_bar, 0x14, 0x00000000); + +- /* Poke the relevant regs, and pray it works :) */ +- nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->instance >> 12)); +- nv_wr32(dev, NV50_PUNK_UNK1710, 0); +- nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->instance >> 12) | +- NV50_PUNK_BAR_CFG_BASE_VALID); +- nv_wr32(dev, NV50_PUNK_BAR1_CTXDMA, (priv->fb_bar->instance >> 4) | +- NV50_PUNK_BAR1_CTXDMA_VALID); +- nv_wr32(dev, NV50_PUNK_BAR3_CTXDMA, (priv->pramin_bar->instance >> 4) | +- NV50_PUNK_BAR3_CTXDMA_VALID); ++ dev_priv->engine.instmem.flush(dev); + ++ nv_wr32(dev, 0x001708, 0x80000000 | (priv->fb_bar->cinst >> 4)); + for (i = 0; i < 8; i++) + nv_wr32(dev, 0x1900 + (i*4), 0); + +- /* Assume that praying isn't enough, check that we can re-read the +- * entire fake channel back from the PRAMIN BAR */ +- for (i = 0; i < c_size; i += 4) { +- if (nv_rd32(dev, NV_RAMIN + i) != nv_ri32(dev, i)) { +- NV_ERROR(dev, "Error reading back PRAMIN at 0x%08x\n", +- i); +- return -EINVAL; +- } +- } +- +- nv_wr32(dev, NV50_PUNK_BAR0_PRAMIN, save_nv001700); +- +- /* Global PRAMIN heap */ +- if (drm_mm_init(&dev_priv->ramin_heap, c_size, dev_priv->ramin_size - c_size)) { +- NV_ERROR(dev, "Failed to init RAMIN heap\n"); +- } +- +- /*XXX: incorrect, but needed to make hash func "work" */ +- dev_priv->ramht_offset = 0x10000; +- dev_priv->ramht_bits = 9; +- dev_priv->ramht_size = (1 << dev_priv->ramht_bits) * 8; + return 0; + } + +@@ -289,7 +240,7 @@ + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nv50_instmem_priv *priv = dev_priv->engine.instmem.priv; +- struct nouveau_channel *chan = dev_priv->fifos[0]; ++ struct nouveau_channel *chan = dev_priv->channels.ptr[0]; + int i; + + NV_DEBUG(dev, "\n"); +@@ -297,29 +248,24 @@ + if (!priv) + return; + ++ dev_priv->ramin_available = false; ++ + /* Restore state from before init */ + for (i = 0x1700; i <= 0x1710; i += 4) + nv_wr32(dev, i, priv->save1700[(i - 0x1700) / 4]); + +- nouveau_gpuobj_ref_del(dev, &priv->fb_bar); +- nouveau_gpuobj_ref_del(dev, &priv->pramin_bar); +- nouveau_gpuobj_ref_del(dev, &priv->pramin_pt); ++ nouveau_gpuobj_ref(NULL, &priv->fb_bar); ++ nouveau_gpuobj_ref(NULL, &priv->pramin_bar); ++ nouveau_gpuobj_ref(NULL, &priv->pramin_pt); + + /* Destroy dummy channel */ + if (chan) { +- for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) { +- nouveau_gpuobj_ref_del(dev, &chan->vm_vram_pt[i]); +- dev_priv->vm_vram_pt[i] = NULL; +- } ++ for (i = 0; i < dev_priv->vm_vram_pt_nr; i++) ++ nouveau_gpuobj_ref(NULL, &chan->vm_vram_pt[i]); + dev_priv->vm_vram_pt_nr = 0; + +- nouveau_gpuobj_del(dev, &chan->vm_pd); +- nouveau_gpuobj_ref_del(dev, &chan->ramfc); +- nouveau_gpuobj_ref_del(dev, &chan->ramin); +- drm_mm_takedown(&chan->ramin_heap); +- +- dev_priv->fifos[0] = dev_priv->fifos[127] = NULL; +- kfree(chan); ++ nv50_channel_del(&dev_priv->channels.ptr[0]); ++ dev_priv->channels.ptr[127] = NULL; + } + + dev_priv->engine.instmem.priv = NULL; +@@ 
-330,15 +276,15 @@ + nv50_instmem_suspend(struct drm_device *dev) + { + struct drm_nouveau_private *dev_priv = dev->dev_private; +- struct nouveau_channel *chan = dev_priv->fifos[0]; +- struct nouveau_gpuobj *ramin = chan->ramin->gpuobj; ++ struct nouveau_channel *chan = dev_priv->channels.ptr[0]; ++ struct nouveau_gpuobj *ramin = chan->ramin; + int i; + +- ramin->im_backing_suspend = vmalloc(ramin->im_pramin->size); ++ ramin->im_backing_suspend = vmalloc(ramin->size); + if (!ramin->im_backing_suspend) + return -ENOMEM; + +- for (i = 0; i < ramin->im_pramin->size; i += 4) ++ for (i = 0; i < ramin->size; i += 4) + ramin->im_backing_suspend[i/4] = nv_ri32(dev, i); + return 0; + } +@@ -348,24 +294,26 @@ + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nv50_instmem_priv *priv = dev_priv->engine.instmem.priv; +- struct nouveau_channel *chan = dev_priv->fifos[0]; +- struct nouveau_gpuobj *ramin = chan->ramin->gpuobj; ++ struct nouveau_channel *chan = dev_priv->channels.ptr[0]; ++ struct nouveau_gpuobj *ramin = chan->ramin; + int i; + +- nv_wr32(dev, NV50_PUNK_BAR0_PRAMIN, (ramin->im_backing_start >> 16)); +- for (i = 0; i < ramin->im_pramin->size; i += 4) +- BAR0_WI32(ramin, i, ramin->im_backing_suspend[i/4]); ++ dev_priv->ramin_available = false; ++ dev_priv->ramin_base = ~0; ++ for (i = 0; i < ramin->size; i += 4) ++ nv_wo32(ramin, i, ramin->im_backing_suspend[i/4]); ++ dev_priv->ramin_available = true; + vfree(ramin->im_backing_suspend); + ramin->im_backing_suspend = NULL; + + /* Poke the relevant regs, and pray it works :) */ +- nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->instance >> 12)); ++ nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->vinst >> 12)); + nv_wr32(dev, NV50_PUNK_UNK1710, 0); +- nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->instance >> 12) | ++ nv_wr32(dev, NV50_PUNK_BAR_CFG_BASE, (chan->ramin->vinst >> 12) | + NV50_PUNK_BAR_CFG_BASE_VALID); +- nv_wr32(dev, NV50_PUNK_BAR1_CTXDMA, (priv->fb_bar->instance >> 4) | ++ nv_wr32(dev, NV50_PUNK_BAR1_CTXDMA, (priv->fb_bar->cinst >> 4) | + NV50_PUNK_BAR1_CTXDMA_VALID); +- nv_wr32(dev, NV50_PUNK_BAR3_CTXDMA, (priv->pramin_bar->instance >> 4) | ++ nv_wr32(dev, NV50_PUNK_BAR3_CTXDMA, (priv->pramin_bar->cinst >> 4) | + NV50_PUNK_BAR3_CTXDMA_VALID); + + for (i = 0; i < 8; i++) +@@ -381,7 +329,7 @@ + if (gpuobj->im_backing) + return -EINVAL; + +- *sz = ALIGN(*sz, NV50_INSTMEM_PAGE_SIZE); ++ *sz = ALIGN(*sz, 4096); + if (*sz == 0) + return -EINVAL; + +@@ -399,9 +347,7 @@ + return ret; + } + +- gpuobj->im_backing_start = gpuobj->im_backing->bo.mem.mm_node->start; +- gpuobj->im_backing_start <<= PAGE_SHIFT; +- ++ gpuobj->vinst = gpuobj->im_backing->bo.mem.mm_node->start << PAGE_SHIFT; + return 0; + } + +@@ -424,7 +370,7 @@ + { + struct drm_nouveau_private *dev_priv = dev->dev_private; + struct nv50_instmem_priv *priv = dev_priv->engine.instmem.priv; +- struct nouveau_gpuobj *pramin_pt = priv->pramin_pt->gpuobj; ++ struct nouveau_gpuobj *pramin_pt = priv->pramin_pt; + uint32_t pte, pte_end; + uint64_t vram; + +@@ -436,11 +382,11 @@ + + pte = (gpuobj->im_pramin->start >> 12) << 1; + pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte; +- vram = gpuobj->im_backing_start; ++ vram = gpuobj->vinst; + + NV_DEBUG(dev, "pramin=0x%lx, pte=%d, pte_end=%d\n", + gpuobj->im_pramin->start, pte, pte_end); +- NV_DEBUG(dev, "first vram page: 0x%08x\n", gpuobj->im_backing_start); ++ NV_DEBUG(dev, "first vram page: 0x%010llx\n", gpuobj->vinst); + + vram |= 1; + if (dev_priv->vram_sys_base) { +@@ -449,9 +395,10 @@ + } + + 
while (pte < pte_end) { +- nv_wo32(dev, pramin_pt, pte++, lower_32_bits(vram)); +- nv_wo32(dev, pramin_pt, pte++, upper_32_bits(vram)); +- vram += NV50_INSTMEM_PAGE_SIZE; ++ nv_wo32(pramin_pt, (pte * 4) + 0, lower_32_bits(vram)); ++ nv_wo32(pramin_pt, (pte * 4) + 4, upper_32_bits(vram)); ++ vram += 0x1000; ++ pte += 2; + } + dev_priv->engine.instmem.flush(dev); + +@@ -472,12 +419,17 @@ + if (gpuobj->im_bound == 0) + return -EINVAL; + ++ /* can happen during late takedown */ ++ if (unlikely(!dev_priv->ramin_available)) ++ return 0; ++ + pte = (gpuobj->im_pramin->start >> 12) << 1; + pte_end = ((gpuobj->im_pramin->size >> 12) << 1) + pte; + + while (pte < pte_end) { +- nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000); +- nv_wo32(dev, priv->pramin_pt->gpuobj, pte++, 0x00000000); ++ nv_wo32(priv->pramin_pt, (pte * 4) + 0, 0x00000000); ++ nv_wo32(priv->pramin_pt, (pte * 4) + 4, 0x00000000); ++ pte += 2; + } + dev_priv->engine.instmem.flush(dev); + +@@ -489,7 +441,7 @@ + nv50_instmem_flush(struct drm_device *dev) + { + nv_wr32(dev, 0x00330c, 0x00000001); +- if (!nv_wait(0x00330c, 0x00000002, 0x00000000)) ++ if (!nv_wait(dev, 0x00330c, 0x00000002, 0x00000000)) + NV_ERROR(dev, "PRAMIN flush timeout\n"); + } + +@@ -497,7 +449,7 @@ + nv84_instmem_flush(struct drm_device *dev) + { + nv_wr32(dev, 0x070000, 0x00000001); +- if (!nv_wait(0x070000, 0x00000002, 0x00000000)) ++ if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000)) + NV_ERROR(dev, "PRAMIN flush timeout\n"); + } + +@@ -505,7 +457,7 @@ + nv50_vm_flush(struct drm_device *dev, int engine) + { + nv_wr32(dev, 0x100c80, (engine << 16) | 1); +- if (!nv_wait(0x100c80, 0x00000001, 0x00000000)) ++ if (!nv_wait(dev, 0x100c80, 0x00000001, 0x00000000)) + NV_ERROR(dev, "vm flush timeout: engine %d\n", engine); + } + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_pm.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_pm.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_pm.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_pm.c 2010-10-15 02:04:44.452993338 +0200 +@@ -0,0 +1,131 @@ ++/* ++ * Copyright 2010 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: Ben Skeggs ++ */ ++ ++#include "drmP.h" ++#include "nouveau_drv.h" ++#include "nouveau_bios.h" ++#include "nouveau_pm.h" ++ ++struct nv50_pm_state { ++ struct nouveau_pm_level *perflvl; ++ struct pll_lims pll; ++ enum pll_types type; ++ int N, M, P; ++}; ++ ++int ++nv50_pm_clock_get(struct drm_device *dev, u32 id) ++{ ++ struct pll_lims pll; ++ int P, N, M, ret; ++ u32 reg0, reg1; ++ ++ ret = get_pll_limits(dev, id, &pll); ++ if (ret) ++ return ret; ++ ++ reg0 = nv_rd32(dev, pll.reg + 0); ++ reg1 = nv_rd32(dev, pll.reg + 4); ++ P = (reg0 & 0x00070000) >> 16; ++ N = (reg1 & 0x0000ff00) >> 8; ++ M = (reg1 & 0x000000ff); ++ ++ return ((pll.refclk * N / M) >> P); ++} ++ ++void * ++nv50_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl, ++ u32 id, int khz) ++{ ++ struct nv50_pm_state *state; ++ int dummy, ret; ++ ++ state = kzalloc(sizeof(*state), GFP_KERNEL); ++ if (!state) ++ return ERR_PTR(-ENOMEM); ++ state->type = id; ++ state->perflvl = perflvl; ++ ++ ret = get_pll_limits(dev, id, &state->pll); ++ if (ret < 0) { ++ kfree(state); ++ return (ret == -ENOENT) ? NULL : ERR_PTR(ret); ++ } ++ ++ ret = nv50_calc_pll(dev, &state->pll, khz, &state->N, &state->M, ++ &dummy, &dummy, &state->P); ++ if (ret < 0) { ++ kfree(state); ++ return ERR_PTR(ret); ++ } ++ ++ return state; ++} ++ ++void ++nv50_pm_clock_set(struct drm_device *dev, void *pre_state) ++{ ++ struct nv50_pm_state *state = pre_state; ++ struct nouveau_pm_level *perflvl = state->perflvl; ++ u32 reg = state->pll.reg, tmp; ++ struct bit_entry BIT_M; ++ u16 script; ++ int N = state->N; ++ int M = state->M; ++ int P = state->P; ++ ++ if (state->type == PLL_MEMORY && perflvl->memscript && ++ bit_table(dev, 'M', &BIT_M) == 0 && ++ BIT_M.version == 1 && BIT_M.length >= 0x0b) { ++ script = ROM16(BIT_M.data[0x05]); ++ if (script) ++ nouveau_bios_run_init_table(dev, script, NULL); ++ script = ROM16(BIT_M.data[0x07]); ++ if (script) ++ nouveau_bios_run_init_table(dev, script, NULL); ++ script = ROM16(BIT_M.data[0x09]); ++ if (script) ++ nouveau_bios_run_init_table(dev, script, NULL); ++ ++ nouveau_bios_run_init_table(dev, perflvl->memscript, NULL); ++ } ++ ++ if (state->type == PLL_MEMORY) { ++ nv_wr32(dev, 0x100210, 0); ++ nv_wr32(dev, 0x1002dc, 1); ++ } ++ ++ tmp = nv_rd32(dev, reg + 0) & 0xfff8ffff; ++ tmp |= 0x80000000 | (P << 16); ++ nv_wr32(dev, reg + 0, tmp); ++ nv_wr32(dev, reg + 4, (N << 8) | M); ++ ++ if (state->type == PLL_MEMORY) { ++ nv_wr32(dev, 0x1002dc, 0); ++ nv_wr32(dev, 0x100210, 0x80000000); ++ } ++ ++ kfree(state); ++} ++ +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_sor.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_sor.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nv50_sor.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nv50_sor.c 2010-10-15 02:04:44.454993362 +0200 +@@ -92,7 +92,7 @@ + } + + /* wait for it to be done */ +- if (!nv_wait(NV50_PDISPLAY_SOR_DPMS_CTRL(or), ++ if (!nv_wait(dev, NV50_PDISPLAY_SOR_DPMS_CTRL(or), + NV50_PDISPLAY_SOR_DPMS_CTRL_PENDING, 0)) { + NV_ERROR(dev, "timeout: SOR_DPMS_CTRL_PENDING(%d) == 0\n", or); + NV_ERROR(dev, "SOR_DPMS_CTRL(%d) = 0x%08x\n", or, +@@ -108,7 +108,7 @@ + + nv_wr32(dev, NV50_PDISPLAY_SOR_DPMS_CTRL(or), val | + NV50_PDISPLAY_SOR_DPMS_CTRL_PENDING); +- if (!nv_wait(NV50_PDISPLAY_SOR_DPMS_STATE(or), ++ if (!nv_wait(dev, NV50_PDISPLAY_SOR_DPMS_STATE(or), + NV50_PDISPLAY_SOR_DPMS_STATE_WAIT, 0)) { + NV_ERROR(dev, "timeout: SOR_DPMS_STATE_WAIT(%d) == 0\n", or); + NV_ERROR(dev, 
"SOR_DPMS_STATE(%d) = 0x%08x\n", or, +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nva3_pm.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nva3_pm.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nva3_pm.c 1970-01-01 01:00:00.000000000 +0100 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nva3_pm.c 2010-10-15 02:04:44.456993388 +0200 +@@ -0,0 +1,95 @@ ++/* ++ * Copyright 2010 Red Hat Inc. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ * ++ * Authors: Ben Skeggs ++ */ ++ ++#include "drmP.h" ++#include "nouveau_drv.h" ++#include "nouveau_bios.h" ++#include "nouveau_pm.h" ++ ++/*XXX: boards using limits 0x40 need fixing, the register layout ++ * is correct here, but, there's some other funny magic ++ * that modifies things, so it's not likely we'll set/read ++ * the correct timings yet.. working on it... ++ */ ++ ++struct nva3_pm_state { ++ struct pll_lims pll; ++ int N, M, P; ++}; ++ ++int ++nva3_pm_clock_get(struct drm_device *dev, u32 id) ++{ ++ struct pll_lims pll; ++ int P, N, M, ret; ++ u32 reg; ++ ++ ret = get_pll_limits(dev, id, &pll); ++ if (ret) ++ return ret; ++ ++ reg = nv_rd32(dev, pll.reg + 4); ++ P = (reg & 0x003f0000) >> 16; ++ N = (reg & 0x0000ff00) >> 8; ++ M = (reg & 0x000000ff); ++ return pll.refclk * N / M / P; ++} ++ ++void * ++nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl, ++ u32 id, int khz) ++{ ++ struct nva3_pm_state *state; ++ int dummy, ret; ++ ++ state = kzalloc(sizeof(*state), GFP_KERNEL); ++ if (!state) ++ return ERR_PTR(-ENOMEM); ++ ++ ret = get_pll_limits(dev, id, &state->pll); ++ if (ret < 0) { ++ kfree(state); ++ return (ret == -ENOENT) ? 
NULL : ERR_PTR(ret); ++ } ++ ++ ret = nv50_calc_pll2(dev, &state->pll, khz, &state->N, &dummy, ++ &state->M, &state->P); ++ if (ret < 0) { ++ kfree(state); ++ return ERR_PTR(ret); ++ } ++ ++ return state; ++} ++ ++void ++nva3_pm_clock_set(struct drm_device *dev, void *pre_state) ++{ ++ struct nva3_pm_state *state = pre_state; ++ u32 reg = state->pll.reg; ++ ++ nv_wr32(dev, reg + 4, (state->P << 16) | (state->N << 8) | state->M); ++ kfree(state); ++} ++ +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nvc0_fifo.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nvc0_fifo.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nvc0_fifo.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nvc0_fifo.c 2010-10-15 02:04:44.457993401 +0200 +@@ -43,12 +43,6 @@ + } + + bool +-nvc0_fifo_cache_flush(struct drm_device *dev) +-{ +- return true; +-} +- +-bool + nvc0_fifo_cache_pull(struct drm_device *dev, bool enable) + { + return false; +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nvc0_instmem.c linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nvc0_instmem.c +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nvc0_instmem.c 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nvc0_instmem.c 2010-10-15 02:04:44.458993413 +0200 +@@ -50,8 +50,7 @@ + return ret; + } + +- gpuobj->im_backing_start = gpuobj->im_backing->bo.mem.mm_node->start; +- gpuobj->im_backing_start <<= PAGE_SHIFT; ++ gpuobj->vinst = gpuobj->im_backing->bo.mem.mm_node->start << PAGE_SHIFT; + return 0; + } + +@@ -84,11 +83,11 @@ + + pte = gpuobj->im_pramin->start >> 12; + pte_end = (gpuobj->im_pramin->size >> 12) + pte; +- vram = gpuobj->im_backing_start; ++ vram = gpuobj->vinst; + + NV_DEBUG(dev, "pramin=0x%lx, pte=%d, pte_end=%d\n", + gpuobj->im_pramin->start, pte, pte_end); +- NV_DEBUG(dev, "first vram page: 0x%08x\n", gpuobj->im_backing_start); ++ NV_DEBUG(dev, "first vram page: 0x%010llx\n", gpuobj->vinst); + + while (pte < pte_end) { + nv_wr32(dev, 0x702000 + (pte * 8), (vram >> 8) | 1); +@@ -134,7 +133,7 @@ + nvc0_instmem_flush(struct drm_device *dev) + { + nv_wr32(dev, 0x070000, 1); +- if (!nv_wait(0x070000, 0x00000002, 0x00000000)) ++ if (!nv_wait(dev, 0x070000, 0x00000002, 0x00000000)) + NV_ERROR(dev, "PRAMIN flush timeout\n"); + } + +@@ -221,10 +220,6 @@ + return -ENOMEM; + } + +- /*XXX: incorrect, but needed to make hash func "work" */ +- dev_priv->ramht_offset = 0x10000; +- dev_priv->ramht_bits = 9; +- dev_priv->ramht_size = (1 << dev_priv->ramht_bits) * 8; + return 0; + } + +diff -Naur linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nvreg.h linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nvreg.h +--- linux-2.6.36-rc7/drivers/gpu/drm/nouveau/nvreg.h 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/drivers/gpu/drm/nouveau/nvreg.h 2010-10-15 02:04:44.460993437 +0200 +@@ -263,6 +263,7 @@ + # define NV_CIO_CRE_HCUR_ADDR1_ADR 7:2 + # define NV_CIO_CRE_LCD__INDEX 0x33 + # define NV_CIO_CRE_LCD_LCD_SELECT 0:0 ++# define NV_CIO_CRE_LCD_ROUTE_MASK 0x3b + # define NV_CIO_CRE_DDC0_STATUS__INDEX 0x36 + # define NV_CIO_CRE_DDC0_WR__INDEX 0x37 + # define NV_CIO_CRE_ILACE__INDEX 0x39 /* interlace */ +diff -Naur linux-2.6.36-rc7/include/drm/nouveau_drm.h linux-2.6.36-rc7.patch/include/drm/nouveau_drm.h +--- linux-2.6.36-rc7/include/drm/nouveau_drm.h 2010-10-06 22:39:52.000000000 +0200 ++++ linux-2.6.36-rc7.patch/include/drm/nouveau_drm.h 2010-10-15 02:04:44.518994158 +0200 +@@ -80,6 +80,7 @@ + #define NOUVEAU_GETPARAM_VM_VRAM_BASE 12 + #define 
NOUVEAU_GETPARAM_GRAPH_UNITS 13 + #define NOUVEAU_GETPARAM_PTIMER_TIME 14 ++#define NOUVEAU_GETPARAM_HAS_BO_USAGE 15 + struct drm_nouveau_getparam { + uint64_t param; + uint64_t value; +@@ -95,6 +96,12 @@ + #define NOUVEAU_GEM_DOMAIN_GART (1 << 2) + #define NOUVEAU_GEM_DOMAIN_MAPPABLE (1 << 3) + ++#define NOUVEAU_GEM_TILE_LAYOUT_MASK 0x0000ff00 ++#define NOUVEAU_GEM_TILE_16BPP 0x00000001 ++#define NOUVEAU_GEM_TILE_32BPP 0x00000002 ++#define NOUVEAU_GEM_TILE_ZETA 0x00000004 ++#define NOUVEAU_GEM_TILE_NONCONTIG 0x00000008 ++ + struct drm_nouveau_gem_info { + uint32_t handle; + uint32_t domain; +@@ -164,7 +171,6 @@ + }; + + #define NOUVEAU_GEM_CPU_PREP_NOWAIT 0x00000001 +-#define NOUVEAU_GEM_CPU_PREP_NOBLOCK 0x00000002 + #define NOUVEAU_GEM_CPU_PREP_WRITE 0x00000004 + struct drm_nouveau_gem_cpu_prep { + uint32_t handle;
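
Note on the clock readback added above: nv50_pm.c and nva3_pm.c both recover a
frequency by looking up the PLL's register via the BIOS PLL limits table and
decoding the N/M/P coefficients read back from it. A minimal standalone sketch
of that arithmetic follows; the function and parameter names are illustrative
(not from the patch), and the register reads are assumed to have already been
decoded into N, M and P as in nv50_pm_clock_get()/nva3_pm_clock_get():

	/* NV50-style PLLs: P is a power-of-two post-divider, applied as a
	 * right shift, as in nv50_pm_clock_get() above. */
	static unsigned int
	pll_khz_nv50(unsigned int refclk_khz, unsigned int N, unsigned int M,
		     unsigned int P)
	{
		return (refclk_khz * N / M) >> P;
	}

	/* NVA3-style PLLs: P is a plain integer post-divider, as in
	 * nva3_pm_clock_get() above. */
	static unsigned int
	pll_khz_nva3(unsigned int refclk_khz, unsigned int N, unsigned int M,
		     unsigned int P)
	{
		return refclk_khz * N / M / P;
	}

Similarly, the reworked nv50_instmem.c bind path writes one 64-bit page-table
entry per 4 KiB PRAMIN page, split into two 32-bit words. A sketch of that
entry layout, under the same hedging (variable names illustrative; nv_wo32()
and lower/upper_32_bits() used exactly as the patch uses them):

	u64 entry = vram_addr | 1;	/* bit 0: entry present */
	if (vram_sys_base) {		/* VRAM reached via a sysmem window */
		entry += vram_sys_base;
		entry |= 0x30;
	}
	for (; pte < pte_end; pte += 2, entry += 0x1000) {
		nv_wo32(pramin_pt, (pte * 4) + 0, lower_32_bits(entry));
		nv_wo32(pramin_pt, (pte * 4) + 4, upper_32_bits(entry));
	}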