2 #include "../port/lib.h"
7 #include "../port/error.h"
16 PCIS3 = 0x5333, /* PCI VID */
18 SAVAGE3D = 0x8A20, /* PCI DID */
28 SUPERSAVAGEIXC16 = 0x8C2E,
38 AURORA64VPLUS = 0x8812,
42 * Savage4 et al. acceleration.
44 * This is based only on the Savage4 documentation.
45 * It is expected to work on other Savage cards as well,
46 * but has not been tried.
48 * There are five ways to access the 2D graphics engine registers:
49 * - Old MMIO non-packed format
50 * - Old MMIO packed format
51 * - New MMIO non-packed format
52 * - New MMIO packed format
53 * - Burst Command Interface (BCI)
55 * Of these, the manual hints that the first three are deprecated,
56 * and it does not document any of those three well enough to use.
58 * I have tried for many hours with no success to understand the BCI
59 * interface well enough to use it. It is not well documented, and the
60 * XFree86 driver seems to completely contradict what little documentation
63 * This leaves the packed new MMIO.
64 * The manual contradicts itself here, claiming that the registers
65 * start at 0x2008100 as well as at 0x0008100 from the base of the
66 * mmio segment. Since the segment is only 512k, we assume that
67 * the latter is the correct offset.
69 * According to the manual, only 16-bit reads of the 2D registers
70 * are supported: 32-bit reads will return garbage in the upper word.
71 * 32-bit writes must be enabled explicitly.
73 * 32-bit reads of the status registers seem just fine.
76 /* 2D graphics engine registers for Savage4; others appear to be mostly the same */
78 SubsystemStatus = 0x8504, /* Subsystem Status: read only */
79 /* read only: whether we get interrupts on various events */
80 VsyncInt = 1<<0, /* vertical sync */
81 GeBusyInt = 1<<1, /* 2D graphics engine busy */
82 BfifoFullInt = 1<<2, /* BIU FIFO full */
83 BfifoEmptyInt = 1<<3, /* BIU FIFO empty */
84 CfifoFullInt = 1<<4, /* command FIFO full */
85 CfifoEmptyInt = 1<<5, /* command FIFO empty */
86 BciInt = 1<<6, /* BCI */
87 LpbInt = 1<<7, /* LPB */
88 CbHiInt = 1<<16, /* COB upper threshold */
89 CbLoInt = 1<<17, /* COB lower threshold */
91 SubsystemCtl = 0x8504, /* Subsystem Control: write only */
92 /* clear interrupts for various events */
104 /* enable interrupts for various events */
107 BfifoFullEna = 1<<10,
108 BfifoEmptyEna = 1<<11,
109 CfifoFullEna = 1<<12,
110 CfifoEmptyEna = 1<<13,
111 SubsysBciEna = 1<<14,
115 /* 2D graphics engine software reset */
118 FifoStatus = 0x8508, /* FIFO status: read only */
119 CwbEmpty = 1<<0, /* command write buffer empty */
120 CrbEmpty = 1<<1, /* command read buffer empty */
121 CobEmpty = 1<<2, /* command overflow buffer empty */
122 CfifoEmpty = 1<<3, /* command FIFO empty */
123 CwbFull = 1<<8, /* command write buffer full */
124 CrbFull = 1<<9, /* command read buffer full */
125 CobFull = 1<<10, /* command overflow buffer full */
126 CfifoFull = 1<<11, /* command FIFO full */
128 AdvFunCtl = 0x850C, /* Advanced Function Control: read/write */
129 GeEna = 1<<0, /* enable 2D/3D engine */
131 * according to the manual, BigPixel should be
132 * set when bpp >= 8 (bpp != 4), and then CR50_5-4 are
133 * used to figure out bpp example. however, it does bad things
134 * to the screen in 8bpp mode.
136 BigPixel = 1<<2, /* 8 or more bpp enhanced mode */
137 LaEna = 1<<3, /* linear addressing ena: or'ed with CR58_4 */
138 Mclk_2 = 0<<8, /* 2D engine clock divide: MCLK/2 */
139 Mclk_4 = 1<<8, /* " MCLK/4 */
140 Mclk = 2<<8, /* " MCLK */
141 /* Mclk = 3<<8, /* " MCLK */
142 Ic33mhz = 1<<16, /* Internal clock 33 MHz (instead of 66) */
144 WakeupReg = 0x8510, /* Wakeup: read/write */
145 WakeupBit = 1<<0, /* wake up: or'ed with 3C3_0 */
147 SourceY = 0x8100, /* UL corner of bitblt source */
148 SourceX = 0x8102, /* " */
149 RectY = 0x8100, /* UL corner of rectangle fill */
150 RectX = 0x8102, /* " */
151 DestY = 0x8108, /* UL corner of bitblt dest */
152 DestX = 0x810A, /* " */
153 Height = 0x8148, /* bitblt, image xfer rectangle height */
154 Width = 0x814A, /* bitblt, image xfer rectangle width */
156 StartY = 0x8100, /* Line draw: first point*/
157 StartX = 0x8102, /* " */
159 * For line draws, the following must be programmed:
160 * axial step constant = 2*min(|dx|,|dy|)
161 * diagonal step constant = 2*[min(|dx|,|dy|) - max(|dx|,|dy|)]
162 * error term = 2*min(|dx|,|dy|) - max(|dx|,|dy| - 1
163 * [sic] when start X < end X
164 * error term = 2*min(|dx|,|dy|) - max(|dx|,|dy|
165 * [sic] when start X >= end X
168 DiagonalStep = 0x810A,
170 MinorLength = 0x8148, /* pixel count along minor axis */
171 MajorLength = 0x814A, /* pixel count along major axis */
173 DrawCmd = 0x8118, /* Drawing Command: write only */
175 AcrossPlane = 1<<1, /* across the plane mode */
176 LastPixelOff = 1<<2, /* last pixel of line or vector draw not drawn */
177 Radial = 1<<3, /* enable radial direction (else axial) */
178 DoDraw = 1<<4, /* draw pixels (else only move current pos) */
180 DrawRight = 1<<5, /* axial drawing direction: left to right */
181 /* DrawLeft = 0<<5, */
186 Degree0 = 0<<5, /* drawing direction when Radial */
193 /* image write bus transfer width */
197 * in Bus32 mode, doubleword bits beyond the image rect width are
198 * discarded. each line starts on a new doubleword.
199 * Bus32AP is intended for across-the-plane mode and
200 * rounds to byte boundaries instead.
205 CmdNop = 0<<13, /* nop */
206 CmdLine = 1<<13, /* draw line */
207 CmdFill = 2<<13, /* fill rectangle */
208 CmdBitblt = 6<<13, /* bitblt */
209 CmdPatblt = 7<<13, /* 8x8 pattern blt */
219 /* color sources, controls */
220 BgColor = 0x8120, /* Background Color: read/write */
221 FgColor = 0x8124, /* Foreground Color: read/write */
222 BitplaneWmask = 0x8128, /* Bitplane Write Mask: read/write */
223 BitplaneRmask = 0x812C, /* Bitplane Read Mask: read/write */
224 CmpColor = 0x8130, /* Color Compare: read/write */
233 /* clipping rectangle */
234 TopScissors = 0x8138, /* Top Scissors: write only */
235 LeftScissors = 0x813A, /* Left Scissors: write only */
236 BottomScissors = 0x813C, /* Bottom Scissors: write only */
237 RightScissors = 0x813E, /* Right Scissors: write only */
240 * Registers with Magic were indirectly accessed in older modes.
241 * It is not clear whether the Magic is necessary.
242 * In the older modes, writes to these registers were pipelined,
243 * so that you had to issue an engine command and wait for engine
244 * idle before reading a write back. It is not clear if this is
245 * still the case either.
247 PixCtl = 0x8140, /* Pixel Control: write only */
249 PixMixFg = 0<<6, /* foreground mix register always */
250 PixMixCPU = 2<<6, /* CPU data determines mix register */
251 PixMixDisp = 3<<6, /* display data determines mix register */
253 MfMisc2Ctl = 0x8142, /* Multifunction Control Misc. 2: write only */
254 MfMisc2Magic = 0xD<<12,
255 DstShift = 0, /* 3 bits: destination base address in MB */
256 SrcShift = 4, /* 3 bits: source base address in MB */
257 WaitFifoEmpty = 2<<8, /* wait for write FIFO empty between draws */
259 MfMiscCtl = 0x8144, /* Multifunction Control Misc: write only */
260 MfMiscMagic = 0xE<<12,
261 UseHighBits = 1<<4, /* select upper 16 bits for 32-bit reg access */
262 ClipInvert = 1<<5, /* only touch pixels outside clip rectangle */
263 SkipSame = 0<<6, /* ignore pixels with color CmpColor */
264 SkipDifferent = 1<<7, /* ignore pixels not color CmpColor */
265 CmpEna = 1<<8, /* enable color compare */
266 W32Ena = 1<<9, /* enable 32-bit register write */
267 ClipDis = 1<<11, /* disable clipping */
270 * The bitmap descriptor 1 registers contain the starting
271 * address of the bitmap (in bytes).
272 * The bitmap descriptor 2 registers contain stride (in pixels)
273 * in the lower 16 bits, depth (in bits) in the next 8 bits,
274 * and whether block write is disabled.
276 GBD1 = 0x8168, /* Global Bitmap Descriptor 1: read/write */
277 GBD2 = 0x816C, /* Global Bitmap Descriptor 2: read/write */
279 BDS64 = 1<<0, /* bitmap descriptor size 64 bits */
280 GBDBciEna = 1<<3, /* BCI enable */
281 /* generic BD2 bits */
282 BlockWriteDis = 1<<28,
286 PBD1 = 0x8170, /* Primary Bitmap Descriptor: read/write */
288 SBD1 = 0x8178, /* Secondary Bitmap Descriptor: read/write */
292 /* mastered data transfer registers */
294 /* configuration/status registers */
296 XStatus0 = 0x48C00, /* Status Word 0: read only */
297 /* rev. A silicon differs from rev. B; use AltStatus0 */
298 CBEMaskA = 0x1FFFF, /* filled command buffer entries */
300 BciIdleA = 1<<17, /* BCI idle */
301 Ge3IdleA = 1<<18, /* 3D engine idle */
302 Ge2IdleA = 1<<19, /* 2D engine idle */
303 McpIdleA = 1<<20, /* motion compensation processor idle */
304 MeIdleA = 1<<22, /* master engine idle */
305 PfPendA = 1<<23, /* page flip pending */
316 AltStatus0 = 0x48C60, /* Alternate Status Word 0: read only */
319 /* the Savage4 manual says bits 17..23 for these, like Status0 */
320 /* empirically, they are bits 21..26 */
328 XStatus1 = 0x48C04, /* Status Word 1: read only */
329 /* contains event tag 1, event tag 0, both 16 bits */
331 XStatus2 = 0x48C08, /* Status Word 2: read only */
332 ScanMask = 0x3FF, /* current scan line */
334 VRTMask = 0x7F100, /* vert retrace count */
337 CbThresh = 0x48C10, /* Command Buffer Thresholds: read/write */
338 CobOff = 0x48C14, /* Command Overflow Buffer: read/write */
340 CobPtr = 0x48C18, /* Command Overflow Buffer Pointers: read/write */
341 CobEna = 1<<2, /* command overflow buffer enable */
342 CobBciEna = 1<<3, /* BCI function enable */
343 CbeMask = 0xFFFF8000, /* no. of entries in command buffer */
346 AltStatus1 = 0x48C64, /* Alternate Status Word 1: read only */
347 /* contains current texture surface tag, vertex buffer tag */
361 savagewaitidle(VGAscr *scr)
364 ulong *statw, mask, goal;
371 /* wait for engine idle and FIFO empty */
372 statw = (ulong*)((uchar*)scr->mmio+AltStatus0);
373 mask = CBEMask | Ge2Idle;
376 /* case SAVAGEMXMV: ? */
377 /* case SAVAGEMX: ? */
378 /* case SAVAGEIX: ? */
379 case SUPERSAVAGEIXC16:
382 /* wait for engine idle and FIFO empty */
383 statw = (ulong*)((uchar*)scr->mmio+XStatus0);
384 mask = CBEMaskA | Ge2IdleA;
389 * best we can do: can't print or we'll call ourselves.
390 * savageinit is supposed to not let this happen.
395 for(x=0; x<Maxloop; x++)
396 if((*statw & mask) == goal)
399 savagestats.tostatw[savagestats.idletimeout++&15] = *statw;
400 savagestats.tostatw[savagestats.idletimeout++&15] = (ulong)statw;
404 savagefill(VGAscr *scr, Rectangle r, ulong sval)
408 mmio = (uchar*)scr->mmio;
410 *(ulong*)(mmio+FgColor) = sval;
411 *(ulong*)(mmio+BgColor) = sval;
412 *(ulong*)(mmio+BgMix) = SrcFg|MixNew;
413 *(ulong*)(mmio+FgMix) = SrcFg|MixNew;
414 *(ushort*)(mmio+RectY) = r.min.y;
415 *(ushort*)(mmio+RectX) = r.min.x;
416 *(ushort*)(mmio+Width) = Dx(r)-1;
417 *(ushort*)(mmio+Height) = Dy(r)-1;
418 *(ulong*)(mmio+DrawCmd) = CmdMagic | DoDraw | CmdFill | DrawRight | DrawDown;
424 savagescroll(VGAscr *scr, Rectangle r, Rectangle sr)
430 cmd = CmdMagic | DoDraw | CmdBitblt | SrcPBD | DstGBD;
432 if(r.min.x <= sr.min.x){
441 if(r.min.y <= sr.min.y){
450 mmio = (uchar*)scr->mmio;
452 *(ushort*)(mmio+SourceX) = sp.x;
453 *(ushort*)(mmio+SourceY) = sp.y;
454 *(ushort*)(mmio+DestX) = dp.x;
455 *(ushort*)(mmio+DestY) = dp.y;
456 *(ushort*)(mmio+Width) = Dx(r)-1;
457 *(ushort*)(mmio+Height) = Dy(r)-1;
458 *(ulong*)(mmio+BgMix) = SrcDisp|MixNew;
459 *(ulong*)(mmio+FgMix) = SrcDisp|MixNew;
460 *(ulong*)(mmio+DrawCmd) = cmd;
466 savageblank(VGAscr*, int blank)
471 * Will handle DPMS to monitor
473 vgaxo(Seqx, 8, vgaxi(Seqx,8)|0x06);
474 seqD = vgaxi(Seqx, 0xD);
478 vgaxo(Seqx, 0xD, seqD);
484 vgaxo(Seqx, 0x31, vgaxi(Seqx, 0x31) & ~0x10);
486 vgaxo(Seqx, 0x31, vgaxi(Seqx, 0x31) | 0x10);
491 savageinit(VGAscr *scr)
496 /* if you add chip IDs here be sure to update savagewaitidle */
503 case SUPERSAVAGEIXC16:
507 print("unknown savage %.4lux\n", scr->id);
511 mmio = (uchar*)scr->mmio;
513 print("savageinit: no mmio\n");
517 /* 2D graphics engine software reset */
518 *(ushort*)(mmio+SubsystemCtl) = GeSoftReset;
520 *(ushort*)(mmio+SubsystemCtl) = 0;
523 /* disable BCI as much as possible */
524 *(ushort*)(mmio+CobPtr) &= ~CobBciEna;
525 *(ushort*)(mmio+GBD2) &= ~GBDBciEna;
528 /* enable 32-bit writes, disable clipping */
529 *(ushort*)(mmio+MfMiscCtl) = MfMiscMagic|W32Ena|ClipDis;
532 /* enable all read, write planes */
533 *(ulong*)(mmio+BitplaneRmask) = ~0;
534 *(ulong*)(mmio+BitplaneWmask) = ~0;
537 /* turn on linear access, 2D engine */
538 *(ulong*)(mmio+AdvFunCtl) |= GeEna|LaEna;
541 /* set bitmap descriptors */
542 bd = (scr->gscreen->depth<<DepthShift) |
543 (Dx(scr->gscreen->r)<<StrideShift) | BlockWriteDis
546 *(ulong*)(mmio+GBD1) = 0;
547 *(ulong*)(mmio+GBD2) = bd;
549 *(ulong*)(mmio+PBD1) = 0;
550 *(ulong*)(mmio+PBD2) = bd;
552 *(ulong*)(mmio+SBD1) = 0;
553 *(ulong*)(mmio+SBD2) = bd;
556 * For some reason, the GBD needs to get programmed twice,
557 * once before the PBD, SBD, and once after.
558 * This empirically makes it get set right.
559 * I would like to better understand the ugliness
562 *(ulong*)(mmio+GBD1) = 0;
563 *(ulong*)(mmio+GBD2) = bd;
564 *(ushort*)(mmio+GBD2+2) = bd>>16;
567 scr->fill = savagefill;
568 scr->scroll = savagescroll;
569 scr->blank = savageblank;