]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/pc/vgamach64xx.c
pc, pc64: more conservative pcirouting
[plan9front.git] / sys / src / 9 / pc / vgamach64xx.c
1 #include "u.h"
2 #include "../port/lib.h"
3 #include "mem.h"
4 #include "dat.h"
5 #include "fns.h"
6 #include "io.h"
7 #include "../port/error.h"
8
9 #define Image   IMAGE
10 #include <draw.h>
11 #include <memdraw.h>
12 #include <cursor.h>
13 #include "screen.h"
14
/*
 * Error message strings.  Neither is declared static, so both have
 * external linkage; neither is referenced in the part of the file
 * reviewed here.
 */
char Eunsupportedformat[] = "unsupported video format";
char Enotconfigured[] = "device not configured";
17
/*
 * Scaler/overlay control flags.
 * NOTE(review): presumably bit fields of the OverlayScaleCntl
 * register -- confirm against the chip documentation.
 * SCALE_BANDWIDTH_EXCEEDED and SCALE_BANDWIDTH_RESET are the same
 * bit (0x4000000).
 */
#define SCALE_ZERO_EXTEND               0x0
#define SCALE_DYNAMIC                           0x1
#define SCALE_RED_TEMP_6500K            0x0
#define SCALE_RED_TEMP_9800K            0x2
#define SCALE_HORZ_BLEND                0x0
#define SCALE_HORZ_REP                          0x4
#define SCALE_VERT_BLEND                        0x0
#define SCALE_VERT_REP                          0x8
#define SCALE_BANDWIDTH_NORMAL     0x0
#define SCALE_BANDWIDTH_EXCEEDED  0x4000000
#define SCALE_BANDWIDTH_RESET           0x4000000
#define SCALE_CLK_ACTIVITY              0x0
#define SCALE_CLK_CONTINUOUS            0x20000000
#define OVERLAY_DISABLE                         0x0
#define OVERLAY_ENABLE                          0x40000000
#define SCALE_DISABLE                           0x0
#define SCALE_ENABLE                            0x80000000
35
/*
 * Scaler buffer mode/selection flags.
 * NOTE(review): the register these belong to is not visible in the
 * part of the file reviewed here -- confirm before use.
 */
#define SCALER_FRAME_READ_MODE_FULL     0x0
#define SCALER_BUF_MODE_SINGLE                  0x0
#define SCALER_BUF_MODE_DOUBLE                  0x40000
#define SCALER_BUF_NEXT_0                       0x0
#define SCALER_BUF_NEXT_1                       0x80000
#define SCALER_BUF_STATUS_0                     0x0
#define SCALER_BUF_STATUS_1                     0x100000
43
/*
 * Overlay mix functions (a 4-bit code in bits 8-11) and the
 * exclusive-mode flag.
 * NOTE(review): presumably for the OverlayKeyCntl register, with
 * G = graphics and V = video -- confirm against the chip documentation.
 */
#define OVERLAY_MIX_G_CMP                       0x0
#define OVERLAY_MIX_ALWAYS_G                    0x100
#define OVERLAY_MIX_ALWAYS_V                    0x200
#define OVERLAY_MIX_NOT_G                       0x300
#define OVERLAY_MIX_NOT_V                       0x400
#define OVERLAY_MIX_G_XOR_V                     0x500
#define OVERLAY_MIX_NOT_G_XOR_V         0x600
#define OVERLAY_MIX_V_CMP                       0x700
#define OVERLAY_MIX_NOT_G_OR_NOT_V      0x800
#define OVERLAY_MIX_G_OR_NOT_V          0x900
#define OVERLAY_MIX_NOT_G_OR_V          0xA00
#define OVERLAY_MIX_G_OR_V                      0xB00
#define OVERLAY_MIX_G_AND_V                     0xC00
#define OVERLAY_MIX_NOT_G_AND_V         0xD00
#define OVERLAY_MIX_G_AND_NOT_V         0xE00
#define OVERLAY_MIX_NOT_G_AND_NOT_V     0xF00
#define OVERLAY_EXCLUSIVE_NORMAL        0x0
#define OVERLAY_EXCLUSIVE_V_ONLY        0x80000000
62
/*
 * Pixel format codes for video input (VIDEO_IN_*, low bits) and for
 * scaler input (SCALE_IN_*, the same codes shifted left by 16), plus
 * host memory mode flags.
 * NOTE(review): presumably fields of the VideoFormat register --
 * confirm against the chip documentation.
 */
#define VIDEO_IN_8BPP                                   0x2
#define VIDEO_IN_16BPP                                  0x4
#define VIDEO_IN_32BPP                                  0x6
#define VIDEO_IN_VYUY422                                0xB                     /* 16 bpp */
#define VIDEO_IN_YVYU422                                0xC                     /* 16 bpp */
#define SCALE_IN_15BPP                                  0x30000         /* aRGB 1555 */
#define SCALE_IN_16BPP                                  0x40000         /* RGB 565 */
#define SCALE_IN_32BPP                                  0x60000         /* aRGB 8888 */
#define SCALE_IN_YUV9                                   0x90000         /* planar */
#define SCALE_IN_YUV12                                  0xA0000         /* planar */
#define SCALE_IN_VYUY422                                0xB0000         /* 16 bpp */
#define SCALE_IN_YVYU422                                0xC0000         /* 16 bpp */
#define HOST_YUV_APERTURE_UPPER                 0x0
#define HOST_YUV_APERTURE_LOWER         0x20000000
#define HOST_MEM_MODE_Y                         0x40000000
#define HOST_MEM_MODE_U                         0x80000000
#define HOST_MEM_MODE_V                         0xC0000000
/* HOST_MEM_MODE_NORMAL is an alias for HOST_YUV_APERTURE_UPPER (0x0). */
#define HOST_MEM_MODE_NORMAL                    HOST_YUV_APERTURE_UPPER 
81
/*
 * File-local overlay bookkeeping.  None of these is read or written
 * in the part of the file reviewed here.
 */
static Chan *ovl_chan;          /* Channel of controlling process */
static int ovl_width;           /* Width of input overlay buffer */
static int ovl_height;          /* Height of input overlay buffer */
static int ovl_format;          /* Overlay format */
static ulong ovl_fib;           /* Frame in bytes */
87
/*
 * ASIC description codes.  initengine compares these against
 * bits 24-31 of the ConfigChipId register to decide whether the
 * chip counts as "revision B or greater" (mach64revb).
 * VT4GTIIC and GTBC have the same value (0x3A); only GTBC is used
 * in initengine's switch.
 */
enum {
        VTGTB1S1       = 0x01, /* Asic description for VTB1S1 and GTB1S1. */
        VT4GTIIC       = 0x3A,         /* asic descr for VT4 and RAGE IIC */
        GTB1U1         = 0x19,         /* Asic description for GTB1U1. */
        GTB1S2         = 0x41,         /* Asic description for GTB1S2. */
        GTB2U1         = 0x1A,
        GTB2U2         = 0x5A,
        GTB2U3         = 0x9A,
        GTIIIC1U1      = 0x1B,         /* 3D RAGE PRO asic descrp. */
        GTIIIC1U2      = 0x5B,         /* 3D RAGE PRO asic descrp. */
        GTIIIC2U1      = 0x1C,         /* 3D RAGE PRO asic descrp. */
        GTIIIC2U2      = 0x5C,         /* 3D RAGE PRO asic descrp. */
        GTIIIC2U3      = 0x7C,         /* 3D RAGE PRO asic descrp. */
        GTBC           = 0x3A,         /* 3D RAGE IIC asic descrp. */
        LTPRO          = 0x9C,         /* 3D RAGE LT PRO */
};
104
/*
 * ATI Mach64(CT|ET|G*|V*|L*).
 *
 * Mach64types describes one supported chip; the table of known
 * chips is mach64s[].
 */
typedef struct Mach64types Mach64types;
struct Mach64types {
        ushort  m64_id;                 /* Chip ID; matched against the PCI device id in mach64xxenable */
        int     m64_vtgt;               /* Is this a VT or GT chipset? */
        ulong   m64_ovlclock;           /* Max. overlay clock frequency; divisor in init_overlayclock */
        int     m64_pro;                /* Is this a PRO? */
};
115
/*
 * Driver-wide state.
 */
static ulong mach64refclock;            /* Reference clock; chosen in initengine from a BIOS table value */
static Mach64types *mach64type;         /* Entry of mach64s[] for the detected chip; set by mach64xxenable, nil if unknown */
static int mach64revb;                  /* Revision B or greater? */
static ulong mach64overlay;             /* Overlay buffer */
120
/*
 * Known chips.  Columns follow the Mach64types fields:
 *      m64_id, m64_vtgt, m64_ovlclock, m64_pro
 * The id is the two-character PCI device id (e.g. 'G','T' == 0x4754);
 * mach64xxenable searches this table linearly for the device id.
 * Entries marked with a question mark carry an unconfirmed overlay clock.
 */
static Mach64types mach64s[] = {
        ('C'<<8)|'T',   0,      1350000, /*?*/  0,      /* 4354: CT */
        ('E'<<8)|'T',   0,      1350000, /*?*/  0,      /* 4554: ET */
        ('G'<<8)|'B',   1,      1250000,        1,      /* 4742: 264GT PRO */
        ('G'<<8)|'D',   1,      1250000,        1,      /* 4744: 264GT PRO */
        ('G'<<8)|'I',   1,      1250000,        1,      /* 4749: 264GT PRO */
        ('G'<<8)|'M',   0,      1350000,        0,      /* 474D: Rage XL */
        ('G'<<8)|'P',   1,      1250000,        1,      /* 4750: 264GT PRO */
        ('G'<<8)|'Q',   1,      1250000,        1,      /* 4751: 264GT PRO */
        ('G'<<8)|'R',   1,      1250000,        1,      /* 4752: */
        ('G'<<8)|'T',   1,      800000,         0,      /* 4754: 264GT[B] */
        ('G'<<8)|'U',   1,      1000000,        0,      /* 4755: 264GT DVD */
        ('G'<<8)|'V',   1,      1000000,        0,      /* 4756: Rage2C */
        ('G'<<8)|'Z',   1,      1000000,        0,      /* 475A: Rage2C */
        ('V'<<8)|'T',   1,      800000,         0,      /* 5654: 264VT/GT/VTB */
        ('V'<<8)|'U',   1,      800000,         0,      /* 5655: 264VT3 */
        ('V'<<8)|'V',   1,      1000000,        0,      /* 5656: 264VT4 */
        ('L'<<8)|'B',   0,      1350000,        1,      /* 4C42: Rage LTPro AGP */
        ('L'<<8)|'I',   0,      1350000,        0,      /* 4C49: Rage LTPro AGP */
        ('L'<<8)|'M',   0,      1350000,        0,      /* 4C4D: Rage Mobility */
        ('L'<<8)|'P',   0,      1350000,        1,      /* 4C50: 264LT PRO */
};
143
144
/*
 * Forward declarations.
 * NOTE(review): no functions named hwfill or hwscroll are defined in
 * the part of the file reviewed here; the accelerated fill and scroll
 * routines defined below are named mach64hwfill and mach64hwscroll.
 */
static int hwfill(VGAscr*, Rectangle, ulong);
static int hwscroll(VGAscr*, Rectangle, Rectangle);
static void initengine(VGAscr*);
148
149 static void
150 mach64xxenable(VGAscr* scr)
151 {
152         Pcidev *p;
153         int i;
154
155         if(scr->io)
156                 return;
157         p = scr->pci;
158         if(p == nil || p->vid != 0x1002)
159                 return;
160
161         mach64type = nil;
162         for (i = 0; i != nelem(mach64s); i++)
163                 if (mach64s[i].m64_id == p->did) {
164                         scr->id = p->did;
165                         mach64type = &mach64s[i];
166                         break;                  
167                 }
168
169         if(mach64type != nil){
170                 /*
171                  * The CT doesn't always have the I/O base address
172                  * in the PCI base registers. There is a way to find
173                  * it via the vendor-specific PCI config space but
174                  * this will do for now.
175                  */
176                 scr->io = p->mem[1].bar & ~0x03;
177
178                 if(scr->io == 0)
179                         scr->io = 0x2EC;
180         }
181 }
182
183 static void
184 mach64xxlinear(VGAscr* scr, int size, int)
185 {
186         vgalinearpci(scr);
187         if(scr->paddr == 0)
188                 return;
189         scr->mmio = (ulong*)((uchar*)scr->vaddr+size-1024);
190         addvgaseg("mach64mmio", scr->paddr+size-BY2PG, BY2PG);
191         addvgaseg("mach64screen", scr->paddr, scr->apsize);
192 }
193
/*
 * Register identifiers passed to ior32/iow32.
 *
 * Identifiers below 0x100 are used in two ways (see ior32/iow32):
 * when scr->io is 0x2EC or 0x1C8 the value is shifted left by 10 and
 * added to scr->io to form the port address; otherwise the value
 * indexes mmoffset[] to obtain the register offset.
 *
 * Identifiers from ContextMask (0x100) upward are plain sequential
 * tags; they are only meaningful as indices into mmoffset[] and are
 * accessed through scr->mmio when that mapping exists.
 *
 * CrtcHsyncDis and CrtcVsyncDis are not registers: they are bit masks
 * applied to the CRTC general control register in mach64blank.
 */
enum {
        CrtcOffPitch    = 0x05,
        CrtcGenCtl      = 0x07,
        CurClr0         = 0x0B,         /* I/O Select */
        CurClr1         = 0x0C,
        CurOffset       = 0x0D,
        CurHVposn       = 0x0E,
        CurHVoff        = 0x0F,
        BusCntl = 0x13,
        GenTestCntl     = 0x19,

        CrtcHsyncDis    = 0x04,
        CrtcVsyncDis    = 0x08,

        ContextMask     = 0x100,        /* not accessible via I/O */
        FifoStat,
        GuiStat,
        DpFrgdClr,
        DpBkgdClr,
        DpWriteMask,
        DpMix,
        DpPixWidth,
        DpSrc,
        ClrCmpCntl,
        GuiTrajCntl,
        ScLeftRight,
        ScTopBottom,
        DstOffPitch,
        DstYX,
        DstHeightWidth,
        DstCntl,
        DstHeight,
        DstBresErr,
        DstBresInc,
        DstBresDec,
        SrcCntl,
        SrcHeight1Width1,
        SrcHeight2Width2,
        SrcYX,
        SrcWidth1,
        SrcYXstart,
        HostCntl,
        PatReg0,
        PatReg1,
        PatCntl,
        ScBottom,
        ScLeft,
        ScRight,
        ScTop,
        ClrCmpClr,
        ClrCmpMask,
        DpChainMask,
        SrcOffPitch,    
        LcdIndex,
        LcdData,
        ClockCntl,
        OverlayScaleCntl,
        ConfigChipId,
        Buf0Pitch,
        ScalerBuf0Pitch,
        CaptureConfig,
        OverlayKeyCntl,
        ScalerColourCntl,
        ScalerHCoef0,
        ScalerHCoef1,
        ScalerHCoef2,
        ScalerHCoef3,
        ScalerHCoef4,
        VideoFormat,
        Buf0Offset,
        ScalerBuf0Offset,
        CrtcGenCntl,
        OverlayScaleInc,
        OverlayYX,
        OverlayYXEnd,
        ScalerHeightWidth,
        HTotalDisp,
        VTotalDisp,
};
273
/*
 * Indices of the LCD registers.  lcdr32/lcdw32 place the index in the
 * low 4 bits of the LcdIndex register and then transfer the value
 * through LcdData.  Nlcd is the number of indices defined here.
 */
enum {
        LCD_ConfigPanel = 0,
        LCD_GenCtrl,
        LCD_DstnCntl,
        LCD_HfbPitchAddr,
        LCD_HorzStretch,
        LCD_VertStretch,
        LCD_ExtVertStretch,
        LCD_LtGio,
        LCD_PowerMngmnt,
        LCD_ZvgPio,
        Nlcd,
};
287
/*
 * Offset of the second register bank relative to scr->mmio, in
 * ulong units: -0x100 ulongs is 1KB below the first bank.
 */
#define Bank1                   (-0x100)                /* 1KB */

/*
 * Register offsets, in ulong units, indexed by register identifier.
 * ior32/iow32 use an entry either as an index into scr->mmio or,
 * shifted left by 2, as a byte offset from scr->io.
 *
 * Identifiers with no entry here read as offset 0, which is the
 * offset of HTotalDisp.  In particular there is an entry for
 * CrtcGenCntl but none for CrtcGenCtl (0x07), so
 * mmoffset[CrtcGenCtl] is 0.
 */
static int mmoffset[] = {
        [HTotalDisp]            0x00,
        [VTotalDisp]            0x02,
        [CrtcOffPitch]          0x05,
        [CrtcGenCntl]           0x07,
        [CurClr0]                       0x18,
        [CurClr1]                       0x19,
        [CurOffset]             0x1A,
        [CurHVposn]             0x1B,
        [CurHVoff]              0x1C,
        [ClockCntl]             0x24,
        [BusCntl]                       0x28,
        [LcdIndex]              0x29,
        [LcdData]                       0x2A,
        [GenTestCntl]           0x34,
        [ConfigChipId]          0x38,
        [DstOffPitch]           0x40,
        [DstYX]                 0x43,
        [DstHeight]             0x45,
        [DstHeightWidth]        0x46,
        [DstBresErr]            0x49,
        [DstBresInc]            0x4A,
        [DstBresDec]            0x4B,
        [DstCntl]                       0x4C,
        [SrcOffPitch]           0x60,
        [SrcYX]                 0x63,
        [SrcWidth1]             0x64,
        [SrcYXstart]            0x69,
        [SrcHeight1Width1]      0x66,
        [SrcHeight2Width2]      0x6C,
        [SrcCntl]                       0x6D,
        [HostCntl]                      0x90,
        [PatReg0]                       0xA0,
        [PatReg1]                       0xA1,
        [PatCntl]                       0xA2,
        [ScLeft]                        0xA8,
        [ScRight]                       0xA9,
        [ScLeftRight]           0xAA,
        [ScTop]                 0xAB,
        [ScBottom]              0xAC,
        [ScTopBottom]           0xAD,
        [DpBkgdClr]             0xB0,
        [DpFrgdClr]             0xB1,
        [DpWriteMask]           0xB2,
        [DpChainMask]           0xB3,
        [DpPixWidth]            0xB4,
        [DpMix]                 0xB5,
        [DpSrc]                 0xB6,
        [ClrCmpClr]             0xC0,
        [ClrCmpMask]            0xC1,
        [ClrCmpCntl]            0xC2,
        [FifoStat]                      0xC4,
        [ContextMask]           0xC8,
        [GuiTrajCntl]           0xCC,
        [GuiStat]                       0xCE,

        /* Bank1 */
        [OverlayYX]             Bank1 + 0x00,
        [OverlayYXEnd]          Bank1 + 0x01,
        [OverlayKeyCntl]        Bank1 + 0x06,
        [OverlayScaleInc]       Bank1 + 0x08,
        [OverlayScaleCntl]      Bank1 + 0x09,
        [ScalerHeightWidth]     Bank1 + 0x0A,
        [ScalerBuf0Offset]      Bank1 + 0x0D,
        [ScalerBuf0Pitch]       Bank1 + 0x0F,
        [VideoFormat]           Bank1 + 0x12,
        [CaptureConfig] Bank1 + 0x14,
        [Buf0Offset]            Bank1 + 0x20,
        [Buf0Pitch]             Bank1 + 0x23,
        [ScalerColourCntl]      Bank1 + 0x54,
        [ScalerHCoef0]          Bank1 + 0x55,
        [ScalerHCoef1]          Bank1 + 0x56,
        [ScalerHCoef2]          Bank1 + 0x57,
        [ScalerHCoef3]          Bank1 + 0x58,
        [ScalerHCoef4]          Bank1 + 0x59,
};
366
367 static ulong
368 ior32(VGAscr* scr, int r)
369 {
370         if(scr->io == 0x2EC || scr->io == 0x1C8)
371                 return inl((r<<10)+scr->io);
372         if(r >= 0x100 && scr->mmio != nil)
373                 return scr->mmio[mmoffset[r]];
374         return inl((mmoffset[r]<<2)+scr->io);
375 }
376
377 static void
378 iow32(VGAscr* scr, int r, ulong l)
379 {
380         if(scr->io == 0x2EC || scr->io == 0x1C8)
381                 outl(((r)<<10)+scr->io, l);
382         else if(r >= 0x100 && scr->mmio != nil)
383                 scr->mmio[mmoffset[r]] = l;
384         else
385                 outl((mmoffset[r]<<2)+scr->io, l);
386 }
387
388 static ulong
389 lcdr32(VGAscr *scr, ulong r)
390 {
391         ulong or;
392
393         or = ior32(scr, LcdIndex);
394         iow32(scr, LcdIndex, (or&~0x0F) | (r&0x0F));
395         return ior32(scr, LcdData);
396 }
397
398 static void
399 lcdw32(VGAscr *scr, ulong r, ulong v)
400 {
401         ulong or;
402
403         or = ior32(scr, LcdIndex);
404         iow32(scr, LcdIndex, (or&~0x0F) | (r&0x0F));
405         iow32(scr, LcdData, v);
406 }
407
408 static void
409 mach64xxcurdisable(VGAscr* scr)
410 {
411         ulong r;
412
413         r = ior32(scr, GenTestCntl);
414         iow32(scr, GenTestCntl, r & ~0x80);
415 }
416
417 static void
418 mach64xxcurload(VGAscr* scr, Cursor* curs)
419 {
420         uchar *p;
421         int i, y;
422         ulong c, s, m, r;
423
424         /*
425          * Disable the cursor.
426          */
427         r = ior32(scr, GenTestCntl);
428         iow32(scr, GenTestCntl, r & ~0x80);
429
430         p = scr->vaddr;
431         p += scr->storage;
432
433         /*
434          * Initialise the 64x64 cursor RAM array.
435          * The cursor mode gives the following truth table:
436          *      p1 p0   colour
437          *       0  0   Cursor Colour 0
438          *       0  1   Cursor Colour 1
439          *       1  0   Transparent
440          *       1  1   Complement
441          * Put the cursor into the top-right of the 64x64 array.
442          */
443         for(y = 0; y < 16; y++){
444                 for(i = 0; i < (64-16)/8; i++){
445                         *p++ = 0xAA;
446                         *p++ = 0xAA;
447                 }
448
449                 c = (curs->clr[2*y]<<8)|curs->clr[y*2 + 1];
450                 s = (curs->set[2*y]<<8)|curs->set[y*2 + 1];
451
452                 m = 0x00000000;
453                 for(i = 0; i < 16; i++){
454                         if(s & (1<<(15-i)))
455                                 m |= 0x01<<(2*i);
456                         else if(c & (1<<(15-i))){
457                                 /* nothing to do */
458                         }
459                         else
460                                 m |= 0x02<<(2*i);
461                 }
462                 *p++ = m;
463                 *p++ = m>>8;
464                 *p++ = m>>16;
465                 *p++ = m>>24;
466         }
467         memset(p, 0xAA, (64-16)*16);
468
469         /*
470          * Set the cursor hotpoint and enable the cursor.
471          */
472         scr->offset = curs->offset;
473         iow32(scr, GenTestCntl, 0x80|r);
474 }
475
476 static int
477 ptalmostinrect(Point p, Rectangle r)
478 {
479         return p.x>=r.min.x && p.x<=r.max.x &&
480                p.y>=r.min.y && p.y<=r.max.y;
481 }
482
483 /*
484  * If necessary, translate the rectangle physr
485  * some multiple of [dx dy] so that it includes p.
486  * Return 1 if the rectangle changed.
487  */
488 static int
489 screenpan(Point p, Rectangle *physr, int dx, int dy)
490 {
491         int d;
492
493         if(ptalmostinrect(p, *physr))
494                 return 0;
495
496         if(p.y < physr->min.y){
497                 d = physr->min.y - (p.y&~(dy-1));
498                 physr->min.y -= d;
499                 physr->max.y -= d;
500         }
501         if(p.y > physr->max.y){
502                 d = ((p.y+dy-1)&~(dy-1)) - physr->max.y;
503                 physr->min.y += d;
504                 physr->max.y += d;
505         }
506
507         if(p.x < physr->min.x){
508                 d = physr->min.x - (p.x&~(dx-1));
509                 physr->min.x -= d;
510                 physr->max.x -= d;
511         }
512         if(p.x > physr->max.x){
513                 d = ((p.x+dx-1)&~(dx-1)) - physr->max.x;
514                 physr->min.x += d;
515                 physr->max.x += d;
516         }
517         return 1;
518 }
519
520 static int
521 mach64xxcurmove(VGAscr* scr, Point p)
522 {
523         int x, xo, y, yo;
524         int dx;
525         ulong off, pitch;
526
527         /*
528          * If the point we want to display is outside the current
529          * screen rectangle, pan the screen to display it.
530          *
531          * We have to move in 64-bit chunks.
532          */
533         if(scr->gscreen->depth == 24)
534                 dx = (64*3)/24;
535         else
536                 dx = 64 / scr->gscreen->depth;
537
538         if(panning && screenpan(p, &physgscreenr, dx, 1)){
539                 off = (physgscreenr.min.y*Dx(scr->gscreen->r)+physgscreenr.min.x)/dx;
540                 pitch = Dx(scr->gscreen->r)/8;
541                 iow32(scr, CrtcOffPitch, (pitch<<22)|off);
542         }
543
544         p.x -= physgscreenr.min.x;
545         p.y -= physgscreenr.min.y;
546
547         /*
548          * Mustn't position the cursor offscreen even partially,
549          * or it disappears. Therefore, if x or y is -ve, adjust the
550          * cursor presets instead. If y is negative also have to
551          * adjust the starting offset.
552          */
553         if((x = p.x+scr->offset.x) < 0){
554                 xo = x;
555                 x = 0;
556         }
557         else
558                 xo = 0;
559         if((y = p.y+scr->offset.y) < 0){
560                 yo = y;
561                 y = 0;
562         }
563         else
564                 yo = 0;
565
566         iow32(scr, CurHVoff, ((64-16-yo)<<16)|(64-16-xo));
567         iow32(scr, CurOffset, scr->storage/8 + (-yo*2));
568         iow32(scr, CurHVposn, (y<<16)|x);
569
570         return 0;
571 }
572
573 static void
574 mach64xxcurenable(VGAscr* scr)
575 {
576         ulong r, storage;
577
578         mach64xxenable(scr);
579         if(scr->io == 0)
580                 return;
581
582         r = ior32(scr, GenTestCntl);
583         iow32(scr, GenTestCntl, r & ~0x80);
584
585         iow32(scr, CurClr0, (Pwhite<<24)|(Pwhite<<16)|(Pwhite<<8)|Pwhite);
586         iow32(scr, CurClr1, (Pblack<<24)|(Pblack<<16)|(Pblack<<8)|Pblack);
587
588         /*
589          * Find a place for the cursor data in display memory.
590          * Must be 64-bit aligned.
591          */
592         storage = (scr->gscreen->width*sizeof(ulong)*scr->gscreen->r.max.y+7)/8;
593         iow32(scr, CurOffset, storage);
594         scr->storage = storage*8;
595
596         /*
597          * Cursor goes in the top right corner of the 64x64 array
598          * so the horizontal and vertical presets are 64-16.
599          */
600         iow32(scr, CurHVposn, (0<<16)|0);
601         iow32(scr, CurHVoff, ((64-16)<<16)|(64-16));
602
603         /*
604          * Load, locate and enable the 64x64 cursor.
605          */
606         mach64xxcurload(scr, &arrow);
607         mach64xxcurmove(scr, ZP);
608         iow32(scr, GenTestCntl, 0x80|r);
609 }
610
611 static void
612 waitforfifo(VGAscr *scr, int entries)
613 {
614         int x;
615
616         x = 0;
617         while((ior32(scr, FifoStat)&0xFF) > (0x8000>>entries) && x++ < 1000000)
618                 ;
619         if(x >= 1000000)
620                 iprint("fifo %d stat %#.8lux %#.8lux scrio %#.8lux mmio %#p scr %#p pc %#p\n", entries, ior32(scr, FifoStat), scr->mmio[mmoffset[FifoStat]], scr->io, scr->mmio, scr, getcallerpc(&scr));
621 }
622
623 static void
624 waitforidle(VGAscr *scr)
625 {
626         int x;
627
628         waitforfifo(scr, 16);
629         x = 0;
630         while((ior32(scr, GuiStat)&1) && x++ < 1000000)
631                 ;
632         if(x >= 1000000)
633                 iprint("idle stat %#.8lux %#.8lux scrio %#.8lux mmio %#p scr %#p pc %#p\n", ior32(scr, GuiStat), scr->mmio[mmoffset[GuiStat]], scr->io, scr->mmio, scr, getcallerpc(&scr));
634 }
635
636 static void
637 resetengine(VGAscr *scr)
638 {
639         ulong x;
640         x = ior32(scr, GenTestCntl);
641         iow32(scr, GenTestCntl, x&~0x100);
642         iow32(scr, GenTestCntl, x|0x100);
643         iow32(scr, BusCntl, ior32(scr, BusCntl)|0x00A00000);
644 }
645
/*
 * Choose the scaler clock divider (ecp_div) from the current dot
 * clock and the chip's maximum overlay clock, and program it if it
 * differs from the current setting.
 *
 * The PLL registers are reached through byte accesses to the
 * ClockCntl register in scr->mmio: byte 1 takes the PLL register
 * index shifted left by 2, byte 2 carries the data.
 * NOTE(review): the |2 used when writing is presumably a write-enable
 * bit -- confirm against the chip documentation.
 *
 * scr->mmio and mach64type are dereferenced without checks, and the
 * dot clock computation divides by pll_ref_div as read from the chip.
 */
static void
init_overlayclock(VGAscr *scr)
{
        uchar *cc, save, pll_ref_div, pll_vclk_cntl, vclk_post_div, 
                        vclk_fb_div, ecp_div;
        int i;
        ulong dotclock;

        /* Taken from GLX */
        /* Get monitor dotclock, check for Overlay Scaler clock limit */
        cc = (uchar *)&scr->mmio[mmoffset[ClockCntl]];
        /* remember the PLL index; i is the low two bits of byte 0 */
        save = cc[1]; i = cc[0] & 3;
        cc[1] = 2<<2; pll_ref_div = cc[2];
        cc[1] = 5<<2; pll_vclk_cntl = cc[2];
        /* PLL register 6 holds a 2-bit post divider per value of i */
        cc[1] = 6<<2; vclk_post_div = (cc[2]>>(i+i)) & 3;
        /* PLL registers 7..10 hold the feedback divider per value of i */
        cc[1] = (7+i)<<2; vclk_fb_div = cc[2];

        dotclock = 2 * mach64refclock * vclk_fb_div / 
                        (pll_ref_div * (1 << vclk_post_div));
        /* ecp_div: 0=dotclock, 1=dotclock/2, 2=dotclock/4 */
        ecp_div = dotclock / mach64type->m64_ovlclock;
        if (ecp_div>2) ecp_div = 2;

        /* Force a scaler clock factor of 1 if refclock *
          * is unknown (VCLK_SRC not PLLVCLK)  */
        if ((pll_vclk_cntl & 0x03) != 0x03) 
                ecp_div = 0;
        /* the divider lives in bits 4-5 of PLL register 5 */
        if ((pll_vclk_cntl & 0x30) != ecp_div<<4) {
                cc[1] = (5<<2)|2;
                cc[2] = (pll_vclk_cntl&0xCF) | (ecp_div<<4);
        }

        /* Restore PLL Register Index */
        cc[1] = save;
}
681
682 static void
683 initengine(VGAscr *scr)
684 {
685         ulong pitch;
686         uchar *bios;
687         ushort table;
688
689         pitch = Dx(scr->gscreen->r)/8;
690         if(scr->gscreen->depth == 24)
691                 pitch *= 3;
692
693         resetengine(scr);
694         waitforfifo(scr, 14);
695         iow32(scr, ContextMask, ~0);
696         iow32(scr, DstOffPitch, pitch<<22);
697         iow32(scr, DstYX, 0);
698         iow32(scr, DstHeight, 0);
699         iow32(scr, DstBresErr, 0);
700         iow32(scr, DstBresInc, 0);
701         iow32(scr, DstBresDec, 0);
702         iow32(scr, DstCntl, 0x23);
703         iow32(scr, SrcOffPitch, pitch<<22);
704         iow32(scr, SrcYX, 0);
705         iow32(scr, SrcHeight1Width1, 1);
706         iow32(scr, SrcYXstart, 0);
707         iow32(scr, SrcHeight2Width2, 1);
708         iow32(scr, SrcCntl, 0x01);
709
710         waitforfifo(scr, 13);
711         iow32(scr, HostCntl, 0);
712         iow32(scr, PatReg0, 0);
713         iow32(scr, PatReg1, 0);
714         iow32(scr, PatCntl, 0);
715         iow32(scr, ScLeft, 0);
716         iow32(scr, ScTop, 0);
717         iow32(scr, ScBottom, 0xFFFF);
718         iow32(scr, ScRight, 0xFFFF);
719         iow32(scr, DpBkgdClr, 0);
720         iow32(scr, DpFrgdClr, ~0);
721         iow32(scr, DpWriteMask, ~0);
722         iow32(scr, DpMix, 0x70003);
723         iow32(scr, DpSrc, 0x00010100);
724
725         waitforfifo(scr, 3);
726         iow32(scr, ClrCmpClr, 0);
727         iow32(scr, ClrCmpMask, ~0);
728         iow32(scr, ClrCmpCntl, 0);
729
730         waitforfifo(scr, 2);
731         switch(scr->gscreen->depth){
732         case 8:
733         case 24:        /* [sic] */
734                 iow32(scr, DpPixWidth, 0x00020202);
735                 iow32(scr, DpChainMask, 0x8080);
736                 break;
737         case 16:
738                 iow32(scr, DpPixWidth, 0x00040404);
739                 iow32(scr, DpChainMask, 0x8410);
740                 break;
741         case 32:
742                 iow32(scr, DpPixWidth, 0x00060606);
743                 iow32(scr, DpChainMask, 0x8080);
744                 break;
745         }
746
747         /* Get the base freq from the BIOS */
748         bios  = kaddr(0xC000);
749         table = *(ushort *)(bios + 0x48);
750         table = *(ushort *)(bios + table + 0x10);
751         switch (*(ushort *)(bios + table + 0x08)) {
752         case 2700: 
753                 mach64refclock = 270000; 
754                 break;
755         case 2863: 
756         case 2864: 
757                 mach64refclock = 286363; 
758                 break;
759         case 2950: 
760                 mach64refclock = 294989; 
761                 break;
762         case 1432: 
763         default:
764                 mach64refclock = 143181; 
765                 break ; 
766         }
767         
768         /* Figure out which revision this chip is */
769         switch ((scr->mmio[mmoffset[ConfigChipId]] >> 24) & 0xFF) {
770         case VTGTB1S1:
771         case GTB1U1:
772         case GTB1S2:
773         case GTB2U1:
774         case GTB2U2:
775         case GTB2U3:
776         case GTBC:
777         case GTIIIC1U1:
778         case GTIIIC1U2:
779         case GTIIIC2U1:
780         case GTIIIC2U2: 
781         case GTIIIC2U3: 
782         case LTPRO:
783                         mach64revb = 1;
784                         break;
785         default: 
786                         mach64revb = 0;
787                         break;
788         }
789
790         waitforidle(scr);
791 }
792
793 static int
794 mach64hwfill(VGAscr *scr, Rectangle r, ulong sval)
795 {
796         ulong pitch;
797         ulong ctl;
798
799 if(drawdebug)
800         iprint("hwfill %R val %lux...\n", r, sval);
801
802         /* shouldn't happen */
803         if(scr->io == 0x2EC || scr->io == 0x1C8 || scr->io == 0)
804                 return 0;
805
806         pitch = Dx(scr->gscreen->r)/8;
807         ctl = 1|2;      /* left-to-right, top-to-bottom */
808         if(scr->gscreen->depth == 24){
809                 r.min.x *= 3;
810                 r.max.x *= 3;
811                 pitch *= 3;
812                 ctl |= (1<<7)|(((r.min.x/4)%6)<<8);
813         }
814
815         waitforfifo(scr, 11);
816         iow32(scr, DpFrgdClr, sval);
817         iow32(scr, DpWriteMask, 0xFFFFFFFF);
818         iow32(scr, DpMix, 0x00070003);
819         iow32(scr, DpSrc, 0x00000111);
820         iow32(scr, ClrCmpCntl, 0x00000000);
821         iow32(scr, ScLeftRight, 0x1FFF0000);
822         iow32(scr, ScTopBottom, 0x1FFF0000);
823         iow32(scr, DstOffPitch, pitch<<22);
824         iow32(scr, DstCntl, ctl);
825         iow32(scr, DstYX, (r.min.x<<16)|r.min.y);
826         iow32(scr, DstHeightWidth, (Dx(r)<<16)|Dy(r));
827
828         waitforidle(scr);
829         return 1;
830 }
831
832 static int
833 mach64hwscroll(VGAscr *scr, Rectangle r, Rectangle sr)
834 {
835         ulong pitch;
836         Point dp, sp;
837         ulong ctl;
838         int dx, dy;
839
840         dx = Dx(r);
841         dy = Dy(r);
842         pitch = Dx(scr->gscreen->r)/8;
843         if(scr->gscreen->depth == 24){
844                 dx *= 3;
845                 pitch *= 3;
846                 r.min.x *= 3;
847                 sr.min.x *= 3;
848         }
849
850         ctl = 0;
851         if(r.min.x <= sr.min.x){
852                 ctl |= 1;
853                 dp.x = r.min.x;
854                 sp.x = sr.min.x;
855         }else{
856                 dp.x = r.min.x+dx-1;
857                 sp.x = sr.min.x+dx-1;
858         }
859
860         if(r.min.y <= sr.min.y){
861                 ctl |= 2;
862                 dp.y = r.min.y;
863                 sp.y = sr.min.y;
864         }else{
865                 dp.y = r.min.y+dy-1;
866                 sp.y = sr.min.y+dy-1;
867         }
868
869         if(scr->gscreen->depth == 24)
870                 ctl |= (1<<7)|(((dp.x/4)%6)<<8);
871
872         waitforfifo(scr, 6);
873         iow32(scr, ScLeftRight, 0x1FFF0000);
874         iow32(scr, ScTopBottom, 0x1FFF0000);
875         iow32(scr, DpWriteMask, 0xFFFFFFFF);
876         iow32(scr, DpMix, 0x00070003);
877         iow32(scr, DpSrc, 0x00000300);
878         iow32(scr, ClrCmpCntl, 0x00000000);
879
880         waitforfifo(scr, 8);
881         iow32(scr, SrcOffPitch, pitch<<22);
882         iow32(scr, SrcCntl, 0x00000000);
883         iow32(scr, SrcYX, (sp.x<<16)|sp.y);
884         iow32(scr, SrcWidth1, dx);
885         iow32(scr, DstOffPitch, pitch<<22);
886         iow32(scr, DstCntl, ctl);
887
888         iow32(scr, DstYX, (dp.x<<16)|dp.y);
889         iow32(scr, DstHeightWidth, (dx<<16)|dy);
890
891         waitforidle(scr);
892
893         return 1;
894 }
895
896 /*
897  * This should work, but doesn't.
898  * It messes up the screen timings for some reason.
899  */
900 static void
901 mach64blank(VGAscr *scr, int blank)
902 {
903         ulong ctl;
904
905         ctl = ior32(scr, CrtcGenCtl) & ~(CrtcHsyncDis|CrtcVsyncDis);
906         if(blank)
907                 ctl |= CrtcHsyncDis|CrtcVsyncDis;
908         iow32(scr, CrtcGenCtl, ctl);
909 }
910
911 /*
912  * We squirrel away whether the LCD and/or CRT were
913  * on when we were called to blank the screen, and
914  * restore the old state.  If we are called to blank the
915  * screen when it is already blank, we don't update the state.
916  * Such a call sequence should not happen, though.
917  *
918  * We could try forcing the chip into power management
919  * mode instead, but I'm not sure how that would interact
920  * with screen updates going on while the screen is blanked.
921  */
922 static void
923 mach64lcdblank(VGAscr *scr, int blank)
924 {
925         static int crtlcd;
926         ulong x;
927
928         if(blank) {
929                 x = lcdr32(scr, LCD_GenCtrl);
930                 if(x & 3) {
931                         crtlcd = x & 3;
932                         lcdw32(scr, LCD_GenCtrl,  x&~3);
933                 }
934         } else {
935                 if(crtlcd == 0)
936                         crtlcd = 2;     /* lcd only */
937                 x = lcdr32(scr, LCD_GenCtrl);
938                 lcdw32(scr, LCD_GenCtrl, x | crtlcd);
939         }
940 }
941
942 static void
943 mach64xxdrawinit(VGAscr *scr)
944 {
945         if(scr->io > 0x2FF){
946                 initengine(scr);
947                 scr->fill = mach64hwfill;
948                 scr->scroll = mach64hwscroll;
949         }
950 /*      scr->blank = mach64blank; */
951         switch(scr->id){
952         default:
953                 break;
954         case ('L'<<8)|'B':              /* 4C42: Rage 3D LTPro */
955         case ('L'<<8)|'I':              /* 4C49: Rage 3D LTPro */
956         case ('L'<<8)|'M':              /* 4C4D: Rage Mobility */
957         case ('L'<<8)|'P':              /* 4C50: Rage 3D LTPro */
958                 scr->blank = mach64lcdblank;
959                 hwblank = 1;
960                 break;
961         }
962 }
963
964 static void
965 ovl_configure(VGAscr *scr, Chan *c, char **field)
966 {
967         int w, h;
968         char *format;
969
970         w = (int)strtol(field[1], nil, 0);
971         h = (int)strtol(field[2], nil, 0);
972         format = field[3];
973
974         if (c != ovl_chan) 
975                 error(Einuse);
976         if (strcmp(format, "YUYV"))
977                 error(Eunsupportedformat);
978         
979         ovl_width  = w;
980         ovl_height = h;
981         ovl_fib       = w * h * sizeof(ushort);
982
983         waitforidle(scr);
984         scr->mmio[mmoffset[BusCntl]] |= 0x08000000;     /* Enable regblock 1 */
985         scr->mmio[mmoffset[OverlayScaleCntl]] = 
986                 SCALE_ZERO_EXTEND|SCALE_RED_TEMP_6500K|
987                 SCALE_HORZ_BLEND|SCALE_VERT_BLEND;
988         scr->mmio[mmoffset[!mach64revb? Buf0Pitch: ScalerBuf0Pitch]] = w;
989         scr->mmio[mmoffset[CaptureConfig]] = 
990                 SCALER_FRAME_READ_MODE_FULL|
991                 SCALER_BUF_MODE_SINGLE|
992                 SCALER_BUF_NEXT_0;
993         scr->mmio[mmoffset[OverlayKeyCntl]] = !mach64revb?
994                 OVERLAY_MIX_ALWAYS_V|(OVERLAY_EXCLUSIVE_NORMAL << 28): 
995                 0x011;
996
997         if (mach64type->m64_pro) {
998                 waitforfifo(scr, 6);
999
1000                 /* set the scaler co-efficient registers */
1001                 scr->mmio[mmoffset[ScalerColourCntl]] = 
1002                         (0x00) | (0x10 << 8) | (0x10 << 16);
1003                 scr->mmio[mmoffset[ScalerHCoef0]] = 
1004                         (0x00) | (0x20 << 8);
1005                 scr->mmio[mmoffset[ScalerHCoef1]] = 
1006                         (0x0D) | (0x20 << 8) | (0x06 << 16) | (0x0D << 24);
1007                 scr->mmio[mmoffset[ScalerHCoef2]] = 
1008                         (0x0D) | (0x1C << 8) | (0x0A << 16) | (0x0D << 24);
1009                 scr->mmio[mmoffset[ScalerHCoef3]] = 
1010                         (0x0C) | (0x1A << 8) | (0x0E << 16) | (0x0C << 24);
1011                 scr->mmio[mmoffset[ScalerHCoef4]] = 
1012                         (0x0C) | (0x14 << 8) | (0x14 << 16) | (0x0C << 24);
1013         }
1014         
1015         waitforfifo(scr, 3);
1016         scr->mmio[mmoffset[VideoFormat]] = SCALE_IN_YVYU422 |
1017                 (!mach64revb? 0xC: 0);
1018
1019         if (mach64overlay == 0)
1020                 mach64overlay = scr->storage + 64 * 64 * sizeof(uchar);
1021         scr->mmio[mmoffset[!mach64revb? Buf0Offset: ScalerBuf0Offset]] = 
1022                 mach64overlay;
1023 }
1024
1025 static void
1026 ovl_enable(VGAscr *scr, Chan *c, char **field)
1027 {
1028         int x, y, w, h;
1029         long h_inc, v_inc;
1030
1031         x = (int)strtol(field[1], nil, 0);
1032         y = (int)strtol(field[2], nil, 0);
1033         w = (int)strtol(field[3], nil, 0);
1034         h = (int)strtol(field[4], nil, 0);
1035
1036         if (x < 0 || x + w > physgscreenr.max.x ||
1037              y < 0 || y + h > physgscreenr.max.y)
1038                 error(Ebadarg);
1039
1040         if (c != ovl_chan) 
1041                 error(Einuse);
1042         if (scr->mmio[mmoffset[CrtcGenCntl]] & 1) {     /* double scan enable */
1043                 y *= 2;
1044                 h *= 2;
1045         }
1046
1047         waitforfifo(scr, 2);
1048         scr->mmio[mmoffset[OverlayYX]] = 
1049                         ((x & 0xFFFF) << 16) | (y & 0xFFFF);
1050         scr->mmio[mmoffset[OverlayYXEnd]] = 
1051                         (((x + w) & 0xFFFF) << 16) | ((y + h) & 0xFFFF);
1052
1053         h_inc = (ovl_width << 12) / (w >> 1);  /* ??? */
1054         v_inc = (ovl_height << 12) / h;
1055         waitforfifo(scr, 2);
1056         scr->mmio[mmoffset[OverlayScaleInc]] = 
1057                         ((h_inc & 0xFFFF) << 16) | (v_inc & 0xFFFF);
1058         scr->mmio[mmoffset[ScalerHeightWidth]] = 
1059                         ((ovl_width & 0xFFFF) << 16) | (ovl_height & 0xFFFF);
1060         waitforidle(scr);
1061         scr->mmio[mmoffset[OverlayScaleCntl]] |= 
1062                         (SCALE_ENABLE|OVERLAY_ENABLE);
1063 }
1064
1065 static void
1066 ovl_status(VGAscr *scr, Chan *, char **field)
1067 {
1068         pprint("%s: %s %.4uX, VT/GT %s, PRO %s, ovlclock %lud, rev B %s, refclock %ld\n",
1069                    scr->dev->name, field[0], mach64type->m64_id,
1070                    mach64type->m64_vtgt? "yes": "no",
1071                    mach64type->m64_pro? "yes": "no",
1072                    mach64type->m64_ovlclock,
1073                    mach64revb? "yes": "no",
1074                    mach64refclock);
1075         pprint("%s: storage @%.8luX, aperture @%8.ulX, ovl buf @%.8ulX\n",
1076                    scr->dev->name, scr->storage, scr->paddr,
1077                    mach64overlay);
1078 }
1079         
1080 static void
1081 ovl_openctl(VGAscr *, Chan *c, char **)
1082 {
1083         if (ovl_chan) 
1084                 error(Einuse);
1085         ovl_chan = c;
1086 }
1087
1088 static void
1089 ovl_closectl(VGAscr *scr, Chan *c, char **)
1090 {
1091         if (c != ovl_chan) return;
1092
1093         waitforidle(scr);
1094         scr->mmio[mmoffset[OverlayScaleCntl]] &=
1095                         ~(SCALE_ENABLE|OVERLAY_ENABLE);
1096         ovl_chan = nil;
1097         ovl_width = ovl_height = ovl_fib = 0;
1098 }
1099
/*
 * Overlay control commands.  The values index the handler table
 * ovl_cmds and appear as the first member of each mach64xxcmd row.
 */
enum
{
	CMclosectl	= 0,
	CMconfigure	= 1,
	CMenable	= 2,
	CMopenctl	= 3,
	CMstatus	= 4,
};
1108
1109 static void (*ovl_cmds[])(VGAscr *, Chan *, char **) =
1110 {
1111         [CMclosectl]    ovl_closectl,
1112         [CMconfigure]   ovl_configure,
1113         [CMenable]      ovl_enable,
1114         [CMopenctl]     ovl_openctl,
1115         [CMstatus]      ovl_status,
1116 };
1117
1118 static Cmdtab mach64xxcmd[] =
1119 {
1120         CMclosectl,     "closectl",     1,
1121         CMconfigure,    "configure",    4,
1122         CMenable,       "enable",       5,
1123         CMopenctl,      "openctl",      1,
1124         CMstatus,       "status",       1,
1125 };
1126
1127 static void
1128 mach64xxovlctl(VGAscr *scr, Chan *c, void *a, int n)
1129 {
1130         Cmdbuf *cb;
1131         Cmdtab *ct;
1132
1133         if(!mach64type->m64_vtgt) 
1134                 error(Enodev);
1135
1136         if(!scr->overlayinit){
1137                 scr->overlayinit = 1;
1138                 init_overlayclock(scr);
1139         }
1140         cb = parsecmd(a, n);
1141         if(waserror()){
1142                 free(cb);
1143                 nexterror();
1144         }
1145
1146         ct = lookupcmd(cb, mach64xxcmd, nelem(mach64xxcmd));
1147
1148         ovl_cmds[ct->index](scr, c, cb->f);
1149
1150         poperror();
1151         free(cb);
1152 }
1153
1154 static int
1155 mach64xxovlwrite(VGAscr *scr, void *a, int len, vlong offs)
1156 {
1157         uchar *src;
1158         int _len;
1159
1160         if (ovl_chan == nil) return len;        /* Acts as a /dev/null */
1161         
1162         /* Calculate the destination address */
1163         _len = len;
1164         src   = (uchar *)a;
1165         while (len > 0) {
1166                 ulong _offs;
1167                 int nb;
1168
1169                 _offs = (ulong)(offs % ovl_fib);
1170                 nb     = (_offs + len > ovl_fib)? ovl_fib - _offs: len;
1171                 memmove((uchar *)scr->vaddr + mach64overlay + _offs, 
1172                                   src, nb);
1173                 offs += nb;
1174                 src  += nb;
1175                 len  -= nb;
1176         }
1177         return _len;
1178 }
1179
1180 VGAdev vgamach64xxdev = {
1181         "mach64xx",
1182
1183         mach64xxenable,                 /* enable */
1184         0,                              /* disable */
1185         0,                              /* page */
1186         mach64xxlinear,                 /* linear */
1187         mach64xxdrawinit,       /* drawinit */
1188         0,
1189         mach64xxovlctl, /* overlay control */
1190         mach64xxovlwrite,       /* write the overlay */
1191 };
1192
1193 VGAcur vgamach64xxcur = {
1194         "mach64xxhwgc",
1195
1196         mach64xxcurenable,              /* enable */
1197         mach64xxcurdisable,             /* disable */
1198         mach64xxcurload,                /* load */
1199         mach64xxcurmove,                /* move */
1200
1201         1                                       /* doespanning */
1202 };
1203