]> git.lizzy.rs Git - plan9front.git/blob - sys/src/libmemdraw/draw.c
ssh: document thumbfile options
[plan9front.git] / sys / src / libmemdraw / draw.c
1 #include <u.h>
2 #include <libc.h>
3 #include <draw.h>
4 #include <memdraw.h>
5 #include <pool.h>
6
/*
 * Memory pool declared elsewhere.  memimageinit() installs memimagemove
 * as its move hook when the pool is named "Image" or "image".
 */
extern Pool* imagmem;
/* debug switch; in the visible part of this file only commented-out iprint calls consult it */
int drawdebug;
9
/* perfect approximation to NTSC = .299r+.587g+.114b when 0 ≤ r,g,b < 256 */
#define RGB2K(r,g,b)    ((156763*(r)+307758*(g)+59769*(b))>>19)

/*
 * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8).
 * We add another 127 to round to the nearest value rather
 * than truncate; that is why the macros below add 128
 * (0x00800080 in the forms that work on two lanes at once).
 *
 * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2).
 * CALC2x does two parallel 16-bit calculations on y input images (y=1,2).
 *
 * tmp, tmp1 and tmp2 are caller-supplied scratch variables.  They are
 * assigned and then read several times, so they must be plain lvalues;
 * for the two- and four-lane forms they must hold 32 unsigned bits.
 * Value arguments are parenthesized, so any expression may be passed.
 */
#define CALC11(a, v, tmp) \
        (tmp=(a)*(v)+128, (tmp+(tmp>>8))>>8)

#define CALC12(a1, v1, a2, v2, tmp) \
        (tmp=(a1)*(v1)+(a2)*(v2)+128, (tmp+(tmp>>8))>>8)

/* selects bytes 0 and 2 of a 32-bit word: the two 16-bit lanes */
#define MASK 0xFF00FF

#define CALC21(a, vvuu, tmp) \
        (tmp=(a)*(vvuu)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)

/* scale all four bytes of rgba by a/255: even bytes in one pass, odd bytes in the other */
#define CALC41(a, rgba, tmp1, tmp2) \
        (CALC21(a, (rgba) & MASK, tmp1) | \
         (CALC21(a, ((rgba)>>8)&MASK, tmp2)<<8))

#define CALC22(a1, vvuu1, a2, vvuu2, tmp) \
        (tmp=(a1)*(vvuu1)+(a2)*(vvuu2)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)

/* (a1*rgba1 + a2*rgba2)/255 on all four bytes, split into even and odd bytes as in CALC41 */
#define CALC42(a1, rgba1, a2, rgba2, tmp1, tmp2) \
        (CALC22(a1, (rgba1) & MASK, a2, (rgba2) & MASK, tmp1) | \
         (CALC22(a1, ((rgba1)>>8) & MASK, a2, ((rgba2)>>8) & MASK, tmp2)<<8))
42
/* fills in the conversion tables; called from memimageinit() */
static void mktables(void);
/*
 * A sub-drawing routine tried by memimagedraw().  A zero return means
 * it could not satisfy the request and the next routine is tried.
 */
typedef int Subdraw(Memdrawparam*);
static Subdraw chardraw, alphadraw, memoptdraw;

/*
 * memones and memzeros are the 1x1 replicated GREY1 images allocated by
 * memimageinit(), with their single pixel byte set to ~0 and 0.
 * The four public names below are aliases for them.
 */
static Memimage*        memones;
static Memimage*        memzeros;
Memimage *memwhite;
Memimage *memblack;
Memimage *memtransparent;
Memimage *memopaque;

/* installed by memimageinit() as the 'b' print verb; defined elsewhere */
int     _ifmt(Fmt*);
55
56 int
57 memimageinit(void)
58 {
59         static int didinit = 0;
60
61         if(didinit)
62                 return 0;
63
64         if(imagmem != nil)
65         if(strcmp(imagmem->name, "Image") == 0 || strcmp(imagmem->name, "image") == 0)
66                 imagmem->move = memimagemove;
67
68         mktables();
69         _memmkcmap();
70
71         fmtinstall('R', Rfmt); 
72         fmtinstall('P', Pfmt);
73         fmtinstall('b', _ifmt);
74
75         memones = allocmemimage(Rect(0,0,1,1), GREY1);
76         memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
77         if(memones == nil || memzeros == nil)
78                 return -1;
79
80         memones->flags |= Frepl;
81         memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
82         *byteaddr(memones, ZP) = ~0;
83
84         memzeros->flags |= Frepl;
85         memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
86         *byteaddr(memzeros, ZP) = 0;
87
88         memwhite = memones;
89         memblack = memzeros;
90         memopaque = memones;
91         memtransparent = memzeros;
92
93         didinit = 1;
94         return 0;
95 }
96
97 static ulong imgtorgba(Memimage*, ulong);
98 static ulong rgbatoimg(Memimage*, ulong);
99 static ulong pixelbits(Memimage*, Point);
100
101 #define DBG if(0)
102 void
103 memimagedraw(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
104 {
105         Memdrawparam par;
106
107         if(mask == nil)
108                 mask = memopaque;
109
110 DBG     print("memimagedraw %p/%luX %R @ %p %p/%luX %P %p/%luX %P... ", dst, dst->chan, r, dst->data->bdata, src, src->chan, p0, mask, mask->chan, p1);
111
112         if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0){
113 //              if(drawdebug)
114 //                      iprint("empty clipped rectangle\n");
115                 return;
116         }
117
118         if(op < Clear || op > SoverD){
119 //              if(drawdebug)
120 //                      iprint("op out of range: %d\n", op);
121                 return;
122         }
123
124         par.op = op;
125         par.dst = dst;
126         par.r = r;
127         par.src = src;
128         /* par.sr set by drawclip */
129         par.mask = mask;
130         /* par.mr set by drawclip */
131
132         par.state = 0;
133         if(src->flags&Frepl){
134                 par.state |= Replsrc;
135                 if(Dx(src->r)==1 && Dy(src->r)==1){
136                         par.sval = pixelbits(src, src->r.min);
137                         par.state |= Simplesrc;
138                         par.srgba = imgtorgba(src, par.sval);
139                         par.sdval = rgbatoimg(dst, par.srgba);
140                         if((par.srgba&0xFF) == 0 && (op&DoutS)){
141 //                              if (drawdebug) iprint("fill with transparent source\n");
142                                 return; /* no-op successfully handled */
143                         }
144                 }
145         }
146
147         if(mask->flags & Frepl){
148                 par.state |= Replmask;
149                 if(Dx(mask->r)==1 && Dy(mask->r)==1){
150                         par.mval = pixelbits(mask, mask->r.min);
151                         if(par.mval == 0 && (op&DoutS)){
152 //                              if(drawdebug) iprint("fill with zero mask\n");
153                                 return; /* no-op successfully handled */
154                         }
155                         par.state |= Simplemask;
156                         if(par.mval == ~0)
157                                 par.state |= Fullmask;
158                         par.mrgba = imgtorgba(mask, par.mval);
159                 }
160         }
161
162 //      if(drawdebug)
163 //              iprint("dr %R sr %R mr %R...", r, par.sr, par.mr);
164 DBG print("draw dr %R sr %R mr %R %lux\n", r, par.sr, par.mr, par.state);
165
166         /*
167          * Now that we've clipped the parameters down to be consistent, we 
168          * simply try sub-drawing routines in order until we find one that was able
169          * to handle us.  If the sub-drawing routine returns zero, it means it was
170          * unable to satisfy the request, so we do not return.
171          */
172
173         /*
174          * Hardware support.  Each video driver provides this function,
175          * which checks to see if there is anything it can help with.
176          * There could be an if around this checking to see if dst is in video memory.
177          */
178 DBG print("test hwdraw\n");
179         if(hwdraw(&par)){
180 //if(drawdebug) iprint("hw handled\n");
181 DBG print("hwdraw handled\n");
182                 return;
183         }
184         /*
185          * Optimizations using memmove and memset.
186          */
187 DBG print("test memoptdraw\n");
188         if(memoptdraw(&par)){
189 //if(drawdebug) iprint("memopt handled\n");
190 DBG print("memopt handled\n");
191                 return;
192         }
193
194         /*
195          * Character drawing.
196          * Solid source color being painted through a boolean mask onto a high res image.
197          */
198 DBG print("test chardraw\n");
199         if(chardraw(&par)){
200 //if(drawdebug) iprint("chardraw handled\n");
201 DBG print("chardraw handled\n");
202                 return;
203         }
204
205         /*
206          * General calculation-laden case that does alpha for each pixel.
207          */
208 DBG print("do alphadraw\n");
209         alphadraw(&par);
210 //if(drawdebug) iprint("alphadraw handled\n");
211 DBG print("alphadraw handled\n");
212 }
213 #undef DBG
214
215
216 /*
217  * Clip the destination rectangle further based on the properties of the 
218  * source and mask rectangles.  Once the destination rectangle is properly
219  * clipped, adjust the source and mask rectangles to be the same size.
220  *
221  * Return zero if the final rectangle is null.
222  */
223 int
224 drawclipnorepl(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
225 {
226         Point rmin, delta;
227         int splitcoords;
228         Rectangle omr;
229
230         if(badrect(*r))
231                 return 0;
232         splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
233         /* clip to destination */
234         rmin = r->min;
235         if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
236                 return 0;
237         /* move mask point */
238         p1->x += r->min.x-rmin.x;
239         p1->y += r->min.y-rmin.y;
240         /* move source point */
241         p0->x += r->min.x-rmin.x;
242         p0->y += r->min.y-rmin.y;
243         /* map destination rectangle into source */
244         sr->min = *p0;
245         sr->max.x = p0->x+Dx(*r);
246         sr->max.y = p0->y+Dy(*r);
247         /* sr is r in source coordinates; clip to source */
248         if(!(src->flags&Frepl) && !rectclip(sr, src->r))
249                 return 0;
250         if(!rectclip(sr, src->clipr))
251                 return 0;
252         /* compute and clip rectangle in mask */
253         if(splitcoords){
254                 /* move mask point with source */
255                 p1->x += sr->min.x-p0->x;
256                 p1->y += sr->min.y-p0->y;
257                 mr->min = *p1;
258                 mr->max.x = p1->x+Dx(*sr);
259                 mr->max.y = p1->y+Dy(*sr);
260                 omr = *mr;
261                 /* mr is now rectangle in mask; clip it */
262                 if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
263                         return 0;
264                 if(!rectclip(mr, mask->clipr))
265                         return 0;
266                 /* reflect any clips back to source */
267                 sr->min.x += mr->min.x-omr.min.x;
268                 sr->min.y += mr->min.y-omr.min.y;
269                 sr->max.x += mr->max.x-omr.max.x;
270                 sr->max.y += mr->max.y-omr.max.y;
271         }else{
272                 if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
273                         return 0;
274                 if(!rectclip(sr, mask->clipr))
275                         return 0;
276                 *mr = *sr;
277         }
278         /* move source clipping back to destination */
279         delta.x = r->min.x - p0->x;
280         delta.y = r->min.y - p0->y;
281         r->min.x = sr->min.x + delta.x;
282         r->min.y = sr->min.y + delta.y;
283         r->max.x = sr->max.x + delta.x;
284         r->max.y = sr->max.y + delta.y;
285         *p0 = sr->min;
286         *p1 = mr->min;
287
288         assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
289         assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
290         assert(ptinrect(r->min, dst->r));
291
292         return 1;
293 }
294
295 /*
296  * like drawclipnorepl() above, but if source or mask is replicated,
297  * move its clipped rectangle so that its minimum point falls within
298  * the repl rectangle.
299  *
300  * Return zero if the final rectangle is null.
301  */
302 int
303 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
304 {
305         Point delta;
306
307         if(!drawclipnorepl(dst, r, src, p0, mask, p1, sr, mr))
308                 return 0;
309
310         /* move source rectangle so sr->min is in src->r */
311         if(src->flags&Frepl) {
312                 delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
313                 delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
314                 sr->min.x += delta.x;
315                 sr->min.y += delta.y;
316                 sr->max.x += delta.x;
317                 sr->max.y += delta.y;
318                 *p0 = sr->min;
319         }
320
321         /* move mask point so it is in mask->r */
322         *p1 = drawrepl(mask->r, *p1);
323         mr->min = *p1;
324         mr->max.x = p1->x+Dx(*sr);
325         mr->max.y = p1->y+Dy(*sr);
326
327         assert(ptinrect(*p0, src->r));
328         assert(ptinrect(*p1, mask->r));
329
330         return 1;
331 }
332
/*
 * Conversion tables, all filled in by mktables().
 */
static uchar replbit[1+8][256];         /* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
static uchar conv18[256][8];            /* conv18[x][y] is the yth 1-bit pixel of byte x (y=0 is the high bit), widened to 8 bits */
static uchar conv28[256][4];            /* the same for the four 2-bit pixels of byte x */
static uchar conv48[256][2];            /* the same for the two 4-bit pixels of byte x */
340
/*
 * replmul[n] is the multiplier that replicates an n-bit value to
 * fill 8 bits, for 1 ≤ n ≤ 8.  Each constant is a 16-bit pattern
 * (shown in the comments) with a one wherever the bottom bit of
 * the n-bit value is to land.  Only the top 8 bits of the 16-bit
 * product are kept; the low 8 bits exist so that the pattern
 * lines up when n does not divide 8.
 *
 * Should check to see if it's easier to just refer to replmul than
 * use the precomputed values in replbit.  On PCs it may well
 * be; on machines with slow multiply instructions it probably isn't.
 */
static int replmul[1+8] = {
        0,
        0xFFFF,         /* n=1: 1111111111111111 */
        0x5555,         /* n=2: 0101010101010101 */
        0x2492,         /* n=3: 0010010010010010 */
        0x1111,         /* n=4: 0001000100010001 */
        0x0842,         /* n=5: 0000100001000010 */
        0x0410,         /* n=6: 0000010000010000 */
        0x0204,         /* n=7: 0000001000000100 */
        0x0101,         /* n=8: 0000000100000001 */
};
369
370 static void
371 mktables(void)
372 {
373         int i, j, mask, sh, small;
374                 
375         /* bit replication up to 8 bits */
376         for(i=0; i<256; i++){
377                 for(j=0; j<=8; j++){    /* j <= 8 [sic] */
378                         small = i & ((1<<j)-1);
379                         replbit[j][i] = (small*replmul[j])>>8;
380                 }
381         }
382
383         /* bit unpacking up to 8 bits, only powers of 2 */
384         for(i=0; i<256; i++){
385                 for(j=0, sh=7, mask=1; j<8; j++, sh--)
386                         conv18[i][j] = replbit[1][(i>>sh)&mask];
387
388                 for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
389                         conv28[i][j] = replbit[2][(i>>sh)&mask];
390
391                 for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
392                         conv48[i][j] = replbit[4][(i>>sh)&mask];
393         }
394 }
395
/* fully-opaque alpha byte that Buffer.alpha points at when a buffer has no alpha channel */
static uchar ones = 0xff;

/*
 * General alpha drawing case.  Can handle anything.
 *
 * A Buffer describes one scan line of pixels as a set of pointers to
 * the first pixel's channel bytes; adding delta to a pointer moves it
 * to the next pixel.
 */
typedef struct  Buffer  Buffer;
struct Buffer {
        /* used by most routines */
        uchar   *red;
        uchar   *grn;
        uchar   *blu;
        uchar   *alpha; /* is &ones when unused, never nil */
        uchar   *grey;
        ulong   *rgba;  /* the pixel as a whole word; used for word-at-a-time work when delta is 4 */
        int     delta;  /* number of bytes to add to pointer to get next pixel to the right */

        /* used by boolcalc* for mask data */
        uchar   *m;             /* ptr to mask data r.min byte; like p->bytermin */
        int             mskip;  /* no. of left bits to skip in *m */
        uchar   *bm;            /* ptr to mask data img->r.min byte; like p->bytey0s */
        int             bmskip; /* no. of left bits to skip in *bm */
        uchar   *em;            /* ptr to mask data img->r.max.x byte; like p->bytey0e */
        int             emskip; /* no. of right bits to skip in *em */
};

typedef struct  Param   Param;
/* read scan line y of an image: called as fn(param, scratch buffer, y) */
typedef Buffer  Readfn(Param*, uchar*, int);
/* write a computed Buffer back: called as fn(param, address of the destination line, buffer) */
typedef void    Writefn(Param*, uchar*, Buffer);
/* combine one line: called as fn(dst, src, mask, dx, isgrey, op); returns the buffer to write */
typedef Buffer  Calcfn(Buffer, Buffer, Buffer, int, int, int);

enum {
        MAXBCACHE = 16  /* size of Param.bcache; a replicated image taller than this is not line-cached */
};
429
/*
 * giant rathole to customize functions with
 *
 * One Param exists per image taking part in an alphadraw() (source,
 * mask, destination).  getparam() zeroes it and fills in the basic
 * fields; alphadraw() then sets the call hooks and buffer pointers.
 */
struct Param {
        Readfn  *replcall;      /* the real reader, saved when replread is installed in its place */
        Readfn  *greymaskcall;  /* the real mask reader, saved when greymaskread is installed */
        Readfn  *convreadcall;  /* NOTE(review): presumably set by convfn(); not visible here */
        Writefn *convwritecall; /* NOTE(review): presumably set by convfn(); not visible here */

        Memimage *img;
        Rectangle       r;      /* rectangle of img taking part in the draw */
        int     dx;     /* of r */
        int     needbuf;        /* set when source and destination share pixel data */
        int     convgrey;
        int     alphaonly;

        uchar   *bytey0s;               /* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
        uchar   *bytermin;      /* byteaddr(Pt(r.min.x, img->r.min.y)) */
        uchar   *bytey0e;               /* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
        int             bwidth; /* sizeof(ulong)*img->width; added once per scan line */

        int     replcache;      /* if set, cache buffers */
        Buffer  bcache[MAXBCACHE];
        ulong   bfilled;        /* NOTE(review): looks like a bitmap of filled bcache entries; confirm in replread */
        uchar   *bufbase;       /* drawbuf + bufoff, once drawbuf has been allocated */
        int     bufoff;         /* offset of this image's scratch area within drawbuf */
        int     bufdelta;       /* 4*dx: scratch bytes reserved per buffered line */

        int     dir;            /* +1 to draw top to bottom, -1 for bottom to top */

        int     convbufoff;     /* offset of the conversion buffer within drawbuf */
        uchar   *convbuf;       /* drawbuf + convbufoff */
        Param   *convdpar;      /* NOTE(review): presumably the destination Param recorded by convfn() */
        int     convdx;
};
463
static Readfn   greymaskread, replread, readptr;
static Writefn  nullwrite;
static Calcfn   alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
static Calcfn   boolcalc14, boolcalc236789, boolcalc1011;

/* choose the reader or writer that suits an image */
static Readfn*  readfn(Memimage*);
static Readfn*  readalphafn(Memimage*);
static Writefn* writefn(Memimage*);

static Calcfn*  boolcopyfn(Memimage*, Memimage*);
static Readfn*  convfn(Memimage*, Param*, Memimage*, Param*, int*);

/*
 * Calculation routine for each drawing operator, indexed by op.
 * The digits in a routine's name are the table positions it serves.
 * alphadraw() uses this table in the general case.
 */
static Calcfn *alphacalc[Ncomp] = 
{
        alphacalc0,             /* Clear */
        alphacalc14,            /* DoutS */
        alphacalc2810,          /* SoutD */
        alphacalc3679,          /* DxorS */
        alphacalc14,            /* DinS */
        alphacalc5,             /* D */
        alphacalc3679,          /* DatopS */
        alphacalc3679,          /* DoverS */
        alphacalc2810,          /* SinD */
        alphacalc3679,          /* SatopD */
        alphacalc2810,          /* S */
        alphacalc11,            /* SoverD */
};

/*
 * The same, for the case alphadraw() recognises as boolean:
 * a GREY1 mask and a source without an alpha channel.
 */
static Calcfn *boolcalc[Ncomp] =
{
        alphacalc0,             /* Clear */
        boolcalc14,             /* DoutS */
        boolcalc236789,         /* SoutD */
        boolcalc236789,         /* DxorS */
        boolcalc14,             /* DinS */
        alphacalc5,             /* D */
        boolcalc236789,         /* DatopS */
        boolcalc236789,         /* DoverS */
        boolcalc236789,         /* SinD */
        boolcalc236789,         /* SatopD */
        boolcalc1011,           /* S */
        boolcalc1011,           /* SoverD */
};
507
/*
 * Avoid standard Lock, QLock so that can be used in kernel.
 *
 * A Dbuf is the per-draw working state of alphadraw(): a scratch
 * buffer that is kept between calls plus the three Params.
 * allocdbuf() claims one by test-and-set on inuse; alphadraw()
 * releases it by storing 0 there.
 */
typedef struct Dbuf Dbuf;
struct Dbuf
{
        uchar *p;       /* scratch buffer, grown by alphadraw() as needed */
        int n;          /* size of p in bytes */
        Param spar, mpar, dpar; /* source, mask and destination parameters */
        int inuse;      /* nonzero while claimed */
};
/* fixed pool: at most this many alphadraw() calls can be in progress at once */
static Dbuf dbuf[10];
520
521 static Dbuf*
522 allocdbuf(void)
523 {
524         int i;
525
526         for(i=0; i<nelem(dbuf); i++){
527                 if(dbuf[i].inuse)
528                         continue;
529                 if(!_tas(&dbuf[i].inuse))
530                         return &dbuf[i];
531         }
532         return nil;
533 }
534
535 static void
536 getparam(Param *p, Memimage *img, Rectangle r, int convgrey, int needbuf, int *ndrawbuf)
537 {
538         int nbuf;
539
540         memset(p, 0, sizeof *p);
541
542         p->img = img;
543         p->r = r;
544         p->dx = Dx(r);
545         p->needbuf = needbuf;
546         p->convgrey = convgrey;
547
548         assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
549
550         p->bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
551         p->bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
552         p->bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
553         p->bwidth = sizeof(ulong)*img->width;
554
555         assert(p->bytey0s <= p->bytermin && p->bytermin <= p->bytey0e);
556
557         if(p->r.min.x == p->img->r.min.x)
558                 assert(p->bytermin == p->bytey0s);
559
560         nbuf = 1;
561         if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
562                 p->replcache = 1;
563                 nbuf = Dy(img->r);
564         }
565         p->bufdelta = 4*p->dx;
566         p->bufoff = *ndrawbuf;
567         *ndrawbuf += p->bufdelta*nbuf;
568 }
569
570 static void
571 clipy(Memimage *img, int *y)
572 {
573         int dy;
574
575         dy = Dy(img->r);
576         if(*y == dy)
577                 *y = 0;
578         else if(*y == -1)
579                 *y = dy-1;
580         assert(0 <= *y && *y < dy);
581 }
582
583 static void
584 dumpbuf(char *s, Buffer b, int n)
585 {
586         int i;
587         uchar *p;
588         
589         print("%s", s);
590         for(i=0; i<n; i++){
591                 print(" ");
592                 if(p=b.grey){
593                         print(" k%.2uX", *p);
594                         b.grey += b.delta;
595                 }else{  
596                         if(p=b.red){
597                                 print(" r%.2uX", *p);
598                                 b.red += b.delta;
599                         }
600                         if(p=b.grn){
601                                 print(" g%.2uX", *p);
602                                 b.grn += b.delta;
603                         }
604                         if(p=b.blu){
605                                 print(" b%.2uX", *p);
606                                 b.blu += b.delta;
607                         }
608                 }
609                 if((p=b.alpha) != &ones){
610                         print(" α%.2uX", *p);
611                         b.alpha += b.delta;
612                 }
613         }
614         print("\n");
615 }
616
617 /*
618  * For each scan line, we expand the pixels from source, mask, and destination
619  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
620  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
621  * the readers need not copy the data: they can simply return pointers to the data.
622  * If the destination image is grey and the source is not, it is converted using the NTSC
623  * formula.
624  *
625  * Once we have all the channels, we call either rgbcalc or greycalc, depending on 
626  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
627  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
628  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
629  * the calculator, and that buffer is passed to a function to write it to the destination.
630  * If the buffer is already pointing at the destination, the writing function is a no-op.
631  */
632 #define DBG if(0)
633 static int
634 alphadraw(Memdrawparam *par)
635 {
636         int isgrey, starty, endy, op;
637         int needbuf, dsty, srcy, masky;
638         int y, dir, dx, dy, ndrawbuf;
639         uchar *drawbuf;
640         Buffer bsrc, bdst, bmask;
641         Readfn *rdsrc, *rdmask, *rddst;
642         Calcfn *calc;
643         Writefn *wrdst;
644         Memimage *src, *mask, *dst;
645         Rectangle r, sr, mr;
646         Dbuf *z;
647
648         r = par->r;
649         dx = Dx(r);
650         dy = Dy(r);
651
652         z = allocdbuf();
653         if(z == nil)
654                 return 0;
655
656         src = par->src;
657         mask = par->mask;       
658         dst = par->dst;
659         sr = par->sr;
660         mr = par->mr;
661         op = par->op;
662
663         isgrey = dst->flags&Fgrey;
664
665         /*
666          * Buffering when src and dst are the same bitmap is sufficient but not 
667          * necessary.  There are stronger conditions we could use.  We could
668          * check to see if the rectangles intersect, and if simply moving in the
669          * correct y direction can avoid the need to buffer.
670          */
671         needbuf = (src->data == dst->data);
672
673         ndrawbuf = 0;
674         getparam(&z->spar, src, sr, isgrey, needbuf, &ndrawbuf);
675         getparam(&z->dpar, dst, r, isgrey, needbuf, &ndrawbuf);
676         getparam(&z->mpar, mask, mr, 0, needbuf, &ndrawbuf);
677
678         dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
679         z->spar.dir = z->mpar.dir = z->dpar.dir = dir;
680
681         /*
682          * If the mask is purely boolean, we can convert from src to dst format
683          * when we read src, and then just copy it to dst where the mask tells us to.
684          * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
685          *
686          * The computation is accomplished by assigning the function pointers as follows:
687          *      rdsrc - read and convert source into dst format in a buffer
688          *      rdmask - convert mask to bytes, set pointer to it
689          *      rddst - fill with pointer to real dst data, but do no reads
690          *      calc - copy src onto dst when mask says to.
691          *      wrdst - do nothing
692          * This is slightly sleazy, since things aren't doing exactly what their names say,
693          * but it avoids a fair amount of code duplication to make this a case here
694          * rather than have a separate booldraw.
695          */
696 //if(drawdebug) iprint("flag %lud mchan %lux=?%x dd %d\n", src->flags&Falpha, mask->chan, GREY1, dst->depth);
697         if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
698 //if(drawdebug) iprint("boolcopy...");
699                 rdsrc = convfn(dst, &z->dpar, src, &z->spar, &ndrawbuf);
700                 rddst = readptr;
701                 rdmask = readfn(mask);
702                 calc = boolcopyfn(dst, mask);
703                 wrdst = nullwrite;
704         }else{
705                 /* usual alphadraw parameter fetching */
706                 rdsrc = readfn(src);
707                 rddst = readfn(dst);
708                 wrdst = writefn(dst);
709                 calc = alphacalc[op];
710
711                 /*
712                  * If there is no alpha channel, we'll ask for a grey channel
713                  * and pretend it is the alpha.
714                  */
715                 if(mask->flags&Falpha){
716                         rdmask = readalphafn(mask);
717                         z->mpar.alphaonly = 1;
718                 }else{
719                         z->mpar.greymaskcall = readfn(mask);
720                         z->mpar.convgrey = 1;
721                         rdmask = greymaskread;
722
723                         /*
724                          * Should really be above, but then boolcopyfns would have
725                          * to deal with bit alignment, and I haven't written that.
726                          *
727                          * This is a common case for things like ellipse drawing.
728                          * When there's no alpha involved and the mask is boolean,
729                          * we can avoid all the division and multiplication.
730                          */
731                         if(mask->chan == GREY1 && !(src->flags&Falpha))
732                                 calc = boolcalc[op];
733                         else if(op == SoverD && !(src->flags&Falpha))
734                                 calc = alphacalcS;
735                 }
736         }
737
738         /*
739          * If the image has a small enough repl rectangle,
740          * we can just read each line once and cache them.
741          */
742         if(z->spar.replcache){
743                 z->spar.replcall = rdsrc;
744                 rdsrc = replread;
745         }
746         if(z->mpar.replcache){
747                 z->mpar.replcall = rdmask;
748                 rdmask = replread;
749         }
750
751         if(z->n < ndrawbuf){
752                 free(z->p);
753                 if((z->p = mallocz(ndrawbuf, 0)) == nil){
754                         z->inuse = 0;
755                         return 0;
756                 }
757                 z->n = ndrawbuf;
758         }
759         drawbuf = z->p;
760
761         /*
762          * Before we were saving only offsets from drawbuf in the parameter
763          * structures; now that drawbuf has been grown to accomodate us,
764          * we can fill in the pointers.
765          */
766         z->spar.bufbase = drawbuf+z->spar.bufoff;
767         z->mpar.bufbase = drawbuf+z->mpar.bufoff;
768         z->dpar.bufbase = drawbuf+z->dpar.bufoff;
769         z->spar.convbuf = drawbuf+z->spar.convbufoff;
770
771         if(dir == 1){
772                 starty = 0;
773                 endy = dy;
774         }else{
775                 starty = dy-1;
776                 endy = -1;
777         }
778
779         /*
780          * srcy, masky, and dsty are offsets from the top of their
781          * respective Rectangles.  they need to be contained within
782          * the rectangles, so clipy can keep them there without division.
783          */
784         srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
785         masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
786         dsty = starty + r.min.y - dst->r.min.y;
787
788         assert(0 <= srcy && srcy < Dy(src->r));
789         assert(0 <= masky && masky < Dy(mask->r));
790         assert(0 <= dsty && dsty < Dy(dst->r));
791
792         for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
793                 clipy(src, &srcy);
794                 clipy(dst, &dsty);
795                 clipy(mask, &masky);
796
797                 bsrc = rdsrc(&z->spar, z->spar.bufbase, srcy);
798 DBG print("[");
799                 bmask = rdmask(&z->mpar, z->mpar.bufbase, masky);
800 DBG print("]\n");
801                 bdst = rddst(&z->dpar, z->dpar.bufbase, dsty);
802 DBG             dumpbuf("src", bsrc, dx);
803 DBG             dumpbuf("mask", bmask, dx);
804 DBG             dumpbuf("dst", bdst, dx);
805                 bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
806                 wrdst(&z->dpar, z->dpar.bytermin+dsty*z->dpar.bwidth, bdst);
807         }
808
809         z->inuse = 0;
810         return 1;
811 }
812 #undef DBG
813
814 static Buffer
815 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
816 {
817         USED(grey);
818         USED(op);
819         USED(b1);
820         USED(b2);
821         memset(bdst.rgba, 0, dx*bdst.delta);
822         return bdst;
823 }
824
825 /*
826  * Do the channels in the buffers match enough
827  * that we can do word-at-a-time operations
828  * on the pixels?
829  */
830 static int
831 chanmatch(Buffer *bdst, Buffer *bsrc)
832 {
833         uchar *drgb, *srgb;
834         
835         /*
836          * first, r, g, b must be in the same place
837          * in the rgba word.
838          */
839         drgb = (uchar*)bdst->rgba;
840         srgb = (uchar*)bsrc->rgba;
841         if(bdst->red - drgb != bsrc->red - srgb
842         || bdst->blu - drgb != bsrc->blu - srgb
843         || bdst->grn - drgb != bsrc->grn - srgb)
844                 return 0;
845         
846         /*
847          * that implies alpha is in the same place,
848          * if it is there at all (it might be == &ones).
849          * if the destination is &ones, we can scribble
850          * over the rgba slot just fine.
851          */
852         if(bdst->alpha == &ones)
853                 return 1;
854         
855         /*
856          * if the destination is not ones but the src is,
857          * then the simultaneous calculation will use
858          * bogus bytes from the src's rgba.  no good.
859          */
860         if(bsrc->alpha == &ones)
861                 return 0;
862         
863         /*
864          * otherwise, alphas are in the same place.
865          */
866         return 1;
867 }
868
869 static Buffer
870 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
871 {
872         Buffer obdst;
873         int fd, sadelta;
874         int i, sa, ma, q;
875         ulong t, t1;
876
877         obdst = bdst;
878         sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
879         q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
880
881         for(i=0; i<dx; i++){
882                 sa = *bsrc.alpha;
883                 ma = *bmask.alpha;
884                 fd = CALC11(sa, ma, t);
885                 if(op == DoutS)
886                         fd = 255-fd;
887
888                 if(grey){
889                         *bdst.grey = CALC11(fd, *bdst.grey, t);
890                         bsrc.grey += bsrc.delta;
891                         bdst.grey += bdst.delta;
892                 }else{
893                         if(q){
894                                 *bdst.rgba = CALC41(fd, *bdst.rgba, t, t1);
895                                 bsrc.rgba++;
896                                 bdst.rgba++;
897                                 bsrc.alpha += sadelta;
898                                 bmask.alpha += bmask.delta;
899                                 continue;
900                         }
901                         *bdst.red = CALC11(fd, *bdst.red, t);
902                         *bdst.grn = CALC11(fd, *bdst.grn, t);
903                         *bdst.blu = CALC11(fd, *bdst.blu, t);
904                         bsrc.red += bsrc.delta;
905                         bsrc.blu += bsrc.delta;
906                         bsrc.grn += bsrc.delta;
907                         bdst.red += bdst.delta;
908                         bdst.blu += bdst.delta;
909                         bdst.grn += bdst.delta;
910                 }
911                 if(bdst.alpha != &ones){
912                         *bdst.alpha = CALC11(fd, *bdst.alpha, t);
913                         bdst.alpha += bdst.delta;
914                 }
915                 bmask.alpha += bmask.delta;
916                 bsrc.alpha += sadelta;
917         }
918         return obdst;
919 }
920
921 static Buffer
922 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
923 {
924         Buffer obdst;
925         int fs, sadelta;
926         int i, ma, da, q;
927         ulong t, t1;
928
929         obdst = bdst;
930         sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
931         q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
932
933         for(i=0; i<dx; i++){
934                 ma = *bmask.alpha;
935                 da = *bdst.alpha;
936                 if(op == SoutD)
937                         da = 255-da;
938                 fs = ma;
939                 if(op != S)
940                         fs = CALC11(fs, da, t);
941
942                 if(grey){
943                         *bdst.grey = CALC11(fs, *bsrc.grey, t);
944                         bsrc.grey += bsrc.delta;
945                         bdst.grey += bdst.delta;
946                 }else{
947                         if(q){
948                                 *bdst.rgba = CALC41(fs, *bsrc.rgba, t, t1);
949                                 bsrc.rgba++;
950                                 bdst.rgba++;
951                                 bmask.alpha += bmask.delta;
952                                 bdst.alpha += bdst.delta;
953                                 continue;
954                         }
955                         *bdst.red = CALC11(fs, *bsrc.red, t);
956                         *bdst.grn = CALC11(fs, *bsrc.grn, t);
957                         *bdst.blu = CALC11(fs, *bsrc.blu, t);
958                         bsrc.red += bsrc.delta;
959                         bsrc.blu += bsrc.delta;
960                         bsrc.grn += bsrc.delta;
961                         bdst.red += bdst.delta;
962                         bdst.blu += bdst.delta;
963                         bdst.grn += bdst.delta;
964                 }
965                 if(bdst.alpha != &ones){
966                         *bdst.alpha = CALC11(fs, *bsrc.alpha, t);
967                         bdst.alpha += bdst.delta;
968                 }
969                 bmask.alpha += bmask.delta;
970                 bsrc.alpha += sadelta;
971         }
972         return obdst;
973 }
974
975 static Buffer
976 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
977 {
978         Buffer obdst;
979         int fs, fd, sadelta;
980         int i, sa, ma, da, q;
981         ulong t, t1;
982
983         obdst = bdst;
984         sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
985         q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
986
987         for(i=0; i<dx; i++){
988                 sa = *bsrc.alpha;
989                 ma = *bmask.alpha;
990                 da = *bdst.alpha;
991                 if(op == SatopD)
992                         fs = CALC11(ma, da, t);
993                 else
994                         fs = CALC11(ma, 255-da, t);
995                 if(op == DoverS)
996                         fd = 255;
997                 else{
998                         fd = CALC11(sa, ma, t);
999                         if(op != DatopS)
1000                                 fd = 255-fd;
1001                 }
1002
1003                 if(grey){
1004                         *bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
1005                         bsrc.grey += bsrc.delta;
1006                         bdst.grey += bdst.delta;
1007                 }else{
1008                         if(q){
1009                                 *bdst.rgba = CALC42(fs, *bsrc.rgba, fd, *bdst.rgba, t, t1);
1010                                 bsrc.rgba++;
1011                                 bdst.rgba++;
1012                                 bsrc.alpha += sadelta;
1013                                 bmask.alpha += bmask.delta;
1014                                 bdst.alpha += bdst.delta;
1015                                 continue;
1016                         }
1017                         *bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
1018                         *bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
1019                         *bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
1020                         bsrc.red += bsrc.delta;
1021                         bsrc.blu += bsrc.delta;
1022                         bsrc.grn += bsrc.delta;
1023                         bdst.red += bdst.delta;
1024                         bdst.blu += bdst.delta;
1025                         bdst.grn += bdst.delta;
1026                 }
1027                 if(bdst.alpha != &ones){
1028                         *bdst.alpha = CALC12(fs, sa, fd, da, t);
1029                         bdst.alpha += bdst.delta;
1030                 }
1031                 bmask.alpha += bmask.delta;
1032                 bsrc.alpha += sadelta;
1033         }
1034         return obdst;
1035 }
1036
1037 static Buffer
1038 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
1039 {
1040         USED(dx);
1041         USED(grey);
1042         USED(op);
1043         USED(b1);
1044         USED(b2);
1045         return bdst;
1046 }
1047
1048 static Buffer
1049 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1050 {
1051         Buffer obdst;
1052         int fd, sadelta;
1053         int i, sa, ma, q;
1054         ulong t, t1;
1055
1056         USED(op);
1057         obdst = bdst;
1058         sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
1059         q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
1060
1061         for(i=0; i<dx; i++){
1062                 sa = *bsrc.alpha;
1063                 ma = *bmask.alpha;
1064                 fd = 255-CALC11(sa, ma, t);
1065
1066                 if(grey){
1067                         *bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
1068                         bsrc.grey += bsrc.delta;
1069                         bdst.grey += bdst.delta;
1070                 }else{
1071                         if(q){
1072                                 *bdst.rgba = CALC42(ma, *bsrc.rgba, fd, *bdst.rgba, t, t1);
1073                                 bsrc.rgba++;
1074                                 bdst.rgba++;
1075                                 bsrc.alpha += sadelta;
1076                                 bmask.alpha += bmask.delta;
1077                                 continue;
1078                         }
1079                         *bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
1080                         *bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
1081                         *bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
1082                         bsrc.red += bsrc.delta;
1083                         bsrc.blu += bsrc.delta;
1084                         bsrc.grn += bsrc.delta;
1085                         bdst.red += bdst.delta;
1086                         bdst.blu += bdst.delta;
1087                         bdst.grn += bdst.delta;
1088                 }
1089                 if(bdst.alpha != &ones){
1090                         *bdst.alpha = CALC12(ma, sa, fd, *bdst.alpha, t);
1091                         bdst.alpha += bdst.delta;
1092                 }
1093                 bmask.alpha += bmask.delta;
1094                 bsrc.alpha += sadelta;
1095         }
1096         return obdst;
1097 }
1098
1099 /*
1100 not used yet
1101 source and mask alpha 1
1102 static Buffer
1103 alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1104 {
1105         Buffer obdst;
1106         int i;
1107
1108         USED(op);
1109         obdst = bdst;
1110         if(bsrc.delta == bdst.delta){
1111                 memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
1112                 return obdst;
1113         }
1114         for(i=0; i<dx; i++){
1115                 if(grey){
1116                         *bdst.grey = *bsrc.grey;
1117                         bsrc.grey += bsrc.delta;
1118                         bdst.grey += bdst.delta;
1119                 }else{
1120                         *bdst.red = *bsrc.red;
1121                         *bdst.grn = *bsrc.grn;
1122                         *bdst.blu = *bsrc.blu;
1123                         bsrc.red += bsrc.delta;
1124                         bsrc.blu += bsrc.delta;
1125                         bsrc.grn += bsrc.delta;
1126                         bdst.red += bdst.delta;
1127                         bdst.blu += bdst.delta;
1128                         bdst.grn += bdst.delta;
1129                 }
1130                 if(bdst.alpha != &ones){
1131                         *bdst.alpha = 255;
1132                         bdst.alpha += bdst.delta;
1133                 }
1134         }
1135         return obdst;
1136 }
1137 */
1138
1139 /* source alpha 1 */
1140 static Buffer
1141 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1142 {
1143         Buffer obdst;
1144         int fd;
1145         int i, ma;
1146         ulong t;
1147
1148         USED(op);
1149         obdst = bdst;
1150
1151         for(i=0; i<dx; i++){
1152                 ma = *bmask.alpha;
1153                 fd = 255-ma;
1154
1155                 if(grey){
1156                         *bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
1157                         bsrc.grey += bsrc.delta;
1158                         bdst.grey += bdst.delta;
1159                 }else{
1160                         *bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
1161                         *bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
1162                         *bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
1163                         bsrc.red += bsrc.delta;
1164                         bsrc.blu += bsrc.delta;
1165                         bsrc.grn += bsrc.delta;
1166                         bdst.red += bdst.delta;
1167                         bdst.blu += bdst.delta;
1168                         bdst.grn += bdst.delta;
1169                 }
1170                 if(bdst.alpha != &ones){
1171                         *bdst.alpha = ma+CALC11(fd, *bdst.alpha, t);
1172                         bdst.alpha += bdst.delta;
1173                 }
1174                 bmask.alpha += bmask.delta;
1175         }
1176         return obdst;
1177 }
1178
1179 static Buffer
1180 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
1181 {
1182         Buffer obdst;
1183         int i, ma, zero;
1184
1185         USED(b1);
1186
1187         obdst = bdst;
1188
1189         for(i=0; i<dx; i++){
1190                 ma = *bmask.alpha;
1191                 zero = ma ? op == DoutS : op == DinS;
1192
1193                 if(grey){
1194                         if(zero)
1195                                 *bdst.grey = 0;
1196                         bdst.grey += bdst.delta;
1197                 }else{
1198                         if(zero)
1199                                 *bdst.red = *bdst.grn = *bdst.blu = 0;
1200                         bdst.red += bdst.delta;
1201                         bdst.blu += bdst.delta;
1202                         bdst.grn += bdst.delta;
1203                 }
1204                 bmask.alpha += bmask.delta;
1205                 if(bdst.alpha != &ones){
1206                         if(zero)
1207                                 *bdst.alpha = 0;
1208                         bdst.alpha += bdst.delta;
1209                 }
1210         }
1211         return obdst;
1212 }
1213
1214 static Buffer
1215 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1216 {
1217         Buffer obdst;
1218         int fs, fd;
1219         int i, ma, da, zero;
1220         ulong t;
1221
1222         obdst = bdst;
1223         zero = !(op&1);
1224
1225         for(i=0; i<dx; i++){
1226                 ma = *bmask.alpha;
1227                 da = *bdst.alpha;
1228                 fs = da;
1229                 if(op&2)
1230                         fs = 255-da;
1231                 fd = 0;
1232                 if(op&4)
1233                         fd = 255;
1234
1235                 if(grey){
1236                         if(ma)
1237                                 *bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
1238                         else if(zero)
1239                                 *bdst.grey = 0;
1240                         bsrc.grey += bsrc.delta;
1241                         bdst.grey += bdst.delta;
1242                 }else{
1243                         if(ma){
1244                                 *bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
1245                                 *bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
1246                                 *bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
1247                         }
1248                         else if(zero)
1249                                 *bdst.red = *bdst.grn = *bdst.blu = 0;
1250                         bsrc.red += bsrc.delta;
1251                         bsrc.blu += bsrc.delta;
1252                         bsrc.grn += bsrc.delta;
1253                         bdst.red += bdst.delta;
1254                         bdst.blu += bdst.delta;
1255                         bdst.grn += bdst.delta;
1256                 }
1257                 bmask.alpha += bmask.delta;
1258                 if(bdst.alpha != &ones){
1259                         if(ma)
1260                                 *bdst.alpha = fs+CALC11(fd, da, t);
1261                         else if(zero)
1262                                 *bdst.alpha = 0;
1263                         bdst.alpha += bdst.delta;
1264                 }
1265         }
1266         return obdst;
1267 }
1268
1269 static Buffer
1270 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1271 {
1272         Buffer obdst;
1273         int i, ma, zero;
1274
1275         obdst = bdst;
1276         zero = !(op&1);
1277
1278         for(i=0; i<dx; i++){
1279                 ma = *bmask.alpha;
1280
1281                 if(grey){
1282                         if(ma)
1283                                 *bdst.grey = *bsrc.grey;
1284                         else if(zero)
1285                                 *bdst.grey = 0;
1286                         bsrc.grey += bsrc.delta;
1287                         bdst.grey += bdst.delta;
1288                 }else{
1289                         if(ma){
1290                                 *bdst.red = *bsrc.red;
1291                                 *bdst.grn = *bsrc.grn;
1292                                 *bdst.blu = *bsrc.blu;
1293                         }
1294                         else if(zero)
1295                                 *bdst.red = *bdst.grn = *bdst.blu = 0;
1296                         bsrc.red += bsrc.delta;
1297                         bsrc.blu += bsrc.delta;
1298                         bsrc.grn += bsrc.delta;
1299                         bdst.red += bdst.delta;
1300                         bdst.blu += bdst.delta;
1301                         bdst.grn += bdst.delta;
1302                 }
1303                 bmask.alpha += bmask.delta;
1304                 if(bdst.alpha != &ones){
1305                         if(ma)
1306                                 *bdst.alpha = 255;
1307                         else if(zero)
1308                                 *bdst.alpha = 0;
1309                         bdst.alpha += bdst.delta;
1310                 }
1311         }
1312         return obdst;
1313 }
1314 /*
1315  * Replicated cached scan line read.  Call the function listed in the Param,
1316  * but cache the result so that for replicated images we only do the work once.
1317  */
1318 static Buffer
1319 replread(Param *p, uchar *s, int y)
1320 {
1321         Buffer *b;
1322
1323         USED(s);
1324         b = &p->bcache[y];
1325         if((p->bfilled & (1<<y)) == 0){
1326                 p->bfilled |= 1<<y;
1327                 *b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
1328         }
1329         return *b;
1330 }
1331
1332 /*
1333  * Alpha reading function that simply relabels the grey pointer.
1334  */
1335 static Buffer
1336 greymaskread(Param *p, uchar *buf, int y)
1337 {
1338         Buffer b;
1339
1340         b = p->greymaskcall(p, buf, y);
1341         b.alpha = b.grey;
1342         return b;
1343 }
1344
1345 #define DBG if(0)
1346 static Buffer
1347 readnbit(Param *p, uchar *buf, int y)
1348 {
1349         Buffer b;
1350         Memimage *img;
1351         uchar *repl, *r, *w, *ow, bits;
1352         int i, n, sh, depth, x, dx, npack, nbits;
1353
1354         b.rgba = (ulong*)buf;
1355         b.grey = w = buf;
1356         b.red = b.blu = b.grn = w;
1357         b.alpha = &ones;
1358         b.delta = 1;
1359
1360         dx = p->dx;
1361         img = p->img;
1362         depth = img->depth;
1363         repl = &replbit[depth][0];
1364         npack = 8/depth;
1365         sh = 8-depth;
1366
1367         /* copy from p->r.min.x until end of repl rectangle */
1368         x = p->r.min.x;
1369         n = dx;
1370         if(n > p->img->r.max.x - x)
1371                 n = p->img->r.max.x - x;
1372
1373         r = p->bytermin + y*p->bwidth;
1374 DBG print("readnbit dx %d %p=%p+%d*%d, *r=%d fetch %d ", dx, r, p->bytermin, y, p->bwidth, *r, n);
1375         bits = *r++;
1376         nbits = 8;
1377         if(i=x&(npack-1)){
1378 DBG print("throwaway %d...", i);
1379                 bits <<= depth*i;
1380                 nbits -= depth*i;
1381         }
1382         for(i=0; i<n; i++){
1383                 if(nbits == 0){
1384 DBG print("(%.2ux)...", *r);
1385                         bits = *r++;
1386                         nbits = 8;
1387                 }
1388                 *w++ = repl[bits>>sh];
1389 DBG print("bit %x...", repl[bits>>sh]);
1390                 bits <<= depth;
1391                 nbits -= depth;
1392         }
1393         dx -= n;
1394         if(dx == 0)
1395                 return b;
1396
1397         assert(x+i == p->img->r.max.x);
1398
1399         /* copy from beginning of repl rectangle until where we were before. */
1400         x = p->img->r.min.x;
1401         n = dx;
1402         if(n > p->r.min.x - x)
1403                 n = p->r.min.x - x;
1404
1405         r = p->bytey0s + y*p->bwidth;
1406 DBG print("x=%d r=%p...", x, r);
1407         bits = *r++;
1408         nbits = 8;
1409         if(i=x&(npack-1)){
1410                 bits <<= depth*i;
1411                 nbits -= depth*i;
1412         }
1413 DBG print("nbits=%d...", nbits);
1414         for(i=0; i<n; i++){
1415                 if(nbits == 0){
1416                         bits = *r++;
1417                         nbits = 8;
1418                 }
1419                 *w++ = repl[bits>>sh];
1420 DBG print("bit %x...", repl[bits>>sh]);
1421                 bits <<= depth;
1422                 nbits -= depth;
1423 DBG print("bits %x nbits %d...", bits, nbits);
1424         }
1425         dx -= n;
1426         if(dx == 0)
1427                 return b;
1428
1429         assert(dx > 0);
1430         /* now we have exactly one full scan line: just replicate the buffer itself until we are done */
1431         ow = buf;
1432         while(dx--)
1433                 *w++ = *ow++;
1434
1435         return b;
1436 }
1437 #undef DBG
1438
1439 #define DBG if(0)
1440 static void
1441 writenbit(Param *p, uchar *w, Buffer src)
1442 {
1443         uchar *r;
1444         ulong bits;
1445         int i, sh, depth, npack, nbits, x, ex;
1446
1447         assert(src.grey != nil && src.delta == 1);
1448
1449         x = p->r.min.x;
1450         ex = x+p->dx;
1451         depth = p->img->depth;
1452         npack = 8/depth;
1453
1454         i=x&(npack-1);
1455         bits = i ? (*w >> (8-depth*i)) : 0;
1456         nbits = depth*i;
1457         sh = 8-depth;
1458         r = src.grey;
1459
1460         for(; x<ex; x++){
1461                 bits <<= depth;
1462 DBG print(" %x", *r);
1463                 bits |= (*r++ >> sh);
1464                 nbits += depth;
1465                 if(nbits == 8){
1466                         *w++ = bits;
1467                         nbits = 0;
1468                 }
1469         }
1470
1471         if(nbits){
1472                 sh = 8-nbits;
1473                 bits <<= sh;
1474                 bits |= *w & ((1<<sh)-1);
1475                 *w = bits;
1476         }
1477 DBG print("\n");
1478         return;
1479 }
1480 #undef DBG
1481
1482 static Buffer
1483 readcmap(Param *p, uchar *buf, int y)
1484 {
1485         Buffer b;
1486         int a, convgrey, copyalpha, dx, i, m;
1487         uchar *q, *cmap, *begin, *end, *r, *w;
1488
1489         begin = p->bytey0s + y*p->bwidth;
1490         r = p->bytermin + y*p->bwidth;
1491         end = p->bytey0e + y*p->bwidth;
1492         cmap = p->img->cmap->cmap2rgb;
1493         convgrey = p->convgrey;
1494         copyalpha = (p->img->flags&Falpha) != 0;
1495
1496         w = buf;
1497         dx = p->dx;
1498         if(copyalpha){
1499                 b.alpha = buf++;
1500                 a = p->img->shift[CAlpha]/8;
1501                 m = p->img->shift[CMap]/8;
1502                 for(i=0; i<dx; i++){
1503                         *w++ = r[a];
1504                         q = cmap+r[m]*3;
1505                         r += 2;
1506                         if(r == end)
1507                                 r = begin;
1508                         if(convgrey){
1509                                 *w++ = RGB2K(q[0], q[1], q[2]);
1510                         }else{
1511                                 *w++ = q[2];    /* blue */
1512                                 *w++ = q[1];    /* green */
1513                                 *w++ = q[0];    /* red */
1514                         }
1515                 }
1516         }else{
1517                 b.alpha = &ones;
1518                 for(i=0; i<dx; i++){
1519                         q = cmap+*r++*3;
1520                         if(r == end)
1521                                 r = begin;
1522                         if(convgrey){
1523                                 *w++ = RGB2K(q[0], q[1], q[2]);
1524                         }else{
1525                                 *w++ = q[2];    /* blue */
1526                                 *w++ = q[1];    /* green */
1527                                 *w++ = q[0];    /* red */
1528                         }
1529                 }
1530         }
1531
1532         b.rgba = (ulong*)(buf-copyalpha);
1533
1534         if(convgrey){
1535                 b.grey = buf;
1536                 b.red = b.blu = b.grn = buf;
1537                 b.delta = 1+copyalpha;
1538         }else{
1539                 b.blu = buf;
1540                 b.grn = buf+1;
1541                 b.red = buf+2;
1542                 b.grey = nil;
1543                 b.delta = 3+copyalpha;
1544         }
1545         return b;
1546 }
1547
1548 static void
1549 writecmap(Param *p, uchar *w, Buffer src)
1550 {
1551         uchar *cmap, *red, *grn, *blu, *alpha;
1552         int i, dx, delta, a, m;
1553
1554         cmap = p->img->cmap->rgb2cmap;
1555         
1556         delta = src.delta;
1557         red= src.red;
1558         grn = src.grn;
1559         blu = src.blu;
1560
1561         dx = p->dx;
1562         if(p->img->flags&Falpha){
1563                 alpha = src.alpha;
1564                 m = p->img->shift[CMap]/8;
1565                 a = p->img->shift[CAlpha]/8;
1566                 for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta, w+=2){
1567                         w[a] = *alpha;
1568                         if(alpha != &ones)
1569                                 alpha+=delta;
1570                         w[m] = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1571                 }
1572         } else {
1573                 for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
1574                         *w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1575         }
1576 }
1577
1578 #define DBG if(0)
1579 static Buffer
1580 readbyte(Param *p, uchar *buf, int y)
1581 {
1582         Buffer b;
1583         Memimage *img;
1584         int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
1585         uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
1586         uchar ured, ugrn, ublu;
1587         ulong u;
1588
1589         img = p->img;
1590         begin = p->bytey0s + y*p->bwidth;
1591         r = p->bytermin + y*p->bwidth;
1592         end = p->bytey0e + y*p->bwidth;
1593
1594         w = buf;
1595         dx = p->dx;
1596         nb = img->depth/8;
1597
1598         convgrey = p->convgrey; /* convert rgb to grey */
1599         isgrey = img->flags&Fgrey;
1600         alphaonly = p->alphaonly;
1601         copyalpha = (img->flags&Falpha) != 0;
1602
1603 DBG print("copyalpha %d alphaonly %d convgrey %d isgrey %d\n", copyalpha, alphaonly, convgrey, isgrey);
1604         /* if we can, avoid processing everything */
1605         if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
1606                 memset(&b, 0, sizeof b);
1607                 if(p->needbuf){
1608                         memmove(buf, r, dx*nb);
1609                         r = buf;
1610                 }
1611                 b.rgba = (ulong*)r;
1612                 if(copyalpha)
1613                         b.alpha = r+img->shift[CAlpha]/8;
1614                 else
1615                         b.alpha = &ones;
1616                 if(isgrey){
1617                         b.grey = r+img->shift[CGrey]/8;
1618                         b.red = b.grn = b.blu = b.grey;
1619                 }else{
1620                         b.red = r+img->shift[CRed]/8;
1621                         b.grn = r+img->shift[CGreen]/8;
1622                         b.blu = r+img->shift[CBlue]/8;
1623                 }
1624                 b.delta = nb;
1625                 return b;
1626         }
1627
1628 DBG print("2\n");
1629         rrepl = replbit[img->nbits[CRed]];
1630         grepl = replbit[img->nbits[CGreen]];
1631         brepl = replbit[img->nbits[CBlue]];
1632         arepl = replbit[img->nbits[CAlpha]];
1633         krepl = replbit[img->nbits[CGrey]];
1634
1635         for(i=0; i<dx; i++){
1636                 u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
1637                 if(copyalpha) {
1638                         *w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
1639 DBG print("a %x\n", w[-1]);
1640                 }
1641
1642                 if(isgrey)
1643                         *w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
1644                 else if(!alphaonly){
1645                         ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1646                         ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1647                         ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1648                         if(convgrey){
1649 DBG print("g %x %x %x\n", ured, ugrn, ublu);
1650                                 *w++ = RGB2K(ured, ugrn, ublu);
1651 DBG print("%x\n", w[-1]);
1652                         }else{
1653                                 w[0] = ublu;
1654                                 w[1] = ugrn;
1655                                 w[2] = ured;
1656                                 w += 3;
1657                         }
1658                 }
1659                 r += nb;
1660                 if(r == end)
1661                         r = begin;
1662         }
1663
1664         b.alpha = copyalpha ? buf : &ones;
1665         b.rgba = (ulong*)buf;
1666         if(alphaonly){
1667                 b.red = b.grn = b.blu = b.grey = nil;
1668                 if(!copyalpha)
1669                         b.rgba = nil;
1670                 b.delta = 1;
1671         }else if(isgrey || convgrey){
1672                 b.grey = buf+copyalpha;
1673                 b.red = b.grn = b.blu = buf+copyalpha;
1674                 b.delta = copyalpha+1;
1675 DBG print("alpha %x grey %x\n", *b.alpha, *b.grey);
1676         }else{
1677                 b.blu = buf+copyalpha;
1678                 b.grn = buf+copyalpha+1;
1679                 b.grey = nil;
1680                 b.red = buf+copyalpha+2;
1681                 b.delta = copyalpha+3;
1682         }
1683         return b;
1684 }
1685 #undef DBG
1686
1687 #define DBG if(0)
1688 static void
1689 writebyte(Param *p, uchar *w, Buffer src)
1690 {
1691         Memimage *img;
1692         int i, isalpha, isgrey, nb, delta, dx, adelta;
1693         uchar *red, *grn, *blu, *grey, *alpha;
1694         ulong u, mask;
1695
1696         img = p->img;
1697
1698         red = src.red;
1699         grn = src.grn;
1700         blu = src.blu;
1701         alpha = src.alpha;
1702         delta = src.delta;
1703         grey = src.grey;
1704         dx = p->dx;
1705
1706         nb = img->depth/8;
1707
1708         isalpha = img->flags&Falpha;
1709         isgrey = img->flags&Fgrey;
1710         adelta = src.delta;
1711
1712         if(isalpha && alpha == &ones)
1713                 adelta = 0;
1714
1715         if((img->flags&Fbytes) != 0){
1716                 int ogry, ored, ogrn, oblu, oalp;
1717
1718                 ogry = img->shift[CGrey]/8;
1719                 ored = img->shift[CRed]/8;
1720                 ogrn = img->shift[CGreen]/8;
1721                 oblu = img->shift[CBlue]/8;
1722                 oalp = img->shift[CAlpha]/8;
1723
1724                 for(i=0; i<dx; i++){
1725                         if(isgrey){
1726                                 w[ogry] = *grey;
1727                                 grey += delta;
1728                         } else {
1729                                 w[ored] = *red;
1730                                 w[ogrn] = *grn;
1731                                 w[oblu] = *blu;
1732                                 red += delta;
1733                                 grn += delta;
1734                                 blu += delta;
1735                         }
1736                         if(isalpha){
1737                                 w[oalp] = *alpha;
1738                                 alpha += adelta;
1739                         }
1740                         w += nb;
1741                 }
1742                 return;
1743         }
1744
1745         mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
1746         for(i=0; i<dx; i++){
1747                 u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
1748 DBG print("u %.8lux...", u);
1749                 u &= mask;
1750 DBG print("&mask %.8lux...", u);
1751                 if(isgrey){
1752                         u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
1753 DBG print("|grey %.8lux...", u);
1754                         grey += delta;
1755                 }else{
1756                         u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
1757                         u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
1758                         u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
1759                         red += delta;
1760                         grn += delta;
1761                         blu += delta;
1762 DBG print("|rgb %.8lux...", u);
1763                 }
1764
1765                 if(isalpha){
1766                         u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
1767                         alpha += adelta;
1768 DBG print("|alpha %.8lux...", u);
1769                 }
1770
1771                 w[0] = u;
1772                 w[1] = u>>8;
1773                 w[2] = u>>16;
1774                 w[3] = u>>24;
1775                 w += nb;
1776         }
1777 }
1778 #undef DBG
1779
1780 static Readfn*
1781 readfn(Memimage *img)
1782 {
1783         if(img->depth < 8)
1784                 return readnbit;
1785         if(img->nbits[CMap] == 8)
1786                 return readcmap;
1787         return readbyte;
1788 }
1789
1790 static Readfn*
1791 readalphafn(Memimage *m)
1792 {
1793         USED(m);
1794         return readbyte;
1795 }
1796
1797 static Writefn*
1798 writefn(Memimage *img)
1799 {
1800         if(img->depth < 8)
1801                 return writenbit;
1802         if(img->nbits[CMap] == 8)
1803                 return writecmap;
1804         return writebyte;
1805 }
1806
1807 static void
1808 nullwrite(Param *p, uchar *s, Buffer b)
1809 {
1810         USED(p);
1811         USED(s);
1812         USED(b);
1813 }
1814
1815 static Buffer
1816 readptr(Param *p, uchar *s, int y)
1817 {
1818         Buffer b;
1819         uchar *q;
1820
1821         USED(s);
1822         q = p->bytermin + y*p->bwidth;
1823         b.red = q;      /* ptr to data */
1824         b.grn = b.blu = b.grey = nil;
1825         b.alpha = &ones;
1826         b.rgba = (ulong*)q;
1827         b.delta = p->img->depth/8;
1828         return b;
1829 }
1830
1831 static Buffer
1832 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
1833 {
1834         USED(i);
1835         USED(o);
1836         USED(b1);
1837         USED(bsrc);
1838         memmove(bdst.red, bsrc.red, dx*bdst.delta);
1839         return bdst;
1840 }
1841
1842 static Buffer
1843 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1844 {
1845         uchar *m, *r, *w, *ew;
1846
1847         USED(i);
1848         USED(o);
1849         m = bmask.grey;
1850         w = bdst.red;
1851         r = bsrc.red;
1852         ew = w+dx;
1853         for(; w < ew; w++,r++)
1854                 if(*m++)
1855                         *w = *r;
1856         return bdst;    /* not used */
1857 }
1858
1859 static Buffer
1860 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1861 {
1862         uchar *m;
1863         ushort *r, *w, *ew;
1864
1865         USED(i);
1866         USED(o);
1867         m = bmask.grey;
1868         w = (ushort*)bdst.red;
1869         r = (ushort*)bsrc.red;
1870         ew = w+dx;
1871         for(; w < ew; w++,r++)
1872                 if(*m++)
1873                         *w = *r;
1874         return bdst;    /* not used */
1875 }
1876
1877 static Buffer
1878 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1879 {
1880         uchar *m;
1881         uchar *r, *w, *ew;
1882
1883         USED(i);
1884         USED(o);
1885         m = bmask.grey;
1886         w = bdst.red;
1887         r = bsrc.red;
1888         ew = w+dx*3;
1889         while(w < ew){
1890                 if(*m++){
1891                         *w++ = *r++;
1892                         *w++ = *r++;
1893                         *w++ = *r++;
1894                 }else{
1895                         w += 3;
1896                         r += 3;
1897                 }
1898         }
1899         return bdst;    /* not used */
1900 }
1901
1902 static Buffer
1903 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1904 {
1905         uchar *m;
1906         ulong *r, *w, *ew;
1907
1908         USED(i);
1909         USED(o);
1910         m = bmask.grey;
1911         w = (ulong*)bdst.red;
1912         r = (ulong*)bsrc.red;
1913         ew = w+dx;
1914         for(; w < ew; w++,r++)
1915                 if(*m++)
1916                         *w = *r;
1917         return bdst;    /* not used */
1918 }
1919
1920 static Buffer
1921 genconv(Param *p, uchar *buf, int y)
1922 {
1923         Buffer b;
1924         int nb;
1925         uchar *r, *w, *ew;
1926
1927         /* read from source into RGB format in convbuf */
1928         b = p->convreadcall(p, p->convbuf, y);
1929
1930         /* write RGB format into dst format in buf */
1931         p->convwritecall(p->convdpar, buf, b);
1932
1933         if(p->convdx){
1934                 nb = p->convdpar->img->depth/8;
1935                 r = buf;
1936                 w = buf+nb*p->dx;
1937                 ew = buf+nb*p->convdx;
1938                 while(w<ew)
1939                         *w++ = *r++;
1940         }
1941
1942         b.red = buf;
1943         b.blu = b.grn = b.grey = nil;
1944         b.alpha = &ones;
1945         b.rgba = (ulong*)buf;
1946         b.delta = 0;
1947         
1948         return b;
1949 }
1950
1951 static Readfn*
1952 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar, int *ndrawbuf)
1953 {
1954         if(dst->chan == src->chan && !(src->flags&Frepl)){
1955 //if(drawdebug) iprint("readptr...");
1956                 return readptr;
1957         }
1958
1959         if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
1960                 /* cheat because we know the replicated value is exactly the color map entry. */
1961 //if(drawdebug) iprint("Readnbit...");
1962                 return readnbit;
1963         }
1964
1965         spar->convreadcall = readfn(src);
1966         spar->convwritecall = writefn(dst);
1967         spar->convdpar = dpar;
1968
1969         /* allocate a conversion buffer */
1970         spar->convbufoff = *ndrawbuf;
1971         *ndrawbuf += spar->dx*4;
1972
1973         if(spar->dx > Dx(spar->img->r)){
1974                 spar->convdx = spar->dx;
1975                 spar->dx = Dx(spar->img->r);
1976         }
1977
1978 //if(drawdebug) iprint("genconv...");
1979         return genconv;
1980 }
1981
1982 static ulong
1983 pixelbits(Memimage *i, Point pt)
1984 {
1985         uchar *p;
1986         ulong val;
1987         int off, bpp, npack;
1988
1989         val = 0;
1990         p = byteaddr(i, pt);
1991         switch(bpp=i->depth){
1992         case 1:
1993         case 2:
1994         case 4:
1995                 npack = 8/bpp;
1996                 off = pt.x%npack;
1997                 val = p[0] >> bpp*(npack-1-off);
1998                 val &= (1<<bpp)-1;
1999                 break;
2000         case 8:
2001                 val = p[0];
2002                 break;
2003         case 16:
2004                 val = p[0]|(p[1]<<8);
2005                 break;
2006         case 24:
2007                 val = p[0]|(p[1]<<8)|(p[2]<<16);
2008                 break;
2009         case 32:
2010                 val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
2011                 break;
2012         }
2013         while(bpp<32){
2014                 val |= val<<bpp;
2015                 bpp *= 2;
2016         }
2017         return val;
2018 }
2019
2020 static Calcfn*
2021 boolcopyfn(Memimage *img, Memimage *mask)
2022 {
2023         if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
2024                 return boolmemmove;
2025
2026         switch(img->depth){
2027         case 8:
2028                 return boolcopy8;
2029         case 16:
2030                 return boolcopy16;
2031         case 24:
2032                 return boolcopy24;
2033         case 32:
2034                 return boolcopy32;
2035         default:
2036                 assert(0 /* boolcopyfn */);
2037         }
2038         return nil;
2039 }
2040
2041 /*
2042  * Optimized draw for filling and scrolling; uses memset and memmove.
2043  */
2044 static void
2045 memsets(void *vp, ushort val, int n)
2046 {
2047         ushort *p, *ep;
2048         uchar b[2];
2049
2050         /* make little endian */
2051         b[0] = val;
2052         b[1] = val>>8;
2053         val = *(ushort*)b;
2054
2055         p = vp;
2056         ep = p+n;
2057         while(p<ep)
2058                 *p++ = val;
2059 }
2060
2061 static void
2062 memsetl(void *vp, ulong val, int n)
2063 {
2064         ulong *p, *ep;
2065         uchar b[4];
2066
2067         /* make little endian */
2068         b[0] = val;
2069         b[1] = val>>8;
2070         b[2] = val>>16;
2071         b[3] = val>>24;
2072         val = *(ulong*)b;
2073
2074         p = vp;
2075         ep = p+n;
2076         while(p<ep)
2077                 *p++ = val;
2078 }
2079
2080 static void
2081 memset24(void *vp, ulong val, int n)
2082 {
2083         uchar *p, *ep;
2084         uchar a,b,c;
2085
2086         a = val;
2087         b = val>>8;
2088         c = val>>16;
2089
2090         p = vp;
2091         ep = p+3*n;
2092         while(p<ep){
2093                 p[0] = a;
2094                 p[1] = b;
2095                 p[2] = c;
2096                 p += 3;
2097         }
2098 }
2099
2100 static ulong
2101 imgtorgba(Memimage *img, ulong val)
2102 {
2103         uchar r, g, b, a;
2104         int nb, ov, v;
2105         ulong chan;
2106         uchar *p;
2107
2108         a = 0xFF;
2109         r = g = b = 0xAA;       /* garbage */
2110         for(chan=img->chan; chan; chan>>=8){
2111                 nb = NBITS(chan);
2112                 ov = v = val&((1<<nb)-1);
2113                 val >>= nb;
2114
2115                 while(nb < 8){
2116                         v |= v<<nb;
2117                         nb *= 2;
2118                 }
2119                 v >>= (nb-8);
2120
2121                 switch(TYPE(chan)){
2122                 case CRed:
2123                         r = v;
2124                         break;
2125                 case CGreen:
2126                         g = v;
2127                         break;
2128                 case CBlue:
2129                         b = v;
2130                         break;
2131                 case CAlpha:
2132                         a = v;
2133                         break;
2134                 case CGrey:
2135                         r = g = b = v;
2136                         break;
2137                 case CMap:
2138                         p = img->cmap->cmap2rgb+3*ov;
2139                         r = p[0];
2140                         g = p[1];
2141                         b = p[2];
2142                         break;
2143                 }
2144         }
2145         return (r<<24)|(g<<16)|(b<<8)|a;        
2146 }
2147
2148 static ulong
2149 rgbatoimg(Memimage *img, ulong rgba)
2150 {
2151         ulong chan;
2152         int d, nb;
2153         ulong v;
2154         uchar *p, r, g, b, a, m;
2155
2156         v = 0;
2157         r = rgba>>24;
2158         g = rgba>>16;
2159         b = rgba>>8;
2160         a = rgba;
2161         d = 0;
2162         for(chan=img->chan; chan; chan>>=8){
2163                 nb = NBITS(chan);
2164                 switch(TYPE(chan)){
2165                 case CRed:
2166                         v |= (r>>(8-nb))<<d;
2167                         break;
2168                 case CGreen:
2169                         v |= (g>>(8-nb))<<d;
2170                         break;
2171                 case CBlue:
2172                         v |= (b>>(8-nb))<<d;
2173                         break;
2174                 case CAlpha:
2175                         v |= (a>>(8-nb))<<d;
2176                         break;
2177                 case CMap:
2178                         p = img->cmap->rgb2cmap;
2179                         m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
2180                         v |= (m>>(8-nb))<<d;
2181                         break;
2182                 case CGrey:
2183                         m = RGB2K(r,g,b);
2184                         v |= (m>>(8-nb))<<d;
2185                         break;
2186                 }
2187                 d += nb;
2188         }
2189 //      print("rgba2img %.8lux = %.*lux\n", rgba, 2*d/8, v);
2190         return v;
2191 }
2192
2193 #define DBG if(0)
2194 static int
2195 memoptdraw(Memdrawparam *par)
2196 {
2197         int m, y, dy, dx, op;
2198         ulong v;
2199         Memimage *src;
2200         Memimage *dst;
2201
2202         dx = Dx(par->r);
2203         dy = Dy(par->r);
2204         src = par->src;
2205         dst = par->dst;
2206         op = par->op;
2207
2208 DBG print("state %lux mval %lux dd %d\n", par->state, par->mval, dst->depth);
2209         /*
2210          * If we have an opaque mask and source is one opaque pixel we can convert to the
2211          * destination format and just replicate with memset.
2212          */
2213         m = Simplesrc|Simplemask|Fullmask;
2214         if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
2215                 int d, dwid, ppb, np, nb;
2216                 uchar *dp, lm, rm;
2217
2218 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
2219                 dwid = dst->width*sizeof(ulong);
2220                 dp = byteaddr(dst, par->r.min);
2221                 v = par->sdval;
2222 DBG print("sdval %lud, depth %d\n", v, dst->depth);
2223                 switch(dst->depth){
2224                 case 1:
2225                 case 2:
2226                 case 4:
2227                         for(d=dst->depth; d<8; d*=2)
2228                                 v |= (v<<d);
2229                         ppb = 8/dst->depth;     /* pixels per byte */
2230                         m = ppb-1;
2231                         /* left edge */
2232                         np = par->r.min.x&m;            /* no. pixels unused on left side of word */
2233                         dx -= (ppb-np);
2234                         nb = 8 - np * dst->depth;               /* no. bits used on right side of word */
2235                         lm = (1<<nb)-1;
2236 DBG print("np %d x %d nb %d lm %ux ppb %d m %ux\n", np, par->r.min.x, nb, lm, ppb, m);  
2237
2238                         /* right edge */
2239                         np = par->r.max.x&m;    /* no. pixels used on left side of word */
2240                         dx -= np;
2241                         nb = 8 - np * dst->depth;               /* no. bits unused on right side of word */
2242                         rm = ~((1<<nb)-1);
2243 DBG print("np %d x %d nb %d rm %ux ppb %d m %ux\n", np, par->r.max.x, nb, rm, ppb, m);  
2244
2245 DBG print("dx %d Dx %d\n", dx, Dx(par->r));
2246                         /* lm, rm are masks that are 1 where we should touch the bits */
2247                         if(dx < 0){     /* just one byte */
2248                                 lm &= rm;
2249                                 for(y=0; y<dy; y++, dp+=dwid)
2250                                         *dp ^= (v ^ *dp) & lm;
2251                         }else if(dx == 0){      /* no full bytes */
2252                                 if(lm)
2253                                         dwid--;
2254
2255                                 for(y=0; y<dy; y++, dp+=dwid){
2256                                         if(lm){
2257 DBG print("dp %p v %lux lm %ux (v ^ *dp) & lm %lux\n", dp, v, lm, (v^*dp)&lm);
2258                                                 *dp ^= (v ^ *dp) & lm;
2259                                                 dp++;
2260                                         }
2261                                         *dp ^= (v ^ *dp) & rm;
2262                                 }
2263                         }else{          /* full bytes in middle */
2264                                 dx /= ppb;
2265                                 if(lm)
2266                                         dwid--;
2267                                 dwid -= dx;
2268
2269                                 for(y=0; y<dy; y++, dp+=dwid){
2270                                         if(lm){
2271                                                 *dp ^= (v ^ *dp) & lm;
2272                                                 dp++;
2273                                         }
2274                                         memset(dp, v, dx);
2275                                         dp += dx;
2276                                         *dp ^= (v ^ *dp) & rm;
2277                                 }
2278                         }
2279                         return 1;
2280                 case 8:
2281                         for(y=0; y<dy; y++, dp+=dwid)
2282                                 memset(dp, v, dx);
2283                         return 1;
2284                 case 16:
2285                         for(y=0; y<dy; y++, dp+=dwid)
2286                                 memsets(dp, v, dx);
2287                         return 1;
2288                 case 24:
2289                         for(y=0; y<dy; y++, dp+=dwid)
2290                                 memset24(dp, v, dx);
2291                         return 1;
2292                 case 32:
2293                         for(y=0; y<dy; y++, dp+=dwid)
2294                                 memsetl(dp, v, dx);
2295                         return 1;
2296                 default:
2297                         assert(0 /* bad dest depth in memoptdraw */);
2298                 }
2299         }
2300
2301         /*
2302          * If no source alpha, an opaque mask, we can just copy the
2303          * source onto the destination.  If the channels are the same and
2304          * the source is not replicated, memmove suffices.
2305          */
2306         m = Simplemask|Fullmask;
2307         if((par->state&(m|Replsrc))==m && src->depth >= 8 
2308         && src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
2309                 uchar *sp, *dp;
2310                 long swid, dwid, nb;
2311                 int dir;
2312
2313                 if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
2314                         dir = -1;
2315                 else
2316                         dir = 1;
2317
2318                 swid = src->width*sizeof(ulong);
2319                 dwid = dst->width*sizeof(ulong);
2320                 sp = byteaddr(src, par->sr.min);
2321                 dp = byteaddr(dst, par->r.min);
2322                 if(dir == -1){
2323                         sp += (dy-1)*swid;
2324                         dp += (dy-1)*dwid;
2325                         swid = -swid;
2326                         dwid = -dwid;
2327                 }
2328                 nb = (dx*src->depth)/8;
2329                 for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
2330                         memmove(dp, sp, nb);
2331                 return 1;
2332         }
2333
2334         /*
2335          * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
2336          * they're all bit aligned, we can just use bit operators.  This happens
2337          * when we're manipulating boolean masks, e.g. in the arc code.
2338          */
2339         if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0 
2340         && dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1 
2341         && (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
2342                 uchar *sp, *dp, *mp;
2343                 uchar lm, rm;
2344                 long swid, dwid, mwid;
2345                 int i, x, dir;
2346
2347                 sp = byteaddr(src, par->sr.min);
2348                 dp = byteaddr(dst, par->r.min);
2349                 mp = byteaddr(par->mask, par->mr.min);
2350                 swid = src->width*sizeof(ulong);
2351                 dwid = dst->width*sizeof(ulong);
2352                 mwid = par->mask->width*sizeof(ulong);
2353
2354                 if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
2355                         dir = -1;
2356                 }else
2357                         dir = 1;
2358
2359                 lm = 0xFF>>(par->r.min.x&7);
2360                 rm = 0xFF<<(8-(par->r.max.x&7));
2361                 dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
2362
2363                 if(dx < 0){     /* one byte wide */
2364                         lm &= rm;
2365                         if(dir == -1){
2366                                 dp += dwid*(dy-1);
2367                                 sp += swid*(dy-1);
2368                                 mp += mwid*(dy-1);
2369                                 dwid = -dwid;
2370                                 swid = -swid;
2371                                 mwid = -mwid;
2372                         }
2373                         for(y=0; y<dy; y++){
2374                                 *dp ^= (*dp ^ *sp) & *mp & lm;
2375                                 dp += dwid;
2376                                 sp += swid;
2377                                 mp += mwid;
2378                         }
2379                         return 1;
2380                 }
2381
2382                 dx /= 8;
2383                 if(dir == 1){
2384                         i = (lm!=0)+dx+(rm!=0);
2385                         mwid -= i;
2386                         swid -= i;
2387                         dwid -= i;
2388                         for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2389                                 if(lm){
2390                                         *dp ^= (*dp ^ *sp++) & *mp++ & lm;
2391                                         dp++;
2392                                 }
2393                                 for(x=0; x<dx; x++){
2394                                         *dp ^= (*dp ^ *sp++) & *mp++;
2395                                         dp++;
2396                                 }
2397                                 if(rm){
2398                                         *dp ^= (*dp ^ *sp++) & *mp++ & rm;
2399                                         dp++;
2400                                 }
2401                         }
2402                         return 1;
2403                 }else{
2404                 /* dir == -1 */
2405                         i = (lm!=0)+dx+(rm!=0);
2406                         dp += dwid*(dy-1)+i-1;
2407                         sp += swid*(dy-1)+i-1;
2408                         mp += mwid*(dy-1)+i-1;
2409                         dwid = -dwid+i;
2410                         swid = -swid+i;
2411                         mwid = -mwid+i;
2412                         for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2413                                 if(rm){
2414                                         *dp ^= (*dp ^ *sp--) & *mp-- & rm;
2415                                         dp--;
2416                                 }
2417                                 for(x=0; x<dx; x++){
2418                                         *dp ^= (*dp ^ *sp--) & *mp--;
2419                                         dp--;
2420                                 }
2421                                 if(lm){
2422                                         *dp ^= (*dp ^ *sp--) & *mp-- & lm;
2423                                         dp--;
2424                                 }
2425                         }
2426                 }
2427                 return 1;
2428         }
2429         return 0;       
2430 }
2431 #undef DBG
2432
2433 /*
2434  * Boolean character drawing.
2435  * Solid opaque color through a 1-bit greyscale mask.
2436  */
2437 #define DBG if(0)
2438 static int
2439 chardraw(Memdrawparam *par)
2440 {
2441         int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
2442         ulong bits, v, maskwid, dstwid;
2443         uchar *wp, *rp, *q, *wc;
2444         ushort *ws;
2445         ulong *wl;
2446         uchar sp[4];
2447         Rectangle r, mr;
2448         Memimage *mask, *src, *dst;
2449
2450 if(0) if(drawdebug) iprint("chardraw? mf %lux md %d sf %lux dxs %d dys %d dd %d ddat %p sdat %p\n",
2451                 par->mask->flags, par->mask->depth, par->src->flags, 
2452                 Dx(par->src->r), Dy(par->src->r), par->dst->depth, par->dst->data, par->src->data);
2453
2454         mask = par->mask;
2455         src = par->src;
2456         dst = par->dst;
2457         r = par->r;
2458         mr = par->mr;
2459         op = par->op;
2460
2461         if((par->state&(Replsrc|Simplesrc|Replmask)) != (Replsrc|Simplesrc)
2462         || mask->depth != 1 || src->flags&Falpha || dst->depth<8 || dst->data==src->data
2463         || op != SoverD)
2464                 return 0;
2465
2466 //if(drawdebug) iprint("chardraw...");
2467
2468         depth = mask->depth;
2469         maskwid = mask->width*sizeof(ulong);
2470         rp = byteaddr(mask, mr.min);
2471         npack = 8/depth;
2472         bsh = (mr.min.x % npack) * depth;
2473
2474         wp = byteaddr(dst, r.min);
2475         dstwid = dst->width*sizeof(ulong);
2476 DBG print("bsh %d\n", bsh);
2477         dy = Dy(r);
2478         dx = Dx(r);
2479
2480         ddepth = dst->depth;
2481
2482         /*
2483          * for loop counts from bsh to bsh+dx
2484          *
2485          * we want the bottom bits to be the amount
2486          * to shift the pixels down, so for n≡0 (mod 8) we want 
2487          * bottom bits 7.  for n≡1, 6, etc.
2488          * the bits come from -n-1.
2489          */
2490
2491         bx = -bsh-1;
2492         ex = -bsh-1-dx;
2493         SET(bits);
2494         v = par->sdval;
2495
2496         /* make little endian */
2497         sp[0] = v;
2498         sp[1] = v>>8;
2499         sp[2] = v>>16;
2500         sp[3] = v>>24;
2501
2502 //print("sp %x %x %x %x\n", sp[0], sp[1], sp[2], sp[3]);
2503         for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
2504                 q = rp;
2505                 if(bsh)
2506                         bits = *q++;
2507                 switch(ddepth){
2508                 case 8:
2509 //if(drawdebug) iprint("8loop...");
2510                         wc = wp;
2511                         for(x=bx; x>ex; x--, wc++){
2512                                 i = x&7;
2513                                 if(i == 8-1)
2514                                         bits = *q++;
2515 DBG print("bits %lux sh %d...", bits, i);
2516                                 if((bits>>i)&1)
2517                                         *wc = v;
2518                         }
2519                         break;
2520                 case 16:
2521                         ws = (ushort*)wp;
2522                         v = *(ushort*)sp;
2523                         for(x=bx; x>ex; x--, ws++){
2524                                 i = x&7;
2525                                 if(i == 8-1)
2526                                         bits = *q++;
2527 DBG print("bits %lux sh %d...", bits, i);
2528                                 if((bits>>i)&1)
2529                                         *ws = v;
2530                         }
2531                         break;
2532                 case 24:
2533                         wc = wp;
2534                         for(x=bx; x>ex; x--, wc+=3){
2535                                 i = x&7;
2536                                 if(i == 8-1)
2537                                         bits = *q++;
2538 DBG print("bits %lux sh %d...", bits, i);
2539                                 if((bits>>i)&1){
2540                                         wc[0] = sp[0];
2541                                         wc[1] = sp[1];
2542                                         wc[2] = sp[2];
2543                                 }
2544                         }
2545                         break;
2546                 case 32:
2547                         wl = (ulong*)wp;
2548                         v = *(ulong*)sp;
2549                         for(x=bx; x>ex; x--, wl++){
2550                                 i = x&7;
2551                                 if(i == 8-1)
2552                                         bits = *q++;
2553 DBG iprint("bits %lux sh %d...", bits, i);
2554                                 if((bits>>i)&1)
2555                                         *wl = v;
2556                         }
2557                         break;
2558                 }
2559         }
2560
2561 DBG print("\n");        
2562         return 1;       
2563 }
2564 #undef DBG
2565
2566
2567 void
2568 memfillcolor(Memimage *i, ulong val)
2569 {
2570         ulong bits;
2571         int d, y;
2572
2573         if(val == DNofill)
2574                 return;
2575
2576         bits = rgbatoimg(i, val);
2577         switch(i->depth){
2578         case 24:        /* 24-bit images suck */
2579                 for(y=i->r.min.y; y<i->r.max.y; y++)
2580                         memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
2581                 break;
2582         default:        /* 1, 2, 4, 8, 16, 32 */
2583                 for(d=i->depth; d<32; d*=2)
2584                         bits = (bits << d) | bits;
2585                 memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
2586                 break;
2587         }
2588 }
2589