sys/src/libmemdraw/draw.c

   1 #include <u.h>
   2 #include <libc.h>
   3 #include <draw.h>
   4 #include <memdraw.h>
   5 #include <pool.h>
   6
   7 extern Pool* imagmem;
   8
   /*
    * Grey level of an r,g,b triple using the NTSC weights
    * .299, .587 and .114, expressed as integers scaled by 2^19.
    * Intended for 0 <= r,g,b < 256.
    */
   #define RGB2K(r,g,b)    (((r)*156763+(g)*307758+(b)*59769)>>19)
  11
  12 /*
  13  * For 16-bit values, x / 255 == (t = x+1, (t+(t>>8)) >> 8).
  14  * We add another 127 to round to the nearest value rather
  15  * than truncate.
  16  *
  17  * CALCxy does x bytewise calculations on y input images (x=1,4; y=1,2).
  18  * CALC2x does two parallel 16-bit calculations on y input images (y=1,2).
  19  */
  /*
   * CALC11: a*v/255 for one channel, rounded; tmp is a scratch lvalue.
   */
  #define CALC11(a, v, tmp) \
          (tmp=(a)*(v)+128, (tmp+(tmp>>8))>>8)

  /*
   * CALC12: (a1*v1 + a2*v2)/255 for one channel, rounded.
   */
  #define CALC12(a1, v1, a2, v2, tmp) \
          (tmp=(a1)*(v1)+(a2)*(v2)+128, (tmp+(tmp>>8))>>8)

  /* keeps bytes 0 and 2 of a word: two 8-bit values, each in a 16-bit lane */
  #define MASK 0xFF00FF

  /*
   * CALC21: CALC11 applied to both lanes of vvuu at once.
   * vvuu must already be masked with MASK.
   */
  #define CALC21(a, vvuu, tmp) \
          (tmp=(a)*(vvuu)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)

  /*
   * CALC41: scale all four bytes of rgba by a/255, two lanes at a time.
   * rgba is parenthesized at every use so that an expression argument
   * such as x|y is masked and shifted as a whole.
   */
  #define CALC41(a, rgba, tmp1, tmp2) \
          (CALC21(a, (rgba)&MASK, tmp1) | \
           (CALC21(a, ((rgba)>>8)&MASK, tmp2)<<8))

  /*
   * CALC22: CALC12 applied to both lanes of vvuu1 and vvuu2 at once.
   */
  #define CALC22(a1, vvuu1, a2, vvuu2, tmp) \
          (tmp=(a1)*(vvuu1)+(a2)*(vvuu2)+0x00800080, ((tmp+((tmp>>8)&MASK))>>8)&MASK)

  /*
   * CALC42: (a1*rgba1 + a2*rgba2)/255 on all four bytes.
   * Both pixel arguments are parenthesized for the same reason as in CALC41.
   */
  #define CALC42(a1, rgba1, a2, rgba2, tmp1, tmp2) \
          (CALC22(a1, (rgba1)&MASK, a2, (rgba2)&MASK, tmp1) | \
           (CALC22(a1, ((rgba1)>>8)&MASK, a2, ((rgba2)>>8)&MASK, tmp2)<<8))
  41
  42 static void mktables(void);
  43 typedef int Subdraw(Memdrawparam*);
  44 static Subdraw chardraw, alphadraw, memoptdraw;
  45
  46 static Memimage*        memones;        /* 1x1 replicated GREY1 image, pixel byte set to ~0 by memimageinit */
  47 static Memimage*        memzeros;       /* 1x1 replicated GREY1 image, pixel byte set to 0 by memimageinit */
  48 Memimage *memwhite;             /* memimageinit points this at memones */
  49 Memimage *memblack;             /* memimageinit points this at memzeros */
  50 Memimage *memtransparent;       /* memimageinit points this at memzeros */
  51 Memimage *memopaque;            /* memimageinit points this at memones; default mask in memimagedraw */
  52
  53 int     _ifmt(Fmt*);
  54
  55 int
  56 memimageinit(void)
  57 {
  58         static int didinit = 0;
  59
  60         if(didinit)
  61                 return 0;
  62
  63         if(imagmem != nil)
  64         if(strcmp(imagmem->name, "Image") == 0 || strcmp(imagmem->name, "image") == 0)
  65                 imagmem->move = memimagemove;
  66
  67         mktables();
  68         _memmkcmap();
  69
  70         fmtinstall('R', Rfmt);
  71         fmtinstall('P', Pfmt);
  72         fmtinstall('b', _ifmt);
  73
  74         memones = allocmemimage(Rect(0,0,1,1), GREY1);
  75         memzeros = allocmemimage(Rect(0,0,1,1), GREY1);
  76         if(memones == nil || memzeros == nil)
  77                 return -1;
  78
  79         memones->flags |= Frepl;
  80         memones->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
  81         *byteaddr(memones, ZP) = ~0;
  82
  83         memzeros->flags |= Frepl;
  84         memzeros->clipr = Rect(-0x3FFFFFF, -0x3FFFFFF, 0x3FFFFFF, 0x3FFFFFF);
  85         *byteaddr(memzeros, ZP) = 0;
  86
  87         memwhite = memones;
  88         memblack = memzeros;
  89         memopaque = memones;
  90         memtransparent = memzeros;
  91
  92         didinit = 1;
  93         return 0;
  94 }
  95
  96 static ulong imgtorgba(Memimage*, ulong);
  97 static ulong rgbatoimg(Memimage*, ulong);
  98 static ulong pixelbits(Memimage*, Point);
  99
     /*
      * Draw src through mask onto dst within rectangle r using operator op.
      * p0 and p1 are the points in src and mask that correspond to r.min.
      * A nil mask means memopaque.
      *
      * The request is clipped with drawclip, summarized in a Memdrawparam,
      * and offered in turn to hwdraw, memoptdraw, chardraw and alphadraw;
      * the first one to return non-zero has done the drawing.
      * Nothing is drawn if clipping leaves an empty rectangle or if op lies
      * outside Clear..SoverD.
      */
 100 void
 101 memimagedraw(Memimage *dst, Rectangle r, Memimage *src, Point p0, Memimage *mask, Point p1, int op)
 102 {
 103         Memdrawparam par;
 104
 105         if(mask == nil)
 106                 mask = memopaque;
 107
             /* clips r, p0 and p1 in place and fills in par.sr and par.mr */
 108         if(drawclip(dst, &r, src, &p0, mask, &p1, &par.sr, &par.mr) == 0)
 109                 return;
 110
             /* silently ignore operators outside the known range */
 111         if(op < Clear || op > SoverD)
 112                 return;
 113
 114         par.op = op;
 115         par.dst = dst;
 116         par.r = r;
 117         par.src = src;
 118         /* par.sr set by drawclip */
 119         par.mask = mask;
 120         /* par.mr set by drawclip */
 121
 122         par.state = 0;
 123         if(src->flags&Frepl){
 124                 par.state |= Replsrc;
                     /* a 1x1 replicated source is a solid colour: fetch it once */
 125                 if(Dx(src->r)==1 && Dy(src->r)==1){
 126                         par.sval = pixelbits(src, src->r.min);
 127                         par.state |= Simplesrc;
 128                         par.srgba = imgtorgba(src, par.sval);
 129                         par.sdval = rgbatoimg(dst, par.srgba);
                             /* low byte of srgba is tested as the source alpha */
 130                         if((par.srgba&0xFF) == 0 && (op&DoutS))
 131                                 return; /* no-op successfully handled */
 132                 }
 133         }
 134
 135         if(mask->flags & Frepl){
 136                 par.state |= Replmask;
                     /* likewise a 1x1 replicated mask is a constant */
 137                 if(Dx(mask->r)==1 && Dy(mask->r)==1){
 138                         par.mval = pixelbits(mask, mask->r.min);
 139                         if(par.mval == 0 && (op&DoutS))
 140                                 return; /* no-op successfully handled */
 141                         par.state |= Simplemask;
 142                         if(par.mval == ~0)
 143                                 par.state |= Fullmask;
 144                         par.mrgba = imgtorgba(mask, par.mval);
 145                 }
 146         }
 147
 148         /*
 149          * Now that we've clipped the parameters down to be consistent, we
 150          * simply try sub-drawing routines in order until we find one that was able
 151          * to handle us.  If the sub-drawing routine returns zero, it means it was
 152          * unable to satisfy the request, so we do not return.
 153          */
 154
 155         /*
 156          * Hardware support.  Each video driver provides this function,
 157          * which checks to see if there is anything it can help with.
 158          * There could be an if around this checking to see if dst is in video memory.
 159          */
 160         if(hwdraw(&par))
 161                 return;
 162
 163         /*
 164          * Optimizations using memmove and memset.
 165          */
 166         if(memoptdraw(&par))
 167                 return;
 168
 169         /*
 170          * Character drawing.
 171          * Solid source color being painted through a boolean mask onto a high res image.
 172          */
 173         if(chardraw(&par))
 174                 return;
 175
 176         /*
 177          * General calculation-laden case that does alpha for each pixel.
              * Its return value is not checked: if it gives up, nothing is drawn.
 178          */
 179         alphadraw(&par);
 180 }
 181
 182
 183 /*
 184  * Clip the destination rectangle further based on the properties of the
 185  * source and mask rectangles.  Once the destination rectangle is properly
 186  * clipped, adjust the source and mask rectangles to be the same size.
 187  *
      * On entry *r is the destination rectangle and *p0, *p1 are the points
      * in src and mask that correspond to r->min.  On success *r, *p0 and
      * *p1 have been updated in place, and *sr and *mr hold the matching
      * rectangles in source and mask coordinates, all of equal size.
      * A replicated source or mask is clipped only to its clipr, not to
      * its r.
      *
 188  * Return zero if the final rectangle is null.
 189  */
 190 int
 191 drawclipnorepl(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
 192 {
 193         Point rmin, delta;
 194         int splitcoords;
 195         Rectangle omr;
 196
 197         if(badrect(*r))
 198                 return 0;
             /* when source and mask share one point, a single rectangle serves both */
 199         splitcoords = (p0->x!=p1->x) || (p0->y!=p1->y);
 200         /* clip to destination */
 201         rmin = r->min;
 202         if(!rectclip(r, dst->r) || !rectclip(r, dst->clipr))
 203                 return 0;
 204         /* move mask point */
 205         p1->x += r->min.x-rmin.x;
 206         p1->y += r->min.y-rmin.y;
 207         /* move source point */
 208         p0->x += r->min.x-rmin.x;
 209         p0->y += r->min.y-rmin.y;
 210         /* map destination rectangle into source */
 211         sr->min = *p0;
 212         sr->max.x = p0->x+Dx(*r);
 213         sr->max.y = p0->y+Dy(*r);
 214         /* sr is r in source coordinates; clip to source */
 215         if(!(src->flags&Frepl) && !rectclip(sr, src->r))
 216                 return 0;
 217         if(!rectclip(sr, src->clipr))
 218                 return 0;
 219         /* compute and clip rectangle in mask */
 220         if(splitcoords){
 221                 /* move mask point with source */
 222                 p1->x += sr->min.x-p0->x;
 223                 p1->y += sr->min.y-p0->y;
 224                 mr->min = *p1;
 225                 mr->max.x = p1->x+Dx(*sr);
 226                 mr->max.y = p1->y+Dy(*sr);
                     /* remember the unclipped mask rectangle to measure what clipping removes */
 227                 omr = *mr;
 228                 /* mr is now rectangle in mask; clip it */
 229                 if(!(mask->flags&Frepl) && !rectclip(mr, mask->r))
 230                         return 0;
 231                 if(!rectclip(mr, mask->clipr))
 232                         return 0;
 233                 /* reflect any clips back to source */
 234                 sr->min.x += mr->min.x-omr.min.x;
 235                 sr->min.y += mr->min.y-omr.min.y;
 236                 sr->max.x += mr->max.x-omr.max.x;
 237                 sr->max.y += mr->max.y-omr.max.y;
 238         }else{
                     /* same coordinates: clip sr against the mask directly and copy it */
 239                 if(!(mask->flags&Frepl) && !rectclip(sr, mask->r))
 240                         return 0;
 241                 if(!rectclip(sr, mask->clipr))
 242                         return 0;
 243                 *mr = *sr;
 244         }
 245         /* move source clipping back to destination */
 246         delta.x = r->min.x - p0->x;
 247         delta.y = r->min.y - p0->y;
 248         r->min.x = sr->min.x + delta.x;
 249         r->min.y = sr->min.y + delta.y;
 250         r->max.x = sr->max.x + delta.x;
 251         r->max.y = sr->max.y + delta.y;
 252         *p0 = sr->min;
 253         *p1 = mr->min;
 254
             /* all three rectangles must now have identical dimensions */
 255         assert(Dx(*sr) == Dx(*mr) && Dx(*mr) == Dx(*r));
 256         assert(Dy(*sr) == Dy(*mr) && Dy(*mr) == Dy(*r));
 257         assert(ptinrect(r->min, dst->r));
 258
 259         return 1;
 260 }
 261
 262 /*
 263  * like drawclipnorepl() above, but if source or mask is replicated,
 264  * move its clipped rectangle so that its minimum point falls within
 265  * the repl rectangle.
 266  *
 267  * Return zero if the final rectangle is null.
 268  */
 269 int
 270 drawclip(Memimage *dst, Rectangle *r, Memimage *src, Point *p0, Memimage *mask, Point *p1, Rectangle *sr, Rectangle *mr)
 271 {
 272         Point delta;
 273
 274         if(!drawclipnorepl(dst, r, src, p0, mask, p1, sr, mr))
 275                 return 0;
 276
 277         /* move source rectangle so sr->min is in src->r */
 278         if(src->flags&Frepl) {
 279                 delta.x = drawreplxy(src->r.min.x, src->r.max.x, sr->min.x) - sr->min.x;
 280                 delta.y = drawreplxy(src->r.min.y, src->r.max.y, sr->min.y) - sr->min.y;
 281                 sr->min.x += delta.x;
 282                 sr->min.y += delta.y;
 283                 sr->max.x += delta.x;
 284                 sr->max.y += delta.y;
 285                 *p0 = sr->min;
 286         }
 287
 288         /* move mask point so it is in mask->r */
 289         *p1 = drawrepl(mask->r, *p1);
 290         mr->min = *p1;
 291         mr->max.x = p1->x+Dx(*sr);
 292         mr->max.y = p1->y+Dy(*sr);
 293
 294         assert(ptinrect(*p0, src->r));
 295         assert(ptinrect(*p1, mask->r));
 296
 297         return 1;
 298 }
 299
 300 /*
 301  * Conversion tables.
      * All four are filled in by mktables().
 302  */
 303 static uchar replbit[1+8][256];         /* replbit[x][y] is the replication of the x-bit quantity y to 8-bit depth */
 304 static uchar conv18[256][8];            /* conv18[x][y] is the yth (leftmost first) 1-bit pixel of byte x, replicated to 8 bits */
 305 static uchar conv28[256][4];            /* conv28[x][y] is the yth 2-bit pixel of byte x, replicated to 8 bits */
 306 static uchar conv48[256][2];            /* conv48[x][y] is the yth 4-bit pixel of byte x, replicated to 8 bits */
 307
 /*
  * replmul[n], for 1 <= n <= 8, is the multiplier that replicates an
  * n-bit pattern to fill 8 bits.  Each one bit (drawn most significant
  * bit first in the comments below) marks where the bottom (ones) bit
  * of a copy of the pattern lands.
  * only the top 8 bits of the 16-bit result are actually used.
  * (the lower 8 bits are needed to get bits in the right place
  * when n is not a divisor of 8.)
  *
  * Should check to see if it's easier to just refer to replmul than
  * use the precomputed values in replbit.  On PCs it may well
  * be; on machines with slow multiply instructions it probably isn't.
  */
 static int replmul[1+8] = {
         0x0000,         /* n=0: no bits */
         0xFFFF,         /* n=1: 1111 1111 1111 1111 */
         0x5555,         /* n=2: 0101 0101 0101 0101 */
         0x2492,         /* n=3: 0010 0100 1001 0010 */
         0x1111,         /* n=4: 0001 0001 0001 0001 */
         0x0842,         /* n=5: 0000 1000 0100 0010 */
         0x0410,         /* n=6: 0000 0100 0001 0000 */
         0x0204,         /* n=7: 0000 0010 0000 0100 */
         0x0101,         /* n=8: 0000 0001 0000 0001 */
 };
 336
 337 static void
 338 mktables(void)
 339 {
 340         int i, j, mask, sh, small;
 341
 342         /* bit replication up to 8 bits */
 343         for(i=0; i<256; i++){
 344                 for(j=0; j<=8; j++){    /* j <= 8 [sic] */
 345                         small = i & ((1<<j)-1);
 346                         replbit[j][i] = (small*replmul[j])>>8;
 347                 }
 348         }
 349
 350         /* bit unpacking up to 8 bits, only powers of 2 */
 351         for(i=0; i<256; i++){
 352                 for(j=0, sh=7, mask=1; j<8; j++, sh--)
 353                         conv18[i][j] = replbit[1][(i>>sh)&mask];
 354
 355                 for(j=0, sh=6, mask=3; j<4; j++, sh-=2)
 356                         conv28[i][j] = replbit[2][(i>>sh)&mask];
 357
 358                 for(j=0, sh=4, mask=15; j<2; j++, sh-=4)
 359                         conv48[i][j] = replbit[4][(i>>sh)&mask];
 360         }
 361 }
 362
 363 static uchar ones = 0xff;       /* constant 0xff byte; Buffer.alpha points here when unused, and code compares against &ones to detect that */
 364
 365 /*
 366  * General alpha drawing case.  Can handle anything.
 367  */
 368 typedef struct  Buffer  Buffer;
     /*
      * One scan line of pixels handed between the Readfn, Calcfn and
      * Writefn routines: a set of per-channel byte pointers, each stepped
      * by delta from one pixel to the next.
      */
 369 struct Buffer {
 370         /* used by most routines */
 371         uchar   *red;
 372         uchar   *grn;
 373         uchar   *blu;
 374         uchar   *alpha; /* is &ones when unused, never nil */
 375         uchar   *grey;
 376         ulong   *rgba;  /* the pixel as a whole word; used for word-at-a-time work when chanmatch() allows */
 377         int     delta;  /* number of bytes to add to pointer to get next pixel to the right */
 378
 379         /* used by boolcalc* for mask data */
 380         uchar   *m;             /* ptr to mask data r.min byte; like p->bytermin */
 381         int             mskip;  /* no. of left bits to skip in *m */
 382         uchar   *bm;            /* ptr to mask data img->r.min byte; like p->bytey0s */
 383         int             bmskip; /* no. of left bits to skip in *bm */
 384         uchar   *em;            /* ptr to mask data img->r.max.x byte; like p->bytey0e */
 385         int             emskip; /* no. of right bits to skip in *em */
 386 };
 387
 388 typedef struct  Param   Param;
 389 typedef Buffer  Readfn(Param*, uchar*, int);    /* alphadraw calls these as (param, scratch buffer, y offset within the image) */
 390 typedef void    Writefn(Param*, uchar*, Buffer);        /* alphadraw calls these as (param, address of the destination line, buffer) */
 391 typedef Buffer  Calcfn(Buffer, Buffer, Buffer, int, int, int);  /* alphadraw calls these as (dst, src, mask, dx, isgrey, op) */
 392
 393 enum {
 394         MAXBCACHE = 16  /* size of Param.bcache; getparam caches lines only if the repl image is no taller than this */
 395 };
 396
 397 /* giant rathole to customize functions with */
     /*
      * alphadraw keeps one Param each for source, mask and destination.
      * getparam() zeroes the structure and fills in the image, rectangle
      * and buffer-offset fields; alphadraw then sets the rest.
      */
 398 struct Param {
 399         Readfn  *replcall;      /* the real reader, saved by alphadraw when replread is put in front of it */
 400         Readfn  *greymaskcall;  /* the mask's reader, saved by alphadraw when greymaskread is put in front of it */
 401         Readfn  *convreadcall;
 402         Writefn *convwritecall;
 403
 404         Memimage *img;
 405         Rectangle       r;
 406         int     dx;     /* of r */
 407         int     needbuf;        /* alphadraw passes (src->data == dst->data) for all three images */
 408         int     convgrey;
 409         int     alphaonly;      /* set by alphadraw for a mask that has an alpha channel */
 410
 411         uchar   *bytey0s;               /* byteaddr(Pt(img->r.min.x, img->r.min.y)) */
 412         uchar   *bytermin;      /* byteaddr(Pt(r.min.x, img->r.min.y)) */
 413         uchar   *bytey0e;               /* byteaddr(Pt(img->r.max.x, img->r.min.y)) */
 414         int             bwidth; /* sizeof(ulong)*img->width: bytes from one scan line to the next */
 415
 416         int     replcache;      /* if set, cache buffers */
 417         Buffer  bcache[MAXBCACHE];
 418         ulong   bfilled;
 419         uchar   *bufbase;       /* drawbuf+bufoff, filled in once drawbuf exists */
 420         int     bufoff; /* offset of this Param's scratch space within drawbuf */
 421         int     bufdelta;       /* 4*dx: scratch bytes reserved per line */
 422
 423         int     dir;    /* 1 to walk scan lines top to bottom, -1 for bottom to top */
 424
 425         int     convbufoff;
 426         uchar   *convbuf;
 427         Param   *convdpar;
 428         int     convdx;
 429 };
 430
 431 static Readfn   greymaskread, replread, readptr;
 432 static Writefn  nullwrite;
 433 static Calcfn   alphacalc0, alphacalc14, alphacalc2810, alphacalc3679, alphacalc5, alphacalc11, alphacalcS;
 434 static Calcfn   boolcalc14, boolcalc236789, boolcalc1011;
 435
 436 static Readfn*  readfn(Memimage*);
 437 static Readfn*  readalphafn(Memimage*);
 438 static Writefn* writefn(Memimage*);
 439
 440 static Calcfn*  boolcopyfn(Memimage*, Memimage*);
 441 static Readfn*  convfn(Memimage*, Param*, Memimage*, Param*, int*);
 442
 443 static Calcfn *alphacalc[Ncomp] =
     /*
      * Calculator for each drawing op, indexed by op (alphadraw uses
      * alphacalc[op]).  The digits in each function's name are the table
      * slots it fills.
      */
 444 {
 445         alphacalc0,             /* Clear */
 446         alphacalc14,            /* DoutS */
 447         alphacalc2810,          /* SoutD */
 448         alphacalc3679,          /* DxorS */
 449         alphacalc14,            /* DinS */
 450         alphacalc5,             /* D */
 451         alphacalc3679,          /* DatopS */
 452         alphacalc3679,          /* DoverS */
 453         alphacalc2810,          /* SinD */
 454         alphacalc3679,          /* SatopD */
 455         alphacalc2810,          /* S */
 456         alphacalc11,            /* SoverD */
 457 };
 458
 459 static Calcfn *boolcalc[Ncomp] =
     /*
      * Same indexing as alphacalc.  alphadraw selects from this table
      * when the mask is GREY1 and the source has no alpha channel.
      * Clear and D reuse the alphacalc routines.
      */
 460 {
 461         alphacalc0,             /* Clear */
 462         boolcalc14,             /* DoutS */
 463         boolcalc236789,         /* SoutD */
 464         boolcalc236789,         /* DxorS */
 465         boolcalc14,             /* DinS */
 466         alphacalc5,             /* D */
 467         boolcalc236789,         /* DatopS */
 468         boolcalc236789,         /* DoverS */
 469         boolcalc236789,         /* SinD */
 470         boolcalc236789,         /* SatopD */
 471         boolcalc1011,           /* S */
 472         boolcalc1011,           /* SoverD */
 473 };
 474
 475 /*
 476  * Avoid standard Lock, QLock so that can be used in kernel.
      *
      * A Dbuf is the per-call working state of alphadraw: a scratch buffer
      * that is kept between calls, and the three Params.  Slots in the
      * fixed pool below are claimed by allocdbuf() and released by
      * clearing inuse.
 477  */
 478 typedef struct Dbuf Dbuf;
 479 struct Dbuf
 480 {
 481         uchar *p;       /* scratch buffer, or nil if none has been allocated yet */
 482         int n;          /* size in bytes of the buffer at p */
 483         Param spar, mpar, dpar; /* source, mask and destination parameters */
 484         int inuse;      /* non-zero while a drawing call owns this slot; claimed with _tas */
 485 };
 486 static Dbuf dbuf[10];
 487
 488 static Dbuf*
 489 allocdbuf(void)
 490 {
 491         int i;
 492
 493         for(i=0; i<nelem(dbuf); i++){
 494                 if(dbuf[i].inuse)
 495                         continue;
 496                 if(!_tas(&dbuf[i].inuse))
 497                         return &dbuf[i];
 498         }
 499         return nil;
 500 }
 501
 502 static void
 503 getparam(Param *p, Memimage *img, Rectangle r, int convgrey, int needbuf, int *ndrawbuf)
 504 {
             /*
              * Initialize *p for image img and the rectangle r within it.
              * Scratch space is only reserved here, not allocated: bufoff
              * records the current value of *ndrawbuf and *ndrawbuf is
              * advanced past this Param's share, so the caller can make
              * one allocation for all Params afterwards.
              */
 505         int nbuf;
 506
 507         memset(p, 0, sizeof *p);
 508
 509         p->img = img;
 510         p->r = r;
 511         p->dx = Dx(r);
 512         p->needbuf = needbuf;
 513         p->convgrey = convgrey;
 514
 515         assert(img->r.min.x <= r.min.x && r.min.x < img->r.max.x);
 516
             /* addresses on the image's first scan line; callers add y*bwidth for other lines */
 517         p->bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
 518         p->bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
 519         p->bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
 520         p->bwidth = sizeof(ulong)*img->width;
 521
 522         assert(p->bytey0s <= p->bytermin && p->bytermin <= p->bytey0e);
 523
 524         if(p->r.min.x == p->img->r.min.x)
 525                 assert(p->bytermin == p->bytey0s);
 526
             /*
              * One line of scratch normally.  A replicated image that is
              * short enough to fit bcache and shorter than the area being
              * drawn gets one line per image row so each row can be cached.
              */
 527         nbuf = 1;
 528         if((img->flags&Frepl) && Dy(img->r) <= MAXBCACHE && Dy(img->r) < Dy(r)){
 529                 p->replcache = 1;
 530                 nbuf = Dy(img->r);
 531         }
 532         p->bufdelta = 4*p->dx;
 533         p->bufoff = *ndrawbuf;
 534         *ndrawbuf += p->bufdelta*nbuf;
 535 }
 536
 537 static void
 538 clipy(Memimage *img, int *y)
 539 {
 540         int dy;
 541
 542         dy = Dy(img->r);
 543         if(*y == dy)
 544                 *y = 0;
 545         else if(*y == -1)
 546                 *y = dy-1;
 547         assert(0 <= *y && *y < dy);
 548 }
 549
 550 /*
 551  * For each scan line, we expand the pixels from source, mask, and destination
 552  * into byte-aligned red, green, blue, alpha, and grey channels.  If buffering is not
 553  * needed and the channels were already byte-aligned (grey8, rgb24, rgba32, rgb32),
 554  * the readers need not copy the data: they can simply return pointers to the data.
 555  * If the destination image is grey and the source is not, it is converted using the NTSC
 556  * formula.
 557  *
 558  * Once we have all the channels, we call either rgbcalc or greycalc, depending on
 559  * whether the destination image is color.  This is allowed to overwrite the dst buffer (perhaps
 560  * the actual data, perhaps a copy) with its result.  It should only overwrite the dst buffer
 561  * with the same format (i.e. red bytes with red bytes, etc.)  A new buffer is returned from
 562  * the calculator, and that buffer is passed to a function to write it to the destination.
 563  * If the buffer is already pointing at the destination, the writing function is a no-op.
 564  */
 565 static int
 566 alphadraw(Memdrawparam *par)
 567 {
 568         int isgrey, starty, endy, op;
 569         int needbuf, dsty, srcy, masky;
 570         int y, dir, dx, dy, ndrawbuf;
 571         uchar *drawbuf;
 572         Buffer bsrc, bdst, bmask;
 573         Readfn *rdsrc, *rdmask, *rddst;
 574         Calcfn *calc;
 575         Writefn *wrdst;
 576         Memimage *src, *mask, *dst;
 577         Rectangle r, sr, mr;
 578         Dbuf *z;
 579
 580         r = par->r;
 581         dx = Dx(r);
 582         dy = Dy(r);
 583
 584         z = allocdbuf();
 585         if(z == nil)
 586                 return 0;
 587
 588         src = par->src;
 589         mask = par->mask;
 590         dst = par->dst;
 591         sr = par->sr;
 592         mr = par->mr;
 593         op = par->op;
 594
 595         isgrey = dst->flags&Fgrey;
 596
 597         /*
 598          * Buffering when src and dst are the same bitmap is sufficient but not
 599          * necessary.  There are stronger conditions we could use.  We could
 600          * check to see if the rectangles intersect, and if simply moving in the
 601          * correct y direction can avoid the need to buffer.
 602          */
 603         needbuf = (src->data == dst->data);
 604
 605         ndrawbuf = 0;
 606         getparam(&z->spar, src, sr, isgrey, needbuf, &ndrawbuf);
 607         getparam(&z->dpar, dst, r, isgrey, needbuf, &ndrawbuf);
 608         getparam(&z->mpar, mask, mr, 0, needbuf, &ndrawbuf);
 609
 610         dir = (needbuf && byteaddr(dst, r.min) > byteaddr(src, sr.min)) ? -1 : 1;
 611         z->spar.dir = z->mpar.dir = z->dpar.dir = dir;
 612
 613         /*
 614          * If the mask is purely boolean, we can convert from src to dst format
 615          * when we read src, and then just copy it to dst where the mask tells us to.
 616          * This requires a boolean (1-bit grey) mask and lack of a source alpha channel.
 617          *
 618          * The computation is accomplished by assigning the function pointers as follows:
 619          *      rdsrc - read and convert source into dst format in a buffer
 620          *      rdmask - convert mask to bytes, set pointer to it
 621          *      rddst - fill with pointer to real dst data, but do no reads
 622          *      calc - copy src onto dst when mask says to.
 623          *      wrdst - do nothing
 624          * This is slightly sleazy, since things aren't doing exactly what their names say,
 625          * but it avoids a fair amount of code duplication to make this a case here
 626          * rather than have a separate booldraw.
 627          */
 628         if(!(src->flags&Falpha) && mask->chan == GREY1 && dst->depth >= 8 && op == SoverD){
 629                 rdsrc = convfn(dst, &z->dpar, src, &z->spar, &ndrawbuf);
 630                 rddst = readptr;
 631                 rdmask = readfn(mask);
 632                 calc = boolcopyfn(dst, mask);
 633                 wrdst = nullwrite;
 634         }else{
 635                 /* usual alphadraw parameter fetching */
 636                 rdsrc = readfn(src);
 637                 rddst = readfn(dst);
 638                 wrdst = writefn(dst);
 639                 calc = alphacalc[op];
 640
 641                 /*
 642                  * If there is no alpha channel, we'll ask for a grey channel
 643                  * and pretend it is the alpha.
 644                  */
 645                 if(mask->flags&Falpha){
 646                         rdmask = readalphafn(mask);
 647                         z->mpar.alphaonly = 1;
 648                 }else{
 649                         z->mpar.greymaskcall = readfn(mask);
 650                         z->mpar.convgrey = 1;
 651                         rdmask = greymaskread;
 652
 653                         /*
 654                          * Should really be above, but then boolcopyfns would have
 655                          * to deal with bit alignment, and I haven't written that.
 656                          *
 657                          * This is a common case for things like ellipse drawing.
 658                          * When there's no alpha involved and the mask is boolean,
 659                          * we can avoid all the division and multiplication.
 660                          */
 661                         if(mask->chan == GREY1 && !(src->flags&Falpha))
 662                                 calc = boolcalc[op];
 663                         else if(op == SoverD && !(src->flags&Falpha))
 664                                 calc = alphacalcS;
 665                 }
 666         }
 667
 668         /*
 669          * If the image has a small enough repl rectangle,
 670          * we can just read each line once and cache them.
 671          */
 672         if(z->spar.replcache){
 673                 z->spar.replcall = rdsrc;
 674                 rdsrc = replread;
 675         }
 676         if(z->mpar.replcache){
 677                 z->mpar.replcall = rdmask;
 678                 rdmask = replread;
 679         }
 680
 681         if(z->n < ndrawbuf){
 682                 free(z->p);
 683                 if((z->p = mallocz(ndrawbuf, 0)) == nil){
 684                         z->inuse = 0;
 685                         return 0;
 686                 }
 687                 z->n = ndrawbuf;
 688         }
 689         drawbuf = z->p;
 690
 691         /*
 692          * Before we were saving only offsets from drawbuf in the parameter
 693          * structures; now that drawbuf has been grown to accomodate us,
 694          * we can fill in the pointers.
 695          */
 696         z->spar.bufbase = drawbuf+z->spar.bufoff;
 697         z->mpar.bufbase = drawbuf+z->mpar.bufoff;
 698         z->dpar.bufbase = drawbuf+z->dpar.bufoff;
 699         z->spar.convbuf = drawbuf+z->spar.convbufoff;
 700
 701         if(dir == 1){
 702                 starty = 0;
 703                 endy = dy;
 704         }else{
 705                 starty = dy-1;
 706                 endy = -1;
 707         }
 708
 709         /*
 710          * srcy, masky, and dsty are offsets from the top of their
 711          * respective Rectangles.  they need to be contained within
 712          * the rectangles, so clipy can keep them there without division.
 713          */
 714         srcy = (starty + sr.min.y - src->r.min.y)%Dy(src->r);
 715         masky = (starty + mr.min.y - mask->r.min.y)%Dy(mask->r);
 716         dsty = starty + r.min.y - dst->r.min.y;
 717
 718         assert(0 <= srcy && srcy < Dy(src->r));
 719         assert(0 <= masky && masky < Dy(mask->r));
 720         assert(0 <= dsty && dsty < Dy(dst->r));
 721
 722         for(y=starty; y!=endy; y+=dir, srcy+=dir, masky+=dir, dsty+=dir){
 723                 clipy(src, &srcy);
 724                 clipy(dst, &dsty);
 725                 clipy(mask, &masky);
 726
 727                 bsrc = rdsrc(&z->spar, z->spar.bufbase, srcy);
 728                 bmask = rdmask(&z->mpar, z->mpar.bufbase, masky);
 729                 bdst = rddst(&z->dpar, z->dpar.bufbase, dsty);
 730                 bdst = calc(bdst, bsrc, bmask, dx, isgrey, op);
 731                 wrdst(&z->dpar, z->dpar.bytermin+dsty*z->dpar.bwidth, bdst);
 732         }
 733
 734         z->inuse = 0;
 735         return 1;
 736 }
 737
 738 static Buffer
 739 alphacalc0(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
 740 {
 741         USED(grey);
 742         USED(op);
 743         USED(b1);
 744         USED(b2);
 745         memset(bdst.rgba, 0, dx*bdst.delta);
 746         return bdst;
 747 }
 748
 749 /*
 750  * Do the channels in the buffers match enough
 751  * that we can do word-at-a-time operations
 752  * on the pixels?
 753  */
 754 static int
 755 chanmatch(Buffer *bdst, Buffer *bsrc)
 756 {
 757         uchar *drgb, *srgb;
 758
 759         /*
 760          * first, r, g, b must be in the same place
 761          * in the rgba word.
 762          */
 763         drgb = (uchar*)bdst->rgba;
 764         srgb = (uchar*)bsrc->rgba;
 765         if(bdst->red - drgb != bsrc->red - srgb
 766         || bdst->blu - drgb != bsrc->blu - srgb
 767         || bdst->grn - drgb != bsrc->grn - srgb)
 768                 return 0;
 769
 770         /*
 771          * that implies alpha is in the same place,
 772          * if it is there at all (it might be == &ones).
 773          * if the destination is &ones, we can scribble
 774          * over the rgba slot just fine.
 775          */
 776         if(bdst->alpha == &ones)
 777                 return 1;
 778
 779         /*
 780          * if the destination is not ones but the src is,
 781          * then the simultaneous calculation will use
 782          * bogus bytes from the src's rgba.  no good.
 783          */
 784         if(bsrc->alpha == &ones)
 785                 return 0;
 786
 787         /*
 788          * otherwise, alphas are in the same place.
 789          */
 790         return 1;
 791 }
 792
 793 static Buffer
 794 alphacalc14(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
 795 {
             /*
              * Calculator for DoutS and DinS (slots 1 and 4 of alphacalc).
              * Each of the dx destination pixels is scaled by fd/255, where
              * fd is source alpha times mask alpha over 255, inverted for
              * DoutS.  Source colour is never read, only its alpha.
              * Returns the destination buffer as it was passed in, since the
              * working copy's pointers are advanced along the line.
              */
 796         Buffer obdst;
 797         int fd, sadelta;
 798         int i, sa, ma, q;
 799         ulong t, t1;
 800
 801         obdst = bdst;
             /* a source without alpha reads the shared &ones byte every time, so don't step it */
 802         sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
             /* q: both are 4-byte pixels with matching layout, so whole words can be scaled */
 803         q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
 804
 805         for(i=0; i<dx; i++){
 806                 sa = *bsrc.alpha;
 807                 ma = *bmask.alpha;
 808                 fd = CALC11(sa, ma, t);
 809                 if(op == DoutS)
 810                         fd = 255-fd;
 811
 812                 if(grey){
 813                         *bdst.grey = CALC11(fd, *bdst.grey, t);
 814                         bsrc.grey += bsrc.delta;
 815                         bdst.grey += bdst.delta;
 816                 }else{
 817                         if(q){
                                     /* scales all four bytes, alpha slot included, so skip the separate alpha step below */
 818                                 *bdst.rgba = CALC41(fd, *bdst.rgba, t, t1);
 819                                 bsrc.rgba++;
 820                                 bdst.rgba++;
 821                                 bsrc.alpha += sadelta;
 822                                 bmask.alpha += bmask.delta;
 823                                 continue;
 824                         }
 825                         *bdst.red = CALC11(fd, *bdst.red, t);
 826                         *bdst.grn = CALC11(fd, *bdst.grn, t);
 827                         *bdst.blu = CALC11(fd, *bdst.blu, t);
 828                         bsrc.red += bsrc.delta;
 829                         bsrc.blu += bsrc.delta;
 830                         bsrc.grn += bsrc.delta;
 831                         bdst.red += bdst.delta;
 832                         bdst.blu += bdst.delta;
 833                         bdst.grn += bdst.delta;
 834                 }
                     /* scale destination alpha too, when the destination really has one */
 835                 if(bdst.alpha != &ones){
 836                         *bdst.alpha = CALC11(fd, *bdst.alpha, t);
 837                         bdst.alpha += bdst.delta;
 838                 }
 839                 bmask.alpha += bmask.delta;
 840                 bsrc.alpha += sadelta;
 841         }
 842         return obdst;
 843 }
 844
 845 static Buffer
 846 alphacalc2810(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
 847 {
 848         Buffer obdst;
 849         int fs, sadelta;
 850         int i, ma, da, q;
 851         ulong t, t1;
 852
 853         obdst = bdst;
 854         sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
 855         q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
 856
 857         for(i=0; i<dx; i++){
 858                 ma = *bmask.alpha;
 859                 da = *bdst.alpha;
 860                 if(op == SoutD)
 861                         da = 255-da;
 862                 fs = ma;
 863                 if(op != S)
 864                         fs = CALC11(fs, da, t);
 865
 866                 if(grey){
 867                         *bdst.grey = CALC11(fs, *bsrc.grey, t);
 868                         bsrc.grey += bsrc.delta;
 869                         bdst.grey += bdst.delta;
 870                 }else{
 871                         if(q){
 872                                 *bdst.rgba = CALC41(fs, *bsrc.rgba, t, t1);
 873                                 bsrc.rgba++;
 874                                 bdst.rgba++;
 875                                 bmask.alpha += bmask.delta;
 876                                 bdst.alpha += bdst.delta;
 877                                 continue;
 878                         }
 879                         *bdst.red = CALC11(fs, *bsrc.red, t);
 880                         *bdst.grn = CALC11(fs, *bsrc.grn, t);
 881                         *bdst.blu = CALC11(fs, *bsrc.blu, t);
 882                         bsrc.red += bsrc.delta;
 883                         bsrc.blu += bsrc.delta;
 884                         bsrc.grn += bsrc.delta;
 885                         bdst.red += bdst.delta;
 886                         bdst.blu += bdst.delta;
 887                         bdst.grn += bdst.delta;
 888                 }
 889                 if(bdst.alpha != &ones){
 890                         *bdst.alpha = CALC11(fs, *bsrc.alpha, t);
 891                         bdst.alpha += bdst.delta;
 892                 }
 893                 bmask.alpha += bmask.delta;
 894                 bsrc.alpha += sadelta;
 895         }
 896         return obdst;
 897 }
 898
 899 static Buffer
 900 alphacalc3679(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
 901 {
 902         Buffer obdst;
 903         int fs, fd, sadelta;
 904         int i, sa, ma, da, q;
 905         ulong t, t1;
 906
 907         obdst = bdst;
 908         sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
 909         q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
 910
 911         for(i=0; i<dx; i++){
 912                 sa = *bsrc.alpha;
 913                 ma = *bmask.alpha;
 914                 da = *bdst.alpha;
 915                 if(op == SatopD)
 916                         fs = CALC11(ma, da, t);
 917                 else
 918                         fs = CALC11(ma, 255-da, t);
 919                 if(op == DoverS)
 920                         fd = 255;
 921                 else{
 922                         fd = CALC11(sa, ma, t);
 923                         if(op != DatopS)
 924                                 fd = 255-fd;
 925                 }
 926
 927                 if(grey){
 928                         *bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
 929                         bsrc.grey += bsrc.delta;
 930                         bdst.grey += bdst.delta;
 931                 }else{
 932                         if(q){
 933                                 *bdst.rgba = CALC42(fs, *bsrc.rgba, fd, *bdst.rgba, t, t1);
 934                                 bsrc.rgba++;
 935                                 bdst.rgba++;
 936                                 bsrc.alpha += sadelta;
 937                                 bmask.alpha += bmask.delta;
 938                                 bdst.alpha += bdst.delta;
 939                                 continue;
 940                         }
 941                         *bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
 942                         *bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
 943                         *bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
 944                         bsrc.red += bsrc.delta;
 945                         bsrc.blu += bsrc.delta;
 946                         bsrc.grn += bsrc.delta;
 947                         bdst.red += bdst.delta;
 948                         bdst.blu += bdst.delta;
 949                         bdst.grn += bdst.delta;
 950                 }
 951                 if(bdst.alpha != &ones){
 952                         *bdst.alpha = CALC12(fs, sa, fd, da, t);
 953                         bdst.alpha += bdst.delta;
 954                 }
 955                 bmask.alpha += bmask.delta;
 956                 bsrc.alpha += sadelta;
 957         }
 958         return obdst;
 959 }
 960
 961 static Buffer
 962 alphacalc5(Buffer bdst, Buffer b1, Buffer b2, int dx, int grey, int op)
 963 {
 964         USED(dx);
 965         USED(grey);
 966         USED(op);
 967         USED(b1);
 968         USED(b2);
 969         return bdst;
 970 }
 971
 972 static Buffer
 973 alphacalc11(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
 974 {
 975         Buffer obdst;
 976         int fd, sadelta;
 977         int i, sa, ma, q;
 978         ulong t, t1;
 979
 980         USED(op);
 981         obdst = bdst;
 982         sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
 983         q = bsrc.delta == 4 && bdst.delta == 4 && chanmatch(&bdst, &bsrc);
 984
 985         for(i=0; i<dx; i++){
 986                 sa = *bsrc.alpha;
 987                 ma = *bmask.alpha;
 988                 fd = 255-CALC11(sa, ma, t);
 989
 990                 if(grey){
 991                         *bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
 992                         bsrc.grey += bsrc.delta;
 993                         bdst.grey += bdst.delta;
 994                 }else{
 995                         if(q){
 996                                 *bdst.rgba = CALC42(ma, *bsrc.rgba, fd, *bdst.rgba, t, t1);
 997                                 bsrc.rgba++;
 998                                 bdst.rgba++;
 999                                 bsrc.alpha += sadelta;
1000                                 bmask.alpha += bmask.delta;
1001                                 continue;
1002                         }
1003                         *bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
1004                         *bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
1005                         *bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
1006                         bsrc.red += bsrc.delta;
1007                         bsrc.blu += bsrc.delta;
1008                         bsrc.grn += bsrc.delta;
1009                         bdst.red += bdst.delta;
1010                         bdst.blu += bdst.delta;
1011                         bdst.grn += bdst.delta;
1012                 }
1013                 if(bdst.alpha != &ones){
1014                         *bdst.alpha = CALC12(ma, sa, fd, *bdst.alpha, t);
1015                         bdst.alpha += bdst.delta;
1016                 }
1017                 bmask.alpha += bmask.delta;
1018                 bsrc.alpha += sadelta;
1019         }
1020         return obdst;
1021 }
1022
1023 /*
1024 not used yet
1025 source and mask alpha 1
1026 static Buffer
1027 alphacalcS0(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1028 {
1029         Buffer obdst;
1030         int i;
1031
1032         USED(op);
1033         obdst = bdst;
1034         if(bsrc.delta == bdst.delta){
1035                 memmove(bdst.rgba, bsrc.rgba, dx*bdst.delta);
1036                 return obdst;
1037         }
1038         for(i=0; i<dx; i++){
1039                 if(grey){
1040                         *bdst.grey = *bsrc.grey;
1041                         bsrc.grey += bsrc.delta;
1042                         bdst.grey += bdst.delta;
1043                 }else{
1044                         *bdst.red = *bsrc.red;
1045                         *bdst.grn = *bsrc.grn;
1046                         *bdst.blu = *bsrc.blu;
1047                         bsrc.red += bsrc.delta;
1048                         bsrc.blu += bsrc.delta;
1049                         bsrc.grn += bsrc.delta;
1050                         bdst.red += bdst.delta;
1051                         bdst.blu += bdst.delta;
1052                         bdst.grn += bdst.delta;
1053                 }
1054                 if(bdst.alpha != &ones){
1055                         *bdst.alpha = 255;
1056                         bdst.alpha += bdst.delta;
1057                 }
1058         }
1059         return obdst;
1060 }
1061 */
1062
1063 /* source alpha 1 */
1064 static Buffer
1065 alphacalcS(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1066 {
1067         Buffer obdst;
1068         int fd;
1069         int i, ma;
1070         ulong t;
1071
1072         USED(op);
1073         obdst = bdst;
1074
1075         for(i=0; i<dx; i++){
1076                 ma = *bmask.alpha;
1077                 fd = 255-ma;
1078
1079                 if(grey){
1080                         *bdst.grey = CALC12(ma, *bsrc.grey, fd, *bdst.grey, t);
1081                         bsrc.grey += bsrc.delta;
1082                         bdst.grey += bdst.delta;
1083                 }else{
1084                         *bdst.red = CALC12(ma, *bsrc.red, fd, *bdst.red, t);
1085                         *bdst.grn = CALC12(ma, *bsrc.grn, fd, *bdst.grn, t);
1086                         *bdst.blu = CALC12(ma, *bsrc.blu, fd, *bdst.blu, t);
1087                         bsrc.red += bsrc.delta;
1088                         bsrc.blu += bsrc.delta;
1089                         bsrc.grn += bsrc.delta;
1090                         bdst.red += bdst.delta;
1091                         bdst.blu += bdst.delta;
1092                         bdst.grn += bdst.delta;
1093                 }
1094                 if(bdst.alpha != &ones){
1095                         *bdst.alpha = ma+CALC11(fd, *bdst.alpha, t);
1096                         bdst.alpha += bdst.delta;
1097                 }
1098                 bmask.alpha += bmask.delta;
1099         }
1100         return obdst;
1101 }
1102
1103 static Buffer
1104 boolcalc14(Buffer bdst, Buffer b1, Buffer bmask, int dx, int grey, int op)
1105 {
1106         Buffer obdst;
1107         int i, ma, zero;
1108
1109         USED(b1);
1110
1111         obdst = bdst;
1112
1113         for(i=0; i<dx; i++){
1114                 ma = *bmask.alpha;
1115                 zero = ma ? op == DoutS : op == DinS;
1116
1117                 if(grey){
1118                         if(zero)
1119                                 *bdst.grey = 0;
1120                         bdst.grey += bdst.delta;
1121                 }else{
1122                         if(zero)
1123                                 *bdst.red = *bdst.grn = *bdst.blu = 0;
1124                         bdst.red += bdst.delta;
1125                         bdst.blu += bdst.delta;
1126                         bdst.grn += bdst.delta;
1127                 }
1128                 bmask.alpha += bmask.delta;
1129                 if(bdst.alpha != &ones){
1130                         if(zero)
1131                                 *bdst.alpha = 0;
1132                         bdst.alpha += bdst.delta;
1133                 }
1134         }
1135         return obdst;
1136 }
1137
1138 static Buffer
1139 boolcalc236789(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1140 {
1141         Buffer obdst;
1142         int fs, fd;
1143         int i, ma, da, zero;
1144         ulong t;
1145
1146         obdst = bdst;
1147         zero = !(op&1);
1148
1149         for(i=0; i<dx; i++){
1150                 ma = *bmask.alpha;
1151                 da = *bdst.alpha;
1152                 fs = da;
1153                 if(op&2)
1154                         fs = 255-da;
1155                 fd = 0;
1156                 if(op&4)
1157                         fd = 255;
1158
1159                 if(grey){
1160                         if(ma)
1161                                 *bdst.grey = CALC12(fs, *bsrc.grey, fd, *bdst.grey, t);
1162                         else if(zero)
1163                                 *bdst.grey = 0;
1164                         bsrc.grey += bsrc.delta;
1165                         bdst.grey += bdst.delta;
1166                 }else{
1167                         if(ma){
1168                                 *bdst.red = CALC12(fs, *bsrc.red, fd, *bdst.red, t);
1169                                 *bdst.grn = CALC12(fs, *bsrc.grn, fd, *bdst.grn, t);
1170                                 *bdst.blu = CALC12(fs, *bsrc.blu, fd, *bdst.blu, t);
1171                         }
1172                         else if(zero)
1173                                 *bdst.red = *bdst.grn = *bdst.blu = 0;
1174                         bsrc.red += bsrc.delta;
1175                         bsrc.blu += bsrc.delta;
1176                         bsrc.grn += bsrc.delta;
1177                         bdst.red += bdst.delta;
1178                         bdst.blu += bdst.delta;
1179                         bdst.grn += bdst.delta;
1180                 }
1181                 bmask.alpha += bmask.delta;
1182                 if(bdst.alpha != &ones){
1183                         if(ma)
1184                                 *bdst.alpha = fs+CALC11(fd, da, t);
1185                         else if(zero)
1186                                 *bdst.alpha = 0;
1187                         bdst.alpha += bdst.delta;
1188                 }
1189         }
1190         return obdst;
1191 }
1192
1193 static Buffer
1194 boolcalc1011(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int grey, int op)
1195 {
1196         Buffer obdst;
1197         int i, ma, zero;
1198
1199         obdst = bdst;
1200         zero = !(op&1);
1201
1202         for(i=0; i<dx; i++){
1203                 ma = *bmask.alpha;
1204
1205                 if(grey){
1206                         if(ma)
1207                                 *bdst.grey = *bsrc.grey;
1208                         else if(zero)
1209                                 *bdst.grey = 0;
1210                         bsrc.grey += bsrc.delta;
1211                         bdst.grey += bdst.delta;
1212                 }else{
1213                         if(ma){
1214                                 *bdst.red = *bsrc.red;
1215                                 *bdst.grn = *bsrc.grn;
1216                                 *bdst.blu = *bsrc.blu;
1217                         }
1218                         else if(zero)
1219                                 *bdst.red = *bdst.grn = *bdst.blu = 0;
1220                         bsrc.red += bsrc.delta;
1221                         bsrc.blu += bsrc.delta;
1222                         bsrc.grn += bsrc.delta;
1223                         bdst.red += bdst.delta;
1224                         bdst.blu += bdst.delta;
1225                         bdst.grn += bdst.delta;
1226                 }
1227                 bmask.alpha += bmask.delta;
1228                 if(bdst.alpha != &ones){
1229                         if(ma)
1230                                 *bdst.alpha = 255;
1231                         else if(zero)
1232                                 *bdst.alpha = 0;
1233                         bdst.alpha += bdst.delta;
1234                 }
1235         }
1236         return obdst;
1237 }
1238 /*
1239  * Replicated cached scan line read.  Call the function listed in the Param,
1240  * but cache the result so that for replicated images we only do the work once.
1241  */
1242 static Buffer
1243 replread(Param *p, uchar *s, int y)
1244 {
1245         Buffer *b;
1246
1247         USED(s);
1248         b = &p->bcache[y];
1249         if((p->bfilled & (1<<y)) == 0){
1250                 p->bfilled |= 1<<y;
1251                 *b = p->replcall(p, p->bufbase+y*p->bufdelta, y);
1252         }
1253         return *b;
1254 }
1255
1256 /*
1257  * Alpha reading function that simply relabels the grey pointer.
1258  */
1259 static Buffer
1260 greymaskread(Param *p, uchar *buf, int y)
1261 {
1262         Buffer b;
1263
1264         b = p->greymaskcall(p, buf, y);
1265         b.alpha = b.grey;
1266         return b;
1267 }
1268
1269 static Buffer
1270 readnbit(Param *p, uchar *buf, int y)
1271 {
1272         Buffer b;
1273         Memimage *img;
1274         uchar *repl, *r, *w, *ow, bits;
1275         int i, n, sh, depth, x, dx, npack, nbits;
1276
1277         b.rgba = (ulong*)buf;
1278         b.grey = w = buf;
1279         b.red = b.blu = b.grn = w;
1280         b.alpha = &ones;
1281         b.delta = 1;
1282
1283         dx = p->dx;
1284         img = p->img;
1285         depth = img->depth;
1286         repl = &replbit[depth][0];
1287         npack = 8/depth;
1288         sh = 8-depth;
1289
1290         /* copy from p->r.min.x until end of repl rectangle */
1291         x = p->r.min.x;
1292         n = dx;
1293         if(n > p->img->r.max.x - x)
1294                 n = p->img->r.max.x - x;
1295
1296         r = p->bytermin + y*p->bwidth;
1297         bits = *r++;
1298         nbits = 8;
1299         if(i=x&(npack-1)){
1300                 bits <<= depth*i;
1301                 nbits -= depth*i;
1302         }
1303         for(i=0; i<n; i++){
1304                 if(nbits == 0){
1305                         bits = *r++;
1306                         nbits = 8;
1307                 }
1308                 *w++ = repl[bits>>sh];
1309                 bits <<= depth;
1310                 nbits -= depth;
1311         }
1312         dx -= n;
1313         if(dx == 0)
1314                 return b;
1315
1316         assert(x+i == p->img->r.max.x);
1317
1318         /* copy from beginning of repl rectangle until where we were before. */
1319         x = p->img->r.min.x;
1320         n = dx;
1321         if(n > p->r.min.x - x)
1322                 n = p->r.min.x - x;
1323
1324         r = p->bytey0s + y*p->bwidth;
1325         bits = *r++;
1326         nbits = 8;
1327         if(i=x&(npack-1)){
1328                 bits <<= depth*i;
1329                 nbits -= depth*i;
1330         }
1331         for(i=0; i<n; i++){
1332                 if(nbits == 0){
1333                         bits = *r++;
1334                         nbits = 8;
1335                 }
1336                 *w++ = repl[bits>>sh];
1337                 bits <<= depth;
1338                 nbits -= depth;
1339         }
1340         dx -= n;
1341         if(dx == 0)
1342                 return b;
1343
1344         assert(dx > 0);
1345         /* now we have exactly one full scan line: just replicate the buffer itself until we are done */
1346         ow = buf;
1347         while(dx--)
1348                 *w++ = *ow++;
1349
1350         return b;
1351 }
1352
1353 static void
1354 writenbit(Param *p, uchar *w, Buffer src)
1355 {
1356         uchar *r;
1357         ulong bits;
1358         int i, sh, depth, npack, nbits, x, ex;
1359
1360         assert(src.grey != nil && src.delta == 1);
1361
1362         x = p->r.min.x;
1363         ex = x+p->dx;
1364         depth = p->img->depth;
1365         npack = 8/depth;
1366
1367         i=x&(npack-1);
1368         bits = i ? (*w >> (8-depth*i)) : 0;
1369         nbits = depth*i;
1370         sh = 8-depth;
1371         r = src.grey;
1372
1373         for(; x<ex; x++){
1374                 bits <<= depth;
1375                 bits |= (*r++ >> sh);
1376                 nbits += depth;
1377                 if(nbits == 8){
1378                         *w++ = bits;
1379                         nbits = 0;
1380                 }
1381         }
1382
1383         if(nbits){
1384                 sh = 8-nbits;
1385                 bits <<= sh;
1386                 bits |= *w & ((1<<sh)-1);
1387                 *w = bits;
1388         }
1389         return;
1390 }
1391
1392 static Buffer
1393 readcmap(Param *p, uchar *buf, int y)
1394 {
1395         Buffer b;
1396         int a, convgrey, copyalpha, dx, i, m;
1397         uchar *q, *cmap, *begin, *end, *r, *w;
1398
1399         begin = p->bytey0s + y*p->bwidth;
1400         r = p->bytermin + y*p->bwidth;
1401         end = p->bytey0e + y*p->bwidth;
1402         cmap = p->img->cmap->cmap2rgb;
1403         convgrey = p->convgrey;
1404         copyalpha = (p->img->flags&Falpha) != 0;
1405
1406         w = buf;
1407         dx = p->dx;
1408         if(copyalpha){
1409                 b.alpha = buf++;
1410                 a = p->img->shift[CAlpha]/8;
1411                 m = p->img->shift[CMap]/8;
1412                 for(i=0; i<dx; i++){
1413                         *w++ = r[a];
1414                         q = cmap+r[m]*3;
1415                         r += 2;
1416                         if(r == end)
1417                                 r = begin;
1418                         if(convgrey){
1419                                 *w++ = RGB2K(q[0], q[1], q[2]);
1420                         }else{
1421                                 *w++ = q[2];    /* blue */
1422                                 *w++ = q[1];    /* green */
1423                                 *w++ = q[0];    /* red */
1424                         }
1425                 }
1426         }else{
1427                 b.alpha = &ones;
1428                 for(i=0; i<dx; i++){
1429                         q = cmap+*r++*3;
1430                         if(r == end)
1431                                 r = begin;
1432                         if(convgrey){
1433                                 *w++ = RGB2K(q[0], q[1], q[2]);
1434                         }else{
1435                                 *w++ = q[2];    /* blue */
1436                                 *w++ = q[1];    /* green */
1437                                 *w++ = q[0];    /* red */
1438                         }
1439                 }
1440         }
1441
1442         b.rgba = (ulong*)(buf-copyalpha);
1443
1444         if(convgrey){
1445                 b.grey = buf;
1446                 b.red = b.blu = b.grn = buf;
1447                 b.delta = 1+copyalpha;
1448         }else{
1449                 b.blu = buf;
1450                 b.grn = buf+1;
1451                 b.red = buf+2;
1452                 b.grey = nil;
1453                 b.delta = 3+copyalpha;
1454         }
1455         return b;
1456 }
1457
1458 static void
1459 writecmap(Param *p, uchar *w, Buffer src)
1460 {
1461         uchar *cmap, *red, *grn, *blu, *alpha;
1462         int i, dx, delta, a, m;
1463
1464         cmap = p->img->cmap->rgb2cmap;
1465
1466         delta = src.delta;
1467         red= src.red;
1468         grn = src.grn;
1469         blu = src.blu;
1470
1471         dx = p->dx;
1472         if(p->img->flags&Falpha){
1473                 alpha = src.alpha;
1474                 m = p->img->shift[CMap]/8;
1475                 a = p->img->shift[CAlpha]/8;
1476                 for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta, w+=2){
1477                         w[a] = *alpha;
1478                         if(alpha != &ones)
1479                                 alpha+=delta;
1480                         w[m] = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1481                 }
1482         } else {
1483                 for(i=0; i<dx; i++, red+=delta, grn+=delta, blu+=delta)
1484                         *w++ = cmap[(*red>>4)*256+(*grn>>4)*16+(*blu>>4)];
1485         }
1486 }
1487
1488 static Buffer
1489 readbyte(Param *p, uchar *buf, int y)
1490 {
1491         Buffer b;
1492         Memimage *img;
1493         int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
1494         uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
1495         uchar ured, ugrn, ublu;
1496         ulong u;
1497
1498         img = p->img;
1499         begin = p->bytey0s + y*p->bwidth;
1500         r = p->bytermin + y*p->bwidth;
1501         end = p->bytey0e + y*p->bwidth;
1502
1503         w = buf;
1504         dx = p->dx;
1505         nb = img->depth/8;
1506
1507         convgrey = p->convgrey; /* convert rgb to grey */
1508         isgrey = img->flags&Fgrey;
1509         alphaonly = p->alphaonly;
1510         copyalpha = (img->flags&Falpha) != 0;
1511
1512         /* if we can, avoid processing everything */
1513         if(!(img->flags&Frepl) && !convgrey && (img->flags&Fbytes)){
1514                 memset(&b, 0, sizeof b);
1515                 if(p->needbuf){
1516                         memmove(buf, r, dx*nb);
1517                         r = buf;
1518                 }
1519                 b.rgba = (ulong*)r;
1520                 if(copyalpha)
1521                         b.alpha = r+img->shift[CAlpha]/8;
1522                 else
1523                         b.alpha = &ones;
1524                 if(isgrey){
1525                         b.grey = r+img->shift[CGrey]/8;
1526                         b.red = b.grn = b.blu = b.grey;
1527                 }else{
1528                         b.red = r+img->shift[CRed]/8;
1529                         b.grn = r+img->shift[CGreen]/8;
1530                         b.blu = r+img->shift[CBlue]/8;
1531                 }
1532                 b.delta = nb;
1533                 return b;
1534         }
1535
1536         rrepl = replbit[img->nbits[CRed]];
1537         grepl = replbit[img->nbits[CGreen]];
1538         brepl = replbit[img->nbits[CBlue]];
1539         arepl = replbit[img->nbits[CAlpha]];
1540         krepl = replbit[img->nbits[CGrey]];
1541
1542         for(i=0; i<dx; i++){
1543                 u = r[0] | (r[1]<<8) | (r[2]<<16) | (r[3]<<24);
1544                 if(copyalpha) {
1545                         *w++ = arepl[(u>>img->shift[CAlpha]) & img->mask[CAlpha]];
1546                 }
1547
1548                 if(isgrey)
1549                         *w++ = krepl[(u >> img->shift[CGrey]) & img->mask[CGrey]];
1550                 else if(!alphaonly){
1551                         ured = rrepl[(u >> img->shift[CRed]) & img->mask[CRed]];
1552                         ugrn = grepl[(u >> img->shift[CGreen]) & img->mask[CGreen]];
1553                         ublu = brepl[(u >> img->shift[CBlue]) & img->mask[CBlue]];
1554                         if(convgrey){
1555                                 *w++ = RGB2K(ured, ugrn, ublu);
1556                         }else{
1557                                 w[0] = ublu;
1558                                 w[1] = ugrn;
1559                                 w[2] = ured;
1560                                 w += 3;
1561                         }
1562                 }
1563                 r += nb;
1564                 if(r == end)
1565                         r = begin;
1566         }
1567
1568         b.alpha = copyalpha ? buf : &ones;
1569         b.rgba = (ulong*)buf;
1570         if(alphaonly){
1571                 b.red = b.grn = b.blu = b.grey = nil;
1572                 if(!copyalpha)
1573                         b.rgba = nil;
1574                 b.delta = 1;
1575         }else if(isgrey || convgrey){
1576                 b.grey = buf+copyalpha;
1577                 b.red = b.grn = b.blu = buf+copyalpha;
1578                 b.delta = copyalpha+1;
1579         }else{
1580                 b.blu = buf+copyalpha;
1581                 b.grn = buf+copyalpha+1;
1582                 b.grey = nil;
1583                 b.red = buf+copyalpha+2;
1584                 b.delta = copyalpha+3;
1585         }
1586         return b;
1587 }
1588
1589 static void
1590 writebyte(Param *p, uchar *w, Buffer src)
1591 {
1592         Memimage *img;
1593         int i, isalpha, isgrey, nb, delta, dx, adelta;
1594         uchar *red, *grn, *blu, *grey, *alpha;
1595         ulong u, mask;
1596
1597         img = p->img;
1598
1599         red = src.red;
1600         grn = src.grn;
1601         blu = src.blu;
1602         alpha = src.alpha;
1603         delta = src.delta;
1604         grey = src.grey;
1605         dx = p->dx;
1606
1607         nb = img->depth/8;
1608
1609         isalpha = img->flags&Falpha;
1610         isgrey = img->flags&Fgrey;
1611         adelta = src.delta;
1612
1613         if(isalpha && alpha == &ones)
1614                 adelta = 0;
1615
1616         if((img->flags&Fbytes) != 0){
1617                 int ogry, ored, ogrn, oblu, oalp;
1618
1619                 ogry = img->shift[CGrey]/8;
1620                 ored = img->shift[CRed]/8;
1621                 ogrn = img->shift[CGreen]/8;
1622                 oblu = img->shift[CBlue]/8;
1623                 oalp = img->shift[CAlpha]/8;
1624
1625                 for(i=0; i<dx; i++){
1626                         if(isgrey){
1627                                 w[ogry] = *grey;
1628                                 grey += delta;
1629                         } else {
1630                                 w[ored] = *red;
1631                                 w[ogrn] = *grn;
1632                                 w[oblu] = *blu;
1633                                 red += delta;
1634                                 grn += delta;
1635                                 blu += delta;
1636                         }
1637                         if(isalpha){
1638                                 w[oalp] = *alpha;
1639                                 alpha += adelta;
1640                         }
1641                         w += nb;
1642                 }
1643                 return;
1644         }
1645
1646         mask = (nb==4) ? 0 : ~((1<<img->depth)-1);
1647         for(i=0; i<dx; i++){
1648                 u = w[0] | (w[1]<<8) | (w[2]<<16) | (w[3]<<24);
1649                 u &= mask;
1650                 if(isgrey){
1651                         u |= ((*grey >> (8-img->nbits[CGrey])) & img->mask[CGrey]) << img->shift[CGrey];
1652                         grey += delta;
1653                 }else{
1654                         u |= ((*red >> (8-img->nbits[CRed])) & img->mask[CRed]) << img->shift[CRed];
1655                         u |= ((*grn >> (8-img->nbits[CGreen])) & img->mask[CGreen]) << img->shift[CGreen];
1656                         u |= ((*blu >> (8-img->nbits[CBlue])) & img->mask[CBlue]) << img->shift[CBlue];
1657                         red += delta;
1658                         grn += delta;
1659                         blu += delta;
1660                 }
1661
1662                 if(isalpha){
1663                         u |= ((*alpha >> (8-img->nbits[CAlpha])) & img->mask[CAlpha]) << img->shift[CAlpha];
1664                         alpha += adelta;
1665                 }
1666
1667                 w[0] = u;
1668                 w[1] = u>>8;
1669                 w[2] = u>>16;
1670                 w[3] = u>>24;
1671                 w += nb;
1672         }
1673 }
1674
1675 static Readfn*
1676 readfn(Memimage *img)
1677 {
1678         if(img->depth < 8)
1679                 return readnbit;
1680         if(img->nbits[CMap] == 8)
1681                 return readcmap;
1682         return readbyte;
1683 }
1684
1685 static Readfn*
1686 readalphafn(Memimage *m)
1687 {
1688         USED(m);
1689         return readbyte;
1690 }
1691
1692 static Writefn*
1693 writefn(Memimage *img)
1694 {
1695         if(img->depth < 8)
1696                 return writenbit;
1697         if(img->nbits[CMap] == 8)
1698                 return writecmap;
1699         return writebyte;
1700 }
1701
1702 static void
1703 nullwrite(Param *p, uchar *s, Buffer b)
1704 {
1705         USED(p);
1706         USED(s);
1707         USED(b);
1708 }
1709
1710 static Buffer
1711 readptr(Param *p, uchar *s, int y)
1712 {
1713         Buffer b;
1714         uchar *q;
1715
1716         USED(s);
1717         q = p->bytermin + y*p->bwidth;
1718         b.red = q;      /* ptr to data */
1719         b.grn = b.blu = b.grey = nil;
1720         b.alpha = &ones;
1721         b.rgba = (ulong*)q;
1722         b.delta = p->img->depth/8;
1723         return b;
1724 }
1725
1726 static Buffer
1727 boolmemmove(Buffer bdst, Buffer bsrc, Buffer b1, int dx, int i, int o)
1728 {
1729         USED(i);
1730         USED(o);
1731         USED(b1);
1732         USED(bsrc);
1733         memmove(bdst.red, bsrc.red, dx*bdst.delta);
1734         return bdst;
1735 }
1736
1737 static Buffer
1738 boolcopy8(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1739 {
1740         uchar *m, *r, *w, *ew;
1741
1742         USED(i);
1743         USED(o);
1744         m = bmask.grey;
1745         w = bdst.red;
1746         r = bsrc.red;
1747         ew = w+dx;
1748         for(; w < ew; w++,r++)
1749                 if(*m++)
1750                         *w = *r;
1751         return bdst;    /* not used */
1752 }
1753
1754 static Buffer
1755 boolcopy16(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1756 {
1757         uchar *m;
1758         ushort *r, *w, *ew;
1759
1760         USED(i);
1761         USED(o);
1762         m = bmask.grey;
1763         w = (ushort*)bdst.red;
1764         r = (ushort*)bsrc.red;
1765         ew = w+dx;
1766         for(; w < ew; w++,r++)
1767                 if(*m++)
1768                         *w = *r;
1769         return bdst;    /* not used */
1770 }
1771
1772 static Buffer
1773 boolcopy24(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1774 {
1775         uchar *m;
1776         uchar *r, *w, *ew;
1777
1778         USED(i);
1779         USED(o);
1780         m = bmask.grey;
1781         w = bdst.red;
1782         r = bsrc.red;
1783         ew = w+dx*3;
1784         while(w < ew){
1785                 if(*m++){
1786                         *w++ = *r++;
1787                         *w++ = *r++;
1788                         *w++ = *r++;
1789                 }else{
1790                         w += 3;
1791                         r += 3;
1792                 }
1793         }
1794         return bdst;    /* not used */
1795 }
1796
1797 static Buffer
1798 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
1799 {
1800         uchar *m;
1801         ulong *r, *w, *ew;
1802
1803         USED(i);
1804         USED(o);
1805         m = bmask.grey;
1806         w = (ulong*)bdst.red;
1807         r = (ulong*)bsrc.red;
1808         ew = w+dx;
1809         for(; w < ew; w++,r++)
1810                 if(*m++)
1811                         *w = *r;
1812         return bdst;    /* not used */
1813 }
1814
1815 static Buffer
1816 genconv(Param *p, uchar *buf, int y)
1817 {
1818         Buffer b;
1819         int nb;
1820         uchar *r, *w, *ew;
1821
1822         /* read from source into RGB format in convbuf */
1823         b = p->convreadcall(p, p->convbuf, y);
1824
1825         /* write RGB format into dst format in buf */
1826         p->convwritecall(p->convdpar, buf, b);
1827
1828         if(p->convdx){
1829                 nb = p->convdpar->img->depth/8;
1830                 r = buf;
1831                 w = buf+nb*p->dx;
1832                 ew = buf+nb*p->convdx;
1833                 while(w<ew)
1834                         *w++ = *r++;
1835         }
1836
1837         b.red = buf;
1838         b.blu = b.grn = b.grey = nil;
1839         b.alpha = &ones;
1840         b.rgba = (ulong*)buf;
1841         b.delta = 0;
1842
1843         return b;
1844 }
1845
1846 static Readfn*
1847 convfn(Memimage *dst, Param *dpar, Memimage *src, Param *spar, int *ndrawbuf)
1848 {
1849         if(dst->chan == src->chan && !(src->flags&Frepl))
1850                 return readptr;
1851
1852         if(dst->chan==CMAP8 && (src->chan==GREY1||src->chan==GREY2||src->chan==GREY4)){
1853                 /* cheat because we know the replicated value is exactly the color map entry. */
1854                 return readnbit;
1855         }
1856
1857         spar->convreadcall = readfn(src);
1858         spar->convwritecall = writefn(dst);
1859         spar->convdpar = dpar;
1860
1861         /* allocate a conversion buffer */
1862         spar->convbufoff = *ndrawbuf;
1863         *ndrawbuf += spar->dx*4;
1864
1865         if(spar->dx > Dx(spar->img->r)){
1866                 spar->convdx = spar->dx;
1867                 spar->dx = Dx(spar->img->r);
1868         }
1869
1870         return genconv;
1871 }
1872
1873 static ulong
1874 pixelbits(Memimage *i, Point pt)
1875 {
1876         uchar *p;
1877         ulong val;
1878         int off, bpp, npack;
1879
1880         val = 0;
1881         p = byteaddr(i, pt);
1882         switch(bpp=i->depth){
1883         case 1:
1884         case 2:
1885         case 4:
1886                 npack = 8/bpp;
1887                 off = pt.x%npack;
1888                 val = p[0] >> bpp*(npack-1-off);
1889                 val &= (1<<bpp)-1;
1890                 break;
1891         case 8:
1892                 val = p[0];
1893                 break;
1894         case 16:
1895                 val = p[0]|(p[1]<<8);
1896                 break;
1897         case 24:
1898                 val = p[0]|(p[1]<<8)|(p[2]<<16);
1899                 break;
1900         case 32:
1901                 val = p[0]|(p[1]<<8)|(p[2]<<16)|(p[3]<<24);
1902                 break;
1903         }
1904         while(bpp<32){
1905                 val |= val<<bpp;
1906                 bpp *= 2;
1907         }
1908         return val;
1909 }
1910
1911 static Calcfn*
1912 boolcopyfn(Memimage *img, Memimage *mask)
1913 {
1914         if(mask->flags&Frepl && Dx(mask->r)==1 && Dy(mask->r)==1 && pixelbits(mask, mask->r.min)==~0)
1915                 return boolmemmove;
1916
1917         switch(img->depth){
1918         case 8:
1919                 return boolcopy8;
1920         case 16:
1921                 return boolcopy16;
1922         case 24:
1923                 return boolcopy24;
1924         case 32:
1925                 return boolcopy32;
1926         default:
1927                 assert(0 /* boolcopyfn */);
1928         }
1929         return nil;
1930 }
1931
1932 /*
1933  * Optimized draw for filling and scrolling; uses memset and memmove.
1934  */
1935 static void
1936 memsets(void *vp, ushort val, int n)
1937 {
1938         ushort *p, *ep;
1939         uchar b[2];
1940
1941         /* make little endian */
1942         b[0] = val;
1943         b[1] = val>>8;
1944         val = *(ushort*)b;
1945
1946         p = vp;
1947         ep = p+n;
1948         while(p<ep)
1949                 *p++ = val;
1950 }
1951
1952 static void
1953 memsetl(void *vp, ulong val, int n)
1954 {
1955         ulong *p, *ep;
1956         uchar b[4];
1957
1958         /* make little endian */
1959         b[0] = val;
1960         b[1] = val>>8;
1961         b[2] = val>>16;
1962         b[3] = val>>24;
1963         val = *(ulong*)b;
1964
1965         p = vp;
1966         ep = p+n;
1967         while(p<ep)
1968                 *p++ = val;
1969 }
1970
1971 static void
1972 memset24(void *vp, ulong val, int n)
1973 {
1974         uchar *p, *ep;
1975         uchar a,b,c;
1976
1977         a = val;
1978         b = val>>8;
1979         c = val>>16;
1980
1981         p = vp;
1982         ep = p+3*n;
1983         while(p<ep){
1984                 p[0] = a;
1985                 p[1] = b;
1986                 p[2] = c;
1987                 p += 3;
1988         }
1989 }
1990
1991 static ulong
1992 imgtorgba(Memimage *img, ulong val)
1993 {
1994         uchar r, g, b, a;
1995         int nb, ov, v;
1996         ulong chan;
1997         uchar *p;
1998
1999         a = 0xFF;
2000         r = g = b = 0xAA;       /* garbage */
2001         for(chan=img->chan; chan; chan>>=8){
2002                 nb = NBITS(chan);
2003                 ov = v = val&((1<<nb)-1);
2004                 val >>= nb;
2005
2006                 while(nb < 8){
2007                         v |= v<<nb;
2008                         nb *= 2;
2009                 }
2010                 v >>= (nb-8);
2011
2012                 switch(TYPE(chan)){
2013                 case CRed:
2014                         r = v;
2015                         break;
2016                 case CGreen:
2017                         g = v;
2018                         break;
2019                 case CBlue:
2020                         b = v;
2021                         break;
2022                 case CAlpha:
2023                         a = v;
2024                         break;
2025                 case CGrey:
2026                         r = g = b = v;
2027                         break;
2028                 case CMap:
2029                         p = img->cmap->cmap2rgb+3*ov;
2030                         r = p[0];
2031                         g = p[1];
2032                         b = p[2];
2033                         break;
2034                 }
2035         }
2036         return (r<<24)|(g<<16)|(b<<8)|a;
2037 }
2038
2039 static ulong
2040 rgbatoimg(Memimage *img, ulong rgba)
2041 {
2042         ulong chan;
2043         int d, nb;
2044         ulong v;
2045         uchar *p, r, g, b, a, m;
2046
2047         v = 0;
2048         r = rgba>>24;
2049         g = rgba>>16;
2050         b = rgba>>8;
2051         a = rgba;
2052         d = 0;
2053         for(chan=img->chan; chan; chan>>=8){
2054                 nb = NBITS(chan);
2055                 switch(TYPE(chan)){
2056                 case CRed:
2057                         v |= (r>>(8-nb))<<d;
2058                         break;
2059                 case CGreen:
2060                         v |= (g>>(8-nb))<<d;
2061                         break;
2062                 case CBlue:
2063                         v |= (b>>(8-nb))<<d;
2064                         break;
2065                 case CAlpha:
2066                         v |= (a>>(8-nb))<<d;
2067                         break;
2068                 case CMap:
2069                         p = img->cmap->rgb2cmap;
2070                         m = p[(r>>4)*256+(g>>4)*16+(b>>4)];
2071                         v |= (m>>(8-nb))<<d;
2072                         break;
2073                 case CGrey:
2074                         m = RGB2K(r,g,b);
2075                         v |= (m>>(8-nb))<<d;
2076                         break;
2077                 }
2078                 d += nb;
2079         }
2080         return v;
2081 }
2082
2083 static int
2084 memoptdraw(Memdrawparam *par)
2085 {
2086         int m, y, dy, dx, op;
2087         ulong v;
2088         Memimage *src;
2089         Memimage *dst;
2090
2091         dx = Dx(par->r);
2092         dy = Dy(par->r);
2093         src = par->src;
2094         dst = par->dst;
2095         op = par->op;
2096
2097         /*
2098          * If we have an opaque mask and source is one opaque pixel we can convert to the
2099          * destination format and just replicate with memset.
2100          */
2101         m = Simplesrc|Simplemask|Fullmask;
2102         if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
2103                 int d, dwid, ppb, np, nb;
2104                 uchar *dp, lm, rm;
2105
2106                 dwid = dst->width*sizeof(ulong);
2107                 dp = byteaddr(dst, par->r.min);
2108                 v = par->sdval;
2109                 switch(dst->depth){
2110                 case 1:
2111                 case 2:
2112                 case 4:
2113                         for(d=dst->depth; d<8; d*=2)
2114                                 v |= (v<<d);
2115                         ppb = 8/dst->depth;     /* pixels per byte */
2116                         m = ppb-1;
2117                         /* left edge */
2118                         np = par->r.min.x&m;            /* no. pixels unused on left side of word */
2119                         dx -= (ppb-np);
2120                         nb = 8 - np * dst->depth;               /* no. bits used on right side of word */
2121                         lm = (1<<nb)-1;
2122
2123                         /* right edge */
2124                         np = par->r.max.x&m;    /* no. pixels used on left side of word */
2125                         dx -= np;
2126                         nb = 8 - np * dst->depth;               /* no. bits unused on right side of word */
2127                         rm = ~((1<<nb)-1);
2128
2129                         /* lm, rm are masks that are 1 where we should touch the bits */
2130                         if(dx < 0){     /* just one byte */
2131                                 lm &= rm;
2132                                 for(y=0; y<dy; y++, dp+=dwid)
2133                                         *dp ^= (v ^ *dp) & lm;
2134                         }else if(dx == 0){      /* no full bytes */
2135                                 if(lm)
2136                                         dwid--;
2137
2138                                 for(y=0; y<dy; y++, dp+=dwid){
2139                                         if(lm){
2140                                                 *dp ^= (v ^ *dp) & lm;
2141                                                 dp++;
2142                                         }
2143                                         *dp ^= (v ^ *dp) & rm;
2144                                 }
2145                         }else{          /* full bytes in middle */
2146                                 dx /= ppb;
2147                                 if(lm)
2148                                         dwid--;
2149                                 dwid -= dx;
2150
2151                                 for(y=0; y<dy; y++, dp+=dwid){
2152                                         if(lm){
2153                                                 *dp ^= (v ^ *dp) & lm;
2154                                                 dp++;
2155                                         }
2156                                         memset(dp, v, dx);
2157                                         dp += dx;
2158                                         *dp ^= (v ^ *dp) & rm;
2159                                 }
2160                         }
2161                         return 1;
2162                 case 8:
2163                         for(y=0; y<dy; y++, dp+=dwid)
2164                                 memset(dp, v, dx);
2165                         return 1;
2166                 case 16:
2167                         for(y=0; y<dy; y++, dp+=dwid)
2168                                 memsets(dp, v, dx);
2169                         return 1;
2170                 case 24:
2171                         for(y=0; y<dy; y++, dp+=dwid)
2172                                 memset24(dp, v, dx);
2173                         return 1;
2174                 case 32:
2175                         for(y=0; y<dy; y++, dp+=dwid)
2176                                 memsetl(dp, v, dx);
2177                         return 1;
2178                 default:
2179                         assert(0 /* bad dest depth in memoptdraw */);
2180                 }
2181         }
2182
2183         /*
2184          * If no source alpha, an opaque mask, we can just copy the
2185          * source onto the destination.  If the channels are the same and
2186          * the source is not replicated, memmove suffices.
2187          */
2188         m = Simplemask|Fullmask;
2189         if((par->state&(m|Replsrc))==m && src->depth >= 8
2190         && src->chan == dst->chan && !(src->flags&Falpha) && (op == S || op == SoverD)){
2191                 uchar *sp, *dp;
2192                 long swid, dwid, nb;
2193                 int dir;
2194
2195                 if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
2196                         dir = -1;
2197                 else
2198                         dir = 1;
2199
2200                 swid = src->width*sizeof(ulong);
2201                 dwid = dst->width*sizeof(ulong);
2202                 sp = byteaddr(src, par->sr.min);
2203                 dp = byteaddr(dst, par->r.min);
2204                 if(dir == -1){
2205                         sp += (dy-1)*swid;
2206                         dp += (dy-1)*dwid;
2207                         swid = -swid;
2208                         dwid = -dwid;
2209                 }
2210                 nb = (dx*src->depth)/8;
2211                 for(y=0; y<dy; y++, sp+=swid, dp+=dwid)
2212                         memmove(dp, sp, nb);
2213                 return 1;
2214         }
2215
2216         /*
2217          * If we have a 1-bit mask, 1-bit source, and 1-bit destination, and
2218          * they're all bit aligned, we can just use bit operators.  This happens
2219          * when we're manipulating boolean masks, e.g. in the arc code.
2220          */
2221         if((par->state&(Simplemask|Simplesrc|Replmask|Replsrc))==0
2222         && dst->chan==GREY1 && src->chan==GREY1 && par->mask->chan==GREY1
2223         && (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
2224                 uchar *sp, *dp, *mp;
2225                 uchar lm, rm;
2226                 long swid, dwid, mwid;
2227                 int i, x, dir;
2228
2229                 sp = byteaddr(src, par->sr.min);
2230                 dp = byteaddr(dst, par->r.min);
2231                 mp = byteaddr(par->mask, par->mr.min);
2232                 swid = src->width*sizeof(ulong);
2233                 dwid = dst->width*sizeof(ulong);
2234                 mwid = par->mask->width*sizeof(ulong);
2235
2236                 if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
2237                         dir = -1;
2238                 }else
2239                         dir = 1;
2240
2241                 lm = 0xFF>>(par->r.min.x&7);
2242                 rm = 0xFF<<(8-(par->r.max.x&7));
2243                 dx -= (8-(par->r.min.x&7)) + (par->r.max.x&7);
2244
2245                 if(dx < 0){     /* one byte wide */
2246                         lm &= rm;
2247                         if(dir == -1){
2248                                 dp += dwid*(dy-1);
2249                                 sp += swid*(dy-1);
2250                                 mp += mwid*(dy-1);
2251                                 dwid = -dwid;
2252                                 swid = -swid;
2253                                 mwid = -mwid;
2254                         }
2255                         for(y=0; y<dy; y++){
2256                                 *dp ^= (*dp ^ *sp) & *mp & lm;
2257                                 dp += dwid;
2258                                 sp += swid;
2259                                 mp += mwid;
2260                         }
2261                         return 1;
2262                 }
2263
2264                 dx /= 8;
2265                 if(dir == 1){
2266                         i = (lm!=0)+dx+(rm!=0);
2267                         mwid -= i;
2268                         swid -= i;
2269                         dwid -= i;
2270                         for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2271                                 if(lm){
2272                                         *dp ^= (*dp ^ *sp++) & *mp++ & lm;
2273                                         dp++;
2274                                 }
2275                                 for(x=0; x<dx; x++){
2276                                         *dp ^= (*dp ^ *sp++) & *mp++;
2277                                         dp++;
2278                                 }
2279                                 if(rm){
2280                                         *dp ^= (*dp ^ *sp++) & *mp++ & rm;
2281                                         dp++;
2282                                 }
2283                         }
2284                         return 1;
2285                 }else{
2286                 /* dir == -1 */
2287                         i = (lm!=0)+dx+(rm!=0);
2288                         dp += dwid*(dy-1)+i-1;
2289                         sp += swid*(dy-1)+i-1;
2290                         mp += mwid*(dy-1)+i-1;
2291                         dwid = -dwid+i;
2292                         swid = -swid+i;
2293                         mwid = -mwid+i;
2294                         for(y=0; y<dy; y++, dp+=dwid, sp+=swid, mp+=mwid){
2295                                 if(rm){
2296                                         *dp ^= (*dp ^ *sp--) & *mp-- & rm;
2297                                         dp--;
2298                                 }
2299                                 for(x=0; x<dx; x++){
2300                                         *dp ^= (*dp ^ *sp--) & *mp--;
2301                                         dp--;
2302                                 }
2303                                 if(lm){
2304                                         *dp ^= (*dp ^ *sp--) & *mp-- & lm;
2305                                         dp--;
2306                                 }
2307                         }
2308                 }
2309                 return 1;
2310         }
2311         return 0;
2312 }
2313
2314 /*
2315  * Boolean character drawing.
2316  * Solid opaque color through a 1-bit greyscale mask.
2317  */
2318 static int
2319 chardraw(Memdrawparam *par)
2320 {
2321         int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
2322         ulong bits, v, maskwid, dstwid;
2323         uchar *wp, *rp, *q, *wc;
2324         ushort *ws;
2325         ulong *wl;
2326         uchar sp[4];
2327         Rectangle r, mr;
2328         Memimage *mask, *src, *dst;
2329
2330         mask = par->mask;
2331         src = par->src;
2332         dst = par->dst;
2333         r = par->r;
2334         mr = par->mr;
2335         op = par->op;
2336
2337         if((par->state&(Replsrc|Simplesrc|Replmask)) != (Replsrc|Simplesrc)
2338         || mask->depth != 1 || src->flags&Falpha || dst->depth<8 || dst->data==src->data
2339         || op != SoverD)
2340                 return 0;
2341
2342         depth = mask->depth;
2343         maskwid = mask->width*sizeof(ulong);
2344         rp = byteaddr(mask, mr.min);
2345         npack = 8/depth;
2346         bsh = (mr.min.x % npack) * depth;
2347
2348         wp = byteaddr(dst, r.min);
2349         dstwid = dst->width*sizeof(ulong);
2350         dy = Dy(r);
2351         dx = Dx(r);
2352
2353         ddepth = dst->depth;
2354
2355         /*
2356          * for loop counts from bsh to bsh+dx
2357          *
2358          * we want the bottom bits to be the amount
2359          * to shift the pixels down, so for n≡0 (mod 8) we want
2360          * bottom bits 7.  for n≡1, 6, etc.
2361          * the bits come from -n-1.
2362          */
2363
2364         bx = -bsh-1;
2365         ex = -bsh-1-dx;
2366         SET(bits);
2367         v = par->sdval;
2368
2369         /* make little endian */
2370         sp[0] = v;
2371         sp[1] = v>>8;
2372         sp[2] = v>>16;
2373         sp[3] = v>>24;
2374
2375         for(y=0; y<dy; y++, rp+=maskwid, wp+=dstwid){
2376                 q = rp;
2377                 if(bsh)
2378                         bits = *q++;
2379                 switch(ddepth){
2380                 case 8:
2381                         wc = wp;
2382                         for(x=bx; x>ex; x--, wc++){
2383                                 i = x&7;
2384                                 if(i == 8-1)
2385                                         bits = *q++;
2386                                 if((bits>>i)&1)
2387                                         *wc = v;
2388                         }
2389                         break;
2390                 case 16:
2391                         ws = (ushort*)wp;
2392                         v = *(ushort*)sp;
2393                         for(x=bx; x>ex; x--, ws++){
2394                                 i = x&7;
2395                                 if(i == 8-1)
2396                                         bits = *q++;
2397                                 if((bits>>i)&1)
2398                                         *ws = v;
2399                         }
2400                         break;
2401                 case 24:
2402                         wc = wp;
2403                         for(x=bx; x>ex; x--, wc+=3){
2404                                 i = x&7;
2405                                 if(i == 8-1)
2406                                         bits = *q++;
2407                                 if((bits>>i)&1){
2408                                         wc[0] = sp[0];
2409                                         wc[1] = sp[1];
2410                                         wc[2] = sp[2];
2411                                 }
2412                         }
2413                         break;
2414                 case 32:
2415                         wl = (ulong*)wp;
2416                         v = *(ulong*)sp;
2417                         for(x=bx; x>ex; x--, wl++){
2418                                 i = x&7;
2419                                 if(i == 8-1)
2420                                         bits = *q++;
2421                                 if((bits>>i)&1)
2422                                         *wl = v;
2423                         }
2424                         break;
2425                 }
2426         }
2427         return 1;
2428 }
2429
2430
2431 void
2432 memfillcolor(Memimage *i, ulong val)
2433 {
2434         ulong bits;
2435         int d, y;
2436
2437         if(val == DNofill)
2438                 return;
2439
2440         bits = rgbatoimg(i, val);
2441         switch(i->depth){
2442         case 24:        /* 24-bit images suck */
2443                 for(y=i->r.min.y; y<i->r.max.y; y++)
2444                         memset24(byteaddr(i, Pt(i->r.min.x, y)), bits, Dx(i->r));
2445                 break;
2446         default:        /* 1, 2, 4, 8, 16, 32 */
2447                 for(d=i->depth; d<32; d*=2)
2448                         bits = (bits << d) | bits;
2449                 memsetl(wordaddr(i, i->r.min), bits, i->width*Dy(i->r));
2450                 break;
2451         }
2452 }
2453