git.lizzy.rs Git - plan9front.git/blobdiff - sys/src/9/port/segment.c
devmnt: deal with partial response for Tversion request in mntversion()
[plan9front.git] / sys / src / 9 / port / segment.c
index 26a62998e66b705f36f7903b4f91da7154c7472f..e9e8959df8fa994735f684fea746a4fe37befaa3 100644 (file)
@@ -5,36 +5,26 @@
 #include       "fns.h"
 #include       "../port/error.h"
 
-static void    imagereclaim(void);
-static void    imagechanreclaim(void);
-
-#include "io.h"
-
 /*
  * Attachable segment types
  */
 static Physseg physseg[10] = {
-       { SG_SHARED,    "shared",       0,      SEGMAXSIZE,     0,      0 },
-       { SG_BSS,       "memory",       0,      SEGMAXSIZE,     0,      0 },
-       { 0,            0,              0,      0,              0,      0 },
+       { SG_SHARED,    "shared",       0,      SEGMAXSIZE      },
+       { SG_BSS,       "memory",       0,      SEGMAXSIZE      },
+       { 0,            0,              0,      0               },
 };
 
 static Lock physseglock;
 
-#define NFREECHAN      64
 #define IHASHSIZE      64
 #define ihash(s)       imagealloc.hash[s%IHASHSIZE]
 static struct Imagealloc
 {
        Lock;
+       Image   *list;
        Image   *free;
        Image   *hash[IHASHSIZE];
        QLock   ireclaim;       /* mutex on reclaiming free images */
-
-       Chan    **freechan;     /* free image channels */
-       int     nfreechan;      /* number of free channels */
-       int     szfreechan;     /* size of freechan array */
-       QLock   fcreclaim;      /* mutex on reclaiming free channels */
 }imagealloc;
 
 Segment* (*_globalsegattach)(Proc*, char*);
@@ -44,19 +34,18 @@ initseg(void)
 {
        Image *i, *ie;
 
-       imagealloc.free = xalloc(conf.nimage*sizeof(Image));
-       if (imagealloc.free == nil)
-               panic("initseg: no memory");
-       ie = &imagealloc.free[conf.nimage-1];
-       for(i = imagealloc.free; i < ie; i++)
+       imagealloc.list = xalloc(conf.nimage*sizeof(Image));
+       if(imagealloc.list == nil)
+               panic("initseg: no memory for Image");
+       ie = &imagealloc.list[conf.nimage-1];
+       for(i = imagealloc.list; i < ie; i++)
                i->next = i+1;
-       i->next = 0;
-       imagealloc.freechan = malloc(NFREECHAN * sizeof(Chan*));
-       imagealloc.szfreechan = NFREECHAN;
+       i->next = nil;
+       imagealloc.free = imagealloc.list;
 }
 
 Segment *
-newseg(int type, ulong base, ulong size)
+newseg(int type, uintptr base, ulong size)
 {
        Segment *s;
        int mapsize;
@@ -64,7 +53,9 @@ newseg(int type, ulong base, ulong size)
        if(size > (SEGMAPSIZE*PTEPERTAB))
                error(Enovmem);
 
-       s = smalloc(sizeof(Segment));
+       s = malloc(sizeof(Segment));
+       if(s == nil)
+               error(Enomem);
        s->ref = 1;
        s->type = type;
        s->base = base;
@@ -75,10 +66,11 @@ newseg(int type, ulong base, ulong size)
 
        mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
        if(mapsize > nelem(s->ssegmap)){
-               mapsize *= 2;
-               if(mapsize > (SEGMAPSIZE*PTEPERTAB))
-                       mapsize = (SEGMAPSIZE*PTEPERTAB);
-               s->map = smalloc(mapsize*sizeof(Pte*));
+               s->map = malloc(mapsize*sizeof(Pte*));
+               if(s->map == nil){
+                       free(s);
+                       error(Enomem);
+               }
                s->mapsize = mapsize;
        }
        else{
@@ -92,61 +84,53 @@ newseg(int type, ulong base, ulong size)
 void
 putseg(Segment *s)
 {
-       Pte **pp, **emap;
+       Pte **pte, **emap;
        Image *i;
 
-       if(s == 0)
+       if(s == nil)
                return;
 
        i = s->image;
-       if(i != 0) {
+       if(i != nil) {
                lock(i);
-               lock(s);
-               if(i->s == s && s->ref == 1)
-                       i->s = 0;
+               if(decref(s) != 0){
+                       unlock(i);
+                       return;
+               }
+               if(i->s == s)
+                       i->s = nil;
                unlock(i);
-       }
-       else
-               lock(s);
-
-       s->ref--;
-       if(s->ref != 0) {
-               unlock(s);
-               return;
-       }
-       unlock(s);
-
-       qlock(&s->lk);
-       if(i)
                putimage(i);
+       } else if(decref(s) != 0)
+               return;
 
        emap = &s->map[s->mapsize];
-       for(pp = s->map; pp < emap; pp++)
-               if(*pp)
-                       freepte(s, *pp);
+       for(pte = s->map; pte < emap; pte++)
+               if(*pte != nil)
+                       freepte(s, *pte);
 
-       qunlock(&s->lk);
        if(s->map != s->ssegmap)
                free(s->map);
-       if(s->profile != 0)
+       if(s->profile != nil)
                free(s->profile);
+
        free(s);
 }
 
 void
-relocateseg(Segment *s, ulong offset)
+relocateseg(Segment *s, uintptr offset)
 {
-       Page **pg, *x;
-       Pte *pte, **p, **endpte;
+       Pte **pte, **emap;
+       Page **pg, **pe;
 
-       endpte = &s->map[s->mapsize];
-       for(p = s->map; p < endpte; p++) {
-               if(*p == 0)
+       emap = &s->map[s->mapsize];
+       for(pte = s->map; pte < emap; pte++) {
+               if(*pte == nil)
                        continue;
-               pte = *p;
-               for(pg = pte->first; pg <= pte->last; pg++) {
-                       if(x = *pg)
-                               x->va += offset;
+               pe = (*pte)->last;
+               for(pg = (*pte)->first; pg <= pe; pg++) {
+                       if(!pagedout(*pg))
+                               (*pg)->va += offset;
                }
        }
 }
@@ -161,15 +145,16 @@ dupseg(Segment **seg, int segno, int share)
        SET(n);
        s = seg[segno];
 
-       qlock(&s->lk);
+       qlock(s);
        if(waserror()){
-               qunlock(&s->lk);
+               qunlock(s);
                nexterror();
        }
        switch(s->type&SG_TYPE) {
        case SG_TEXT:           /* New segment shares pte set */
        case SG_SHARED:
        case SG_PHYSICAL:
+       case SG_FIXED:
                goto sameseg;
 
        case SG_STACK:
@@ -184,9 +169,10 @@ dupseg(Segment **seg, int segno, int share)
 
        case SG_DATA:           /* Copy on write plus demand load info */
                if(segno == TSEG){
+                       n = data2txt(s);
                        poperror();
-                       qunlock(&s->lk);
-                       return data2txt(s);
+                       qunlock(s);
+                       return n;
                }
 
                if(share)
@@ -201,54 +187,53 @@ dupseg(Segment **seg, int segno, int share)
        }
        size = s->mapsize;
        for(i = 0; i < size; i++)
-               if(pte = s->map[i])
+               if((pte = s->map[i]) != nil)
                        n->map[i] = ptecpy(pte);
 
        n->flushme = s->flushme;
        if(s->ref > 1)
                procflushseg(s);
        poperror();
-       qunlock(&s->lk);
+       qunlock(s);
        return n;
 
 sameseg:
        incref(s);
        poperror();
-       qunlock(&s->lk);
+       qunlock(s);
        return s;
 }
 
 void
 segpage(Segment *s, Page *p)
 {
-       Pte **pte;
-       ulong off;
+       Pte **pte, *etp;
+       uintptr soff;
        Page **pg;
 
        if(p->va < s->base || p->va >= s->top)
                panic("segpage");
 
-       off = p->va - s->base;
-       pte = &s->map[off/PTEMAPMEM];
-       if(*pte == 0)
-               *pte = ptealloc();
+       soff = p->va - s->base;
+       pte = &s->map[soff/PTEMAPMEM];
+       if((etp = *pte) == nil)
+               *pte = etp = ptealloc();
 
-       pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
+       pg = &etp->pages[(soff&(PTEMAPMEM-1))/BY2PG];
        *pg = p;
-       if(pg < (*pte)->first)
-               (*pte)->first = pg;
-       if(pg > (*pte)->last)
-               (*pte)->last = pg;
+       if(pg < etp->first)
+               etp->first = pg;
+       if(pg > etp->last)
+               etp->last = pg;
 }
 
 Image*
-attachimage(int type, Chan *c, ulong base, ulong len)
+attachimage(int type, Chan *c, uintptr base, ulong len)
 {
        Image *i, **l;
 
-       /* reclaim any free channels from reclaimed segments */
-       if(imagealloc.nfreechan)
-               imagechanreclaim();
+       c->flag &= ~CCACHE;
+       cclunk(c);
 
        lock(&imagealloc);
 
@@ -259,51 +244,49 @@ attachimage(int type, Chan *c, ulong base, ulong len)
        for(i = ihash(c->qid.path); i; i = i->hash) {
                if(c->qid.path == i->qid.path) {
                        lock(i);
-                       if(eqqid(c->qid, i->qid) &&
-                          eqqid(c->mqid, i->mqid) &&
-                          c->mchan == i->mchan &&
-                          c->type == i->type) {
+                       if(eqchantdqid(c, i->type, i->dev, i->qid, 0) && c->qid.type == i->qid.type)
                                goto found;
-                       }
                        unlock(i);
                }
        }
 
-       /*
-        * imagereclaim dumps pages from the free list which are cached by image
-        * structures. This should free some image structures.
-        */
-       while(!(i = imagealloc.free)) {
+       /* dump pages of inactive images to free image structures */
+       while((i = imagealloc.free) == nil) {
                unlock(&imagealloc);
-               imagereclaim();
-               sched();
+               if(imagereclaim(1000) == 0 && imagealloc.free == nil){
+                       freebroken();           /* can use the memory */
+                       resrcwait("no image after reclaim");
+               }
                lock(&imagealloc);
        }
 
        imagealloc.free = i->next;
 
        lock(i);
-       incref(c);
-       i->c = c;
        i->type = c->type;
+       i->dev = c->dev;
        i->qid = c->qid;
-       i->mqid = c->mqid;
-       i->mchan = c->mchan;
+
        l = &ihash(c->qid.path);
        i->hash = *l;
        *l = i;
+
 found:
        unlock(&imagealloc);
+       if(i->c == nil){
+               i->c = c;
+               incref(c);
+       }
 
-       if(i->s == 0) {
-               /* Disaster after commit in exec */
+       if(i->s == nil) {
+               incref(i);
                if(waserror()) {
                        unlock(i);
-                       pexit(Enovmem, 1);
+                       putimage(i);
+                       nexterror();
                }
                i->s = newseg(type, base, len);
                i->s->image = i;
-               i->ref++;
                poperror();
        }
        else
@@ -312,149 +295,109 @@ found:
        return i;
 }
 
-static struct {
-       int     calls;                  /* times imagereclaim was called */
-       int     loops;                  /* times the main loop was run */
-       uvlong  ticks;                  /* total time in the main loop */
-       uvlong  maxt;                   /* longest time in main loop */
-} irstats;
-
-static void
-imagereclaim(void)
+ulong
+imagereclaim(ulong pages)
 {
-       int n;
-       Page *p;
-       uvlong ticks;
+       static Image *i, *ie;
+       ulong np;
+       int j;
 
-       irstats.calls++;
-       /* Somebody is already cleaning the page cache */
-       if(!canqlock(&imagealloc.ireclaim))
-               return;
+       if(pages == 0)
+               return 0;
 
-       lock(&palloc);
-       ticks = fastticks(nil);
-       n = 0;
-       /*
-        * All the pages with images backing them are at the
-        * end of the list (see putpage) so start there and work
-        * backward.
-        */
-       for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
-               if(p->ref == 0 && canlock(p)) {
-                       if(p->ref == 0) {
-                               n++;
-                               uncachepage(p);
-                       }
-                       unlock(p);
+       eqlock(&imagealloc.ireclaim);
+       if(i == nil){
+               i = imagealloc.list;
+               ie = &imagealloc.list[conf.nimage];
+       }
+       np = 0;
+       for(j = 0; j < conf.nimage; j++, i++){
+               if(i >= ie)
+                       i = imagealloc.list;
+               if(i->ref == 0)
+                       continue;
+               /*
+                * if there are no free image structures, only
+                * reclaim pages from inactive images.
+                */
+               if(imagealloc.free != nil || i->ref == i->pgref){
+                       np += pagereclaim(i, pages - np);
+                       if(np >= pages)
+                               break;
                }
        }
-       ticks = fastticks(nil) - ticks;
-       unlock(&palloc);
-       irstats.loops++;
-       irstats.ticks += ticks;
-       if(ticks > irstats.maxt)
-               irstats.maxt = ticks;
-       //print("T%llud+", ticks);
        qunlock(&imagealloc.ireclaim);
-}
-
-/*
- *  since close can block, this has to be called outside of
- *  spin locks.
- */
-static void
-imagechanreclaim(void)
-{
-       Chan *c;
-
-       /* Somebody is already cleaning the image chans */
-       if(!canqlock(&imagealloc.fcreclaim))
-               return;
 
-       /*
-        * We don't have to recheck that nfreechan > 0 after we
-        * acquire the lock, because we're the only ones who decrement 
-        * it (the other lock contender increments it), and there's only
-        * one of us thanks to the qlock above.
-        */
-       while(imagealloc.nfreechan > 0){
-               lock(&imagealloc);
-               imagealloc.nfreechan--;
-               c = imagealloc.freechan[imagealloc.nfreechan];
-               unlock(&imagealloc);
-               cclose(c);
-       }
-
-       qunlock(&imagealloc.fcreclaim);
+       return np;
 }
 
 void
 putimage(Image *i)
 {
-       Chan *c, **cp;
        Image *f, **l;
+       Chan *c;
+       long r;
 
-       if(i->notext)
+       if(i->notext){
+               decref(i);
                return;
+       }
 
+       c = nil;
        lock(i);
-       if(--i->ref == 0) {
+       r = decref(i);
+       if(r == i->pgref){
+               /*
+                * all remaining references to this image are from the
+                * page cache, so close the chan.
+                */
+               c = i->c;
+               i->c = nil;
+       }
+       if(r == 0){
                l = &ihash(i->qid.path);
                mkqid(&i->qid, ~0, ~0, QTFILE);
                unlock(i);
-               c = i->c;
 
                lock(&imagealloc);
-               for(f = *l; f; f = f->hash) {
+               for(f = *l; f != nil; f = f->hash) {
                        if(f == i) {
                                *l = i->hash;
                                break;
                        }
                        l = &f->hash;
                }
-
                i->next = imagealloc.free;
                imagealloc.free = i;
-
-               /* defer freeing channel till we're out of spin lock's */
-               if(imagealloc.nfreechan == imagealloc.szfreechan){
-                       imagealloc.szfreechan += NFREECHAN;
-                       cp = malloc(imagealloc.szfreechan*sizeof(Chan*));
-                       if(cp == nil)
-                               panic("putimage");
-                       memmove(cp, imagealloc.freechan, imagealloc.nfreechan*sizeof(Chan*));
-                       free(imagealloc.freechan);
-                       imagealloc.freechan = cp;
-               }
-               imagealloc.freechan[imagealloc.nfreechan++] = c;
                unlock(&imagealloc);
-
-               return;
-       }
-       unlock(i);
+       } else
+               unlock(i);
+       if(c != nil)
+               ccloseq(c);     /* does not block */
 }
 
-long
-ibrk(ulong addr, int seg)
+uintptr
+ibrk(uintptr addr, int seg)
 {
        Segment *s, *ns;
-       ulong newtop, newsize;
+       uintptr newtop;
+       ulong newsize;
        int i, mapsize;
        Pte **map;
 
        s = up->seg[seg];
-       if(s == 0)
+       if(s == nil)
                error(Ebadarg);
 
        if(addr == 0)
                return s->base;
 
-       qlock(&s->lk);
+       qlock(s);
 
        /* We may start with the bss overlapping the data */
        if(addr < s->base) {
-               if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
-                       qunlock(&s->lk);
+               if(seg != BSEG || up->seg[DSEG] == nil || addr < up->seg[DSEG]->base) {
+                       qunlock(s);
                        error(Enovmem);
                }
                addr = s->base;
@@ -469,34 +412,38 @@ ibrk(ulong addr, int seg)
                 * already by another proc and is past the validaddr stage.
                 */
                if(s->ref > 1){
-                       qunlock(&s->lk);
+                       qunlock(s);
                        error(Einuse);
                }
                mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
                s->top = newtop;
                s->size = newsize;
-               qunlock(&s->lk);
+               qunlock(s);
                flushmmu();
                return 0;
        }
 
        for(i = 0; i < NSEG; i++) {
                ns = up->seg[i];
-               if(ns == 0 || ns == s)
+               if(ns == nil || ns == s)
                        continue;
                if(newtop >= ns->base && newtop < ns->top) {
-                       qunlock(&s->lk);
+                       qunlock(s);
                        error(Esoverlap);
                }
        }
 
        if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
-               qunlock(&s->lk);
+               qunlock(s);
                error(Enovmem);
        }
        mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
        if(mapsize > s->mapsize){
-               map = smalloc(mapsize*sizeof(Pte*));
+               map = malloc(mapsize*sizeof(Pte*));
+               if(map == nil){
+                       qunlock(s);
+                       error(Enomem);
+               }
                memmove(map, s->map, s->mapsize*sizeof(Pte*));
                if(s->map != s->ssegmap)
                        free(s->map);
@@ -506,106 +453,96 @@ ibrk(ulong addr, int seg)
 
        s->top = newtop;
        s->size = newsize;
-       qunlock(&s->lk);
+       qunlock(s);
        return 0;
 }
 
 /*
- *  called with s->lk locked
+ *  called with s locked
  */
-int
+ulong
 mcountseg(Segment *s)
 {
-       int i, j, pages;
-       Page **map;
+       Pte **pte, **emap;
+       Page **pg, **pe;
+       ulong pages;
+
+       if((s->type&SG_TYPE) == SG_PHYSICAL)
+               return 0;
 
        pages = 0;
-       for(i = 0; i < s->mapsize; i++){
-               if(s->map[i] == 0)
+       emap = &s->map[s->mapsize];
+       for(pte = s->map; pte < emap; pte++){
+               if(*pte == nil)
                        continue;
-               map = s->map[i]->pages;
-               for(j = 0; j < PTEPERTAB; j++)
-                       if(map[j])
+               pe = (*pte)->last;
+               for(pg = (*pte)->first; pg <= pe; pg++)
+                       if(!pagedout(*pg))
                                pages++;
        }
        return pages;
 }
 
 /*
- *  called with s->lk locked
+ *  called with s locked
  */
 void
-mfreeseg(Segment *s, ulong start, int pages)
+mfreeseg(Segment *s, uintptr start, ulong pages)
 {
-       int i, j, size;
-       ulong soff;
-       Page *pg;
-       Page *list;
+       uintptr off;
+       Pte **pte, **emap;
+       Page **pg, **pe;
 
-       soff = start-s->base;
-       j = (soff&(PTEMAPMEM-1))/BY2PG;
+       if(pages == 0)
+               return;
 
-       size = s->mapsize;
-       list = nil;
-       for(i = soff/PTEMAPMEM; i < size; i++) {
-               if(pages <= 0)
-                       break;
-               if(s->map[i] == 0) {
-                       pages -= PTEPERTAB-j;
-                       j = 0;
+       switch(s->type&SG_TYPE){
+       case SG_PHYSICAL:
+       case SG_FIXED:
+               return;
+       }
+
+       /*
+        * we have to make sure other processors flush the
+        * entry from their TLBs before the page is freed.
+        */
+       if(s->ref > 1)
+               procflushseg(s);
+
+       off = start-s->base;
+       pte = &s->map[off/PTEMAPMEM];
+       off = (off&(PTEMAPMEM-1))/BY2PG;
+       for(emap = &s->map[s->mapsize]; pte < emap; pte++, off = 0) {
+               if(*pte == nil) {
+                       off = PTEPERTAB - off;
+                       if(off >= pages)
+                               return;
+                       pages -= off;
                        continue;
                }
-               while(j < PTEPERTAB) {
-                       pg = s->map[i]->pages[j];
-                       /*
-                        * We want to zero s->map[i]->page[j] and putpage(pg),
-                        * but we have to make sure other processors flush the
-                        * entry from their TLBs before the page is freed.
-                        * We construct a list of the pages to be freed, zero
-                        * the entries, then (below) call procflushseg, and call
-                        * putpage on the whole list.
-                        *
-                        * Swapped-out pages don't appear in TLBs, so it's okay
-                        * to putswap those pages before procflushseg.
-                        */
-                       if(pg){
-                               if(onswap(pg))
-                                       putswap(pg);
-                               else{
-                                       pg->next = list;
-                                       list = pg;
-                               }
-                               s->map[i]->pages[j] = 0;
+               pg = &(*pte)->pages[off];
+               for(pe = &(*pte)->pages[PTEPERTAB]; pg < pe; pg++) {
+                       if(*pg != nil){
+                               putpage(*pg);
+                               *pg = nil;
                        }
                        if(--pages == 0)
-                               goto out;
-                       j++;
+                               return;
                }
-               j = 0;
-       }
-out:
-       /* flush this seg in all other processes */
-       if(s->ref > 1)
-               procflushseg(s);
-
-       /* free the pages */
-       for(pg = list; pg != nil; pg = list){
-               list = list->next;
-               putpage(pg);
        }
 }
 
 Segment*
-isoverlap(Proc *p, ulong va, int len)
+isoverlap(Proc *p, uintptr va, uintptr len)
 {
        int i;
        Segment *ns;
-       ulong newtop;
+       uintptr newtop;
 
        newtop = va+len;
        for(i = 0; i < NSEG; i++) {
                ns = p->seg[i];
-               if(ns == 0)
+               if(ns == nil)
                        continue;
                if((newtop > ns->base && newtop <= ns->top) ||
                   (va >= ns->base && va < ns->top))
@@ -634,7 +571,6 @@ addphysseg(Physseg* new)
                unlock(&physseglock);
                return -1;
        }
-
        *ps = *new;
        unlock(&physseglock);
 
@@ -658,8 +594,8 @@ isphysseg(char *name)
        return rv;
 }
 
-ulong
-segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
+uintptr
+segattach(Proc *p, ulong attr, char *name, uintptr va, uintptr len)
 {
        int sno;
        Segment *s, *os;
@@ -668,9 +604,6 @@ segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
        if(va != 0 && va >= USTKTOP)
                error(Ebadarg);
 
-       validaddr((ulong)name, 1, 0);
-       vmemchr(name, 0, ~0);
-
        for(sno = 0; sno < NSEG; sno++)
                if(p->seg[sno] == nil && sno != ESEG)
                        break;
@@ -690,7 +623,10 @@ segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
                }
        }
 
+       /* round up va+len */
+       len += va & (BY2PG-1);
        len = PGROUND(len);
+
        if(len == 0)
                error(Ebadarg);
 
@@ -709,13 +645,12 @@ segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
                                error(Enovmem);
                        va -= len;
                }
-               va &= ~(BY2PG-1);
-       } else {
-               va &= ~(BY2PG-1);
-               if(va == 0 || va >= USTKTOP)
-                       error(Ebadarg);
        }
 
+       va &= ~(BY2PG-1);
+       if(va == 0 || (va+len) > USTKTOP || (va+len) < va)
+               error(Ebadarg);
+
        if(isoverlap(p, va, len) != nil)
                error(Esoverlap);
 
@@ -738,77 +673,73 @@ found:
        return va;
 }
 
-void
-pteflush(Pte *pte, int s, int e)
-{
-       int i;
-       Page *p;
-
-       for(i = s; i < e; i++) {
-               p = pte->pages[i];
-               if(pagedout(p) == 0)
-                       memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
-       }
-}
-
-long
-syssegflush(ulong *arg)
+static void
+segflush(void *va, uintptr len)
 {
+       uintptr from, to, off;
        Segment *s;
-       ulong addr, l;
        Pte *pte;
-       int chunk, ps, pe, len;
+       Page **pg, **pe;
 
-       addr = arg[0];
-       len = arg[1];
+       from = (uintptr)va;
+       to = from + len;
+       to = PGROUND(to);
+       from &= ~(BY2PG-1);
+       if(to < from)
+               error(Ebadarg);
 
-       while(len > 0) {
-               s = seg(up, addr, 1);
-               if(s == 0)
+       while(from < to) {
+               s = seg(up, from, 1);
+               if(s == nil)
                        error(Ebadarg);
 
                s->flushme = 1;
        more:
-               l = len;
-               if(addr+l > s->top)
-                       l = s->top - addr;
-
-               ps = addr-s->base;
-               pte = s->map[ps/PTEMAPMEM];
-               ps &= PTEMAPMEM-1;
-               pe = PTEMAPMEM;
-               if(pe-ps > l){
-                       pe = ps + l;
-                       pe = (pe+BY2PG-1)&~(BY2PG-1);
-               }
-               if(pe == ps) {
-                       qunlock(&s->lk);
-                       error(Ebadarg);
+               len = (s->top < to ? s->top : to) - from;
+               off = from-s->base;
+               pte = s->map[off/PTEMAPMEM];
+               off &= PTEMAPMEM-1;
+               if(off+len > PTEMAPMEM)
+                       len = PTEMAPMEM-off;
+
+               if(pte != nil) {
+                       pg = &pte->pages[off/BY2PG];
+                       pe = pg + len/BY2PG;
+                       while(pg < pe) {
+                               if(!pagedout(*pg))
+                                       (*pg)->txtflush = ~0;
+                               pg++;
+                       }
                }
 
-               if(pte)
-                       pteflush(pte, ps/BY2PG, pe/BY2PG);
-
-               chunk = pe-ps;
-               len -= chunk;
-               addr += chunk;
-
-               if(len > 0 && addr < s->top)
+               from += len;
+               if(from < to && from < s->top)
                        goto more;
 
-               qunlock(&s->lk);
+               qunlock(s);
        }
+}
+
+uintptr
+syssegflush(va_list list)
+{
+       void *va;
+       ulong len;
+
+       va = va_arg(list, void*);
+       len = va_arg(list, ulong);
+       segflush(va, len);
        flushmmu();
        return 0;
 }
 
 void
-segclock(ulong pc)
+segclock(uintptr pc)
 {
        Segment *s;
 
        s = up->seg[TSEG];
-       if(s == 0 || s->profile == 0)
+       if(s == nil || s->profile == nil)
                return;
 
        s->profile[0] += TK2MS(1);
@@ -818,3 +749,190 @@ segclock(ulong pc)
        }
 }
 
+Segment*
+txt2data(Segment *s)
+{
+       Segment *ps;
+
+       ps = newseg(SG_DATA, s->base, s->size);
+       ps->image = s->image;
+       incref(ps->image);
+       ps->fstart = s->fstart;
+       ps->flen = s->flen;
+       ps->flushme = 1;
+       qunlock(s);
+       putseg(s);
+       qlock(ps);
+       return ps;
+}
+
+Segment*
+data2txt(Segment *s)
+{
+       Segment *ps;
+
+       ps = newseg(SG_TEXT, s->base, s->size);
+       ps->image = s->image;
+       incref(ps->image);
+       ps->fstart = s->fstart;
+       ps->flen = s->flen;
+       ps->flushme = 1;
+       return ps;
+}
+
+
+enum {
+       /* commands to segmentioproc */
+       Cnone=0,
+       Cread,
+       Cwrite,
+       Cdie,
+};
+
+static int
+cmddone(void *arg)
+{
+       Segio *sio = arg;
+
+       return sio->cmd == Cnone;
+}
+
+static void
+docmd(Segio *sio, int cmd)
+{
+       sio->err = nil;
+       sio->cmd = cmd;
+       while(waserror())
+               ;
+       wakeup(&sio->cmdwait);
+       sleep(&sio->replywait, cmddone, sio);
+       poperror();
+       if(sio->err != nil)
+               error(sio->err);
+}
+
+static int
+cmdready(void *arg)
+{
+       Segio *sio = arg;
+
+       return sio->cmd != Cnone;
+}
+
+static void
+segmentioproc(void *arg)
+{
+       Segio *sio = arg;
+       int done;
+       int sno;
+
+       for(sno = 0; sno < NSEG; sno++)
+               if(up->seg[sno] == nil && sno != ESEG)
+                       break;
+       if(sno == NSEG)
+               panic("segmentkproc");
+
+       sio->p = up;
+       incref(sio->s);
+       up->seg[sno] = sio->s;
+
+       while(waserror())
+               ;
+       for(done = 0; !done;){
+               sleep(&sio->cmdwait, cmdready, sio);
+               if(waserror())
+                       sio->err = up->errstr;
+               else {
+                       if(sio->s != nil && up->seg[sno] != sio->s){
+                               putseg(up->seg[sno]);
+                               incref(sio->s);
+                               up->seg[sno] = sio->s;
+                               flushmmu();
+                       }
+                       switch(sio->cmd){
+                       case Cread:
+                               memmove(sio->data, sio->addr, sio->dlen);
+                               break;
+                       case Cwrite:
+                               memmove(sio->addr, sio->data, sio->dlen);
+                               if(sio->s->flushme)
+                                       segflush(sio->addr, sio->dlen);
+                               break;
+                       case Cdie:
+                               done = 1;
+                               break;
+                       }
+                       poperror();
+               }
+               sio->cmd = Cnone;
+               wakeup(&sio->replywait);
+       }
+
+       pexit("done", 1);
+}
+
+long
+segio(Segio *sio, Segment *s, void *a, long n, vlong off, int read)
+{
+       uintptr m;
+       void *b;
+
+       b = a;
+       if(s != nil){
+               m = s->top - s->base;
+               if(off < 0 || off >= m){
+                       if(!read)
+                               error(Ebadarg);
+                       return 0;
+               }
+               if(off+n > m){
+                       if(!read)
+                               error(Ebadarg); 
+                       n = m - off;
+               }
+
+               if((uintptr)a < KZERO) {
+                       b = smalloc(n);
+                       if(waserror()){
+                               free(b);
+                               nexterror();
+                       }
+                       if(!read)
+                               memmove(b, a, n);
+               }
+       }
+
+       eqlock(sio);
+       if(waserror()){
+               qunlock(sio);
+               nexterror();
+       }
+       sio->s = s;
+       if(s == nil){
+               if(sio->p != nil){
+                       docmd(sio, Cdie);
+                       sio->p = nil;
+               }
+               qunlock(sio);
+               poperror();
+               return 0;
+       }
+       if(sio->p == nil){
+               sio->cmd = Cnone;
+               kproc("segmentio", segmentioproc, sio);
+       }
+       sio->addr = (char*)s->base + off;
+       sio->data = b;
+       sio->dlen = n;
+       docmd(sio, read ? Cread : Cwrite);
+       qunlock(sio);
+       poperror();
+
+       if(a != b){
+               if(read)
+                       memmove(a, b, n);
+               free(b);
+               poperror();
+       }
+       return n;
+}