]> git.lizzy.rs Git - plan9front.git/blob - sys/src/9/port/segment.c
c2ad137f192fc42822ffb4c3071eb775204182ad
[plan9front.git] / sys / src / 9 / port / segment.c
1 #include        "u.h"
2 #include        "../port/lib.h"
3 #include        "mem.h"
4 #include        "dat.h"
5 #include        "fns.h"
6 #include        "../port/error.h"
7
8 int imagereclaim(int);
9
10 /*
11  * Attachable segment types
12  */
13 static Physseg physseg[10] = {
14         { SG_SHARED,    "shared",       0,      SEGMAXSIZE      },
15         { SG_BSS,       "memory",       0,      SEGMAXSIZE      },
16         { 0,            0,              0,      0               },
17 };
18
19 static Lock physseglock;
20
21 #define IHASHSIZE       64
22 #define ihash(s)        imagealloc.hash[s%IHASHSIZE]
23 static struct Imagealloc
24 {
25         Lock;
26         Image   *list;
27         Image   *free;
28         Image   *hash[IHASHSIZE];
29         QLock   ireclaim;       /* mutex on reclaiming free images */
30 }imagealloc;
31
32 Segment* (*_globalsegattach)(Proc*, char*);
33
34 void
35 initseg(void)
36 {
37         Image *i, *ie;
38
39         imagealloc.list = xalloc(conf.nimage*sizeof(Image));
40         if(imagealloc.list == nil)
41                 panic("initseg: no memory for Image");
42         ie = &imagealloc.list[conf.nimage-1];
43         for(i = imagealloc.list; i < ie; i++)
44                 i->next = i+1;
45         i->next = nil;
46         imagealloc.free = imagealloc.list;
47 }
48
49 Segment *
50 newseg(int type, uintptr base, ulong size)
51 {
52         Segment *s;
53         int mapsize;
54
55         if(size > (SEGMAPSIZE*PTEPERTAB))
56                 error(Enovmem);
57
58         s = malloc(sizeof(Segment));
59         if(s == nil)
60                 error(Enomem);
61         s->ref = 1;
62         s->type = type;
63         s->base = base;
64         s->top = base+(size*BY2PG);
65         s->size = size;
66         s->sema.prev = &s->sema;
67         s->sema.next = &s->sema;
68
69         mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
70         if(mapsize > nelem(s->ssegmap)){
71                 s->map = malloc(mapsize*sizeof(Pte*));
72                 if(s->map == nil){
73                         free(s);
74                         error(Enomem);
75                 }
76                 s->mapsize = mapsize;
77         }
78         else{
79                 s->map = s->ssegmap;
80                 s->mapsize = nelem(s->ssegmap);
81         }
82
83         return s;
84 }
85
86 void
87 putseg(Segment *s)
88 {
89         Pte **pte, **emap;
90         Image *i;
91
92         if(s == nil)
93                 return;
94
95         i = s->image;
96         if(i != nil) {
97                 lock(i);
98                 if(decref(s) != 0){
99                         unlock(i);
100                         return;
101                 }
102                 if(i->s == s)
103                         i->s = nil;
104                 unlock(i);
105                 putimage(i);
106         } else if(decref(s) != 0)
107                 return;
108
109         emap = &s->map[s->mapsize];
110         for(pte = s->map; pte < emap; pte++)
111                 if(*pte != nil)
112                         freepte(s, *pte);
113
114         if(s->map != s->ssegmap)
115                 free(s->map);
116         if(s->profile != nil)
117                 free(s->profile);
118
119         free(s);
120 }
121
122 void
123 relocateseg(Segment *s, uintptr offset)
124 {
125         Pte **pte, **emap;
126         Page **pg, **pe;
127
128         emap = &s->map[s->mapsize];
129         for(pte = s->map; pte < emap; pte++) {
130                 if(*pte == nil)
131                         continue;
132                 pe = (*pte)->last;
133                 for(pg = (*pte)->first; pg <= pe; pg++) {
134                         if(!pagedout(*pg))
135                                 (*pg)->va += offset;
136                 }
137         }
138 }
139
140 Segment*
141 dupseg(Segment **seg, int segno, int share)
142 {
143         int i, size;
144         Pte *pte;
145         Segment *n, *s;
146
147         SET(n);
148         s = seg[segno];
149
150         qlock(s);
151         if(waserror()){
152                 qunlock(s);
153                 nexterror();
154         }
155         switch(s->type&SG_TYPE) {
156         case SG_TEXT:           /* New segment shares pte set */
157         case SG_SHARED:
158         case SG_PHYSICAL:
159         case SG_FIXED:
160                 goto sameseg;
161
162         case SG_STACK:
163                 n = newseg(s->type, s->base, s->size);
164                 break;
165
166         case SG_BSS:            /* Just copy on write */
167                 if(share)
168                         goto sameseg;
169                 n = newseg(s->type, s->base, s->size);
170                 break;
171
172         case SG_DATA:           /* Copy on write plus demand load info */
173                 if(segno == TSEG){
174                         n = data2txt(s);
175                         poperror();
176                         qunlock(s);
177                         return n;
178                 }
179
180                 if(share)
181                         goto sameseg;
182                 n = newseg(s->type, s->base, s->size);
183
184                 incref(s->image);
185                 n->image = s->image;
186                 n->fstart = s->fstart;
187                 n->flen = s->flen;
188                 break;
189         }
190         size = s->mapsize;
191         for(i = 0; i < size; i++)
192                 if((pte = s->map[i]) != nil)
193                         n->map[i] = ptecpy(pte);
194
195         n->flushme = s->flushme;
196         if(s->ref > 1)
197                 procflushseg(s);
198         poperror();
199         qunlock(s);
200         return n;
201
202 sameseg:
203         incref(s);
204         poperror();
205         qunlock(s);
206         return s;
207 }
208
209 void
210 segpage(Segment *s, Page *p)
211 {
212         Pte **pte, *etp;
213         uintptr soff;
214         Page **pg;
215
216         if(p->va < s->base || p->va >= s->top)
217                 panic("segpage");
218
219         soff = p->va - s->base;
220         pte = &s->map[soff/PTEMAPMEM];
221         if((etp = *pte) == nil)
222                 *pte = etp = ptealloc();
223
224         pg = &etp->pages[(soff&(PTEMAPMEM-1))/BY2PG];
225         *pg = p;
226         if(pg < etp->first)
227                 etp->first = pg;
228         if(pg > etp->last)
229                 etp->last = pg;
230 }
231
232 Image*
233 attachimage(int type, Chan *c, uintptr base, ulong len)
234 {
235         Image *i, **l;
236
237         lock(&imagealloc);
238
239         /*
240          * Search the image cache for remains of the text from a previous
241          * or currently running incarnation
242          */
243         for(i = ihash(c->qid.path); i; i = i->hash) {
244                 if(c->qid.path == i->qid.path) {
245                         lock(i);
246                         if(eqchantdqid(c, i->type, i->dev, i->qid, 0) && c->qid.type == i->qid.type)
247                                 goto found;
248                         unlock(i);
249                 }
250         }
251
252         /* dump pages of inactive images to free image structures */
253         while((i = imagealloc.free) == nil) {
254                 unlock(&imagealloc);
255                 if(imagereclaim(1000) == 0 && imagealloc.free == nil){
256                         freebroken();           /* can use the memory */
257                         resrcwait("no image after reclaim");
258                 }
259                 lock(&imagealloc);
260         }
261
262         imagealloc.free = i->next;
263
264         lock(i);
265         i->type = c->type;
266         i->dev = c->dev;
267         i->qid = c->qid;
268
269         l = &ihash(c->qid.path);
270         i->hash = *l;
271         *l = i;
272
273 found:
274         unlock(&imagealloc);
275         if(i->c == nil){
276                 i->c = c;
277                 c->flag &= ~CCACHE;
278                 incref(c);
279         }
280
281         if(i->s == nil) {
282                 incref(i);
283                 if(waserror()) {
284                         unlock(i);
285                         putimage(i);
286                         nexterror();
287                 }
288                 i->s = newseg(type, base, len);
289                 i->s->image = i;
290                 poperror();
291         }
292         else
293                 incref(i->s);
294
295         return i;
296 }
297
298 extern int pagereclaim(Image*, int);    /* page.c */
299
300 int
301 imagereclaim(int min)
302 {
303         static Image *i, *ie;
304         int j, n;
305
306         eqlock(&imagealloc.ireclaim);
307         if(i == nil){
308                 i = imagealloc.list;
309                 ie = &imagealloc.list[conf.nimage];
310         }
311         n = 0;
312         for(j = 0; j < conf.nimage; j++, i++){
313                 if(i >= ie)
314                         i = imagealloc.list;
315                 if(i->ref == 0)
316                         continue;
317                 /*
318                  * if there are no free image structures, only
319                  * reclaim pages from inactive images.
320                  */
321                 if(imagealloc.free != nil || i->ref == i->pgref){
322                         n += pagereclaim(i, min - n);
323                         if(n >= min)
324                                 break;
325                 }
326         }
327         qunlock(&imagealloc.ireclaim);
328
329         return n;
330 }
331
332 void
333 putimage(Image *i)
334 {
335         Image *f, **l;
336         Chan *c;
337         int r;
338
339         if(i->notext){
340                 decref(i);
341                 return;
342         }
343
344         c = nil;
345         lock(i);
346         r = decref(i);
347         if(r == i->pgref){
348                 /*
349                  * all remaining references to this image are from the
350                  * page cache, so close the chan.
351                  */
352                 c = i->c;
353                 i->c = nil;
354         }
355         if(r == 0){
356                 l = &ihash(i->qid.path);
357                 mkqid(&i->qid, ~0, ~0, QTFILE);
358                 unlock(i);
359
360                 lock(&imagealloc);
361                 for(f = *l; f != nil; f = f->hash) {
362                         if(f == i) {
363                                 *l = i->hash;
364                                 break;
365                         }
366                         l = &f->hash;
367                 }
368                 i->next = imagealloc.free;
369                 imagealloc.free = i;
370                 unlock(&imagealloc);
371         } else
372                 unlock(i);
373         if(c != nil)
374                 ccloseq(c);     /* does not block */
375 }
376
377 long
378 ibrk(uintptr addr, int seg)
379 {
380         Segment *s, *ns;
381         uintptr newtop;
382         ulong newsize;
383         int i, mapsize;
384         Pte **map;
385
386         s = up->seg[seg];
387         if(s == nil)
388                 error(Ebadarg);
389
390         if(addr == 0)
391                 return s->base;
392
393         qlock(s);
394
395         /* We may start with the bss overlapping the data */
396         if(addr < s->base) {
397                 if(seg != BSEG || up->seg[DSEG] == nil || addr < up->seg[DSEG]->base) {
398                         qunlock(s);
399                         error(Enovmem);
400                 }
401                 addr = s->base;
402         }
403
404         newtop = PGROUND(addr);
405         newsize = (newtop-s->base)/BY2PG;
406         if(newtop < s->top) {
407                 /*
408                  * do not shrink a segment shared with other procs, as the
409                  * to-be-freed address space may have been passed to the kernel
410                  * already by another proc and is past the validaddr stage.
411                  */
412                 if(s->ref > 1){
413                         qunlock(s);
414                         error(Einuse);
415                 }
416                 mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
417                 s->top = newtop;
418                 s->size = newsize;
419                 qunlock(s);
420                 flushmmu();
421                 return 0;
422         }
423
424         for(i = 0; i < NSEG; i++) {
425                 ns = up->seg[i];
426                 if(ns == nil || ns == s)
427                         continue;
428                 if(newtop >= ns->base && newtop < ns->top) {
429                         qunlock(s);
430                         error(Esoverlap);
431                 }
432         }
433
434         if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
435                 qunlock(s);
436                 error(Enovmem);
437         }
438         mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
439         if(mapsize > s->mapsize){
440                 map = malloc(mapsize*sizeof(Pte*));
441                 if(map == nil){
442                         qunlock(s);
443                         error(Enomem);
444                 }
445                 memmove(map, s->map, s->mapsize*sizeof(Pte*));
446                 if(s->map != s->ssegmap)
447                         free(s->map);
448                 s->map = map;
449                 s->mapsize = mapsize;
450         }
451
452         s->top = newtop;
453         s->size = newsize;
454         qunlock(s);
455         return 0;
456 }
457
458 /*
459  *  called with s locked
460  */
461 ulong
462 mcountseg(Segment *s)
463 {
464         Pte **pte, **emap;
465         Page **pg, **pe;
466         ulong pages;
467
468         if((s->type&SG_TYPE) == SG_PHYSICAL)
469                 return 0;
470
471         pages = 0;
472         emap = &s->map[s->mapsize];
473         for(pte = s->map; pte < emap; pte++){
474                 if(*pte == nil)
475                         continue;
476                 pe = (*pte)->last;
477                 for(pg = (*pte)->first; pg <= pe; pg++)
478                         if(!pagedout(*pg))
479                                 pages++;
480         }
481         return pages;
482 }
483
484 /*
485  *  called with s locked
486  */
487 void
488 mfreeseg(Segment *s, uintptr start, ulong pages)
489 {
490         uintptr off;
491         Pte **pte, **emap;
492         Page **pg, **pe;
493
494         if(pages == 0)
495                 return;
496
497         switch(s->type&SG_TYPE){
498         case SG_PHYSICAL:
499         case SG_FIXED:
500                 return;
501         }
502
503         /*
504          * we have to make sure other processors flush the
505          * entry from their TLBs before the page is freed.
506          */
507         if(s->ref > 1)
508                 procflushseg(s);
509
510         off = start-s->base;
511         pte = &s->map[off/PTEMAPMEM];
512         off = (off&(PTEMAPMEM-1))/BY2PG;
513         for(emap = &s->map[s->mapsize]; pte < emap; pte++, off = 0) {
514                 if(*pte == nil) {
515                         off = PTEPERTAB - off;
516                         if(off >= pages)
517                                 return;
518                         pages -= off;
519                         continue;
520                 }
521                 pg = &(*pte)->pages[off];
522                 for(pe = &(*pte)->pages[PTEPERTAB]; pg < pe; pg++) {
523                         if(*pg != nil){
524                                 putpage(*pg);
525                                 *pg = nil;
526                         }
527                         if(--pages == 0)
528                                 return;
529                 }
530         }
531 }
532
533 Segment*
534 isoverlap(Proc *p, uintptr va, uintptr len)
535 {
536         int i;
537         Segment *ns;
538         uintptr newtop;
539
540         newtop = va+len;
541         for(i = 0; i < NSEG; i++) {
542                 ns = p->seg[i];
543                 if(ns == nil)
544                         continue;
545                 if((newtop > ns->base && newtop <= ns->top) ||
546                    (va >= ns->base && va < ns->top))
547                         return ns;
548         }
549         return nil;
550 }
551
552 int
553 addphysseg(Physseg* new)
554 {
555         Physseg *ps;
556
557         /*
558          * Check not already entered and there is room
559          * for a new entry and the terminating null entry.
560          */
561         lock(&physseglock);
562         for(ps = physseg; ps->name; ps++){
563                 if(strcmp(ps->name, new->name) == 0){
564                         unlock(&physseglock);
565                         return -1;
566                 }
567         }
568         if(ps-physseg >= nelem(physseg)-2){
569                 unlock(&physseglock);
570                 return -1;
571         }
572         *ps = *new;
573         unlock(&physseglock);
574
575         return 0;
576 }
577
578 int
579 isphysseg(char *name)
580 {
581         Physseg *ps;
582         int rv = 0;
583
584         lock(&physseglock);
585         for(ps = physseg; ps->name; ps++){
586                 if(strcmp(ps->name, name) == 0){
587                         rv = 1;
588                         break;
589                 }
590         }
591         unlock(&physseglock);
592         return rv;
593 }
594
595 uintptr
596 segattach(Proc *p, ulong attr, char *name, uintptr va, uintptr len)
597 {
598         int sno;
599         Segment *s, *os;
600         Physseg *ps;
601
602         if(va != 0 && va >= USTKTOP)
603                 error(Ebadarg);
604
605         validaddr((uintptr)name, 1, 0);
606         vmemchr(name, 0, ~0);
607
608         for(sno = 0; sno < NSEG; sno++)
609                 if(p->seg[sno] == nil && sno != ESEG)
610                         break;
611
612         if(sno == NSEG)
613                 error(Enovmem);
614
615         /*
616          *  first look for a global segment with the
617          *  same name
618          */
619         if(_globalsegattach != nil){
620                 s = (*_globalsegattach)(p, name);
621                 if(s != nil){
622                         p->seg[sno] = s;
623                         return s->base;
624                 }
625         }
626
627         /* round up va+len */
628         len += va & (BY2PG-1);
629         len = PGROUND(len);
630
631         if(len == 0)
632                 error(Ebadarg);
633
634         /*
635          * Find a hole in the address space.
636          * Starting at the lowest possible stack address - len,
637          * check for an overlapping segment, and repeat at the
638          * base of that segment - len until either a hole is found
639          * or the address space is exhausted.  Ensure that we don't
640          * map the zero page.
641          */
642         if(va == 0) {
643                 for (os = p->seg[SSEG]; os != nil; os = isoverlap(p, va, len)) {
644                         va = os->base;
645                         if(len >= va)
646                                 error(Enovmem);
647                         va -= len;
648                 }
649         }
650
651         va &= ~(BY2PG-1);
652         if(va == 0 || (va+len) > USTKTOP || (va+len) < va)
653                 error(Ebadarg);
654
655         if(isoverlap(p, va, len) != nil)
656                 error(Esoverlap);
657
658         for(ps = physseg; ps->name; ps++)
659                 if(strcmp(name, ps->name) == 0)
660                         goto found;
661
662         error(Ebadarg);
663 found:
664         if(len > ps->size)
665                 error(Enovmem);
666
667         attr &= ~SG_TYPE;               /* Turn off what is not allowed */
668         attr |= ps->attr;               /* Copy in defaults */
669
670         s = newseg(attr, va, len/BY2PG);
671         s->pseg = ps;
672         p->seg[sno] = s;
673
674         return va;
675 }
676
677 static void
678 segflush(void *va, uintptr len)
679 {
680         uintptr from, to, off;
681         Segment *s;
682         Pte *pte;
683         Page **pg, **pe;
684
685         from = (uintptr)va;
686         to = from + len;
687         to = PGROUND(to);
688         from &= ~(BY2PG-1);
689         if(to < from)
690                 error(Ebadarg);
691
692         while(from < to) {
693                 s = seg(up, from, 1);
694                 if(s == nil)
695                         error(Ebadarg);
696
697                 s->flushme = 1;
698         more:
699                 len = (s->top < to ? s->top : to) - from;
700                 off = from-s->base;
701                 pte = s->map[off/PTEMAPMEM];
702                 off &= PTEMAPMEM-1;
703                 if(off+len > PTEMAPMEM)
704                         len = PTEMAPMEM-off;
705
706                 if(pte != nil) {
707                         pg = &pte->pages[off/BY2PG];
708                         pe = pg + len/BY2PG;
709                         while(pg < pe) {
710                                 if(!pagedout(*pg))
711                                         (*pg)->txtflush = ~0;
712                                 pg++;
713                         }
714                 }
715
716                 from += len;
717                 if(from < to && from < s->top)
718                         goto more;
719
720                 qunlock(s);
721         }
722 }
723
724 uintptr
725 syssegflush(va_list list)
726 {
727         void *va;
728         ulong len;
729
730         va = va_arg(list, void*);
731         len = va_arg(list, ulong);
732         segflush(va, len);
733         flushmmu();
734         return 0;
735 }
736
737 void
738 segclock(uintptr pc)
739 {
740         Segment *s;
741
742         s = up->seg[TSEG];
743         if(s == nil || s->profile == nil)
744                 return;
745
746         s->profile[0] += TK2MS(1);
747         if(pc >= s->base && pc < s->top) {
748                 pc -= s->base;
749                 s->profile[pc>>LRESPROF] += TK2MS(1);
750         }
751 }
752
753 Segment*
754 txt2data(Segment *s)
755 {
756         Segment *ps;
757
758         ps = newseg(SG_DATA, s->base, s->size);
759         ps->image = s->image;
760         incref(ps->image);
761         ps->fstart = s->fstart;
762         ps->flen = s->flen;
763         ps->flushme = 1;
764         qunlock(s);
765         putseg(s);
766         qlock(ps);
767         return ps;
768 }
769
770 Segment*
771 data2txt(Segment *s)
772 {
773         Segment *ps;
774
775         ps = newseg(SG_TEXT, s->base, s->size);
776         ps->image = s->image;
777         incref(ps->image);
778         ps->fstart = s->fstart;
779         ps->flen = s->flen;
780         ps->flushme = 1;
781         return ps;
782 }
783
784
/*
 * Commands to segmentioproc, passed in Segio.cmd.
 * Cnone doubles as "no request pending".
 */
enum {
	Cnone	= 0,
	Cread	= 1,
	Cwrite	= 2,
	Cdie	= 3,
};
792
793 static int
794 cmddone(void *arg)
795 {
796         Segio *sio = arg;
797
798         return sio->cmd == Cnone;
799 }
800
801 static void
802 docmd(Segio *sio, int cmd)
803 {
804         sio->err = nil;
805         sio->cmd = cmd;
806         while(waserror())
807                 ;
808         wakeup(&sio->cmdwait);
809         sleep(&sio->replywait, cmddone, sio);
810         poperror();
811         if(sio->err != nil)
812                 error(sio->err);
813 }
814
815 static int
816 cmdready(void *arg)
817 {
818         Segio *sio = arg;
819
820         return sio->cmd != Cnone;
821 }
822
823 static void
824 segmentioproc(void *arg)
825 {
826         Segio *sio = arg;
827         int done;
828         int sno;
829
830         for(sno = 0; sno < NSEG; sno++)
831                 if(up->seg[sno] == nil && sno != ESEG)
832                         break;
833         if(sno == NSEG)
834                 panic("segmentkproc");
835
836         sio->p = up;
837         incref(sio->s);
838         up->seg[sno] = sio->s;
839
840         cclose(up->dot);
841         up->dot = up->slash;
842         incref(up->dot);
843
844         while(waserror())
845                 ;
846         for(done = 0; !done;){
847                 sleep(&sio->cmdwait, cmdready, sio);
848                 if(waserror())
849                         sio->err = up->errstr;
850                 else {
851                         if(sio->s != nil && up->seg[sno] != sio->s){
852                                 putseg(up->seg[sno]);
853                                 incref(sio->s);
854                                 up->seg[sno] = sio->s;
855                                 flushmmu();
856                         }
857                         switch(sio->cmd){
858                         case Cread:
859                                 memmove(sio->data, sio->addr, sio->dlen);
860                                 break;
861                         case Cwrite:
862                                 memmove(sio->addr, sio->data, sio->dlen);
863                                 if(sio->s->flushme)
864                                         segflush(sio->addr, sio->dlen);
865                                 break;
866                         case Cdie:
867                                 done = 1;
868                                 break;
869                         }
870                         poperror();
871                 }
872                 sio->cmd = Cnone;
873                 wakeup(&sio->replywait);
874         }
875
876         pexit("done", 1);
877 }
878
879 long
880 segio(Segio *sio, Segment *s, void *a, long n, vlong off, int read)
881 {
882         uintptr m;
883         void *b;
884
885         b = a;
886         if(s != nil){
887                 m = s->top - s->base;
888                 if(off < 0 || off >= m){
889                         if(!read)
890                                 error(Ebadarg);
891                         return 0;
892                 }
893                 if(off+n > m){
894                         if(!read)
895                                 error(Ebadarg); 
896                         n = m - off;
897                 }
898
899                 if((uintptr)a < KZERO) {
900                         b = smalloc(n);
901                         if(waserror()){
902                                 free(b);
903                                 nexterror();
904                         }
905                         if(!read)
906                                 memmove(b, a, n);
907                 }
908         }
909
910         eqlock(sio);
911         if(waserror()){
912                 qunlock(sio);
913                 nexterror();
914         }
915         sio->s = s;
916         if(s == nil){
917                 if(sio->p != nil){
918                         docmd(sio, Cdie);
919                         sio->p = nil;
920                 }
921                 qunlock(sio);
922                 poperror();
923                 return 0;
924         }
925         if(sio->p == nil){
926                 sio->cmd = Cnone;
927                 kproc("segmentio", segmentioproc, sio);
928         }
929         sio->addr = (char*)s->base + off;
930         sio->data = b;
931         sio->dlen = n;
932         docmd(sio, read ? Cread : Cwrite);
933         qunlock(sio);
934         poperror();
935
936         if(a != b){
937                 if(read)
938                         memmove(a, b, n);
939                 free(b);
940                 poperror();
941         }
942         return n;
943 }