#include	"u.h"
#include	"../port/lib.h"
#include	"mem.h"
#include	"dat.h"
#include	"fns.h"
#include	"../port/error.h"
#include	"io.h"

static void	imagereclaim(void);

/*
 * Attachable segment types
 */
static Physseg physseg[10] = {
	{ SG_SHARED,	"shared",	0,	SEGMAXSIZE,	0,	0 },
	{ SG_BSS,	"memory",	0,	SEGMAXSIZE,	0,	0 },
	{ 0,		0,		0,	0,		0,	0 },
};

static Lock physseglock;

#define IHASHSIZE	64
#define ihash(s)	imagealloc.hash[s%IHASHSIZE]
static struct Imagealloc
{
	Lock;
	Image	*free;
	Image	*hash[IHASHSIZE];
	QLock	ireclaim;	/* mutex on reclaiming free images */
}imagealloc;

Segment* (*_globalsegattach)(Proc*, char*);

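/*
 * Allocate the Image pool (conf.nimage entries) and thread it
 * onto imagealloc.free.
 */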
void
initseg(void)
{
	Image *i, *ie;

	imagealloc.free = xalloc(conf.nimage*sizeof(Image));
	if(imagealloc.free == nil)
		panic("initseg: no memory for Image");
	ie = &imagealloc.free[conf.nimage-1];
	for(i = imagealloc.free; i < ie; i++)
		i->next = i+1;
	i->next = 0;
}

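/*
 * Allocate a segment of the given type covering size pages at
 * base.  Small segments use the Segment's built-in ssegmap;
 * larger ones get a separately allocated Pte* map.
 */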
Segment *
newseg(int type, ulong base, ulong size)
{
	Segment *s;
	int mapsize;

	if(size > (SEGMAPSIZE*PTEPERTAB))
		error(Enovmem);

	s = smalloc(sizeof(Segment));
	s->ref = 1;
	s->type = type;
	s->base = base;
	s->top = base+(size*BY2PG);
	s->size = size;
	s->sema.prev = &s->sema;
	s->sema.next = &s->sema;

	mapsize = ROUND(size, PTEPERTAB)/PTEPERTAB;
	if(mapsize > nelem(s->ssegmap)){
		mapsize *= 2;
		if(mapsize > (SEGMAPSIZE*PTEPERTAB))
			mapsize = (SEGMAPSIZE*PTEPERTAB);
		s->map = smalloc(mapsize*sizeof(Pte*));
		s->mapsize = mapsize;
	}
	else{
		s->map = s->ssegmap;
		s->mapsize = nelem(s->ssegmap);
	}

	return s;
}

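/*
 * Release a reference to s.  On the last reference the segment
 * is detached from its backing Image and its page tables, map
 * and profile buffer are freed.
 */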
void
putseg(Segment *s)
{
	Pte **pp, **emap;
	Image *i;

	if(s == 0)
		return;

	i = s->image;
	if(i != 0) {
		lock(i);
		lock(s);
		if(i->s == s && s->ref == 1)
			i->s = 0;
		unlock(i);
	}
	else
		lock(s);

	s->ref--;
	if(s->ref != 0) {
		unlock(s);
		return;
	}
	unlock(s);

	qlock(&s->lk);
	if(i)
		putimage(i);

	emap = &s->map[s->mapsize];
	for(pp = s->map; pp < emap; pp++)
		if(*pp)
			freepte(s, *pp);

	qunlock(&s->lk);
	if(s->map != s->ssegmap)
		free(s->map);
	if(s->profile != 0)
		free(s->profile);
	free(s);
}

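/*
 * Shift the virtual address of every page mapped in s by offset.
 */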
void
relocateseg(Segment *s, ulong offset)
{
	Page **pg, *x;
	Pte *pte, **p, **endpte;

	endpte = &s->map[s->mapsize];
	for(p = s->map; p < endpte; p++) {
		if(*p == 0)
			continue;
		pte = *p;
		for(pg = pte->first; pg <= pte->last; pg++) {
			if(x = *pg)
				x->va += offset;
		}
	}
}

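/*
 * Duplicate seg[segno] for fork.  Text, shared and physical
 * segments are shared by reference; stack, data and bss get a
 * new Segment whose Ptes are copied so the pages themselves can
 * be copied on write.  Shared data and bss are also shared by
 * reference.
 */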
Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i, size;
	Pte *pte;
	Segment *n, *s;

	SET(n);
	s = seg[segno];

	qlock(&s->lk);
	if(waserror()){
		qunlock(&s->lk);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			n = data2txt(s);
			poperror();
			qunlock(&s->lk);
			return n;
		}

		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);

		incref(s->image);
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		break;
	}
	size = s->mapsize;
	for(i = 0; i < size; i++)
		if(pte = s->map[i])
			n->map[i] = ptecpy(pte);

	n->flushme = s->flushme;
	if(s->ref > 1)
		procflushseg(s);
	poperror();
	qunlock(&s->lk);
	return n;

sameseg:
	incref(s);
	poperror();
	qunlock(&s->lk);
	return s;
}

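/*
 * Enter page p into segment s at p->va, allocating the covering
 * Pte if necessary.
 */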
void
segpage(Segment *s, Page *p)
{
	Pte **pte;
	ulong off;
	Page **pg;

	if(p->va < s->base || p->va >= s->top)
		panic("segpage");

	off = p->va - s->base;
	pte = &s->map[off/PTEMAPMEM];
	if(*pte == 0)
		*pte = ptealloc();

	pg = &(*pte)->pages[(off&(PTEMAPMEM-1))/BY2PG];
	*pg = p;
	if(pg < (*pte)->first)
		(*pte)->first = pg;
	if(pg > (*pte)->last)
		(*pte)->last = pg;
}

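/*
 * Find or allocate the Image caching the text backed by channel c
 * and attach a segment of the given type to it.  The Image is
 * returned locked; the caller unlocks it once the process's
 * segment table has been set up.
 */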
Image*
attachimage(int type, Chan *c, ulong base, ulong len)
{
	Image *i, **l;

	lock(&imagealloc);

	/*
	 * Search the image cache for remains of the text from a previous
	 * or currently running incarnation
	 */
	for(i = ihash(c->qid.path); i; i = i->hash) {
		if(c->qid.path == i->qid.path) {
			lock(i);
			if(eqqid(c->qid, i->qid) &&
			   eqqid(c->mqid, i->mqid) &&
			   c->mchan == i->mchan &&
			   c->type == i->type) {
				goto found;
			}
			unlock(i);
		}
	}

	/*
	 * imagereclaim dumps pages from the free list which are cached by image
	 * structures. This should free some image structures.
	 */
	while(!(i = imagealloc.free)) {
		unlock(&imagealloc);
		imagereclaim();
		sched();
		lock(&imagealloc);
	}

	imagealloc.free = i->next;

	lock(i);
	incref(c);
	c->flag &= ~CCACHE;
	i->c = c;
	i->type = c->type;
	i->qid = c->qid;
	i->mqid = c->mqid;
	i->mchan = c->mchan;
	l = &ihash(c->qid.path);
	i->hash = *l;
	*l = i;
found:
	unlock(&imagealloc);

	if(i->s == 0) {
		i->ref++;
		if(waserror()) {
			unlock(i);
			putimage(i);
			nexterror();
		}
		i->s = newseg(type, base, len);
		i->s->image = i;
		poperror();
	}
	else
		incref(i->s);

	return i;
}

static struct {
	int	calls;			/* times imagereclaim was called */
	int	loops;			/* times the main loop was run */
	uvlong	ticks;			/* total time in the main loop */
	uvlong	maxt;			/* longest time in main loop */
} irstats;

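/*
 * Uncache image-backed pages from the tail of the free page list
 * so that the Image structures holding them can be reclaimed.
 */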
static void
imagereclaim(void)
{
	int n;
	Page *p;
	uvlong ticks;

	irstats.calls++;
	/* Somebody is already cleaning the page cache */
	if(!canqlock(&imagealloc.ireclaim))
		return;

	lock(&palloc);
	ticks = fastticks(nil);
	n = 0;
	/*
	 * All the pages with images backing them are at the
	 * end of the list (see putpage) so start there and work
	 * backward.
	 */
	for(p = palloc.tail; p && p->image && n<1000; p = p->prev) {
		if(p->ref == 0 && canlock(p)) {
			if(p->ref == 0) {
				n++;
				uncachepage(p);
			}
			unlock(p);
		}
	}
	ticks = fastticks(nil) - ticks;
	unlock(&palloc);
	irstats.loops++;
	irstats.ticks += ticks;
	if(ticks > irstats.maxt)
		irstats.maxt = ticks;
	//print("T%llud+", ticks);
	qunlock(&imagealloc.ireclaim);
}

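/*
 * Release a reference to an Image.  On the last reference the
 * Image is unhashed, returned to the free list and its channel
 * is closed.
 */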
void
putimage(Image *i)
{
	Image *f, **l;
	Chan *c;

	if(i->notext)
		return;

	lock(i);
	if(--i->ref == 0) {
		l = &ihash(i->qid.path);
		mkqid(&i->qid, ~0, ~0, QTFILE);
		unlock(i);
		c = i->c;

		lock(&imagealloc);
		for(f = *l; f; f = f->hash) {
			if(f == i) {
				*l = i->hash;
				break;
			}
			l = &f->hash;
		}

		i->next = imagealloc.free;
		imagealloc.free = i;
		unlock(&imagealloc);

		ccloseq(c);	/* does not block */
		return;
	}
	unlock(i);
}

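/*
 * Resize segment seg of the current process so that it ends at
 * addr, rounded up to a page boundary; addr == 0 just reports
 * the segment base.  Used by the brk and segbrk system calls.
 */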
long
ibrk(ulong addr, int seg)
{
	Segment *s, *ns;
	ulong newtop, newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == 0)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(&s->lk);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == 0 || addr < up->seg[DSEG]->base) {
			qunlock(&s->lk);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	newsize = (newtop-s->base)/BY2PG;
	if(newtop < s->top) {
		/*
		 * do not shrink a segment shared with other procs, as the
		 * to-be-freed address space may have been passed to the kernel
		 * already by another proc and is past the validaddr stage.
		 */
		if(s->ref > 1){
			qunlock(&s->lk);
			error(Einuse);
		}
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(&s->lk);
		flushmmu();
		return 0;
	}

	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == 0 || ns == s)
			continue;
		if(newtop >= ns->base && newtop < ns->top) {
			qunlock(&s->lk);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(&s->lk);
		error(Enovmem);
	}
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = smalloc(mapsize*sizeof(Pte*));
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(&s->lk);
	return 0;
}

/*
 *  called with s->lk locked
 */
int
mcountseg(Segment *s)
{
	int i, j, pages;
	Page **map;

	pages = 0;
	for(i = 0; i < s->mapsize; i++){
		if(s->map[i] == 0)
			continue;
		map = s->map[i]->pages;
		for(j = 0; j < PTEPERTAB; j++)
			if(map[j])
				pages++;
	}
	return pages;
}

/*
 *  called with s->lk locked
 */
void
mfreeseg(Segment *s, ulong start, int pages)
{
	int i, j, size;
	ulong soff;
	Page *pg;
	Page *list;

	soff = start-s->base;
	j = (soff&(PTEMAPMEM-1))/BY2PG;

	size = s->mapsize;
	list = nil;
	for(i = soff/PTEMAPMEM; i < size; i++) {
		if(pages <= 0)
			break;
		if(s->map[i] == 0) {
			pages -= PTEPERTAB-j;
			j = 0;
			continue;
		}
		while(j < PTEPERTAB) {
			pg = s->map[i]->pages[j];
			/*
			 * We want to zero s->map[i]->page[j] and putpage(pg),
			 * but we have to make sure other processors flush the
			 * entry from their TLBs before the page is freed.
			 * We construct a list of the pages to be freed, zero
			 * the entries, then (below) call procflushseg, and call
			 * putpage on the whole list.
			 *
			 * Swapped-out pages don't appear in TLBs, so it's okay
			 * to putswap those pages before procflushseg.
			 */
			if(pg){
				if(onswap(pg))
					putswap(pg);
				else{
					pg->next = list;
					list = pg;
				}
				s->map[i]->pages[j] = 0;
			}
			if(--pages == 0)
				goto out;
			j++;
		}
		j = 0;
	}
out:
	/* flush this seg in all other processes */
	if(s->ref > 1)
		procflushseg(s);

	/* free the pages */
	for(pg = list; pg != nil; pg = list){
		list = list->next;
		putpage(pg);
	}
}

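/*
 * Return the segment of p, if any, that overlaps [va, va+len),
 * or nil if the range is free.
 */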
Segment*
isoverlap(Proc *p, ulong va, int len)
{
	int i;
	Segment *ns;
	ulong newtop;

	newtop = va+len;
	for(i = 0; i < NSEG; i++) {
		ns = p->seg[i];
		if(ns == 0)
			continue;
		if((newtop > ns->base && newtop <= ns->top) ||
		   (va >= ns->base && va < ns->top))
			return ns;
	}
	return nil;
}

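/*
 * Add an entry to the table of attachable physical segments.
 * Returns -1 if the name is already present or the table is full.
 */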
int
addphysseg(Physseg* new)
{
	Physseg *ps;

	/*
	 * Check not already entered and there is room
	 * for a new entry and the terminating null entry.
	 */
	lock(&physseglock);
	for(ps = physseg; ps->name; ps++){
		if(strcmp(ps->name, new->name) == 0){
			unlock(&physseglock);
			return -1;
		}
	}
	if(ps-physseg >= nelem(physseg)-2){
		unlock(&physseglock);
		return -1;
	}

	*ps = *new;
	unlock(&physseglock);

	return 0;
}

int
isphysseg(char *name)
{
	Physseg *ps;
	int rv = 0;

	lock(&physseglock);
	for(ps = physseg; ps->name; ps++){
		if(strcmp(ps->name, name) == 0){
			rv = 1;
			break;
		}
	}
	unlock(&physseglock);
	return rv;
}

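/*
 * Attach the named global or physical segment to process p.
 * If va is zero a hole below the stack segment is chosen;
 * the base address of the new segment is returned.
 */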
ulong
segattach(Proc *p, ulong attr, char *name, ulong va, ulong len)
{
	int sno;
	Segment *s, *os;
	Physseg *ps;

	if(va != 0 && va >= USTKTOP)
		error(Ebadarg);

	validaddr((ulong)name, 1, 0);
	vmemchr(name, 0, ~0);

	for(sno = 0; sno < NSEG; sno++)
		if(p->seg[sno] == nil && sno != ESEG)
			break;

	if(sno == NSEG)
		error(Enovmem);

	/*
	 *  first look for a global segment with the
	 *  same name
	 */
	if(_globalsegattach != nil){
		s = (*_globalsegattach)(p, name);
		if(s != nil){
			p->seg[sno] = s;
			return s->base;
		}
	}

	len = PGROUND(len);
	if(len == 0)
		error(Ebadarg);

	/*
	 * Find a hole in the address space.
	 * Starting at the lowest possible stack address - len,
	 * check for an overlapping segment, and repeat at the
	 * base of that segment - len until either a hole is found
	 * or the address space is exhausted.  Ensure that we don't
	 * map the zero page.
	 */
	if(va == 0) {
		for (os = p->seg[SSEG]; os != nil; os = isoverlap(p, va, len)) {
			va = os->base;
			if(len >= va)
				error(Enovmem);
			va -= len;
		}
		va &= ~(BY2PG-1);
	} else {
		va &= ~(BY2PG-1);
		if(va == 0 || va >= USTKTOP)
			error(Ebadarg);
	}

	if(isoverlap(p, va, len) != nil)
		error(Esoverlap);

	for(ps = physseg; ps->name; ps++)
		if(strcmp(name, ps->name) == 0)
			goto found;

	error(Ebadarg);
found:
	if(len > ps->size)
		error(Enovmem);

	attr &= ~SG_TYPE;		/* Turn off what is not allowed */
	attr |= ps->attr;		/* Copy in defaults */

	s = newseg(attr, va, len/BY2PG);
	s->pseg = ps;
	p->seg[sno] = s;

	return va;
}

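/*
 * Mark the resident pages in slots [s, e) of pte so their
 * instruction cache is flushed before they are next used.
 */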
void
pteflush(Pte *pte, int s, int e)
{
	int i;
	Page *p;

	for(i = s; i < e; i++) {
		p = pte->pages[i];
		if(pagedout(p) == 0)
			memset(p->cachectl, PG_TXTFLUSH, sizeof(p->cachectl));
	}
}

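/*
 * The segflush system call: make instruction caches coherent
 * with the data in the given address range, one Pte block of a
 * segment at a time.
 */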
long
syssegflush(ulong *arg)
{
	Segment *s;
	ulong addr, l;
	Pte *pte;
	int chunk, ps, pe, len;

	addr = arg[0];
	len = arg[1];

	while(len > 0) {
		s = seg(up, addr, 1);
		if(s == 0)
			error(Ebadarg);

		s->flushme = 1;
	more:
		l = len;
		if(addr+l > s->top)
			l = s->top - addr;

		ps = addr-s->base;
		pte = s->map[ps/PTEMAPMEM];
		ps &= PTEMAPMEM-1;
		pe = PTEMAPMEM;
		if(pe-ps > l){
			pe = ps + l;
			pe = (pe+BY2PG-1)&~(BY2PG-1);
		}
		if(pe == ps) {
			qunlock(&s->lk);
			error(Ebadarg);
		}

		if(pte)
			pteflush(pte, ps/BY2PG, pe/BY2PG);

		chunk = pe-ps;
		len -= chunk;
		addr += chunk;

		if(len > 0 && addr < s->top)
			goto more;

		qunlock(&s->lk);
	}
	flushmmu();
	return 0;
}

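/*
 * Called on the clock tick to charge profiling time to the text
 * segment when profiling is enabled.
 */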
void
segclock(ulong pc)
{
	Segment *s;

	s = up->seg[TSEG];
	if(s == 0 || s->profile == 0)
		return;

	s->profile[0] += TK2MS(1);
	if(pc >= s->base && pc < s->top) {
		pc -= s->base;
		s->profile[pc>>LRESPROF] += TK2MS(1);
	}
}