git.lizzy.rs Git - plan9front.git/blob - sys/src/9/port/devswap.c
pc kernel: fix wrong simd exception mask (fixes go bootstrap)
[plan9front.git] / sys / src / 9 / port / devswap.c
1 #include        "u.h"
2 #include        "../port/lib.h"
3 #include        "mem.h"
4 #include        "dat.h"
5 #include        "fns.h"
6 #include        "../port/error.h"
7
8 #include        <libsec.h>
9 #include        <pool.h>
10
11 static int      canflush(Proc*, Segment*);
12 static void     executeio(void);
13 static void     pageout(Proc*, Segment*);
14 static void     pagepte(int, Page**);
15 static void     pager(void*);
16
17 Image   swapimage = {
18         .notext = 1,
19 };
20
21 static Chan     *swapchan;
22 static uchar    *swapbuf;
23 static AESstate *swapkey;
24
25 static Page     **iolist;
26 static int      ioptr;
27
28 static ushort   ageclock;
29
30 static void
31 swapinit(void)
32 {
33         swapalloc.swmap = xalloc(conf.nswap);
34         swapalloc.top = &swapalloc.swmap[conf.nswap];
35         swapalloc.alloc = swapalloc.swmap;
36         swapalloc.last = swapalloc.swmap;
37         swapalloc.free = conf.nswap;
38         swapalloc.xref = 0;
39
40         iolist = xalloc(conf.nswppo*sizeof(Page*));
41         if(swapalloc.swmap == nil || iolist == nil)
42                 panic("swapinit: not enough memory");
43 }
44
45 static uintptr
46 newswap(void)
47 {
48         uchar *look;
49
50         lock(&swapalloc);
51         if(swapalloc.free == 0) {
52                 unlock(&swapalloc);
53                 return ~0;
54         }
55         look = memchr(swapalloc.last, 0, swapalloc.top-swapalloc.last);
56         if(look == nil)
57                 look = memchr(swapalloc.swmap, 0, swapalloc.last-swapalloc.swmap);
58         *look = 2;      /* ref for pte + io transaction */
59         swapalloc.last = look;
60         swapalloc.free--;
61         unlock(&swapalloc);
62         return (look-swapalloc.swmap) * BY2PG;
63 }
64
65 void
66 putswap(Page *p)
67 {
68         uchar *idx;
69
70         lock(&swapalloc);
71         idx = &swapalloc.swmap[((uintptr)p)/BY2PG];
72         if(*idx == 0)
73                 panic("putswap %#p ref == 0", p);
74
75         if(*idx == 255) {
76                 if(swapalloc.xref == 0)
77                         panic("putswap %#p xref == 0", p);
78
79                 if(--swapalloc.xref == 0) {
80                         for(idx = swapalloc.swmap; idx < swapalloc.top; idx++) {
81                                 if(*idx == 255) {
82                                         *idx = 0;
83                                         swapalloc.free++;
84                                 }
85                         }
86                 }
87         } else {
88                 if(--(*idx) == 0)
89                         swapalloc.free++;
90         }
91         unlock(&swapalloc);
92 }
93
94 void
95 dupswap(Page *p)
96 {
97         uchar *idx;
98
99         lock(&swapalloc);
100         idx = &swapalloc.swmap[((uintptr)p)/BY2PG];
101         if(*idx == 255)
102                 swapalloc.xref++;
103         else {
104                 if(++(*idx) == 255)
105                         swapalloc.xref += 255;
106         }
107         unlock(&swapalloc);
108 }
109
110 int
111 swapcount(uintptr daddr)
112 {
113         return swapalloc.swmap[daddr/BY2PG];
114 }
115
116 void
117 kickpager(void)
118 {
119         static Ref started;
120
121         if(started.ref || incref(&started) != 1)
122                 wakeup(&swapalloc.r);
123         else
124                 kproc("pager", pager, 0);
125 }
126
127 static int
128 reclaim(void)
129 {
130         ulong np;
131
132         for(;;){
133                 if((np = pagereclaim(&fscache, 1000)) > 0) {
134                         if(0) print("reclaim: %lud fscache\n", np);
135                 } else if((np = pagereclaim(&swapimage, 1000)) > 0) {
136                         if(0) print("reclaim: %lud swap\n", np);
137                 } else if((np = imagereclaim(1000)) > 0) {
138                         if(0) print("reclaim: %lud image\n", np);
139                 }
140                 if(!needpages(nil))
141                         return 1;       /* have pages, done */
142                 if(np == 0)
143                         return 0;       /* didnt reclaim, need to swap */
144                 sched();
145         }
146 }
147
148 static void
149 pager(void*)
150 {
151         int i;
152         Segment *s;
153         Proc *p, *ep;
154
155         p = proctab(0);
156         ep = &p[conf.nproc];
157
158         while(waserror())
159                 ;
160
161         for(;;){
162                 up->psstate = "Reclaim";
163                 if(reclaim()){
164                         up->psstate = "Idle";
165                         wakeup(&palloc.pwait[0]);
166                         wakeup(&palloc.pwait[1]);
167                         sleep(&swapalloc.r, needpages, nil);
168                         continue;
169                 }
170
171                 if(swapimage.c == nil || swapalloc.free == 0){
172                 Killbig:
173                         if(!freebroken())
174                                 killbig("out of memory");
175                         sched();
176                         continue;
177                 }
178
179                 i = ageclock;
180                 do {
181                         if(++p >= ep){
182                                 if(++ageclock == i)
183                                         goto Killbig;
184                                 p = proctab(0);
185                         }
186                 } while(p->state == Dead || p->noswap || !canqlock(&p->seglock));
187                 up->psstate = "Pageout";
188                 for(i = 0; i < NSEG; i++) {
189                         if((s = p->seg[i]) != nil) {
190                                 switch(s->type&SG_TYPE) {
191                                 default:
192                                         break;
193                                 case SG_TEXT:
194                                         pageout(p, s);
195                                         break;
196                                 case SG_DATA:
197                                 case SG_BSS:
198                                 case SG_STACK:
199                                 case SG_SHARED:
200                                         pageout(p, s);
201                                         break;
202                                 }
203                         }
204                 }
205                 qunlock(&p->seglock);
206
207                 if(ioptr > 0) {
208                         up->psstate = "I/O";
209                         executeio();
210                 }
211         }
212 }
213
214 static void
215 pageout(Proc *p, Segment *s)
216 {
217         int type, i, size;
218         short age;
219         Pte *l;
220         Page **pg, *entry;
221
222         if(!canqlock(s))        /* We cannot afford to wait, we will surely deadlock */
223                 return;
224
225         if(!canflush(p, s)      /* Able to invalidate all tlbs with references */
226         || waserror()) {
227                 qunlock(s);
228                 putseg(s);
229                 return;
230         }
231
232         /* Pass through the pte tables looking for memory pages to swap out */
233         type = s->type&SG_TYPE;
234         size = s->mapsize;
235         for(i = 0; i < size; i++) {
236                 l = s->map[i];
237                 if(l == nil)
238                         continue;
239                 for(pg = l->first; pg <= l->last; pg++) {
240                         entry = *pg;
241                         if(pagedout(entry))
242                                 continue;
243                         if(entry->modref & PG_REF) {
244                                 entry->modref &= ~PG_REF;
245                                 entry->refage = ageclock;
246                                 continue;
247                         }
248                         age = (short)(ageclock - entry->refage);
249                         if(age < 16)
250                                 continue;
251                         pagepte(type, pg);
252                 }
253         }
254         poperror();
255         qunlock(s);
256         putseg(s);
257 }
258
259 static int
260 canflush(Proc *p, Segment *s)
261 {
262         int i;
263         Proc *ep;
264
265         if(incref(s) == 2)              /* Easy if we are the only user */
266                 return canpage(p);
267
268         /* Now we must do hardwork to ensure all processes which have tlb
269          * entries for this segment will be flushed if we succeed in paging it out
270          */
271         p = proctab(0);
272         ep = &p[conf.nproc];
273         while(p < ep) {
274                 if(p->state != Dead) {
275                         for(i = 0; i < NSEG; i++)
276                                 if(p->seg[i] == s)
277                                         if(!canpage(p))
278                                                 return 0;
279                 }
280                 p++;
281         }
282         return 1;
283 }
284
285 static void
286 pagepte(int type, Page **pg)
287 {
288         uintptr daddr;
289         Page *outp;
290
291         outp = *pg;
292         switch(type) {
293         case SG_TEXT:                           /* Revert to demand load */
294                 putpage(outp);
295                 *pg = nil;
296                 break;
297
298         case SG_DATA:
299         case SG_BSS:
300         case SG_STACK:
301         case SG_SHARED:
302                 if(ioptr >= conf.nswppo)
303                         break;
304
305                 /*
306                  *  get a new swap address with swapcount 2, one for the pte
307                  *  and one extra ref for us while we write the page to disk
308                  */
309                 daddr = newswap();
310                 if(daddr == ~0)
311                         break;
312
313                 /* clear any pages referring to it from the cache */
314                 cachedel(&swapimage, daddr);
315
316                 /* forget anything that it used to cache */
317                 uncachepage(outp);
318
319                 /*
320                  *  enter it into the cache so that a fault happening
321                  *  during the write will grab the page from the cache
322                  *  rather than one partially written to the disk
323                  */
324                 outp->daddr = daddr;
325                 cachepage(outp, &swapimage);
326                 *pg = (Page*)(daddr|PG_ONSWAP);
327
328                 /* Add page to IO transaction list */
329                 iolist[ioptr++] = outp;
330                 break;
331         }
332 }
333
334 void
335 pagersummary(void)
336 {
337         print("%lud/%lud memory %lud/%lud swap %d iolist\n",
338                 palloc.user-palloc.freecount,
339                 palloc.user, conf.nswap-swapalloc.free, conf.nswap,
340                 ioptr);
341 }
342
343 static void
344 executeio(void)
345 {
346         Page *outp;
347         int i, n;
348         Chan *c;
349         char *kaddr;
350         KMap *k;
351
352         c = swapimage.c;
353         for(i = 0; i < ioptr; i++) {
354                 if(ioptr > conf.nswppo)
355                         panic("executeio: ioptr %d > %d", ioptr, conf.nswppo);
356                 outp = iolist[i];
357
358                 assert(outp->ref > 0);
359                 assert(outp->image == &swapimage);
360                 assert(outp->daddr != ~0);
361
362                 /* only write when swap address still in use */
363                 if(swapcount(outp->daddr) > 1){
364                         k = kmap(outp);
365                         kaddr = (char*)VA(k);
366
367                         if(waserror())
368                                 panic("executeio: page outp I/O error");
369
370                         n = devtab[c->type]->write(c, kaddr, BY2PG, outp->daddr);
371                         if(n != BY2PG)
372                                 nexterror();
373
374                         kunmap(k);
375                         poperror();
376                 }
377
378                 /* drop our extra swap reference */
379                 putswap((Page*)outp->daddr);
380
381                 /* Free up the page after I/O */
382                 putpage(outp);
383         }
384         ioptr = 0;
385 }
386
387 int
388 needpages(void*)
389 {
390         return palloc.freecount < swapalloc.headroom;
391 }
392
393 static void
394 setswapchan(Chan *c)
395 {
396         uchar buf[sizeof(Dir)+100];
397         Dir d;
398         int n;
399
400         if(waserror()){
401                 cclose(c);
402                 nexterror();
403         }
404         if(swapimage.c != nil) {
405                 if(swapalloc.free != conf.nswap)
406                         error(Einuse);
407                 cclose(swapimage.c);
408                 swapimage.c = nil;
409         }
410
411         /*
412          *  if this isn't a file, set the swap space
413          *  to be at most the size of the partition
414          */
415         if(devtab[c->type]->dc != L'M'){
416                 n = devtab[c->type]->stat(c, buf, sizeof buf);
417                 if(n <= 0 || convM2D(buf, n, &d, nil) == 0)
418                         error("stat failed in setswapchan");
419                 if(d.length < conf.nswppo*BY2PG)
420                         error("swap device too small");
421                 if(d.length < conf.nswap*BY2PG){
422                         conf.nswap = d.length/BY2PG;
423                         swapalloc.top = &swapalloc.swmap[conf.nswap];
424                         swapalloc.free = conf.nswap;
425                 }
426         }
427         c->flag &= ~CCACHE;
428         cclunk(c);
429         poperror();
430
431         swapchan = c;
432         swapimage.c = namec("#¶/swapfile", Aopen, ORDWR, 0);
433 }
434
/* qid paths of the files served by this device (see swapdir) */
enum {
	Qdir,		/* the directory itself */
	Qswap,		/* "swap": statistics and control */
	Qswapfile,	/* "swapfile": encrypted page I/O */
};
440
441 static Dirtab swapdir[]={
442         ".",            {Qdir, 0, QTDIR},       0,              DMDIR|0555,
443         "swap",         {Qswap},                0,              0664,
444         "swapfile",     {Qswapfile},            0,              0600,
445 };
446
447 static Chan*
448 swapattach(char *spec)
449 {
450         return devattach(L'¶', spec);
451 }
452
453 static Walkqid*
454 swapwalk(Chan *c, Chan *nc, char **name, int nname)
455 {
456         return devwalk(c, nc, name, nname, swapdir, nelem(swapdir), devgen);
457 }
458
459 static int
460 swapstat(Chan *c, uchar *dp, int n)
461 {
462         return devstat(c, dp, n, swapdir, nelem(swapdir), devgen);
463 }
464
465 static Chan*
466 swapopen(Chan *c, int omode)
467 {
468         uchar key[128/8];
469
470         switch((ulong)c->qid.path){
471         case Qswapfile:
472                 if(!iseve() || omode != ORDWR)
473                         error(Eperm);
474                 if(swapimage.c != nil)
475                         error(Einuse);
476                 if(swapchan == nil)
477                         error(Egreg);
478
479                 c->mode = openmode(omode);
480                 c->flag |= COPEN;
481                 c->offset = 0;
482
483                 swapbuf = mallocalign(BY2PG, BY2PG, 0, 0);
484                 swapkey = secalloc(sizeof(AESstate)*2);
485                 if(swapbuf == nil || swapkey == nil)
486                         error(Enomem);
487
488                 genrandom(key, sizeof(key));
489                 setupAESstate(&swapkey[0], key, sizeof(key), nil);
490                 genrandom(key, sizeof(key));
491                 setupAESstate(&swapkey[1], key, sizeof(key), nil);
492                 memset(key, 0, sizeof(key));
493
494                 return c;
495         }
496         return devopen(c, omode, swapdir, nelem(swapdir), devgen);
497 }
498
499 static void
500 swapclose(Chan *c)
501 {
502         if((c->flag & COPEN) == 0)
503                 return;
504         switch((ulong)c->qid.path){
505         case Qswapfile:
506                 cclose(swapchan);
507                 swapchan = nil;
508                 secfree(swapkey);
509                 swapkey = nil;
510                 free(swapbuf);
511                 swapbuf = nil;
512                 break;
513         }
514 }
515
516 static long
517 swapread(Chan *c, void *va, long n, vlong off)
518 {
519         char tmp[256];          /* must be >= 18*NUMSIZE (Qswap) */
520
521         switch((ulong)c->qid.path){
522         case Qdir:
523                 return devdirread(c, va, n, swapdir, nelem(swapdir), devgen);
524         case Qswap:
525                 snprint(tmp, sizeof tmp,
526                         "%llud memory\n"
527                         "%llud pagesize\n"
528                         "%lud kernel\n"
529                         "%lud/%lud user\n"
530                         "%lud/%lud swap\n"
531                         "%llud/%llud/%llud kernel malloc\n"
532                         "%llud/%llud/%llud kernel draw\n"
533                         "%llud/%llud/%llud kernel secret\n",
534                         (uvlong)conf.npage*BY2PG,
535                         (uvlong)BY2PG,
536                         conf.npage-conf.upages,
537                         palloc.user-palloc.freecount-fscache.pgref-swapimage.pgref, palloc.user,
538                         conf.nswap-swapalloc.free, conf.nswap,
539                         (uvlong)mainmem->curalloc,
540                         (uvlong)mainmem->cursize,
541                         (uvlong)mainmem->maxsize,
542                         (uvlong)imagmem->curalloc,
543                         (uvlong)imagmem->cursize,
544                         (uvlong)imagmem->maxsize,
545                         (uvlong)secrmem->curalloc,
546                         (uvlong)secrmem->cursize,
547                         (uvlong)secrmem->maxsize);
548                 return readstr((ulong)off, va, n, tmp);
549         case Qswapfile:
550                 if(n != BY2PG)
551                         error(Ebadarg);
552                 if(devtab[swapchan->type]->read(swapchan, va, n, off) != n)
553                         error(Eio);
554                 aes_xts_decrypt(&swapkey[0], &swapkey[1], off, va, va, n);
555                 return n;
556         }
557         error(Egreg);
558         return 0;
559 }
560
561 static long
562 swapwrite(Chan *c, void *va, long n, vlong off)
563 {
564         char buf[256];
565         
566         switch((ulong)c->qid.path){
567         case Qswap:
568                 if(!iseve())
569                         error(Eperm);
570                 if(n >= sizeof buf)
571                         error(Egreg);
572                 memmove(buf, va, n);    /* so we can NUL-terminate */
573                 buf[n] = 0;
574                 /* start a pager if not already started */
575                 if(strncmp(buf, "start", 5) == 0)
576                         kickpager();
577                 else if(buf[0]>='0' && buf[0]<='9')
578                         setswapchan(fdtochan(strtoul(buf, nil, 0), ORDWR, 1, 1));
579                 else
580                         error(Ebadctl);
581                 return n;
582         case Qswapfile:
583                 if(n != BY2PG)
584                         error(Ebadarg);
585                 aes_xts_encrypt(&swapkey[0], &swapkey[1], off, va, swapbuf, n);
586                 if(devtab[swapchan->type]->write(swapchan, swapbuf, n, off) != n)
587                         error(Eio);
588                 return n;
589         }
590         error(Egreg);
591         return 0;
592 }
593
594 Dev swapdevtab = {
595         L'¶',
596         "swap",
597         devreset,
598         swapinit,
599         devshutdown,
600         swapattach,
601         swapwalk,
602         swapstat,
603         swapopen,
604         devcreate,
605         swapclose,
606         swapread,
607         devbread,
608         swapwrite,
609         devbwrite,
610         devremove,
611         devwstat,
612 };