2 * domain name resolvers, see rfcs 1035 and 1123
11 typedef struct Dest Dest;
12 typedef struct Query Query;
21 Maxdest= 24, /* maximum destinations for a request message */
22 Maxoutstanding= 15, /* max. outstanding queries per domain name */
23 Remntretry= 15, /* min. sec.s between /net.alt remount tries */
26 * these are the old values; we're trying longer timeouts now
27 * primarily for the benefit of remote nameservers querying us
28 * during times of bad connectivity.
30 // Maxtrans= 3, /* maximum transmissions to a server */
31 // Maxretries= 3, /* cname+actual resends: was 32; have pity on user */
32 // Maxwaitms= 1000, /* wait no longer for a remote dns query */
33 // Minwaitms= 100, /* willing to wait for a remote dns query */
35 Maxtrans= 5, /* maximum transmissions to a server */
36 Maxretries= 5, /* cname+actual resends: was 32; have pity on user */
37 Maxwaitms= 5000, /* wait no longer for a remote dns query */
38 Minwaitms= 500, /* willing to wait for a remote dns query */
40 Destmagic= 0xcafebabe,
41 Querymagic= 0xdeadbeef,
43 enum { Hurry, Patient, };
44 enum { Outns, Inns, };
48 uchar a[IPaddrlen]; /* ip address */
49 DN *s; /* name server */
50 int nx; /* number of transmissions */
51 int code; /* response code; used to clear dp->respcode */
57 * Query has a QLock in it, thus it can't be an automatic
58 * variable, since each process would see a separate copy
59 * of the lock on its stack.
63 ushort type; /* and type to look up */
65 RR *nsrp; /* name servers to consult */
67 /* dest must not be on the stack due to forking in slave() */
68 Dest *dest; /* array of destinations */
69 Dest *curdest; /* pointer to next to fill */
70 int ndest; /* transmit to this many on this round */
74 QLock tcplock; /* only one tcp call at a time per query */
76 int tcpfd; /* if Tcp, read replies from here */
78 uchar tcpip[IPaddrlen];
83 /* estimated % probability of such a record existing at all */
98 static RR* dnresolve1(char*, int, int, Request*, int, int);
99 static int netquery(Query *, int);
102 * reading /proc/pid/args yields either "name args" or "name [display args]",
103 * so return only display args, if any.
112 snprint(buf, sizeof buf, "#p/%d/args", getpid());
113 if((fd = open(buf, OREAD)) < 0)
116 n = read(fd, buf, sizeof buf-1);
120 if ((lp = strchr(buf, '[')) == nil ||
121 (rp = strrchr(buf, ']')) == nil)
128 rrfreelistptr(RR **rpp)
132 if (rpp == nil || *rpp == nil)
135 *rpp = nil; /* update pointer in memory before freeing list */
140 * lookup 'type' info for domain name 'name'. If it doesn't exist, try
141 * looking it up as a canonical name.
143 * this process can be quite slow if time-outs are set too high when querying
144 * nameservers that just don't respond to certain query types. in that case,
145 * there will be multiple udp retries, multiple nameservers will be queried,
146 * and this will be repeated for a cname query. the whole thing will be
147 * retried several times until we get an answer or a time-out.
150 dnresolve(char *name, int class, int type, Request *req, RR **cn, int depth,
151 int recurse, int rooted, int *status)
162 if(depth > 12) /* in a recursive loop? */
165 procname = procgetname();
167 * hack for systems that don't have resolve search
168 * lists. Just look up the simple name in the database.
170 if(!rooted && strchr(name, '.') == nil){
172 drp = domainlist(class);
173 for(nrp = drp; rp == nil && nrp != nil; nrp = nrp->next){
174 snprint(nname, sizeof nname, "%s.%s", name,
176 rp = dnresolve(nname, class, type, req, cn, depth+1,
177 recurse, rooted, status);
179 rrfreelist(rrremneg(&rp));
187 procsetname(procname);
193 * try the name directly
195 rp = dnresolve1(name, class, type, req, depth, recurse);
198 * try it as a canonical name if we weren't told
199 * that the name didn't exist
201 dp = dnlookup(name, class, 0);
202 if(type != Tptr && dp->respcode != Rname)
203 for(loops = 0; rp == nil && loops < Maxretries; loops++){
204 /* retry cname, then the actual type */
205 rp = dnresolve1(name, class, Tcname, req,
211 /* rp->host == nil shouldn't happen, but does */
212 if(rp->negative || rp->host == nil){
219 name = rp->host->name;
226 rp = dnresolve1(name, class, type, req,
230 /* distinction between not found and not good */
231 if(rp == nil && status != nil && dp->respcode != Rok)
232 *status = dp->respcode;
234 procsetname(procname);
236 return randomize(rp);
240 queryinit(Query *qp, DN *dp, int type, Request *req)
242 memset(qp, 0, sizeof *qp);
243 qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
246 if (qp->type != type)
247 dnslog("queryinit: bogus type %d", type);
250 qp->dest = qp->curdest = nil;
251 qp->magic = Querymagic;
258 assert(qp->magic == Querymagic);
262 querydestroy(Query *qp)
265 /* leave udpfd open */
268 if (qp->tcpctlfd >= 0) {
269 hangup(qp->tcpctlfd);
273 memset(qp, 0, sizeof *qp); /* prevent accidents */
274 qp->udpfd = qp->tcpfd = qp->tcpctlfd = -1;
280 memset(p, 0, sizeof *p);
281 p->magic = Destmagic;
288 assert(p->magic == Destmagic);
292 * if the response to a query hasn't arrived within 100 ms.,
293 * it's unlikely to arrive at all. after 1 s., it's really unlikely.
294 * queries for missing RRs are likely to produce time-outs rather than
295 * negative responses, so cname and aaaa queries are likely to time out,
296 * thus we don't wait very long for them.
299 notestats(vlong start, int tmout, int type)
306 else if (type == Tcname)
309 long wait10ths = NS2MS(nsec() - start) / 100;
312 stats.under10ths[0]++;
313 else if (wait10ths >= nelem(stats.under10ths))
314 stats.under10ths[nelem(stats.under10ths) - 1]++;
316 stats.under10ths[wait10ths]++;
329 /* netquery with given name servers, free ns rrs when done */
331 netqueryns(Query *qp, int depth, RR *nsrp)
336 rv = netquery(qp, depth);
337 qp->nsrp = nil; /* prevent accidents */
345 issuequery(Query *qp, char *name, int class, int depth, int recurse)
349 RR *rp, *nsrp, *dbnsrp;
352 * if we're running as just a resolver, query our
353 * designated name servers
356 nsrp = randomize(getdnsservers(class));
358 if(netqueryns(qp, depth+1, nsrp) > Answnone)
359 return rrlookup(qp->dp, qp->type, OKneg);
363 * walk up the domain name looking for
364 * a name server for the domain.
366 for(cp = name; cp; cp = walkup(cp)){
368 * if this is a local (served by us) domain,
371 dbnsrp = randomize(dblookup(cp, class, Tns, 0, 0));
372 if(dbnsrp && dbnsrp->local){
373 rp = dblookup(name, class, qp->type, 1, dbnsrp->ttl);
381 * if recursion isn't set, just accept local
384 if(recurse == Dontrecurse){
393 /* look for ns in cache */
394 nsdp = dnlookup(cp, class, 0);
397 nsrp = randomize(rrlookup(nsdp, Tns, NOneg));
399 /* if the entry timed out, ignore it */
400 if(nsrp && nsrp->ttl < now){
402 rrfreelistptr(&nsrp);
408 rrfreelistptr(&dbnsrp);
411 /* query the name servers found in cache */
412 if(netqueryns(qp, depth+1, nsrp) > Answnone)
413 return rrlookup(qp->dp, qp->type, OKneg);
415 /* try the name servers found in db */
416 if(netqueryns(qp, depth+1, dbnsrp) > Answnone)
417 return rrlookup(qp->dp, qp->type, NOneg);
423 dnresolve1(char *name, int class, int type, Request *req, int depth,
432 dnslog("[%d] dnresolve1 %s %d %d", getpid(), name, type, class);
434 /* only class Cin implemented so far */
438 dp = dnlookup(name, class, 1);
441 * Try the cache first
443 rp = rrlookup(dp, type, OKneg);
446 /* unauthoritative db entries are hints */
450 dnslog("[%d] dnresolve1 %s %d %d: auth rr in db",
451 getpid(), name, type, class);
455 /* cached entry must still be valid */
457 /* but Tall entries are special */
458 if(type != Tall || rp->query == Tall) {
461 dnslog("[%d] dnresolve1 %s %d %d: rr not in db",
462 getpid(), name, type, class);
468 rp = nil; /* accident prevention */
472 * try the cache for a canonical name. if found punt
473 * since we'll find it during the canonical name search
477 rp = rrlookup(dp, Tcname, NOneg);
483 dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup for non-cname",
484 getpid(), name, type, class);
490 * if the domain name is within an area of ours,
491 * we should have found its data in memory by now.
493 area = inmyarea(dp->name);
494 if (area || strncmp(dp->name, "local#", 6) == 0)
497 qp = emalloc(sizeof *qp);
498 queryinit(qp, dp, type, req);
499 rp = issuequery(qp, name, class, depth, recurse);
504 dnslog("[%d] dnresolve1 %s %d %d: rr from query",
505 getpid(), name, type, class);
509 /* settle for a non-authoritative answer */
510 rp = rrlookup(dp, type, OKneg);
513 dnslog("[%d] dnresolve1 %s %d %d: rr from rrlookup",
514 getpid(), name, type, class);
518 /* no one answered. try the database, we might have a chance. */
519 rp = dblookup(name, class, type, 0, 0);
522 dnslog("[%d] dnresolve1 %s %d %d: rr from dblookup",
523 getpid(), name, type, class);
526 dnslog("[%d] dnresolve1 %s %d %d: no rr from dblookup; crapped out",
527 getpid(), name, type, class);
533 * walk a domain name one element to the right.
534 * return a pointer to that element.
535 * in other words, return a pointer to the parent domain name.
542 cp = strchr(name, '.');
552 * Get a udp port for sending requests and reading replies. Put the port
553 * into "headers" mode.
555 static char *hmsg = "headers";
561 char ds[64], adir[64];
564 snprint(ds, sizeof ds, "%s/udp!*!0", (mtpt && *mtpt) ? mtpt : "/net");
565 ctl = announce(ds, adir);
567 /* warning("can't get udp port"); */
571 /* turn on header style interface */
572 if(write(ctl, hmsg, strlen(hmsg)) != strlen(hmsg)){
578 /* grab the data file */
579 snprint(ds, sizeof ds, "%s/data", adir);
580 fd = open(ds, ORDWR);
583 warning("can't open udp port %s: %r", ds);
588 initdnsmsg(DNSmsg *mp, RR *rp, int flags, ushort reqno)
598 newdnsmsg(RR *rp, int flags, ushort reqno)
602 mp = emalloc(sizeof *mp);
603 initdnsmsg(mp, rp, flags, reqno);
607 /* generate a DNS UDP query packet */
609 mkreq(DN *dp, int type, uchar *buf, int flags, ushort reqno)
613 Udphdr *uh = (Udphdr*)buf;
616 /* stuff port number into output buffer */
617 memset(uh, 0, sizeof *uh);
618 hnputs(uh->rport, 53);
620 /* make request and convert it to output format */
623 memset(&m, 0, sizeof m);
624 initdnsmsg(&m, rp, flags, reqno);
625 len = convDNS2M(&m, &buf[Udphdrsize], Maxudp);
631 freeanswers(DNSmsg *mp)
634 rrfreelistptr(&mp->qd);
635 rrfreelistptr(&mp->an);
636 rrfreelistptr(&mp->ns);
637 rrfreelistptr(&mp->ar);
639 mp->qdcount = mp->ancount = mp->nscount = mp->arcount = 0;
642 /* timed read of reply. sets srcip. ibuf must be 64K to handle tcp answers. */
644 readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp,
649 vlong startns = nsec();
653 len = -1; /* pessimism */
655 memset(srcip, 0, IPaddrlen);
656 ms = endms - NS2MS(startns);
658 return -1; /* taking too long */
664 dnslog("readnet: qp->udpfd closed");
666 len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
668 notestats(startns, len < 0, qp->type);
669 if (len >= IPaddrlen)
670 memmove(srcip, ibuf, IPaddrlen);
671 if (len >= Udphdrsize) {
678 dnslog("readnet: tcp params not set");
681 dnslog("readnet: %s: tcp fd unset for dest %I",
682 qp->dp->name, qp->tcpip);
683 else if (readn(fd, lenbuf, 2) != 2) {
684 dnslog("readnet: short read of 2-byte tcp msg size from %I",
686 /* probably a time-out */
687 notestats(startns, 1, qp->type);
689 len = lenbuf[0]<<8 | lenbuf[1];
690 if (readn(fd, ibuf, len) != len) {
691 dnslog("readnet: short read of tcp data from %I",
693 /* probably a time-out */
694 notestats(startns, 1, qp->type);
698 memmove(srcip, qp->tcpip, IPaddrlen);
706 * read replies to a request and remember the rrs in the answer(s).
707 * ignore any of the wrong type.
708 * wait at most until endms.
711 readreply(Query *qp, int medium, ushort req, uchar *ibuf, DNSmsg *mp,
718 uchar srcip[IPaddrlen];
722 for (; timems() < endms &&
723 (len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
725 /* convert into internal format */
726 memset(mp, 0, sizeof *mp);
727 err = convM2DNS(reply, len, mp, nil);
728 if (mp->flags & Ftrunc) {
731 /* notify our caller to retry the query via tcp. */
734 dnslog("readreply: %s: input err, len %d: %s: %I",
735 qp->dp->name, len, err, srcip);
740 logreply(qp->req->id, srcip, mp);
742 /* answering the right question? */
744 dnslog("%d: id %d instead of %d: %I", qp->req->id,
747 dnslog("%d: no question RR: %I", qp->req->id, srcip);
748 else if(mp->qd->owner != qp->dp)
749 dnslog("%d: owner %s instead of %s: %I", qp->req->id,
750 mp->qd->owner->name, qp->dp->name, srcip);
751 else if(mp->qd->type != qp->type)
752 dnslog("%d: qp->type %d instead of %d: %I",
753 qp->req->id, mp->qd->type, qp->type, srcip);
755 /* remember what request this is in answer to */
756 for(rp = mp->an; rp; rp = rp->next)
757 rp->query = qp->type;
761 if (timems() >= endms) {
762 ; /* query expired */
764 /* this happens routinely when a read times out */
765 dnslog("readreply: %s type %s: ns %I read error or eof "
766 "(returned %d): %r", qp->dp->name, rrname(qp->type,
767 tbuf, sizeof tbuf), srcip, len);
769 for (rp = qp->nsrp; rp != nil; rp = rp->next)
771 dnslog("readreply: %s: query sent to "
772 "ns %s", qp->dp->name,
775 memset(mp, 0, sizeof *mp);
780 * return non-0 if first list includes second list
783 contains(RR *rp1, RR *rp2)
787 for(trp2 = rp2; trp2; trp2 = trp2->next){
788 for(trp1 = rp1; trp1; trp1 = trp1->next)
789 if(trp1->type == trp2->type)
790 if(trp1->host == trp2->host)
791 if(trp1->owner == trp2->owner)
801 * return multicast version if any
807 if (ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0 ||
808 ipcmp(ip, IPv4bcast) == 0)
817 * Get next server address(es) into qp->dest[nd] and beyond
820 serveraddrs(Query *qp, int nd, int depth)
825 if(nd >= Maxdest) /* dest array is full? */
829 * look for a server whose address we already know.
830 * if we find one, mark it so we ignore this on
834 for(rp = qp->nsrp; rp; rp = rp->next){
835 assert(rp->magic == RRmagic);
838 arp = rrlookup(rp->host, Ta, NOneg);
840 arp = rrlookup(rp->host, Taaaa, NOneg);
845 arp = dblookup(rp->host->name, Cin, Ta, 0, 0);
847 arp = dblookup(rp->host->name, Cin, Taaaa, 0, 0);
855 * if the cache and database lookup didn't find any new
856 * server addresses, try resolving one via the network.
857 * Mark any we try to resolve so we don't try a second time.
860 for(rp = qp->nsrp; rp; rp = rp->next){
866 * avoid loops looking up a server under itself
868 if(subsume(rp->owner->name, rp->host->name))
871 arp = dnresolve(rp->host->name, Cin, Ta, qp->req, 0,
872 depth+1, Recurse, 1, 0);
874 arp = dnresolve(rp->host->name, Cin, Taaaa,
875 qp->req, 0, depth+1, Recurse, 1, 0);
877 rrfreelist(rrremneg(&arp));
883 /* use any addresses that we found */
884 for(trp = arp; trp && nd < Maxdest; trp = trp->next){
886 parseip(cur->a, trp->ip->name);
888 * straddling servers can reject all nameservers if they are all
889 * inside, so be sure to list at least one outside ns at
890 * the end of the ns list in /lib/ndb for `dom='.
892 if (ipisbm(cur->a) ||
893 cfg.straddle && !insideaddr(qp->dp->name) && insidens(cur->a))
897 cur->code = Rtimeout;
907 * cache negative responses
910 cacheneg(DN *dp, int type, int rcode, RR *soarr)
920 /* no cache time specified, don't make anything up */
923 if(soarr->next != nil)
924 rrfreelistptr(&soarr->next);
926 soaowner = soarr->owner;
930 /* the attach can cause soarr to be freed so mine it now */
931 if(soarr != nil && soarr->soa != nil)
932 ttl = soarr->soa->minttl+now;
936 /* add soa and negative RR to the database */
937 rrattach(soarr, Authoritative);
942 rp->negsoaowner = soaowner;
943 rp->negrcode = rcode;
945 rrattach(rp, Authoritative);
949 setdestoutns(Dest *p, int n)
951 uchar *outns = outsidens(n);
957 dnslog("[%d] no outside-ns in ndb", getpid());
960 memmove(p->a, outns, sizeof p->a);
961 p->s = dnlookup("outside-ns-ips", Cin, 1);
966 * issue query via UDP or TCP as appropriate.
967 * for TCP, returns with qp->tcpip set from udppkt header.
970 mydnsquery(Query *qp, int medium, uchar *udppkt, int len)
974 char conndir[40], addr[128];
980 domain = smprint("%I", udppkt);
982 warning("mydnsquery: no memory for domain");
985 if (myaddr(domain)) {
986 dnslog("mydnsquery: trying to send to myself (%s); bzzzt",
993 nfd = dup(qp->udpfd, -1);
995 warning("mydnsquery: qp->udpfd %d: %r", qp->udpfd);
996 close(qp->udpfd); /* ensure it's closed */
997 qp->udpfd = -1; /* poison it */
1003 dnslog("mydnsquery: qp->udpfd %d closed", qp->udpfd);
1005 if (write(qp->udpfd, udppkt, len+Udphdrsize) !=
1007 warning("sending udp msg: %r");
1017 /* send via TCP & keep fd around for reply */
1018 parseip(qp->tcpip, domain);
1019 snprint(addr, sizeof addr, "%s/tcp!%s!dns",
1020 (mntpt && *mntpt) ? mntpt : "/net",
1023 qp->tcpfd = dial(addr, nil, conndir, &qp->tcpctlfd);
1025 if (qp->tcpfd < 0) {
1026 dnslog("can't dial %s: %r", addr);
1029 nci = getnetconninfo(conndir, qp->tcpfd);
1031 parseip(qp->tcpip, nci->rsys);
1032 freenetconninfo(nci);
1034 dnslog("mydnsquery: getnetconninfo failed");
1037 belen[0] = len >> 8;
1039 if (write(qp->tcpfd, belen, 2) != 2 ||
1040 write(qp->tcpfd, udppkt + Udphdrsize, len) != len)
1041 warning("sending tcp msg: %r");
1051 * send query to all UDP destinations or one TCP destination,
1052 * taken from obuf (udp packet) header
1055 xmitquery(Query *qp, int medium, int depth, uchar *obuf, int inns, int len)
1062 if(timems() >= qp->req->aborttime)
1066 * get a nameserver address if we need one.
1067 * serveraddrs populates qp->dest.
1071 if (qp->ndest < 0 || qp->ndest > Maxdest) {
1072 dnslog("qp->ndest %d out of range", qp->ndest);
1076 * we're to transmit to more destinations than we currently have,
1079 if (qp->ndest > qp->curdest - p) {
1080 j = serveraddrs(qp, qp->curdest - p, depth);
1081 if (j < 0 || j > Maxdest) {
1082 dnslog("serveraddrs() result %d out of range", j);
1085 qp->curdest = &qp->dest[j];
1088 /* no servers, punt */
1090 if (cfg.straddle && cfg.inside) {
1091 /* get ips of "outside-ns-ips" */
1092 qp->curdest = qp->dest;
1093 for(n = 0; n < Maxdest; n++, qp->curdest++)
1094 if (setdestoutns(qp->curdest, n) < 0)
1097 dnslog("xmitquery: %s: no outside-ns nameservers",
1100 /* it's probably just a bogus domain, don't log it */
1103 /* send to first 'qp->ndest' destinations */
1105 if (medium == Tcp) {
1109 procsetname("tcp %sside query for %s %s", (inns? "in": "out"),
1110 qp->dp->name, rrname(qp->type, buf, sizeof buf));
1111 mydnsquery(qp, medium, obuf, len); /* sets qp->tcpip from obuf */
1113 logsend(qp->req->id, depth, qp->tcpip, "", qp->dp->name,
1116 for(; p < &qp->dest[qp->ndest] && p < qp->curdest; p++){
1117 /* skip destinations we've finished with */
1118 if(p->nx >= Maxtrans)
1123 /* exponential backoff of requests */
1124 if((1<<p->nx) > qp->ndest)
1127 if(memcmp(p->a, IPnoaddr, sizeof IPnoaddr) == 0)
1128 continue; /* mistake */
1130 procsetname("udp %sside query to %I/%s %s %s",
1131 (inns? "in": "out"), p->a, p->s->name,
1132 qp->dp->name, rrname(qp->type, buf, sizeof buf));
1134 logsend(qp->req->id, depth, p->a, p->s->name,
1135 qp->dp->name, qp->type);
1137 /* fill in UDP destination addr & send it */
1138 memmove(obuf, p->a, sizeof p->a);
1139 mydnsquery(qp, medium, obuf, len);
1148 static int lckindex[Maxlcks] = {
1149 0, /* all others map here */
1161 qtype2lck(int qtype) /* map query type to querylck index */
1165 for (i = 1; i < nelem(lckindex); i++)
1166 if (lckindex[i] == qtype)
1171 /* is mp a cachable negative response (with Rname set)? */
1173 isnegrname(DNSmsg *mp)
1175 /* TODO: could add || cfg.justforw to RHS of && */
1176 return mp->an == nil && (mp->flags & Rmask) == Rname;
1179 /* returns Answerr (-1) on errors, else number of answers, which can be zero. */
1181 procansw(Query *qp, DNSmsg *mp, uchar *srcip, int depth, Dest *p)
1192 /* ignore any error replies */
1193 if((mp->flags & Rmask) == Rserver){
1196 if(p != qp->curdest)
1201 /* ignore any bad delegations */
1202 if(mp->ns && baddelegation(mp->ns, qp->nsrp, srcip)){
1203 stats.negbaddeleg++;
1207 if(p != qp->curdest)
1209 dnslog(" and no answers");
1212 dnslog(" but has answers; ignoring ns");
1214 rrfreelistptr(&mp->ns);
1219 /* remove any soa's from the authority section */
1221 soarr = rrremtype(&mp->ns, Tsoa);
1223 /* incorporate answers */
1230 rrattach(mp->an, (mp->flags & Fauth) != 0);
1232 rrattach(mp->ar, Notauthoritative);
1233 if(mp->ns && !cfg.justforw){
1234 ndp = mp->ns->owner;
1235 rrattach(mp->ns, Notauthoritative);
1239 rrfreelistptr(&mp->ns);
1244 /* free the question */
1247 rrfreelistptr(&mp->qd);
1253 * Any reply from an authoritative server,
1254 * or a positive reply terminates the search.
1255 * A negative response now also terminates the search.
1257 if(mp->an != nil || (mp->flags & Fauth)){
1259 qp->dp->respcode = Rname;
1261 qp->dp->respcode = Rok;
1264 * cache any negative responses, free soarr.
1265 * negative responses need not be authoritative:
1266 * they can legitimately come from a cache.
1268 if( /* (mp->flags & Fauth) && */ mp->an == nil)
1269 cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1276 } else if (isnegrname(mp)) {
1277 qp->dp->respcode = Rname;
1279 * cache negative response.
1280 * negative responses need not be authoritative:
1281 * they can legitimately come from a cache.
1283 cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
1292 * if we've been given better name servers, recurse.
1293 * if we're a pure resolver, don't recurse, we have
1294 * to forward to a fixed set of named servers.
1296 if(!mp->ns || cfg.resolver && cfg.justforw)
1298 tp = rrlookup(ndp, Tns, NOneg);
1299 if(contains(qp->nsrp, tp)){
1305 procsetname("recursive query for %s %s", qp->dp->name,
1306 rrname(qp->type, buf, sizeof buf));
1308 nqp = emalloc(sizeof *nqp);
1309 queryinit(nqp, qp->dp, qp->type, qp->req);
1310 rv = netqueryns(nqp, depth+1, tp);
1317 * send a query via tcp to a single address (from ibuf's udp header)
1318 * and read the answer(s) into mp->an.
1321 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
1322 ulong waitms, int inns, ushort req)
1327 endms = timems() + waitms;
1328 if(endms > qp->req->aborttime)
1329 endms = qp->req->aborttime;
1332 dnslog("%s: udp reply truncated; retrying query via tcp to %I",
1333 qp->dp->name, qp->tcpip);
1335 qlock(&qp->tcplock);
1336 memmove(obuf, ibuf, IPaddrlen); /* send back to respondent */
1337 memset(mp, 0, sizeof *mp);
1338 if (xmitquery(qp, Tcp, depth, obuf, inns, len) < 0 ||
1339 readreply(qp, Tcp, req, ibuf, mp, endms) < 0)
1341 if (qp->tcpfd >= 0) {
1342 hangup(qp->tcpctlfd);
1343 close(qp->tcpctlfd);
1346 qp->tcpfd = qp->tcpctlfd = -1;
1347 qunlock(&qp->tcplock);
1352 * query name servers. fill in obuf with on-the-wire representation of a
1353 * DNSmsg derived from qp. if the name server returns a pointer to another
1354 * name server, recurse.
1357 queryns(Query *qp, int depth, uchar *ibuf, uchar *obuf, ulong waitms, int inns)
1359 int ndest, len, replywaits, rv;
1363 uchar srcip[IPaddrlen];
1364 Dest *p, *np, *dest;
1366 /* pack request into a udp message */
1368 len = mkreq(qp->dp, qp->type, obuf, Frecurse|Oquery, req);
1370 /* no server addresses yet */
1372 dest = emalloc(Maxdest * sizeof *dest); /* dest can't be on stack */
1373 for (p = dest; p < dest + Maxdest; p++)
1375 /* this dest array is local to this call of queryns() */
1377 qp->curdest = qp->dest = dest;
1380 * transmit udp requests and wait for answers.
1381 * at most Maxtrans attempts to each address.
1382 * each cycle send one more message than the previous.
1383 * retry a query via tcp if its response is truncated.
1385 for(ndest = 1; ndest < Maxdest; ndest++){
1388 if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
1391 endms = timems() + waitms;
1392 if(endms > qp->req->aborttime)
1393 endms = qp->req->aborttime;
1395 for(replywaits = 0; replywaits < ndest; replywaits++){
1398 procsetname("reading %sside reply from %I: %s %s from %s",
1399 (inns? "in": "out"), obuf, qp->dp->name,
1400 rrname(qp->type, buf, sizeof buf), qp->req->from);
1402 /* read udp answer into m */
1403 if (readreply(qp, Udp, req, ibuf, &m, endms) >= 0)
1404 memmove(srcip, ibuf, IPaddrlen);
1405 else if (!(m.flags & Ftrunc)) {
1407 break; /* timed out on this dest */
1409 /* whoops, it was truncated! ask again via tcp */
1411 rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
1412 waitms, inns, req); /* answer in m */
1415 break; /* failed via tcp too */
1417 memmove(srcip, qp->tcpip, IPaddrlen);
1420 /* find responder */
1421 // dnslog("queryns got reply from %I", srcip);
1422 for(p = qp->dest; p < qp->curdest; p++)
1423 if(memcmp(p->a, srcip, sizeof p->a) == 0)
1426 /* remove all addrs of responding server from list */
1427 if(p != qp->curdest)
1428 for(np = qp->dest; np < qp->curdest; np++)
1432 /* free or incorporate RRs in m */
1433 rv = procansw(qp, &m, srcip, depth, p);
1434 if (rv > Answnone) {
1436 qp->dest = qp->curdest = nil; /* prevent accidents */
1442 /* if all servers returned failure, propagate it */
1443 qp->dp->respcode = Rserver;
1444 for(p = dest; p < qp->curdest; p++) {
1446 if(p->code != Rserver)
1447 qp->dp->respcode = Rok;
1448 p->magic = 0; /* prevent accidents */
1451 // if (qp->dp->respcode)
1452 // dnslog("queryns setting Rserver for %s", qp->dp->name);
1455 qp->dest = qp->curdest = nil; /* prevent accidents */
1460 * run a command with a supplied fd as standard input
1463 system(int fd, char *cmd)
1468 if((pid = fork()) == -1)
1469 sysfatal("fork failed: %r");
1473 for (i = 3; i < 200; i++)
1474 close(i); /* don't leak fds */
1475 execl("/bin/rc", "rc", "-c", cmd, nil);
1476 sysfatal("exec rc: %r");
1478 for(p = waitpid(); p >= 0; p = waitpid())
1481 return "lost child";
1484 /* compute wait, weighted by probability of success, with bounds */
1486 weight(ulong ms, unsigned pcntprob)
1490 wait = (ms * pcntprob) / 100;
1491 if (wait < Minwaitms)
1493 if (wait > Maxwaitms)
1499 * in principle we could use a single descriptor for a udp port
1500 * to send all queries and receive all the answers to them,
1501 * but we'd have to sort out the answers by dns-query id.
1504 udpquery(Query *qp, char *mntpt, int depth, int patient, int inns)
1512 static QLock mntlck;
1513 static ulong lastmount;
1517 /* use alloced buffers rather than ones from the stack */
1518 ibuf = emalloc(64*1024); /* max. tcp reply size */
1519 obuf = emalloc(Maxudp+Udphdrsize);
1521 fd = udpport(mntpt);
1522 while (fd < 0 && cfg.straddle && strcmp(mntpt, "/net.alt") == 0) {
1523 /* HACK: remount /net.alt */
1525 if (now < lastmount + Remntretry)
1526 sleep(S2MS(lastmount + Remntretry - now));
1528 fd = udpport(mntpt); /* try again under lock */
1530 dnslog("[%d] remounting /net.alt", getpid());
1531 unmount(nil, "/net.alt");
1533 msg = system(open("/dev/null", ORDWR), "outside");
1535 lastmount = time(nil);
1537 dnslog("[%d] can't remount /net.alt: %s",
1539 sleep(10*1000); /* don't spin remounting */
1541 fd = udpport(mntpt);
1546 dnslog("can't get udpport for %s query of name %s: %r",
1547 mntpt, qp->dp->name);
1552 * Our QIP servers are busted: they respond to AAAA and CNAME queries
1553 * with (sometimes malformed [too short] packets and) no answers,
1554 * just NS RRs, and no Rname errors. So make the time-to-wait
1555 * proportional to the estimated probability of an RR of that type existing.
1557 if (qp->type >= nelem(likely))
1558 pcntprob = 35; /* unpopular query type */
1560 pcntprob = likely[qp->type];
1561 reqtm = (patient? 2 * Maxreqtm: Maxreqtm);
1562 wait = weight(reqtm / 3, pcntprob); /* time for one udp query */
1563 qp->req->aborttime = timems() + 3*wait; /* for all udp queries */
1566 rv = queryns(qp, depth, ibuf, obuf, wait, inns);
1577 * look up (qp->dp->name, qp->type) rr in dns,
1578 * using nameservers in qp->nsrp.
1581 netquery(Query *qp, int depth)
1583 int lock, rv, triedin, inname;
1590 rv = Answnone; /* pessimism */
1591 if(depth > 12) /* in a recursive loop? */
1597 * slave might have forked. if so, the parent process longjmped to
1598 * req->mret; we're usually the child slave, but if there are too
1599 * many children already, we're still the same process. under no
1600 * circumstances block the 9p loop.
1602 if(!qp->req->isslave && strcmp(qp->req->from, "9p") == 0)
1606 * don't lock before call to slave so only children can block.
1607 * just lock at top-level invocation.
1609 lock = depth <= 1 && qp->req->isslave;
1610 dp = qp->dp; /* ensure that it doesn't change underfoot */
1613 procsetname("query lock wait: %s %s from %s", dp->name,
1614 rrname(qp->type, buf, sizeof buf), qp->req->from);
1616 * don't make concurrent queries for this name.
1617 * dozens of processes blocking here probably indicates
1618 * an error in our dns data that causes us to not
1619 * recognise a zone (area) as one of our own, thus
1620 * causing us to query other nameservers.
1622 qlp = &dp->querylck[qtype2lck(qp->type)];
1624 if (qlp->Ref.ref > Maxoutstanding) {
1628 dnslog("too many outstanding queries for %s;"
1629 " dropping this one; no further logging"
1630 " of drops", dp->name);
1637 procsetname("netquery: %s", dp->name);
1639 /* prepare server RR's for incremental lookup */
1640 for(rp = qp->nsrp; rp; rp = rp->next)
1646 * normal resolvers and servers will just use mntpt for all addresses,
1647 * even on the outside. straddling servers will use mntpt (/net)
1648 * for inside addresses and /net.alt for outside addresses,
1649 * thus bypassing other inside nameservers.
1651 inname = insideaddr(dp->name);
1652 if (!cfg.straddle || inname) {
1653 rv = udpquery(qp, mntpt, depth, Hurry, (cfg.inside? Inns: Outns));
1658 * if we're still looking, are inside, and have an outside domain,
1659 * try it on our outside interface, if any.
1661 if (rv == Answnone && cfg.inside && !inname) {
1664 "[%d] netquery: internal nameservers failed for %s; trying external",
1665 getpid(), dp->name);
1667 /* prepare server RR's for incremental lookup */
1668 for(rp = qp->nsrp; rp; rp = rp->next)
1671 rv = udpquery(qp, "/net.alt", depth, Patient, Outns);
1676 assert(qlp->Ref.ref > 0);
1692 memset(&req, 0, sizeof req);
1694 req.aborttime = timems() + Maxreqtm;
1695 req.from = "internal";
1696 qp = emalloc(sizeof *qp);
1697 queryinit(qp, dnlookup(root, Cin, 1), Tns, &req);
1698 nsrp = dblookup(root, Cin, Tns, 0, 0);
1699 for (rr = nsrp; rr != nil; rr = rr->next) /* DEBUG */
1700 dnslog("seerootns query nsrp: %R", rr);
1701 rv = netqueryns(qp, 0, nsrp); /* lookup ". ns" using nsrp */