Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport of Add peering .service and .node DNS lookups. into release/1.14.x #15599

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/15596.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:improvement
dns: Add support for cluster peering `.service` and `.node` DNS queries.
```
128 changes: 91 additions & 37 deletions agent/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ type dnsConfig struct {
}

type serviceLookup struct {
PeerName string
Datacenter string
Service string
Tag string
Expand All @@ -116,6 +117,7 @@ type serviceLookup struct {

type nodeLookup struct {
Datacenter string
PeerName string
Node string
Tag string
MaxRecursionLevel int
Expand Down Expand Up @@ -421,11 +423,18 @@ func (d *DNSServer) handlePtr(resp dns.ResponseWriter, req *dns.Msg) {
// server side to avoid transferring the entire node list.
if err := d.agent.RPC("Catalog.ListNodes", &args, &out); err == nil {
for _, n := range out.Nodes {
lookup := serviceLookup{
// Peering PTR lookups are currently not supported, so we don't
// need to populate that field for creating the node FQDN.
// PeerName: n.PeerName,
Datacenter: n.Datacenter,
EnterpriseMeta: *n.GetEnterpriseMeta(),
}
arpa, _ := dns.ReverseAddr(n.Address)
if arpa == qName {
ptr := &dns.PTR{
Hdr: dns.RR_Header{Name: q.Name, Rrtype: dns.TypePTR, Class: dns.ClassINET, Ttl: 0},
Ptr: fmt.Sprintf("%s.node.%s.%s", n.Node, datacenter, d.domain),
Ptr: nodeCanonicalDNSName(lookup, n.Node, d.domain),
}
m.Answer = append(m.Answer, ptr)
break
Expand Down Expand Up @@ -685,8 +694,16 @@ type queryLocality struct {
// Example query: <service>.virtual.<namespace>.ns.<partition>.ap.<datacenter>.dc.consul
datacenter string

// peerOrDatacenter is parsed from DNS queries where the datacenter and peer name are specified in the same query part.
// peer is the peer name parsed from a label that has explicit parts.
// Example query: <service>.virtual.<namespace>.ns.<peer>.peer.<partition>.ap.consul
peer string

// peerOrDatacenter is parsed from DNS queries where the datacenter and peer name are
// specified in the same query part.
// Example query: <service>.virtual.<peerOrDatacenter>.consul
//
// Note that this field should only be a "peer" for virtual queries, since virtual IPs should
// not be shared between datacenters. In all other cases, it should be considered a DC.
peerOrDatacenter string

acl.EnterpriseMeta
Expand Down Expand Up @@ -763,11 +780,17 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi

lookup := serviceLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
PeerName: locality.peer,
Connect: false,
Ingress: false,
MaxRecursionLevel: maxRecursionLevel,
EnterpriseMeta: locality.EnterpriseMeta,
}
// Only one of dc or peer can be used.
if lookup.PeerName != "" {
lookup.Datacenter = ""
}

// Support RFC 2782 style syntax
if n == 2 && strings.HasPrefix(queryParts[1], "_") && strings.HasPrefix(queryParts[0], "_") {

Expand Down Expand Up @@ -808,6 +831,9 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi
return invalid()
}

// Peering is not currently supported for connect queries.
// Exposing this likely would not provide much value, since users would
// need to be very familiar with our TLS / SNI / mesh gateways to leverage it.
lookup := serviceLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
Service: queryParts[len(queryParts)-1],
Expand All @@ -833,13 +859,18 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi
// The datacenter of the request is not specified because cross-datacenter virtual IP
// queries are not supported. This guard rail is in place because virtual IPs are allocated
// within a DC, therefore their uniqueness is not guaranteed globally.
PeerName: locality.peerOrDatacenter,
PeerName: locality.peer,
ServiceName: queryParts[len(queryParts)-1],
EnterpriseMeta: locality.EnterpriseMeta,
QueryOptions: structs.QueryOptions{
Token: d.agent.tokens.UserToken(),
},
}
if args.PeerName == "" {
// If the peer name was not explicitly defined, fall back to the ambiguously-parsed version.
args.PeerName = locality.peerOrDatacenter
}

var out string
if err := d.agent.RPC("Catalog.VirtualIPForService", &args, &out); err != nil {
return err
Expand Down Expand Up @@ -868,6 +899,8 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi
return invalid()
}

// Peering is not currently supported for ingress queries.
// We probably should not be encouraging chained calls from ingress to peers anyway.
lookup := serviceLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
Service: queryParts[len(queryParts)-1],
Expand Down Expand Up @@ -900,10 +933,15 @@ func (d *DNSServer) dispatch(remoteAddr net.Addr, req, resp *dns.Msg, maxRecursi

lookup := nodeLookup{
Datacenter: locality.effectiveDatacenter(d.agent.config.Datacenter),
PeerName: locality.peer,
Node: node,
MaxRecursionLevel: maxRecursionLevel,
EnterpriseMeta: locality.EnterpriseMeta,
}
// Only one of dc or peer can be used.
if lookup.PeerName != "" {
lookup.Datacenter = ""
}

return d.nodeLookup(cfg, lookup, req, resp)

Expand Down Expand Up @@ -1024,6 +1062,7 @@ func (d *DNSServer) nodeLookup(cfg *dnsConfig, lookup nodeLookup, req, resp *dns
// Make an RPC request
args := &structs.NodeSpecificRequest{
Datacenter: lookup.Datacenter,
PeerName: lookup.PeerName,
Node: lookup.Node,
QueryOptions: structs.QueryOptions{
Token: d.agent.tokens.UserToken(),
Expand Down Expand Up @@ -1350,6 +1389,7 @@ func (d *DNSServer) lookupServiceNodes(cfg *dnsConfig, lookup serviceLookup) (st
serviceTags = []string{lookup.Tag}
}
args := structs.ServiceSpecificRequest{
PeerName: lookup.PeerName,
Connect: lookup.Connect,
Ingress: lookup.Ingress,
Datacenter: lookup.Datacenter,
Expand Down Expand Up @@ -1400,9 +1440,9 @@ func (d *DNSServer) serviceLookup(cfg *dnsConfig, lookup serviceLookup, req, res
// Add various responses depending on the request
qType := req.Question[0].Qtype
if qType == dns.TypeSRV {
d.serviceSRVRecords(cfg, lookup.Datacenter, out.Nodes, req, resp, ttl, lookup.MaxRecursionLevel)
d.serviceSRVRecords(cfg, lookup, out.Nodes, req, resp, ttl, lookup.MaxRecursionLevel)
} else {
d.serviceNodeRecords(cfg, lookup.Datacenter, out.Nodes, req, resp, ttl, lookup.MaxRecursionLevel)
d.serviceNodeRecords(cfg, lookup, out.Nodes, req, resp, ttl, lookup.MaxRecursionLevel)
}

if len(resp.Answer) == 0 {
Expand Down Expand Up @@ -1505,10 +1545,14 @@ func (d *DNSServer) preparedQueryLookup(cfg *dnsConfig, datacenter, query string

// Add various responses depending on the request.
qType := req.Question[0].Qtype

// This serviceLookup only needs the datacenter field populated,
// because peering is not supported with prepared queries.
lookup := serviceLookup{Datacenter: out.Datacenter}
if qType == dns.TypeSRV {
d.serviceSRVRecords(cfg, out.Datacenter, out.Nodes, req, resp, ttl, maxRecursionLevel)
d.serviceSRVRecords(cfg, lookup, out.Nodes, req, resp, ttl, maxRecursionLevel)
} else {
d.serviceNodeRecords(cfg, out.Datacenter, out.Nodes, req, resp, ttl, maxRecursionLevel)
d.serviceNodeRecords(cfg, lookup, out.Nodes, req, resp, ttl, maxRecursionLevel)
}

if len(resp.Answer) == 0 {
Expand Down Expand Up @@ -1559,15 +1603,15 @@ RPC:
}

// serviceNodeRecords is used to add the node records for a service lookup
func (d *DNSServer) serviceNodeRecords(cfg *dnsConfig, dc string, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration, maxRecursionLevel int) {
func (d *DNSServer) serviceNodeRecords(cfg *dnsConfig, lookup serviceLookup, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration, maxRecursionLevel int) {
handled := make(map[string]struct{})
var answerCNAME []dns.RR = nil

count := 0
for _, node := range nodes {
// Add the node record
had_answer := false
records, _ := d.nodeServiceRecords(dc, node, req, ttl, cfg, maxRecursionLevel)
records, _ := d.nodeServiceRecords(lookup, node, req, ttl, cfg, maxRecursionLevel)
if len(records) == 0 {
continue
}
Expand Down Expand Up @@ -1650,15 +1694,20 @@ func findWeight(node structs.CheckServiceNode) int {
}
}

func (d *DNSServer) encodeIPAsFqdn(questionName string, dc string, ip net.IP) string {
func (d *DNSServer) encodeIPAsFqdn(questionName string, lookup serviceLookup, ip net.IP) string {
ipv4 := ip.To4()
respDomain := d.getResponseDomain(questionName)
ipStr := hex.EncodeToString(ip)
if ipv4 != nil {
ipStr := hex.EncodeToString(ip)
return fmt.Sprintf("%s.addr.%s.%s", ipStr[len(ipStr)-(net.IPv4len*2):], dc, respDomain)
} else {
return fmt.Sprintf("%s.addr.%s.%s", hex.EncodeToString(ip), dc, respDomain)
ipStr = ipStr[len(ipStr)-(net.IPv4len*2):]
}
if lookup.PeerName != "" {
// Exclude the datacenter from the FQDN on the addr for peers.
// This technically makes no difference, since the addr endpoint ignores the DC
// component of the request, but do it anyway for a less confusing experience.
return fmt.Sprintf("%s.addr.%s", ipStr, respDomain)
}
return fmt.Sprintf("%s.addr.%s.%s", ipStr, lookup.Datacenter, respDomain)
}

func makeARecord(qType uint16, ip net.IP, ttl time.Duration) dns.RR {
Expand Down Expand Up @@ -1737,16 +1786,16 @@ func (d *DNSServer) makeRecordFromNode(node *structs.Node, qType uint16, qName s
// Craft dns records for a service
// In case of an SRV query the answer will be an IN SRV and additional data will store an IN A to the node IP
// Otherwise it will return an IN A record
func (d *DNSServer) makeRecordFromServiceNode(dc string, serviceNode structs.CheckServiceNode, addr net.IP, req *dns.Msg, ttl time.Duration) ([]dns.RR, []dns.RR) {
func (d *DNSServer) makeRecordFromServiceNode(lookup serviceLookup, serviceNode structs.CheckServiceNode, addr net.IP, req *dns.Msg, ttl time.Duration) ([]dns.RR, []dns.RR) {
q := req.Question[0]
respDomain := d.getResponseDomain(q.Name)

ipRecord := makeARecord(q.Qtype, addr, ttl)
if ipRecord == nil {
return nil, nil
}

if q.Qtype == dns.TypeSRV {
nodeFQDN := fmt.Sprintf("%s.node.%s.%s", serviceNode.Node.Node, dc, respDomain)
respDomain := d.getResponseDomain(q.Name)
nodeFQDN := nodeCanonicalDNSName(lookup, serviceNode.Node.Node, respDomain)
answers := []dns.RR{
&dns.SRV{
Hdr: dns.RR_Header{
Expand All @@ -1757,7 +1806,7 @@ func (d *DNSServer) makeRecordFromServiceNode(dc string, serviceNode structs.Che
},
Priority: 1,
Weight: uint16(findWeight(serviceNode)),
Port: uint16(d.agent.TranslateServicePort(dc, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Port: uint16(d.agent.TranslateServicePort(lookup.Datacenter, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Target: nodeFQDN,
},
}
Expand All @@ -1773,15 +1822,15 @@ func (d *DNSServer) makeRecordFromServiceNode(dc string, serviceNode structs.Che
// Craft dns records for an IP
// In case of an SRV query the answer will be an IN SRV and additional data will store an IN A to the IP
// Otherwise it will return an IN A record
func (d *DNSServer) makeRecordFromIP(dc string, addr net.IP, serviceNode structs.CheckServiceNode, req *dns.Msg, ttl time.Duration) ([]dns.RR, []dns.RR) {
func (d *DNSServer) makeRecordFromIP(lookup serviceLookup, addr net.IP, serviceNode structs.CheckServiceNode, req *dns.Msg, ttl time.Duration) ([]dns.RR, []dns.RR) {
q := req.Question[0]
ipRecord := makeARecord(q.Qtype, addr, ttl)
if ipRecord == nil {
return nil, nil
}

if q.Qtype == dns.TypeSRV {
ipFQDN := d.encodeIPAsFqdn(q.Name, dc, addr)
ipFQDN := d.encodeIPAsFqdn(q.Name, lookup, addr)
answers := []dns.RR{
&dns.SRV{
Hdr: dns.RR_Header{
Expand All @@ -1792,7 +1841,7 @@ func (d *DNSServer) makeRecordFromIP(dc string, addr net.IP, serviceNode structs
},
Priority: 1,
Weight: uint16(findWeight(serviceNode)),
Port: uint16(d.agent.TranslateServicePort(dc, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Port: uint16(d.agent.TranslateServicePort(lookup.Datacenter, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Target: ipFQDN,
},
}
Expand All @@ -1808,7 +1857,7 @@ func (d *DNSServer) makeRecordFromIP(dc string, addr net.IP, serviceNode structs
// Craft dns records for an FQDN
// In case of an SRV query the answer will be an IN SRV and additional data will store an IN A to the IP
// Otherwise it will return a CNAME and an IN A record
func (d *DNSServer) makeRecordFromFQDN(dc string, fqdn string, serviceNode structs.CheckServiceNode, req *dns.Msg, ttl time.Duration, cfg *dnsConfig, maxRecursionLevel int) ([]dns.RR, []dns.RR) {
func (d *DNSServer) makeRecordFromFQDN(lookup serviceLookup, fqdn string, serviceNode structs.CheckServiceNode, req *dns.Msg, ttl time.Duration, cfg *dnsConfig, maxRecursionLevel int) ([]dns.RR, []dns.RR) {
edns := req.IsEdns0() != nil
q := req.Question[0]

Expand Down Expand Up @@ -1841,7 +1890,7 @@ MORE_REC:
},
Priority: 1,
Weight: uint16(findWeight(serviceNode)),
Port: uint16(d.agent.TranslateServicePort(dc, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Port: uint16(d.agent.TranslateServicePort(lookup.Datacenter, serviceNode.Service.Port, serviceNode.Service.TaggedAddresses)),
Target: dns.Fqdn(fqdn),
},
}
Expand All @@ -1863,7 +1912,7 @@ MORE_REC:
return answers, nil
}

func (d *DNSServer) nodeServiceRecords(dc string, node structs.CheckServiceNode, req *dns.Msg, ttl time.Duration, cfg *dnsConfig, maxRecursionLevel int) ([]dns.RR, []dns.RR) {
func (d *DNSServer) nodeServiceRecords(lookup serviceLookup, node structs.CheckServiceNode, req *dns.Msg, ttl time.Duration, cfg *dnsConfig, maxRecursionLevel int) ([]dns.RR, []dns.RR) {
addrTranslate := TranslateAddressAcceptDomain
if req.Question[0].Qtype == dns.TypeA {
addrTranslate |= TranslateAddressAcceptIPv4
Expand All @@ -1873,7 +1922,9 @@ func (d *DNSServer) nodeServiceRecords(dc string, node structs.CheckServiceNode,
addrTranslate |= TranslateAddressAcceptAny
}

serviceAddr := d.agent.TranslateServiceAddress(dc, node.Service.Address, node.Service.TaggedAddresses, addrTranslate)
// The datacenter should be empty during translation if it is a peering lookup.
// This should be fine because we should always prefer the WAN address.
serviceAddr := d.agent.TranslateServiceAddress(lookup.Datacenter, node.Service.Address, node.Service.TaggedAddresses, addrTranslate)
nodeAddr := d.agent.TranslateAddress(node.Node.Datacenter, node.Node.Address, node.Node.TaggedAddresses, addrTranslate)
if serviceAddr == "" && nodeAddr == "" {
return nil, nil
Expand All @@ -1886,30 +1937,30 @@ func (d *DNSServer) nodeServiceRecords(dc string, node structs.CheckServiceNode,
if serviceAddr == "" && nodeIPAddr != nil {
if node.Node.Address != nodeAddr {
// Do not CNAME node address in case of WAN address
return d.makeRecordFromIP(dc, nodeIPAddr, node, req, ttl)
return d.makeRecordFromIP(lookup, nodeIPAddr, node, req, ttl)
}

return d.makeRecordFromServiceNode(dc, node, nodeIPAddr, req, ttl)
return d.makeRecordFromServiceNode(lookup, node, nodeIPAddr, req, ttl)
}

// There is no service address and the node address is a FQDN (external service)
if serviceAddr == "" {
return d.makeRecordFromFQDN(dc, nodeAddr, node, req, ttl, cfg, maxRecursionLevel)
return d.makeRecordFromFQDN(lookup, nodeAddr, node, req, ttl, cfg, maxRecursionLevel)
}

// The service address is an IP
if serviceIPAddr != nil {
return d.makeRecordFromIP(dc, serviceIPAddr, node, req, ttl)
return d.makeRecordFromIP(lookup, serviceIPAddr, node, req, ttl)
}

// If the service address is a CNAME for the service we are looking
// for then use the node address.
if dns.Fqdn(serviceAddr) == req.Question[0].Name && nodeIPAddr != nil {
return d.makeRecordFromServiceNode(dc, node, nodeIPAddr, req, ttl)
return d.makeRecordFromServiceNode(lookup, node, nodeIPAddr, req, ttl)
}

// The service address is a FQDN (external service)
return d.makeRecordFromFQDN(dc, serviceAddr, node, req, ttl, cfg, maxRecursionLevel)
return d.makeRecordFromFQDN(lookup, serviceAddr, node, req, ttl, cfg, maxRecursionLevel)
}

func (d *DNSServer) generateMeta(qName string, node *structs.Node, ttl time.Duration) []dns.RR {
Expand All @@ -1934,28 +1985,31 @@ func (d *DNSServer) generateMeta(qName string, node *structs.Node, ttl time.Dura
}

// serviceSRVRecords is used to add the SRV records for a service lookup
func (d *DNSServer) serviceSRVRecords(cfg *dnsConfig, dc string, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration, maxRecursionLevel int) {
func (d *DNSServer) serviceSRVRecords(cfg *dnsConfig, lookup serviceLookup, nodes structs.CheckServiceNodes, req, resp *dns.Msg, ttl time.Duration, maxRecursionLevel int) {
handled := make(map[string]struct{})

for _, node := range nodes {
// Avoid duplicate entries, possible if a node has
// the same service on the same port, etc.
serviceAddress := d.agent.TranslateServiceAddress(dc, node.Service.Address, node.Service.TaggedAddresses, TranslateAddressAcceptAny)
servicePort := d.agent.TranslateServicePort(dc, node.Service.Port, node.Service.TaggedAddresses)

// The datacenter should be empty during translation if it is a peering lookup.
// This should be fine because we should always prefer the WAN address.
serviceAddress := d.agent.TranslateServiceAddress(lookup.Datacenter, node.Service.Address, node.Service.TaggedAddresses, TranslateAddressAcceptAny)
servicePort := d.agent.TranslateServicePort(lookup.Datacenter, node.Service.Port, node.Service.TaggedAddresses)
tuple := fmt.Sprintf("%s:%s:%d", node.Node.Node, serviceAddress, servicePort)
if _, ok := handled[tuple]; ok {
continue
}
handled[tuple] = struct{}{}

answers, extra := d.nodeServiceRecords(dc, node, req, ttl, cfg, maxRecursionLevel)
answers, extra := d.nodeServiceRecords(lookup, node, req, ttl, cfg, maxRecursionLevel)

respDomain := d.getResponseDomain(req.Question[0].Name)
resp.Answer = append(resp.Answer, answers...)
resp.Extra = append(resp.Extra, extra...)

if cfg.NodeMetaTXT {
resp.Extra = append(resp.Extra, d.generateMeta(fmt.Sprintf("%s.node.%s.%s", node.Node.Node, dc, respDomain), node.Node, ttl)...)
resp.Extra = append(resp.Extra, d.generateMeta(nodeCanonicalDNSName(lookup, node.Node.Node, respDomain), node.Node, ttl)...)
}
}
}
Expand Down
Loading