Explorar el Código

Cleanup FindLabels API, internal API for checking health status

Ask Bjørn Hansen hace 8 años
padre
commit
b6858f3dac

+ 4 - 0
Godeps/Godeps.json

@@ -43,6 +43,10 @@
 			"ImportPath": "github.com/rcrowley/go-metrics",
 			"Rev": "eeba7bd0dd01ace6e690fa833b3f22aaec29af43"
 		},
+		{
+			"ImportPath": "github.com/stretchr/testify",
+			"Rev": "4d4bfba8f1d1027c4fdbe371823030df51419987"
+		},
 		{
 			"ImportPath": "golang.org/x/net/websocket",
 			"Rev": "db8e4de5b2d6653f66aea53094624468caad15d2"

+ 8 - 0
Makefile

@@ -10,6 +10,14 @@ test: .PHONY
 testrace: .PHONY
 	go test -v -race $(shell go list ./... | grep -v /vendor/)
 
+docker-test: .PHONY
+	mkdir -p .cache/pkg
+	docker run --rm -v `pwd`:/go/src/github.com/abh/geodns \
+		-v `pwd`/.cache/pkg:/go/pkg \
+		geodns-build \
+		make test
+		# go test -i ./...
+
 devel:
 	go build -tags devel
 

+ 29 - 0
dns/hc.example.com.json

@@ -0,0 +1,29 @@
+{ "serial": 3,
+  "ttl":    600,
+  "max_hosts": 2,
+  "targeting": "country continent @ regiongroup region",
+  "data" : {
+    "":  {
+      "ns": { "ns1.example.net.": null, "ns2.example.net.": null },
+      "mx": [ { "preference": 20, "mx": "mx2.example.net", "weight": 0 },
+              { "preference": 10, "mx": "mx.example.net.", "weight": 1 }
+            ]
+    },
+    "tucs": {
+      "a": [ [ "194.106.223.155", 100 ], [ "199.15.176.188", 100 ],
+             [ "207.171.7.49", 100 ], [ "207.171.7.59", 100 ],
+             [ "207.171.7.64", 100 ], [ "207.171.7.65", 100 ]
+           ],
+      "max_hosts": "1",
+      "closest": true,
+      "health": {
+         "type": "tcp",
+         "frequency": 15,
+         "retry_time": 5,
+         "retries": 2,
+         "timeout": 3,
+         "port": 80
+      }
+    }
+  }
+}

+ 75 - 0
health/health.go

@@ -0,0 +1,75 @@
+package health
+
+import (
+	"fmt"
+
+	"github.com/abh/geodns/typeutil"
+)
+
+type HealthTester interface {
+	// Test(record string) bool
+	Name(record string) string
+	String() string
+}
+
+type HealthReference struct {
+	name string
+}
+
+func (hr *HealthReference) Name(record string) string {
+	if len(record) > 0 {
+		return hr.name + "/" + record
+	}
+	return hr.name
+}
+
+func (hr *HealthReference) String() string {
+	return hr.name
+}
+
+func NewReferenceFromMap(i map[string]interface{}) (HealthTester, error) {
+	var name, ts string
+
+	if ti, ok := i["type"]; ok {
+		ts = typeutil.ToString(ti)
+	}
+
+	if ni, ok := i["name"]; ok {
+		name = typeutil.ToString(ni)
+	}
+
+	if len(name) == 0 {
+		name = ts
+	}
+
+	if len(name) == 0 {
+		return nil, fmt.Errorf("name or type required")
+	}
+
+	tester := &HealthReference{name: name}
+	return tester, nil
+}
+
+// func (hr *HealthReference) RecordTest(rec *zones.Record) {
+// 	key := ht.String()
+// 	htr.entryMutex.Lock()
+// 	defer htr.entryMutex.Unlock()
+// 	if t, ok := htr.entries[key]; ok {
+// 		// we already have an instance of this test running. Record we are using it
+// 		t.references[ref] = true
+// 	} else {
+// 		// a test that isn't running. Record we are using it and start the test
+// 		t := &HealthTestRunnerEntry{
+// 			HealthTest: *ht.copy(ht.ipAddress),
+// 			references: make(map[string]bool),
+// 		}
+// 		if t.global {
+// 			t.ipAddress = nil
+// 		}
+// 		// we know it is not started, so no need for the mutex
+// 		t.healthy = ht.healthy
+// 		t.references[ref] = true
+// 		t.start()
+// 		htr.entries[key] = t
+// 	}
+// }

+ 2 - 2
health/healthtest.go → health/healthtest/healthtest.go

@@ -1,4 +1,4 @@
-package health
+package healthtest
 
 import (
 	"fmt"
@@ -18,8 +18,8 @@ var (
 )
 
 type HealthTester interface {
-	Test(ht *HealthTest) bool
 	String() string
+	Test(*HealthTest) bool
 }
 
 type HealthTestParameters struct {

+ 1 - 1
health/healthtesters.go → health/healthtest/healthtesters.go

@@ -1,4 +1,4 @@
-package health
+package healthtest
 
 import (
 	"crypto/sha256"

+ 73 - 0
health/status.go

@@ -0,0 +1,73 @@
+package health
+
+import (
+	"strings"
+	"sync"
+)
+
+// todo: how to deal with multiple files?
+// specified in zone and a status object for each?
+
+type StatusType uint8
+
+const (
+	StatusUnhealthy StatusType = iota
+	StatusHealthy
+	StatusUnknown
+)
+
+type Status interface {
+	// Load(string) error
+	GetStatus(string) StatusType
+}
+
+type statusRegistry struct {
+	mu sync.RWMutex
+	m  map[string]Status
+}
+
+var registry statusRegistry
+
+type Service struct {
+	Status StatusType
+}
+
+type StatusFile struct {
+	mu sync.RWMutex
+	m  map[string]*Service
+}
+
+func init() {
+	registry = statusRegistry{
+		m: make(map[string]Status),
+	}
+}
+
+func GetStatus(name string) StatusType {
+	check := strings.SplitN(name, "/", 2)
+	if len(check) != 2 {
+		return StatusUnknown
+	}
+	registry.mu.RLock()
+	status, ok := registry.m[check[0]]
+	registry.mu.RUnlock()
+
+	if !ok {
+		return StatusUnknown
+	}
+	return status.GetStatus(check[1])
+}
+
+func NewStatusFile() *StatusFile {
+	return &StatusFile{
+		m: make(map[string]*Service),
+	}
+}
+
+func (s *StatusFile) Load(filename string) error {
+	return nil
+}
+
+func (s *StatusFile) GetStatus(check string) StatusType {
+	return StatusUnknown
+}

+ 1 - 0
http.go

@@ -229,6 +229,7 @@ func (hs *httpServer) mainServer(w http.ResponseWriter, req *http.Request) {
 		http.NotFound(w, req)
 		return
 	}
+	log.Printf("serverInfo: %+v", hs.serverInfo)
 	io.WriteString(w, `<html><head><title>GeoDNS `+
 		hs.serverInfo.Version+`</title><body>`+
 		`GeoDNS Server`+

+ 42 - 29
server/serve.go

@@ -18,15 +18,15 @@ import (
 	"github.com/rcrowley/go-metrics"
 )
 
-func getQuestionName(z *zones.Zone, req *dns.Msg) string {
-	lx := dns.SplitDomainName(req.Question[0].Name)
+func getQuestionName(z *zones.Zone, fqdn string) string {
+	lx := dns.SplitDomainName(fqdn)
 	ql := lx[0 : len(lx)-z.LabelCount]
 	return strings.ToLower(strings.Join(ql, "."))
 }
 
 func (srv *Server) serve(w dns.ResponseWriter, req *dns.Msg, z *zones.Zone) {
 
-	qname := req.Question[0].Name
+	qnamefqdn := req.Question[0].Name
 	qtype := req.Question[0].Qtype
 
 	var qle *querylog.Entry
@@ -35,13 +35,13 @@ func (srv *Server) serve(w dns.ResponseWriter, req *dns.Msg, z *zones.Zone) {
 		qle = &querylog.Entry{
 			Time:   time.Now().UnixNano(),
 			Origin: z.Origin,
-			Name:   qname,
+			Name:   qnamefqdn,
 			Qtype:  qtype,
 		}
 		defer srv.queryLogger.Write(qle)
 	}
 
-	applog.Printf("[zone %s] incoming  %s %s (id %d) from %s\n", z.Origin, qname,
+	applog.Printf("[zone %s] incoming  %s %s (id %d) from %s\n", z.Origin, qnamefqdn,
 		dns.TypeToString[qtype], req.Id, w.RemoteAddr())
 
 	// Global meter
@@ -52,9 +52,10 @@ func (srv *Server) serve(w dns.ResponseWriter, req *dns.Msg, z *zones.Zone) {
 
 	applog.Println("Got request", req)
 
-	label := getQuestionName(z, req)
+	// qlabel is the qname without the zone origin suffix
+	qlabel := getQuestionName(z, qnamefqdn)
 
-	z.Metrics.LabelStats.Add(label)
+	z.Metrics.LabelStats.Add(qlabel)
 
 	// IP that's talking to us (not EDNS CLIENT SUBNET)
 	var realIP net.IP
@@ -141,16 +142,13 @@ func (srv *Server) serve(w dns.ResponseWriter, req *dns.Msg, z *zones.Zone) {
 		}
 	}
 
-	labels, labelQtype := z.FindLabels(label, targets, []uint16{dns.TypeMF, dns.TypeCNAME, qtype})
-	if labelQtype == 0 {
-		labelQtype = qtype
-	}
+	labelMatches := z.FindLabels(qlabel, targets, []uint16{dns.TypeMF, dns.TypeCNAME, qtype})
 
-	if labels == nil {
+	if len(labelMatches) == 0 {
 
 		permitDebug := srv.PublicDebugQueries || (realIP != nil && realIP.IsLoopback())
 
-		firstLabel := (strings.Split(label, "."))[0]
+		firstLabel := (strings.Split(qlabel, "."))[0]
 
 		if qle != nil {
 			qle.LabelName = firstLabel
@@ -158,7 +156,7 @@ func (srv *Server) serve(w dns.ResponseWriter, req *dns.Msg, z *zones.Zone) {
 
 		if permitDebug && firstLabel == "_status" {
 			if qtype == dns.TypeANY || qtype == dns.TypeTXT {
-				m.Answer = srv.statusRR(label + "." + z.Origin + ".")
+				m.Answer = srv.statusRR(qlabel + "." + z.Origin + ".")
 			} else {
 				m.Ns = append(m.Ns, z.SoaRR())
 			}
@@ -169,8 +167,8 @@ func (srv *Server) serve(w dns.ResponseWriter, req *dns.Msg, z *zones.Zone) {
 
 		if permitDebug && firstLabel == "_health" {
 			if qtype == dns.TypeANY || qtype == dns.TypeTXT {
-				baseLabel := strings.Join((strings.Split(label, "."))[1:], ".")
-				m.Answer = z.HealthRR(label+"."+z.Origin+".", baseLabel)
+				baseLabel := strings.Join((strings.Split(qlabel, "."))[1:], ".")
+				m.Answer = z.HealthRR(qlabel+"."+z.Origin+".", baseLabel)
 				m.Authoritative = true
 				w.WriteMsg(m)
 				return
@@ -184,7 +182,7 @@ func (srv *Server) serve(w dns.ResponseWriter, req *dns.Msg, z *zones.Zone) {
 		if firstLabel == "_country" {
 			if qtype == dns.TypeANY || qtype == dns.TypeTXT {
 				h := dns.RR_Header{Ttl: 1, Class: dns.ClassINET, Rrtype: dns.TypeTXT}
-				h.Name = label + "." + z.Origin + "."
+				h.Name = qnamefqdn
 
 				txt := []string{
 					w.RemoteAddr().String(),
@@ -223,18 +221,34 @@ func (srv *Server) serve(w dns.ResponseWriter, req *dns.Msg, z *zones.Zone) {
 		return
 	}
 
-	if !labels.Closest {
-		location = nil
-	}
+	for _, match := range labelMatches {
+		label := match.Label
+		labelQtype := match.Type
+
+		if !label.Closest {
+			location = nil
+		}
+
+		if servers := z.Picker(label, labelQtype, label.MaxHosts, location); servers != nil {
+			var rrs []dns.RR
+			for _, record := range servers {
+				rr := dns.Copy(record.RR)
+				rr.Header().Name = qnamefqdn
+				rrs = append(rrs, rr)
+			}
+			m.Answer = rrs
+		}
+		if len(m.Answer) > 0 {
+			// maxHosts only matters within a "targeting group"; at least that's
+			// how it has been working
+
+			if qle != nil {
+				qle.LabelName = label.Label
+				qle.Answers = len(m.Answer)
+			}
 
-	if servers := labels.Picker(labelQtype, labels.MaxHosts, location); servers != nil {
-		var rrs []dns.RR
-		for _, record := range servers {
-			rr := dns.Copy(record.RR)
-			rr.Header().Name = qname
-			rrs = append(rrs, rr)
+			break
 		}
-		m.Answer = rrs
 	}
 
 	if len(m.Answer) == 0 {
@@ -245,8 +259,7 @@ func (srv *Server) serve(w dns.ResponseWriter, req *dns.Msg, z *zones.Zone) {
 	applog.Println(m)
 
 	if qle != nil {
-		qle.LabelName = labels.Label
-		qle.Answers = len(m.Answer)
+		// should this be in the match loop above?
 		qle.Rcode = m.Rcode
 	}
 	err := w.WriteMsg(m)

+ 3 - 9
zones/muxmanager.go

@@ -131,7 +131,8 @@ func (mm *MuxManager) reload() error {
 				continue
 			}
 
-			zone, err := ReadZoneFile(zoneName, filename)
+			zone := NewZone(zoneName)
+			err := zone.ReadZoneFile(filename)
 			if zone == nil || err != nil {
 				parseErr = fmt.Errorf("Error reading zone '%s': %s", zoneName, err)
 				log.Println(parseErr.Error())
@@ -161,16 +162,9 @@ func (mm *MuxManager) reload() error {
 
 func (mm *MuxManager) addHandler(name string, zone *Zone) {
 	oldZone := mm.zonelist[name]
-	// across the recconfiguration keep a reference to all healthchecks to ensure
-	// the global map doesn't get destroyed
-	// healtmm.TestRunner.refAllGlobalHealthChecks(name, true)
-	// defer healtmm.TestRunner.refAllGlobalHealthChecks(name, false)
-	// if oldZone != nil {
-	// 	oldZone.StartStopHealthChecks(false, nil)
-	// }
 	zone.SetupMetrics(oldZone)
+	zone.setupHealthTests()
 	mm.zonelist[name] = zone
-	// config.StartStopHealthChecks(true, oldZone)
 	mm.reg.Add(name, zone)
 }
 

+ 29 - 15
zones/picker.go

@@ -1,6 +1,7 @@
 package zones
 
 import (
+	"log"
 	"math/rand"
 
 	"github.com/abh/geodns/health"
@@ -9,13 +10,28 @@ import (
 	"github.com/miekg/dns"
 )
 
-func (label *Label) Picker(qtype uint16, max int, location *targeting.Location) Records {
+func (zone *Zone) filterHealth(servers Records) int {
+	// Remove any unhealthy servers
+	tmpServers := servers[:0]
+	sum := 0
+
+	for i, s := range servers {
+		if len(servers[i].Test) == 0 || zone.HealthStatus.GetStatus(servers[i].Test) == health.StatusHealthy {
+			tmpServers = append(tmpServers, s)
+			sum += s.Weight
+		}
+	}
+	servers = tmpServers
+	return sum
+}
+
+func (zone *Zone) Picker(label *Label, qtype uint16, max int, location *targeting.Location) Records {
 
 	if qtype == dns.TypeANY {
-		var result []Record
+		var result Records
 		for rtype := range label.Records {
 
-			rtypeRecords := label.Picker(rtype, max, location)
+			rtypeRecords := zone.Picker(label, rtype, max, location)
 
 			tmpResult := make(Records, len(result)+len(rtypeRecords))
 
@@ -31,23 +47,20 @@ func (label *Label) Picker(qtype uint16, max int, location *targeting.Location)
 
 		sum := label.Weight[qtype]
 
-		servers := make([]Record, len(labelRR))
+		servers := make(Records, len(labelRR))
 		copy(servers, labelRR)
 
 		if label.Test != nil {
-			// Remove any unhealthy servers
-			tmpServers := servers[:0]
-			sum = 0
-			for i, s := range servers {
-				if servers[i].Test == nil || health.TestRunner.IsHealthy(servers[i].Test) {
-					tmpServers = append(tmpServers, s)
-					sum += s.Weight
-				}
+			sum = zone.filterHealth(servers)
+			if sum == 0 {
+				return servers
 			}
-			servers = tmpServers
 		}
 
-		// not "balanced", just return all
+		// not "balanced", just return all -- It's been working
+		// this way since the first prototype, it might not make
+		// sense anymore. This probably makes NS records and such
+		// work as expected.
 		if label.Weight[qtype] == 0 {
 			return servers
 		}
@@ -60,7 +73,7 @@ func (label *Label) Picker(qtype uint16, max int, location *targeting.Location)
 		if max > rrCount {
 			max = rrCount
 		}
-		result := make([]Record, max)
+		result := make(Records, max)
 
 		// Find the distance to each server, and find the servers that are
 		// closer to the querier than the max'th furthest server, or within
@@ -134,5 +147,6 @@ func (label *Label) Picker(qtype uint16, max int, location *targeting.Location)
 
 		return result
 	}
+	log.Printf("returning nil ...!")
 	return nil
 }

+ 10 - 15
zones/zones.go → zones/reader.go

@@ -21,12 +21,12 @@ import (
 // ZoneList maps domain names to zone data
 type ZoneList map[string]*Zone
 
-func ReadZoneFile(zoneName, fileName string) (zone *Zone, zerr error) {
+func (zone *Zone) ReadZoneFile(fileName string) (zerr error) {
 	defer func() {
 		if r := recover(); r != nil {
-			log.Printf("reading %s failed: %s", zoneName, r)
+			log.Printf("reading %s failed: %s", zone.Origin, r)
 			debug.PrintStack()
-			zerr = fmt.Errorf("reading %s failed: %s", zoneName, r)
+			zerr = fmt.Errorf("reading %s failed: %s", zone.Origin, r)
 		}
 	}()
 
@@ -36,8 +36,6 @@ func ReadZoneFile(zoneName, fileName string) (zone *Zone, zerr error) {
 		panic(err)
 	}
 
-	zone = NewZone(zoneName)
-
 	fileInfo, err := fh.Stat()
 	if err != nil {
 		log.Printf("Could not stat '%s': %s", fileName, err)
@@ -57,13 +55,10 @@ func ReadZoneFile(zoneName, fileName string) (zone *Zone, zerr error) {
 			extra = fmt.Sprintf(":\nError at line %d, column %d (file offset %d):\n%s",
 				line, col, serr.Offset, highlight)
 		}
-		return nil, fmt.Errorf("error parsing JSON object in config file %s%s\n%v",
+		return fmt.Errorf("error parsing JSON object in config file %s%s\n%v",
 			fh.Name(), extra, err)
 	}
 
-	if err != nil {
-		panic(err)
-	}
 	//log.Println(objmap)
 
 	var data map[string]interface{}
@@ -88,8 +83,7 @@ func ReadZoneFile(zoneName, fileName string) (zone *Zone, zerr error) {
 		case "targeting":
 			zone.Options.Targeting, err = targeting.ParseTargets(v.(string))
 			if err != nil {
-				log.Printf("Could not parse targeting '%s': %s", v, err)
-				return nil, err
+				return fmt.Errorf("parsing targeting '%s': %s", v, err)
 			}
 
 		case "logging":
@@ -136,7 +130,7 @@ func ReadZoneFile(zoneName, fileName string) (zone *Zone, zerr error) {
 		zone.SetLocations()
 	}
 
-	return zone, nil
+	return nil
 }
 
 func setupZoneData(data map[string]interface{}, zone *Zone) {
@@ -174,8 +168,9 @@ func setupZoneData(data map[string]interface{}, zone *Zone) {
 			case "ttl":
 				label.Ttl = typeutil.ToInt(rdata)
 				continue
-			case "test":
-				zone.newHealthTest(label, rdata)
+			case "health":
+				zone.addHealthReference(label, rdata)
+				log.Printf("health status: '%+v'", label.Test.String())
 				continue
 			}
 
@@ -416,7 +411,7 @@ func setupZoneData(data map[string]interface{}, zone *Zone) {
 				}
 
 				label.Weight[dnsType] += record.Weight
-				label.Records[dnsType][i] = *record
+				label.Records[dnsType][i] = record
 			}
 			if label.Weight[dnsType] > 0 {
 				sort.Sort(RecordsByWeight{label.Records[dnsType]})

+ 12 - 8
zones/reader_test.go

@@ -6,10 +6,11 @@ import (
 	"io/ioutil"
 	"os"
 	"testing"
-)
 
-func TestReadConfigs(t *testing.T) {
+	"github.com/stretchr/testify/assert"
+)
 
+func loadZones(t *testing.T) *MuxManager {
 	muxm, err := NewMuxManager("../dns", &NilReg{})
 	if err != nil {
 		t.Logf("loading zones: %s", err)
@@ -17,7 +18,7 @@ func TestReadConfigs(t *testing.T) {
 	}
 
 	// Just check that example.com and test.example.org loaded, too.
-	for _, zonename := range []string{"example.com", "test.example.com"} {
+	for _, zonename := range []string{"example.com", "test.example.com", "hc.example.com"} {
 
 		if z, ok := muxm.zonelist[zonename]; ok {
 			if z.Origin != zonename {
@@ -32,6 +33,12 @@ func TestReadConfigs(t *testing.T) {
 			t.Fatalf("Didn't load '%s'", zonename)
 		}
 	}
+	return muxm
+}
+
+func TestReadConfigs(t *testing.T) {
+
+	muxm := loadZones(t)
 
 	// The real tests are in test.example.com so we have a place
 	// to make nutty configuration entries
@@ -48,12 +55,9 @@ func TestReadConfigs(t *testing.T) {
 		t.Logf("Contact='%s', expected support.bitnames.com", tz.Options.Contact)
 		t.Fail()
 	}
-	// c.Check(tz.Options.Targeting.String(), Equals, "@ continent country regiongroup region asn ip")
-
-	// // Got logging option
-	// c.Check(tz.Logging.StatHat, Equals, true)
 
-	// c.Check(tz.Labels["weight"].MaxHosts, Equals, 1)
+	assert.Equal(t, tz.Options.Targeting.String(), "@ continent country regiongroup region asn ip", "Targeting.String()")
+	assert.Equal(t, tz.Labels["weight"].MaxHosts, 1, "weight label has max_hosts=1")
 
 	// /* test different cname targets */
 	// c.Check(tz.Labels["www"].

+ 182 - 118
zones/zone.go

@@ -22,6 +22,10 @@ type ZoneOptions struct {
 	Contact   string
 	Targeting targeting.TargetOptions
 	Closest   bool
+
+	// temporary, using this to keep the healthtest code
+	// compiling and vaguely included
+	healthChecker bool
 }
 
 type ZoneLogging struct {
@@ -33,10 +37,10 @@ type Record struct {
 	RR     dns.RR
 	Weight int
 	Loc    *targeting.Location
-	Test   *health.HealthTest
+	Test   string
 }
 
-type Records []Record
+type Records []*Record
 
 func (s Records) Len() int      { return len(s) }
 func (s Records) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
@@ -52,10 +56,15 @@ type Label struct {
 	Records  map[uint16]Records
 	Weight   map[uint16]int
 	Closest  bool
-	Test     *health.HealthTest
+	Test     health.HealthTester
+}
+
+type LabelMatch struct {
+	Label *Label
+	Type  uint16
 }
 
-type labels map[string]*Label
+type labelmap map[string]*Label
 
 type ZoneMetrics struct {
 	Queries     metrics.Meter
@@ -66,19 +75,22 @@ type ZoneMetrics struct {
 }
 
 type Zone struct {
-	Origin     string
-	Labels     labels
-	LabelCount int
-	Options    ZoneOptions
-	Logging    *ZoneLogging
-	Metrics    ZoneMetrics
-	HasClosest bool
+	Origin       string
+	Labels       labelmap
+	LabelCount   int
+	Options      ZoneOptions
+	Logging      *ZoneLogging
+	Metrics      ZoneMetrics
+	HasClosest   bool
+	HealthStatus health.Status
+	healthExport bool
+
 	sync.RWMutex
 }
 
 func NewZone(name string) *Zone {
 	zone := new(Zone)
-	zone.Labels = make(labels)
+	zone.Labels = make(labelmap)
 	zone.Origin = name
 	zone.LabelCount = dns.CountLabel(zone.Origin)
 
@@ -118,7 +130,6 @@ func (z *Zone) SetupMetrics(old *Zone) {
 }
 
 func (z *Zone) Close() {
-	z.StartStopHealthChecks(false, nil)
 	z.Metrics.Registry.UnregisterAll()
 	if z.Metrics.LabelStats != nil {
 		z.Metrics.LabelStats.Close()
@@ -198,15 +209,19 @@ func (zone *Zone) addSOA() {
 
 	record := Record{RR: rr}
 
-	label.Records[dns.TypeSOA] = make([]Record, 1)
-	label.Records[dns.TypeSOA][0] = record
+	label.Records[dns.TypeSOA] = make([]*Record, 1)
+	label.Records[dns.TypeSOA][0] = &record
 }
 
 // Find label "s" in country "cc" falling back to the appropriate
 // continent and the global label name as needed. Looks for the
-// first available qType at each targeting level. Return a Label
-// and the qtype that was "found"
-func (z *Zone) FindLabels(s string, targets []string, qts []uint16) (*Label, uint16) {
+// first available qType at each targeting level. Returns a list of
+// LabelMatch for potential labels that might satisfy the query.
+// "MF" records are treated as aliases.
+func (z *Zone) FindLabels(s string, targets []string, qts []uint16) []LabelMatch {
+
+	matches := make([]LabelMatch, 0)
+
 	for _, target := range targets {
 		var name string
 
@@ -229,24 +244,36 @@ func (z *Zone) FindLabels(s string, targets []string, qts []uint16) (*Label, uin
 					// short-circuit mostly to avoid subtle bugs later
 					// to be correct we should run through all the selectors and
 					// pick types not already picked
-					return z.Labels[s], qtype
+					matches = append(matches, LabelMatch{z.Labels[s], qtype})
+					continue
 				case dns.TypeMF:
 					if label.Records[dns.TypeMF] != nil {
 						name = label.FirstRR(dns.TypeMF).(*dns.MF).Mf
 						// TODO: need to avoid loops here somehow
-						return z.FindLabels(name, targets, qts)
+						aliases := z.FindLabels(name, targets, qts)
+						matches = append(matches, aliases...)
+						continue
 					}
 				default:
 					// return the label if it has the right record
 					if label.Records[qtype] != nil && len(label.Records[qtype]) > 0 {
-						return label, qtype
+						matches = append(matches, LabelMatch{label, qtype})
+						continue
 					}
 				}
 			}
 		}
 	}
 
-	return z.Labels[s], 0
+	if len(matches) == 0 {
+		// this is to make sure we return 'noerror' instead of 'nxdomain' when
+		// appropriate.
+		if label, ok := z.Labels[s]; ok {
+			matches = append(matches, LabelMatch{label, 0})
+		}
+	}
+
+	return matches
 }
 
 // Find the locations of all the A records within a zone. If we were being really clever
@@ -272,125 +299,162 @@ func (z *Zone) SetLocations() {
 	}
 }
 
-func (z *Zone) newHealthTest(l *Label, data interface{}) {
+func (z *Zone) addHealthReference(l *Label, data interface{}) {
+
 	// First safely get rid of any old test. As label tests
 	// should never run this should never be executed
-	if l.Test != nil {
-		l.Test.Stop()
-		l.Test = nil
-	}
+	// if l.Test != nil {
+	// 	l.Test.Stop()
+	// 	l.Test = nil
+	// }
 
 	if data == nil {
 		return
 	}
 
 	if i, ok := data.(map[string]interface{}); ok {
-		tester, err := health.NewFromMap(i)
+		tester, err := health.NewReferenceFromMap(i)
 		if err != nil {
-			applog.Printf("Could not configure health check: %s", err)
+			applog.Printf("Could not setup reference to health check: %s", err)
 			return
 		}
 		l.Test = tester
-
 	}
 }
 
-func (z *Zone) StartStopHealthChecks(start bool, oldZone *Zone) {
-	// 	applog.Printf("Start/stop health checks on zone %s start=%v", z.Origin, start)
-	// 	for labelName, label := range z.Labels {
-	// 		for _, qtype := range health.Qtypes {
-	// 			if label.Records[qtype] != nil && len(label.Records[qtype]) > 0 {
-	// 				for i := range label.Records[qtype] {
-	// 					rr := label.Records[qtype][i].RR
-	// 					var ip net.IP
-	// 					switch rrt := rr.(type) {
-	// 					case *dns.A:
-	// 						ip = rrt.A
-	// 					case *dns.AAAA:
-	// 						ip = rrt.AAAA
-	// 					default:
-	// 						continue
-	// 					}
-
-	// 					var test *health.HealthTest
-	// 					ref := fmt.Sprintf("%s/%s/%d/%d", z.Origin, labelName, qtype, i)
-	// 					if start {
-	// 						if test = label.Records[qtype][i].Test; test != nil {
-	// 							// stop any old test
-	// 							health.TestRunner.removeTest(test, ref)
-	// 						} else {
-	// 							if ltest := label.Test; ltest != nil {
-	// 								test = ltest.copy(ip)
-	// 								label.Records[qtype][i].Test = test
-	// 							}
-	// 						}
-	// 						if test != nil {
-	// 							test.ipAddress = ip
-	// 							// if we are given an oldzone, let's see if we can find the old RR and
-	// 							// copy over the initial health state, rather than use the initial health
-	// 							// state provided from the label. This helps to stop health state bouncing
-	// 							// when a zone file is reloaded for a purposes unrelated to the RR
-	// 							if oldZone != nil {
-	// 								oLabel, ok := oldZone.Labels[labelName]
-	// 								if ok {
-	// 									if oLabel.Test != nil {
-	// 										for i := range oLabel.Records[qtype] {
-	// 											oRecord := oLabel.Records[qtype][i]
-	// 											var oip net.IP
-	// 											switch orrt := oRecord.RR.(type) {
-	// 											case *dns.A:
-	// 												oip = orrt.A
-	// 											case *dns.AAAA:
-	// 												oip = orrt.AAAA
-	// 											default:
-	// 												continue
-	// 											}
-	// 											if oip.Equal(ip) {
-	// 												if oRecord.Test != nil {
-	// 													h := oRecord.Test.IsHealthy()
-	// 													applog.Printf("Carrying over previous health state for %s: %v", oRecord.Test.ipAddress, h)
-	// 													// we know the test is stopped (as we haven't started it) so we can write
-	// 													// without the mutex and avoid a misleading log message
-	// 													test.healthy = h
-	// 												}
-	// 												break
-	// 											}
-	// 										}
-	// 									}
-	// 								}
-	// 							}
-	// 							health.TestRunner.addTest(test, ref)
-	// 						}
-	// 					} else {
-	// 						if test = label.Records[qtype][i].Test; test != nil {
-	// 							health.TestRunner.removeTest(test, ref)
-	// 						}
-	// 					}
-	// 				}
-	// 			}
-	// 		}
-	// 	}
+func (z *Zone) setupHealthTests() {
+
+	log.Println("Setting up Health Tests on records")
+
+	for _, label := range z.Labels {
+		if label.Test == nil {
+			// log.Printf("label.Test for '%s' == nil", label.Label)
+			continue
+		}
+
+		log.Printf("====  setting up '%s'", label.Label)
+
+		// todo: document which record types are processed
+		// or process all ...
+		for _, rrs := range label.Records {
+			for _, rec := range rrs {
+				if len(rec.Test) > 0 {
+					continue
+				}
+				var t string
+				switch rrt := rec.RR.(type) {
+				case *dns.A:
+					t = rrt.A.String()
+				case *dns.AAAA:
+					t = rrt.AAAA.String()
+				case *dns.MX:
+					t = rrt.Mx
+				default:
+					continue
+				}
+				log.Printf("t='%s'", t)
+				rec.Test = t
+			}
+			log.Printf("rrs: %+v", rrs)
+		}
+	}
 }
 
+// func (z *Zone) StartStopHealthTests(start bool, oldZone *Zone) {}
+// 	applog.Printf("Start/stop health checks on zone %s start=%v", z.Origin, start)
+// for labelName, label := range z.Labels {
+// 		for _, qtype := range health.Qtypes {
+// 			if label.Records[qtype] != nil && len(label.Records[qtype]) > 0 {
+// 				for i := range label.Records[qtype] {
+// 					rr := label.Records[qtype][i].RR
+// 					var ip net.IP
+// 					switch rrt := rr.(type) {
+// 					case *dns.A:
+// 						ip = rrt.A
+// 					case *dns.AAAA:
+// 						ip = rrt.AAAA
+// 					default:
+// 						continue
+// 					}
+
+// 					var test *health.HealthTest
+// 					ref := fmt.Sprintf("%s/%s/%d/%d", z.Origin, labelName, qtype, i)
+// 					if start {
+// 						if test = label.Records[qtype][i].Test; test != nil {
+// 							// stop any old test
+// 							health.TestRunner.removeTest(test, ref)
+// 						} else {
+// 							if ltest := label.Test; ltest != nil {
+// 								test = ltest.copy(ip)
+// 								label.Records[qtype][i].Test = test
+// 							}
+// 						}
+// 						if test != nil {
+// 							test.ipAddress = ip
+// 							// if we are given an oldzone, let's see if we can find the old RR and
+// 							// copy over the initial health state, rather than use the initial health
+// 							// state provided from the label. This helps to stop health state bouncing
+// 							// when a zone file is reloaded for purposes unrelated to the RR
+// 							if oldZone != nil {
+// 								oLabel, ok := oldZone.Labels[labelName]
+// 								if ok {
+// 									if oLabel.Test != nil {
+// 										for i := range oLabel.Records[qtype] {
+// 											oRecord := oLabel.Records[qtype][i]
+// 											var oip net.IP
+// 											switch orrt := oRecord.RR.(type) {
+// 											case *dns.A:
+// 												oip = orrt.A
+// 											case *dns.AAAA:
+// 												oip = orrt.AAAA
+// 											default:
+// 												continue
+// 											}
+// 											if oip.Equal(ip) {
+// 												if oRecord.Test != nil {
+// 													h := oRecord.Test.IsHealthy()
+// 													applog.Printf("Carrying over previous health state for %s: %v", oRecord.Test.ipAddress, h)
+// 													// we know the test is stopped (as we haven't started it) so we can write
+// 													// without the mutex and avoid a misleading log message
+// 													test.healthy = h
+// 												}
+// 												break
+// 											}
+// 										}
+// 									}
+// 								}
+// 							}
+// 							health.TestRunner.addTest(test, ref)
+// 						}
+// 					} else {
+// 						if test = label.Records[qtype][i].Test; test != nil {
+// 							health.TestRunner.removeTest(test, ref)
+// 						}
+// 					}
+// 				}
+// 			}
+// 		}
+// 	}
+
 func (z *Zone) HealthRR(label string, baseLabel string) []dns.RR {
 	h := dns.RR_Header{Ttl: 1, Class: dns.ClassINET, Rrtype: dns.TypeTXT}
 	h.Name = label
 
 	healthstatus := make(map[string]map[string]bool)
 
-	if l, ok := z.Labels[baseLabel]; ok {
-		for qt, records := range l.Records {
-			if qts, ok := dns.TypeToString[qt]; ok {
-				hmap := make(map[string]bool)
-				for _, record := range records {
-					if record.Test != nil {
-						hmap[(*record.Test).IP().String()] = health.TestRunner.IsHealthy(record.Test)
-					}
-				}
-				healthstatus[qts] = hmap
-			}
-		}
-	}
+	// if l, ok := z.Labels[baseLabel]; ok {
+	// 	for qt, records := range l.Records {
+	// 		if qts, ok := dns.TypeToString[qt]; ok {
+	// 			hmap := make(map[string]bool)
+	// 			for _, record := range records {
+	// 				if record.Test != nil {
+	// 					hmap[(*record.Test).IP().String()] = health.TestRunner.IsHealthy(record.Test)
+	// 				}
+	// 			}
+	// 			healthstatus[qts] = hmap
+	// 		}
+	// 	}
+	// }
 
 	js, _ := json.Marshal(healthstatus)
 

+ 44 - 0
zones/zone_health_test.go

@@ -0,0 +1,44 @@
+package zones
+
+import (
+	"testing"
+
+	"github.com/abh/geodns/health"
+	"github.com/miekg/dns"
+)
+
+type HealthStatus struct {
+	t *testing.T
+}
+
+func (hs *HealthStatus) GetStatus(name string) health.StatusType {
+	hs.t.Logf("GetStatus(%s)", name)
+
+	// hs.t.Fatalf("in get status")
+	return health.StatusUnknown
+}
+
+func TestHealth(t *testing.T) {
+	muxm := loadZones(t)
+	t.Log("setting up health status")
+
+	hs := &HealthStatus{t: t}
+
+	tz := muxm.zonelist["hc.example.com"]
+	tz.HealthStatus = hs
+	// t.Logf("hs: '%+v'", tz.HealthStatus)
+	// t.Logf("hc zone: '%+v'", tz)
+
+	matches := tz.FindLabels("tucs", []string{"@"}, []uint16{dns.TypeA})
+	// t.Logf("qt: %d, label: '%+v'", qt, label)
+	records := tz.Picker(matches[0].Label, matches[0].Type, 2, nil)
+
+	// t.Logf("label.Test: '%+v'", label.Test)
+
+	t.Logf("records: '%+v'", records)
+
+	if len(records) == 0 {
+		t.Log("didn't get any records")
+	}
+
+}

+ 8 - 5
zones/zone_test.go

@@ -7,6 +7,7 @@ import (
 )
 
 func TestExampleComZone(t *testing.T) {
+	t.Log("example com")
 	mm, err := NewMuxManager("../dns", &NilReg{})
 	if err != nil {
 		t.Fatalf("Loading test zones: %s", err)
@@ -23,7 +24,9 @@ func TestExampleComZone(t *testing.T) {
 	}
 
 	// Make sure that the empty "no.bar" zone gets skipped and "bar" is used
-	label, qtype := ex.FindLabels("bar", []string{"no", "europe", "@"}, []uint16{dns.TypeA})
+	matches := ex.FindLabels("bar", []string{"no", "europe", "@"}, []uint16{dns.TypeA})
+	label := matches[0].Label
+	qtype := matches[0].Type
 	if l := len(label.Records[dns.TypeA]); l != 1 {
 		t.Logf("Unexpected number of A records: '%d'", l)
 		t.Fail()
@@ -118,12 +121,12 @@ func TestExampleOrgZone(t *testing.T) {
 		t.Fatalf("Did not load 'test.example.org' test zone")
 	}
 
-	label, qtype := ex.FindLabels("sub", []string{"@"}, []uint16{dns.TypeNS})
-	if qtype != dns.TypeNS {
-		t.Fatalf("Expected qtype = NS record (type %d), got type %d", dns.TypeNS, qtype)
+	matches := ex.FindLabels("sub", []string{"@"}, []uint16{dns.TypeNS})
+	if matches[0].Type != dns.TypeNS {
+		t.Fatalf("Expected qtype = NS record (type %d), got type %d", dns.TypeNS, matches[0].Type)
 	}
 
-	Ns := label.Records[qtype]
+	Ns := matches[0].Label.Records[matches[0].Type]
 	if l := len(Ns); l != 2 {
 		t.Fatalf("Expected 2 NS records, got '%d'", l)
 	}