routes: add health dimension to HA primary route election

Track unhealthy nodes in PrimaryRoutes so primary election skips them.
When all nodes for a prefix are unhealthy, keep the first as a degraded
primary rather than dropping the route entirely.

Anti-flap is built in: a recovered node becomes standby, not primary,
because updatePrimaryLocked keeps the current primary when still
available and healthy.

Updates #2129
Updates #2902
This commit is contained in:
Kristoffer Dalby
2026-04-15 13:40:45 +00:00
parent 99a93c126b
commit 786ce2dce8
2 changed files with 392 additions and 8 deletions

View File

@@ -27,6 +27,10 @@ type PrimaryRoutes struct {
// primaries is a map of prefixes to the node that is the primary for that prefix.
primaries map[netip.Prefix]types.NodeID
isPrimary map[types.NodeID]bool
// unhealthy tracks nodes that failed health probes. Unhealthy nodes
// are skipped during primary selection but remain in the routes map.
unhealthy set.Set[types.NodeID]
}
func New() *PrimaryRoutes {
@@ -34,6 +38,7 @@ func New() *PrimaryRoutes {
routes: make(map[types.NodeID]set.Set[netip.Prefix]),
primaries: make(map[netip.Prefix]types.NodeID),
isPrimary: make(map[types.NodeID]bool),
unhealthy: make(set.Set[types.NodeID]),
}
}
@@ -92,13 +97,13 @@ func (pr *PrimaryRoutes) updatePrimaryLocked() bool {
Msg("processing prefix for primary route selection")
if node, ok := pr.primaries[prefix]; ok {
// If the current primary is still available, continue.
if slices.Contains(nodes, node) {
// If the current primary is still available and healthy, continue.
if slices.Contains(nodes, node) && !pr.unhealthy.Contains(node) {
log.Debug().
Caller().
Str(zf.Prefix, prefix.String()).
Uint64("currentPrimary", node.Uint64()).
Msg("current primary still available, keeping it")
Msg("current primary still available and healthy, keeping it")
continue
} else {
@@ -106,18 +111,46 @@ func (pr *PrimaryRoutes) updatePrimaryLocked() bool {
Caller().
Str(zf.Prefix, prefix.String()).
Uint64("oldPrimary", node.Uint64()).
Msg("current primary no longer available")
Bool("unhealthy", pr.unhealthy.Contains(node)).
Msg("current primary no longer available or unhealthy")
}
}
if len(nodes) >= 1 {
pr.primaries[prefix] = nodes[0]
// Select the first healthy node as primary.
// If all nodes are unhealthy, fall back to the first node
// (degraded service is better than no service).
var selected types.NodeID
found := false
for _, n := range nodes {
if !pr.unhealthy.Contains(n) {
selected = n
found = true
break
}
}
if !found && len(nodes) >= 1 {
selected = nodes[0]
found = true
log.Warn().
Caller().
Str(zf.Prefix, prefix.String()).
Uint64("fallbackPrimary", selected.Uint64()).
Msg("all nodes unhealthy for prefix, keeping first as degraded primary")
}
if found {
pr.primaries[prefix] = selected
changed = true
log.Debug().
Caller().
Str(zf.Prefix, prefix.String()).
Uint64("newPrimary", nodes[0].Uint64()).
Uint64("newPrimary", selected.Uint64()).
Msg("selected new primary for prefix")
}
}
@@ -164,12 +197,14 @@ func (pr *PrimaryRoutes) SetRoutes(node types.NodeID, prefixes ...netip.Prefix)
Strs("prefixes", util.PrefixesToString(prefixes)).
Msg("PrimaryRoutes.SetRoutes called")
// If no routes are being set, remove the node from the routes map.
// If no routes are being set, remove the node from the routes map
// and clear any unhealthy state.
if len(prefixes) == 0 {
wasPresent := false
if _, ok := pr.routes[node]; ok {
delete(pr.routes, node)
pr.unhealthy.Delete(node)
wasPresent = true
@@ -245,6 +280,99 @@ func (pr *PrimaryRoutes) PrimaryRoutes(id types.NodeID) []netip.Prefix {
return routes
}
// SetNodeHealthy marks a node as healthy or unhealthy and recalculates
// primary routes. Returns true if primaries changed.
func (pr *PrimaryRoutes) SetNodeHealthy(
node types.NodeID,
healthy bool,
) bool {
pr.mu.Lock()
defer pr.mu.Unlock()
if healthy {
if !pr.unhealthy.Contains(node) {
return false
}
pr.unhealthy.Delete(node)
log.Debug().
Caller().
Uint64(zf.NodeID, node.Uint64()).
Msg("node marked healthy")
} else {
if pr.unhealthy.Contains(node) {
return false
}
pr.unhealthy.Add(node)
log.Info().
Caller().
Uint64(zf.NodeID, node.Uint64()).
Msg("node marked unhealthy")
}
return pr.updatePrimaryLocked()
}
// ClearUnhealthy removes a node from the unhealthy set without
// recalculating primaries. Used when a node reconnects to give
// it a fresh start.
func (pr *PrimaryRoutes) ClearUnhealthy(node types.NodeID) {
pr.mu.Lock()
defer pr.mu.Unlock()
if pr.unhealthy.Contains(node) {
pr.unhealthy.Delete(node)
log.Debug().
Caller().
Uint64(zf.NodeID, node.Uint64()).
Msg("cleared unhealthy state on reconnect")
}
}
// HANodes returns a snapshot of prefixes that have 2+ nodes
// advertising them (HA configurations), mapped to the node IDs.
// Node IDs are sorted deterministically.
func (pr *PrimaryRoutes) HANodes() map[netip.Prefix][]types.NodeID {
pr.mu.Lock()
defer pr.mu.Unlock()
// Build prefix → nodes map.
prefixNodes := make(map[netip.Prefix][]types.NodeID)
ids := types.NodeIDs(xmaps.Keys(pr.routes))
sort.Sort(ids)
for _, id := range ids {
routes := pr.routes[id]
for route := range routes {
prefixNodes[route] = append(prefixNodes[route], id)
}
}
// Keep only prefixes with 2+ advertisers.
result := make(map[netip.Prefix][]types.NodeID)
for prefix, nodes := range prefixNodes {
if len(nodes) >= 2 {
result[prefix] = nodes
}
}
return result
}
// IsNodeHealthy returns whether the node is currently considered healthy.
func (pr *PrimaryRoutes) IsNodeHealthy(node types.NodeID) bool {
pr.mu.Lock()
defer pr.mu.Unlock()
return !pr.unhealthy.Contains(node)
}
func (pr *PrimaryRoutes) String() string {
pr.mu.Lock()
defer pr.mu.Unlock()
@@ -285,6 +413,9 @@ type DebugRoutes struct {
// PrimaryRoutes maps route prefixes to the primary node serving them
PrimaryRoutes map[string]types.NodeID `json:"primary_routes"`
// UnhealthyNodes lists nodes that have failed health probes.
UnhealthyNodes []types.NodeID `json:"unhealthy_nodes,omitempty"`
}
// DebugJSON returns a structured representation of the primary routes state suitable for JSON serialization.
@@ -309,5 +440,22 @@ func (pr *PrimaryRoutes) DebugJSON() DebugRoutes {
debug.PrimaryRoutes[prefix.String()] = nodeID
}
// Populate unhealthy nodes
if pr.unhealthy.Len() > 0 {
nodes := pr.unhealthy.Slice()
slices.SortFunc(nodes, func(a, b types.NodeID) int {
if a < b {
return -1
}
if a > b {
return 1
}
return 0
})
debug.UnhealthyNodes = nodes
}
return debug
}

View File

@@ -24,6 +24,7 @@ func TestPrimaryRoutes(t *testing.T) {
expectedRoutes map[types.NodeID]set.Set[netip.Prefix]
expectedPrimaries map[netip.Prefix]types.NodeID
expectedIsPrimary map[types.NodeID]bool
expectedUnhealthy set.Set[types.NodeID]
expectedChange bool
// primaries is a map of prefixes to the node that is the primary for that prefix.
@@ -412,6 +413,161 @@ func TestPrimaryRoutes(t *testing.T) {
},
expectedChange: false,
},
{
name: "unhealthy-primary-fails-over",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.1.0/24"))
return pr.SetNodeHealthy(1, false)
},
expectedRoutes: map[types.NodeID]set.Set[netip.Prefix]{
1: {mp("192.168.1.0/24"): {}},
2: {mp("192.168.1.0/24"): {}},
},
expectedPrimaries: map[netip.Prefix]types.NodeID{
mp("192.168.1.0/24"): 2,
},
expectedIsPrimary: map[types.NodeID]bool{
2: true,
},
expectedUnhealthy: set.Set[types.NodeID]{1: {}},
expectedChange: true,
},
{
name: "unhealthy-non-primary-no-change",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.1.0/24"))
return pr.SetNodeHealthy(2, false)
},
expectedRoutes: map[types.NodeID]set.Set[netip.Prefix]{
1: {mp("192.168.1.0/24"): {}},
2: {mp("192.168.1.0/24"): {}},
},
expectedPrimaries: map[netip.Prefix]types.NodeID{
mp("192.168.1.0/24"): 1,
},
expectedIsPrimary: map[types.NodeID]bool{
1: true,
},
expectedUnhealthy: set.Set[types.NodeID]{2: {}},
expectedChange: false,
},
{
name: "all-unhealthy-keeps-current-primary",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.1.0/24"))
pr.SetNodeHealthy(1, false) // failover to 2
return pr.SetNodeHealthy(2, false) // both unhealthy, falls back to first sorted (1)
},
expectedRoutes: map[types.NodeID]set.Set[netip.Prefix]{
1: {mp("192.168.1.0/24"): {}},
2: {mp("192.168.1.0/24"): {}},
},
expectedPrimaries: map[netip.Prefix]types.NodeID{
mp("192.168.1.0/24"): 1,
},
expectedIsPrimary: map[types.NodeID]bool{
1: true,
},
expectedUnhealthy: set.Set[types.NodeID]{1: {}, 2: {}},
expectedChange: true,
},
{
name: "recovery-marks-healthy-no-flap",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.1.0/24"))
pr.SetRoutes(3, mp("192.168.1.0/24"))
pr.SetNodeHealthy(1, false) // failover to 2
return pr.SetNodeHealthy(1, true) // recovered, but 2 stays primary
},
expectedRoutes: map[types.NodeID]set.Set[netip.Prefix]{
1: {mp("192.168.1.0/24"): {}},
2: {mp("192.168.1.0/24"): {}},
3: {mp("192.168.1.0/24"): {}},
},
expectedPrimaries: map[netip.Prefix]types.NodeID{
mp("192.168.1.0/24"): 2,
},
expectedIsPrimary: map[types.NodeID]bool{
2: true,
},
expectedChange: false,
},
{
name: "clear-unhealthy-on-reconnect",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.1.0/24"))
pr.SetNodeHealthy(1, false) // failover to 2
pr.ClearUnhealthy(1) // reconnect clears unhealthy
// 2 stays primary (stability), but 1 is healthy again
return pr.SetNodeHealthy(1, false) // can be marked unhealthy again
},
expectedRoutes: map[types.NodeID]set.Set[netip.Prefix]{
1: {mp("192.168.1.0/24"): {}},
2: {mp("192.168.1.0/24"): {}},
},
expectedPrimaries: map[netip.Prefix]types.NodeID{
mp("192.168.1.0/24"): 2,
},
expectedIsPrimary: map[types.NodeID]bool{
2: true,
},
expectedUnhealthy: set.Set[types.NodeID]{1: {}},
expectedChange: false,
},
{
name: "unhealthy-then-deregister",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.1.0/24"))
pr.SetNodeHealthy(1, false)
return pr.SetRoutes(1) // deregister clears routes and unhealthy
},
expectedRoutes: map[types.NodeID]set.Set[netip.Prefix]{
2: {mp("192.168.1.0/24"): {}},
},
expectedPrimaries: map[netip.Prefix]types.NodeID{
mp("192.168.1.0/24"): 2,
},
expectedIsPrimary: map[types.NodeID]bool{
2: true,
},
expectedChange: false,
},
{
name: "unhealthy-primary-three-nodes-cascade",
operations: func(pr *PrimaryRoutes) bool {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.1.0/24"))
pr.SetRoutes(3, mp("192.168.1.0/24"))
pr.SetNodeHealthy(1, false) // failover to 2
return pr.SetNodeHealthy(2, false) // failover to 3
},
expectedRoutes: map[types.NodeID]set.Set[netip.Prefix]{
1: {mp("192.168.1.0/24"): {}},
2: {mp("192.168.1.0/24"): {}},
3: {mp("192.168.1.0/24"): {}},
},
expectedPrimaries: map[netip.Prefix]types.NodeID{
mp("192.168.1.0/24"): 3,
},
expectedIsPrimary: map[types.NodeID]bool{
3: true,
},
expectedUnhealthy: set.Set[types.NodeID]{1: {}, 2: {}},
expectedChange: true,
},
{
name: "concurrent-access",
operations: func(pr *PrimaryRoutes) bool {
@@ -476,6 +632,86 @@ func TestPrimaryRoutes(t *testing.T) {
if diff := cmp.Diff(tt.expectedIsPrimary, pr.isPrimary, comps...); diff != "" {
t.Errorf("isPrimary mismatch (-want +got):\n%s", diff)
}
if diff := cmp.Diff(tt.expectedUnhealthy, pr.unhealthy, comps...); diff != "" {
t.Errorf("unhealthy mismatch (-want +got):\n%s", diff)
}
})
}
}
func TestHANodes(t *testing.T) {
tests := []struct {
name string
setup func(pr *PrimaryRoutes)
expected map[netip.Prefix][]types.NodeID
}{
{
name: "single-node-not-ha",
setup: func(pr *PrimaryRoutes) {
pr.SetRoutes(1, mp("192.168.1.0/24"))
},
expected: nil,
},
{
name: "two-nodes-same-prefix-is-ha",
setup: func(pr *PrimaryRoutes) {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.1.0/24"))
},
expected: map[netip.Prefix][]types.NodeID{
mp("192.168.1.0/24"): {1, 2},
},
},
{
name: "two-nodes-different-prefixes-not-ha",
setup: func(pr *PrimaryRoutes) {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.2.0/24"))
},
expected: nil,
},
{
name: "three-nodes-two-share-prefix",
setup: func(pr *PrimaryRoutes) {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.1.0/24"))
pr.SetRoutes(3, mp("10.0.0.0/8"))
},
expected: map[netip.Prefix][]types.NodeID{
mp("192.168.1.0/24"): {1, 2},
},
},
{
name: "three-nodes-all-share",
setup: func(pr *PrimaryRoutes) {
pr.SetRoutes(1, mp("192.168.1.0/24"))
pr.SetRoutes(2, mp("192.168.1.0/24"))
pr.SetRoutes(3, mp("192.168.1.0/24"))
},
expected: map[netip.Prefix][]types.NodeID{
mp("192.168.1.0/24"): {1, 2, 3},
},
},
{
name: "empty",
setup: func(_ *PrimaryRoutes) {
},
expected: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
pr := New()
tt.setup(pr)
got := pr.HANodes()
comps := append(util.Comparers, cmpopts.EquateEmpty())
if diff := cmp.Diff(tt.expected, got, comps...); diff != "" {
t.Errorf("HANodes mismatch (-want +got):\n%s", diff)
}
})
}
}