package networking import ( "context" "errors" "fmt" "log" "time" ) const defaultRollbackSeconds = 120 // errAlreadyPending is returned when another change is awaiting confirmation. // The write handlers map this to 409 Conflict. var errAlreadyPending = errors.New("already pending") // errPending builds the 409 message. The lock is global across all interfaces // (see pendingChange), so the message says so to avoid confusing a user who is // touching a different interface than the one that holds the lock. func errPending(iface string) error { return fmt.Errorf("%w: a change to %s is awaiting confirmation. This is a global lock across all interfaces — confirm or roll that change back first", errAlreadyPending, iface) } // startRollback snapshots the current state, applies the new config, and arms a // timer that auto-reverts if not confirmed. Returns errAlreadyPending (409) if // another change is in flight, or a wrapped error (500) if apply fails. // // Snapshot and Apply run WITHOUT the mutex held, so they don't block reads or // the pending-status endpoint while shelling out to nmcli/networkctl/ifup. func (m *Module) startRollback(ctx context.Context, iface string, cfg IfaceConfig) (int, error) { // Fast pre-check so we don't snapshot/apply when something is already // pending. armPending re-checks under the lock to close the race. if err := m.checkNoPending(); err != nil { return 0, err } prior, err := m.be.Snapshot(ctx, iface) if err != nil { return 0, fmt.Errorf("snapshot %s: %w", iface, err) } if err := m.be.Apply(ctx, iface, cfg); err != nil { // Apply is not atomic: nmcli `con modify` may succeed before `con up` // fails, and networkd writes the .network file before `reconfigure` // runs. A failed Apply can therefore leave a half-applied config that // would otherwise have NO auto-revert (we bail before arming the timer). // Best-effort restore the snapshot so we never leave that unprotected. if rerr := m.be.Apply(ctx, iface, prior); rerr != nil { log.Printf("networking: apply %s failed and restore also failed: %v", iface, rerr) } return 0, fmt.Errorf("apply %s: %w", iface, err) } // The revert runs from the timer or an explicit rollback, possibly with no // client attached, so it uses context.Background() rather than ctx. return m.armPending(iface, func() error { return m.be.Apply(context.Background(), iface, prior) }, cfg.RollbackSeconds) } // startLinkDown takes the interface down behind the same rollback safety net: if // the change is not confirmed, the interface is brought back up. Taking a remote // interface down is just as much a lock-yourself-out risk as a bad static config. // // Bringing a link UP needs no protection (it cannot lock you out), so link-up // stays a direct, un-wrapped call in the handler. func (m *Module) startLinkDown(ctx context.Context, iface string) (int, error) { if err := m.checkNoPending(); err != nil { return 0, err } if err := m.be.SetLinkDown(ctx, iface); err != nil { return 0, fmt.Errorf("link down %s: %w", iface, err) } // Revert (bring the link back up) may run from the timer with no client. return m.armPending(iface, func() error { return m.be.SetLinkUp(context.Background(), iface) }, 0) } // checkNoPending reports a 409 error if a change is already pending. func (m *Module) checkNoPending() error { m.mu.Lock() defer m.mu.Unlock() if m.pending != nil { return errPending(m.pending.Iface) } return nil } // armPending installs the pending change and starts its auto-revert timer. The // caller has already applied the change; revert is the closure that undoes it. // It is invoked on timer expiry, explicit rollback, or if a concurrent change // raced us between the pre-check and here (in which case we revert immediately // and report the conflict). seconds <= 0 uses the default timeout. func (m *Module) armPending(iface string, revert func() error, seconds int) (int, error) { if seconds <= 0 { seconds = defaultRollbackSeconds } dur := time.Duration(seconds) * time.Second m.mu.Lock() defer m.mu.Unlock() if m.pending != nil { // Lost the race — undo what we just applied and report conflict. if err := revert(); err != nil { log.Printf("networking: failed to undo raced change on %s: %v", iface, err) } return 0, errPending(m.pending.Iface) } pc := &pendingChange{ Iface: iface, revert: revert, Deadline: time.Now().Add(dur), } // The timer fires the auto-revert. It captures m and pc by closure so it can // revert even if the server is otherwise idle — the whole point is protecting // against being locked out of a remote box. pc.Timer = time.AfterFunc(dur, func() { // Check validity under lock, then revert outside it so a slow // nmcli/networkctl call doesn't block the entire networking module. m.mu.Lock() if m.pending != pc { m.mu.Unlock() return } iface := pc.Iface revert := pc.revert m.mu.Unlock() log.Printf("networking: rollback timer expired for %s — reverting", iface) if err := revert(); err != nil { log.Printf("networking: auto-rollback of %s failed: %v", iface, err) } m.mu.Lock() if m.pending == pc { m.pending = nil } m.mu.Unlock() }) m.pending = pc return seconds, nil } // confirm cancels the rollback timer and clears the pending change, making it // permanent. Errors if there is no pending change or it's for another interface. func (m *Module) confirm(iface string) error { m.mu.Lock() defer m.mu.Unlock() if m.pending == nil { return fmt.Errorf("no pending change to confirm") } if m.pending.Iface != iface { return fmt.Errorf("pending change is for %s, not %s", m.pending.Iface, iface) } m.pending.Timer.Stop() m.pending = nil return nil } // rollbackNow immediately reverts the pending change and clears it. Errors if // there is no pending change or it's for another interface. func (m *Module) rollbackNow(iface string) error { m.mu.Lock() defer m.mu.Unlock() if m.pending == nil { return fmt.Errorf("no pending change to rollback") } if m.pending.Iface != iface { return fmt.Errorf("pending change is for %s, not %s", m.pending.Iface, iface) } m.pending.Timer.Stop() err := m.pending.revert() m.pending = nil if err != nil { return fmt.Errorf("rollback %s: %w", iface, err) } return nil } // PendingInfo is the JSON body returned by the pending-change status endpoint. type PendingInfo struct { Iface string `json:"interface" example:"eth0" doc:"Interface with a pending change"` SecondsRemaining int `json:"seconds_remaining" example:"45" doc:"Seconds until auto-rollback"` } // pendingInfo returns the current pending change status, or nil if none. func (m *Module) pendingInfo() *PendingInfo { m.mu.Lock() defer m.mu.Unlock() if m.pending == nil { return nil } remaining := max(int(time.Until(m.pending.Deadline).Seconds()), 0) return &PendingInfo{ Iface: m.pending.Iface, SecondsRemaining: remaining, } }