Files
nadir-agent/internal/modules/system/info.go
T
urania 0e041fac5e
build-and-release / release (push) Failing after 2m1s
fix: .minisign for signed releases
2026-06-22 20:03:27 +02:00

617 lines
19 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package system
import (
"context"
"math"
"net"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"syscall"
"time"
"nadir/internal/mounts"
"nadir/internal/oscmd"
"github.com/danielgtaylor/huma/v2"
)
// SystemInfoBody is the dashboard overview: OS identity plus live CPU, memory,
// disk, load, network, and temperature readings. Every section is best-effort —
// a source that's unavailable (e.g. no thermal zones in a VM) yields a zero
// value or empty list rather than failing the whole call.
//
// It is the payload carried by GetInfoOutput for GET /api/system/info.
type SystemInfoBody struct {
OS OSInfo `json:"os" doc:"OS and kernel identity"`
CPU CPUInfo `json:"cpu" doc:"Processor model and core count"`
Memory MemoryInfo `json:"memory" doc:"RAM and swap usage in bytes"`
Load LoadInfo `json:"load" doc:"Load averages (1/5/15 min)"`
// UptimeSec and BootTime are both derived from /proc/uptime.
UptimeSec int64 `json:"uptime_seconds" example:"12490" doc:"Seconds since boot"`
BootTime string `json:"boot_time" example:"2026-06-19T12:08:00Z" doc:"Boot time (RFC3339, UTC)"`
Disks []DiskInfo `json:"disks" doc:"Mounted block-device filesystems"`
NetworkInterfaces []NetInterface `json:"network_interfaces" doc:"Network interfaces and their addresses"`
Temperatures []Temperature `json:"temperatures" doc:"Thermal sensor readings in Celsius"`
}
// OSInfo identifies the host: distro name, running kernel release, hardware
// architecture, and hostname. Any field may be empty when its source could
// not be read.
type OSInfo struct {
PrettyName string `json:"pretty_name" example:"Fedora Linux 44 (Workstation Edition)" doc:"Distro name from /etc/os-release PRETTY_NAME"`
Kernel string `json:"kernel" example:"7.0.12-201.fc44.x86_64" doc:"Running kernel release (uname -r)"`
Architecture string `json:"architecture" example:"x86_64" doc:"Machine hardware architecture (uname -m)"`
Hostname string `json:"hostname" example:"server01" doc:"System hostname"`
}
// CPUInfo describes the processor: model string, logical CPU count, and clock
// frequencies in MHz. A frequency field stays 0 when neither cpufreq sysfs,
// /proc/cpuinfo, nor lscpu could supply a value.
type CPUInfo struct {
Model string `json:"model" example:"AMD Ryzen 7 7840U" doc:"CPU model name"`
LogicalCPUs int `json:"logical_cpus" example:"16" doc:"Number of logical CPUs (cores × threads)"`
MinMHz int `json:"min_mhz" example:"400" doc:"Lowest frequency the scaling governor can select"`
MaxMHz int `json:"max_mhz" example:"5137" doc:"Highest frequency (boost ceiling)"`
CurrentMHz int `json:"current_mhz" example:"3157" doc:"Peak current clock across all cores (instantaneous snapshot; 0 if cpufreq unavailable)"`
}
// MemoryInfo reports RAM and swap sizes in bytes, parsed from /proc/meminfo.
// Fields are 0 when the corresponding meminfo key is missing.
type MemoryInfo struct {
TotalBytes uint64 `json:"total_bytes" example:"16384000000"`
AvailableBytes uint64 `json:"available_bytes" example:"8192000000" doc:"Memory available for new allocations without swapping"`
UsedBytes uint64 `json:"used_bytes" example:"8192000000" doc:"total - available"`
SwapTotalBytes uint64 `json:"swap_total_bytes" example:"8589934592"`
SwapFreeBytes uint64 `json:"swap_free_bytes" example:"8589934592"`
}
// LoadInfo carries the 1/5/15-minute load averages from /proc/loadavg together
// with the most recent per-core CPU usage sample taken by the background
// sampler.
type LoadInfo struct {
Load1 float64 `json:"load1" example:"0.42"`
Load5 float64 `json:"load5" example:"0.55"`
Load15 float64 `json:"load15" example:"0.61"`
CPUUsage []CoreUsage `json:"cpu_usage" doc:"Per-core CPU usage percentage (sampled over ~1 s); empty until the first sample completes"`
}
// CoreUsage holds the usage percentage for a single logical core, computed as
// the delta of non-idle ticks over total ticks between two /proc/stat reads.
//
// UsagePct is rounded to one decimal place and lies in the range 0-100.
type CoreUsage struct {
	Core     int     `json:"core" example:"0" doc:"Logical core index"`
	UsagePct float64 `json:"usage_pct" example:"23.4" doc:"Usage percentage (0-100)"`
}
// DiskInfo describes one mounted filesystem backed by a /dev/ device. Sizes
// are in bytes and come from statfs on the mountpoint.
type DiskInfo struct {
Mountpoint string `json:"mountpoint" example:"/"`
Filesystem string `json:"filesystem" example:"/dev/nvme0n1p2" doc:"Backing device"`
FSType string `json:"fstype" example:"btrfs"`
TotalBytes uint64 `json:"total_bytes" example:"512000000000"`
FreeBytes uint64 `json:"free_bytes" example:"256000000000" doc:"Space available to unprivileged users"`
UsedBytes uint64 `json:"used_bytes" example:"256000000000"`
}
// NetInterface describes one network interface as enumerated by
// net.Interfaces: its name, hardware address, up flag, and assigned addresses.
type NetInterface struct {
Name string `json:"name" example:"eth0"`
MAC string `json:"mac" example:"aa:bb:cc:dd:ee:ff"`
Up bool `json:"up" doc:"Interface is administratively up"`
Addresses []string `json:"addresses" doc:"Assigned addresses in CIDR notation"`
}
// Temperature is a single thermal sensor reading in degrees Celsius, taken
// from hwmon or, when hwmon yields nothing, from a thermal zone.
type Temperature struct {
Chip string `json:"chip" example:"k10temp" doc:"hwmon chip name; identifies the source (k10temp/coretemp=CPU, amdgpu/nvidia=GPU, nvme=disk)"`
Label string `json:"label" example:"Tctl" doc:"Per-sensor label, or the chip name when the sensor is unlabelled"`
Celsius float64 `json:"celsius" example:"47.5"`
}
// GetInfoOutput is the output type of the system-get-info operation; its Body
// field carries the SystemInfoBody built by the handler.
type GetInfoOutput struct{ Body SystemInfoBody }
// registerInfo starts the background CPU usage sampler and registers the
// GET /api/system/info operation on api. The handler assembles every section
// from best-effort readers and always returns a nil error.
func registerInfo(api huma.API) {
	startCPUSampler()

	operation := huma.Operation{
		OperationID: "system-get-info",
		Method:      "GET",
		Path:        "/api/system/info",
		Summary:     "Get system information",
		Description: "Returns an overview for a dashboard: OS/kernel identity, CPU, " +
			"memory and swap, mounted disks, load averages, uptime, network " +
			"interfaces, and temperatures. All values come from cheap local reads " +
			"(/proc, /sys, syscalls) with no D-Bus dependency; each section is " +
			"best-effort.",
		Tags:     []string{tagSystem},
		Metadata: op("read"),
		Errors:   readErrors,
	}

	handler := func(_ context.Context, _ *struct{}) (*GetInfoOutput, error) {
		uptimeSec, bootTime := uptimeAndBoot()

		// Sections are gathered in the same order they appear in the body.
		var body SystemInfoBody
		body.OS = osInfo()
		body.CPU = cpuInfo()
		body.Memory = memInfo()
		body.Load = loadInfo()
		body.UptimeSec = uptimeSec
		body.BootTime = bootTime.Format(time.RFC3339)
		body.Disks = diskInfo()
		body.NetworkInterfaces = netInfo()
		body.Temperatures = tempInfo()

		return &GetInfoOutput{Body: body}, nil
	}

	huma.Register(api, operation, handler)
}
// osInfo collects the host identity: hostname, distro pretty name, kernel
// release (`uname -r`) and machine architecture (`uname -m`). Lookup failures
// are ignored, so an unavailable source leaves its field empty.
func osInfo() OSInfo {
	hostname, _ := os.Hostname() // best-effort: empty hostname on error

	var info OSInfo
	info.PrettyName = osReleasePretty()
	info.Kernel = firstLine(oscmd.Run("uname", "-r"))
	info.Architecture = firstLine(oscmd.Run("uname", "-m"))
	info.Hostname = hostname
	return info
}
// firstLine adapts an (output, error) command result to a plain string by
// dropping the error. The output is passed through unchanged (callers hand in
// output that is already trimmed), so a failed command simply yields whatever
// text came with it — acceptable where a missing value is fine.
func firstLine(out string, _ error) string {
	return out
}
func osReleasePretty() string {
data, err := os.ReadFile("/etc/os-release")
if err != nil {
return ""
}
for line := range strings.SplitSeq(string(data), "\n") {
if v, ok := strings.CutPrefix(line, "PRETTY_NAME="); ok {
return strings.Trim(v, `"`)
}
}
return ""
}
// cpuInfo assembles the CPU section: model name, logical CPU count, and
// min/max/current clock in MHz.
//
// Sources are tried from cheapest to most expensive, and each later source
// only fills what the earlier ones left empty:
//
//  1. cpufreq sysfs and the /proc/cpuinfo model line;
//  2. the /proc/cpuinfo "cpu MHz" lines;
//  3. the external `lscpu` command.
//
// lscpu is spawned only when the model or a frequency is still missing, so
// hosts where sysfs already answers everything (e.g. ARM boards with cpufreq
// but no "cpu MHz" line) do not pay for a process launch on every request.
func cpuInfo() CPUInfo {
	data, _ := os.ReadFile("/proc/cpuinfo") // best-effort: empty input on error
	cpuinfo := string(data)

	c := CPUInfo{Model: cpuModel(cpuinfo), LogicalCPUs: runtime.NumCPU()}
	c.MinMHz, c.MaxMHz, c.CurrentMHz = cpuFreqMHz("/sys/devices/system/cpu")

	// A fallback frequency is only useful if some field is still zero.
	needMHz := c.MinMHz == 0 || c.MaxMHz == 0 || c.CurrentMHz == 0

	// ponytail: cpufreq sysfs is absent on many VMs and stock Ubuntu server
	// kernels; fall back to /proc/cpuinfo "cpu MHz" — VMs have a fixed clock,
	// so min == max == cur is the honest answer.
	var mhz int
	if needMHz {
		mhz = cpuinfoMaxMHz(cpuinfo)
	}

	// ponytail: ARM /proc/cpuinfo has no "cpu MHz" and often no "model name";
	// lscpu decodes the ARM part-id table and reads DMI, so use it as last resort.
	if c.Model == "" || (needMHz && mhz == 0) {
		model, lscpuMHz := lscpuFallback()
		if c.Model == "" {
			c.Model = model
		}
		if mhz == 0 {
			mhz = lscpuMHz
		}
	}

	if mhz > 0 {
		if c.CurrentMHz == 0 {
			c.CurrentMHz = mhz
		}
		if c.MaxMHz == 0 {
			c.MaxMHz = mhz
		}
		if c.MinMHz == 0 {
			c.MinMHz = mhz
		}
	}
	return c
}
// lscpuFallback parses `lscpu` for "Model name" and any embedded "@ X.X GHz"
// or "CPU max MHz:" value. Returns zeros when lscpu is missing or silent.
func lscpuFallback() (model string, mhz int) {
out, err := exec.Command("lscpu").Output()
if err != nil {
return "", 0
}
for line := range strings.SplitSeq(string(out), "\n") {
k, v, ok := strings.Cut(line, ":")
if !ok {
continue
}
k, v = strings.TrimSpace(k), strings.TrimSpace(v)
switch k {
case "Model name":
if model == "" {
model = v
}
case "BIOS Model name":
if model == "" {
model = v
}
case "CPU max MHz", "CPU MHz":
if f, err := strconv.ParseFloat(v, 64); err == nil && int(f) > mhz {
mhz = int(math.Round(f))
}
}
}
if mhz == 0 {
mhz = parseGHzSuffix(model)
}
return model, mhz
}
// parseGHzSuffix extracts the clock speed that follows the last "@" in a CPU
// model string (e.g. "Xeon(R) CPU @ 2.20GHz", "... @ 2.0 GHz") and returns it
// in MHz.
//
// The unit is matched case-insensitively. "GHz", a bare "G", or no unit at all
// are read as gigahertz; "MHz" is read as megahertz. It returns 0 when the
// string has no "@", when the text after it is not a number, or when the
// number is not a positive finite value.
func parseGHzSuffix(s string) int {
	i := strings.LastIndex(s, "@")
	if i < 0 {
		return 0
	}
	rest := strings.ToLower(strings.TrimSpace(s[i+1:]))

	scale := 1000.0 // GHz -> MHz
	switch {
	case strings.HasSuffix(rest, "ghz"):
		rest = strings.TrimSuffix(rest, "ghz")
	case strings.HasSuffix(rest, "mhz"):
		rest = strings.TrimSuffix(rest, "mhz")
		scale = 1
	case strings.HasSuffix(rest, "g"):
		rest = strings.TrimSuffix(rest, "g")
	}

	f, err := strconv.ParseFloat(strings.TrimSpace(rest), 64)
	// ParseFloat accepts "inf"/"nan"; converting those to int is not
	// well-defined, and a non-positive clock is meaningless.
	if err != nil || math.IsNaN(f) || math.IsInf(f, 0) || f <= 0 {
		return 0
	}
	return int(math.Round(f * scale))
}
// cpuinfoMaxMHz returns the highest "cpu MHz" value across all cores in
// /proc/cpuinfo, rounded to an int. Returns 0 when no such line exists.
func cpuinfoMaxMHz(cpuinfo string) int {
var max float64
for line := range strings.SplitSeq(cpuinfo, "\n") {
k, v, ok := strings.Cut(line, ":")
if !ok || strings.TrimSpace(k) != "cpu MHz" {
continue
}
if f, err := strconv.ParseFloat(strings.TrimSpace(v), 64); err == nil && f > max {
max = f
}
}
return int(math.Round(max))
}
// cpuFreqMHz reads clock frequencies from the cpufreq sysfs tree under root
// and converts them from kHz to MHz.
//
// min and max are the hardware limits published for cpu0; cur is the highest
// scaling_cur_freq found across all cpuN directories, i.e. the "is anything
// boosting right now" figure. Every value is 0 when its file is missing or
// unparseable, which is what happens when cpufreq is absent (e.g. some VMs).
func cpuFreqMHz(root string) (min, max, cur int) {
	limitsDir := filepath.Join(root, "cpu0/cpufreq")
	min = readKHzAsMHz(filepath.Join(limitsDir, "cpuinfo_min_freq"))
	max = readKHzAsMHz(filepath.Join(limitsDir, "cpuinfo_max_freq"))

	pattern := filepath.Join(root, "cpu[0-9]*/cpufreq/scaling_cur_freq")
	perCore, _ := filepath.Glob(pattern)
	for i := range perCore {
		mhz := readKHzAsMHz(perCore[i])
		if mhz <= cur {
			continue
		}
		cur = mhz
	}
	return min, max, cur
}
// readKHzAsMHz reads an integer kHz value from the file at path and returns it
// in MHz (integer division, so fractions are truncated). A missing file or
// non-numeric content yields 0.
func readKHzAsMHz(path string) int {
	if khz, err := strconv.Atoi(readTrim(path)); err == nil {
		return khz / 1000
	}
	return 0
}
// cpuModel extracts the processor model from /proc/cpuinfo. x86 uses "model
// name"; many ARM boards use "Model" instead, so fall back to it.
func cpuModel(cpuinfo string) string {
var fallback string
for line := range strings.SplitSeq(cpuinfo, "\n") {
k, v, ok := strings.Cut(line, ":")
if !ok {
continue
}
switch strings.TrimSpace(k) {
case "model name":
return strings.TrimSpace(v)
case "Model":
fallback = strings.TrimSpace(v)
}
}
return fallback
}
// memInfo reads /proc/meminfo and returns the parsed memory figures. A read
// error is ignored: the parser then sees empty input and every field is 0.
func memInfo() MemoryInfo {
	raw, _ := os.ReadFile("/proc/meminfo")
	return parseMeminfo(raw)
}
// parseMeminfo reads the kB values in /proc/meminfo and converts them to bytes.
//
// Lines that are not "Key: <number> ..." are skipped, and keys missing from
// the input are reported as 0. UsedBytes is MemTotal - MemAvailable, clamped
// at 0: the subtraction is unsigned, so without the clamp an input where
// MemAvailable exceeds MemTotal (or MemTotal is missing) would wrap around to
// an enormous value.
func parseMeminfo(data []byte) MemoryInfo {
	kv := map[string]uint64{}
	for line := range strings.SplitSeq(string(data), "\n") {
		k, v, ok := strings.Cut(line, ":")
		if !ok {
			continue
		}
		fields := strings.Fields(v) // e.g. "16384000 kB"
		if len(fields) == 0 {
			continue
		}
		if n, err := strconv.ParseUint(fields[0], 10, 64); err == nil {
			kv[k] = n * 1024 // values are in kB
		}
	}

	total, available := kv["MemTotal"], kv["MemAvailable"]
	var used uint64
	if total > available {
		used = total - available
	}
	return MemoryInfo{
		TotalBytes:     total,
		AvailableBytes: available,
		UsedBytes:      used,
		SwapTotalBytes: kv["SwapTotal"],
		SwapFreeBytes:  kv["SwapFree"],
	}
}
// loadInfo combines the load averages from /proc/loadavg with the latest
// cached per-core CPU usage sample. A read error is ignored, which leaves the
// load averages at 0.
func loadInfo() LoadInfo {
	raw, _ := os.ReadFile("/proc/loadavg")
	info := parseLoadavg(string(raw))
	info.CPUUsage = cachedCPUUsage()
	return info
}
// parseLoadavg extracts the 1-, 5- and 15-minute load averages, which are the
// first three whitespace-separated fields of /proc/loadavg. Input with fewer
// than three fields yields a zero LoadInfo; a field that fails to parse
// contributes whatever value strconv.ParseFloat returned alongside the error.
func parseLoadavg(loadavg string) LoadInfo {
	parts := strings.Fields(loadavg)
	if len(parts) < 3 {
		return LoadInfo{}
	}

	var avg [3]float64
	for i := range avg {
		avg[i], _ = strconv.ParseFloat(parts[i], 64)
	}
	return LoadInfo{Load1: avg[0], Load5: avg[1], Load15: avg[2]}
}
// ---------------------------------------------------------------------------
// Per-core CPU usage sampler
// ---------------------------------------------------------------------------
//
// /proc/stat exposes cumulative jiffies per core:
//
// cpuN user nice system idle iowait irq softirq steal guest guest_nice
//
// We sample every second, compute the delta, and derive:
//
// usage% = (totalΔ - idleΔ) / totalΔ × 100
//
// The result is cached behind a RWMutex so the HTTP handler never blocks.
var (
// usageMu guards usageCache: the sampler goroutine takes the write lock to
// publish a sample, readers take the read lock to copy it.
usageMu sync.RWMutex
// usageCache is the most recently computed per-core usage sample. It is nil
// until the sampler has completed its first interval.
usageCache []CoreUsage
)
// cachedCPUUsage returns a private copy of the latest per-core usage sample,
// or nil when no sample has been published yet. Copying under the read lock
// keeps callers from mutating the shared cache.
func cachedCPUUsage() []CoreUsage {
	usageMu.RLock()
	defer usageMu.RUnlock()

	if usageCache == nil {
		return nil
	}
	return append(make([]CoreUsage, 0, len(usageCache)), usageCache...)
}
// samplerOnce ensures the CPU sampler goroutine is started at most once.
var samplerOnce sync.Once

// startCPUSampler launches a goroutine that samples /proc/stat once per second
// for the lifetime of the process. Safe to call multiple times (only the first
// call starts the goroutine).
func startCPUSampler() {
	launch := func() {
		go cpuSamplerLoop("/proc/stat", time.Second)
	}
	samplerOnce.Do(launch)
}
// cpuSamplerLoop never returns. It takes a baseline reading of statPath, then
// repeatedly sleeps for interval, reads again, turns the difference between
// the two readings into per-core usage, and publishes the result to usageCache
// under the write lock. The newest reading becomes the next baseline.
func cpuSamplerLoop(statPath string, interval time.Duration) {
	baseline := readProcStat(statPath)
	for {
		time.Sleep(interval)

		latest := readProcStat(statPath)
		sample := computeUsage(baseline, latest)

		usageMu.Lock()
		usageCache = sample
		usageMu.Unlock()

		baseline = latest
	}
}
// cpuCoreTicks holds the cumulative jiffies for one "cpuN" line.
type cpuCoreTicks struct {
core int
total uint64
idle uint64
}
// readProcStat reads /proc/stat and returns per-core tick totals. The
// aggregate "cpu" line (no digit suffix) is skipped.
func readProcStat(path string) []cpuCoreTicks {
data, _ := os.ReadFile(path)
var cores []cpuCoreTicks
for line := range strings.SplitSeq(string(data), "\n") {
if !strings.HasPrefix(line, "cpu") {
continue
}
fields := strings.Fields(line)
if len(fields) < 5 {
continue
}
// Skip the aggregate "cpu" line; we only want "cpu0", "cpu1", …
name := fields[0]
if name == "cpu" {
continue
}
coreIdx, err := strconv.Atoi(strings.TrimPrefix(name, "cpu"))
if err != nil {
continue
}
// Fields: user(1) nice(2) system(3) idle(4) iowait(5) irq(6) softirq(7) steal(8) …
var total, idle uint64
for _, f := range fields[1:] {
v, _ := strconv.ParseUint(f, 10, 64)
total += v
}
// idle = idle + iowait (indices 4 and 5 in the original line).
if len(fields) > 5 {
v4, _ := strconv.ParseUint(fields[4], 10, 64)
v5, _ := strconv.ParseUint(fields[5], 10, 64)
idle = v4 + v5
} else {
v4, _ := strconv.ParseUint(fields[4], 10, 64)
idle = v4
}
cores = append(cores, cpuCoreTicks{core: coreIdx, total: total, idle: idle})
}
return cores
}
// computeUsage turns two successive /proc/stat readings into a usage
// percentage per core, rounded to one decimal place. Cores present in cur but
// not in prev are omitted, since there is no baseline to diff against.
//
// The tick counters are unsigned, so every delta is guarded against going
// backwards: a total that did not advance (or was reset, e.g. after a core
// was re-onlined) reports 0 %, and the idle delta is clamped to [0, totalΔ]
// because the kernel's iowait counter is allowed to decrease. Without these
// guards the subtraction would wrap around and yield absurd percentages.
func computeUsage(prev, cur []cpuCoreTicks) []CoreUsage {
	prevMap := make(map[int]cpuCoreTicks, len(prev))
	for _, c := range prev {
		prevMap[c.core] = c
	}
	usage := make([]CoreUsage, 0, len(cur))
	for _, c := range cur {
		p, ok := prevMap[c.core]
		if !ok {
			continue
		}
		var pct float64
		if c.total > p.total {
			dTotal := c.total - p.total
			var dIdle uint64
			if c.idle > p.idle {
				dIdle = c.idle - p.idle
			}
			if dIdle > dTotal {
				dIdle = dTotal
			}
			pct = float64(dTotal-dIdle) / float64(dTotal) * 100
			// Round to one decimal.
			pct = math.Round(pct*10) / 10
		}
		usage = append(usage, CoreUsage{Core: c.core, UsagePct: pct})
	}
	return usage
}
// uptimeAndBoot returns the whole seconds elapsed since boot, taken from the
// first field of /proc/uptime, and the boot instant in UTC obtained by
// subtracting that uptime from the current time. If the file cannot be read,
// is empty, or does not start with a number, both results are zero values so
// the request can still succeed.
func uptimeAndBoot() (int64, time.Time) {
	raw, err := os.ReadFile("/proc/uptime")
	if err != nil {
		return 0, time.Time{}
	}

	parts := strings.Fields(string(raw))
	if len(parts) == 0 {
		return 0, time.Time{}
	}

	up, err := strconv.ParseFloat(parts[0], 64)
	if err != nil {
		return 0, time.Time{}
	}

	elapsed := time.Duration(up * float64(time.Second))
	bootedAt := time.Now().Add(-elapsed).UTC()
	return int64(up), bootedAt
}
// diskInfo lists the mounted filesystems that sit on a /dev/ device, with
// their sizes from statfs. It returns nil when the mount table cannot be
// read, and otherwise a non-nil (possibly empty) slice.
//
// Skipped entries: anything not backed by a /dev/ path (pseudo filesystems),
// squashfs mounts (snap's loop images would clutter the list), mountpoints
// already reported, and mountpoints where statfs fails or reports zero blocks.
func diskInfo() []DiskInfo {
	entries, err := mounts.Proc()
	if err != nil {
		return nil
	}

	result := []DiskInfo{}
	visited := map[string]bool{}
	for _, m := range entries {
		switch {
		case !strings.HasPrefix(m.Device, "/dev/"):
			continue
		case m.FSType == "squashfs":
			continue
		case visited[m.Mountpoint]:
			continue
		}

		var fs syscall.Statfs_t
		if err := syscall.Statfs(m.Mountpoint, &fs); err != nil || fs.Blocks == 0 {
			continue
		}
		visited[m.Mountpoint] = true

		blockSize := uint64(fs.Bsize)
		entry := DiskInfo{
			Mountpoint: m.Mountpoint,
			Filesystem: m.Device,
			FSType:     m.FSType,
			TotalBytes: fs.Blocks * blockSize,
			FreeBytes:  fs.Bavail * blockSize,
			UsedBytes:  (fs.Blocks - fs.Bfree) * blockSize,
		}
		result = append(result, entry)
	}
	return result
}
// netInfo enumerates the host's network interfaces with their MAC address, up
// flag, and assigned addresses. It returns nil when the interfaces cannot be
// listed; an interface whose address lookup fails is still reported, with an
// empty address list.
func netInfo() []NetInterface {
	ifaces, err := net.Interfaces()
	if err != nil {
		return nil
	}

	result := make([]NetInterface, 0, len(ifaces))
	for i := range ifaces {
		ifi := &ifaces[i]

		addrs, _ := ifi.Addrs() // best-effort: no addresses on error
		cidrs := make([]string, 0, len(addrs))
		for _, addr := range addrs {
			cidrs = append(cidrs, addr.String())
		}

		result = append(result, NetInterface{
			Name:      ifi.Name,
			MAC:       ifi.HardwareAddr.String(),
			Up:        ifi.Flags&net.FlagUp == net.FlagUp,
			Addresses: cidrs,
		})
	}
	return result
}
// tempInfo returns the hwmon sensor readings when there are any, and otherwise
// falls back to the thermal zones.
func tempInfo() []Temperature {
	hwmon := readHwmonTemps("/sys/class/hwmon")
	if len(hwmon) == 0 {
		// ponytail: stock Ubuntu server has no coretemp/k10temp loaded, so hwmon
		// is empty; thermal_zone exposes ACPI sensors (coarser, no chip name).
		return readThermalZones("/sys/class/thermal")
	}
	return hwmon
}
// readThermalZones collects readings from root/thermal_zone*/temp, the
// fallback for hosts without hwmon chip drivers. Each zone's "type" file (e.g.
// acpitz, x86_pkg_temp) is used as both chip and label; when it is empty the
// zone directory name is used instead. Values are millidegrees in sysfs;
// zones that are unreadable or outside the open interval (0, 150) °C are
// skipped. The result is never nil.
func readThermalZones(root string) []Temperature {
	readings := []Temperature{}

	zoneDirs, _ := filepath.Glob(filepath.Join(root, "thermal_zone*"))
	for _, zone := range zoneDirs {
		raw := readTrim(filepath.Join(zone, "temp"))
		milli, err := strconv.Atoi(raw)
		if err != nil {
			continue
		}

		celsius := float64(milli) / 1000
		if plausible := celsius > 0 && celsius < 150; !plausible {
			continue
		}

		kind := readTrim(filepath.Join(zone, "type"))
		if kind == "" {
			kind = filepath.Base(zone)
		}
		readings = append(readings, Temperature{Chip: kind, Label: kind, Celsius: celsius})
	}
	return readings
}
// readHwmonTemps gathers every tempN_input reading under root/hwmon*. Unlike
// the thermal-zone tree, which only exposes generic ACPI zones such as
// "acpitz", hwmon names each chip, so callers can tell CPU (k10temp/coretemp)
// from GPU (amdgpu/nvidia) from disk (nvme). A sensor's label comes from the
// matching tempN_label file, falling back to the chip name when that is empty.
// Values are millidegrees in sysfs; sensors that are unreadable or outside the
// open interval (0, 150) °C are skipped. The result is never nil.
func readHwmonTemps(root string) []Temperature {
	readings := []Temperature{}

	chipDirs, _ := filepath.Glob(filepath.Join(root, "hwmon*"))
	for _, chipDir := range chipDirs {
		chipName := readTrim(filepath.Join(chipDir, "name"))

		sensors, _ := filepath.Glob(filepath.Join(chipDir, "temp*_input"))
		for _, sensor := range sensors {
			milli, err := strconv.Atoi(readTrim(sensor))
			if err != nil {
				continue
			}

			// Disabled/placeholder sensors report absurd values (e.g. -0.15 or
			// 179.8 °C). Keep only readings inside a plausible band.
			celsius := float64(milli) / 1000
			if plausible := celsius > 0 && celsius < 150; !plausible {
				continue
			}

			labelPath := strings.TrimSuffix(sensor, "_input") + "_label"
			label := readTrim(labelPath)
			if label == "" {
				label = chipName
			}
			readings = append(readings, Temperature{Chip: chipName, Label: label, Celsius: celsius})
		}
	}
	return readings
}
// readTrim returns the contents of the file at path with leading and trailing
// whitespace removed. The read error is deliberately ignored, so a missing
// file yields "".
func readTrim(path string) string {
	raw, _ := os.ReadFile(path)
	text := string(raw)
	return strings.TrimSpace(text)
}