Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| bb5cc8268f | |||
| cad6c1f421 | |||
| 22e6812d4b | |||
| 0415e905af |
@@ -1,15 +0,0 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(go get *)",
|
||||
"Bash(go build *)",
|
||||
"Bash(go vet *)",
|
||||
"Read(//usr/lib/**)",
|
||||
"Read(//proc/**)",
|
||||
"Bash(systemctl show *)",
|
||||
"Bash(echo \"exit=$?\")",
|
||||
"Bash(systemctl list-units *)",
|
||||
"Bash(go test *)"
|
||||
]
|
||||
}
|
||||
}
|
||||
+2
-1
@@ -13,4 +13,5 @@ config.yml
|
||||
|
||||
CLAUDE.md
|
||||
|
||||
minisign.key
|
||||
minisign.key
|
||||
./.claude
|
||||
@@ -19,7 +19,7 @@ Functionality is organized into **modules**. Each module owns a slice of the
|
||||
API and declares its own permission vocabulary.
|
||||
|
||||
- **System** - Dashboard overview (OS/kernel, CPU, memory, disks, load, uptime,
|
||||
network interfaces, temperatures); get/set hostname; time, timezone, and NTP;
|
||||
network interfaces, GPU, temperatures); get/set hostname; time, timezone, and NTP;
|
||||
locale and console keymap; reboot and power off.
|
||||
- **Services** - List and inspect systemd units; start / stop / restart / enable
|
||||
/ disable; read service logs from the journal or an allowlisted file, as a
|
||||
@@ -29,10 +29,11 @@ API and declares its own permission vocabulary.
|
||||
- **Groups** - List, inspect, create, and delete local groups.
|
||||
- **Packages** - List installed packages and available updates; install, remove,
|
||||
and upgrade - streamed live over SSE. Auto-detects `dnf`, `apt`, or `pacman`.
|
||||
- **Networking** - List network interfaces, routing tables, and DNS settings; configure IPv4 settings with temporary applying and safety auto-rollback; bring interfaces up or down.
|
||||
- **Networking** - List network interfaces, routing tables, and DNS settings; configure IPv4 settings, applied temporarily with an automatic safety rollback; bring interfaces up or down; edit `/etc/hosts`.
|
||||
- **Storage** - List active mounts and `/etc/fstab` entries; add, edit, and delete fstab entries; mount and unmount filesystems.
|
||||
- **Audit** - Read-only trail of every privileged write (who, what, when, result).
|
||||
- **Meta** - Self-description for clients: `/api/_modules`, `/api/whoami`,
|
||||
`/api/health`.
|
||||
`/api/health`; trigger a self-update via `POST /api/update`.
|
||||
|
||||
### Security model at a glance
|
||||
|
||||
@@ -159,14 +160,18 @@ assigns the admin role to the installing user.
|
||||
|
||||
| Command | Effect |
|
||||
| ------------------------------------------------ | --------------------------------------------------------------------------- |
|
||||
| `nadir [run] [-d]` | Start the server. `-d` / `--detach` runs it in the background. |
|
||||
| `nadir --save-config` | Save the default configuration template to the target path and exit. |
|
||||
| `nadir install` | Install + enable the systemd service (starts now and on boot). |
|
||||
| `nadir uninstall` | Stop, disable, and remove the systemd service. |
|
||||
| `nadir start` \| `stop` \| `restart` \| `status` | Control the running service. |
|
||||
| `nadir enable` \| `disable` | Toggle start-on-boot without removing the unit. |
|
||||
| `nadir logs` | Follow logs - journald if installed as a service, otherwise the detach log. |
|
||||
| `nadir help` | Show usage. |
|
||||
| `nadir [run] [-d]` | Start the server. `-d` / `--detach` runs it in the background. |
|
||||
| `nadir --save-config` | Save the default configuration template to the target path and exit. |
|
||||
| `nadir install` | Install + enable the systemd service (starts now and on boot). |
|
||||
| `nadir uninstall` | Stop, disable, and remove the systemd service. |
|
||||
| `nadir start` \| `stop` \| `restart` \| `status` | Control the running service. |
|
||||
| `nadir enable` \| `disable` | Toggle start-on-boot without removing the unit. |
|
||||
| `nadir logs` | Follow logs - journald if installed as a service, otherwise the detach log. |
|
||||
| `nadir update [--check] [--force]` | Download and install the latest release (requires `server.release_repo` in config). `--check` reports the available version without downloading; `--force` re-downloads even when already current. |
|
||||
| `nadir token add <name>` | Mint a machine API token (shown once, not stored in plain text). |
|
||||
| `nadir token rm <name>` | Revoke a token immediately (no restart needed). |
|
||||
| `nadir token ls` | List token names (not the raw keys). |
|
||||
| `nadir help` | Show usage. |
|
||||
|
||||
Most commands need root.
|
||||
|
||||
@@ -187,6 +192,7 @@ server:
|
||||
# tls_key: /etc/nadir/tls/key.pem
|
||||
hostname: 100.64.0.189
|
||||
port: 9999
|
||||
# release_repo: https://gitea.example.com/owner/nadir # enables `nadir update`
|
||||
|
||||
# Quote "*" - bare * is YAML alias syntax and fails to parse.
|
||||
roles:
|
||||
@@ -216,6 +222,7 @@ log_files:
|
||||
| `tls_cert`, `tls_key` | - | PEM paths. When both are set (and `trust_proxy` is off), nadir terminates TLS with this pair. |
|
||||
| `hostname` | - | Address to bind. Use `127.0.0.1` for local-only, or an overlay/VPN address to expose nadir only on that interface. |
|
||||
| `port` | - | TCP port to listen on. |
|
||||
| `release_repo` | - | Gitea repo URL (`https://host/owner/repo`). When set, enables `nadir update` and `POST /api/update`. Must be `https://`. |
|
||||
|
||||
TLS selection is covered in [Deployment note 2](#2-tls-three-modes).
|
||||
|
||||
@@ -389,7 +396,27 @@ forwarded headers are trustworthy. Without step 1 you're trusting every peer on
|
||||
the overlay - fine for a single-tenant network you fully control, risky on a
|
||||
shared one.
|
||||
|
||||
### 4. Connecting a dashboard (machine clients)
|
||||
### 4. Self-update
|
||||
|
||||
When `server.release_repo` points at a Gitea repo, nadir can update itself:
|
||||
|
||||
```bash
|
||||
sudo nadir update # download + install latest, restart service
|
||||
sudo nadir update --check # report available version, do nothing
|
||||
sudo nadir update --force # re-download even if already at latest
|
||||
```
|
||||
|
||||
The updater:
|
||||
1. Fetches the latest release from the Gitea API.
|
||||
2. Downloads the binary for the host's architecture (`linux-amd64`, `linux-arm64`, …).
|
||||
3. Verifies the release: checks the minisign signature on `sha256sums.txt`, then checks the binary's SHA-256 against it. Refuses to install if either check fails.
|
||||
4. Atomically replaces the running binary (`os.Rename` on the same filesystem) and runs `systemctl restart nadir`.
|
||||
|
||||
The same flow is also reachable via `POST /api/update` (requires the admin wildcard role), which runs the updater detached and returns 202 immediately. Poll `GET /api/health` to confirm the new version is running after the restart drops in-flight connections.
|
||||
|
||||
`release_repo` must use `https://` — the update downloads and executes the binary, and a plaintext URL would expose the host to on-path replacement.
|
||||
|
||||
### 5. Connecting a dashboard (machine clients)
|
||||
|
||||
To manage one or more Nadir instances via a central dashboard or non-interactive client, authenticate requests using a static Bearer token rather than interactive PAM credentials.
|
||||
|
||||
@@ -439,22 +466,23 @@ To connect a browser-based dashboard hosted on a different origin, choose one of
|
||||
## Layout
|
||||
|
||||
```
|
||||
cmd/ process entry point + CLI (run / install / logs …), TLS, service wiring
|
||||
internal/auth PAM auth, sessions, login/logout, login throttle, PAM service install
|
||||
cmd/ process entry point + CLI (run / install / update / token / logs …), TLS, service wiring
|
||||
internal/auth PAM auth, sessions, login/logout, login throttle, bearer tokens, PAM service install
|
||||
internal/auditlog SQLite-backed audit log writer
|
||||
internal/config config.yaml loader + startup validation
|
||||
internal/meta /api/_modules, /api/whoami, /api/health discovery endpoints
|
||||
internal/meta /api/_modules, /api/whoami, /api/health, /api/update discovery + update endpoints
|
||||
internal/module the Module interface
|
||||
internal/modules concrete modules:
|
||||
system - info, hostname, time/timezone/NTP, locale/keymap, power
|
||||
services - systemd unit control + journal/file logs (snapshot + SSE)
|
||||
users - local accounts
|
||||
groups - local groups
|
||||
packages - dnf/apt/pacman install/remove/upgrade (streamed)
|
||||
audit - read-only audit trail
|
||||
networking - network interfaces, routing tables, DNS, and IP configurations
|
||||
system - info, hostname, time/timezone/NTP, locale/keymap, power
|
||||
services - systemd unit control + journal/file logs (snapshot + SSE)
|
||||
users - local accounts
|
||||
groups - local groups
|
||||
packages - dnf/apt/pacman install/remove/upgrade (streamed)
|
||||
networking - interfaces, routing tables, DNS, IP config, /etc/hosts
|
||||
storage - active mounts, /etc/fstab read/write, mount/unmount
|
||||
internal/mounts /proc/mounts parser (used by storage module)
|
||||
internal/oscmd shared command runner (timeouts, stderr surfacing) + helpers
|
||||
internal/rbac roles, permissions ("*" wildcards), HTTP middleware (RBAC + CSRF)
|
||||
internal/audit SQLite-backed audit log writer
|
||||
```
|
||||
|
||||
## API docs
|
||||
|
||||
+136
-8
@@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
@@ -8,8 +9,11 @@ import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/mattn/go-isatty"
|
||||
|
||||
"nadir/internal/auth"
|
||||
"nadir/internal/config"
|
||||
)
|
||||
@@ -137,6 +141,125 @@ func installService(args []string) error {
|
||||
isUnsecure := *unsecureOpt || optCount == 0
|
||||
isTrustProxy := *trustProxyOpt
|
||||
|
||||
cfgPath, err := resolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
shouldWriteConfig := false
|
||||
if _, err := os.Stat(cfgPath); os.IsNotExist(err) {
|
||||
shouldWriteConfig = true
|
||||
}
|
||||
|
||||
username := getUsername()
|
||||
var logFiles map[string][]string
|
||||
|
||||
if fs.NFlag() == 0 && (isatty.IsTerminal(os.Stdin.Fd()) || isatty.IsCygwinTerminal(os.Stdin.Fd())) {
|
||||
reader := bufio.NewReader(os.Stdin)
|
||||
|
||||
if !shouldWriteConfig {
|
||||
fmt.Printf("Configuration file already exists at %s. Overwrite? [y/N] (default n): ", cfgPath)
|
||||
overwriteInput, _ := reader.ReadString('\n')
|
||||
overwriteInput = strings.ToLower(strings.TrimSpace(overwriteInput))
|
||||
if overwriteInput != "y" && overwriteInput != "yes" {
|
||||
fmt.Println("Keeping existing configuration. Proceeding with installation...")
|
||||
if existingCfg, loadErr := config.Load(cfgPath); loadErr == nil {
|
||||
*hostnameOpt = existingCfg.Server.Hostname
|
||||
if p, err := strconv.Atoi(existingCfg.Server.Port); err == nil {
|
||||
*portOpt = p
|
||||
}
|
||||
isTLS = existingCfg.Server.TLSCert != "" && existingCfg.Server.TLSKey != ""
|
||||
isTrustProxy = existingCfg.Server.TrustProxy
|
||||
isUnsecure = !isTLS && !isTrustProxy
|
||||
}
|
||||
goto skipConfigPrompt
|
||||
}
|
||||
shouldWriteConfig = true
|
||||
}
|
||||
|
||||
fmt.Println("Configuring Nadir installation:")
|
||||
fmt.Println(" 1) Serve plaintext HTTP directly (unsecure) [default]")
|
||||
fmt.Println(" 2) Generate persistent self-signed TLS cert/key and enable HTTPS (tls)")
|
||||
fmt.Println(" 3) Serve plaintext HTTP behind a trusted TLS-terminating reverse proxy (trust-proxy)")
|
||||
fmt.Print("Enter choice [1-3] (default 1): ")
|
||||
choice, _ := reader.ReadString('\n')
|
||||
choice = strings.TrimSpace(choice)
|
||||
if choice == "" || choice == "1" {
|
||||
isUnsecure = true
|
||||
isTLS = false
|
||||
isTrustProxy = false
|
||||
} else if choice == "2" {
|
||||
isTLS = true
|
||||
isUnsecure = false
|
||||
isTrustProxy = false
|
||||
} else if choice == "3" {
|
||||
isTrustProxy = true
|
||||
isTLS = false
|
||||
isUnsecure = false
|
||||
} else {
|
||||
return fmt.Errorf("invalid choice: %q", choice)
|
||||
}
|
||||
|
||||
fmt.Printf("Enter hostname to bind to (default %s): ", *hostnameOpt)
|
||||
hostChoice, _ := reader.ReadString('\n')
|
||||
hostChoice = strings.TrimSpace(hostChoice)
|
||||
if hostChoice != "" {
|
||||
*hostnameOpt = hostChoice
|
||||
}
|
||||
|
||||
fmt.Printf("Enter port to bind to (default %d): ", *portOpt)
|
||||
portChoice, _ := reader.ReadString('\n')
|
||||
portChoice = strings.TrimSpace(portChoice)
|
||||
if portChoice != "" {
|
||||
p, err := strconv.Atoi(portChoice)
|
||||
if err != nil || p <= 0 || p > 65535 {
|
||||
return fmt.Errorf("invalid port: %q", portChoice)
|
||||
}
|
||||
*portOpt = p
|
||||
}
|
||||
|
||||
fmt.Printf("Enter main admin username (default %s): ", username)
|
||||
userChoice, _ := reader.ReadString('\n')
|
||||
userChoice = strings.TrimSpace(userChoice)
|
||||
if userChoice != "" {
|
||||
username = userChoice
|
||||
}
|
||||
|
||||
fmt.Print("Would you like to expose any log files to the Nadir UI? [y/N] (default n): ")
|
||||
logInput, _ := reader.ReadString('\n')
|
||||
logInput = strings.ToLower(strings.TrimSpace(logInput))
|
||||
if logInput == "y" || logInput == "yes" {
|
||||
logFiles = make(map[string][]string)
|
||||
for {
|
||||
fmt.Print(" Enter service/unit name (e.g. nginx): ")
|
||||
unit, _ := reader.ReadString('\n')
|
||||
unit = strings.TrimSpace(unit)
|
||||
if unit == "" {
|
||||
fmt.Println(" Service name cannot be empty. Skipping.")
|
||||
continue
|
||||
}
|
||||
|
||||
fmt.Printf(" Enter absolute path to log file for %s: ", unit)
|
||||
path, _ := reader.ReadString('\n')
|
||||
path = strings.TrimSpace(path)
|
||||
if path == "" {
|
||||
fmt.Println(" Path cannot be empty. Skipping.")
|
||||
continue
|
||||
}
|
||||
|
||||
logFiles[unit] = append(logFiles[unit], path)
|
||||
|
||||
fmt.Print(" Add another log file? [y/N] (default n): ")
|
||||
another, _ := reader.ReadString('\n')
|
||||
another = strings.ToLower(strings.TrimSpace(another))
|
||||
if another != "y" && another != "yes" {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
skipConfigPrompt:
|
||||
// Provision the PAM service the server authenticates against, so it exists
|
||||
// before the unit starts rather than appearing on first login. Idempotent:
|
||||
// EnsurePAMService leaves an existing /etc/pam.d/nadir untouched. runServer
|
||||
@@ -181,11 +304,6 @@ func installService(args []string) error {
|
||||
}
|
||||
}
|
||||
|
||||
cfgPath, err := resolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Construct configuration template content based on installation options
|
||||
secureTLSVal := "true"
|
||||
trustProxyLine := "# trust_proxy: false"
|
||||
@@ -204,11 +322,21 @@ func installService(args []string) error {
|
||||
keyLine = "# tls_key: /var/lib/nadir/tls/key.pem"
|
||||
}
|
||||
|
||||
username := getUsername()
|
||||
configContent := fmt.Sprintf(configTemplateBase, secureTLSVal, trustProxyLine, certLine, keyLine, *hostnameOpt, *portOpt, username)
|
||||
if len(logFiles) > 0 {
|
||||
var logFilesSection strings.Builder
|
||||
logFilesSection.WriteString("\nlog_files:\n")
|
||||
for unit, paths := range logFiles {
|
||||
logFilesSection.WriteString(fmt.Sprintf(" %s:\n", unit))
|
||||
for _, path := range paths {
|
||||
logFilesSection.WriteString(fmt.Sprintf(" - %s\n", path))
|
||||
}
|
||||
}
|
||||
configContent += logFilesSection.String()
|
||||
}
|
||||
|
||||
// Ensure default config file exists
|
||||
if _, err := os.Stat(cfgPath); os.IsNotExist(err) {
|
||||
// Ensure default config file exists or we explicitly overwrote it
|
||||
if shouldWriteConfig {
|
||||
if err := saveDefaultConfig(cfgPath, configContent); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ require (
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/jedisct1/go-minisign v0.0.0-20260527172527-a09352b57a22
|
||||
github.com/mattn/go-isatty v0.0.21 // indirect
|
||||
github.com/mattn/go-isatty v0.0.21
|
||||
github.com/ncruces/go-strftime v1.0.0 // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
golang.org/x/crypto v0.52.0 // indirect
|
||||
|
||||
+5
-1
@@ -99,7 +99,11 @@ do_install() {
|
||||
|
||||
echo "binary installed at /usr/local/bin/nadir"
|
||||
echo "installing as a systemd service ..."
|
||||
/usr/local/bin/nadir install
|
||||
if [ -c /dev/tty ]; then
|
||||
/usr/local/bin/nadir install < /dev/tty
|
||||
else
|
||||
/usr/local/bin/nadir install
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "done. check status with: nadir status"
|
||||
|
||||
@@ -0,0 +1,187 @@
|
||||
package system
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"nadir/internal/oscmd"
|
||||
)
|
||||
|
||||
// GPUInfo describes one graphics processor as reported in the system info
// response. The identity fields (Model, Vendor, DeviceID, Driver) are filled
// for every detected GPU; the memory and utilization fields are only set by
// the driver-specific enrichment step and stay at zero otherwise.
type GPUInfo struct {
	Model             string  `json:"model" example:"AMD Radeon RX 7900 XTX" doc:"GPU model name, or vendor:device hex ID if lspci unavailable"`
	Vendor            string  `json:"vendor" example:"1002" doc:"PCI vendor ID (hex)"`
	DeviceID          string  `json:"device_id" example:"744c" doc:"PCI device ID (hex)"`
	Driver            string  `json:"driver" example:"amdgpu" doc:"Kernel driver in use"`
	MemoryTotalBytes  uint64  `json:"memory_total_bytes" example:"8589934592" doc:"Total VRAM in bytes (driver-dependent; 0 if unavailable)"`
	MemoryUsedBytes   uint64  `json:"memory_used_bytes" example:"27267072" doc:"Used VRAM in bytes (driver-dependent; 0 if unavailable)"`
	UtilizationPct    float64 `json:"utilization_pct" example:"23.5" doc:"GPU compute utilization percentage (driver-dependent; 0 if unavailable)"`
	MemUtilizationPct float64 `json:"mem_utilization_pct" example:"15.0" doc:"GPU memory controller utilization percentage (driver-dependent; 0 if unavailable)"`
}
|
||||
|
||||
func gpuInfo() []GPUInfo {
|
||||
return readGPUsFromSysfs("/sys/class/drm")
|
||||
}
|
||||
|
||||
func readGPUsFromSysfs(drmRoot string) []GPUInfo {
|
||||
entries, err := os.ReadDir(drmRoot)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
seen := map[string]bool{}
|
||||
var gpus []GPUInfo
|
||||
|
||||
for _, e := range entries {
|
||||
if !isGPUCard(e.Name()) {
|
||||
continue
|
||||
}
|
||||
|
||||
pciAddr, vendor, device, driver := readGPUFromCard(drmRoot, e.Name())
|
||||
if pciAddr == "" || seen[pciAddr] {
|
||||
continue
|
||||
}
|
||||
seen[pciAddr] = true
|
||||
|
||||
model := vendor + ":" + device
|
||||
if m := lookupGPUName(pciAddr); m != "" {
|
||||
model = m
|
||||
}
|
||||
|
||||
gpu := GPUInfo{
|
||||
Model: model,
|
||||
Vendor: vendor,
|
||||
DeviceID: device,
|
||||
Driver: driver,
|
||||
}
|
||||
|
||||
devPath := filepath.Join(drmRoot, e.Name(), "device")
|
||||
enrichGPUInfo(&gpu, devPath, pciAddr, driver)
|
||||
gpus = append(gpus, gpu)
|
||||
}
|
||||
|
||||
return gpus
|
||||
}
|
||||
|
||||
func enrichGPUInfo(gpu *GPUInfo, devPath, pciAddr, driver string) {
|
||||
switch driver {
|
||||
case "amdgpu":
|
||||
enrichAMDGPU(gpu, devPath)
|
||||
case "nvidia":
|
||||
enrichNvidiaGPU(gpu, pciAddr)
|
||||
}
|
||||
}
|
||||
|
||||
func enrichAMDGPU(gpu *GPUInfo, devPath string) {
|
||||
if total := readUint64FromFile(filepath.Join(devPath, "mem_info_vram_total")); total > 0 {
|
||||
gpu.MemoryTotalBytes = total
|
||||
gpu.MemoryUsedBytes = readUint64FromFile(filepath.Join(devPath, "mem_info_vram_used"))
|
||||
}
|
||||
if pct := readIntFromFile(filepath.Join(devPath, "gpu_busy_percent")); pct >= 0 {
|
||||
gpu.UtilizationPct = float64(pct)
|
||||
}
|
||||
if pct := readIntFromFile(filepath.Join(devPath, "mem_busy_percent")); pct >= 0 {
|
||||
gpu.MemUtilizationPct = float64(pct)
|
||||
}
|
||||
}
|
||||
|
||||
func enrichNvidiaGPU(gpu *GPUInfo, pciAddr string) {
|
||||
out, err := oscmd.Run("nvidia-smi",
|
||||
"-i", pciAddr,
|
||||
"--query-gpu=memory.total,memory.used,utilization.gpu,utilization.memory",
|
||||
"--format=csv,noheader,nounits",
|
||||
)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
parts := strings.Split(strings.TrimSpace(out), ", ")
|
||||
if len(parts) < 4 {
|
||||
return
|
||||
}
|
||||
if total, err := strconv.ParseUint(parts[0], 10, 64); err == nil && total > 0 {
|
||||
gpu.MemoryTotalBytes = total * 1024 * 1024
|
||||
if used, err := strconv.ParseUint(parts[1], 10, 64); err == nil {
|
||||
gpu.MemoryUsedBytes = used * 1024 * 1024
|
||||
}
|
||||
}
|
||||
if pct, err := strconv.ParseFloat(parts[2], 64); err == nil {
|
||||
gpu.UtilizationPct = pct
|
||||
}
|
||||
if pct, err := strconv.ParseFloat(parts[3], 64); err == nil {
|
||||
gpu.MemUtilizationPct = pct
|
||||
}
|
||||
}
|
||||
|
||||
// isGPUCard reports whether name is a primary DRM card node: the literal
// prefix "card" followed by one or more decimal digits (e.g. "card0",
// "card12").
//
// Every character after the prefix must be a digit, so connector entries such
// as "card0-HDMI-1" and unrelated nodes such as "renderD128" are rejected
// here rather than relying on later de-duplication to drop them.
func isGPUCard(name string) bool {
	const prefix = "card"
	if !strings.HasPrefix(name, prefix) {
		return false
	}
	index := name[len(prefix):]
	if index == "" {
		return false
	}
	for i := 0; i < len(index); i++ {
		if index[i] < '0' || index[i] > '9' {
			return false
		}
	}
	return true
}
|
||||
|
||||
func readGPUFromCard(drmRoot, name string) (pciAddr, vendor, device, driver string) {
|
||||
cardPath := filepath.Join(drmRoot, name)
|
||||
devPath := filepath.Join(cardPath, "device")
|
||||
|
||||
resolved, err := filepath.EvalSymlinks(cardPath)
|
||||
if err != nil {
|
||||
return "", "", "", ""
|
||||
}
|
||||
|
||||
parts := strings.Split(resolved, "/")
|
||||
for i, p := range parts {
|
||||
if p == "drm" && i > 0 {
|
||||
pciAddr = parts[i-1]
|
||||
break
|
||||
}
|
||||
}
|
||||
if pciAddr == "" {
|
||||
return "", "", "", ""
|
||||
}
|
||||
|
||||
vendor = strings.TrimPrefix(readTrim(filepath.Join(devPath, "vendor")), "0x")
|
||||
device = strings.TrimPrefix(readTrim(filepath.Join(devPath, "device")), "0x")
|
||||
driver = readDriver(devPath)
|
||||
return
|
||||
}
|
||||
|
||||
// readDriver returns the base name of the target of the "driver" symlink
// inside devPath, or "" when that link cannot be read.
func readDriver(devPath string) string {
	link := filepath.Join(devPath, "driver")
	if dest, err := os.Readlink(link); err == nil {
		return filepath.Base(dest)
	}
	return ""
}
|
||||
|
||||
func lookupGPUName(pciAddr string) string {
|
||||
out, err := oscmd.Run("lspci", "-nns", pciAddr)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
_, rest, ok := strings.Cut(out, " ")
|
||||
if !ok {
|
||||
return ""
|
||||
}
|
||||
return strings.TrimSpace(rest)
|
||||
}
|
||||
|
||||
func readUint64FromFile(path string) uint64 {
|
||||
v, err := strconv.ParseUint(readTrim(path), 10, 64)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
func readIntFromFile(path string) int {
|
||||
v, err := strconv.Atoi(readTrim(path))
|
||||
if err != nil {
|
||||
return -1
|
||||
}
|
||||
return v
|
||||
}
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
)
|
||||
|
||||
// SystemInfoBody is the dashboard overview: OS identity plus live CPU, memory,
|
||||
// disk, load, network, and temperature readings. Every section is best-effort —
|
||||
// disk, load, network, GPU, and temperature readings. Every section is best-effort —
|
||||
// a source that's unavailable (e.g. no thermal zones in a VM) yields a zero
|
||||
// value or empty list rather than failing the whole call.
|
||||
type SystemInfoBody struct {
|
||||
@@ -24,6 +24,7 @@ type SystemInfoBody struct {
|
||||
Disks []DiskInfo `json:"disks" doc:"Mounted block-device filesystems"`
|
||||
NetworkInterfaces []NetInterface `json:"network_interfaces" doc:"Network interfaces and their addresses"`
|
||||
Temperatures []Temperature `json:"temperatures" doc:"Thermal sensor readings in Celsius"`
|
||||
GPUs []GPUInfo `json:"gpus" doc:"Graphics processors detected via DRM sysfs"`
|
||||
}
|
||||
|
||||
type GetInfoOutput struct{ Body SystemInfoBody }
|
||||
@@ -36,9 +37,9 @@ func registerInfo(api huma.API, sampler *Sampler) {
|
||||
Summary: "Get system information",
|
||||
Description: "Returns an overview for a dashboard: OS/kernel identity, CPU, " +
|
||||
"memory and swap, mounted disks, load averages, uptime, network " +
|
||||
"interfaces, and temperatures. All values come from cheap local reads " +
|
||||
"(/proc, /sys, syscalls) with no D-Bus dependency; each section is " +
|
||||
"best-effort.",
|
||||
"interfaces, temperatures, and GPU information. All values come from cheap " +
|
||||
"local reads (/proc, /sys, syscalls) with no D-Bus dependency; each " +
|
||||
"section is best-effort.",
|
||||
Tags: []string{tagSystem},
|
||||
Metadata: op("read"),
|
||||
Errors: readErrors,
|
||||
@@ -54,6 +55,7 @@ func registerInfo(api huma.API, sampler *Sampler) {
|
||||
Disks: diskInfo(),
|
||||
NetworkInterfaces: netInfo(),
|
||||
Temperatures: tempInfo(),
|
||||
GPUs: gpuInfo(),
|
||||
}}, nil
|
||||
})
|
||||
}
|
||||
|
||||
@@ -6,6 +6,106 @@ import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestReadGPUsFromSysfs(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
|
||||
// card0 — AMD GPU with VRAM and utilization files
|
||||
pciDev := filepath.Join(root, "devices/pci0000:00/0000:00:02.0")
|
||||
mkdirAll(t, pciDev)
|
||||
write(t, pciDev, ".", "vendor", "0x1002")
|
||||
write(t, pciDev, ".", "device", "0x7480")
|
||||
write(t, pciDev, ".", "mem_info_vram_total", "8573157376")
|
||||
write(t, pciDev, ".", "mem_info_vram_used", "27267072")
|
||||
write(t, pciDev, ".", "gpu_busy_percent", "23")
|
||||
write(t, pciDev, ".", "mem_busy_percent", "15")
|
||||
|
||||
driverDir := filepath.Join(root, "bus/pci/drivers/amdgpu")
|
||||
mkdirAll(t, driverDir)
|
||||
mustSymlink(t, driverDir, filepath.Join(pciDev, "driver"))
|
||||
|
||||
cardTarget := filepath.Join(pciDev, "drm", "card0")
|
||||
mkdirAll(t, cardTarget)
|
||||
mustSymlink(t, pciDev, filepath.Join(cardTarget, "device"))
|
||||
mustSymlink(t, cardTarget, filepath.Join(root, "card0"))
|
||||
|
||||
// Distractors
|
||||
write(t, root, ".", "card0-HDMI-1", "distract")
|
||||
write(t, root, ".", "renderD128", "distract")
|
||||
|
||||
gpus := readGPUsFromSysfs(root)
|
||||
if len(gpus) != 1 {
|
||||
t.Fatalf("want 1 GPU, got %d: %+v", len(gpus), gpus)
|
||||
}
|
||||
if gpus[0].Vendor != "1002" {
|
||||
t.Errorf("vendor = %q, want 1002", gpus[0].Vendor)
|
||||
}
|
||||
if gpus[0].DeviceID != "7480" {
|
||||
t.Errorf("device_id = %q, want 7480", gpus[0].DeviceID)
|
||||
}
|
||||
if gpus[0].Driver != "amdgpu" {
|
||||
t.Errorf("driver = %q, want amdgpu", gpus[0].Driver)
|
||||
}
|
||||
if gpus[0].MemoryTotalBytes != 8573157376 {
|
||||
t.Errorf("MemoryTotalBytes = %d, want 8573157376", gpus[0].MemoryTotalBytes)
|
||||
}
|
||||
if gpus[0].MemoryUsedBytes != 27267072 {
|
||||
t.Errorf("MemoryUsedBytes = %d, want 27267072", gpus[0].MemoryUsedBytes)
|
||||
}
|
||||
if gpus[0].UtilizationPct != 23.0 {
|
||||
t.Errorf("UtilizationPct = %f, want 23.0", gpus[0].UtilizationPct)
|
||||
}
|
||||
if gpus[0].MemUtilizationPct != 15.0 {
|
||||
t.Errorf("MemUtilizationPct = %f, want 15.0", gpus[0].MemUtilizationPct)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadGPUsFromSysfsNoEnrichment(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
|
||||
// i915 GPU with no VRAM or utilization files
|
||||
pciDev := filepath.Join(root, "devices/pci0000:00/0000:00:02.0")
|
||||
mkdirAll(t, pciDev)
|
||||
write(t, pciDev, ".", "vendor", "0x8086")
|
||||
write(t, pciDev, ".", "device", "0x46a6")
|
||||
|
||||
driverDir := filepath.Join(root, "bus/pci/drivers/i915")
|
||||
mkdirAll(t, driverDir)
|
||||
mustSymlink(t, driverDir, filepath.Join(pciDev, "driver"))
|
||||
|
||||
cardTarget := filepath.Join(pciDev, "drm", "card0")
|
||||
mkdirAll(t, cardTarget)
|
||||
mustSymlink(t, pciDev, filepath.Join(cardTarget, "device"))
|
||||
mustSymlink(t, cardTarget, filepath.Join(root, "card0"))
|
||||
|
||||
gpus := readGPUsFromSysfs(root)
|
||||
if len(gpus) != 1 {
|
||||
t.Fatalf("want 1 GPU, got %d", len(gpus))
|
||||
}
|
||||
if gpus[0].MemoryTotalBytes != 0 || gpus[0].MemoryUsedBytes != 0 {
|
||||
t.Errorf("expected 0 VRAM for i915, got total=%d used=%d", gpus[0].MemoryTotalBytes, gpus[0].MemoryUsedBytes)
|
||||
}
|
||||
if gpus[0].UtilizationPct != 0 || gpus[0].MemUtilizationPct != 0 {
|
||||
t.Errorf("expected 0 utilization for i915, got gpu=%f mem=%f", gpus[0].UtilizationPct, gpus[0].MemUtilizationPct)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadGPUsFromSysfsMissingDir(t *testing.T) {
|
||||
gpus := readGPUsFromSysfs("/nonexistent/drm")
|
||||
if gpus != nil {
|
||||
t.Errorf("expected nil, got %+v", gpus)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadGPUsFromSysfsSkipsNonGPU(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
write(t, root, ".", "renderD128", "x")
|
||||
write(t, root, ".", "card0-HDMI-1", "x")
|
||||
gpus := readGPUsFromSysfs(root)
|
||||
if len(gpus) != 0 {
|
||||
t.Errorf("expected 0 GPUs, got %d", len(gpus))
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadHwmonTemps(t *testing.T) {
|
||||
root := t.TempDir()
|
||||
// k10temp: CPU, labelled Tctl.
|
||||
@@ -33,6 +133,20 @@ func TestReadHwmonTemps(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// mkdirAll creates path and any missing parents with mode 0o755, failing the
// test immediately on error.
func mkdirAll(t *testing.T, path string) {
	t.Helper()
	err := os.MkdirAll(path, 0o755)
	if err != nil {
		t.Fatal(err)
	}
}
|
||||
|
||||
// mustSymlink creates link as a symbolic link pointing at target, failing the
// test immediately on error.
func mustSymlink(t *testing.T, target, link string) {
	t.Helper()
	err := os.Symlink(target, link)
	if err != nil {
		t.Fatal(err)
	}
}
|
||||
|
||||
func write(t *testing.T, root, chip, file, val string) {
|
||||
t.Helper()
|
||||
dir := filepath.Join(root, chip)
|
||||
|
||||
@@ -34,6 +34,14 @@ func TestSystemHandlers(t *testing.T) {
|
||||
}
|
||||
return oscmd.MockCommand{ExitCode: 1}
|
||||
})
|
||||
// Mock lspci to prevent real calls in case the test host has GPUs.
|
||||
oscmd.SetMock("lspci", func(args []string) oscmd.MockCommand {
|
||||
return oscmd.MockCommand{ExitCode: 1}
|
||||
})
|
||||
// Mock nvidia-smi: return failure so enrichment is a no-op.
|
||||
oscmd.SetMock("nvidia-smi", func(args []string) oscmd.MockCommand {
|
||||
return oscmd.MockCommand{ExitCode: 1}
|
||||
})
|
||||
defer oscmd.ClearMocks()
|
||||
|
||||
// 1. Test GET /api/system/info
|
||||
|
||||
Reference in New Issue
Block a user