Feat: GPU detection via DRM sysfs, integrated into system info endpoint
build-and-release / release (push) Successful in 2m34s

Adds the GPUInfo struct and readGPUsFromSysfs, which parses DRM card entries
(/sys/class/drm/card*). Supports:
- AMD GPUs (amdgpu driver): VRAM totals/utilization from sysfs files
- NVIDIA GPUs: enrichment via nvidia-smi query
- Intel/other: basic PCI vendor/device/driver identification

Includes full test coverage for AMD enrichment, i915 fallback, missing
sysfs dir, and non-GPU DRM entry filtering.
This commit is contained in:
2026-06-25 18:34:34 +02:00
parent d4364a6cb7
commit 0415e905af
4 changed files with 315 additions and 4 deletions
+187
View File
@@ -0,0 +1,187 @@
package system
import (
"os"
"path/filepath"
"strconv"
"strings"
"nadir/internal/oscmd"
)
// GPUInfo describes a single graphics processor discovered under the DRM
// sysfs tree. The identity fields come from the card's device directory; the
// memory and utilization fields are only populated for drivers that have an
// enrichment path and are left at zero otherwise.
type GPUInfo struct {
	// Identity.

	// Model is the name reported by lspci, or "vendor:device" when no name
	// could be looked up.
	Model string `json:"model" example:"AMD Radeon RX 7900 XTX" doc:"GPU model name, or vendor:device hex ID if lspci unavailable"`
	// Vendor is the PCI vendor ID with any leading "0x" removed.
	Vendor string `json:"vendor" example:"1002" doc:"PCI vendor ID (hex)"`
	// DeviceID is the PCI device ID with any leading "0x" removed.
	DeviceID string `json:"device_id" example:"744c" doc:"PCI device ID (hex)"`
	// Driver is the base name of the device's "driver" symlink target.
	Driver string `json:"driver" example:"amdgpu" doc:"Kernel driver in use"`

	// Memory.

	// MemoryTotalBytes is the total VRAM, 0 when the driver exposes none.
	MemoryTotalBytes uint64 `json:"memory_total_bytes" example:"8589934592" doc:"Total VRAM in bytes (driver-dependent; 0 if unavailable)"`
	// MemoryUsedBytes is the VRAM currently in use, 0 when unavailable.
	MemoryUsedBytes uint64 `json:"memory_used_bytes" example:"27267072" doc:"Used VRAM in bytes (driver-dependent; 0 if unavailable)"`

	// Utilization.

	// UtilizationPct is the GPU busy percentage, 0 when unavailable.
	UtilizationPct float64 `json:"utilization_pct" example:"23.5" doc:"GPU compute utilization percentage (driver-dependent; 0 if unavailable)"`
	// MemUtilizationPct is the memory busy percentage, 0 when unavailable.
	MemUtilizationPct float64 `json:"mem_utilization_pct" example:"15.0" doc:"GPU memory controller utilization percentage (driver-dependent; 0 if unavailable)"`
}
// gpuInfo returns the GPUs found under the system's DRM class directory.
func gpuInfo() []GPUInfo {
	const drmClassDir = "/sys/class/drm"
	return readGPUsFromSysfs(drmClassDir)
}
// readGPUsFromSysfs scans drmRoot for card entries and returns one GPUInfo per
// distinct PCI address. It returns nil when drmRoot cannot be listed or when
// no card resolves to a device.
func readGPUsFromSysfs(drmRoot string) []GPUInfo {
	dirEntries, err := os.ReadDir(drmRoot)
	if err != nil {
		return nil
	}

	var found []GPUInfo
	visited := make(map[string]bool)
	for _, entry := range dirEntries {
		cardName := entry.Name()
		if !isGPUCard(cardName) {
			continue
		}

		addr, vendorID, deviceID, drv := readGPUFromCard(drmRoot, cardName)
		if addr == "" {
			continue
		}
		// Several DRM entries can resolve to the same device; report it once.
		if visited[addr] {
			continue
		}
		visited[addr] = true

		info := GPUInfo{
			Vendor:   vendorID,
			DeviceID: deviceID,
			Driver:   drv,
		}
		// Prefer the looked-up name; fall back to the raw vendor:device pair.
		if label := lookupGPUName(addr); label != "" {
			info.Model = label
		} else {
			info.Model = vendorID + ":" + deviceID
		}

		enrichGPUInfo(&info, filepath.Join(drmRoot, cardName, "device"), addr, drv)
		found = append(found, info)
	}
	return found
}
// enrichGPUInfo fills in the memory and utilization fields of gpu using the
// source that matches driver. Drivers other than "amdgpu" and "nvidia" leave
// gpu untouched.
func enrichGPUInfo(gpu *GPUInfo, devPath, pciAddr, driver string) {
	if driver == "amdgpu" {
		enrichAMDGPU(gpu, devPath)
		return
	}
	if driver == "nvidia" {
		enrichNvidiaGPU(gpu, pciAddr)
	}
}
// enrichAMDGPU populates gpu from the files in the device directory devPath:
// mem_info_vram_total / mem_info_vram_used for memory, and gpu_busy_percent /
// mem_busy_percent for utilization. Files that are missing or unparsable leave
// the corresponding field unchanged.
func enrichAMDGPU(gpu *GPUInfo, devPath string) {
	vramTotal := readUint64FromFile(filepath.Join(devPath, "mem_info_vram_total"))
	if vramTotal > 0 {
		// Used VRAM is only read when a total is known.
		gpu.MemoryTotalBytes = vramTotal
		gpu.MemoryUsedBytes = readUint64FromFile(filepath.Join(devPath, "mem_info_vram_used"))
	}

	// readIntFromFile yields a negative value when the file cannot be read.
	gpuBusy := readIntFromFile(filepath.Join(devPath, "gpu_busy_percent"))
	if gpuBusy >= 0 {
		gpu.UtilizationPct = float64(gpuBusy)
	}

	memBusy := readIntFromFile(filepath.Join(devPath, "mem_busy_percent"))
	if memBusy >= 0 {
		gpu.MemUtilizationPct = float64(memBusy)
	}
}
// enrichNvidiaGPU populates gpu by querying nvidia-smi for the device selected
// by pciAddr. The command is asked for four comma-separated values (total
// memory, used memory, GPU utilization, memory utilization) without header or
// units. Any failure to run or parse leaves the affected fields unchanged.
func enrichNvidiaGPU(gpu *GPUInfo, pciAddr string) {
	// The memory columns are scaled by this factor, i.e. treated as MiB.
	const bytesPerMiB = 1024 * 1024

	raw, err := oscmd.Run("nvidia-smi",
		"-i", pciAddr,
		"--query-gpu=memory.total,memory.used,utilization.gpu,utilization.memory",
		"--format=csv,noheader,nounits",
	)
	if err != nil {
		return
	}

	fields := strings.Split(strings.TrimSpace(raw), ", ")
	if len(fields) < 4 {
		return
	}
	memTotal, memUsed, gpuBusy, memBusy := fields[0], fields[1], fields[2], fields[3]

	// Used memory is only recorded when a positive total was parsed.
	totalMiB, totalErr := strconv.ParseUint(memTotal, 10, 64)
	if totalErr == nil && totalMiB > 0 {
		gpu.MemoryTotalBytes = totalMiB * bytesPerMiB
		usedMiB, usedErr := strconv.ParseUint(memUsed, 10, 64)
		if usedErr == nil {
			gpu.MemoryUsedBytes = usedMiB * bytesPerMiB
		}
	}

	if v, parseErr := strconv.ParseFloat(gpuBusy, 64); parseErr == nil {
		gpu.UtilizationPct = v
	}
	if v, parseErr := strconv.ParseFloat(memBusy, 64); parseErr == nil {
		gpu.MemUtilizationPct = v
	}
}
// isGPUCard reports whether name is a DRM card node, i.e. "card" followed by
// one or more decimal digits and nothing else ("card0", "card12").
//
// Connector entries such as "card0-HDMI-1" and render nodes such as
// "renderD128" live in the same directory but are not cards, so they are
// rejected here rather than being left for later de-duplication to absorb.
func isGPUCard(name string) bool {
	const prefix = "card"
	if !strings.HasPrefix(name, prefix) {
		return false
	}
	index := name[len(prefix):]
	if index == "" {
		return false
	}
	// Every trailing byte must be a digit; checking only the first one would
	// let "card0-HDMI-1" through.
	for i := 0; i < len(index); i++ {
		if index[i] < '0' || index[i] > '9' {
			return false
		}
	}
	return true
}
// readGPUFromCard resolves the DRM entry drmRoot/name and extracts the
// identity of the device behind it.
//
// pciAddr is the path component that directly precedes the first "drm"
// component of the symlink-resolved card path. vendor and device are the
// contents of the files of the same name in the card's "device" directory,
// with a leading "0x" removed, and driver comes from readDriver. All four
// results are empty when the path cannot be resolved or no address is found.
func readGPUFromCard(drmRoot, name string) (pciAddr, vendor, device, driver string) {
	cardDir := filepath.Join(drmRoot, name)

	realPath, err := filepath.EvalSymlinks(cardDir)
	if err != nil {
		return "", "", "", ""
	}

	// Start at 1: a leading "drm" component has no parent to take.
	segments := strings.Split(realPath, "/")
	for idx := 1; idx < len(segments); idx++ {
		if segments[idx] == "drm" {
			pciAddr = segments[idx-1]
			break
		}
	}
	if pciAddr == "" {
		return "", "", "", ""
	}

	deviceDir := filepath.Join(cardDir, "device")
	hexID := func(file string) string {
		return strings.TrimPrefix(readTrim(filepath.Join(deviceDir, file)), "0x")
	}
	return pciAddr, hexID("vendor"), hexID("device"), readDriver(deviceDir)
}
// readDriver returns the base name of the target of the "driver" symlink
// inside devPath, or "" when that link cannot be read.
func readDriver(devPath string) string {
	link := filepath.Join(devPath, "driver")
	if dest, err := os.Readlink(link); err == nil {
		return filepath.Base(dest)
	}
	return ""
}
// lookupGPUName runs "lspci -nns <pciAddr>" and returns everything after the
// first space of its output, trimmed of surrounding whitespace. It returns ""
// when the command fails or the output contains no space.
func lookupGPUName(pciAddr string) string {
	out, err := oscmd.Run("lspci", "-nns", pciAddr)
	if err != nil {
		return ""
	}
	// The text before the first space is dropped.
	sep := strings.IndexByte(out, ' ')
	if sep < 0 {
		return ""
	}
	return strings.TrimSpace(out[sep+1:])
}
// readUint64FromFile parses the trimmed contents of path as a base-10
// unsigned 64-bit integer. It returns 0 when the contents do not parse.
func readUint64FromFile(path string) uint64 {
	if n, err := strconv.ParseUint(readTrim(path), 10, 64); err == nil {
		return n
	}
	return 0
}
// readIntFromFile parses the trimmed contents of path as a decimal int. It
// returns -1 when the contents do not parse, which callers treat as
// "no reading".
func readIntFromFile(path string) int {
	if n, err := strconv.Atoi(readTrim(path)); err == nil {
		return n
	}
	return -1
}
+6 -4
View File
@@ -11,7 +11,7 @@ import (
)
// SystemInfoBody is the dashboard overview: OS identity plus live CPU, memory,
// disk, load, network, and temperature readings. Every section is best-effort —
// disk, load, network, GPU, and temperature readings. Every section is best-effort —
// a source that's unavailable (e.g. no thermal zones in a VM) yields a zero
// value or empty list rather than failing the whole call.
type SystemInfoBody struct {
@@ -24,6 +24,7 @@ type SystemInfoBody struct {
Disks []DiskInfo `json:"disks" doc:"Mounted block-device filesystems"`
NetworkInterfaces []NetInterface `json:"network_interfaces" doc:"Network interfaces and their addresses"`
Temperatures []Temperature `json:"temperatures" doc:"Thermal sensor readings in Celsius"`
GPUs []GPUInfo `json:"gpus" doc:"Graphics processors detected via DRM sysfs"`
}
type GetInfoOutput struct{ Body SystemInfoBody }
@@ -36,9 +37,9 @@ func registerInfo(api huma.API, sampler *Sampler) {
Summary: "Get system information",
Description: "Returns an overview for a dashboard: OS/kernel identity, CPU, " +
"memory and swap, mounted disks, load averages, uptime, network " +
"interfaces, and temperatures. All values come from cheap local reads " +
"(/proc, /sys, syscalls) with no D-Bus dependency; each section is " +
"best-effort.",
"interfaces, temperatures, and GPU information. All values come from cheap " +
"local reads (/proc, /sys, syscalls) with no D-Bus dependency; each " +
"section is best-effort.",
Tags: []string{tagSystem},
Metadata: op("read"),
Errors: readErrors,
@@ -54,6 +55,7 @@ func registerInfo(api huma.API, sampler *Sampler) {
Disks: diskInfo(),
NetworkInterfaces: netInfo(),
Temperatures: tempInfo(),
GPUs: gpuInfo(),
}}, nil
})
}
+114
View File
@@ -6,6 +6,106 @@ import (
"testing"
)
// TestReadGPUsFromSysfs builds a fake DRM tree with a single amdgpu card plus
// two non-card entries, and checks that exactly one GPU is reported with the
// identity, VRAM and utilization values taken from the fake device files.
func TestReadGPUsFromSysfs(t *testing.T) {
	root := t.TempDir()

	// Device directory for card0: an AMD GPU with VRAM and utilization files.
	pciDev := filepath.Join(root, "devices/pci0000:00/0000:00:02.0")
	mkdirAll(t, pciDev)
	for _, f := range []struct{ name, val string }{
		{"vendor", "0x1002"},
		{"device", "0x7480"},
		{"mem_info_vram_total", "8573157376"},
		{"mem_info_vram_used", "27267072"},
		{"gpu_busy_percent", "23"},
		{"mem_busy_percent", "15"},
	} {
		write(t, pciDev, ".", f.name, f.val)
	}

	// The device's "driver" link points at the amdgpu driver directory.
	driverDir := filepath.Join(root, "bus/pci/drivers/amdgpu")
	mkdirAll(t, driverDir)
	mustSymlink(t, driverDir, filepath.Join(pciDev, "driver"))

	// <root>/card0 -> <pciDev>/drm/card0, whose "device" link leads back to
	// the device directory.
	cardTarget := filepath.Join(pciDev, "drm", "card0")
	mkdirAll(t, cardTarget)
	mustSymlink(t, pciDev, filepath.Join(cardTarget, "device"))
	mustSymlink(t, cardTarget, filepath.Join(root, "card0"))

	// Entries that must not be reported as GPUs.
	for _, distractor := range []string{"card0-HDMI-1", "renderD128"} {
		write(t, root, ".", distractor, "distract")
	}

	gpus := readGPUsFromSysfs(root)
	if len(gpus) != 1 {
		t.Fatalf("want 1 GPU, got %d: %+v", len(gpus), gpus)
	}

	got := gpus[0]
	if got.Vendor != "1002" {
		t.Errorf("vendor = %q, want 1002", got.Vendor)
	}
	if got.DeviceID != "7480" {
		t.Errorf("device_id = %q, want 7480", got.DeviceID)
	}
	if got.Driver != "amdgpu" {
		t.Errorf("driver = %q, want amdgpu", got.Driver)
	}
	if got.MemoryTotalBytes != 8573157376 {
		t.Errorf("MemoryTotalBytes = %d, want 8573157376", got.MemoryTotalBytes)
	}
	if got.MemoryUsedBytes != 27267072 {
		t.Errorf("MemoryUsedBytes = %d, want 27267072", got.MemoryUsedBytes)
	}
	if got.UtilizationPct != 23.0 {
		t.Errorf("UtilizationPct = %f, want 23.0", got.UtilizationPct)
	}
	if got.MemUtilizationPct != 15.0 {
		t.Errorf("MemUtilizationPct = %f, want 15.0", got.MemUtilizationPct)
	}
}
// TestReadGPUsFromSysfsNoEnrichment checks that a card bound to the i915
// driver, which has no VRAM or utilization files, is still reported and keeps
// zero values in its memory and utilization fields.
func TestReadGPUsFromSysfsNoEnrichment(t *testing.T) {
	root := t.TempDir()

	// Device directory carrying only the identity files.
	pciDev := filepath.Join(root, "devices/pci0000:00/0000:00:02.0")
	mkdirAll(t, pciDev)
	write(t, pciDev, ".", "vendor", "0x8086")
	write(t, pciDev, ".", "device", "0x46a6")

	driverDir := filepath.Join(root, "bus/pci/drivers/i915")
	mkdirAll(t, driverDir)
	mustSymlink(t, driverDir, filepath.Join(pciDev, "driver"))

	cardTarget := filepath.Join(pciDev, "drm", "card0")
	mkdirAll(t, cardTarget)
	mustSymlink(t, pciDev, filepath.Join(cardTarget, "device"))
	mustSymlink(t, cardTarget, filepath.Join(root, "card0"))

	gpus := readGPUsFromSysfs(root)
	if len(gpus) != 1 {
		t.Fatalf("want 1 GPU, got %d", len(gpus))
	}

	got := gpus[0]
	hasMemory := got.MemoryTotalBytes != 0 || got.MemoryUsedBytes != 0
	if hasMemory {
		t.Errorf("expected 0 VRAM for i915, got total=%d used=%d", got.MemoryTotalBytes, got.MemoryUsedBytes)
	}
	hasUtilization := got.UtilizationPct != 0 || got.MemUtilizationPct != 0
	if hasUtilization {
		t.Errorf("expected 0 utilization for i915, got gpu=%f mem=%f", got.UtilizationPct, got.MemUtilizationPct)
	}
}
// TestReadGPUsFromSysfsMissingDir checks that a DRM root that does not exist
// yields a nil slice.
func TestReadGPUsFromSysfsMissingDir(t *testing.T) {
	if got := readGPUsFromSysfs("/nonexistent/drm"); got != nil {
		t.Errorf("expected nil, got %+v", got)
	}
}
// TestReadGPUsFromSysfsSkipsNonGPU checks that a DRM root holding only a
// render node and a connector entry reports no GPUs.
func TestReadGPUsFromSysfsSkipsNonGPU(t *testing.T) {
	root := t.TempDir()
	for _, entry := range []string{"renderD128", "card0-HDMI-1"} {
		write(t, root, ".", entry, "x")
	}

	if n := len(readGPUsFromSysfs(root)); n != 0 {
		t.Errorf("expected 0 GPUs, got %d", n)
	}
}
func TestReadHwmonTemps(t *testing.T) {
root := t.TempDir()
// k10temp: CPU, labelled Tctl.
@@ -33,6 +133,20 @@ func TestReadHwmonTemps(t *testing.T) {
}
}
// mkdirAll creates path and any missing parents with mode 0o755, failing the
// test immediately if that is not possible.
func mkdirAll(t *testing.T, path string) {
	t.Helper()
	err := os.MkdirAll(path, 0o755)
	if err == nil {
		return
	}
	t.Fatal(err)
}
// mustSymlink creates link as a symbolic link pointing at target, failing the
// test immediately if the link cannot be created.
func mustSymlink(t *testing.T, target, link string) {
	t.Helper()
	err := os.Symlink(target, link)
	if err == nil {
		return
	}
	t.Fatal(err)
}
func write(t *testing.T, root, chip, file, val string) {
t.Helper()
dir := filepath.Join(root, chip)
@@ -34,6 +34,14 @@ func TestSystemHandlers(t *testing.T) {
}
return oscmd.MockCommand{ExitCode: 1}
})
// Mock lspci to prevent real calls in case the test host has GPUs.
oscmd.SetMock("lspci", func(args []string) oscmd.MockCommand {
return oscmd.MockCommand{ExitCode: 1}
})
// Mock nvidia-smi: return failure so enrichment is a no-op.
oscmd.SetMock("nvidia-smi", func(args []string) oscmd.MockCommand {
return oscmd.MockCommand{ExitCode: 1}
})
defer oscmd.ClearMocks()
// 1. Test GET /api/system/info