Files
nadir-agent/internal/modules/system/gpu.go
T
urania 0415e905af
build-and-release / release (push) Successful in 2m34s
Feat: GPU detection via DRM sysfs, integrated into system info endpoint
Adds GPUInfo struct and readGPUsFromSysfs parsing DRM card entries
(/sys/class/drm/card*). Supports:
- AMD GPUs (amdgpu driver): VRAM totals/utilization from sysfs files
- NVIDIA GPUs: enrichment via nvidia-smi query
- Intel/other: basic PCI vendor/device/driver identification

Includes full test coverage for AMD enrichment, i915 fallback, missing
sysfs dir, and non-GPU DRM entry filtering.
2026-06-25 18:34:34 +02:00

188 lines
4.8 KiB
Go

package system
import (
"os"
"path/filepath"
"strconv"
"strings"
"nadir/internal/oscmd"
)
// GPUInfo describes one GPU reported by the system info endpoint.
//
// The identification fields (Model, Vendor, DeviceID, Driver) are filled for
// every detected card. The memory and utilization fields are only populated
// for drivers that have an enrichment path and otherwise stay at zero.
type GPUInfo struct {
	Model             string  `json:"model" example:"AMD Radeon RX 7900 XTX" doc:"GPU model name, or vendor:device hex ID if lspci unavailable"`
	Vendor            string  `json:"vendor" example:"1002" doc:"PCI vendor ID (hex)"`
	DeviceID          string  `json:"device_id" example:"744c" doc:"PCI device ID (hex)"`
	Driver            string  `json:"driver" example:"amdgpu" doc:"Kernel driver in use"`
	MemoryTotalBytes  uint64  `json:"memory_total_bytes" example:"8589934592" doc:"Total VRAM in bytes (driver-dependent; 0 if unavailable)"`
	MemoryUsedBytes   uint64  `json:"memory_used_bytes" example:"27267072" doc:"Used VRAM in bytes (driver-dependent; 0 if unavailable)"`
	UtilizationPct    float64 `json:"utilization_pct" example:"23.5" doc:"GPU compute utilization percentage (driver-dependent; 0 if unavailable)"`
	MemUtilizationPct float64 `json:"mem_utilization_pct" example:"15.0" doc:"GPU memory controller utilization percentage (driver-dependent; 0 if unavailable)"`
}
// gpuInfo returns the GPUs found under the standard DRM sysfs directory.
// It returns nil when that directory cannot be read.
func gpuInfo() []GPUInfo {
	const drmSysfsRoot = "/sys/class/drm"
	return readGPUsFromSysfs(drmSysfsRoot)
}
// readGPUsFromSysfs scans drmRoot for DRM card entries and returns one GPUInfo
// per distinct device address.
//
// Entries rejected by isGPUCard, entries whose address cannot be determined,
// and entries resolving to an address that was already reported are skipped.
// The result is nil when drmRoot cannot be read or no card is found.
func readGPUsFromSysfs(drmRoot string) []GPUInfo {
	dirEntries, err := os.ReadDir(drmRoot)
	if err != nil {
		return nil
	}

	var found []GPUInfo
	visited := make(map[string]struct{})
	for _, entry := range dirEntries {
		cardName := entry.Name()
		if !isGPUCard(cardName) {
			continue
		}

		addr, vendorID, deviceID, drv := readGPUFromCard(drmRoot, cardName)
		if addr == "" {
			continue
		}
		// Several DRM entries can resolve to the same device; report it once.
		if _, dup := visited[addr]; dup {
			continue
		}
		visited[addr] = struct{}{}

		// Prefer the lspci description; fall back to the raw hex IDs.
		label := lookupGPUName(addr)
		if label == "" {
			label = vendorID + ":" + deviceID
		}

		info := GPUInfo{
			Model:    label,
			Vendor:   vendorID,
			DeviceID: deviceID,
			Driver:   drv,
		}
		enrichGPUInfo(&info, filepath.Join(drmRoot, cardName, "device"), addr, drv)
		found = append(found, info)
	}
	return found
}
// enrichGPUInfo fills the memory and utilization fields of gpu using the
// driver-specific source: sysfs files under devPath for "amdgpu", and an
// nvidia-smi query keyed by pciAddr for "nvidia". Any other driver leaves gpu
// untouched.
func enrichGPUInfo(gpu *GPUInfo, devPath, pciAddr, driver string) {
	if driver == "amdgpu" {
		enrichAMDGPU(gpu, devPath)
		return
	}
	if driver == "nvidia" {
		enrichNvidiaGPU(gpu, pciAddr)
	}
}
// enrichAMDGPU populates VRAM and utilization figures on gpu from the files
// below devPath.
//
// Used VRAM is only read when a non-zero total is available. The utilization
// fields are only written when their file parses to a non-negative integer;
// otherwise they keep their current value.
func enrichAMDGPU(gpu *GPUInfo, devPath string) {
	sysfsFile := func(name string) string {
		return filepath.Join(devPath, name)
	}

	vramTotal := readUint64FromFile(sysfsFile("mem_info_vram_total"))
	if vramTotal > 0 {
		gpu.MemoryTotalBytes = vramTotal
		gpu.MemoryUsedBytes = readUint64FromFile(sysfsFile("mem_info_vram_used"))
	}

	// readIntFromFile reports -1 for a missing or unparsable file.
	gpuBusy := readIntFromFile(sysfsFile("gpu_busy_percent"))
	if gpuBusy >= 0 {
		gpu.UtilizationPct = float64(gpuBusy)
	}

	memBusy := readIntFromFile(sysfsFile("mem_busy_percent"))
	if memBusy >= 0 {
		gpu.MemUtilizationPct = float64(memBusy)
	}
}
// enrichNvidiaGPU populates memory and utilization figures on gpu by running
// nvidia-smi for the device selected with pciAddr.
//
// The command output is expected to be a single CSV row with four values
// separated by ", ": total memory, used memory, GPU utilization and memory
// utilization. The memory values are scaled by 1024*1024 before being stored
// in the byte fields. If the command fails or yields fewer than four values,
// gpu is left unchanged; individual values that do not parse are skipped.
func enrichNvidiaGPU(gpu *GPUInfo, pciAddr string) {
	const bytesPerMiB = 1024 * 1024

	out, err := oscmd.Run(
		"nvidia-smi",
		"-i", pciAddr,
		"--query-gpu=memory.total,memory.used,utilization.gpu,utilization.memory",
		"--format=csv,noheader,nounits",
	)
	if err != nil {
		return
	}

	fields := strings.Split(strings.TrimSpace(out), ", ")
	if len(fields) < 4 {
		return
	}

	// Used memory is only meaningful alongside a valid, non-zero total.
	totalMiB, totalErr := strconv.ParseUint(fields[0], 10, 64)
	if totalErr == nil && totalMiB > 0 {
		gpu.MemoryTotalBytes = totalMiB * bytesPerMiB
		usedMiB, usedErr := strconv.ParseUint(fields[1], 10, 64)
		if usedErr == nil {
			gpu.MemoryUsedBytes = usedMiB * bytesPerMiB
		}
	}

	gpuUtil, gpuErr := strconv.ParseFloat(fields[2], 64)
	if gpuErr == nil {
		gpu.UtilizationPct = gpuUtil
	}

	memUtil, memErr := strconv.ParseFloat(fields[3], 64)
	if memErr == nil {
		gpu.MemUtilizationPct = memUtil
	}
}
// isGPUCard reports whether name is a DRM card entry of the form "card<N>",
// where <N> is one or more decimal digits.
//
// Every character after the "card" prefix must be a digit. This rejects
// entries that merely start with a card name, such as "card0-HDMI-A-1", as
// well as unrelated entries like "renderD128" or "version".
func isGPUCard(name string) bool {
	const prefix = "card"
	if !strings.HasPrefix(name, prefix) {
		return false
	}
	index := name[len(prefix):]
	if index == "" {
		return false
	}
	for i := 0; i < len(index); i++ {
		if index[i] < '0' || index[i] > '9' {
			return false
		}
	}
	return true
}
// readGPUFromCard extracts identification data for the DRM entry name located
// under drmRoot.
//
// The entry path is resolved through its symlinks and the path component that
// directly precedes the first "drm" component is returned as pciAddr. vendor
// and device are read from the entry's "device" directory with any "0x"
// prefix removed, and driver comes from readDriver. All four results are
// empty when the path cannot be resolved or no address component is found.
func readGPUFromCard(drmRoot, name string) (pciAddr, vendor, device, driver string) {
	cardDir := filepath.Join(drmRoot, name)

	realPath, err := filepath.EvalSymlinks(cardDir)
	if err != nil {
		return "", "", "", ""
	}

	// Start at 1 so that a leading "drm" component (which has no predecessor)
	// is never treated as the match.
	segments := strings.Split(realPath, "/")
	for idx := 1; idx < len(segments); idx++ {
		if segments[idx] == "drm" {
			pciAddr = segments[idx-1]
			break
		}
	}
	if pciAddr == "" {
		return "", "", "", ""
	}

	deviceDir := filepath.Join(cardDir, "device")
	vendor = strings.TrimPrefix(readTrim(filepath.Join(deviceDir, "vendor")), "0x")
	device = strings.TrimPrefix(readTrim(filepath.Join(deviceDir, "device")), "0x")
	driver = readDriver(deviceDir)
	return pciAddr, vendor, device, driver
}
// readDriver returns the final path element of the target of the "driver"
// symlink inside devPath, or "" when that link cannot be read.
func readDriver(devPath string) string {
	link := filepath.Join(devPath, "driver")
	if target, err := os.Readlink(link); err == nil {
		return filepath.Base(target)
	}
	return ""
}
// lookupGPUName asks lspci for a description of the device at pciAddr.
//
// Everything after the first space of the command output is returned with
// surrounding whitespace trimmed. The result is "" when the command fails or
// its output contains no space.
// NOTE(review): only the leading token is dropped, so whatever else lspci
// prints on that line is kept in the returned string; confirm that is the
// intended "model" value.
func lookupGPUName(pciAddr string) string {
	out, err := oscmd.Run("lspci", "-nns", pciAddr)
	if err != nil {
		return ""
	}
	if _, description, found := strings.Cut(out, " "); found {
		return strings.TrimSpace(description)
	}
	return ""
}
// readUint64FromFile parses the content of path, as returned by readTrim, as
// a base-10 unsigned 64-bit integer. It returns 0 when parsing fails.
func readUint64FromFile(path string) uint64 {
	if n, err := strconv.ParseUint(readTrim(path), 10, 64); err == nil {
		return n
	}
	return 0
}
// readIntFromFile parses the content of path, as returned by readTrim, as a
// decimal int. It returns -1 when parsing fails.
func readIntFromFile(path string) int {
	if n, err := strconv.Atoi(readTrim(path)); err == nil {
		return n
	}
	return -1
}