Files
python-mcp/tools/gpu.py
AuroraCrimsonRose e471f9bc54 Added many tools
2026-06-03 06:01:06 -05:00

175 lines
4.7 KiB
Python

from __future__ import annotations
from typing import Any
from core.tools.base import BaseTool, ToolContext
from core.tools.registry import registry
from core.events import bus
from core.subprocess import run_command
class GPUTool(BaseTool):
"""
GPU introspection tool.
Uses nvidia-smi when available.
"""
name = "gpu"
description = "GPU usage, memory, and process inspection"
# =========================================================
# EXECUTE
# =========================================================
def execute(self, payload: dict[str, Any], ctx: ToolContext):
action = str(payload.get("action", "info")).strip()
bus.log(
"GPU",
"gpu_execute",
"INFO",
{"action": action}
)
match action:
case "info":
return self.gpu_info()
case "usage":
return self.gpu_usage()
case "processes":
return self.gpu_processes()
case "full":
return self.full_snapshot()
case _:
raise ValueError(f"Unknown gpu action: {action}")
# =========================================================
# GPU INFO
# =========================================================
def gpu_info(self):
result = run_command(
cmd=[
"nvidia-smi",
"--query-gpu=name,driver_version,memory.total",
"--format=csv,noheader"
]
)
if result.get("return_code") != 0:
return {
"status": "error",
"error": result.get("stderr", "nvidia-smi not available")
}
lines = result.get("stdout", "").strip().splitlines()
gpus = []
for line in lines:
parts = [p.strip() for p in line.split(",")]
if len(parts) >= 3:
gpus.append({
"name": parts[0],
"driver": parts[1],
"memory_total": parts[2]
})
return {
"gpu_count": len(gpus),
"gpus": gpus
}
# =========================================================
# GPU USAGE
# =========================================================
def gpu_usage(self):
result = run_command(
cmd=[
"nvidia-smi",
"--query-gpu=utilization.gpu,memory.used,memory.total,temperature.gpu",
"--format=csv,noheader,nounits"
]
)
if result.get("return_code") != 0:
return {
"status": "error",
"error": result.get("stderr", "")
}
lines = result.get("stdout", "").strip().splitlines()
usage = []
for line in lines:
parts = [p.strip() for p in line.split(",")]
if len(parts) >= 4:
usage.append({
"gpu_util_percent": parts[0],
"memory_used_mb": parts[1],
"memory_total_mb": parts[2],
"temperature_c": parts[3]
})
return {
"gpus": usage
}
# =========================================================
# GPU PROCESSES
# =========================================================
def gpu_processes(self):
result = run_command(
cmd=[
"nvidia-smi",
"--query-compute-apps=pid,process_name,used_memory",
"--format=csv,noheader"
]
)
if result.get("return_code") != 0:
return {
"status": "error",
"error": result.get("stderr", "")
}
lines = result.get("stdout", "").strip().splitlines()
processes = []
for line in lines:
parts = [p.strip() for p in line.split(",")]
if len(parts) >= 3:
processes.append({
"pid": parts[0],
"name": parts[1],
"memory": parts[2]
})
return {
"count": len(processes),
"processes": processes
}
# =========================================================
# FULL SNAPSHOT
# =========================================================
def full_snapshot(self):
return {
"info": self.gpu_info(),
"usage": self.gpu_usage(),
"processes": self.gpu_processes()
}
# =========================================================
# REGISTER
# =========================================================
# Create one GPUTool instance and hand it to the imported ``registry``.
# This is a module-level statement, so it runs when the module is executed.
registry.register(GPUTool())