Added many tools
This commit is contained in:
175
tools/gpu.py
Normal file
175
tools/gpu.py
Normal file
@@ -0,0 +1,175 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from core.tools.base import BaseTool, ToolContext
|
||||
from core.tools.registry import registry
|
||||
from core.events import bus
|
||||
from core.subprocess import run_command
|
||||
|
||||
|
||||
class GPUTool(BaseTool):
|
||||
"""
|
||||
GPU introspection tool.
|
||||
|
||||
Uses nvidia-smi when available.
|
||||
"""
|
||||
|
||||
name = "gpu"
|
||||
description = "GPU usage, memory, and process inspection"
|
||||
|
||||
# =========================================================
|
||||
# EXECUTE
|
||||
# =========================================================
|
||||
|
||||
def execute(self, payload: dict[str, Any], ctx: ToolContext):
|
||||
action = str(payload.get("action", "info")).strip()
|
||||
|
||||
bus.log(
|
||||
"GPU",
|
||||
"gpu_execute",
|
||||
"INFO",
|
||||
{"action": action}
|
||||
)
|
||||
|
||||
match action:
|
||||
case "info":
|
||||
return self.gpu_info()
|
||||
|
||||
case "usage":
|
||||
return self.gpu_usage()
|
||||
|
||||
case "processes":
|
||||
return self.gpu_processes()
|
||||
|
||||
case "full":
|
||||
return self.full_snapshot()
|
||||
|
||||
case _:
|
||||
raise ValueError(f"Unknown gpu action: {action}")
|
||||
|
||||
# =========================================================
|
||||
# GPU INFO
|
||||
# =========================================================
|
||||
|
||||
def gpu_info(self):
|
||||
result = run_command(
|
||||
cmd=[
|
||||
"nvidia-smi",
|
||||
"--query-gpu=name,driver_version,memory.total",
|
||||
"--format=csv,noheader"
|
||||
]
|
||||
)
|
||||
|
||||
if result.get("return_code") != 0:
|
||||
return {
|
||||
"status": "error",
|
||||
"error": result.get("stderr", "nvidia-smi not available")
|
||||
}
|
||||
|
||||
lines = result.get("stdout", "").strip().splitlines()
|
||||
|
||||
gpus = []
|
||||
for line in lines:
|
||||
parts = [p.strip() for p in line.split(",")]
|
||||
if len(parts) >= 3:
|
||||
gpus.append({
|
||||
"name": parts[0],
|
||||
"driver": parts[1],
|
||||
"memory_total": parts[2]
|
||||
})
|
||||
|
||||
return {
|
||||
"gpu_count": len(gpus),
|
||||
"gpus": gpus
|
||||
}
|
||||
|
||||
# =========================================================
|
||||
# GPU USAGE
|
||||
# =========================================================
|
||||
|
||||
def gpu_usage(self):
|
||||
result = run_command(
|
||||
cmd=[
|
||||
"nvidia-smi",
|
||||
"--query-gpu=utilization.gpu,memory.used,memory.total,temperature.gpu",
|
||||
"--format=csv,noheader,nounits"
|
||||
]
|
||||
)
|
||||
|
||||
if result.get("return_code") != 0:
|
||||
return {
|
||||
"status": "error",
|
||||
"error": result.get("stderr", "")
|
||||
}
|
||||
|
||||
lines = result.get("stdout", "").strip().splitlines()
|
||||
|
||||
usage = []
|
||||
for line in lines:
|
||||
parts = [p.strip() for p in line.split(",")]
|
||||
if len(parts) >= 4:
|
||||
usage.append({
|
||||
"gpu_util_percent": parts[0],
|
||||
"memory_used_mb": parts[1],
|
||||
"memory_total_mb": parts[2],
|
||||
"temperature_c": parts[3]
|
||||
})
|
||||
|
||||
return {
|
||||
"gpus": usage
|
||||
}
|
||||
|
||||
# =========================================================
|
||||
# GPU PROCESSES
|
||||
# =========================================================
|
||||
|
||||
def gpu_processes(self):
|
||||
result = run_command(
|
||||
cmd=[
|
||||
"nvidia-smi",
|
||||
"--query-compute-apps=pid,process_name,used_memory",
|
||||
"--format=csv,noheader"
|
||||
]
|
||||
)
|
||||
|
||||
if result.get("return_code") != 0:
|
||||
return {
|
||||
"status": "error",
|
||||
"error": result.get("stderr", "")
|
||||
}
|
||||
|
||||
lines = result.get("stdout", "").strip().splitlines()
|
||||
|
||||
processes = []
|
||||
for line in lines:
|
||||
parts = [p.strip() for p in line.split(",")]
|
||||
if len(parts) >= 3:
|
||||
processes.append({
|
||||
"pid": parts[0],
|
||||
"name": parts[1],
|
||||
"memory": parts[2]
|
||||
})
|
||||
|
||||
return {
|
||||
"count": len(processes),
|
||||
"processes": processes
|
||||
}
|
||||
|
||||
# =========================================================
|
||||
# FULL SNAPSHOT
|
||||
# =========================================================
|
||||
|
||||
def full_snapshot(self):
|
||||
return {
|
||||
"info": self.gpu_info(),
|
||||
"usage": self.gpu_usage(),
|
||||
"processes": self.gpu_processes()
|
||||
}
|
||||
|
||||
|
||||
# =========================================================
|
||||
# REGISTER
|
||||
# =========================================================
|
||||
|
||||
# Module-level side effect: importing this module creates one GPUTool
# instance and hands it to `registry.register`.
registry.register(GPUTool())
|
||||
Reference in New Issue
Block a user
