# Nodes

Nodes are worker machines in Kubernetes and may be either virtual or physical machines, depending on the cluster. Each Node is managed by the NVIDIA Run:ai control plane. For more information, see [Nodes](https://run-ai-docs.nvidia.com/self-hosted/2.21/platform-management/aiinitiatives/resources/nodes).

## Get a list of nodes

> Retrieve a list of nodes from the Kubernetes cluster.

```json
{"openapi":"3.0.3","info":{"title":"Runai API","version":"2.21"},"tags":[{"name":"Nodes","description":"Nodes are worker machines in Kubernetes and may be either a virtual or a physical machine, depending on the cluster. \nEach Node is managed by the NVIDIA Run:ai control plane. For more information, see [Nodes](https://run-ai-docs.nvidia.com/self-hosted/2.21/platform-management/aiinitiatives/resources/nodes).\n"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"ClusterUuid":{"name":"clusterUuid","in":"path","required":true,"description":"The Universally Unique Identifier (UUID) of the cluster.","schema":{"type":"string","format":"uuid","minLength":1}},"QueryNodeName":{"name":"nodeName","in":"query","description":"The node name.","schema":{"type":"string"},"required":false}},"schemas":{"Nodes":{"type":"object","required":["nodes"],"properties":{"nodes":{"type":"array","items":{"$ref":"#/components/schemas/Node"}}}},"Node":{"allOf":[{"$ref":"#/components/schemas/NodeInfo"},{"$ref":"#/components/schemas/NodeAdditionalFields"},{"$ref":"#/components/schemas/NodeAdditionalReadFields"}]},"NodeInfo":{"type":"object","required":["status","nodePool","createdAt"],"properties":{"status":{"type":"string","enum":["Ready","NotReady","Unknown"],"description":"The calculated status of the node."},"conditions":{"type":"array","items":{"$ref":"#/components/schemas/NodeStatusConditionDetails"}},"taints":{"type":"array","items":{"$ref":"#/components/schemas/NodeTaint"}},"nodePool":{"type":"string","description":"The node's NodePool."},"createdAt":{"type":"string","format":"date-time"},"gpuInfo":{"$ref":"#/components/schemas/GpuInfo"},"nvLinkDomainUid":{"type":"string","description":"NV Link Domain Uid","nullable":true},"nvLinkCliqueId":{"type":"string","description":"NV Link Clique 
Id","nullable":true}}},"NodeStatusConditionDetails":{"type":"object","required":["type","reason"],"properties":{"type":{"type":"string","description":"Type of node condition."},"reason":{"type":"string","description":"(brief) reason for the condition's last transition."},"message":{"type":"string","description":"Human readable message indicating details about last transition."}}},"NodeTaint":{"type":"object","required":["key","effect"],"properties":{"key":{"type":"string","description":"The taint key to be applied to a node."},"value":{"type":"string","description":"The taint value corresponding to the taint key."},"effect":{"type":"string","description":"The effect of the taint on pods that do not tolerate the taint.","enum":["NoSchedule","PreferNoSchedule","NoExecute"]}}},"GpuInfo":{"type":"object","nullable":true,"required":["gpuType","gpuCount"],"properties":{"gpuType":{"type":"string"},"gpuCount":{"type":"integer"}}},"NodeAdditionalFields":{"type":"object","required":["name"],"properties":{"name":{"type":"string","description":"The name of the node"}}},"NodeAdditionalReadFields":{"type":"object","required":["clusterUuid","updatedAt"],"properties":{"id":{"type":"string","format":"uuid","description":"The unique identifier of the node."},"clusterUuid":{"$ref":"#/components/schemas/ClusterId"},"updatedAt":{"type":"string","format":"date-time"}}},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not 
found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/clusters/{clusterUuid}/nodes":{"get":{"summary":"Get a list of nodes.","description":"Retrieve a list of nodes from the Kubernetes cluster.","operationId":"get_nodes","tags":["Nodes"],"parameters":[{"$ref":"#/components/parameters/ClusterUuid"},{"$ref":"#/components/parameters/QueryNodeName"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Nodes"}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get node telemetry data

> Retrieve node telemetry data for use in analysis applications.

```json
{"openapi":"3.0.3","info":{"title":"Runai API","version":"2.21"},"tags":[{"name":"Nodes","description":"Nodes are worker machines in Kubernetes and may be either a virtual or a physical machine, depending on the cluster. \nEach Node is managed by the NVIDIA Run:ai control plane. For more information, see [Nodes](https://run-ai-docs.nvidia.com/self-hosted/2.21/platform-management/aiinitiatives/resources/nodes).\n"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"FilterByCluster":{"name":"clusterId","in":"query","description":"Filter using the Universally Unique Identifier (UUID) of the cluster.","required":false,"schema":{"type":"string","format":"uuid"}},"FilterByNodepoolName":{"name":"nodepoolName","in":"query","description":"Filter using the nodepool.","required":false,"schema":{"type":"string"}},"TelemetryGroupBy":{"name":"groupBy","in":"query","description":"workload fields to group the data by","explode":false,"required":false,"schema":{"type":"array","maxItems":2,"items":{"type":"string","enum":["ClusterId","Nodepool","Node"]}}},"TelemetryType":{"name":"telemetryType","in":"query","required":true,"description":"specifies what data to request","schema":{"$ref":"#/components/schemas/NodeTelemetryType"}}},"schemas":{"NodeTelemetryType":{"type":"string","enum":["READY_GPU_NODES","READY_GPUS","TOTAL_GPU_NODES","TOTAL_GPUS","IDLE_ALLOCATED_GPUS","FREE_GPUS","ALLOCATED_GPUS","TOTAL_CPU_CORES","USED_CPU_CORES","ALLOCATED_CPU_CORES","TOTAL_GPU_MEMORY_BYTES","USED_GPU_MEMORY_BYTES","TOTAL_CPU_MEMORY_BYTES","USED_CPU_MEMORY_BYTES","ALLOCATED_CPU_MEMORY_BYTES"]},"TelemetryResponse":{"type":"object","required":["type","timestamp","values"],"properties":{"type":{"type":"string","description":"specifies what data 
returned"},"timestamp":{"type":"string","format":"date-time"},"values":{"type":"array","items":{"type":"object","required":["value"],"properties":{"value":{"type":"string"},"groups":{"type":"array","description":"columns the data is grouped by","maxItems":2,"items":{"type":"object","required":["key","value"],"properties":{"key":{"type":"string"},"value":{"type":"string"},"name":{"type":"string"}}}}}}}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/nodes/telemetry":{"get":{"summary":"Get node telemetry data.","description":"Retrieve node telemetry data for use in analysis applications.","operationId":"get_node_telemetry","tags":["Nodes"],"parameters":[{"$ref":"#/components/parameters/FilterByCluster"},{"$ref":"#/components/parameters/FilterByNodepoolName"},{"$ref":"#/components/parameters/TelemetryGroupBy"},{"$ref":"#/components/parameters/TelemetryType"}],"responses":{"200":{"description":"Executed 
successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/TelemetryResponse"}},"text/csv":{}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get the node metrics data

> Retrieve the metrics data of a node, identified by its Universally Unique Identifier (UUID).

```json
{"openapi":"3.0.3","info":{"title":"Runai API","version":"2.21"},"tags":[{"name":"Nodes","description":"Nodes are worker machines in Kubernetes and may be either a virtual or a physical machine, depending on the cluster. \nEach Node is managed by the NVIDIA Run:ai control plane. For more information, see [Nodes](https://run-ai-docs.nvidia.com/self-hosted/2.21/platform-management/aiinitiatives/resources/nodes).\n"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"NodeId":{"name":"nodeId","in":"path","description":"The node UUID.","schema":{"type":"string","format":"uuid","minLength":1},"required":true},"NodeMetricTypes":{"name":"metricType","in":"query","required":true,"description":"Specify which data to request.","explode":false,"schema":{"type":"array","items":{"$ref":"#/components/schemas/NodeMetricType"}}},"StartRequired":{"name":"start","in":"query","description":"Start date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"EndRequired":{"name":"end","in":"query","description":"End date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"NumberOfSamples":{"name":"numberOfSamples","in":"query","description":"The number of samples to take in the specified time range.","required":false,"schema":{"type":"integer","maximum":1000,"minimum":0,"default":20}}},"schemas":{"NodeMetricType":{"type":"string","description":"Specify which metric data to 
request.","enum":["GPU_UTILIZATION_PER_GPU","GPU_UTILIZATION","GPU_MEMORY_UTILIZATION_PER_GPU","GPU_MEMORY_UTILIZATION","GPU_MEMORY_USAGE_BYTES_PER_GPU","GPU_MEMORY_USAGE_BYTES","CPU_USAGE_CORES","CPU_UTILIZATION","CPU_MEMORY_USAGE_BYTES","CPU_MEMORY_UTILIZATION","GPU_OOMKILL_BURST_COUNT_PER_GPU","GPU_OOMKILL_IDLE_COUNT_PER_GPU","GPU_OOMKILL_SWAP_OUT_OF_RAM_COUNT_PER_GPU","GPU_GRAPHICS_ENGINE_ACTIVITY_PER_GPU","GPU_SM_ACTIVITY_PER_GPU","GPU_SM_OCCUPANCY_PER_GPU","GPU_TENSOR_ACTIVITY_PER_GPU","GPU_FP64_ENGINE_ACTIVITY_PER_GPU","GPU_FP32_ENGINE_ACTIVITY_PER_GPU","GPU_FP16_ENGINE_ACTIVITY_PER_GPU","GPU_MEMORY_BANDWIDTH_UTILIZATION_PER_GPU","GPU_NVLINK_TRANSMITTED_BANDWIDTH_PER_GPU","GPU_NVLINK_RECEIVED_BANDWIDTH_PER_GPU","GPU_PCIE_TRANSMITTED_BANDWIDTH_PER_GPU","GPU_PCIE_RECEIVED_BANDWIDTH_PER_GPU"]},"MetricsResponse":{"type":"object","required":["measurements"],"properties":{"measurements":{"type":"array","items":{"$ref":"#/components/schemas/MeasurementResponse"}}}},"MeasurementResponse":{"type":"object","required":["type","values"],"properties":{"type":{"type":"string","description":"specifies what data returned"},"labels":{"type":"object","nullable":true,"description":"labels of the metric measurement","additionalProperties":{"type":"string"}},"values":{"type":"array","nullable":true,"items":{"type":"object","required":["value","timestamp"],"properties":{"value":{"type":"string"},"timestamp":{"type":"string","format":"date-time","nullable":true}}}}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad 
request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/nodes/{nodeId}/metrics":{"get":{"summary":"Get the node metrics data.","description":"Retrieve the node metrics data by Universally Unique Identifier (UUID).","operationId":"get_node_metrics","tags":["Nodes"],"parameters":[{"$ref":"#/components/parameters/NodeId"},{"$ref":"#/components/parameters/NodeMetricTypes"},{"$ref":"#/components/parameters/StartRequired"},{"$ref":"#/components/parameters/EndRequired"},{"$ref":"#/components/parameters/NumberOfSamples"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsResponse"}},"text/csv":{}}},"207":{"description":"Partial success.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsResponse"}}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```
