# Workloads

## List workloads.

> Retrieve a list of active workloads with details.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.25"},"tags":[{"name":"Workloads-API"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"Deleted":{"name":"deleted","in":"query","description":"Return only deleted resources when `true`.","schema":{"type":"boolean"}},"Offset":{"name":"offset","in":"query","required":false,"description":"The offset of the first item returned in the collection.","schema":{"type":"integer","format":"int32"}},"Limit":{"name":"limit","in":"query","required":false,"description":"The maximum number of entries to return.","schema":{"type":"integer","format":"int32","default":50,"minimum":1,"maximum":500}},"SortOrder":{"name":"sortOrder","in":"query","required":false,"description":"Sort results in descending or ascending order.","schema":{"type":"string","enum":["asc","desc"],"default":"asc"}},"WorkloadsSort":{"name":"sortBy","in":"query","required":false,"description":"Sort results by a parameter.","schema":{"type":"string","enum":["type","name","clusterId","projectId","projectName","departmentId","departmentName","createdAt","deletedAt","submittedBy","phase","completedAt","nodepool","distributedFramework","allocatedGPU","idleGpus","idleAllocatedGpus","phaseUpdatedAt","category","priority","totalPendingTimeSeconds","totalRunningTimeSeconds","priorityClassName","guaranteedRuntimeEndsAt","aiApplicationId","aiApplicationName"]}},"WorkloadsFilter":{"name":"filterBy","in":"query","required":false,"description":"Filter results by a parameter. Use the format field-name operator value. Operators are `==` Equals, `!=` Not equals, `<=` Less than or equal, `>=` Greater than or equal, `=@` contains, `!@` Does not contain, `=^` Starts with and `=$` Ends with. Dates are in ISO 8601 timestamp format and available for operators `==`, `!=`, `<=` and `>=`.","schema":{"type":"array","maxItems":10,"items":{"type":"string","pattern":"^(type|name|clusterId|projectId|projectName|departmentId|departmentName|createdAt|deletedAt|submittedBy|phase|completedAt|nodepool|distributedFramework|allocatedGPU|idleGpus|idleAllocatedGpus|phaseUpdatedAt|category|totalPendingTimeSeconds|totalRunningTimeSeconds|priority|priorityClassName|guaranteedRuntimeEndsAt|aiApplicationId|aiApplicationName)(==|!=|<=|>=|=@|!@|=\\^|=\\$).+$"}},"explode":false},"Search":{"name":"search","in":"query","required":false,"description":"Filter results by a free text search.","schema":{"type":"string"}}},"schemas":{"Workloads":{"type":"array","items":{"$ref":"#/components/schemas/Workload"}},"Workload":{"type":"object","required":["type","name","id","source","priority","priorityClassName","clusterId","projectName","projectId","departmentName","departmentId","namespace","createdAt","phase","conditions","k8sPhase","tenantId","runningPods","phaseUpdatedAt","k8sPhaseUpdatedAt","updatedAt","deletedAt","category"],"properties":{"tenantId":{"$ref":"#/components/schemas/TenantId"},"runningPods":{"type":"integer","format":"int32"},"phaseUpdatedAt":{"type":"string","format":"date-time"},"k8sPhaseUpdatedAt":{"type":"string","format":"date-time"},"updatedAt":{"type":"string","format":"date-time"},"source":{"$ref":"#/components/schemas/Source"},"deletedAt":{"type":"string","format":"date-time","nullable":true},"type":{"type":"string"},"name":{"type":"string"},"id":{"type":"string","format":"uuid"},"priority":{"type":"integer","format":"int32","nullable":true},"priorityClassName":{"type":"string"},"submittedBy":{"type":"string"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"projectName":{"type":"string"},"projectId":{"type":"string"},"departmentName":{"type":"string"},"departmentId":{"type":"string"},"namespace":{"type":"string"},"createdAt":{"type":"string","format":"date-time"},"workloadRequestedResources":{"$ref":"#/components/schemas/WorkloadRequestResources"},"podsRequestedResources":{"$ref":"#/components/schemas/WorkloadRequestResources"},"allocatedResources":{"$ref":"#/components/schemas/WorkloadAllocatedResources"},"actionsSupport":{"$ref":"#/components/schemas/ActionsSupport"},"phase":{"$ref":"#/components/schemas/Phase"},"conditions":{"$ref":"#/components/schemas/Conditions"},"phaseMessage":{"type":"string"},"k8sPhase":{"type":"string"},"requestedPods":{"$ref":"#/components/schemas/RequestedPods"},"requestedNodePools":{"type":"array","items":{"type":"string"}},"currentNodePools":{"type":"array","items":{"type":"string"}},"completedAt":{"type":"string","format":"date-time","nullable":true},"images":{"type":"array","items":{"type":"string"}},"childrenIds":{"type":"array","writeOnly":true,"items":{"type":"object","required":["id"],"properties":{"id":{"type":"string","format":"uuid"},"type":{"type":"string"}}}},"urls":{"description":"**DEPRECATED:** This field is deprecated. Use [Get workload endpoints](/latest/#operation/get_workload_endpoints) to retrieve workload network endpoints instead.Requires cluster version 2.25 or above.","type":"array","items":{"type":"string"}},"datasources":{"type":"array","items":{"$ref":"#/components/schemas/Datasource"}},"environments":{"type":"array","items":{"$ref":"#/components/schemas/Environment"}},"externalConnections":{"description":"**DEPRECATED:** This field is deprecated. Use [Get workload endpoints](/latest/#operation/get_workload_endpoints) to retrieve workload network endpoints instead.Requires cluster version 2.25 or above.","type":"array","items":{"$ref":"#/components/schemas/Connection1"}},"distributedFramework":{"type":"string"},"additionalFields":{"type":"object","additionalProperties":true},"preemptible":{"type":"boolean","nullable":true},"environmentVariables":{"type":"object","additionalProperties":{"type":"string"}},"command":{"type":"string"},"arguments":{"type":"string"},"phaseReason":{"type":"string","nullable":true,"allOf":[{"$ref":"#/components/schemas/PhaseReason"}]},"idleGpus":{"deprecated":true,"type":"integer","nullable":true,"description":"deprecated. use idleAllocatedGpus instead"},"idleAllocatedGpus":{"type":"number","description":"sum of idle allocated gpus in the workload","nullable":true},"totalPendingTimeSeconds":{"type":"integer","description":"The total cumulative time, in seconds, that the workload has spent in the Pending phase since submission.","nullable":true},"totalRunningTimeSeconds":{"type":"integer","description":"The total cumulative time, in seconds, that the workload has spent in the Running phase since submission.","nullable":true},"category":{"type":"string","description":"Category Description"},"guaranteedRuntimeEndsAt":{"type":"string","format":"date-time","nullable":true,"description":"A timestamp indicating when the workload will reach its minimum guaranteed runtime, as defined by minGuaranteedRuntime. Until this time, the workload is considered non-preemptible and cannot be interrupted by higher-priority workloads."},"aiApplicationId":{"type":"string"},"aiApplicationName":{"type":"string"},"sourceApi":{"$ref":"#/components/schemas/SourceApi"},"topology":{"type":"object","description":"The workload’s network topology.","nullable":true,"properties":{"name":{"type":"string","description":"The name of the network topology associated with the workload."},"preferredPlacement":{"type":"string","description":"The topology level to use as a preferred placement constraint for the workload."},"requiredPlacement":{"type":"string","description":"The topology level to use as required placement constraint for the workload. If the requirement cannot be satisfied at this topology level, placement will fail"},"actualPlacement":{"$ref":"#/components/schemas/WorkloadTopologyActualPlacement"}}},"endpoints":{"description":"List of network endpoints through which the workload is exposed.","type":"array","items":{"$ref":"#/components/schemas/Endpoint"}}}},"TenantId":{"description":"The id of the tenant.","type":"integer","format":"int32"},"Source":{"type":"string","enum":["CLI","Control-plane","Other"]},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"WorkloadRequestResources":{"type":"object","nullable":true,"properties":{"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"},"gpu":{"$ref":"#/components/schemas/RequestResourceCores"},"gpuMemory":{"$ref":"#/components/schemas/RequestResourceQuantity"},"cpu":{"$ref":"#/components/schemas/RequestResourceCores"},"cpuMemory":{"$ref":"#/components/schemas/RequestResourceQuantity"},"extendedResources":{"$ref":"#/components/schemas/WorkloadsExtendedResources"}}},"GpuRequestType":{"description":"Sets the unit type for GPU resources requests. Stated in terms of portion or memory. Sets the unit type for other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDeviceRequest = 1`, the request type can be stated as `portion` or `memory`.","type":"string","minLength":1,"enum":["portion","memory"],"nullable":true},"RequestResourceCores":{"type":"object","properties":{"limit":{"type":"number","nullable":true,"format":"double"},"request":{"type":"number","nullable":true,"format":"double"}},"nullable":true},"RequestResourceQuantity":{"type":"object","properties":{"limit":{"type":"string","nullable":true,"pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$"},"request":{"type":"string","nullable":true,"pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$"}},"nullable":true},"WorkloadsExtendedResources":{"description":"Set of extended resources with their quantity","type":"array","items":{"$ref":"#/components/schemas/WorkloadsExtendedResource"},"nullable":true},"WorkloadsExtendedResource":{"description":"Quantity of an extended resource.","type":"object","properties":{"resource":{"description":"The name of the extended resource (mandatory)","type":"string","minLength":1,"nullable":true},"quantity":{"description":"The requested quantity for the given resource.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Whether to exclude this extended resource from the workload. This is necessary in case the extended resource is inherited from the policy defaults and it is desired not to include it in this workload.","type":"boolean","nullable":true}},"nullable":true},"WorkloadAllocatedResources":{"type":"object","nullable":true,"properties":{"gpu":{"type":"number","nullable":true,"format":"double","description":"Required if and only if gpuRequestType is portion. States the number of GPUs allocated for the created workload. The default is no allocated GPUs."},"gpuMemory":{"type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"cpu":{"type":"number","nullable":true,"format":"double","description":"States the amount of CPU cores used by the workload running."},"cpuMemory":{"type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"extendedResources":{"$ref":"#/components/schemas/WorkloadsExtendedResources"}}},"ActionsSupport":{"description":"The actions support provided for the workload.","type":"object","properties":{"delete":{"type":"boolean"},"suspend":{"description":"This permission includes resume as well as suspend.","type":"boolean"}}},"Phase":{"type":"string","enum":["Creating","Initializing","Resuming","Pending","Deleting","Running","Updating","Stopped","Stopping","Degraded","Failed","Completed","Terminating","Unknown"]},"Conditions":{"type":"array","items":{"$ref":"#/components/schemas/Condition1"}},"Condition1":{"type":"object","required":["type","status"],"properties":{"type":{"description":"The type of the condition, such as Failed or Available. See Types of domain status conditions.","type":"string"},"status":{"type":"string","description":"The status of the condition, such as True, False or Unknown."},"message":{"type":"string","description":"An optional, human-readable message providing more details about the condition."},"reason":{"type":"string","description":"The reason for the Failed condition. Not applicable to other types of condition."},"lastTransitionTime":{"description":"A timestamp of when the condition was created or the last time the condition transitioned from one status to another.","type":"string","nullable":true,"format":"date-time"}}},"RequestedPods":{"type":"object","properties":{"number":{"type":"integer","format":"int32","nullable":true},"min":{"type":"integer","format":"int32","nullable":true},"max":{"type":"integer","format":"int32","nullable":true},"parallelism":{"type":"integer","format":"int32","description":"specifies how many Pods can run in parallel","nullable":true},"completions":{"type":"integer","description":"specifies how many Pods should terminate successfully before the Workload is completed","format":"int32","nullable":true}}},"Datasource":{"type":"object","required":["name","id"],"properties":{"type":{"type":"string"},"name":{"type":"string"},"id":{"type":"string","format":"uuid"}}},"Environment":{"type":"object","required":["name","id"],"properties":{"connections":{"type":"array","items":{"$ref":"#/components/schemas/Connection1"}},"name":{"type":"string"},"id":{"type":"string","format":"uuid"},"replicaType":{"$ref":"#/components/schemas/ReplicaType"}}},"Connection1":{"type":"object","description":"Connection that either expose port from the container (a port is associated with a tool that the container runs), or URL to be used for connecting to an external tool that is related to the action of the container (such as Weights & Biases).","required":["toolType","name","connectionType"],"properties":{"name":{"type":"string"},"toolType":{"type":"string"},"connectionType":{"type":"string"},"url":{"type":"string"},"authorizationType":{"type":"string","description":"Specifies who can access the connection URL:\n- `authenticatedUsers`: Any authenticated user or service account can access the URL; the authorizedUsers and authorizedGroups fields are ignored.\n- `authorizedUsers`: Only users listed in the authorizedUsers field are allowed to access the URL; the authorizedGroups field is ignored.\n- `authorizedGroups`: Only members of user groups listed in the authorizedGroups field are allowed to access the URL; the authorizedUsers field is ignored.\nIf not specified, authorization is determined by whether authorizedUsers or authorizedGroups is present. If both fields are set, this results in an error. If neither is set, any authenticatedUser can access.\n","enum":["public","authenticatedUsers","authorizedUsersOrGroups"]},"authorizedUsers":{"type":"array","description":"List of users or service accounts that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","items":{"type":"string"}},"authorizedGroups":{"type":"array","items":{"type":"string"}},"containerPort":{"description":"The port that the container running the workload exposes.","type":"integer","format":"int32"}}},"ReplicaType":{"type":"string","nullable":true,"enum":["Master","Worker","Leader"]},"PhaseReason":{"type":"string","enum":["NonPreemptibleOverQuota","OverLimit"]},"SourceApi":{"type":"string","nullable":true,"enum":["WorkloadsV2"]},"WorkloadTopologyActualPlacement":{"type":"object","description":"Indicates the lowest network topology level at which all of the workload’s pods are colocated, representing where the workload was placed in the topology hierarchy.","nullable":true,"properties":{"level":{"description":"The network topology level at which the workload was placed (for example, rack, block, or zone).","type":"string"},"value":{"description":"The specific topology value at that level where the workload was placed (for example, rack-1 or block-a).","type":"string"}}},"Endpoint":{"type":"object","properties":{"host":{"type":"string","description":"The hostname used to access the endpoint (for example, chat.example.com). This value is typically defined by the exposing Kubernetes resource (such as an Ingress)."},"port":{"type":"integer","description":"The network port exposed by the endpoint. This is the port used when connecting to the workload."},"path":{"type":"string","description":"The URL path associated with the endpoint (for example, /v1/completions). Combined with host, it forms the full HTTP route to the workload."},"tlsEnabled":{"type":"boolean","nullable":true,"description":"Indicates whether TLS is enabled for the endpoint. When true, the endpoint expects encrypted connections (typically HTTPS)."},"protocol":{"type":"string","description":"The network protocol used for communication with the endpoint (for example, HTTP or HTTPS)."},"url":{"type":"string","description":"The fully qualified URL through which the workload is accessible. This value represents the complete access address derived from the protocol, host, and path (for example, https://chat.example.com/v1/completions)."}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads":{"get":{"summary":"List workloads.","description":"Retrieve a list of active workloads with details.","operationId":"get_workloads","tags":["Workloads-API"],"parameters":[{"$ref":"#/components/parameters/Deleted"},{"$ref":"#/components/parameters/Offset"},{"$ref":"#/components/parameters/Limit"},{"$ref":"#/components/parameters/SortOrder"},{"$ref":"#/components/parameters/WorkloadsSort"},{"$ref":"#/components/parameters/WorkloadsFilter"},{"$ref":"#/components/parameters/Search"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"type":"object","required":["workloads"],"properties":{"next":{"type":"integer"},"workloads":{"$ref":"#/components/schemas/Workloads"}}}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get a workload.

> Retrieve workload data using a \`workloadId\`.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.25"},"tags":[{"name":"Workloads-API"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}}},"schemas":{"WorkloadDetailed":{"allOf":[{"$ref":"#/components/schemas/Workload"},{"type":"object","properties":{"pendingSchedulingMessages":{"nullable":true,"type":"array","items":{"$ref":"#/components/schemas/PendingSchedulingMessage"}}}}]},"Workload":{"type":"object","required":["type","name","id","source","priority","priorityClassName","clusterId","projectName","projectId","departmentName","departmentId","namespace","createdAt","phase","conditions","k8sPhase","tenantId","runningPods","phaseUpdatedAt","k8sPhaseUpdatedAt","updatedAt","deletedAt","category"],"properties":{"tenantId":{"$ref":"#/components/schemas/TenantId"},"runningPods":{"type":"integer","format":"int32"},"phaseUpdatedAt":{"type":"string","format":"date-time"},"k8sPhaseUpdatedAt":{"type":"string","format":"date-time"},"updatedAt":{"type":"string","format":"date-time"},"source":{"$ref":"#/components/schemas/Source"},"deletedAt":{"type":"string","format":"date-time","nullable":true},"type":{"type":"string"},"name":{"type":"string"},"id":{"type":"string","format":"uuid"},"priority":{"type":"integer","format":"int32","nullable":true},"priorityClassName":{"type":"string"},"submittedBy":{"type":"string"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"projectName":{"type":"string"},"projectId":{"type":"string"},"departmentName":{"type":"string"},"departmentId":{"type":"string"},"namespace":{"type":"string"},"createdAt":{"type":"string","format":"date-time"},"workloadRequestedResources":{"$ref":"#/components/schemas/WorkloadRequestResources"},"podsRequestedResources":{"$ref":"#/components/schemas/WorkloadRequestResources"},"allocatedResources":{"$ref":"#/components/schemas/WorkloadAllocatedResources"},"actionsSupport":{"$ref":"#/components/schemas/ActionsSupport"},"phase":{"$ref":"#/components/schemas/Phase"},"conditions":{"$ref":"#/components/schemas/Conditions"},"phaseMessage":{"type":"string"},"k8sPhase":{"type":"string"},"requestedPods":{"$ref":"#/components/schemas/RequestedPods"},"requestedNodePools":{"type":"array","items":{"type":"string"}},"currentNodePools":{"type":"array","items":{"type":"string"}},"completedAt":{"type":"string","format":"date-time","nullable":true},"images":{"type":"array","items":{"type":"string"}},"childrenIds":{"type":"array","writeOnly":true,"items":{"type":"object","required":["id"],"properties":{"id":{"type":"string","format":"uuid"},"type":{"type":"string"}}}},"urls":{"description":"**DEPRECATED:** This field is deprecated. Use [Get workload endpoints](/latest/#operation/get_workload_endpoints) to retrieve workload network endpoints instead.Requires cluster version 2.25 or above.","type":"array","items":{"type":"string"}},"datasources":{"type":"array","items":{"$ref":"#/components/schemas/Datasource"}},"environments":{"type":"array","items":{"$ref":"#/components/schemas/Environment"}},"externalConnections":{"description":"**DEPRECATED:** This field is deprecated. Use [Get workload endpoints](/latest/#operation/get_workload_endpoints) to retrieve workload network endpoints instead.Requires cluster version 2.25 or above.","type":"array","items":{"$ref":"#/components/schemas/Connection1"}},"distributedFramework":{"type":"string"},"additionalFields":{"type":"object","additionalProperties":true},"preemptible":{"type":"boolean","nullable":true},"environmentVariables":{"type":"object","additionalProperties":{"type":"string"}},"command":{"type":"string"},"arguments":{"type":"string"},"phaseReason":{"type":"string","nullable":true,"allOf":[{"$ref":"#/components/schemas/PhaseReason"}]},"idleGpus":{"deprecated":true,"type":"integer","nullable":true,"description":"deprecated. use idleAllocatedGpus instead"},"idleAllocatedGpus":{"type":"number","description":"sum of idle allocated gpus in the workload","nullable":true},"totalPendingTimeSeconds":{"type":"integer","description":"The total cumulative time, in seconds, that the workload has spent in the Pending phase since submission.","nullable":true},"totalRunningTimeSeconds":{"type":"integer","description":"The total cumulative time, in seconds, that the workload has spent in the Running phase since submission.","nullable":true},"category":{"type":"string","description":"Category Description"},"guaranteedRuntimeEndsAt":{"type":"string","format":"date-time","nullable":true,"description":"A timestamp indicating when the workload will reach its minimum guaranteed runtime, as defined by minGuaranteedRuntime. Until this time, the workload is considered non-preemptible and cannot be interrupted by higher-priority workloads."},"aiApplicationId":{"type":"string"},"aiApplicationName":{"type":"string"},"sourceApi":{"$ref":"#/components/schemas/SourceApi"},"topology":{"type":"object","description":"The workload’s network topology.","nullable":true,"properties":{"name":{"type":"string","description":"The name of the network topology associated with the workload."},"preferredPlacement":{"type":"string","description":"The topology level to use as a preferred placement constraint for the workload."},"requiredPlacement":{"type":"string","description":"The topology level to use as required placement constraint for the workload. If the requirement cannot be satisfied at this topology level, placement will fail"},"actualPlacement":{"$ref":"#/components/schemas/WorkloadTopologyActualPlacement"}}},"endpoints":{"description":"List of network endpoints through which the workload is exposed.","type":"array","items":{"$ref":"#/components/schemas/Endpoint"}}}},"TenantId":{"description":"The id of the tenant.","type":"integer","format":"int32"},"Source":{"type":"string","enum":["CLI","Control-plane","Other"]},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"WorkloadRequestResources":{"type":"object","nullable":true,"properties":{"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"},"gpu":{"$ref":"#/components/schemas/RequestResourceCores"},"gpuMemory":{"$ref":"#/components/schemas/RequestResourceQuantity"},"cpu":{"$ref":"#/components/schemas/RequestResourceCores"},"cpuMemory":{"$ref":"#/components/schemas/RequestResourceQuantity"},"extendedResources":{"$ref":"#/components/schemas/WorkloadsExtendedResources"}}},"GpuRequestType":{"description":"Sets the unit type for GPU resources requests. Stated in terms of portion or memory. Sets the unit type for other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDeviceRequest = 1`, the request type can be stated as `portion` or `memory`.","type":"string","minLength":1,"enum":["portion","memory"],"nullable":true},"RequestResourceCores":{"type":"object","properties":{"limit":{"type":"number","nullable":true,"format":"double"},"request":{"type":"number","nullable":true,"format":"double"}},"nullable":true},"RequestResourceQuantity":{"type":"object","properties":{"limit":{"type":"string","nullable":true,"pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$"},"request":{"type":"string","nullable":true,"pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$"}},"nullable":true},"WorkloadsExtendedResources":{"description":"Set of extended resources with their quantity","type":"array","items":{"$ref":"#/components/schemas/WorkloadsExtendedResource"},"nullable":true},"WorkloadsExtendedResource":{"description":"Quantity of an extended resource.","type":"object","properties":{"resource":{"description":"The name of the extended resource (mandatory)","type":"string","minLength":1,"nullable":true},"quantity":{"description":"The requested quantity for the given resource.","type":"string","minLength":1,"nullable":true},"exclude":{"description":"Whether to exclude this extended resource from the workload. This is necessary in case the extended resource is inherited from the policy defaults and it is desired not to include it in this workload.","type":"boolean","nullable":true}},"nullable":true},"WorkloadAllocatedResources":{"type":"object","nullable":true,"properties":{"gpu":{"type":"number","nullable":true,"format":"double","description":"Required if and only if gpuRequestType is portion. States the number of GPUs allocated for the created workload. The default is no allocated GPUs."},"gpuMemory":{"type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"cpu":{"type":"number","nullable":true,"format":"double","description":"States the amount of CPU cores used by the workload running."},"cpuMemory":{"type":"string","pattern":"^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","nullable":true},"extendedResources":{"$ref":"#/components/schemas/WorkloadsExtendedResources"}}},"ActionsSupport":{"description":"The actions support provided for the workload.","type":"object","properties":{"delete":{"type":"boolean"},"suspend":{"description":"This permission includes resume as well as suspend.","type":"boolean"}}},"Phase":{"type":"string","enum":["Creating","Initializing","Resuming","Pending","Deleting","Running","Updating","Stopped","Stopping","Degraded","Failed","Completed","Terminating","Unknown"]},"Conditions":{"type":"array","items":{"$ref":"#/components/schemas/Condition1"}},"Condition1":{"type":"object","required":["type","status"],"properties":{"type":{"description":"The type of the condition, such as Failed or Available. See Types of domain status conditions.","type":"string"},"status":{"type":"string","description":"The status of the condition, such as True, False or Unknown."},"message":{"type":"string","description":"An optional, human-readable message providing more details about the condition."},"reason":{"type":"string","description":"The reason for the Failed condition. Not applicable to other types of condition."},"lastTransitionTime":{"description":"A timestamp of when the condition was created or the last time the condition transitioned from one status to another.","type":"string","nullable":true,"format":"date-time"}}},"RequestedPods":{"type":"object","properties":{"number":{"type":"integer","format":"int32","nullable":true},"min":{"type":"integer","format":"int32","nullable":true},"max":{"type":"integer","format":"int32","nullable":true},"parallelism":{"type":"integer","format":"int32","description":"specifies how many Pods can run in parallel","nullable":true},"completions":{"type":"integer","description":"specifies how many Pods should terminate successfully before the Workload is completed","format":"int32","nullable":true}}},"Datasource":{"type":"object","required":["name","id"],"properties":{"type":{"type":"string"},"name":{"type":"string"},"id":{"type":"string","format":"uuid"}}},"Environment":{"type":"object","required":["name","id"],"properties":{"connections":{"type":"array","items":{"$ref":"#/components/schemas/Connection1"}},"name":{"type":"string"},"id":{"type":"string","format":"uuid"},"replicaType":{"$ref":"#/components/schemas/ReplicaType"}}},"Connection1":{"type":"object","description":"Connection that either expose port from the container (a port is associated with a tool that the container runs), or URL to be used for connecting to an external tool that is related to the action of the container (such as Weights & Biases).","required":["toolType","name","connectionType"],"properties":{"name":{"type":"string"},"toolType":{"type":"string"},"connectionType":{"type":"string"},"url":{"type":"string"},"authorizationType":{"type":"string","description":"Specifies who can access the connection URL:\n- `authenticatedUsers`: Any authenticated user or service account can access the URL; the authorizedUsers and authorizedGroups fields are ignored.\n- `authorizedUsers`: Only users listed in the authorizedUsers field are allowed to access the URL; the authorizedGroups field is ignored.\n- `authorizedGroups`: Only members of user groups listed in the authorizedGroups field are allowed to access the URL; the authorizedUsers field is ignored.\nIf not specified, authorization is determined by whether authorizedUsers or authorizedGroups is present. If both fields are set, this results in an error. If neither is set, any authenticatedUser can access.\n","enum":["public","authenticatedUsers","authorizedUsersOrGroups"]},"authorizedUsers":{"type":"array","description":"List of users or service accounts that are allowed to access the URL. Note that authorizedUsers and authorizedGroups are mutually exclusive.","items":{"type":"string"}},"authorizedGroups":{"type":"array","items":{"type":"string"}},"containerPort":{"description":"The port that the container running the workload exposes.","type":"integer","format":"int32"}}},"ReplicaType":{"type":"string","nullable":true,"enum":["Master","Worker","Leader"]},"PhaseReason":{"type":"string","enum":["NonPreemptibleOverQuota","OverLimit"]},"SourceApi":{"type":"string","nullable":true,"enum":["WorkloadsV2"]},"WorkloadTopologyActualPlacement":{"type":"object","description":"Indicates the lowest network topology level at which all of the workload’s pods are colocated, representing where the workload was placed in the topology hierarchy.","nullable":true,"properties":{"level":{"description":"The network topology level at which the workload was placed (for example, rack, block, or zone).","type":"string"},"value":{"description":"The specific topology value at that level where the workload was placed (for example, rack-1 or block-a).","type":"string"}}},"Endpoint":{"type":"object","properties":{"host":{"type":"string","description":"The hostname used to access the endpoint (for example, chat.example.com). This value is typically defined by the exposing Kubernetes resource (such as an Ingress)."},"port":{"type":"integer","description":"The network port exposed by the endpoint. This is the port used when connecting to the workload."},"path":{"type":"string","description":"The URL path associated with the endpoint (for example, /v1/completions). Combined with host, it forms the full HTTP route to the workload."},"tlsEnabled":{"type":"boolean","nullable":true,"description":"Indicates whether TLS is enabled for the endpoint. When true, the endpoint expects encrypted connections (typically HTTPS)."},"protocol":{"type":"string","description":"The network protocol used for communication with the endpoint (for example, HTTP or HTTPS)."},"url":{"type":"string","description":"The fully qualified URL through which the workload is accessible. This value represents the complete access address derived from the protocol, host, and path (for example, https://chat.example.com/v1/completions)."}}},"PendingSchedulingMessage":{"type":"object","required":["nodePool","phaseReason","reason"],"properties":{"nodePool":{"type":"string"},"phaseReason":{"$ref":"#/components/schemas/PhaseReason"},"reason":{"type":"string"},"orgType":{"type":"string","nullable":true,"allOf":[{"$ref":"#/components/schemas/OrgType"}]},"userMessage":{"nullable":true,"type":"string"}}},"OrgType":{"type":"string","enum":["PROJECT","DEPARTMENT"]},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/{workloadId}":{"get":{"summary":"Get a workload.","operationId":"get_workload","description":"Retrieve workload data using a `workloadId`.","tags":["Workloads-API"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/WorkloadDetailed"}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Count workloads.

> Retrieve the number of workloads.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.25"},"tags":[{"name":"Workloads-API"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"Deleted":{"name":"deleted","in":"query","description":"Return only deleted resources when `true`.","schema":{"type":"boolean"}},"WorkloadsFilter":{"name":"filterBy","in":"query","required":false,"description":"Filter results by a parameter. Use the format field-name operator value. Operators are `==` Equals, `!=` Not equals, `<=` Less than or equal, `>=` Greater than or equal, `=@` contains, `!@` Does not contain, `=^` Starts with and `=$` Ends with. Dates are in ISO 8601 timestamp format and available for operators `==`, `!=`, `<=` and `>=`.","schema":{"type":"array","maxItems":10,"items":{"type":"string","pattern":"^(type|name|clusterId|projectId|projectName|departmentId|departmentName|createdAt|deletedAt|submittedBy|phase|completedAt|nodepool|distributedFramework|allocatedGPU|idleGpus|idleAllocatedGpus|phaseUpdatedAt|category|totalPendingTimeSeconds|totalRunningTimeSeconds|priority|priorityClassName|guaranteedRuntimeEndsAt|aiApplicationId|aiApplicationName)(==|!=|<=|>=|=@|!@|=\\^|=\\$).+$"}},"explode":false},"Search":{"name":"search","in":"query","required":false,"description":"Filter results by a free text search.","schema":{"type":"string"}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}},"schemas":{"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}}},"paths":{"/api/v1/workloads/count":{"get":{"summary":"Count workloads.","description":"Retrieve the number of workloads.","operationId":"count_workloads","tags":["Workloads-API"],"parameters":[{"$ref":"#/components/parameters/Deleted"},{"$ref":"#/components/parameters/WorkloadsFilter"},{"$ref":"#/components/parameters/Search"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"type":"object","required":["count"],"properties":{"count":{"type":"integer","format":"int64"}}}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get the workloads telemetry.

> Retrieves workload data by telemetry type. Optionally filter by specific workload phases.\
> \
> Telemetry Types:\
> \- WORKLOADS\_COUNT: Count of workloads\
> \- GPU\_ALLOCATION: GPU allocation metrics\
> \- PENDING\_TIME\_DISTRIBUTION: Distribution of workloads by current pending time (time since entering Pending phase).\
> \
> \
> \
> \
> \
> \
> &#x20; For this type, results are automatically grouped by 4 time buckets and any additional groupBy parameters.\
> \- IDLE\_ALLOCATED\_GPUS: Sum of idle allocated GPU devices across workloads<br>

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.25"},"tags":[{"name":"Workloads-API"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"FilterByCluster":{"name":"clusterId","in":"query","description":"Filter using the Universally Unique Identifier (UUID) of the cluster.","required":false,"schema":{"type":"string","format":"uuid"}},"FilterByNodepoolName":{"name":"nodepoolName","in":"query","description":"Filter using the nodepool.","required":false,"schema":{"type":"string"}},"FilterByDepartment":{"name":"departmentId","in":"query","description":"Filter using the department id.","required":false,"schema":{"type":"string"}},"TelemetryGroupBy1":{"name":"groupBy","in":"query","description":"Group workloads by field.","explode":false,"required":false,"schema":{"type":"array","maxItems":2,"items":{"type":"string","enum":["ClusterId","DepartmentId","ProjectId","Type","CurrentNodepools","RequestedNodepools","Phase","Category"]}}},"TelemetryType2":{"name":"telemetryType","in":"query","required":true,"description":"Specifies the telemetry type.","schema":{"$ref":"#/components/schemas/WorkloadTelemetryType"}},"FilterByPhases":{"name":"filterByPhases","in":"query","required":false,"description":"Filter workloads by specific phases. If not specified, all phases are included.","explode":false,"schema":{"type":"array","maxItems":10,"items":{"$ref":"#/components/schemas/Phase"}}}},"schemas":{"WorkloadTelemetryType":{"type":"string","description":"Select a telemetry type.","enum":["WORKLOADS_COUNT","GPU_ALLOCATION","PENDING_TIME_DISTRIBUTION","IDLE_ALLOCATED_GPUS"]},"Phase":{"type":"string","enum":["Creating","Initializing","Resuming","Pending","Deleting","Running","Updating","Stopped","Stopping","Degraded","Failed","Completed","Terminating","Unknown"]},"TelemetryResponse":{"type":"object","required":["type","timestamp","values"],"properties":{"type":{"type":"string","description":"specifies what data returned"},"timestamp":{"type":"string","format":"date-time"},"values":{"type":"array","items":{"type":"object","required":["value"],"properties":{"value":{"type":"string"},"groups":{"type":"array","description":"columns the data is grouped by","maxItems":2,"items":{"type":"object","required":["key","value"],"properties":{"key":{"type":"string"},"value":{"type":"string"},"name":{"type":"string"}}}}}}}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/telemetry":{"get":{"summary":"Get the workloads telemetry.","operationId":"get_workloads_telemetry","description":"Retrieves workload data by telemetry type. Optionally filter by specific workload phases.\n\nTelemetry Types:\n- WORKLOADS_COUNT: Count of workloads\n- GPU_ALLOCATION: GPU allocation metrics\n- PENDING_TIME_DISTRIBUTION: Distribution of workloads by current pending time (time since entering Pending phase).\n\n\n\n\n\n\n  For this type, results are automatically grouped by 4 time buckets and any additional groupBy parameters.\n- IDLE_ALLOCATED_GPUS: Sum of idle allocated GPU devices across workloads\n","tags":["Workloads-API"],"parameters":[{"$ref":"#/components/parameters/FilterByCluster"},{"$ref":"#/components/parameters/FilterByNodepoolName"},{"$ref":"#/components/parameters/FilterByDepartment"},{"$ref":"#/components/parameters/TelemetryGroupBy1"},{"$ref":"#/components/parameters/TelemetryType2"},{"$ref":"#/components/parameters/FilterByPhases"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/TelemetryResponse"}},"text/csv":{}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get workload metrics data.

> Retrieves workloads data metrics from the metrics database. Use in reporting and analysis tools.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.25"},"tags":[{"name":"Workloads-API"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}},"WorkloadMetricTypes":{"name":"metricType","in":"query","required":true,"description":"Specify which data to request.","explode":false,"schema":{"type":"array","minItems":1,"maxItems":10,"items":{"$ref":"#/components/schemas/WorkloadMetricType"}}},"StartRequired":{"name":"start","in":"query","description":"Start date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"EndRequired":{"name":"end","in":"query","description":"End date of time range to fetch data in ISO 8601 timestamp format.","required":true,"schema":{"type":"string","format":"date-time"}},"NumberOfSamples":{"name":"numberOfSamples","in":"query","description":"The number of samples to take in the specified time range.","required":false,"schema":{"type":"integer","maximum":1000,"minimum":0,"default":20}}},"schemas":{"WorkloadMetricType":{"type":"string","description":"Specify which metric data to request.","enum":["GPU_UTILIZATION","GPU_MEMORY_USAGE_BYTES","GPU_MEMORY_REQUEST_BYTES","CPU_USAGE_CORES","CPU_REQUEST_CORES","CPU_LIMIT_CORES","CPU_MEMORY_USAGE_BYTES","CPU_MEMORY_REQUEST_BYTES","CPU_MEMORY_LIMIT_BYTES","POD_COUNT","RUNNING_POD_COUNT","GPU_ALLOCATION","NIM_NUM_REQUESTS_RUNNING","NIM_NUM_REQUESTS_WAITING","NIM_NUM_REQUEST_MAX","NIM_REQUEST_SUCCESS_TOTAL","NIM_REQUEST_FAILURE_TOTAL","NIM_GPU_CACHE_USAGE_PERC","NIM_TIME_TO_FIRST_TOKEN_SECONDS","NIM_E2E_REQUEST_LATENCY_SECONDS","NIM_TIME_TO_FIRST_TOKEN_SECONDS_PERCENTILES","NIM_E2E_REQUEST_LATENCY_SECONDS_PERCENTILES","NVLINK_BANDWIDTH_TOTAL"]},"MetricsCompositeResponse":{"type":"object","required":["measurements"],"properties":{"measurements":{"type":"array","items":{"$ref":"#/components/schemas/MeasurementResponse"}},"histogram":{"type":"array","nullable":true,"items":{"$ref":"#/components/schemas/HistogramSeries"}}}},"MeasurementResponse":{"type":"object","required":["type","values"],"properties":{"type":{"type":"string","description":"specifies what data returned"},"labels":{"type":"object","nullable":true,"description":"labels of the metric measurement","additionalProperties":{"type":"string"}},"values":{"type":"array","nullable":true,"items":{"type":"object","required":["value","timestamp"],"properties":{"value":{"type":"string"},"timestamp":{"type":"string","format":"date-time","nullable":true}}}}}},"HistogramSeries":{"type":"object","required":["type","values"],"properties":{"type":{"type":"string","description":"specifies what data returned"},"values":{"type":"array","items":{"$ref":"#/components/schemas/HistogramValue"}}}},"HistogramValue":{"type":"object","required":["timestamp","data"],"properties":{"timestamp":{"type":"string","format":"date-time","nullable":true},"data":{"type":"object","additionalProperties":{"type":"string"}}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/{workloadId}/metrics":{"get":{"summary":"Get workload metrics data.","operationId":"get_workload_metrics","description":"Retrieves workloads data metrics from the metrics database. Use in reporting and analysis tools.","tags":["Workloads-API"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"},{"$ref":"#/components/parameters/WorkloadMetricTypes"},{"$ref":"#/components/parameters/StartRequired"},{"$ref":"#/components/parameters/EndRequired"},{"$ref":"#/components/parameters/NumberOfSamples"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsCompositeResponse"}},"text/csv":{}}},"207":{"description":"Partial success.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/MetricsCompositeResponse"}}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get workload endpoints.

> Retrieve the network endpoints exposed by a workload using a workload ID.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.25"},"tags":[{"name":"Workloads-API"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}}},"schemas":{"Endpoint":{"type":"object","properties":{"host":{"type":"string","description":"The hostname used to access the endpoint (for example, chat.example.com). This value is typically defined by the exposing Kubernetes resource (such as an Ingress)."},"port":{"type":"integer","description":"The network port exposed by the endpoint. This is the port used when connecting to the workload."},"path":{"type":"string","description":"The URL path associated with the endpoint (for example, /v1/completions). Combined with host, it forms the full HTTP route to the workload."},"tlsEnabled":{"type":"boolean","nullable":true,"description":"Indicates whether TLS is enabled for the endpoint. When true, the endpoint expects encrypted connections (typically HTTPS)."},"protocol":{"type":"string","description":"The network protocol used for communication with the endpoint (for example, HTTP or HTTPS)."},"url":{"type":"string","description":"The fully qualified URL through which the workload is accessible. This value represents the complete access address derived from the protocol, host, and path (for example, https://chat.example.com/v1/completions)."}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/{workloadId}/endpoints":{"get":{"summary":"Get workload endpoints.","description":"Retrieve the network endpoints exposed by a workload using a workload ID.","operationId":"get_workload_endpoints","tags":["Workloads-API"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"type":"object","required":["endpoints"],"properties":{"endpoints":{"type":"array","items":{"$ref":"#/components/schemas/Endpoint"}}}}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get workload structure elements by workload id.

> Retrieve the hierarchy structure elements for a workload. Returns a flat list ordered by level. Requires cluster version >= 2.25.<br>

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.25"},"tags":[{"name":"Workloads-API"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}},"Offset":{"name":"offset","in":"query","required":false,"description":"The offset of the first item returned in the collection.","schema":{"type":"integer","format":"int32"}},"Limit":{"name":"limit","in":"query","required":false,"description":"The maximum number of entries to return.","schema":{"type":"integer","format":"int32","default":50,"minimum":1,"maximum":500}}},"schemas":{"WorkloadElementsResponse":{"type":"object","required":["elements"],"properties":{"next":{"type":"integer","format":"int32","description":"Offset for the next page of results. Present only when there are more elements beyond the current page (i.e., the number of returned elements equals the limit).\n"},"elements":{"type":"array","description":"A flat list of structure elements for this workload, ordered by hierarchy level. This page may contain fewer elements than are stored in the database — use the \"next\" field to retrieve additional pages. Use the /elements/count endpoint to retrieve total and stored counts.\n","items":{"$ref":"#/components/schemas/WorkloadElement"}}}},"WorkloadElement":{"type":"object","required":["id","name","hierarchyLevel"],"properties":{"id":{"type":"string","description":"The unique identifier of the structure element.","format":"uuid"},"name":{"type":"string","description":"The display name of the structure element.","maxLength":250},"hierarchyLevel":{"type":"integer","format":"int32","description":"The position of the element in the workload structure hierarchy. Valid values are 1 (top-level), 2, and 3.","minimum":1,"maximum":3},"parentId":{"type":"string","nullable":true,"description":"The unique identifier of the parent element. Empty for level 1 elements, whose parent is implicitly the workload.","format":"uuid"},"createdAt":{"type":"string","format":"date-time","description":"The timestamp when the structure element was created."},"kubernetesRef":{"type":"object","nullable":true,"description":"Reference to the backing K8s object. Present for object-backed elements, nil for logical elements.","properties":{"uid":{"description":"The unique identifier (UID) of the Kubernetes object backing this element.","type":"string"},"name":{"description":"The name of the Kubernetes object backing this element.","type":"string"},"gvk":{"description":"The group, version and kind of the Kubernetes resource.","type":"object","properties":{"group":{"description":"The API group of the Kubernetes resource (for example, apps or batch).","type":"string"},"version":{"description":"The API version of the Kubernetes resource (for example, v1 or v1beta1).","type":"string"},"kind":{"description":"The kind of the Kubernetes resource (for example, Deployment or Job).","type":"string"}}}}}}},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v1/workloads/{workloadId}/elements":{"get":{"summary":"Get workload structure elements by workload id.","description":"Retrieve the hierarchy structure elements for a workload. Returns a flat list ordered by level. Requires cluster version >= 2.25.\n","operationId":"get_workload_elements","tags":["Workloads-API"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"},{"$ref":"#/components/parameters/Offset"},{"$ref":"#/components/parameters/Limit"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/WorkloadElementsResponse"}}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Count workload structure elements.

> Returns the total count of structure elements reported by the cluster and the count of elements stored in the database for the given workload. Requires cluster version >= 2.25.<br>

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.25"},"tags":[{"name":"Workloads-API"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadId":{"name":"workloadId","in":"path","required":true,"description":"The  Universally Unique Identifier (UUID) of the workload.","schema":{"type":"string","format":"uuid"}}},"responses":{"400BadRequest":{"description":"Bad request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}},"schemas":{"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}}},"paths":{"/api/v1/workloads/{workloadId}/elements/count":{"get":{"summary":"Count workload structure elements.","description":"Returns the total count of structure elements reported by the cluster and the count of elements stored in the database for the given workload. Requires cluster version >= 2.25.\n","operationId":"count_workload_elements","tags":["Workloads-API"],"parameters":[{"$ref":"#/components/parameters/WorkloadId"}],"responses":{"200":{"description":"Executed successfully.","content":{"application/json":{"schema":{"type":"object","required":["totalElementsCount","storedElementsCount"],"properties":{"storedElementsCount":{"type":"integer","format":"int32","minimum":0,"description":"The number of structure elements stored for this workload. When truncation occurs, this will be less than totalElementsCount."},"totalElementsCount":{"type":"integer","format":"int32","minimum":0,"description":"The total number of structure elements that exist for this workload. When no truncation occurred, this equals storedElementsCount."}}}}}},"400":{"$ref":"#/components/responses/400BadRequest"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```


---

# Agent Instructions: Querying This Documentation

If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.

Perform an HTTP GET request on the current page URL with the `ask` query parameter:

```
GET https://run-ai-docs.nvidia.com/api/2.25/workloads/workloads.md?ask=<question>
```

The question should be specific, self-contained, and written in natural language.
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.

Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
