# NVIDIA NIM

The NVIDIA NIM API provides endpoints to create and manage workloads that deploy NVIDIA Inference Microservices (NIM) through the NIM Operator. These workloads package optimized NVIDIA model servers and run as managed services on the NVIDIA Run:ai platform. Each request includes NVIDIA Run:ai scheduling metadata (for example, project, priority, and category) and a NIM service specification that defines the container image, compute resources, environment variables, storage, and networking configuration. Once submitted, NVIDIA Run:ai handles scheduling, orchestration, and lifecycle management of the NIM service to ensure reliable and efficient model serving.

## Create an NVIDIA NIM service. \[Experimental]

> Create an NVIDIA NIM service

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.24"},"tags":[{"name":"NVIDIA NIM","description":"The NVIDIA NIM API provides endpoints to create and manage workloads that deploy NVIDIA Inference Microservices (NIM) through the NIM Operator. These workloads package optimized NVIDIA model servers and run as managed services on the NVIDIA Run:ai platform.\nEach request includes NVIDIA Run:ai scheduling metadata (for example, project, priority, and category) and a NIM service specification that defines the container image, compute resources, environment variables, storage, and networking configuration. Once submitted, NVIDIA Run:ai handles scheduling, orchestration, and lifecycle management of the NIM service to ensure reliable and efficient model serving.\n"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"schemas":{"NimServiceCreateRequest":{"type":"object","required":["metadata","spec"],"properties":{"metadata":{"$ref":"#/components/schemas/NIMServiceMetadataCreateParams"},"spec":{"$ref":"#/components/schemas/NimServiceSpec"}}},"NIMServiceMetadataCreateParams":{"type":"object","required":["name","projectId"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"useGivenNameAsPrefix":{"description":"When true, the requested name will be treated as a prefix. 
The final name of the workload will be composed of the name followed by a random set of characters.","type":"boolean","default":false},"projectId":{"$ref":"#/components/schemas/ProjectId"}}},"WorkloadName":{"description":"The name of the workload.","type":"string","minLength":1,"pattern":".*"},"ProjectId":{"description":"The id of the project.","type":"string","pattern":".*"},"NimServiceSpec":{"allOf":[{"nullable":true,"properties":{"annotations":{"$ref":"#/components/schemas/Annotations"},"autoscaling":{"nullable":true,"properties":{"maxReplicas":{"$ref":"#/components/schemas/AutoScalingMaxReplicas"},"metric":{"$ref":"#/components/schemas/AutoScalingMetricNim"},"metricThreshold":{"$ref":"#/components/schemas/AutoScalingMetricThreshold"},"minReplicas":{"$ref":"#/components/schemas/AutoScalingMinReplicas"},"scaleWindowSeconds":{"$ref":"#/components/schemas/AutoScalingScaleWindowSeconds"}},"type":"object"},"category":{"$ref":"#/components/schemas/Category"},"compute":{"nullable":true,"properties":{"cpuCoreLimit":{"$ref":"#/components/schemas/CpuCoreLimit"},"cpuCoreRequest":{"$ref":"#/components/schemas/CpuCoreRequest"},"cpuMemoryLimit":{"$ref":"#/components/schemas/CpuMemoryLimit"},"cpuMemoryRequest":{"$ref":"#/components/schemas/CpuMemoryRequest"},"gpuDevicesRequest":{"$ref":"#/components/schemas/GpuDevicesRequest"},"gpuMemoryLimit":{"$ref":"#/components/schemas/GpuMemoryLimit"},"gpuMemoryRequest":{"$ref":"#/components/schemas/GpuMemoryRequest"},"gpuPortionLimit":{"$ref":"#/components/schemas/GpuPortionLimit"},"gpuPortionRequest":{"$ref":"#/components/schemas/GpuPortionRequest"},"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"}},"type":"object"},"environmentVariables":{"$ref":"#/components/schemas/EnvironmentVariables"},"image":{"$ref":"#/components/schemas/Image"},"imagePullPolicy":{"$ref":"#/components/schemas/ImagePullPolicy"},"imagePullSecrets":{"$ref":"#/components/schemas/ImagePullSecrets"},"labels":{"$ref":"#/components/schemas/Labels"},"modelSto
re":{"nullable":true,"properties":{"nimCache":{"$ref":"#/components/schemas/NimCache"},"pvc":{"$ref":"#/components/schemas/NimServicePvcFields"}},"type":"object"},"multiNode":{"$ref":"#/components/schemas/NimServiceMultiNode"},"ngcAuthSecret":{"$ref":"#/components/schemas/NimServiceNgcAuthSecret"},"nodePools":{"$ref":"#/components/schemas/NodePools"},"preemptibility":{"$ref":"#/components/schemas/Preemptibility"},"priorityClass":{"$ref":"#/components/schemas/PriorityClass"},"probes":{"$ref":"#/components/schemas/Probes"},"replicas":{"$ref":"#/components/schemas/NimServiceReplicas"},"security":{"nullable":true,"properties":{"runAsGid":{"$ref":"#/components/schemas/RunAsGid"},"runAsUid":{"$ref":"#/components/schemas/RunAsUid"}},"type":"object"},"servingPort":{"$ref":"#/components/schemas/NimServiceServingPort"},"tolerations":{"$ref":"#/components/schemas/Tolerations"}},"type":"object"}]},"Annotations":{"description":"Set of annotations to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Annotation"},"maxItems":1000,"nullable":true},"Annotation":{"description":"Annotation details to be populated into the container.","properties":{"name":{"description":"The name of the annotation (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the annotation.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the annotation is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"AutoScalingMaxReplicas":{"description":"The maximum number of replicas for autoscaling. Defaults to minReplicas. 
Must be no less than minReplicas.","type":"integer","format":"int32","minimum":1,"nullable":true},"AutoScalingMetricNim":{"description":"The metric to use for autoscaling (mandatory).","type":"string","pattern":"^[a-zA-Z_:][a-zA-Z0-9_:]*$","nullable":true},"AutoScalingMetricThreshold":{"description":"The threshold to use with the specified metric for autoscaling (mandatory).","type":"integer","format":"int32","nullable":true},"AutoScalingMinReplicas":{"description":"The minimum number of replicas for autoscaling. Defaults to 1.","type":"integer","format":"int32","minimum":1,"nullable":true},"AutoScalingScaleWindowSeconds":{"description":"The time window for autoscaling decisions, in seconds. Defaults to 300 seconds.","type":"integer","format":"int32","minimum":60,"maximum":3600,"nullable":true},"Category":{"description":"Specify the workload category assigned to the workload. Categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.","type":"string","nullable":true,"pattern":".*"},"CpuCoreLimit":{"description":"Limitations on the number of CPUs consumed by the workload (0.5, 1, etc.). The system guarantees that this workload will not be able to consume more than this amount of CPUs.","format":"double","type":"number","nullable":true,"minimum":0},"CpuCoreRequest":{"description":"CPU units to allocate for the created workload (0.5, 1, etc.). The workload will receive at least this amount of CPU. Note that the workload will not be scheduled unless the system can guarantee this amount of CPUs to the workload.","format":"double","type":"number","nullable":true,"minimum":0},"CpuMemoryLimit":{"description":"Limitations on the CPU memory to allocate for this workload (1G, 20M, etc.). The system guarantees that this workload will not be able to consume more than this amount of memory. 
The workload will receive an error when trying to allocate more memory than this limit.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"CpuMemoryRequest":{"description":"The amount of CPU memory to allocate for this workload (1G, 20M, etc.). The workload will receive at least this amount of memory. Note that the workload will not be scheduled unless the system can guarantee this amount of memory to the workload.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuDevicesRequest":{"description":"Requested number of GPU devices. Currently if more than one device is requested, it is not possible to provide values for gpuMemory or gpuPortion.","type":"integer","format":"int32","nullable":true,"minimum":0},"GpuMemoryLimit":{"description":"Limitation on the memory consumed by the workload, per GPU device. The gpuMemoryLimit must be no less than gpuMemoryRequest.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuMemoryRequest":{"description":"Required if and only if gpuRequestType is memory. States the GPU memory to allocate for the created workload, per GPU device. Note that the workload will not be scheduled unless the system can guarantee this amount of GPU memory to the workload.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuPortionLimit":{"description":"Limitations on the portion consumed by the workload, per GPU device. The gpuPortionLimit must be no less than the gpuPortionRequest.","type":"number","format":"double","nullable":true,"minimum":0},"GpuPortionRequest":{"description":"Required if and only if gpuRequestType is portion. States the portion of the GPU to allocate for the created workload, per GPU device, between 0 and 1. 
The default is no allocated GPUs.","type":"number","format":"double","nullable":true,"minimum":0},"GpuRequestType":{"description":"Sets the unit type for GPU resources requests. Stated in terms of portion or memory. Sets the unit type for other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDevicesRequest = 1`, the request type can be stated as `portion` or `memory`.","type":"string","minLength":1,"enum":["portion","memory"],"nullable":true},"EnvironmentVariables":{"description":"Set of environment variables to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/EnvironmentVariable"},"maxItems":1000,"nullable":true},"EnvironmentVariable":{"description":"Details of an environment variable which is populated into the container.","properties":{"name":{"description":"The name of the environment variable. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the environment variable. 
(mutually exclusive with secret, userCredential, configMap and podFieldRef)","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"secret":{"$ref":"#/components/schemas/EnvironmentVariableSecret"},"configMap":{"$ref":"#/components/schemas/EnvironmentVariableConfigMap"},"podFieldRef":{"$ref":"#/components/schemas/EnvironmentVariablePodFieldReference"},"userCredential":{"$ref":"#/components/schemas/EnvironmentVariableUserCredential"},"exclude":{"description":"Use 'true' in case the environment variable is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true},"description":{"description":"Description of the environment variable.","type":"string","maxLength":250,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableSecret":{"description":"Details of the secret and key used to populate the environment variable","properties":{"name":{"description":"The name of the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableConfigMap":{"description":"Details of the configMap and key used to populate the environment variable","properties":{"name":{"description":"The name of the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariablePodFieldReference":{"description":"Details of the field reference used to populate the environment variable","properties":{"path":{"description":"The field path resource. 
(mandatory)","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableUserCredential":{"description":"Defines a reference to a user-created credential and a specific key within that credential whose value will populate the environment variable. User credentials can only be accessed by the user who created them.","properties":{"name":{"description":"The name of the user credential.  (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true},"key":{"description":"The key in the user credential resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true}},"nullable":true,"type":"object"},"Image":{"description":"Docker image name. For more information, see [Images](https://kubernetes.io/docs/concepts/containers/images). The image name is mandatory for creating a workload.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"ImagePullPolicy":{"description":"Image pull policy. Defaults to `Always` if `:latest` tag is specified, otherwise it is `IfNotPresent`.","type":"string","minLength":1,"enum":["Always","Never","IfNotPresent"],"nullable":true},"ImagePullSecrets":{"description":"A list of references to Kubernetes secrets in the same namespace used for pulling container images.","type":"array","items":{"$ref":"#/components/schemas/ImagePullSecret"},"maxItems":1000,"nullable":true},"ImagePullSecret":{"description":"A reference to a secret in the same namespace used to pull container images.","properties":{"name":{"type":"string","description":"The name of the Kubernetes secret containing the image pull credentials.","pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$","maxLength":253},"userCredential":{"type":"boolean","description":"Indicates whether the secret is a user credential. 
Set to true if the secret was created by the user and is only accessible by them.","nullable":true},"exclude":{"description":"Use 'true' in case the secret is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Labels":{"description":"Set of labels to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Label"},"maxItems":1000,"nullable":true},"Label":{"description":"Label details to be populated into the container.","properties":{"name":{"description":"The name of the label (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the label.","type":"string","nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"NimCache":{"description":"The specification of a NIM cache volume.","type":"object","properties":{"name":{"description":"The NIMCache resource name (mandatory).","type":"string","minLength":1,"nullable":true},"profile":{"description":"The NIM profile to use (optional).","type":"string","minLength":1,"nullable":true}},"nullable":true},"NimServicePvcFields":{"properties":{"existingPvc":{"description":"Verify existing PVC. PVC is assumed to exist when set to `true`. If set to `false`, the PVC will be created, if it does not exist.","type":"boolean","default":false,"nullable":true},"claimName":{"description":"Name for the PVC. Allow referencing it across workloads. 
If not provided, a name based on the workload name and scope will be auto-generated.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"readOnly":{"description":"Permit only read access to PVC.","type":"boolean","default":false,"nullable":true},"claimInfo":{"$ref":"#/components/schemas/ClaimInfo"}},"nullable":true,"type":"object"},"ClaimInfo":{"description":"Claim information for the newly created PVC. The information should not be provided when attempting to use existing PVC.","properties":{"size":{"$ref":"#/components/schemas/PvcClaimSize"},"storageClass":{"description":"Storage class name to associate with the PVC. This parameter may be omitted if there is a single storage class in the system, or you are using the default storage class. For more information, see [Storage class](https://kubernetes.io/docs/concepts/storage/storage-classes).","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"accessModes":{"$ref":"#/components/schemas/PvcAccessModes"},"volumeMode":{"$ref":"#/components/schemas/PvcVolumeMode"},"addedAttrValues":{"$ref":"#/components/schemas/PvcAddedAttrValues"}},"nullable":true,"type":"object"},"PvcClaimSize":{"description":"Requested size for the PVC. Mandatory when existingPvc is false. 
Recommended sizes: TB/GB/MB/TIB/GIB/MIB","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"PvcAccessModes":{"description":"Default access mode(s) applied to newly created PVCs unless explicitly overridden.","properties":{"readWriteOnce":{"description":"Mount the volume as read/write by a single node.","type":"boolean","default":true,"nullable":true},"readOnlyMany":{"description":"Mount the volume as read-only by many nodes.","type":"boolean","default":false,"nullable":true},"readWriteMany":{"description":"Mount the volume as read/write by many nodes.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PvcVolumeMode":{"description":"Default volume mode for the PVC. Choose between Filesystem (default) or Block.","type":"string","enum":["Filesystem","Block"],"nullable":true},"PvcAddedAttrValues":{"description":"an optional array of key-values pairs that are written as annotations on the created PVC. the allowed attributes are determined according to the storage class configuration (see k8s-objects-tracker for further info).","type":"array","items":{"$ref":"#/components/schemas/PvcAddedAttrValue"},"maxItems":1000},"PvcAddedAttrValue":{"type":"object","required":["key"],"properties":{"key":{"type":"string","minLength":1,"maxLength":63,"pattern":"^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$"},"value":{"type":"string","pattern":".*","maxLength":10000}}},"NimServiceMultiNode":{"description":"Defines whether the NIM service runs as a multi-node deployment. 
If workers is set to 1 or more, the service runs in multi-node.","properties":{"workers":{"$ref":"#/components/schemas/NimServiceWorkers"}},"nullable":true,"type":"object"},"NimServiceWorkers":{"description":"Specifies the number of worker nodes to use when running the NIM service in multi-node.","type":"integer","format":"int32","minimum":1,"maximum":1000,"nullable":true},"NimServiceNgcAuthSecret":{"description":"The name of a Kubernetes secret containing the NGC access credentials. The secret must contain a key named NGC_API_KEY with the API key as the value.","type":"string","pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$","nullable":true},"NodePools":{"description":"A prioritized list of node pools for the scheduler to run the workload on. The scheduler will always try to use the first node pool before moving to the next one if the first is not available.","type":"array","items":{"type":"string","maxLength":63,"pattern":".*"},"maxItems":1000,"nullable":true},"Preemptibility":{"description":"Specifies whether the workload can be preempted by higher-priority workloads. Valid values are preemptible and non-preemptible. If explicitly set, this value takes precedence. If not set, the system derives the preemptibility from the priorityClassName field, ensuring backward compatibility. Each workload type has a default preemptibility. To view the default preemptibility for each workload type, use the GET /workload-types endpoint.","type":"string","minLength":1,"enum":["preemptible","non-preemptible"],"nullable":true},"PriorityClass":{"description":"Specifies the priority class for the workload, which determines its scheduling behavior. Valid values are: very-low, low, medium-low, medium, medium-high, high, and very-high. Each workload type has a default priority. To view the default priority for each workload type, use the GET /workload-types endpoint. 
Once you change the priority from the default value defined for that workload type, the preemptibility field is not automatically updated. Make sure to set the desired preemptibility value.","type":"string","nullable":true,"pattern":".*"},"Probes":{"description":"Probes are used to determine if the container is healthy and ready to accept traffic.","type":"object","properties":{"readiness":{"$ref":"#/components/schemas/Probe"}},"nullable":true},"Probe":{"type":"object","properties":{"initialDelaySeconds":{"description":"Number of seconds after the container has started before liveness or readiness probes are initiated.","type":"integer","format":"int32","minimum":0,"nullable":true},"periodSeconds":{"description":"How often (in seconds) to perform the probe.","type":"integer","format":"int32","minimum":1,"nullable":true},"timeoutSeconds":{"description":"Number of seconds after which the probe times out.","type":"integer","format":"int32","minimum":1,"nullable":true},"successThreshold":{"description":"Minimum consecutive successes for the probe to be considered successful after having failed.","type":"integer","format":"int32","minimum":1,"nullable":true},"failureThreshold":{"description":"When a probe fails, the number of times to try before giving up.","type":"integer","format":"int32","minimum":1,"nullable":true},"handler":{"$ref":"#/components/schemas/ProbeHandler"}},"nullable":true},"ProbeHandler":{"description":"The action taken to determine the health of the container. 
(mandatory)","type":"object","properties":{"httpGet":{"description":"An action based on HTTP Get requests.","type":"object","properties":{"path":{"description":"Path to access on the HTTP server, defaults to /.","type":"string","pattern":"^(\\x2F[a-zA-Z0-9\\-_.\\x2F]*)?$","nullable":true,"maxLength":2048},"port":{"description":"Number of the port to access on the container.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"host":{"description":"Host name to connect to, defaults to the pod IP.","type":"string","format":"hostname","nullable":true,"pattern":".*","maxLength":253},"scheme":{"$ref":"#/components/schemas/ProbeHandlerScheme"}}}},"nullable":true},"ProbeHandlerScheme":{"description":"Scheme to use for connecting to the host, defaults to HTTP.","type":"string","enum":["HTTP","HTTPS"],"nullable":true},"NimServiceReplicas":{"default":1,"description":"The number of replicas to deploy.","type":"integer","format":"int32","minimum":0,"maximum":1000,"nullable":true},"RunAsGid":{"description":"The group id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsGid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"RunAsUid":{"description":"The user id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsUid` field (optional). 
Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"NimServiceServingPort":{"description":"A port for accessing the inference service","properties":{"serviceType":{"$ref":"#/components/schemas/ServingPortServiceType"},"port":{"$ref":"#/components/schemas/ServingPortPort"},"grpcPort":{"$ref":"#/components/schemas/ServingPortGrpcPort"},"metricsPort":{"$ref":"#/components/schemas/ServingPortMetricsPort"},"exposeExternally":{"$ref":"#/components/schemas/ServingPortExposeExternally"},"exposedUrl":{"$ref":"#/components/schemas/ServingPortExposedUrl"},"exposedProtocol":{"$ref":"#/components/schemas/ServingPortExposedProtocol"}},"nullable":true,"type":"object"},"ServingPortServiceType":{"description":"The type of Kubernetes service to create for the inference deployment. Options include 'ClusterIP' (default), 'NodePort', 'LoadBalancer', and 'ExternalName'.","type":"string","default":"ClusterIP","enum":["ClusterIP","NodePort","LoadBalancer","ExternalName"],"nullable":true},"ServingPortPort":{"description":"The port that the container running the inference service exposes (mandatory).","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"ServingPortGrpcPort":{"description":"The GRPC port that the container running the inference service exposes.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"ServingPortMetricsPort":{"description":"The port where metrics are exposed, required only if it's different than the main port.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"ServingPortExposeExternally":{"description":"Indicates whether the inference serving endpoint should be accessible outside the cluster. If set to true, the endpoint will be exposed externally. 
To enable external access, your administrator must configure the cluster as described in the [inference requirements](https://run-ai-docs.nvidia.com/self-hosted/2.24/getting-started/installation/system-requirements#inference) section.","type":"boolean","nullable":true,"default":true},"ServingPortExposedUrl":{"description":"The custom URL to use for the serving port. If empty (default), an autogenerated URL will be used.","type":"string","nullable":true,"pattern":".*"},"ServingPortExposedProtocol":{"description":"The protocol to use for the exposed URL. If grpcPort is set, this defaults to grpc. Otherwise, it defaults to http.","type":"string","enum":["http","grpc"],"nullable":true},"Tolerations":{"description":"Set of tolerations to apply to the workload.","type":"array","items":{"$ref":"#/components/schemas/Toleration"},"maxItems":1000,"nullable":true},"Toleration":{"description":"Toleration details.","properties":{"name":{"description":"The name of the toleration.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"operator":{"$ref":"#/components/schemas/TolerationOperator"},"key":{"description":"The taint key that the toleration applies to. (mandatory)","type":"string","maxLength":253,"nullable":true,"pattern":".*"},"value":{"description":"The taint value the toleration matches to. Mandatory if operator is Equal, forbidden otherwise.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"effect":{"$ref":"#/components/schemas/TolerationEffect"},"seconds":{"description":"The period of time the toleration tolerates the taint. Valid only if effect is NoExecute.","type":"integer","minimum":1,"nullable":true},"exclude":{"description":"Use 'true' in case the toleration is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"TolerationOperator":{"description":"A key's relationship to the value. Equal uses key and value. 
Exists is equivalent to wildcard for value, so that a workload can tolerate all taints of a particular category. (mandatory)","type":"string","enum":["Equal","Exists"],"nullable":true},"TolerationEffect":{"description":"The taint effect to match. (mandatory)","type":"string","enum":["NoSchedule","NoExecute","PreferNoSchedule","Any"],"nullable":true},"NimServiceResponse":{"type":"object","required":["spec","metadata","desiredPhase"],"properties":{"metadata":{"$ref":"#/components/schemas/WorkloadV2Metadata"},"desiredPhase":{"$ref":"#/components/schemas/DesiredPhase"},"spec":{"$ref":"#/components/schemas/NimServiceSpec"}}},"WorkloadV2Metadata":{"allOf":[{"$ref":"#/components/schemas/WorkloadV2MetadataResponse"},{"$ref":"#/components/schemas/WorkloadV2MetadataAutoFill"}]},"WorkloadV2MetadataResponse":{"type":"object","required":["name","projectId"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"projectId":{"$ref":"#/components/schemas/ProjectId"},"priority":{"$ref":"#/components/schemas/PriorityClass"},"category":{"$ref":"#/components/schemas/Category"},"preemptibility":{"$ref":"#/components/schemas/Preemptibility"},"configuration":{"$ref":"#/components/schemas/WorkloadV2Configuration"}}},"WorkloadV2Configuration":{"type":"object","description":"Specifies the features configuration to apply to a workload.","properties":{"mnnvl":{"allOf":[{"$ref":"#/components/schemas/MnnvlMode"},{"type":"string","default":"None","nullable":true}]}}},"MnnvlMode":{"type":"string","enum":["Required","None"],"description":"MNNVL (Multi-Node NVLink) mode:\n- Required: The workload is configured to use MNNVL acceleration. The workload type and at least one project’s node pool must support MNNVL, or the request fails. 
When set, the workload is scheduled only on compatible nodes, and may remain pending until sufficient MNNVL-capable capacity is available.\n- None: The workload will not use MNNVL acceleration, even if MNNVL-capable nodes are available.\n"},"WorkloadV2MetadataAutoFill":{"type":"object","required":["id","gvk","projectName","clusterId","tenantId","departmentId","departmentName","createdAt","createdBy","updatedAt","updatedBy"],"properties":{"id":{"$ref":"#/components/schemas/WorkloadId3"},"gvk":{"$ref":"#/components/schemas/GVK"},"projectName":{"$ref":"#/components/schemas/ProjectName2"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"tenantId":{"$ref":"#/components/schemas/TenantId"},"departmentId":{"$ref":"#/components/schemas/DepartmentId3"},"departmentName":{"$ref":"#/components/schemas/DepartmentName1"},"createdAt":{"type":"string","format":"date-time","description":"The timestamp for when the workload was created."},"createdBy":{"type":"string","description":"Identifier of the user who created the workload.","maxLength":250,"format":".*"},"updatedAt":{"type":"string","format":"date-time","description":"The timestamp for the last time the workload was updated."},"updatedBy":{"type":"string","description":"Identifier of the user who last updated the workload.","maxLength":250,"format":".*"},"deletedAt":{"type":"string","format":"date-time","description":"The timestamp indicating when the workload was deleted.","nullable":true},"deletedBy":{"type":"string","maxLength":250,"format":".*","description":"Identifier of the user who deleted the workload.","nullable":true}}},"WorkloadId3":{"description":"A unique ID of the workload.","type":"string","format":"uuid"},"GVK":{"type":"object","description":"Specifies the Group, Version, and Kind (GVK) of the Kubernetes resource that defines the workload.","required":["group","version","kind"],"properties":{"group":{"description":"The API group of the Kubernetes 
resource.","type":"string","maxLength":253},"version":{"description":"The API version of the resource within the specified group.","type":"string","maxLength":250},"kind":{"description":"The type of Kubernetes resource being referenced.","type":"string","maxLength":250}}},"ProjectName2":{"type":"string","description":"The name of the project","minLength":1,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"TenantId":{"description":"The id of the tenant.","type":"integer","format":"int32"},"DepartmentId3":{"description":"The id of the department.","type":"string","minLength":1,"pattern":".*"},"DepartmentName1":{"type":"string","description":"The name of the department","minLength":1,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"DesiredPhase":{"description":"The desired phase of the workload.","type":"string","enum":["Running","Stopped","Deleted"]},"SubmissionErrorV2":{"allOf":[{"$ref":"#/components/schemas/Error"},{"$ref":"#/components/schemas/ComplianceIssuesV2"}]},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}},"ComplianceIssuesV2":{"properties":{"complianceIssues":{"type":"array","items":{"type":"object","required":["details","field"],"properties":{"field":{"type":"string"},"details":{"type":"string"},"rule":{"$ref":"#/components/schemas/PolicyRuleEnum"}}}}},"nullable":true,"type":"object"},"PolicyRuleEnum":{"description":"Indicates which validation rule (e.g., min, max, step, options, required, canEdit, canAdd) conflicted with policy restrictions, causing the asset or template to be rejected.","type":"string","nullable":true,"enum":["min","max","step","options","required","canEdit","canAdd","locked","blocked"]}},"responses":{"400SubmissionErrorV2":{"description":"Bad submission 
request.","content":{"application/json":{"schema":{"$ref":"#/components/schemas/SubmissionErrorV2"}}}},"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"409Conflict":{"description":"The specified resource already exists","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v2/workloads/nim-services":{"post":{"summary":"Create a NVIDIA NIM service. [Experimental]","description":"Create a NVIDIA NIM service","operationId":"create_nim_service","tags":["NVIDIA NIM"],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/NimServiceCreateRequest"}}}},"responses":{"202":{"description":"Workload creation accepted","content":{"application/json":{"schema":{"$ref":"#/components/schemas/NimServiceResponse"}}}},"400":{"$ref":"#/components/responses/400SubmissionErrorV2"},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"409":{"$ref":"#/components/responses/409Conflict"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Get a NVIDIA NIM service. \[Experimental]

> Retrieve details of a specific NVIDIA NIM service, by id

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.24"},"tags":[{"name":"NVIDIA NIM","description":"The NVIDIA NIM API provides endpoints to create and manage workloads that deploy NVIDIA Inference Microservices (NIM) through the NIM Operator. These workloads package optimized NVIDIA model servers and run as managed services on the NVIDIA Run:ai platform.\nEach request includes NVIDIA Run:ai scheduling metadata (for example, project, priority, and category) and a NIM service specification that defines the container image, compute resources, environment variables, storage, and networking configuration. Once submitted, NVIDIA Run:ai handles scheduling, orchestration, and lifecycle management of the NIM service to ensure reliable and efficient model serving.\n"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadV2Id":{"name":"WorkloadV2Id","in":"path","required":true,"schema":{"type":"string","format":"uuid","description":"The ID of the 
workload."}}},"schemas":{"NimServiceResponse":{"type":"object","required":["spec","metadata","desiredPhase"],"properties":{"metadata":{"$ref":"#/components/schemas/WorkloadV2Metadata"},"desiredPhase":{"$ref":"#/components/schemas/DesiredPhase"},"spec":{"$ref":"#/components/schemas/NimServiceSpec"}}},"WorkloadV2Metadata":{"allOf":[{"$ref":"#/components/schemas/WorkloadV2MetadataResponse"},{"$ref":"#/components/schemas/WorkloadV2MetadataAutoFill"}]},"WorkloadV2MetadataResponse":{"type":"object","required":["name","projectId"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"projectId":{"$ref":"#/components/schemas/ProjectId"},"priority":{"$ref":"#/components/schemas/PriorityClass"},"category":{"$ref":"#/components/schemas/Category"},"preemptibility":{"$ref":"#/components/schemas/Preemptibility"},"configuration":{"$ref":"#/components/schemas/WorkloadV2Configuration"}}},"WorkloadName":{"description":"The name of the workload.","type":"string","minLength":1,"pattern":".*"},"ProjectId":{"description":"The id of the project.","type":"string","pattern":".*"},"PriorityClass":{"description":"Specifies the priority class for the workload, which determines its scheduling behavior. Valid values are: very-low, low, medium-low, medium, medium-high, high, and very-high. Each workload type has a default priority. To view the default priority for each workload type, use the GET /workload-types endpoint. Once you change the priority from the default value defined for that workload type, the preemptibility field is not automatically updated. Make sure to set the desired preemptibility value.","type":"string","nullable":true,"pattern":".*"},"Category":{"description":"Specify the workload category assigned to the workload. 
Categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.","type":"string","nullable":true,"pattern":".*"},"Preemptibility":{"description":"Specifies whether the workload can be preempted by higher-priority workloads. Valid values are preemptible and non-preemptible. If explicitly set, this value takes precedence. If not set, the system derives the preemptibility from the priorityClassName field, ensuring backward compatibility. Each workload type has a default preemptibility. To view the default preemptibility for each workload type, use the GET /workload-types endpoint.","type":"string","minLength":1,"enum":["preemptible","non-preemptible"],"nullable":true},"WorkloadV2Configuration":{"type":"object","description":"Specifies the features configuration to apply to a workload.","properties":{"mnnvl":{"allOf":[{"$ref":"#/components/schemas/MnnvlMode"},{"type":"string","default":"None","nullable":true}]}}},"MnnvlMode":{"type":"string","enum":["Required","None"],"description":"MNNVL (Multi-Node NVLink) mode:\n- Required: The workload is configured to use MNNVL acceleration. The workload type and at least one project’s node pool must support MNNVL, or the request fails. 
When set, the workload is scheduled only on compatible nodes, and may remain pending until sufficient MNNVL-capable capacity is available.\n- None: The workload will not use MNNVL acceleration, even if MNNVL-capable nodes are available.\n"},"WorkloadV2MetadataAutoFill":{"type":"object","required":["id","gvk","projectName","clusterId","tenantId","departmentId","departmentName","createdAt","createdBy","updatedAt","updatedBy"],"properties":{"id":{"$ref":"#/components/schemas/WorkloadId3"},"gvk":{"$ref":"#/components/schemas/GVK"},"projectName":{"$ref":"#/components/schemas/ProjectName2"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"tenantId":{"$ref":"#/components/schemas/TenantId"},"departmentId":{"$ref":"#/components/schemas/DepartmentId3"},"departmentName":{"$ref":"#/components/schemas/DepartmentName1"},"createdAt":{"type":"string","format":"date-time","description":"The timestamp for when the workload was created."},"createdBy":{"type":"string","description":"Identifier of the user who created the workload.","maxLength":250,"format":".*"},"updatedAt":{"type":"string","format":"date-time","description":"The timestamp for the last time the workload was updated."},"updatedBy":{"type":"string","description":"Identifier of the user who last updated the workload.","maxLength":250,"format":".*"},"deletedAt":{"type":"string","format":"date-time","description":"The timestamp indicating when the workload was deleted.","nullable":true},"deletedBy":{"type":"string","maxLength":250,"format":".*","description":"Identifier of the user who deleted the workload.","nullable":true}}},"WorkloadId3":{"description":"A unique ID of the workload.","type":"string","format":"uuid"},"GVK":{"type":"object","description":"Specifies the Group, Version, and Kind (GVK) of the Kubernetes resource that defines the workload.","required":["group","version","kind"],"properties":{"group":{"description":"The API group of the Kubernetes 
resource.","type":"string","maxLength":253},"version":{"description":"The API version of the resource within the specified group.","type":"string","maxLength":250},"kind":{"description":"The type of Kubernetes resource being referenced.","type":"string","maxLength":250}}},"ProjectName2":{"type":"string","description":"The name of the project","minLength":1,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"TenantId":{"description":"The id of the tenant.","type":"integer","format":"int32"},"DepartmentId3":{"description":"The id of the department.","type":"string","minLength":1,"pattern":".*"},"DepartmentName1":{"type":"string","description":"The name of the department","minLength":1,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"DesiredPhase":{"description":"The desired phase of the workload.","type":"string","enum":["Running","Stopped","Deleted"]},"NimServiceSpec":{"allOf":[{"nullable":true,"properties":{"annotations":{"$ref":"#/components/schemas/Annotations"},"autoscaling":{"nullable":true,"properties":{"maxReplicas":{"$ref":"#/components/schemas/AutoScalingMaxReplicas"},"metric":{"$ref":"#/components/schemas/AutoScalingMetricNim"},"metricThreshold":{"$ref":"#/components/schemas/AutoScalingMetricThreshold"},"minReplicas":{"$ref":"#/components/schemas/AutoScalingMinReplicas"},"scaleWindowSeconds":{"$ref":"#/components/schemas/AutoScalingScaleWindowSeconds"}},"type":"object"},"category":{"$ref":"#/components/schemas/Category"},"compute":{"nullable":true,"properties":{"cpuCoreLimit":{"$ref":"#/components/schemas/CpuCoreLimit"},"cpuCoreRequest":{"$ref":"#/components/schemas/CpuCoreRequest"},"cpuMemoryLimit":{"$ref":"#/components/schemas/CpuMemoryLimit"},"cpuMemoryRequest":{"$ref":"#/components/schemas/CpuMemoryRequest"},"gpuDevicesRequest":{"$ref":"#/components/schemas/GpuDevicesRequest"},"gpuMemoryLimit":{"$ref":"#/components/schemas/GpuMemoryLimit"},"gpuMemoryRequest":{"$ref":"#/component
s/schemas/GpuMemoryRequest"},"gpuPortionLimit":{"$ref":"#/components/schemas/GpuPortionLimit"},"gpuPortionRequest":{"$ref":"#/components/schemas/GpuPortionRequest"},"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"}},"type":"object"},"environmentVariables":{"$ref":"#/components/schemas/EnvironmentVariables"},"image":{"$ref":"#/components/schemas/Image"},"imagePullPolicy":{"$ref":"#/components/schemas/ImagePullPolicy"},"imagePullSecrets":{"$ref":"#/components/schemas/ImagePullSecrets"},"labels":{"$ref":"#/components/schemas/Labels"},"modelStore":{"nullable":true,"properties":{"nimCache":{"$ref":"#/components/schemas/NimCache"},"pvc":{"$ref":"#/components/schemas/NimServicePvcFields"}},"type":"object"},"multiNode":{"$ref":"#/components/schemas/NimServiceMultiNode"},"ngcAuthSecret":{"$ref":"#/components/schemas/NimServiceNgcAuthSecret"},"nodePools":{"$ref":"#/components/schemas/NodePools"},"preemptibility":{"$ref":"#/components/schemas/Preemptibility"},"priorityClass":{"$ref":"#/components/schemas/PriorityClass"},"probes":{"$ref":"#/components/schemas/Probes"},"replicas":{"$ref":"#/components/schemas/NimServiceReplicas"},"security":{"nullable":true,"properties":{"runAsGid":{"$ref":"#/components/schemas/RunAsGid"},"runAsUid":{"$ref":"#/components/schemas/RunAsUid"}},"type":"object"},"servingPort":{"$ref":"#/components/schemas/NimServiceServingPort"},"tolerations":{"$ref":"#/components/schemas/Tolerations"}},"type":"object"}]},"Annotations":{"description":"Set of annotations to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Annotation"},"maxItems":1000,"nullable":true},"Annotation":{"description":"Annotation details to be populated into the container.","properties":{"name":{"description":"The name of the annotation (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the 
annotation.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the annotation is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"AutoScalingMaxReplicas":{"description":"The maximum number of replicas for autoscaling. Defaults to minReplicas. Must be no less than minReplicas.","type":"integer","format":"int32","minimum":1,"nullable":true},"AutoScalingMetricNim":{"description":"The metric to use for autoscaling (mandatory).","type":"string","pattern":"^[a-zA-Z_:][a-zA-Z0-9_:]*$","nullable":true},"AutoScalingMetricThreshold":{"description":"The threshold to use with the specified metric for autoscaling (mandatory).","type":"integer","format":"int32","nullable":true},"AutoScalingMinReplicas":{"description":"The minimum number of replicas for autoscaling. Defaults to 1.","type":"integer","format":"int32","minimum":1,"nullable":true},"AutoScalingScaleWindowSeconds":{"description":"The time window for autoscaling decisions, in seconds. Defaults to 300 seconds.","type":"integer","format":"int32","minimum":60,"maximum":3600,"nullable":true},"CpuCoreLimit":{"description":"Limitations on the number of CPUs consumed by the workload (0.5, 1, etc.). The system guarantees that this workload will not be able to consume more than this amount of CPUs.","format":"double","type":"number","nullable":true,"minimum":0},"CpuCoreRequest":{"description":"CPU units to allocate for the created workload (0.5, 1, etc.). The workload will receive at least this amount of CPU. Note that the workload will not be scheduled unless the system can guarantee this amount of CPUs to the workload.","format":"double","type":"number","nullable":true,"minimum":0},"CpuMemoryLimit":{"description":"Limitations on the CPU memory to allocate for this workload (1G, 20M, etc.). 
The system guarantees that this workload will not be able to consume more than this amount of memory. The workload will receive an error when trying to allocate more memory than this limit.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"CpuMemoryRequest":{"description":"The amount of CPU memory to allocate for this workload (1G, 20M, etc.). The workload will receive at least this amount of memory. Note that the workload will not be scheduled unless the system can guarantee this amount of memory to the workload.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuDevicesRequest":{"description":"Requested number of GPU devices. Currently if more than one device is requested, it is not possible to provide values for gpuMemory or gpuPortion.","type":"integer","format":"int32","nullable":true,"minimum":0},"GpuMemoryLimit":{"description":"Limitation on the memory consumed by the workload, per GPU device. The gpuMemoryLimit must be no less than gpuMemoryRequest.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuMemoryRequest":{"description":"Required if and only if gpuRequestType is memory. States the GPU memory to allocate for the created workload, per GPU device. Note that the workload will not be scheduled unless the system can guarantee this amount of GPU memory to the workload.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuPortionLimit":{"description":"Limitations on the portion consumed by the workload, per GPU device. The gpuPortionLimit must be no less than the gpuPortionRequest.","type":"number","format":"double","nullable":true,"minimum":0},"GpuPortionRequest":{"description":"Required if and only if gpuRequestType is portion. 
States the portion of the GPU to allocate for the created workload, per GPU device, between 0 and 1. The default is no allocated GPUs.","type":"number","format":"double","nullable":true,"minimum":0},"GpuRequestType":{"description":"Sets the unit type for GPU resource requests, stated in terms of portion or memory. This unit type applies to the other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDevicesRequest = 1`, the request type can be stated as `portion` or `memory`.","type":"string","minLength":1,"enum":["portion","memory"],"nullable":true},"EnvironmentVariables":{"description":"Set of environment variables to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/EnvironmentVariable"},"maxItems":1000,"nullable":true},"EnvironmentVariable":{"description":"Details of an environment variable which is populated into the container.","properties":{"name":{"description":"The name of the environment variable. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the environment variable. 
(mutually exclusive with secret, userCredential, configMap and podFieldRef)","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"secret":{"$ref":"#/components/schemas/EnvironmentVariableSecret"},"configMap":{"$ref":"#/components/schemas/EnvironmentVariableConfigMap"},"podFieldRef":{"$ref":"#/components/schemas/EnvironmentVariablePodFieldReference"},"userCredential":{"$ref":"#/components/schemas/EnvironmentVariableUserCredential"},"exclude":{"description":"Use 'true' in case the environment variable is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true},"description":{"description":"Description of the environment variable.","type":"string","maxLength":250,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableSecret":{"description":"Details of the secret and key used to populate the environment variable","properties":{"name":{"description":"The name of the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableConfigMap":{"description":"Details of the configMap and key used to populate the environment variable","properties":{"name":{"description":"The name of the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariablePodFieldReference":{"description":"Details of the field-reference and key used to populate the environment variable","properties":{"path":{"description":"The field path resource. 
(mandatory)","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableUserCredential":{"description":"Defines a reference to a user-created credential and a specific key within that credential whose value will populate the environment variable. User credentials can only be accessed by the user who created them.","properties":{"name":{"description":"The name of the user credential.  (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true},"key":{"description":"The key in the user credential resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true}},"nullable":true,"type":"object"},"Image":{"description":"Docker image name. For more information, see [Images](https://kubernetes.io/docs/concepts/containers/images). The image name is mandatory for creating a workload.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"ImagePullPolicy":{"description":"Image pull policy. Defaults to `Always` if `:latest` tag is specified, otherwise it is `IfNotPresent`.","type":"string","minLength":1,"enum":["Always","Never","IfNotPresent"],"nullable":true},"ImagePullSecrets":{"description":"A list of references to Kubernetes secrets in the same namespace used for pulling container images.","type":"array","items":{"$ref":"#/components/schemas/ImagePullSecret"},"maxItems":1000,"nullable":true},"ImagePullSecret":{"description":"A reference to a secret in the same namespace used to pull container images.","properties":{"name":{"type":"string","description":"The name of the Kubernetes secret containing the image pull credentials.","pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$","maxLength":253},"userCredential":{"type":"boolean","description":"Indicates whether the secret is a user credential. 
Set to true if the secret was created by the user and is only accessible by them.","nullable":true},"exclude":{"description":"Use 'true' in case the secret is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Labels":{"description":"Set of labels to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Label"},"maxItems":1000,"nullable":true},"Label":{"description":"Label details to be populated into the container.","properties":{"name":{"description":"The name of the label (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the label.","type":"string","nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"NimCache":{"description":"The specification of a NIM cache volume.","type":"object","properties":{"name":{"description":"The NIMCache resource name (mandatory).","type":"string","minLength":1,"nullable":true},"profile":{"description":"The NIM profile to use (optional).","type":"string","minLength":1,"nullable":true}},"nullable":true},"NimServicePvcFields":{"properties":{"existingPvc":{"description":"Verify existing PVC. PVC is assumed to exist when set to `true`. If set to `false`, the PVC will be created, if it does not exist.","type":"boolean","default":false,"nullable":true},"claimName":{"description":"Name for the PVC. Allow referencing it across workloads. 
If not provided, a name based on the workload name and scope will be auto-generated.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"readOnly":{"description":"Permit only read access to PVC.","type":"boolean","default":false,"nullable":true},"claimInfo":{"$ref":"#/components/schemas/ClaimInfo"}},"nullable":true,"type":"object"},"ClaimInfo":{"description":"Claim information for the newly created PVC. The information should not be provided when attempting to use existing PVC.","properties":{"size":{"$ref":"#/components/schemas/PvcClaimSize"},"storageClass":{"description":"Storage class name to associate with the PVC. This parameter may be omitted if there is a single storage class in the system, or you are using the default storage class. For more information, see [Storage class](https://kubernetes.io/docs/concepts/storage/storage-classes).","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"accessModes":{"$ref":"#/components/schemas/PvcAccessModes"},"volumeMode":{"$ref":"#/components/schemas/PvcVolumeMode"},"addedAttrValues":{"$ref":"#/components/schemas/PvcAddedAttrValues"}},"nullable":true,"type":"object"},"PvcClaimSize":{"description":"Requested size for the PVC. Mandatory when existingPvc is false. 
Recommended sizes: TB/GB/MB/TIB/GIB/MIB","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"PvcAccessModes":{"description":"Default access mode(s) applied to newly created PVCs unless explicitly overridden.","properties":{"readWriteOnce":{"description":"Mount the volume as read/write by a single node.","type":"boolean","default":true,"nullable":true},"readOnlyMany":{"description":"Mount the volume as read-only by many nodes.","type":"boolean","default":false,"nullable":true},"readWriteMany":{"description":"Mount the volume as read/write by many nodes.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PvcVolumeMode":{"description":"Default volume mode for the PVC. Choose between Filesystem (default) or Block.","type":"string","enum":["Filesystem","Block"],"nullable":true},"PvcAddedAttrValues":{"description":"an optional array of key-values pairs that are written as annotations on the created PVC. the allowed attributes are determined according to the storage class configuration (see k8s-objects-tracker for further info).","type":"array","items":{"$ref":"#/components/schemas/PvcAddedAttrValue"},"maxItems":1000},"PvcAddedAttrValue":{"type":"object","required":["key"],"properties":{"key":{"type":"string","minLength":1,"maxLength":63,"pattern":"^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$"},"value":{"type":"string","pattern":".*","maxLength":10000}}},"NimServiceMultiNode":{"description":"Defines whether the NIM service runs as a multi-node deployment. 
If workers is set to 1 or more, the service runs in multi-node.","properties":{"workers":{"$ref":"#/components/schemas/NimServiceWorkers"}},"nullable":true,"type":"object"},"NimServiceWorkers":{"description":"Specifies the number of worker nodes to use when running the NIM service in multi-node.","type":"integer","format":"int32","minimum":1,"maximum":1000,"nullable":true},"NimServiceNgcAuthSecret":{"description":"The name of a Kubernetes secret containing the NGC access credentials. The secret must contain a key named NGC_API_KEY with the API key as the value.","type":"string","pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$","nullable":true},"NodePools":{"description":"A prioritized list of node pools for the scheduler to run the workload on. The scheduler will always try to use the first node pool before moving to the next one if the first is not available.","type":"array","items":{"type":"string","maxLength":63,"pattern":".*"},"maxItems":1000,"nullable":true},"Probes":{"description":"Probes are used to determine if the container is healthy and ready to accept traffic.","type":"object","properties":{"readiness":{"$ref":"#/components/schemas/Probe"}},"nullable":true},"Probe":{"type":"object","properties":{"initialDelaySeconds":{"description":"Number of seconds after the container has started before liveness or readiness probes are initiated.","type":"integer","format":"int32","minimum":0,"nullable":true},"periodSeconds":{"description":"How often (in seconds) to perform the probe.","type":"integer","format":"int32","minimum":1,"nullable":true},"timeoutSeconds":{"description":"Number of seconds after which the probe times out.","type":"integer","format":"int32","minimum":1,"nullable":true},"successThreshold":{"description":"Minimum consecutive successes for the probe to be considered successful after having failed.","type":"integer","format":"int32","minimum":1,"nullable":true},"failureThreshold":{"description":"When a probe fails, the number of times to try before 
giving up.","type":"integer","format":"int32","minimum":1,"nullable":true},"handler":{"$ref":"#/components/schemas/ProbeHandler"}},"nullable":true},"ProbeHandler":{"description":"The action taken to determine the health of the container. (mandatory)","type":"object","properties":{"httpGet":{"description":"An action based on HTTP Get requests.","type":"object","properties":{"path":{"description":"Path to access on the HTTP server, defaults to /.","type":"string","pattern":"^(\\x2F[a-zA-Z0-9\\-_.\\x2F]*)?$","nullable":true,"maxLength":2048},"port":{"description":"Number of the port to access on the container.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"host":{"description":"Host name to connect to, defaults to the pod IP.","type":"string","format":"hostname","nullable":true,"pattern":".*","maxLength":253},"scheme":{"$ref":"#/components/schemas/ProbeHandlerScheme"}}}},"nullable":true},"ProbeHandlerScheme":{"description":"Scheme to use for connecting to the host, defaults to HTTP.","type":"string","enum":["HTTP","HTTPS"],"nullable":true},"NimServiceReplicas":{"default":1,"description":"The number of replicas to deploy.","type":"integer","format":"int32","minimum":0,"maximum":1000,"nullable":true},"RunAsGid":{"description":"The group id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsGid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"RunAsUid":{"description":"The user id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsUid` field (optional). 
Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"NimServiceServingPort":{"description":"A port for accessing the inference service","properties":{"serviceType":{"$ref":"#/components/schemas/ServingPortServiceType"},"port":{"$ref":"#/components/schemas/ServingPortPort"},"grpcPort":{"$ref":"#/components/schemas/ServingPortGrpcPort"},"metricsPort":{"$ref":"#/components/schemas/ServingPortMetricsPort"},"exposeExternally":{"$ref":"#/components/schemas/ServingPortExposeExternally"},"exposedUrl":{"$ref":"#/components/schemas/ServingPortExposedUrl"},"exposedProtocol":{"$ref":"#/components/schemas/ServingPortExposedProtocol"}},"nullable":true,"type":"object"},"ServingPortServiceType":{"description":"The type of Kubernetes service to create for the inference deployment. Options include 'ClusterIP' (default), 'NodePort', 'LoadBalancer', and 'ExternalName'.","type":"string","default":"ClusterIP","enum":["ClusterIP","NodePort","LoadBalancer","ExternalName"],"nullable":true},"ServingPortPort":{"description":"The port that the container running the inference service exposes (mandatory).","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"ServingPortGrpcPort":{"description":"The GRPC port that the container running the inference service exposes.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"ServingPortMetricsPort":{"description":"The port where metrics are exposed, required only if it's different than the main port.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"ServingPortExposeExternally":{"description":"Indicates whether the inference serving endpoint should be accessible outside the cluster. If set to true, the endpoint will be exposed externally. 
To enable external access, your administrator must configure the cluster as described in the [inference requirements](https://run-ai-docs.nvidia.com/self-hosted/2.24/getting-started/installation/system-requirements#inference) section.","type":"boolean","nullable":true,"default":true},"ServingPortExposedUrl":{"description":"The custom URL to use for the serving port. If empty (default), an autogenerated URL will be used.","type":"string","nullable":true,"pattern":".*"},"ServingPortExposedProtocol":{"description":"The protocol to use for the exposed URL. If grpcPort is set, this defaults to grpc. Otherwise, it defaults to http.","type":"string","enum":["http","grpc"],"nullable":true},"Tolerations":{"description":"Set of tolerations to apply to the workload.","type":"array","items":{"$ref":"#/components/schemas/Toleration"},"maxItems":1000,"nullable":true},"Toleration":{"description":"Toleration details.","properties":{"name":{"description":"The name of the toleration.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"operator":{"$ref":"#/components/schemas/TolerationOperator"},"key":{"description":"The taint key that the toleration applies to. (mandatory)","type":"string","maxLength":253,"nullable":true,"pattern":".*"},"value":{"description":"The taint value the toleration matches to. Mandatory if operator is Equal, forbidden otherwise.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"effect":{"$ref":"#/components/schemas/TolerationEffect"},"seconds":{"description":"The period of time the toleration tolerates the taint. Valid only if effect is NoExecute.","type":"integer","minimum":1,"nullable":true},"exclude":{"description":"Use 'true' in case the toleration is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"TolerationOperator":{"description":"A key's relationship to the value. Equal uses key and value. 
Exists is equivalent to wildcard for value, so that a workload can tolerate all taints of a particular category. (mandatory)","type":"string","enum":["Equal","Exists"],"nullable":true},"TolerationEffect":{"description":"The taint effect to match. (mandatory)","type":"string","enum":["NoSchedule","NoExecute","PreferNoSchedule","Any"],"nullable":true},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"unexpected error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v2/workloads/nim-services/{WorkloadV2Id}":{"get":{"summary":"Get a NVIDIA NIM service. [Experimental]","description":"Retrieve details of a specific NVIDIA NIM service, by id","operationId":"get_nim_service_by_id","tags":["NVIDIA NIM"],"parameters":[{"$ref":"#/components/parameters/WorkloadV2Id"}],"responses":{"200":{"description":"Successfully retrieved the workload","content":{"application/json":{"schema":{"$ref":"#/components/schemas/NimServiceResponse"}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```

## Update NVIDIA NIM service spec. \[Experimental]

> Update the specification of an existing NVIDIA NIM service.

```json
{"openapi":"3.0.3","info":{"title":"NVIDIA Run:ai","version":"2.24"},"tags":[{"name":"NVIDIA NIM","description":"The NVIDIA NIM API provides endpoints to create and manage workloads that deploy NVIDIA Inference Microservices (NIM) through the NIM Operator. These workloads package optimized NVIDIA model servers and run as managed services on the NVIDIA Run:ai platform.\nEach request includes NVIDIA Run:ai scheduling metadata (for example, project, priority, and category) and a NIM service specification that defines the container image, compute resources, environment variables, storage, and networking configuration. Once submitted, NVIDIA Run:ai handles scheduling, orchestration, and lifecycle management of the NIM service to ensure reliable and efficient model serving.\n"}],"servers":[{"url":"https://app.run.ai"}],"security":[{"bearerAuth":[]}],"components":{"securitySchemes":{"bearerAuth":{"type":"http","scheme":"bearer","bearerFormat":"JWT","description":"Bearer authentication"}},"parameters":{"WorkloadV2Id":{"name":"WorkloadV2Id","in":"path","required":true,"schema":{"type":"string","format":"uuid","description":"The ID of the workload."}}},"schemas":{"NimServiceUpdateRequest":{"type":"object","required":["spec"],"properties":{"spec":{"$ref":"#/components/schemas/NimServiceSpec"},"fieldsToRemove":{"type":"array","description":"List of field names to remove from the NIM service spec.\nNote: replicas and autoscaling are mutually exclusive - adding one automatically removes the 
other.\n","items":{"$ref":"#/components/schemas/NimServiceRemovableFields"}}}},"NimServiceSpec":{"allOf":[{"nullable":true,"properties":{"annotations":{"$ref":"#/components/schemas/Annotations"},"autoscaling":{"nullable":true,"properties":{"maxReplicas":{"$ref":"#/components/schemas/AutoScalingMaxReplicas"},"metric":{"$ref":"#/components/schemas/AutoScalingMetricNim"},"metricThreshold":{"$ref":"#/components/schemas/AutoScalingMetricThreshold"},"minReplicas":{"$ref":"#/components/schemas/AutoScalingMinReplicas"},"scaleWindowSeconds":{"$ref":"#/components/schemas/AutoScalingScaleWindowSeconds"}},"type":"object"},"category":{"$ref":"#/components/schemas/Category"},"compute":{"nullable":true,"properties":{"cpuCoreLimit":{"$ref":"#/components/schemas/CpuCoreLimit"},"cpuCoreRequest":{"$ref":"#/components/schemas/CpuCoreRequest"},"cpuMemoryLimit":{"$ref":"#/components/schemas/CpuMemoryLimit"},"cpuMemoryRequest":{"$ref":"#/components/schemas/CpuMemoryRequest"},"gpuDevicesRequest":{"$ref":"#/components/schemas/GpuDevicesRequest"},"gpuMemoryLimit":{"$ref":"#/components/schemas/GpuMemoryLimit"},"gpuMemoryRequest":{"$ref":"#/components/schemas/GpuMemoryRequest"},"gpuPortionLimit":{"$ref":"#/components/schemas/GpuPortionLimit"},"gpuPortionRequest":{"$ref":"#/components/schemas/GpuPortionRequest"},"gpuRequestType":{"$ref":"#/components/schemas/GpuRequestType"}},"type":"object"},"environmentVariables":{"$ref":"#/components/schemas/EnvironmentVariables"},"image":{"$ref":"#/components/schemas/Image"},"imagePullPolicy":{"$ref":"#/components/schemas/ImagePullPolicy"},"imagePullSecrets":{"$ref":"#/components/schemas/ImagePullSecrets"},"labels":{"$ref":"#/components/schemas/Labels"},"modelStore":{"nullable":true,"properties":{"nimCache":{"$ref":"#/components/schemas/NimCache"},"pvc":{"$ref":"#/components/schemas/NimServicePvcFields"}},"type":"object"},"multiNode":{"$ref":"#/components/schemas/NimServiceMultiNode"},"ngcAuthSecret":{"$ref":"#/components/schemas/NimServiceNgcAuthSecret"},"
nodePools":{"$ref":"#/components/schemas/NodePools"},"preemptibility":{"$ref":"#/components/schemas/Preemptibility"},"priorityClass":{"$ref":"#/components/schemas/PriorityClass"},"probes":{"$ref":"#/components/schemas/Probes"},"replicas":{"$ref":"#/components/schemas/NimServiceReplicas"},"security":{"nullable":true,"properties":{"runAsGid":{"$ref":"#/components/schemas/RunAsGid"},"runAsUid":{"$ref":"#/components/schemas/RunAsUid"}},"type":"object"},"servingPort":{"$ref":"#/components/schemas/NimServiceServingPort"},"tolerations":{"$ref":"#/components/schemas/Tolerations"}},"type":"object"}]},"Annotations":{"description":"Set of annotations to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Annotation"},"maxItems":1000,"nullable":true},"Annotation":{"description":"Annotation details to be populated into the container.","properties":{"name":{"description":"The name of the annotation (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the annotation.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the annotation is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"AutoScalingMaxReplicas":{"description":"The maximum number of replicas for autoscaling. Defaults to minReplicas. 
Must be no less than minReplicas.","type":"integer","format":"int32","minimum":1,"nullable":true},"AutoScalingMetricNim":{"description":"The metric to use for autoscaling (mandatory).","type":"string","pattern":"^[a-zA-Z_:][a-zA-Z0-9_:]*$","nullable":true},"AutoScalingMetricThreshold":{"description":"The threshold to use with the specified metric for autoscaling (mandatory).","type":"integer","format":"int32","nullable":true},"AutoScalingMinReplicas":{"description":"The minimum number of replicas for autoscaling. Defaults to 1.","type":"integer","format":"int32","minimum":1,"nullable":true},"AutoScalingScaleWindowSeconds":{"description":"The time window for autoscaling decisions, in seconds. Defaults to 300 seconds.","type":"integer","format":"int32","minimum":60,"maximum":3600,"nullable":true},"Category":{"description":"Specify the workload category assigned to the workload. Categories are used to classify and monitor different types of workloads within the NVIDIA Run:ai platform.","type":"string","nullable":true,"pattern":".*"},"CpuCoreLimit":{"description":"Limitations on the number of CPUs consumed by the workload (0.5, 1, etc.). The system guarantees that this workload will not be able to consume more than this amount of CPUs.","format":"double","type":"number","nullable":true,"minimum":0},"CpuCoreRequest":{"description":"CPU units to allocate for the created workload (0.5, 1, etc.). The workload will receive at least this amount of CPU. Note that the workload will not be scheduled unless the system can guarantee this amount of CPUs to the workload.","format":"double","type":"number","nullable":true,"minimum":0},"CpuMemoryLimit":{"description":"Limitations on the CPU memory to allocate for this workload (1G, 20M, etc.). The system guarantees that this workload will not be able to consume more than this amount of memory. 
The workload will receive an error when trying to allocate more memory than this limit.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"CpuMemoryRequest":{"description":"The amount of CPU memory to allocate for this workload (1G, 20M, etc.). The workload will receive at least this amount of memory. Note that the workload will not be scheduled unless the system can guarantee this amount of memory to the workload.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuDevicesRequest":{"description":"Requested number of GPU devices. Currently, if more than one device is requested, it is not possible to provide values for gpuMemory or gpuPortion.","type":"integer","format":"int32","nullable":true,"minimum":0},"GpuMemoryLimit":{"description":"Limitation on the memory consumed by the workload, per GPU device. The gpuMemoryLimit must be no less than gpuMemoryRequest.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuMemoryRequest":{"description":"Required if and only if gpuRequestType is memory. States the GPU memory to allocate for the created workload, per GPU device. Note that the workload will not be scheduled unless the system can guarantee this amount of GPU memory to the workload.","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"GpuPortionLimit":{"description":"Limitations on the portion consumed by the workload, per GPU device. The gpuPortionLimit must be no less than the gpuPortionRequest.","type":"number","format":"double","nullable":true,"minimum":0},"GpuPortionRequest":{"description":"Required if and only if gpuRequestType is portion. States the portion of the GPU to allocate for the created workload, per GPU device, between 0 and 1. 
The default is no allocated GPUs.","type":"number","format":"double","nullable":true,"minimum":0},"GpuRequestType":{"description":"Sets the unit type for GPU resource requests, stated in terms of portion or memory. The selected unit type applies to the other GPU request fields. If `gpuDevicesRequest > 1`, only `portion` is supported. If `gpuDevicesRequest = 1`, the request type can be stated as `portion` or `memory`.","type":"string","minLength":1,"enum":["portion","memory"],"nullable":true},"EnvironmentVariables":{"description":"Set of environment variables to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/EnvironmentVariable"},"maxItems":1000,"nullable":true},"EnvironmentVariable":{"description":"Details of an environment variable which is populated into the container.","properties":{"name":{"description":"The name of the environment variable. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the environment variable. 
(mutually exclusive with secret, userCredential, configMap and podFieldRef)","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"secret":{"$ref":"#/components/schemas/EnvironmentVariableSecret"},"configMap":{"$ref":"#/components/schemas/EnvironmentVariableConfigMap"},"podFieldRef":{"$ref":"#/components/schemas/EnvironmentVariablePodFieldReference"},"userCredential":{"$ref":"#/components/schemas/EnvironmentVariableUserCredential"},"exclude":{"description":"Use 'true' in case the environment variable is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true},"description":{"description":"Description of the environment variable.","type":"string","maxLength":250,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableSecret":{"description":"Details of the secret and key used to populate the environment variable.","properties":{"name":{"description":"The name of the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the secret resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableConfigMap":{"description":"Details of the configMap and key used to populate the environment variable.","properties":{"name":{"description":"The name of the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"key":{"description":"The key in the config-map resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariablePodFieldReference":{"description":"Details of the field reference used to populate the environment variable.","properties":{"path":{"description":"The field path resource. 
(mandatory)","type":"string","minLength":1,"maxLength":10000,"nullable":true,"pattern":".*"}},"nullable":true,"type":"object"},"EnvironmentVariableUserCredential":{"description":"Defines a reference to a user-created credential and a specific key within that credential whose value will populate the environment variable. User credentials can only be accessed by the user who created them.","properties":{"name":{"description":"The name of the user credential.  (mandatory)","type":"string","minLength":1,"maxLength":253,"nullable":true},"key":{"description":"The key in the user credential resource. (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true}},"nullable":true,"type":"object"},"Image":{"description":"Docker image name. For more information, see [Images](https://kubernetes.io/docs/concepts/containers/images). The image name is mandatory for creating a workload.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"ImagePullPolicy":{"description":"Image pull policy. Defaults to `Always` if `:latest` tag is specified, otherwise it is `IfNotPresent`.","type":"string","minLength":1,"enum":["Always","Never","IfNotPresent"],"nullable":true},"ImagePullSecrets":{"description":"A list of references to Kubernetes secrets in the same namespace used for pulling container images.","type":"array","items":{"$ref":"#/components/schemas/ImagePullSecret"},"maxItems":1000,"nullable":true},"ImagePullSecret":{"description":"A reference to a secret in the same namespace used to pull container images.","properties":{"name":{"type":"string","description":"The name of the Kubernetes secret containing the image pull credentials.","pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$","maxLength":253},"userCredential":{"type":"boolean","description":"Indicates whether the secret is a user credential. 
Set to true if the secret was created by the user and is only accessible by them.","nullable":true},"exclude":{"description":"Use 'true' in case the secret is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"Labels":{"description":"Set of labels to populate into the container running the workload.","type":"array","items":{"$ref":"#/components/schemas/Label"},"maxItems":1000,"nullable":true},"Label":{"description":"Label details to be populated into the container.","properties":{"name":{"description":"The name of the label (mandatory)","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"value":{"description":"The value of the label.","type":"string","nullable":true,"pattern":".*"},"exclude":{"description":"Use 'true' in case the label is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"NimCache":{"description":"The specification of a NIM cache volume.","type":"object","properties":{"name":{"description":"The NIMCache resource name (mandatory).","type":"string","minLength":1,"nullable":true},"profile":{"description":"The NIM profile to use (optional).","type":"string","minLength":1,"nullable":true}},"nullable":true},"NimServicePvcFields":{"properties":{"existingPvc":{"description":"Verify existing PVC. PVC is assumed to exist when set to `true`. If set to `false`, the PVC will be created, if it does not exist.","type":"boolean","default":false,"nullable":true},"claimName":{"description":"Name for the PVC. Allow referencing it across workloads. 
If not provided, a name based on the workload name and scope will be auto-generated.","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":".*"},"readOnly":{"description":"Permit only read access to PVC.","type":"boolean","default":false,"nullable":true},"claimInfo":{"$ref":"#/components/schemas/ClaimInfo"}},"nullable":true,"type":"object"},"ClaimInfo":{"description":"Claim information for the newly created PVC. The information should not be provided when attempting to use existing PVC.","properties":{"size":{"$ref":"#/components/schemas/PvcClaimSize"},"storageClass":{"description":"Storage class name to associate with the PVC. This parameter may be omitted if there is a single storage class in the system, or you are using the default storage class. For more information, see [Storage class](https://kubernetes.io/docs/concepts/storage/storage-classes).","type":"string","minLength":1,"maxLength":63,"nullable":true,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"accessModes":{"$ref":"#/components/schemas/PvcAccessModes"},"volumeMode":{"$ref":"#/components/schemas/PvcVolumeMode"},"addedAttrValues":{"$ref":"#/components/schemas/PvcAddedAttrValues"}},"nullable":true,"type":"object"},"PvcClaimSize":{"description":"Requested size for the PVC. Mandatory when existingPvc is false. 
Recommended sizes: TB/GB/MB/TIB/GIB/MIB","type":"string","pattern":"^([+]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$","maxLength":63,"nullable":true},"PvcAccessModes":{"description":"Default access mode(s) applied to newly created PVCs unless explicitly overridden.","properties":{"readWriteOnce":{"description":"Mount the volume as read/write by a single node.","type":"boolean","default":true,"nullable":true},"readOnlyMany":{"description":"Mount the volume as read-only by many nodes.","type":"boolean","default":false,"nullable":true},"readWriteMany":{"description":"Mount the volume as read/write by many nodes.","type":"boolean","default":false,"nullable":true}},"nullable":true,"type":"object"},"PvcVolumeMode":{"description":"Default volume mode for the PVC. Choose between Filesystem (default) or Block.","type":"string","enum":["Filesystem","Block"],"nullable":true},"PvcAddedAttrValues":{"description":"an optional array of key-values pairs that are written as annotations on the created PVC. the allowed attributes are determined according to the storage class configuration (see k8s-objects-tracker for further info).","type":"array","items":{"$ref":"#/components/schemas/PvcAddedAttrValue"},"maxItems":1000},"PvcAddedAttrValue":{"type":"object","required":["key"],"properties":{"key":{"type":"string","minLength":1,"maxLength":63,"pattern":"^([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]$"},"value":{"type":"string","pattern":".*","maxLength":10000}}},"NimServiceMultiNode":{"description":"Defines whether the NIM service runs as a multi-node deployment. 
If workers is set to 1 or more, the service runs in multi-node.","properties":{"workers":{"$ref":"#/components/schemas/NimServiceWorkers"}},"nullable":true,"type":"object"},"NimServiceWorkers":{"description":"Specifies the number of worker nodes to use when running the NIM service in multi-node.","type":"integer","format":"int32","minimum":1,"maximum":1000,"nullable":true},"NimServiceNgcAuthSecret":{"description":"The name of a Kubernetes secret containing the NGC access credentials. The secret must contain a key named NGC_API_KEY with the API key as the value.","type":"string","pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$","nullable":true},"NodePools":{"description":"A prioritized list of node pools for the scheduler to run the workload on. The scheduler will always try to use the first node pool before moving to the next one if the first is not available.","type":"array","items":{"type":"string","maxLength":63,"pattern":".*"},"maxItems":1000,"nullable":true},"Preemptibility":{"description":"Specifies whether the workload can be preempted by higher-priority workloads. Valid values are preemptible and non-preemptible. If explicitly set, this value takes precedence. If not set, the system derives the preemptibility from the priorityClassName field, ensuring backward compatibility. Each workload type has a default preemptibility. To view the default preemptibility for each workload type, use the GET /workload-types endpoint.","type":"string","minLength":1,"enum":["preemptible","non-preemptible"],"nullable":true},"PriorityClass":{"description":"Specifies the priority class for the workload, which determines its scheduling behavior. Valid values are: very-low, low, medium-low, medium, medium-high, high, and very-high. Each workload type has a default priority. To view the default priority for each workload type, use the GET /workload-types endpoint. 
Once you change the priority from the default value defined for that workload type, the preemptibility field is not automatically updated. Make sure to set the desired preemptibility value.","type":"string","nullable":true,"pattern":".*"},"Probes":{"description":"Probes are used to determine if the container is healthy and ready to accept traffic.","type":"object","properties":{"readiness":{"$ref":"#/components/schemas/Probe"}},"nullable":true},"Probe":{"type":"object","properties":{"initialDelaySeconds":{"description":"Number of seconds after the container has started before liveness or readiness probes are initiated.","type":"integer","format":"int32","minimum":0,"nullable":true},"periodSeconds":{"description":"How often (in seconds) to perform the probe.","type":"integer","format":"int32","minimum":1,"nullable":true},"timeoutSeconds":{"description":"Number of seconds after which the probe times out.","type":"integer","format":"int32","minimum":1,"nullable":true},"successThreshold":{"description":"Minimum consecutive successes for the probe to be considered successful after having failed.","type":"integer","format":"int32","minimum":1,"nullable":true},"failureThreshold":{"description":"When a probe fails, the number of times to try before giving up.","type":"integer","format":"int32","minimum":1,"nullable":true},"handler":{"$ref":"#/components/schemas/ProbeHandler"}},"nullable":true},"ProbeHandler":{"description":"The action taken to determine the health of the container. 
(mandatory)","type":"object","properties":{"httpGet":{"description":"An action based on HTTP Get requests.","type":"object","properties":{"path":{"description":"Path to access on the HTTP server, defaults to /.","type":"string","pattern":"^(\\x2F[a-zA-Z0-9\\-_.\\x2F]*)?$","nullable":true,"maxLength":2048},"port":{"description":"Number of the port to access on the container.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"host":{"description":"Host name to connect to, defaults to the pod IP.","type":"string","format":"hostname","nullable":true,"pattern":".*","maxLength":253},"scheme":{"$ref":"#/components/schemas/ProbeHandlerScheme"}}}},"nullable":true},"ProbeHandlerScheme":{"description":"Scheme to use for connecting to the host, defaults to HTTP.","type":"string","enum":["HTTP","HTTPS"],"nullable":true},"NimServiceReplicas":{"default":1,"description":"The number of replicas to deploy.","type":"integer","format":"int32","minimum":0,"maximum":1000,"nullable":true},"RunAsGid":{"description":"The group id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsGid` field (optional). Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"RunAsUid":{"description":"The user id to run the entrypoint of the container which executes the workspace. Default to the value specified in the environment asset `runAsUid` field (optional). 
Use only when the source uid/gid of the environment asset is not `fromTheImage`, and `overrideUidGidInWorkspace` is enabled.","type":"integer","format":"int64","nullable":true},"NimServiceServingPort":{"description":"A port for accessing the inference service","properties":{"serviceType":{"$ref":"#/components/schemas/ServingPortServiceType"},"port":{"$ref":"#/components/schemas/ServingPortPort"},"grpcPort":{"$ref":"#/components/schemas/ServingPortGrpcPort"},"metricsPort":{"$ref":"#/components/schemas/ServingPortMetricsPort"},"exposeExternally":{"$ref":"#/components/schemas/ServingPortExposeExternally"},"exposedUrl":{"$ref":"#/components/schemas/ServingPortExposedUrl"},"exposedProtocol":{"$ref":"#/components/schemas/ServingPortExposedProtocol"}},"nullable":true,"type":"object"},"ServingPortServiceType":{"description":"The type of Kubernetes service to create for the inference deployment. Options include 'ClusterIP' (default), 'NodePort', 'LoadBalancer', and 'ExternalName'.","type":"string","default":"ClusterIP","enum":["ClusterIP","NodePort","LoadBalancer","ExternalName"],"nullable":true},"ServingPortPort":{"description":"The port that the container running the inference service exposes (mandatory).","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"ServingPortGrpcPort":{"description":"The GRPC port that the container running the inference service exposes.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"ServingPortMetricsPort":{"description":"The port where metrics are exposed, required only if it's different than the main port.","type":"integer","format":"int32","minimum":1,"maximum":65535,"nullable":true},"ServingPortExposeExternally":{"description":"Indicates whether the inference serving endpoint should be accessible outside the cluster. If set to true, the endpoint will be exposed externally. 
To enable external access, your administrator must configure the cluster as described in the [inference requirements](https://run-ai-docs.nvidia.com/self-hosted/2.24/getting-started/installation/system-requirements#inference) section.","type":"boolean","nullable":true,"default":true},"ServingPortExposedUrl":{"description":"The custom URL to use for the serving port. If empty (default), an autogenerated URL will be used.","type":"string","nullable":true,"pattern":".*"},"ServingPortExposedProtocol":{"description":"The protocol to use for the exposed URL. If grpcPort is set, this defaults to grpc. Otherwise, it defaults to http.","type":"string","enum":["http","grpc"],"nullable":true},"Tolerations":{"description":"Set of tolerations to apply to the workload.","type":"array","items":{"$ref":"#/components/schemas/Toleration"},"maxItems":1000,"nullable":true},"Toleration":{"description":"Toleration details.","properties":{"name":{"description":"The name of the toleration.","type":"string","minLength":1,"maxLength":253,"nullable":true,"pattern":".*"},"operator":{"$ref":"#/components/schemas/TolerationOperator"},"key":{"description":"The taint key that the toleration applies to. (mandatory)","type":"string","maxLength":253,"nullable":true,"pattern":".*"},"value":{"description":"The taint value the toleration matches to. Mandatory if operator is Equal, forbidden otherwise.","type":"string","maxLength":10000,"nullable":true,"pattern":".*"},"effect":{"$ref":"#/components/schemas/TolerationEffect"},"seconds":{"description":"The period of time the toleration tolerates the taint. Valid only if effect is NoExecute.","type":"integer","minimum":1,"nullable":true},"exclude":{"description":"Use 'true' in case the toleration is defined in defaults of the policy, and you wish to exclude it from the workload.","type":"boolean","nullable":true}},"nullable":true,"type":"object"},"TolerationOperator":{"description":"A key's relationship to the value. Equal uses key and value. 
Exists is equivalent to wildcard for value, so that a workload can tolerate all taints of a particular category. (mandatory)","type":"string","enum":["Equal","Exists"],"nullable":true},"TolerationEffect":{"description":"The taint effect to match. (mandatory)","type":"string","enum":["NoSchedule","NoExecute","PreferNoSchedule","Any"],"nullable":true},"NimServiceRemovableFields":{"description":"Field names that can be removed from a NIM service spec","type":"string","enum":["autoscaling","replicas","compute","environmentVariables","security","modelStore","multiNode","probes","annotations","labels","tolerations","nodePools","imagePullSecrets"]},"NimServiceResponse":{"type":"object","required":["spec","metadata","desiredPhase"],"properties":{"metadata":{"$ref":"#/components/schemas/WorkloadV2Metadata"},"desiredPhase":{"$ref":"#/components/schemas/DesiredPhase"},"spec":{"$ref":"#/components/schemas/NimServiceSpec"}}},"WorkloadV2Metadata":{"allOf":[{"$ref":"#/components/schemas/WorkloadV2MetadataResponse"},{"$ref":"#/components/schemas/WorkloadV2MetadataAutoFill"}]},"WorkloadV2MetadataResponse":{"type":"object","required":["name","projectId"],"properties":{"name":{"$ref":"#/components/schemas/WorkloadName"},"projectId":{"$ref":"#/components/schemas/ProjectId"},"priority":{"$ref":"#/components/schemas/PriorityClass"},"category":{"$ref":"#/components/schemas/Category"},"preemptibility":{"$ref":"#/components/schemas/Preemptibility"},"configuration":{"$ref":"#/components/schemas/WorkloadV2Configuration"}}},"WorkloadName":{"description":"The name of the workload.","type":"string","minLength":1,"pattern":".*"},"ProjectId":{"description":"The id of the project.","type":"string","pattern":".*"},"WorkloadV2Configuration":{"type":"object","description":"Specifies the features configuration to apply to a 
workload.","properties":{"mnnvl":{"allOf":[{"$ref":"#/components/schemas/MnnvlMode"},{"type":"string","default":"None","nullable":true}]}}},"MnnvlMode":{"type":"string","enum":["Required","None"],"description":"MNNVL (Multi-Node NVLink) mode:\n- Required: The workload is configured to use MNNVL acceleration. The workload type and at least one project’s node pool must support MNNVL, or the request fails. When set, the workload is scheduled only on compatible nodes, and may remain pending until sufficient MNNVL-capable capacity is available.\n- None: The workload will not use MNNVL acceleration, even if MNNVL-capable nodes are available.\n"},"WorkloadV2MetadataAutoFill":{"type":"object","required":["id","gvk","projectName","clusterId","tenantId","departmentId","departmentName","createdAt","createdBy","updatedAt","updatedBy"],"properties":{"id":{"$ref":"#/components/schemas/WorkloadId3"},"gvk":{"$ref":"#/components/schemas/GVK"},"projectName":{"$ref":"#/components/schemas/ProjectName2"},"clusterId":{"$ref":"#/components/schemas/ClusterId"},"tenantId":{"$ref":"#/components/schemas/TenantId"},"departmentId":{"$ref":"#/components/schemas/DepartmentId3"},"departmentName":{"$ref":"#/components/schemas/DepartmentName1"},"createdAt":{"type":"string","format":"date-time","description":"The timestamp for when the workload was created."},"createdBy":{"type":"string","description":"Identifier of the user who created the workload.","maxLength":250,"format":".*"},"updatedAt":{"type":"string","format":"date-time","description":"The timestamp for the last time the workload was updated."},"updatedBy":{"type":"string","description":"Identifier of the user who last updated the workload.","maxLength":250,"format":".*"},"deletedAt":{"type":"string","format":"date-time","description":"The timestamp indicating when the workload was deleted.","nullable":true},"deletedBy":{"type":"string","maxLength":250,"format":".*","description":"Identifier of the user who deleted the 
workload.","nullable":true}}},"WorkloadId3":{"description":"A unique ID of the workload.","type":"string","format":"uuid"},"GVK":{"type":"object","description":"Specifies the Group, Version, and Kind (GVK) of the Kubernetes resource that defines the workload.","required":["group","version","kind"],"properties":{"group":{"description":"The API group of the Kubernetes resource.","type":"string","maxLength":253},"version":{"description":"The API version of the resource within the specified group.","type":"string","maxLength":250},"kind":{"description":"The type of Kubernetes resource being referenced.","type":"string","maxLength":250}}},"ProjectName2":{"type":"string","description":"The name of the project","minLength":1,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"ClusterId":{"description":"The id of the cluster.","type":"string","format":"uuid"},"TenantId":{"description":"The id of the tenant.","type":"integer","format":"int32"},"DepartmentId3":{"description":"The id of the department.","type":"string","minLength":1,"pattern":".*"},"DepartmentName1":{"type":"string","description":"The name of the department","minLength":1,"pattern":"^[a-z0-9]([a-z0-9-]*[a-z0-9])?$"},"DesiredPhase":{"description":"The desired phase of the workload.","type":"string","enum":["Running","Stopped","Deleted"]},"Error":{"required":["code","message"],"properties":{"code":{"type":"integer","minimum":100,"maximum":599},"message":{"type":"string"},"details":{"type":"string"}}}},"responses":{"401Unauthorized":{"description":"Unauthorized","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"403Forbidden":{"description":"Forbidden","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"404NotFound":{"description":"The specified resource was not found","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"500InternalServerError":{"description":"unexpected 
error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}},"503ServiceUnavailable":{"description":"Service unavailable","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Error"}}}}}},"paths":{"/api/v2/workloads/nim-services/{WorkloadV2Id}":{"patch":{"summary":"Update NVIDIA NIM service spec. [Experimental]","operationId":"update_nim_service_spec","description":"Update the specification of an existing NVIDIA NIM service.","tags":["NVIDIA NIM"],"parameters":[{"$ref":"#/components/parameters/WorkloadV2Id"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/NimServiceUpdateRequest"}}}},"responses":{"202":{"description":"Workload update request accepted","content":{"application/json":{"schema":{"$ref":"#/components/schemas/NimServiceResponse"}}}},"401":{"$ref":"#/components/responses/401Unauthorized"},"403":{"$ref":"#/components/responses/403Forbidden"},"404":{"$ref":"#/components/responses/404NotFound"},"500":{"$ref":"#/components/responses/500InternalServerError"},"503":{"$ref":"#/components/responses/503ServiceUnavailable"}}}}}}
```
