The NVIDIA NIM API provides endpoints to create and manage workloads that deploy NVIDIA Inference Microservices (NIM) through the NIM Operator. These workloads package optimized NVIDIA model servers and run as managed services on the NVIDIA Run:ai platform. Each request includes NVIDIA Run:ai scheduling metadata (for example, project, priority, and category) and a NIM service specification that defines the container image, compute resources, environment variables, storage, and networking configuration. Once submitted, NVIDIA Run:ai handles scheduling, orchestration, and lifecycle management of the NIM service to ensure reliable and efficient model serving.
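For orientation, here is a minimal sketch of such a request body. The field names below (name, projectId, clusterId, spec, and the keys inside spec) are illustrative assumptions, not the authoritative body schema:

```python
# Illustrative payload for creating a NIM service. All field names are
# assumptions for the sake of the example, not the authoritative schema.
payload = {
    # NVIDIA Run:ai scheduling metadata (assumed field names)
    "name": "llama3-nim",
    "projectId": "proj-123",
    "clusterId": "cluster-abc",
    # NIM service specification (assumed field names)
    "spec": {
        "image": "nvcr.io/nim/meta/llama3-8b-instruct:latest",
        "compute": {"gpuDevicesRequest": 1},
        "environmentVariables": [
            {"name": "NIM_LOG_LEVEL", "value": "INFO"},
        ],
    },
}
```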
Create an NVIDIA NIM service. [Experimental]
post
Create an NVIDIA NIM service.
Authorizations
Authorization · string · Required
Bearer authentication
Body
Responses
202
Workload creation accepted
application/json
400
Bad submission request
application/json
401
Unauthorized
application/json
403
Forbidden
application/json
409
The specified resource already exists
application/json
500
Unexpected error
application/json
503
Unexpected error
application/json
post /api/v2/workloads/nim-services
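A minimal sketch of submitting a create request with Python's requests library, assuming a bearer token in a RUNAI_API_TOKEN environment variable and a tenant URL of https://myorg.run.ai (both hypothetical):

```python
import os

import requests

BASE_URL = "https://myorg.run.ai"      # hypothetical control-plane URL
TOKEN = os.environ["RUNAI_API_TOKEN"]  # assumed to hold a valid bearer token

# A payload like the one sketched above, trimmed for brevity;
# its field names remain illustrative assumptions.
payload = {
    "name": "llama3-nim",
    "projectId": "proj-123",
    "spec": {"image": "nvcr.io/nim/meta/llama3-8b-instruct:latest"},
}

resp = requests.post(
    f"{BASE_URL}/api/v2/workloads/nim-services",
    headers={"Authorization": f"Bearer {TOKEN}"},
    json=payload,
)
if resp.status_code == 202:
    print("creation accepted:", resp.json())
elif resp.status_code == 409:
    print("a NIM service with this name already exists")
else:
    resp.raise_for_status()  # surfaces 400/401/403/5xx
```

Note that 202 means the creation was accepted for scheduling, not that the service is already serving traffic.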
Get an NVIDIA NIM service. [Experimental]
get
Retrieve details of a specific NVIDIA NIM service by ID.
Authorizations
AuthorizationstringRequired
Bearer authentication
Path parameters
WorkloadV2Id · string · uuid · Required
The ID of the workload.
Responses
200
Successfully retrieved the workload
application/json
401
Unauthorized
application/json
403
Forbidden
application/json
404
The specified resource was not found
application/json
500
Unexpected error
application/json
503
Unexpected error
application/json
get /api/v2/workloads/nim-services/{WorkloadV2Id}
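A corresponding retrieval sketch, reusing the hypothetical token and tenant URL from the create example; the UUID is a placeholder:

```python
import os

import requests

BASE_URL = "https://myorg.run.ai"      # hypothetical control-plane URL
TOKEN = os.environ["RUNAI_API_TOKEN"]  # assumed to hold a valid bearer token
workload_id = "123e4567-e89b-12d3-a456-426614174000"  # placeholder WorkloadV2Id

resp = requests.get(
    f"{BASE_URL}/api/v2/workloads/nim-services/{workload_id}",
    headers={"Authorization": f"Bearer {TOKEN}"},
)
if resp.status_code == 404:
    print("no NIM service with that ID")
else:
    resp.raise_for_status()
    print(resp.json())  # 200: the workload details
```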
Update NVIDIA NIM service spec. [Experimental]
patch
Update the specification of an existing NVIDIA NIM service.
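The request path and body schema for this endpoint are not shown in this excerpt; the sketch below assumes the same per-workload path pattern as the GET endpoint, and the update fields are likewise assumptions:

```python
import os

import requests

BASE_URL = "https://myorg.run.ai"      # hypothetical control-plane URL
TOKEN = os.environ["RUNAI_API_TOKEN"]  # assumed to hold a valid bearer token
workload_id = "123e4567-e89b-12d3-a456-426614174000"  # placeholder WorkloadV2Id

# Hypothetical partial spec update; the accepted fields are assumptions.
update = {"spec": {"compute": {"gpuDevicesRequest": 2}}}

resp = requests.patch(
    # Path pattern assumed to mirror the GET endpoint above.
    f"{BASE_URL}/api/v2/workloads/nim-services/{workload_id}",
    headers={"Authorization": f"Bearer {TOKEN}"},
    json=update,
)
resp.raise_for_status()
print(resp.json())
```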