---
# Komodo service definition: provisions one A10 GPU, serves a simple HTTP
# endpoint on port 8000, and autoscales between 1 and 5 replicas on QPS.
resources:
  # 1x A10 GPU
  accelerators: A10:1
  # Open port 8000
  ports: [8000]

envs:
  # Set the environment variable MY_ENV_VAR to my-value
  MY_ENV_VAR: my-value

# Copy the contents of the current directory onto the remote machine
workdir: .

# Typical use: pip install -r requirements.txt
# Invoked under the workdir (i.e. can use its files)
setup: |
  echo "Running setup operations"

# Typical uses:
#   torchserve ...
#   python -u -m vllm.entrypoints.openai.api_server ...
# Invoked under the workdir (i.e. can use its files)
run: |
  python -m http.server --port 8000

service:
  replica_policy:
    min_replicas: 1
    max_replicas: 5
    # When the average QPS (queries per second) per replica goes above this
    # number, Komodo will dynamically scale up the number of replicas running
    # your service, up to a maximum of max_replicas.
    # Similarly, when the average QPS per replica goes below this number,
    # Komodo will scale down the number of replicas running your service,
    # down to a minimum of min_replicas.
    target_qps_per_replica: 10
  readiness_probe:
    # The endpoint path within your service that Komodo uses to check
    # whether your service is running.
    path: /