{
  "versions": {
    "0.8.2": {
      "imageName": "runpod/worker-v1-vllm:v2.2.0stable-cuda12.1.0",
      "minimumCudaVersion": "12.1",
      "categories": [
        {
          "title": "LLM Settings",
          "settings": [
            "TOKENIZER", "TOKENIZER_MODE", "SKIP_TOKENIZER_INIT", "TRUST_REMOTE_CODE",
            "DOWNLOAD_DIR", "LOAD_FORMAT", "DTYPE", "KV_CACHE_DTYPE", "QUANTIZATION_PARAM_PATH",
            "MAX_MODEL_LEN", "GUIDED_DECODING_BACKEND", "DISTRIBUTED_EXECUTOR_BACKEND",
            "WORKER_USE_RAY", "RAY_WORKERS_USE_NSIGHT", "PIPELINE_PARALLEL_SIZE",
            "TENSOR_PARALLEL_SIZE", "MAX_PARALLEL_LOADING_WORKERS", "ENABLE_PREFIX_CACHING",
            "DISABLE_SLIDING_WINDOW", "NUM_LOOKAHEAD_SLOTS",
            "SEED", "NUM_GPU_BLOCKS_OVERRIDE", "MAX_NUM_BATCHED_TOKENS", "MAX_NUM_SEQS",
            "MAX_LOGPROBS", "DISABLE_LOG_STATS", "QUANTIZATION", "ROPE_SCALING", "ROPE_THETA",
            "TOKENIZER_POOL_SIZE", "TOKENIZER_POOL_TYPE", "TOKENIZER_POOL_EXTRA_CONFIG",
            "ENABLE_LORA", "MAX_LORAS", "MAX_LORA_RANK", "LORA_EXTRA_VOCAB_SIZE",
            "LORA_DTYPE", "LONG_LORA_SCALING_FACTORS", "MAX_CPU_LORAS", "FULLY_SHARDED_LORAS",
            "DEVICE", "SCHEDULER_DELAY_FACTOR", "ENABLE_CHUNKED_PREFILL", "SPECULATIVE_MODEL",
            "NUM_SPECULATIVE_TOKENS", "SPECULATIVE_DRAFT_TENSOR_PARALLEL_SIZE",
            "SPECULATIVE_MAX_MODEL_LEN", "SPECULATIVE_DISABLE_BY_BATCH_SIZE",
            "NGRAM_PROMPT_LOOKUP_MAX", "NGRAM_PROMPT_LOOKUP_MIN", "SPEC_DECODING_ACCEPTANCE_METHOD",
            "TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_THRESHOLD", "TYPICAL_ACCEPTANCE_SAMPLER_POSTERIOR_ALPHA",
            "MODEL_LOADER_EXTRA_CONFIG", "PREEMPTION_MODE", "PREEMPTION_CHECK_PERIOD",
            "PREEMPTION_CPU_CAPACITY", "MAX_LOG_LEN", "DISABLE_LOGGING_REQUEST",
            "ENABLE_AUTO_TOOL_CHOICE", "TOOL_CALL_PARSER"
          ]
        },
        {
          "title": "Tokenizer Settings",
          "settings": [
            "TOKENIZER_NAME", "TOKENIZER_REVISION", "CUSTOM_CHAT_TEMPLATE"
          ]
        },
        {
          "title": "System Settings",
          "settings": [
            "GPU_MEMORY_UTILIZATION", "MAX_PARALLEL_LOADING_WORKERS", "BLOCK_SIZE",
            "SWAP_SPACE", "ENFORCE_EAGER", "MAX_SEQ_LEN_TO_CAPTURE", "DISABLE_CUSTOM_ALL_REDUCE"
          ]
        },
        {
          "title": "Streaming Settings",
          "settings": [
            "DEFAULT_BATCH_SIZE", "DEFAULT_MIN_BATCH_SIZE", "DEFAULT_BATCH_SIZE_GROWTH_FACTOR"
          ]
        },
        {
          "title": "OpenAI Settings",
          "settings": [
            "RAW_OPENAI_OUTPUT", "OPENAI_RESPONSE_ROLE", "OPENAI_SERVED_MODEL_NAME_OVERRIDE"
          ]
        },
        {
          "title": "Serverless Settings",
          "settings": [
            "MAX_CONCURRENCY", "DISABLE_LOG_STATS", "DISABLE_LOG_REQUESTS"
          ]
        }
      ]
    },
    "0.7.3": {
      "imageName": "runpod/worker-v1-vllm:v2.1.0stable-cuda12.1.0",
      "minimumCudaVersion": "12.1",