Not able to deploy using vLLM on A10G

#49
by abhisskk - opened

INFO 08-06 07:25:38 [ray_env.py:68] If certain env vars should NOT be copied, add them to /home/ubuntu/.config/vllm/ray_non_carry_over_env_vars.json file
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003914) W0806 07:25:40.697000 1003914 vllm_env312/lib/python3.12/site-packages/torch/utils/cpp_extension.py:2425] TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003914) W0806 07:25:40.697000 1003914 vllm_env312/lib/python3.12/site-packages/torch/utils/cpp_extension.py:2425] If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'] to specific architectures.
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] EngineCore failed to start.
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] Traceback (most recent call last):
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 709, in run_engine_core
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 510, in __init__
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] super().__init__(vllm_config, executor_class, log_stats,
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 82, in __init__
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] self.model_executor = executor_class(vllm_config)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 264, in __init__
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] super().__init__(*args, **kwargs)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 54, in __init__
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] self._init_executor()
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/executor/ray_distributed_executor.py", line 49, in _init_executor
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] super()._init_executor()
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/executor/ray_distributed_executor.py", line 107, in _init_executor
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] self._init_workers_ray(placement_group)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/executor/ray_distributed_executor.py", line 377, in _init_workers_ray
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] self._run_workers("init_device")
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/executor/ray_distributed_executor.py", line 503, in _run_workers
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ray_worker_outputs = ray.get(ray_worker_outputs)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] return fn(*args, **kwargs)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] return func(*args, **kwargs)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/ray/_private/worker.py", line 2858, in get
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/ray/_private/worker.py", line 958, in get_objects
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] raise value.as_instanceof_cause()
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ray.exceptions.RayTaskError(AcceleratorError): ray::RayWorkerWrapper.execute_method() (pid=1003915, ip=172., actor_id=ff905be7fb13191ce0be511601000000, repr=<vllm.executor.ray_utils.RayWorkerWrapper object at 0x7307aee3b500>)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/worker/worker_base.py", line 620, in execute_method
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] raise e
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/worker/worker_base.py", line 611, in execute_method
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] return run_method(self, method, args, kwargs)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/utils/__init__.py", line 2948, in run_method
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] return func(*args, **kwargs)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/worker/worker_base.py", line 603, in init_device
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] self.worker.init_device() # type: ignore
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] ^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 166, in init_device
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] current_platform.set_device(self.device)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/platforms/cuda.py", line 80, in set_device
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] torch.cuda.set_device(device)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/torch/cuda/__init__.py", line 567, in set_device
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] torch._C._cuda_setDevice(device)
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] torch.AcceleratorError: CUDA error: invalid device ordinal
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] GPU device may be out of range, do you have enough GPUs?
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] For debugging consider passing CUDA_LAUNCH_BLOCKING=1
(EngineCore_0 pid=1003529) ERROR 08-06 07:25:42 [core.py:718] Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
(EngineCore_0 pid=1003529) Process EngineCore_0:
(EngineCore_0 pid=1003529) Traceback (most recent call last):
(EngineCore_0 pid=1003529) File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore_0 pid=1003529) self.run()
(EngineCore_0 pid=1003529) File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
(EngineCore_0 pid=1003529) self._target(*self._args, **self._kwargs)
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 722, in run_engine_core
(EngineCore_0 pid=1003529) raise e
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 709, in run_engine_core
(EngineCore_0 pid=1003529) engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 510, in __init__
(EngineCore_0 pid=1003529) super().__init__(vllm_config, executor_class, log_stats,
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/core.py", line 82, in __init__
(EngineCore_0 pid=1003529) self.model_executor = executor_class(vllm_config)
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 264, in __init__
(EngineCore_0 pid=1003529) super().__init__(*args, **kwargs)
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/executor/executor_base.py", line 54, in __init__
(EngineCore_0 pid=1003529) self._init_executor()
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/executor/ray_distributed_executor.py", line 49, in _init_executor
(EngineCore_0 pid=1003529) super()._init_executor()
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/executor/ray_distributed_executor.py", line 107, in _init_executor
(EngineCore_0 pid=1003529) self._init_workers_ray(placement_group)
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/executor/ray_distributed_executor.py", line 377, in _init_workers_ray
(EngineCore_0 pid=1003529) self._run_workers("init_device")
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/executor/ray_distributed_executor.py", line 503, in _run_workers
(EngineCore_0 pid=1003529) ray_worker_outputs = ray.get(ray_worker_outputs)
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
(EngineCore_0 pid=1003529) return fn(*args, **kwargs)
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
(EngineCore_0 pid=1003529) return func(*args, **kwargs)
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/ray/_private/worker.py", line 2858, in get
(EngineCore_0 pid=1003529) values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/ray/_private/worker.py", line 958, in get_objects
(EngineCore_0 pid=1003529) raise value.as_instanceof_cause()
(EngineCore_0 pid=1003529) ray.exceptions.RayTaskError(AcceleratorError): ray::RayWorkerWrapper.execute_method() (pid=1003915, ip=172., actor_id=ff905be7fb13191ce0be511601000000, repr=<vllm.executor.ray_utils.RayWorkerWrapper object at 0x7307aee3b500>)
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/worker/worker_base.py", line 620, in execute_method
(EngineCore_0 pid=1003529) raise e
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/worker/worker_base.py", line 611, in execute_method
(EngineCore_0 pid=1003529) return run_method(self, method, args, kwargs)
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/utils/__init__.py", line 2948, in run_method
(EngineCore_0 pid=1003529) return func(*args, **kwargs)
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/worker/worker_base.py", line 603, in init_device
(EngineCore_0 pid=1003529) self.worker.init_device() # type: ignore
(EngineCore_0 pid=1003529) ^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 166, in init_device
(EngineCore_0 pid=1003529) current_platform.set_device(self.device)
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/platforms/cuda.py", line 80, in set_device
(EngineCore_0 pid=1003529) torch.cuda.set_device(device)
(EngineCore_0 pid=1003529) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/torch/cuda/__init__.py", line 567, in set_device
(EngineCore_0 pid=1003529) torch._C._cuda_setDevice(device)
(EngineCore_0 pid=1003529) torch.AcceleratorError: CUDA error: invalid device ordinal
(EngineCore_0 pid=1003529) GPU device may be out of range, do you have enough GPUs?
(EngineCore_0 pid=1003529) CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
(EngineCore_0 pid=1003529) For debugging consider passing CUDA_LAUNCH_BLOCKING=1
(EngineCore_0 pid=1003529) Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
(EngineCore_0 pid=1003529) INFO 08-06 07:25:42 [ray_distributed_executor.py:120] Shutting down Ray distributed executor. If you see error log from logging.cc regarding SIGTERM received, please ignore because this is the expected termination process in Ray.
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) W0806 07:25:40.697000 1003915 vllm_env312/lib/python3.12/site-packages/torch/utils/cpp_extension.py:2425] TORCH_CUDA_ARCH_LIST is not set, all archs for visible cards are included for compilation.
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) W0806 07:25:40.697000 1003915 vllm_env312/lib/python3.12/site-packages/torch/utils/cpp_extension.py:2425] If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'] to specific architectures.
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] Error executing method 'init_device'. This might cause deadlock in distributed execution.
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] Traceback (most recent call last):
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/worker/worker_base.py", line 611, in execute_method
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] return run_method(self, method, args, kwargs)
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/utils/__init__.py", line 2948, in run_method
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] return func(*args, **kwargs)
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/ray/util/tracing/tracing_helper.py", line 461, in _resume_span
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] return method(self, *_args, **_kwargs)
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/worker/worker_base.py", line 603, in init_device
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] self.worker.init_device() # type: ignore
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] ^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/worker/gpu_worker.py", line 166, in init_device
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] current_platform.set_device(self.device)
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/platforms/cuda.py", line 80, in set_device
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] torch.cuda.set_device(device)
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/torch/cuda/__init__.py", line 567, in set_device
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] torch._C._cuda_setDevice(device)
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] torch.AcceleratorError: CUDA error: invalid device ordinal
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] GPU device may be out of range, do you have enough GPUs?
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] For debugging consider passing CUDA_LAUNCH_BLOCKING=1
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619] Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
(EngineCore_0 pid=1003529) (RayWorkerWrapper pid=1003915) ERROR 08-06 07:25:42 [worker_base.py:619]
(EngineCore_0 pid=1003529) (pid=1003915) INFO 08-06 07:25:37 [__init__.py:241] Automatically detected platform cuda.
(APIServer pid=1003221) Traceback (most recent call last):
(APIServer pid=1003221) File "<frozen runpy>", line 198, in _run_module_as_main
(APIServer pid=1003221) File "<frozen runpy>", line 88, in _run_code
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 1895, in <module>
(APIServer pid=1003221) uvloop.run(run_server(args))
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/uvloop/__init__.py", line 109, in run
(APIServer pid=1003221) return __asyncio.run(
(APIServer pid=1003221) ^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/usr/lib/python3.12/asyncio/runners.py", line 195, in run
(APIServer pid=1003221) return runner.run(main)
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/usr/lib/python3.12/asyncio/runners.py", line 118, in run
(APIServer pid=1003221) return self._loop.run_until_complete(task)
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/uvloop/__init__.py", line 61, in wrapper
(APIServer pid=1003221) return await main
(APIServer pid=1003221) ^^^^^^^^^^
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 1827, in run_server
(APIServer pid=1003221) await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 1847, in run_server_worker
(APIServer pid=1003221) async with build_async_engine_client(
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__
(APIServer pid=1003221) return await anext(self.gen)
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 167, in build_async_engine_client
(APIServer pid=1003221) async with build_async_engine_client_from_engine_args(
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/usr/lib/python3.12/contextlib.py", line 210, in __aenter__
(APIServer pid=1003221) return await anext(self.gen)
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 209, in build_async_engine_client_from_engine_args
(APIServer pid=1003221) async_llm = AsyncLLM.from_vllm_config(
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/utils/__init__.py", line 1520, in inner
(APIServer pid=1003221) return fn(*args, **kwargs)
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py", line 173, in from_vllm_config
(APIServer pid=1003221) return cls(
(APIServer pid=1003221) ^^^^
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py", line 119, in __init__
(APIServer pid=1003221) self.engine_core = EngineCoreClient.make_async_mp_client(
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 101, in make_async_mp_client
(APIServer pid=1003221) return AsyncMPClient(*client_args)
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 733, in __init__
(APIServer pid=1003221) super().__init__(
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/core_client.py", line 421, in __init__
(APIServer pid=1003221) with launch_core_engines(vllm_config, executor_class,
(APIServer pid=1003221) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=1003221) File "/usr/lib/python3.12/contextlib.py", line 144, in __exit__
(APIServer pid=1003221) next(self.gen)
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/utils.py", line 697, in launch_core_engines
(APIServer pid=1003221) wait_for_engine_startup(
(APIServer pid=1003221) File "/home/ubuntu/vllm_env312/lib/python3.12/site-packages/vllm/v1/engine/utils.py", line 750, in wait_for_engine_startup
(APIServer pid=1003221) raise RuntimeError("Engine core initialization failed. "
(APIServer pid=1003221) RuntimeError: Engine core initialization failed. See root cause above. Failed core proc(s): {}
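For anyone hitting the same failure: "invalid device ordinal" means a worker called torch.cuda.set_device() with an index beyond the GPUs actually visible to it. Each tensor-parallel rank needs its own CUDA ordinal on the node, so requesting a larger --tensor-parallel-size than the visible device count fails exactly like this. A simplified pre-flight sketch (the helper name is mine, and it only models vLLM's per-node GPU assignment, it doesn't query CUDA):

```python
def max_tensor_parallel(cuda_visible_devices, physical_gpus: int) -> int:
    """Largest --tensor-parallel-size one node can satisfy.

    Simplified model: each tensor-parallel rank gets its own CUDA
    ordinal, so asking for more ranks than visible devices makes
    torch.cuda.set_device(rank) raise 'invalid device ordinal'.
    Pass os.environ.get("CUDA_VISIBLE_DEVICES") and the count
    reported by `nvidia-smi -L` for your own box.
    """
    if cuda_visible_devices is None or cuda_visible_devices == "":
        return physical_gpus
    # CUDA_VISIBLE_DEVICES remaps the listed IDs to ordinals 0..N-1,
    # so it can only shrink the usable set, never grow it.
    ids = [d for d in cuda_visible_devices.split(",") if d.strip()]
    return min(len(ids), physical_gpus)

# A single-GPU A10G instance cannot satisfy -tp 2:
print(max_tensor_parallel(None, physical_gpus=1))   # 1
# Two Ray workers appear in the log above, which suggests -tp 2 was
# requested; CUDA_VISIBLE_DEVICES="0,1" on a 4-GPU box allows it:
print(max_tensor_parallel("0,1", physical_gpus=4))  # 2
```

If the number printed is smaller than the tensor-parallel size you passed to vLLM, lower the flag or check that CUDA_VISIBLE_DEVICES is not hiding cards from the Ray workers.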

As far as I know, serving gpt-oss with vLLM requires FlashAttention-3. However, FlashAttention-3 is only supported on Hopper-based GPUs, so you need a GPU like the H100 or H200.
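A compute-capability check makes the Hopper requirement concrete: the A10G is Ampere (compute capability 8.6) and the L40S is Ada (8.9), while FlashAttention-3's kernels target Hopper (9.0). A minimal sketch (the helper name is mine; the capability tuples match what torch.cuda.get_device_capability() reports on real hardware):

```python
def supports_flash_attention_3(compute_capability: tuple) -> bool:
    """FlashAttention-3 kernels are written for Hopper (sm_90+),
    so anything below compute capability 9.0 cannot use them."""
    major, minor = compute_capability
    return (major, minor) >= (9, 0)

# On a live box: supports_flash_attention_3(torch.cuda.get_device_capability())
print(supports_flash_attention_3((8, 6)))  # A10G (Ampere): False
print(supports_flash_attention_3((8, 9)))  # L40S (Ada):    False
print(supports_flash_attention_3((9, 0)))  # H100/H200:     True
```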

I'm getting the same error when deploying on an L40S. I suspect these GPUs are simply not supported.
