使用 vLLM 部署 Qwen2.5-Omni-7B
1. 安装 vLLM
pip install "vllm[audio]" --pre --extra-index-url https://wheels.vllm.ai/nightly -U
2. 安装 transformers
pip install git+https://github.com/huggingface/transformers
3. 安装 accelerate
pip install accelerate
4. 启动 Qwen2.5-Omni-7B
CUDA_VISIBLE_DEVICES=3,1,0,2 \
VLLM_USE_V1=0 \
VLLM_WORKER_MULTIPROC_METHOD=spawn \
vllm serve Qwen/Qwen2.5-Omni-7B \
--trust-remote-code --served-model-name gpt-4o gpt-4 gpt-3.5-turbo o1 o1-mini o3-mini \
--dtype bfloat16 \
--gpu-memory-utilization 0.99 --tensor-parallel-size 4 \
--port 8000 --api-key sk-123456
5. 测试（请将下面的 192.168.31.15 替换为实际部署服务器的 IP 地址）
curl http://192.168.31.15:8000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer sk-123456" \
-d '{
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": [
{"type": "image_url", "image_url": {"url": "https://modelscope.oss-cn-beijing.aliyuncs.com/resource/qwen.png"}},
{"type": "audio_url", "audio_url": {"url": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2.5-Omni/cough.wav"}},
{"type": "text", "text": "What is the text in the illustration and what is the sound in the audio?"}
]}
]
}'
示例输出：
{
"id": "chatcmpl-6d5d3481e76c4416bfbd5526ff0f8588",
"object": "chat.completion",
"created": 1745636549,
"model": "gpt-4o",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"reasoning_content": null,
"content": "The text in the illustration is \"TONGYI\" and \"Qwen\". The sound in the audio is a cough.",
"tool_calls": []
},
"logprobs": null,
"finish_reason": "stop",
"stop_reason": null
}
],
"usage": {
"prompt_tokens": 156,
"total_tokens": 183,
"completion_tokens": 27,
"prompt_tokens_details": null
},
"prompt_logprobs": null
}