Preface
ComfyUI is a node-based graphical user interface (GUI) designed specifically for Stable Diffusion. It lets users build complex image-generation workflows by wiring together blocks called nodes, which handle tasks such as loading checkpoint models, entering prompts, and choosing samplers.
ComfyUI offers a high degree of freedom and flexibility: it supports deep customization and workflow reuse, has comparatively modest system requirements, and generates raw images faster. However, its large ecosystem of plugin nodes and relatively complex operation make it harder to learn.
WebUI, by contrast, has a fixed interface that is easy to learn and quick to pick up, and after more than a year of development it has built a mature and stable open-source ecosystem. Compared with ComfyUI, however, its performance may be slightly weaker, and workflow reproduction is limited: settings must be re-entered manually for each run.
1. Platform Environment Preparation
Image: pytorch:v24.10-torch2.4.0-torchmlu1.23.1-ubuntu22.04-py310
[Note: check your image version carefully; for older image versions, see the previous article for the corresponding modifications.]
Card: any MLU 3-series card or newer
2. Environment Setup
Installing ComfyUI
comfyui-v0.3.0.tar.gz (available from the author via private message)
diffusers_mlu-0.3.0+diffusers0.30.1-py3-none-any.whl (available from the author via private message)
git clone -b v0.2.2 https://githubfast.com/comfyanonymous/ComfyUI.git  # clone the official v0.2.2 release
tar -zxvf comfyui-v0.3.0.tar.gz  # unpack the patch package obtained via private message
cp comfyui_0.2.2_mlu0.3.0.patch ComfyUI/  # copy the patch into the project
cd ComfyUI  # enter the repository before applying the patch
git apply comfyui_0.2.2_mlu0.3.0.patch  # apply the patch
pip install -r requirements.txt
pip install ../diffusers_mlu-0.3.0+diffusers0.30.1-py3-none-any.whl
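Before moving on, it can help to confirm that the patched stack imports cleanly. A minimal sanity check, assuming the image ships Cambricon's torch_mlu extension and that it registers the torch.mlu namespace (adapt the check if your build differs):

import torch
import torch_mlu  # Cambricon PyTorch extension bundled with the image (assumed import name)
import diffusers

print("torch:", torch.__version__)
print("diffusers:", diffusers.__version__)
print("MLU available:", torch.mlu.is_available())  # assumes torch_mlu registers the torch.mlu namespace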
3. Model Download
# Download the FLUX model
git-lfs clone https://www.modelscope.cn/black-forest-labs/FLUX.1-dev.git models/diffusion_models/FLUX.1-dev
# Download the CLIP text encoders
git clone https://www.modelscope.cn/livehouse/clip_l.git models/clip/clip_l
git clone https://www.modelscope.cn/muse/t5xxl_fp16.git models/clip/t5xxl_fp16
# Download the VAE
git-lfs clone https://www.modelscope.cn/Kidoai/ae.git models/vae/ae
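After the downloads finish, you can verify that the weights landed where the workflow below expects them. A quick sketch, run from the ComfyUI root (the exact .safetensors filenames depend on what each repository ships):

import os

# Directories the workflow JSON below references under the ComfyUI root
for d in ["models/diffusion_models/FLUX.1-dev",
          "models/clip/clip_l",
          "models/clip/t5xxl_fp16",
          "models/vae/ae"]:
    files = os.listdir(d) if os.path.isdir(d) else []
    print(d, "->", [f for f in files if f.endswith(".safetensors")] or "MISSING")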
4. Configuring the ComfyUI Workflow
python main.py --listen --port 8998 --force-fp16 --tmo --fp16-vae  # start the service
Then open http://0.0.0.0:8998 in a browser.
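If the page does not load, you can first confirm the service is reachable; ComfyUI exposes a /system_stats endpoint that returns device and version information as JSON:

import json
import urllib.request

# Query ComfyUI's built-in /system_stats endpoint
with urllib.request.urlopen("http://0.0.0.0:8998/system_stats") as resp:
    print(json.dumps(json.loads(resp.read()), indent=2))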
Delete all the default nodes on the canvas and import the following JSON:
{
"last_node_id": 23,
"last_link_id": 18,
"nodes": [
{
"id": 21,
"type": "VAEDecode",
"pos": {
"0": 1398,
"1": 409
},
"size": {
"0": 210,
"1": 46
},
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "samples",
"type": "LATENT",
"link": 17
},
{
"name": "vae",
"type": "VAE",
"link": 16
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
18
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "VAEDecode"
}
},
{
"id": 14,
"type": "UNETLoader",
"pos": {
"0": 237,
"1": 403
},
"size": {
"0": 315,
"1": 82
},
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "MODEL",
"type": "MODEL",
"links": [
12
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "UNETLoader"
},
"widgets_values": [
"FLUX.1-dev/flux1-dev.safetensors",
"default"
]
},
{
"id": 20,
"type": "EmptyLatentImage",
"pos": {
"0": 1027,
"1": 295
},
"size": {
"0": 315,
"1": 106
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
15
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "EmptyLatentImage"
},
"widgets_values": [
720,
1024,
1
]
},
{
"id": 17,
"type": "CLIPTextEncode",
"pos": {
"0": 593,
"1": 513
},
"size": {
"0": 360.86590576171875,
"1": 97.03411102294922
},
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 10
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
13
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
"A cat holding a sign that says hello world"
]
},
{
"id": 18,
"type": "CLIPTextEncode",
"pos": {
"0": 661,
"1": 727
},
"size": {
"0": 210,
"1": 76
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "clip",
"type": "CLIP",
"link": 11
}
],
"outputs": [
{
"name": "CONDITIONING",
"type": "CONDITIONING",
"links": [
14
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "CLIPTextEncode"
},
"widgets_values": [
""
]
},
{
"id": 15,
"type": "DualCLIPLoader",
"pos": {
"0": 232,
"1": 565
},
"size": {
"0": 315,
"1": 106
},
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "CLIP",
"type": "CLIP",
"links": [
10,
11
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "DualCLIPLoader"
},
"widgets_values": [
"t5xxl_fp16.safetensors",
"clip_l/clip_l.safetensors",
"flux"
]
},
{
"id": 22,
"type": "SaveImage",
"pos": {
"0": 1394,
"1": 549
},
"size": {
"0": 315,
"1": 270
},
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 18
}
],
"outputs": [],
"properties": {},
"widgets_values": [
"ComfyUI"
]
},
{
"id": 19,
"type": "KSampler",
"pos": {
"0": 1022,
"1": 492
},
"size": {
"0": 315,
"1": 262
},
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "model",
"type": "MODEL",
"link": 12
},
{
"name": "positive",
"type": "CONDITIONING",
"link": 13
},
{
"name": "negative",
"type": "CONDITIONING",
"link": 14
},
{
"name": "latent_image",
"type": "LATENT",
"link": 15
}
],
"outputs": [
{
"name": "LATENT",
"type": "LATENT",
"links": [
17
],
"slot_index": 0,
"shape": 3
}
],
"properties": {
"Node name for S&R": "KSampler"
},
"widgets_values": [
142384532935922,
"randomize",
50,
8,
"euler",
"simple",
1
]
},
{
"id": 16,
"type": "VAELoader",
"pos": {
"0": 232,
"1": 753
},
"size": {
"0": 315,
"1": 58
},
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VAE",
"type": "VAE",
"links": [
16
],
"shape": 3
}
],
"properties": {
"Node name for S&R": "VAELoader"
},
"widgets_values": [
"ae/flux-ae.safetensors"
]
}
],
"links": [
[
10,
15,
0,
17,
0,
"CLIP"
],
[
11,
15,
0,
18,
0,
"CLIP"
],
[
12,
14,
0,
19,
0,
"MODEL"
],
[
13,
17,
0,
19,
1,
"CONDITIONING"
],
[
14,
18,
0,
19,
2,
"CONDITIONING"
],
[
15,
20,
0,
19,
3,
"LATENT"
],
[
16,
16,
0,
21,
1,
"VAE"
],
[
17,
19,
0,
21,
0,
"LATENT"
],
[
18,
21,
0,
22,
0,
"IMAGE"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 1.6105100000000014,
"offset": [
-167.00672753282976,
-130.94025196722782
]
}
},
"version": 0.4
}
Click Queue Prompt to run inference.
Output: the generated image is written to ComfyUI's output/ directory by the SaveImage node.
5. Calling ComfyUI via the API
Enable the developer API: in the settings panel, turn on dev mode so that the "Save (API Format)" option appears.
Click Save (API Format) and name the exported API JSON file text_to_images.json (the filename referenced by the script below).
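For reference, the API-format export is a flat mapping from node id to class_type and inputs, which is why the script below can overwrite the positive prompt via prompt_json["17"]["inputs"]["text"]. An abridged, illustrative sketch of the relevant entry (not a verbatim dump of the export):

{
  "17": {
    "class_type": "CLIPTextEncode",
    "inputs": {
      "text": "A cat holding a sign that says hello world",
      "clip": ["15", 0]
    }
  }
}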
Calling via the WebSocket API
The code is as follows:
# This is an example that uses the websockets API to know when a prompt execution is done.
# Once the prompt execution is done, it downloads the images using the /history endpoint.
import websocket  # NOTE: websocket-client (https://github.com/websocket-client/websocket-client)
import uuid
import json
import io
import urllib.request
import urllib.parse
from PIL import Image

server_address = "0.0.0.0:8998"
# server_address = "localhost:23004"
client_id = str(uuid.uuid4())

def queue_prompt(prompt):
    # POST the API-format workflow to the /prompt endpoint
    p = {"prompt": prompt, "client_id": client_id}
    data = json.dumps(p).encode('utf-8')
    req = urllib.request.Request("http://{}/prompt".format(server_address), data=data)
    return json.loads(urllib.request.urlopen(req).read())

def get_image(filename, subfolder, folder_type):
    # Download a single image via the /view endpoint
    data = {"filename": filename, "subfolder": subfolder, "type": folder_type}
    url_values = urllib.parse.urlencode(data)
    with urllib.request.urlopen("http://{}/view?{}".format(server_address, url_values)) as response:
        return response.read()

def get_history(prompt_id):
    # Fetch the execution record for a prompt from the /history endpoint
    with urllib.request.urlopen("http://{}/history/{}".format(server_address, prompt_id)) as response:
        return json.loads(response.read())

def get_images(ws, prompt):
    prompt_id = queue_prompt(prompt)['prompt_id']
    output_images = {}
    while True:
        out = ws.recv()
        if isinstance(out, str):
            message = json.loads(out)
            if message['type'] == 'executing':
                data = message['data']
                if data['node'] is None and data['prompt_id'] == prompt_id:
                    break  # Execution is done
        else:
            # If you want to decode the binary stream for latent previews, here is how:
            # bytesIO = BytesIO(out[8:])
            # preview_image = Image.open(bytesIO)  # This is your preview in PIL image format
            continue  # previews are binary data

    history = get_history(prompt_id)[prompt_id]
    for node_id in history['outputs']:
        node_output = history['outputs'][node_id]
        images_output = []
        if 'images' in node_output:
            for image in node_output['images']:
                image_data = get_image(image['filename'], image['subfolder'], image['type'])
                images_output.append(image_data)
        output_images[node_id] = images_output
    return output_images

def text_to_image(text):
    # Load the exported API-format workflow; adjust the path to your own export
    with open("/workspace/volume/guojunceshi2/yanshicomfyui/ComfyUI/text_to_images.json", 'r', encoding='utf-8') as f:
        prompt_json = json.load(f)
    # Node "17" is the positive-prompt CLIPTextEncode node in our workflow
    prompt_json["17"]["inputs"]["text"] = text

    ws = websocket.WebSocket()
    ws.connect("ws://{}/ws?clientId={}".format(server_address, client_id))
    images = get_images(ws, prompt_json)
    ws.close()  # close the connection in case this is called repeatedly, e.g. from a Gradio app; otherwise you may randomly hit connection timeouts
    return images

# Call the function and save the images
images = text_to_image("A cat holding a sign that says hello world")

for node_id in images:
    for image_data in images[node_id]:
        image = Image.open(io.BytesIO(image_data))
        image.save(f"output_{node_id}.png")
Key points
Set the server_address variable to the host and port on which your ComfyUI service is listening.
The path opened in text_to_image() must point to the API-format JSON we exported above.
prompt_json["17"]["inputs"]["text"] overwrites the text input of node 17 (the positive-prompt CLIPTextEncode node) in that JSON.
Run the Python file directly and the generated images are saved locally.
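If you prefer to avoid the websocket dependency, the same /prompt and /history endpoints can be polled over plain HTTP. A minimal sketch under the same assumptions as above (same server_address and exported JSON; the one-second polling interval is an arbitrary choice):

import json
import time
import urllib.request

server_address = "0.0.0.0:8998"

def queue_and_wait(prompt, poll_interval=1.0):
    # Queue the prompt via /prompt, then poll /history until outputs appear
    data = json.dumps({"prompt": prompt}).encode('utf-8')
    req = urllib.request.Request(f"http://{server_address}/prompt", data=data)
    prompt_id = json.loads(urllib.request.urlopen(req).read())['prompt_id']
    while True:
        with urllib.request.urlopen(f"http://{server_address}/history/{prompt_id}") as resp:
            history = json.loads(resp.read())
        if prompt_id in history and history[prompt_id].get('outputs'):
            return history[prompt_id]['outputs']
        time.sleep(poll_interval)

with open("text_to_images.json", encoding='utf-8') as f:
    workflow = json.load(f)
workflow["17"]["inputs"]["text"] = "A cat holding a sign that says hello world"
print(queue_and_wait(workflow))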