图像生成与操作:GANs的进阶应用
立即解锁
发布时间: 2025-09-01 01:19:30 阅读量: 5 订阅数: 17 AIGC 

### 图像生成与操作:GANs的进阶应用
#### 1. 条件GANs实现特定类别图像生成
在图像生成任务中,我们有时希望能够生成特定类别的图像,比如猫、狗或戴眼镜的人。条件GANs(Conditional GANs)可以帮助我们实现这一目标。下面我们以生成男性和女性面部图像为例,介绍条件GANs的实现步骤。
##### 1.1 策略概述
实现特定类别图像生成的策略如下:
1. 将想要生成的图像标签进行one-hot编码。
2. 将标签通过嵌入层,生成每个类别的多维表示。
3. 生成随机噪声,并与上一步生成的嵌入向量进行拼接。
4. 像之前一样训练模型,但这次使用拼接了图像类别嵌入的噪声向量。
##### 1.2 代码实现
以下是实现上述策略的代码:
```python
# 1. 导入图像和相关包
!wget https://www.dropbox.com/s/rbajpdlh7efkdo1/male_female_face_images.zip
!unzip male_female_face_images.zip
!pip install -q --upgrade torch_snippets
from torch_snippets import *
device = "cuda" if torch.cuda.is_available() else "cpu"
from torchvision.utils import make_grid
from torch_snippets import *
from PIL import Image
import torchvision
from torchvision import transforms
import torchvision.utils as vutils
# 2. Build the dataset and dataloader
# i. Collect the female and male image paths (Glob comes from torch_snippets)
female_images = Glob('/content/females/*.jpg')
male_images = Glob('/content/males/*.jpg')
# ii. Load OpenCV's bundled Haar cascade so images can be cropped to just the face
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# iii. Create output folders for the cropped faces (notebook shell magics)
!mkdir cropped_faces_females
!mkdir cropped_faces_males
def crop_images(folder):
    """Detect faces in every .jpg under `folder` and save each crop.

    Crops are written to 'cropped_faces_<folder>/'. Uses the module-level
    `face_cascade` Haar classifier and torch_snippets' `Glob`/`read`.

    Bug fix: the original wrote every face of image i to the same file
    (`str(i)+'.jpg'`), so when one photo contained several faces all but
    the last were overwritten. The face index is now part of the name.
    """
    images = Glob(folder + '/*.jpg')
    for i in range(len(images)):
        img = read(images[i], 1)  # read() returns RGB
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)
        for j, (x, y, w, h) in enumerate(faces):
            face = img[y:(y + h), x:(x + w), :]
            # cv2.imwrite expects BGR, so convert back before saving.
            cv2.imwrite('cropped_faces_' + folder + '/' + str(i) + '_' + str(j) + '.jpg',
                        cv2.cvtColor(face, cv2.COLOR_RGB2BGR))
crop_images('females')
crop_images('males')
# iv. Image transforms: resize + center-crop to 64x64, convert to tensor,
# then Normalize with mean=std=0.5 to map [0,1] pixels into [-1,1],
# matching the generator's final Tanh output range.
transform=transforms.Compose([
transforms.Resize(64),
transforms.CenterCrop(64),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# v. The Faces dataset class
class Faces(Dataset):
    """Dataset over the cropped female + male face folders.

    Each item is (image_tensor, gender) where gender is 1 for images
    from the female folder and 0 otherwise ('male' is a substring of
    'female', so we must test for 'female').
    """
    def __init__(self, folders):
        super().__init__()
        self.folderfemale = folders[0]
        self.foldermale = folders[1]
        self.images = sorted(Glob(self.folderfemale)) + sorted(Glob(self.foldermale))

    def __len__(self):
        return len(self.images)

    def __getitem__(self, ix):
        image_path = self.images[ix]
        # Force 3-channel RGB: a grayscale/CMYK JPEG would otherwise
        # crash the 3-channel Normalize inside `transform`.
        image = Image.open(image_path).convert('RGB')
        image = transform(image)
        # The original used np.where on a scalar boolean, which returns a
        # 0-d array; a plain int is what torch.tensor(...) expects here.
        gender = 1 if 'female' in str(image_path) else 0
        return image, torch.tensor(gender).long()
# vi. Instantiate the dataset over the two cropped-face folders and wrap
# it in a shuffled DataLoader (batch of 64, 8 worker processes).
ds = Faces(folders=['cropped_faces_females', 'cropped_faces_males'])
dataloader = DataLoader(ds, batch_size=64, shuffle=True, num_workers=8)
# 3. Weight initialisation
def weights_init(m):
    """DCGAN-style init: Conv weights ~ N(0, 0.02); BatchNorm weights
    ~ N(1, 0.02) with zero bias. Other layer types are left untouched.
    Intended for use with `module.apply(weights_init)`.
    """
    layer_type = m.__class__.__name__
    if 'Conv' in layer_type:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif 'BatchNorm' in layer_type:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0)
# 4. The discriminator model
class Discriminator(nn.Module):
    """Conditional DCGAN discriminator for 64x64 RGB images.

    The class label (0/1) is embedded into `emb_size` dimensions and
    concatenated with the flattened convolutional features before the
    final real/fake head (Sigmoid output in [0, 1]).
    """
    def __init__(self, emb_size=32):
        super(Discriminator, self).__init__()
        # Bug fix: honour the constructor argument instead of the
        # hard-coded 32 the original assigned here.
        self.emb_size = emb_size
        self.label_embeddings = nn.Embedding(2, self.emb_size)
        self.model = nn.Sequential(
            nn.Conv2d(3, 64, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, 64*2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64*2, 64*4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64*4, 64*8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64*8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64*8, 64, 4, 2, 1, bias=False),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Flatten()
        )
        # A 64x64 input leaves 64 channels at 2x2 -> 256 features.
        # Derive the head's input size from emb_size instead of the
        # hard-coded 288 (= 256 + 32) so non-default emb_size works.
        self.model2 = nn.Sequential(
            nn.Linear(256 + self.emb_size, 100),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(100, 1),
            nn.Sigmoid()
        )
        self.apply(weights_init)

    def forward(self, input, labels):
        """Return P(real) for each image, conditioned on its label."""
        x = self.model(input)
        y = self.label_embeddings(labels)
        joint = torch.cat([x, y], 1)
        return self.model2(joint)
# iii. Print a model summary (installs the helper via notebook shell magic);
# dummy inputs: a batch of 32 zero images and 32 zero labels.
!pip install torch_summary
from torchsummary import summary
discriminator = Discriminator().to(device)
summary(discriminator,torch.zeros(32,3,64,64).to(device), torch.zeros(32).long().to(device));
# 5. The generator network
class Generator(nn.Module):
    """Conditional DCGAN generator.

    Maps a (batch, 100, 1, 1) noise tensor, concatenated with an
    `emb_size`-dim label embedding, up to a (batch, 3, 64, 64) image
    in [-1, 1] (Tanh output).
    """
    def __init__(self, emb_size=32):
        super(Generator, self).__init__()
        self.emb_size = emb_size
        self.label_embeddings = nn.Embedding(2, self.emb_size)
        ngf = 64  # base feature-map width
        layers = [
            nn.ConvTranspose2d(100 + self.emb_size, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf),
            nn.ReLU(True),
            nn.ConvTranspose2d(ngf, 3, 4, 2, 1, bias=False),
            nn.Tanh(),
        ]
        self.model = nn.Sequential(*layers)
        # Apply the DCGAN weight initialisation to every submodule.
        self.apply(weights_init)

    def forward(self, input_noise, labels):
        """Generate images from noise, conditioned on the labels."""
        emb = self.label_embeddings(labels)
        emb = emb.view(len(labels), self.emb_size, 1, 1)
        return self.model(torch.cat([input_noise, emb], 1))
# iv. Print a generator summary with dummy noise and labels.
generator = Generator().to(device)
summary(generator,torch.zeros(32,100,1,1).to(device), torch.zeros(32).long().to(device));
# 6. Random-noise helper
def noise(size, dim=100):
    """Return `size` latent vectors of shape (size, dim, 1, 1) on `device`.

    Generalised: the latent dimensionality is a parameter (default 100,
    matching the Generator's noise input). The original's trailing
    `.to(device)` was redundant — the tensor is already created on
    `device` — so it has been removed.
    """
    return torch.randn(size, dim, 1, 1, device=device)
# 7. One discriminator update
def discriminator_train_step(real_data, real_labels, fake_data, fake_labels):
    """Push real samples toward 1 and fake samples toward 0.

    Gradients from the two halves accumulate before a single optimizer
    step. Relies on the module-level `discriminator`, `d_optimizer`,
    `loss` (BCE) and `device`. Returns the summed real+fake loss.
    """
    d_optimizer.zero_grad()
    # Real half: target all-ones.
    real_targets = torch.ones(len(real_data), 1).to(device)
    real_loss = loss(discriminator(real_data, real_labels), real_targets)
    real_loss.backward()
    # Fake half: target all-zeros; gradients add to the real half's.
    fake_targets = torch.zeros(len(fake_data), 1).to(device)
    fake_loss = loss(discriminator(fake_data, fake_labels), fake_targets)
    fake_loss.backward()
    d_optimizer.step()
    return real_loss + fake_loss
# 8. One generator update
def generator_train_step(fake_data, fake_labels):
    """Update the generator so the discriminator scores fakes as real.

    Relies on module-level `discriminator`, `g_optimizer`, `loss` and
    `device`. Returns the generator's BCE loss.
    """
    g_optimizer.zero_grad()
    # The generator wins when the discriminator outputs 1 on fakes.
    targets = torch.ones(len(fake_data), 1).to(device)
    gen_loss = loss(discriminator(fake_data, fake_labels), targets)
    gen_loss.backward()
    g_optimizer.step()
    return gen_loss
# 9. Models, optimizers and loss function
discriminator = Discriminator().to(device)
generator = Generator().to(device)
# BCE matches the Sigmoid discriminator head. (The original assigned
# this twice — at lines 3 and 8 of the section — once is enough.)
loss = nn.BCELoss()
d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
g_optimizer = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
# Fixed noise + labels to visualise progress: first half label 0,
# second half label 1 (per the Faces dataset, 1 = female).
fixed_noise = torch.randn(64, 100, 1, 1, device=device)
fixed_fake_labels = torch.LongTensor([0]* (len(fixed_noise)//2) + [1]*(len(fixed_noise)//2)).to(device)
n_epochs = 25
img_list = []
# 10. Train the model.
# NOTE(review): the scraped source lost all loop indentation; this is the
# reconstructed nesting (per-batch work inside the dataloader loop,
# per-epoch reporting outside it).
log = Report(n_epochs)
N = len(dataloader)  # invariant across epochs — hoisted out of the loop
for epoch in range(n_epochs):
    for bx, (images, labels) in enumerate(dataloader):
        real_data, real_labels = images.to(device), labels.to(device)
        # Discriminator step: fresh fakes, detached so generator
        # weights receive no gradient from this step.
        fake_labels = torch.LongTensor(np.random.randint(0, 2, len(real_data))).to(device)
        fake_data = generator(noise(len(real_data)), fake_labels)
        fake_data = fake_data.detach()
        d_loss = discriminator_train_step(real_data, real_labels, fake_data, fake_labels)
        # Generator step: new fakes, NOT detached. (The original's
        # trailing .to(device) was redundant — the generator output is
        # already on `device` — and has been removed.)
        fake_labels = torch.LongTensor(np.random.randint(0, 2, len(real_data))).to(device)
        fake_data = generator(noise(len(real_data)), fake_labels)
        g_loss = generator_train_step(fake_data, fake_labels)
        pos = epoch + (1 + bx) / N
        log.record(pos, d_loss=d_loss.detach(), g_loss=g_loss.detach(), end='\r')
    log.report_avgs(epoch + 1)
# 11. Generate male and female images from the fixed noise and labels.
with torch.no_grad():
    fake = generator(fixed_noise, fixed_fake_labels).detach().cpu()
# Build a grid and move channels last (CHW -> HWC) for display.
# NOTE(review): the scraped source truncated this line at `.permute(1,2`;
# completed here as `.permute(1,2,0)`.
imgs = vutils.make_grid(fake, padding=2, normalize=True).permute(1, 2, 0)
```
0
0
复制全文
相关推荐










