PyTorch Practical Tips

0: Related references

0.1 PyTorch design

PyTorch under the hood: A guide to understand PyTorch internals
PyTorch quick-start video tutorial

0.2 Automatic differentiation

An explanation on Zhihu

Hands-On Machine Learning with Scikit-Learn & TensorFlow: Appendix D

0.3 PyTorch source code analysis

PyTorch source code analysis

0.4 PyTorch dynamic graphs

Zhihu: how PyTorch implements dynamic-graph execution

1: Computing gradients manually

Construct a composite function of the form y = (x + 1) * 3, z = y * y, and compute its gradients step by step.

import torch as th

def show_tensor(tensor, name="None"):
    print("\n***********%s*************" % name)
    print("Tensor: ", tensor)
    print("Tensor.shape: ", tensor.shape)
    print("Tensor required gradient: ", tensor.requires_grad)
    print("Tensor.data: ", tensor.data)
    print("Tensor.grad: ", tensor.grad)

length = 10

x = th.ones(length, requires_grad=True)
grads = th.FloatTensor([i for i in range(1, length + 1)])
z_grads = th.ones(length)

show_tensor(grads, "tmp_grad")
show_tensor(z_grads, "z_grads")
show_tensor(x, "x=1")

# x = 1 (leaf tensor)
# y = (x + 1) * 3 = 6,  dy/dx = 3
# z = y * y = 36,       dz/dy = 2y = 12

y = (x + 1) * 3
show_tensor(y, "y=(x+1)*3")

# cut y_t off from the backward chain: detach() returns a new leaf sharing y's data
y_t = y.detach()
y_t.requires_grad = True

show_tensor(y_t, "y_t=y")

z = y_t * y_t
show_tensor(z, "z=y*y")
# x.data *= 100  # we can adjust data for one tensor.
#show_tensor(x, "x")

z.backward(z_grads)
show_tensor(z, "z-grad with z backward")
show_tensor(y_t, "y_t-grad with z backward")
show_tensor(x, "x-grad with z backward")

y.backward(y_t.grad)
show_tensor(z, "z-grad with y backward")
show_tensor(y, "y-grad with y backward")
show_tensor(x, "x-grad with y backward")

On the semantics of tensor.backward: https://discuss.pytorch.org/t/what-does-tensor-backward-do-mathematically/27953
Further reference: https://www.lizenghai.com/archives/29498.html

2: Building network models

We illustrate with LeNet5 trained on MNIST.

2.1 By default, the standard LeNet5 model is built as follows:
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x)) # 24*24*20
        x = F.max_pool2d(x, 2, 2) # 12*12*20
        x = F.relu(self.conv2(x)) # 8*8*50
        x = F.max_pool2d(x, 2, 2) # 4*4*50
        x = x.view(-1, 4*4*50)    # 800
        x = F.relu(self.fc1(x))   # 500
        x = self.fc2(x)           # 10
        return F.log_softmax(x, dim=1) # log on top of softmax; paired with F.nll_loss this gives cross-entropy

Printing the model gives:

Model:
 LeNet5(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=800, out_features=500, bias=True)
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)
name:  conv1.weight Shape:  torch.Size([20, 1, 5, 5])
name:  conv1.bias Shape:  torch.Size([20])
name:  conv2.weight Shape:  torch.Size([50, 20, 5, 5])
name:  conv2.bias Shape:  torch.Size([50])
name:  fc1.weight Shape:  torch.Size([500, 800])
name:  fc1.bias Shape:  torch.Size([500])
name:  fc2.weight Shape:  torch.Size([10, 500])
name:  fc2.bias Shape:  torch.Size([10])
2.2 Building the model dynamically

To build a network with a configurable number of layers, we can keep the layers in a list and decide what to append from the constructor arguments. Note, however, that a plain Python list (i.e. self.net_group = []) does not work: the layers are never registered as submodules, so model.parameters() is empty and the optimizer finds nothing to optimize. Use nn.ModuleList() instead (a depth-parameterized sketch follows the printed model below):

class LeNet5_v1(nn.Module):
    def __init__(self):
        super(LeNet5_v1, self).__init__()
        self.net_group = nn.ModuleList() # with self.net_group = [], the submodules go unregistered and the optimizer sees no parameters; this follows from how PyTorch tracks modules
        self.net_group.append(nn.Conv2d(1, 20, 5, 1))
        self.net_group.append(nn.Conv2d(20, 50, 5, 1))
        self.net_group.append(nn.Linear(4*4*50, 500))
        self.net_group.append(nn.Linear(500, 10))

    def forward(self, x):
        x = F.relu(self.net_group[0](x)) # 24*24*20
        x = F.max_pool2d(x, 2, 2)        # 12*12*20
        x = F.relu(self.net_group[1](x)) # 8*8*50
        x = F.max_pool2d(x, 2, 2)        # 4*4*50
        x = x.view(-1, 4*4*50)           # 800
        x = F.relu(self.net_group[2](x)) # 500
        x = self.net_group[3](x)         # 10
        return F.log_softmax(x, dim=1) # log_softmax paired with F.nll_loss gives cross-entropy

Printing this model gives:

Model:
 LeNet5_v1(
  (net_group): ModuleList(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    (1): Conv2d(20, 50, kernel_size=(5, 5), stride=(1, 1))
    (2): Linear(in_features=800, out_features=500, bias=True)
    (3): Linear(in_features=500, out_features=10, bias=True)
  )
)
name:  net_group.0.weight Shape:  torch.Size([20, 1, 5, 5])
name:  net_group.0.bias Shape:  torch.Size([20])
name:  net_group.1.weight Shape:  torch.Size([50, 20, 5, 5])
name:  net_group.1.bias Shape:  torch.Size([50])
name:  net_group.2.weight Shape:  torch.Size([500, 800])
name:  net_group.2.bias Shape:  torch.Size([500])
name:  net_group.3.weight Shape:  torch.Size([10, 500])
name:  net_group.3.bias Shape:  torch.Size([10])
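Taking this one step further, here is a minimal sketch of a genuinely depth-parameterized model (the MLP class and its sizes argument are hypothetical, for illustration); because the layers live in an nn.ModuleList, every appended Linear shows up in model.parameters():

import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, sizes):
        super(MLP, self).__init__()
        self.layers = nn.ModuleList()
        # one Linear layer per consecutive pair of sizes
        for n_in, n_out in zip(sizes[:-1], sizes[1:]):
            self.layers.append(nn.Linear(n_in, n_out))

    def forward(self, x):
        for layer in self.layers[:-1]:
            x = F.relu(layer(x))
        return F.log_softmax(self.layers[-1](x), dim=1)

model = MLP([784, 500, 10])
print(sum(p.numel() for p in model.parameters()))  # 397510, so all layers are registered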
2.3 A complete runnable MNIST example

Substitute either LeNet5 definition above into the placeholder below and the program runs as-is.

from __future__ import print_function
import argparse 
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

'''
class LeNet5_v1(nn.Module):
    def __init__(self):
    	#TODO: init your network
    	pass
    def forward(self, x):
    	#TODO: connect your network
'''

# training loop
def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device) # move to the GPU when available
        optimizer.zero_grad() # clear accumulated gradients
        output = model(data) # forward pass
        loss = F.nll_loss(output, target) # NLL on the log_softmax output = cross-entropy
        loss.backward() # backward pass
        optimizer.step() # parameter update
        if batch_idx % args.log_interval == 0: # every log_interval batches, print epoch, progress, and loss
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
# evaluation loop
def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


def model_show(model, name=""):
    print("Model: ", name, " \n", model)
    for p_name, value in model.named_parameters():
        print("name: ", p_name, "Shape: ", value.shape)

# main program
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    print("Using cuda? ",use_cuda )

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('./data', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)


    model = LeNet5().to(device)
    #model_v1=LeNet5_v1().to(device)
    #model_show(model)
    #model_show(model_v1)

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
        
if __name__ == '__main__':
    main()

3: torchsnooper, a tensor-tracing tool

https://blog.csdn.net/iodjSVf8U1J7KYc/article/details/93549944
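A minimal usage sketch (the decorator API covered in the linked post; install with pip install torchsnooper): decorating a function with @torchsnooper.snoop() logs the shape, dtype, and device of every tensor as each line runs:

import torch
import torchsnooper

@torchsnooper.snoop()  # prints tensor shape/dtype/device for every executed line
def scale(x):
    w = torch.ones(3, requires_grad=True)
    return x * w

scale(torch.randn(3))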

4: C++ frontend

4.1 Installing the PyTorch C++ distribution (libtorch)

Follow the official installation guide: https://pytorch.org/cppdocs/installing.html

4.2 Setting up the C++ build environment

Use cmake as the build toolchain and write the following CMakeLists.txt:

cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
project(dcgan)

find_package(Torch REQUIRED)

add_executable(dcgan dcgan.cpp)
target_link_libraries(dcgan "${TORCH_LIBRARIES}")
set_property(TARGET dcgan PROPERTY CXX_STANDARD 11)

cmake will need the absolute path of the libtorch runtime; cd into it and print it with pwd:
cd path/to/pytorch
pwd

4.3 An end-to-end MNIST example

Create dcgan.cpp as follows:

#include <torch/torch.h>

// Define a new Module.
struct Net : torch::nn::Module
{
	Net()
	{
		// Construct and register two Linear submodules.
		fc1 = register_module("fc1", torch::nn::Linear(784, 64));
		fc2 = register_module("fc2", torch::nn::Linear(64, 32));
		fc3 = register_module("fc3", torch::nn::Linear(32, 10));
	}

	// Implement the Net's algorithm.
	torch::Tensor forward(torch::Tensor x)
	{
		// Use one of many tensor manipulation functions.
		x = torch::relu(fc1->forward(x.reshape({x.size(0), 784})));
		x = torch::dropout(x, /*p=*/0.5, /*train=*/is_training());
		x = torch::relu(fc2->forward(x));
		x = torch::log_softmax(fc3->forward(x), /*dim=*/1);
		return x;
	}

	// Use one of many "standard library" modules.
	torch::nn::Linear fc1{nullptr}, fc2{nullptr}, fc3{nullptr};
};

int main()
{
	// Create a new Net.
	auto net = std::make_shared<Net>();
	for (const auto& p : net->named_parameters())
	{
		std::cout << "Tensor Name: " << p.key() << "; "
		          << "\tTensor Size: " << p.value().nbytes()
		          << std::endl;
		//<< ", Tensor Value: \n" << p.value() << std::endl;
	}
	//std::cout << "Show the Network: \n" << *net << std::endl;

	// Create a multi-threaded data loader for the MNIST dataset.
	auto data_loader = torch::data::make_data_loader(
	                       torch::data::datasets::MNIST("./data").map(
	                           torch::data::transforms::Stack<>()),
	                       /*batch_size=*/64);

	// Instantiate an SGD optimization algorithm to update our Net's parameters.
	torch::optim::SGD optimizer(net->parameters(), /*lr=*/0.01);

	for (size_t epoch = 1; epoch <= 10; ++epoch)
	{
		size_t batch_index = 0;
		// Iterate the data loader to yield batches from the dataset.
		for (auto& batch : *data_loader)
		{
			// Reset gradients.
			optimizer.zero_grad();
			// Execute the model on the input data.
			torch::Tensor prediction = net->forward(batch.data);
			// Compute a loss value to judge the prediction of our model.
			torch::Tensor loss = torch::nll_loss(prediction, batch.target);
			// Compute gradients of the loss w.r.t. the parameters of our model.
			loss.backward();
			// Update the parameters based on the calculated gradients.
			optimizer.step();
			// Output the loss and checkpoint every 100 batches.
			if (++batch_index % 100 == 0)
			{
				std::cout << "Epoch: " << epoch << " | Batch: " << batch_index
				          << " | Loss: " << loss.item<float>() << std::endl;
				// Serialize your model periodically as a checkpoint.
				torch::save(net, "net.pt");
			}
		}
	}
}
4.4 Compiling with cmake

mkdir build && cd build
cmake -DCMAKE_PREFIX_PATH=the-abs-path-to-libtorch ..
cmake --build . --config Release

4.5 Downloading the MNIST dataset

Download the archives from http://yann.lecun.com/exdb/mnist/
Decompress them with gzip -d into the data folder under the program's working directory.

4.6 Running the program

./dcgan

5: GPU stream acceleration

GPU stream acceleration
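As a minimal sketch of the idea (assuming a CUDA-capable device), two independent workloads can be enqueued on separate torch.cuda.Stream objects so that their kernels may overlap:

import torch

if torch.cuda.is_available():
    device = torch.device("cuda")
    a = torch.randn(1024, 1024, device=device)
    b = torch.randn(1024, 1024, device=device)

    s1 = torch.cuda.Stream()
    s2 = torch.cuda.Stream()

    torch.cuda.synchronize()   # make sure setup work has finished
    with torch.cuda.stream(s1):
        c = a @ a              # enqueued on stream s1
    with torch.cuda.stream(s2):
        d = b @ b              # enqueued on stream s2, may overlap with s1
    torch.cuda.synchronize()   # wait for both streams before reading results
    print(c.sum().item(), d.sum().item())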

6: Building from source in DEBUG mode

6.1 Install dependencies (under sudo)

pip3 install -r requirements.txt

6.2 Build the source

sudo DEBUG=1 USE_OPENCV=1 USE_FFMPEG=1 USE_LMDB=1 USE_CUDA=1 USE_CUDNN=1 python3 setup.py build develop

Note: sudo is required and must come first on the command line; otherwise generating the AVX code fails.
(To build without CUDA, pass NO_CUDA=1.)
Reference: PyTorch source build
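After the develop build finishes, a quick sanity check (a minimal sketch) that Python picks up your source checkout rather than an installed wheel:

import torch

print(torch.__version__)  # source builds typically carry a local version suffix
print(torch.__file__)     # should point into your pytorch checkout, not site-packages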
