Dive into PyTorch. Comparison with TensorFlow.
About me
Illarion Khlestov
Researcher at the RingLabs, Faces department
GitHub: https://github.com/ikhlestov
Blog: https://medium.com/@illarionkhlestov
Why use PyTorch?
1. Fast to produce production-ready code
2. Easy to use if you know numpy
3. Small overhead on top of CUDA
4. Suitable both for large abstract layers and for self-designed layers
Resources
1. Documentation
2. Tutorials
3. Source code
Notes:
- Documentation and tutorials are stored separately
- Docs, tutorials and source code can have different versions
PyTorch as numpy
import torch
# define pytorch tensors
x = torch.randn(10, 20)
y = torch.ones(20, 5)
# `@` means matrix multiplication since Python 3.5 (PEP 465)
res = x @ y
# get the shape
res.shape # torch.Size([10, 5])
# in place operations
x.add_(torch.ones(10, 20))
# get the mean and std
x.mean(dim=0)
x.std(dim=1)
# reshaping (the total number of elements must stay the same)
x = x.view(5, -1)
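For comparison, a rough numpy sketch of the same operations (my addition, not part of the original slide):
import numpy as np
x = np.random.randn(10, 20)
y = np.ones((20, 5))
res = x @ y  # same `@` operator
res.shape  # (10, 5)
x += np.ones((10, 20))  # in-place addition
x.mean(axis=0)  # `axis` instead of `dim`
x.std(axis=1)
x = x.reshape(5, -1)  # `reshape` instead of `view`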
PyTorch as numpy
import torch
import numpy as np
numpy_tensor = np.random.randn(10, 20)
# convert numpy array to pytorch array
pytorch_tensor = torch.Tensor(numpy_tensor)
# or another way
pytorch_tensor = torch.from_numpy(numpy_tensor)
# convert torch tensor to numpy representation
pytorch_tensor.numpy()
# to use a tensor on the GPU, provide the corresponding CUDA type
dtype = torch.cuda.FloatTensor
gpu_tensor = torch.randn(10, 20).type(dtype)
# or just call `cuda()` method
gpu_tensor = pytorch_tensor.cuda()
# move back to the CPU
cpu_tensor = gpu_tensor.cpu()
From tensors to variables
import torch
from torch.autograd import Variable
# create variable
x = Variable(torch.ones(2), requires_grad=True)
# access variable tensor
x.data
# access variable gradient
x.grad # None
y = 5 * (x + 2) ** 2
# backward should be called only on a scalar
o = (1 / 2) * torch.sum(y)
# compute backward
o.backward()
# now we have the gradients of x
x.grad # 15, 15
From tensors to variables
# define the inputs
x_tensor = torch.randn(10, 20)
y_tensor = torch.randn(10, 5)
x = Variable(x_tensor, requires_grad=False)
y = Variable(y_tensor, requires_grad=False)
# define some weights
w = Variable(torch.randn(20, 5), requires_grad=True)
# get variable tensor
print(type(w.data)) # torch.FloatTensor
# get variable gradient
print(w.grad) # None
loss = torch.mean((y - x @ w) ** 2)
# calculate the gradients
loss.backward()
print(w.grad) # some gradients
# manually apply gradients
w.data -= 0.01 * w.grad.data
# manually zero gradients after update
w.grad.data.zero_()
Simple layer with optimizer and loss
import torch
from torch.autograd import Variable
import torch.nn.functional as F
x = Variable(torch.randn(10, 20), requires_grad=False)
y = Variable(torch.randn(10, 3), requires_grad=False)
# define some weights
w1 = Variable(torch.randn(20, 5), requires_grad=True)
w2 = Variable(torch.randn(5, 3), requires_grad=True)
learning_rate = 0.1
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.SGD([w1, w2], lr=learning_rate)
for step in range(5):
pred = F.sigmoid(x @ w1)
pred = F.sigmoid(pred @ w2)
loss = loss_fn(pred, y)
# you still have to manually zero all previous gradients
optimizer.zero_grad()
loss.backward()
optimizer.step()
TensorFlow static graphs
import numpy as np
import tensorflow as tf
# placeholders should be defined up front, as part of the static graph
x = tf.placeholder(tf.float32, shape=(None, 20))
y = tf.placeholder(tf.float32, shape=(None, 3))
w1 = tf.Variable(tf.random_normal((20, 5)))
w2 = tf.Variable(tf.random_normal((5, 3)))
pred = tf.sigmoid(x @ w1)
pred = tf.sigmoid(pred @ w2)
loss = tf.reduce_sum((y - pred) ** 2)
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
minimization = optimizer.minimize(loss)
with tf.Session() as sess:
# Run the graph once to initialize the Variables w1 and w2.
sess.run(tf.global_variables_initializer())
x_value = np.random.randn(10, 20)
y_value = np.random.randn(10, 3)
for step in range(5):
loss_value, _ = sess.run([loss, minimization],
feed_dict={x: x_value, y: y_value})
TensorFlow control flow
import tensorflow as tf
first_counter = tf.constant(0)
second_counter = tf.constant(10)
some_value = tf.Variable(15)
# the condition must accept all loop variables:
def cond(first_counter, second_counter, *args):
return first_counter < second_counter
def body(first_counter, second_counter, some_value):
first_counter = tf.add(first_counter, 2)
second_counter = tf.add(second_counter, 1)
some_value = tf.add(some_value, second_counter)
return first_counter, second_counter, some_value
c1, c2, val = tf.while_loop(
cond, body, [first_counter, second_counter, some_value])
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
counter_1_res, counter_2_res = sess.run([c1, c2])
PyTorch control flow
import torch
first_counter = torch.Tensor([0])
second_counter = torch.Tensor([10])
some_value = torch.Tensor([15])
while (first_counter < second_counter)[0]:
first_counter += 2
second_counter += 1
some_value += second_counter
Style transfer example
Sequential models definition
from collections import OrderedDict
import torch.nn as nn
# Example of using Sequential
model = nn.Sequential(
nn.Conv2d(1, 20, 5),
nn.ReLU(),
nn.Conv2d(20, 64, 5),
nn.ReLU()
)
# Example of using Sequential with OrderedDict
model = nn.Sequential(OrderedDict([
('conv1', nn.Conv2d(1, 20, 5)),
('relu1', nn.ReLU()),
('conv2', nn.Conv2d(20, 64, 5)),
('relu2', nn.ReLU())
]))
output = model(some_input)
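`some_input` is not defined on the slide; a minimal usage sketch, assuming a batch of single-channel 28x28 images:
import torch
from torch.autograd import Variable
some_input = Variable(torch.randn(1, 1, 28, 28))  # one 1-channel 28x28 image
output = model(some_input)  # two 5x5 convs -> shape (1, 64, 20, 20)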
nn.Module models definition
import torch.nn as nn
import torch.nn.functional as F
# layer names will be based on the class attribute names
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
def forward(self, x):
x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
x = F.max_pool2d(F.relu(self.conv2(x)), 2)
return x
model = Net()
output = model(some_input)
Mixed model definition
from torch import nn
class Model(nn.Module):
def __init__(self):
super().__init__()
self.feature_extractor = nn.Sequential(
nn.Conv2d(3, 12, kernel_size=3, padding=1, stride=1),
nn.Conv2d(12, 24, kernel_size=3, padding=1, stride=1),
)
self.second_extractor = nn.Conv2d(
24, 36, kernel_size=3, padding=1, stride=1)
def forward(self, x):
x = self.feature_extractor(x)
x = self.second_extractor(x)
return x
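A minimal forward-pass sketch for the mixed model (the 3x32x32 input shape is an assumption of mine):
import torch
from torch.autograd import Variable
model = Model()
dummy_input = Variable(torch.randn(1, 3, 32, 32))  # hypothetical batch of one 3-channel 32x32 image
features = model(dummy_input)  # padding=1, stride=1 keep the spatial size: (1, 36, 32, 32)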
XNOR-net example
Self-defined layers (old style)
class MyFunction(torch.autograd.Function):
def forward(self, input):
self.save_for_backward(input)
output = torch.sign(input)
return output
def backward(self, grad_output):
input, = self.saved_tensors
grad_output[input.ge(1)] = 0
grad_output[input.le(-1)] = 0
return grad_output
# usage
x = torch.randn(10, 20)
y = MyFunction()(x)
# and if we want to use it inside an nn.Module
class MyFunctionModule(torch.nn.Module):
def forward(self, x):
return MyFunction()(x)
Self-defined layers (new style)
class MyFunction(torch.autograd.Function):
@staticmethod
def forward(ctx, input):
ctx.save_for_backward(input)
output = torch.sign(input)
return output
@staticmethod
def backward(ctx, grad_output):
# saved_variables is a tuple, so we take the first element
input, = ctx.saved_variables
grad_output[input.ge(1)] = 0
grad_output[input.le(-1)] = 0
return grad_output
x = torch.randn(10, 20)
y = MyFunction.apply(x)
my_func = MyFunction.apply
y = my_func(x)
class MyFunctionModule(torch.nn.Module):
def forward(self, x):
return MyFunction.apply(x)
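A quick sketch (my own, not from the slides) to see the straight-through behaviour of this function:
from torch.autograd import Variable
x = Variable(torch.randn(10, 20), requires_grad=True)
y = MyFunction.apply(x)
y.sum().backward()
# incoming gradients are passed through unchanged, except where |x| >= 1 they are zeroed
print(x.grad)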
Train on CUDA
import torch
### tensor example
x_cpu = torch.randn(10, 20)
w_cpu = torch.randn(20, 10)
# direct transfer to the GPU
x_gpu = x_cpu.cuda()
w_gpu = w_cpu.cuda()
result_gpu = x_gpu @ w_gpu
# get back from GPU to CPU
result_cpu = result_gpu.cpu()
### model example
model = model.cuda()
# train step
inputs = Variable(inputs.cuda())
outputs = model(inputs)
# get back from GPU to CPU
outputs = outputs.cpu()
CUDA device allocation
import torch
# check if CUDA is available
torch.cuda.is_available()
# set required device
torch.cuda.set_device(0)
# work with some required cuda device
with torch.cuda.device(1):
# allocates a tensor on GPU 1
a = torch.cuda.FloatTensor(1)
assert a.get_device() == 1
# but you can still manually assign a tensor to a required device
d = torch.randn(2).cuda(2)
assert d.get_device() == 2
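Two related helpers (standard torch.cuda calls) that are useful when choosing a device:
torch.cuda.device_count()  # number of visible GPUs
torch.cuda.current_device()  # index of the currently selected device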
CUDA wrapper
class Trainer:
def __init__(self, model, use_cuda=False, gpu_idx=0):
self.use_cuda = use_cuda
self.gpu_idx = gpu_idx
self.model = self.to_gpu(model)
def to_gpu(self, tensor):
if self.use_cuda:
return tensor.cuda(self.gpu_idx)
else:
return tensor
def from_gpu(self, tensor):
if self.use_cuda:
return tensor.cpu()
else:
return tensor
def train(self, inputs):
inputs = self.to_gpu(inputs)
outputs = self.model(inputs)
outputs = self.from_gpu(outputs)
return outputs
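A usage sketch for the wrapper; the model and batch here are placeholders of my own:
import torch
from torch.autograd import Variable
model = Net()  # any nn.Module defined elsewhere
trainer = Trainer(model, use_cuda=torch.cuda.is_available(), gpu_idx=0)
batch = Variable(torch.randn(10, 1, 28, 28))  # hypothetical input batch
outputs = trainer.train(batch)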
Weights initialization
import torch
from torch.autograd import Variable
# new way with `init` module
w = torch.Tensor(3, 5)
torch.nn.init.normal(w)
# works for Variables as well
w2 = Variable(w)
torch.nn.init.normal(w2)
# old style: direct access to the tensor's data attribute
w2.data.normal_()
# example for some module
def weights_init(m):
classname = m.__class__.__name__
if classname.find('Conv') != -1:
m.weight.data.normal_(0.0, 0.02)
elif classname.find('BatchNorm') != -1:
m.weight.data.normal_(1.0, 0.02)
m.bias.data.fill_(0)
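Such an init function is usually applied recursively to every submodule with `.apply()`:
from torch import nn
model = nn.Sequential(nn.Conv2d(1, 20, 5), nn.BatchNorm2d(20))
model.apply(weights_init)  # calls weights_init on every submodule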
Weights initialization
import math
from torch import nn
# for loop approach with direct access
class MyModel(nn.Module):
def __init__(self):
super().__init__()
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.bias.data.zero_()
Find the owl
Learning rate scheduler
from torch.optim import lr_scheduler
from torch import nn
import torch
model = nn.Sequential(
nn.Conv2d(1, 20, 5),
nn.ReLU(),
nn.Conv2d(20, 64, 5),
nn.ReLU()
)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
for epoch in range(100):
scheduler.step()
train()
validate()
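Other schedulers from torch.optim.lr_scheduler follow the same pattern, for example:
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30, 80], gamma=0.1)
# or exponential decay applied every epoch
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.95)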
Random seed and train flag
import torch
# CPU seed
torch.manual_seed(42)
# GPU seed
torch.cuda.manual_seed_all(42)
# Train flag can be updated with boolean
# to disable dropout and batch norm learning
model.train(True)
# execute train step
model.train(False)
# run inference step
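The usual shorthands for the same flag:
model.eval()  # equivalent to model.train(False)
model.train()  # equivalent to model.train(True)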
Variable modes - requires_grad and volatile
import torch
from torch.autograd import Variable
# requires grad
# If there’s a single input to an operation that requires gradient,
# its output will also require gradient.
x = Variable(torch.randn(5, 5))
y = Variable(torch.randn(5, 5))
z = Variable(torch.randn(5, 5), requires_grad=True)
a = x + y
a.requires_grad # False
b = a + z
b.requires_grad # True
# Volatile differs from requires_grad in how the flag propagates.
# If there’s even a single volatile input to an operation,
# its output is also going to be volatile.
x = Variable(torch.randn(5, 5), requires_grad=True)
y = Variable(torch.randn(5, 5), volatile=True)
a = x + y
a.requires_grad # False
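Volatile was typically used for inference to avoid keeping buffers for backward; a minimal sketch in the same 0.x-era API as the slides:
x = Variable(torch.randn(5, 5), volatile=True)
y = x * 2 + 1  # no graph for backward is kept
y.volatile  # True
y.requires_grad  # False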
Print model info
import torch.nn as nn
model = nn.Sequential(
nn.Conv2d(1, 20, 5),
nn.ReLU())
print(model)
# Sequential (
# (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
# (1): ReLU ()
# )
Print model info
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
def forward(self, x):
return x
model = Net()
print(model)
# layer names match the attribute names
# Net (
# (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
# (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
# )
Model saving/loading
import torch
import torch.nn as nn
model = nn.Sequential(
nn.Conv2d(1, 20, 5),
nn.ReLU(),
nn.Conv2d(20, 64, 5),
nn.ReLU()
)
save_path = 'model.pkl'
# save/load only the model parameters (preferred solution)
torch.save(model.state_dict(), save_path)
model.load_state_dict(torch.load(save_path))
# save whole model
torch.save(model, save_path)
model = torch.load(save_path)
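A common extension (my sketch, not from the slides) is to save a full checkpoint with the optimizer state, assuming an `optimizer` and an `epoch` counter from the training loop:
checkpoint = {
'epoch': epoch,
'model_state': model.state_dict(),
'optimizer_state': optimizer.state_dict(),
}
torch.save(checkpoint, 'checkpoint.pth')
# restoring
state = torch.load('checkpoint.pth')
model.load_state_dict(state['model_state'])
optimizer.load_state_dict(state['optimizer_state'])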
TensorFlow data loader
PyTorch data loader (definition)
import torch
import torchvision as tv
class ImagesDataset(torch.utils.data.Dataset):
def __init__(self, df, transform=None,
loader=tv.datasets.folder.default_loader):
self.df = df
self.transform = transform
self.loader = loader
def __getitem__(self, index):
row = self.df.iloc[index]
target = row['class_']
path = row['path']
img = self.loader(path)
if self.transform is not None:
img = self.transform(img)
return img, target
def __len__(self):
n, _ = self.df.shape
return n
PyTorch data loader (usage)
import pandas as pd
import torch
import torchvision as tv
data_transforms = tv.transforms.Compose([
tv.transforms.RandomCrop((64, 64), padding=4),
tv.transforms.RandomHorizontalFlip(),
tv.transforms.ToTensor(),
])
train_df = pd.read_csv('path/to/some.csv')
train_dataset = ImagesDataset(
df=train_df,
transform=data_transforms
)
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=10, shuffle=True,
num_workers=16)
# fetch the batch, same as `__getitem__` method
# NOTE: image dimensions come in a different order than in TensorFlow (NCHW vs NHWC)
for img, target in train_loader:
pass
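If a TensorFlow-style NHWC layout is ever needed, a batch can be permuted (a sketch; PyTorch layers themselves expect NCHW):
img, target = next(iter(train_loader))  # one batch of shape (batch, channels, height, width)
img_nhwc = img.permute(0, 2, 3, 1)  # -> (batch, height, width, channels)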
Logging: only plain text or third-party tools
- https://github.com/oval-group/logger
- https://github.com/torrvision/crayon
- https://github.com/TeamHG-Memex/tensorboard_logger
- https://github.com/lanpa/tensorboard-pytorch
- https://github.com/facebookresearch/visdom
Final architecture overview
- Data loader
- Model definition
- Trainer
• Optimizer
• Learning rate scheduler
• Model saving/restoring
• Monitoring
Final architecture example
model = Net()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)
criterion = torch.nn.MSELoss()
dataset = ImagesDataset(path_to_images)
data_loader = torch.utils.data.DataLoader(dataset, batch_size=10)
train = True
for epoch in range(epochs):
if train:
scheduler.step()
for inputs, labels in data_loader:
inputs = Variable(to_gpu(inputs))
labels = Variable(to_gpu(labels))
outputs = model(inputs)
loss = criterion(outputs, labels)
if train:
optimizer.zero_grad()
loss.backward()
optimizer.step()
if not train:
save_best_model(epoch_validation_accuracy)
Conclusion
- PyTorch can be used as a drop-in replacement for numpy with CUDA support
- Fast for prototyping and writing custom models/layers
- Easy to debug
- Not so easy to monitor or deploy to devices without python
- Many tools you have to create yourself rather than getting them out of the box
Thank you!
Questions?
