SlideShare a Scribd company logo
Memory Efficient Pytorch
SNU RPLab

Hyungjoo Cho
Computation Graph
w₁ x₁ w₂ x₂ b
z
h
L
y
Computation Graph
w₁ x₁ w₂ x₂ b
z
h
L
y
Computation Graph
w₁ x₁ w₂ x₂ b
z
h
yL
Computation Graph
w₁ x₁ w₂ x₂ b
z
h
yL
Computation Graph
w₁ x₁ w₂ x₂
z
h
yL
b
Network Configuration
INPUT
FC
Sigmoid
LogSigmoid Label
Gradient Calculation Graph
INPUT
FC-forward
Sigm-forward
LogSigm-forward LogSigm-backward
FC-backward
Sigm-backward
Label
INPUT-Grad
Memory Allocation
INPUT
FC-forward
Sigm-forward
LogSigm-forward LogSigm-backward
FC-backward
Sigm-backward
Label
INPUT-Grad
Efficient Memory Allocation
INPUT
FC-forward
Sigm-forward
LogSigm-forward LogSigm-backward
FC-backward
Sigm-backward
Label
INPUT-Grad
Efficient Memory Allocation
INPUT
FC-forward
Sigm-forward
LogSigm-forward LogSigm-backward
FC-backward
Sigm-backward
Label
INPUT-Grad
①
②
1. refcount(①) is 1.
2. size(output(①)) is the same as size(output(②)).
Efficient Memory Allocation
INPUT
FC-forward
Sigm-forward
LogSigm-forward LogSigm-backward
FC-backward
Sigm-backward
Label
INPUT-Grad
①
②
1. refcount(①) is 1.
2. size(output(①)) is the same as size(output(②)).
In-place operation
In-place Operation
Sorting
Bubble Merge
In-place Operation
• The input is usually overwritten by the output as the algorithm executes.

• An in-place operation updates the input sequence only through replacement or
swapping of elements.
In-place in Pytorch
a = torch.zeros(1, 3)
In-place in Pytorch
a = torch.zeros(1, 3)
print(a)
print(hex(id(a)))
In-place in Pytorch
a = torch.zeros(1, 3)
print(a)
print(hex(id(a)))
0 0 0
[torch.FloatTensor of size 1x3]
0x7f4b08813188
In-place in Pytorch
a = torch.zeros(1, 3)
print(a)
print(hex(id(a)))
0 0 0
[torch.FloatTensor of size 1x3]
0x7f4b08813188
0x7f4b08813188 FloatTensor([0, 0, 0])a
In-place in Pytorch
case 1)
a = a + 1
print(a.numpy())
print(hex(id(a)))
0x7f4b08813188 FloatTensor([0, 0, 0])a
case 2)
for i in range(3):
a[:, i] += 1
print(a.numpy())
print(hex(id(a)))
In-place in Pytorch
case 1)
a = a + 1
print(a.numpy())
print(hex(id(a)))
[[1. 1. 1.]]
0x7f4b088135c8
0x7f4b08813188 FloatTensor([0, 0, 0])a
case 2)
for i in range(3):
a[:, i] += 1
print(a.numpy())
print(hex(id(a)))
[[1. 1. 1.]]
0x7f4b08813188
In-place in Pytorch
case 1)
0x7f4b08813188 FloatTensor([0, 0, 0])a
case 2)
0x7f4b08813188 FloatTensor([0, 0, 0])
a
0x7f4b088135c8 FloatTensor([1, 1, 1])
0x7f4b08813188
FloatTensor([0, 0, 0])
FloatTensor([1, 1, 1])
a
In-place in Pytorch
case 1)
0x7f4b08813188 FloatTensor([0, 0, 0])a
case 2)
0x7f4b08813188 FloatTensor([0, 0, 0])
a
0x7f4b088135c8 FloatTensor([1, 1, 1])
0x7f4b08813188
FloatTensor([0, 0, 0])
FloatTensor([1, 1, 1])
a
Out of place In-place
In-place in Pytorch
case 3)
a = a.add(1)
print(a.numpy())
print(hex(id(a)))
0x7f4b08813188 FloatTensor([0, 0, 0])a
case 4)
a.add_(1)
print(a.numpy())
print(hex(id(a)))
In-place in Pytorch
case 3)
a = a.add(1)
print(a.numpy())
print(hex(id(a)))
[[1. 1. 1.]]
0x7f4b088135c8
0x7f4b08813188 FloatTensor([0, 0, 0])a
case 4)
a.add_(1)
print(a.numpy())
print(hex(id(a)))
[[1. 1. 1.]]
0x7f4b08813188
In-place in Pytorch
Out of place In-place
case 5)
a += 1
print(a.numpy())
print(hex(id(a)))
0x7f4b08813188 FloatTensor([0, 0, 0])a
In-place in Pytorch
case 5)
a += 1
print(a.numpy())
print(hex(id(a)))
[[1. 1. 1.]]
0x7f4b08813188
0x7f4b08813188 FloatTensor([0, 0, 0])a
In-place in Pytorch
In-place
case 5)
a += 1
print(a.numpy())
print(hex(id(a)))
[[1. 1. 1.]]
0x7f4b08813188
0x7f4b08813188 FloatTensor([0, 0, 0])a
In-place in Pytorch
In-place
torch/autograd/variable.py::Variable( )
a = a.fill(3)
In-place in Pytorch
a = a.fill(3)
AttributeError: ‘torch.FloatTensor’
object has no attribute ‘fill’
In-place in Pytorch
a.fill_(3)
print(a.numpy())
[[3. 3. 3.]]
In-place in Pytorch
In-place in Pytorch
In-place ‘only’
- fill_()
- zero_()
- normal_()
- uniform_()
- exponential_()
- etc…
In-place in Pytorch
a = a.t_()
3.
3.
3.
[torch.FloatTensor of size 3x1]
Different tensor size, but same buffer size
Non-linear Activation
self.add_module('conv', conv2d_3x3(in_dim, out_dim))
self.add_module('bn', nn.BatchNorm2d(in_dim))
self.add_module('act', nn.ReLU(inplace=True))
Non-linear Activation
/*
 * Forward pass of the Threshold activation, shown with the TH tensor-apply
 * macros partly expanded for float data.
 *
 * Every element x of `input` is kept when x > threshold_ and replaced by
 * val_ otherwise.
 *
 * state      - not referenced in this body.
 * input      - source tensor; its elements are overwritten when `inplace`
 *              is true.
 * output     - result tensor. In the in-place branch it is pointed at
 *              `input` via THFloatTensor_set; otherwise it is resized like
 *              `input` and filled element by element.
 * threshold_ - cut-off value, narrowed to float before use.
 * val_       - replacement value, narrowed to float before use.
 * inplace    - selects the overwrite-in-place branch (true) or the
 *              separate-output branch (false).
 */
void THNN_(Threshold_updateOutput)(THNNState *state,
                                   THTensor *input,
                                   THTensor *output,
                                   accreal threshold_,
                                   accreal val_,
                                   bool inplace)
{
  float threshold = (float)threshold_;
  float val = (float)val_;
  if (inplace)
  {
    /* In-place: walk `input` only and clamp elements where they sit. */
    int TH_TENSOR_APPLY_hasFinished = 0;
    int64_t TH_TENSOR_dim_index = 0;
    TH_TENSOR_APPLYX_PREAMBLE(float, input, -1, 0);
    /* The loop flag must be the one declared above. */
    while (!TH_TENSOR_APPLY_hasFinished)
    {
      for (; input_i < input_size; input_i++, input_data += input_stride)
      {
        if (*input_data <= threshold)
          *input_data = val;
      }
      /* Advance the counters of `input`, the tensor being traversed. */
      __TH_TENSOR_APPLYX_UPDATE_COUNTERS(input, 1);
    }
    /* Guarded the same way as the out-of-place branch below. */
    if (input_counter != NULL)
      THFree(input_counter);
    /* NOTE(review): presumably makes `output` share `input`'s storage
     * rather than copying - confirm against THTensor_(set). */
    THFloatTensor_set(output, input);
  }
  else
  {
    /* Out of place: give `output` the shape of `input`, then walk both. */
    THFloatTensor_resizeAs(output, input);
    int TH_TENSOR_APPLY_hasFinished = 0;
    int64_t TH_TENSOR_dim_index = 0;
    TH_TENSOR_APPLYX_PREAMBLE(float, output, -1, 0);
    TH_TENSOR_APPLYX_PREAMBLE(float, input, -1, 0);
    if (output_n != input_n)
    {
      /* NOTE(review): the size descriptions are built but never used here;
       * an error report on the mismatch looks to have been dropped from
       * this listing - confirm against the original macro. */
      THDescBuff T1buff = _THSizeDesc(output->size, output->nDimension);
      THDescBuff T2buff = _THSizeDesc(input->size, input->nDimension);
    }
    while (!TH_TENSOR_APPLY_hasFinished)
    {
      /* Both element indices advance together, one step per iteration. */
      for (; output_i < output_size && input_i < input_size;
           output_i++, input_i++,
           output_data += output_stride, input_data += input_stride)
      {
        *output_data = (*input_data > threshold) ? *input_data : val;
      }
      __TH_TENSOR_APPLYX_UPDATE_COUNTERS(output, 0);
      __TH_TENSOR_APPLYX_UPDATE_COUNTERS(input, 0);
    }
    if (output_counter != NULL)
      THFree(output_counter);
    if (input_counter != NULL)
      THFree(input_counter);
  }
}
Non-linear Activation
/*
 * Forward pass of the Threshold activation, shown with the TH tensor-apply
 * macros partly expanded for float data.
 *
 * Every element x of `input` is kept when x > threshold_ and replaced by
 * val_ otherwise.
 *
 * state      - not referenced in this body.
 * input      - source tensor; its elements are overwritten when `inplace`
 *              is true.
 * output     - result tensor. In the in-place branch it is pointed at
 *              `input` via THFloatTensor_set; otherwise it is resized like
 *              `input` and filled element by element.
 * threshold_ - cut-off value, narrowed to float before use.
 * val_       - replacement value, narrowed to float before use.
 * inplace    - selects the overwrite-in-place branch (true) or the
 *              separate-output branch (false).
 */
void THNN_(Threshold_updateOutput)(THNNState *state,
                                   THTensor *input,
                                   THTensor *output,
                                   accreal threshold_,
                                   accreal val_,
                                   bool inplace)
{
  float threshold = (float)threshold_;
  float val = (float)val_;
  if (inplace)
  {
    /* In-place: walk `input` only and clamp elements where they sit. */
    int TH_TENSOR_APPLY_hasFinished = 0;
    int64_t TH_TENSOR_dim_index = 0;
    TH_TENSOR_APPLYX_PREAMBLE(float, input, -1, 0);
    /* The loop flag must be the one declared above. */
    while (!TH_TENSOR_APPLY_hasFinished)
    {
      for (; input_i < input_size; input_i++, input_data += input_stride)
      {
        if (*input_data <= threshold)
          *input_data = val;
      }
      /* Advance the counters of `input`, the tensor being traversed. */
      __TH_TENSOR_APPLYX_UPDATE_COUNTERS(input, 1);
    }
    /* Guarded the same way as the out-of-place branch below. */
    if (input_counter != NULL)
      THFree(input_counter);
    /* NOTE(review): presumably makes `output` share `input`'s storage
     * rather than copying - confirm against THTensor_(set). */
    THFloatTensor_set(output, input);
  }
  else
  {
    /* Out of place: give `output` the shape of `input`, then walk both. */
    THFloatTensor_resizeAs(output, input);
    int TH_TENSOR_APPLY_hasFinished = 0;
    int64_t TH_TENSOR_dim_index = 0;
    TH_TENSOR_APPLYX_PREAMBLE(float, output, -1, 0);
    TH_TENSOR_APPLYX_PREAMBLE(float, input, -1, 0);
    if (output_n != input_n)
    {
      /* NOTE(review): the size descriptions are built but never used here;
       * an error report on the mismatch looks to have been dropped from
       * this listing - confirm against the original macro. */
      THDescBuff T1buff = _THSizeDesc(output->size, output->nDimension);
      THDescBuff T2buff = _THSizeDesc(input->size, input->nDimension);
    }
    while (!TH_TENSOR_APPLY_hasFinished)
    {
      /* Both element indices advance together, one step per iteration. */
      for (; output_i < output_size && input_i < input_size;
           output_i++, input_i++,
           output_data += output_stride, input_data += input_stride)
      {
        *output_data = (*input_data > threshold) ? *input_data : val;
      }
      __TH_TENSOR_APPLYX_UPDATE_COUNTERS(output, 0);
      __TH_TENSOR_APPLYX_UPDATE_COUNTERS(input, 0);
    }
    if (output_counter != NULL)
      THFree(output_counter);
    if (input_counter != NULL)
      THFree(input_counter);
  }
}
In-place Out of place
Non-linear Activation
A
C
B
INPUT
Sigmoid(A)
Sigmoid(B)
D FPool(C) D + E
E
B
Pool(B)
Reference Count
A
C
B
INPUT
Sigmoid(A)
Sigmoid(B)
D FPool(C) D + E
E
B
Pool(B)
Node refcount
A 1
B 2
C 1
D 1
E 1
F 1
Mark Dirty
A
C
B
INPUT
Sigmoid(A)
Sigmoid(B)
D FPool(C) D + E
E
B
Pool(B)
Node refcount
A 1
B 2
C 1
D 1
E 1
F 1
If B is an in-place operator,
mark_dirty() raises an error.
Memory sharing
A
B
INPUT
Sigmoid(A)
Sigmoid(B)
FPool(C) D + E
B
Pool(B)C
D
E
Memory sharing
A
B
INPUT
Sigmoid(A)
Sigmoid(B)
FPool(C) D + E
B
Pool(B)C
D
E
Re-use
Release B after allocating C, E
Reuse for D
Memory sharing : Memory used by intermediate results that are no longer needed can be recycled and used in another node.
Memory sharing
A
B
INPUT
Sigmoid(A)
Sigmoid(B)
FPool(C) D + E
B
Pool(B)C
D
E
In-place
A
B
INPUT
Sigmoid(A)
Sigmoid(B)
FPool(C) D + E
B
Pool(B)C
D
E
In-place
OR A
B
INPUT
Sigmoid(A)
Sigmoid(B)
FPool(C) D + E
B
Pool(B)C
D
E
OR
Re-use
Trade Computation for Memory
• Apply normalization and non-linearities before/after the conv-operation.

• Convolution is most efficient when input lies in a contiguous block of
memory

• To make a contiguous input, each layer must copy all previous features
(concatenation → mem-copy)

• The above operations are computationally extremely cheap
• Copying to pre-allocated memory is significantly faster than
allocating new memory
Shared storage for concatenation
• Rather than allocating memory for each concatenation operation, assign
the outputs to a memory allocation shared across all layers

• Shared memory storage is used by all network layers, so its data is not
permanent

• The data therefore needs to be recomputed during back-propagation
Shared storage for batch normalization & non-linearity activation
• Assign the outputs of batch normalization / activation to a shared
memory allocation
• The data in shared memory storage is not permanent and will be
overwritten by the next layer

• Should recompute the batch normalization / activation outputs during
back-propagation
Re-computation
INPUT
conv-forward
bn-forward
relu-forward
conv-forward
bn-forward
relu-forward
Re-computation
INPUT
conv-forward
bn-forward
relu-forward
conv-forward
bn-forward
relu-forward
conv-backward
bn-backward
relu-backward
conv-backward
bn-backward
relu-backward
INPUT-Grad
Re-computation
INPUT
conv-forward
bn-forward
relu-forward
conv-forward
bn-forward
relu-forward
conv-backward
bn-backward
relu-backward
conv-backward
bn-backward
relu-backward
INPUT-Grad
Result
ResNet
Result
DenseNet
Benefit
• Can increase mini-batch size

→ Speed up

• Build deeper model

→ Accuracy up

• Can use a deep model on a small GPU

→ Money up
Thanks 😊

More Related Content

What's hot

DEEP LEARNING、トレーニング・インファレンスのGPUによる高速化
DEEP LEARNING、トレーニング・インファレンスのGPUによる高速化DEEP LEARNING、トレーニング・インファレンスのGPUによる高速化
DEEP LEARNING、トレーニング・インファレンスのGPUによる高速化
RCCSRENKEI
 
Knowledge Distillation for Federated Learning: a Practical Guide
Knowledge Distillation for Federated Learning: a Practical GuideKnowledge Distillation for Federated Learning: a Practical Guide
Knowledge Distillation for Federated Learning: a Practical Guide
XiachongFeng
 
RAPIDS: GPU-Accelerated ETL and Feature Engineering
RAPIDS: GPU-Accelerated ETL and Feature EngineeringRAPIDS: GPU-Accelerated ETL and Feature Engineering
RAPIDS: GPU-Accelerated ETL and Feature Engineering
Keith Kraus
 
Improving the Life of Data Scientists: Automating ML Lifecycle through MLflow
Improving the Life of Data Scientists: Automating ML Lifecycle through MLflowImproving the Life of Data Scientists: Automating ML Lifecycle through MLflow
Improving the Life of Data Scientists: Automating ML Lifecycle through MLflow
Databricks
 
Introduction to MLflow
Introduction to MLflowIntroduction to MLflow
Introduction to MLflow
Databricks
 
Introducing MlFlow: An Open Source Platform for the Machine Learning Lifecycl...
Introducing MlFlow: An Open Source Platform for the Machine Learning Lifecycl...Introducing MlFlow: An Open Source Platform for the Machine Learning Lifecycl...
Introducing MlFlow: An Open Source Platform for the Machine Learning Lifecycl...
DataWorks Summit
 
Dynamic Partition Pruning in Apache Spark
Dynamic Partition Pruning in Apache SparkDynamic Partition Pruning in Apache Spark
Dynamic Partition Pruning in Apache Spark
Databricks
 
RAPIDS – Open GPU-accelerated Data Science
RAPIDS – Open GPU-accelerated Data ScienceRAPIDS – Open GPU-accelerated Data Science
RAPIDS – Open GPU-accelerated Data Science
Data Works MD
 
“TensorFlow Lite for Microcontrollers (TFLM): Recent Developments,” a Present...
“TensorFlow Lite for Microcontrollers (TFLM): Recent Developments,” a Present...“TensorFlow Lite for Microcontrollers (TFLM): Recent Developments,” a Present...
“TensorFlow Lite for Microcontrollers (TFLM): Recent Developments,” a Present...
Edge AI and Vision Alliance
 
Distributed Deep Learning with Hadoop and TensorFlow
Distributed Deep Learning with Hadoop and TensorFlowDistributed Deep Learning with Hadoop and TensorFlow
Distributed Deep Learning with Hadoop and TensorFlow
Jan Wiegelmann
 
Change data capture
Change data captureChange data capture
Change data capture
Ron Barabash
 
Service Function Chaining in Openstack Neutron
Service Function Chaining in Openstack NeutronService Function Chaining in Openstack Neutron
Service Function Chaining in Openstack Neutron
Michelle Holley
 
Accelerating Data Science With GPUs
Accelerating Data Science With GPUsAccelerating Data Science With GPUs
Accelerating Data Science With GPUs
iguazio
 
The year of the graph: do you really need a graph database? How do you choose...
The year of the graph: do you really need a graph database? How do you choose...The year of the graph: do you really need a graph database? How do you choose...
The year of the graph: do you really need a graph database? How do you choose...
George Anadiotis
 
Magnum IO GPUDirect Storage 最新情報
Magnum IO GPUDirect Storage 最新情報Magnum IO GPUDirect Storage 最新情報
Magnum IO GPUDirect Storage 最新情報
NVIDIA Japan
 
Advanced Apache Spark Meetup Project Tungsten Nov 12 2015
Advanced Apache Spark Meetup Project Tungsten Nov 12 2015Advanced Apache Spark Meetup Project Tungsten Nov 12 2015
Advanced Apache Spark Meetup Project Tungsten Nov 12 2015
Chris Fregly
 
Supermicro’s Universal GPU: Modular, Standards Based and Built for the Future
Supermicro’s Universal GPU: Modular, Standards Based and Built for the FutureSupermicro’s Universal GPU: Modular, Standards Based and Built for the Future
Supermicro’s Universal GPU: Modular, Standards Based and Built for the Future
Rebekah Rodriguez
 
Apache Arrow: Open Source Standard Becomes an Enterprise Necessity
Apache Arrow: Open Source Standard Becomes an Enterprise NecessityApache Arrow: Open Source Standard Becomes an Enterprise Necessity
Apache Arrow: Open Source Standard Becomes an Enterprise Necessity
Wes McKinney
 
Graph neural networks overview
Graph neural networks overviewGraph neural networks overview
Graph neural networks overview
Rodion Kiryukhin
 

What's hot (20)

DEEP LEARNING、トレーニング・インファレンスのGPUによる高速化
DEEP LEARNING、トレーニング・インファレンスのGPUによる高速化DEEP LEARNING、トレーニング・インファレンスのGPUによる高速化
DEEP LEARNING、トレーニング・インファレンスのGPUによる高速化
 
Knowledge Distillation for Federated Learning: a Practical Guide
Knowledge Distillation for Federated Learning: a Practical GuideKnowledge Distillation for Federated Learning: a Practical Guide
Knowledge Distillation for Federated Learning: a Practical Guide
 
RAPIDS: GPU-Accelerated ETL and Feature Engineering
RAPIDS: GPU-Accelerated ETL and Feature EngineeringRAPIDS: GPU-Accelerated ETL and Feature Engineering
RAPIDS: GPU-Accelerated ETL and Feature Engineering
 
PyCUDAの紹介
PyCUDAの紹介PyCUDAの紹介
PyCUDAの紹介
 
Improving the Life of Data Scientists: Automating ML Lifecycle through MLflow
Improving the Life of Data Scientists: Automating ML Lifecycle through MLflowImproving the Life of Data Scientists: Automating ML Lifecycle through MLflow
Improving the Life of Data Scientists: Automating ML Lifecycle through MLflow
 
Introduction to MLflow
Introduction to MLflowIntroduction to MLflow
Introduction to MLflow
 
Introducing MlFlow: An Open Source Platform for the Machine Learning Lifecycl...
Introducing MlFlow: An Open Source Platform for the Machine Learning Lifecycl...Introducing MlFlow: An Open Source Platform for the Machine Learning Lifecycl...
Introducing MlFlow: An Open Source Platform for the Machine Learning Lifecycl...
 
Dynamic Partition Pruning in Apache Spark
Dynamic Partition Pruning in Apache SparkDynamic Partition Pruning in Apache Spark
Dynamic Partition Pruning in Apache Spark
 
RAPIDS – Open GPU-accelerated Data Science
RAPIDS – Open GPU-accelerated Data ScienceRAPIDS – Open GPU-accelerated Data Science
RAPIDS – Open GPU-accelerated Data Science
 
“TensorFlow Lite for Microcontrollers (TFLM): Recent Developments,” a Present...
“TensorFlow Lite for Microcontrollers (TFLM): Recent Developments,” a Present...“TensorFlow Lite for Microcontrollers (TFLM): Recent Developments,” a Present...
“TensorFlow Lite for Microcontrollers (TFLM): Recent Developments,” a Present...
 
Distributed Deep Learning with Hadoop and TensorFlow
Distributed Deep Learning with Hadoop and TensorFlowDistributed Deep Learning with Hadoop and TensorFlow
Distributed Deep Learning with Hadoop and TensorFlow
 
Change data capture
Change data captureChange data capture
Change data capture
 
Service Function Chaining in Openstack Neutron
Service Function Chaining in Openstack NeutronService Function Chaining in Openstack Neutron
Service Function Chaining in Openstack Neutron
 
Accelerating Data Science With GPUs
Accelerating Data Science With GPUsAccelerating Data Science With GPUs
Accelerating Data Science With GPUs
 
The year of the graph: do you really need a graph database? How do you choose...
The year of the graph: do you really need a graph database? How do you choose...The year of the graph: do you really need a graph database? How do you choose...
The year of the graph: do you really need a graph database? How do you choose...
 
Magnum IO GPUDirect Storage 最新情報
Magnum IO GPUDirect Storage 最新情報Magnum IO GPUDirect Storage 最新情報
Magnum IO GPUDirect Storage 最新情報
 
Advanced Apache Spark Meetup Project Tungsten Nov 12 2015
Advanced Apache Spark Meetup Project Tungsten Nov 12 2015Advanced Apache Spark Meetup Project Tungsten Nov 12 2015
Advanced Apache Spark Meetup Project Tungsten Nov 12 2015
 
Supermicro’s Universal GPU: Modular, Standards Based and Built for the Future
Supermicro’s Universal GPU: Modular, Standards Based and Built for the FutureSupermicro’s Universal GPU: Modular, Standards Based and Built for the Future
Supermicro’s Universal GPU: Modular, Standards Based and Built for the Future
 
Apache Arrow: Open Source Standard Becomes an Enterprise Necessity
Apache Arrow: Open Source Standard Becomes an Enterprise NecessityApache Arrow: Open Source Standard Becomes an Enterprise Necessity
Apache Arrow: Open Source Standard Becomes an Enterprise Necessity
 
Graph neural networks overview
Graph neural networks overviewGraph neural networks overview
Graph neural networks overview
 

Similar to Memory efficient pytorch

A Speculative Technique for Auto-Memoization Processor with Multithreading
A Speculative Technique for Auto-Memoization Processor with MultithreadingA Speculative Technique for Auto-Memoization Processor with Multithreading
A Speculative Technique for Auto-Memoization Processor with Multithreading
Matsuo and Tsumura lab.
 
Python for Scientific Computing -- Ricardo Cruz
Python for Scientific Computing -- Ricardo CruzPython for Scientific Computing -- Ricardo Cruz
Python for Scientific Computing -- Ricardo Cruz
rpmcruz
 
Chapter Eight(3)
Chapter Eight(3)Chapter Eight(3)
Chapter Eight(3)
bolovv
 
unit 5.ppt
unit 5.pptunit 5.ppt
LDCQ paper Dec21 with answer key_62cb2996afc60f6aedeb248c1d9283e5.pdf
LDCQ paper Dec21 with answer key_62cb2996afc60f6aedeb248c1d9283e5.pdfLDCQ paper Dec21 with answer key_62cb2996afc60f6aedeb248c1d9283e5.pdf
LDCQ paper Dec21 with answer key_62cb2996afc60f6aedeb248c1d9283e5.pdf
Vedant Gavhane
 
GCC
GCCGCC
Unit iii mca 1st year
Unit iii mca 1st yearUnit iii mca 1st year
Unit iii mca 1st year
akuladananjaya
 
Address/Thread/Memory Sanitizer
Address/Thread/Memory SanitizerAddress/Thread/Memory Sanitizer
Address/Thread/Memory Sanitizer
Platonov Sergey
 
Ch9b
Ch9bCh9b
Computer science ms
Computer science msComputer science ms
Computer science ms
B Bhuvanesh
 
15CS664- Python Application Programming- Question bank 1
15CS664- Python Application Programming- Question bank 115CS664- Python Application Programming- Question bank 1
15CS664- Python Application Programming- Question bank 1
Syed Mustafa
 
C Programming - Refresher - Part III
C Programming - Refresher - Part IIIC Programming - Refresher - Part III
C Programming - Refresher - Part III
Emertxe Information Technologies Pvt Ltd
 
CTSD-2 Presentation about dynamic memory allocation.pptx
CTSD-2 Presentation about dynamic memory allocation.pptxCTSD-2 Presentation about dynamic memory allocation.pptx
CTSD-2 Presentation about dynamic memory allocation.pptx
kmrinank
 
Revision1 C programming
Revision1 C programmingRevision1 C programming
Revision1 C programming
Kho コー。イエー。イエン
 
Code Optimization.ppt
Code Optimization.pptCode Optimization.ppt
Code Optimization.ppt
JohnSamuel280314
 
CS540-2-lecture11 - Copy.ppt
CS540-2-lecture11 - Copy.pptCS540-2-lecture11 - Copy.ppt
CS540-2-lecture11 - Copy.ppt
ssuser0be977
 
Advanced+pointers
Advanced+pointersAdvanced+pointers
Advanced+pointers
Rubal Bansal
 
Yampa AFRP Introduction
Yampa AFRP IntroductionYampa AFRP Introduction
Yampa AFRP Introduction
ChengHui Weng
 
Faster Python, FOSDEM
Faster Python, FOSDEMFaster Python, FOSDEM
Faster Python, FOSDEM
Victor Stinner
 
C++ memory leak detection
C++ memory leak detectionC++ memory leak detection
C++ memory leak detection
Võ Hòa
 

Similar to Memory efficient pytorch (20)

A Speculative Technique for Auto-Memoization Processor with Multithreading
A Speculative Technique for Auto-Memoization Processor with MultithreadingA Speculative Technique for Auto-Memoization Processor with Multithreading
A Speculative Technique for Auto-Memoization Processor with Multithreading
 
Python for Scientific Computing -- Ricardo Cruz
Python for Scientific Computing -- Ricardo CruzPython for Scientific Computing -- Ricardo Cruz
Python for Scientific Computing -- Ricardo Cruz
 
Chapter Eight(3)
Chapter Eight(3)Chapter Eight(3)
Chapter Eight(3)
 
unit 5.ppt
unit 5.pptunit 5.ppt
unit 5.ppt
 
LDCQ paper Dec21 with answer key_62cb2996afc60f6aedeb248c1d9283e5.pdf
LDCQ paper Dec21 with answer key_62cb2996afc60f6aedeb248c1d9283e5.pdfLDCQ paper Dec21 with answer key_62cb2996afc60f6aedeb248c1d9283e5.pdf
LDCQ paper Dec21 with answer key_62cb2996afc60f6aedeb248c1d9283e5.pdf
 
GCC
GCCGCC
GCC
 
Unit iii mca 1st year
Unit iii mca 1st yearUnit iii mca 1st year
Unit iii mca 1st year
 
Address/Thread/Memory Sanitizer
Address/Thread/Memory SanitizerAddress/Thread/Memory Sanitizer
Address/Thread/Memory Sanitizer
 
Ch9b
Ch9bCh9b
Ch9b
 
Computer science ms
Computer science msComputer science ms
Computer science ms
 
15CS664- Python Application Programming- Question bank 1
15CS664- Python Application Programming- Question bank 115CS664- Python Application Programming- Question bank 1
15CS664- Python Application Programming- Question bank 1
 
C Programming - Refresher - Part III
C Programming - Refresher - Part IIIC Programming - Refresher - Part III
C Programming - Refresher - Part III
 
CTSD-2 Presentation about dynamic memory allocation.pptx
CTSD-2 Presentation about dynamic memory allocation.pptxCTSD-2 Presentation about dynamic memory allocation.pptx
CTSD-2 Presentation about dynamic memory allocation.pptx
 
Revision1 C programming
Revision1 C programmingRevision1 C programming
Revision1 C programming
 
Code Optimization.ppt
Code Optimization.pptCode Optimization.ppt
Code Optimization.ppt
 
CS540-2-lecture11 - Copy.ppt
CS540-2-lecture11 - Copy.pptCS540-2-lecture11 - Copy.ppt
CS540-2-lecture11 - Copy.ppt
 
Advanced+pointers
Advanced+pointersAdvanced+pointers
Advanced+pointers
 
Yampa AFRP Introduction
Yampa AFRP IntroductionYampa AFRP Introduction
Yampa AFRP Introduction
 
Faster Python, FOSDEM
Faster Python, FOSDEMFaster Python, FOSDEM
Faster Python, FOSDEM
 
C++ memory leak detection
C++ memory leak detectionC++ memory leak detection
C++ memory leak detection
 

Recently uploaded

Neo4j - Product Vision and Knowledge Graphs - GraphSummit Paris
Neo4j - Product Vision and Knowledge Graphs - GraphSummit ParisNeo4j - Product Vision and Knowledge Graphs - GraphSummit Paris
Neo4j - Product Vision and Knowledge Graphs - GraphSummit Paris
Neo4j
 
8 Best Automated Android App Testing Tool and Framework in 2024.pdf
8 Best Automated Android App Testing Tool and Framework in 2024.pdf8 Best Automated Android App Testing Tool and Framework in 2024.pdf
8 Best Automated Android App Testing Tool and Framework in 2024.pdf
kalichargn70th171
 
OpenMetadata Community Meeting - 5th June 2024
OpenMetadata Community Meeting - 5th June 2024OpenMetadata Community Meeting - 5th June 2024
OpenMetadata Community Meeting - 5th June 2024
OpenMetadata
 
Webinar On-Demand: Using Flutter for Embedded
Webinar On-Demand: Using Flutter for EmbeddedWebinar On-Demand: Using Flutter for Embedded
Webinar On-Demand: Using Flutter for Embedded
ICS
 
Using Query Store in Azure PostgreSQL to Understand Query Performance
Using Query Store in Azure PostgreSQL to Understand Query PerformanceUsing Query Store in Azure PostgreSQL to Understand Query Performance
Using Query Store in Azure PostgreSQL to Understand Query Performance
Grant Fritchey
 
WWDC 2024 Keynote Review: For CocoaCoders Austin
WWDC 2024 Keynote Review: For CocoaCoders AustinWWDC 2024 Keynote Review: For CocoaCoders Austin
WWDC 2024 Keynote Review: For CocoaCoders Austin
Patrick Weigel
 
LORRAINE ANDREI_LEQUIGAN_HOW TO USE ZOOM
LORRAINE ANDREI_LEQUIGAN_HOW TO USE ZOOMLORRAINE ANDREI_LEQUIGAN_HOW TO USE ZOOM
LORRAINE ANDREI_LEQUIGAN_HOW TO USE ZOOM
lorraineandreiamcidl
 
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian CompaniesE-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
Quickdice ERP
 
Hand Rolled Applicative User Validation Code Kata
Hand Rolled Applicative User ValidationCode KataHand Rolled Applicative User ValidationCode Kata
Hand Rolled Applicative User Validation Code Kata
Philip Schwarz
 
openEuler Case Study - The Journey to Supply Chain Security
openEuler Case Study - The Journey to Supply Chain SecurityopenEuler Case Study - The Journey to Supply Chain Security
openEuler Case Study - The Journey to Supply Chain Security
Shane Coughlan
 
UI5con 2024 - Bring Your Own Design System
UI5con 2024 - Bring Your Own Design SystemUI5con 2024 - Bring Your Own Design System
UI5con 2024 - Bring Your Own Design System
Peter Muessig
 
Fundamentals of Programming and Language Processors
Fundamentals of Programming and Language ProcessorsFundamentals of Programming and Language Processors
Fundamentals of Programming and Language Processors
Rakesh Kumar R
 
ALGIT - Assembly Line for Green IT - Numbers, Data, Facts
ALGIT - Assembly Line for Green IT - Numbers, Data, FactsALGIT - Assembly Line for Green IT - Numbers, Data, Facts
ALGIT - Assembly Line for Green IT - Numbers, Data, Facts
Green Software Development
 
Everything You Need to Know About X-Sign: The eSign Functionality of XfilesPr...
Everything You Need to Know About X-Sign: The eSign Functionality of XfilesPr...Everything You Need to Know About X-Sign: The eSign Functionality of XfilesPr...
Everything You Need to Know About X-Sign: The eSign Functionality of XfilesPr...
XfilesPro
 
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CDKuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
rodomar2
 
Requirement Traceability in Xen Functional Safety
Requirement Traceability in Xen Functional SafetyRequirement Traceability in Xen Functional Safety
Requirement Traceability in Xen Functional Safety
Ayan Halder
 
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
mz5nrf0n
 
Mobile App Development Company In Noida | Drona Infotech
Mobile App Development Company In Noida | Drona InfotechMobile App Development Company In Noida | Drona Infotech
Mobile App Development Company In Noida | Drona Infotech
Drona Infotech
 
Automated software refactoring with OpenRewrite and Generative AI.pptx.pdf
Automated software refactoring with OpenRewrite and Generative AI.pptx.pdfAutomated software refactoring with OpenRewrite and Generative AI.pptx.pdf
Automated software refactoring with OpenRewrite and Generative AI.pptx.pdf
timtebeek1
 
Lecture 2 - software testing SE 412.pptx
Lecture 2 - software testing SE 412.pptxLecture 2 - software testing SE 412.pptx
Lecture 2 - software testing SE 412.pptx
TaghreedAltamimi
 

Recently uploaded (20)

Neo4j - Product Vision and Knowledge Graphs - GraphSummit Paris
Neo4j - Product Vision and Knowledge Graphs - GraphSummit ParisNeo4j - Product Vision and Knowledge Graphs - GraphSummit Paris
Neo4j - Product Vision and Knowledge Graphs - GraphSummit Paris
 
8 Best Automated Android App Testing Tool and Framework in 2024.pdf
8 Best Automated Android App Testing Tool and Framework in 2024.pdf8 Best Automated Android App Testing Tool and Framework in 2024.pdf
8 Best Automated Android App Testing Tool and Framework in 2024.pdf
 
OpenMetadata Community Meeting - 5th June 2024
OpenMetadata Community Meeting - 5th June 2024OpenMetadata Community Meeting - 5th June 2024
OpenMetadata Community Meeting - 5th June 2024
 
Webinar On-Demand: Using Flutter for Embedded
Webinar On-Demand: Using Flutter for EmbeddedWebinar On-Demand: Using Flutter for Embedded
Webinar On-Demand: Using Flutter for Embedded
 
Using Query Store in Azure PostgreSQL to Understand Query Performance
Using Query Store in Azure PostgreSQL to Understand Query PerformanceUsing Query Store in Azure PostgreSQL to Understand Query Performance
Using Query Store in Azure PostgreSQL to Understand Query Performance
 
WWDC 2024 Keynote Review: For CocoaCoders Austin
WWDC 2024 Keynote Review: For CocoaCoders AustinWWDC 2024 Keynote Review: For CocoaCoders Austin
WWDC 2024 Keynote Review: For CocoaCoders Austin
 
LORRAINE ANDREI_LEQUIGAN_HOW TO USE ZOOM
LORRAINE ANDREI_LEQUIGAN_HOW TO USE ZOOMLORRAINE ANDREI_LEQUIGAN_HOW TO USE ZOOM
LORRAINE ANDREI_LEQUIGAN_HOW TO USE ZOOM
 
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian CompaniesE-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
E-Invoicing Implementation: A Step-by-Step Guide for Saudi Arabian Companies
 
Hand Rolled Applicative User Validation Code Kata
Hand Rolled Applicative User ValidationCode KataHand Rolled Applicative User ValidationCode Kata
Hand Rolled Applicative User Validation Code Kata
 
openEuler Case Study - The Journey to Supply Chain Security
openEuler Case Study - The Journey to Supply Chain SecurityopenEuler Case Study - The Journey to Supply Chain Security
openEuler Case Study - The Journey to Supply Chain Security
 
UI5con 2024 - Bring Your Own Design System
UI5con 2024 - Bring Your Own Design SystemUI5con 2024 - Bring Your Own Design System
UI5con 2024 - Bring Your Own Design System
 
Fundamentals of Programming and Language Processors
Fundamentals of Programming and Language ProcessorsFundamentals of Programming and Language Processors
Fundamentals of Programming and Language Processors
 
ALGIT - Assembly Line for Green IT - Numbers, Data, Facts
ALGIT - Assembly Line for Green IT - Numbers, Data, FactsALGIT - Assembly Line for Green IT - Numbers, Data, Facts
ALGIT - Assembly Line for Green IT - Numbers, Data, Facts
 
Everything You Need to Know About X-Sign: The eSign Functionality of XfilesPr...
Everything You Need to Know About X-Sign: The eSign Functionality of XfilesPr...Everything You Need to Know About X-Sign: The eSign Functionality of XfilesPr...
Everything You Need to Know About X-Sign: The eSign Functionality of XfilesPr...
 
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CDKuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
KuberTENes Birthday Bash Guadalajara - Introducción a Argo CD
 
Requirement Traceability in Xen Functional Safety
Requirement Traceability in Xen Functional SafetyRequirement Traceability in Xen Functional Safety
Requirement Traceability in Xen Functional Safety
 
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
原版定制美国纽约州立大学奥尔巴尼分校毕业证学位证书原版一模一样
 
Mobile App Development Company In Noida | Drona Infotech
Mobile App Development Company In Noida | Drona InfotechMobile App Development Company In Noida | Drona Infotech
Mobile App Development Company In Noida | Drona Infotech
 
Automated software refactoring with OpenRewrite and Generative AI.pptx.pdf
Automated software refactoring with OpenRewrite and Generative AI.pptx.pdfAutomated software refactoring with OpenRewrite and Generative AI.pptx.pdf
Automated software refactoring with OpenRewrite and Generative AI.pptx.pdf
 
Lecture 2 - software testing SE 412.pptx
Lecture 2 - software testing SE 412.pptxLecture 2 - software testing SE 412.pptx
Lecture 2 - software testing SE 412.pptx
 

Memory efficient pytorch