PyTorch
Imports { .cols-1 }
General
import torch                                        # root package
from torch.utils.data import Dataset, DataLoader    # dataset representation and loading
Neural Network API
import torch.autograd as autograd      # computation graph
from torch import Tensor               # tensor node in the computation graph
import torch.nn as nn                  # neural networks
import torch.nn.functional as F        # layers, activations and more
import torch.optim as optim            # optimizers e.g. gradient descent, ADAM, etc.
from torch.jit import script, trace    # hybrid frontend decorator and tracing jit
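A minimal sketch of how these imports typically fit together; the network shape, the layer sizes, and the `TinyNet` name are illustrative assumptions, not part of the cheatsheet.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class TinyNet(nn.Module):                    # hypothetical two-layer network
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 32)         # 10 -> 32 units
        self.fc2 = nn.Linear(32, 2)          # 32 -> 2 units

    def forward(self, x):
        return self.fc2(F.relu(self.fc1(x)))

net = TinyNet()
opt = optim.Adam(net.parameters(), lr=1e-3)  # optimizer over the model's parameters
```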
TorchScript and JIT
torch.jit.trace()         # takes your module or function and an example
                          # data input, and traces the computational steps
                          # that the data encounters as it progresses through the model
@script                   # decorator used to indicate data-dependent
                          # control flow within the code being traced
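A rough sketch of tracing versus scripting; the model and the `clip_positive` function are made-up examples.

```python
import torch
import torch.nn as nn
from torch.jit import script, trace

model = nn.Sequential(nn.Linear(4, 4), nn.ReLU())  # placeholder model
example = torch.randn(1, 4)                        # example input for tracing

traced = trace(model, example)   # records the ops executed for this example input

@script                          # compiles the function, preserving control flow
def clip_positive(x):
    if x.sum() > 0:              # data-dependent branch kept by scripting
        return x
    return torch.zeros_like(x)
```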
ONNX
torch.onnx.export(model, dummy_data, "xxxx.proto")   # exports an ONNX formatted
                                                     # model using a trained model, dummy
                                                     # data and the desired file name
import onnx                           # onnx package (installed separately)
model = onnx.load("alexnet.proto")    # load an ONNX model
onnx.checker.check_model(model)       # check that the model
                                      # IR is well formed
onnx.helper.printable_graph(model.graph)   # print a human readable
                                           # representation of the graph
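A hedged end-to-end sketch of export and verification; the model, the input shape, and the file name are placeholders (the `onnx` package is installed separately from PyTorch).

```python
import torch
import torch.nn as nn
import onnx

model = nn.Linear(3, 2)                               # placeholder "trained" model
dummy_data = torch.randn(1, 3)                        # dummy input with the right shape

torch.onnx.export(model, dummy_data, "model.proto")   # write the ONNX file
loaded = onnx.load("model.proto")                     # load it back
onnx.checker.check_model(loaded)                      # check that the IR is well formed
print(onnx.helper.printable_graph(loaded.graph))      # human readable graph
```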
Vision
from torchvision import datasets, models, transforms   # vision datasets,
                                                        # architectures &
                                                        # transforms
import torchvision.transforms as transforms # composable transforms
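A small sketch of a typical torchvision pipeline; MNIST, the data directory, and the normalization constants are illustrative choices.

```python
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

tfm = transforms.Compose([                 # composable transform pipeline
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),  # illustrative mean/std
])

train_set = datasets.MNIST("data/", train=True, download=True, transform=tfm)
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
```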
Distributed Training
import torch.distributed as dist             # distributed communication
from torch.multiprocessing import Process    # memory sharing processes
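A minimal sketch of spawning processes and initializing a process group, assuming a single machine and the gloo backend; the address, port, and world size are arbitrary.

```python
import os
import torch.distributed as dist
from torch.multiprocessing import Process

def worker(rank, world_size):
    os.environ.setdefault("MASTER_ADDR", "127.0.0.1")   # assumed single-machine setup
    os.environ.setdefault("MASTER_PORT", "29500")
    dist.init_process_group("gloo", rank=rank, world_size=world_size)
    # ... collective ops, e.g. dist.all_reduce(tensor) ...
    dist.destroy_process_group()

if __name__ == "__main__":
    world_size = 2
    procs = [Process(target=worker, args=(r, world_size)) for r in range(world_size)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
```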
Tensors { .cols-1 }
Creation
x = torch.randn(*size)             # tensor with independent N(0,1) entries
x = torch.[ones|zeros](*size)      # tensor with all 1's [or 0's]
x = torch.tensor(L)                # create tensor from [nested] list or ndarray L
y = x.clone()                      # clone of x
with torch.no_grad():              # code wrap that stops autograd from tracking tensor history
requires_grad=True                 # arg, when set to True, tracks computation
                                   # history for future derivative calculations
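A concrete sketch with arbitrary shapes filled in for `*size` and `L`.

```python
import torch

x = torch.randn(3, 2)                      # 3x2 tensor with N(0,1) entries
z = torch.zeros(3, 2)                      # all zeros (torch.ones works the same way)
t = torch.tensor([[1., 2.], [3., 4.]])     # from a nested list
y = x.clone()                              # independent copy of x

w = torch.randn(3, 2, requires_grad=True)  # track computation history
with torch.no_grad():                      # ops in this block are not tracked
    y = w * 2
```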
Dimensionality
x.size()                           # return tuple-like object of dimensions
x = torch.cat(tensor_seq, dim=0)   # concatenates tensors along dim
y = x.view(a,b,...)                # reshapes x into size (a,b,...)
y = x.view(-1,a)                   # reshapes x into size (b,a) for some b
y = x.transpose(a,b)               # swaps dimensions a and b
y = x.permute(*dims)               # permutes dimensions
y = x.unsqueeze(dim)               # tensor with added axis
y = x.unsqueeze(dim=2)             # (a,b,c) tensor -> (a,b,1,c) tensor
y = x.squeeze()                    # removes all dimensions of size 1 (a,1,b,1) -> (a,b)
y = x.squeeze(dim=1)               # removes specified dimension of size 1 (a,1,b,1) -> (a,b,1)
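A worked example with concrete shapes; the (2, 3, 4) tensor is arbitrary.

```python
import torch

x = torch.randn(2, 3, 4)
x.size()                      # torch.Size([2, 3, 4])
y = x.view(6, 4)              # reshape to (6, 4)
y = x.view(-1, 4)             # infer the first dimension -> (6, 4)
y = x.transpose(0, 2)         # swap dims 0 and 2 -> (4, 3, 2)
y = x.permute(2, 0, 1)        # reorder dims -> (4, 2, 3)
y = x.unsqueeze(1)            # add an axis -> (2, 1, 3, 4)
y = y.squeeze(1)              # remove that size-1 axis -> (2, 3, 4)
z = torch.cat((x, x), dim=0)  # concatenate along dim 0 -> (4, 3, 4)
```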
Algebra
ret = A.mm(B)     # matrix multiplication
ret = A.mv(x)     # matrix-vector multiplication
x = x.t()         # matrix transpose
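A quick sketch with compatible shapes chosen for illustration.

```python
import torch

A = torch.randn(3, 4)
B = torch.randn(4, 5)
v = torch.randn(4)

C = A.mm(B)       # (3, 5) matrix product
u = A.mv(v)       # (3,) matrix-vector product
At = A.t()        # (4, 3) transpose
```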
GPU Usage
torch.cuda.is_available()   # check for cuda
x = x.cuda()                # move x's data from
                            # CPU to GPU and return new object
x = x.cpu()                 # move x's data from GPU to CPU
                            # and return new object
if not args.disable_cuda and torch.cuda.is_available():   # device agnostic code
    args.device = torch.device('cuda')                    # and modularity
else:
    args.device = torch.device('cpu')
net.to(device)              # recursively convert their
                            # parameters and buffers to
                            # device specific tensors
x = x.to(device)            # copy your tensors to a device
                            # (gpu, cpu)
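Putting the device-agnostic pattern together; the layer sizes and batch are illustrative.

```python
import torch
import torch.nn as nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = nn.Linear(8, 2).to(device)   # parameters and buffers moved to the device
x = torch.randn(4, 8).to(device)   # inputs moved to the same device
out = net(x)                       # runs on the GPU if one is available
```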
Deep Learning
nn.Linear(m,n)                          # fully connected layer from
                                        # m to n units
nn.ConvXd(m,n,s)                        # X dimensional conv layer from
                                        # m to n channels where X ∈ {1,2,3}
                                        # and the kernel size is s
nn.MaxPoolXd(s)                         # X dimension pooling layer
                                        # (notation as above)
nn.BatchNormXd                          # batch norm layer
nn.RNN/LSTM/GRU                         # recurrent layers
nn.Dropout(p=0.5, inplace=False)        # dropout layer for any dimensional input
nn.Dropout2d(p=0.5, inplace=False)      # 2-dimensional channel-wise dropout
nn.Embedding(num_embeddings, embedding_dim)   # (tensor-wise) mapping from
                                              # indices to embedding vectors
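A sketch stacking several of these layers; the channel counts, kernel size, and 32x32 input are assumptions made so the shapes line up.

```python
import torch
import torch.nn as nn

model = nn.Sequential(              # illustrative layer stack
    nn.Conv2d(3, 16, 3),            # 3 -> 16 channels, 3x3 kernel: 32x32 -> 30x30
    nn.BatchNorm2d(16),
    nn.ReLU(),
    nn.MaxPool2d(2),                # 30x30 -> 15x15
    nn.Dropout2d(p=0.5),
    nn.Flatten(),
    nn.Linear(16 * 15 * 15, 10),    # assumes 32x32 RGB input images
)
x = torch.randn(8, 3, 32, 32)       # batch of 8 fake images
out = model(x)                      # shape (8, 10)
```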
Loss Functions
nn.X                                    # where X is L1Loss, MSELoss, CrossEntropyLoss,
                                        # CTCLoss, NLLLoss, PoissonNLLLoss,
                                        # KLDivLoss, BCELoss, BCEWithLogitsLoss,
                                        # MarginRankingLoss, HingeEmbeddingLoss,
                                        # MultiLabelMarginLoss, SmoothL1Loss,
                                        # SoftMarginLoss, MultiLabelSoftMarginLoss,
                                        # CosineEmbeddingLoss, MultiMarginLoss,
                                        # or TripletMarginLoss
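One representative loss in use; the logits and targets are made up.

```python
import torch
import torch.nn as nn

loss_fn = nn.CrossEntropyLoss()                 # expects raw logits and class indices
logits = torch.randn(4, 3, requires_grad=True)  # 4 samples, 3 classes
targets = torch.tensor([0, 2, 1, 2])            # ground-truth class indices
loss = loss_fn(logits, targets)                 # scalar loss
loss.backward()                                 # gradients flow back to logits
```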
Activation Functions
nn.X                                    # where X is ReLU, ReLU6, ELU, SELU, PReLU, LeakyReLU,
                                        # RReLU, CELU, GELU, Threshold, Hardshrink, Hardtanh,
                                        # Sigmoid, LogSigmoid, Softplus, Softshrink,
                                        # Softsign, Tanh, Tanhshrink, Softmin, Softmax,
                                        # Softmax2d, LogSoftmax or AdaptiveLogSoftmaxWithLoss
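Module versus functional form of the same activation, with a toy input.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.tensor([-1.0, 0.0, 2.0])
act = nn.ReLU()       # as a layer/module
act(x)                # tensor([0., 0., 2.])
F.relu(x)             # same result via the functional API
```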
Optimizers
opt = optim.X(model.parameters(), ...)  # create optimizer
opt.step()                              # update weights
optim.X                                 # where X is SGD, Adadelta, Adagrad, Adam,
                                        # AdamW, SparseAdam, Adamax, ASGD,
                                        # LBFGS, RMSprop or Rprop
Learning rate scheduling
scheduler = optim.lr_scheduler.X(optimizer, ...)   # create lr scheduler
scheduler.step()                                   # update lr after optimizer updates weights
optim.lr_scheduler.X                               # where X is LambdaLR, MultiplicativeLR,
                                                   # StepLR, MultiStepLR, ExponentialLR,
                                                   # CosineAnnealingLR, ReduceLROnPlateau, CyclicLR,
                                                   # OneCycleLR or CosineAnnealingWarmRestarts
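A minimal training-loop sketch wiring an optimizer and scheduler together; the model, data, and hyperparameters are placeholders.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

model = nn.Linear(10, 2)                                             # placeholder model
opt = optim.SGD(model.parameters(), lr=0.1)
scheduler = optim.lr_scheduler.StepLR(opt, step_size=10, gamma=0.5)

for epoch in range(3):                               # illustrative loop
    x, y = torch.randn(16, 10), torch.randn(16, 2)   # fake batch
    loss = F.mse_loss(model(x), y)
    opt.zero_grad()
    loss.backward()
    opt.step()          # update weights first
    scheduler.step()    # then update the learning rate
```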
Data Utilities { .cols-1 }
Datasets
Dataset                                 # abstract class representing dataset
TensorDataset                           # labelled dataset in the form of tensors
ConcatDataset                           # concatenation of Datasets
Dataloaders and DataSamplers
DataLoader(dataset, batch_size=1, ...)  # loads data batches agnostic
                                        # of structure of individual data points
sampler.Sampler(dataset, ...)           # abstract class dealing with
                                        # ways to sample from dataset
sampler.XSampler                        # where X is Sequential, Random, SubsetRandom,
                                        # WeightedRandom, Batch or Distributed
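A toy custom Dataset fed through a DataLoader; the `SquaresDataset` name and its data are invented for illustration.

```python
import torch
from torch.utils.data import Dataset, DataLoader

class SquaresDataset(Dataset):                 # hypothetical toy dataset
    def __init__(self, n=100):
        self.x = torch.arange(n, dtype=torch.float32).unsqueeze(1)
        self.y = self.x ** 2

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

loader = DataLoader(SquaresDataset(), batch_size=8, shuffle=True)
for xb, yb in loader:                          # iterate over mini-batches
    pass
```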