Use the new and updated torchinfo.
Keras has a neat API for viewing a summary of a model, which is very helpful when debugging your network. Here is bare-bones code that tries to mimic the same in PyTorch. The aim is to provide information that complements what print(your_model) shows in PyTorch.
`pip install torchsummary` or `git clone https://github.com/sksq96/pytorch-summary`
from torchsummary import summary
summary(your_model, input_size=(channels, H, W))
- Note that the `input_size` is required to make a forward pass through the network.
import torch import torch.nn as nn import torch.nn.functional as F from torchsummary import summary class Net(nn.Module): def __init__(self): super(Net, self).__init__() self.conv1 = nn.Conv2d(1, 10, kernel_size=5) self.conv2 = nn.Conv2d(10, 20, kernel_size=5) self.conv2_drop = nn.Dropout2d() self.fc1 = nn.Linear(320, 50) self.fc2 = nn.Linear(50, 10) def forward(self, x): x = F.relu(F.max_pool2d(self.conv1(x), 2)) x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) x = x.view(-1, 320) x = F.relu(self.fc1(x)) x = F.dropout(x, training=self.training) x = self.fc2(x) return F.log_softmax(x, dim=1) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # PyTorch v0.4.0 model = Net().to(device) summary(model, (1, 28, 28))---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Conv2d-1 [-1, 10, 24, 24] 260 Conv2d-2 [-1, 20, 8, 8] 5,020 Dropout2d-3 [-1, 20, 8, 8] 0 Linear-4 [-1, 50] 16,050 Linear-5 [-1, 10] 510 ================================================================ Total params: 21,840 Trainable params: 21,840 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.00 Forward/backward pass size (MB): 0.06 Params size (MB): 0.08 Estimated Total Size (MB): 0.15 ---------------------------------------------------------------- import torch from torchvision import models from torchsummary import summary device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') vgg = models.vgg16().to(device) summary(vgg, (3, 224, 224))---------------------------------------------------------------- Layer (type) Output Shape Param # ================================================================ Conv2d-1 [-1, 64, 224, 224] 1,792 ReLU-2 [-1, 64, 224, 224] 0 Conv2d-3 [-1, 64, 224, 224] 36,928 ReLU-4 [-1, 64, 224, 224] 0 MaxPool2d-5 [-1, 64, 112, 112] 0 Conv2d-6 [-1, 128, 112, 112] 73,856 ReLU-7 
[-1, 128, 112, 112] 0 Conv2d-8 [-1, 128, 112, 112] 147,584 ReLU-9 [-1, 128, 112, 112] 0 MaxPool2d-10 [-1, 128, 56, 56] 0 Conv2d-11 [-1, 256, 56, 56] 295,168 ReLU-12 [-1, 256, 56, 56] 0 Conv2d-13 [-1, 256, 56, 56] 590,080 ReLU-14 [-1, 256, 56, 56] 0 Conv2d-15 [-1, 256, 56, 56] 590,080 ReLU-16 [-1, 256, 56, 56] 0 MaxPool2d-17 [-1, 256, 28, 28] 0 Conv2d-18 [-1, 512, 28, 28] 1,180,160 ReLU-19 [-1, 512, 28, 28] 0 Conv2d-20 [-1, 512, 28, 28] 2,359,808 ReLU-21 [-1, 512, 28, 28] 0 Conv2d-22 [-1, 512, 28, 28] 2,359,808 ReLU-23 [-1, 512, 28, 28] 0 MaxPool2d-24 [-1, 512, 14, 14] 0 Conv2d-25 [-1, 512, 14, 14] 2,359,808 ReLU-26 [-1, 512, 14, 14] 0 Conv2d-27 [-1, 512, 14, 14] 2,359,808 ReLU-28 [-1, 512, 14, 14] 0 Conv2d-29 [-1, 512, 14, 14] 2,359,808 ReLU-30 [-1, 512, 14, 14] 0 MaxPool2d-31 [-1, 512, 7, 7] 0 Linear-32 [-1, 4096] 102,764,544 ReLU-33 [-1, 4096] 0 Dropout-34 [-1, 4096] 0 Linear-35 [-1, 4096] 16,781,312 ReLU-36 [-1, 4096] 0 Dropout-37 [-1, 4096] 0 Linear-38 [-1, 1000] 4,097,000 ================================================================ Total params: 138,357,544 Trainable params: 138,357,544 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.57 Forward/backward pass size (MB): 218.59 Params size (MB): 527.79 Estimated Total Size (MB): 746.96 ---------------------------------------------------------------- import torch import torch.nn as nn from torchsummary import summary class SimpleConv(nn.Module): def __init__(self): super(SimpleConv, self).__init__() self.features = nn.Sequential( nn.Conv2d(1, 1, kernel_size=3, stride=1, padding=1), nn.ReLU(), ) def forward(self, x, y): x1 = self.features(x) x2 = self.features(y) return x1, x2 device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = SimpleConv().to(device) summary(model, [(1, 16, 16), (1, 28, 28)])---------------------------------------------------------------- Layer (type) Output Shape Param # 
================================================================ Conv2d-1 [-1, 1, 16, 16] 10 ReLU-2 [-1, 1, 16, 16] 0 Conv2d-3 [-1, 1, 28, 28] 10 ReLU-4 [-1, 1, 28, 28] 0 ================================================================ Total params: 20 Trainable params: 20 Non-trainable params: 0 ---------------------------------------------------------------- Input size (MB): 0.77 Forward/backward pass size (MB): 0.02 Params size (MB): 0.00 Estimated Total Size (MB): 0.78 ---------------------------------------------------------------- - The idea for this package sparked from this PyTorch issue.
- Thanks to @ncullen93 and @HTLife.
- For Model Size Estimation @jacobkimmel (details here)
pytorch-summary is MIT-licensed.