densenet.py
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import torch.onnx


# Dense layer as per the paper (BN -> ReLU -> Conv), repeated as a 1x1 (bottleneck) then a 3x3 (composite).
# bn_factor is set to 4; the paper writes the bottleneck width as 4k, where k is the growth rate.
# The bottleneck (1x1) layer serves to reduce the dimensionality before the 3x3 conv.
# Note that this is not actually a reduction for the first few layers of the FIRST dense block,
# where the concatenated input is still narrower than 4k.
# k (growth_rate) is the number of feature-map channels each dense layer outputs.
# The 1x1 (bottleneck) and 3x3 (composite function) convs are kept separate because the 1x1
# operates on the concatenation of all preceding feature maps.
class DenseLayer(nn.Module):
    def __init__(self, in_, bn_factor=4, growth_rate=32):
        super().__init__()
        self.bn_layer = nn.Sequential(
            nn.BatchNorm2d(in_),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_, bn_factor * growth_rate, kernel_size=1),
        )
        self.composite_layer = nn.Sequential(
            nn.BatchNorm2d(bn_factor * growth_rate),
            nn.ReLU(inplace=True),
            nn.Conv2d(bn_factor * growth_rate, growth_rate, kernel_size=3, padding=1),
        )

    # Feed in the accumulated features (note: they arrive as a Python list of tensors).
    def bn_concat(self, concat_features):
        concated_features = torch.cat(concat_features, 1)
        out_ = self.bn_layer(concated_features)
        return out_

    # Thus, when you call DenseLayer, you feed in a list of feature maps.
    def forward(self, x):
        x = self.bn_concat(x)
        x = self.composite_layer(x)
        return x
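

# Illustrative shape check (not part of the model): a DenseLayer always emits growth_rate
# channels, no matter how many feature maps are concatenated at its input, e.g.
#   layer = DenseLayer(in_=64, bn_factor=4, growth_rate=32)
#   out = layer([torch.randn(1, 64, 56, 56)])
#   print(out.shape)  # torch.Size([1, 32, 56, 56])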


class DenseBlock(nn.Module):
    def __init__(self, in_, num_layers, bn_factor=4, growth_rate=32):
        super().__init__()
        self.num_layers = num_layers
        # Layer i receives in_ + i * growth_rate channels (the concatenation of all preceding feature maps).
        # The first number of channels (in_) can be set to anything.
        self.dense_layer = nn.ModuleList(
            [DenseLayer(in_ + i * growth_rate, bn_factor, growth_rate) for i in range(num_layers)])

    def forward(self, x):
        # Very important: the feature maps passed forward have to be appended!
        features = [x]  # form a Python list
        # Go through the layers and append the new feature maps as you go along.
        for i in range(self.num_layers):
            new_features = self.dense_layer[i](features)
            features.append(new_features)  # as you append, DenseLayer will later concatenate the list.
        return torch.cat(features, 1)
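

# Channel bookkeeping (illustrative): each layer in a block adds growth_rate channels to the
# running concatenation, so a block outputs in_ + num_layers * growth_rate channels, e.g.
#   block = DenseBlock(in_=64, num_layers=6, bn_factor=4, growth_rate=32)
#   out = block(torch.randn(1, 64, 56, 56))
#   print(out.shape)  # torch.Size([1, 256, 56, 56]), since 64 + 6 * 32 = 256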


class Transition(nn.Module):
    def __init__(self, in_, out_):
        super().__init__()
        self.downsample = nn.Sequential(
            nn.BatchNorm2d(in_),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_, out_, kernel_size=1),
            nn.AvgPool2d(kernel_size=2, stride=2)
        )

    def forward(self, x):
        return self.downsample(x)
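

# Illustrative: the 1x1 conv sets the channel count (the caller below compresses to half)
# and the 2x2 average pool halves the spatial resolution, e.g.
#   trans = Transition(in_=256, out_=128)
#   out = trans(torch.randn(1, 256, 56, 56))
#   print(out.shape)  # torch.Size([1, 128, 28, 28])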


class DenseNet(nn.Module):
    def __init__(self, input_channels=64, dense_block_config=(6, 12, 24, 16), bn_factor=4, growth_rate=32,
                 num_classes=1000):
        super().__init__()
        self.base_features = nn.Sequential(
            nn.Conv2d(3, input_channels, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(input_channels),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        # After every DenseBlock (except the last) comes a transition layer to downsample.
        self.Dense_and_Transition = nn.ModuleList()
        for i, num_layers in enumerate(dense_block_config):
            self.Dense_and_Transition.append(DenseBlock(input_channels, num_layers, bn_factor, growth_rate))
            # Each DenseBlock gets a different number of input channels because the transition layers
            # compress the channels as well as downsampling spatially.
            # The authors suggest compressing channels in the transition layers (see the DenseNet-BC variant).
            # Here the channels are divided/compressed by half (you can experiment with different factors).
            # A surprising result from the paper is that the compressed versions perform better with fewer parameters.
            # Remember that, going into the next DenseBlock, its input channels follow from the transition layer,
            # and the transition takes the output of the DenseBlock.
            output_channels = input_channels + num_layers * growth_rate  # channels out of this block
            transition_output = output_channels // 2
            input_channels = transition_output  # input to the next dense block is the output from the transition
            # Note: the last DenseBlock is not followed by a transition layer. I actually caught this by
            # debugging the code and then realised the paper clearly does not include one, oops.
            if i == len(dense_block_config) - 1:
                break  # avoid using a transition layer after the last DenseBlock
            self.Dense_and_Transition.append(Transition(output_channels, transition_output))
        # Classifier head. output_channels now holds the channel count of the final DenseBlock
        # (1024 for the default DenseNet-121-style config), so the linear layer adapts to other configs too.
        self.classifier = nn.Sequential(
            nn.AvgPool2d(7),
            nn.Flatten(),
            nn.ReLU(inplace=True),
            nn.Linear(output_channels, num_classes)
        )

    def forward(self, x):
        x = self.base_features(x)
        for layer in self.Dense_and_Transition:
            x = layer(x)
        x = self.classifier(x)
        return x
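

# Channel walkthrough for the default DenseNet-121-style config (growth_rate=32, channels halved
# at every transition), which is where the classifier's 1024 input channels come from:
#   stem: 64 -> block(6): 64 + 6*32 = 256 -> transition: 128
#   -> block(12): 128 + 12*32 = 512 -> transition: 256
#   -> block(24): 256 + 24*32 = 1024 -> transition: 512
#   -> block(16): 512 + 16*32 = 1024 (no transition after the last block)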


def calculate_parameters(operation):
    return sum(param.numel() for param in operation.parameters() if param.requires_grad)
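

# For the default config, calculate_parameters(DenseNet()) should land in the ballpark of the
# ~8M parameters reported for DenseNet-121; the exact count differs slightly from torchvision's
# implementation (e.g. conv biases and the classifier layout here).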


if __name__ == "__main__":
    batch_size = 10
    num_classes = 1000
    x = torch.randn(batch_size, 3, 224, 224)
    # Configurations following the paper (note that the channels are already compressed by half at every transition):
    # - DenseNet-121 -> dense_block_config = (6, 12, 24, 16)
    # - DenseNet-169 -> dense_block_config = (6, 12, 32, 32)
    # - DenseNet-201 -> dense_block_config = (6, 12, 48, 32)
    # - DenseNet-161 -> dense_block_config = (6, 12, 36, 24), growth_rate=48
    model = DenseNet(input_channels=64, dense_block_config=(6, 12, 24, 16), bn_factor=4, growth_rate=32)
    output = model(x)
    print(output.shape)
    # Good sanity check for the output: the expected shape is [batch, num_classes].
    assert output.shape[0] == batch_size and output.shape[1] == num_classes
    print(f"Output shape: {output.shape}, batch size: {batch_size}, number of classes: {num_classes}")
    print(calculate_parameters(model))

    model.eval()
    writer = SummaryWriter()
    writer.add_graph(model, x)
    writer.close()

    # Export the model to ONNX
    torch.onnx.export(model,                     # model being run
                      x,                         # model input (or a tuple for multiple inputs)
                      "densenet.onnx",           # where to save the model (can be a file or file-like object)
                      export_params=True,        # store the trained parameter weights inside the model file
                      opset_version=10,          # the ONNX version to export the model to
                      do_constant_folding=True,  # whether to execute constant folding for optimization
                      input_names=['input'],     # the model's input names
                      output_names=['output'],   # the model's output names
                      dynamic_axes={'input': {0: 'batch_size'},    # variable-length axes
                                    'output': {0: 'batch_size'}})
    # Run these commands in your terminal:
    #   pip install netron
    #   netron densenet.onnx
    # Then open the localhost link it prints for the visualisation.
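
    # Optional follow-up check (illustrative; assumes the separate onnxruntime package is installed):
    #   import onnxruntime as ort
    #   sess = ort.InferenceSession("densenet.onnx")
    #   onnx_out = sess.run(None, {"input": x.numpy()})[0]
    #   assert onnx_out.shape == (batch_size, num_classes)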