The VGG class implements the VGG model by subclassing torch.nn.Module:
import sys

import torch.nn as nn


class VGG(nn.Module):
    def __init__(self, num_layers, num_classes=2, init_weights=True):
        super(VGG, self).__init__()
        # input image size (N, 3, 224, 224)
        # after each max-pooling layer, h and w are divided by 2: 224->112->56->28->14->7
        self.in_channels = 3
        # self.vgg_cfg holds out_channels values and 'M' (maxpool) markers
        if num_layers == 11:
            self.vgg_cfg = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
        elif num_layers == 13:
            self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
        elif num_layers == 16:
            self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
        elif num_layers == 19:
            self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
        else:
            print("unavailable number of layers")
            sys.exit()
        self.conv_layers = self._make_layers(self.vgg_cfg)
        # fc layers part: adaptive average pooling -> FC -> ReLU -> Dropout -> FC -> ReLU -> Dropout -> FC (-> softmax)
        self.adaptive_avgpooling = nn.AdaptiveAvgPool2d(7)
        self.fc_layers = nn.Sequential(
            nn.Linear(512*7*7, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )
        # placeholder for the gradients
        self.gradients = None
        if init_weights:
            self._initialize_weights()

    def _make_layers(self, cfg):
        layers = []
        in_channels = self.in_channels
        for v in cfg:
            if v == "M":
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels=in_channels, out_channels=v, kernel_size=3, stride=1, padding=1),
                           nn.BatchNorm2d(v),
                           nn.ReLU()]
                in_channels = v
        return nn.Sequential(*layers)

    def forward(self, x):
        output = self.conv_layers(x)
        output = self.adaptive_avgpooling(output)
        output = output.view(-1, 512*7*7)
        output = self.fc_layers(output)
        return output

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')  # He initialization
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)  # fill the weight matrix from a normal distribution (mean 0, std 0.01)
                nn.init.constant_(m.bias, 0)
1. init
def __init__(self, num_layers, num_classes=2, init_weights=True):
    super(VGG, self).__init__()
    # input image size (N, 3, 224, 224)
    # after each max-pooling layer, h and w are divided by 2: 224->112->56->28->14->7
    self.in_channels = 3
    # self.vgg_cfg holds out_channels values and 'M' (maxpool) markers
    if num_layers == 11:
        self.vgg_cfg = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
    elif num_layers == 13:
        self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
    elif num_layers == 16:
        self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
    elif num_layers == 19:
        self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
    else:
        print("unavailable number of layers")
        sys.exit()
The num_layers parameter specifies the number of weight layers in the VGG variant. The structures of VGG11, 13, 16, and 19 are each represented as a list of output channel counts, with 'M' marking a max-pooling layer.
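As a quick sanity check (a minimal standalone sketch, separate from the class itself): the number of conv entries in each config, plus the three FC layers in the classifier, should add up to the variant's layer count.

cfgs = {
    11: [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    13: [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    16: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    19: [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
for n, cfg in cfgs.items():
    conv_count = sum(1 for v in cfg if v != 'M')  # 'M' entries are pooling, not weight layers
    assert conv_count + 3 == n                    # e.g. VGG16: 13 conv layers + 3 FC layers

The remainder of __init__ then builds the conv and FC parts: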
self.conv_layers = self._make_layers(self.vgg_cfg)
# fc layers part: adaptive average pooling -> FC -> ReLU -> Dropout -> FC -> ReLU -> Dropout -> FC (-> softmax)
self.adaptive_avgpooling = nn.AdaptiveAvgPool2d(7)
self.fc_layers = nn.Sequential(
    nn.Linear(512*7*7, 4096),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(4096, num_classes)
)
# placeholder for the gradients
self.gradients = None
if init_weights:
    self._initialize_weights()
The convolutional part is built by _make_layers(self, cfg), so let's look at that function next.
_make_layers(self, cfg)
def _make_layers(self, cfg):
    layers = []
    in_channels = self.in_channels
    for v in cfg:
        if v == "M":
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            layers += [nn.Conv2d(in_channels=in_channels, out_channels=v, kernel_size=3, stride=1, padding=1),
                       nn.BatchNorm2d(v),
                       nn.ReLU()]
            in_channels = v
    return nn.Sequential(*layers)
Passing self.vgg_cfg as cfg stacks a Conv2d-BatchNorm2d-ReLU block for each channel count and inserts a max-pooling layer at each 'M'. The layer list is then unpacked into nn.Sequential and returned.
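For intuition, here is a minimal sketch of what this produces, using a made-up two-entry config (not one of the real VGG configs):

import torch.nn as nn

tiny_cfg = [64, 'M']  # hypothetical config: one conv block, then one maxpool
layers = []
in_channels = 3
for v in tiny_cfg:
    if v == 'M':
        layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
    else:
        layers += [nn.Conv2d(in_channels, v, kernel_size=3, stride=1, padding=1),
                   nn.BatchNorm2d(v),
                   nn.ReLU()]
        in_channels = v
print(nn.Sequential(*layers))
# Sequential(
#   (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
#   (1): BatchNorm2d(64, ...)
#   (2): ReLU()
#   (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
# )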
Going back to __init__, let's look at self.fc_layers.
self.fc_layers
self.fc_layers = nn.Sequential(
    nn.Linear(512*7*7, 4096),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(4096, num_classes)
)
Since the last conv layer's output has 512 channels with height and width 7 (after adaptive average pooling), the first FC layer takes 512*7*7 = 25088 input features and outputs 4096, followed by ReLU and Dropout; then another FC + ReLU + Dropout; and finally an FC layer whose output size is the number of classes.
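A minimal sketch (assuming the fc_layers definition above and the default num_classes=2) that verifies the classifier's input and output sizes on a flattened feature batch:

import torch
import torch.nn as nn

num_classes = 2  # assumed value, matching the default in __init__
fc_layers = nn.Sequential(
    nn.Linear(512*7*7, 4096), nn.ReLU(), nn.Dropout(),
    nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(),
    nn.Linear(4096, num_classes)
)
features = torch.randn(4, 512*7*7)  # a flattened (N, 25088) feature batch
print(fc_layers(features).shape)    # torch.Size([4, 2])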
forward
def forward(self, x):
    output = self.conv_layers(x)
    output = self.adaptive_avgpooling(output)
    output = output.view(-1, 512*7*7)
    output = self.fc_layers(output)
    return output
The input passes through the conv layers and adaptive average pooling, is flattened into a vector with .view(), and is then fed into the FC layers.
The final output is a tensor of shape (N, num_classes).
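To close, a short usage sketch (assuming the VGG class defined above) that runs a dummy batch through the model and confirms the output shape:

import torch

model = VGG(num_layers=16, num_classes=2)
x = torch.randn(4, 3, 224, 224)  # dummy batch of 4 RGB 224x224 images
out = model(x)
print(out.shape)  # torch.Size([4, 2]) -> (N, num_classes)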