
VGG

An explanation of the VGG model implementation.
The VGG class implements the VGG model as a subclass of torch.nn.Module:
```python
import sys

import torch.nn as nn


class VGG(nn.Module):
    def __init__(self, num_layers, num_classes=2, init_weights=True):
        super(VGG, self).__init__()
        # input image size: (N, 3, 224, 224)
        # after each max-pooling layer, h and w are divided by 2: 224->112->56->28->14->7
        self.in_channels = 3
        # self.vgg_cfg holds out_channels values and 'M' (max pool) markers
        if num_layers == 11:
            self.vgg_cfg = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
        elif num_layers == 13:
            self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
        elif num_layers == 16:
            self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
        elif num_layers == 19:
            self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
        else:
            print("unavailable number of layers")
            sys.exit()

        self.conv_layers = self._make_layers(self.vgg_cfg)
        # fc layers part: adaptive average pooling -> FC -> ReLU -> Dropout -> FC -> ReLU -> Dropout -> FC (-> softmax)
        self.adaptive_avgpooling = nn.AdaptiveAvgPool2d(7)
        self.fc_layers = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )
        # placeholder for the gradients
        self.gradients = None

        if init_weights:
            self._initialize_weights()

    def _make_layers(self, cfg):
        layers = []
        in_channels = self.in_channels
        for v in cfg:
            if v == "M":
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels=in_channels, out_channels=v,
                                     kernel_size=3, stride=1, padding=1),
                           nn.BatchNorm2d(v),
                           nn.ReLU()]
                in_channels = v
        return nn.Sequential(*layers)

    def forward(self, x):
        output = self.conv_layers(x)
        output = self.adaptive_avgpooling(output)
        output = output.view(-1, 512 * 7 * 7)
        output = self.fc_layers(output)
        return output

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # He initialization
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # fill the weight matrix with values drawn from a normal
                # distribution with mean 0 and std 0.01
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
```
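As a quick usage sketch (a minimal example assuming the class above and a standard PyTorch install):

```python
import torch

model = VGG(num_layers=16)           # VGG-16 with batch norm; 2 classes by default
x = torch.randn(1, 3, 224, 224)      # dummy batch of one RGB image
logits = model(x)
print(logits.shape)                  # torch.Size([1, 2])
```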

1. __init__

```python
def __init__(self, num_layers, num_classes=2, init_weights=True):
    super(VGG, self).__init__()
    # input image size: (N, 3, 224, 224)
    # after each max-pooling layer, h and w are divided by 2: 224->112->56->28->14->7
    self.in_channels = 3
    # self.vgg_cfg holds out_channels values and 'M' (max pool) markers
    if num_layers == 11:
        self.vgg_cfg = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
    elif num_layers == 13:
        self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']
    elif num_layers == 16:
        self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
    elif num_layers == 19:
        self.vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
    else:
        print("unavailable number of layers")
        sys.exit()
```
The num_layers parameter specifies the number of layers in the VGG variant. The structures of VGG-11, 13, 16, and 19 are expressed as lists of output channel counts, with 'M' marking a max-pooling layer.
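As a sanity check on the naming, num_layers counts weight layers: the conv entries in the cfg list plus the three FC layers. A minimal sketch using the VGG-16 cfg from above:

```python
vgg16_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
             512, 512, 512, 'M', 512, 512, 512, 'M']
num_conv = sum(1 for v in vgg16_cfg if v != 'M')  # count conv entries only
print(num_conv)       # 13
print(num_conv + 3)   # 16 -> 13 conv layers + 3 FC layers = VGG-16
```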
```python
    self.conv_layers = self._make_layers(self.vgg_cfg)
    # fc layers part: adaptive average pooling -> FC -> ReLU -> Dropout -> FC -> ReLU -> Dropout -> FC (-> softmax)
    self.adaptive_avgpooling = nn.AdaptiveAvgPool2d(7)
    self.fc_layers = nn.Sequential(
        nn.Linear(512 * 7 * 7, 4096),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(4096, 4096),
        nn.ReLU(),
        nn.Dropout(),
        nn.Linear(4096, num_classes)
    )
    # placeholder for the gradients
    self.gradients = None

    if init_weights:
        self._initialize_weights()
```
Since the convolutional part is built by the _make_layers(self, cfg) function, let's look at that function next.

_make_layers(self, cfg)

```python
def _make_layers(self, cfg):
    layers = []
    in_channels = self.in_channels
    for v in cfg:
        if v == "M":
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            layers += [nn.Conv2d(in_channels=in_channels, out_channels=v,
                                 kernel_size=3, stride=1, padding=1),
                       nn.BatchNorm2d(v),
                       nn.ReLU()]
            in_channels = v
    return nn.Sequential(*layers)
```
Passing self.vgg_cfg as cfg stacks a conv → BN → ReLU block for each channel count, and inserts a max-pooling layer wherever 'M' appears.
The layer list is then unpacked into nn.Sequential and returned.
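To see the resulting pattern, here is a minimal sketch (assuming the VGG class above) that prints the first few modules of the conv stack for VGG-11, whose cfg begins [64, 'M', ...]:

```python
model = VGG(num_layers=11)
for m in list(model.conv_layers.children())[:4]:
    print(type(m).__name__)
# Conv2d       <- the 64-channel conv
# BatchNorm2d
# ReLU
# MaxPool2d    <- the 'M' entry
```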
Returning to __init__, let's look at self.fc_layers.
self.fc_layers
```python
self.fc_layers = nn.Sequential(
    nn.Linear(512 * 7 * 7, 4096),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(4096, 4096),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(4096, num_classes)
)
```
Since the last conv layer's output has 512 channels and the pooled feature map is 7×7, the first FC layer maps 512*7*7 inputs to 4096 outputs, followed by ReLU → Dropout; then another FC → ReLU → Dropout block; and finally an FC layer whose output size equals the number of classes.
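A minimal shape check on fc_layers alone (assuming the VGG class above; the input here is a dummy batch of already-flattened features):

```python
import torch

model = VGG(num_layers=16, num_classes=2)
flat = torch.randn(4, 512 * 7 * 7)   # dummy flattened conv features, batch of 4
logits = model.fc_layers(flat)
print(logits.shape)                  # torch.Size([4, 2])
```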

forward

```python
def forward(self, x):
    output = self.conv_layers(x)
    output = self.adaptive_avgpooling(output)
    output = output.view(-1, 512 * 7 * 7)
    output = self.fc_layers(output)
    return output
```
The input passes through the conv layers → adaptive average pooling, is flattened to one dimension with .view(), and is then fed into the FC layers.
The final output is a tensor of shape (N, num_classes), since the stack ends with a linear layer.
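One consequence of nn.AdaptiveAvgPool2d(7) is that inputs need not be exactly 224×224: the pooled feature map is always 7×7, so the 512*7*7 flattening stays valid. A minimal sketch:

```python
import torch

model = VGG(num_layers=11, num_classes=2)
for size in (224, 256):
    x = torch.randn(1, 3, size, size)
    print(size, model(x).shape)      # both print torch.Size([1, 2])
```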