import torch
from torch import nn
import torch.nn.functional as F
# `torch.ao.quantization` is the current home of the eager-mode quantization
# API (`torch.quantization` is deprecated).
from torch.ao.quantization import QuantStub, DeQuantStub


class BlazeFace(nn.Module):
    """BlazeFace-style feature extractor built from depthwise-separable blocks."""

    def __init__(self, input_channels=1, use_double_block=False,
                 activation="relu", use_optional_block=True):
        super().__init__()
        self.activation = activation
        self.use_double_block = use_double_block
        # NOTE: `use_optional_block` is accepted for API compatibility but is
        # not used anywhere in this implementation.
        self.use_optional_block = use_optional_block

        def conv_block(in_channels, out_channels, kernel_size, stride, padding):
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size,
                          stride=stride, padding=padding),
                nn.BatchNorm2d(out_channels),
                nn.ReLU() if activation == "relu" else nn.Sigmoid(),  # ReLU (default) or Sigmoid
            )

        def depthwise_separable_block(in_channels, out_channels, stride):
            return nn.Sequential(
                # Depthwise 5x5 convolution: one filter per input channel
                nn.Conv2d(in_channels, in_channels, kernel_size=5, stride=stride,
                          padding=2, groups=in_channels, bias=False),
                # Pointwise 1x1 convolution to mix channels
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0),
                nn.BatchNorm2d(out_channels),
                nn.ReLU() if activation == "relu" else nn.Sigmoid(),
            )

        def double_block(in_channels, filters_1, filters_2, stride):
            return nn.Sequential(
                depthwise_separable_block(in_channels, filters_1, stride),
                depthwise_separable_block(filters_1, filters_2, 1),
            )

        # Stem: first convolutional layer
        self.conv1 = conv_block(input_channels, 24, kernel_size=5, stride=2, padding=2)

        # Single blocks (subsequent depthwise-separable conv blocks)
        self.single_blocks = nn.ModuleList([
            depthwise_separable_block(24, 24, stride=1),
            depthwise_separable_block(24, 24, stride=1),
            depthwise_separable_block(24, 48, stride=2),
            depthwise_separable_block(48, 48, stride=1),
            depthwise_separable_block(48, 48, stride=1),
        ])

        # Double blocks if `use_double_block` is True; otherwise plain
        # depthwise-separable blocks with the same channel progression
        if self.use_double_block:
            self.double_blocks = nn.ModuleList([
                double_block(48, 24, 96, stride=2),
                double_block(96, 24, 96, stride=1),
                double_block(96, 24, 96, stride=2),
                double_block(96, 24, 96, stride=1),
                double_block(96, 24, 96, stride=2),
            ])
        else:
            self.double_blocks = nn.ModuleList([
                depthwise_separable_block(48, 96, stride=2),
                depthwise_separable_block(96, 96, stride=1),
                depthwise_separable_block(96, 96, stride=2),
                depthwise_separable_block(96, 96, stride=1),
                depthwise_separable_block(96, 96, stride=2),
            ])

        # Final convolutional head
        self.conv_head = nn.Conv2d(96, 64, kernel_size=1, stride=1)
        self.bn_head = nn.BatchNorm2d(64)

        # Global average pooling
        self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        # First conv layer
        x = self.conv1(x)
        # Apply single blocks
        for block in self.single_blocks:
            x = block(x)
        # Apply double blocks
        for block in self.double_blocks:
            x = block(x)
        # Final head
        x = self.conv_head(x)
        x = self.bn_head(x)
        x = F.relu(x)
        # Global average pooling, then flatten to (batch, 64)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        return x


class BlazeFaceModel(nn.Module):
    """BlazeFace backbone followed by a linear classification head."""

    def __init__(self, input_channels, label_count, use_double_block=False,
                 activation="relu", use_optional_block=True):
        super().__init__()
        self.blazeface_backbone = BlazeFace(input_channels=input_channels,
                                            use_double_block=use_double_block,
                                            activation=activation,
                                            use_optional_block=use_optional_block)
        self.fc = nn.Linear(64, label_count)

    def forward(self, x):
        features = self.blazeface_backbone(x)
        output = self.fc(features)
        return output
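# --- Illustrative sanity check (not part of the original definition) ---
# A minimal sketch verifying that the backbone plus linear head produce
# logits of shape (batch, label_count). The 1x128x128 grayscale input and
# the label count of 10 are assumptions chosen for the example; the head's
# adaptive average pooling makes the model tolerant of other input sizes.
def _smoke_test():
    model = BlazeFaceModel(input_channels=1, label_count=10)
    dummy = torch.randn(4, 1, 128, 128)  # (batch, channels, height, width)
    logits = model(dummy)
    assert logits.shape == (4, 10)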
class QuantizedBlazeFaceModel(nn.Module):
    """Wraps a trained FP32 model with quant/dequant stubs for static quantization."""

    def __init__(self, model_fp32):
        super().__init__()
        self.quant = QuantStub()      # float -> quantized at the input
        self.dequant = DeQuantStub()  # quantized -> float at the output
        self.backbone = model_fp32.blazeface_backbone
        self.fc = model_fp32.fc

    def forward(self, x):
        x = self.quant(x)
        x = self.backbone(x)
        x = self.fc(x)
        x = self.dequant(x)
        return x
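# --- Post-training static quantization sketch (assumptions flagged inline) ---
# A minimal eager-mode flow showing how the QuantStub/DeQuantStub wrapper above
# is typically used. The random calibration inputs and the 128x128 size stand
# in for a real calibration set; "fbgemm" targets x86 (use "qnnpack" on ARM).
# Fusing Conv+BN+ReLU with fuse_modules before prepare() usually improves
# accuracy but is omitted here for brevity.
def quantize_blazeface(model_fp32):
    quantized = QuantizedBlazeFaceModel(model_fp32).eval()
    quantized.qconfig = torch.ao.quantization.get_default_qconfig("fbgemm")
    torch.ao.quantization.prepare(quantized, inplace=True)
    # Calibration pass: run representative inputs so observers record
    # activation ranges (random data here for illustration only)
    with torch.no_grad():
        for _ in range(8):
            quantized(torch.randn(1, 1, 128, 128))
    torch.ao.quantization.convert(quantized, inplace=True)
    return quantized


if __name__ == "__main__":
    _smoke_test()
    fp32 = BlazeFaceModel(input_channels=1, label_count=10)
    int8 = quantize_blazeface(fp32)
    print(int8(torch.randn(1, 1, 128, 128)).shape)  # torch.Size([1, 10])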