AB739 committed on
Commit 733ce78 · verified · 1 Parent(s): 128d765

Create audio.py

Files changed (1)
  1. tasks/audio.py +255 -0
tasks/audio.py ADDED
@@ -0,0 +1,255 @@
+ from fastapi import APIRouter
+ from datetime import datetime
+ from datasets import load_dataset
+ from sklearn.metrics import accuracy_score
+ import os
+ import torch
+ from torch import nn
+ import torch.nn.functional as F  # used by the BlazeFace forward pass
+ from torch.utils.data import DataLoader, TensorDataset
+ from torchaudio import transforms
+ from torchvision import models
+
+ from .utils.evaluation import AudioEvaluationRequest
+ from .utils.emissions import tracker, clean_emissions_data, get_space_info
+
+ from dotenv import load_dotenv
+ load_dotenv()
+
+ router = APIRouter()
+
+ DESCRIPTION = "BlazeFace-style CNN on mel spectrograms"
+ ROUTE = "/audio"
+
+
+
+ @router.post(ROUTE, tags=["Audio Task"],
+              description=DESCRIPTION)
+ async def evaluate_audio(request: AudioEvaluationRequest):
+     """
+     Evaluate audio classification for rainforest sound detection.
+
+     Current Model: BlazeFace-style CNN on mel spectrograms
+     - Predicts one of the two labels (0-1) for each clip
+     - Replaces the random baseline used for comparison
+     """
+     # Get space info
+     username, space_url = get_space_info()
+
+     # Define the label mapping
+     LABEL_MAPPING = {
+         "chainsaw": 0,
+         "environment": 1
+     }
+     # Load and prepare the dataset
+     # Because the dataset is gated, we need to use the HF_TOKEN environment variable to authenticate
+     dataset = load_dataset(request.dataset_name, token=os.getenv("HF_TOKEN"))
+
+     # Split dataset
+     train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
+     test_dataset = train_test["test"]
+
+     # Start tracking emissions
+     tracker.start()
+     tracker.start_task("inference")
+
+     #--------------------------------------------------------------------------------------------
+     # YOUR MODEL INFERENCE CODE HERE
+     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
+     #--------------------------------------------------------------------------------------------
+
+     # Make predictions using the BlazeFace model defined below
+     true_labels = test_dataset["label"]
+
+     resampler = transforms.Resample(orig_freq=12000, new_freq=16000)
+     mel_transform = transforms.MelSpectrogram(sample_rate=16000, n_mels=64)
+     amplitude_to_db = transforms.AmplitudeToDB()
+
+     def resize_audio(_waveform, target_length):
+         """Resizes the audio waveform to the target length using resampling"""
+         num_frames = _waveform.shape[-1]
+         if num_frames != target_length:
+             _resampler = transforms.Resample(orig_freq=num_frames, new_freq=target_length)
+             _waveform = _resampler(_waveform)
+         return _waveform
+
+     resized_waveforms = [
+         resize_audio(torch.tensor(sample['audio']['array'], dtype=torch.float32).unsqueeze(0), target_length=72000)
+         for sample in test_dataset
+     ]
+
+     # Resample each clip from 12 kHz to 16 kHz and convert it to a dB-scaled mel spectrogram
+     waveforms, labels = [], []
+     for waveform, label in zip(resized_waveforms, true_labels):
+         waveforms.append(amplitude_to_db(mel_transform(resampler(waveform))))
+         labels.append(label)
+
+     waveforms = torch.stack(waveforms)
+     labels = torch.tensor(labels)
+
+     test_loader = DataLoader(
+         TensorDataset(waveforms, labels),
+         batch_size=32,
+         shuffle=False
+     )
+
+     # Unused ResNet18 variant, kept for reference; the BlazeFace model below is the one evaluated
+     class ResNetForAudio(nn.Module):
+         def __init__(self, num_classes=10):
+             super(ResNetForAudio, self).__init__()
+             self.resnet = models.resnet18(pretrained=False)
+             self.resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
+             self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)
+
+         def forward(self, x):
+             return self.resnet(x)
+
+     #model = ResNetForAudio(num_classes=2)
+
+
+
+     class BlazeFace(nn.Module):
+         def __init__(self, input_channels=1, use_double_block=False, activation="relu", use_optional_block=True):
+             super(BlazeFace, self).__init__()
+             self.activation = activation
+             self.use_double_block = use_double_block
+             self.use_optional_block = use_optional_block
+
+             def conv_block(in_channels, out_channels, kernel_size, stride, padding):
+                 return nn.Sequential(
+                     nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
+                     nn.BatchNorm2d(out_channels),
+                     nn.ReLU() if activation == "relu" else nn.Sigmoid()  # Apply ReLU activation (default) or Sigmoid
+                 )
+
+             def depthwise_separable_block(in_channels, out_channels, stride):
+                 return nn.Sequential(
+                     nn.Conv2d(in_channels, in_channels, kernel_size=5, stride=stride, padding=2, groups=in_channels, bias=False),
+                     nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0),
+                     nn.BatchNorm2d(out_channels),
+                     nn.ReLU() if activation == "relu" else nn.Sigmoid()
+                 )
+
+             def double_block(in_channels, filters_1, filters_2, stride):
+                 return nn.Sequential(
+                     depthwise_separable_block(in_channels, filters_1, stride),
+                     depthwise_separable_block(filters_1, filters_2, 1)
+                 )
+
+             # Define layers (first part: conv layers)
+             self.conv1 = conv_block(input_channels, 24, kernel_size=5, stride=2, padding=2)
+
+             # Define single blocks (subsequent conv blocks)
+             self.single_blocks = nn.ModuleList([
+                 depthwise_separable_block(24, 24, stride=1),
+                 depthwise_separable_block(24, 24, stride=1),
+                 depthwise_separable_block(24, 48, stride=2),
+                 depthwise_separable_block(48, 48, stride=1),
+                 depthwise_separable_block(48, 48, stride=1)
+             ])
+
+             # Define double blocks if `use_double_block` is True
+             if self.use_double_block:
+                 self.double_blocks = nn.ModuleList([
+                     double_block(48, 24, 96, stride=2),
+                     double_block(96, 24, 96, stride=1),
+                     double_block(96, 24, 96, stride=2),
+                     double_block(96, 24, 96, stride=1),
+                     double_block(96, 24, 96, stride=2)
+                 ])
+             else:
+                 self.double_blocks = nn.ModuleList([
+                     depthwise_separable_block(48, 96, stride=2),
+                     depthwise_separable_block(96, 96, stride=1),
+                     depthwise_separable_block(96, 96, stride=2),
+                     depthwise_separable_block(96, 96, stride=1),
+                     depthwise_separable_block(96, 96, stride=2)
+                 ])
+
+             # Final convolutional head
+             self.conv_head = nn.Conv2d(96, 64, kernel_size=1, stride=1)
+             self.bn_head = nn.BatchNorm2d(64)
+
+             # Global Average Pooling
+             self.global_avg_pooling = nn.AdaptiveAvgPool2d(1)
+
+         def forward(self, x):
+             # First conv layer
+             x = self.conv1(x)
+
+             # Apply single blocks
+             for block in self.single_blocks:
+                 x = block(x)
+
+             # Apply double blocks
+             for block in self.double_blocks:
+                 x = block(x)
+
+             # Final head
+             x = self.conv_head(x)
+             x = self.bn_head(x)
+             x = F.relu(x)
+
+             # Global Average Pooling and Flatten
+             x = self.global_avg_pooling(x)
+             x = torch.flatten(x, 1)
+
+             return x
+
+     class BlazeFaceModel(nn.Module):
+         def __init__(self, input_channels, label_count, use_double_block=False, activation="relu", use_optional_block=True):
+             super(BlazeFaceModel, self).__init__()
+             self.blazeface_backbone = BlazeFace(input_channels=input_channels, use_double_block=use_double_block, activation=activation, use_optional_block=use_optional_block)
+             self.fc = nn.Linear(64, label_count)  # 64 is the output feature size after pooling
+
+         def forward(self, x):
+             features = self.blazeface_backbone(x)
+             output = self.fc(features)
+             return output
+
+     # Example Usage
+     model_settings = {
+         'spectrogram_length': 64,
+         'dct_coefficient_count': 481,
+         'label_count': 2
+     }
+
+     # Create model
+     model = BlazeFaceModel(input_channels=1, label_count=model_settings['label_count'], use_double_block=False, activation='relu', use_optional_block=False)
+
+     # Load the trained weights and switch BatchNorm layers to evaluation mode
+     model.load_state_dict(torch.load("./best_blazeface_model.pth", map_location=torch.device('cpu')))
+     model.eval()
+
+     predictions = []
+     with torch.inference_mode():
+         for data, target in test_loader:
+             output = model(data)  # (batch, 2) logits; no squeeze, so a final batch of size 1 keeps its batch dim
+             pred = torch.argmax(output, dim=-1)
+             predictions.extend(pred.tolist())
+
+     #--------------------------------------------------------------------------------------------
+     # YOUR MODEL INFERENCE STOPS HERE
+     #--------------------------------------------------------------------------------------------
+
+     # Stop tracking emissions
+     emissions_data = tracker.stop_task()
+
+     # Calculate accuracy
+     accuracy = accuracy_score(true_labels, predictions)
+
+     # Prepare results dictionary
+     results = {
+         "username": username,
+         "space_url": space_url,
+         "submission_timestamp": datetime.now().isoformat(),
+         "model_description": DESCRIPTION,
+         "accuracy": float(accuracy),
+         "energy_consumed_wh": emissions_data.energy_consumed * 1000,
+         "emissions_gco2eq": emissions_data.emissions * 1000,
+         "emissions_data": clean_emissions_data(emissions_data),
+         "api_route": ROUTE,
+         "dataset_config": {
+             "dataset_name": request.dataset_name,
+             "test_size": request.test_size,
+             "test_seed": request.test_seed
+         }
+     }
+
+     return results
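
For local testing, the sketch below shows one way this router could be mounted and the new /audio route exercised. It is an illustration under assumptions, not part of the commit: it assumes the repository root is on the import path, that AudioEvaluationRequest accepts dataset_name, test_size, and test_seed (the only fields the handler reads), and that HF_TOKEN plus ./best_blazeface_model.pth are available; the dataset id shown is a placeholder.

# test_audio_route.py -- hypothetical local smoke test
from fastapi import FastAPI
from fastapi.testclient import TestClient

from tasks.audio import router

app = FastAPI()
app.include_router(router)

client = TestClient(app)
response = client.post("/audio", json={
    "dataset_name": "your-org/your-gated-audio-dataset",  # placeholder; use the challenge's gated dataset
    "test_size": 0.2,
    "test_seed": 42,
})
print(response.status_code)
print(response.json().get("accuracy"))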