Spaces:

ccmusic-database
/

pianos

Running

File size: 2,459 Bytes

import os
import torch
import torch.nn as nn
import huggingface_hub
import modelscope
from torchvision.models import squeezenet1_1

TMP_DIR = "./__pycache__/tmp"
EN_US = os.getenv("LANG") != "zh_CN.UTF-8"

ZH2EN = {
    "上传钢琴录音": "Upload a piano recording",
    "状态栏": "Status",
    "音频文件名": "Audio filename",
    "钢琴分类结果": "Piano classification result",
    "建议录音时长保持在 3s 左右, 过长会影响识别效率": "It is recommended to keep the duration of recording around 3s, too long will affect the recognition efficiency.",
    "引用": "Cite",
    "珠江": "Pearl River",
    "英昌": "YOUNG CHANG",
    "施坦威剧场": "STEINWAY Theater",
    "星海": "HSINGHAI",
    "卡瓦依": "KAWAI",
    "施坦威": "STEINWAY",
    "卡瓦依三角": "KAWAI Grand",
    "雅马哈": "YAMAHA",
}

MODEL_DIR = (
    huggingface_hub.snapshot_download(
        "ccmusic-database/pianos",
        cache_dir="./__pycache__",
    )
    if EN_US
    else modelscope.snapshot_download(
        "ccmusic-database/pianos",
        cache_dir="./__pycache__",
    )
)


def _L(zh_txt: str):
    return ZH2EN[zh_txt] if EN_US else zh_txt


def Classifier(cls_num=8, output_size=512, linear_output=False):
    q = (1.0 * output_size / cls_num) ** 0.25
    l1 = int(q * cls_num)
    l2 = int(q * l1)
    l3 = int(q * l2)
    if linear_output:
        return torch.nn.Sequential(
            nn.Dropout(),
            nn.Linear(output_size, l3),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(l3, l2),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(l2, l1),
            nn.ReLU(inplace=True),
            nn.Linear(l1, cls_num),
        )

    else:
        return torch.nn.Sequential(
            nn.Dropout(),
            nn.Conv2d(output_size, l3, kernel_size=(1, 1), stride=(1, 1)),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
            nn.Flatten(),
            nn.Linear(l3, l2),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(l2, l1),
            nn.ReLU(inplace=True),
            nn.Linear(l1, cls_num),
        )


def net(weights=MODEL_DIR + "/save.pt"):
    model = squeezenet1_1(pretrained=False)
    model.classifier = Classifier()
    model.load_state_dict(torch.load(weights, map_location=torch.device("cpu")))
    model.eval()
    return model