|
|
|
|
|
|
|
|
import json |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
from pathlib import Path |
|
|
from tensorflow.keras.models import load_model |
|
|
from xgboost import XGBRegressor |
|
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face Hub repository that hosts the model artifacts.
REPO_ID = "nicolamustone/minires"

# Artifact filenames inside the repository.
NN_FILENAME = "minires.keras"  # neural network, loaded with keras load_model
XGB_FILENAME = "minires_xgb.json"  # booster, loaded with XGBRegressor.load_model
META_FILENAME = "minires_meta.json"  # JSON metadata providing "w_nn" and "features"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class minires: |
|
|
""" |
|
|
minires: ensemble predictor for resin usage (grams). |
|
|
|
|
|
Downloads artifacts from Hugging Face Hub (repo: nicolamustone/minires) |
|
|
and caches them locally via huggingface_hub. |
|
|
""" |
|
|
|
|
|
def __init__( |
|
|
self, |
|
|
cache_dir: str | None = None, |
|
|
revision: str | None = None, |
|
|
verbose: int = 0, |
|
|
): |
|
|
download_kwargs = {} |
|
|
if cache_dir is not None: |
|
|
download_kwargs["local_dir"] = cache_dir |
|
|
download_kwargs["local_dir_use_symlinks"] = False |
|
|
if revision is not None: |
|
|
download_kwargs["revision"] = revision |
|
|
|
|
|
self.nn_path = hf_hub_download( |
|
|
repo_id=REPO_ID, |
|
|
filename=NN_FILENAME, |
|
|
**download_kwargs, |
|
|
) |
|
|
self.xgb_path = hf_hub_download( |
|
|
repo_id=REPO_ID, |
|
|
filename=XGB_FILENAME, |
|
|
**download_kwargs, |
|
|
) |
|
|
self.meta_path = hf_hub_download( |
|
|
repo_id=REPO_ID, |
|
|
filename=META_FILENAME, |
|
|
**download_kwargs, |
|
|
) |
|
|
|
|
|
self.nn = load_model(self.nn_path, compile=False) |
|
|
self.xgb = XGBRegressor() |
|
|
|
|
|
self.xgb.load_model(self.xgb_path) |
|
|
|
|
|
with open(self.meta_path, "r") as f: |
|
|
meta = json.load(f) |
|
|
|
|
|
self.w_nn: float = float(meta["w_nn"]) |
|
|
self.features: list[str] = list(meta["features"]) |
|
|
self.verbose: int = verbose |
|
|
|
|
|
if self.verbose: |
|
|
w_xgb = 1.0 - self.w_nn |
|
|
print(f"[minires] ensemble weights: NN={self.w_nn:.3f}, XGB={w_xgb:.3f}") |
|
|
|
|
|
|
|
|
|
|
|
def _prepare_X(self, X) -> pd.DataFrame: |
|
|
if isinstance(X, pd.DataFrame): |
|
|
df = X |
|
|
else: |
|
|
df = pd.DataFrame(X, columns=self.features) |
|
|
|
|
|
missing = [c for c in self.features if c not in df.columns] |
|
|
if missing: |
|
|
raise ValueError(f"Missing required feature columns: {missing}") |
|
|
return df[self.features] |
|
|
|
|
|
|
|
|
|
|
|
def predict(self, X, verbose: int | None = None) -> np.ndarray: |
|
|
""" |
|
|
Predict resin usage (grams) for the given samples. |
|
|
""" |
|
|
Xp = self._prepare_X(X) |
|
|
v = self.verbose if verbose is None else verbose |
|
|
|
|
|
nn_pred = self.nn.predict(Xp, verbose=v).flatten() |
|
|
xgb_pred = self.xgb.predict(Xp) |
|
|
|
|
|
return self.w_nn * nn_pred + (1.0 - self.w_nn) * xgb_pred |
|
|
|