Spaces:
Running
Running
| import io, numpy as np | |
| from PIL import Image, ImageDraw | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| from .config import CANVAS_W, CANVAS_H, MARG_L, MARG_R, MARG_T, MARG_B | |
# Line colours for the plotted curves, keyed by series name
# ("gt" = ground truth, "target" = target curve, "out" = output curve).
PALETTE = dict(
    gt="#1f77b4",      # blue
    target="#ff7f0e",  # orange
    out="#2ca02c",     # green
)
def _new_axes(fig_w_in=8.5, fig_h_in=4.5, dpi=None):
    """Create a white figure holding a single, lightly styled axes.

    Args:
        fig_w_in: Figure width in inches.
        fig_h_in: Figure height in inches.
        dpi: Figure resolution. When ``None`` it is derived from ``CANVAS_W``
            as ``int(CANVAS_W / fig_w_in * 2.0)``.

    Returns:
        A ``(fig, ax)`` tuple.
    """
    if dpi is None:
        # Imported at call time, so the value currently held by the config
        # module is the one that gets used.
        from .config import CANVAS_W
        dpi = int(CANVAS_W / fig_w_in * 2.0)

    # Reset to matplotlib's default (light) style before building the figure.
    plt.style.use("default")
    fig = plt.figure(
        figsize=(fig_w_in, fig_h_in),
        dpi=dpi,
        facecolor="white",
    )
    ax = fig.add_subplot(1, 1, 1, facecolor="white")

    # Pale grid and frame so the data lines stay visually dominant.
    ax.grid(True, color="#eef2f7", linewidth=1.0, alpha=1.0)
    for border in ax.spines.values():
        border.set_color("#e6e9ef")

    ax.tick_params(colors="#475569")
    for axis in (ax.xaxis, ax.yaxis):
        axis.label.set_color("#334155")

    return fig, ax
def render_background_png(times, f0_gt, hint_hz, y_min=None, y_max=None):
    """Render the ground-truth and target F0 curves as a canvas-sized image.

    Args:
        times: Frame times in seconds (x values). May be empty, in which
            case the x-axis falls back to the range ``[0.0, 1.0]``.
        f0_gt: Ground-truth F0 in Hz, one value per frame. Values ``<= 0``
            are replaced by NaN in a private copy.
        hint_hz: Target F0 in Hz, one value per frame. Values ``<= 0`` are
            replaced by NaN in a private copy.
        y_min: Optional lower y-limit; overrides the automatic one.
        y_max: Optional upper y-limit; overrides the automatic one.

    Returns:
        A ``PIL.Image.Image`` in RGB mode, resized to
        ``(CANVAS_W, CANVAS_H)``.
    """
    # np.array(..., dtype=float) accepts any array-like and always copies,
    # so the caller's data is never modified by the NaN masking below.
    gt = np.array(f0_gt, dtype=float)
    gt[gt <= 0] = np.nan
    tar = np.array(hint_hz, dtype=float)
    tar[tar <= 0] = np.nan

    # Automatic y range: 5th-95th percentile of the valid ground truth.
    voiced = gt[np.isfinite(gt)]
    if voiced.size == 0:
        y0, y1 = 60, 400
    else:
        y0 = float(np.percentile(voiced, 5))
        y1 = float(np.percentile(voiced, 95))
        # Widen very narrow ranges so a flat contour is still readable.
        if y1 - y0 < 40:
            y0 = max(40.0, y0 - 20)
            y1 += 20
    if y_min is not None:
        y0 = y_min
    if y_max is not None:
        y1 = y_max

    # Guard BOTH x limits: indexing times[0] on an empty sequence raises.
    if len(times):
        x0, x1 = times[0], times[-1]
    else:
        x0, x1 = 0.0, 1.0

    fig, ax = _new_axes()
    try:
        ax.plot(times, gt, label="GT (Hz)", color=PALETTE['gt'], linewidth=1.0)
        ax.plot(times, tar, label="Target", color=PALETTE['target'],
                linewidth=1.0, linestyle="dotted")
        ax.set_xlim(x0, x1)
        ax.set_ylim(y0, y1)
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("F0 (Hz)")
        ax.legend(loc="upper right")
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    buf.seek(0)
    bg = Image.open(buf).convert("RGB").resize((CANVAS_W, CANVAS_H), Image.BICUBIC)
    return bg
def render_triplet_png(times, f0_gt, target_logf0, f0_out, y_min=None, y_max=None):
    """Render ground-truth, target and output F0 curves as one image.

    Args:
        times: Frame times in seconds (x values). May be empty, in which
            case the x-axis falls back to the range ``[0.0, 1.0]``.
        f0_gt: Ground-truth F0 in Hz, one value per frame. Values ``<= 0``
            are replaced by NaN in a private copy.
        target_logf0: Base-2 logarithm of the target F0, one value per
            frame. Entries equal to ``0`` are treated as "no target" and
            are not converted; all others become ``2.0 ** value``.
        f0_out: Output F0 in Hz, one value per frame. Values ``<= 0`` are
            replaced by NaN in a private copy.
        y_min: Optional lower y-limit; overrides the automatic one.
        y_max: Optional upper y-limit; overrides the automatic one.

    Returns:
        A ``PIL.Image.Image`` in RGB mode, resized to
        ``(CANVAS_W, CANVAS_H)``.
    """
    # np.array(..., dtype=float) always copies. np.asarray would hand back
    # the caller's own float64 array, and the NaN masking below would then
    # silently overwrite the caller's data.
    gt = np.array(f0_gt, dtype=float)
    out = np.array(f0_out, dtype=float)
    gt[gt <= 0] = np.nan
    out[out <= 0] = np.nan

    # Convert log2 F0 to Hz, leaving "no target" (0) frames as gaps.
    log_f0 = np.asarray(target_logf0)
    target_hz = np.zeros_like(log_f0, dtype=np.float32)
    has_target = log_f0 != 0
    target_hz[has_target] = 2.0 ** log_f0[has_target]
    target_hz[target_hz <= 0] = np.nan

    # Automatic y range: 5th-95th percentile of the valid ground truth.
    voiced = gt[np.isfinite(gt)]
    if voiced.size == 0:
        y0, y1 = 60, 400
    else:
        y0 = float(np.percentile(voiced, 5))
        y1 = float(np.percentile(voiced, 95))
        # Widen very narrow ranges so a flat contour is still readable.
        if y1 - y0 < 40:
            y0 = max(40.0, y0 - 20)
            y1 += 20
    if y_min is not None:
        y0 = y_min
    if y_max is not None:
        y1 = y_max

    # Guard BOTH x limits: indexing times[0] on an empty sequence raises.
    if len(times):
        x0, x1 = times[0], times[-1]
    else:
        x0, x1 = 0.0, 1.0

    fig, ax = _new_axes()
    try:
        ax.plot(times, gt, label="GT (Hz)", color=PALETTE['gt'], linewidth=1.0)
        ax.plot(times, target_hz, label="Target", color=PALETTE['target'],
                linewidth=1.0, linestyle="dotted")
        ax.plot(times, out, label="Output", color=PALETTE['out'], linewidth=1.0)
        ax.set_xlim(x0, x1)
        ax.set_ylim(y0, y1)
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("F0 (Hz)")
        ax.legend(loc="upper right")
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

    buf.seek(0)
    bg = Image.open(buf).convert("RGB").resize((CANVAS_W, CANVAS_H), Image.BICUBIC)
    # NOTE(review): draws a tiny black mark from (5, 5) to (6, 5); its purpose
    # is not evident from this file. Kept unchanged -- confirm before removing.
    ImageDraw.Draw(bg).line([(5, 5), (6, 5)], fill=(0, 0, 0), width=1)
    return bg
def build_axis_mappings(times, y_min, y_max):
    """Build converters between data coordinates and canvas pixel coordinates.

    The plot area is the canvas minus the ``MARG_L``/``MARG_R``/``MARG_T``/
    ``MARG_B`` margins. Time maps linearly onto x across that area, and Hz
    maps linearly onto y with the axis flipped (larger Hz gives smaller y).

    Args:
        times: Sequence of times; only the first and last entries are used
            as the x-axis range. May be empty, in which case the range
            falls back to ``[0.0, 1.0]``.
        y_min: Value mapped to the bottom edge of the plot area.
        y_max: Value mapped to the top edge of the plot area.

    Returns:
        A tuple ``(t_to_x, hz_to_y, x_to_t, y_to_hz)`` of callables.
        ``t_to_x`` and ``hz_to_y`` do not clamp their result; ``x_to_t`` and
        ``y_to_hz`` clamp the pixel position to the plot area first.
    """
    # Guard BOTH ends: a bare `times[0], times[-1] if len(times) else 1.0`
    # still evaluates times[0] and raises IndexError on an empty sequence.
    if len(times):
        x0, x1 = times[0], times[-1]
    else:
        x0, x1 = 0.0, 1.0

    plot_w = CANVAS_W - (MARG_L + MARG_R)
    plot_h = CANVAS_H - (MARG_T + MARG_B)

    # The max(1e-8, ...) denominators avoid division by zero for a
    # degenerate (zero-width) range or plot area.
    def t_to_x(t):
        """Map a time value to a canvas x coordinate."""
        return MARG_L + ((t - x0) / max(1e-8, (x1 - x0))) * plot_w

    def hz_to_y(hz):
        """Map a frequency value to a canvas y coordinate."""
        return MARG_T + (1.0 - ((hz - y_min) / max(1e-8, (y_max - y_min)))) * plot_h

    def x_to_t(x):
        """Map a canvas x coordinate to a time within ``[x0, x1]``."""
        frac = max(0.0, min(1.0, (x - MARG_L) / max(1e-8, plot_w)))
        return x0 + frac * (x1 - x0)

    def y_to_hz(y):
        """Map a canvas y coordinate to a frequency within ``[y_min, y_max]``."""
        frac = 1.0 - max(0.0, min(1.0, (y - MARG_T) / max(1e-8, plot_h)))
        return y_min + frac * (y_max - y_min)

    return t_to_x, hz_to_y, x_to_t, y_to_hz