# diegotg343's picture
# decent version
# d9b6fe5
import io, numpy as np
from PIL import Image, ImageDraw
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
from .config import CANVAS_W, CANVAS_H, MARG_L, MARG_R, MARG_T, MARG_B
# Line colours used by the plots in this module, keyed by series name.
PALETTE = dict(
    gt="#1f77b4",      # blue
    target="#ff7f0e",  # orange
    out="#2ca02c",     # green
)
def _new_axes(fig_w_in=8.5, fig_h_in=4.5, dpi=None):
    """Create a white-background matplotlib figure with one styled axes.

    Args:
        fig_w_in: Figure width in inches.
        fig_h_in: Figure height in inches.
        dpi: Figure resolution. When ``None`` it is derived from the canvas
            width as ``int(CANVAS_W / fig_w_in * 2.0)``, so the figure is
            roughly twice ``CANVAS_W`` pixels wide.

    Returns:
        A ``(fig, ax)`` tuple.

    Note:
        ``plt.style.use("default")`` changes matplotlib's process-wide style
        settings, not only those of the figure created here.
    """
    if dpi is None:
        # CANVAS_W comes from the module-level import; a second,
        # function-scope import of the same name is unnecessary.
        dpi = int(CANVAS_W / fig_w_in * 2.0)

    plt.style.use("default")  # ensure light style
    fig = plt.figure(figsize=(fig_w_in, fig_h_in), dpi=dpi, facecolor="white")
    ax = fig.add_subplot(111, facecolor="white")

    # Pale grid and frame so the plotted curves stay the visual focus.
    ax.grid(True, color="#eef2f7", linewidth=1.0, alpha=1.0)
    for spine in ax.spines.values():
        spine.set_color("#e6e9ef")

    # Slate-grey ticks and axis labels.
    ax.tick_params(colors="#475569")
    ax.xaxis.label.set_color("#334155")
    ax.yaxis.label.set_color("#334155")
    return fig, ax
def render_background_png(times, f0_gt, hint_hz, y_min=None, y_max=None):
    """Render the ground-truth and target F0 curves as a canvas-sized image.

    Args:
        times: Per-frame x-axis values (the axis is labelled "Time (s)").
            May be empty, in which case the x-axis spans 0.0 to 1.0.
        f0_gt: Ground-truth F0 per frame. Values <= 0 are shown as gaps.
        hint_hz: Target F0 per frame. Values <= 0 are shown as gaps.
        y_min: Optional lower y-axis limit, overriding the automatic one.
        y_max: Optional upper y-axis limit, overriding the automatic one.

    Returns:
        An RGB ``PIL.Image.Image`` of size ``(CANVAS_W, CANVAS_H)``.

    The automatic y-range is the 5th to 95th percentile of the positive,
    finite ground-truth values. If that span is under 40 it is widened by 20
    on each side (lower bound floored at 40.0). With no such values the
    range is 60 to 400.
    """
    # np.array(..., dtype=float) always copies and accepts any array-like,
    # so the NaN masking below never writes into the caller's data.
    gt = np.array(f0_gt, dtype=float)
    gt[gt <= 0] = np.nan
    tar = np.array(hint_hz, dtype=float)
    tar[tar <= 0] = np.nan

    # y range from voiced GT
    voiced = gt[np.isfinite(gt)]
    if voiced.size == 0:
        y0, y1 = 60, 400
    else:
        y0 = float(np.percentile(voiced, 5))
        y1 = float(np.percentile(voiced, 95))
        if y1 - y0 < 40:
            y0 = max(40.0, y0 - 20)
            y1 += 20
    if y_min is not None:
        y0 = y_min
    if y_max is not None:
        y1 = y_max

    fig, ax = _new_axes()
    try:
        ax.plot(times, gt, label="GT (Hz)", color=PALETTE['gt'], linewidth=1.0)
        ax.plot(times, tar, label="Target", color=PALETTE['target'],
                linewidth=1.0, linestyle="dotted")
        # Index `times` only when it is non-empty; a conditional expression
        # on the second argument alone does not protect `times[0]`.
        if len(times):
            ax.set_xlim(times[0], times[-1])
        else:
            ax.set_xlim(0.0, 1.0)
        ax.set_ylim(y0, y1)
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("F0 (Hz)")
        ax.legend(loc="upper right")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

    buf.seek(0)
    # The tightly cropped PNG is rescaled to the fixed canvas size.
    bg = Image.open(buf).convert("RGB").resize((CANVAS_W, CANVAS_H), Image.BICUBIC)
    return bg
def render_triplet_png(times, f0_gt, target_logf0, f0_out, y_min=None, y_max=None):
    """Render ground-truth, target and output F0 curves as a canvas-sized image.

    Args:
        times: Per-frame x-axis values (the axis is labelled "Time (s)").
            May be empty, in which case the x-axis spans 0.0 to 1.0.
        f0_gt: Ground-truth F0 per frame. Values <= 0 are shown as gaps.
        target_logf0: Target pitch per frame in the log domain. It is
            converted with ``2.0 ** value``; entries equal to 0 are shown
            as gaps.
        f0_out: Output F0 per frame. Values <= 0 are shown as gaps.
        y_min: Optional lower y-axis limit, overriding the automatic one.
        y_max: Optional upper y-axis limit, overriding the automatic one.

    Returns:
        An RGB ``PIL.Image.Image`` of size ``(CANVAS_W, CANVAS_H)``.

    The automatic y-range is the 5th to 95th percentile of the positive,
    finite ground-truth values. If that span is under 40 it is widened by 20
    on each side (lower bound floored at 40.0). With no such values the
    range is 60 to 400.
    """
    # np.array(..., dtype=float) always copies. np.asarray would hand back
    # the caller's own array when it is already float64, and the NaN masking
    # below would then overwrite the caller's data in place.
    gt = np.array(f0_gt, dtype=float)
    out = np.array(f0_out, dtype=float)
    gt[gt <= 0] = np.nan
    out[out <= 0] = np.nan

    # Coerce so element-wise comparison and indexing also work for lists.
    log_target = np.asarray(target_logf0)
    target_hz = np.zeros_like(log_target, dtype=np.float32)
    has_target = log_target != 0
    target_hz[has_target] = 2.0 ** log_target[has_target]
    target_hz[target_hz <= 0] = np.nan

    # y range from voiced GT
    voiced = gt[np.isfinite(gt)]
    if voiced.size == 0:
        y0, y1 = 60, 400
    else:
        y0 = float(np.percentile(voiced, 5))
        y1 = float(np.percentile(voiced, 95))
        if y1 - y0 < 40:
            y0 = max(40.0, y0 - 20)
            y1 += 20
    if y_min is not None:
        y0 = y_min
    if y_max is not None:
        y1 = y_max

    fig, ax = _new_axes()
    try:
        ax.plot(times, gt, label="GT (Hz)", color=PALETTE['gt'], linewidth=1.0)
        ax.plot(times, target_hz, label="Target", color=PALETTE['target'],
                linewidth=1.0, linestyle="dotted")
        ax.plot(times, out, label="Output", color=PALETTE['out'], linewidth=1.0)
        # Index `times` only when it is non-empty; a conditional expression
        # on the second argument alone does not protect `times[0]`.
        if len(times):
            ax.set_xlim(times[0], times[-1])
        else:
            ax.set_xlim(0.0, 1.0)
        ax.set_ylim(y0, y1)
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("F0 (Hz)")
        ax.legend(loc="upper right")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", bbox_inches="tight")
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)

    buf.seek(0)
    # The tightly cropped PNG is rescaled to the fixed canvas size.
    bg = Image.open(buf).convert("RGB").resize((CANVAS_W, CANVAS_H), Image.BICUBIC)
    # Draws a tiny black mark from (5, 5) to (6, 5).
    # NOTE(review): its purpose is not evident from this file; confirm it is
    # still needed before removing.
    ImageDraw.Draw(bg).line([(5, 5), (6, 5)], fill=(0, 0, 0), width=1)
    return bg
def build_axis_mappings(times, y_min, y_max):
    """Build converters between data coordinates and canvas pixel coordinates.

    The plot area is the canvas minus the ``MARG_L``/``MARG_R``/``MARG_T``/
    ``MARG_B`` margins. Time maps linearly onto its width. Frequency maps
    linearly onto its height, with ``y_max`` at the top edge.

    Args:
        times: Per-frame time values; the first and last give the x-range.
            May be empty, in which case the x-range is 0.0 to 1.0.
        y_min: Frequency shown at the bottom edge of the plot area.
        y_max: Frequency shown at the top edge of the plot area.

    Returns:
        A tuple ``(t_to_x, hz_to_y, x_to_t, y_to_hz)`` of functions.
        ``t_to_x`` and ``hz_to_y`` do not clamp their input. ``x_to_t`` and
        ``y_to_hz`` clamp the pixel position to the plot area first.
    """
    # Index `times` only when it is non-empty. In the one-line form
    # `times[0], times[-1] if len(times) else 1.0` the conditional covers
    # only the second element, so `times[0]` still raises on empty input.
    if len(times):
        x0, x1 = times[0], times[-1]
    else:
        x0, x1 = 0.0, 1.0

    plot_w = CANVAS_W - (MARG_L + MARG_R)
    plot_h = CANVAS_H - (MARG_T + MARG_B)

    # The 1e-8 floors on the denominators avoid division by zero when a
    # range or the plot area is degenerate.
    def t_to_x(t):
        return MARG_L + ((t - x0) / max(1e-8, (x1 - x0))) * plot_w

    def hz_to_y(hz):
        # Pixel y grows downward, hence the (1.0 - fraction) flip.
        return MARG_T + (1.0 - ((hz - y_min) / max(1e-8, (y_max - y_min)))) * plot_h

    def x_to_t(x):
        frac = max(0.0, min(1.0, (x - MARG_L) / max(1e-8, plot_w)))
        return x0 + frac * (x1 - x0)

    def y_to_hz(y):
        frac = 1.0 - max(0.0, min(1.0, (y - MARG_T) / max(1e-8, plot_h)))
        return y_min + frac * (y_max - y_min)

    return t_to_x, hz_to_y, x_to_t, y_to_hz