Spaces:

diegotg343
/

pitchflower_demo

Running

App Files Files Community

pitchflower_demo / pitchflower_app /render.py

diegotg343

decent version

d9b6fe5 about 1 month ago

raw

history blame contribute delete

4.1 kB

	import io, numpy as np
	from PIL import Image, ImageDraw
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt

	from .config import CANVAS_W, CANVAS_H, MARG_L, MARG_R, MARG_T, MARG_B


	# Line colours for the plotted curves, keyed by curve name:
	# "gt" (ground truth), "target" and "out" (model output).
	PALETTE = dict(
	    gt="#1f77b4",      # blue
	    target="#ff7f0e",  # orange
	    out="#2ca02c",     # green
	)

	def _new_axes(fig_w_in=8.5, fig_h_in=4.5, dpi=None):
	    """Create a white figure holding one lightly styled axes.

	    Args:
	        fig_w_in: Figure width in inches.
	        fig_h_in: Figure height in inches.
	        dpi: Figure resolution. When ``None`` it is derived from the canvas
	            width as ``int(CANVAS_W / fig_w_in * 2.0)``.

	    Returns:
	        A ``(fig, ax)`` tuple.

	    Note:
	        Calls ``plt.style.use("default")``, which changes the process-wide
	        matplotlib style.
	    """
	    if dpi is None:
	        from .config import CANVAS_W
	        pixels_per_inch = CANVAS_W / fig_w_in
	        dpi = int(pixels_per_inch * 2.0)

	    # Force the light default style regardless of what was active before.
	    plt.style.use("default")

	    figure = plt.figure(
	        figsize=(fig_w_in, fig_h_in),
	        dpi=dpi,
	        facecolor="white",
	    )
	    axes = figure.add_subplot(111, facecolor="white")

	    # Pale grid and frame, with slate-grey ticks and axis labels.
	    axes.grid(True, color="#eef2f7", linewidth=1.0, alpha=1.0)
	    for border in axes.spines.values():
	        border.set_color("#e6e9ef")
	    axes.tick_params(colors="#475569")
	    for axis_label in (axes.xaxis.label, axes.yaxis.label):
	        axis_label.set_color("#334155")

	    return figure, axes


	def render_background_png(times, f0_gt, hint_hz, y_min=None, y_max=None):
	    """Render the ground-truth and target F0 curves as a canvas-sized image.

	    Args:
	        times: X positions of the samples (plotted on the "Time (s)" axis).
	            May be empty, in which case the x-axis spans 0.0 to 1.0.
	        f0_gt: Ground-truth F0 values; entries <= 0 are not drawn.
	        hint_hz: Target F0 values; entries <= 0 are not drawn.
	        y_min: Optional lower y-limit, replacing the automatic one.
	        y_max: Optional upper y-limit, replacing the automatic one.

	    Returns:
	        An RGB ``PIL.Image`` resized to ``(CANVAS_W, CANVAS_H)``.
	    """
	    # np.array copies, so masking below never touches the caller's data.
	    gt = np.array(f0_gt, dtype=float)
	    gt[gt <= 0] = np.nan
	    tar = np.array(hint_hz, dtype=float)
	    tar[tar <= 0] = np.nan

	    # Automatic y range: 5th-95th percentile of the drawable ground truth,
	    # widened when the span is under 40, with a fixed fallback when nothing
	    # is drawable.
	    voiced = gt[np.isfinite(gt)]
	    if voiced.size == 0:
	        y0, y1 = 60, 400
	    else:
	        y0 = float(np.percentile(voiced, 5))
	        y1 = float(np.percentile(voiced, 95))
	        if y1 - y0 < 40:
	            y0 = max(40.0, y0 - 20)
	            y1 += 20
	    if y_min is not None:
	        y0 = y_min
	    if y_max is not None:
	        y1 = y_max

	    # Guard both ends: indexing times[0] on an empty sequence would raise.
	    x0, x1 = (times[0], times[-1]) if len(times) else (0.0, 1.0)

	    fig, ax = _new_axes()
	    try:
	        ax.plot(times, gt, label="GT (Hz)", color=PALETTE['gt'], linewidth=1.0)
	        ax.plot(times, tar, label="Target", color=PALETTE['target'],
	                linewidth=1.0, linestyle="dotted")
	        ax.set_xlim(x0, x1)
	        ax.set_ylim(y0, y1)
	        ax.set_xlabel("Time (s)")
	        ax.set_ylabel("F0 (Hz)")
	        ax.legend(loc="upper right")
	        fig.tight_layout()

	        buf = io.BytesIO()
	        fig.savefig(buf, format="png", bbox_inches="tight")
	    finally:
	        # Always release the figure, even if plotting or saving failed.
	        plt.close(fig)

	    buf.seek(0)
	    bg = Image.open(buf).convert("RGB").resize((CANVAS_W, CANVAS_H), Image.BICUBIC)
	    return bg

	def render_triplet_png(times, f0_gt, target_logf0, f0_out, y_min=None, y_max=None):
	    """Render ground-truth, target and output F0 curves as a canvas-sized image.

	    Args:
	        times: X positions of the samples (plotted on the "Time (s)" axis).
	            May be empty, in which case the x-axis spans 0.0 to 1.0.
	        f0_gt: Ground-truth F0 values; entries <= 0 are not drawn.
	        target_logf0: Target given as a base-2 logarithm; it is converted
	            with ``2.0 ** value``. Entries equal to 0 are not drawn.
	        f0_out: Output F0 values; entries <= 0 are not drawn.
	        y_min: Optional lower y-limit, replacing the automatic one.
	        y_max: Optional upper y-limit, replacing the automatic one.

	    Returns:
	        An RGB ``PIL.Image`` resized to ``(CANVAS_W, CANVAS_H)``.
	    """
	    # np.array always copies; np.asarray would alias a float64 input and the
	    # NaN masking below would then overwrite the caller's arrays.
	    gt = np.array(f0_gt, dtype=float)
	    out = np.array(f0_out, dtype=float)
	    gt[gt <= 0] = np.nan
	    out[out <= 0] = np.nan

	    # Convert the log2 target, leaving exact zeros undrawn.
	    log_f0 = np.asarray(target_logf0)
	    target_hz = np.zeros_like(log_f0, dtype=np.float32)
	    has_target = log_f0 != 0
	    target_hz[has_target] = 2.0 ** log_f0[has_target]
	    target_hz[target_hz <= 0] = np.nan

	    # Automatic y range: 5th-95th percentile of the drawable ground truth,
	    # widened when the span is under 40, with a fixed fallback when nothing
	    # is drawable.
	    voiced = gt[np.isfinite(gt)]
	    if voiced.size == 0:
	        y0, y1 = 60, 400
	    else:
	        y0 = float(np.percentile(voiced, 5))
	        y1 = float(np.percentile(voiced, 95))
	        if y1 - y0 < 40:
	            y0 = max(40.0, y0 - 20)
	            y1 += 20
	    if y_min is not None:
	        y0 = y_min
	    if y_max is not None:
	        y1 = y_max

	    # Guard both ends: indexing times[0] on an empty sequence would raise.
	    x0, x1 = (times[0], times[-1]) if len(times) else (0.0, 1.0)

	    fig, ax = _new_axes()
	    try:
	        ax.plot(times, gt, label="GT (Hz)", color=PALETTE['gt'], linewidth=1.0)
	        ax.plot(times, target_hz, label="Target", color=PALETTE['target'],
	                linewidth=1.0, linestyle="dotted")
	        ax.plot(times, out, label="Output", color=PALETTE['out'], linewidth=1.0)
	        ax.set_xlim(x0, x1)
	        ax.set_ylim(y0, y1)
	        ax.set_xlabel("Time (s)")
	        ax.set_ylabel("F0 (Hz)")
	        ax.legend(loc="upper right")
	        fig.tight_layout()

	        buf = io.BytesIO()
	        fig.savefig(buf, format="png", bbox_inches="tight")
	    finally:
	        # Always release the figure, even if plotting or saving failed.
	        plt.close(fig)

	    buf.seek(0)
	    bg = Image.open(buf).convert("RGB").resize((CANVAS_W, CANVAS_H), Image.BICUBIC)
	    # NOTE(review): draws a 2-pixel black mark near the top-left corner; its
	    # purpose is not evident from this file - confirm before removing.
	    ImageDraw.Draw(bg).line([(5, 5), (6, 5)], fill=(0, 0, 0), width=1)
	    return bg

	def build_axis_mappings(times, y_min, y_max):
	    """Build converters between data coordinates and canvas pixel coordinates.

	    The plot area is the canvas (``CANVAS_W`` x ``CANVAS_H``) minus the
	    ``MARG_L``/``MARG_R``/``MARG_T``/``MARG_B`` margins. The y axis is
	    flipped: ``y_max`` maps to the top edge of the plot area.

	    Args:
	        times: Sequence whose first and last items define the x range.
	            May be empty, in which case the range is 0.0 to 1.0.
	        y_min: Data value at the bottom edge of the plot area.
	        y_max: Data value at the top edge of the plot area.

	    Returns:
	        A tuple ``(t_to_x, hz_to_y, x_to_t, y_to_hz)``. The two forward
	        mappings are unclamped; the two inverse mappings clamp the pixel
	        position to the plot area before converting.
	    """
	    # Guard both ends: the original only guarded times[-1], so an empty
	    # sequence still raised IndexError on times[0].
	    x0, x1 = (times[0], times[-1]) if len(times) else (0.0, 1.0)
	    plot_w = CANVAS_W - (MARG_L + MARG_R)
	    plot_h = CANVAS_H - (MARG_T + MARG_B)

	    # The max(1e-8, ...) terms avoid division by zero on degenerate ranges.
	    def t_to_x(t):
	        return MARG_L + ((t - x0) / max(1e-8, (x1 - x0))) * plot_w

	    def hz_to_y(hz):
	        return MARG_T + (1.0 - ((hz - y_min) / max(1e-8, (y_max - y_min)))) * plot_h

	    def x_to_t(x):
	        frac = max(0.0, min(1.0, (x - MARG_L) / max(1e-8, plot_w)))
	        return x0 + frac * (x1 - x0)

	    def y_to_hz(y):
	        frac = 1.0 - max(0.0, min(1.0, (y - MARG_T) / max(1e-8, plot_h)))
	        return y_min + frac * (y_max - y_min)

	    return t_to_x, hz_to_y, x_to_t, y_to_hz