import { URL } from "node:url";

/**
 * Builds a URL from a base address and a set of query parameters.
 *
 * The query string of `base` (if any) is replaced entirely by `params`;
 * values are percent-encoded by `URLSearchParams`.
 *
 * @param base - Absolute URL to start from.
 * @param params - Query parameters to serialize into the search string.
 * @returns The resulting URL object.
 * @throws TypeError if `base` is not a valid absolute URL.
 */
export function buildUrl(base: string, params: Record<string, string>): URL {
  const url = new URL(base);
  url.search = new URLSearchParams(params).toString();
  return url;
}

/**
 * Fetches `url` and parses the response body as JSON.
 *
 * @param url - Address to request.
 * @param context - Prefix used in the error message on a non-2xx response.
 * @returns The parsed JSON body. It is not validated, so callers must check
 *   the shape themselves.
 * @throws Error `"<context>: HTTP <status>"` when the response is not OK.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- unvalidated external JSON; callers narrow it
export async function fetchJson(url: URL, context: string): Promise<any> {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`${context}: HTTP ${response.status}`);
  }
  return response.json();
}

/**
 * Looks up the first configuration of a dataset and the first split of that
 * configuration, using the datasets-server `/info` endpoint.
 *
 * @param datasetId - Dataset identifier sent as the `dataset` query parameter.
 * @returns The first config name and the first split name found.
 * @throws Error if the request fails, `dataset_info` is missing, or the
 *   dataset has no configurations or no splits.
 */
export async function getDefaultConfigAndSplit(
  datasetId: string,
): Promise<{ config: string; split: string }> {
  const infoUrl = buildUrl("https://datasets-server.huggingface.co/info", {
    dataset: datasetId,
  });
  const info = await fetchJson(infoUrl, "Failed to get dataset info");

  const datasetInfo = info?.dataset_info;
  if (!datasetInfo) throw new Error("dataset_info missing in /info response");

  // Get the first available config
  const [config] = Object.keys(datasetInfo);
  if (config === undefined) throw new Error("No configurations found for dataset");

  // A null/absent config entry is treated the same as "no splits" instead of
  // surfacing as a raw TypeError.
  const splits = datasetInfo[config]?.splits ?? {};
  const [split] = Object.keys(splits);
  if (split === undefined) throw new Error("No splits found for dataset");

  return { config, split };
}

/**
 * Searches the Hugging Face dataset listing API for `query`.
 *
 * The request is sent with `limit=20`.
 *
 * @param query - Search text sent as the `search` query parameter.
 * @returns One entry per dataset in the response, with the dataset id used as
 *   both `id` and `title`, plus a link to the dataset page.
 * @throws Error if the request fails or the response body is not an array.
 */
export async function searchDatasets(
  query: string,
): Promise<Array<{ id: string; title: string; url: string }>> {
  const url = buildUrl("https://huggingface.co/api/datasets", {
    search: query,
    limit: "20",
  });
  const datasets = await fetchJson(url, "Dataset search failed");
  if (!Array.isArray(datasets)) {
    throw new Error("Dataset search failed: unexpected response shape");
  }

  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- unvalidated external JSON
  return datasets.map((dataset: any) => ({
    id: dataset.id,
    title: dataset.id,
    url: `https://huggingface.co/datasets/${dataset.id}`,
  }));
}

/**
 * Builds a summary record for a dataset containing a JSON dump of sample rows.
 *
 * The default config/split are resolved first (errors from that step
 * propagate). The rows request (`offset=0`, `length=50`) is best-effort: any
 * failure there yields the placeholder text instead of an error.
 *
 * @param datasetId - Dataset identifier.
 * @returns The dataset id (as `id` and `title`), the sample text, and a link
 *   to the dataset page.
 * @throws Error if the default config/split cannot be determined.
 */
export async function fetchDatasetAggregate(datasetId: string): Promise<{
  id: string;
  title: string;
  text: string;
  url: string;
}> {
  const { config, split } = await getDefaultConfigAndSplit(datasetId);

  const rowsUrl = buildUrl("https://datasets-server.huggingface.co/rows", {
    dataset: datasetId,
    config,
    split,
    offset: "0",
    length: "50",
  });

  let text: string;
  try {
    const rowsResponse = await fetch(rowsUrl);
    if (rowsResponse.ok) {
      const rowsResult = await rowsResponse.json();
      text = `Sample data (${config}/${split}):\n${JSON.stringify(rowsResult.rows ?? [], null, 2)}`;
    } else {
      text = "Sample data: Not available";
    }
  } catch {
    // Deliberate best-effort: network or parse failures degrade to the placeholder.
    text = "Sample data: Not available";
  }

  return {
    id: datasetId,
    title: datasetId,
    text,
    url: `https://huggingface.co/datasets/${datasetId}`,
  };
}

/**
 * Wraps a payload in a single-item text content envelope.
 *
 * @param payload - Value to serialize with `JSON.stringify`.
 * @returns An object whose `content` array holds one `{ type: "text", text }` item.
 */
export function textContent(payload: unknown): {
  content: Array<{ type: "text"; text: string }>;
} {
  return { content: [{ type: "text" as const, text: JSON.stringify(payload) }] };
}