Spaces:
Sleeping
Sleeping
| import { URL } from "node:url"; | |
/**
 * Builds a URL from a base address and a set of query parameters.
 *
 * Any query string already present on `base` is replaced, not merged: the
 * returned URL carries exactly the entries of `params`, form-encoded by
 * `URLSearchParams` (for example a space becomes `+`).
 *
 * @param base - Absolute URL string.
 * @param params - Query parameters to serialize, in insertion order.
 * @returns A new `URL` whose search component is the serialized `params`.
 * @throws TypeError if `base` cannot be parsed as an absolute URL.
 */
export function buildUrl(base: string, params: Record<string, string>): URL {
  const url = new URL(base);
  // Assigning `search` overwrites whatever query string `base` carried.
  url.search = new URLSearchParams(params).toString();
  return url;
}
/**
 * Fetches `url` and returns the response body parsed as JSON.
 *
 * Every failure is reported as an `Error` whose message starts with
 * `context`, so callers can tell which request failed:
 * - transport failure: `"<context>: <underlying message>"`
 * - non-2xx status:    `"<context>: HTTP <status>"`
 * - unparsable body:   `"<context>: invalid JSON response (<underlying message>)"`
 *
 * When a failure wraps another error, that error is attached as `cause`.
 *
 * The parsed value is not validated against `T`; the type argument is a
 * caller-side assertion. The `any` default is kept so existing callers that
 * omit the type argument keep compiling.
 *
 * @param url - Address to request with a plain GET.
 * @param context - Prefix for every error message thrown by this function.
 * @returns The parsed JSON body, typed as `T`.
 * @throws Error on transport failure, non-OK status, or an invalid JSON body.
 */
export async function fetchJson<T = any>(url: URL, context: string): Promise<T> {
  const reasonOf = (err: unknown): string => (err instanceof Error ? err.message : String(err));
  // `Object.assign` attaches `cause` without relying on the ES2022 Error options argument.
  const fail = (detail: string, cause?: unknown): Error =>
    Object.assign(new Error(`${context}: ${detail}`), cause === undefined ? {} : { cause });

  let response: Response;
  try {
    response = await fetch(url);
  } catch (err) {
    throw fail(reasonOf(err), err);
  }

  if (!response.ok) {
    throw fail(`HTTP ${response.status}`);
  }

  try {
    return (await response.json()) as T;
  } catch (err) {
    throw fail(`invalid JSON response (${reasonOf(err)})`, err);
  }
}
/**
 * Resolves the default configuration and split for a dataset.
 *
 * Queries the datasets-server `/info` endpoint and picks the first key of
 * `dataset_info` as the config, then the first key of that config's `splits`
 * as the split. "First" means object key order of the parsed response.
 *
 * @param datasetId - Dataset identifier sent as the `dataset` query parameter.
 * @returns The first config name and the first split name of that config.
 * @throws Error if the request fails, `dataset_info` is missing, there are no
 *   configs, or the first config has no splits (including a null config entry).
 */
export async function getDefaultConfigAndSplit(datasetId: string): Promise<{ config: string; split: string }> {
  // Only the fields this function reads; everything is optional because the response is unvalidated.
  type InfoResponse = {
    dataset_info?: Record<string, { splits?: Record<string, unknown> | null } | null> | null;
  };

  const infoUrl = buildUrl("https://datasets-server.huggingface.co/info", { dataset: datasetId });
  const info = await fetchJson<InfoResponse | null>(infoUrl, "Failed to get dataset info");

  const datasetInfo = info?.dataset_info;
  if (!datasetInfo) throw new Error("dataset_info missing in /info response");

  // Get the first available config
  const [config] = Object.keys(datasetInfo);
  if (config === undefined) throw new Error("No configurations found for dataset");

  // `?.` keeps a null/undefined config entry on the descriptive error path
  // instead of failing with a bare TypeError.
  const [split] = Object.keys(datasetInfo[config]?.splits ?? {});
  if (split === undefined) throw new Error("No splits found for dataset");

  return { config, split };
}
/**
 * Searches the Hugging Face Hub for datasets matching `query`.
 *
 * Requests at most 20 results from `https://huggingface.co/api/datasets` and
 * maps each to `{ id, title, url }`, where `title` is the dataset id and `url`
 * is the dataset's Hub page. Entries without a string `id` are skipped.
 *
 * @param query - Free-text search string, sent as the `search` query parameter.
 * @returns One summary per returned dataset, in response order.
 * @throws Error if the request fails or the response body is not an array.
 */
export async function searchDatasets(query: string): Promise<Array<{ id: string; title: string; url: string }>> {
  const url = buildUrl("https://huggingface.co/api/datasets", { search: query, limit: "20" });
  const datasets = await fetchJson<unknown>(url, "Dataset search failed");

  // An error payload is typically an object, which would otherwise crash in `.map`.
  if (!Array.isArray(datasets)) {
    throw new Error("Dataset search failed: expected an array of datasets");
  }

  const hasId = (entry: unknown): entry is { id: string } =>
    typeof entry === "object" && entry !== null && typeof (entry as { id?: unknown }).id === "string";

  return datasets.filter(hasId).map(({ id }) => ({
    id,
    title: id,
    url: `https://huggingface.co/datasets/${id}`,
  }));
}
/**
 * Builds a summary document for a dataset, including a sample of its rows.
 *
 * Resolves the dataset's default config/split, then requests the first 50
 * rows from the datasets-server `/rows` endpoint. Row retrieval is best
 * effort: if that request fails for any reason, `text` is
 * `"Sample data: Not available"` instead of an error being thrown.
 *
 * @param datasetId - Dataset identifier; also used as `id` and `title`.
 * @returns `{ id, title, text, url }`, where `text` holds the pretty-printed
 *   sample rows (or the fallback message) and `url` is the dataset's Hub page.
 * @throws Error if the default config/split cannot be resolved; that step is
 *   not best effort.
 */
export async function fetchDatasetAggregate(datasetId: string): Promise<{
  id: string;
  title: string;
  text: string;
  url: string;
}> {
  const { config, split } = await getDefaultConfigAndSplit(datasetId);
  const rowsUrl = buildUrl("https://datasets-server.huggingface.co/rows", {
    dataset: datasetId,
    config,
    split,
    offset: "0",
    length: "50",
  });

  // Start from the fallback so every failure path yields the same text.
  let text = "Sample data: Not available";
  try {
    const rowsResult = await fetchJson<{ rows?: unknown[] | null }>(rowsUrl, "Failed to get dataset rows");
    text = `Sample data (${config}/${split}):\n${JSON.stringify(rowsResult.rows ?? [], null, 2)}`;
  } catch {
    // Deliberately ignored: sample rows are optional, keep the fallback text.
  }

  return {
    id: datasetId,
    title: datasetId,
    text,
    url: `https://huggingface.co/datasets/${datasetId}`,
  };
}
/**
 * Wraps a payload as a single text content item holding its JSON encoding.
 *
 * @param payload - Any value. Values `JSON.stringify` cannot represent at the
 *   top level (`undefined`, functions, symbols) are encoded as `"null"` so that
 *   `text` is always a string.
 * @returns `{ content: [{ type: "text", text }] }` with `text` set to the JSON string.
 * @throws TypeError if `payload` contains a circular reference or a BigInt
 *   (propagated from `JSON.stringify`).
 */
export function textContent(payload: unknown): { content: Array<{ type: "text"; text: string }> } {
  // JSON.stringify is typed as returning string but yields undefined for unrepresentable values.
  const text = (JSON.stringify(payload) as string | undefined) ?? "null";
  return { content: [{ type: "text" as const, text }] };
}