File size: 3,140 Bytes
6b55d56
 
50f8caf
6b55d56
 
 
 
 
 
 
 
 
 
 
2ecf91e
6b55d56
 
 
 
2ecf91e
 
6b55d56
a144848
 
 
 
 
6b55d56
a144848
6b55d56
a144848
 
6b55d56
 
 
e56e4de
62e5be4
e56e4de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7dce4eb
e56e4de
 
 
 
 
a144848
e56e4de
 
 
 
 
 
 
 
 
 
 
6b55d56
e56e4de
b3a7e26
62e5be4
b3a7e26
e56e4de
b3a7e26
a144848
 
 
 
 
 
 
 
 
 
 
 
6b55d56
a144848
 
 
6b55d56
 
 
 
 
2ecf91e
cf65555
2ecf91e
6b55d56
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import express from 'express';
import cors from 'cors';
import { HfInference } from '@huggingface/inference';

const app = express();
// Port is configurable via the environment; defaults to the original hard-coded 3000.
const PORT = Number(process.env.PORT ?? 3000);

app.use(cors());
// Raised body limit so clients can POST sizeable prompts without a 413.
app.use(express.json({ limit: '10mb' }));

// Health check: reports service identity and whether an HF API key is configured.
app.get('/api/health', (req, res) => {
  const hasKey = Boolean(process.env.HF_API_KEY);
  res.json({
    status: 'ok',
    service: 'ai-inference-proxy',
    hasKey,
  });
});

// AI inference proxy endpoint.
// Accepts { prompt: string } and responds with either:
//   - [{ generated_text: string }] on success (the HF text-generation array
//     shape the client expects), or
//   - { fallback: true, ... } with HTTP 200, so the UI can degrade gracefully
//     instead of surfacing a hard error.
app.post('/api/inference', async (req, res) => {
  const apiKey = process.env.HF_API_KEY;
  const { prompt } = req.body || {};

  // Validate input: prompt must be a non-empty, non-whitespace string.
  if (!prompt || typeof prompt !== 'string' || !prompt.trim()) {
    return res.status(400).json({ error: 'Prompt is required' });
  }

  // If no key, return your graceful fallback
  if (!apiKey) {
    console.log('No HF_API_KEY set -> using fallback');
    return res.status(200).json({ fallback: true, message: 'Using rule-based recommendations' });
  }

  try {
    console.log('Calling HF Inference Providers API (router)...');

    // Direct HTTP call to the HF router's OpenAI-compatible chat endpoint.
    const response = await fetch(
      'https://router.huggingface.co/v1/chat/completions',
      {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: 'meta-llama/Llama-3.2-3B-Instruct',
          messages: [
            {
              role: 'user',
              content: prompt,
            },
          ],
          max_tokens: 512,
          temperature: 0.2,
          stream: false,
        }),
      }
    );

    if (!response.ok) {
      const errorText = await response.text();
      console.error('HF Router API error:', response.status, response.statusText);
      console.error('Response body:', errorText);
      return res.status(200).json({
        fallback: true,
        error: 'HF Router API error',
        status: response.status,
        details: errorText,
      });
    }

    const data = await response.json();
    console.log('HF Inference Providers API response received');

    // Guard against an unexpected response shape (e.g. empty `choices`):
    // the original `data.choices[0].message.content` would throw a TypeError
    // here and land in the catch below with a misleading "HF call failed".
    const generatedText = data?.choices?.[0]?.message?.content;
    if (typeof generatedText !== 'string') {
      console.error('Unexpected HF response shape:', JSON.stringify(data).slice(0, 500));
      return res.status(200).json({
        fallback: true,
        error: 'Unexpected HF response shape',
      });
    }

    // Return in expected format
    return res.status(200).json([{ generated_text: generatedText }]);
  } catch (e) {
    // NOTE(review): this extraction assumed an SDK-style error carrying
    // `cause.response` / `response`; plain fetch() network failures have
    // neither, so these usually resolve to undefined. Kept as best-effort.
    const status = e?.cause?.status || e?.response?.status;
    const body = e?.cause?.response?.text ? await e.cause.response.text() :
                 e?.response?.text ? await e.response.text() : undefined;

    console.error('Proxy error:', e);
    console.error('Status:', status);
    if (body) console.error('Body:', body);

    // Graceful fallback so the UI still works
    return res.status(200).json({
      fallback: true,
      error: 'HF call failed',
      status,
      details: body || e.message || 'Unknown error',
    });
  }
});

// Bind on all interfaces and log startup diagnostics once listening.
app.listen(PORT, '0.0.0.0', () => {
  const keyConfigured = !!process.env.HF_API_KEY;
  console.log(`✅ AI inference proxy running on port ${PORT}`);
  console.log(`Node version: ${process.version}`);
  console.log(`Model: meta-llama/Llama-3.2-3B-Instruct`);
  console.log(`API key configured: ${keyConfigured}`);
});