AGofficial committed (verified)
Commit 839a654 · 1 Parent(s): 430f94f

Upload NeuralCode7.py

Files changed (1)
  1. NeuralCode7.py +334 -0
NeuralCode7.py ADDED
@@ -0,0 +1,334 @@
import math
import random
import time

# CUSTOMIZATION

CONTEXT_WINDOW = 5
EPOCHS = 500
LR = 0.01

def relu(x):
    return max(0.0, x)

def stable_softmax(x_list):
    # Numerically stable softmax: shift by the max before exponentiating.
    if not x_list:
        return []
    m = max(x_list)
    exps = [math.exp(i - m) for i in x_list]
    s = sum(exps)
    if s == 0:
        return [1.0 / len(x_list)] * len(x_list)
    return [e / s for e in exps]
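
# A minimal sanity check for the helper above (illustrative only; nothing in
# the model calls it, and it is safe to delete). Shifting by the max keeps
# math.exp in range for large logits, and the output is always a valid
# probability distribution.
def _demo_stable_softmax():
    probs = stable_softmax([1000.0, 1000.0])  # would overflow without the shift
    assert abs(sum(probs) - 1.0) < 1e-9
    assert abs(probs[0] - 0.5) < 1e-9
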
class NeuralNetwork:
    def __init__(self, layer_sizes=None, activation='relu', output_activation='softmax',
                 init_range=0.1, grad_clip=1.0, seed=None, context_window=5):
        if seed is not None:
            random.seed(seed)
        self.layer_sizes = layer_sizes[:] if layer_sizes is not None else None
        self.activation = relu if activation == 'relu' else (lambda x: x)
        self.output_activation = stable_softmax if output_activation == 'softmax' else (lambda x: x)
        self.init_range = float(init_range)
        self.grad_clip = grad_clip
        self.context_window = context_window
        self.weights = []
        self.biases = []
        self.vocab = []
        self.word_to_idx = {}
        self.idx_to_word = {}

    def prepare_data_with_context(self, text):
        # Tokenize on whitespace, build the vocabulary, and turn every run of
        # context_window words into a (multi-hot context, one-hot target) pair.
        words = [w.strip() for w in text.replace('\n', ' ').split(' ') if w.strip()]
        self.vocab = sorted(set(words))
        self.word_to_idx = {w: i for i, w in enumerate(self.vocab)}
        self.idx_to_word = {i: w for w, i in self.word_to_idx.items()}

        vocab_size = len(self.vocab)
        X = []
        Y = []

        for i in range(len(words) - self.context_window):
            context_words = words[i : i + self.context_window]
            target_word = words[i + self.context_window]

            x = [0.0] * vocab_size
            for word in context_words:
                if word in self.word_to_idx:
                    x[self.word_to_idx[word]] = 1.0

            y = [0.0] * vocab_size
            if target_word in self.word_to_idx:
                y[self.word_to_idx[target_word]] = 1.0

            X.append(x)
            Y.append(y)

        return X, Y
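
    # Illustrative example of the encoding above (hypothetical tiny corpus):
    # with context_window=2, the text "a b c" produces a single training pair,
    #
    #     X == [[1.0, 1.0, 0.0]]   # multi-hot context {'a', 'b'}
    #     Y == [[0.0, 0.0, 1.0]]   # one-hot target 'c'
    #
    # Note that word order inside the context window is discarded.
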
    def initialize_weights(self):
        if self.layer_sizes is None:
            raise ValueError("layer_sizes must be set before initializing weights.")
        if self.weights:
            return
        for i in range(len(self.layer_sizes) - 1):
            in_dim = self.layer_sizes[i]
            out_dim = self.layer_sizes[i + 1]
            W = [[random.uniform(-self.init_range, self.init_range) for _ in range(out_dim)] for _ in range(in_dim)]
            b = [0.0 for _ in range(out_dim)]
            self.weights.append(W)
            self.biases.append(b)
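
    # Design note: weights start uniform in [-init_range, init_range] with
    # zero biases. A width-aware alternative (an assumption, not what this
    # file does) would scale the bound by fan-in, e.g.
    #
    #     bound = 1.0 / math.sqrt(in_dim)   # Xavier/Glorot-style
    #     W = [[random.uniform(-bound, bound) for _ in range(out_dim)]
    #          for _ in range(in_dim)]
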
    def forward(self, x):
        # Hidden layers use the configured activation (ReLU by default);
        # the final layer is left linear and passed through softmax.
        a = x[:]
        for i in range(len(self.weights) - 1):
            next_a = []
            W = self.weights[i]
            b = self.biases[i]
            out_dim = len(W[0])
            for j in range(out_dim):
                s = sum(a[k] * W[k][j] for k in range(len(a))) + b[j]
                next_a.append(self.activation(s))
            a = next_a

        W = self.weights[-1]
        b = self.biases[-1]
        out = []
        out_dim = len(W[0])
        for j in range(out_dim):
            s = sum(a[k] * W[k][j] for k in range(len(a))) + b[j]
            out.append(s)
        return self.output_activation(out)
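
    # Quick property check for forward() (illustrative, run by hand):
    #
    #     nn = NeuralNetwork(layer_sizes=[3, 4, 3], seed=0)
    #     nn.initialize_weights()
    #     out = nn.forward([1.0, 0.0, 1.0])
    #     assert abs(sum(out) - 1.0) < 1e-9 and all(p >= 0.0 for p in out)
    #
    # Because x is multi-hot, the first layer's pre-activation is simply the
    # sum of the weight rows of the words present in the context, plus bias.
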
    def train(self, training_data, lr=0.01, epochs=500, verbose_every=50):
        X, Y = self.prepare_data_with_context(training_data)
        if not X:
            raise ValueError("Not enough tokens in training data to create context windows.")

        # Input and output widths must match the vocabulary discovered above.
        vocab_size = len(self.vocab)
        if self.layer_sizes is None:
            self.layer_sizes = [vocab_size, 64, vocab_size]
        else:
            self.layer_sizes[0] = vocab_size
            self.layer_sizes[-1] = vocab_size

        self.initialize_weights()

        for epoch in range(epochs):
            indices = list(range(len(X)))
            random.shuffle(indices)

            for idx in indices:
                x = X[idx]
                y = Y[idx]

                # Forward pass, keeping activations and pre-activations
                # around for backpropagation.
                activations = [x[:]]
                pre_acts = []
                a = x[:]

                for i in range(len(self.weights) - 1):
                    W, b = self.weights[i], self.biases[i]
                    z = []
                    out_dim = len(W[0])
                    for j in range(out_dim):
                        s = sum(a[k] * W[k][j] for k in range(len(a))) + b[j]
                        z.append(s)
                    pre_acts.append(z)
                    a = [self.activation(val) for val in z]
                    activations.append(a)

                W, b = self.weights[-1], self.biases[-1]
                z_final = []
                out_dim = len(W[0])
                for j in range(out_dim):
                    s = sum(a[k] * W[k][j] for k in range(len(a))) + b[j]
                    z_final.append(s)
                pre_acts.append(z_final)
                out = self.output_activation(z_final)

                # Softmax + cross-entropy gradient at the output layer.
                delta = [out[j] - y[j] for j in range(len(y))]

                for i in reversed(range(len(self.weights))):
                    in_act = activations[i]
                    in_dim = len(in_act)
                    out_dim = len(delta)

                    # Propagate the error through the pre-update weights
                    # first; updating before propagating would backprop
                    # through already-modified parameters.
                    prev_delta = None
                    if i != 0:
                        prev_delta = [0.0] * in_dim
                        for p in range(in_dim):
                            s = sum(self.weights[i][p][j] * delta[j] for j in range(out_dim))
                            if pre_acts[i - 1][p] > 0:  # ReLU derivative
                                prev_delta[p] = s

                    db = delta[:]
                    if self.grad_clip is not None:
                        db = [max(-self.grad_clip, min(self.grad_clip, g)) for g in db]
                    for j in range(len(self.biases[i])):
                        self.biases[i][j] -= lr * db[j]

                    for k in range(in_dim):
                        for j in range(out_dim):
                            grad_w = in_act[k] * delta[j]
                            if self.grad_clip is not None:
                                grad_w = max(-self.grad_clip, min(self.grad_clip, grad_w))
                            self.weights[i][k][j] -= lr * grad_w

                    if i != 0:
                        delta = prev_delta

            if epoch % verbose_every == 0 or epoch == epochs - 1:
                # Mean cross-entropy over the full training set.
                loss = 0.0
                for x_val, y_val in zip(X, Y):
                    p = self.forward(x_val)
                    for j in range(len(y_val)):
                        if y_val[j] > 0:
                            loss -= math.log(p[j] + 1e-12)
                print(f"Epoch {epoch}, Loss: {loss / len(X):.6f}")
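
    # Why delta = out - y at the output: for softmax followed by
    # cross-entropy, dL/dz_j = softmax(z)_j - y_j, so the combined gradient
    # collapses to a single subtraction. A finite-difference spot check:
    #
    #     z, y = [0.2, -0.4, 0.9], [0.0, 0.0, 1.0]
    #     loss = lambda zz: -math.log(stable_softmax(zz)[2] + 1e-12)
    #     eps = 1e-6
    #     z_hi = z[:]; z_hi[0] += eps
    #     numeric = (loss(z_hi) - loss(z)) / eps
    #     analytic = stable_softmax(z)[0] - y[0]
    #     assert abs(numeric - analytic) < 1e-4
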
    def export_to_python(self, filename):
        # Write the trained network out as a standalone Python script:
        # one function per neuron, one function per layer, plus a predict()
        # wrapper and an interactive completion loop.
        lines = []
        lines.append("import math\n")
        lines.append("import time\n\n")
        lines.append("def relu(x):\n    return max(0.0, x)\n\n")
        lines.append("def softmax(x_list):\n")
        lines.append("    if not x_list:\n")
        lines.append("        return []\n")
        lines.append("    m = max(x_list)\n")
        lines.append("    exps = [math.exp(i - m) for i in x_list]\n")
        lines.append("    s = sum(exps)\n")
        lines.append("    if s == 0:\n")
        lines.append("        return [1.0 / len(x_list)] * len(x_list)\n")
        lines.append("    return [e / s for e in exps]\n\n")

        neuron_id = 0
        for layer_idx, (W, b) in enumerate(zip(self.weights, self.biases)):
            in_dim, out_dim = len(W), len(W[0])
            for j in range(out_dim):
                terms = " + ".join([f"{W[i][j]:.8f}*inputs[{i}]" for i in range(in_dim)]) or "0.0"
                b_term = f"{b[j]:.8f}"
                if layer_idx != len(self.weights) - 1:
                    lines.append(f"def neuron_{neuron_id}(inputs):\n    return relu({terms} + {b_term})\n\n")
                else:
                    lines.append(f"def neuron_{neuron_id}(inputs):\n    return {terms} + {b_term}\n\n")
                neuron_id += 1

        neuron_counter = 0
        for layer_idx, (W, b) in enumerate(zip(self.weights, self.biases)):
            out_dim = len(W[0])
            lines.append(f"def layer_{layer_idx}(inputs):\n")
            inner = ", ".join([f"neuron_{neuron_counter + j}(inputs)" for j in range(out_dim)])
            lines.append(f"    return [{inner}]\n\n")
            neuron_counter += out_dim

        lines.append("def predict(inputs):\n")
        lines.append("    a = inputs\n")
        for i in range(len(self.weights)):
            lines.append(f"    a = layer_{i}(a)\n")
        lines.append("    return softmax(a)\n\n")

        lines.append(f"vocab = {self.vocab}\n")
        lines.append("word_to_idx = {w: i for i, w in enumerate(vocab)}\n")
        lines.append(f"context_window = {self.context_window}\n\n")

        lines.append("if __name__ == '__main__':\n")
        lines.append("    print('Interactive multi-word text completion.')\n")
        lines.append("    print(f'Model context window: {context_window} words. Type text or empty to exit.')\n")
        lines.append("    while True:\n")
        lines.append("        inp = input('> ').strip()\n")
        lines.append("        if not inp:\n")
        lines.append("            break\n")
        lines.append("        words = [w.strip() for w in inp.split(' ') if w.strip()]\n")
        lines.append("        generated_words = words[:]\n")
        lines.append("        print('Input:', ' '.join(generated_words), end='', flush=True)\n")
        lines.append("        for _ in range(20):\n")
        lines.append("            context = generated_words[-context_window:]\n")
        lines.append("            x = [0.0] * len(vocab)\n")
        lines.append("            for word in context:\n")
        lines.append("                if word in word_to_idx:\n")
        lines.append("                    x[word_to_idx[word]] = 1.0\n")
        lines.append("            out = predict(x)\n")
        lines.append("            idx = out.index(max(out))\n")
        lines.append("            next_word = vocab[idx]\n")
        lines.append("            if next_word == '<|endoftext|>': break\n")
        lines.append("            generated_words.append(next_word)\n")
        lines.append("            print(' ' + next_word, end='', flush=True)\n")
        lines.append("            time.sleep(0.1)\n")
        lines.append("        print('\\n')\n")

        with open(filename, "w") as f:
            f.writelines(lines)
        print(f"Exported network to {filename}")
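
    # Shape of the exported file, schematically (the weight values below are
    # made up; the real ones come from training):
    #
    #     def neuron_0(inputs):
    #         return relu(0.01234567*inputs[0] + -0.00987654*inputs[1] + 0.00000000)
    #
    #     def layer_0(inputs):
    #         return [neuron_0(inputs), neuron_1(inputs)]
    #
    #     def predict(inputs):
    #         a = inputs
    #         a = layer_0(a)
    #         a = layer_1(a)
    #         return softmax(a)
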
    @staticmethod
    def load_network(filename):
        # Execute the exported script in a fresh namespace and wrap its
        # predict()/vocab objects. exec() runs the file as trusted code,
        # so only load files this class produced.
        ns = {"__name__": "__loaded_model__"}
        with open(filename, "r") as f:
            code = f.read()
        exec(code, ns)

        class ModelWrapper:
            def __init__(self, ns):
                self.ns = ns
                self.vocab = ns.get("vocab", [])
                self.word_to_idx = ns.get("word_to_idx", {})
                self.context_window = ns.get("context_window", 5)

            def complete(self, input_text, max_new_words=20):
                words = [w.strip() for w in input_text.strip().split(' ') if w.strip()]
                generated = words[:]
                for _ in range(max_new_words):
                    context = generated[-self.context_window:]
                    x = [0.0] * len(self.vocab)
                    for word in context:
                        if word in self.word_to_idx:
                            x[self.word_to_idx[word]] = 1.0

                    out = self.ns["predict"](x)
                    idx = out.index(max(out))
                    next_word = self.vocab[idx]

                    if next_word == '<|endoftext|>':
                        break
                    generated.append(next_word)
                return ' '.join(generated)

        return ModelWrapper(ns)
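
# Minimal usage sketch for the loader (the __main__ block below does the same
# end to end):
#
#     model = NeuralNetwork.load_network("exported_model.py")
#     print(model.complete("user: hi", max_new_words=10))
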
if __name__ == "__main__":
    sample_text = """
    user: hi
    ai: Hello! How can I help you today?
    <|endoftext|>
    user: hi
    ai: Hi! What can I do for you today?
    <|endoftext|>
    user: hello
    ai: Hello! How can I help you today?
    <|endoftext|>
    user: hey
    ai: Hi! What can I do for you today?
    <|endoftext|>
    user: How's your day going?
    ai: It's been great! Thanks for asking! How about yours?
    <|endoftext|>
    user: What's new with you?
    ai: Not much, just here and ready to help! What's new with you?
    <|endoftext|>
    user: What can you do?
    ai: I can help you with a variety of tasks. What's on your mind?
    <|endoftext|>
    user: Tell me a joke.
    ai: Why did the scarecrow win an award? Because he was outstanding in his field!
    <|endoftext|>
    """
    nn = NeuralNetwork(context_window=CONTEXT_WINDOW, seed=42)
    nn.train(training_data=sample_text, lr=LR, epochs=EPOCHS, verbose_every=100)
    nn.export_to_python("exported_model.py")

    model = NeuralNetwork.load_network("exported_model.py")
    print("\n--- Testing loaded model ---")
    print(f"Vocabulary size: {len(model.vocab)}")

    test_inputs = ["user: hi", "user: What's new", "ai: It's been"]
    for test_input in test_inputs:
        completion = model.complete(test_input, max_new_words=10)
        print(f"Input: '{test_input}'\nOutput: '{completion}'\n")