Upload NeuralCode7.py

NeuralCode7.py (new file, +334 lines)

import math
import random
import time

# CUSTOMIZATION

CONTEXT_WINDOW = 5
EPOCHS = 500
LR = 0.01
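
# A tiny dependency-free next-word model: a multi-hot context vector feeds a
# small MLP whose softmax output is a distribution over the vocabulary.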

def relu(x):
    return max(0.0, x)

def stable_softmax(x_list):
    # Subtract the max before exponentiating so math.exp cannot overflow.
    if not x_list:
        return []
    m = max(x_list)
    exps = [math.exp(i - m) for i in x_list]
    s = sum(exps)
    if s == 0:
        return [1.0 / len(x_list)] * len(x_list)
    return [e / s for e in exps]
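
# Example: stable_softmax([1.0, 2.0, 3.0]) ≈ [0.090, 0.245, 0.665]; subtracting
# the max shifts every exponent but leaves the normalized result unchanged.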

class NeuralNetwork:
    def __init__(self, layer_sizes=None, activation='relu', output_activation='softmax',
                 init_range=0.1, grad_clip=1.0, seed=None, context_window=5):
        if seed is not None:
            random.seed(seed)
        self.layer_sizes = layer_sizes[:] if layer_sizes is not None else None
        # train() backpropagates with the ReLU derivative, so 'relu' is the
        # only activation the gradients are exact for.
        self.activation = relu if activation == 'relu' else (lambda x: x)
        self.output_activation = stable_softmax if output_activation == 'softmax' else (lambda x: x)
        self.init_range = float(init_range)
        self.grad_clip = grad_clip
        self.context_window = context_window
        self.weights = []
        self.biases = []
        self.vocab = []
        self.word_to_idx = {}
        self.idx_to_word = {}

    def prepare_data_with_context(self, text):
        # Whitespace-tokenize and build the vocabulary and index maps.
        words = [w.strip() for w in text.replace('\n', ' ').split(' ') if w.strip()]
        self.vocab = sorted(set(words))
        self.word_to_idx = {w: i for i, w in enumerate(self.vocab)}
        self.idx_to_word = {i: w for w, i in self.word_to_idx.items()}

        vocab_size = len(self.vocab)
        X = []
        Y = []

        # Slide a window over the tokens: each sample is a multi-hot encoding
        # of context_window words, and the target is the word that follows.
        for i in range(len(words) - self.context_window):
            context_words = words[i : i + self.context_window]
            target_word = words[i + self.context_window]

            x = [0.0] * vocab_size
            for word in context_words:
                if word in self.word_to_idx:
                    x[self.word_to_idx[word]] = 1.0

            y = [0.0] * vocab_size
            if target_word in self.word_to_idx:
                y[self.word_to_idx[target_word]] = 1.0

            X.append(x)
            Y.append(y)

        return X, Y
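
    # Illustration: with vocab ['a', 'b', 'c'] and context ['a', 'c'], the
    # input is x = [1.0, 0.0, 1.0]. The encoding is a bag of words, so word
    # order within the context window is discarded.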

    def initialize_weights(self):
        if self.layer_sizes is None:
            raise ValueError("layer_sizes must be set before initializing weights.")
        if self.weights:
            return
        for i in range(len(self.layer_sizes) - 1):
            in_dim = self.layer_sizes[i]
            out_dim = self.layer_sizes[i + 1]
            # Small uniform weights; biases start at zero.
            W = [[random.uniform(-self.init_range, self.init_range) for _ in range(out_dim)]
                 for _ in range(in_dim)]
            b = [0.0 for _ in range(out_dim)]
            self.weights.append(W)
            self.biases.append(b)

    def forward(self, x):
        # Hidden layers compute a = activation(a @ W + b); the final layer is
        # linear, with the output activation (softmax) applied at the end.
        a = x[:]
        for i in range(len(self.weights) - 1):
            next_a = []
            W = self.weights[i]
            b = self.biases[i]
            out_dim = len(W[0])
            for j in range(out_dim):
                s = sum(a[k] * W[k][j] for k in range(len(a))) + b[j]
                next_a.append(self.activation(s))
            a = next_a

        W = self.weights[-1]
        b = self.biases[-1]
        out = []
        out_dim = len(W[0])
        for j in range(out_dim):
            s = sum(a[k] * W[k][j] for k in range(len(a))) + b[j]
            out.append(s)
        return self.output_activation(out)

    def train(self, training_data, lr=0.01, epochs=500, verbose_every=50):
        X, Y = self.prepare_data_with_context(training_data)
        if not X:
            raise ValueError("Not enough tokens in training data to create context windows.")

        # Input and output widths must match the vocabulary size.
        vocab_size = len(self.vocab)
        if self.layer_sizes is None:
            self.layer_sizes = [vocab_size, 64, vocab_size]
        else:
            self.layer_sizes[0] = vocab_size
            self.layer_sizes[-1] = vocab_size

        self.initialize_weights()

        for epoch in range(epochs):
            indices = list(range(len(X)))
            random.shuffle(indices)

            for idx in indices:
                x = X[idx]
                y = Y[idx]

                # Forward pass, caching activations and pre-activations.
                activations = [x[:]]
                pre_acts = []
                a = x[:]

                for i in range(len(self.weights) - 1):
                    W, b = self.weights[i], self.biases[i]
                    z = []
                    out_dim = len(W[0])
                    for j in range(out_dim):
                        s = sum(a[k] * W[k][j] for k in range(len(a))) + b[j]
                        z.append(s)
                    pre_acts.append(z)
                    a = [self.activation(val) for val in z]
                    activations.append(a)

                W, b = self.weights[-1], self.biases[-1]
                z_final = []
                out_dim = len(W[0])
                for j in range(out_dim):
                    s = sum(a[k] * W[k][j] for k in range(len(a))) + b[j]
                    z_final.append(s)
                pre_acts.append(z_final)
                out = self.output_activation(z_final)

                # Gradient of softmax + cross-entropy at the output.
                delta = [out[j] - y[j] for j in range(len(y))]

                for i in reversed(range(len(self.weights))):
                    in_act = activations[i]
                    in_dim = len(in_act)
                    out_dim = len(delta)

                    # Propagate the error through this layer's weights *before*
                    # updating them, masking with the ReLU derivative.
                    prev_delta = None
                    if i != 0:
                        prev_delta = [0.0] * in_dim
                        for p in range(in_dim):
                            s = sum(self.weights[i][p][j] * delta[j] for j in range(out_dim))
                            if pre_acts[i - 1][p] > 0:
                                prev_delta[p] = s

                    db = delta[:]
                    if self.grad_clip is not None:
                        db = [max(-self.grad_clip, min(self.grad_clip, g)) for g in db]
                    for j in range(len(self.biases[i])):
                        self.biases[i][j] -= lr * db[j]

                    for k in range(in_dim):
                        for j in range(out_dim):
                            grad_w = in_act[k] * delta[j]
                            if self.grad_clip is not None:
                                grad_w = max(-self.grad_clip, min(self.grad_clip, grad_w))
                            self.weights[i][k][j] -= lr * grad_w

                    if i != 0:
                        delta = prev_delta

            if epoch % verbose_every == 0 or epoch == epochs - 1:
                # Report average cross-entropy over the full training set.
                loss = 0.0
                for x_val, y_val in zip(X, Y):
                    p = self.forward(x_val)
                    for j in range(len(y_val)):
                        if y_val[j] > 0:
                            loss -= math.log(p[j] + 1e-12)
                print(f"Epoch {epoch}, Loss: {loss / len(X):.6f}")
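
    # Why delta = out - y: for p = softmax(z) and cross-entropy loss
    # L = -log p_t on a one-hot target y, the gradient is dL/dz_j = p_j - y_j.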

    def export_to_python(self, filename):
        # Emit a standalone script: one function per neuron and per layer, a
        # predict() wrapper, the vocabulary, and an interactive demo loop.
        lines = []
        lines.append("import math\n")
        lines.append("import time\n\n")
        lines.append("def relu(x):\n    return max(0.0, x)\n\n")
        lines.append("def softmax(x_list):\n")
        lines.append("    if not x_list:\n")
        lines.append("        return []\n")
        lines.append("    m = max(x_list)\n")
        lines.append("    exps = [math.exp(i - m) for i in x_list]\n")
        lines.append("    s = sum(exps)\n")
        lines.append("    if s == 0:\n")
        lines.append("        return [1.0 / len(x_list)] * len(x_list)\n")
        lines.append("    return [e / s for e in exps]\n\n")

        # Each neuron becomes a function with its weights baked in as literals.
        neuron_id = 0
        for layer_idx, (W, b) in enumerate(zip(self.weights, self.biases)):
            in_dim, out_dim = len(W), len(W[0])
            for j in range(out_dim):
                terms = " + ".join([f"{W[i][j]:.8f}*inputs[{i}]" for i in range(in_dim)]) or "0.0"
                b_term = f"{b[j]:.8f}"
                if layer_idx != len(self.weights) - 1:
                    lines.append(f"def neuron_{neuron_id}(inputs):\n    return relu({terms} + {b_term})\n\n")
                else:
                    lines.append(f"def neuron_{neuron_id}(inputs):\n    return {terms} + {b_term}\n\n")
                neuron_id += 1

        neuron_counter = 0
        for layer_idx, (W, b) in enumerate(zip(self.weights, self.biases)):
            out_dim = len(W[0])
            lines.append(f"def layer_{layer_idx}(inputs):\n")
            inner = ", ".join([f"neuron_{neuron_counter + j}(inputs)" for j in range(out_dim)])
            lines.append(f"    return [{inner}]\n\n")
            neuron_counter += out_dim

        lines.append("def predict(inputs):\n")
        lines.append("    a = inputs\n")
        for i in range(len(self.weights)):
            lines.append(f"    a = layer_{i}(a)\n")
        lines.append("    return softmax(a)\n\n")

        lines.append(f"vocab = {self.vocab}\n")
        lines.append("word_to_idx = {w: i for i, w in enumerate(vocab)}\n")
        lines.append(f"context_window = {self.context_window}\n\n")

        # Interactive greedy completion loop for the exported script.
        lines.append("if __name__ == '__main__':\n")
        lines.append("    print('Interactive multi-word text completion.')\n")
        lines.append("    print(f'Model context window: {context_window} words. Type text or empty to exit.')\n")
        lines.append("    while True:\n")
        lines.append("        inp = input('> ').strip()\n")
        lines.append("        if not inp:\n")
        lines.append("            break\n")
        lines.append("        words = [w.strip() for w in inp.split(' ') if w.strip()]\n")
        lines.append("        generated_words = words[:]\n")
        lines.append("        print('Input:', ' '.join(generated_words), end='', flush=True)\n")
        lines.append("        for _ in range(20):\n")
        lines.append("            context = generated_words[-context_window:]\n")
        lines.append("            x = [0.0] * len(vocab)\n")
        lines.append("            for word in context:\n")
        lines.append("                if word in word_to_idx:\n")
        lines.append("                    x[word_to_idx[word]] = 1.0\n")
        lines.append("            out = predict(x)\n")
        lines.append("            idx = out.index(max(out))\n")
        lines.append("            next_word = vocab[idx]\n")
        lines.append("            if next_word == '<|endoftext|>': break\n")
        lines.append("            generated_words.append(next_word)\n")
        lines.append("            print(' ' + next_word, end='', flush=True)\n")
        lines.append("            time.sleep(0.1)\n")
        lines.append("        print('\\n')\n")

        with open(filename, "w") as f:
            f.writelines(lines)
        print(f"Exported network to {filename}")
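
    # The exported file is self-contained: running `python exported_model.py`
    # starts the interactive loop above without needing this trainer at all.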

    @staticmethod
    def load_network(filename):
        # Execute the exported script in a fresh namespace; __name__ is set to
        # a non-'__main__' value so its interactive block does not run. Note
        # that exec runs arbitrary code, so only load files you generated.
        ns = {"__name__": "__loaded_model__"}
        with open(filename, "r") as f:
            code = f.read()
        exec(code, ns)

        class ModelWrapper:
            def __init__(self, ns):
                self.ns = ns
                self.vocab = ns.get("vocab", [])
                self.word_to_idx = ns.get("word_to_idx", {})
                self.context_window = ns.get("context_window", 5)

            def complete(self, input_text, max_new_words=20):
                words = [w.strip() for w in input_text.strip().split(' ') if w.strip()]
                generated = words[:]
                for _ in range(max_new_words):
                    context = generated[-self.context_window:]
                    x = [0.0] * len(self.vocab)
                    for word in context:
                        if word in self.word_to_idx:
                            x[self.word_to_idx[word]] = 1.0

                    # Greedy decoding: always pick the most probable next word.
                    out = self.ns["predict"](x)
                    idx = out.index(max(out))
                    next_word = self.vocab[idx]

                    if next_word == '<|endoftext|>':
                        break
                    generated.append(next_word)
                return ' '.join(generated)

        return ModelWrapper(ns)
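
# Demo: train on a small dialog corpus, export the network as plain Python,
# reload the exported file, and print a few greedy completions.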

if __name__ == "__main__":
    sample_text = """
user: hi
ai: Hello! How can I help you today?
<|endoftext|>
user: hi
ai: Hi! What can I do for you today?
<|endoftext|>
user: hello
ai: Hello! How can I help you today?
<|endoftext|>
user: hey
ai: Hi! What can I do for you today?
<|endoftext|>
user: How's your day going?
ai: It's been great! Thanks for asking! How about yours?
<|endoftext|>
user: What's new with you?
ai: Not much, just here and ready to help! What's new with you?
<|endoftext|>
user: What can you do?
ai: I can help you with a variety of tasks. What's on your mind?
<|endoftext|>
user: Tell me a joke.
ai: Why did the scarecrow win an award? Because he was outstanding in his field!
<|endoftext|>
"""
    nn = NeuralNetwork(context_window=CONTEXT_WINDOW, seed=42)
    nn.train(training_data=sample_text, lr=LR, epochs=EPOCHS, verbose_every=100)
    nn.export_to_python("exported_model.py")

    model = NeuralNetwork.load_network("exported_model.py")
    print("\n--- Testing loaded model ---")
    print(f"Vocabulary size: {len(model.vocab)}")

    test_inputs = ["user: hi", "user: What's new", "ai: It's been"]
    for test_input in test_inputs:
        completion = model.complete(test_input, max_new_words=10)
        print(f"Input: '{test_input}'\nOutput: '{completion}'\n")