Start by building a very simple network:
import numpy as np
class NeuralNetwork:
    def __init__(self, x, y):
        self.input = x
        self.y = y
        self.Weights1 = np.random.randn(self.input.shape[1], 5)
        self.Weights2 = np.random.randn(5, 1)
        self.output = np.zeros(self.y.shape)

    def sigmoid_z(self, x):
        # sigmoid activation function
        z = 1 / (1 + np.exp(-x))
        return z

    def sigmoid_z_derivative(self, x):
        return self.sigmoid_z(x) * (1 - self.sigmoid_z(x))

    def forwardpropagation(self):
        self.layer1 = self.sigmoid_z(np.dot(self.input, self.Weights1))
        self.output = self.sigmoid_z(np.dot(self.layer1, self.Weights2))

    def predict(self, x):
        self.input = x
        self.forwardpropagation()
        return self.output
    # optimization stage
    def backpropagation(self):
        # The derivative of the cost function with respect to w2
        # is the product of the chain derivatives that connect them:
        # self.output = y_hat = a2 = sigmoid(z2), z2 = a1*w2
        # a1 = sigmoid(z1), z1 = x*w1

        # derivative of the cost function with respect to the network output y_hat
        # (up to sign: this is (y - y_hat), so the weight updates below are additions)
        dcost = (self.y - self.output)
        # derivative of the activation function of the output layer
        dz2 = self.sigmoid_z_derivative(np.dot(self.layer1, self.Weights2))
        # output of the previous layer (this is dz2/dw2)
        da2 = self.layer1.T
        # derivative of the cost with respect to w2 is therefore:
        d_weights2 = np.dot(da2, dcost * dz2)

        # derivative of the activation function of the hidden layer
        dz1 = self.sigmoid_z_derivative(np.dot(self.input, self.Weights1))
        # the input itself (this is dz1/dw1)
        da1 = self.input.T
        # derivative of the cost function with respect to w1:
        d_weights1 = np.dot(da1, np.dot(dcost * dz2, self.Weights2.T) * dz1)

        # gradient-descent step: because of the (y - y_hat) sign, d_weights1 and
        # d_weights2 already point downhill, so they are added to the weights
        self.Weights1 += d_weights1
        self.Weights2 += d_weights2
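Written out, the chain rule that these comments describe is the following (the 1/2 factor on the cost is an assumption made here so that the algebra lines up exactly with the code, which never forms the cost explicitly):

$$C = \tfrac{1}{2}\sum (y - \hat y)^2,\qquad \hat y = \sigma(z_2),\; z_2 = a_1 W_2,\; a_1 = \sigma(z_1),\; z_1 = X W_1$$

$$\frac{\partial C}{\partial W_2} = -\,a_1^{\top}\big[(y - \hat y)\odot\sigma'(z_2)\big],\qquad \frac{\partial C}{\partial W_1} = -\,X^{\top}\Big[\big((y - \hat y)\odot\sigma'(z_2)\big)W_2^{\top}\odot\sigma'(z_1)\Big]$$

The code's d_weights1 and d_weights2 are exactly the negatives of these gradients, so adding them to the weights is one plain gradient-descent step with an implicit learning rate of 1.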
if __name__ == "__main__":
    X = np.array([[1,1,0,1],
                  [1,0,1,1],
                  [1,0,0,1],
                  [1,1,1,1]])
    y = np.array([[0],[0],[1],[1]])

    nn = NeuralNetwork(X, y)
    for i in range(1500):
        nn.forwardpropagation()
        nn.backpropagation()

    print(nn.predict(np.array([[0, 1, 1, 0]])))
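To check that the weights are actually improving rather than diverging, it also helps to track the mean squared error during training. A minimal sketch, reusing the class and data defined above (the reporting interval of 300 is an arbitrary choice made here, not part of the original listing):

nn = NeuralNetwork(X, y)
for i in range(1500):
    nn.forwardpropagation()
    nn.backpropagation()
    if i % 300 == 0:
        # training error measured after the current forward pass
        print("iteration", i, "mse", np.mean((y - nn.output) ** 2))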
The same network can be built just as easily in Keras:
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers
import numpy as np

np.random.seed(0)

model = Sequential()
model.add(Dense(units=5, activation='sigmoid', input_dim=4))
model.add(Dense(units=1, activation='sigmoid'))
sgd = optimizers.SGD(lr=1)
model.compile(loss='mean_squared_error', optimizer=sgd)

X = np.array([[1,1,0,1],
              [1,0,1,1],
              [1,0,0,1],
              [1,1,1,1]])
y = np.array([[0],[1],[1],[0]])
model.fit(X, y, epochs=1500, verbose=False)

test_X = np.array([[0,1,1,0]])
print(model.predict(test_X))
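Since Keras tracks the loss itself, it is also easy to check how well the model fits the four training rows after training. A minimal usage sketch (evaluate and predict are standard Keras calls; the printout format is an assumption made here):

print("final training loss:", model.evaluate(X, y, verbose=0))  # mean squared error over the training rows
print(model.predict(X))  # predictions for all four training rows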
Or in TensorFlow:
import tensorflow.compat.v1 as tf
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(10)
tf.disable_eager_execution()
# Parameters
learning_rate = 1
training_epochs = 1500
display_step = 50
train_X = np.array([[1,1,0,1],
[1,0,1,1],
[1,0,0,1],
[1,1,1,1]])
train_Y = np.array([[0],[1],[1],[0]])
n_samples = train_X.shape[0]
X = tf.placeholder(tf.float32,shape=[None,4])
Y = tf.placeholder(tf.float32,shape=[None,1])
dense_1 = tf.layers.dense(X, units=5)
out_1 =tf.nn.sigmoid(dense_1)
dense_2 = tf.layers.dense(out_1,units=1)
pred =tf.nn.sigmoid(dense_2)
# Mean squared error
cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        for (x, y) in zip(train_X, train_Y):
            sess.run(optimizer, feed_dict={X: [x], Y: [y]})

    test_X = np.array([0,1,1,0])
    prediction = sess.run(pred, feed_dict={X: [test_X]})
    print("prediction", prediction)
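The display_step parameter defined above is never used in the listing; if you want the usual periodic cost printout, a couple of extra lines inside the epoch loop would do it. A sketch of those lines (not part of the original code):

        # inside "for epoch in range(training_epochs):", after the per-sample updates
        if (epoch + 1) % display_step == 0:
            c = sess.run(cost, feed_dict={X: train_X, Y: train_Y})
            print("epoch", epoch + 1, "cost", c)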
Or in PyTorch, or for that matter any other neural network framework, such as Torch (Lua), Caffe, CNTK, etc.:
import torch
import torch.nn as nn
import numpy as np

np.random.seed(10)
torch.manual_seed(10)  # np.random.seed does not affect torch.randn, so seed torch as well

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.inputSize = 4
        self.outputSize = 1
        self.hiddenSize = 5
        # weights are kept as plain tensors and updated by hand in backward()
        self.W1 = torch.randn(self.inputSize, self.hiddenSize)
        self.W2 = torch.randn(self.hiddenSize, self.outputSize)
    def forward(self, X):
        self.z1 = torch.matmul(X, self.W1)
        self.z2 = self.sigmoid(self.z1)
        self.z3 = torch.matmul(self.z2, self.W2)
        o = self.sigmoid(self.z3)
        return o

    def sigmoid(self, s):
        return 1 / (1 + torch.exp(-s))

    def sigmoid_derivative(self, z):
        # derivative of sigmoid
        return self.sigmoid(z) * (1 - self.sigmoid(z))

    def backward(self, X, y, o):
        # same hand-derived gradients as in the NumPy version, applied with a learning rate of 1
        self.o_error = y - o
        self.o_delta = self.o_error * self.sigmoid_derivative(self.z3)
        self.z2_error = torch.matmul(self.o_delta, torch.t(self.W2))
        self.z2_delta = self.z2_error * self.sigmoid_derivative(self.z1)
        self.W1 += torch.matmul(torch.t(X), self.z2_delta)
        self.W2 += torch.matmul(torch.t(self.z2), self.o_delta)

    def train(self, X, y):
        # note: this shadows nn.Module.train(), which is harmless in this manual example
        o = self.forward(X)
        self.backward(X, y, o)

    def predict(self, xPredicted):
        print("Output: \n" + str(self.forward(xPredicted)))
if __name__ == "__main__":
    X = torch.tensor(np.array([[1,1,0,1],
                               [1,0,1,1],
                               [1,0,0,1],
                               [1,1,1,1]]), dtype=torch.float)
    y = torch.tensor(np.array([[0],[0],[1],[1]]), dtype=torch.float)

    net = NeuralNetwork()  # named net rather than nn so the torch.nn alias is not shadowed
    for i in range(1500):
        net.train(X, y)

    test_X = torch.tensor(np.array([0,1,1,0]), dtype=torch.float)
    net.predict(test_X)
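As with the NumPy version, it is worth checking how well the manually updated weights fit the training rows once the loop has finished. A minimal sketch, reusing the trained net above (not part of the original listing):

    # after the training loop in the __main__ block above
    print("training mse:", torch.mean((y - net.forward(X)) ** 2).item())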