Commit f334d8dd by Paktalin

Started building the LSTM model

parent 7a49bc01
Showing with 49 additions and 0 deletions
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import Bidirectional, Dense, Activation, LSTM, Dropout
import pickle
# Load the encoded sentences: one sentence per row, values separated by '~'.
# np.genfromtxt parses the values as floats by default.
sentences = np.genfromtxt('encoded_forms.csv', delimiter='~')

# Sliding-window parameters: every window holds SEQUENCE_LEN consecutive
# items and consecutive windows start STEP positions apart.
SEQUENCE_LEN = 3
STEP = 1
# Used further down as the model's per-timestep input width and as the size
# of its softmax output layer.
forms = 114
batch_size = 128

# Create empty lists: model inputs (x) and the item following each window (y).
sequences = []
next_words = []

# Iterate over the rows directly instead of indexing through range(len(...)).
for sentence in tqdm(sentences):
    # The last usable window must leave one item after it to act as target,
    # hence the upper bound len(sentence) - SEQUENCE_LEN.
    for start in range(0, len(sentence) - SEQUENCE_LEN, STEP):
        end = start + SEQUENCE_LEN
        # window of SEQUENCE_LEN items ...
        sequences.append(sentence[start:end])
        # ... and the item that immediately follows it
        next_words.append(sentence[end])
# Save both lists to disk with pickle, each under its own file name
# ('sequences' first, then 'next_words').
for target_path, payload in (('sequences', sequences), ('next_words', next_words)):
    with open(target_path, 'wb') as handle:
        pickle.dump(payload, handle)
# Keras interprets a plain Python list passed to fit() as a list of separate
# model inputs, so stack the windows and targets into arrays first.
# NOTE(review): assumes encoded_forms.csv holds integer ids in [0, forms)
# with no missing values (genfromtxt yields floats, NaN for gaps) - confirm.
x_ids = np.asarray(sequences, dtype='int64')
y_ids = np.asarray(next_words, dtype='int64')

# Fail loudly on ids outside [0, forms): negative indices would otherwise
# wrap around silently in the one-hot lookup below.
all_ids = np.concatenate([x_ids.ravel(), y_ids.ravel()])
if all_ids.size and (all_ids.min() < 0 or all_ids.max() >= forms):
    raise ValueError('encoded form ids must lie in [0, %d)' % forms)

# The first layer declares input_shape=(SEQUENCE_LEN, forms), i.e. one vector
# of width `forms` per timestep, so expand every id into a one-hot row.
x_onehot = np.eye(forms, dtype='float32')[x_ids]

# split training and test sets
x_train, x_test, y_train, y_test = train_test_split(x_onehot, y_ids, test_size=0.33)

dropout = 0.2

# Bidirectional LSTM -> optional dropout -> softmax over `forms` classes.
model = Sequential()
model.add(Bidirectional(LSTM(128), input_shape=(SEQUENCE_LEN, forms)))
if dropout > 0:
    model.add(Dropout(dropout))
model.add(Dense(forms))
model.add(Activation('softmax'))

# The output is a softmax over mutually exclusive classes and the targets are
# integer class ids, so use the sparse categorical loss. binary_crossentropy
# would score each of the `forms` outputs as an independent yes/no decision.
model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['acc'])

# NOTE: the held-out test split doubles as the validation set during training.
model.fit(x_train, y_train, batch_size=batch_size, epochs=15, validation_data=(x_test, y_test))
model.save('lstm.h5')
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment