I am trying to perform 10-fold cross-validation on an LSTM. The code is the following:
Predict closing prices using a 3-day window of previous closing prices. We use window_size = 4 for this.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
# Stacked LSTM regressor: three LSTM layers, each followed by dropout,
# feeding a single-unit dense output layer.
number_units = 9
dropout_fraction = 0.5

model = Sequential([
    # Layer 1: declares the input shape, (X_train.shape[1], 1).
    LSTM(
        units=number_units,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
    ),
    Dropout(dropout_fraction),
    # Layer 2: return_sequences must be True whenever another LSTM layer follows.
    LSTM(units=number_units, return_sequences=True),
    Dropout(dropout_fraction),
    # Layer 3: the final LSTM layer, so return_sequences is left unset.
    LSTM(units=number_units),
    Dropout(dropout_fraction),
    # Output layer: a single unit.
    Dense(1),
])

# Compile with the Adam optimizer and mean-squared-error loss.
model.compile(optimizer="adam", loss="mean_squared_error")

# Print the layer-by-layer summary.
model.summary()
# Fit on the training set; shuffling is turned off for this run.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=5,
    epochs=500,
    shuffle=False,
    verbose=1,
)

# Compute the loss on the test set.
model.evaluate(x=X_test, y=y_test)

# Predict on the test features.
predicted = model.predict(x=X_test)
import sklearn.metrics as metrics
# Score the predictions against y_test, using the values both hold at this
# point in the script.
print(
    'RMSD ( Root Mean Squared Error ) :',
    np.sqrt(metrics.mean_squared_error(y_true=y_test, y_pred=predicted)),
)
print(
    'R-squared :',
    metrics.r2_score(y_true=y_test, y_pred=predicted),
)
# Map the scaled targets and predictions back to the original price scale.
real_prices = y_test_scaler.inverse_transform(y_test.reshape(-1, 1))
predicted_prices = y_test_scaler.inverse_transform(predicted)

# Side-by-side table of real vs. predicted values, indexed by the trailing
# len(real_prices) entries of dw.index.
stocks = pd.DataFrame(
    data={
        "Real": real_prices.ravel(),
        "Predicted": predicted_prices.ravel(),
    },
    index=dw.index[-len(real_prices):],
)
stocks.head(30)
The goal is to perform 10-fold cross-validation and improve the RMSE and R-squared results.
A note about my input:
# Build the windowed features X and targets y from dw.
X, y = window_data(
    dw,
    window_size,
    feature_col_number1,
    feature_col_number2,
    feature_col_number3,
    feature_col_number4,
    feature_col_number5,
    feature_col_number6,
    target_col_number,
)

# Use the first 70% of the samples for training and the remainder for testing.
X_split = int(0.7 * len(X))
y_split = int(0.7 * len(y))
X_train, X_test = X[:X_split], X[X_split:]
y_train, y_test = y[:y_split], y[y_split:]
# Scale features and targets with MinMaxScaler.
from sklearn.preprocessing import MinMaxScaler

# Fit the scalers on the TRAINING data only. Fitting separate scalers on the
# test split would map train and test values with different min/max ranges
# (so the model is evaluated on inputs scaled differently from what it was
# trained on) and would leak test-set statistics into preprocessing.
x_train_scaler = MinMaxScaler()
y_train_scaler = MinMaxScaler()
x_train_scaler.fit(X_train)
y_train_scaler.fit(y_train)

# The "test" scaler names are kept as aliases of the training-fitted scalers
# so existing code that calls x_test_scaler / y_test_scaler (for example
# y_test_scaler.inverse_transform on predictions) keeps working.
x_test_scaler = x_train_scaler
y_test_scaler = y_train_scaler

# Scale the training data.
X_train = x_train_scaler.transform(X_train)
y_train = y_train_scaler.transform(y_train)

# Scale the test data with the same (training-fitted) transformation.
# Test values outside the training range will fall outside [0, 1]; that is
# expected and is inverted correctly by inverse_transform.
X_test = x_test_scaler.transform(X_test)
y_test = y_test_scaler.transform(y_test)
# Append a trailing axis of length 1 to the features for the model.
# X_train / X_test are 2-D at this point (output of the scaler transform).
X_train = X_train.reshape(X_train.shape + (1,))
X_test = X_test.reshape(X_test.shape + (1,))
How do I add k-fold cross-validation to this? What would the code look like, and where should it go?
Thanks.
What I have tried:
How do I add k-fold cross-validation to this? What would the code look like, and where should it go?
Thanks.