import pathlib
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense, SimpleRNN
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow import keras
import numpy as np
# Resolve the notebook's working directory and load the raw Intel market CSV.
path=str(pathlib.Path().resolve())+"/"
df=pd.read_csv(path+"Intel_Market.csv")
# Display the raw frame (notebook cell output).
df
Fecha | Cerrar/último | Volumen | Abrir | Alto | Bajo | |
---|---|---|---|---|---|---|
0 | 03/04/2024 | 40,33 | 83717980 | 41,40 | 41,65 | 40,22 |
1 | 02/04/2024 | 43,94 | 32748360 | 43,925 | 44,08 | 43,50 |
2 | 01/04/2024 | 44,52 | 31712590 | 44,29 | 45,41 | 44,18 |
3 | 28/03/2024 | 44,17 | 54204190 | 43,76 | 44,60 | 43,71 |
4 | 27/03/2024 | 43,77 | 51017830 | 42,54 | 43,83 | 42,50 |
... | ... | ... | ... | ... | ... | ... |
2511 | 10/04/2014 | 26,425 | 40390460 | 27,01 | 27,09 | 26,36 |
2512 | 09/04/2014 | 26,98 | 34467000 | 26,93 | 27,02 | 26,73 |
2513 | 08/04/2014 | 26,91 | 56564370 | 26,36 | 26,98 | 26,35 |
2514 | 07/04/2014 | 26,485 | 49693490 | 26,30 | 26,74 | 26,20 |
2515 | 04/04/2014 | 26,16 | 47829470 | 26,49 | 26,73 | 26,105 |
2516 rows × 6 columns
Convertimos el campo Fecha a tipo fecha y lo establecemos como índice; pasamos el precio de cierre a formato float y le aplicamos una media móvil (rolling) de 7 días.
# Parse 'Fecha' (dd/mm/yyyy) into dates and use it as the index; convert the
# comma-decimal closing price to float32 and smooth it with a 7-day rolling
# mean, dropping the first 6 rows the window leaves as NaN.
df['Fecha'] = pd.to_datetime(df['Fecha'], format='%d/%m/%Y').dt.date
df = df.set_index("Fecha")
df['Cerrar/último'] = df['Cerrar/último'].str.replace(",",".").astype(np.float32)
# The selection + rolling mean already yields a single-column frame, so the
# original redundant re-selection of ['Cerrar/último'] afterwards is removed.
df = df[['Cerrar/último']].rolling(7).mean().dropna()
Le hacemos un MinMaxScaler de los datos en un rango de 0-1
# Scale the smoothed closing prices into [0, 1]; keep the fitted scaler so
# later cells can inverse-transform predictions back to price units.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))
df_transformed = scaler.fit_transform(df)
Recogemos los datos de Y y de X
# Supervised framing: each scaled price is the input for predicting the
# value that immediately follows it in the series.
X, y = df_transformed[:-1], df_transformed[1:]
# Show X (notebook cell output).
X
array([[0.42192749], [0.4296045 ], [0.42439509], ..., [0.03236994], [0.03192439], [0.03039927]])
#from sklearn.preprocessing import LabelEncoder
#l = LabelEncoder()
#X = l.fit_transform(X)
Dividimos los datos donde:
# Inspect 50 index dates starting where the training slice ends
# (NOTE(review): exploratory cell; assumes X_train from the split below).
df.index[len(X_train):][:50]
Index([2018-03-28, 2018-03-27, 2018-03-26, 2018-03-23, 2018-03-22, 2018-03-21, 2018-03-20, 2018-03-19, 2018-03-16, 2018-03-15, 2018-03-14, 2018-03-13, 2018-03-12, 2018-03-09, 2018-03-08, 2018-03-07, 2018-03-06, 2018-03-05, 2018-03-02, 2018-03-01, 2018-02-28, 2018-02-27, 2018-02-26, 2018-02-23, 2018-02-22, 2018-02-21, 2018-02-20, 2018-02-16, 2018-02-15, 2018-02-14, 2018-02-13, 2018-02-12, 2018-02-09, 2018-02-08, 2018-02-07, 2018-02-06, 2018-02-05, 2018-02-02, 2018-02-01, 2018-01-31, 2018-01-30, 2018-01-29, 2018-01-26, 2018-01-25, 2018-01-24, 2018-01-23, 2018-01-22, 2018-01-19, 2018-01-18, 2018-01-17], dtype='object', name='Fecha')
from sklearn.model_selection import train_test_split
# Manual chronological split (the CSV is newest-first): the first 20% of
# rows become the held-out test set, the rest is the full training pool.
value = int(len(X)*0.2)
X_test, y_test = X[:value], y[:value]
X_train_full, y_train_full = X[value:], y[value:]
# Carve a validation set off the training pool with the same 20% rule.
value = int(len(X_train_full)*0.2)
X_val, y_val = X_train_full[:value], y_train_full[:value]
X_train, y_train = X_train_full[value:], y_train_full[value:]
import numpy as np
#X_train = np.reshape(X_train, (X_train.shape[0], 1))
#X_test = np.reshape(X_test, (X_test.shape[0], 1))
Definimos una Red Neuronal con la siguiente arquitectura:
"""OJO CON EL RETURN_SEQUENCES"""
import sys
def modelo_red_recurrente_3(x_train, y_train, n_neuronas, x_val, y_val):
    """Build and train an LSTM -> SimpleRNN -> Dense(1) regressor.

    Args:
        x_train: Training inputs for a single-feature series.
        y_train: Training targets (next value of the series).
        n_neuronas (int): Units in each recurrent layer.
        x_val: Validation inputs monitored by early stopping.
        y_val: Validation targets.

    Returns:
        The fitted Keras model, with the best weights restored.
    """
    early_stopping_cb = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
    # return_sequences=True makes the LSTM emit an output at EVERY timestep
    # (the original comment stated the opposite) so the next recurrent layer
    # can consume the full sequence; the SimpleRNN then returns only the
    # final output vector, which feeds the single-unit Dense head.
    model = keras.models.Sequential([
        keras.layers.LSTM(n_neuronas, return_sequences=True, input_shape=[None, 1]),
        keras.layers.SimpleRNN(n_neuronas, return_sequences=False),
        keras.layers.Dense(1)
    ])
    model.compile(loss="mse", optimizer="adam")
    # epochs is effectively unbounded: EarlyStopping halts training once
    # val_loss stops improving for 5 consecutive epochs.
    # validation_data is passed as a tuple, the form documented by Keras.
    model.fit(x_train, y_train, epochs=sys.maxsize,
              validation_data=(x_val, y_val), callbacks=[early_stopping_cb])
    return model
Le decimos que use 40 neuronas y compruebe el Mean_squared_error para ver el resultado que ronda sobre 0.00X
from sklearn.metrics import mean_squared_error
# Train with 40 recurrent units and score on the held-out test split.
modelo = modelo_red_recurrente_3(X_train, y_train,40,X_val,y_val)
y_pred = modelo.predict(X_test)
# squared=False -> RMSE (root mean squared error).
rms = mean_squared_error(y_test, y_pred, squared=False)
# Fixed label: this cell evaluates modelo_red_recurrente_3, not ..._1.
print(f"rms modelo_red_recurrente_3 = {rms}")
Epoch 1/9223372036854775807 51/51 [==============================] - 3s 14ms/step - loss: 0.0582 - val_loss: 0.0179 Epoch 2/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 0.0106 - val_loss: 0.0027 Epoch 3/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 5.4185e-04 - val_loss: 1.4545e-04 Epoch 4/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 7.1967e-05 - val_loss: 1.1650e-04 Epoch 5/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 7.0184e-05 - val_loss: 1.1477e-04 Epoch 6/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.9998e-05 - val_loss: 1.1426e-04 Epoch 7/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 7.1392e-05 - val_loss: 1.1535e-04 Epoch 8/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.8308e-05 - val_loss: 1.1206e-04 Epoch 9/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.7404e-05 - val_loss: 1.1097e-04 Epoch 10/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.7048e-05 - val_loss: 1.1692e-04 Epoch 11/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.8139e-05 - val_loss: 1.1057e-04 Epoch 12/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.5583e-05 - val_loss: 1.1012e-04 Epoch 13/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.6447e-05 - val_loss: 1.0556e-04 Epoch 14/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.7869e-05 - val_loss: 1.0425e-04 Epoch 15/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.6549e-05 - val_loss: 1.1274e-04 Epoch 16/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.3289e-05 - val_loss: 1.0801e-04 Epoch 17/9223372036854775807 51/51 
[==============================] - 0s 3ms/step - loss: 6.4133e-05 - val_loss: 1.0408e-04 Epoch 18/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.2056e-05 - val_loss: 1.0579e-04 Epoch 19/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.2006e-05 - val_loss: 1.0241e-04 Epoch 20/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.2238e-05 - val_loss: 9.9154e-05 Epoch 21/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.2241e-05 - val_loss: 1.0884e-04 Epoch 22/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.0439e-05 - val_loss: 1.0025e-04 Epoch 23/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.3023e-05 - val_loss: 1.0107e-04 Epoch 24/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 5.8843e-05 - val_loss: 1.1497e-04 Epoch 25/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 5.9559e-05 - val_loss: 9.8045e-05 Epoch 26/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 5.8448e-05 - val_loss: 1.0252e-04 Epoch 27/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.1504e-05 - val_loss: 1.0357e-04 Epoch 28/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 5.9748e-05 - val_loss: 9.6570e-05 Epoch 29/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 5.9806e-05 - val_loss: 9.5402e-05 Epoch 30/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 5.9747e-05 - val_loss: 9.6208e-05 Epoch 31/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 5.6852e-05 - val_loss: 9.8589e-05 Epoch 32/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 5.8804e-05 - val_loss: 9.6486e-05 Epoch 33/9223372036854775807 51/51 
[==============================] - 0s 3ms/step - loss: 5.7513e-05 - val_loss: 9.7110e-05 Epoch 34/9223372036854775807 51/51 [==============================] - 0s 3ms/step - loss: 6.1899e-05 - val_loss: 1.0136e-04 16/16 [==============================] - 0s 1ms/step rms modelo_red_recurrente_1 = 0.008100431357431943
Mostramos un gráfico de líneas que va de X_train a (X_test | y_pred), para comparar la predicción con los datos reales
Index([2022-06-07, 2022-06-06, 2022-06-03, 2022-06-02, 2022-06-01, 2022-05-31, 2022-05-27, 2022-05-26, 2022-05-25, 2022-05-24, 2022-05-23, 2022-05-20, 2022-05-19, 2022-05-18, 2022-05-17, 2022-05-16, 2022-05-13, 2022-05-12, 2022-05-11, 2022-05-10, 2022-05-09, 2022-05-06, 2022-05-05, 2022-05-04, 2022-05-03, 2022-05-02, 2022-04-29, 2022-04-28, 2022-04-27, 2022-04-26, 2022-04-25, 2022-04-22, 2022-04-21, 2022-04-20, 2022-04-19, 2022-04-18, 2022-04-14, 2022-04-13, 2022-04-12, 2022-04-11, 2022-04-08, 2022-04-07, 2022-04-06, 2022-04-05, 2022-04-04, 2022-04-01, 2022-03-31, 2022-03-30, 2022-03-29, 2022-03-28], dtype='object', name='Fecha')
import matplotlib.pyplot as plt
# Plot train data, test data and predictions in original price units.
# ALL=True plots every point; otherwise only 50 per segment.
ALL = False
V_Test = len(X_test) if ALL else 50
V_Train = len(X_train) if ALL else 50
# Pick the date ranges for the train and test segments.
# NOTE(review): the CSV is newest-first and X/y were shifted by one row
# relative to df, so these positional slices may be off by one — verify
# the alignment against df before trusting the x-axis labels.
index_train = df.index[-len(X_train_full):][:V_Train] # last values of train
index_test = df.index[:len(X_test)][-V_Test:] # first values of test
# Invert the MinMaxScaler transform to recover the true price values.
y_train_inverse = scaler.inverse_transform(y_train)
y_test_inverse = scaler.inverse_transform(y_test)
y_pred_inverse = scaler.inverse_transform(y_pred)
# Plot the training data
plt.plot(index_train, y_train_inverse[:V_Train], label='Train Data')
# Plot the test data
plt.plot(index_test, y_test_inverse[-V_Test:], label='Test Data')
# Plot the predictions
plt.plot(index_test, y_pred_inverse[-V_Test:], label='Predictions')
plt.title('Comparación de Datos de Prueba y Predicciones (Valores Verdaderos)')
plt.xlabel('Fecha')
plt.ylabel('Cerrar/último')
plt.legend()
plt.xticks(rotation=45)
plt.show()
def genera_train(serie, lg_secuencia, lg_y):
    """Build a supervised sliding-window dataset from a series.

    Args:
        serie: Sliceable sequence/array of values.
        lg_secuencia (int): Length of each input window.
        lg_y (int): Number of consecutive future values to predict.

    Returns:
        tuple[np.ndarray, np.ndarray]: (x, y) where x[i] is
        serie[i:i+lg_secuencia] and y[i] holds the lg_y values that
        immediately follow that window.
    """
    x = []
    y = []
    # +1 keeps the final full window (the one ending exactly at the end of
    # the series); the original range() dropped that valid sample.
    ultimo = len(serie) - lg_secuencia - lg_y + 1
    for ix in range(ultimo):
        x.append(serie[ix:ix + lg_secuencia])
        y.append(serie[ix + lg_secuencia:ix + lg_secuencia + lg_y])
    return np.array(x), np.array(y)
# Windowed dataset: 50 past values predict the next 10.
X,y=genera_train(df_transformed,50,10)
# Chronological split: first 20% of windows -> test set.
value = int(len(X)*0.2)
X_train_full, y_train_full, X_test, y_test = X[value:],y[value:],X[:value],y[:value]
# Recompute the split point on the remaining data: the original reused the
# test-set size here, which made the validation fraction inconsistent with
# the 20%-of-train rule used in the first split above.
value = int(len(X_train_full)*0.2)
X_train, y_train, X_val, y_val = X_train_full[value:],y_train_full[value:],X_train_full[:value],y_train_full[:value]
# Drop the trailing feature axis from the targets: (n, lg_y, 1) -> (n, lg_y).
y_test = y_test.reshape((y_test.shape[0],y_test.shape[1]))
y_train = y_train.reshape((y_train.shape[0],y_train.shape[1]))
y_val = y_val.reshape((y_val.shape[0],y_val.shape[1]))
# Number of future steps each prediction covers (must match genera_train's lg_y).
lg_y = 10
# Stop when val_loss has not improved for 5 epochs; restore the best weights.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
# Two stacked SimpleRNNs; the first returns the full sequence so the second
# can consume it, and the Dense head emits all lg_y steps at once.
rnn_predice_10 = keras.models.Sequential([
keras.layers.SimpleRNN(40, return_sequences=True, input_shape=[None, 1]),
keras.layers.SimpleRNN(40),
keras.layers.Dense(lg_y)
])
rnn_predice_10.compile(loss="mse", optimizer="adam")
# epochs is effectively unbounded; early stopping ends training.
rnn_predice_10.fit(X_train, y_train,epochs=sys.maxsize,validation_data=[X_val,y_val],callbacks=[early_stopping_cb])
Epoch 1/9223372036854775807 46/46 [==============================] - 2s 18ms/step - loss: 0.0533 - val_loss: 0.0097 Epoch 2/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0038 - val_loss: 0.0077 Epoch 3/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0025 - val_loss: 0.0061 Epoch 4/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0021 - val_loss: 0.0058 Epoch 5/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0018 - val_loss: 0.0046 Epoch 6/9223372036854775807 46/46 [==============================] - 1s 12ms/step - loss: 0.0017 - val_loss: 0.0050 Epoch 7/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0015 - val_loss: 0.0045 Epoch 8/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0015 - val_loss: 0.0042 Epoch 9/9223372036854775807 46/46 [==============================] - 0s 11ms/step - loss: 0.0015 - val_loss: 0.0040 Epoch 10/9223372036854775807 46/46 [==============================] - 0s 10ms/step - loss: 0.0014 - val_loss: 0.0045 Epoch 11/9223372036854775807 46/46 [==============================] - 1s 12ms/step - loss: 0.0013 - val_loss: 0.0038 Epoch 12/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0014 - val_loss: 0.0046 Epoch 13/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0013 - val_loss: 0.0041 Epoch 14/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0013 - val_loss: 0.0038 Epoch 15/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0012 - val_loss: 0.0045 Epoch 16/9223372036854775807 46/46 [==============================] - 1s 11ms/step - loss: 0.0011 - val_loss: 0.0044 Epoch 17/9223372036854775807 46/46 [==============================] - 1s 12ms/step - loss: 0.0012 - val_loss: 0.0042 Epoch 
18/9223372036854775807 46/46 [==============================] - 1s 12ms/step - loss: 0.0011 - val_loss: 0.0034 Epoch 19/9223372036854775807 46/46 [==============================] - 0s 10ms/step - loss: 0.0011 - val_loss: 0.0035 Epoch 20/9223372036854775807 46/46 [==============================] - 0s 11ms/step - loss: 0.0012 - val_loss: 0.0054 Epoch 21/9223372036854775807 46/46 [==============================] - 0s 10ms/step - loss: 0.0012 - val_loss: 0.0035 Epoch 22/9223372036854775807 46/46 [==============================] - 0s 11ms/step - loss: 0.0012 - val_loss: 0.0045 Epoch 23/9223372036854775807 46/46 [==============================] - 0s 10ms/step - loss: 0.0011 - val_loss: 0.0032 Epoch 24/9223372036854775807 46/46 [==============================] - 0s 11ms/step - loss: 0.0010 - val_loss: 0.0035 Epoch 25/9223372036854775807 46/46 [==============================] - 0s 10ms/step - loss: 0.0010 - val_loss: 0.0033 Epoch 26/9223372036854775807 46/46 [==============================] - 0s 11ms/step - loss: 0.0011 - val_loss: 0.0036 Epoch 27/9223372036854775807 46/46 [==============================] - 0s 11ms/step - loss: 0.0010 - val_loss: 0.0034 Epoch 28/9223372036854775807 46/46 [==============================] - 0s 11ms/step - loss: 0.0010 - val_loss: 0.0033
<keras.src.callbacks.History at 0x17bcf5a8890>
# Score the 10-step-ahead model on the test windows.
y_pred = rnn_predice_10.predict(X_test)
#nsamples, nx, ny = y_pred.shape
#y_pred = X_test.reshape((nsamples,nx*ny))
# NOTE(review): y_test appears to already be 2-D after the split cell, so
# this reshape looks like a no-op kept for out-of-order cell execution.
y_test = y_test.reshape((y_test.shape[0],y_test.shape[1]))
# squared=False -> RMSE (root mean squared error).
rms = mean_squared_error(y_test, y_pred, squared=False)
print(f"rms rnn_predice_10 = {rms}")
16/16 [==============================] - 0s 3ms/step rms rnn_predice_10 = 0.03905146580528988