In [90]:
import pathlib
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.layers import Input, Dense, SimpleRNN
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow import keras
import numpy as np
In [91]:
path=str(pathlib.Path().resolve())+"/"
In [92]:
df=pd.read_csv(path+"Intel_Market.csv")
df
Out[92]:
Fecha Cerrar/último Volumen Abrir Alto Bajo
0 03/04/2024 40,33 83717980 41,40 41,65 40,22
1 02/04/2024 43,94 32748360 43,925 44,08 43,50
2 01/04/2024 44,52 31712590 44,29 45,41 44,18
3 28/03/2024 44,17 54204190 43,76 44,60 43,71
4 27/03/2024 43,77 51017830 42,54 43,83 42,50
... ... ... ... ... ... ...
2511 10/04/2014 26,425 40390460 27,01 27,09 26,36
2512 09/04/2014 26,98 34467000 26,93 27,02 26,73
2513 08/04/2014 26,91 56564370 26,36 26,98 26,35
2514 07/04/2014 26,485 49693490 26,30 26,74 26,20
2515 04/04/2014 26,16 47829470 26,49 26,73 26,105

2516 rows × 6 columns

Tratamiento¶

Convertimos el campo fecha en fecha y lo seleccionamos como Index para hacerle un rolling, lo cambiamos a formato float el precio de cierre y le hacemos un Rolling de 7 días

In [93]:
df['Fecha'] = pd.to_datetime(df['Fecha'], format='%d/%m/%Y').dt.date
df = df.set_index("Fecha")
df['Cerrar/último'] = df['Cerrar/último'].str.replace(",",".").astype(np.float32)
df = df[['Cerrar/último']].rolling(7).mean().dropna()
df=df[['Cerrar/último']]

Le hacemos un MinMaxScaler de los datos en un rango de 0-1

In [94]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0,1))
df_transformed = scaler.fit_transform(df)

Recogemos los datos de Y y de X

  • Y : Valor de mercado
  • X : Fecha
In [95]:
X = df_transformed[:-1]
y = df_transformed[1:]
In [96]:
X
Out[96]:
array([[0.42192749],
       [0.4296045 ],
       [0.42439509],
       ...,
       [0.03236994],
       [0.03192439],
       [0.03039927]])
In [97]:
#from sklearn.preprocessing import LabelEncoder

#l = LabelEncoder()
#X = l.fit_transform(X)

División de datos¶

Dividimos los datos donde:

  • Test: Será el 20% del Total del DataSet
  • Val: Será el 20% del DataSet restante (16% del Total)
  • Train: Será el resto del DataSet un total de 64% del DataSet
In [98]:
df.index[len(X_train):][:50]
Out[98]:
Index([2018-03-28, 2018-03-27, 2018-03-26, 2018-03-23, 2018-03-22, 2018-03-21,
       2018-03-20, 2018-03-19, 2018-03-16, 2018-03-15, 2018-03-14, 2018-03-13,
       2018-03-12, 2018-03-09, 2018-03-08, 2018-03-07, 2018-03-06, 2018-03-05,
       2018-03-02, 2018-03-01, 2018-02-28, 2018-02-27, 2018-02-26, 2018-02-23,
       2018-02-22, 2018-02-21, 2018-02-20, 2018-02-16, 2018-02-15, 2018-02-14,
       2018-02-13, 2018-02-12, 2018-02-09, 2018-02-08, 2018-02-07, 2018-02-06,
       2018-02-05, 2018-02-02, 2018-02-01, 2018-01-31, 2018-01-30, 2018-01-29,
       2018-01-26, 2018-01-25, 2018-01-24, 2018-01-23, 2018-01-22, 2018-01-19,
       2018-01-18, 2018-01-17],
      dtype='object', name='Fecha')
In [99]:
from sklearn.model_selection import train_test_split

value = int(len(X)*0.2)
X_train_full, y_train_full, X_test, y_test = X[value:],y[value:],X[:value],y[:value]

value = int(len(X_train_full)*0.2)
X_train, y_train, X_val, y_val = X_train_full[value:],y_train_full[value:],X_train_full[:value],y_train_full[:value]
In [100]:
import numpy as np


#X_train = np.reshape(X_train, (X_train.shape[0], 1))
#X_test = np.reshape(X_test, (X_test.shape[0], 1))

Red de predicción con 3 Neuronas¶

Definimos una Red Neuronal con la siguiente arquitectura:

  • LSTM (X_neuronas, return_sequences=True, input_shape=[None,1])
  • SimpleRNN (X_neuronas, return_sequences=False)
  • Dense (1)
In [101]:
"""OJO CON EL RETURN_SEQUENCES"""
import sys


def modelo_red_recurrente_3(x_train, y_train,n_neuronas,x_val,y_val):
    early_stopping_cb = keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
    # return_sequences significa que solo de salida por Vector no por secuencia, lo que hará que vayan todas de vez
    model = keras.models.Sequential([
        keras.layers.LSTM(n_neuronas, return_sequences=True, input_shape=[None, 1]),
        keras.layers.SimpleRNN(n_neuronas, return_sequences=False),
        keras.layers.Dense(1)
    ])
    model.compile(loss="mse", optimizer="adam")
    model.fit(x_train, y_train, epochs=sys.maxsize,validation_data=[x_val,y_val],callbacks=[early_stopping_cb])
    return model

Le decimos que use 40 neuronas y compruebe el Mean_squared_error para ver el resultado que ronda sobre 0.00X

In [102]:
from sklearn.metrics import mean_squared_error


modelo = modelo_red_recurrente_3(X_train, y_train,40,X_val,y_val)
y_pred = modelo.predict(X_test)

rms = mean_squared_error(y_test, y_pred, squared=False)
print(f"rms modelo_red_recurrente_1 = {rms}")
Epoch 1/9223372036854775807
51/51 [==============================] - 3s 14ms/step - loss: 0.0582 - val_loss: 0.0179
Epoch 2/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 0.0106 - val_loss: 0.0027
Epoch 3/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 5.4185e-04 - val_loss: 1.4545e-04
Epoch 4/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 7.1967e-05 - val_loss: 1.1650e-04
Epoch 5/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 7.0184e-05 - val_loss: 1.1477e-04
Epoch 6/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.9998e-05 - val_loss: 1.1426e-04
Epoch 7/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 7.1392e-05 - val_loss: 1.1535e-04
Epoch 8/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.8308e-05 - val_loss: 1.1206e-04
Epoch 9/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.7404e-05 - val_loss: 1.1097e-04
Epoch 10/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.7048e-05 - val_loss: 1.1692e-04
Epoch 11/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.8139e-05 - val_loss: 1.1057e-04
Epoch 12/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.5583e-05 - val_loss: 1.1012e-04
Epoch 13/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.6447e-05 - val_loss: 1.0556e-04
Epoch 14/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.7869e-05 - val_loss: 1.0425e-04
Epoch 15/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.6549e-05 - val_loss: 1.1274e-04
Epoch 16/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.3289e-05 - val_loss: 1.0801e-04
Epoch 17/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.4133e-05 - val_loss: 1.0408e-04
Epoch 18/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.2056e-05 - val_loss: 1.0579e-04
Epoch 19/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.2006e-05 - val_loss: 1.0241e-04
Epoch 20/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.2238e-05 - val_loss: 9.9154e-05
Epoch 21/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.2241e-05 - val_loss: 1.0884e-04
Epoch 22/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.0439e-05 - val_loss: 1.0025e-04
Epoch 23/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.3023e-05 - val_loss: 1.0107e-04
Epoch 24/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 5.8843e-05 - val_loss: 1.1497e-04
Epoch 25/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 5.9559e-05 - val_loss: 9.8045e-05
Epoch 26/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 5.8448e-05 - val_loss: 1.0252e-04
Epoch 27/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.1504e-05 - val_loss: 1.0357e-04
Epoch 28/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 5.9748e-05 - val_loss: 9.6570e-05
Epoch 29/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 5.9806e-05 - val_loss: 9.5402e-05
Epoch 30/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 5.9747e-05 - val_loss: 9.6208e-05
Epoch 31/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 5.6852e-05 - val_loss: 9.8589e-05
Epoch 32/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 5.8804e-05 - val_loss: 9.6486e-05
Epoch 33/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 5.7513e-05 - val_loss: 9.7110e-05
Epoch 34/9223372036854775807
51/51 [==============================] - 0s 3ms/step - loss: 6.1899e-05 - val_loss: 1.0136e-04
16/16 [==============================] - 0s 1ms/step
rms modelo_red_recurrente_1 = 0.008100431357431943

Mostramos un gráfico de barras que va de X-train a (X_test | X_pred), para comparar la predicción con los datos reales

  • Creación de una constante ALL para ver la comparativa 50 datos - 50 datos de prediccion | 50 datos de test
In [136]:
 
Out[136]:
Index([2022-06-07, 2022-06-06, 2022-06-03, 2022-06-02, 2022-06-01, 2022-05-31,
       2022-05-27, 2022-05-26, 2022-05-25, 2022-05-24, 2022-05-23, 2022-05-20,
       2022-05-19, 2022-05-18, 2022-05-17, 2022-05-16, 2022-05-13, 2022-05-12,
       2022-05-11, 2022-05-10, 2022-05-09, 2022-05-06, 2022-05-05, 2022-05-04,
       2022-05-03, 2022-05-02, 2022-04-29, 2022-04-28, 2022-04-27, 2022-04-26,
       2022-04-25, 2022-04-22, 2022-04-21, 2022-04-20, 2022-04-19, 2022-04-18,
       2022-04-14, 2022-04-13, 2022-04-12, 2022-04-11, 2022-04-08, 2022-04-07,
       2022-04-06, 2022-04-05, 2022-04-04, 2022-04-01, 2022-03-31, 2022-03-30,
       2022-03-29, 2022-03-28],
      dtype='object', name='Fecha')
In [126]:
import matplotlib.pyplot as plt

ALL = False
V_Test = len(X_test) if ALL else 50
V_Train = len(X_train) if ALL else 50

# Concatenar los índices de train y test para plotear
index_train = df.index[-len(X_train_full):][:V_Train]  # Últimos valores de train
index_test = df.index[:len(X_test)][-V_Test:]  # Primeros valores de test

# Invertir la transformación de MinMaxScaler para obtener los valores verdaderos
y_train_inverse = scaler.inverse_transform(y_train)
y_test_inverse = scaler.inverse_transform(y_test)
y_pred_inverse = scaler.inverse_transform(y_pred)

# Plot de datos de entrenamiento
plt.plot(index_train, y_train_inverse[:V_Train], label='Train Data')

# Plot de datos de prueba
plt.plot(index_test, y_test_inverse[-V_Test:], label='Test Data')

# Plot de predicciones
plt.plot(index_test, y_pred_inverse[-V_Test:], label='Predictions')

plt.title('Comparación de Datos de Prueba y Predicciones (Valores Verdaderos)')
plt.xlabel('Fecha')
plt.ylabel('Cerrar/último')
plt.legend()
plt.xticks(rotation=45)
plt.show()
In [ ]:
def genera_train(serie,lg_secuencia,lg_y):
    """Genera el set de entrenamiento

    Args:
        serie (pd.Serie): Serie de datos
        lg_secuencia (int): Longitud de la secuencia
        lg_y (int): Longitud de los datos recogidos
    """
    # Imaginemos que es de 20 entonces será 1000 - 20 - los valores que vamos recoger
    x=[]
    y=[]
    ultimo = len(serie)-lg_secuencia-lg_y
    for ix in range(ultimo):
      x.append(serie[ix:ix+lg_secuencia])
      y.append(serie[ix+lg_secuencia:ix+lg_secuencia+lg_y])
    return np.array(x), np.array(y)

X,y=genera_train(df_transformed,50,10)
value = int(len(X)*0.2)
X_train_full, y_train_full, X_test, y_test = X[value:],y[value:],X[:value],y[:value]
X_train, y_train, X_val, y_val = X_train_full[value:],y_train_full[value:],X_train_full[:value],y_train_full[:value]
In [ ]:
y_test = y_test.reshape((y_test.shape[0],y_test.shape[1]))
y_train = y_train.reshape((y_train.shape[0],y_train.shape[1]))
y_val = y_val.reshape((y_val.shape[0],y_val.shape[1]))
In [ ]:
lg_y = 10
early_stopping_cb = keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)
rnn_predice_10 = keras.models.Sequential([
    keras.layers.SimpleRNN(40, return_sequences=True, input_shape=[None, 1]),
    keras.layers.SimpleRNN(40),
    keras.layers.Dense(lg_y)
    ])
rnn_predice_10.compile(loss="mse", optimizer="adam")
In [ ]:
rnn_predice_10.fit(X_train, y_train,epochs=sys.maxsize,validation_data=[X_val,y_val],callbacks=[early_stopping_cb])
Epoch 1/9223372036854775807
46/46 [==============================] - 2s 18ms/step - loss: 0.0533 - val_loss: 0.0097
Epoch 2/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0038 - val_loss: 0.0077
Epoch 3/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0025 - val_loss: 0.0061
Epoch 4/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0021 - val_loss: 0.0058
Epoch 5/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0018 - val_loss: 0.0046
Epoch 6/9223372036854775807
46/46 [==============================] - 1s 12ms/step - loss: 0.0017 - val_loss: 0.0050
Epoch 7/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0015 - val_loss: 0.0045
Epoch 8/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0015 - val_loss: 0.0042
Epoch 9/9223372036854775807
46/46 [==============================] - 0s 11ms/step - loss: 0.0015 - val_loss: 0.0040
Epoch 10/9223372036854775807
46/46 [==============================] - 0s 10ms/step - loss: 0.0014 - val_loss: 0.0045
Epoch 11/9223372036854775807
46/46 [==============================] - 1s 12ms/step - loss: 0.0013 - val_loss: 0.0038
Epoch 12/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0014 - val_loss: 0.0046
Epoch 13/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0013 - val_loss: 0.0041
Epoch 14/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0013 - val_loss: 0.0038
Epoch 15/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0012 - val_loss: 0.0045
Epoch 16/9223372036854775807
46/46 [==============================] - 1s 11ms/step - loss: 0.0011 - val_loss: 0.0044
Epoch 17/9223372036854775807
46/46 [==============================] - 1s 12ms/step - loss: 0.0012 - val_loss: 0.0042
Epoch 18/9223372036854775807
46/46 [==============================] - 1s 12ms/step - loss: 0.0011 - val_loss: 0.0034
Epoch 19/9223372036854775807
46/46 [==============================] - 0s 10ms/step - loss: 0.0011 - val_loss: 0.0035
Epoch 20/9223372036854775807
46/46 [==============================] - 0s 11ms/step - loss: 0.0012 - val_loss: 0.0054
Epoch 21/9223372036854775807
46/46 [==============================] - 0s 10ms/step - loss: 0.0012 - val_loss: 0.0035
Epoch 22/9223372036854775807
46/46 [==============================] - 0s 11ms/step - loss: 0.0012 - val_loss: 0.0045
Epoch 23/9223372036854775807
46/46 [==============================] - 0s 10ms/step - loss: 0.0011 - val_loss: 0.0032
Epoch 24/9223372036854775807
46/46 [==============================] - 0s 11ms/step - loss: 0.0010 - val_loss: 0.0035
Epoch 25/9223372036854775807
46/46 [==============================] - 0s 10ms/step - loss: 0.0010 - val_loss: 0.0033
Epoch 26/9223372036854775807
46/46 [==============================] - 0s 11ms/step - loss: 0.0011 - val_loss: 0.0036
Epoch 27/9223372036854775807
46/46 [==============================] - 0s 11ms/step - loss: 0.0010 - val_loss: 0.0034
Epoch 28/9223372036854775807
46/46 [==============================] - 0s 11ms/step - loss: 0.0010 - val_loss: 0.0033
Out[ ]:
<keras.src.callbacks.History at 0x17bcf5a8890>
In [ ]:
y_pred = rnn_predice_10.predict(X_test)
#nsamples, nx, ny = y_pred.shape
#y_pred = X_test.reshape((nsamples,nx*ny))
y_test = y_test.reshape((y_test.shape[0],y_test.shape[1]))
rms = mean_squared_error(y_test, y_pred, squared=False)
print(f"rms rnn_predice_10 = {rms}")
16/16 [==============================] - 0s 3ms/step
rms rnn_predice_10 = 0.03905146580528988