!pip install --upgrade --force-reinstall --no-cache-dir numpy==1.23.5 pmdarima sktime tensorflow==2.12.0rc0 keras-tuner


import itertools
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose, STL
from statsmodels.stats.diagnostic import acorr_ljungbox
import statsmodels.graphics.api as smgraphics
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.statespace.sarimax import SARIMAX
from scipy.stats import boxcox
from scipy.special import inv_boxcox
from pmdarima import auto_arima
import warnings
from sktime.forecasting.compose import (TransformedTargetForecaster, make_reduction)
from sktime.forecasting.model_selection import (ExpandingWindowSplitter, ForecastingGridSearchCV)
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformations.series.detrend import Deseasonalizer, Detrender
from sktime.forecasting.base import ForecastingHorizon
from xgboost import XGBRegressor
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense,LSTM, Dropout, InputLayer
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
import keras_tuner as kt
from keras.regularizers import l1_l2
from keras.optimizers import Adam, SGD, RMSprop
from keras.callbacks import EarlyStopping

# Suppress specific warnings.
warnings.filterwarnings("ignore", category=FutureWarning)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)


isbn = pd.read_excel("/content/ISBN List.xlsx", sheet_name=None)
uk_weekly = pd.read_excel("/content/UK Weekly Trended Timeline from 200101_202429.xlsx", sheet_name=None)


isbn_Educational = pd.DataFrame(isbn["Y Childrens, YA & Educational"])
isbn_Trade = pd.DataFrame(isbn['T Adult Non-Fiction Trade'])
isbn_Specialist = pd.DataFrame(isbn['S Adult Non-Fiction Specialist'])
isbn_Fiction = pd.DataFrame(isbn['F - Adult Fiction'])


uk_weekly_Educational = pd.DataFrame(uk_weekly["Y Children's, YA & Educational"])
uk_weekly_Fiction = pd.DataFrame(uk_weekly['F Adult Fiction'])
uk_weekly_Trade = pd.DataFrame(uk_weekly['T Adult Non-Fiction Trade'])
uk_weekly_Specialist = pd.DataFrame(uk_weekly['S Adult Non-Fiction Specialist'])


isbn_names = {"isbn_Educational": isbn_Educational, "isbn_Trade": isbn_Trade, "isbn_Specialist": isbn_Specialist,
              "isbn_Fiction": isbn_Fiction}
uk_weekly_name = {"uk_weekly_Educational":uk_weekly_Educational, "uk_weekly_Trade": uk_weekly_Trade,
                  "uk_weekly_Specialist": uk_weekly_Specialist,"uk_weekly_Fiction": uk_weekly_Fiction}


def explore(dataframe_name):

  for i, data in dataframe_name.items():

    print(f"{' '.join(i.split('_'))} have the follwing properties")
    print("------------------------------------------------------------------------------------------")
    print(f"Shape \n {data.shape}")
    print("------------------------------------------------------------------------------------------")
    print(data.info())
    print("------------------------------------------------------------------------------------------")
    print(data.describe())
    print("******************************************************************************************")


explore(uk_weekly_name)

uk weekly Educational have the follwing properties
------------------------------------------------------------------------------------------
Shape 
 (55286, 13)
------------------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55286 entries, 0 to 55285
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   ISBN             55286 non-null  int64         
 1   Title            55286 non-null  object        
 2   Author           50113 non-null  object        
 3   Interval         55286 non-null  int64         
 4   End Date         55286 non-null  datetime64[ns]
 5   Volume           55286 non-null  int64         
 6   Value            55286 non-null  float64       
 7   ASP              55193 non-null  float64       
 8   RRP              54856 non-null  float64       
 9   Binding          55286 non-null  object        
 10  Imprint          55286 non-null  object        
 11  Publisher Group  55286 non-null  object        
 12  Product Class    55286 non-null  object        
dtypes: datetime64[ns](1), float64(3), int64(3), object(6)
memory usage: 5.5+ MB
None
------------------------------------------------------------------------------------------
               ISBN       Interval                       End Date  \
count  5.528600e+04   55286.000000                          55286   
mean   9.780811e+12  200744.123720  2007-09-04 13:22:46.523170304   
min    9.780002e+12  200101.000000            2001-01-06 00:00:00   
25%    9.780441e+12  200318.000000            2003-05-03 00:00:00   
50%    9.780721e+12  200601.000000            2006-01-07 00:00:00   
75%    9.780753e+12  201023.000000            2010-06-05 00:00:00   
max    9.781904e+12  202429.000000            2024-07-20 00:00:00   
std    5.779893e+08     561.901387                            NaN   

              Volume          Value           ASP           RRP  
count   55286.000000   55286.000000  55193.000000  54856.000000  
mean      530.412564    2743.474619      5.556607      6.921442  
min      -269.000000   -1348.460000    -11.010000      0.100000  
25%        18.000000      90.835000      4.406400      4.990000  
50%       151.000000     784.520000      4.962700      6.500000  
75%       493.000000    2435.630000      5.990000      7.990000  
max    193645.000000  483767.750000     40.000000     25.000000  
std      2314.946914   10668.029749      2.709133      3.219413  
******************************************************************************************
uk weekly Trade have the follwing properties
------------------------------------------------------------------------------------------
Shape 
 (65344, 13)
------------------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65344 entries, 0 to 65343
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   ISBN             65344 non-null  int64         
 1   Title            65344 non-null  object        
 2   Author           60655 non-null  object        
 3   Interval         65344 non-null  int64         
 4   End Date         65344 non-null  datetime64[ns]
 5   Volume           65344 non-null  int64         
 6   Value            65344 non-null  float64       
 7   ASP              65274 non-null  float64       
 8   RRP              65210 non-null  float64       
 9   Binding          65344 non-null  object        
 10  Imprint          65344 non-null  object        
 11  Publisher Group  65344 non-null  object        
 12  Product Class    65344 non-null  object        
dtypes: datetime64[ns](1), float64(3), int64(3), object(6)
memory usage: 6.5+ MB
None
------------------------------------------------------------------------------------------
               ISBN       Interval                       End Date  \
count  6.534400e+04   65344.000000                          65344   
mean   9.780467e+12  200884.679603  2009-01-30 21:32:22.360430848   
min    9.780003e+12  200101.000000            2001-01-06 00:00:00   
25%    9.780140e+12  200347.000000            2003-11-22 00:00:00   
50%    9.780416e+12  200731.000000            2007-08-04 00:00:00   
75%    9.780718e+12  201317.000000            2013-04-27 00:00:00   
max    9.781904e+12  202429.000000            2024-07-20 00:00:00   
std    3.994394e+08     611.839318                            NaN   

             Volume         Value           ASP           RRP  
count  65344.000000  6.534400e+04  65274.000000  65210.000000  
mean     376.208849  3.081678e+03      9.206646     12.781906  
min      -74.000000 -4.372600e+02    -77.460000      1.000000  
25%        7.000000  5.921750e+01      6.767075      8.990000  
50%       38.000000  3.285200e+02      7.986300     10.990000  
75%      239.000000  1.902920e+03     10.470900     15.990000  
max    80620.000000  1.056257e+06     32.010000     30.000000  
std     1591.597425  1.664461e+04      4.683351      6.206623  
******************************************************************************************
uk weekly Specialist have the follwing properties
------------------------------------------------------------------------------------------
Shape 
 (32827, 13)
------------------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32827 entries, 0 to 32826
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   ISBN             32827 non-null  int64         
 1   Title            32827 non-null  object        
 2   Author           28077 non-null  object        
 3   Interval         32827 non-null  int64         
 4   End Date         32827 non-null  datetime64[ns]
 5   Volume           32827 non-null  int64         
 6   Value            32827 non-null  float64       
 7   ASP              32731 non-null  float64       
 8   RRP              27429 non-null  float64       
 9   Binding          32827 non-null  object        
 10  Imprint          32827 non-null  object        
 11  Publisher Group  32827 non-null  object        
 12  Product Class    32827 non-null  object        
dtypes: datetime64[ns](1), float64(3), int64(3), object(6)
memory usage: 3.3+ MB
None
------------------------------------------------------------------------------------------
               ISBN       Interval                       End Date  \
count  3.282700e+04   32827.000000                          32827   
mean   9.780743e+12  200450.247479  2004-09-24 01:00:03.180308864   
min    9.780003e+12  200101.000000            2001-01-06 00:00:00   
25%    9.780341e+12  200218.000000            2002-05-04 00:00:00   
50%    9.780672e+12  200340.000000            2003-10-04 00:00:00   
75%    9.780765e+12  200602.000000            2006-01-14 00:00:00   
max    9.781904e+12  202429.000000            2024-07-20 00:00:00   
std    5.613454e+08     342.780391                            NaN   

             Volume         Value           ASP           RRP  
count  32827.000000  32827.000000  32731.000000  27429.000000  
mean      87.351296   1176.246431     13.479478     16.302991  
min      -86.000000   -863.620000     -5.765000      2.950000  
25%        5.000000     49.545000      8.429300      9.990000  
50%       41.000000    456.540000     12.020600     12.990000  
75%      114.000000   1451.050000     17.132050     19.950000  
max     4378.000000  39125.560000     46.690000     69.990000  
std      143.989186   2137.410382      7.445967     10.155029  
******************************************************************************************
uk weekly Fiction have the follwing properties
------------------------------------------------------------------------------------------
Shape 
 (73767, 13)
------------------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73767 entries, 0 to 73766
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   ISBN             73767 non-null  int64         
 1   Title            73767 non-null  object        
 2   Author           73500 non-null  object        
 3   Interval         73767 non-null  int64         
 4   End Date         73767 non-null  datetime64[ns]
 5   Volume           73767 non-null  int64         
 6   Value            73767 non-null  float64       
 7   ASP              73683 non-null  float64       
 8   RRP              73767 non-null  float64       
 9   Binding          73767 non-null  object        
 10  Imprint          73767 non-null  object        
 11  Publisher Group  73767 non-null  object        
 12  Product Class    73767 non-null  object        
dtypes: datetime64[ns](1), float64(3), int64(3), object(6)
memory usage: 7.3+ MB
None
------------------------------------------------------------------------------------------
               ISBN       Interval                       End Date  \
count  7.376700e+04   73767.000000                          73767   
mean   9.780393e+12  200877.213347  2009-01-03 22:50:01.049249536   
min    9.780002e+12  200101.000000            2001-01-06 00:00:00   
25%    9.780140e+12  200410.000000            2004-03-06 00:00:00   
50%    9.780341e+12  200735.000000            2007-09-01 00:00:00   
75%    9.780554e+12  201229.000000            2012-07-21 00:00:00   
max    9.781860e+12  202429.000000            2024-07-20 00:00:00   
std    3.179225e+08     592.264956                            NaN   

             Volume          Value           ASP           RRP  
count  73767.000000   73767.000000  73683.000000  73767.000000  
mean     381.227473    2448.109140      7.157384      9.566535  
min      -20.000000     -96.420000     -4.010000      2.500000  
25%        9.000000      62.270000      6.200400      7.990000  
50%       55.000000     383.900000      6.892600      8.990000  
75%      192.000000    1331.300000      7.730400      9.990000  
max    51175.000000  385543.990000     60.000000     25.000000  
std     1541.015144    9746.887969      2.323545      3.012778  
******************************************************************************************


def initial_processing(dataframe_name):

  for i, data in dataframe_name.items():


    data.set_index("End Date", inplace = True)
    data.sort_index(inplace = True)
    data["ISBN"] = data["ISBN"].astype("str")
    data.resample("W").sum().fillna(0, inplace = True)


initial_processing(uk_weekly_name)


def plot_isbn(dataframe_name):
  data2024_07_01 = {}
  unique_after2024_07_01 = []
  cutoff = pd.Timestamp('2024-07-01')

  for i, data in dataframe_name.items():

    for isbncutoff in data[data.index > cutoff]["ISBN"].unique():

      unique_after2024_07_01.append(isbncutoff)
      plt.figure(figsize=(12,8))
      plt.plot(data[data["ISBN"] == isbncutoff]["Volume"])
      plt.title(f"Sales Volume Over Time for ISBN {isbncutoff}")
      plt.xlabel("Sales Date")
      plt.ylabel("Volume")
      plt.show()

  print(unique_after2024_07_01)


plot_isbn(uk_weekly_name)

['9781841462400', '9780006647553', '9780440864141', '9780241003008', '9781841460406', '9780744523232', '9781841462301', '9780440864554', '9781841461502', '9780752844299', '9781841460307', '9781841462509', '9780752846576', '9780099422587', '9780340696767', '9780099285823', '9780552145954', '9780552997034', '9780593048153', '9780140275421', '9780091816971', '9781841150437', '9780006531203', '9780140281293', '9780091867775', '9780749395698', '9780140259506', '9780719559792', '9780140276619', '9780340766057', '9780099286578', '9780099428558', '9780140294231', '9780224060875', '9780330355667', '9780340786055', '9780099286387', '9780552998482', '9780261103252', '9780099771517', '9780349114033', '9780552998727', '9780552997348', '9780006514213', '9780140276336', '9780140285215', '9780552998000', '9780747268161', '9780140295962', '9780552998444', '9780349113609', '9780349112763', '9780099244721', '9780749397548', '9780006512134', '9780722532935', '9780006514091', '9780007101887', '9780552145060', '9780006550433', '9780552145053']


books = ["very hungry caterpillar", "alchemist"]
def filter_books(isbn_books, weekly_sales, books):
  lookedup_isbn = {}

  for i in books:
      for (key1, data1), (key2, data2) in zip(isbn_books.items(), weekly_sales.items()):

        data1["ISBN"] = data1["ISBN"].astype("str")
        result = data1[data1["Title"].str.lower().str.contains(i)]["ISBN"]
        if not result.empty:
          data2_result = data2[data2["ISBN"].isin(list(result))]
          data2_result.index = data2_result.index + pd.Timedelta(days=1)
          lookedup_isbn[i.split()[-1]] = data2_result.groupby(data2_result.index).sum().asfreq('W').fillna(0)["Volume"]


  return lookedup_isbn


selected_books = filter_books(isbn_names, uk_weekly_name, books)


alchemist = pd.DataFrame(selected_books['alchemist'])
caterpillar = pd.DataFrame(selected_books['caterpillar'])


print(type(alchemist["Volume"].index))
print(alchemist.index.freq)
print(alchemist.index.inferred_freq)

<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
<Week: weekday=6>
W-SUN


cutoff = pd.Timestamp('2012-01-01')
alchemist_cutoff = alchemist[alchemist.index > cutoff]
caterpillar_cutoff = caterpillar[caterpillar.index > cutoff]


two_books = {"The Alchemist": alchemist_cutoff, "The Very Hungry Caterpillar": caterpillar_cutoff}
explore(two_books)

The Alchemist have the follwing properties
------------------------------------------------------------------------------------------
Shape 
 (655, 1)
------------------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 655 entries, 2012-01-08 to 2024-07-21
Freq: W-SUN
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Volume  655 non-null    float64
dtypes: float64(1)
memory usage: 10.2 KB
None
------------------------------------------------------------------------------------------
            Volume
count   655.000000
mean    528.102290
std     227.965588
min       0.000000
25%     415.500000
50%     508.000000
75%     606.000000
max    2201.000000
******************************************************************************************
The Very Hungry Caterpillar have the follwing properties
------------------------------------------------------------------------------------------
Shape 
 (655, 1)
------------------------------------------------------------------------------------------
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 655 entries, 2012-01-08 to 2024-07-21
Freq: W-SUN
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Volume  655 non-null    float64
dtypes: float64(1)
memory usage: 10.2 KB
None
------------------------------------------------------------------------------------------
            Volume
count   655.000000
mean   1348.909924
std     710.689315
min       0.000000
25%     723.000000
50%    1324.000000
75%    1758.500000
max    3905.000000
******************************************************************************************


plt.figure(figsize=(15,8))
alchemist_cutoff["Volume"].plot(label='Alchemist')
caterpillar_cutoff["Volume"].plot(label='Caterpillar')
plt.title('Sales Volume for both Alchemist and Caterpillar')
plt.xlabel('Date')
plt.ylabel('Volume')
plt.legend()
plt.show()


def decomposition(data, pd):

  data_stl = STL(data , period=pd)
  data_fit = data_stl.fit()
  residual = data_fit.resid
  #data_fit.plot();
  print('Ljung-Box test output\n', acorr_ljungbox(residual), '...\n')
  return residual


print("Decomposition for The Very Hungry Caterpillar")
residual_cat = decomposition(caterpillar_cutoff, 52)
print("\n Decomposition for The Alchemist")
residual_alch = decomposition(alchemist_cutoff, 52)

Decomposition for The Very Hungry Caterpillar
Ljung-Box test output
        lb_stat      lb_pvalue
1   370.471831   1.477278e-82
2   563.416272  4.525964e-123
3   655.510867  9.300685e-142
4   703.722530  5.447512e-151
5   735.165914  1.221569e-156
6   745.231614  1.044323e-157
7   746.925241  5.238363e-157
8   747.070784  5.223687e-156
9   747.322418  4.603664e-155
10  752.384656  3.530257e-155 ...


 Decomposition for The Alchemist
Ljung-Box test output
        lb_stat      lb_pvalue
1   322.397402   4.352123e-72
2   493.431912  7.122324e-108
3   598.685446  1.942542e-129
4   669.332329  1.521385e-143
5   712.116376  1.178602e-151
6   735.396404  1.390001e-155
7   743.813088  2.457366e-156
8   745.613803  1.076019e-155
9   748.310139  2.822442e-155
10  763.421832  1.500879e-157 ...


def acf_pacf_plot(data):
  smgraphics.tsa.plot_acf(data, lags=52);
  smgraphics.tsa.plot_pacf(data, lags=52);


print("ACF and PACF Plot for The Very Hungry Caterpillar using Decomposition Residual")
acf_pacf_plot(residual_cat)
plt.show()
print("\n ACF and PACF Plot for The Alchemist using Decomposition Residual")
acf_pacf_plot(residual_alch)
plt.show()

ACF and PACF Plot for The Very Hungry Caterpillar using Decomposition Residual

 ACF and PACF Plot for The Alchemist using Decomposition Residual


print("ACF and PACF Plot for The Very Hungry Caterpillar using Volume")
acf_pacf_plot(caterpillar_cutoff)
plt.show()
print("\n ACF and PACF Plot for The Alchemist using Volume")
acf_pacf_plot(alchemist_cutoff)
plt.show()

ACF and PACF Plot for The Very Hungry Caterpillar using Volume

 ACF and PACF Plot for The Alchemist using Volume


def stationarity(data):

  p_value = 0.05
  adf_result = adfuller(data['Volume'])
  if adf_result[1] < p_value:
    print("The Data in it's original form is stationary it has a p-value of:", adf_result[1])
  else:
    seasonal_diff = data['Volume'].diff(1).dropna()
    adf_result_seasonal_diff = adfuller(seasonal_diff)
    if adf_result_seasonal_diff[1] < p_value:
      print('The data after difference is now stationary with a p-value of:', adf_result_seasonal_diff[1])
    else:
      print("The data is still not stationary, it has a p-value of:", adf_result_seasonal_diff[1])


print("Stationarity testing for The Very Hungry Caterpillar")
stationarity(caterpillar_cutoff)
print("\n Stationarity testing for The Alchemist")
stationarity(alchemist_cutoff)

Stationarity testing for The Very Hungry Caterpillar
The Data in it's original form is stationary it has a p-value of: 0.029614783911209044

 Stationarity testing for The Alchemist
The Data in it's original form is stationary it has a p-value of: 4.139950997174928e-13


val_size = 32


def plot_arima(val_df, forecast):

  forecast_index = val_df.index
  forecast_series = pd.Series(forecast, index=forecast_index)

  plt.figure(figsize=(15, 8))
  ax = plt.gca()
  val_df.plot(ax=ax, color='green', label='Validation (Actual)')
  forecast_series.plot(ax=ax, color='red',  label='Forecast')

  plt.title(' Validation, and Forecast')
  plt.xlabel('Date')
  plt.ylabel('Value')
  plt.legend()
  print(f'MAE for the next 32 weeks forcast is, {mean_absolute_error(val_df["Volume"], forecast)}')
  print(f'MAPE for the next 32 forcast is, {mean_absolute_percentage_error(val_df["Volume"], forecast)}')
  plt.show()


def auto(data, m , size):

  train_df, val_df = data.iloc[:-size], data.iloc[-size:]

  model = auto_arima(train_df, seasonal=True, m=m, stationary=False, trace=False, error_action='ignore', suppress_warnings=True, information_criterion='aic',
                   scoring='mse', stepwise=True)
  print(model.summary())

  forecast, conf_int = model.predict(size, return_conf_int=True, alpha=0.05)
  fitted = model.predict_in_sample()

  plot_arima(val_df, forecast)


auto(alchemist_cutoff, 52, val_size)

                                      SARIMAX Results                                      
===========================================================================================
Dep. Variable:                                   y   No. Observations:                  623
Model:             SARIMAX(1, 1, 2)x(2, 0, [], 52)   Log Likelihood               -3804.646
Date:                             Tue, 27 May 2025   AIC                           7623.292
Time:                                     13:33:31   BIC                           7654.322
Sample:                                 01-08-2012   HQIC                          7635.352
                                      - 12-10-2023                                         
Covariance Type:                               opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept     -0.0154      0.163     -0.095      0.925      -0.334       0.304
ar.L1          0.7663      0.052     14.793      0.000       0.665       0.868
ma.L1         -1.0745      0.058    -18.422      0.000      -1.189      -0.960
ma.L2          0.1201      0.045      2.678      0.007       0.032       0.208
ar.S.L52       0.4296      0.026     16.401      0.000       0.378       0.481
ar.S.L104      0.2961      0.026     11.497      0.000       0.246       0.347
sigma2      1.139e+04    360.075     31.619      0.000    1.07e+04    1.21e+04
===================================================================================
Ljung-Box (L1) (Q):                   0.04   Jarque-Bera (JB):              1329.94
Prob(Q):                              0.84   Prob(JB):                         0.00
Heteroskedasticity (H):               2.95   Skew:                             0.54
Prob(H) (two-sided):                  0.00   Kurtosis:                        10.08
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
MAE for the next 32 weeks forcast is, 155.07091806095434
MAPE for the next 32 forcast is, 0.2975146917185349


auto(caterpillar_cutoff, 52, val_size)

                                        SARIMAX Results                                        
===============================================================================================
Dep. Variable:                                       y   No. Observations:                  623
Model:             SARIMAX(2, 1, 1)x(1, 0, [1, 2], 52)   Log Likelihood               -4482.088
Date:                                 Tue, 27 May 2025   AIC                           8978.177
Time:                                         14:49:20   BIC                           9009.207
Sample:                                     01-08-2012   HQIC                          8990.237
                                          - 12-10-2023                                         
Covariance Type:                                   opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.8131      0.034     23.604      0.000       0.746       0.881
ar.L2         -0.0665      0.032     -2.052      0.040      -0.130      -0.003
ma.L1         -0.9646      0.013    -74.651      0.000      -0.990      -0.939
ar.S.L52       0.6274      0.086      7.312      0.000       0.459       0.796
ma.S.L52      -0.3466      0.089     -3.884      0.000      -0.521      -0.172
ma.S.L104      0.1031      0.051      2.018      0.044       0.003       0.203
sigma2      1.041e+05   3031.737     34.324      0.000    9.81e+04     1.1e+05
===================================================================================
Ljung-Box (L1) (Q):                   0.01   Jarque-Bera (JB):              5951.93
Prob(Q):                              0.94   Prob(JB):                         0.00
Heteroskedasticity (H):               5.46   Skew:                             0.91
Prob(H) (two-sided):                  0.00   Kurtosis:                        18.05
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
MAE for the next 32 weeks forcast is, 353.1408830193634
MAPE for the next 32 forcast is, 0.1863215351859458


def meachine_learning_xgboost(sp=52, degree=1):

  regressor = XGBRegressor(base_score=0.5,
                      n_estimators=400,
                      min_child_weight=1,
                      max_depth=10,
                      learning_rate=0.1,
                      booster='gbtree',
                      tree_method='exact',
                      reg_alpha=0,
                      subsample=0.5,
                      validate_parameters=1,
                      colsample_bylevel=1,
                      colsample_bynode=1,
                      colsample_bytree=1,
                      gamma=0,
                      eval_metric="mae",
                    )
  forecaster = TransformedTargetForecaster(
        [
            ("deseasonalize", Deseasonalizer(model="additive", sp=sp)),
            ("detrend", Detrender(forecaster=PolynomialTrendForecaster(degree=degree))),
            (
                "forecast",
                make_reduction(regressor, window_length=52, strategy="recursive"),
            ),
        ]
    )
  return forecaster


def grid_search_predictor(train, test1, predictor, param_grid):

  cv = ExpandingWindowSplitter(initial_window=int(len(train) * 0.7))

  gscv = ForecastingGridSearchCV(
      predictor, strategy="refit", cv=cv, param_grid=param_grid,
      scoring=MeanAbsolutePercentageError(symmetric=True),
      error_score="raise"
  )

  future_horizon = np.arange(len(test1)) + 1
  gscv.fit(train)
  print(f"Best parameters: {gscv.best_params_}")
  predictions1 = gscv.predict(fh=future_horizon)

  return predictions1


param_grid = {"forecast__window_length": [26, 52, 60, 78],
              "forecast__estimator__max_depth": [3, 5, 10, 15]}


def plot_prediction(test1, forcast1):
  plt.figure(figsize=(15,8))
  #train.plot(c="blue")
  test1.plot(c ="green")
  forcast1.plot(c = "red")

  print(f"MAE for the next 32 weeks forcast is, {mean_absolute_error(test1, forcast1)}")
  print(f"MAPE for the next 32 forcast is, {mean_absolute_percentage_error(test1, forcast1)}")
  print("***********************************************************************************")


predictor = meachine_learning_xgboost(degree=5)
alchemist_train, alchemist_test = alchemist[:-val_size], alchemist[-val_size:]
alchemist_train.index =  pd.PeriodIndex(alchemist_train.index, freq='W')
alchemist_test.index =  pd.PeriodIndex(alchemist_test.index, freq='W')

predictions1= grid_search_predictor( (alchemist_train["Volume"]), (alchemist_test["Volume"]),
                                                   predictor, param_grid)

Best parameters: {'forecast__estimator__max_depth': 5, 'forecast__window_length': 78}


plot_prediction((alchemist_test["Volume"]),  predictions1 )

MAE for the next 32 weeks forcast is, 404.6617338888266
MAPE for the next 32 forcast is, 0.8247771223330276
***********************************************************************************


predictor2 = meachine_learning_xgboost(degree=5)
caterpillar_train, caterpillar_test = caterpillar[:-val_size], caterpillar[-val_size:]
caterpillar_train.index =  pd.PeriodIndex(caterpillar_train.index, freq='W')
caterpillar_test.index =  pd.PeriodIndex(caterpillar_test.index, freq='W')


predictions1 = grid_search_predictor(caterpillar_train["Volume"], caterpillar_test["Volume"],
                                                   predictor2, param_grid)

Best parameters: {'forecast__estimator__max_depth': 15, 'forecast__window_length': 52}


plot_prediction(caterpillar_test["Volume"],  predictions1)

MAE for the next 32 weeks forcast is, 387.05080382864935
MAPE for the next 32 forcast is, 0.1951807988572985
***********************************************************************************


lookback = 52
forecast = 32


def input_output1(data, lookback, forecast):
  input_seq = []
  output_seq = []

  for i in range(len(data) - lookback - forecast + 1):

    input_seq.append(data[i:i + lookback])
    output_seq.append(data[i + lookback:i + lookback + forecast])

  return np.array(input_seq), np.array(output_seq)


alchemist_scaler = MinMaxScaler(feature_range=(0,1))
alchemist_transform = alchemist_scaler.fit_transform(alchemist)

caterpillar_scaler = MinMaxScaler(feature_range=(0,1))
caterpillar_transform = caterpillar_scaler.fit_transform(caterpillar)


alchemist_input_seq, alchemist_output_seq = input_output1(alchemist_transform, lookback, forecast)
caterpillar_input_seq, caterpillar_output_seq = input_output1(caterpillar_transform, lookback, forecast)


def lstm_timeseries(hp, lookback, forecast):


  model = Sequential()
  model.add(InputLayer(input_shape=(lookback, 1)))

  num_layers = hp.Int("num_layers", min_value = 1, max_value =4, step = 1)
  units = hp.Int("units", min_value = 32, max_value = 128, step = 32)
  activation = hp.Choice("activation", values = ["relu", "tanh", "swish"])
  dropout_rate = hp.Float("dropout_rate", min_value = 0.1, max_value = 0.5, step = 0.1)
  reg = hp.Float('reg', min_value=1e-4, max_value=1e-2, sampling='log')
  optimizer_choice = hp.Choice("optimizer_choice", values = ["adam", "sgd", "rmsprop"])
  optimizer = {
      "adam": Adam(),
      "sgd": SGD(),
      "rmsprop": RMSprop()
      }[optimizer_choice]


  for i in range(num_layers):

    return_seq = True if i < num_layers - 1 else False
    model.add(LSTM(units= units, activation = activation, kernel_regularizer= l1_l2(reg), return_sequences = return_seq))
    model.add(Dropout(dropout_rate))

  model.add(Dense(forecast))
  model.compile(loss='mean_absolute_error', optimizer=optimizer, metrics= ["mae"])

  return model


def search_best_model(input_train, output_train, project_name):

  train_size = int(len(input_train)* 0.8)
  x_train, x_val = input_train[:train_size], input_train[train_size:]
  y_train, y_val = output_train[:train_size], output_train[train_size:]

  tuner = kt.RandomSearch(lambda hp: lstm_timeseries(hp, lookback, forecast),
                        objective = "val_loss",
                        max_trials=10,
                        executions_per_trial=1,
                        directory='dir',
                        project_name= project_name)
  early_stop = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
  tuner.search(x_train, y_train, epochs=200, batch_size=32, validation_data=(x_val, y_val), callbacks=[early_stop], verbose=1)
  best_model = tuner.get_best_models(num_models=1)[0]

  return best_model


def plot_lstm_result(actual, result):

  plt.figure(figsize=(12,8))
  plt.plot(actual, label='Actual', c = "green")
  plt.plot(result, label='Predicted', c = "red")

  print(f"MAE for the next 32 weeks forcast is, {mean_absolute_error(actual, result)}")
  print(f"MAPE for the next 32 forcast is, {mean_absolute_percentage_error(actual, result)}")
  plt.show()


predict_alchemist_next32 = alchemist_transform[- lookback:]
predict_alchemist_next32 = predict_alchemist_next32.reshape(1, lookback, 1)
alchemist_model  = search_best_model(alchemist_input_seq, alchemist_output_seq,  "model_a")
prediction_result =  alchemist_model.predict(predict_alchemist_next32)
prediction_result = alchemist_scaler.inverse_transform(prediction_result)

Trial 10 Complete [00h 02m 34s]
val_loss: 0.03139953687787056

Best val_loss So Far: 0.028060106560587883
Total elapsed time: 00h 23m 27s
1/1 [==============================] - 0s 247ms/step


alchemist_prediction =  pd.DataFrame({"Date": alchemist[- val_size:].index, "Volume": prediction_result.flatten()})
alchemist_prediction.set_index("Date", inplace = True)
plot_lstm_result(alchemist[- val_size:], alchemist_prediction)

MAE for the next 32 weeks forcast is, 206.83562850952148
MAPE for the next 32 forcast is, 0.3541896030075633


predict_caterpillar_next32 = caterpillar_transform[- lookback:]
predict_caterpillar_next32 = predict_caterpillar_next32.reshape(1, lookback, 1)
caterpillar_model = search_best_model(caterpillar_input_seq, caterpillar_output_seq, "model_b")
caterpillar_prediction_result =  caterpillar_model.predict(predict_caterpillar_next32)
caterpillar_prediction_result = caterpillar_scaler.inverse_transform(caterpillar_prediction_result)

Trial 10 Complete [00h 18m 23s]
val_loss: 0.6193820238113403

Best val_loss So Far: 0.10460196435451508
Total elapsed time: 00h 54m 04s
1/1 [==============================] - 0s 366ms/step


caterpillar_prediction =  pd.DataFrame({"Date": caterpillar[- val_size:].index, "Volume": caterpillar_prediction_result.flatten()})
caterpillar_prediction.set_index("Date", inplace = True)
plot_lstm_result(caterpillar[- val_size:], caterpillar_prediction)

MAE for the next 32 weeks forcast is, 504.5280418395996
MAPE for the next 32 forcast is, 0.24724440109298584


def hybrid_sarima(data, p, d, q, P, D, Q, S):

  model_sarima = SARIMAX(endog= data, order=(p, d, q), seasonal_order=(P, D, Q, S))
  model_sarima_fit = model_sarima.fit(maxiter=500, disp=False)
  print(model_sarima_fit)

  return model_sarima_fit


hybrid_alchemist_train, hybrid_alchemist_test = alchemist[:-val_size], alchemist[-val_size:]
hybrid_caterpillar_train, hybrid_caterpillar_test = caterpillar[:-val_size], caterpillar[-val_size:]

sarima_alchemist_model = hybrid_sarima(hybrid_alchemist_train, 1, 1, 2, 2, 0, 0, 52)
sarima_caterpillar_model = hybrid_sarima(hybrid_caterpillar_train, 2, 1, 1, 1, 0, [1, 2], 52)

<statsmodels.tsa.statespace.sarimax.SARIMAXResultsWrapper object at 0x7db15d627b50>

/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/sarimax.py:966: UserWarning: Non-stationary starting autoregressive parameters found. Using zeros as starting parameters.
  warn('Non-stationary starting autoregressive parameters'
/usr/local/lib/python3.11/dist-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'

<statsmodels.tsa.statespace.sarimax.SARIMAXResultsWrapper object at 0x7db15d653a10>


hybrid_alchemist_scaler = MinMaxScaler(feature_range=(0,1))
hybrid_alchemist_scaled =  hybrid_alchemist_scaler.fit_transform (pd.DataFrame(sarima_alchemist_model.resid))

hybrid_caterpillar_scaler = MinMaxScaler(feature_range=(0,1))
hybrid_caterpillar_scaled = hybrid_caterpillar_scaler.fit_transform(pd.DataFrame(sarima_caterpillar_model.resid))


hybrid_alchemist_input, hybrid_alchemist_output = input_output1(hybrid_alchemist_scaled, lookback, forecast)
hybrid_caterpillar_input, hybrid_caterpillar_output = input_output1(hybrid_caterpillar_scaled, lookback, forecast)


lstm_alchemist_model  = search_best_model(hybrid_alchemist_input, hybrid_alchemist_output,  "model_c")
lstm_caterpillar_model  = search_best_model(hybrid_caterpillar_input, hybrid_caterpillar_output,  "model_d")

Trial 10 Complete [00h 08m 50s]
val_loss: 0.28357160091400146

Best val_loss So Far: 0.05631991848349571
Total elapsed time: 00h 36m 42s


sarima_hybrid_result_alchemist = sarima_alchemist_model.get_forecast(len(hybrid_alchemist_test)).predicted_mean
sarima_hybrid_result_caterpillar = sarima_caterpillar_model.get_forecast(len(hybrid_caterpillar_test)).predicted_mean


lstm_hybrid_result_alchemist = hybrid_alchemist_scaler.inverse_transform(lstm_alchemist_model.predict(hybrid_alchemist_scaled[-lookback:].reshape(1, lookback, 1)))
lstm_hybrid_result_caterpillar = hybrid_alchemist_scaler.inverse_transform(lstm_caterpillar_model.predict(hybrid_caterpillar_scaled[-lookback:].reshape(1,lookback,1)))

1/1 [==============================] - 0s 186ms/step
1/1 [==============================] - 0s 467ms/step


alchemist_hybrid_df = pd.DataFrame({"Original Volume": hybrid_alchemist_test['Volume'], "LSTM + SARIMAX": lstm_hybrid_result_alchemist.flatten() + sarima_hybrid_result_alchemist,
                                    "LSTM": lstm_hybrid_result_alchemist.flatten(), "SARIMAX": sarima_hybrid_result_alchemist})
plot_lstm_result(alchemist_hybrid_df['Original Volume'], alchemist_hybrid_df['LSTM + SARIMAX'])

MAE for the next 32 weeks forcast is, 172.478822418803
MAPE for the next 32 forcast is, 0.3191276734873695


caterpillar_hybrid_df = pd.DataFrame({"Original Volume": hybrid_caterpillar_test['Volume'], "LSTM + SARIMAX": lstm_hybrid_result_caterpillar.flatten() + sarima_hybrid_result_caterpillar,
                                    "LSTM": lstm_hybrid_result_caterpillar.flatten(), "SARIMAX": sarima_hybrid_result_caterpillar})
plot_lstm_result(caterpillar_hybrid_df['Original Volume'], caterpillar_hybrid_df['LSTM + SARIMAX'])

MAE for the next 32 weeks forcast is, 517.1055934317556
MAPE for the next 32 forcast is, 0.3004058804758878


percentage = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]


pairings = list(itertools.product(percentage, repeat = 2))


parallel_alchemist = pd.DataFrame({"SARIMA": pd.DataFrame(sarima_hybrid_result_alchemist)["predicted_mean"],
                                   "LSTM": alchemist_prediction['Volume']})
parallel_caterpillar = pd.DataFrame({"SARIMA": pd.DataFrame(sarima_hybrid_result_caterpillar)["predicted_mean"],
                                   "LSTM": caterpillar_prediction['Volume']})


def best_weightage(sarima_prediction, lstm_prediction, actual):

  parallel_best = []
  mape = 100
  for pair in pairings:
    combined = ((pair[0]* sarima_prediction) + (pair[1]* lstm_prediction))

    if mean_absolute_percentage_error(actual, combined) < mape:
      mape = mean_absolute_percentage_error(actual, combined)
      parallel_best = pair

  combined_best = ((parallel_best[0]* sarima_prediction) + (parallel_best[1]* lstm_prediction))
  return parallel_best, combined_best


alchemist_best_weightage, parallel_alchemist_combined = best_weightage(parallel_alchemist["SARIMA"], parallel_alchemist["LSTM"], alchemist[- val_size:]["Volume"])
caterpillar_best_weightage, parallel_caterpillar_combined = best_weightage(parallel_caterpillar["SARIMA"], parallel_caterpillar["LSTM"], caterpillar[- val_size:]["Volume"])


plot_lstm_result(alchemist[- val_size:]["Volume"], parallel_alchemist_combined)

MAE for the next 32 weeks forcast is, 188.80709457312167
MAPE for the next 32 forcast is, 0.248985302918793


plot_lstm_result(caterpillar[- val_size:]["Volume"], parallel_caterpillar_combined)

MAE for the next 32 weeks forcast is, 346.18424311371456
MAPE for the next 32 forcast is, 0.17325468482225337


monthly = 12
next_8months = 8


monthly_alchemist_df = alchemist.resample("M").sum()
monthly_caterpillar_df = caterpillar.resample("M").sum()


plt.figure(figsize=(15,8))
plt.plot(monthly_alchemist_df[monthly_alchemist_df.index > pd.Timestamp("2019-01-01")])
plt.plot(monthly_caterpillar_df[monthly_caterpillar_df.index > pd.Timestamp("2019-01-01")])

[<matplotlib.lines.Line2D at 0x7db016b5b090>]


print(monthly_alchemist_df.index.freq)
print(monthly_alchemist_df.index.inferred_freq)

<MonthEnd>
ME


monthly_param_grid = {"forecast__window_length": [3, 6, 12, 24, 48, 60, 96],
              "forecast__estimator__max_depth": [3, 5, 10, 15]}


monthly_predictor = meachine_learning_xgboost(sp = monthly, degree=5)
alchemist_monthly_train, alchemist_monthly_test, = monthly_alchemist_df[:- next_8months],  monthly_alchemist_df[-next_8months:]
alchemist_monthly_train.index =  pd.PeriodIndex(alchemist_monthly_train.index, freq='M')
alchemist_monthly_test.index =  pd.PeriodIndex(alchemist_monthly_test.index, freq='M')


predictions1= grid_search_predictor(
    (alchemist_monthly_train["Volume"]), (alchemist_monthly_test["Volume"]), monthly_predictor, monthly_param_grid)

Best parameters: {'forecast__estimator__max_depth': 3, 'forecast__window_length': 60}


alchemist_monthly_test.index = alchemist_monthly_test.index.to_timestamp(freq='M')
plot_lstm_result(alchemist_monthly_test["Volume"], predictions1)

MAE for the next 32 weeks forcast is, 699.7449794132554
MAPE for the next 32 forcast is, 0.34185199931935517


monthly_predictor2 = meachine_learning_xgboost(sp = monthly, degree=5)
caterpillar_monthly_train, caterpillar_monthly_test, = monthly_caterpillar_df[:- next_8months],  monthly_caterpillar_df[-next_8months:]
caterpillar_monthly_train.index =  pd.PeriodIndex(caterpillar_monthly_train.index, freq='M')
caterpillar_monthly_test.index =  pd.PeriodIndex(caterpillar_monthly_test.index, freq='M')


predictions3 = grid_search_predictor(
    (caterpillar_monthly_train["Volume"]), (caterpillar_monthly_test["Volume"]), monthly_predictor2, monthly_param_grid)

caterpillar_monthly_test.index = caterpillar_monthly_test.index.to_timestamp(freq='M')
plot_lstm_result(caterpillar_monthly_test["Volume"], predictions3)

Best parameters: {'forecast__estimator__max_depth': 3, 'forecast__window_length': 60}
MAE for the next 32 weeks forcast is, 2349.7801125306532
MAPE for the next 32 forcast is, 0.24289293120752836


auto(monthly_alchemist_df, monthly, next_8months)

/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(

                                      SARIMAX Results                                      
===========================================================================================
Dep. Variable:                                   y   No. Observations:                  275
Model:             SARIMAX(1, 1, 2)x(2, 0, [], 12)   Log Likelihood               -2433.862
Date:                             Wed, 28 May 2025   AIC                           4881.724
Time:                                     13:03:42   BIC                           4907.016
Sample:                                 01-31-2001   HQIC                          4891.876
                                      - 11-30-2023                                         
Covariance Type:                               opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept     -0.9956      2.098     -0.475      0.635      -5.107       3.115
ar.L1          0.8514      0.043     19.605      0.000       0.766       0.936
ma.L1         -1.3218      0.068    -19.564      0.000      -1.454      -1.189
ma.L2          0.3290      0.059      5.597      0.000       0.214       0.444
ar.S.L12       0.4901      0.036     13.616      0.000       0.420       0.561
ar.S.L24       0.0969      0.054      1.802      0.072      -0.009       0.202
sigma2      3.238e+06   1.35e+05     23.966      0.000    2.97e+06     3.5e+06
===================================================================================
Ljung-Box (L1) (Q):                   0.40   Jarque-Bera (JB):              4471.35
Prob(Q):                              0.53   Prob(JB):                         0.00
Heteroskedasticity (H):               0.06   Skew:                             2.56
Prob(H) (two-sided):                  0.00   Kurtosis:                        22.12
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
MAE for the next 32 weeks forcast is, 701.2787020345199
MAPE for the next 32 forcast is, 0.278878890867559

/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(


auto(monthly_caterpillar_df, 12, next_8months)

/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(

                                      SARIMAX Results                                       
============================================================================================
Dep. Variable:                                    y   No. Observations:                  275
Model:             SARIMAX(1, 0, 4)x(1, 0, [1], 12)   Log Likelihood               -2466.475
Date:                              Wed, 28 May 2025   AIC                           4950.949
Time:                                      13:04:51   BIC                           4983.500
Sample:                                  01-31-2001   HQIC                          4964.013
                                       - 11-30-2023                                         
Covariance Type:                                opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
intercept   1258.7035   1384.431      0.909      0.363   -1454.732    3972.139
ar.L1         -0.4917      1.512     -0.325      0.745      -3.456       2.472
ma.L1          1.0677      1.502      0.711      0.477      -1.876       4.011
ma.L2          0.6316      0.850      0.743      0.458      -1.035       2.298
ma.L3          0.4216      0.491      0.859      0.391      -0.541       1.384
ma.L4          0.1437      0.330      0.435      0.663      -0.503       0.791
ar.S.L12       0.8803      0.039     22.655      0.000       0.804       0.956
ma.S.L12      -0.4364      0.086     -5.099      0.000      -0.604      -0.269
sigma2      3.465e+06   2.74e+05     12.661      0.000    2.93e+06       4e+06
===================================================================================
Ljung-Box (L1) (Q):                   0.36   Jarque-Bera (JB):                12.38
Prob(Q):                              0.55   Prob(JB):                         0.00
Heteroskedasticity (H):               0.95   Skew:                             0.23
Prob(H) (two-sided):                  0.80   Kurtosis:                         3.93
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
[2] Covariance matrix is singular or near-singular, with condition number 5.47e+14. Standard errors may be unstable.
MAE for the next 32 weeks forcast is, 1997.430827032542
MAPE for the next 32 forcast is, 0.1860452061994639

/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(
/usr/local/lib/python3.11/dist-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.
  warnings.warn(

Book	MAE	MAPE
The Alchemist	155	30%
The Very Hungry Caterpillar	353	19%

Book	LSTM MAE	LSTM MAPE	XGBoost MAE	XGBoost MAPE
The Alchemist	196	29%	404	83%
The Very Hungry Caterpillar	574	26%	387	20%

Book	Sequential MAE	Sequential MAPE	Parallel MAE	Parallel MAPE
The Alchemist	172	32%	118	24%
The Very Hungry Caterpillar	517	30%	346	17%

Book	XGBoost MAE	XGBoost MAPE	Auto ARIMA MAE	Auto ARIMA MAPE
The Alchemist	699	34%	701	27%
The Very Hungry Caterpillar	2349	24%	1997	19%

Table of Content¶

Importing packages and Dataset¶

Packages¶

Importing dataset¶

Conducting initial data investigation¶

Findings¶

📘 UK Weekly Book Sales - Dataset Summary¶

📂 Dataset Shapes¶

🔍 Columns for all four books¶

🧼 Missing Values¶

📊 Key Averages¶

📅 Date Range¶

⚠️ Notes¶

Plot Findings¶

Classical techniques¶

📊 Time Series Analysis Summary¶

📚 Dataset Overview¶

🔍 Exploratory Analysis¶

🧪 Decomposition Insights¶

⚖️ Stationarity Tests¶

🤖 Model Fitting¶

📈 Model Performance¶

Machine learning and deep learning techniques¶

XGBoost model¶

LSTM¶

📊 Model Performance Comparison: LSTM vs XGBoost¶

📌 Summary¶

Hybrid Model¶

Sequential Combination¶

Parallel Combination¶

🔀 Hybrid Model Performance: Sequential vs Parallel Combinations¶

📌 Summary¶

Monthly Prediction¶

XGBoost¶

Auto Arima¶

📅 Monthly Model Performance: XGBoost vs Auto ARIMA¶

📌 Summary¶

Category	Rows	Columns
Educational	55,286	13
Trade	65,344	13
Specialist	32,827	13
Fiction	73,767	13

Metric	Educational	Trade	Specialist	Fiction
Volume	530	376	87	381
Value (£)	2,743	3,082	1,176	2,448
ASP (£)	5.56	9.21	13.48	7.16
RRP (£)	6.92	12.78	16.30	9.57